Changeset View
Changeset View
Standalone View
Standalone View
head/sys/dev/hyperv/hvsock/hv_sock.c
Property | Old Value | New Value |
---|---|---|
svn:eol-style | null | native \ No newline at end of property |
svn:keywords | null | FreeBSD=%H \ No newline at end of property |
svn:mime-type | null | text/plain \ No newline at end of property |
/*- | |||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD | |||||
* | |||||
* Copyright (c) 2020 Microsoft Corp. | |||||
* All rights reserved. | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | |||||
* modification, are permitted provided that the following conditions | |||||
* are met: | |||||
* 1. Redistributions of source code must retain the above copyright | |||||
* notice unmodified, this list of conditions, and the following | |||||
* disclaimer. | |||||
* 2. Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* | |||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | |||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | |||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*/ | |||||
#include <sys/cdefs.h> | |||||
__FBSDID("$FreeBSD$"); | |||||
#include <sys/param.h> | |||||
#include <sys/bus.h> | |||||
#include <sys/domain.h> | |||||
#include <sys/lock.h> | |||||
#include <sys/kernel.h> | |||||
#include <sys/types.h> | |||||
#include <sys/malloc.h> | |||||
#include <sys/module.h> | |||||
#include <sys/mutex.h> | |||||
#include <sys/proc.h> | |||||
#include <sys/protosw.h> | |||||
#include <sys/socket.h> | |||||
#include <sys/sysctl.h> | |||||
#include <sys/sysproto.h> | |||||
#include <sys/systm.h> | |||||
#include <sys/sockbuf.h> | |||||
#include <sys/sx.h> | |||||
#include <sys/uio.h> | |||||
#include <net/vnet.h> | |||||
#include <dev/hyperv/vmbus/vmbus_reg.h> | |||||
#include "hv_sock.h" | |||||
#define HVSOCK_DBG_NONE 0x0 | |||||
#define HVSOCK_DBG_INFO 0x1 | |||||
#define HVSOCK_DBG_ERR 0x2 | |||||
#define HVSOCK_DBG_VERBOSE 0x3 | |||||
SYSCTL_NODE(_net, OID_AUTO, hvsock, CTLFLAG_RD, 0, "HyperV socket"); | |||||
static int hvs_dbg_level; | |||||
SYSCTL_INT(_net_hvsock, OID_AUTO, hvs_dbg_level, CTLFLAG_RWTUN, &hvs_dbg_level, | |||||
0, "hyperv socket debug level: 0 = none, 1 = info, 2 = error, 3 = verbose"); | |||||
#define HVSOCK_DBG(level, ...) do { \ | |||||
if (hvs_dbg_level >= (level)) \ | |||||
printf(__VA_ARGS__); \ | |||||
} while (0) | |||||
MALLOC_DEFINE(M_HVSOCK, "hyperv_socket", "hyperv socket control structures"); | |||||
/* The MTU is 16KB per host side's design */ | |||||
#define HVSOCK_MTU_SIZE (1024 * 16) | |||||
#define HVSOCK_SEND_BUF_SZ (PAGE_SIZE - sizeof(struct vmpipe_proto_header)) | |||||
#define HVSOCK_HEADER_LEN (sizeof(struct hvs_pkt_header)) | |||||
#define HVSOCK_PKT_LEN(payload_len) (HVSOCK_HEADER_LEN + \ | |||||
roundup2(payload_len, 8) + \ | |||||
sizeof(uint64_t)) | |||||
static struct domain hv_socket_domain; | |||||
/* | |||||
* HyperV Transport sockets | |||||
*/ | |||||
static struct pr_usrreqs hvs_trans_usrreqs = { | |||||
.pru_attach = hvs_trans_attach, | |||||
.pru_bind = hvs_trans_bind, | |||||
.pru_listen = hvs_trans_listen, | |||||
.pru_accept = hvs_trans_accept, | |||||
.pru_connect = hvs_trans_connect, | |||||
.pru_peeraddr = hvs_trans_peeraddr, | |||||
.pru_sockaddr = hvs_trans_sockaddr, | |||||
.pru_soreceive = hvs_trans_soreceive, | |||||
.pru_sosend = hvs_trans_sosend, | |||||
.pru_disconnect = hvs_trans_disconnect, | |||||
.pru_close = hvs_trans_close, | |||||
.pru_detach = hvs_trans_detach, | |||||
.pru_shutdown = hvs_trans_shutdown, | |||||
.pru_abort = hvs_trans_abort, | |||||
}; | |||||
/* | |||||
* Definitions of protocols supported in HyperV socket domain | |||||
*/ | |||||
static struct protosw hv_socket_protosw[] = { | |||||
{ | |||||
.pr_type = SOCK_STREAM, | |||||
.pr_domain = &hv_socket_domain, | |||||
.pr_protocol = HYPERV_SOCK_PROTO_TRANS, | |||||
.pr_flags = PR_CONNREQUIRED, | |||||
.pr_init = hvs_trans_init, | |||||
.pr_usrreqs = &hvs_trans_usrreqs, | |||||
}, | |||||
}; | |||||
static struct domain hv_socket_domain = { | |||||
.dom_family = AF_HYPERV, | |||||
.dom_name = "hyperv", | |||||
.dom_protosw = hv_socket_protosw, | |||||
.dom_protoswNPROTOSW = &hv_socket_protosw[nitems(hv_socket_protosw)] | |||||
}; | |||||
VNET_DOMAIN_SET(hv_socket_); | |||||
#define MAX_PORT ((uint32_t)0xFFFFFFFF) | |||||
#define MIN_PORT ((uint32_t)0x0) | |||||
/* 00000000-facb-11e6-bd58-64006a7986d3 */ | |||||
static const struct hyperv_guid srv_id_template = { | |||||
.hv_guid = { | |||||
0x00, 0x00, 0x00, 0x00, 0xcb, 0xfa, 0xe6, 0x11, | |||||
0xbd, 0x58, 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3 } | |||||
}; | |||||
static int hvsock_br_callback(void *, int, void *); | |||||
static uint32_t hvsock_canread_check(struct hvs_pcb *); | |||||
static uint32_t hvsock_canwrite_check(struct hvs_pcb *); | |||||
static int hvsock_send_data(struct vmbus_channel *chan, | |||||
struct uio *uio, uint32_t to_write, struct sockbuf *sb); | |||||
/* Globals */ | |||||
static struct sx hvs_trans_socks_sx; | |||||
static struct mtx hvs_trans_socks_mtx; | |||||
static LIST_HEAD(, hvs_pcb) hvs_trans_bound_socks; | |||||
static LIST_HEAD(, hvs_pcb) hvs_trans_connected_socks; | |||||
static uint32_t previous_auto_bound_port; | |||||
static void | |||||
hvsock_print_guid(struct hyperv_guid *guid) | |||||
{ | |||||
unsigned char *p = (unsigned char *)guid; | |||||
HVSOCK_DBG(HVSOCK_DBG_INFO, | |||||
"0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x\n", | |||||
*(unsigned int *)p, | |||||
*((unsigned short *) &p[4]), | |||||
*((unsigned short *) &p[6]), | |||||
p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); | |||||
} | |||||
static bool | |||||
is_valid_srv_id(const struct hyperv_guid *id) | |||||
{ | |||||
return !memcmp(&id->hv_guid[4], | |||||
&srv_id_template.hv_guid[4], sizeof(struct hyperv_guid) - 4); | |||||
} | |||||
/*
 * Extract the port number stored in the leading bytes of a service GUID,
 * in host byte order.
 *
 * The value is copied out with memcpy() instead of being read through a
 * casted pointer, so the GUID does not need to be aligned for an
 * unsigned int.
 */
static unsigned int
get_port_by_srv_id(const struct hyperv_guid *srv_id)
{
	unsigned int port;

	memcpy(&port, srv_id, sizeof(port));
	return (port);
}
/*
 * Store 'port' into the leading bytes of a service GUID, in host byte
 * order.  The remaining bytes of the GUID are left untouched.
 *
 * memcpy() is used instead of a store through a casted pointer, so the
 * GUID does not need to be aligned for an unsigned int.
 */
static void
set_port_by_srv_id(struct hyperv_guid *srv_id, unsigned int port)
{
	memcpy(srv_id, &port, sizeof(port));
}
static void | |||||
__hvs_remove_pcb_from_list(struct hvs_pcb *pcb, unsigned char list) | |||||
{ | |||||
struct hvs_pcb *p = NULL; | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb); | |||||
if (!pcb) | |||||
return; | |||||
if (list & HVS_LIST_BOUND) { | |||||
LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next) | |||||
if (p == pcb) | |||||
LIST_REMOVE(p, bound_next); | |||||
} | |||||
if (list & HVS_LIST_CONNECTED) { | |||||
LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next) | |||||
if (p == pcb) | |||||
LIST_REMOVE(pcb, connected_next); | |||||
} | |||||
} | |||||
static void | |||||
__hvs_remove_socket_from_list(struct socket *so, unsigned char list) | |||||
{ | |||||
struct hvs_pcb *pcb = so2hvspcb(so); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb); | |||||
__hvs_remove_pcb_from_list(pcb, list); | |||||
} | |||||
static void | |||||
__hvs_insert_socket_on_list(struct socket *so, unsigned char list) | |||||
{ | |||||
struct hvs_pcb *pcb = so2hvspcb(so); | |||||
if (list & HVS_LIST_BOUND) | |||||
LIST_INSERT_HEAD(&hvs_trans_bound_socks, | |||||
pcb, bound_next); | |||||
if (list & HVS_LIST_CONNECTED) | |||||
LIST_INSERT_HEAD(&hvs_trans_connected_socks, | |||||
pcb, connected_next); | |||||
} | |||||
void | |||||
hvs_remove_socket_from_list(struct socket *so, unsigned char list) | |||||
{ | |||||
if (!so || !so->so_pcb) { | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: socket or so_pcb is null\n", __func__); | |||||
return; | |||||
} | |||||
mtx_lock(&hvs_trans_socks_mtx); | |||||
__hvs_remove_socket_from_list(so, list); | |||||
mtx_unlock(&hvs_trans_socks_mtx); | |||||
} | |||||
static void | |||||
hvs_insert_socket_on_list(struct socket *so, unsigned char list) | |||||
{ | |||||
if (!so || !so->so_pcb) { | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: socket or so_pcb is null\n", __func__); | |||||
return; | |||||
} | |||||
mtx_lock(&hvs_trans_socks_mtx); | |||||
__hvs_insert_socket_on_list(so, list); | |||||
mtx_unlock(&hvs_trans_socks_mtx); | |||||
} | |||||
static struct socket * | |||||
__hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list) | |||||
{ | |||||
struct hvs_pcb *p = NULL; | |||||
if (list & HVS_LIST_BOUND) | |||||
LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next) | |||||
if (p->so != NULL && | |||||
addr->hvs_port == p->local_addr.hvs_port) | |||||
return p->so; | |||||
if (list & HVS_LIST_CONNECTED) | |||||
LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next) | |||||
if (p->so != NULL && | |||||
addr->hvs_port == p->local_addr.hvs_port) | |||||
return p->so; | |||||
return NULL; | |||||
} | |||||
static struct socket * | |||||
hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list) | |||||
{ | |||||
struct socket *s = NULL; | |||||
mtx_lock(&hvs_trans_socks_mtx); | |||||
s = __hvs_find_socket_on_list(addr, list); | |||||
mtx_unlock(&hvs_trans_socks_mtx); | |||||
return s; | |||||
} | |||||
static inline void | |||||
hvs_addr_set(struct sockaddr_hvs *addr, unsigned int port) | |||||
{ | |||||
memset(addr, 0, sizeof(*addr)); | |||||
addr->sa_family = AF_HYPERV; | |||||
addr->hvs_port = port; | |||||
} | |||||
/*
 * Initialize 'addr' as an AF_HYPERV address whose port is the one encoded
 * in the service GUID 'svr_id'.
 */
void
hvs_addr_init(struct sockaddr_hvs *addr, const struct hyperv_guid *svr_id)
{
	unsigned int port;

	port = get_port_by_srv_id(svr_id);
	hvs_addr_set(addr, port);
}
int | |||||
hvs_trans_lock(void) | |||||
{ | |||||
sx_xlock(&hvs_trans_socks_sx); | |||||
return (0); | |||||
} | |||||
void | |||||
hvs_trans_unlock(void) | |||||
{ | |||||
sx_xunlock(&hvs_trans_socks_sx); | |||||
} | |||||
void | |||||
hvs_trans_init(void) | |||||
{ | |||||
/* Skip initialization of globals for non-default instances. */ | |||||
if (!IS_DEFAULT_VNET(curvnet)) | |||||
return; | |||||
if (vm_guest != VM_GUEST_HV) | |||||
return; | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: HyperV Socket hvs_trans_init called\n", __func__); | |||||
/* Initialize Globals */ | |||||
previous_auto_bound_port = MAX_PORT; | |||||
sx_init(&hvs_trans_socks_sx, "hvs_trans_sock_sx"); | |||||
mtx_init(&hvs_trans_socks_mtx, | |||||
"hvs_trans_socks_mtx", NULL, MTX_DEF); | |||||
LIST_INIT(&hvs_trans_bound_socks); | |||||
LIST_INIT(&hvs_trans_connected_socks); | |||||
} | |||||
/* | |||||
* Called in two cases: | |||||
* 1) When user calls socket(); | |||||
* 2) When we accept new incoming conneciton and call sonewconn(). | |||||
*/ | |||||
int | |||||
hvs_trans_attach(struct socket *so, int proto, struct thread *td) | |||||
{ | |||||
struct hvs_pcb *pcb = so2hvspcb(so); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: HyperV Socket hvs_trans_attach called\n", __func__); | |||||
if (so->so_type != SOCK_STREAM) | |||||
return (ESOCKTNOSUPPORT); | |||||
if (proto != 0 && proto != HYPERV_SOCK_PROTO_TRANS) | |||||
return (EPROTONOSUPPORT); | |||||
if (pcb != NULL) | |||||
return (EISCONN); | |||||
pcb = malloc(sizeof(struct hvs_pcb), M_HVSOCK, M_NOWAIT | M_ZERO); | |||||
if (pcb == NULL) | |||||
return (ENOMEM); | |||||
pcb->so = so; | |||||
so->so_pcb = (void *)pcb; | |||||
return (0); | |||||
} | |||||
void | |||||
hvs_trans_detach(struct socket *so) | |||||
{ | |||||
struct hvs_pcb *pcb; | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: HyperV Socket hvs_trans_detach called\n", __func__); | |||||
(void) hvs_trans_lock(); | |||||
pcb = so2hvspcb(so); | |||||
if (pcb == NULL) { | |||||
hvs_trans_unlock(); | |||||
return; | |||||
} | |||||
if (SOLISTENING(so)) { | |||||
bzero(pcb, sizeof(*pcb)); | |||||
free(pcb, M_HVSOCK); | |||||
} | |||||
so->so_pcb = NULL; | |||||
hvs_trans_unlock(); | |||||
} | |||||
int | |||||
hvs_trans_bind(struct socket *so, struct sockaddr *addr, struct thread *td) | |||||
{ | |||||
struct hvs_pcb *pcb = so2hvspcb(so); | |||||
struct sockaddr_hvs *sa = (struct sockaddr_hvs *) addr; | |||||
int error = 0; | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: HyperV Socket hvs_trans_bind called\n", __func__); | |||||
if (sa == NULL) { | |||||
return (EINVAL); | |||||
} | |||||
if (pcb == NULL) { | |||||
return (EINVAL); | |||||
} | |||||
if (sa->sa_family != AF_HYPERV) { | |||||
HVSOCK_DBG(HVSOCK_DBG_ERR, | |||||
"%s: Not supported, sa_family is %u\n", | |||||
__func__, sa->sa_family); | |||||
return (EAFNOSUPPORT); | |||||
} | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: binding port = 0x%x\n", __func__, sa->hvs_port); | |||||
mtx_lock(&hvs_trans_socks_mtx); | |||||
if (__hvs_find_socket_on_list(sa, | |||||
HVS_LIST_BOUND | HVS_LIST_CONNECTED)) { | |||||
error = EADDRINUSE; | |||||
} else { | |||||
/* | |||||
* The address is available for us to bind. | |||||
* Add socket to the bound list. | |||||
*/ | |||||
hvs_addr_set(&pcb->local_addr, sa->hvs_port); | |||||
hvs_addr_set(&pcb->remote_addr, HVADDR_PORT_ANY); | |||||
__hvs_insert_socket_on_list(so, HVS_LIST_BOUND); | |||||
} | |||||
mtx_unlock(&hvs_trans_socks_mtx); | |||||
return (error); | |||||
} | |||||
int | |||||
hvs_trans_listen(struct socket *so, int backlog, struct thread *td) | |||||
{ | |||||
struct hvs_pcb *pcb = so2hvspcb(so); | |||||
struct socket *bound_so; | |||||
int error; | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: HyperV Socket hvs_trans_listen called\n", __func__); | |||||
if (pcb == NULL) | |||||
return (EINVAL); | |||||
/* Check if the address is already bound and it was by us. */ | |||||
bound_so = hvs_find_socket_on_list(&pcb->local_addr, HVS_LIST_BOUND); | |||||
if (bound_so == NULL || bound_so != so) { | |||||
HVSOCK_DBG(HVSOCK_DBG_ERR, | |||||
"%s: Address not bound or not by us.\n", __func__); | |||||
return (EADDRNOTAVAIL); | |||||
} | |||||
SOCK_LOCK(so); | |||||
error = solisten_proto_check(so); | |||||
if (error == 0) | |||||
solisten_proto(so, backlog); | |||||
SOCK_UNLOCK(so); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: HyperV Socket listen error = %d\n", __func__, error); | |||||
return (error); | |||||
} | |||||
int | |||||
hvs_trans_accept(struct socket *so, struct sockaddr **nam) | |||||
{ | |||||
struct hvs_pcb *pcb = so2hvspcb(so); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: HyperV Socket hvs_trans_accept called\n", __func__); | |||||
if (pcb == NULL) | |||||
return (EINVAL); | |||||
*nam = sodupsockaddr((struct sockaddr *) &pcb->remote_addr, | |||||
M_NOWAIT); | |||||
return ((*nam == NULL) ? ENOMEM : 0); | |||||
} | |||||
int | |||||
hvs_trans_connect(struct socket *so, struct sockaddr *nam, struct thread *td) | |||||
{ | |||||
struct hvs_pcb *pcb = so2hvspcb(so); | |||||
struct sockaddr_hvs *raddr = (struct sockaddr_hvs *)nam; | |||||
bool found_auto_bound_port = false; | |||||
int i, error = 0; | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: HyperV Socket hvs_trans_connect called, remote port is %x\n", | |||||
__func__, raddr->hvs_port); | |||||
if (pcb == NULL) | |||||
return (EINVAL); | |||||
/* Verify the remote address */ | |||||
if (raddr == NULL) | |||||
return (EINVAL); | |||||
if (raddr->sa_family != AF_HYPERV) | |||||
return (EAFNOSUPPORT); | |||||
mtx_lock(&hvs_trans_socks_mtx); | |||||
if (so->so_state & | |||||
(SS_ISCONNECTED|SS_ISDISCONNECTING|SS_ISCONNECTING)) { | |||||
HVSOCK_DBG(HVSOCK_DBG_ERR, | |||||
"%s: socket connect in progress\n", | |||||
__func__); | |||||
error = EINPROGRESS; | |||||
goto out; | |||||
} | |||||
/* | |||||
* Find an available port for us to auto bind the local | |||||
* address. | |||||
*/ | |||||
hvs_addr_set(&pcb->local_addr, 0); | |||||
for (i = previous_auto_bound_port - 1; | |||||
i != previous_auto_bound_port; i --) { | |||||
if (i == MIN_PORT) | |||||
i = MAX_PORT; | |||||
pcb->local_addr.hvs_port = i; | |||||
if (__hvs_find_socket_on_list(&pcb->local_addr, | |||||
HVS_LIST_BOUND | HVS_LIST_CONNECTED) == NULL) { | |||||
found_auto_bound_port = true; | |||||
previous_auto_bound_port = i; | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: found local bound port is %x\n", | |||||
__func__, pcb->local_addr.hvs_port); | |||||
break; | |||||
} | |||||
} | |||||
if (found_auto_bound_port == true) { | |||||
/* Found available port for auto bound, put on list */ | |||||
__hvs_insert_socket_on_list(so, HVS_LIST_BOUND); | |||||
/* Set VM service ID */ | |||||
pcb->vm_srv_id = srv_id_template; | |||||
set_port_by_srv_id(&pcb->vm_srv_id, pcb->local_addr.hvs_port); | |||||
/* Set host service ID and remote port */ | |||||
pcb->host_srv_id = srv_id_template; | |||||
set_port_by_srv_id(&pcb->host_srv_id, raddr->hvs_port); | |||||
hvs_addr_set(&pcb->remote_addr, raddr->hvs_port); | |||||
/* Change the socket state to SS_ISCONNECTING */ | |||||
soisconnecting(so); | |||||
} else { | |||||
HVSOCK_DBG(HVSOCK_DBG_ERR, | |||||
"%s: No local port available for auto bound\n", | |||||
__func__); | |||||
error = EADDRINUSE; | |||||
} | |||||
HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect vm_srv_id is "); | |||||
hvsock_print_guid(&pcb->vm_srv_id); | |||||
HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect host_srv_id is "); | |||||
hvsock_print_guid(&pcb->host_srv_id); | |||||
out: | |||||
mtx_unlock(&hvs_trans_socks_mtx); | |||||
if (found_auto_bound_port == true) | |||||
vmbus_req_tl_connect(&pcb->vm_srv_id, &pcb->host_srv_id); | |||||
return (error); | |||||
} | |||||
int | |||||
hvs_trans_disconnect(struct socket *so) | |||||
{ | |||||
struct hvs_pcb *pcb; | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: HyperV Socket hvs_trans_disconnect called\n", __func__); | |||||
(void) hvs_trans_lock(); | |||||
pcb = so2hvspcb(so); | |||||
if (pcb == NULL) { | |||||
hvs_trans_unlock(); | |||||
return (EINVAL); | |||||
} | |||||
/* If socket is already disconnected, skip this */ | |||||
if ((so->so_state & SS_ISDISCONNECTED) == 0) | |||||
soisdisconnecting(so); | |||||
hvs_trans_unlock(); | |||||
return (0); | |||||
} | |||||
/* Map MSG_DONTWAIT in the caller's flags to the sblock() wait argument. */
#define	SBLOCKWAIT(f)	((((f) & MSG_DONTWAIT) != 0) ? 0 : SBL_WAIT)

/*
 * Argument bundle handed to hvsock_br_callback() through
 * vmbus_chan_recv_peek_call() by the receive path.
 */
struct hvs_callback_arg {
	struct uio *uio;	/* caller's I/O descriptor */
	struct sockbuf *sb;	/* socket buffer locked by the caller */
};
int | |||||
hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr, | |||||
struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) | |||||
{ | |||||
struct hvs_pcb *pcb = so2hvspcb(so); | |||||
struct sockbuf *sb; | |||||
ssize_t orig_resid; | |||||
uint32_t canread, to_read; | |||||
int flags, error = 0; | |||||
struct hvs_callback_arg cbarg; | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: HyperV Socket hvs_trans_soreceive called\n", __func__); | |||||
if (so->so_type != SOCK_STREAM) | |||||
return (EINVAL); | |||||
if (pcb == NULL) | |||||
return (EINVAL); | |||||
if (flagsp != NULL) | |||||
flags = *flagsp &~ MSG_EOR; | |||||
else | |||||
flags = 0; | |||||
if (flags & MSG_PEEK) | |||||
return (EOPNOTSUPP); | |||||
/* If no space to copy out anything */ | |||||
if (uio->uio_resid == 0 || uio->uio_rw != UIO_READ) | |||||
return (EINVAL); | |||||
sb = &so->so_rcv; | |||||
orig_resid = uio->uio_resid; | |||||
/* Prevent other readers from entering the socket. */ | |||||
error = sblock(sb, SBLOCKWAIT(flags)); | |||||
if (error) { | |||||
HVSOCK_DBG(HVSOCK_DBG_ERR, | |||||
"%s: sblock returned error = %d\n", __func__, error); | |||||
return (error); | |||||
} | |||||
SOCKBUF_LOCK(sb); | |||||
cbarg.uio = uio; | |||||
cbarg.sb = sb; | |||||
/* | |||||
* If the socket is closing, there might still be some data | |||||
* in rx br to read. However we need to make sure | |||||
* the channel is still open. | |||||
*/ | |||||
if ((sb->sb_state & SBS_CANTRCVMORE) && | |||||
(so->so_state & SS_ISDISCONNECTED)) { | |||||
/* Other thread already closed the channel */ | |||||
error = EPIPE; | |||||
goto out; | |||||
} | |||||
while (true) { | |||||
while (uio->uio_resid > 0 && | |||||
(canread = hvsock_canread_check(pcb)) > 0) { | |||||
to_read = MIN(canread, uio->uio_resid); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: to_read = %u, skip = %u\n", __func__, to_read, | |||||
(unsigned int)(sizeof(struct hvs_pkt_header) + | |||||
pcb->recv_data_off)); | |||||
error = vmbus_chan_recv_peek_call(pcb->chan, to_read, | |||||
sizeof(struct hvs_pkt_header) + pcb->recv_data_off, | |||||
hvsock_br_callback, (void *)&cbarg); | |||||
/* | |||||
* It is possible socket is disconnected becasue | |||||
* we released lock in hvsock_br_callback. So we | |||||
* need to check the state to make sure it is not | |||||
* disconnected. | |||||
*/ | |||||
if (error || so->so_state & SS_ISDISCONNECTED) { | |||||
break; | |||||
} | |||||
pcb->recv_data_len -= to_read; | |||||
pcb->recv_data_off += to_read; | |||||
} | |||||
if (error) | |||||
break; | |||||
/* Abort if socket has reported problems. */ | |||||
if (so->so_error) { | |||||
if (so->so_error == ESHUTDOWN && | |||||
orig_resid > uio->uio_resid) { | |||||
/* | |||||
* Although we got a FIN, we also received | |||||
* some data in this round. Delivery it | |||||
* to user. | |||||
*/ | |||||
error = 0; | |||||
} else { | |||||
if (so->so_error != ESHUTDOWN) | |||||
error = so->so_error; | |||||
} | |||||
break; | |||||
} | |||||
/* Cannot received more. */ | |||||
if (sb->sb_state & SBS_CANTRCVMORE) | |||||
break; | |||||
/* We are done if buffer has been filled */ | |||||
if (uio->uio_resid == 0) | |||||
break; | |||||
if (!(flags & MSG_WAITALL) && orig_resid > uio->uio_resid) | |||||
break; | |||||
/* Buffer ring is empty and we shall not block */ | |||||
if ((so->so_state & SS_NBIO) || | |||||
(flags & (MSG_DONTWAIT|MSG_NBIO))) { | |||||
if (orig_resid == uio->uio_resid) { | |||||
/* We have not read anything */ | |||||
error = EAGAIN; | |||||
} | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: non blocked read return, error %d.\n", | |||||
__func__, error); | |||||
break; | |||||
} | |||||
/* | |||||
* Wait and block until (more) data comes in. | |||||
* Note: Drops the sockbuf lock during wait. | |||||
*/ | |||||
error = sbwait(sb); | |||||
if (error) | |||||
break; | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: wake up from sbwait, read available is %u\n", | |||||
__func__, vmbus_chan_read_available(pcb->chan)); | |||||
} | |||||
out: | |||||
SOCKBUF_UNLOCK(sb); | |||||
sbunlock(sb); | |||||
/* We recieved a FIN in this call */ | |||||
if (so->so_error == ESHUTDOWN) { | |||||
if (so->so_snd.sb_state & SBS_CANTSENDMORE) { | |||||
/* Send has already closed */ | |||||
soisdisconnecting(so); | |||||
} else { | |||||
/* Just close the receive side */ | |||||
socantrcvmore(so); | |||||
} | |||||
} | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: returning error = %d, so_error = %d\n", | |||||
__func__, error, so->so_error); | |||||
return (error); | |||||
} | |||||
int | |||||
hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, | |||||
struct mbuf *top, struct mbuf *controlp, int flags, struct thread *td) | |||||
{ | |||||
struct hvs_pcb *pcb = so2hvspcb(so); | |||||
struct sockbuf *sb; | |||||
ssize_t orig_resid; | |||||
uint32_t canwrite, to_write; | |||||
int error = 0; | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: HyperV Socket hvs_trans_sosend called, uio_resid = %lu\n", | |||||
__func__, uio->uio_resid); | |||||
if (so->so_type != SOCK_STREAM) | |||||
return (EINVAL); | |||||
if (pcb == NULL) | |||||
return (EINVAL); | |||||
/* If nothing to send */ | |||||
if (uio->uio_resid == 0 || uio->uio_rw != UIO_WRITE) | |||||
return (EINVAL); | |||||
sb = &so->so_snd; | |||||
orig_resid = uio->uio_resid; | |||||
/* Prevent other writers from entering the socket. */ | |||||
error = sblock(sb, SBLOCKWAIT(flags)); | |||||
if (error) { | |||||
HVSOCK_DBG(HVSOCK_DBG_ERR, | |||||
"%s: sblock returned error = %d\n", __func__, error); | |||||
return (error); | |||||
} | |||||
SOCKBUF_LOCK(sb); | |||||
if ((sb->sb_state & SBS_CANTSENDMORE) || | |||||
so->so_error == ESHUTDOWN) { | |||||
error = EPIPE; | |||||
goto out; | |||||
} | |||||
while (uio->uio_resid > 0) { | |||||
canwrite = hvsock_canwrite_check(pcb); | |||||
if (canwrite == 0) { | |||||
/* We have sent some data */ | |||||
if (orig_resid > uio->uio_resid) | |||||
break; | |||||
/* | |||||
* We have not sent any data and it is | |||||
* non-blocked io | |||||
*/ | |||||
if (so->so_state & SS_NBIO || | |||||
(flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) { | |||||
error = EWOULDBLOCK; | |||||
break; | |||||
} else { | |||||
/* | |||||
* We are here because there is no space on | |||||
* send buffer ring. Signal the other side | |||||
* to read and free more space. | |||||
* Sleep wait until space avaiable to send | |||||
* Note: Drops the sockbuf lock during wait. | |||||
*/ | |||||
error = sbwait(sb); | |||||
if (error) | |||||
break; | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: wake up from sbwait, space avail on " | |||||
"tx ring is %u\n", | |||||
__func__, | |||||
vmbus_chan_write_available(pcb->chan)); | |||||
continue; | |||||
} | |||||
} | |||||
to_write = MIN(canwrite, uio->uio_resid); | |||||
to_write = MIN(to_write, HVSOCK_SEND_BUF_SZ); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: canwrite is %u, to_write = %u\n", __func__, | |||||
canwrite, to_write); | |||||
error = hvsock_send_data(pcb->chan, uio, to_write, sb); | |||||
if (error) | |||||
break; | |||||
} | |||||
out: | |||||
SOCKBUF_UNLOCK(sb); | |||||
sbunlock(sb); | |||||
return (error); | |||||
} | |||||
int | |||||
hvs_trans_peeraddr(struct socket *so, struct sockaddr **nam) | |||||
{ | |||||
struct hvs_pcb *pcb = so2hvspcb(so); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: HyperV Socket hvs_trans_peeraddr called\n", __func__); | |||||
if (pcb == NULL) | |||||
return (EINVAL); | |||||
*nam = sodupsockaddr((struct sockaddr *) &pcb->remote_addr, M_NOWAIT); | |||||
return ((*nam == NULL)? ENOMEM : 0); | |||||
} | |||||
int | |||||
hvs_trans_sockaddr(struct socket *so, struct sockaddr **nam) | |||||
{ | |||||
struct hvs_pcb *pcb = so2hvspcb(so); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: HyperV Socket hvs_trans_sockaddr called\n", __func__); | |||||
if (pcb == NULL) | |||||
return (EINVAL); | |||||
*nam = sodupsockaddr((struct sockaddr *) &pcb->local_addr, M_NOWAIT); | |||||
return ((*nam == NULL)? ENOMEM : 0); | |||||
} | |||||
void | |||||
hvs_trans_close(struct socket *so) | |||||
{ | |||||
struct hvs_pcb *pcb; | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: HyperV Socket hvs_trans_close called\n", __func__); | |||||
(void) hvs_trans_lock(); | |||||
pcb = so2hvspcb(so); | |||||
if (!pcb) { | |||||
hvs_trans_unlock(); | |||||
return; | |||||
} | |||||
if (so->so_state & SS_ISCONNECTED) { | |||||
/* Send a FIN to peer */ | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: hvs_trans_close sending a FIN to host\n", __func__); | |||||
(void) hvsock_send_data(pcb->chan, NULL, 0, NULL); | |||||
} | |||||
if (so->so_state & | |||||
(SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) | |||||
soisdisconnected(so); | |||||
pcb->chan = NULL; | |||||
pcb->so = NULL; | |||||
if (SOLISTENING(so)) { | |||||
mtx_lock(&hvs_trans_socks_mtx); | |||||
/* Remove from bound list */ | |||||
__hvs_remove_socket_from_list(so, HVS_LIST_BOUND); | |||||
mtx_unlock(&hvs_trans_socks_mtx); | |||||
} | |||||
hvs_trans_unlock(); | |||||
return; | |||||
} | |||||
void | |||||
hvs_trans_abort(struct socket *so) | |||||
{ | |||||
struct hvs_pcb *pcb = so2hvspcb(so); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: HyperV Socket hvs_trans_abort called\n", __func__); | |||||
(void) hvs_trans_lock(); | |||||
if (pcb == NULL) { | |||||
hvs_trans_unlock(); | |||||
return; | |||||
} | |||||
if (SOLISTENING(so)) { | |||||
mtx_lock(&hvs_trans_socks_mtx); | |||||
/* Remove from bound list */ | |||||
__hvs_remove_socket_from_list(so, HVS_LIST_BOUND); | |||||
mtx_unlock(&hvs_trans_socks_mtx); | |||||
} | |||||
if (so->so_state & SS_ISCONNECTED) { | |||||
(void) sodisconnect(so); | |||||
} | |||||
hvs_trans_unlock(); | |||||
return; | |||||
} | |||||
int | |||||
hvs_trans_shutdown(struct socket *so) | |||||
{ | |||||
struct hvs_pcb *pcb = so2hvspcb(so); | |||||
struct sockbuf *sb; | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: HyperV Socket hvs_trans_shutdown called\n", __func__); | |||||
if (pcb == NULL) | |||||
return (EINVAL); | |||||
/* | |||||
* Only get called with the shutdown method is SHUT_WR or | |||||
* SHUT_RDWR. | |||||
* When the method is SHUT_RD or SHUT_RDWR, the caller | |||||
* already set the SBS_CANTRCVMORE on receive side socket | |||||
* buffer. | |||||
*/ | |||||
if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) { | |||||
/* | |||||
* SHUT_WR only case. | |||||
* Receive side is still open. Just close | |||||
* the send side. | |||||
*/ | |||||
socantsendmore(so); | |||||
} else { | |||||
/* SHUT_RDWR case */ | |||||
if (so->so_state & SS_ISCONNECTED) { | |||||
/* Send a FIN to peer */ | |||||
sb = &so->so_snd; | |||||
SOCKBUF_LOCK(sb); | |||||
(void) hvsock_send_data(pcb->chan, NULL, 0, sb); | |||||
SOCKBUF_UNLOCK(sb); | |||||
soisdisconnecting(so); | |||||
} | |||||
} | |||||
return (0); | |||||
} | |||||
/* In the VM, we support Hyper-V Sockets with AF_HYPERV, and the endpoint is | |||||
* <port> (see struct sockaddr_hvs). | |||||
* | |||||
* On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV: | |||||
* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user- | |||||
* guide/make-integration-service, and the endpoint is <VmID, ServiceId> with | |||||
* the below sockaddr: | |||||
* | |||||
* struct SOCKADDR_HV | |||||
* { | |||||
* ADDRESS_FAMILY Family; | |||||
* USHORT Reserved; | |||||
* GUID VmId; | |||||
* GUID ServiceId; | |||||
* }; | |||||
* Note: VmID is not used by FreeBSD VM and actually it isn't transmitted via | |||||
* VMBus, because here it's obvious the host and the VM can easily identify | |||||
* each other. Though the VmID is useful on the host, especially in the case | |||||
* of Windows container, FreeBSD VM doesn't need it at all. | |||||
* | |||||
* To be compatible with similar infrastructure in Linux VMs, we have | |||||
* to limit the available GUID space of SOCKADDR_HV so that we can create | |||||
* a mapping between FreeBSD AF_HYPERV port and SOCKADDR_HV Service GUID. | |||||
* The rule of writing Hyper-V Sockets apps on the host and in FreeBSD VM is: | |||||
* | |||||
**************************************************************************** | |||||
* The only valid Service GUIDs, from the perspectives of both the host and * | |||||
* FreeBSD VM, that can be connected by the other end, must conform to this * | |||||
* format: <port>-facb-11e6-bd58-64006a7986d3. * | |||||
**************************************************************************** | |||||
* | |||||
* When we write apps on the host to connect(), the GUID ServiceID is used. | |||||
* When we write apps in FreeBSD VM to connect(), we only need to specify the | |||||
* port and the driver will form the GUID and use that to request the host. | |||||
* | |||||
* From the perspective of FreeBSD VM, the remote ephemeral port (i.e. the | |||||
* auto-generated remote port for a connect request initiated by the host's | |||||
 * connect()) is set to HVADDR_PORT_UNKNOWN, which is not really used on the
* FreeBSD guest. | |||||
*/ | |||||
/*
 * Ring buffer sizing.
 *
 * Older Hyper-V hosts (vmbus version 'VMBUS_VERSION_WIN10' or before)
 * restrict the Hyper-V socket ring buffer size to six 4K pages.  Newer
 * Hyper-V hosts don't have this limit, so on those the size is derived
 * from the socket buffer high-water marks and capped at
 * HVS_RINGBUF_MAX_SIZE (see hvsock_open_channel()).
 */
#define HVS_RINGBUF_RCV_SIZE	(PAGE_SIZE * 6)
#define HVS_RINGBUF_SND_SIZE	(PAGE_SIZE * 6)
#define HVS_RINGBUF_MAX_SIZE	(PAGE_SIZE * 64)
struct hvsock_sc { | |||||
device_t dev; | |||||
struct hvs_pcb *pcb; | |||||
struct vmbus_channel *channel; | |||||
}; | |||||
/*
 * Report whether the channel's receive ring currently holds at least as
 * many bytes as a zero-payload hvsock packet, i.e. HVSOCK_PKT_LEN(0).
 */
static bool
hvsock_chan_readable(struct vmbus_channel *chan)
{
	const uint32_t avail = vmbus_chan_read_available(chan);

	if (avail < HVSOCK_PKT_LEN(0))
		return (false);
	return (true);
}
static void | |||||
hvsock_chan_cb(struct vmbus_channel *chan, void *context) | |||||
{ | |||||
struct hvs_pcb *pcb = (struct hvs_pcb *) context; | |||||
struct socket *so; | |||||
uint32_t canwrite; | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: host send us a wakeup on rb data, pcb = %p\n", | |||||
__func__, pcb); | |||||
/* | |||||
* Check if the socket is still attached and valid. | |||||
* Here we know channel is still open. Need to make | |||||
* sure the socket has not been closed or freed. | |||||
*/ | |||||
(void) hvs_trans_lock(); | |||||
so = hsvpcb2so(pcb); | |||||
if (pcb->chan != NULL && so != NULL) { | |||||
/* | |||||
* Wake up reader if there are data to read. | |||||
*/ | |||||
SOCKBUF_LOCK(&(so)->so_rcv); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: read available = %u\n", __func__, | |||||
vmbus_chan_read_available(pcb->chan)); | |||||
if (hvsock_chan_readable(pcb->chan)) | |||||
sorwakeup_locked(so); | |||||
else | |||||
SOCKBUF_UNLOCK(&(so)->so_rcv); | |||||
/* | |||||
* Wake up sender if space becomes available to write. | |||||
*/ | |||||
SOCKBUF_LOCK(&(so)->so_snd); | |||||
canwrite = hvsock_canwrite_check(pcb); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: canwrite = %u\n", __func__, canwrite); | |||||
if (canwrite > 0) { | |||||
sowwakeup_locked(so); | |||||
} else { | |||||
SOCKBUF_UNLOCK(&(so)->so_snd); | |||||
} | |||||
} | |||||
hvs_trans_unlock(); | |||||
return; | |||||
} | |||||
static int | |||||
hvsock_br_callback(void *datap, int cplen, void *cbarg) | |||||
{ | |||||
struct hvs_callback_arg *arg = (struct hvs_callback_arg *)cbarg; | |||||
struct uio *uio = arg->uio; | |||||
struct sockbuf *sb = arg->sb; | |||||
int error = 0; | |||||
if (cbarg == NULL || datap == NULL) | |||||
return (EINVAL); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: called, uio_rw = %s, uio_resid = %lu, cplen = %u, " | |||||
"datap = %p\n", | |||||
__func__, (uio->uio_rw == UIO_READ) ? "read from br":"write to br", | |||||
uio->uio_resid, cplen, datap); | |||||
if (sb) | |||||
SOCKBUF_UNLOCK(sb); | |||||
error = uiomove(datap, cplen, uio); | |||||
if (sb) | |||||
SOCKBUF_LOCK(sb); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: after uiomove, uio_resid = %lu, error = %d\n", | |||||
__func__, uio->uio_resid, error); | |||||
return (error); | |||||
} | |||||
static int | |||||
hvsock_send_data(struct vmbus_channel *chan, struct uio *uio, | |||||
uint32_t to_write, struct sockbuf *sb) | |||||
{ | |||||
struct hvs_pkt_header hvs_pkt; | |||||
int hvs_pkthlen, hvs_pktlen, pad_pktlen, hlen, error = 0; | |||||
uint64_t pad = 0; | |||||
struct iovec iov[3]; | |||||
struct hvs_callback_arg cbarg; | |||||
if (chan == NULL) | |||||
return (ENOTCONN); | |||||
hlen = sizeof(struct vmbus_chanpkt_hdr); | |||||
hvs_pkthlen = sizeof(struct hvs_pkt_header); | |||||
hvs_pktlen = hvs_pkthlen + to_write; | |||||
pad_pktlen = VMBUS_CHANPKT_TOTLEN(hvs_pktlen); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: hlen = %u, hvs_pkthlen = %u, hvs_pktlen = %u, " | |||||
"pad_pktlen = %u, data_len = %u\n", | |||||
__func__, hlen, hvs_pkthlen, hvs_pktlen, pad_pktlen, to_write); | |||||
hvs_pkt.chan_pkt_hdr.cph_type = VMBUS_CHANPKT_TYPE_INBAND; | |||||
hvs_pkt.chan_pkt_hdr.cph_flags = 0; | |||||
VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_hlen, hlen); | |||||
VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_tlen, pad_pktlen); | |||||
hvs_pkt.chan_pkt_hdr.cph_xactid = 0; | |||||
hvs_pkt.vmpipe_pkt_hdr.vmpipe_pkt_type = 1; | |||||
hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size = to_write; | |||||
cbarg.uio = uio; | |||||
cbarg.sb = sb; | |||||
if (uio && to_write > 0) { | |||||
iov[0].iov_base = &hvs_pkt; | |||||
iov[0].iov_len = hvs_pkthlen; | |||||
iov[1].iov_base = NULL; | |||||
iov[1].iov_len = to_write; | |||||
iov[2].iov_base = &pad; | |||||
iov[2].iov_len = pad_pktlen - hvs_pktlen; | |||||
error = vmbus_chan_iov_send(chan, iov, 3, | |||||
hvsock_br_callback, &cbarg); | |||||
} else { | |||||
if (to_write == 0) { | |||||
iov[0].iov_base = &hvs_pkt; | |||||
iov[0].iov_len = hvs_pkthlen; | |||||
iov[1].iov_base = &pad; | |||||
iov[1].iov_len = pad_pktlen - hvs_pktlen; | |||||
error = vmbus_chan_iov_send(chan, iov, 2, NULL, NULL); | |||||
} | |||||
} | |||||
if (error) { | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: error = %d\n", __func__, error); | |||||
} | |||||
return (error); | |||||
} | |||||
/* | |||||
* Check if we have data on current ring buffer to read | |||||
* or not. If not, advance the ring buffer read index to | |||||
* next packet. Update the recev_data_len and recev_data_off | |||||
* to new value. | |||||
* Return the number of bytes can read. | |||||
*/ | |||||
static uint32_t | |||||
hvsock_canread_check(struct hvs_pcb *pcb) | |||||
{ | |||||
uint32_t advance; | |||||
uint32_t tlen, hlen, dlen; | |||||
uint32_t bytes_canread = 0; | |||||
int error; | |||||
if (pcb == NULL || pcb->chan == NULL) { | |||||
pcb->so->so_error = EIO; | |||||
return (0); | |||||
} | |||||
/* Still have data not read yet on current packet */ | |||||
if (pcb->recv_data_len > 0) | |||||
return (pcb->recv_data_len); | |||||
if (pcb->rb_init) | |||||
advance = | |||||
VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen); | |||||
else | |||||
advance = 0; | |||||
bytes_canread = vmbus_chan_read_available(pcb->chan); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: bytes_canread on br = %u, advance = %u\n", | |||||
__func__, bytes_canread, advance); | |||||
if (pcb->rb_init && bytes_canread == (advance + sizeof(uint64_t))) { | |||||
/* | |||||
* Nothing to read. Need to advance the rindex before | |||||
* calling sbwait, so host knows to wake us up when data | |||||
* is available to read on rb. | |||||
*/ | |||||
error = vmbus_chan_recv_idxadv(pcb->chan, advance); | |||||
if (error) { | |||||
HVSOCK_DBG(HVSOCK_DBG_ERR, | |||||
"%s: after calling vmbus_chan_recv_idxadv, " | |||||
"got error = %d\n", __func__, error); | |||||
return (0); | |||||
} else { | |||||
pcb->rb_init = false; | |||||
pcb->recv_data_len = 0; | |||||
pcb->recv_data_off = 0; | |||||
bytes_canread = vmbus_chan_read_available(pcb->chan); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: advanced %u bytes, " | |||||
" bytes_canread on br now = %u\n", | |||||
__func__, advance, bytes_canread); | |||||
if (bytes_canread == 0) | |||||
return (0); | |||||
else | |||||
advance = 0; | |||||
} | |||||
} | |||||
if (bytes_canread < | |||||
advance + (sizeof(struct hvs_pkt_header) + sizeof(uint64_t))) | |||||
return (0); | |||||
error = vmbus_chan_recv_peek(pcb->chan, &pcb->hvs_pkt, | |||||
sizeof(struct hvs_pkt_header), advance); | |||||
/* Don't have anything to read */ | |||||
if (error) { | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: after calling vmbus_chan_recv_peek, got error = %d\n", | |||||
__func__, error); | |||||
return (0); | |||||
} | |||||
/* | |||||
* We just read in a new packet header. Do some sanity checks. | |||||
*/ | |||||
tlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen); | |||||
hlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_hlen); | |||||
dlen = pcb->hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size; | |||||
if (__predict_false(hlen < sizeof(struct vmbus_chanpkt_hdr)) || | |||||
__predict_false(hlen > tlen) || | |||||
__predict_false(tlen < dlen + sizeof(struct hvs_pkt_header))) { | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"invalid tlen(%u), hlen(%u) or dlen(%u)\n", | |||||
tlen, hlen, dlen); | |||||
pcb->so->so_error = EIO; | |||||
return (0); | |||||
} | |||||
if (pcb->rb_init == false) | |||||
pcb->rb_init = true; | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"Got new pkt tlen(%u), hlen(%u) or dlen(%u)\n", | |||||
tlen, hlen, dlen); | |||||
/* The other side has sent a close FIN */ | |||||
if (dlen == 0) { | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: Received FIN from other side\n", __func__); | |||||
/* inform the caller by seting so_error to ESHUTDOWN */ | |||||
pcb->so->so_error = ESHUTDOWN; | |||||
} | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: canread on receive ring is %u \n", __func__, dlen); | |||||
pcb->recv_data_len = dlen; | |||||
pcb->recv_data_off = 0; | |||||
return (pcb->recv_data_len); | |||||
} | |||||
static uint32_t | |||||
hvsock_canwrite_check(struct hvs_pcb *pcb) | |||||
{ | |||||
uint32_t writeable; | |||||
uint32_t ret; | |||||
if (pcb == NULL || pcb->chan == NULL) | |||||
return (0); | |||||
writeable = vmbus_chan_write_available(pcb->chan); | |||||
/* | |||||
* We must always reserve a 0-length-payload packet for the FIN. | |||||
*/ | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: writeable is %u, should be greater than %lu\n", | |||||
__func__, writeable, HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)); | |||||
if (writeable < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)) { | |||||
/* | |||||
* The Tx ring seems full. | |||||
*/ | |||||
return (0); | |||||
} | |||||
ret = writeable - HVSOCK_PKT_LEN(0) - HVSOCK_PKT_LEN(0); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: available size is %u\n", __func__, rounddown2(ret, 8)); | |||||
return (rounddown2(ret, 8)); | |||||
} | |||||
static void | |||||
hvsock_set_chan_pending_send_size(struct vmbus_channel *chan) | |||||
{ | |||||
vmbus_chan_set_pending_send_size(chan, | |||||
HVSOCK_PKT_LEN(HVSOCK_SEND_BUF_SZ)); | |||||
} | |||||
static int | |||||
hvsock_open_channel(struct vmbus_channel *chan, struct socket *so) | |||||
{ | |||||
unsigned int rcvbuf, sndbuf; | |||||
struct hvs_pcb *pcb = so2hvspcb(so); | |||||
int ret; | |||||
if (vmbus_current_version < VMBUS_VERSION_WIN10_V5) { | |||||
sndbuf = HVS_RINGBUF_SND_SIZE; | |||||
rcvbuf = HVS_RINGBUF_RCV_SIZE; | |||||
} else { | |||||
sndbuf = MAX(so->so_snd.sb_hiwat, HVS_RINGBUF_SND_SIZE); | |||||
sndbuf = MIN(sndbuf, HVS_RINGBUF_MAX_SIZE); | |||||
sndbuf = rounddown2(sndbuf, PAGE_SIZE); | |||||
rcvbuf = MAX(so->so_rcv.sb_hiwat, HVS_RINGBUF_RCV_SIZE); | |||||
rcvbuf = MIN(rcvbuf, HVS_RINGBUF_MAX_SIZE); | |||||
rcvbuf = rounddown2(rcvbuf, PAGE_SIZE); | |||||
} | |||||
/* | |||||
* Can only read whatever user provided size of data | |||||
* from ring buffer. Turn off batched reading. | |||||
*/ | |||||
vmbus_chan_set_readbatch(chan, false); | |||||
ret = vmbus_chan_open(chan, sndbuf, rcvbuf, NULL, 0, | |||||
hvsock_chan_cb, pcb); | |||||
if (ret != 0) { | |||||
HVSOCK_DBG(HVSOCK_DBG_ERR, | |||||
"%s: failed to open hvsock channel, sndbuf = %u, " | |||||
"rcvbuf = %u\n", __func__, sndbuf, rcvbuf); | |||||
} else { | |||||
HVSOCK_DBG(HVSOCK_DBG_INFO, | |||||
"%s: hvsock channel opened, sndbuf = %u, i" | |||||
"rcvbuf = %u\n", __func__, sndbuf, rcvbuf); | |||||
/* | |||||
* Se the pending send size so to receive wakeup | |||||
* signals from host when there is enough space on | |||||
* rx buffer ring to write. | |||||
*/ | |||||
hvsock_set_chan_pending_send_size(chan); | |||||
} | |||||
return ret; | |||||
} | |||||
/* | |||||
* Guest is listening passively on the socket. Open channel and | |||||
* create a new socket for the conneciton. | |||||
*/ | |||||
static void | |||||
hvsock_open_conn_passive(struct vmbus_channel *chan, struct socket *so, | |||||
struct hvsock_sc *sc) | |||||
{ | |||||
struct socket *new_so; | |||||
struct hvs_pcb *new_pcb, *pcb; | |||||
int error; | |||||
/* Do nothing if socket is not listening */ | |||||
if ((so->so_options & SO_ACCEPTCONN) == 0) { | |||||
HVSOCK_DBG(HVSOCK_DBG_ERR, | |||||
"%s: socket is not a listening one\n", __func__); | |||||
return; | |||||
} | |||||
/* | |||||
* Create a new socket. This will call pru_attach to complete | |||||
* the socket initialization and put the new socket onto | |||||
* listening socket's sol_incomp list, waiting to be promoted | |||||
* to sol_comp list. | |||||
* The new socket created has ref count 0. There is no other | |||||
* thread that changes the state of this new one at the | |||||
* moment, so we don't need to hold its lock while opening | |||||
* channel and filling out its pcb information. | |||||
*/ | |||||
new_so = sonewconn(so, 0); | |||||
if (!new_so) | |||||
HVSOCK_DBG(HVSOCK_DBG_ERR, | |||||
"%s: creating new socket failed\n", __func__); | |||||
/* | |||||
* Now open the vmbus channel. If it fails, the socket will be | |||||
* on the listening socket's sol_incomp queue until it is | |||||
* replaced and aborted. | |||||
*/ | |||||
error = hvsock_open_channel(chan, new_so); | |||||
if (error) { | |||||
new_so->so_error = error; | |||||
return; | |||||
} | |||||
pcb = so->so_pcb; | |||||
new_pcb = new_so->so_pcb; | |||||
hvs_addr_set(&(new_pcb->local_addr), pcb->local_addr.hvs_port); | |||||
/* Remote port is unknown to guest in this type of conneciton */ | |||||
hvs_addr_set(&(new_pcb->remote_addr), HVADDR_PORT_UNKNOWN); | |||||
new_pcb->chan = chan; | |||||
new_pcb->recv_data_len = 0; | |||||
new_pcb->recv_data_off = 0; | |||||
new_pcb->rb_init = false; | |||||
new_pcb->vm_srv_id = *vmbus_chan_guid_type(chan); | |||||
new_pcb->host_srv_id = *vmbus_chan_guid_inst(chan); | |||||
hvs_insert_socket_on_list(new_so, HVS_LIST_CONNECTED); | |||||
sc->pcb = new_pcb; | |||||
/* | |||||
* Change the socket state to SS_ISCONNECTED. This will promote | |||||
* the socket to sol_comp queue and wake up the thread which | |||||
* is accepting connection. | |||||
*/ | |||||
soisconnected(new_so); | |||||
} | |||||
/* | |||||
* Guest is actively connecting to host. | |||||
*/ | |||||
static void | |||||
hvsock_open_conn_active(struct vmbus_channel *chan, struct socket *so) | |||||
{ | |||||
struct hvs_pcb *pcb; | |||||
int error; | |||||
error = hvsock_open_channel(chan, so); | |||||
if (error) { | |||||
so->so_error = error; | |||||
return; | |||||
} | |||||
pcb = so->so_pcb; | |||||
pcb->chan = chan; | |||||
pcb->recv_data_len = 0; | |||||
pcb->recv_data_off = 0; | |||||
pcb->rb_init = false; | |||||
mtx_lock(&hvs_trans_socks_mtx); | |||||
__hvs_remove_socket_from_list(so, HVS_LIST_BOUND); | |||||
__hvs_insert_socket_on_list(so, HVS_LIST_CONNECTED); | |||||
mtx_unlock(&hvs_trans_socks_mtx); | |||||
/* | |||||
* Change the socket state to SS_ISCONNECTED. This will wake up | |||||
* the thread sleeping in connect call. | |||||
*/ | |||||
soisconnected(so); | |||||
} | |||||
static void | |||||
hvsock_open_connection(struct vmbus_channel *chan, struct hvsock_sc *sc) | |||||
{ | |||||
struct hyperv_guid *inst_guid, *type_guid; | |||||
bool conn_from_host; | |||||
struct sockaddr_hvs addr; | |||||
struct socket *so; | |||||
struct hvs_pcb *pcb; | |||||
type_guid = (struct hyperv_guid *) vmbus_chan_guid_type(chan); | |||||
inst_guid = (struct hyperv_guid *) vmbus_chan_guid_inst(chan); | |||||
conn_from_host = vmbus_chan_is_hvs_conn_from_host(chan); | |||||
HVSOCK_DBG(HVSOCK_DBG_INFO, "type_guid is "); | |||||
hvsock_print_guid(type_guid); | |||||
HVSOCK_DBG(HVSOCK_DBG_INFO, "inst_guid is "); | |||||
hvsock_print_guid(inst_guid); | |||||
HVSOCK_DBG(HVSOCK_DBG_INFO, "connection %s host\n", | |||||
(conn_from_host == true ) ? "from" : "to"); | |||||
/* | |||||
* The listening port should be in [0, MAX_LISTEN_PORT] | |||||
*/ | |||||
if (!is_valid_srv_id(type_guid)) | |||||
return; | |||||
/* | |||||
* There should be a bound socket already created no matter | |||||
* it is a passive or active connection. | |||||
* For host initiated connection (passive on guest side), | |||||
* the type_guid contains the port which guest is bound and | |||||
* listening. | |||||
* For the guest initiated connection (active on guest side), | |||||
* the inst_guid contains the port that guest has auto bound | |||||
* to. | |||||
*/ | |||||
hvs_addr_init(&addr, conn_from_host ? type_guid : inst_guid); | |||||
so = hvs_find_socket_on_list(&addr, HVS_LIST_BOUND); | |||||
if (!so) { | |||||
HVSOCK_DBG(HVSOCK_DBG_ERR, | |||||
"%s: no bound socket found for port %u\n", | |||||
__func__, addr.hvs_port); | |||||
return; | |||||
} | |||||
if (conn_from_host) { | |||||
hvsock_open_conn_passive(chan, so, sc); | |||||
} else { | |||||
(void) hvs_trans_lock(); | |||||
pcb = so->so_pcb; | |||||
if (pcb && pcb->so) { | |||||
sc->pcb = so2hvspcb(so); | |||||
hvsock_open_conn_active(chan, so); | |||||
} else { | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"%s: channel detached before open\n", __func__); | |||||
} | |||||
hvs_trans_unlock(); | |||||
} | |||||
} | |||||
static int | |||||
hvsock_probe(device_t dev) | |||||
{ | |||||
struct vmbus_channel *channel = vmbus_get_channel(dev); | |||||
if (!channel || !vmbus_chan_is_hvs(channel)) { | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"hvsock_probe called but not a hvsock channel id %u\n", | |||||
vmbus_chan_id(channel)); | |||||
return ENXIO; | |||||
} else { | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"hvsock_probe got a hvsock channel id %u\n", | |||||
vmbus_chan_id(channel)); | |||||
return BUS_PROBE_DEFAULT; | |||||
} | |||||
} | |||||
static int | |||||
hvsock_attach(device_t dev) | |||||
{ | |||||
struct vmbus_channel *channel = vmbus_get_channel(dev); | |||||
struct hvsock_sc *sc = (struct hvsock_sc *)device_get_softc(dev); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_attach called.\n"); | |||||
hvsock_open_connection(channel, sc); | |||||
/* | |||||
* Always return success. On error the host will rescind the device | |||||
* in 30 seconds and we can do cleanup at that time in | |||||
* vmbus_chan_msgproc_chrescind(). | |||||
*/ | |||||
return (0); | |||||
} | |||||
static int | |||||
hvsock_detach(device_t dev) | |||||
{ | |||||
struct hvsock_sc *sc = (struct hvsock_sc *)device_get_softc(dev); | |||||
struct socket *so; | |||||
int error, retry; | |||||
if (bootverbose) | |||||
device_printf(dev, "hvsock_detach called.\n"); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_detach called.\n"); | |||||
if (sc->pcb != NULL) { | |||||
(void) hvs_trans_lock(); | |||||
so = hsvpcb2so(sc->pcb); | |||||
if (so) { | |||||
/* Close the connection */ | |||||
if (so->so_state & | |||||
(SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) | |||||
soisdisconnected(so); | |||||
} | |||||
mtx_lock(&hvs_trans_socks_mtx); | |||||
__hvs_remove_pcb_from_list(sc->pcb, | |||||
HVS_LIST_BOUND | HVS_LIST_CONNECTED); | |||||
mtx_unlock(&hvs_trans_socks_mtx); | |||||
/* | |||||
* Close channel while no reader and sender are working | |||||
* on the buffer rings. | |||||
*/ | |||||
if (so) { | |||||
retry = 0; | |||||
while ((error = sblock(&so->so_rcv, 0)) == | |||||
EWOULDBLOCK) { | |||||
/* | |||||
* Someone is reading, rx br is busy | |||||
*/ | |||||
soisdisconnected(so); | |||||
DELAY(500); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"waiting for rx reader to exit, " | |||||
"retry = %d\n", retry++); | |||||
} | |||||
retry = 0; | |||||
while ((error = sblock(&so->so_snd, 0)) == | |||||
EWOULDBLOCK) { | |||||
/* | |||||
* Someone is sending, tx br is busy | |||||
*/ | |||||
soisdisconnected(so); | |||||
DELAY(500); | |||||
HVSOCK_DBG(HVSOCK_DBG_VERBOSE, | |||||
"waiting for tx sender to exit, " | |||||
"retry = %d\n", retry++); | |||||
} | |||||
} | |||||
bzero(sc->pcb, sizeof(struct hvs_pcb)); | |||||
free(sc->pcb, M_HVSOCK); | |||||
sc->pcb = NULL; | |||||
if (so) { | |||||
sbunlock(&so->so_rcv); | |||||
sbunlock(&so->so_snd); | |||||
so->so_pcb = NULL; | |||||
} | |||||
hvs_trans_unlock(); | |||||
} | |||||
vmbus_chan_close(vmbus_get_channel(dev)); | |||||
return (0); | |||||
} | |||||
static device_method_t hvsock_methods[] = { | |||||
/* Device interface */ | |||||
DEVMETHOD(device_probe, hvsock_probe), | |||||
DEVMETHOD(device_attach, hvsock_attach), | |||||
DEVMETHOD(device_detach, hvsock_detach), | |||||
DEVMETHOD_END | |||||
}; | |||||
static driver_t hvsock_driver = { | |||||
"hv_sock", | |||||
hvsock_methods, | |||||
sizeof(struct hvsock_sc) | |||||
}; | |||||
static devclass_t hvsock_devclass; | |||||
DRIVER_MODULE(hvsock, vmbus, hvsock_driver, hvsock_devclass, NULL, NULL); | |||||
MODULE_VERSION(hvsock, 1); | |||||
MODULE_DEPEND(hvsock, vmbus, 1, 1, 1); |