diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile index bf4450f12eaf..76b45b046d7e 100644 --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -1,85 +1,91 @@ # # $FreeBSD$ # .include <src.opts.mk> +CFLAGS+=-I${SRCTOP}/sys +.PATH: ${SRCTOP}/sys/cam/ctl PROG= bhyve PACKAGE= bhyve DEBUG_FLAGS= -g -O0 MAN= bhyve.8 BHYVE_SYSDIR?=${SRCTOP} SRCS= \ atkbdc.c \ acpi.c \ bhyvegc.c \ bhyverun.c \ block_if.c \ bootrom.c \ console.c \ consport.c \ + ctl_util.c \ + ctl_scsi_all.c \ dbgport.c \ fwctl.c \ gdb.c \ inout.c \ ioapic.c \ mem.c \ mevent.c \ mptbl.c \ pci_ahci.c \ pci_e82545.c \ pci_emul.c \ pci_fbuf.c \ pci_hostbridge.c \ pci_irq.c \ pci_lpc.c \ pci_passthru.c \ pci_virtio_block.c \ pci_virtio_console.c \ pci_virtio_net.c \ pci_virtio_rnd.c \ + pci_virtio_scsi.c \ pci_uart.c \ pci_xhci.c \ pm.c \ post.c \ ps2kbd.c \ ps2mouse.c \ rfb.c \ rtc.c \ smbiostbl.c \ sockstream.c \ task_switch.c \ uart_emul.c \ usb_emul.c \ usb_mouse.c \ virtio.c \ vga.c \ xmsr.c \ - spinup_ap.c + spinup_ap.c \ + iov.c .PATH: ${BHYVE_SYSDIR}/sys/amd64/vmm SRCS+= vmm_instruction_emul.c -LIBADD= vmmapi md pthread z +LIBADD= vmmapi md pthread z util sbuf cam .if ${MK_OPENSSL} == "no" CFLAGS+=-DNO_OPENSSL .else LIBADD+= crypto .endif CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/e1000 CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/mii CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller .ifdef GDB_LOG CFLAGS+=-DGDB_LOG .endif WARNS?= 2 .include <bsd.prog.mk> diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8 index 9b36576114f7..e0063059bbf3 100644 --- a/usr.sbin/bhyve/bhyve.8 +++ b/usr.sbin/bhyve/bhyve.8 @@ -1,566 +1,573 @@ .\" Copyright (c) 2013 Peter Grehan .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" -.Dd May 1, 2018 +.Dd Jun 11, 2018 .Dt BHYVE 8 .Os .Sh NAME .Nm bhyve .Nd "run a guest operating system inside a virtual machine" .Sh SYNOPSIS .Nm .Op Fl abehuwxACHPSWY .Oo .Fl c\~ Ns .Oo .Op Ar cpus= Ns .Ar numcpus Ns .Oc Ns .Op Ar ,sockets=n Ns .Op Ar ,cores=n Ns .Op Ar ,threads=n .Oc .Op Fl g Ar gdbport .Op Fl l Ar lpcdev Ns Op , Ns Ar conf .Op Fl m Ar memsize Ns Op Ar K|k|M|m|G|g|T|t .Op Fl p Ar vcpu:hostcpu .Op Fl s Ar slot,emulation Ns Op , Ns Ar conf .Op Fl G Ar port .Op Fl U Ar uuid .Ar vmname .Sh DESCRIPTION .Nm is a hypervisor that runs guest operating systems inside a virtual machine. .Pp Parameters such as the number of virtual CPUs, amount of guest memory, and I/O connectivity can be specified with command-line parameters. .Pp If not using a boot ROM, the guest operating system must be loaded with .Xr bhyveload 8 or a similar boot loader before running .Nm , otherwise, it is enough to run .Nm with a boot ROM of choice. .Pp .Nm runs until the guest operating system reboots or an unhandled hypervisor exit is detected. .Sh OPTIONS .Bl -tag -width 10n .It Fl a The guest's local APIC is configured in xAPIC mode. The xAPIC mode is the default setting so this option is redundant. It will be deprecated in a future version. .It Fl A Generate ACPI tables. Required for .Fx Ns /amd64 guests. .It Fl b Enable a low-level console device supported by .Fx kernels compiled with .Cd "device bvmconsole" . This option will be deprecated in a future version. .It Fl c Op Ar setting ... Number of guest virtual CPUs and/or the CPU topology. The default value for each of .Ar numcpus , .Ar sockets , .Ar cores , and .Ar threads is 1. The current maximum number of guest virtual CPUs is 16. If .Ar numcpus is not specified then it will be calculated from the other arguments. The topology must be consistent in that the .Ar numcpus must equal the product of .Ar sockets , .Ar cores , and .Ar threads . If a .Ar setting is specified more than once the last one has precedence. .It Fl C Include guest memory in core file. .It Fl e Force .Nm to exit when a guest issues an access to an I/O port that is not emulated. This is intended for debug purposes. .It Fl g Ar gdbport For .Fx kernels compiled with .Cd "device bvmdebug" , allow a remote kernel kgdb to be relayed to the guest kernel gdb stub via a local IPv4 address and this port. This option will be deprecated in a future version. .It Fl G Ar port Start a debug server that uses the GDB protocol to export guest state to a debugger. An IPv4 TCP socket will be bound to the supplied .Ar port to listen for debugger connections. Only a single debugger may be attached to the debug server at a time. If .Ar port begins with .Sq w , .Nm will pause execution at the first instruction waiting for a debugger to attach. .It Fl h Print help message and exit. .It Fl H Yield the virtual CPU thread when a HLT instruction is detected. If this option is not specified, virtual CPUs will use 100% of a host CPU. .It Fl l Ar lpcdev Ns Op , Ns Ar conf Allow devices behind the LPC PCI-ISA bridge to be configured. The only supported devices are the TTY-class devices .Ar com1 and .Ar com2 and the boot ROM device .Ar bootrom . .It Fl m Ar memsize Ns Op Ar K|k|M|m|G|g|T|t Guest physical memory size in bytes. This must be the same size that was given to .Xr bhyveload 8 . .Pp The size argument may be suffixed with one of K, M, G or T (either upper or lower case) to indicate a multiple of kilobytes, megabytes, gigabytes, or terabytes. 
If no suffix is given, the value is assumed to be in megabytes. .Pp .Ar memsize defaults to 256M. .It Fl p Ar vcpu:hostcpu Pin guest's virtual CPU .Em vcpu to .Em hostcpu . .It Fl P Force the guest virtual CPU to exit when a PAUSE instruction is detected. .It Fl s Ar slot,emulation Ns Op , Ns Ar conf Configure a virtual PCI slot and function. .Pp .Nm provides PCI bus emulation and virtual devices that can be attached to slots on the bus. There are 32 available slots, with the option of providing up to 8 functions per slot. .Bl -tag -width 10n .It Ar slot .Ar pcislot[:function] .Ar bus:pcislot:function .Pp The .Ar pcislot value is 0 to 31. The optional .Ar function value is 0 to 7. The optional .Ar bus value is 0 to 255. If not specified, the .Ar function value defaults to 0. If not specified, the .Ar bus value defaults to 0. .It Ar emulation .Bl -tag -width 10n .It Li hostbridge | Li amd_hostbridge .Pp Provide a simple host bridge. This is usually configured at slot 0, and is required by most guest operating systems. The .Li amd_hostbridge emulation is identical but uses a PCI vendor ID of .Li AMD . .It Li passthru PCI pass-through device. .It Li virtio-net Virtio network interface. .It Li virtio-blk Virtio block storage interface. +.It Li virtio-scsi +Virtio SCSI interface. .It Li virtio-rnd Virtio RNG interface. .It Li virtio-console Virtio console interface, which exposes multiple ports to the guest in the form of simple char devices for simple IO between the guest and host userspaces. .It Li ahci AHCI controller attached to arbitrary devices. .It Li ahci-cd AHCI controller attached to an ATAPI CD/DVD. .It Li ahci-hd AHCI controller attached to a SATA hard-drive. .It Li e1000 Intel e82545 network interface. .It Li uart PCI 16550 serial device. .It Li lpc LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports and a boot ROM. The LPC bridge emulation can only be configured on bus 0. .It Li fbuf Raw framebuffer device attached to VNC server. .It Li xhci eXtensible Host Controller Interface (xHCI) USB controller. .El .It Op Ar conf This optional parameter describes the backend for device emulations. If .Ar conf is not specified, the device emulation has no backend and can be considered unconnected. .Pp Network devices: .Bl -tag -width 10n .It Ar tapN Ns Op , Ns Ar mac=xx:xx:xx:xx:xx:xx .It Ar vmnetN Ns Op , Ns Ar mac=xx:xx:xx:xx:xx:xx .Pp If .Ar mac is not specified, the MAC address is derived from a fixed OUI and the remaining bytes from an MD5 hash of the slot and function numbers and the device name. .Pp The MAC address is an ASCII string in .Xr ethers 5 format. .El .Pp Block storage devices: .Bl -tag -width 10n .It Pa /filename Ns Oo , Ns Ar block-device-options Oc .It Pa /dev/xxx Ns Oo , Ns Ar block-device-options Oc .El .Pp The .Ar block-device-options are: .Bl -tag -width 8n .It Li nocache Open the file with .Dv O_DIRECT . .It Li direct Open the file using .Dv O_SYNC . .It Li ro Force the file to be opened read-only. .It Li sectorsize= Ns Ar logical Ns Oo / Ns Ar physical Oc Specify the logical and physical sector sizes of the emulated disk. The physical sector size is optional and is equal to the logical sector size if not explicitly specified. .El .Pp +SCSI devices: +.Bl -tag -width 10n +.It Pa /dev/cam/ Ns Oo , Ns Ar port and initiator_id Oc +.El +.Pp TTY devices: .Bl -tag -width 10n .It Li stdio Connect the serial port to the standard input and output of the .Nm process. .It Pa /dev/xxx Use the host TTY device for serial port I/O. 
.El .Pp Boot ROM device: .Bl -tag -width 10n .It Pa romfile Map .Ar romfile in the guest address space reserved for boot firmware. .El .Pp Pass-through devices: .Bl -tag -width 10n .It Ns Ar slot Ns / Ns Ar bus Ns / Ns Ar function Connect to a PCI device on the host at the selector described by .Ar slot , .Ar bus , and .Ar function numbers. .El .Pp Guest memory must be wired using the .Fl S option when a pass-through device is configured. .Pp The host device must have been reserved at boot-time using the .Va pptdev loader variable as described in .Xr vmm 4 . .Pp Virtio console devices: .Bl -tag -width 10n .It Li port1= Ns Pa /path/to/port1.sock Ns ,anotherport= Ns Pa ... A maximum of 16 ports per device can be created. Every port is named and corresponds to a Unix domain socket created by .Nm . .Nm accepts at most one connection per port at a time. .Pp Limitations: .Bl -bullet -offset 2n .It Due to lack of destructors in .Nm , sockets on the filesystem must be cleaned up manually after .Nm exits. .It There is no way to use the "console port" feature, nor the console port resize at present. .It Emergency write is advertised, but no-op at present. .El .El .Pp Framebuffer devices: .Bl -tag -width 10n .It Oo rfb= Ns Oo Ar IP: Oc Ns Ar port Oc Ns Oo ,w= Ns Ar width Oc Ns Oo ,h= Ns Ar height Oc Ns Oo ,vga= Ns Ar vgaconf Oc Ns Oo Ns ,wait Oc Ns Oo ,password= Ns Ar password Oc .Bl -tag -width 8n .It Ar IP:port An .Ar IP address and a .Ar port VNC should listen on. The default is to listen on localhost IPv4 address and default VNC port 5900. Listening on an IPv6 address is not supported. .It Ar width No and Ar height A display resolution, width and height, respectively. If not specified, a default resolution of 1024x768 pixels will be used. Minimal supported resolution is 640x480 pixels, and maximum is 1920x1200 pixels. .It Ar vgaconf Possible values for this option are .Dq io (default), .Dq on , and .Dq off . PCI graphics cards have a dual personality in that they are standard PCI devices with BAR addressing, but may also implicitly decode legacy VGA I/O space .Pq Ad 0x3c0-3df and memory space .Pq 64KB at Ad 0xA0000 . The default .Dq io option should be used for guests that attempt to issue BIOS calls which result in I/O port queries, and fail to boot if I/O decode is disabled. .Pp The .Dq on option should be used along with the CSM BIOS capability in UEFI to boot traditional BIOS guests that require the legacy VGA I/O and memory regions to be available. .Pp The .Dq off option should be used for the UEFI guests that assume that VGA adapter is present if they detect the I/O ports. An example of such a guest is .Ox in UEFI mode. .Pp Please refer to the .Nm .Fx wiki page .Pq Lk https://wiki.freebsd.org/bhyve for configuration notes of particular guests. .It wait Instruct .Nm to only boot upon the initiation of a VNC connection, simplifying the installation of operating systems that require immediate keyboard input. This can be removed for post-installation use. .It password This type of authentication is known to be cryptographically weak and is not intended for use on untrusted networks. Many implementations will want to use stronger security, such as running the session over an encrypted channel provided by IPsec or SSH. .El .El .Pp xHCI USB devices: .Bl -tag -width 10n .It Li tablet A USB tablet device which provides precise cursor synchronization when using VNC. .El .El .It Fl S Wire guest memory. .It Fl u RTC keeps UTC time. 
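A note on the virtio-scsi backend documented earlier in this option list: the backend string is parsed by pci_vtscsi_init() in this same patch, which always opens /dev/cam/ctl and understands a single iid=<n> key selecting the CTL initiator ID. Assuming a CTL LUN has already been configured on the host (for example with ctladm(8)), a plausible slot specification is -s 4,virtio-scsi,/dev/cam/ctl,iid=3.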
.It Fl U Ar uuid Set the universally unique identifier .Pq UUID in the guest's System Management BIOS System Information structure. By default a UUID is generated from the host's hostname and .Ar vmname . .It Fl w Ignore accesses to unimplemented Model Specific Registers (MSRs). This is intended for debug purposes. .It Fl W Force virtio PCI device emulations to use MSI interrupts instead of MSI-X interrupts. .It Fl x The guest's local APIC is configured in x2APIC mode. .It Fl Y Disable MPtable generation. .It Ar vmname Alphanumeric name of the guest. This should be the same as that created by .Xr bhyveload 8 . .El .Sh DEBUG SERVER The current debug server provides limited support for debuggers. .Ss Registers Each virtual CPU is exposed to the debugger as a thread. .Pp General purpose registers can be queried for each virtual CPU, but other registers such as floating-point and system registers cannot be queried. .Ss Memory Memory (including memory mapped I/O regions) can be read by the debugger, but not written. Memory operations use virtual addresses that are resolved to physical addresses via the current virtual CPU's active address translation. .Ss Control The running guest can be interrupted by the debugger at any time .Pq for example, by pressing Ctrl-C in the debugger . .Pp Single stepping is only supported on Intel CPUs supporting the MTRAP VM exit. .Pp Breakpoints are not supported. .Sh SIGNAL HANDLING .Nm deals with the following signals: .Pp .Bl -tag -width indent -compact .It SIGTERM Trigger ACPI poweroff for a VM .El .Sh EXIT STATUS Exit status indicates how the VM was terminated: .Pp .Bl -tag -width indent -compact .It 0 rebooted .It 1 powered off .It 2 halted .It 3 triple fault .El .Sh EXAMPLES If not using a boot ROM, the guest operating system must have been loaded with .Xr bhyveload 8 or a similar boot loader before .Xr bhyve 4 can be run. Otherwise, the boot loader is not needed. .Pp To run a virtual machine with 1GB of memory, two virtual CPUs, a virtio block device backed by the .Pa /my/image filesystem image, and a serial port for the console: .Bd -literal -offset indent bhyve -c 2 -s 0,hostbridge -s 1,lpc -s 2,virtio-blk,/my/image \\ -l com1,stdio -A -H -P -m 1G vm1 .Ed .Pp Run a 24GB single-CPU virtual machine with three network ports, one of which has a MAC address specified: .Bd -literal -offset indent bhyve -s 0,hostbridge -s 1,lpc -s 2:0,virtio-net,tap0 \\ -s 2:1,virtio-net,tap1 \\ -s 2:2,virtio-net,tap2,mac=00:be:fa:76:45:00 \\ -s 3,virtio-blk,/my/image -l com1,stdio \\ -A -H -P -m 24G bigvm .Ed .Pp Run an 8GB quad-CPU virtual machine with 8 AHCI SATA disks, an AHCI ATAPI CD-ROM, a single virtio network port, an AMD hostbridge, and the console port connected to an .Xr nmdm 4 null-modem device. .Bd -literal -offset indent bhyve -c 4 \\ -s 0,amd_hostbridge -s 1,lpc \\ -s 1:0,ahci,hd:/images/disk.1,hd:/images/disk.2,\\ hd:/images/disk.3,hd:/images/disk.4,\\ hd:/images/disk.5,hd:/images/disk.6,\\ hd:/images/disk.7,hd:/images/disk.8,\\ cd:/images/install.iso \\ -s 3,virtio-net,tap0 \\ -l com1,/dev/nmdm0A \\ -A -H -P -m 8G .Ed .Pp Run a UEFI virtual machine with a display resolution of 800 by 600 pixels that can be accessed via VNC at: 0.0.0.0:5900. 
.Bd -literal -offset indent bhyve -c 2 -m 4G -w -H \\ -s 0,hostbridge \\ -s 3,ahci-cd,/path/to/uefi-OS-install.iso \\ -s 4,ahci-hd,disk.img \\ -s 5,virtio-net,tap0 \\ -s 29,fbuf,tcp=0.0.0.0:5900,w=800,h=600,wait \\ -s 30,xhci,tablet \\ -s 31,lpc -l com1,stdio \\ -l bootrom,/usr/local/share/uefi-firmware/BHYVE_UEFI.fd \\ uefivm .Ed .Sh SEE ALSO .Xr bhyve 4 , .Xr nmdm 4 , .Xr vmm 4 , .Xr ethers 5 , .Xr bhyvectl 8 , .Xr bhyveload 8 .Sh HISTORY .Nm first appeared in .Fx 10.0 . .Sh AUTHORS .An Neel Natu Aq Mt neel@freebsd.org .An Peter Grehan Aq Mt grehan@freebsd.org diff --git a/usr.sbin/bhyve/iov.c b/usr.sbin/bhyve/iov.c new file mode 100644 index 000000000000..c564bd8ae50f --- /dev/null +++ b/usr.sbin/bhyve/iov.c @@ -0,0 +1,141 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2016 Jakub Klama <jceel@FreeBSD.org>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/uio.h> + +#include <stdlib.h> +#include <string.h> +#include "iov.h" + +void +seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2, size_t *niov2, + size_t seek) +{ + size_t remainder = 0; + size_t left = seek; + size_t i, j; + + for (i = 0; i < niov1; i++) { + size_t toseek = MIN(left, iov1[i].iov_len); + left -= toseek; + + if (toseek == iov1[i].iov_len) + continue; + + if (left == 0) { + remainder = toseek; + break; + } + } + + for (j = i; j < niov1; j++) { + iov2[j - i].iov_base = (char *)iov1[j].iov_base + remainder; + iov2[j - i].iov_len = iov1[j].iov_len - remainder; + remainder = 0; + } + + *niov2 = j - i; +} + +size_t +count_iov(struct iovec *iov, size_t niov) +{ + size_t i, total = 0; + + for (i = 0; i < niov; i++) + total += iov[i].iov_len; + + return (total); +} + +size_t +truncate_iov(struct iovec *iov, size_t niov, size_t length) +{ + size_t i, done = 0; + + for (i = 0; i < niov; i++) { + size_t toseek = MIN(length - done, iov[i].iov_len); + done += toseek; + + if (toseek < iov[i].iov_len) { + iov[i].iov_len = toseek; + return (i + 1); + } + } + + return (niov); +} + +ssize_t +iov_to_buf(struct iovec *iov, size_t niov, void **buf) +{ + size_t i, ptr = 0, total = 0; + + for (i = 0; i < niov; i++) { + total += iov[i].iov_len; + *buf = realloc(*buf, total); + if (*buf == NULL) + return (-1); + + memcpy(*buf + ptr, iov[i].iov_base, iov[i].iov_len); + ptr += iov[i].iov_len; + } + + return (total); +} + +ssize_t +buf_to_iov(void *buf, size_t buflen, struct iovec *iov, size_t niov, + size_t seek) +{ + struct iovec *diov; + size_t ndiov, i; + uintptr_t off = 0; + + if (seek > 0) { + diov = malloc(sizeof(struct iovec) * niov); + seek_iov(iov, niov, diov, &ndiov, seek); + } else { + diov = iov; + ndiov = niov; + } + + for (i = 0; i < ndiov; i++) { + memcpy(diov[i].iov_base, buf + off, diov[i].iov_len); + off += diov[i].iov_len; + } + + return ((ssize_t)off); +} + diff --git a/usr.sbin/bhyve/iov.h b/usr.sbin/bhyve/iov.h new file mode 100644 index 000000000000..87fa4c1dcfe6 --- /dev/null +++ b/usr.sbin/bhyve/iov.h @@ -0,0 +1,43 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2016 Jakub Klama <jceel@FreeBSD.org>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _IOV_H_ +#define _IOV_H_ + +void seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2, + size_t *niov2, size_t seek); +size_t truncate_iov(struct iovec *iov, size_t niov, size_t length); +size_t count_iov(struct iovec *iov, size_t niov); +ssize_t iov_to_buf(struct iovec *iov, size_t niov, void **buf); +ssize_t buf_to_iov(void *buf, size_t buflen, struct iovec *iov, size_t niov, + size_t seek); + +#endif /* _IOV_H_ */ diff --git a/usr.sbin/bhyve/pci_virtio_scsi.c b/usr.sbin/bhyve/pci_virtio_scsi.c new file mode 100644 index 000000000000..c92699300754 --- /dev/null +++ b/usr.sbin/bhyve/pci_virtio_scsi.c @@ -0,0 +1,718 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2016 Jakub Klama <jceel@FreeBSD.org>. + * Copyright (c) 2018 Marcelo Araujo <araujo@FreeBSD.org>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
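Taken together, iov.c and iov.h above give the new SCSI emulation a small scatter/gather toolkit: seek_iov() builds a view of a chain past a byte offset, truncate_iov() clips a chain in place, count_iov() totals the segment lengths, and iov_to_buf()/buf_to_iov() gather into and scatter out of a flat buffer. A minimal sketch of the header/payload split performed later by pci_vtscsi_request_handle(), assuming it is compiled and linked against iov.c from this patch:

#include <sys/types.h>
#include <sys/uio.h>

#include <assert.h>
#include <stdio.h>

#include "iov.h"

int
main(void)
{
	char hdr[8] = "header!", payload[16] = "0123456789abcde";
	struct iovec chain[2] = {
		{ .iov_base = hdr,     .iov_len = sizeof(hdr) },
		{ .iov_base = payload, .iov_len = sizeof(payload) },
	};
	struct iovec data[2];
	size_t ndata;

	/* Everything past the first 8 bytes is request data. */
	seek_iov(chain, 2, data, &ndata, sizeof(hdr));
	assert(count_iov(data, ndata) == sizeof(payload));

	/* Clip the original chain so that it now covers only the header. */
	truncate_iov(chain, 2, sizeof(hdr));
	assert(count_iov(chain, 2) == sizeof(hdr));

	printf("%zu data segment(s), %zu bytes\n", ndata,
	    count_iov(data, ndata));
	return (0);
}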
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/linker_set.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/time.h> +#include <sys/queue.h> +#include <sys/sbuf.h> + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> +#include <pthread.h> +#include <pthread_np.h> + +#include <cam/scsi/scsi_all.h> +#include <cam/scsi/scsi_message.h> +#include <cam/ctl/ctl.h> +#include <cam/ctl/ctl_io.h> +#include <cam/ctl/ctl_backend.h> +#include <cam/ctl/ctl_ioctl.h> +#include <cam/ctl/ctl_util.h> +#include <cam/ctl/ctl_scsi_all.h> +#include <camlib.h> + +#include "bhyverun.h" +#include "pci_emul.h" +#include "virtio.h" +#include "iov.h" + +#define VTSCSI_RINGSZ 64 +#define VTSCSI_REQUESTQ 1 +#define VTSCSI_THR_PER_Q 16 +#define VTSCSI_MAXQ (VTSCSI_REQUESTQ + 2) +#define VTSCSI_MAXSEG 64 + +#define VTSCSI_IN_HEADER_LEN(_sc) \ + (sizeof(struct pci_vtscsi_req_cmd_rd) + _sc->vss_config.cdb_size) + +#define VTSCSI_OUT_HEADER_LEN(_sc) \ + (sizeof(struct pci_vtscsi_req_cmd_wr) + _sc->vss_config.sense_size) + +#define VIRTIO_SCSI_MAX_CHANNEL 0 +#define VIRTIO_SCSI_MAX_TARGET 0 +#define VIRTIO_SCSI_MAX_LUN 16383 + +#define VIRTIO_SCSI_F_INOUT (1 << 0) +#define VIRTIO_SCSI_F_HOTPLUG (1 << 1) +#define VIRTIO_SCSI_F_CHANGE (1 << 2) + +static int pci_vtscsi_debug = 0; +#define DPRINTF(params) if (pci_vtscsi_debug) printf params +#define WPRINTF(params) printf params + +struct pci_vtscsi_config { + uint32_t num_queues; + uint32_t seg_max; + uint32_t max_sectors; + uint32_t cmd_per_lun; + uint32_t event_info_size; + uint32_t sense_size; + uint32_t cdb_size; + uint16_t max_channel; + uint16_t max_target; + uint32_t max_lun; +} __attribute__((packed)); + +struct pci_vtscsi_queue { + struct pci_vtscsi_softc * vsq_sc; + struct vqueue_info * vsq_vq; + int vsq_ctl_fd; + pthread_mutex_t vsq_mtx; + pthread_mutex_t vsq_qmtx; + pthread_cond_t vsq_cv; + STAILQ_HEAD(, pci_vtscsi_request) vsq_requests; + LIST_HEAD(, pci_vtscsi_worker) vsq_workers; +}; + +struct pci_vtscsi_worker { + struct pci_vtscsi_queue * vsw_queue; + pthread_t vsw_thread; + bool vsw_exiting; + LIST_ENTRY(pci_vtscsi_worker) vsw_link; +}; + +struct pci_vtscsi_request { + struct pci_vtscsi_queue * vsr_queue; + struct iovec vsr_iov_in[VTSCSI_MAXSEG]; + int vsr_niov_in; + struct iovec vsr_iov_out[VTSCSI_MAXSEG]; + int vsr_niov_out; + uint32_t vsr_idx; + STAILQ_ENTRY(pci_vtscsi_request) vsr_link; +}; + +/* + * Per-device softc + */ +struct pci_vtscsi_softc { + struct virtio_softc vss_vs; + struct vqueue_info vss_vq[VTSCSI_MAXQ]; + struct pci_vtscsi_queue vss_queues[VTSCSI_REQUESTQ]; + pthread_mutex_t vss_mtx; + int vss_iid; + int vss_ctl_fd; + uint32_t vss_features; + struct pci_vtscsi_config vss_config; +}; + +#define VIRTIO_SCSI_T_TMF 0 +#define VIRTIO_SCSI_T_TMF_ABORT_TASK 0 +#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1 +#define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2 +#define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3 +#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4 +#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5 +#define VIRTIO_SCSI_T_TMF_QUERY_TASK 6 +#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7 + +/* command-specific response values */ +#define VIRTIO_SCSI_S_FUNCTION_COMPLETE 0 +#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10 +#define VIRTIO_SCSI_S_FUNCTION_REJECTED 11 + +struct pci_vtscsi_ctrl_tmf { + uint32_t type; + uint32_t subtype; + uint8_t lun[8]; + uint64_t id; + uint8_t response; +} __attribute__((packed)); 
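The 8-byte lun field used by the control and request headers follows the virtio-scsi addressing convention: byte 0 is 1, byte 1 is the target (consumed below as the CTL target port), and bytes 2-3 carry the LUN in SAM flat-space form with 0x40 set in the high byte, which is why pci_vtscsi_get_lun() further down masks with 0x3fff. A sketch of the matching encoder, as a guest driver would fill the field in (the helper name is illustrative, not part of this patch):

#include <stdint.h>
#include <string.h>

/* Illustrative inverse of pci_vtscsi_get_lun() for LUNs 0..16383. */
void
vtscsi_encode_lun(uint8_t lun[8], uint8_t target, uint16_t lun_id)
{

	memset(lun, 0, 8);
	lun[0] = 1;				/* fixed by the spec */
	lun[1] = target;			/* becomes nexus.targ_port */
	lun[2] = 0x40 | ((lun_id >> 8) & 0x3f);	/* SAM flat addressing */
	lun[3] = lun_id & 0xff;
}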
+ +#define VIRTIO_SCSI_T_AN_QUERY 1 +#define VIRTIO_SCSI_EVT_ASYNC_OPERATIONAL_CHANGE 2 +#define VIRTIO_SCSI_EVT_ASYNC_POWER_MGMT 4 +#define VIRTIO_SCSI_EVT_ASYNC_EXTERNAL_REQUEST 8 +#define VIRTIO_SCSI_EVT_ASYNC_MEDIA_CHANGE 16 +#define VIRTIO_SCSI_EVT_ASYNC_MULTI_HOST 32 +#define VIRTIO_SCSI_EVT_ASYNC_DEVICE_BUSY 64 + +struct pci_vtscsi_ctrl_an { + uint32_t type; + uint8_t lun[8]; + uint32_t event_requested; + uint32_t event_actual; + uint8_t response; +} __attribute__((packed)); + +/* command-specific response values */ +#define VIRTIO_SCSI_S_OK 0 +#define VIRTIO_SCSI_S_OVERRUN 1 +#define VIRTIO_SCSI_S_ABORTED 2 +#define VIRTIO_SCSI_S_BAD_TARGET 3 +#define VIRTIO_SCSI_S_RESET 4 +#define VIRTIO_SCSI_S_BUSY 5 +#define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6 +#define VIRTIO_SCSI_S_TARGET_FAILURE 7 +#define VIRTIO_SCSI_S_NEXUS_FAILURE 8 +#define VIRTIO_SCSI_S_FAILURE 9 +#define VIRTIO_SCSI_S_INCORRECT_LUN 12 + +/* task_attr */ +#define VIRTIO_SCSI_S_SIMPLE 0 +#define VIRTIO_SCSI_S_ORDERED 1 +#define VIRTIO_SCSI_S_HEAD 2 +#define VIRTIO_SCSI_S_ACA 3 + +struct pci_vtscsi_event { + uint32_t event; + uint8_t lun[8]; + uint32_t reason; +} __attribute__((packed)); + +struct pci_vtscsi_req_cmd_rd { + uint8_t lun[8]; + uint64_t id; + uint8_t task_attr; + uint8_t prio; + uint8_t crn; + uint8_t cdb[]; +} __attribute__((packed)); + +struct pci_vtscsi_req_cmd_wr { + uint32_t sense_len; + uint32_t residual; + uint16_t status_qualifier; + uint8_t status; + uint8_t response; + uint8_t sense[]; +} __attribute__((packed)); + +static void *pci_vtscsi_proc(void *); +static void pci_vtscsi_reset(void *); +static void pci_vtscsi_neg_features(void *, uint64_t); +static int pci_vtscsi_cfgread(void *, int, int, uint32_t *); +static int pci_vtscsi_cfgwrite(void *, int, int, uint32_t); +static inline int pci_vtscsi_get_lun(uint8_t *); +static int pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, size_t); +static int pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *, + struct pci_vtscsi_ctrl_tmf *); +static int pci_vtscsi_an_handle(struct pci_vtscsi_softc *, + struct pci_vtscsi_ctrl_an *); +static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *, struct iovec *, + int, struct iovec *, int); +static void pci_vtscsi_controlq_notify(void *, struct vqueue_info *); +static void pci_vtscsi_eventq_notify(void *, struct vqueue_info *); +static void pci_vtscsi_requestq_notify(void *, struct vqueue_info *); +static int pci_vtscsi_init_queue(struct pci_vtscsi_softc *, + struct pci_vtscsi_queue *, int); +static int pci_vtscsi_init(struct vmctx *, struct pci_devinst *, char *); + +static struct virtio_consts vtscsi_vi_consts = { + "vtscsi", /* our name */ + VTSCSI_MAXQ, /* we support 2+n virtqueues */ + sizeof(struct pci_vtscsi_config), /* config reg size */ + pci_vtscsi_reset, /* reset */ + NULL, /* device-wide qnotify */ + pci_vtscsi_cfgread, /* read virtio config */ + pci_vtscsi_cfgwrite, /* write virtio config */ + pci_vtscsi_neg_features, /* apply negotiated features */ + 0, /* our capabilities */ +}; + +static void * +pci_vtscsi_proc(void *arg) +{ + struct pci_vtscsi_worker *worker = (struct pci_vtscsi_worker *)arg; + struct pci_vtscsi_queue *q = worker->vsw_queue; + struct pci_vtscsi_request *req; + int iolen; + + for (;;) { + pthread_mutex_lock(&q->vsq_mtx); + + while (STAILQ_EMPTY(&q->vsq_requests) + && !worker->vsw_exiting) + pthread_cond_wait(&q->vsq_cv, &q->vsq_mtx); + + if (worker->vsw_exiting) + break; + + req = STAILQ_FIRST(&q->vsq_requests); + STAILQ_REMOVE_HEAD(&q->vsq_requests, vsr_link); + + 
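	/*
	 * The request is now owned by this worker: vsq_mtx is dropped
	 * before the (potentially blocking) CTL_IO ioctl inside
	 * pci_vtscsi_request_handle() so the other VTSCSI_THR_PER_Q
	 * workers can keep dequeueing, and only vsq_qmtx is taken
	 * afterwards to serialize vq_relchain()/vq_endchains() on the
	 * shared virtqueue.
	 */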
pthread_mutex_unlock(&q->vsq_mtx); + iolen = pci_vtscsi_request_handle(q, req->vsr_iov_in, + req->vsr_niov_in, req->vsr_iov_out, req->vsr_niov_out); + + pthread_mutex_lock(&q->vsq_qmtx); + vq_relchain(q->vsq_vq, req->vsr_idx, iolen); + vq_endchains(q->vsq_vq, 0); + pthread_mutex_unlock(&q->vsq_qmtx); + + DPRINTF(("virtio-scsi: request <idx=%d> completed\n", + req->vsr_idx)); + free(req); + } + + pthread_mutex_unlock(&q->vsq_mtx); + return (NULL); +} + +static void +pci_vtscsi_reset(void *vsc) +{ + struct pci_vtscsi_softc *sc; + + sc = vsc; + + DPRINTF(("vtscsi: device reset requested\n")); + vi_reset_dev(&sc->vss_vs); + + /* initialize config structure */ + sc->vss_config = (struct pci_vtscsi_config){ + .num_queues = VTSCSI_REQUESTQ, + .seg_max = VTSCSI_MAXSEG, + .max_sectors = 2, + .cmd_per_lun = 1, + .event_info_size = sizeof(struct pci_vtscsi_event), + .sense_size = 96, + .cdb_size = 32, + .max_channel = VIRTIO_SCSI_MAX_CHANNEL, + .max_target = VIRTIO_SCSI_MAX_TARGET, + .max_lun = VIRTIO_SCSI_MAX_LUN + }; +} + +static void +pci_vtscsi_neg_features(void *vsc, uint64_t negotiated_features) +{ + struct pci_vtscsi_softc *sc = vsc; + + sc->vss_features = negotiated_features; +} + +static int +pci_vtscsi_cfgread(void *vsc, int offset, int size, uint32_t *retval) +{ + struct pci_vtscsi_softc *sc = vsc; + void *ptr; + + ptr = (uint8_t *)&sc->vss_config + offset; + memcpy(retval, ptr, size); + return (0); +} + +static int +pci_vtscsi_cfgwrite(void *vsc, int offset, int size, uint32_t val) +{ + + return (0); +} + +static inline int +pci_vtscsi_get_lun(uint8_t *lun) +{ + + return (((lun[2] << 8) | lun[3]) & 0x3fff); +} + +static int +pci_vtscsi_control_handle(struct pci_vtscsi_softc *sc, void *buf, + size_t bufsize) +{ + struct pci_vtscsi_ctrl_tmf *tmf; + struct pci_vtscsi_ctrl_an *an; + uint32_t type; + + type = *(uint32_t *)buf; + + if (type == VIRTIO_SCSI_T_TMF) { + tmf = (struct pci_vtscsi_ctrl_tmf *)buf; + return (pci_vtscsi_tmf_handle(sc, tmf)); + } + + if (type == VIRTIO_SCSI_T_AN_QUERY) { + an = (struct pci_vtscsi_ctrl_an *)buf; + return (pci_vtscsi_an_handle(sc, an)); + } + + return (0); +} + +static int +pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc, + struct pci_vtscsi_ctrl_tmf *tmf) +{ + union ctl_io *io; + int err; + + io = ctl_scsi_alloc_io(sc->vss_iid); + ctl_scsi_zero_io(io); + + io->io_hdr.io_type = CTL_IO_TASK; + io->io_hdr.nexus.targ_port = tmf->lun[1]; + io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(tmf->lun); + io->taskio.tag_type = CTL_TAG_SIMPLE; + io->taskio.tag_num = (uint32_t)tmf->id; + + switch (tmf->subtype) { + case VIRTIO_SCSI_T_TMF_ABORT_TASK: + io->taskio.task_action = CTL_TASK_ABORT_TASK; + break; + + case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET: + io->taskio.task_action = CTL_TASK_ABORT_TASK_SET; + break; + + case VIRTIO_SCSI_T_TMF_CLEAR_ACA: + io->taskio.task_action = CTL_TASK_CLEAR_ACA; + break; + + case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: + io->taskio.task_action = CTL_TASK_CLEAR_TASK_SET; + break; + + case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: + io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET; + break; + + case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: + io->taskio.task_action = CTL_TASK_LUN_RESET; + break; + + case VIRTIO_SCSI_T_TMF_QUERY_TASK: + io->taskio.task_action = CTL_TASK_QUERY_TASK; + break; + + case VIRTIO_SCSI_T_TMF_QUERY_TASK_SET: + io->taskio.task_action = CTL_TASK_QUERY_TASK_SET; + break; + } + + if (pci_vtscsi_debug) { + struct sbuf *sb = sbuf_new_auto(); + ctl_io_sbuf(io, sb); + sbuf_finish(sb); + DPRINTF(("pci_virtio_scsi: %s", sbuf_data(sb))); + 
sbuf_delete(sb); + } + + err = ioctl(sc->vss_ctl_fd, CTL_IO, io); + if (err != 0) + WPRINTF(("CTL_IO: err=%d (%s)\n", errno, strerror(errno))); + + tmf->response = io->taskio.task_status; + ctl_scsi_free_io(io); + return (1); +} + +static int +pci_vtscsi_an_handle(struct pci_vtscsi_softc *sc, + struct pci_vtscsi_ctrl_an *an) +{ + + return (0); +} + +static int +pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, + int niov_in, struct iovec *iov_out, int niov_out) +{ + struct pci_vtscsi_softc *sc = q->vsq_sc; + struct pci_vtscsi_req_cmd_rd *cmd_rd = NULL; + struct pci_vtscsi_req_cmd_wr *cmd_wr; + struct iovec data_iov_in[VTSCSI_MAXSEG], data_iov_out[VTSCSI_MAXSEG]; + union ctl_io *io; + size_t data_niov_in, data_niov_out; + void *ext_data_ptr = NULL; + uint32_t ext_data_len = 0, ext_sg_entries = 0; + int err; + + seek_iov(iov_in, niov_in, data_iov_in, &data_niov_in, + VTSCSI_IN_HEADER_LEN(sc)); + seek_iov(iov_out, niov_out, data_iov_out, &data_niov_out, + VTSCSI_OUT_HEADER_LEN(sc)); + + truncate_iov(iov_in, niov_in, VTSCSI_IN_HEADER_LEN(sc)); + truncate_iov(iov_out, niov_out, VTSCSI_OUT_HEADER_LEN(sc)); + iov_to_buf(iov_in, niov_in, (void **)&cmd_rd); + + cmd_wr = malloc(VTSCSI_OUT_HEADER_LEN(sc)); + io = ctl_scsi_alloc_io(sc->vss_iid); + ctl_scsi_zero_io(io); + + io->io_hdr.nexus.targ_port = cmd_rd->lun[1]; + io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(cmd_rd->lun); + + io->io_hdr.io_type = CTL_IO_SCSI; + + if (data_niov_in > 0) { + ext_data_ptr = (void *)data_iov_in; + ext_sg_entries = data_niov_in; + ext_data_len = count_iov(data_iov_in, data_niov_in); + io->io_hdr.flags |= CTL_FLAG_DATA_OUT; + } else if (data_niov_out > 0) { + ext_data_ptr = (void *)data_iov_out; + ext_sg_entries = data_niov_out; + ext_data_len = count_iov(data_iov_out, data_niov_out); + io->io_hdr.flags |= CTL_FLAG_DATA_IN; + } + + io->scsiio.sense_len = sc->vss_config.sense_size; + io->scsiio.tag_num = (uint32_t)cmd_rd->id; + io->scsiio.tag_type = CTL_TAG_SIMPLE; + io->scsiio.ext_sg_entries = ext_sg_entries; + io->scsiio.ext_data_ptr = ext_data_ptr; + io->scsiio.ext_data_len = ext_data_len; + io->scsiio.ext_data_filled = 0; + io->scsiio.cdb_len = sc->vss_config.cdb_size; + memcpy(io->scsiio.cdb, cmd_rd->cdb, sc->vss_config.cdb_size); + + if (pci_vtscsi_debug) { + struct sbuf *sb = sbuf_new_auto(); + ctl_io_sbuf(io, sb); + sbuf_finish(sb); + DPRINTF(("pci_virtio_scsi: %s", sbuf_data(sb))); + sbuf_delete(sb); + } + + err = ioctl(q->vsq_ctl_fd, CTL_IO, io); + if (err != 0) { + WPRINTF(("CTL_IO: err=%d (%s)\n", errno, strerror(errno))); + cmd_wr->response = VIRTIO_SCSI_S_FAILURE; + } else { + cmd_wr->sense_len = MIN(io->scsiio.sense_len, + sc->vss_config.sense_size); + cmd_wr->residual = io->scsiio.residual; + cmd_wr->status = io->scsiio.scsi_status; + cmd_wr->response = VIRTIO_SCSI_S_OK; + memcpy(&cmd_wr->sense, &io->scsiio.sense_data, + cmd_wr->sense_len); + } + + buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out, 0); + free(cmd_rd); + free(cmd_wr); + ctl_scsi_free_io(io); + return (VTSCSI_OUT_HEADER_LEN(sc) + io->scsiio.ext_data_filled); +} + +static void +pci_vtscsi_controlq_notify(void *vsc, struct vqueue_info *vq) +{ + struct pci_vtscsi_softc *sc; + struct iovec iov[VTSCSI_MAXSEG]; + uint16_t idx, n; + void *buf = NULL; + size_t bufsize; + int iolen; + + sc = vsc; + + while (vq_has_descs(vq)) { + n = vq_getchain(vq, &idx, iov, VTSCSI_MAXSEG, NULL); + bufsize = iov_to_buf(iov, n, &buf); + iolen = pci_vtscsi_control_handle(sc, buf, bufsize); + buf_to_iov(buf + bufsize - iolen, 
iolen, iov, n, iolen); + + /* + * Release this chain and handle more + */ + vq_relchain(vq, idx, iolen); + } + vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ +} + +static void +pci_vtscsi_eventq_notify(void *vsc, struct vqueue_info *vq) +{ + + vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY; +} + +static void +pci_vtscsi_requestq_notify(void *vsc, struct vqueue_info *vq) +{ + struct pci_vtscsi_softc *sc; + struct pci_vtscsi_queue *q; + struct pci_vtscsi_request *req; + struct iovec iov[VTSCSI_MAXSEG]; + uint16_t flags[VTSCSI_MAXSEG]; + uint16_t idx, n, i; + int readable; + + sc = vsc; + q = &sc->vss_queues[vq->vq_num - 2]; + + while (vq_has_descs(vq)) { + readable = 0; + n = vq_getchain(vq, &idx, iov, VTSCSI_MAXSEG, flags); + + /* Count readable descriptors */ + for (i = 0; i < n; i++) { + if (flags[i] & VRING_DESC_F_WRITE) + break; + + readable++; + } + + req = calloc(1, sizeof(struct pci_vtscsi_request)); + req->vsr_idx = idx; + req->vsr_queue = q; + req->vsr_niov_in = readable; + req->vsr_niov_out = n - readable; + memcpy(req->vsr_iov_in, iov, + req->vsr_niov_in * sizeof(struct iovec)); + memcpy(req->vsr_iov_out, iov + readable, + req->vsr_niov_out * sizeof(struct iovec)); + + pthread_mutex_lock(&q->vsq_mtx); + STAILQ_INSERT_TAIL(&q->vsq_requests, req, vsr_link); + pthread_cond_signal(&q->vsq_cv); + pthread_mutex_unlock(&q->vsq_mtx); + + DPRINTF(("virtio-scsi: request <idx=%d> enqueued\n", idx)); + } +} + +static int +pci_vtscsi_init_queue(struct pci_vtscsi_softc *sc, + struct pci_vtscsi_queue *queue, int num) +{ + struct pci_vtscsi_worker *worker; + char threadname[16]; + int i; + + queue->vsq_sc = sc; + queue->vsq_ctl_fd = open("/dev/cam/ctl", O_RDWR); + queue->vsq_vq = &sc->vss_vq[num + 2]; + + if (queue->vsq_ctl_fd < 0) { + WPRINTF(("cannot open /dev/cam/ctl: %s\n", strerror(errno))); + return (-1); + } + + pthread_mutex_init(&queue->vsq_mtx, NULL); + pthread_mutex_init(&queue->vsq_qmtx, NULL); + pthread_cond_init(&queue->vsq_cv, NULL); + STAILQ_INIT(&queue->vsq_requests); + LIST_INIT(&queue->vsq_workers); + + for (i = 0; i < VTSCSI_THR_PER_Q; i++) { + worker = calloc(1, sizeof(struct pci_vtscsi_worker)); + worker->vsw_queue = queue; + + pthread_create(&worker->vsw_thread, NULL, &pci_vtscsi_proc, + (void *)worker); + + sprintf(threadname, "virtio-scsi:%d-%d", num, i); + pthread_set_name_np(worker->vsw_thread, threadname); + LIST_INSERT_HEAD(&queue->vsq_workers, worker, vsw_link); + } + + return (0); +} + +static int +pci_vtscsi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) +{ + struct pci_vtscsi_softc *sc; + char *optname = NULL; + char *opt; + int i; + + sc = calloc(1, sizeof(struct pci_vtscsi_softc)); + sc->vss_ctl_fd = open("/dev/cam/ctl", O_RDWR); + + if (sc->vss_ctl_fd < 0) { + WPRINTF(("cannot open /dev/cam/ctl: %s\n", strerror(errno))); + return (1); + } + + while ((opt = strsep(&opts, ",")) != NULL) { + if ((optname = strsep(&opt, "=")) != NULL) { + if (strcmp(optname, "iid") == 0) { + sc->vss_iid = strtoul(opt, NULL, 10); + } + } + } + + vi_softc_linkup(&sc->vss_vs, &vtscsi_vi_consts, sc, pi, sc->vss_vq); + sc->vss_vs.vs_mtx = &sc->vss_mtx; + + /* controlq */ + sc->vss_vq[0].vq_qsize = VTSCSI_RINGSZ; + sc->vss_vq[0].vq_notify = pci_vtscsi_controlq_notify; + + /* eventq */ + sc->vss_vq[1].vq_qsize = VTSCSI_RINGSZ; + sc->vss_vq[1].vq_notify = pci_vtscsi_eventq_notify; + + /* request queues */ + for (i = 2; i < VTSCSI_MAXQ; i++) { + sc->vss_vq[i].vq_qsize = VTSCSI_RINGSZ; + sc->vss_vq[i].vq_notify = pci_vtscsi_requestq_notify; + 
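		/*
		 * Each request virtqueue (indices 2 .. VTSCSI_MAXQ - 1) is
		 * backed below by its own pci_vtscsi_queue: a private
		 * /dev/cam/ctl descriptor plus VTSCSI_THR_PER_Q worker
		 * threads draining the STAILQ that
		 * pci_vtscsi_requestq_notify() fills.
		 */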
pci_vtscsi_init_queue(sc, &sc->vss_queues[i - 2], i - 2); + } + + /* initialize config space */ + pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_SCSI); + pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); + pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); + pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_SCSI); + pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); + + if (vi_intr_init(&sc->vss_vs, 1, fbsdrun_virtio_msix())) + return (1); + vi_set_io_bar(&sc->vss_vs, 0); + + return (0); +} + + +struct pci_devemu pci_de_vscsi = { + .pe_emu = "virtio-scsi", + .pe_init = pci_vtscsi_init, + .pe_barwrite = vi_pci_write, + .pe_barread = vi_pci_read +}; +PCI_EMUL_SET(pci_de_vscsi); diff --git a/usr.sbin/bhyve/virtio.h b/usr.sbin/bhyve/virtio.h index cd28f557380d..da4e28431eb0 100644 --- a/usr.sbin/bhyve/virtio.h +++ b/usr.sbin/bhyve/virtio.h @@ -1,467 +1,468 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 Chris Torek <torek @ torek net> * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VIRTIO_H_ #define _VIRTIO_H_ /* * These are derived from several virtio specifications. * * Some useful links: * https://github.com/rustyrussell/virtio-spec * http://people.redhat.com/pbonzini/virtio-spec.pdf */ /* * A virtual device has zero or more "virtual queues" (virtqueue). * Each virtqueue uses at least two 4096-byte pages, laid out thus: * * +-----------------------------------------------+ * | "desc": <N> descriptors, 16 bytes each | * | ----------------------------------------- | * | "avail": 2 uint16; <N> uint16; 1 uint16 | * | ----------------------------------------- | * | pad to 4k boundary | * +-----------------------------------------------+ * | "used": 2 x uint16; <N> elems; 1 uint16 | * | ----------------------------------------- | * | pad to 4k boundary | * +-----------------------------------------------+ * * The number <N> that appears here is always a power of two and is * limited to no more than 32768 (as it must fit in a 16-bit field). * If <N> is sufficiently large, the above will occupy more than * two pages. In any case, all pages must be physically contiguous * within the guest's physical address space. 
* * The <N> 16-byte "desc" descriptors consist of a 64-bit guest * physical address <addr>, a 32-bit length <len>, a 16-bit * <flags>, and a 16-bit <next> field (all in guest byte order). * * There are three flags that may be set : * NEXT descriptor is chained, so use its "next" field * WRITE descriptor is for host to write into guest RAM * (else host is to read from guest RAM) * INDIRECT descriptor address field is (guest physical) * address of a linear array of descriptors * * Unless INDIRECT is set, <len> is the number of bytes that may * be read/written from guest physical address <addr>. If * INDIRECT is set, WRITE is ignored and <len> provides the length * of the indirect descriptors (and <len> must be a multiple of * 16). Note that NEXT may still be set in the main descriptor * pointing to the indirect, and should be set in each indirect * descriptor that uses the next descriptor (these should generally * be numbered sequentially). However, INDIRECT must not be set * in the indirect descriptors. Upon reaching an indirect descriptor * without a NEXT bit, control returns to the direct descriptors. * * Except inside an indirect, each <next> value must be in the * range [0 .. N) (i.e., the half-open interval). (Inside an * indirect, each <next> must be in the range [0 .. <len>/16).) * * The "avail" data structures reside in the same pages as the * "desc" structures since both together are used by the device to * pass information to the hypervisor's virtual driver. These * begin with a 16-bit <flags> field and 16-bit index <idx>, then * have <N> 16-bit <ring> values, followed by one final 16-bit * field <used_event>. The <N> <ring> entries are simply indices * indices into the descriptor ring (and thus must meet the same * constraints as each <next> value). However, <idx> is counted * up from 0 (initially) and simply wraps around after 65535; it * is taken mod <N> to find the next available entry. * * The "used" ring occupies a separate page or pages, and contains * values written from the virtual driver back to the guest OS. * This begins with a 16-bit <flags> and 16-bit <idx>, then there * are <N> "vring_used" elements, followed by a 16-bit <avail_event>. * The <N> "vring_used" elements consist of a 32-bit <id> and a * 32-bit <len> (vu_tlen below). The <id> is simply the index of * the head of a descriptor chain the guest made available * earlier, and the <len> is the number of bytes actually written, * e.g., in the case of a network driver that provided a large * receive buffer but received only a small amount of data. * * The two event fields, <used_event> and <avail_event>, in the * avail and used rings (respectively -- note the reversal!), are * always provided, but are used only if the virtual device * negotiates the VIRTIO_RING_F_EVENT_IDX feature during feature * negotiation. Similarly, both rings provide a flag -- * VRING_AVAIL_F_NO_INTERRUPT and VRING_USED_F_NO_NOTIFY -- in * their <flags> field, indicating that the guest does not need an * interrupt, or that the hypervisor driver does not need a * notify, when descriptors are added to the corresponding ring. * (These are provided only for interrupt optimization and need * not be implemented.) 
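 *
 * Worked example for a 64-entry queue (the ring size used by the new
 * virtio-scsi device): the descriptors take 64 * 16 = 1024 bytes and
 * "avail" takes 2 + 2 + 64 * 2 + 2 = 134 bytes, so the first region
 * rounds up to 4096 bytes; "used" takes 2 + 2 + 64 * 8 + 2 = 518 bytes
 * and rounds up to another 4096, for a total of 8192 bytes (two pages),
 * which is exactly what vring_size(64) below computes.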
*/ #define VRING_ALIGN 4096 #define VRING_DESC_F_NEXT (1 << 0) #define VRING_DESC_F_WRITE (1 << 1) #define VRING_DESC_F_INDIRECT (1 << 2) struct virtio_desc { /* AKA vring_desc */ uint64_t vd_addr; /* guest physical address */ uint32_t vd_len; /* length of scatter/gather seg */ uint16_t vd_flags; /* VRING_F_DESC_* */ uint16_t vd_next; /* next desc if F_NEXT */ } __packed; struct virtio_used { /* AKA vring_used_elem */ uint32_t vu_idx; /* head of used descriptor chain */ uint32_t vu_tlen; /* length written-to */ } __packed; #define VRING_AVAIL_F_NO_INTERRUPT 1 struct vring_avail { uint16_t va_flags; /* VRING_AVAIL_F_* */ uint16_t va_idx; /* counts to 65535, then cycles */ uint16_t va_ring[]; /* size N, reported in QNUM value */ /* uint16_t va_used_event; -- after N ring entries */ } __packed; #define VRING_USED_F_NO_NOTIFY 1 struct vring_used { uint16_t vu_flags; /* VRING_USED_F_* */ uint16_t vu_idx; /* counts to 65535, then cycles */ struct virtio_used vu_ring[]; /* size N */ /* uint16_t vu_avail_event; -- after N ring entries */ } __packed; /* * The address of any given virtual queue is determined by a single * Page Frame Number register. The guest writes the PFN into the * PCI config space. However, a device that has two or more * virtqueues can have a different PFN, and size, for each queue. * The number of queues is determinable via the PCI config space * VTCFG_R_QSEL register. Writes to QSEL select the queue: 0 means * queue #0, 1 means queue#1, etc. Once a queue is selected, the * remaining PFN and QNUM registers refer to that queue. * * QNUM is a read-only register containing a nonzero power of two * that indicates the (hypervisor's) queue size. Or, if reading it * produces zero, the hypervisor does not have a corresponding * queue. (The number of possible queues depends on the virtual * device. The block device has just one; the network device * provides either two -- 0 = receive, 1 = transmit -- or three, * with 2 = control.) * * PFN is a read/write register giving the physical page address of * the virtqueue in guest memory (the guest must allocate enough space * based on the hypervisor's provided QNUM). * * QNOTIFY is effectively write-only: when the guest writes a queue * number to the register, the hypervisor should scan the specified * virtqueue. (Reading QNOTIFY currently always gets 0). */ /* * PFN register shift amount */ #define VRING_PFN 12 /* * Virtio device types * * XXX Should really be merged with <dev/virtio/virtio.h> defines */ #define VIRTIO_TYPE_NET 1 #define VIRTIO_TYPE_BLOCK 2 #define VIRTIO_TYPE_CONSOLE 3 #define VIRTIO_TYPE_ENTROPY 4 #define VIRTIO_TYPE_BALLOON 5 #define VIRTIO_TYPE_IOMEMORY 6 #define VIRTIO_TYPE_RPMSG 7 #define VIRTIO_TYPE_SCSI 8 #define VIRTIO_TYPE_9P 9 /* experimental IDs start at 65535 and work down */ /* * PCI vendor/device IDs */ #define VIRTIO_VENDOR 0x1AF4 #define VIRTIO_DEV_NET 0x1000 #define VIRTIO_DEV_BLOCK 0x1001 #define VIRTIO_DEV_CONSOLE 0x1003 #define VIRTIO_DEV_RANDOM 0x1005 +#define VIRTIO_DEV_SCSI 0x1008 /* * PCI config space constants. * * If MSI-X is enabled, the ISR register is generally not used, * and the configuration vector and queue vector appear at offsets * 20 and 22 with the remaining configuration registers at 24. * If MSI-X is not enabled, those two registers disappear and * the remaining configuration registers start at offset 20. 
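 *
 * Concretely, a guest driver brings up one queue of the new SCSI device
 * by writing its index to VTCFG_R_QSEL, reading the queue size from
 * VTCFG_R_QNUM (64 here), allocating vring_size(qsize) bytes of
 * physically contiguous, page-aligned guest memory, writing that
 * region's guest physical address >> VRING_PFN to VTCFG_R_PFN, and
 * later kicking the host by writing the queue index to VTCFG_R_QNOTIFY.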
*/ #define VTCFG_R_HOSTCAP 0 #define VTCFG_R_GUESTCAP 4 #define VTCFG_R_PFN 8 #define VTCFG_R_QNUM 12 #define VTCFG_R_QSEL 14 #define VTCFG_R_QNOTIFY 16 #define VTCFG_R_STATUS 18 #define VTCFG_R_ISR 19 #define VTCFG_R_CFGVEC 20 #define VTCFG_R_QVEC 22 #define VTCFG_R_CFG0 20 /* No MSI-X */ #define VTCFG_R_CFG1 24 /* With MSI-X */ #define VTCFG_R_MSIX 20 /* * Bits in VTCFG_R_STATUS. Guests need not actually set any of these, * but a guest writing 0 to this register means "please reset". */ #define VTCFG_STATUS_ACK 0x01 /* guest OS has acknowledged dev */ #define VTCFG_STATUS_DRIVER 0x02 /* guest OS driver is loaded */ #define VTCFG_STATUS_DRIVER_OK 0x04 /* guest OS driver ready */ #define VTCFG_STATUS_FAILED 0x80 /* guest has given up on this dev */ /* * Bits in VTCFG_R_ISR. These apply only if not using MSI-X. * * (We don't [yet?] ever use CONF_CHANGED.) */ #define VTCFG_ISR_QUEUES 0x01 /* re-scan queues */ #define VTCFG_ISR_CONF_CHANGED 0x80 /* configuration changed */ #define VIRTIO_MSI_NO_VECTOR 0xFFFF /* * Feature flags. * Note: bits 0 through 23 are reserved to each device type. */ #define VIRTIO_F_NOTIFY_ON_EMPTY (1 << 24) #define VIRTIO_RING_F_INDIRECT_DESC (1 << 28) #define VIRTIO_RING_F_EVENT_IDX (1 << 29) /* From section 2.3, "Virtqueue Configuration", of the virtio specification */ static inline size_t vring_size(u_int qsz) { size_t size; /* constant 3 below = va_flags, va_idx, va_used_event */ size = sizeof(struct virtio_desc) * qsz + sizeof(uint16_t) * (3 + qsz); size = roundup2(size, VRING_ALIGN); /* constant 3 below = vu_flags, vu_idx, vu_avail_event */ size += sizeof(uint16_t) * 3 + sizeof(struct virtio_used) * qsz; size = roundup2(size, VRING_ALIGN); return (size); } struct vmctx; struct pci_devinst; struct vqueue_info; /* * A virtual device, with some number (possibly 0) of virtual * queues and some size (possibly 0) of configuration-space * registers private to the device. The virtio_softc should come * at the front of each "derived class", so that a pointer to the * virtio_softc is also a pointer to the more specific, derived- * from-virtio driver's softc. * * Note: inside each hypervisor virtio driver, changes to these * data structures must be locked against other threads, if any. * Except for PCI config space register read/write, we assume each * driver does the required locking, but we need a pointer to the * lock (if there is one) for PCI config space read/write ops. * * When the guest reads or writes the device's config space, the * generic layer checks for operations on the special registers * described above. If the offset of the register(s) being read * or written is past the CFG area (CFG0 or CFG1), the request is * passed on to the virtual device, after subtracting off the * generic-layer size. (So, drivers can just use the offset as * an offset into "struct config", for instance.) * * (The virtio layer also makes sure that the read or write is to/ * from a "good" config offset, hence vc_cfgsize, and on BAR #0. * However, the driver must verify the read or write size and offset * and that no one is writing a readonly register.) * * The BROKED flag ("this thing done gone and broked") is for future * use. */ #define VIRTIO_USE_MSIX 0x01 #define VIRTIO_EVENT_IDX 0x02 /* use the event-index values */ #define VIRTIO_BROKED 0x08 /* ??? 
*/ struct virtio_softc { struct virtio_consts *vs_vc; /* constants (see below) */ int vs_flags; /* VIRTIO_* flags from above */ pthread_mutex_t *vs_mtx; /* POSIX mutex, if any */ struct pci_devinst *vs_pi; /* PCI device instance */ uint32_t vs_negotiated_caps; /* negotiated capabilities */ struct vqueue_info *vs_queues; /* one per vc_nvq */ int vs_curq; /* current queue */ uint8_t vs_status; /* value from last status write */ uint8_t vs_isr; /* ISR flags, if not MSI-X */ uint16_t vs_msix_cfg_idx; /* MSI-X vector for config event */ }; #define VS_LOCK(vs) \ do { \ if (vs->vs_mtx) \ pthread_mutex_lock(vs->vs_mtx); \ } while (0) #define VS_UNLOCK(vs) \ do { \ if (vs->vs_mtx) \ pthread_mutex_unlock(vs->vs_mtx); \ } while (0) struct virtio_consts { const char *vc_name; /* name of driver (for diagnostics) */ int vc_nvq; /* number of virtual queues */ size_t vc_cfgsize; /* size of dev-specific config regs */ void (*vc_reset)(void *); /* called on virtual device reset */ void (*vc_qnotify)(void *, struct vqueue_info *); /* called on QNOTIFY if no VQ notify */ int (*vc_cfgread)(void *, int, int, uint32_t *); /* called to read config regs */ int (*vc_cfgwrite)(void *, int, int, uint32_t); /* called to write config regs */ void (*vc_apply_features)(void *, uint64_t); /* called to apply negotiated features */ uint64_t vc_hv_caps; /* hypervisor-provided capabilities */ }; /* * Data structure allocated (statically) per virtual queue. * * Drivers may change vq_qsize after a reset. When the guest OS * requests a device reset, the hypervisor first calls * vs->vs_vc->vc_reset(); then the data structure below is * reinitialized (for each virtqueue: vs->vs_vc->vc_nvq). * * The remaining fields should only be fussed-with by the generic * code. * * Note: the addresses of vq_desc, vq_avail, and vq_used are all * computable from each other, but it's a lot simpler if we just * keep a pointer to each one. The event indices are similarly * (but more easily) computable, and this time we'll compute them: * they're just XX_ring[N]. */ #define VQ_ALLOC 0x01 /* set once we have a pfn */ #define VQ_BROKED 0x02 /* ??? */ struct vqueue_info { uint16_t vq_qsize; /* size of this queue (a power of 2) */ void (*vq_notify)(void *, struct vqueue_info *); /* called instead of vc_notify, if not NULL */ struct virtio_softc *vq_vs; /* backpointer to softc */ uint16_t vq_num; /* we're the num'th queue in the softc */ uint16_t vq_flags; /* flags (see above) */ uint16_t vq_last_avail; /* a recent value of vq_avail->va_idx */ uint16_t vq_save_used; /* saved vq_used->vu_idx; see vq_endchains */ uint16_t vq_msix_idx; /* MSI-X index, or VIRTIO_MSI_NO_VECTOR */ uint32_t vq_pfn; /* PFN of virt queue (not shifted!) */ volatile struct virtio_desc *vq_desc; /* descriptor array */ volatile struct vring_avail *vq_avail; /* the "avail" ring */ volatile struct vring_used *vq_used; /* the "used" ring */ }; /* as noted above, these are sort of backwards, name-wise */ #define VQ_AVAIL_EVENT_IDX(vq) \ (*(volatile uint16_t *)&(vq)->vq_used->vu_ring[(vq)->vq_qsize]) #define VQ_USED_EVENT_IDX(vq) \ ((vq)->vq_avail->va_ring[(vq)->vq_qsize]) /* * Is this ring ready for I/O? */ static inline int vq_ring_ready(struct vqueue_info *vq) { return (vq->vq_flags & VQ_ALLOC); } /* * Are there "available" descriptors? (This does not count * how many, just returns True if there are some.) 
*/ static inline int vq_has_descs(struct vqueue_info *vq) { return (vq_ring_ready(vq) && vq->vq_last_avail != vq->vq_avail->va_idx); } /* * Deliver an interrupt to guest on the given virtual queue * (if possible, or a generic MSI interrupt if not using MSI-X). */ static inline void vq_interrupt(struct virtio_softc *vs, struct vqueue_info *vq) { if (pci_msix_enabled(vs->vs_pi)) pci_generate_msix(vs->vs_pi, vq->vq_msix_idx); else { VS_LOCK(vs); vs->vs_isr |= VTCFG_ISR_QUEUES; pci_generate_msi(vs->vs_pi, 0); pci_lintr_assert(vs->vs_pi); VS_UNLOCK(vs); } } struct iovec; void vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc, void *dev_softc, struct pci_devinst *pi, struct vqueue_info *queues); int vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix); void vi_reset_dev(struct virtio_softc *); void vi_set_io_bar(struct virtio_softc *, int); int vq_getchain(struct vqueue_info *vq, uint16_t *pidx, struct iovec *iov, int n_iov, uint16_t *flags); void vq_retchain(struct vqueue_info *vq); void vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen); void vq_endchains(struct vqueue_info *vq, int used_all_avail); uint64_t vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size); void vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value); #endif /* _VIRTIO_H_ */
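Seen from the other side, pci_virtio_scsi.c is one more client of the small device-model contract this header defines: fill in a virtio_consts table, link a softc to it with vi_softc_linkup(), set up the BAR and interrupts, and service queue kicks with vq_getchain()/vq_relchain()/vq_endchains(). A stripped-down sketch of that pattern follows; the "virtio-null" device, its config word, and its complete-everything-untouched behaviour are invented for illustration, while the helper calls are the ones declared above:

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/uio.h>

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "bhyverun.h"
#include "pci_emul.h"
#include "virtio.h"

#define VTNULL_RINGSZ	64

struct pci_vtnull_softc {
	struct virtio_softc	 vns_vs;	/* must be first */
	struct vqueue_info	 vns_vq;
	pthread_mutex_t		 vns_mtx;
	uint32_t		 vns_cfg;	/* made-up config register */
};

static void
pci_vtnull_reset(void *vsc)
{
	struct pci_vtnull_softc *sc = vsc;

	vi_reset_dev(&sc->vns_vs);
}

static int
pci_vtnull_cfgread(void *vsc, int offset, int size, uint32_t *retval)
{
	struct pci_vtnull_softc *sc = vsc;

	memcpy(retval, (uint8_t *)&sc->vns_cfg + offset, size);
	return (0);
}

static void
pci_vtnull_notify(void *vsc, struct vqueue_info *vq)
{
	struct iovec iov[8];
	uint16_t idx;

	/* Complete every available chain without writing anything back. */
	while (vq_has_descs(vq)) {
		vq_getchain(vq, &idx, iov, 8, NULL);
		vq_relchain(vq, idx, 0);
	}
	vq_endchains(vq, 1);	/* interrupt if the guest asked for one */
}

static struct virtio_consts vtnull_vi_consts = {
	"vtnull",		/* name */
	1,			/* single virtqueue */
	sizeof(uint32_t),	/* config reg size */
	pci_vtnull_reset,	/* reset */
	NULL,			/* device-wide qnotify */
	pci_vtnull_cfgread,	/* read virtio config */
	NULL,			/* write virtio config */
	NULL,			/* apply negotiated features */
	0,			/* our capabilities */
};

static int
pci_vtnull_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	struct pci_vtnull_softc *sc;

	sc = calloc(1, sizeof(struct pci_vtnull_softc));
	pthread_mutex_init(&sc->vns_mtx, NULL);

	vi_softc_linkup(&sc->vns_vs, &vtnull_vi_consts, sc, pi, &sc->vns_vq);
	sc->vns_vs.vs_mtx = &sc->vns_mtx;
	sc->vns_vq.vq_qsize = VTNULL_RINGSZ;
	sc->vns_vq.vq_notify = pci_vtnull_notify;

	/* Reuse an existing device ID purely for the sketch. */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_RANDOM);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_OTHER);

	if (vi_intr_init(&sc->vns_vs, 1, fbsdrun_virtio_msix()))
		return (1);
	vi_set_io_bar(&sc->vns_vs, 0);
	return (0);
}

static struct pci_devemu pci_de_vnull = {
	.pe_emu = "virtio-null",	/* illustrative name */
	.pe_init = pci_vtnull_init,
	.pe_barwrite = vi_pci_write,
	.pe_barread = vi_pci_read
};
PCI_EMUL_SET(pci_de_vnull);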