Index: head/usr.sbin/bhyve/Makefile =================================================================== --- head/usr.sbin/bhyve/Makefile (revision 333139) +++ head/usr.sbin/bhyve/Makefile (revision 333140) @@ -1,80 +1,85 @@ # # $FreeBSD$ # .include PROG= bhyve PACKAGE= bhyve DEBUG_FLAGS= -g -O0 MAN= bhyve.8 BHYVE_SYSDIR?=${SRCTOP} SRCS= \ atkbdc.c \ acpi.c \ bhyvegc.c \ bhyverun.c \ block_if.c \ bootrom.c \ console.c \ consport.c \ dbgport.c \ fwctl.c \ + gdb.c \ inout.c \ ioapic.c \ mem.c \ mevent.c \ mptbl.c \ pci_ahci.c \ pci_e82545.c \ pci_emul.c \ pci_fbuf.c \ pci_hostbridge.c \ pci_irq.c \ pci_lpc.c \ pci_passthru.c \ pci_virtio_block.c \ pci_virtio_console.c \ pci_virtio_net.c \ pci_virtio_rnd.c \ pci_uart.c \ pci_xhci.c \ pm.c \ post.c \ ps2kbd.c \ ps2mouse.c \ rfb.c \ rtc.c \ smbiostbl.c \ sockstream.c \ task_switch.c \ uart_emul.c \ usb_emul.c \ usb_mouse.c \ virtio.c \ vga.c \ xmsr.c \ spinup_ap.c .PATH: ${BHYVE_SYSDIR}/sys/amd64/vmm SRCS+= vmm_instruction_emul.c LIBADD= vmmapi md pthread z .if ${MK_OPENSSL} == "no" CFLAGS+=-DNO_OPENSSL .else LIBADD+= crypto .endif CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/e1000 CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/mii CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller + +.ifdef GDB_LOG +CFLAGS+=-DGDB_LOG +.endif WARNS?= 2 .include Index: head/usr.sbin/bhyve/bhyve.8 =================================================================== --- head/usr.sbin/bhyve/bhyve.8 (revision 333139) +++ head/usr.sbin/bhyve/bhyve.8 (revision 333140) @@ -1,534 +1,566 @@ .\" Copyright (c) 2013 Peter Grehan .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd April 6, 2018 +.Dd May 1, 2018 .Dt BHYVE 8 .Os .Sh NAME .Nm bhyve .Nd "run a guest operating system inside a virtual machine" .Sh SYNOPSIS .Nm .Op Fl abehuwxACHPSWY .Oo .Fl c\~ Ns .Oo .Op Ar cpus= Ns .Ar numcpus Ns .Oc Ns .Op Ar ,sockets=n Ns .Op Ar ,cores=n Ns .Op Ar ,threads=n .Oc .Op Fl g Ar gdbport .Op Fl l Ar lpcdev Ns Op , Ns Ar conf .Op Fl m Ar memsize Ns Op Ar K|k|M|m|G|g|T|t .Op Fl p Ar vcpu:hostcpu .Op Fl s Ar slot,emulation Ns Op , Ns Ar conf +.Op Fl G Ar port .Op Fl U Ar uuid .Ar vmname .Sh DESCRIPTION .Nm is a hypervisor that runs guest operating systems inside a virtual machine. .Pp Parameters such as the number of virtual CPUs, amount of guest memory, and I/O connectivity can be specified with command-line parameters. .Pp If not using a boot ROM, the guest operating system must be loaded with .Xr bhyveload 8 or a similar boot loader before running .Nm , otherwise, it is enough to run .Nm with a boot ROM of choice. .Pp .Nm runs until the guest operating system reboots or an unhandled hypervisor exit is detected. .Sh OPTIONS .Bl -tag -width 10n .It Fl a The guest's local APIC is configured in xAPIC mode. The xAPIC mode is the default setting so this option is redundant. It will be deprecated in a future version. .It Fl A Generate ACPI tables. Required for .Fx Ns /amd64 guests. .It Fl b Enable a low-level console device supported by .Fx kernels compiled with .Cd "device bvmconsole" . This option will be deprecated in a future version. .It Fl c Op Ar setting ... Number of guest virtual CPUs and/or the CPU topology. The default value for each of .Ar numcpus , .Ar sockets , .Ar cores , and .Ar threads is 1. The current maximum number of guest virtual CPUs is 16. If .Ar numcpus is not specified then it will be calculated from the other arguments. The topology must be consistent in that the .Ar numcpus must equal the product of .Ar sockets , .Ar cores , and .Ar threads . If a .Ar setting is specified more than once the last one has precedence. .It Fl C Include guest memory in core file. .It Fl e Force .Nm to exit when a guest issues an access to an I/O port that is not emulated. This is intended for debug purposes. .It Fl g Ar gdbport For .Fx kernels compiled with .Cd "device bvmdebug" , allow a remote kernel kgdb to be relayed to the guest kernel gdb stub via a local IPv4 address and this port. This option will be deprecated in a future version. +.It Fl G Ar port +Start a debug server that uses the GDB protocol to export guest state to a +debugger. +An IPv4 TCP socket will be bound to the supplied +.Ar port +to listen for debugger connections. +Only a single debugger may be attached to the debug server at a time. +If +.Ar port +begins with +.Sq w , +.Nm +will pause execution at the first instruction waiting for a debugger to attach. .It Fl h Print help message and exit. .It Fl H Yield the virtual CPU thread when a HLT instruction is detected. If this option is not specified, virtual CPUs will use 100% of a host CPU. .It Fl l Ar lpcdev Ns Op , Ns Ar conf Allow devices behind the LPC PCI-ISA bridge to be configured. The only supported devices are the TTY-class devices .Ar com1 and .Ar com2 and the boot ROM device .Ar bootrom . .It Fl m Ar memsize Ns Op Ar K|k|M|m|G|g|T|t Guest physical memory size in bytes. This must be the same size that was given to .Xr bhyveload 8 . .Pp The size argument may be suffixed with one of K, M, G or T (either upper or lower case) to indicate a multiple of kilobytes, megabytes, gigabytes, or terabytes. If no suffix is given, the value is assumed to be in megabytes. .Pp .Ar memsize defaults to 256M. .It Fl p Ar vcpu:hostcpu Pin guest's virtual CPU .Em vcpu to .Em hostcpu . .It Fl P Force the guest virtual CPU to exit when a PAUSE instruction is detected. .It Fl s Ar slot,emulation Ns Op , Ns Ar conf Configure a virtual PCI slot and function. .Pp .Nm provides PCI bus emulation and virtual devices that can be attached to slots on the bus. There are 32 available slots, with the option of providing up to 8 functions per slot. .Bl -tag -width 10n .It Ar slot .Ar pcislot[:function] .Ar bus:pcislot:function .Pp The .Ar pcislot value is 0 to 31. The optional .Ar function value is 0 to 7. The optional .Ar bus value is 0 to 255. If not specified, the .Ar function value defaults to 0. If not specified, the .Ar bus value defaults to 0. .It Ar emulation .Bl -tag -width 10n .It Li hostbridge | Li amd_hostbridge .Pp Provide a simple host bridge. This is usually configured at slot 0, and is required by most guest operating systems. The .Li amd_hostbridge emulation is identical but uses a PCI vendor ID of .Li AMD . .It Li passthru PCI pass-through device. .It Li virtio-net Virtio network interface. .It Li virtio-blk Virtio block storage interface. .It Li virtio-rnd Virtio RNG interface. .It Li virtio-console Virtio console interface, which exposes multiple ports to the guest in the form of simple char devices for simple IO between the guest and host userspaces. .It Li ahci AHCI controller attached to arbitrary devices. .It Li ahci-cd AHCI controller attached to an ATAPI CD/DVD. .It Li ahci-hd AHCI controller attached to a SATA hard-drive. .It Li e1000 Intel e82545 network interface. .It Li uart PCI 16550 serial device. .It Li lpc LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports and a boot ROM. The LPC bridge emulation can only be configured on bus 0. .It Li fbuf Raw framebuffer device attached to VNC server. .It Li xhci eXtensible Host Controller Interface (xHCI) USB controller. .El .It Op Ar conf This optional parameter describes the backend for device emulations. If .Ar conf is not specified, the device emulation has no backend and can be considered unconnected. .Pp Network devices: .Bl -tag -width 10n .It Ar tapN Ns Op , Ns Ar mac=xx:xx:xx:xx:xx:xx .It Ar vmnetN Ns Op , Ns Ar mac=xx:xx:xx:xx:xx:xx .Pp If .Ar mac is not specified, the MAC address is derived from a fixed OUI and the remaining bytes from an MD5 hash of the slot and function numbers and the device name. .Pp The MAC address is an ASCII string in .Xr ethers 5 format. .El .Pp Block storage devices: .Bl -tag -width 10n .It Pa /filename Ns Oo , Ns Ar block-device-options Oc .It Pa /dev/xxx Ns Oo , Ns Ar block-device-options Oc .El .Pp The .Ar block-device-options are: .Bl -tag -width 8n .It Li nocache Open the file with .Dv O_DIRECT . .It Li direct Open the file using .Dv O_SYNC . .It Li ro Force the file to be opened read-only. .It Li sectorsize= Ns Ar logical Ns Oo / Ns Ar physical Oc Specify the logical and physical sector sizes of the emulated disk. The physical sector size is optional and is equal to the logical sector size if not explicitly specified. .El .Pp TTY devices: .Bl -tag -width 10n .It Li stdio Connect the serial port to the standard input and output of the .Nm process. .It Pa /dev/xxx Use the host TTY device for serial port I/O. .El .Pp Boot ROM device: .Bl -tag -width 10n .It Pa romfile Map .Ar romfile in the guest address space reserved for boot firmware. .El .Pp Pass-through devices: .Bl -tag -width 10n .It Ns Ar slot Ns / Ns Ar bus Ns / Ns Ar function Connect to a PCI device on the host at the selector described by .Ar slot , .Ar bus , and .Ar function numbers. .El .Pp Guest memory must be wired using the .Fl S option when a pass-through device is configured. .Pp The host device must have been reserved at boot-time using the .Va pptdev loader variable as described in .Xr vmm 4 . .Pp Virtio console devices: .Bl -tag -width 10n .It Li port1= Ns Pa /path/to/port1.sock Ns ,anotherport= Ns Pa ... A maximum of 16 ports per device can be created. Every port is named and corresponds to a Unix domain socket created by .Nm . .Nm accepts at most one connection per port at a time. .Pp Limitations: .Bl -bullet -offset 2n .It Due to lack of destructors in .Nm , sockets on the filesystem must be cleaned up manually after .Nm exits. .It There is no way to use the "console port" feature, nor the console port resize at present. .It Emergency write is advertised, but no-op at present. .El .El .Pp Framebuffer devices: .Bl -tag -width 10n .It Oo rfb= Ns Oo Ar IP: Oc Ns Ar port Oc Ns Oo ,w= Ns Ar width Oc Ns Oo ,h= Ns Ar height Oc Ns Oo ,vga= Ns Ar vgaconf Oc Ns Oo Ns ,wait Oc Ns Oo ,password= Ns Ar password Oc .Bl -tag -width 8n .It Ar IP:port An .Ar IP address and a .Ar port VNC should listen on. The default is to listen on localhost IPv4 address and default VNC port 5900. Listening on an IPv6 address is not supported. .It Ar width No and Ar height A display resolution, width and height, respectively. If not specified, a default resolution of 1024x768 pixels will be used. Minimal supported resolution is 640x480 pixels, and maximum is 1920x1200 pixels. .It Ar vgaconf Possible values for this option are .Dq io (default), .Dq on , and .Dq off . PCI graphics cards have a dual personality in that they are standard PCI devices with BAR addressing, but may also implicitly decode legacy VGA I/O space .Pq Ad 0x3c0-3df and memory space .Pq 64KB at Ad 0xA0000 . The default .Dq io option should be used for guests that attempt to issue BIOS calls which result in I/O port queries, and fail to boot if I/O decode is disabled. .Pp The .Dq on option should be used along with the CSM BIOS capability in UEFI to boot traditional BIOS guests that require the legacy VGA I/O and memory regions to be available. .Pp The .Dq off option should be used for the UEFI guests that assume that VGA adapter is present if they detect the I/O ports. An example of such a guest is .Ox in UEFI mode. .Pp Please refer to the .Nm .Fx wiki page .Pq Lk https://wiki.freebsd.org/bhyve for configuration notes of particular guests. .It wait Instruct .Nm to only boot upon the initiation of a VNC connection, simplifying the installation of operating systems that require immediate keyboard input. This can be removed for post-installation use. .It password This type of authentication is known to be cryptographically weak and is not intended for use on untrusted networks. Many implementations will want to use stronger security, such as running the session over an encrypted channel provided by IPsec or SSH. .El .El .Pp xHCI USB devices: .Bl -tag -width 10n .It Li tablet A USB tablet device which provides precise cursor synchronization when using VNC. .El .El .It Fl S Wire guest memory. .It Fl u RTC keeps UTC time. .It Fl U Ar uuid Set the universally unique identifier .Pq UUID in the guest's System Management BIOS System Information structure. By default a UUID is generated from the host's hostname and .Ar vmname . .It Fl w Ignore accesses to unimplemented Model Specific Registers (MSRs). This is intended for debug purposes. .It Fl W Force virtio PCI device emulations to use MSI interrupts instead of MSI-X interrupts. .It Fl x The guest's local APIC is configured in x2APIC mode. .It Fl Y Disable MPtable generation. .It Ar vmname Alphanumeric name of the guest. This should be the same as that created by .Xr bhyveload 8 . .El +.Sh DEBUG SERVER +The current debug server provides limited support for debuggers. +.Ss Registers +Each virtual CPU is exposed to the debugger as a thread. +.Pp +General purpose registers can be queried for each virtual CPU, but other +registers such as floating-point and system registers cannot be queried. +.Ss Memory +Memory (including memory mapped I/O regions) can be read by the debugger, +but not written. Memory operations use virtual addresses that are resolved +to physical addresses via the current virtual CPU's active address translation. +.Ss Control +The running guest can be interrupted by the debugger at any time +.Pq for example, by pressing Ctrl-C in the debugger . +.Pp +Single stepping is only supported on Intel CPUs supporting the MTRAP VM exit. +.Pp +Breakpoints are not supported. .Sh SIGNAL HANDLING .Nm deals with the following signals: .Pp .Bl -tag -width indent -compact .It SIGTERM Trigger ACPI poweroff for a VM .El .Sh EXIT STATUS Exit status indicates how the VM was terminated: .Pp .Bl -tag -width indent -compact .It 0 rebooted .It 1 powered off .It 2 halted .It 3 triple fault .El .Sh EXAMPLES If not using a boot ROM, the guest operating system must have been loaded with .Xr bhyveload 8 or a similar boot loader before .Xr bhyve 4 can be run. Otherwise, the boot loader is not needed. .Pp To run a virtual machine with 1GB of memory, two virtual CPUs, a virtio block device backed by the .Pa /my/image filesystem image, and a serial port for the console: .Bd -literal -offset indent bhyve -c 2 -s 0,hostbridge -s 1,lpc -s 2,virtio-blk,/my/image \\ -l com1,stdio -A -H -P -m 1G vm1 .Ed .Pp Run a 24GB single-CPU virtual machine with three network ports, one of which has a MAC address specified: .Bd -literal -offset indent bhyve -s 0,hostbridge -s 1,lpc -s 2:0,virtio-net,tap0 \\ -s 2:1,virtio-net,tap1 \\ -s 2:2,virtio-net,tap2,mac=00:be:fa:76:45:00 \\ -s 3,virtio-blk,/my/image -l com1,stdio \\ -A -H -P -m 24G bigvm .Ed .Pp Run an 8GB quad-CPU virtual machine with 8 AHCI SATA disks, an AHCI ATAPI CD-ROM, a single virtio network port, an AMD hostbridge, and the console port connected to an .Xr nmdm 4 null-modem device. .Bd -literal -offset indent bhyve -c 4 \\ -s 0,amd_hostbridge -s 1,lpc \\ -s 1:0,ahci,hd:/images/disk.1,hd:/images/disk.2,\\ hd:/images/disk.3,hd:/images/disk.4,\\ hd:/images/disk.5,hd:/images/disk.6,\\ hd:/images/disk.7,hd:/images/disk.8,\\ cd:/images/install.iso \\ -s 3,virtio-net,tap0 \\ -l com1,/dev/nmdm0A \\ -A -H -P -m 8G .Ed .Pp Run a UEFI virtual machine with a display resolution of 800 by 600 pixels that can be accessed via VNC at: 0.0.0.0:5900. .Bd -literal -offset indent bhyve -c 2 -m 4G -w -H \\ -s 0,hostbridge \\ -s 3,ahci-cd,/path/to/uefi-OS-install.iso \\ -s 4,ahci-hd,disk.img \\ -s 5,virtio-net,tap0 \\ -s 29,fbuf,tcp=0.0.0.0:5900,w=800,h=600,wait \\ -s 30,xhci,tablet \\ -s 31,lpc -l com1,stdio \\ -l bootrom,/usr/local/share/uefi-firmware/BHYVE_UEFI.fd \\ uefivm .Ed .Sh SEE ALSO .Xr bhyve 4 , .Xr nmdm 4 , .Xr vmm 4 , .Xr ethers 5 , .Xr bhyvectl 8 , .Xr bhyveload 8 .Sh HISTORY .Nm first appeared in .Fx 10.0 . .Sh AUTHORS .An Neel Natu Aq Mt neel@freebsd.org .An Peter Grehan Aq Mt grehan@freebsd.org Index: head/usr.sbin/bhyve/bhyverun.c =================================================================== --- head/usr.sbin/bhyve/bhyverun.c (revision 333139) +++ head/usr.sbin/bhyve/bhyverun.c (revision 333140) @@ -1,1101 +1,1128 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include "bhyverun.h" #include "acpi.h" #include "atkbdc.h" #include "inout.h" #include "dbgport.h" #include "fwctl.h" +#include "gdb.h" #include "ioapic.h" #include "mem.h" #include "mevent.h" #include "mptbl.h" #include "pci_emul.h" #include "pci_irq.h" #include "pci_lpc.h" #include "smbiostbl.h" #include "xmsr.h" #include "spinup_ap.h" #include "rtc.h" #define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ #define MB (1024UL * 1024) #define GB (1024UL * MB) typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); char *vmname; int guest_ncpus; uint16_t cores, maxcpus, sockets, threads; char *guest_uuid_str; static int guest_vmexit_on_hlt, guest_vmexit_on_pause; static int virtio_msix = 1; static int x2apic_mode = 0; /* default is xAPIC */ static int strictio; static int strictmsr = 1; static int acpi; static char *progname; static const int BSP = 0; static cpuset_t cpumask; static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); static struct vm_exit vmexit[VM_MAXCPU]; struct bhyvestats { uint64_t vmexit_bogus; uint64_t vmexit_reqidle; uint64_t vmexit_hlt; uint64_t vmexit_pause; uint64_t vmexit_mtrap; uint64_t vmexit_inst_emul; uint64_t cpu_switch_rotate; uint64_t cpu_switch_direct; } stats; struct mt_vmm_info { pthread_t mt_thr; struct vmctx *mt_ctx; int mt_vcpu; } mt_vmm_info[VM_MAXCPU]; static cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; static void usage(int code) { fprintf(stderr, "Usage: %s [-abehuwxACHPSWY]\n" " %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n" " %*s [-g ] [-l ]\n" " %*s [-m mem] [-p vcpu:hostcpu] [-s ] [-U uuid] \n" " -a: local apic is in xAPIC mode (deprecated)\n" " -A: create ACPI tables\n" " -c: number of cpus and/or topology specification" " -C: include guest memory in core file\n" " -e: exit on unhandled I/O access\n" " -g: gdb port\n" " -h: help\n" " -H: vmexit from the guest on hlt\n" " -l: LPC device configuration\n" " -m: memory size in MB\n" " -p: pin 'vcpu' to 'hostcpu'\n" " -P: vmexit from the guest on pause\n" " -s: PCI slot config\n" " -S: guest memory cannot be swapped\n" " -u: RTC keeps UTC time\n" " -U: uuid\n" " -w: ignore unimplemented MSRs\n" " -W: force virtio to use single-vector MSI\n" " -x: local apic is in x2APIC mode\n" " -Y: disable MPtable generation\n", progname, (int)strlen(progname), "", (int)strlen(progname), "", (int)strlen(progname), ""); exit(code); } /* * XXX This parser is known to have the following issues: * 1. It accepts null key=value tokens ",,". * 2. It accepts whitespace after = and before value. * 3. Values out of range of INT are silently wrapped. * 4. It doesn't check non-final values. * 5. The apparently bogus limits of UINT16_MAX are for future expansion. * * The acceptance of a null specification ('-c ""') is by design to match the * manual page syntax specification, this results in a topology of 1 vCPU. */ static int topology_parse(const char *opt) { uint64_t ncpus; int c, chk, n, s, t, tmp; char *cp, *str; bool ns, scts; c = 1, n = 1, s = 1, t = 1; ns = false, scts = false; str = strdup(opt); while ((cp = strsep(&str, ",")) != NULL) { if (sscanf(cp, "%i%n", &tmp, &chk) == 1) { n = tmp; ns = true; } else if (sscanf(cp, "cpus=%i%n", &tmp, &chk) == 1) { n = tmp; ns = true; } else if (sscanf(cp, "sockets=%i%n", &tmp, &chk) == 1) { s = tmp; scts = true; } else if (sscanf(cp, "cores=%i%n", &tmp, &chk) == 1) { c = tmp; scts = true; } else if (sscanf(cp, "threads=%i%n", &tmp, &chk) == 1) { t = tmp; scts = true; #ifdef notyet /* Do not expose this until vmm.ko implements it */ } else if (sscanf(cp, "maxcpus=%i%n", &tmp, &chk) == 1) { m = tmp; #endif /* Skip the empty argument case from -c "" */ } else if (cp[0] == '\0') continue; else return (-1); /* Any trailing garbage causes an error */ if (cp[chk] != '\0') return (-1); } /* * Range check 1 <= n <= UINT16_MAX all values */ if (n < 1 || s < 1 || c < 1 || t < 1 || n > UINT16_MAX || s > UINT16_MAX || c > UINT16_MAX || t > UINT16_MAX) return (-1); /* If only the cpus was specified, use that as sockets */ if (!scts) s = n; /* * Compute sockets * cores * threads avoiding overflow * The range check above insures these are 16 bit values * If n was specified check it against computed ncpus */ ncpus = (uint64_t)s * c * t; if (ncpus > UINT16_MAX || (ns && n != ncpus)) return (-1); guest_ncpus = ncpus; sockets = s; cores = c; threads = t; return(0); } static int pincpu_parse(const char *opt) { int vcpu, pcpu; if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { fprintf(stderr, "invalid format: %s\n", opt); return (-1); } if (vcpu < 0 || vcpu >= VM_MAXCPU) { fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", vcpu, VM_MAXCPU - 1); return (-1); } if (pcpu < 0 || pcpu >= CPU_SETSIZE) { fprintf(stderr, "hostcpu '%d' outside valid range from " "0 to %d\n", pcpu, CPU_SETSIZE - 1); return (-1); } if (vcpumap[vcpu] == NULL) { if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) { perror("malloc"); return (-1); } CPU_ZERO(vcpumap[vcpu]); } CPU_SET(pcpu, vcpumap[vcpu]); return (0); } void vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, int errcode) { struct vmctx *ctx; int error, restart_instruction; ctx = arg; restart_instruction = 1; error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode, restart_instruction); assert(error == 0); } void * paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) { return (vm_map_gpa(ctx, gaddr, len)); } int fbsdrun_vmexit_on_pause(void) { return (guest_vmexit_on_pause); } int fbsdrun_vmexit_on_hlt(void) { return (guest_vmexit_on_hlt); } int fbsdrun_virtio_msix(void) { return (virtio_msix); } static void * fbsdrun_start_thread(void *param) { char tname[MAXCOMLEN + 1]; struct mt_vmm_info *mtp; int vcpu; mtp = param; vcpu = mtp->mt_vcpu; snprintf(tname, sizeof(tname), "vcpu %d", vcpu); pthread_set_name_np(mtp->mt_thr, tname); + gdb_cpu_add(vcpu); + vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); /* not reached */ exit(1); return (NULL); } void fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) { int error; assert(fromcpu == BSP); /* * The 'newcpu' must be activated in the context of 'fromcpu'. If * vm_activate_cpu() is delayed until newcpu's pthread starts running * then vmm.ko is out-of-sync with bhyve and this can create a race * with vm_suspend(). */ error = vm_activate_cpu(ctx, newcpu); if (error != 0) err(EX_OSERR, "could not activate CPU %d", newcpu); CPU_SET_ATOMIC(newcpu, &cpumask); /* * Set up the vmexit struct to allow execution to start * at the given RIP */ vmexit[newcpu].rip = rip; vmexit[newcpu].inst_length = 0; mt_vmm_info[newcpu].mt_ctx = ctx; mt_vmm_info[newcpu].mt_vcpu = newcpu; error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, fbsdrun_start_thread, &mt_vmm_info[newcpu]); assert(error == 0); } static int fbsdrun_deletecpu(struct vmctx *ctx, int vcpu) { if (!CPU_ISSET(vcpu, &cpumask)) { fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); exit(1); } CPU_CLR_ATOMIC(vcpu, &cpumask); return (CPU_EMPTY(&cpumask)); } static int vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, uint32_t eax) { #if BHYVE_DEBUG /* * put guest-driven debug here */ #endif return (VMEXIT_CONTINUE); } static int vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { int error; int bytes, port, in, out; int vcpu; vcpu = *pvcpu; port = vme->u.inout.port; bytes = vme->u.inout.bytes; in = vme->u.inout.in; out = !in; /* Extra-special case of host notifications */ if (out && port == GUEST_NIO_PORT) { error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); return (error); } error = emulate_inout(ctx, vcpu, vme, strictio); if (error) { fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", in ? "in" : "out", bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port, vmexit->rip); return (VMEXIT_ABORT); } else { return (VMEXIT_CONTINUE); } } static int vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { uint64_t val; uint32_t eax, edx; int error; val = 0; error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); if (error != 0) { fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", vme->u.msr.code, *pvcpu); if (strictmsr) { vm_inject_gp(ctx, *pvcpu); return (VMEXIT_CONTINUE); } } eax = val; error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); assert(error == 0); edx = val >> 32; error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); assert(error == 0); return (VMEXIT_CONTINUE); } static int vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { int error; error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); if (error != 0) { fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", vme->u.msr.code, vme->u.msr.wval, *pvcpu); if (strictmsr) { vm_inject_gp(ctx, *pvcpu); return (VMEXIT_CONTINUE); } } return (VMEXIT_CONTINUE); } static int vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { (void)spinup_ap(ctx, *pvcpu, vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); return (VMEXIT_CONTINUE); } #define DEBUG_EPT_MISCONFIG #ifdef DEBUG_EPT_MISCONFIG #define EXIT_REASON_EPT_MISCONFIG 49 #define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 #define VMCS_IDENT(x) ((x) | 0x80000000) static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; static int ept_misconfig_ptenum; #endif static int vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { fprintf(stderr, "vm exit[%d]\n", *pvcpu); fprintf(stderr, "\treason\t\tVMX\n"); fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); fprintf(stderr, "\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification); fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); #ifdef DEBUG_EPT_MISCONFIG if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { vm_get_register(ctx, *pvcpu, VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), &ept_misconfig_gpa); vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, &ept_misconfig_ptenum); fprintf(stderr, "\tEPT misconfiguration:\n"); fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", ept_misconfig_ptenum, ept_misconfig_pte[0], ept_misconfig_pte[1], ept_misconfig_pte[2], ept_misconfig_pte[3]); } #endif /* DEBUG_EPT_MISCONFIG */ return (VMEXIT_ABORT); } static int vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { fprintf(stderr, "vm exit[%d]\n", *pvcpu); fprintf(stderr, "\treason\t\tSVM\n"); fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode); fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1); fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2); return (VMEXIT_ABORT); } static int vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { assert(vmexit->inst_length == 0); stats.vmexit_bogus++; return (VMEXIT_CONTINUE); } static int vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { assert(vmexit->inst_length == 0); stats.vmexit_reqidle++; return (VMEXIT_CONTINUE); } static int vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { stats.vmexit_hlt++; /* * Just continue execution with the next instruction. We use * the HLT VM exit as a way to be friendly with the host * scheduler. */ return (VMEXIT_CONTINUE); } static int vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { stats.vmexit_pause++; return (VMEXIT_CONTINUE); } static int vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { assert(vmexit->inst_length == 0); stats.vmexit_mtrap++; + gdb_cpu_mtrap(*pvcpu); + return (VMEXIT_CONTINUE); } static int vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { int err, i; struct vie *vie; stats.vmexit_inst_emul++; vie = &vmexit->u.inst_emul.vie; err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, vie, &vmexit->u.inst_emul.paging); if (err) { if (err == ESRCH) { fprintf(stderr, "Unhandled memory access to 0x%lx\n", vmexit->u.inst_emul.gpa); } fprintf(stderr, "Failed to emulate instruction ["); for (i = 0; i < vie->num_valid; i++) { fprintf(stderr, "0x%02x%s", vie->inst[i], i != (vie->num_valid - 1) ? " " : ""); } fprintf(stderr, "] at 0x%lx\n", vmexit->rip); return (VMEXIT_ABORT); } return (VMEXIT_CONTINUE); } static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; static int vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { enum vm_suspend_how how; how = vmexit->u.suspended.how; fbsdrun_deletecpu(ctx, *pvcpu); if (*pvcpu != BSP) { pthread_mutex_lock(&resetcpu_mtx); pthread_cond_signal(&resetcpu_cond); pthread_mutex_unlock(&resetcpu_mtx); pthread_exit(NULL); } pthread_mutex_lock(&resetcpu_mtx); while (!CPU_EMPTY(&cpumask)) { pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); } pthread_mutex_unlock(&resetcpu_mtx); switch (how) { case VM_SUSPEND_RESET: exit(0); case VM_SUSPEND_POWEROFF: exit(1); case VM_SUSPEND_HALT: exit(2); case VM_SUSPEND_TRIPLEFAULT: exit(3); default: fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); exit(100); } return (0); /* NOTREACHED */ } +static int +vmexit_debug(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + + gdb_cpu_suspend(*pvcpu); + return (VMEXIT_CONTINUE); +} + static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_INOUT] = vmexit_inout, [VM_EXITCODE_INOUT_STR] = vmexit_inout, [VM_EXITCODE_VMX] = vmexit_vmx, [VM_EXITCODE_SVM] = vmexit_svm, [VM_EXITCODE_BOGUS] = vmexit_bogus, [VM_EXITCODE_REQIDLE] = vmexit_reqidle, [VM_EXITCODE_RDMSR] = vmexit_rdmsr, [VM_EXITCODE_WRMSR] = vmexit_wrmsr, [VM_EXITCODE_MTRAP] = vmexit_mtrap, [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, [VM_EXITCODE_SUSPENDED] = vmexit_suspend, [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, + [VM_EXITCODE_DEBUG] = vmexit_debug, }; static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) { int error, rc; enum vm_exitcode exitcode; cpuset_t active_cpus; if (vcpumap[vcpu] != NULL) { error = pthread_setaffinity_np(pthread_self(), sizeof(cpuset_t), vcpumap[vcpu]); assert(error == 0); } error = vm_active_cpus(ctx, &active_cpus); assert(CPU_ISSET(vcpu, &active_cpus)); error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); assert(error == 0); while (1) { error = vm_run(ctx, vcpu, &vmexit[vcpu]); if (error != 0) break; exitcode = vmexit[vcpu].exitcode; if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", exitcode); exit(1); } rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); switch (rc) { case VMEXIT_CONTINUE: break; case VMEXIT_ABORT: abort(); default: exit(1); } } fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); } static int num_vcpus_allowed(struct vmctx *ctx) { int tmp, error; error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); /* * The guest is allowed to spinup more than one processor only if the * UNRESTRICTED_GUEST capability is available. */ if (error == 0) return (VM_MAXCPU); else return (1); } void fbsdrun_set_capabilities(struct vmctx *ctx, int cpu) { int err, tmp; if (fbsdrun_vmexit_on_hlt()) { err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); if (err < 0) { fprintf(stderr, "VM exit on HLT not supported\n"); exit(1); } vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); if (cpu == BSP) handler[VM_EXITCODE_HLT] = vmexit_hlt; } if (fbsdrun_vmexit_on_pause()) { /* * pause exit support required for this mode */ err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); if (err < 0) { fprintf(stderr, "SMP mux requested, no pause support\n"); exit(1); } vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); if (cpu == BSP) handler[VM_EXITCODE_PAUSE] = vmexit_pause; } if (x2apic_mode) err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); else err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); if (err) { fprintf(stderr, "Unable to set x2apic state (%d)\n", err); exit(1); } vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); } static struct vmctx * do_open(const char *vmname) { struct vmctx *ctx; int error; bool reinit, romboot; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; const cap_ioctl_t *cmds; size_t ncmds; #endif reinit = romboot = false; if (lpc_bootrom()) romboot = true; error = vm_create(vmname); if (error) { if (errno == EEXIST) { if (romboot) { reinit = true; } else { /* * The virtual machine has been setup by the * userspace bootloader. */ } } else { perror("vm_create"); exit(1); } } else { if (!romboot) { /* * If the virtual machine was just created then a * bootrom must be configured to boot it. */ fprintf(stderr, "virtual machine cannot be booted\n"); exit(1); } } ctx = vm_open(vmname); if (ctx == NULL) { perror("vm_open"); exit(1); } #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW); if (cap_rights_limit(vm_get_device_fd(ctx), &rights) == -1 && errno != ENOSYS) errx(EX_OSERR, "Unable to apply rights for sandbox"); vm_get_ioctls(&ncmds); cmds = vm_get_ioctls(NULL); if (cmds == NULL) errx(EX_OSERR, "out of memory"); if (cap_ioctls_limit(vm_get_device_fd(ctx), cmds, ncmds) == -1 && errno != ENOSYS) errx(EX_OSERR, "Unable to apply rights for sandbox"); free((cap_ioctl_t *)cmds); #endif if (reinit) { error = vm_reinit(ctx); if (error) { perror("vm_reinit"); exit(1); } } error = vm_set_topology(ctx, sockets, cores, threads, maxcpus); if (error) errx(EX_OSERR, "vm_set_topology"); return (ctx); } int main(int argc, char *argv[]) { - int c, error, gdb_port, err, bvmcons; + int c, error, dbg_port, gdb_port, err, bvmcons; int max_vcpus, mptgen, memflags; int rtc_localtime; + bool gdb_stop; struct vmctx *ctx; uint64_t rip; size_t memsize; char *optstr; bvmcons = 0; progname = basename(argv[0]); + dbg_port = 0; gdb_port = 0; + gdb_stop = false; guest_ncpus = 1; sockets = cores = threads = 1; maxcpus = 0; memsize = 256 * MB; mptgen = 1; rtc_localtime = 1; memflags = 0; - optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:"; + optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:U:"; while ((c = getopt(argc, argv, optstr)) != -1) { switch (c) { case 'a': x2apic_mode = 0; break; case 'A': acpi = 1; break; case 'b': bvmcons = 1; break; case 'p': if (pincpu_parse(optarg) != 0) { errx(EX_USAGE, "invalid vcpu pinning " "configuration '%s'", optarg); } break; case 'c': if (topology_parse(optarg) != 0) { errx(EX_USAGE, "invalid cpu topology " "'%s'", optarg); } break; case 'C': memflags |= VM_MEM_F_INCORE; break; case 'g': + dbg_port = atoi(optarg); + break; + case 'G': + if (optarg[0] == 'w') { + gdb_stop = true; + optarg++; + } gdb_port = atoi(optarg); break; case 'l': if (lpc_device_parse(optarg) != 0) { errx(EX_USAGE, "invalid lpc device " "configuration '%s'", optarg); } break; case 's': if (pci_parse_slot(optarg) != 0) exit(1); else break; case 'S': memflags |= VM_MEM_F_WIRED; break; case 'm': error = vm_parse_memsize(optarg, &memsize); if (error) errx(EX_USAGE, "invalid memsize '%s'", optarg); break; case 'H': guest_vmexit_on_hlt = 1; break; case 'I': /* * The "-I" option was used to add an ioapic to the * virtual machine. * * An ioapic is now provided unconditionally for each * virtual machine and this option is now deprecated. */ break; case 'P': guest_vmexit_on_pause = 1; break; case 'e': strictio = 1; break; case 'u': rtc_localtime = 0; break; case 'U': guest_uuid_str = optarg; break; case 'w': strictmsr = 0; break; case 'W': virtio_msix = 0; break; case 'x': x2apic_mode = 1; break; case 'Y': mptgen = 0; break; case 'h': usage(0); default: usage(1); } } argc -= optind; argv += optind; if (argc != 1) usage(1); vmname = argv[0]; ctx = do_open(vmname); max_vcpus = num_vcpus_allowed(ctx); if (guest_ncpus > max_vcpus) { fprintf(stderr, "%d vCPUs requested but only %d available\n", guest_ncpus, max_vcpus); exit(1); } fbsdrun_set_capabilities(ctx, BSP); vm_set_memflags(ctx, memflags); err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); if (err) { fprintf(stderr, "Unable to setup memory (%d)\n", errno); exit(1); } error = init_msr(); if (error) { fprintf(stderr, "init_msr error %d", error); exit(1); } init_mem(); init_inout(); atkbdc_init(ctx); pci_irq_init(ctx); ioapic_init(ctx); rtc_init(ctx, rtc_localtime); sci_init(ctx); /* * Exit if a device emulation finds an error in its initilization */ if (init_pci(ctx) != 0) exit(1); + if (dbg_port != 0) + init_dbgport(dbg_port); + if (gdb_port != 0) - init_dbgport(gdb_port); + init_gdb(ctx, gdb_port, gdb_stop); if (bvmcons) init_bvmcons(); if (lpc_bootrom()) { if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) { fprintf(stderr, "ROM boot failed: unrestricted guest " "capability not available\n"); exit(1); } error = vcpu_reset(ctx, BSP); assert(error == 0); } error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); assert(error == 0); /* * build the guest tables, MP etc. */ if (mptgen) { error = mptable_build(ctx, guest_ncpus); if (error) exit(1); } error = smbios_build(ctx); assert(error == 0); if (acpi) { error = acpi_build(ctx, guest_ncpus); assert(error == 0); } if (lpc_bootrom()) fwctl_init(); #ifndef WITHOUT_CAPSICUM caph_cache_catpages(); if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); if (cap_enter() == -1 && errno != ENOSYS) errx(EX_OSERR, "cap_enter() failed"); #endif /* * Change the proc title to include the VM name. */ setproctitle("%s", vmname); /* * Add CPU 0 */ fbsdrun_addcpu(ctx, BSP, BSP, rip); /* * Head off to the main event dispatch loop */ mevent_dispatch(); exit(1); } Index: head/usr.sbin/bhyve/gdb.c =================================================================== --- head/usr.sbin/bhyve/gdb.c (nonexistent) +++ head/usr.sbin/bhyve/gdb.c (revision 333140) @@ -0,0 +1,1313 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2017-2018 John H. Baldwin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#ifndef WITHOUT_CAPSICUM +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef WITHOUT_CAPSICUM +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bhyverun.h" +#include "mem.h" +#include "mevent.h" + +/* + * GDB_SIGNAL_* numbers are part of the GDB remote protocol. Most stops + * use SIGTRAP. + */ +#define GDB_SIGNAL_TRAP 5 + +static void gdb_resume_vcpus(void); +static void check_command(int fd); + +static struct mevent *read_event, *write_event; + +static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting; +static pthread_mutex_t gdb_lock; +static pthread_cond_t idle_vcpus; +static bool stop_pending, first_stop; +static int stepping_vcpu, stopped_vcpu; + +/* + * An I/O buffer contains 'capacity' bytes of room at 'data'. For a + * read buffer, 'start' is unused and 'len' contains the number of + * valid bytes in the buffer. For a write buffer, 'start' is set to + * the index of the next byte in 'data' to send, and 'len' contains + * the remaining number of valid bytes to send. + */ +struct io_buffer { + uint8_t *data; + size_t capacity; + size_t start; + size_t len; +}; + +static struct io_buffer cur_comm, cur_resp; +static uint8_t cur_csum; +static int cur_vcpu; +static struct vmctx *ctx; +static int cur_fd = -1; + +const int gdb_regset[] = { + VM_REG_GUEST_RAX, + VM_REG_GUEST_RBX, + VM_REG_GUEST_RCX, + VM_REG_GUEST_RDX, + VM_REG_GUEST_RSI, + VM_REG_GUEST_RDI, + VM_REG_GUEST_RBP, + VM_REG_GUEST_RSP, + VM_REG_GUEST_R8, + VM_REG_GUEST_R9, + VM_REG_GUEST_R10, + VM_REG_GUEST_R11, + VM_REG_GUEST_R12, + VM_REG_GUEST_R13, + VM_REG_GUEST_R14, + VM_REG_GUEST_R15, + VM_REG_GUEST_RIP, + VM_REG_GUEST_RFLAGS, + VM_REG_GUEST_CS, + VM_REG_GUEST_SS, + VM_REG_GUEST_DS, + VM_REG_GUEST_ES, + VM_REG_GUEST_FS, + VM_REG_GUEST_GS +}; + +const int gdb_regsize[] = { + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 4, + 4, + 4, + 4, + 4, + 4, + 4 +}; + +#ifdef GDB_LOG +#include +#include + +static void __printflike(1, 2) +debug(const char *fmt, ...) +{ + static FILE *logfile; + va_list ap; + + if (logfile == NULL) { + logfile = fopen("/tmp/bhyve_gdb.log", "w"); + if (logfile == NULL) + return; +#ifndef WITHOUT_CAPSICUM + if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) { + fclose(logfile); + logfile = NULL; + return; + } +#endif + setlinebuf(logfile); + } + va_start(ap, fmt); + vfprintf(logfile, fmt, ap); + va_end(ap); +} +#else +#define debug(...) +#endif + +static int +guest_paging_info(int vcpu, struct vm_guest_paging *paging) +{ + uint64_t regs[4]; + const int regset[4] = { + VM_REG_GUEST_CR0, + VM_REG_GUEST_CR3, + VM_REG_GUEST_CR4, + VM_REG_GUEST_EFER + }; + + if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1) + return (-1); + + /* + * For the debugger, always pretend to be the kernel (CPL 0), + * and if long-mode is enabled, always parse addresses as if + * in 64-bit mode. + */ + paging->cr3 = regs[1]; + paging->cpl = 0; + if (regs[3] & EFER_LMA) + paging->cpu_mode = CPU_MODE_64BIT; + else if (regs[0] & CR0_PE) + paging->cpu_mode = CPU_MODE_PROTECTED; + else + paging->cpu_mode = CPU_MODE_REAL; + if (!(regs[0] & CR0_PG)) + paging->paging_mode = PAGING_MODE_FLAT; + else if (!(regs[2] & CR4_PAE)) + paging->paging_mode = PAGING_MODE_32; + else if (regs[3] & EFER_LME) + paging->paging_mode = PAGING_MODE_64; + else + paging->paging_mode = PAGING_MODE_PAE; + return (0); +} + +/* + * Map a guest virtual address to a physical address (for a given vcpu). + * If a guest virtual address is valid, return 1. If the address is + * not valid, return 0. If an error occurs obtaining the mapping, + * return -1. + */ +static int +guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr) +{ + struct vm_guest_paging paging; + int fault; + + if (guest_paging_info(vcpu, &paging) == -1) + return (-1); + + /* + * Always use PROT_READ. We really care if the VA is + * accessible, not if the current vCPU can write. + */ + if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr, + &fault) == -1) + return (-1); + if (fault) + return (0); + return (1); +} + +static void +io_buffer_reset(struct io_buffer *io) +{ + + io->start = 0; + io->len = 0; +} + +/* Available room for adding data. */ +static size_t +io_buffer_avail(struct io_buffer *io) +{ + + return (io->capacity - (io->start + io->len)); +} + +static uint8_t * +io_buffer_head(struct io_buffer *io) +{ + + return (io->data + io->start); +} + +static uint8_t * +io_buffer_tail(struct io_buffer *io) +{ + + return (io->data + io->start + io->len); +} + +static void +io_buffer_advance(struct io_buffer *io, size_t amount) +{ + + assert(amount <= io->len); + io->start += amount; + io->len -= amount; +} + +static void +io_buffer_consume(struct io_buffer *io, size_t amount) +{ + + io_buffer_advance(io, amount); + if (io->len == 0) { + io->start = 0; + return; + } + + /* + * XXX: Consider making this move optional and compacting on a + * future read() before realloc(). + */ + memmove(io->data, io_buffer_head(io), io->len); + io->start = 0; +} + +static void +io_buffer_grow(struct io_buffer *io, size_t newsize) +{ + uint8_t *new_data; + size_t avail, new_cap; + + avail = io_buffer_avail(io); + if (newsize <= avail) + return; + + new_cap = io->capacity + (newsize - avail); + new_data = realloc(io->data, new_cap); + if (new_data == NULL) + err(1, "Failed to grow GDB I/O buffer"); + io->data = new_data; + io->capacity = new_cap; +} + +static bool +response_pending(void) +{ + + if (cur_resp.start == 0 && cur_resp.len == 0) + return (false); + if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+') + return (false); + return (true); +} + +static void +close_connection(void) +{ + + /* + * XXX: This triggers a warning because mevent does the close + * before the EV_DELETE. + */ + pthread_mutex_lock(&gdb_lock); + mevent_delete(write_event); + mevent_delete_close(read_event); + write_event = NULL; + read_event = NULL; + io_buffer_reset(&cur_comm); + io_buffer_reset(&cur_resp); + cur_fd = -1; + + /* Resume any stopped vCPUs. */ + gdb_resume_vcpus(); + pthread_mutex_unlock(&gdb_lock); +} + +static uint8_t +hex_digit(uint8_t nibble) +{ + + if (nibble <= 9) + return (nibble + '0'); + else + return (nibble + 'a' - 10); +} + +static uint8_t +parse_digit(uint8_t v) +{ + + if (v >= '0' && v <= '9') + return (v - '0'); + if (v >= 'a' && v <= 'f') + return (v - 'a' + 10); + if (v >= 'A' && v <= 'F') + return (v - 'A' + 10); + return (0xF); +} + +/* Parses big-endian hexadecimal. */ +static uintmax_t +parse_integer(const uint8_t *p, size_t len) +{ + uintmax_t v; + + v = 0; + while (len > 0) { + v <<= 4; + v |= parse_digit(*p); + p++; + len--; + } + return (v); +} + +static uint8_t +parse_byte(const uint8_t *p) +{ + + return (parse_digit(p[0]) << 4 | parse_digit(p[1])); +} + +static void +send_pending_data(int fd) +{ + ssize_t nwritten; + + if (cur_resp.len == 0) { + mevent_disable(write_event); + return; + } + nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len); + if (nwritten == -1) { + warn("Write to GDB socket failed"); + close_connection(); + } else { + io_buffer_advance(&cur_resp, nwritten); + if (cur_resp.len == 0) + mevent_disable(write_event); + else + mevent_enable(write_event); + } +} + +/* Append a single character to the output buffer. */ +static void +send_char(uint8_t data) +{ + io_buffer_grow(&cur_resp, 1); + *io_buffer_tail(&cur_resp) = data; + cur_resp.len++; +} + +/* Append an array of bytes to the output buffer. */ +static void +send_data(const uint8_t *data, size_t len) +{ + + io_buffer_grow(&cur_resp, len); + memcpy(io_buffer_tail(&cur_resp), data, len); + cur_resp.len += len; +} + +static void +format_byte(uint8_t v, uint8_t *buf) +{ + + buf[0] = hex_digit(v >> 4); + buf[1] = hex_digit(v & 0xf); +} + +/* + * Append a single byte (formatted as two hex characters) to the + * output buffer. + */ +static void +send_byte(uint8_t v) +{ + uint8_t buf[2]; + + format_byte(v, buf); + send_data(buf, sizeof(buf)); +} + +static void +start_packet(void) +{ + + send_char('$'); + cur_csum = 0; +} + +static void +finish_packet(void) +{ + + send_char('#'); + send_byte(cur_csum); + debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp)); +} + +/* + * Append a single character (for the packet payload) and update the + * checksum. + */ +static void +append_char(uint8_t v) +{ + + send_char(v); + cur_csum += v; +} + +/* + * Append an array of bytes (for the packet payload) and update the + * checksum. + */ +static void +append_packet_data(const uint8_t *data, size_t len) +{ + + send_data(data, len); + while (len > 0) { + cur_csum += *data; + data++; + len--; + } +} + +static void +append_string(const char *str) +{ + + append_packet_data(str, strlen(str)); +} + +static void +append_byte(uint8_t v) +{ + uint8_t buf[2]; + + format_byte(v, buf); + append_packet_data(buf, sizeof(buf)); +} + +static void +append_unsigned_native(uintmax_t value, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + append_byte(value); + value >>= 8; + } +} + +static void +append_unsigned_be(uintmax_t value, size_t len) +{ + char buf[len * 2]; + size_t i; + + for (i = 0; i < len; i++) { + format_byte(value, buf + (len - i - 1) * 2); + value >>= 8; + } + append_packet_data(buf, sizeof(buf)); +} + +static void +append_integer(unsigned int value) +{ + + if (value == 0) + append_char('0'); + else + append_unsigned_be(value, fls(value) + 7 / 8); +} + +static void +append_asciihex(const char *str) +{ + + while (*str != '\0') { + append_byte(*str); + str++; + } +} + +static void +send_empty_response(void) +{ + + start_packet(); + finish_packet(); +} + +static void +send_error(int error) +{ + + start_packet(); + append_char('E'); + append_byte(error); + finish_packet(); +} + +static void +send_ok(void) +{ + + start_packet(); + append_string("OK"); + finish_packet(); +} + +static int +parse_threadid(const uint8_t *data, size_t len) +{ + + if (len == 1 && *data == '0') + return (0); + if (len == 2 && memcmp(data, "-1", 2) == 0) + return (-1); + if (len == 0) + return (-2); + return (parse_integer(data, len)); +} + +static void +report_stop(void) +{ + + start_packet(); + if (stopped_vcpu == -1) + append_char('S'); + else + append_char('T'); + append_byte(GDB_SIGNAL_TRAP); + if (stopped_vcpu != -1) { + append_string("thread:"); + append_integer(stopped_vcpu + 1); + append_char(';'); + } + stopped_vcpu = -1; + finish_packet(); +} + +static void +gdb_finish_suspend_vcpus(void) +{ + + if (first_stop) { + first_stop = false; + stopped_vcpu = -1; + } else if (response_pending()) + stop_pending = true; + else { + report_stop(); + send_pending_data(cur_fd); + } +} + +static void +_gdb_cpu_suspend(int vcpu, bool report_stop) +{ + + debug("$vCPU %d suspending\n", vcpu); + CPU_SET(vcpu, &vcpus_waiting); + if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0) + gdb_finish_suspend_vcpus(); + while (CPU_ISSET(vcpu, &vcpus_suspended) && vcpu != stepping_vcpu) + pthread_cond_wait(&idle_vcpus, &gdb_lock); + CPU_CLR(vcpu, &vcpus_waiting); + debug("$vCPU %d resuming\n", vcpu); +} + +void +gdb_cpu_add(int vcpu) +{ + + debug("$vCPU %d starting\n", vcpu); + pthread_mutex_lock(&gdb_lock); + CPU_SET(vcpu, &vcpus_active); + + /* + * If a vcpu is added while vcpus are stopped, suspend the new + * vcpu so that it will pop back out with a debug exit before + * executing the first instruction. + */ + if (!CPU_EMPTY(&vcpus_suspended)) { + CPU_SET(vcpu, &vcpus_suspended); + _gdb_cpu_suspend(vcpu, false); + } + pthread_mutex_unlock(&gdb_lock); +} + +void +gdb_cpu_suspend(int vcpu) +{ + + pthread_mutex_lock(&gdb_lock); + _gdb_cpu_suspend(vcpu, true); + pthread_mutex_unlock(&gdb_lock); +} + +void +gdb_cpu_mtrap(int vcpu) +{ + + debug("$vCPU %d MTRAP\n", vcpu); + pthread_mutex_lock(&gdb_lock); + if (vcpu == stepping_vcpu) { + stepping_vcpu = -1; + vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0); + vm_suspend_cpu(ctx, vcpu); + assert(stopped_vcpu == -1); + stopped_vcpu = vcpu; + _gdb_cpu_suspend(vcpu, true); + } + pthread_mutex_unlock(&gdb_lock); +} + +static void +gdb_suspend_vcpus(void) +{ + + assert(pthread_mutex_isowned_np(&gdb_lock)); + debug("suspending all CPUs\n"); + vcpus_suspended = vcpus_active; + vm_suspend_cpu(ctx, -1); + if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0) + gdb_finish_suspend_vcpus(); +} + +static bool +gdb_step_vcpu(int vcpu) +{ + int error, val; + + debug("$vCPU %d step\n", vcpu); + error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val); + if (error < 0) + return (false); + error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1); + vm_resume_cpu(ctx, vcpu); + stepping_vcpu = vcpu; + pthread_cond_broadcast(&idle_vcpus); + return (true); +} + +static void +gdb_resume_vcpus(void) +{ + + assert(pthread_mutex_isowned_np(&gdb_lock)); + vm_resume_cpu(ctx, -1); + debug("resuming all CPUs\n"); + CPU_ZERO(&vcpus_suspended); + pthread_cond_broadcast(&idle_vcpus); +} + +static void +gdb_read_regs(void) +{ + uint64_t regvals[nitems(gdb_regset)]; + int i; + + if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset), + gdb_regset, regvals) == -1) { + send_error(errno); + return; + } + start_packet(); + for (i = 0; i < nitems(regvals); i++) + append_unsigned_native(regvals[i], gdb_regsize[i]); + finish_packet(); +} + +static void +gdb_read_mem(const uint8_t *data, size_t len) +{ + uint64_t gpa, gva, val; + uint8_t *cp; + size_t resid, todo, bytes; + bool started; + int error; + + cp = memchr(data, ',', len); + if (cp == NULL) { + send_error(EINVAL); + return; + } + gva = parse_integer(data + 1, cp - (data + 1)); + resid = parse_integer(cp + 1, len - (cp + 1 - data)); + started = false; + + while (resid > 0) { + error = guest_vaddr2paddr(cur_vcpu, gva, &gpa); + if (error == -1) { + if (started) + finish_packet(); + else + send_error(errno); + return; + } + if (error == 0) { + if (started) + finish_packet(); + else + send_error(EFAULT); + return; + } + + /* Read bytes from current page. */ + todo = getpagesize() - gpa % getpagesize(); + if (todo > resid) + todo = resid; + + cp = paddr_guest2host(ctx, gpa, todo); + if (cp != NULL) { + /* + * If this page is guest RAM, read it a byte + * at a time. + */ + if (!started) { + start_packet(); + started = true; + } + while (todo > 0) { + append_byte(*cp); + cp++; + gpa++; + gva++; + resid--; + todo--; + } + } else { + /* + * If this page isn't guest RAM, try to handle + * it via MMIO. For MMIO requests, use + * aligned reads of words when possible. + */ + while (todo > 0) { + if (gpa & 1 || todo == 1) + bytes = 1; + else if (gpa & 2 || todo == 2) + bytes = 2; + else + bytes = 4; + error = read_mem(ctx, cur_vcpu, gpa, &val, + bytes); + if (error == 0) { + if (!started) { + start_packet(); + started = true; + } + gpa += bytes; + gva += bytes; + resid -= bytes; + todo -= bytes; + while (bytes > 0) { + append_byte(val); + val >>= 8; + bytes--; + } + } else { + if (started) + finish_packet(); + else + send_error(EFAULT); + return; + } + } + } + assert(resid == 0 || gpa % getpagesize() == 0); + } + if (!started) + start_packet(); + finish_packet(); +} + +static bool +command_equals(const uint8_t *data, size_t len, const char *cmd) +{ + + if (strlen(cmd) > len) + return (false); + return (memcmp(data, cmd, strlen(cmd)) == 0); +} + +static void +gdb_query(const uint8_t *data, size_t len) +{ + + /* + * TODO: + * - qSearch + * - qSupported + */ + if (command_equals(data, len, "qAttached")) { + start_packet(); + append_char('1'); + finish_packet(); + } else if (command_equals(data, len, "qC")) { + start_packet(); + append_string("QC"); + append_integer(cur_vcpu + 1); + finish_packet(); + } else if (command_equals(data, len, "qfThreadInfo")) { + cpuset_t mask; + bool first; + int vcpu; + + if (CPU_EMPTY(&vcpus_active)) { + send_error(EINVAL); + return; + } + mask = vcpus_active; + start_packet(); + append_char('m'); + first = true; + while (!CPU_EMPTY(&mask)) { + vcpu = CPU_FFS(&mask) - 1; + CPU_CLR(vcpu, &mask); + if (first) + first = false; + else + append_char(','); + append_integer(vcpu + 1); + } + finish_packet(); + } else if (command_equals(data, len, "qsThreadInfo")) { + start_packet(); + append_char('l'); + finish_packet(); + } else if (command_equals(data, len, "qThreadExtraInfo")) { + char buf[16]; + int tid; + + data += strlen("qThreadExtraInfo"); + len -= strlen("qThreadExtraInfo"); + if (*data != ',') { + send_error(EINVAL); + return; + } + tid = parse_threadid(data + 1, len - 1); + if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) { + send_error(EINVAL); + return; + } + + snprintf(buf, sizeof(buf), "vCPU %d", tid - 1); + start_packet(); + append_asciihex(buf); + finish_packet(); + } else + send_empty_response(); +} + +static void +handle_command(const uint8_t *data, size_t len) +{ + + /* Reject packets with a sequence-id. */ + if (len >= 3 && data[0] >= '0' && data[0] <= '9' && + data[0] >= '0' && data[0] <= '9' && data[2] == ':') { + send_empty_response(); + return; + } + + switch (*data) { + case 'c': + if (len != 1) { + send_error(EINVAL); + break; + } + + /* Don't send a reply until a stop occurs. */ + gdb_resume_vcpus(); + break; + case 'D': + send_ok(); + + /* TODO: Resume any stopped CPUs. */ + break; + case 'g': { + gdb_read_regs(); + break; + } + case 'H': { + int tid; + + if (data[1] != 'g' && data[1] != 'c') { + send_error(EINVAL); + break; + } + tid = parse_threadid(data + 2, len - 2); + if (tid == -2) { + send_error(EINVAL); + break; + } + + if (CPU_EMPTY(&vcpus_active)) { + send_error(EINVAL); + break; + } + if (tid == -1 || tid == 0) + cur_vcpu = CPU_FFS(&vcpus_active) - 1; + else if (CPU_ISSET(tid - 1, &vcpus_active)) + cur_vcpu = tid - 1; + else { + send_error(EINVAL); + break; + } + send_ok(); + break; + } + case 'm': + gdb_read_mem(data, len); + break; + case 'T': { + int tid; + + tid = parse_threadid(data + 1, len - 1); + if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) { + send_error(EINVAL); + return; + } + send_ok(); + break; + } + case 'q': + gdb_query(data, len); + break; + case 's': + if (len != 1) { + send_error(EINVAL); + break; + } + + /* Don't send a reply until a stop occurs. */ + if (!gdb_step_vcpu(cur_vcpu)) { + send_error(EOPNOTSUPP); + break; + } + break; + case '?': + /* XXX: Only if stopped? */ + /* For now, just report that we are always stopped. */ + start_packet(); + append_char('S'); + append_byte(GDB_SIGNAL_TRAP); + finish_packet(); + break; + case 'G': /* TODO */ + case 'M': /* TODO */ + case 'v': + /* Handle 'vCont' */ + /* 'vCtrlC' */ + case 'p': /* TODO */ + case 'P': /* TODO */ + case 'Q': /* TODO */ + case 't': /* TODO */ + case 'X': /* TODO */ + case 'z': /* TODO */ + case 'Z': /* TODO */ + default: + send_empty_response(); + } +} + +/* Check for a valid packet in the command buffer. */ +static void +check_command(int fd) +{ + uint8_t *head, *hash, *p, sum; + size_t avail, plen; + + for (;;) { + avail = cur_comm.len; + if (avail == 0) + return; + head = io_buffer_head(&cur_comm); + switch (*head) { + case 0x03: + debug("<- Ctrl-C\n"); + io_buffer_consume(&cur_comm, 1); + + gdb_suspend_vcpus(); + break; + case '+': + /* ACK of previous response. */ + debug("<- +\n"); + if (response_pending()) + io_buffer_reset(&cur_resp); + io_buffer_consume(&cur_comm, 1); + if (stop_pending) { + stop_pending = false; + report_stop(); + send_pending_data(fd); + } + break; + case '-': + /* NACK of previous response. */ + debug("<- -\n"); + if (response_pending()) { + cur_resp.len += cur_resp.start; + cur_resp.start = 0; + if (cur_resp.data[0] == '+') + io_buffer_advance(&cur_resp, 1); + debug("-> %.*s\n", (int)cur_resp.len, + io_buffer_head(&cur_resp)); + } + io_buffer_consume(&cur_comm, 1); + send_pending_data(fd); + break; + case '$': + /* Packet. */ + + if (response_pending()) { + warnx("New GDB command while response in " + "progress"); + io_buffer_reset(&cur_resp); + } + + /* Is packet complete? */ + hash = memchr(head, '#', avail); + if (hash == NULL) + return; + plen = (hash - head + 1) + 2; + if (avail < plen) + return; + debug("<- %.*s\n", (int)plen, head); + + /* Verify checksum. */ + for (sum = 0, p = head + 1; p < hash; p++) + sum += *p; + if (sum != parse_byte(hash + 1)) { + io_buffer_consume(&cur_comm, plen); + debug("-> -\n"); + send_char('-'); + send_pending_data(fd); + break; + } + send_char('+'); + + handle_command(head + 1, hash - (head + 1)); + io_buffer_consume(&cur_comm, plen); + if (!response_pending()) + debug("-> +\n"); + send_pending_data(fd); + break; + default: + /* XXX: Possibly drop connection instead. */ + debug("-> %02x\n", *head); + io_buffer_consume(&cur_comm, 1); + break; + } + } +} + +static void +gdb_readable(int fd, enum ev_type event, void *arg) +{ + ssize_t nread; + int pending; + + if (ioctl(fd, FIONREAD, &pending) == -1) { + warn("FIONREAD on GDB socket"); + return; + } + + /* + * 'pending' might be zero due to EOF. We need to call read + * with a non-zero length to detect EOF. + */ + if (pending == 0) + pending = 1; + + /* Ensure there is room in the command buffer. */ + io_buffer_grow(&cur_comm, pending); + assert(io_buffer_avail(&cur_comm) >= pending); + + nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm)); + if (nread == 0) { + close_connection(); + } else if (nread == -1) { + if (errno == EAGAIN) + return; + + warn("Read from GDB socket"); + close_connection(); + } else { + cur_comm.len += nread; + pthread_mutex_lock(&gdb_lock); + check_command(fd); + pthread_mutex_unlock(&gdb_lock); + } +} + +static void +gdb_writable(int fd, enum ev_type event, void *arg) +{ + + send_pending_data(fd); +} + +static void +new_connection(int fd, enum ev_type event, void *arg) +{ + int optval, s; + + s = accept4(fd, NULL, NULL, SOCK_NONBLOCK); + if (s == -1) { + if (arg != NULL) + err(1, "Failed accepting initial GDB connection"); + + /* Silently ignore errors post-startup. */ + return; + } + + optval = 1; + if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) == + -1) { + warn("Failed to disable SIGPIPE for GDB connection"); + close(s); + return; + } + + pthread_mutex_lock(&gdb_lock); + if (cur_fd != -1) { + close(s); + warnx("Ignoring additional GDB connection."); + } + + read_event = mevent_add(s, EVF_READ, gdb_readable, NULL); + if (read_event == NULL) { + if (arg != NULL) + err(1, "Failed to setup initial GDB connection"); + pthread_mutex_unlock(&gdb_lock); + return; + } + write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL); + if (write_event == NULL) { + if (arg != NULL) + err(1, "Failed to setup initial GDB connection"); + mevent_delete_close(read_event); + read_event = NULL; + } + + cur_fd = s; + cur_vcpu = 0; + stepping_vcpu = -1; + stopped_vcpu = -1; + stop_pending = false; + + /* Break on attach. */ + first_stop = true; + gdb_suspend_vcpus(); + pthread_mutex_unlock(&gdb_lock); +} + +#ifndef WITHOUT_CAPSICUM +void +limit_gdb_socket(int s) +{ + cap_rights_t rights; + unsigned long ioctls[] = { FIONREAD }; + + cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE, + CAP_SETSOCKOPT, CAP_IOCTL); + if (cap_rights_limit(s, &rights) == -1 && errno != ENOSYS) + errx(EX_OSERR, "Unable to apply rights for sandbox"); + if (cap_ioctls_limit(s, ioctls, nitems(ioctls)) == -1 && errno != ENOSYS) + errx(EX_OSERR, "Unable to apply rights for sandbox"); +} +#endif + +void +init_gdb(struct vmctx *_ctx, int sport, bool wait) +{ + struct sockaddr_in sin; + int error, flags, s; + + debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not "); + + error = pthread_mutex_init(&gdb_lock, NULL); + if (error != 0) + errc(1, error, "gdb mutex init"); + error = pthread_cond_init(&idle_vcpus, NULL); + if (error != 0) + errc(1, error, "gdb cv init"); + + ctx = _ctx; + s = socket(PF_INET, SOCK_STREAM, 0); + if (s < 0) + err(1, "gdb socket create"); + + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_ANY); + sin.sin_port = htons(sport); + + if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) + err(1, "gdb socket bind"); + + if (listen(s, 1) < 0) + err(1, "gdb socket listen"); + + if (wait) { + /* + * Set vcpu 0 in vcpus_suspended. This will trigger the + * logic in gdb_cpu_add() to suspend the first vcpu before + * it starts execution. The vcpu will remain suspended + * until a debugger connects. + */ + stepping_vcpu = -1; + stopped_vcpu = -1; + CPU_SET(0, &vcpus_suspended); + } + + flags = fcntl(s, F_GETFL); + if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) + err(1, "Failed to mark gdb socket non-blocking"); + +#ifndef WITHOUT_CAPSICUM + limit_gdb_socket(s); +#endif + mevent_add(s, EVF_READ, new_connection, NULL); +} Property changes on: head/usr.sbin/bhyve/gdb.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/usr.sbin/bhyve/gdb.h =================================================================== --- head/usr.sbin/bhyve/gdb.h (nonexistent) +++ head/usr.sbin/bhyve/gdb.h (revision 333140) @@ -0,0 +1,39 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2017 John H. Baldwin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __GDB_H__ +#define __GDB_H__ + +void gdb_cpu_add(int vcpu); +void gdb_cpu_mtrap(int vcpu); +void gdb_cpu_suspend(int vcpu); +void init_gdb(struct vmctx *ctx, int sport, bool wait); + +#endif /* !__GDB_H__ */ Property changes on: head/usr.sbin/bhyve/gdb.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/usr.sbin/bhyve/mem.c =================================================================== --- head/usr.sbin/bhyve/mem.c (revision 333139) +++ head/usr.sbin/bhyve/mem.c (revision 333140) @@ -1,293 +1,348 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Memory ranges are represented with an RB tree. On insertion, the range * is checked for overlaps. On lookup, the key has the same base and limit * so it can be searched within the range. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include "mem.h" struct mmio_rb_range { RB_ENTRY(mmio_rb_range) mr_link; /* RB tree links */ struct mem_range mr_param; uint64_t mr_base; uint64_t mr_end; }; struct mmio_rb_tree; RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback; /* * Per-vCPU cache. Since most accesses from a vCPU will be to * consecutive addresses in a range, it makes sense to cache the * result of a lookup. */ static struct mmio_rb_range *mmio_hint[VM_MAXCPU]; static pthread_rwlock_t mmio_rwlock; static int mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b) { if (a->mr_end < b->mr_base) return (-1); else if (a->mr_base > b->mr_end) return (1); return (0); } static int mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr, struct mmio_rb_range **entry) { struct mmio_rb_range find, *res; find.mr_base = find.mr_end = addr; res = RB_FIND(mmio_rb_tree, rbt, &find); if (res != NULL) { *entry = res; return (0); } return (ENOENT); } static int mmio_rb_add(struct mmio_rb_tree *rbt, struct mmio_rb_range *new) { struct mmio_rb_range *overlap; overlap = RB_INSERT(mmio_rb_tree, rbt, new); if (overlap != NULL) { #ifdef RB_DEBUG printf("overlap detected: new %lx:%lx, tree %lx:%lx\n", new->mr_base, new->mr_end, overlap->mr_base, overlap->mr_end); #endif return (EEXIST); } return (0); } #if 0 static void mmio_rb_dump(struct mmio_rb_tree *rbt) { struct mmio_rb_range *np; pthread_rwlock_rdlock(&mmio_rwlock); RB_FOREACH(np, mmio_rb_tree, rbt) { printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end, np->mr_param.name); } pthread_rwlock_unlock(&mmio_rwlock); } #endif RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); +typedef int (mem_cb_t)(struct vmctx *ctx, int vcpu, uint64_t gpa, + struct mem_range *mr, void *arg); + static int mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg) { int error; struct mem_range *mr = arg; error = (*mr->handler)(ctx, vcpu, MEM_F_READ, gpa, size, rval, mr->arg1, mr->arg2); return (error); } static int mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg) { int error; struct mem_range *mr = arg; error = (*mr->handler)(ctx, vcpu, MEM_F_WRITE, gpa, size, &wval, mr->arg1, mr->arg2); return (error); } -int -emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie, - struct vm_guest_paging *paging) - +static int +access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb, + void *arg) { struct mmio_rb_range *entry; int err, immutable; pthread_rwlock_rdlock(&mmio_rwlock); /* * First check the per-vCPU cache */ if (mmio_hint[vcpu] && paddr >= mmio_hint[vcpu]->mr_base && paddr <= mmio_hint[vcpu]->mr_end) { entry = mmio_hint[vcpu]; } else entry = NULL; if (entry == NULL) { if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) { /* Update the per-vCPU cache */ mmio_hint[vcpu] = entry; } else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) { pthread_rwlock_unlock(&mmio_rwlock); return (ESRCH); } } assert(entry != NULL); /* * An 'immutable' memory range is guaranteed to be never removed * so there is no need to hold 'mmio_rwlock' while calling the * handler. * * XXX writes to the PCIR_COMMAND register can cause register_mem() * to be called. If the guest is using PCI extended config space * to modify the PCIR_COMMAND register then register_mem() can * deadlock on 'mmio_rwlock'. However by registering the extended * config space window as 'immutable' the deadlock can be avoided. */ immutable = (entry->mr_param.flags & MEM_F_IMMUTABLE); if (immutable) pthread_rwlock_unlock(&mmio_rwlock); - err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, paging, - mem_read, mem_write, &entry->mr_param); + err = cb(ctx, vcpu, paddr, &entry->mr_param, arg); if (!immutable) pthread_rwlock_unlock(&mmio_rwlock); return (err); +} + +struct emulate_mem_args { + struct vie *vie; + struct vm_guest_paging *paging; +}; + +static int +emulate_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr, struct mem_range *mr, + void *arg) +{ + struct emulate_mem_args *ema; + + ema = arg; + return (vmm_emulate_instruction(ctx, vcpu, paddr, ema->vie, ema->paging, + mem_read, mem_write, mr)); +} + +int +emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie, + struct vm_guest_paging *paging) + +{ + struct emulate_mem_args ema; + + ema.vie = vie; + ema.paging = paging; + return (access_memory(ctx, vcpu, paddr, emulate_mem_cb, &ema)); +} + +struct read_mem_args { + uint64_t *rval; + int size; +}; + +static int +read_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr, struct mem_range *mr, + void *arg) +{ + struct read_mem_args *rma; + + rma = arg; + return (mr->handler(ctx, vcpu, MEM_F_READ, paddr, rma->size, + rma->rval, mr->arg1, mr->arg2)); +} + +int +read_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size) +{ + struct read_mem_args rma; + + rma.rval = rval; + rma.size = size; + return (access_memory(ctx, vcpu, gpa, read_mem_cb, &rma)); } static int register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp) { struct mmio_rb_range *entry, *mrp; int err; err = 0; mrp = malloc(sizeof(struct mmio_rb_range)); if (mrp != NULL) { mrp->mr_param = *memp; mrp->mr_base = memp->base; mrp->mr_end = memp->base + memp->size - 1; pthread_rwlock_wrlock(&mmio_rwlock); if (mmio_rb_lookup(rbt, memp->base, &entry) != 0) err = mmio_rb_add(rbt, mrp); pthread_rwlock_unlock(&mmio_rwlock); if (err) free(mrp); } else err = ENOMEM; return (err); } int register_mem(struct mem_range *memp) { return (register_mem_int(&mmio_rb_root, memp)); } int register_mem_fallback(struct mem_range *memp) { return (register_mem_int(&mmio_rb_fallback, memp)); } int unregister_mem(struct mem_range *memp) { struct mem_range *mr; struct mmio_rb_range *entry = NULL; int err, i; pthread_rwlock_wrlock(&mmio_rwlock); err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry); if (err == 0) { mr = &entry->mr_param; assert(mr->name == memp->name); assert(mr->base == memp->base && mr->size == memp->size); assert((mr->flags & MEM_F_IMMUTABLE) == 0); RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry); /* flush Per-vCPU cache */ for (i=0; i < VM_MAXCPU; i++) { if (mmio_hint[i] == entry) mmio_hint[i] = NULL; } } pthread_rwlock_unlock(&mmio_rwlock); if (entry) free(entry); return (err); } void init_mem(void) { RB_INIT(&mmio_rb_root); RB_INIT(&mmio_rb_fallback); pthread_rwlock_init(&mmio_rwlock, NULL); } Index: head/usr.sbin/bhyve/mem.h =================================================================== --- head/usr.sbin/bhyve/mem.h (revision 333139) +++ head/usr.sbin/bhyve/mem.h (revision 333140) @@ -1,63 +1,65 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MEM_H_ #define _MEM_H_ #include struct vmctx; typedef int (*mem_func_t)(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, int size, uint64_t *val, void *arg1, long arg2); struct mem_range { const char *name; int flags; mem_func_t handler; void *arg1; long arg2; uint64_t base; uint64_t size; }; #define MEM_F_READ 0x1 #define MEM_F_WRITE 0x2 #define MEM_F_RW 0x3 #define MEM_F_IMMUTABLE 0x4 /* mem_range cannot be unregistered */ void init_mem(void); int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie, struct vm_guest_paging *paging); +int read_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t *rval, + int size); int register_mem(struct mem_range *memp); int register_mem_fallback(struct mem_range *memp); int unregister_mem(struct mem_range *memp); #endif /* _MEM_H_ */