diff --git a/etc/mtree/BSD.var.dist b/etc/mtree/BSD.var.dist index 6483df4a297d..f0d6f8694449 100644 --- a/etc/mtree/BSD.var.dist +++ b/etc/mtree/BSD.var.dist @@ -1,112 +1,114 @@ # $FreeBSD$ # # Please see the file src/etc/mtree/README before making changes to this file. # /set type=dir uname=root gname=wheel mode=0755 tags=package=runtime . account mode=0750 .. at /set uname=daemon jobs tags=package=at .. spool tags=package=at .. /set uname=root .. /set mode=0750 /set gname=audit audit dist uname=auditdistd gname=audit mode=0770 .. remote uname=auditdistd gname=wheel mode=0700 .. .. authpf uname=root gname=authpf mode=0770 .. /set gname=wheel backups .. cache mode=0755 .. crash .. cron tabs mode=0700 .. .. /set mode=0755 db entropy uname=operator gname=operator mode=0700 .. freebsd-update mode=0700 .. hyperv mode=0700 .. ipf mode=0700 .. ntp uname=ntpd gname=ntpd .. pkg .. ports .. portsnap .. zfsd cases .. .. .. empty mode=0555 flags=schg .. games gname=games mode=0775 .. heimdal mode=0700 .. log .. mail gname=mail mode=0775 .. msgs uname=daemon .. preserve .. run + bhyve + .. dhclient .. ppp gname=network mode=0770 .. wpa_supplicant .. .. rwho gname=daemon mode=0775 .. spool dma uname=root gname=mail mode=0770 tags=package=dma .. lock uname=uucp gname=dialer mode=0775 .. /set gname=daemon lpd .. opielocks mode=0700 .. output lpd .. .. /set gname=wheel .. tmp mode=01777 vi.recover mode=01777 .. .. unbound uname=unbound gname=unbound mode=0755 tags=package=unbound conf.d uname=unbound gname=unbound mode=0755 tags=package=unbound .. .. yp .. .. diff --git a/share/man/man7/hier.7 b/share/man/man7/hier.7 index 845c4a8a8f7c..d134e84f4c48 100644 --- a/share/man/man7/hier.7 +++ b/share/man/man7/hier.7 @@ -1,907 +1,909 @@ .\" Copyright (c) 1990, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)hier.7 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd January 9, 2021 +.Dd February 22, 2021 .Dt HIER 7 .Os .Sh NAME .Nm hier .Nd layout of file systems .Sh DESCRIPTION A sketch of the file system hierarchy. .Bl -tag -width "/libexec/" .It Pa / root directory of the file system .It Pa /bin/ user utilities fundamental to both single-user and multi-user environments .It Pa /boot/ programs and configuration files used during operating system bootstrap .Pp .Bl -tag -width "defaults/" -compact .It Pa defaults/ default bootstrapping configuration files; see .Xr loader.conf 5 .It Pa dtb/ Compiled flattened device tree (FDT) files; see .Xr fdt 4 and .Xr dtc 1 .It Pa efi/ Mount point for EFI System Partition (ESP) on UEFI systems .It Pa firmware/ loadable kernel modules containing binary firmware for hardware that needs firmware downloaded to it to function .It Pa kernel/ pure kernel executable (the operating system loaded into memory at boot time) and kernel modules .It Pa modules/ third-party loadable kernel modules, such as the ones installed from .Xr ports 7 .It Pa overlays/ Compiled flattened device tree (FDT) overlays; see .Xr fdt 4 and .Xr dtc 1 .It Pa zfs/ .Xr zfs 8 zpool cache files .El .It Pa /cdrom/ default mount point for CD-ROM drives .It Pa /compat/ normally a link to .Pa /usr/compat . If not, then the .Pa /usr/compat comments apply .It Pa /dev/ device special files managed by .Xr devfs 5 .Pp .Bl -tag -width "defaults/" -compact .It Pa fd/ file descriptor files; see .Xr \&fd 4 .El .It Pa /etc/ system configuration files and scripts .Pp .Bl -tag -width "defaults/" -compact .It Pa defaults/ default system configuration files; see .Xr rc 8 .It Pa bluetooth/ bluetooth configuration files .It Pa localtime local timezone information; see .Xr ctime 3 .It Pa mail/ Sendmail control files .It Pa mtree/ mtree configuration files; see .Xr mtree 8 .It Pa pam.d/ configuration files for the Pluggable Authentication Modules (PAM) library .It Pa periodic/ scripts that are run daily, weekly, and monthly, via .Xr cron 8 ; see .Xr periodic 8 .It Pa rc.d/ system and daemon startup/control scripts; see .Xr rc 8 .It Pa security/ OpenBSM audit configuration files; see .Xr audit 8 .It Pa ppp/ ppp configuration files; see .Xr ppp 8 .It Pa ssh/ OpenSSH configuration files; see .Xr ssh 1 .It Pa ssl/ OpenSSL configuration files .El .It Pa /lib/ critical system libraries needed for binaries in .Pa /bin and .Pa /sbin .Pp .Bl -tag -width "defaults/" -compact .It Pa casper/ service-specific .Xr libcasper 3 Capsicum support libraries .It Pa geom/ class-specific libraries for the .Xr geom 8 utility .It Pa nvmecontrol/ vendor-specific libraries to extend the .Xr nvmecontrol 8 utility .El .It Pa /libexec/ critical system utilities needed for binaries in .Pa /bin and .Pa /sbin .It Pa /media/ contains subdirectories to be used as mount points for removable media such as CDs, USB drives, and floppy disks .It Pa /mnt/ empty directory commonly used by system administrators as a temporary mount point .It Pa /net/ automounted NFS shares; see .Xr auto_master 5 .It Pa /proc/ process file system; see .Xr procfs 5 .It Pa /rescue/ statically linked programs for emergency recovery; see .Xr rescue 8 .It Pa /root/ root's HOME directory .It Pa /sbin/ system programs and administration utilities fundamental to both single-user and multi-user environments .It Pa /tmp/ temporary files that are not guaranteed to persist across system reboots .It Pa /usr/ contains the majority of user utilities and applications .Pp .Bl -tag -width "defaults/" -compact .It Pa bin/ common utilities, programming tools, and applications .It Pa compat/ files needed to support binary compatibility with other operating systems; see .Xr linux 4 .It Pa include/ standard C include files .Pp .Bl -tag -width "kerberos5/" -compact .It Pa arpa/ C include files for Internet service protocols .It Pa bsnmp/ C include files for the SNMP daemon .It Pa c++/ C++ include files .It Pa cam/ C include files for the Common Access Methods Layer .Bl -tag -width "kerberos5/" -compact .It Pa scsi/ SCSI device on top of CAM .El .It Pa dev/ C include files for programming various .Fx devices .Bl -tag -width "kerberos5/" -compact .It Pa ic/ various header files describing driver- and bus-independent hardware circuits .It Pa ofw/ Open Firmware support .It Pa pbio/ 8255 PPI cards; see .Xr pbio 4 .It Pa ppbus/ parallel port bus; see .Xr ppbus 4 .It Pa usb/ USB subsystem .El .It Pa fs/ .Bl -tag -width "kerberos5/" -compact .It Pa fdescfs/ per-process file descriptors file system .It Pa msdosfs/ MS-DOS file system .It Pa nfs/ C include files for NFS (Network File System) version 2, 3 and 4 .It Pa nullfs/ loopback file system .It Pa procfs/ process file system .It Pa smbfs/ SMB/CIFS file system .It Pa udf/ UDF file system .It Pa unionfs union file system .El .It Pa geom/ GEOM framework .Bl -tag -width "kerberos5/" -compact .It Pa concat/ CONCAT GEOM class .It Pa gate/ GATE GEOM class .It Pa mirror/ MIRROR GEOM class .It Pa nop/ NOP GEOM class .It Pa raid3/ RAID3 GEOM class .It Pa stripe/ STRIPE GEOM class .El .It Pa libmilter/ C include files for libmilter, the .Xr sendmail 8 mail filter API .It Pa machine/ machine-specific C include files .It Pa net/ miscellaneous network C include files .Bl -tag -width Fl -compact .It Pa altq/ C include files for alternate queueing .El .It Pa net80211/ C include files for 802.11 wireless networking; see .Xr net80211 4 .It Pa netinet/ C include files for Internet standard protocols; see .Xr inet 4 .It Pa netinet6/ C include files for Internet protocol version 6; see .Xr inet6 4 .It Pa netipsec/ kernel key-management service; see .Xr ipsec 4 .It Pa netsmb/ SMB/CIFS requester .It Pa nfs/ C include files for NFS (Network File System) version 2 and 3 (legacy) .It Pa openssl/ OpenSSL (Cryptography/SSL toolkit) headers .It Pa protocols/ C include files for Berkeley service protocols .It Pa rpc/ remote procedure calls; see .Xr rpc 3 .It Pa rpcsvc/ definition of RPC service structures; see .Xr rpc 3 .It Pa security/ PAM; see .Xr pam 8 .It Pa sys/ system C include files (kernel data structures) .\" .It Pa tcl/ .\" Tcl language; .\" see .\" .Xr Tcl n .\" .Bl -tag -width "kerberos5/" -compact .\" .It Pa generic/ .\" ??? .\" .It Pa unix/ .\" ??? .\" .El .It Pa ufs/ C include files for UFS (The U-word File System) .Bl -tag -width "kerberos5/" -compact .It Pa ffs/ Fast file system .It Pa ufs/ UFS file system .El .It Pa vm/ virtual memory; see .Xr vmstat 8 .El .Pp .It Pa lib/ shared and archive .Xr ar 1 Ns -type libraries .Pp .Bl -tag -width Fl -compact .It Pa aout/ a.out archive libraries .It Pa compat/ shared libraries for compatibility .Bl -tag -width Fl -compact .It Pa aout/ a.out backward compatibility libraries .El .It Pa debug/ standalone debug data for the kernel and base system libraries and binaries .It Pa dtrace/ DTrace library scripts .It Pa engines/ OpenSSL (Cryptography/SSL toolkit) dynamically loadable engines .El .Pp .It Pa libdata/ miscellaneous utility data files .Pp .Bl -tag -width Fl -compact .It Pa gcc/ .Xr gcc 1 configuration data .It Pa ldscripts/ linker scripts; see .Xr ld 1 .El .Pp .It Pa libexec/ system daemons & system utilities (executed by other programs) .Pp .Bl -tag -width Fl -compact .It Pa aout/ utilities to manipulate a.out executables .It Pa elf/ utilities to manipulate ELF executables .It Pa lpr/ utilities and filters for LP print system; see .Xr lpr 1 .It Pa sendmail/ the .Xr sendmail 8 binary; see .Xr mailwrapper 8 .It Pa sm.bin/ restricted shell for .Xr sendmail 8 ; see .Xr smrsh 8 .El .Pp .It Pa local/ local executables, libraries, etc. Also used as the default destination for the .Xr ports 7 framework. Within .Pa local/ , the general layout sketched out by .Nm for .Pa /usr should be used. Exceptions are the ports documentation .Po in .Pa share/doc// Ns Pc , and .Pa /usr/local/etc .Po mimics .Pa /etc Ns Pc . .It Pa obj/ architecture-specific target tree produced by building the .Pa /usr/src tree .It Pa ports/ .Xr ports 7 , the .Fx ports collection .It Pa sbin/ system daemons & system utilities (executed by users) .It Pa share/ architecture-independent files .Pp .Bl -tag -width Fl -compact .It Pa calendar/ a variety of pre-fab calendar files; see .Xr calendar 1 .It Pa dict/ word lists; see .Xr look 1 .Bl -tag -width Fl -compact .It Pa freebsd .Fx Ns -specific terms, proper names, and jargon .It Pa web2 words from Webster's 2nd International .El .It Pa doc/ miscellaneous documentation; source for most of the printed .Bx manuals (available from the .Tn USENIX association) .Bl -tag -width Fl -compact .It Pa FAQ/ Frequently Asked Questions .It Pa IPv6/ implementation notes for IPv6 .It Pa es/ Spanish translations of documents in /usr/share/doc .It Pa handbook/ .Fx Handbook .It Pa ja/ Japanese translations of documents in /usr/share/doc .It Pa legal/ License files for vendor supplied firmware files .It Pa ncurses/ HTML documents pertaining to ncurses; see .Xr ncurses 3 .It Pa ntp/ HTML documents pertaining to the Network Time Protocol .It Pa ru/ Russian translations of documents in /usr/share/doc .It Pa tutorials/ .Fx tutorials .It Pa zh/ Chinese translations of documents in /usr/share/doc .El .It Pa examples/ various examples for users and programmers .It Pa firmware/ firmware images loaded by userland programs .It Pa games/ ASCII text files used by various games .It Pa keys/ known trusted and revoked keys. .Bl -tag -width Fl -compact .It Pa pkg/ fingerprints for .Xr pkg 7 and .Xr pkg 8 .El .It Pa locale/ localization files; see .Xr setlocale 3 .It Pa man/ manual pages .It Pa misc/ miscellaneous system-wide ASCII text files .Bl -tag -width Fl -compact .It Pa fonts/ ??? .It Pa termcap terminal characteristics database; see .Xr termcap 5 .El .It Pa mk/ templates for make; see .Xr make 1 .It Pa nls/ national language support files .It Pa security/ data files for security policies such as .Xr mac_lomac 4 .It Pa sendmail/ .Xr sendmail 8 configuration files .It Pa skel/ example .Pa .\& (dot) files for new accounts .It Pa snmp/ MIBs, example files and tree definitions for the SNMP daemon. .Bl -tag -width Fl -compact .It Pa defs/ tree definition files for use with .Xr gensnmptree 1 .It Pa mibs/ MIB files .El .It Pa syscons/ files used by syscons; see .Xr syscons 4 .Bl -tag -width Fl -compact .It Pa fonts/ console fonts; see .Xr vidcontrol 1 and .Xr vidfont 1 .It Pa keymaps/ console keyboard maps; see .Xr kbdcontrol 1 and .Xr kbdmap 1 .It Pa scrnmaps/ console screen maps .El .It Pa tabset/ tab description files for a variety of terminals; used in the termcap file; see .Xr termcap 5 .It Pa vi/ localization support and utilities for .Xr vi 1 .It Pa vt/ files used by vt; see .Xr vt 4 .Bl -tag -width Fl -compact .It Pa fonts/ console fonts; see .Xr vidcontrol 1 and .Xr vidfont 1 .It Pa keymaps/ console keyboard maps; see .Xr kbdcontrol 1 and .Xr kbdmap 1 .\" .It Pa scrnmaps/ .\" console screen maps .El .It Pa zoneinfo/ timezone configuration information; see .Xr tzfile 5 .El .Pp .It Pa src/ .Fx source code .Pp .Bl -tag -width "kerberos5/" -compact .It Pa bin/ source code for files in /bin .It Pa cddl/ utilities covered by the Common Development and Distribution License .It Pa contrib/ source code for contributed software .It Pa crypto/ source code for contributed cryptography software .It Pa etc/ source code for files in .Pa /etc .It Pa gnu/ utilities covered by the GNU General Public License .It Pa include/ source code for files in .Pa /usr/include .It Pa kerberos5/ build infrastructure for Kerberos version 5 .It Pa lib/ source code for files in .Pa /lib and .Pa /usr/lib .It Pa libexec/ source code for files in .Pa /usr/libexec .It Pa release/ files required to produce a .Fx release .It Pa rescue/ source code for files in .Pa /rescue .It Pa sbin/ source code for files in .Pa /sbin .It Pa secure/ build directory for files in .Pa /usr/src/crypto .It Pa share/ source for files in .Pa /usr/share .It Pa stand/ boot loader source code .It Pa sys/ kernel source code .Bl -tag -width Fl -compact .It Pa amd64/ AMD64 architecture support .It Pa arm/ ARM architecture support .It Pa arm64/ ARMv8 architecture support .It Pa cam/ .Xr cam 4 and .Xr ctl 4 .It Pa cddl/ CDDL-licensed optional sources, including ZFS and DTrace .It Pa ddb/ .Xr ddb 4 .It Pa fs/ most filesystems .It Pa dev/ device drivers .It Pa geom/ .Xr geom 4 .It Pa i386/ i386 (32 bit) architecture support .It Pa kern/ main part of the kernel .It Pa mips/ MIPS architecture support .It Pa net80211/ .Xr net80211 4 .It Pa netgraph/ .Xr netgraph 4 .It Pa netinet/ .Xr inet 4 .It Pa netinet6/ .Xr inet6 4 .It Pa netipsec/ .Xr ipsec 4 .It Pa netpfil/ .Xr ipfw 4 and .Xr pf 4 .It Pa opencrypto/ .Xr crypto 7 .It Pa powerpc/ PowerPC/POWER architecture support .It Pa riscv/ RISC-V architecture support .It Pa security/ .Xr audit 4 and .Xr mac 4 .It Pa sparc64/ SPARC64 architecture support .It Pa sys/ kernel headers .It Pa ufs/ Unix File System .It Pa x86/ code shared by AMD64 and i386 architectures .El .It Pa targets/ support for experimental DIRDEPS_BUILD .It Pa tests/ source code for files in .Pa /usr/tests .It Pa tools/ tools used for maintenance and testing of .Fx .It Pa usr.bin/ source code for files in .Pa /usr/bin .It Pa usr.sbin/ source code for files in .Pa /usr/sbin .El .Pp .It Pa tests/ The .Fx test suite; see .Xr tests 7 .El .It Pa /var/ multi-purpose log, temporary, transient, and spool files .Pp .Bl -tag -width "defaults/" -compact .It Pa account/ system accounting files .Pp .Bl -tag -width Fl -compact .It Pa acct execution accounting file; see .Xr acct 5 .El .Pp .It Pa at/ timed command scheduling files; see .Xr \&at 1 .Pp .Bl -tag -width Fl -compact .It Pa jobs/ directory containing job files .It Pa spool/ directory containing output spool files .El .Pp .It Pa backups/ miscellaneous backup files .It Pa cache/ miscellaneous cached files .Pp .Bl -tag -width Fl -compact .It Pa pkg/ cached packages for .Xr pkg 8 .El .Pp .It Pa crash/ default directory to store kernel crash dumps; see .Xr crash 8 and .Xr savecore 8 .It Pa cron/ files used by cron; see .Xr cron 8 .Pp .Bl -tag -width Fl -compact .It Pa tabs/ crontab files; see .Xr crontab 5 .El .Pp .It Pa db/ miscellaneous automatically generated system-specific database files .It Pa empty/ empty directory for use by programs that need a specifically empty directory. Used for instance by .Xr sshd 8 for privilege separation. .It Pa games/ miscellaneous game status and score files .It Pa heimdal/ Kerberos server databases; see .Xr kdc 8 .It Pa log/ miscellaneous system log files .Pp .Bl -tag -width Fl -compact .It Pa utx.lastlogin last login log; see .Xr getutxent 3 .It Pa utx.log login/logout log; see .Xr getutxent 3 .El .Pp .It Pa mail/ user mailbox files .It Pa msgs/ system messages database; see .Xr msgs 1 .It Pa preserve/ temporary home of files preserved after an accidental death of an editor; see .Xr \&ex 1 .It Pa quotas/ file system quota information files .It Pa run/ system information files describing various info about system since it was booted .Pp .Bl -tag -width Fl -compact +.It Pa bhyve/ +bhyve vm unix domain sockets .It Pa ppp/ writable by the .Dq network group for command connection sockets; see .Xr ppp 8 .It Pa utx.active database of current users; see .Xr getutxent 3 .El .Pp .It Pa rwho/ rwho data files; see .Xr rwhod 8 , .Xr rwho 1 , and .Xr ruptime 1 .It Pa spool/ miscellaneous printer and mail system spooling directories .Pp .Bl -tag -width Fl -compact .It Pa clientmqueue/ undelivered submission mail queue; see .Xr sendmail 8 .It Pa ftp/ commonly ~ftp; the anonymous ftp root directory .It Pa mqueue/ undelivered mail queue; see .Xr sendmail 8 .It Pa output/ line printer spooling directories .El .Pp .It Pa tmp/ temporary files that are kept between system reboots .Pp .Bl -tag -width Fl -compact .It Pa vi.recover/ the directory where recovery files are stored .El .Pp .It Pa yp/ the NIS maps .El .El .Sh NOTES This manual page documents the default .Fx file system layout, but the actual hierarchy on a given system is defined at the system administrator's discretion. A well-maintained installation will include a customized version of this document. .Sh SEE ALSO .Xr apropos 1 , .Xr find 1 , .Xr finger 1 , .Xr grep 1 , .Xr ls 1 , .Xr whatis 1 , .Xr whereis 1 , .Xr which 1 , .Xr fd 4 , .Xr devfs 5 , .Xr fsck 8 .Sh HISTORY A .Nm manual page appeared in .At v7 . diff --git a/usr.sbin/bhyve/snapshot.c b/usr.sbin/bhyve/snapshot.c index 4ffdb315883c..11ff963ae1fd 100644 --- a/usr.sbin/bhyve/snapshot.c +++ b/usr.sbin/bhyve/snapshot.c @@ -1,1742 +1,1714 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016 Flavius Anton * Copyright (c) 2016 Mihai Tiganus * Copyright (c) 2016-2019 Mihai Carabas * Copyright (c) 2017-2019 Darius Mihai * Copyright (c) 2017-2019 Elena Mihailescu * Copyright (c) 2018-2019 Sergiu Weisz * All rights reserved. * The bhyve-snapshot feature was developed under sponsorships * from Matthew Grooms. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include "bhyverun.h" #include "acpi.h" #include "atkbdc.h" #include "inout.h" #include "fwctl.h" #include "ioapic.h" #include "mem.h" #include "mevent.h" #include "mptbl.h" #include "pci_emul.h" #include "pci_irq.h" #include "pci_lpc.h" #include "smbiostbl.h" #include "snapshot.h" #include "xmsr.h" #include "spinup_ap.h" #include "rtc.h" #include #include struct spinner_info { const size_t *crtval; const size_t maxval; const size_t total; }; extern int guest_ncpus; static struct winsize winsize; static sig_t old_winch_handler; #define KB (1024UL) #define MB (1024UL * KB) #define GB (1024UL * MB) #define SNAPSHOT_CHUNK (4 * MB) #define PROG_BUF_SZ (8192) -#define BHYVE_RUN_DIR "/var/run/bhyve" -#define CHECKPOINT_RUN_DIR BHYVE_RUN_DIR "/checkpoint" #define MAX_VMNAME 100 #define MAX_MSG_SIZE 1024 #define SNAPSHOT_BUFFER_SIZE (20 * MB) #define JSON_STRUCT_ARR_KEY "structs" #define JSON_DEV_ARR_KEY "devices" #define JSON_BASIC_METADATA_KEY "basic metadata" #define JSON_SNAPSHOT_REQ_KEY "snapshot_req" #define JSON_SIZE_KEY "size" #define JSON_FILE_OFFSET_KEY "file_offset" #define JSON_NCPUS_KEY "ncpus" #define JSON_VMNAME_KEY "vmname" #define JSON_MEMSIZE_KEY "memsize" #define JSON_MEMFLAGS_KEY "memflags" #define min(a,b) \ ({ \ __typeof__ (a) _a = (a); \ __typeof__ (b) _b = (b); \ _a < _b ? _a : _b; \ }) const struct vm_snapshot_dev_info snapshot_devs[] = { { "atkbdc", atkbdc_snapshot, NULL, NULL }, { "virtio-net", pci_snapshot, pci_pause, pci_resume }, { "virtio-blk", pci_snapshot, pci_pause, pci_resume }, { "virtio-rnd", pci_snapshot, NULL, NULL }, { "lpc", pci_snapshot, NULL, NULL }, { "fbuf", pci_snapshot, NULL, NULL }, { "xhci", pci_snapshot, NULL, NULL }, { "e1000", pci_snapshot, NULL, NULL }, { "ahci", pci_snapshot, pci_pause, pci_resume }, { "ahci-hd", pci_snapshot, pci_pause, pci_resume }, { "ahci-cd", pci_snapshot, pci_pause, pci_resume }, }; const struct vm_snapshot_kern_info snapshot_kern_structs[] = { { "vhpet", STRUCT_VHPET }, { "vm", STRUCT_VM }, { "vmx", STRUCT_VMX }, { "vioapic", STRUCT_VIOAPIC }, { "vlapic", STRUCT_VLAPIC }, { "vmcx", STRUCT_VMCX }, { "vatpit", STRUCT_VATPIT }, { "vatpic", STRUCT_VATPIC }, { "vpmtmr", STRUCT_VPMTMR }, { "vrtc", STRUCT_VRTC }, }; static cpuset_t vcpus_active, vcpus_suspended; static pthread_mutex_t vcpu_lock; static pthread_cond_t vcpus_idle, vcpus_can_run; static bool checkpoint_active; /* * TODO: Harden this function and all of its callers since 'base_str' is a user * provided string. */ static char * strcat_extension(const char *base_str, const char *ext) { char *res; size_t base_len, ext_len; base_len = strnlen(base_str, MAX_VMNAME); ext_len = strnlen(ext, MAX_VMNAME); if (base_len + ext_len > MAX_VMNAME) { fprintf(stderr, "Filename exceeds maximum length.\n"); return (NULL); } res = malloc(base_len + ext_len + 1); if (res == NULL) { perror("Failed to allocate memory."); return (NULL); } memcpy(res, base_str, base_len); memcpy(res + base_len, ext, ext_len); res[base_len + ext_len] = 0; return (res); } void destroy_restore_state(struct restore_state *rstate) { if (rstate == NULL) { fprintf(stderr, "Attempting to destroy NULL restore struct.\n"); return; } if (rstate->kdata_map != MAP_FAILED) munmap(rstate->kdata_map, rstate->kdata_len); if (rstate->kdata_fd > 0) close(rstate->kdata_fd); if (rstate->vmmem_fd > 0) close(rstate->vmmem_fd); if (rstate->meta_root_obj != NULL) ucl_object_unref(rstate->meta_root_obj); if (rstate->meta_parser != NULL) ucl_parser_free(rstate->meta_parser); } static int load_vmmem_file(const char *filename, struct restore_state *rstate) { struct stat sb; int err; rstate->vmmem_fd = open(filename, O_RDONLY); if (rstate->vmmem_fd < 0) { perror("Failed to open restore file"); return (-1); } err = fstat(rstate->vmmem_fd, &sb); if (err < 0) { perror("Failed to stat restore file"); goto err_load_vmmem; } if (sb.st_size == 0) { fprintf(stderr, "Restore file is empty.\n"); goto err_load_vmmem; } rstate->vmmem_len = sb.st_size; return (0); err_load_vmmem: if (rstate->vmmem_fd > 0) close(rstate->vmmem_fd); return (-1); } static int load_kdata_file(const char *filename, struct restore_state *rstate) { struct stat sb; int err; rstate->kdata_fd = open(filename, O_RDONLY); if (rstate->kdata_fd < 0) { perror("Failed to open kernel data file"); return (-1); } err = fstat(rstate->kdata_fd, &sb); if (err < 0) { perror("Failed to stat kernel data file"); goto err_load_kdata; } if (sb.st_size == 0) { fprintf(stderr, "Kernel data file is empty.\n"); goto err_load_kdata; } rstate->kdata_len = sb.st_size; rstate->kdata_map = mmap(NULL, rstate->kdata_len, PROT_READ, MAP_SHARED, rstate->kdata_fd, 0); if (rstate->kdata_map == MAP_FAILED) { perror("Failed to map restore file"); goto err_load_kdata; } return (0); err_load_kdata: if (rstate->kdata_fd > 0) close(rstate->kdata_fd); return (-1); } static int load_metadata_file(const char *filename, struct restore_state *rstate) { const ucl_object_t *obj; struct ucl_parser *parser; int err; parser = ucl_parser_new(UCL_PARSER_DEFAULT); if (parser == NULL) { fprintf(stderr, "Failed to initialize UCL parser.\n"); goto err_load_metadata; } err = ucl_parser_add_file(parser, filename); if (err == 0) { fprintf(stderr, "Failed to parse metadata file: '%s'\n", filename); err = -1; goto err_load_metadata; } obj = ucl_parser_get_object(parser); if (obj == NULL) { fprintf(stderr, "Failed to parse object.\n"); err = -1; goto err_load_metadata; } rstate->meta_parser = parser; rstate->meta_root_obj = (ucl_object_t *)obj; return (0); err_load_metadata: if (parser != NULL) ucl_parser_free(parser); return (err); } int load_restore_file(const char *filename, struct restore_state *rstate) { int err = 0; char *kdata_filename = NULL, *meta_filename = NULL; assert(filename != NULL); assert(rstate != NULL); memset(rstate, 0, sizeof(*rstate)); rstate->kdata_map = MAP_FAILED; err = load_vmmem_file(filename, rstate); if (err != 0) { fprintf(stderr, "Failed to load guest RAM file.\n"); goto err_restore; } kdata_filename = strcat_extension(filename, ".kern"); if (kdata_filename == NULL) { fprintf(stderr, "Failed to construct kernel data filename.\n"); goto err_restore; } err = load_kdata_file(kdata_filename, rstate); if (err != 0) { fprintf(stderr, "Failed to load guest kernel data file.\n"); goto err_restore; } meta_filename = strcat_extension(filename, ".meta"); if (meta_filename == NULL) { fprintf(stderr, "Failed to construct kernel metadata filename.\n"); goto err_restore; } err = load_metadata_file(meta_filename, rstate); if (err != 0) { fprintf(stderr, "Failed to load guest metadata file.\n"); goto err_restore; } return (0); err_restore: destroy_restore_state(rstate); if (kdata_filename != NULL) free(kdata_filename); if (meta_filename != NULL) free(meta_filename); return (-1); } #define JSON_GET_INT_OR_RETURN(key, obj, result_ptr, ret) \ do { \ const ucl_object_t *obj__; \ obj__ = ucl_object_lookup(obj, key); \ if (obj__ == NULL) { \ fprintf(stderr, "Missing key: '%s'", key); \ return (ret); \ } \ if (!ucl_object_toint_safe(obj__, result_ptr)) { \ fprintf(stderr, "Cannot convert '%s' value to int.", key); \ return (ret); \ } \ } while(0) #define JSON_GET_STRING_OR_RETURN(key, obj, result_ptr, ret) \ do { \ const ucl_object_t *obj__; \ obj__ = ucl_object_lookup(obj, key); \ if (obj__ == NULL) { \ fprintf(stderr, "Missing key: '%s'", key); \ return (ret); \ } \ if (!ucl_object_tostring_safe(obj__, result_ptr)) { \ fprintf(stderr, "Cannot convert '%s' value to string.", key); \ return (ret); \ } \ } while(0) static void * lookup_struct(enum snapshot_req struct_id, struct restore_state *rstate, size_t *struct_size) { const ucl_object_t *structs = NULL, *obj = NULL; ucl_object_iter_t it = NULL; int64_t snapshot_req, size, file_offset; structs = ucl_object_lookup(rstate->meta_root_obj, JSON_STRUCT_ARR_KEY); if (structs == NULL) { fprintf(stderr, "Failed to find '%s' object.\n", JSON_STRUCT_ARR_KEY); return (NULL); } if (ucl_object_type((ucl_object_t *)structs) != UCL_ARRAY) { fprintf(stderr, "Object '%s' is not an array.\n", JSON_STRUCT_ARR_KEY); return (NULL); } while ((obj = ucl_object_iterate(structs, &it, true)) != NULL) { snapshot_req = -1; JSON_GET_INT_OR_RETURN(JSON_SNAPSHOT_REQ_KEY, obj, &snapshot_req, NULL); assert(snapshot_req >= 0); if ((enum snapshot_req) snapshot_req == struct_id) { JSON_GET_INT_OR_RETURN(JSON_SIZE_KEY, obj, &size, NULL); assert(size >= 0); JSON_GET_INT_OR_RETURN(JSON_FILE_OFFSET_KEY, obj, &file_offset, NULL); assert(file_offset >= 0); assert(file_offset + size <= rstate->kdata_len); *struct_size = (size_t)size; return (rstate->kdata_map + file_offset); } } return (NULL); } static void * lookup_check_dev(const char *dev_name, struct restore_state *rstate, const ucl_object_t *obj, size_t *data_size) { const char *snapshot_req; int64_t size, file_offset; snapshot_req = NULL; JSON_GET_STRING_OR_RETURN(JSON_SNAPSHOT_REQ_KEY, obj, &snapshot_req, NULL); assert(snapshot_req != NULL); if (!strcmp(snapshot_req, dev_name)) { JSON_GET_INT_OR_RETURN(JSON_SIZE_KEY, obj, &size, NULL); assert(size >= 0); JSON_GET_INT_OR_RETURN(JSON_FILE_OFFSET_KEY, obj, &file_offset, NULL); assert(file_offset >= 0); assert(file_offset + size <= rstate->kdata_len); *data_size = (size_t)size; return (rstate->kdata_map + file_offset); } return (NULL); } static void* lookup_dev(const char *dev_name, struct restore_state *rstate, size_t *data_size) { const ucl_object_t *devs = NULL, *obj = NULL; ucl_object_iter_t it = NULL; void *ret; devs = ucl_object_lookup(rstate->meta_root_obj, JSON_DEV_ARR_KEY); if (devs == NULL) { fprintf(stderr, "Failed to find '%s' object.\n", JSON_DEV_ARR_KEY); return (NULL); } if (ucl_object_type((ucl_object_t *)devs) != UCL_ARRAY) { fprintf(stderr, "Object '%s' is not an array.\n", JSON_DEV_ARR_KEY); return (NULL); } while ((obj = ucl_object_iterate(devs, &it, true)) != NULL) { ret = lookup_check_dev(dev_name, rstate, obj, data_size); if (ret != NULL) return (ret); } return (NULL); } static const ucl_object_t * lookup_basic_metadata_object(struct restore_state *rstate) { const ucl_object_t *basic_meta_obj = NULL; basic_meta_obj = ucl_object_lookup(rstate->meta_root_obj, JSON_BASIC_METADATA_KEY); if (basic_meta_obj == NULL) { fprintf(stderr, "Failed to find '%s' object.\n", JSON_BASIC_METADATA_KEY); return (NULL); } if (ucl_object_type((ucl_object_t *)basic_meta_obj) != UCL_OBJECT) { fprintf(stderr, "Object '%s' is not a JSON object.\n", JSON_BASIC_METADATA_KEY); return (NULL); } return (basic_meta_obj); } const char * lookup_vmname(struct restore_state *rstate) { const char *vmname; const ucl_object_t *obj; obj = lookup_basic_metadata_object(rstate); if (obj == NULL) return (NULL); JSON_GET_STRING_OR_RETURN(JSON_VMNAME_KEY, obj, &vmname, NULL); return (vmname); } int lookup_memflags(struct restore_state *rstate) { int64_t memflags; const ucl_object_t *obj; obj = lookup_basic_metadata_object(rstate); if (obj == NULL) return (0); JSON_GET_INT_OR_RETURN(JSON_MEMFLAGS_KEY, obj, &memflags, 0); return ((int)memflags); } size_t lookup_memsize(struct restore_state *rstate) { int64_t memsize; const ucl_object_t *obj; obj = lookup_basic_metadata_object(rstate); if (obj == NULL) return (0); JSON_GET_INT_OR_RETURN(JSON_MEMSIZE_KEY, obj, &memsize, 0); if (memsize < 0) memsize = 0; return ((size_t)memsize); } int lookup_guest_ncpus(struct restore_state *rstate) { int64_t ncpus; const ucl_object_t *obj; obj = lookup_basic_metadata_object(rstate); if (obj == NULL) return (0); JSON_GET_INT_OR_RETURN(JSON_NCPUS_KEY, obj, &ncpus, 0); return ((int)ncpus); } static void winch_handler(int signal) { #ifdef TIOCGWINSZ ioctl(STDOUT_FILENO, TIOCGWINSZ, &winsize); #endif /* TIOCGWINSZ */ } static int print_progress(size_t crtval, const size_t maxval) { size_t rc; double crtval_gb, maxval_gb; size_t i, win_width, prog_start, prog_done, prog_end; int mval_len; static char prog_buf[PROG_BUF_SZ]; static const size_t len = sizeof(prog_buf); static size_t div; static char *div_str; static char wip_bar[] = { '/', '-', '\\', '|' }; static int wip_idx = 0; if (maxval == 0) { printf("[0B / 0B]\r\n"); return (0); } if (crtval > maxval) crtval = maxval; if (maxval > 10 * GB) { div = GB; div_str = "GiB"; } else if (maxval > 10 * MB) { div = MB; div_str = "MiB"; } else { div = KB; div_str = "KiB"; } crtval_gb = (double) crtval / div; maxval_gb = (double) maxval / div; rc = snprintf(prog_buf, len, "%.03lf", maxval_gb); if (rc == len) { fprintf(stderr, "Maxval too big\n"); return (-1); } mval_len = rc; rc = snprintf(prog_buf, len, "\r[%*.03lf%s / %.03lf%s] |", mval_len, crtval_gb, div_str, maxval_gb, div_str); if (rc == len) { fprintf(stderr, "Buffer too small to print progress\n"); return (-1); } win_width = min(winsize.ws_col, len); prog_start = rc; if (prog_start < (win_width - 2)) { prog_end = win_width - prog_start - 2; prog_done = prog_end * (crtval_gb / maxval_gb); for (i = prog_start; i < prog_start + prog_done; i++) prog_buf[i] = '#'; if (crtval != maxval) { prog_buf[i] = wip_bar[wip_idx]; wip_idx = (wip_idx + 1) % sizeof(wip_bar); i++; } else { prog_buf[i++] = '#'; } for (; i < win_width - 2; i++) prog_buf[i] = '_'; prog_buf[win_width - 2] = '|'; } prog_buf[win_width - 1] = '\0'; write(STDOUT_FILENO, prog_buf, win_width); return (0); } static void * snapshot_spinner_cb(void *arg) { int rc; size_t crtval, maxval, total; struct spinner_info *si; struct timespec ts; si = arg; if (si == NULL) pthread_exit(NULL); ts.tv_sec = 0; ts.tv_nsec = 50 * 1000 * 1000; /* 50 ms sleep time */ do { crtval = *si->crtval; maxval = si->maxval; total = si->total; rc = print_progress(crtval, total); if (rc < 0) { fprintf(stderr, "Failed to parse progress\n"); break; } nanosleep(&ts, NULL); } while (crtval < maxval); pthread_exit(NULL); return NULL; } static int vm_snapshot_mem_part(const int snapfd, const size_t foff, void *src, const size_t len, const size_t totalmem, const bool op_wr) { int rc; size_t part_done, todo, rem; ssize_t done; bool show_progress; pthread_t spinner_th; struct spinner_info *si; if (lseek(snapfd, foff, SEEK_SET) < 0) { perror("Failed to change file offset"); return (-1); } show_progress = false; if (isatty(STDIN_FILENO) && (winsize.ws_col != 0)) show_progress = true; part_done = foff; rem = len; if (show_progress) { si = &(struct spinner_info) { .crtval = &part_done, .maxval = foff + len, .total = totalmem }; rc = pthread_create(&spinner_th, 0, snapshot_spinner_cb, si); if (rc) { perror("Unable to create spinner thread"); show_progress = false; } } while (rem > 0) { if (show_progress) todo = min(SNAPSHOT_CHUNK, rem); else todo = rem; if (op_wr) done = write(snapfd, src, todo); else done = read(snapfd, src, todo); if (done < 0) { perror("Failed to write in file"); return (-1); } src += done; part_done += done; rem -= done; } if (show_progress) { rc = pthread_join(spinner_th, NULL); if (rc) perror("Unable to end spinner thread"); } return (0); } static size_t vm_snapshot_mem(struct vmctx *ctx, int snapfd, size_t memsz, const bool op_wr) { int ret; size_t lowmem, highmem, totalmem; char *baseaddr; ret = vm_get_guestmem_from_ctx(ctx, &baseaddr, &lowmem, &highmem); if (ret) { fprintf(stderr, "%s: unable to retrieve guest memory size\r\n", __func__); return (0); } totalmem = lowmem + highmem; if ((op_wr == false) && (totalmem != memsz)) { fprintf(stderr, "%s: mem size mismatch: %ld vs %ld\r\n", __func__, totalmem, memsz); return (0); } winsize.ws_col = 80; #ifdef TIOCGWINSZ ioctl(STDOUT_FILENO, TIOCGWINSZ, &winsize); #endif /* TIOCGWINSZ */ old_winch_handler = signal(SIGWINCH, winch_handler); ret = vm_snapshot_mem_part(snapfd, 0, baseaddr, lowmem, totalmem, op_wr); if (ret) { fprintf(stderr, "%s: Could not %s lowmem\r\n", __func__, op_wr ? "write" : "read"); totalmem = 0; goto done; } if (highmem == 0) goto done; ret = vm_snapshot_mem_part(snapfd, lowmem, baseaddr + 4*GB, highmem, totalmem, op_wr); if (ret) { fprintf(stderr, "%s: Could not %s highmem\r\n", __func__, op_wr ? "write" : "read"); totalmem = 0; goto done; } done: printf("\r\n"); signal(SIGWINCH, old_winch_handler); return (totalmem); } int restore_vm_mem(struct vmctx *ctx, struct restore_state *rstate) { size_t restored; restored = vm_snapshot_mem(ctx, rstate->vmmem_fd, rstate->vmmem_len, false); if (restored != rstate->vmmem_len) return (-1); return (0); } static int vm_restore_kern_struct(struct vmctx *ctx, struct restore_state *rstate, const struct vm_snapshot_kern_info *info) { void *struct_ptr; size_t struct_size; int ret; struct vm_snapshot_meta *meta; struct_ptr = lookup_struct(info->req, rstate, &struct_size); if (struct_ptr == NULL) { fprintf(stderr, "%s: Failed to lookup struct %s\r\n", __func__, info->struct_name); ret = -1; goto done; } if (struct_size == 0) { fprintf(stderr, "%s: Kernel struct size was 0 for: %s\r\n", __func__, info->struct_name); ret = -1; goto done; } meta = &(struct vm_snapshot_meta) { .ctx = ctx, .dev_name = info->struct_name, .dev_req = info->req, .buffer.buf_start = struct_ptr, .buffer.buf_size = struct_size, .buffer.buf = struct_ptr, .buffer.buf_rem = struct_size, .op = VM_SNAPSHOT_RESTORE, }; ret = vm_snapshot_req(meta); if (ret != 0) { fprintf(stderr, "%s: Failed to restore struct: %s\r\n", __func__, info->struct_name); goto done; } done: return (ret); } int vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate) { int ret; int i; for (i = 0; i < nitems(snapshot_kern_structs); i++) { ret = vm_restore_kern_struct(ctx, rstate, &snapshot_kern_structs[i]); if (ret != 0) return (ret); } return (0); } int vm_restore_user_dev(struct vmctx *ctx, struct restore_state *rstate, const struct vm_snapshot_dev_info *info) { void *dev_ptr; size_t dev_size; int ret; struct vm_snapshot_meta *meta; dev_ptr = lookup_dev(info->dev_name, rstate, &dev_size); if (dev_ptr == NULL) { fprintf(stderr, "Failed to lookup dev: %s\r\n", info->dev_name); fprintf(stderr, "Continuing the restore/migration process\r\n"); return (0); } if (dev_size == 0) { fprintf(stderr, "%s: Device size is 0. " "Assuming %s is not used\r\n", __func__, info->dev_name); return (0); } meta = &(struct vm_snapshot_meta) { .ctx = ctx, .dev_name = info->dev_name, .buffer.buf_start = dev_ptr, .buffer.buf_size = dev_size, .buffer.buf = dev_ptr, .buffer.buf_rem = dev_size, .op = VM_SNAPSHOT_RESTORE, }; ret = (*info->snapshot_cb)(meta); if (ret != 0) { fprintf(stderr, "Failed to restore dev: %s\r\n", info->dev_name); return (-1); } return (0); } int vm_restore_user_devs(struct vmctx *ctx, struct restore_state *rstate) { int ret; int i; for (i = 0; i < nitems(snapshot_devs); i++) { ret = vm_restore_user_dev(ctx, rstate, &snapshot_devs[i]); if (ret != 0) return (ret); } return 0; } int vm_pause_user_devs(struct vmctx *ctx) { const struct vm_snapshot_dev_info *info; int ret; int i; for (i = 0; i < nitems(snapshot_devs); i++) { info = &snapshot_devs[i]; if (info->pause_cb == NULL) continue; ret = info->pause_cb(ctx, info->dev_name); if (ret != 0) return (ret); } return (0); } int vm_resume_user_devs(struct vmctx *ctx) { const struct vm_snapshot_dev_info *info; int ret; int i; for (i = 0; i < nitems(snapshot_devs); i++) { info = &snapshot_devs[i]; if (info->resume_cb == NULL) continue; ret = info->resume_cb(ctx, info->dev_name); if (ret != 0) return (ret); } return (0); } static int vm_snapshot_kern_struct(int data_fd, xo_handle_t *xop, const char *array_key, struct vm_snapshot_meta *meta, off_t *offset) { int ret; size_t data_size; ssize_t write_cnt; ret = vm_snapshot_req(meta); if (ret != 0) { fprintf(stderr, "%s: Failed to snapshot struct %s\r\n", __func__, meta->dev_name); ret = -1; goto done; } data_size = vm_get_snapshot_size(meta); write_cnt = write(data_fd, meta->buffer.buf_start, data_size); if (write_cnt != data_size) { perror("Failed to write all snapshotted data."); ret = -1; goto done; } /* Write metadata. */ xo_open_instance_h(xop, array_key); xo_emit_h(xop, "{:debug_name/%s}\n", meta->dev_name); xo_emit_h(xop, "{:" JSON_SNAPSHOT_REQ_KEY "/%d}\n", meta->dev_req); xo_emit_h(xop, "{:" JSON_SIZE_KEY "/%lu}\n", data_size); xo_emit_h(xop, "{:" JSON_FILE_OFFSET_KEY "/%lu}\n", *offset); xo_close_instance_h(xop, JSON_STRUCT_ARR_KEY); *offset += data_size; done: return (ret); } static int vm_snapshot_kern_structs(struct vmctx *ctx, int data_fd, xo_handle_t *xop) { int ret, i, error; size_t offset, buf_size; char *buffer; struct vm_snapshot_meta *meta; error = 0; offset = 0; buf_size = SNAPSHOT_BUFFER_SIZE; buffer = malloc(SNAPSHOT_BUFFER_SIZE * sizeof(char)); if (buffer == NULL) { error = ENOMEM; perror("Failed to allocate memory for snapshot buffer"); goto err_vm_snapshot_kern_data; } meta = &(struct vm_snapshot_meta) { .ctx = ctx, .buffer.buf_start = buffer, .buffer.buf_size = buf_size, .op = VM_SNAPSHOT_SAVE, }; xo_open_list_h(xop, JSON_STRUCT_ARR_KEY); for (i = 0; i < nitems(snapshot_kern_structs); i++) { meta->dev_name = snapshot_kern_structs[i].struct_name; meta->dev_req = snapshot_kern_structs[i].req; memset(meta->buffer.buf_start, 0, meta->buffer.buf_size); meta->buffer.buf = meta->buffer.buf_start; meta->buffer.buf_rem = meta->buffer.buf_size; ret = vm_snapshot_kern_struct(data_fd, xop, JSON_DEV_ARR_KEY, meta, &offset); if (ret != 0) { error = -1; goto err_vm_snapshot_kern_data; } } xo_close_list_h(xop, JSON_STRUCT_ARR_KEY); err_vm_snapshot_kern_data: if (buffer != NULL) free(buffer); return (error); } static int vm_snapshot_basic_metadata(struct vmctx *ctx, xo_handle_t *xop, size_t memsz) { int error; int memflags; char vmname_buf[MAX_VMNAME]; memset(vmname_buf, 0, MAX_VMNAME); error = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1); if (error != 0) { perror("Failed to get VM name"); goto err; } memflags = vm_get_memflags(ctx); xo_open_container_h(xop, JSON_BASIC_METADATA_KEY); xo_emit_h(xop, "{:" JSON_NCPUS_KEY "/%ld}\n", guest_ncpus); xo_emit_h(xop, "{:" JSON_VMNAME_KEY "/%s}\n", vmname_buf); xo_emit_h(xop, "{:" JSON_MEMSIZE_KEY "/%lu}\n", memsz); xo_emit_h(xop, "{:" JSON_MEMFLAGS_KEY "/%d}\n", memflags); xo_close_container_h(xop, JSON_BASIC_METADATA_KEY); err: return (error); } static int vm_snapshot_dev_write_data(int data_fd, xo_handle_t *xop, const char *array_key, struct vm_snapshot_meta *meta, off_t *offset) { int ret; size_t data_size; data_size = vm_get_snapshot_size(meta); ret = write(data_fd, meta->buffer.buf_start, data_size); if (ret != data_size) { perror("Failed to write all snapshotted data."); return (-1); } /* Write metadata. */ xo_open_instance_h(xop, array_key); xo_emit_h(xop, "{:" JSON_SNAPSHOT_REQ_KEY "/%s}\n", meta->dev_name); xo_emit_h(xop, "{:" JSON_SIZE_KEY "/%lu}\n", data_size); xo_emit_h(xop, "{:" JSON_FILE_OFFSET_KEY "/%lu}\n", *offset); xo_close_instance_h(xop, array_key); *offset += data_size; return (0); } static int vm_snapshot_user_dev(const struct vm_snapshot_dev_info *info, int data_fd, xo_handle_t *xop, struct vm_snapshot_meta *meta, off_t *offset) { int ret; ret = (*info->snapshot_cb)(meta); if (ret != 0) { fprintf(stderr, "Failed to snapshot %s; ret=%d\r\n", meta->dev_name, ret); return (ret); } ret = vm_snapshot_dev_write_data(data_fd, xop, JSON_DEV_ARR_KEY, meta, offset); if (ret != 0) return (ret); return (0); } static int vm_snapshot_user_devs(struct vmctx *ctx, int data_fd, xo_handle_t *xop) { int ret, i; off_t offset; void *buffer; size_t buf_size; struct vm_snapshot_meta *meta; buf_size = SNAPSHOT_BUFFER_SIZE; offset = lseek(data_fd, 0, SEEK_CUR); if (offset < 0) { perror("Failed to get data file current offset."); return (-1); } buffer = malloc(buf_size); if (buffer == NULL) { perror("Failed to allocate memory for snapshot buffer"); ret = ENOSPC; goto snapshot_err; } meta = &(struct vm_snapshot_meta) { .ctx = ctx, .buffer.buf_start = buffer, .buffer.buf_size = buf_size, .op = VM_SNAPSHOT_SAVE, }; xo_open_list_h(xop, JSON_DEV_ARR_KEY); /* Restore other devices that support this feature */ for (i = 0; i < nitems(snapshot_devs); i++) { meta->dev_name = snapshot_devs[i].dev_name; memset(meta->buffer.buf_start, 0, meta->buffer.buf_size); meta->buffer.buf = meta->buffer.buf_start; meta->buffer.buf_rem = meta->buffer.buf_size; ret = vm_snapshot_user_dev(&snapshot_devs[i], data_fd, xop, meta, &offset); if (ret != 0) goto snapshot_err; } xo_close_list_h(xop, JSON_DEV_ARR_KEY); snapshot_err: if (buffer != NULL) free(buffer); return (ret); } void checkpoint_cpu_add(int vcpu) { pthread_mutex_lock(&vcpu_lock); CPU_SET(vcpu, &vcpus_active); if (checkpoint_active) { CPU_SET(vcpu, &vcpus_suspended); while (checkpoint_active) pthread_cond_wait(&vcpus_can_run, &vcpu_lock); CPU_CLR(vcpu, &vcpus_suspended); } pthread_mutex_unlock(&vcpu_lock); } /* * When a vCPU is suspended for any reason, it calls * checkpoint_cpu_suspend(). This records that the vCPU is idle. * Before returning from suspension, checkpoint_cpu_resume() is * called. In suspend we note that the vCPU is idle. In resume we * pause the vCPU thread until the checkpoint is complete. The reason * for the two-step process is that vCPUs might already be stopped in * the debug server when a checkpoint is requested. This approach * allows us to account for and handle those vCPUs. */ void checkpoint_cpu_suspend(int vcpu) { pthread_mutex_lock(&vcpu_lock); CPU_SET(vcpu, &vcpus_suspended); if (checkpoint_active && CPU_CMP(&vcpus_active, &vcpus_suspended) == 0) pthread_cond_signal(&vcpus_idle); pthread_mutex_unlock(&vcpu_lock); } void checkpoint_cpu_resume(int vcpu) { pthread_mutex_lock(&vcpu_lock); while (checkpoint_active) pthread_cond_wait(&vcpus_can_run, &vcpu_lock); CPU_CLR(vcpu, &vcpus_suspended); pthread_mutex_unlock(&vcpu_lock); } static void vm_vcpu_pause(struct vmctx *ctx) { pthread_mutex_lock(&vcpu_lock); checkpoint_active = true; vm_suspend_cpu(ctx, -1); while (CPU_CMP(&vcpus_active, &vcpus_suspended) != 0) pthread_cond_wait(&vcpus_idle, &vcpu_lock); pthread_mutex_unlock(&vcpu_lock); } static void vm_vcpu_resume(struct vmctx *ctx) { pthread_mutex_lock(&vcpu_lock); checkpoint_active = false; pthread_mutex_unlock(&vcpu_lock); vm_resume_cpu(ctx, -1); pthread_cond_broadcast(&vcpus_can_run); } static int vm_checkpoint(struct vmctx *ctx, char *checkpoint_file, bool stop_vm) { int fd_checkpoint = 0, kdata_fd = 0; int ret = 0; int error = 0; size_t memsz; xo_handle_t *xop = NULL; char *meta_filename = NULL; char *kdata_filename = NULL; FILE *meta_file = NULL; kdata_filename = strcat_extension(checkpoint_file, ".kern"); if (kdata_filename == NULL) { fprintf(stderr, "Failed to construct kernel data filename.\n"); return (-1); } kdata_fd = open(kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700); if (kdata_fd < 0) { perror("Failed to open kernel data snapshot file."); error = -1; goto done; } fd_checkpoint = open(checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700); if (fd_checkpoint < 0) { perror("Failed to create checkpoint file"); error = -1; goto done; } meta_filename = strcat_extension(checkpoint_file, ".meta"); if (meta_filename == NULL) { fprintf(stderr, "Failed to construct vm metadata filename.\n"); goto done; } meta_file = fopen(meta_filename, "w"); if (meta_file == NULL) { perror("Failed to open vm metadata snapshot file."); goto done; } xop = xo_create_to_file(meta_file, XO_STYLE_JSON, XOF_PRETTY); if (xop == NULL) { perror("Failed to get libxo handle on metadata file."); goto done; } vm_vcpu_pause(ctx); ret = vm_pause_user_devs(ctx); if (ret != 0) { fprintf(stderr, "Could not pause devices\r\n"); error = ret; goto done; } memsz = vm_snapshot_mem(ctx, fd_checkpoint, 0, true); if (memsz == 0) { perror("Could not write guest memory to file"); error = -1; goto done; } ret = vm_snapshot_basic_metadata(ctx, xop, memsz); if (ret != 0) { fprintf(stderr, "Failed to snapshot vm basic metadata.\n"); error = -1; goto done; } ret = vm_snapshot_kern_structs(ctx, kdata_fd, xop); if (ret != 0) { fprintf(stderr, "Failed to snapshot vm kernel data.\n"); error = -1; goto done; } ret = vm_snapshot_user_devs(ctx, kdata_fd, xop); if (ret != 0) { fprintf(stderr, "Failed to snapshot device state.\n"); error = -1; goto done; } xo_finish_h(xop); if (stop_vm) { vm_destroy(ctx); exit(0); } done: ret = vm_resume_user_devs(ctx); if (ret != 0) fprintf(stderr, "Could not resume devices\r\n"); vm_vcpu_resume(ctx); if (fd_checkpoint > 0) close(fd_checkpoint); if (meta_filename != NULL) free(meta_filename); if (kdata_filename != NULL) free(kdata_filename); if (xop != NULL) xo_destroy(xop); if (meta_file != NULL) fclose(meta_file); if (kdata_fd > 0) close(kdata_fd); return (error); } int get_checkpoint_msg(int conn_fd, struct vmctx *ctx) { unsigned char buf[MAX_MSG_SIZE]; struct checkpoint_op *checkpoint_op; int len, recv_len, total_recv = 0; int err = 0; len = sizeof(struct checkpoint_op); /* expected length */ while ((recv_len = recv(conn_fd, buf + total_recv, len - total_recv, 0)) > 0) { total_recv += recv_len; } if (recv_len < 0) { perror("Error while receiving data from bhyvectl"); err = -1; goto done; } checkpoint_op = (struct checkpoint_op *)buf; switch (checkpoint_op->op) { case START_CHECKPOINT: err = vm_checkpoint(ctx, checkpoint_op->snapshot_filename, false); break; case START_SUSPEND: err = vm_checkpoint(ctx, checkpoint_op->snapshot_filename, true); break; default: fprintf(stderr, "Unrecognized checkpoint operation.\n"); err = -1; } done: close(conn_fd); return (err); } /* * Listen for commands from bhyvectl */ void * checkpoint_thread(void *param) { struct checkpoint_thread_info *thread_info; int conn_fd, ret; pthread_set_name_np(pthread_self(), "checkpoint thread"); thread_info = (struct checkpoint_thread_info *)param; while ((conn_fd = accept(thread_info->socket_fd, NULL, NULL)) > -1) { ret = get_checkpoint_msg(conn_fd, thread_info->ctx); if (ret != 0) { fprintf(stderr, "Failed to read message on checkpoint " "socket. Retrying.\n"); } } if (conn_fd < -1) { perror("Failed to accept connection"); } return (NULL); } -/* - * Create directory tree to store runtime specific information: - * i.e. UNIX sockets for IPC with bhyvectl. - */ -static int -make_checkpoint_dir(void) -{ - int err; - - err = mkdir(BHYVE_RUN_DIR, 0755); - if (err < 0 && errno != EEXIST) - return (err); - - err = mkdir(CHECKPOINT_RUN_DIR, 0755); - if (err < 0 && errno != EEXIST) - return (err); - - return 0; -} - /* * Create the listening socket for IPC with bhyvectl */ int init_checkpoint_thread(struct vmctx *ctx) { struct checkpoint_thread_info *checkpoint_info = NULL; struct sockaddr_un addr; int socket_fd; pthread_t checkpoint_pthread; char vmname_buf[MAX_VMNAME]; int ret, err = 0; memset(&addr, 0, sizeof(addr)); err = pthread_mutex_init(&vcpu_lock, NULL); if (err != 0) errc(1, err, "checkpoint mutex init"); err = pthread_cond_init(&vcpus_idle, NULL); if (err == 0) err = pthread_cond_init(&vcpus_can_run, NULL); if (err != 0) errc(1, err, "checkpoint cv init"); socket_fd = socket(PF_UNIX, SOCK_STREAM, 0); if (socket_fd < 0) { perror("Socket creation failed (IPC with bhyvectl"); err = -1; goto fail; } - err = make_checkpoint_dir(); - if (err < 0) { - perror("Failed to create checkpoint runtime directory"); - goto fail; - } - addr.sun_family = AF_UNIX; err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1); if (err != 0) { perror("Failed to get VM name"); goto fail; } - snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s", - CHECKPOINT_RUN_DIR, vmname_buf); + snprintf(addr.sun_path, sizeof(addr.sun_path), "%s%s", + BHYVE_RUN_DIR, vmname_buf); addr.sun_len = SUN_LEN(&addr); unlink(addr.sun_path); if (bind(socket_fd, (struct sockaddr *)&addr, addr.sun_len) != 0) { perror("Failed to bind socket (IPC with bhyvectl)"); err = -1; goto fail; } if (listen(socket_fd, 10) < 0) { perror("Failed to listen on socket (IPC with bhyvectl)"); err = -1; goto fail; } checkpoint_info = calloc(1, sizeof(*checkpoint_info)); checkpoint_info->ctx = ctx; checkpoint_info->socket_fd = socket_fd; ret = pthread_create(&checkpoint_pthread, NULL, checkpoint_thread, checkpoint_info); if (ret < 0) { err = ret; goto fail; } return (0); fail: free(checkpoint_info); if (socket_fd > 0) close(socket_fd); unlink(addr.sun_path); return (err); } void vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op) { const char *__op; if (op == VM_SNAPSHOT_SAVE) __op = "save"; else if (op == VM_SNAPSHOT_RESTORE) __op = "restore"; else __op = "unknown"; fprintf(stderr, "%s: snapshot-%s failed for %s\r\n", __func__, __op, bufname); } int vm_snapshot_buf(volatile void *data, size_t data_size, struct vm_snapshot_meta *meta) { struct vm_snapshot_buffer *buffer; int op; buffer = &meta->buffer; op = meta->op; if (buffer->buf_rem < data_size) { fprintf(stderr, "%s: buffer too small\r\n", __func__); return (E2BIG); } if (op == VM_SNAPSHOT_SAVE) memcpy(buffer->buf, (uint8_t *) data, data_size); else if (op == VM_SNAPSHOT_RESTORE) memcpy((uint8_t *) data, buffer->buf, data_size); else return (EINVAL); buffer->buf += data_size; buffer->buf_rem -= data_size; return (0); } size_t vm_get_snapshot_size(struct vm_snapshot_meta *meta) { size_t length; struct vm_snapshot_buffer *buffer; buffer = &meta->buffer; if (buffer->buf_size < buffer->buf_rem) { fprintf(stderr, "%s: Invalid buffer: size = %zu, rem = %zu\r\n", __func__, buffer->buf_size, buffer->buf_rem); length = 0; } else { length = buffer->buf_size - buffer->buf_rem; } return (length); } int vm_snapshot_guest2host_addr(void **addrp, size_t len, bool restore_null, struct vm_snapshot_meta *meta) { int ret; vm_paddr_t gaddr; if (meta->op == VM_SNAPSHOT_SAVE) { gaddr = paddr_host2guest(meta->ctx, *addrp); if (gaddr == (vm_paddr_t) -1) { if (!restore_null || (restore_null && (*addrp != NULL))) { ret = EFAULT; goto done; } } SNAPSHOT_VAR_OR_LEAVE(gaddr, meta, ret, done); } else if (meta->op == VM_SNAPSHOT_RESTORE) { SNAPSHOT_VAR_OR_LEAVE(gaddr, meta, ret, done); if (gaddr == (vm_paddr_t) -1) { if (!restore_null) { ret = EFAULT; goto done; } } *addrp = paddr_guest2host(meta->ctx, gaddr, len); } else { ret = EINVAL; } done: return (ret); } int vm_snapshot_buf_cmp(volatile void *data, size_t data_size, struct vm_snapshot_meta *meta) { struct vm_snapshot_buffer *buffer; int op; int ret; buffer = &meta->buffer; op = meta->op; if (buffer->buf_rem < data_size) { fprintf(stderr, "%s: buffer too small\r\n", __func__); ret = E2BIG; goto done; } if (op == VM_SNAPSHOT_SAVE) { ret = 0; memcpy(buffer->buf, (uint8_t *) data, data_size); } else if (op == VM_SNAPSHOT_RESTORE) { ret = memcmp((uint8_t *) data, buffer->buf, data_size); } else { ret = EINVAL; goto done; } buffer->buf += data_size; buffer->buf_rem -= data_size; done: return (ret); } diff --git a/usr.sbin/bhyve/snapshot.h b/usr.sbin/bhyve/snapshot.h index eba46c497ab2..caa7256ffbfa 100644 --- a/usr.sbin/bhyve/snapshot.h +++ b/usr.sbin/bhyve/snapshot.h @@ -1,117 +1,118 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016 Flavius Anton * Copyright (c) 2016 Mihai Tiganus * Copyright (c) 2016-2019 Mihai Carabas * Copyright (c) 2017-2019 Darius Mihai * Copyright (c) 2017-2019 Elena Mihailescu * Copyright (c) 2018-2019 Sergiu Weisz * All rights reserved. * The bhyve-snapshot feature was developed under sponsorships * from Matthew Grooms. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _BHYVE_SNAPSHOT_ #define _BHYVE_SNAPSHOT_ #include #include #include +#define BHYVE_RUN_DIR "/var/run/bhyve/" #define MAX_SNAPSHOT_VMNAME 100 struct vmctx; struct restore_state { int kdata_fd; int vmmem_fd; void *kdata_map; size_t kdata_len; size_t vmmem_len; struct ucl_parser *meta_parser; ucl_object_t *meta_root_obj; }; enum checkpoint_opcodes { START_CHECKPOINT = 0, START_SUSPEND = 1, }; struct checkpoint_op { unsigned int op; char snapshot_filename[MAX_SNAPSHOT_VMNAME]; }; struct checkpoint_thread_info { struct vmctx *ctx; int socket_fd; }; typedef int (*vm_snapshot_dev_cb)(struct vm_snapshot_meta *); typedef int (*vm_pause_dev_cb) (struct vmctx *, const char *); typedef int (*vm_resume_dev_cb) (struct vmctx *, const char *); struct vm_snapshot_dev_info { const char *dev_name; /* device name */ vm_snapshot_dev_cb snapshot_cb; /* callback for device snapshot */ vm_pause_dev_cb pause_cb; /* callback for device pause */ vm_resume_dev_cb resume_cb; /* callback for device resume */ }; struct vm_snapshot_kern_info { const char *struct_name; /* kernel structure name*/ enum snapshot_req req; /* request type */ }; void destroy_restore_state(struct restore_state *rstate); const char *lookup_vmname(struct restore_state *rstate); int lookup_memflags(struct restore_state *rstate); size_t lookup_memsize(struct restore_state *rstate); int lookup_guest_ncpus(struct restore_state *rstate); void checkpoint_cpu_add(int vcpu); void checkpoint_cpu_resume(int vcpu); void checkpoint_cpu_suspend(int vcpu); int restore_vm_mem(struct vmctx *ctx, struct restore_state *rstate); int vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate); int vm_restore_user_devs(struct vmctx *ctx, struct restore_state *rstate); int vm_pause_user_devs(struct vmctx *ctx); int vm_resume_user_devs(struct vmctx *ctx); int get_checkpoint_msg(int conn_fd, struct vmctx *ctx); void *checkpoint_thread(void *param); int init_checkpoint_thread(struct vmctx *ctx); int load_restore_file(const char *filename, struct restore_state *rstate); #endif diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c index b404f9f3c255..7b3a73cc3668 100644 --- a/usr.sbin/bhyvectl/bhyvectl.c +++ b/usr.sbin/bhyvectl/bhyvectl.c @@ -1,2428 +1,2427 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "amd/vmcb.h" #include "intel/vmcs.h" #ifdef BHYVE_SNAPSHOT #include "snapshot.h" #endif #define MB (1UL << 20) #define GB (1UL << 30) #define REQ_ARG required_argument #define NO_ARG no_argument #define OPT_ARG optional_argument -#define CHECKPOINT_RUN_DIR "/var/run/bhyve/checkpoint" #define MAX_VMNAME 100 static const char *progname; static void usage(bool cpu_intel) { (void)fprintf(stderr, "Usage: %s --vm=\n" " [--cpu=]\n" " [--create]\n" " [--destroy]\n" #ifdef BHYVE_SNAPSHOT " [--checkpoint=]\n" " [--suspend=]\n" #endif " [--get-all]\n" " [--get-stats]\n" " [--set-desc-ds]\n" " [--get-desc-ds]\n" " [--set-desc-es]\n" " [--get-desc-es]\n" " [--set-desc-gs]\n" " [--get-desc-gs]\n" " [--set-desc-fs]\n" " [--get-desc-fs]\n" " [--set-desc-cs]\n" " [--get-desc-cs]\n" " [--set-desc-ss]\n" " [--get-desc-ss]\n" " [--set-desc-tr]\n" " [--get-desc-tr]\n" " [--set-desc-ldtr]\n" " [--get-desc-ldtr]\n" " [--set-desc-gdtr]\n" " [--get-desc-gdtr]\n" " [--set-desc-idtr]\n" " [--get-desc-idtr]\n" " [--run]\n" " [--capname=]\n" " [--getcap]\n" " [--setcap=<0|1>]\n" " [--desc-base=]\n" " [--desc-limit=]\n" " [--desc-access=]\n" " [--set-cr0=]\n" " [--get-cr0]\n" " [--set-cr2=]\n" " [--get-cr2]\n" " [--set-cr3=]\n" " [--get-cr3]\n" " [--set-cr4=]\n" " [--get-cr4]\n" " [--set-dr0=]\n" " [--get-dr0]\n" " [--set-dr1=]\n" " [--get-dr1]\n" " [--set-dr2=]\n" " [--get-dr2]\n" " [--set-dr3=]\n" " [--get-dr3]\n" " [--set-dr6=]\n" " [--get-dr6]\n" " [--set-dr7=]\n" " [--get-dr7]\n" " [--set-rsp=]\n" " [--get-rsp]\n" " [--set-rip=]\n" " [--get-rip]\n" " [--get-rax]\n" " [--set-rax=]\n" " [--get-rbx]\n" " [--get-rcx]\n" " [--get-rdx]\n" " [--get-rsi]\n" " [--get-rdi]\n" " [--get-rbp]\n" " [--get-r8]\n" " [--get-r9]\n" " [--get-r10]\n" " [--get-r11]\n" " [--get-r12]\n" " [--get-r13]\n" " [--get-r14]\n" " [--get-r15]\n" " [--set-rflags=]\n" " [--get-rflags]\n" " [--set-cs]\n" " [--get-cs]\n" " [--set-ds]\n" " [--get-ds]\n" " [--set-es]\n" " [--get-es]\n" " [--set-fs]\n" " [--get-fs]\n" " [--set-gs]\n" " [--get-gs]\n" " [--set-ss]\n" " [--get-ss]\n" " [--get-tr]\n" " [--get-ldtr]\n" " [--set-x2apic-state=]\n" " [--get-x2apic-state]\n" " [--unassign-pptdev=]\n" " [--set-mem=]\n" " [--get-lowmem]\n" " [--get-highmem]\n" " [--get-gpa-pmap]\n" " [--assert-lapic-lvt=]\n" " [--inject-nmi]\n" " [--force-reset]\n" " [--force-poweroff]\n" " [--get-rtc-time]\n" " [--set-rtc-time=]\n" " [--get-rtc-nvram]\n" " [--set-rtc-nvram=]\n" " [--rtc-nvram-offset=]\n" " [--get-active-cpus]\n" " [--get-suspended-cpus]\n" " [--get-intinfo]\n" " [--get-eptp]\n" " [--set-exception-bitmap]\n" " [--get-exception-bitmap]\n" " [--get-tsc-offset]\n" " [--get-guest-pat]\n" " [--get-io-bitmap-address]\n" " [--get-msr-bitmap]\n" " [--get-msr-bitmap-address]\n" " [--get-guest-sysenter]\n" " [--get-exit-reason]\n" " [--get-cpu-topology]\n", progname); if (cpu_intel) { (void)fprintf(stderr, " [--get-vmcs-pinbased-ctls]\n" " [--get-vmcs-procbased-ctls]\n" " [--get-vmcs-procbased-ctls2]\n" " [--get-vmcs-entry-interruption-info]\n" " [--set-vmcs-entry-interruption-info=]\n" " [--get-vmcs-guest-physical-address\n" " [--get-vmcs-guest-linear-address\n" " [--get-vmcs-host-pat]\n" " [--get-vmcs-host-cr0]\n" " [--get-vmcs-host-cr3]\n" " [--get-vmcs-host-cr4]\n" " [--get-vmcs-host-rip]\n" " [--get-vmcs-host-rsp]\n" " [--get-vmcs-cr0-mask]\n" " [--get-vmcs-cr0-shadow]\n" " [--get-vmcs-cr4-mask]\n" " [--get-vmcs-cr4-shadow]\n" " [--get-vmcs-cr3-targets]\n" " [--get-vmcs-apic-access-address]\n" " [--get-vmcs-virtual-apic-address]\n" " [--get-vmcs-tpr-threshold]\n" " [--get-vmcs-vpid]\n" " [--get-vmcs-instruction-error]\n" " [--get-vmcs-exit-ctls]\n" " [--get-vmcs-entry-ctls]\n" " [--get-vmcs-link]\n" " [--get-vmcs-exit-qualification]\n" " [--get-vmcs-exit-interruption-info]\n" " [--get-vmcs-exit-interruption-error]\n" " [--get-vmcs-interruptibility]\n" ); } else { (void)fprintf(stderr, " [--get-vmcb-intercepts]\n" " [--get-vmcb-asid]\n" " [--get-vmcb-exit-details]\n" " [--get-vmcb-tlb-ctrl]\n" " [--get-vmcb-virq]\n" " [--get-avic-apic-bar]\n" " [--get-avic-backing-page]\n" " [--get-avic-table]\n" ); } exit(1); } static int get_rtc_time, set_rtc_time; static int get_rtc_nvram, set_rtc_nvram; static int rtc_nvram_offset; static uint8_t rtc_nvram_value; static time_t rtc_secs; static int get_stats, getcap, setcap, capval, get_gpa_pmap; static int inject_nmi, assert_lapic_lvt; static int force_reset, force_poweroff; static const char *capname; static int create, destroy, get_memmap, get_memseg; static int get_intinfo; static int get_active_cpus, get_suspended_cpus; static uint64_t memsize; static int set_cr0, get_cr0, set_cr2, get_cr2, set_cr3, get_cr3; static int set_cr4, get_cr4; static int set_efer, get_efer; static int set_dr0, get_dr0; static int set_dr1, get_dr1; static int set_dr2, get_dr2; static int set_dr3, get_dr3; static int set_dr6, get_dr6; static int set_dr7, get_dr7; static int set_rsp, get_rsp, set_rip, get_rip, set_rflags, get_rflags; static int set_rax, get_rax; static int get_rbx, get_rcx, get_rdx, get_rsi, get_rdi, get_rbp; static int get_r8, get_r9, get_r10, get_r11, get_r12, get_r13, get_r14, get_r15; static int set_desc_ds, get_desc_ds; static int set_desc_es, get_desc_es; static int set_desc_fs, get_desc_fs; static int set_desc_gs, get_desc_gs; static int set_desc_cs, get_desc_cs; static int set_desc_ss, get_desc_ss; static int set_desc_gdtr, get_desc_gdtr; static int set_desc_idtr, get_desc_idtr; static int set_desc_tr, get_desc_tr; static int set_desc_ldtr, get_desc_ldtr; static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr; static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr; static int set_x2apic_state, get_x2apic_state; enum x2apic_state x2apic_state; static int unassign_pptdev, bus, slot, func; static int run; static int get_cpu_topology; #ifdef BHYVE_SNAPSHOT static int vm_checkpoint_opt; static int vm_suspend_opt; #endif /* * VMCB specific. */ static int get_vmcb_intercept, get_vmcb_exit_details, get_vmcb_tlb_ctrl; static int get_vmcb_virq, get_avic_table; /* * VMCS-specific fields */ static int get_pinbased_ctls, get_procbased_ctls, get_procbased_ctls2; static int get_eptp, get_io_bitmap, get_tsc_offset; static int get_vmcs_entry_interruption_info; static int get_vmcs_interruptibility; uint32_t vmcs_entry_interruption_info; static int get_vmcs_gpa, get_vmcs_gla; static int get_exception_bitmap; static int get_cr0_mask, get_cr0_shadow; static int get_cr4_mask, get_cr4_shadow; static int get_cr3_targets; static int get_apic_access_addr, get_virtual_apic_addr, get_tpr_threshold; static int get_msr_bitmap, get_msr_bitmap_address; static int get_vpid_asid; static int get_inst_err, get_exit_ctls, get_entry_ctls; static int get_host_cr0, get_host_cr3, get_host_cr4; static int get_host_rip, get_host_rsp; static int get_guest_pat, get_host_pat; static int get_guest_sysenter, get_vmcs_link; static int get_exit_reason, get_vmcs_exit_qualification; static int get_vmcs_exit_interruption_info, get_vmcs_exit_interruption_error; static int get_vmcs_exit_inst_length; static uint64_t desc_base; static uint32_t desc_limit, desc_access; static int get_all; static void dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu) { printf("vm exit[%d]\n", vcpu); printf("\trip\t\t0x%016lx\n", vmexit->rip); printf("\tinst_length\t%d\n", vmexit->inst_length); switch (vmexit->exitcode) { case VM_EXITCODE_INOUT: printf("\treason\t\tINOUT\n"); printf("\tdirection\t%s\n", vmexit->u.inout.in ? "IN" : "OUT"); printf("\tbytes\t\t%d\n", vmexit->u.inout.bytes); printf("\tflags\t\t%s%s\n", vmexit->u.inout.string ? "STRING " : "", vmexit->u.inout.rep ? "REP " : ""); printf("\tport\t\t0x%04x\n", vmexit->u.inout.port); printf("\teax\t\t0x%08x\n", vmexit->u.inout.eax); break; case VM_EXITCODE_VMX: printf("\treason\t\tVMX\n"); printf("\tstatus\t\t%d\n", vmexit->u.vmx.status); printf("\texit_reason\t0x%08x (%u)\n", vmexit->u.vmx.exit_reason, vmexit->u.vmx.exit_reason); printf("\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification); printf("\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); printf("\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); break; case VM_EXITCODE_SVM: printf("\treason\t\tSVM\n"); printf("\texit_reason\t\t%#lx\n", vmexit->u.svm.exitcode); printf("\texitinfo1\t\t%#lx\n", vmexit->u.svm.exitinfo1); printf("\texitinfo2\t\t%#lx\n", vmexit->u.svm.exitinfo2); break; default: printf("*** unknown vm run exitcode %d\n", vmexit->exitcode); break; } } /* AMD 6th generation and Intel compatible MSRs */ #define MSR_AMD6TH_START 0xC0000000 #define MSR_AMD6TH_END 0xC0001FFF /* AMD 7th and 8th generation compatible MSRs */ #define MSR_AMD7TH_START 0xC0010000 #define MSR_AMD7TH_END 0xC0011FFF static const char * msr_name(uint32_t msr) { static char buf[32]; switch(msr) { case MSR_TSC: return ("MSR_TSC"); case MSR_EFER: return ("MSR_EFER"); case MSR_STAR: return ("MSR_STAR"); case MSR_LSTAR: return ("MSR_LSTAR"); case MSR_CSTAR: return ("MSR_CSTAR"); case MSR_SF_MASK: return ("MSR_SF_MASK"); case MSR_FSBASE: return ("MSR_FSBASE"); case MSR_GSBASE: return ("MSR_GSBASE"); case MSR_KGSBASE: return ("MSR_KGSBASE"); case MSR_SYSENTER_CS_MSR: return ("MSR_SYSENTER_CS_MSR"); case MSR_SYSENTER_ESP_MSR: return ("MSR_SYSENTER_ESP_MSR"); case MSR_SYSENTER_EIP_MSR: return ("MSR_SYSENTER_EIP_MSR"); case MSR_PAT: return ("MSR_PAT"); } snprintf(buf, sizeof(buf), "MSR %#08x", msr); return (buf); } static inline void print_msr_pm(uint64_t msr, int vcpu, int readable, int writeable) { if (readable || writeable) { printf("%-20s[%d]\t\t%c%c\n", msr_name(msr), vcpu, readable ? 'R' : '-', writeable ? 'W' : '-'); } } /* * Reference APM vol2, section 15.11 MSR Intercepts. */ static void dump_amd_msr_pm(const char *bitmap, int vcpu) { int byte, bit, readable, writeable; uint32_t msr; for (msr = 0; msr < 0x2000; msr++) { byte = msr / 4; bit = (msr % 4) * 2; /* Look at MSRs in the range 0x00000000 to 0x00001FFF */ readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; writeable = (bitmap[byte] & (2 << bit)) ? 0 : 1; print_msr_pm(msr, vcpu, readable, writeable); /* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */ byte += 2048; readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; writeable = (bitmap[byte] & (2 << bit)) ? 0 : 1; print_msr_pm(msr + MSR_AMD6TH_START, vcpu, readable, writeable); /* MSR 0xC0010000 to 0xC0011FF is only for AMD */ byte += 4096; readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; writeable = (bitmap[byte] & (2 << bit)) ? 0 : 1; print_msr_pm(msr + MSR_AMD7TH_START, vcpu, readable, writeable); } } /* * Reference Intel SDM Vol3 Section 24.6.9 MSR-Bitmap Address */ static void dump_intel_msr_pm(const char *bitmap, int vcpu) { int byte, bit, readable, writeable; uint32_t msr; for (msr = 0; msr < 0x2000; msr++) { byte = msr / 8; bit = msr & 0x7; /* Look at MSRs in the range 0x00000000 to 0x00001FFF */ readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1; print_msr_pm(msr, vcpu, readable, writeable); /* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */ byte += 1024; readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1; print_msr_pm(msr + MSR_AMD6TH_START, vcpu, readable, writeable); } } static int dump_msr_bitmap(int vcpu, uint64_t addr, bool cpu_intel) { int error, fd, map_size; const char *bitmap; error = -1; bitmap = MAP_FAILED; fd = open("/dev/mem", O_RDONLY, 0); if (fd < 0) { perror("Couldn't open /dev/mem"); goto done; } if (cpu_intel) map_size = PAGE_SIZE; else map_size = 2 * PAGE_SIZE; bitmap = mmap(NULL, map_size, PROT_READ, MAP_SHARED, fd, addr); if (bitmap == MAP_FAILED) { perror("mmap failed"); goto done; } if (cpu_intel) dump_intel_msr_pm(bitmap, vcpu); else dump_amd_msr_pm(bitmap, vcpu); error = 0; done: if (bitmap != MAP_FAILED) munmap((void *)bitmap, map_size); if (fd >= 0) close(fd); return (error); } static int vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val) { return (vm_get_register(ctx, vcpu, VMCS_IDENT(field), ret_val)); } static int vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, uint64_t *ret_val) { return (vm_get_register(ctx, vcpu, VMCB_ACCESS(off, bytes), ret_val)); } enum { VMNAME = 1000, /* avoid collision with return values from getopt */ VCPU, SET_MEM, SET_EFER, SET_CR0, SET_CR2, SET_CR3, SET_CR4, SET_DR0, SET_DR1, SET_DR2, SET_DR3, SET_DR6, SET_DR7, SET_RSP, SET_RIP, SET_RAX, SET_RFLAGS, DESC_BASE, DESC_LIMIT, DESC_ACCESS, SET_CS, SET_DS, SET_ES, SET_FS, SET_GS, SET_SS, SET_TR, SET_LDTR, SET_X2APIC_STATE, SET_CAP, CAPNAME, UNASSIGN_PPTDEV, GET_GPA_PMAP, ASSERT_LAPIC_LVT, SET_RTC_TIME, SET_RTC_NVRAM, RTC_NVRAM_OFFSET, #ifdef BHYVE_SNAPSHOT SET_CHECKPOINT_FILE, SET_SUSPEND_FILE, #endif }; static void print_cpus(const char *banner, const cpuset_t *cpus) { int i, first; first = 1; printf("%s:\t", banner); if (!CPU_EMPTY(cpus)) { for (i = 0; i < CPU_SETSIZE; i++) { if (CPU_ISSET(i, cpus)) { printf("%s%d", first ? " " : ", ", i); first = 0; } } } else printf(" (none)"); printf("\n"); } static void print_intinfo(const char *banner, uint64_t info) { int type; printf("%s:\t", banner); if (info & VM_INTINFO_VALID) { type = info & VM_INTINFO_TYPE; switch (type) { case VM_INTINFO_HWINTR: printf("extint"); break; case VM_INTINFO_NMI: printf("nmi"); break; case VM_INTINFO_SWINTR: printf("swint"); break; default: printf("exception"); break; } printf(" vector %d", (int)VM_INTINFO_VECTOR(info)); if (info & VM_INTINFO_DEL_ERRCODE) printf(" errcode %#x", (u_int)(info >> 32)); } else { printf("n/a"); } printf("\n"); } static bool cpu_vendor_intel(void) { u_int regs[4]; char cpu_vendor[13]; do_cpuid(0, regs); ((u_int *)&cpu_vendor)[0] = regs[1]; ((u_int *)&cpu_vendor)[1] = regs[3]; ((u_int *)&cpu_vendor)[2] = regs[2]; cpu_vendor[12] = '\0'; if (strcmp(cpu_vendor, "AuthenticAMD") == 0) { return (false); } else if (strcmp(cpu_vendor, "HygonGenuine") == 0) { return (false); } else if (strcmp(cpu_vendor, "GenuineIntel") == 0) { return (true); } else { fprintf(stderr, "Unknown cpu vendor \"%s\"\n", cpu_vendor); exit(1); } } static int get_all_registers(struct vmctx *ctx, int vcpu) { uint64_t cr0, cr2, cr3, cr4, dr0, dr1, dr2, dr3, dr6, dr7; uint64_t rsp, rip, rflags, efer; uint64_t rax, rbx, rcx, rdx, rsi, rdi, rbp; uint64_t r8, r9, r10, r11, r12, r13, r14, r15; int error = 0; if (!error && (get_efer || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_EFER, &efer); if (error == 0) printf("efer[%d]\t\t0x%016lx\n", vcpu, efer); } if (!error && (get_cr0 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR0, &cr0); if (error == 0) printf("cr0[%d]\t\t0x%016lx\n", vcpu, cr0); } if (!error && (get_cr2 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR2, &cr2); if (error == 0) printf("cr2[%d]\t\t0x%016lx\n", vcpu, cr2); } if (!error && (get_cr3 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR3, &cr3); if (error == 0) printf("cr3[%d]\t\t0x%016lx\n", vcpu, cr3); } if (!error && (get_cr4 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR4, &cr4); if (error == 0) printf("cr4[%d]\t\t0x%016lx\n", vcpu, cr4); } if (!error && (get_dr0 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR0, &dr0); if (error == 0) printf("dr0[%d]\t\t0x%016lx\n", vcpu, dr0); } if (!error && (get_dr1 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR1, &dr1); if (error == 0) printf("dr1[%d]\t\t0x%016lx\n", vcpu, dr1); } if (!error && (get_dr2 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR2, &dr2); if (error == 0) printf("dr2[%d]\t\t0x%016lx\n", vcpu, dr2); } if (!error && (get_dr3 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR3, &dr3); if (error == 0) printf("dr3[%d]\t\t0x%016lx\n", vcpu, dr3); } if (!error && (get_dr6 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR6, &dr6); if (error == 0) printf("dr6[%d]\t\t0x%016lx\n", vcpu, dr6); } if (!error && (get_dr7 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR7, &dr7); if (error == 0) printf("dr7[%d]\t\t0x%016lx\n", vcpu, dr7); } if (!error && (get_rsp || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSP, &rsp); if (error == 0) printf("rsp[%d]\t\t0x%016lx\n", vcpu, rsp); } if (!error && (get_rip || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); if (error == 0) printf("rip[%d]\t\t0x%016lx\n", vcpu, rip); } if (!error && (get_rax || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RAX, &rax); if (error == 0) printf("rax[%d]\t\t0x%016lx\n", vcpu, rax); } if (!error && (get_rbx || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBX, &rbx); if (error == 0) printf("rbx[%d]\t\t0x%016lx\n", vcpu, rbx); } if (!error && (get_rcx || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RCX, &rcx); if (error == 0) printf("rcx[%d]\t\t0x%016lx\n", vcpu, rcx); } if (!error && (get_rdx || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDX, &rdx); if (error == 0) printf("rdx[%d]\t\t0x%016lx\n", vcpu, rdx); } if (!error && (get_rsi || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSI, &rsi); if (error == 0) printf("rsi[%d]\t\t0x%016lx\n", vcpu, rsi); } if (!error && (get_rdi || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDI, &rdi); if (error == 0) printf("rdi[%d]\t\t0x%016lx\n", vcpu, rdi); } if (!error && (get_rbp || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBP, &rbp); if (error == 0) printf("rbp[%d]\t\t0x%016lx\n", vcpu, rbp); } if (!error && (get_r8 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R8, &r8); if (error == 0) printf("r8[%d]\t\t0x%016lx\n", vcpu, r8); } if (!error && (get_r9 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R9, &r9); if (error == 0) printf("r9[%d]\t\t0x%016lx\n", vcpu, r9); } if (!error && (get_r10 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R10, &r10); if (error == 0) printf("r10[%d]\t\t0x%016lx\n", vcpu, r10); } if (!error && (get_r11 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R11, &r11); if (error == 0) printf("r11[%d]\t\t0x%016lx\n", vcpu, r11); } if (!error && (get_r12 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R12, &r12); if (error == 0) printf("r12[%d]\t\t0x%016lx\n", vcpu, r12); } if (!error && (get_r13 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R13, &r13); if (error == 0) printf("r13[%d]\t\t0x%016lx\n", vcpu, r13); } if (!error && (get_r14 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R14, &r14); if (error == 0) printf("r14[%d]\t\t0x%016lx\n", vcpu, r14); } if (!error && (get_r15 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R15, &r15); if (error == 0) printf("r15[%d]\t\t0x%016lx\n", vcpu, r15); } if (!error && (get_rflags || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RFLAGS, &rflags); if (error == 0) printf("rflags[%d]\t0x%016lx\n", vcpu, rflags); } return (error); } static int get_all_segments(struct vmctx *ctx, int vcpu) { uint64_t cs, ds, es, fs, gs, ss, tr, ldtr; int error = 0; if (!error && (get_desc_ds || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_DS, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("ds desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_es || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_ES, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("es desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_fs || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_FS, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("fs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_gs || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GS, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("gs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_ss || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("ss desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_cs || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_CS, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("cs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_tr || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("tr desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_ldtr || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("ldtr desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_gdtr || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GDTR, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("gdtr[%d]\t\t0x%016lx/0x%08x\n", vcpu, desc_base, desc_limit); } } if (!error && (get_desc_idtr || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_IDTR, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("idtr[%d]\t\t0x%016lx/0x%08x\n", vcpu, desc_base, desc_limit); } } if (!error && (get_cs || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CS, &cs); if (error == 0) printf("cs[%d]\t\t0x%04lx\n", vcpu, cs); } if (!error && (get_ds || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DS, &ds); if (error == 0) printf("ds[%d]\t\t0x%04lx\n", vcpu, ds); } if (!error && (get_es || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_ES, &es); if (error == 0) printf("es[%d]\t\t0x%04lx\n", vcpu, es); } if (!error && (get_fs || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_FS, &fs); if (error == 0) printf("fs[%d]\t\t0x%04lx\n", vcpu, fs); } if (!error && (get_gs || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_GS, &gs); if (error == 0) printf("gs[%d]\t\t0x%04lx\n", vcpu, gs); } if (!error && (get_ss || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_SS, &ss); if (error == 0) printf("ss[%d]\t\t0x%04lx\n", vcpu, ss); } if (!error && (get_tr || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_TR, &tr); if (error == 0) printf("tr[%d]\t\t0x%04lx\n", vcpu, tr); } if (!error && (get_ldtr || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_LDTR, &ldtr); if (error == 0) printf("ldtr[%d]\t\t0x%04lx\n", vcpu, ldtr); } return (error); } static int get_misc_vmcs(struct vmctx *ctx, int vcpu) { uint64_t ctl, cr0, cr3, cr4, rsp, rip, pat, addr, u64; int error = 0; if (!error && (get_cr0_mask || get_all)) { uint64_t cr0mask; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_MASK, &cr0mask); if (error == 0) printf("cr0_mask[%d]\t\t0x%016lx\n", vcpu, cr0mask); } if (!error && (get_cr0_shadow || get_all)) { uint64_t cr0shadow; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_SHADOW, &cr0shadow); if (error == 0) printf("cr0_shadow[%d]\t\t0x%016lx\n", vcpu, cr0shadow); } if (!error && (get_cr4_mask || get_all)) { uint64_t cr4mask; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_MASK, &cr4mask); if (error == 0) printf("cr4_mask[%d]\t\t0x%016lx\n", vcpu, cr4mask); } if (!error && (get_cr4_shadow || get_all)) { uint64_t cr4shadow; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_SHADOW, &cr4shadow); if (error == 0) printf("cr4_shadow[%d]\t\t0x%016lx\n", vcpu, cr4shadow); } if (!error && (get_cr3_targets || get_all)) { uint64_t target_count, target_addr; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET_COUNT, &target_count); if (error == 0) { printf("cr3_target_count[%d]\t0x%016lx\n", vcpu, target_count); } error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET0, &target_addr); if (error == 0) { printf("cr3_target0[%d]\t\t0x%016lx\n", vcpu, target_addr); } error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET1, &target_addr); if (error == 0) { printf("cr3_target1[%d]\t\t0x%016lx\n", vcpu, target_addr); } error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET2, &target_addr); if (error == 0) { printf("cr3_target2[%d]\t\t0x%016lx\n", vcpu, target_addr); } error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET3, &target_addr); if (error == 0) { printf("cr3_target3[%d]\t\t0x%016lx\n", vcpu, target_addr); } } if (!error && (get_pinbased_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_PIN_BASED_CTLS, &ctl); if (error == 0) printf("pinbased_ctls[%d]\t0x%016lx\n", vcpu, ctl); } if (!error && (get_procbased_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_PRI_PROC_BASED_CTLS, &ctl); if (error == 0) printf("procbased_ctls[%d]\t0x%016lx\n", vcpu, ctl); } if (!error && (get_procbased_ctls2 || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_SEC_PROC_BASED_CTLS, &ctl); if (error == 0) printf("procbased_ctls2[%d]\t0x%016lx\n", vcpu, ctl); } if (!error && (get_vmcs_gla || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_LINEAR_ADDRESS, &u64); if (error == 0) printf("gla[%d]\t\t0x%016lx\n", vcpu, u64); } if (!error && (get_vmcs_gpa || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_PHYSICAL_ADDRESS, &u64); if (error == 0) printf("gpa[%d]\t\t0x%016lx\n", vcpu, u64); } if (!error && (get_vmcs_entry_interruption_info || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO,&u64); if (error == 0) { printf("entry_interruption_info[%d]\t0x%016lx\n", vcpu, u64); } } if (!error && (get_tpr_threshold || get_all)) { uint64_t threshold; error = vm_get_vmcs_field(ctx, vcpu, VMCS_TPR_THRESHOLD, &threshold); if (error == 0) printf("tpr_threshold[%d]\t0x%016lx\n", vcpu, threshold); } if (!error && (get_inst_err || get_all)) { uint64_t insterr; error = vm_get_vmcs_field(ctx, vcpu, VMCS_INSTRUCTION_ERROR, &insterr); if (error == 0) { printf("instruction_error[%d]\t0x%016lx\n", vcpu, insterr); } } if (!error && (get_exit_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_CTLS, &ctl); if (error == 0) printf("exit_ctls[%d]\t\t0x%016lx\n", vcpu, ctl); } if (!error && (get_entry_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_CTLS, &ctl); if (error == 0) printf("entry_ctls[%d]\t\t0x%016lx\n", vcpu, ctl); } if (!error && (get_host_pat || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_IA32_PAT, &pat); if (error == 0) printf("host_pat[%d]\t\t0x%016lx\n", vcpu, pat); } if (!error && (get_host_cr0 || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR0, &cr0); if (error == 0) printf("host_cr0[%d]\t\t0x%016lx\n", vcpu, cr0); } if (!error && (get_host_cr3 || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR3, &cr3); if (error == 0) printf("host_cr3[%d]\t\t0x%016lx\n", vcpu, cr3); } if (!error && (get_host_cr4 || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR4, &cr4); if (error == 0) printf("host_cr4[%d]\t\t0x%016lx\n", vcpu, cr4); } if (!error && (get_host_rip || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RIP, &rip); if (error == 0) printf("host_rip[%d]\t\t0x%016lx\n", vcpu, rip); } if (!error && (get_host_rsp || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RSP, &rsp); if (error == 0) printf("host_rsp[%d]\t\t0x%016lx\n", vcpu, rsp); } if (!error && (get_vmcs_link || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_LINK_POINTER, &addr); if (error == 0) printf("vmcs_pointer[%d]\t0x%016lx\n", vcpu, addr); } if (!error && (get_vmcs_exit_interruption_info || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_INTR_INFO, &u64); if (error == 0) { printf("vmcs_exit_interruption_info[%d]\t0x%016lx\n", vcpu, u64); } } if (!error && (get_vmcs_exit_interruption_error || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_INTR_ERRCODE, &u64); if (error == 0) { printf("vmcs_exit_interruption_error[%d]\t0x%016lx\n", vcpu, u64); } } if (!error && (get_vmcs_interruptibility || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_INTERRUPTIBILITY, &u64); if (error == 0) { printf("vmcs_guest_interruptibility[%d]\t0x%016lx\n", vcpu, u64); } } if (!error && (get_vmcs_exit_inst_length || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_INSTRUCTION_LENGTH, &u64); if (error == 0) printf("vmcs_exit_inst_length[%d]\t0x%08x\n", vcpu, (uint32_t)u64); } if (!error && (get_vmcs_exit_qualification || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_QUALIFICATION, &u64); if (error == 0) printf("vmcs_exit_qualification[%d]\t0x%016lx\n", vcpu, u64); } return (error); } static int get_misc_vmcb(struct vmctx *ctx, int vcpu) { uint64_t ctl, addr; int error = 0; if (!error && (get_vmcb_intercept || get_all)) { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_CR_INTERCEPT, 4, &ctl); if (error == 0) printf("cr_intercept[%d]\t0x%08x\n", vcpu, (int)ctl); error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_DR_INTERCEPT, 4, &ctl); if (error == 0) printf("dr_intercept[%d]\t0x%08x\n", vcpu, (int)ctl); error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXC_INTERCEPT, 4, &ctl); if (error == 0) printf("exc_intercept[%d]\t0x%08x\n", vcpu, (int)ctl); error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_INST1_INTERCEPT, 4, &ctl); if (error == 0) printf("inst1_intercept[%d]\t0x%08x\n", vcpu, (int)ctl); error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_INST2_INTERCEPT, 4, &ctl); if (error == 0) printf("inst2_intercept[%d]\t0x%08x\n", vcpu, (int)ctl); } if (!error && (get_vmcb_tlb_ctrl || get_all)) { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_TLB_CTRL, 4, &ctl); if (error == 0) printf("TLB ctrl[%d]\t0x%016lx\n", vcpu, ctl); } if (!error && (get_vmcb_exit_details || get_all)) { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXITINFO1, 8, &ctl); if (error == 0) printf("exitinfo1[%d]\t0x%016lx\n", vcpu, ctl); error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXITINFO2, 8, &ctl); if (error == 0) printf("exitinfo2[%d]\t0x%016lx\n", vcpu, ctl); error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXITINTINFO, 8, &ctl); if (error == 0) printf("exitintinfo[%d]\t0x%016lx\n", vcpu, ctl); } if (!error && (get_vmcb_virq || get_all)) { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_VIRQ, 8, &ctl); if (error == 0) printf("v_irq/tpr[%d]\t0x%016lx\n", vcpu, ctl); } if (!error && (get_apic_access_addr || get_all)) { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_AVIC_BAR, 8, &addr); if (error == 0) printf("AVIC apic_bar[%d]\t0x%016lx\n", vcpu, addr); } if (!error && (get_virtual_apic_addr || get_all)) { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_AVIC_PAGE, 8, &addr); if (error == 0) printf("AVIC backing page[%d]\t0x%016lx\n", vcpu, addr); } if (!error && (get_avic_table || get_all)) { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_AVIC_LT, 8, &addr); if (error == 0) printf("AVIC logical table[%d]\t0x%016lx\n", vcpu, addr); error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_AVIC_PT, 8, &addr); if (error == 0) printf("AVIC physical table[%d]\t0x%016lx\n", vcpu, addr); } return (error); } static struct option * setup_options(bool cpu_intel) { const struct option common_opts[] = { { "vm", REQ_ARG, 0, VMNAME }, { "cpu", REQ_ARG, 0, VCPU }, { "set-mem", REQ_ARG, 0, SET_MEM }, { "set-efer", REQ_ARG, 0, SET_EFER }, { "set-cr0", REQ_ARG, 0, SET_CR0 }, { "set-cr2", REQ_ARG, 0, SET_CR2 }, { "set-cr3", REQ_ARG, 0, SET_CR3 }, { "set-cr4", REQ_ARG, 0, SET_CR4 }, { "set-dr0", REQ_ARG, 0, SET_DR0 }, { "set-dr1", REQ_ARG, 0, SET_DR1 }, { "set-dr2", REQ_ARG, 0, SET_DR2 }, { "set-dr3", REQ_ARG, 0, SET_DR3 }, { "set-dr6", REQ_ARG, 0, SET_DR6 }, { "set-dr7", REQ_ARG, 0, SET_DR7 }, { "set-rsp", REQ_ARG, 0, SET_RSP }, { "set-rip", REQ_ARG, 0, SET_RIP }, { "set-rax", REQ_ARG, 0, SET_RAX }, { "set-rflags", REQ_ARG, 0, SET_RFLAGS }, { "desc-base", REQ_ARG, 0, DESC_BASE }, { "desc-limit", REQ_ARG, 0, DESC_LIMIT }, { "desc-access",REQ_ARG, 0, DESC_ACCESS }, { "set-cs", REQ_ARG, 0, SET_CS }, { "set-ds", REQ_ARG, 0, SET_DS }, { "set-es", REQ_ARG, 0, SET_ES }, { "set-fs", REQ_ARG, 0, SET_FS }, { "set-gs", REQ_ARG, 0, SET_GS }, { "set-ss", REQ_ARG, 0, SET_SS }, { "set-tr", REQ_ARG, 0, SET_TR }, { "set-ldtr", REQ_ARG, 0, SET_LDTR }, { "set-x2apic-state",REQ_ARG, 0, SET_X2APIC_STATE }, { "capname", REQ_ARG, 0, CAPNAME }, { "unassign-pptdev", REQ_ARG, 0, UNASSIGN_PPTDEV }, { "setcap", REQ_ARG, 0, SET_CAP }, { "get-gpa-pmap", REQ_ARG, 0, GET_GPA_PMAP }, { "assert-lapic-lvt", REQ_ARG, 0, ASSERT_LAPIC_LVT }, { "get-rtc-time", NO_ARG, &get_rtc_time, 1 }, { "set-rtc-time", REQ_ARG, 0, SET_RTC_TIME }, { "rtc-nvram-offset", REQ_ARG, 0, RTC_NVRAM_OFFSET }, { "get-rtc-nvram", NO_ARG, &get_rtc_nvram, 1 }, { "set-rtc-nvram", REQ_ARG, 0, SET_RTC_NVRAM }, { "getcap", NO_ARG, &getcap, 1 }, { "get-stats", NO_ARG, &get_stats, 1 }, { "get-desc-ds",NO_ARG, &get_desc_ds, 1 }, { "set-desc-ds",NO_ARG, &set_desc_ds, 1 }, { "get-desc-es",NO_ARG, &get_desc_es, 1 }, { "set-desc-es",NO_ARG, &set_desc_es, 1 }, { "get-desc-ss",NO_ARG, &get_desc_ss, 1 }, { "set-desc-ss",NO_ARG, &set_desc_ss, 1 }, { "get-desc-cs",NO_ARG, &get_desc_cs, 1 }, { "set-desc-cs",NO_ARG, &set_desc_cs, 1 }, { "get-desc-fs",NO_ARG, &get_desc_fs, 1 }, { "set-desc-fs",NO_ARG, &set_desc_fs, 1 }, { "get-desc-gs",NO_ARG, &get_desc_gs, 1 }, { "set-desc-gs",NO_ARG, &set_desc_gs, 1 }, { "get-desc-tr",NO_ARG, &get_desc_tr, 1 }, { "set-desc-tr",NO_ARG, &set_desc_tr, 1 }, { "set-desc-ldtr", NO_ARG, &set_desc_ldtr, 1 }, { "get-desc-ldtr", NO_ARG, &get_desc_ldtr, 1 }, { "set-desc-gdtr", NO_ARG, &set_desc_gdtr, 1 }, { "get-desc-gdtr", NO_ARG, &get_desc_gdtr, 1 }, { "set-desc-idtr", NO_ARG, &set_desc_idtr, 1 }, { "get-desc-idtr", NO_ARG, &get_desc_idtr, 1 }, { "get-memmap", NO_ARG, &get_memmap, 1 }, { "get-memseg", NO_ARG, &get_memseg, 1 }, { "get-efer", NO_ARG, &get_efer, 1 }, { "get-cr0", NO_ARG, &get_cr0, 1 }, { "get-cr2", NO_ARG, &get_cr2, 1 }, { "get-cr3", NO_ARG, &get_cr3, 1 }, { "get-cr4", NO_ARG, &get_cr4, 1 }, { "get-dr0", NO_ARG, &get_dr0, 1 }, { "get-dr1", NO_ARG, &get_dr1, 1 }, { "get-dr2", NO_ARG, &get_dr2, 1 }, { "get-dr3", NO_ARG, &get_dr3, 1 }, { "get-dr6", NO_ARG, &get_dr6, 1 }, { "get-dr7", NO_ARG, &get_dr7, 1 }, { "get-rsp", NO_ARG, &get_rsp, 1 }, { "get-rip", NO_ARG, &get_rip, 1 }, { "get-rax", NO_ARG, &get_rax, 1 }, { "get-rbx", NO_ARG, &get_rbx, 1 }, { "get-rcx", NO_ARG, &get_rcx, 1 }, { "get-rdx", NO_ARG, &get_rdx, 1 }, { "get-rsi", NO_ARG, &get_rsi, 1 }, { "get-rdi", NO_ARG, &get_rdi, 1 }, { "get-rbp", NO_ARG, &get_rbp, 1 }, { "get-r8", NO_ARG, &get_r8, 1 }, { "get-r9", NO_ARG, &get_r9, 1 }, { "get-r10", NO_ARG, &get_r10, 1 }, { "get-r11", NO_ARG, &get_r11, 1 }, { "get-r12", NO_ARG, &get_r12, 1 }, { "get-r13", NO_ARG, &get_r13, 1 }, { "get-r14", NO_ARG, &get_r14, 1 }, { "get-r15", NO_ARG, &get_r15, 1 }, { "get-rflags", NO_ARG, &get_rflags, 1 }, { "get-cs", NO_ARG, &get_cs, 1 }, { "get-ds", NO_ARG, &get_ds, 1 }, { "get-es", NO_ARG, &get_es, 1 }, { "get-fs", NO_ARG, &get_fs, 1 }, { "get-gs", NO_ARG, &get_gs, 1 }, { "get-ss", NO_ARG, &get_ss, 1 }, { "get-tr", NO_ARG, &get_tr, 1 }, { "get-ldtr", NO_ARG, &get_ldtr, 1 }, { "get-eptp", NO_ARG, &get_eptp, 1 }, { "get-exception-bitmap", NO_ARG, &get_exception_bitmap, 1 }, { "get-io-bitmap-address", NO_ARG, &get_io_bitmap, 1 }, { "get-tsc-offset", NO_ARG, &get_tsc_offset, 1 }, { "get-msr-bitmap", NO_ARG, &get_msr_bitmap, 1 }, { "get-msr-bitmap-address", NO_ARG, &get_msr_bitmap_address, 1 }, { "get-guest-pat", NO_ARG, &get_guest_pat, 1 }, { "get-guest-sysenter", NO_ARG, &get_guest_sysenter, 1 }, { "get-exit-reason", NO_ARG, &get_exit_reason, 1 }, { "get-x2apic-state", NO_ARG, &get_x2apic_state, 1 }, { "get-all", NO_ARG, &get_all, 1 }, { "run", NO_ARG, &run, 1 }, { "create", NO_ARG, &create, 1 }, { "destroy", NO_ARG, &destroy, 1 }, { "inject-nmi", NO_ARG, &inject_nmi, 1 }, { "force-reset", NO_ARG, &force_reset, 1 }, { "force-poweroff", NO_ARG, &force_poweroff, 1 }, { "get-active-cpus", NO_ARG, &get_active_cpus, 1 }, { "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 }, { "get-intinfo", NO_ARG, &get_intinfo, 1 }, { "get-cpu-topology", NO_ARG, &get_cpu_topology, 1 }, #ifdef BHYVE_SNAPSHOT { "checkpoint", REQ_ARG, 0, SET_CHECKPOINT_FILE}, { "suspend", REQ_ARG, 0, SET_SUSPEND_FILE}, #endif }; const struct option intel_opts[] = { { "get-vmcs-pinbased-ctls", NO_ARG, &get_pinbased_ctls, 1 }, { "get-vmcs-procbased-ctls", NO_ARG, &get_procbased_ctls, 1 }, { "get-vmcs-procbased-ctls2", NO_ARG, &get_procbased_ctls2, 1 }, { "get-vmcs-guest-linear-address", NO_ARG, &get_vmcs_gla, 1 }, { "get-vmcs-guest-physical-address", NO_ARG, &get_vmcs_gpa, 1 }, { "get-vmcs-entry-interruption-info", NO_ARG, &get_vmcs_entry_interruption_info, 1}, { "get-vmcs-cr0-mask", NO_ARG, &get_cr0_mask, 1 }, { "get-vmcs-cr0-shadow", NO_ARG,&get_cr0_shadow, 1 }, { "get-vmcs-cr4-mask", NO_ARG, &get_cr4_mask, 1 }, { "get-vmcs-cr4-shadow", NO_ARG, &get_cr4_shadow, 1 }, { "get-vmcs-cr3-targets", NO_ARG, &get_cr3_targets, 1 }, { "get-vmcs-tpr-threshold", NO_ARG, &get_tpr_threshold, 1 }, { "get-vmcs-vpid", NO_ARG, &get_vpid_asid, 1 }, { "get-vmcs-exit-ctls", NO_ARG, &get_exit_ctls, 1 }, { "get-vmcs-entry-ctls", NO_ARG, &get_entry_ctls, 1 }, { "get-vmcs-instruction-error", NO_ARG, &get_inst_err, 1 }, { "get-vmcs-host-pat", NO_ARG, &get_host_pat, 1 }, { "get-vmcs-host-cr0", NO_ARG, &get_host_cr0, 1 }, { "get-vmcs-exit-qualification", NO_ARG, &get_vmcs_exit_qualification, 1 }, { "get-vmcs-exit-inst-length", NO_ARG, &get_vmcs_exit_inst_length, 1 }, { "get-vmcs-interruptibility", NO_ARG, &get_vmcs_interruptibility, 1 }, { "get-vmcs-exit-interruption-error", NO_ARG, &get_vmcs_exit_interruption_error, 1 }, { "get-vmcs-exit-interruption-info", NO_ARG, &get_vmcs_exit_interruption_info, 1 }, { "get-vmcs-link", NO_ARG, &get_vmcs_link, 1 }, { "get-vmcs-host-cr3", NO_ARG, &get_host_cr3, 1 }, { "get-vmcs-host-cr4", NO_ARG, &get_host_cr4, 1 }, { "get-vmcs-host-rip", NO_ARG, &get_host_rip, 1 }, { "get-vmcs-host-rsp", NO_ARG, &get_host_rsp, 1 }, { "get-apic-access-address", NO_ARG, &get_apic_access_addr, 1}, { "get-virtual-apic-address", NO_ARG, &get_virtual_apic_addr, 1} }; const struct option amd_opts[] = { { "get-vmcb-intercepts", NO_ARG, &get_vmcb_intercept, 1 }, { "get-vmcb-asid", NO_ARG, &get_vpid_asid, 1 }, { "get-vmcb-exit-details", NO_ARG, &get_vmcb_exit_details, 1 }, { "get-vmcb-tlb-ctrl", NO_ARG, &get_vmcb_tlb_ctrl, 1 }, { "get-vmcb-virq", NO_ARG, &get_vmcb_virq, 1 }, { "get-avic-apic-bar", NO_ARG, &get_apic_access_addr, 1 }, { "get-avic-backing-page", NO_ARG, &get_virtual_apic_addr, 1 }, { "get-avic-table", NO_ARG, &get_avic_table, 1 } }; const struct option null_opt = { NULL, 0, NULL, 0 }; struct option *all_opts; char *cp; int optlen; optlen = sizeof(common_opts); if (cpu_intel) optlen += sizeof(intel_opts); else optlen += sizeof(amd_opts); optlen += sizeof(null_opt); all_opts = malloc(optlen); cp = (char *)all_opts; memcpy(cp, common_opts, sizeof(common_opts)); cp += sizeof(common_opts); if (cpu_intel) { memcpy(cp, intel_opts, sizeof(intel_opts)); cp += sizeof(intel_opts); } else { memcpy(cp, amd_opts, sizeof(amd_opts)); cp += sizeof(amd_opts); } memcpy(cp, &null_opt, sizeof(null_opt)); cp += sizeof(null_opt); return (all_opts); } static const char * wday_str(int idx) { static const char *weekdays[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" }; if (idx >= 0 && idx < 7) return (weekdays[idx]); else return ("UNK"); } static const char * mon_str(int idx) { static const char *months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; if (idx >= 0 && idx < 12) return (months[idx]); else return ("UNK"); } static int show_memmap(struct vmctx *ctx) { char name[SPECNAMELEN + 1], numbuf[8]; vm_ooffset_t segoff; vm_paddr_t gpa; size_t maplen, seglen; int error, flags, prot, segid, delim; printf("Address Length Segment Offset "); printf("Prot Flags\n"); gpa = 0; while (1) { error = vm_mmap_getnext(ctx, &gpa, &segid, &segoff, &maplen, &prot, &flags); if (error) return (errno == ENOENT ? 0 : error); error = vm_get_memseg(ctx, segid, &seglen, name, sizeof(name)); if (error) return (error); printf("%-12lX", gpa); humanize_number(numbuf, sizeof(numbuf), maplen, "B", HN_AUTOSCALE, HN_NOSPACE); printf("%-12s", numbuf); printf("%-12s", name[0] ? name : "sysmem"); printf("%-12lX", segoff); printf("%c%c%c ", prot & PROT_READ ? 'R' : '-', prot & PROT_WRITE ? 'W' : '-', prot & PROT_EXEC ? 'X' : '-'); delim = '\0'; if (flags & VM_MEMMAP_F_WIRED) { printf("%cwired", delim); delim = '/'; } if (flags & VM_MEMMAP_F_IOMMU) { printf("%ciommu", delim); delim = '/'; } printf("\n"); gpa += maplen; } } static int show_memseg(struct vmctx *ctx) { char name[SPECNAMELEN + 1], numbuf[8]; size_t seglen; int error, segid; printf("ID Length Name\n"); segid = 0; while (1) { error = vm_get_memseg(ctx, segid, &seglen, name, sizeof(name)); if (error) return (errno == EINVAL ? 0 : error); if (seglen) { printf("%-4d", segid); humanize_number(numbuf, sizeof(numbuf), seglen, "B", HN_AUTOSCALE, HN_NOSPACE); printf("%-12s", numbuf); printf("%s", name[0] ? name : "sysmem"); printf("\n"); } segid++; } } #ifdef BHYVE_SNAPSHOT static int send_checkpoint_op_req(struct vmctx *ctx, struct checkpoint_op *op) { struct sockaddr_un addr; int socket_fd, len, len_sent, total_sent; int err = 0; char vmname_buf[MAX_VMNAME]; socket_fd = socket(PF_UNIX, SOCK_STREAM, 0); if (socket_fd < 0) { perror("Error creating bhyvectl socket"); err = -1; goto done; } memset(&addr, 0, sizeof(struct sockaddr_un)); addr.sun_family = AF_UNIX; err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1); if (err != 0) { perror("Failed to get VM name"); goto done; } - snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s", CHECKPOINT_RUN_DIR, vmname_buf); + snprintf(addr.sun_path, sizeof(addr.sun_path), "%s%s", BHYVE_RUN_DIR, vmname_buf); if (connect(socket_fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_un)) != 0) { perror("Connect to VM socket failed"); err = -1; goto done; } len = sizeof(*op); total_sent = 0; while ((len_sent = send(socket_fd, (char *)op + total_sent, len - total_sent, 0)) > 0) { total_sent += len_sent; } if (len_sent < 0) { perror("Failed to send checkpoint operation request"); err = -1; } done: if (socket_fd > 0) close(socket_fd); return (err); } static int send_start_checkpoint(struct vmctx *ctx, const char *checkpoint_file) { struct checkpoint_op op; op.op = START_CHECKPOINT; strncpy(op.snapshot_filename, checkpoint_file, MAX_SNAPSHOT_VMNAME); op.snapshot_filename[MAX_SNAPSHOT_VMNAME - 1] = 0; return (send_checkpoint_op_req(ctx, &op)); } static int send_start_suspend(struct vmctx *ctx, const char *suspend_file) { struct checkpoint_op op; op.op = START_SUSPEND; strncpy(op.snapshot_filename, suspend_file, MAX_SNAPSHOT_VMNAME); op.snapshot_filename[MAX_SNAPSHOT_VMNAME - 1] = 0; return (send_checkpoint_op_req(ctx, &op)); } #endif int main(int argc, char *argv[]) { char *vmname; int error, ch, vcpu, ptenum; vm_paddr_t gpa_pmap; struct vm_exit vmexit; uint64_t rax, cr0, cr2, cr3, cr4, dr0, dr1, dr2, dr3, dr6, dr7; uint64_t rsp, rip, rflags, efer, pat; uint64_t eptp, bm, addr, u64, pteval[4], *pte, info[2]; struct vmctx *ctx; cpuset_t cpus; bool cpu_intel; uint64_t cs, ds, es, fs, gs, ss, tr, ldtr; struct tm tm; struct option *opts; #ifdef BHYVE_SNAPSHOT char *checkpoint_file, *suspend_file; #endif cpu_intel = cpu_vendor_intel(); opts = setup_options(cpu_intel); vcpu = 0; vmname = NULL; assert_lapic_lvt = -1; progname = basename(argv[0]); while ((ch = getopt_long(argc, argv, "", opts, NULL)) != -1) { switch (ch) { case 0: break; case VMNAME: vmname = optarg; break; case VCPU: vcpu = atoi(optarg); break; case SET_MEM: memsize = atoi(optarg) * MB; memsize = roundup(memsize, 2 * MB); break; case SET_EFER: efer = strtoul(optarg, NULL, 0); set_efer = 1; break; case SET_CR0: cr0 = strtoul(optarg, NULL, 0); set_cr0 = 1; break; case SET_CR2: cr2 = strtoul(optarg, NULL, 0); set_cr2 = 1; break; case SET_CR3: cr3 = strtoul(optarg, NULL, 0); set_cr3 = 1; break; case SET_CR4: cr4 = strtoul(optarg, NULL, 0); set_cr4 = 1; break; case SET_DR0: dr0 = strtoul(optarg, NULL, 0); set_dr0 = 1; break; case SET_DR1: dr1 = strtoul(optarg, NULL, 0); set_dr1 = 1; break; case SET_DR2: dr2 = strtoul(optarg, NULL, 0); set_dr2 = 1; break; case SET_DR3: dr3 = strtoul(optarg, NULL, 0); set_dr3 = 1; break; case SET_DR6: dr6 = strtoul(optarg, NULL, 0); set_dr6 = 1; break; case SET_DR7: dr7 = strtoul(optarg, NULL, 0); set_dr7 = 1; break; case SET_RSP: rsp = strtoul(optarg, NULL, 0); set_rsp = 1; break; case SET_RIP: rip = strtoul(optarg, NULL, 0); set_rip = 1; break; case SET_RAX: rax = strtoul(optarg, NULL, 0); set_rax = 1; break; case SET_RFLAGS: rflags = strtoul(optarg, NULL, 0); set_rflags = 1; break; case DESC_BASE: desc_base = strtoul(optarg, NULL, 0); break; case DESC_LIMIT: desc_limit = strtoul(optarg, NULL, 0); break; case DESC_ACCESS: desc_access = strtoul(optarg, NULL, 0); break; case SET_CS: cs = strtoul(optarg, NULL, 0); set_cs = 1; break; case SET_DS: ds = strtoul(optarg, NULL, 0); set_ds = 1; break; case SET_ES: es = strtoul(optarg, NULL, 0); set_es = 1; break; case SET_FS: fs = strtoul(optarg, NULL, 0); set_fs = 1; break; case SET_GS: gs = strtoul(optarg, NULL, 0); set_gs = 1; break; case SET_SS: ss = strtoul(optarg, NULL, 0); set_ss = 1; break; case SET_TR: tr = strtoul(optarg, NULL, 0); set_tr = 1; break; case SET_LDTR: ldtr = strtoul(optarg, NULL, 0); set_ldtr = 1; break; case SET_X2APIC_STATE: x2apic_state = strtol(optarg, NULL, 0); set_x2apic_state = 1; break; case SET_CAP: capval = strtoul(optarg, NULL, 0); setcap = 1; break; case SET_RTC_TIME: rtc_secs = strtoul(optarg, NULL, 0); set_rtc_time = 1; break; case SET_RTC_NVRAM: rtc_nvram_value = (uint8_t)strtoul(optarg, NULL, 0); set_rtc_nvram = 1; break; case RTC_NVRAM_OFFSET: rtc_nvram_offset = strtoul(optarg, NULL, 0); break; case GET_GPA_PMAP: gpa_pmap = strtoul(optarg, NULL, 0); get_gpa_pmap = 1; break; case CAPNAME: capname = optarg; break; case UNASSIGN_PPTDEV: unassign_pptdev = 1; if (sscanf(optarg, "%d/%d/%d", &bus, &slot, &func) != 3) usage(cpu_intel); break; case ASSERT_LAPIC_LVT: assert_lapic_lvt = atoi(optarg); break; #ifdef BHYVE_SNAPSHOT case SET_CHECKPOINT_FILE: vm_checkpoint_opt = 1; checkpoint_file = optarg; break; case SET_SUSPEND_FILE: vm_suspend_opt = 1; suspend_file = optarg; break; #endif default: usage(cpu_intel); } } argc -= optind; argv += optind; if (vmname == NULL) usage(cpu_intel); error = 0; if (!error && create) error = vm_create(vmname); if (!error) { ctx = vm_open(vmname); if (ctx == NULL) { printf("VM:%s is not created.\n", vmname); exit (1); } } if (!error && memsize) error = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); if (!error && set_efer) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_EFER, efer); if (!error && set_cr0) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR0, cr0); if (!error && set_cr2) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR2, cr2); if (!error && set_cr3) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR3, cr3); if (!error && set_cr4) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR4, cr4); if (!error && set_dr0) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR0, dr0); if (!error && set_dr1) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR1, dr1); if (!error && set_dr2) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR2, dr2); if (!error && set_dr3) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR3, dr3); if (!error && set_dr6) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR6, dr6); if (!error && set_dr7) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR7, dr7); if (!error && set_rsp) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RSP, rsp); if (!error && set_rip) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, rip); if (!error && set_rax) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, rax); if (!error && set_rflags) { error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RFLAGS, rflags); } if (!error && set_desc_ds) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_DS, desc_base, desc_limit, desc_access); } if (!error && set_desc_es) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_ES, desc_base, desc_limit, desc_access); } if (!error && set_desc_ss) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_SS, desc_base, desc_limit, desc_access); } if (!error && set_desc_cs) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_CS, desc_base, desc_limit, desc_access); } if (!error && set_desc_fs) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_FS, desc_base, desc_limit, desc_access); } if (!error && set_desc_gs) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GS, desc_base, desc_limit, desc_access); } if (!error && set_desc_tr) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_TR, desc_base, desc_limit, desc_access); } if (!error && set_desc_ldtr) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_LDTR, desc_base, desc_limit, desc_access); } if (!error && set_desc_gdtr) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GDTR, desc_base, desc_limit, 0); } if (!error && set_desc_idtr) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_IDTR, desc_base, desc_limit, 0); } if (!error && set_cs) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CS, cs); if (!error && set_ds) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DS, ds); if (!error && set_es) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_ES, es); if (!error && set_fs) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_FS, fs); if (!error && set_gs) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_GS, gs); if (!error && set_ss) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_SS, ss); if (!error && set_tr) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_TR, tr); if (!error && set_ldtr) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_LDTR, ldtr); if (!error && set_x2apic_state) error = vm_set_x2apic_state(ctx, vcpu, x2apic_state); if (!error && unassign_pptdev) error = vm_unassign_pptdev(ctx, bus, slot, func); if (!error && inject_nmi) { error = vm_inject_nmi(ctx, vcpu); } if (!error && assert_lapic_lvt != -1) { error = vm_lapic_local_irq(ctx, vcpu, assert_lapic_lvt); } if (!error && (get_memseg || get_all)) error = show_memseg(ctx); if (!error && (get_memmap || get_all)) error = show_memmap(ctx); if (!error) error = get_all_registers(ctx, vcpu); if (!error) error = get_all_segments(ctx, vcpu); if (!error) { if (cpu_intel) error = get_misc_vmcs(ctx, vcpu); else error = get_misc_vmcb(ctx, vcpu); } if (!error && (get_x2apic_state || get_all)) { error = vm_get_x2apic_state(ctx, vcpu, &x2apic_state); if (error == 0) printf("x2apic_state[%d]\t%d\n", vcpu, x2apic_state); } if (!error && (get_eptp || get_all)) { if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_EPTP, &eptp); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_NPT_BASE, 8, &eptp); if (error == 0) printf("%s[%d]\t\t0x%016lx\n", cpu_intel ? "eptp" : "rvi/npt", vcpu, eptp); } if (!error && (get_exception_bitmap || get_all)) { if(cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXCEPTION_BITMAP, &bm); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXC_INTERCEPT, 4, &bm); if (error == 0) printf("exception_bitmap[%d]\t%#lx\n", vcpu, bm); } if (!error && (get_io_bitmap || get_all)) { if (cpu_intel) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_A, &bm); if (error == 0) printf("io_bitmap_a[%d]\t%#lx\n", vcpu, bm); error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_B, &bm); if (error == 0) printf("io_bitmap_b[%d]\t%#lx\n", vcpu, bm); } else { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_IO_PERM, 8, &bm); if (error == 0) printf("io_bitmap[%d]\t%#lx\n", vcpu, bm); } } if (!error && (get_tsc_offset || get_all)) { uint64_t tscoff; if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_TSC_OFFSET, &tscoff); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_TSC_OFFSET, 8, &tscoff); if (error == 0) printf("tsc_offset[%d]\t0x%016lx\n", vcpu, tscoff); } if (!error && (get_msr_bitmap_address || get_all)) { if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, &addr); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_MSR_PERM, 8, &addr); if (error == 0) printf("msr_bitmap[%d]\t\t%#lx\n", vcpu, addr); } if (!error && (get_msr_bitmap || get_all)) { if (cpu_intel) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, &addr); } else { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_MSR_PERM, 8, &addr); } if (error == 0) error = dump_msr_bitmap(vcpu, addr, cpu_intel); } if (!error && (get_vpid_asid || get_all)) { uint64_t vpid; if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_VPID, &vpid); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_ASID, 4, &vpid); if (error == 0) printf("%s[%d]\t\t0x%04lx\n", cpu_intel ? "vpid" : "asid", vcpu, vpid); } if (!error && (get_guest_pat || get_all)) { if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_PAT, &pat); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_GUEST_PAT, 8, &pat); if (error == 0) printf("guest_pat[%d]\t\t0x%016lx\n", vcpu, pat); } if (!error && (get_guest_sysenter || get_all)) { if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_SYSENTER_CS, &cs); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_SYSENTER_CS, 8, &cs); if (error == 0) printf("guest_sysenter_cs[%d]\t%#lx\n", vcpu, cs); if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_SYSENTER_ESP, &rsp); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_SYSENTER_ESP, 8, &rsp); if (error == 0) printf("guest_sysenter_sp[%d]\t%#lx\n", vcpu, rsp); if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_SYSENTER_EIP, &rip); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_SYSENTER_EIP, 8, &rip); if (error == 0) printf("guest_sysenter_ip[%d]\t%#lx\n", vcpu, rip); } if (!error && (get_exit_reason || get_all)) { if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_REASON, &u64); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXIT_REASON, 8, &u64); if (error == 0) printf("exit_reason[%d]\t%#lx\n", vcpu, u64); } if (!error && setcap) { int captype; captype = vm_capability_name2type(capname); error = vm_set_capability(ctx, vcpu, captype, capval); if (error != 0 && errno == ENOENT) printf("Capability \"%s\" is not available\n", capname); } if (!error && get_gpa_pmap) { error = vm_get_gpa_pmap(ctx, gpa_pmap, pteval, &ptenum); if (error == 0) { printf("gpa %#lx:", gpa_pmap); pte = &pteval[0]; while (ptenum-- > 0) printf(" %#lx", *pte++); printf("\n"); } } if (!error && set_rtc_nvram) error = vm_rtc_write(ctx, rtc_nvram_offset, rtc_nvram_value); if (!error && (get_rtc_nvram || get_all)) { error = vm_rtc_read(ctx, rtc_nvram_offset, &rtc_nvram_value); if (error == 0) { printf("rtc nvram[%03d]: 0x%02x\n", rtc_nvram_offset, rtc_nvram_value); } } if (!error && set_rtc_time) error = vm_rtc_settime(ctx, rtc_secs); if (!error && (get_rtc_time || get_all)) { error = vm_rtc_gettime(ctx, &rtc_secs); if (error == 0) { gmtime_r(&rtc_secs, &tm); printf("rtc time %#lx: %s %s %02d %02d:%02d:%02d %d\n", rtc_secs, wday_str(tm.tm_wday), mon_str(tm.tm_mon), tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, 1900 + tm.tm_year); } } if (!error && (getcap || get_all)) { int captype, val, getcaptype; if (getcap && capname) getcaptype = vm_capability_name2type(capname); else getcaptype = -1; for (captype = 0; captype < VM_CAP_MAX; captype++) { if (getcaptype >= 0 && captype != getcaptype) continue; error = vm_get_capability(ctx, vcpu, captype, &val); if (error == 0) { printf("Capability \"%s\" is %s on vcpu %d\n", vm_capability_type2name(captype), val ? "set" : "not set", vcpu); } else if (errno == ENOENT) { error = 0; printf("Capability \"%s\" is not available\n", vm_capability_type2name(captype)); } else { break; } } } if (!error && (get_active_cpus || get_all)) { error = vm_active_cpus(ctx, &cpus); if (!error) print_cpus("active cpus", &cpus); } if (!error && (get_suspended_cpus || get_all)) { error = vm_suspended_cpus(ctx, &cpus); if (!error) print_cpus("suspended cpus", &cpus); } if (!error && (get_intinfo || get_all)) { error = vm_get_intinfo(ctx, vcpu, &info[0], &info[1]); if (!error) { print_intinfo("pending", info[0]); print_intinfo("current", info[1]); } } if (!error && (get_stats || get_all)) { int i, num_stats; uint64_t *stats; struct timeval tv; const char *desc; stats = vm_get_stats(ctx, vcpu, &tv, &num_stats); if (stats != NULL) { printf("vcpu%d stats:\n", vcpu); for (i = 0; i < num_stats; i++) { desc = vm_get_stat_desc(ctx, i); printf("%-40s\t%ld\n", desc, stats[i]); } } } if (!error && (get_cpu_topology || get_all)) { uint16_t sockets, cores, threads, maxcpus; vm_get_topology(ctx, &sockets, &cores, &threads, &maxcpus); printf("cpu_topology:\tsockets=%hu, cores=%hu, threads=%hu, " "maxcpus=%hu\n", sockets, cores, threads, maxcpus); } if (!error && run) { error = vm_run(ctx, vcpu, &vmexit); if (error == 0) dump_vm_run_exitcode(&vmexit, vcpu); else printf("vm_run error %d\n", error); } if (!error && force_reset) error = vm_suspend(ctx, VM_SUSPEND_RESET); if (!error && force_poweroff) error = vm_suspend(ctx, VM_SUSPEND_POWEROFF); if (error) printf("errno = %d\n", errno); if (!error && destroy) vm_destroy(ctx); #ifdef BHYVE_SNAPSHOT if (!error && vm_checkpoint_opt) error = send_start_checkpoint(ctx, checkpoint_file); if (!error && vm_suspend_opt) error = send_start_suspend(ctx, suspend_file); #endif free (opts); exit(error); }