diff --git a/libexec/rc/rc.conf b/libexec/rc/rc.conf --- a/libexec/rc/rc.conf +++ b/libexec/rc/rc.conf @@ -733,6 +733,27 @@ iovctl_files="" # Config files for iovctl(8) +############################################################## +### BHYVE autostart Configuration (see rc.conf(5) manual page) +############################################################## +bhyve_enable="NO" # User needs to enable, see "bhyve_startvms". +bhyve_cfgdir="/etc/bhyve.d" # Directory containing bhyve_config(5) files. +bhyve_startupjournal="/var/run/bhyve/startupjournal" # Trivial newline separated + # text file to track order and minimal state info. +bhyve_stop_any="NO" # Kill all bhyve(4) ps(1) finds if no specific VM name + # was defined as CLI argument. +bhyve_unreversed_stop="NO" # Shut down VMs in same order as started. +bhyve_start_timeout="20" # seconds to wait for ps(1) to find the started VM + # before moving on to next start. +bhyve_stop_timeout="50" # seconds to wait for shutdown before continuing with + # next VM to stop. +bhyve_vmdisk_default_pool="zroot" # Can be referenced in bhyve_config(5) without +bhyve_vmdisk_default_dataset="bhyveVOL/sys" # the bhyve_ prefix (see also /usr/ +bhyve_vmdisk_default_imagedir="" # share/examples/bhyve) +bhyve_startvms="AUTO" # Defines VMs and order to start/stop. AUTO covers all + # VMs which are defined via bhyve_config(5) and do not + # have 'autostart=false' defined. 
+ ############################################################## ### Jail Configuration (see rc.conf(5) manual page) ########## ############################################################## diff --git a/libexec/rc/rc.d/Makefile b/libexec/rc/rc.d/Makefile --- a/libexec/rc/rc.d/Makefile +++ b/libexec/rc/rc.d/Makefile @@ -173,6 +173,12 @@ CONFS+= autounmountd .endif +.if ${MK_BHYVE} != "no" +CONFGROUPS+= BHYVE +BHYVE+= bhyve +BHYVEPACKAGE= bhyve +.endif + .if ${MK_BLACKLIST} != "no" _blacklistd+= blacklistd .endif diff --git a/libexec/rc/rc.d/bhyve b/libexec/rc/rc.d/bhyve new file mode 100644 --- /dev/null +++ b/libexec/rc/rc.d/bhyve @@ -0,0 +1,734 @@ +#!/bin/sh + +# PROVIDE: bhyve +# REQUIRE: FILESYSTEMS +# BEFORE: jail +# KEYWORD: shutdown + +. /etc/rc.subr + +name="bhyve" +desc="Running guest operating systems as defined in VM config files" +rcvar="bhyve_enable" +start_precmd="getallvms_cfgparms" +start_cmd="sequential_vmstart" +stop_precmd="getallvms_disabled_autostart" +stop_cmd="sequential_stopvms" +required_modules="vmm" + +load_rc_config $name + +: ${bhyve_enable:=NO} # User has to enable manually +: ${bhyve_cfgdir:=/etc/bhyve.d} # Where bhyve_config(5) files are stored +: ${bhyve_stop_any:=NO} # Ignore startup-journal if bhyve_startvms=AUTO and kill any bhyve(4) instance +: ${bhyve_unreversed_stop:=NO} +: ${bhyve_start_timeout:=20} # seconds to wait for ps(1) to find the started VM before moving on to next start +: ${bhyve_stop_timeout:=50} # seconds to wait for shutdown before continuing with next VM to stop + # CLI args override bhyve_startvms (and bhyve_stopvms) from rc.conf. 
# Output helpers for the progress lines printed while VMs are started or
# stopped.  Both stay silent (and hand back the non-zero status of
# check_startmsgs, which is expected to come from the sourced rc.subr) when
# startup messages are disabled.

# msglineappend text ...
#	Print the arguments without a trailing newline so that later calls can
#	continue on the same terminal line.
msglineappend()
{
	check_startmsgs || return
	echo -n "$@"
}

# msgline text ...
#	Print the arguments followed by a newline; "-e" makes echo interpret
#	backslash sequences such as \r and \b which callers use to overwrite
#	the text previously written by msglineappend().
msgline()
{
	check_startmsgs || return
	echo -e "$@"
}
# __extract_vmconfig file
#	Append the relevant 'key=value' settings of bhyve_config(5) file $1 to
#	$extracted_cfgparms, one normalised line (key="value") per setting.
#
#	Reads:   $pervm_config_vars - dash-separated list (--key1--key2--) of
#	         the keys the caller is interested in.
#	Updates: $extracted_cfgparms - newline-separated result list; a line
#	         that is already present is not added a second time.
#	Returns: 0 (a missing or non-regular file simply contributes nothing).
#
#	Only the leading run of non-blank characters of a value is taken;
#	whatever follows on the line is dropped, so every emitted line has
#	exactly the form key="value".  Callers later pass these lines to eval,
#	hence config files must only be writable by trusted users.
__extract_vmconfig()
{
	local vmparm bhcfgvar _cfglines nl=$'\n'

	# Directories or vanished files in the config dir yield no settings.
	[ -f "$1" ] || return 0

	# Normalise every 'key = value ...' line to 'key="value"'.
	_cfglines="$(command sed -n -E \
	    's/^([[:alnum:]._]+)[[:blank:]]*=[[:blank:]]*([^[:blank:]]*).*$/\1="\2"/p' \
	    "$1")"

	# Read line by line from a here-document: unlike 'for x in $(...)' this
	# performs no pathname expansion on values containing *, ? or [.
	while IFS= read -r vmparm; do
		[ -n "${vmparm}" ] || continue
		# bhyve config variable name of this line
		bhcfgvar="${vmparm%%=*}"
		# Skip keys which are not on the list of relevant variables
		case "${pervm_config_vars}" in
		*"-${bhcfgvar}-"*) ;;
		*) continue ;;
		esac
		# Don't add an identical line twice (literal whole-line compare)
		case "${nl}${extracted_cfgparms}${nl}" in
		*"${nl}${vmparm}${nl}"*) continue ;;
		esac
		extracted_cfgparms="${extracted_cfgparms}${extracted_cfgparms:+${nl}}${vmparm}"
	done <<EOF
${_cfglines}
EOF
	return 0
} # __extract_vmconfig()
# check_isstarted vmname
#	Tell whether a bhyve instance named $1 is currently running.
#	Refreshes the process information via gather_bhyve_pids() and looks
#	$1 up in the resulting $bhyve_existingvms list.
#
#	Returns: 0 if a running VM with that name was found,
#	         2 if no such VM is running,
#	         1 if not called with exactly one argument.
check_isstarted()
{
	local element

	[ $# -eq 1 ] || return 1
	gather_bhyve_pids
	for element in ${bhyve_existingvms}; do
		# Leave at the first hit; continuing the loop would let later
		# list entries mask a match that is not the last element.
		[ "${element}" = "${1}" ] && return 0
	done
	return 2
} # check_isstarted()
We + # will have one variable for each config file: \$$bhyve_cfgparms_${vmname} + # Also collect all VM names in $configured_vmnames, which corresponds to + # the list of VM to be started if $bhyve_startvms=AUTO (undef, none by CLI). + # And separately record the setting of the non-bhyve_config(5) 'autostart' + # option in \$$bhyve_autostart_${vmname}. + # + for vmcfgfile in $(command ls -1v ${bhyve_cfgdir}) + do + # $pervm_config_vars is a dash separated list (allowing cheap + # shell-internal checks) of bhyve-tree-config key names. + # Initialize for each config file: + pervm_config_vars="--autostart--$(echo "${bhyve_config_varlist} " |\ + command sed -E 's/[[:blank:]]+/--/g')" + + # gather all %(var) which are in use, we might need them aswell, so add + # them to (dash-separated) list of $pervm_config_vars + __expand_pervm_config_vars "${bhyve_cfgdir}/$vmcfgfile" + + # Intermediately store all relevant bhyve_config(5) lines + extracted_cfgparms= + __extract_vmconfig "${bhyve_cfgdir}/$vmcfgfile" + + # + # Evaluate 'name' and 'autostart' setting from selected bhyve_config(5) + # for the following skip-checks. + # + for var in name autostart; do + eval $(echo "${extracted_cfgparms}" |\ + command grep -m1 $var= || echo $var=\'--\') + [ ${var} != name ] && continue + # name becomes vmname to avoid confusion: rc(8) files occupy 'name' + vmname=$name + done + unset var + # Skip registering this (invalid) VM for starting automatically if + # the currently parsed config file doesn't specify a 'name' at all. + [ -z "${vmname#--}" ] && continue + # 'autostart' is an optional bhyve_config(5)-foreign config key. + # If it is set to prevent autostart, likewise skip registering this VM. + case ${autostart} in + 0|[nN][oO]|[oO][fF][fF]|[fF][aA][lL][sS][eE]) + if [ "${bhyve_startvms}" != ALL ]; then + # Check if currently inspected VM, which has 'autostart' epli- + # citly disabled, occurs in user-defined list. 
+ for name in ${bhyve_startvms#AUTO}; do + [ ${name} = ${vmname} ] && name='' && break + done + eval bhyve_autostart_${vmname}=FALSIFIED + # Only skip VM if $bhyve_startvms wasn't CLI-defined + [ -n "${_vmnames_clidefined}" ] || continue + fi # "ALL" overrides 'autostart=false' setting, don't continue + # 'cfgfile will replace 'autostart' in $extracted_cfgparms since + ;& # we have ';&' here! + # Separately store 'autostart' setting for priority handling + --) eval bhyve_autostart_${vmname}=\'\' + # Add bhyve_config(5)-foreign 'cfgfile' argument (we need to + # pass the config file later with the '-k' argument). + extracted_cfgparms="cfgfile=\"${bhyve_cfgdir}/${vmcfgfile}\""$'\n'"${extracted_cfgparms}" + ;; + *) eval bhyve_autostart_${vmname}=\"${autostart}\" + # Replace bhyve_config(5) foreign key 'autostart' with likewise + # foreign 'cfgfile' + extracted_cfgparms="$(echo "${extracted_cfgparms}" | command sed \ + "s,^autostart=.*,cfgfile=\"${bhyve_cfgdir}/${vmcfgfile}\",")" + ;; + esac + + # + # Store bhyve_config(5) lines in vmname-specific variable + # + eval bhyve_cfgparms_${vmname}=\"\${extracted_cfgparms}\" + + # + # Register this VM to be started in case $bhyve_startvms=AUTO + # (no order yet). + # + configured_vmnames="$configured_vmnames${configured_vmnames+ }${vmname}" + + done + +} # getallvms_cfgparms() + +getallvms_disabled_autostart() +{ + local extracted_cfgparms # (used in __extract_vmconfig()) + local pervm_config_vars # (used in __extract_vmconfig()) + local name autostart vmname + # + # trimed-down version of getallvms_cfgparms(): We only inspect 'autostart'. + # If this non-bhyve_config(5) option is explicitly disabled, add the VM name + # to an exclusion list NOT to be stopped in case $bhyve_startvms=AUTO. 
# gather_bhyve_pids
#	Take a snapshot of the running bhyve processes with ps(1).
#
#	Sets: $bhyve_existingvms   - space-separated list of the VM names found
#	                             (rebuilt from scratch on every call, so
#	                             repeated calls neither duplicate entries
#	                             nor keep VMs which are gone meanwhile).
#	      $bhyve_pid_<vmname>  - PID of each VM found.
#
#	Only PIDs with at least four digits are considered (see the pattern).
#	If the process title has not been rewritten to 'bhyve: <vmname> ...',
#	no name can be derived and the PID itself is recorded as the name.
gather_bhyve_pids()
{
	local namepid vmname vmpid _pslines \
	    re_bhpidnames_pat='^[[:blank:]]*([[:digit:]]{4,})[[:blank:]]+([^[:blank:]]*bhyve[:)[:blank:]].*)$'

	bhyve_existingvms=''

	# -ax: also list processes of other users and processes without a
	# controlling terminal; VMs started at boot or from a terminal which
	# is closed meanwhile would otherwise not show up.
	# sed turns every matching line into a 'pid:command' tuple.
	_pslines="$(command ps -ax -o pid= -o command= | command sed -nE \
	    "s/${re_bhpidnames_pat}/\1:\2/p")"

	while IFS= read -r namepid; do
		[ -n "${namepid}" ] || continue
		# Split tuple: the separator is the first ':' (following the pid)
		vmpid="${namepid%%:*}"
		vmname="${namepid#*bhyve:[[:blank:]]}"
		vmname="${vmname%%[[:blank:]]*}"
		# If bhyve hasn't altered the command to be
		# 'bhyve: vmname (bhyve)', the vmname can't be used
		[ -n "${vmname#*bhyve*}" ] || vmname="${vmpid}"
		# Record vmname
		bhyve_existingvms="${bhyve_existingvms}${bhyve_existingvms:+ }${vmname}"
		# Store pid of vmname.  Names which cannot be part of a shell
		# variable name (e.g. containing '-') are still listed above, but
		# must not be handed to eval.
		case "${vmname}" in
		*[![:alnum:]_]*) ;;
		*) eval "bhyve_pid_${vmname}=\${vmpid}" ;;
		esac
	done <<EOF
${_pslines}
EOF
} # gather_bhyve_pids()
lpc.bootrom is defined + local vm_ramsize vm_ramflags vm_com1 vm_name=$1 + # + # Evaluate host relevant bhyve_config(5) guest options + # + for line in $(eval echo \"\${bhyve_cfgparms_${vmname}}\"); do + eval _val=${line#*=} # We added double quotes when parsing + case ${line%%=*} in + memory.size) vm_ramsize=${_val} + ;; + memory.wired) if [ "${_val}" = true ] || [ "${_val}" = 1 ]; then + vm_ramflags=" -S" + fi ;; + lpc.bootrom) [ -n "${_val}" ] && _use_bootrom=true + ;; + lpc.com1.path) vm_com1=${_val#*nmdm} # store nA resp. nB + if [ ${_val} != ${vm_com1} ]; then + # store nmdm(4) index number in _val + _val=${vm_com1%?} + # $vmcom1 is still nA resp. nB, make it the + # correct counterpart + if [ ${vm_com1%B} != ${vm_com1} ]; then + vm_com1=/dev/nmdm${_val}A + else + vm_com1=/dev/nmdm${_val}B + fi + fi # lpc.com1.path contains nmdm + ;; # ignore lpc.com1.path otherwise! + name) [ ${vm_name} = ${_val} ] || { echo panic; exit 1; } + ;; # Safety check with questionable benefit + *) eval local vm_${line%%=*}=\"${_val}\" ;; + esac + done + + # User can defice rc.conf(5) variables which can be referenced in bhyve- + # _config(5). We need to vm_-define these too for bhyveload(8) e.g. + for rccfg_var in vmdisk_default_pool vmdisk_default_dataset vmdisk_default_imagedir + do + eval _val=\"\$\{bhyve_${rccfg_var}\}\" + [ -n "${_val}" ] || continue + eval vm_${rccfg_var}=\"\$\{bhyve_${rccfg_var}\}\" + bhyve_cmd_args="${bhyve_cmd_args}${bhyve_cmd_args:+ }-o ${rccfg_var}=\"${_val}\"" + done + + if [ -z "${vm_cfgfile}" ] || [ ! -s "${vm_cfgfile}" ]; then + msgline "\rSkipping VM \"${vm_name}\", config file not found (${vm_cfgfile})!" + return 2 + fi + + # We need to know the virtual boot disk from the VM config file + # in case guest doesn't boot via lpc.bootrom + if [ -n "${_use_bootrom}" ]; then + msglineappend "waiting)..." 
+ else + __setvar_vmbootdiskimage "${vm_cfgfile}" + + # "Resolve" bhyve_config(5) line + if substitute_bhcfgvar "${vmbootdiskimage}" line; then + vmbootdiskimage="${line}" + fi + + if [ ! -r "${vmbootdiskimage}" ]; then + msgline "\b\b FAILED to start with fatal error!" + warn "Bootdisk image for VM \"${vm_name}\" not found (${vmbootdiskimage})." + return 2 + else + msglineappend "loading)..." + fi + + command bhyveload -d "${vmbootdiskimage}" \ + -m ${vm_ramsize}${vm_ramflags} \ + ${vm_com1:+ -c ${vm_com1}} \ + ${vm_name} >/var/log/bhyve-${vm_name}.log 2>&1 || + return + printf "\b\b\b\b\b\b\bed, " # Replace 'ing)...' with 'ed, ' + msglineappend "waiting)..." + fi + + # Leave a comment how to access the console, log file is otherwise empty + if [ -n "${vm_com1}" ]; then + echo "${vm_name} has console attached to ${vm_com1} (use" \ + "\"cu -l ${vm_com1}\" e.g.)" >>/var/log/bhyve-${vm_name}.log + fi + + # Rebooting the VM without destroying it only works if we're booting the + # guest OS with the by bootrom. If bhyveload(8) or grub-bhyve is used, + # bhyvectl(8) needs to destroy the VM before it can be started again, + # even if bhyve(8) terminated with exit status 0. + if [ -n "${_use_bootrom}" ]; then + set -- "while true; do" \ + "command bhyve ${bhyve_cmd_args} -k \"${vm_cfgfile}\"" \ + " >/var/log/bhyve-${vm_name}.log 2>&1;" \ + '[ $? -ne 0 ] && break;' \ + "done;" \ + "command bhyvectl --destroy --vm=${vm_name}" \ + " >>/var/log/bhyve-${vm_name}.log 2>&1." 
# sequential_vmstart
#	start_cmd: launch the selected VMs one after another.
#
#	Reads:  $configured_vmnames and $bhyve_autostart_<vmname> (prepared by
#	        the start_precmd), $bhyve_startvms, $bhyve_start_timeout,
#	        $bhyve_startupjournal, $autoboot.
#	Writes: $bhyve_priolist_unordered, $bhyve_ordered_startup and the
#	        startup journal ('pid:vmname' for a confirmed start, only
#	        'vmname' if no process showed up within the timeout).
#
#	Order:  If $bhyve_startvms is an explicit list, the position in that
#	        list counts.  For AUTO/ALL a positive integer 'autostart' value
#	        is the priority (1 = first); every other value keeps the VM in
#	        config file order behind all prioritised VMs.
#
#	Returns 0 early if there is nothing to launch.
sequential_vmstart()
{
	local name vmname autostart namepid secondslapsed _sts _known _entry \
	    element _vm_pid _autostart_idx _startvms_idx _cfgf_idx=0 \
	    re_bhpidnames_pat='^[[:blank:]]*([[:digit:]]{4,})[[:blank:]]+([^[:blank:]]*bhyve[:)[:blank:]].*)$'
	local timeout=${bhyve_start_timeout}

	#
	# Pass 1: attach a numeric sort key to every VM which has to be started
	#
	for vmname in ${configured_vmnames}
	do
		# check_isstarted() returns 2 for "not running"; skip VMs
		# which are running already (or could not be checked).
		check_isstarted "${vmname}"
		[ $? -eq 2 ] || continue

		eval autostart=\"\$\{bhyve_autostart_${vmname}\}\"
		_startvms_idx=''
		_autostart_idx=''
		_cfgf_idx=$((_cfgf_idx + 1))
		if [ "${bhyve_startvms}" != AUTO ] && [ "${bhyve_startvms}" != ALL ]; then
			# Check if VM is on list to be started; its position in
			# the list becomes the sort key.
			_startvms_idx=0
			name=''
			for name in ${bhyve_startvms}; do
				_startvms_idx=$((_startvms_idx + 1))
				[ "${name}" = "${vmname}" ] && break
				name=''
			done
			[ -n "${name}" ] || continue
		else	# bhyve_startvms was defined AUTO or ALL
			case "${autostart}" in
			''|*[![:digit:]]*)
				# Unset, boolean-like (yes/on/true) or unparsable:
				# no explicit priority.  Must not abort the loop,
				# the remaining VMs still have to be registered.
				;;
			*)
				# Positive integer: explicit priority (1 -> key 0)
				_autostart_idx=$((autostart - 1))
				;;
			esac
			# Without explicit priority keep config file order, sorted
			# behind the prioritised VMs.
			: ${_autostart_idx:=99${_cfgf_idx}}
		fi
		# TODO: priority handling is still simplistic, an explicit
		# bhyve_startvms list always wins over 'autostart' priorities.
		bhyve_priolist_unordered="${bhyve_priolist_unordered}${bhyve_priolist_unordered:+ }${_startvms_idx:-${_autostart_idx}}:${vmname}"
	done
	unset configured_vmnames

	#
	# Pass 2: utilize sort(1) to get the startup list in correct order and
	# strip the 'key:' prefix from each entry.
	#
	for _entry in $(printf '%s\n' ${bhyve_priolist_unordered} | command sort -n)
	do
		bhyve_ordered_startup="${bhyve_ordered_startup}${bhyve_ordered_startup:+ }${_entry#*:}"
	done

	#
	# If $bhyve_startvms lists a VM which isn't also found in
	# $bhyve_ordered_startup (the list we really process), print an
	# explanation why the request can't be fulfilled.
	#
	for vmname in ${bhyve_startvms#AUTO}; do
		[ "${vmname}" = ALL ] && break
		for element in ${bhyve_ordered_startup}; do
			[ "${element}" = "${vmname}" ] && continue 2
		done
		eval _known=\"\$\{bhyve_pid_${vmname}\}\"
		if [ -n "${_known}" ]; then
			[ "${bhyve_startvms}" != AUTO ] &&
			    startmsg "VM \"${vmname}\" is already started, ignoring!"
			continue
		fi
		eval _known=\"\$\{bhyve_autostart_${vmname}\}\"
		if [ "${_known}" = FALSIFIED ]; then
			# $bhyve_startvms was user-defined, tell the reason why
			# (one of) the specified VMs won't be started
			startmsg \
			    "Ignoring \"${vmname}\", 'autostart' is explicitly disabled!"
			continue
		fi
		warn "No bhyve_config(5) for VM \"${vmname}\" found, cannot start!"
	done

	# Return early with 'success' if nothing to really launch
	[ -n "${bhyve_ordered_startup}" ] || return 0

	# Make sure the journal can be written at all, and erase leftovers
	# exactly once if we are booting.  (Doing this per VM would drop the
	# entries of all VMs started before the last one.)
	case "${bhyve_startupjournal}" in
	*/*) command mkdir -p "${bhyve_startupjournal%/*}" ;;
	esac
	if [ X"${autoboot}" = Xyes ]; then
		: > "${bhyve_startupjournal}"
	fi

	startmsg "Sequentially starting bhyve VMs: "

	for vmname in ${bhyve_ordered_startup}
	do
		msglineappend "${vmname} ("

		do_vm_spawn "${vmname}"
		_sts=$?

		if [ ${_sts} -eq 1 ]; then
			msgline "\r${vmname} failed to start, see" \
			    "/var/log/bhyve-${vmname}.log!"
			continue
		elif [ ${_sts} -ne 0 ]; then
			# Message was already printed in do_vm_spawn()
			continue
		fi

		# Poll ps(1) once per second until a process titled
		# 'bhyve: <vmname> ...' shows up or the timeout is reached.
		# All bhyve processes are inspected, other VMs may be running.
		_vm_pid=''
		secondslapsed=0
		while :; do
			_vm_pid="$(command ps -ax -o pid= -o command= |
			    command sed -nE "s/${re_bhpidnames_pat}/\1:\2/p" |
			    while IFS= read -r namepid; do
				name="${namepid#*bhyve:[[:blank:]]}"
				# No 'bhyve: ' title in this line, no name to compare
				[ "${name}" != "${namepid}" ] || continue
				[ "${name%%[[:blank:]]*}" = "${vmname}" ] || continue
				echo "${namepid%%:*}"
				break
			    done)"
			[ -z "${_vm_pid}" ] || break
			[ "${secondslapsed}" -lt "${timeout}" ] || break
			sleep 1
			secondslapsed=$((secondslapsed + 1))
		done

		if [ -n "${_vm_pid}" ]; then
			# Delete 'waiting)...' before appending pid=...
			msgline "\b\b\b\b\b\b\b\b\b\b\bpid=$_vm_pid) started" \
			    "successfully after $secondslapsed sec."
			echo "${_vm_pid}:${vmname}" >> "${bhyve_startupjournal}"
		else
			msgline "\r${vmname} FAILED to start (within $secondslapsed" \
			    "seconds)!"
			echo "${vmname}" >> "${bhyve_startupjournal}"
		fi
	done

} # sequential_vmstart()
+ if [ -z "${bhyve_stopvms#AUTO}" ] || [ "${bhyve_stopvms}" = ALL ]; then + checkyesno bhyve_unreversed_stop && + _tailargs='-n 999' || + _tailargs='-r -n 999' + stop_directive="${bhyve_stopvms}" && bhyve_stopvms='' + # Assume we need to stop all VMs which were recorded in our journal + for pidname in $(command tail $_tailargs "${bhyve_startupjournal}"); do + vmname=${pidname#*:} + # Only if user defined "ALL" we try to stop apparently failed too + [ ${vmname} != ${pidname} ] || + [ X"${stop_directive}" = XALL ] || + continue + for name in ${bhyve_stopvms}; do + [ ${vmname} = ${name} ] && continue 2 # Don't add twice + done + bhyve_stopvms="${bhyve_stopvms}${bhyve_stopvms:+ }${vmname}" + done + fi + + # $bhyve_stopvms was either user-defined or built from journal, so basically + # it's clear which VMs to stop in what order. + for vmname in ${bhyve_stopvms}; do + # Exclusion list from stop_precemd=getallvms_disabled_autostart + # only counts if $bhyve_stopvms was NOT CLI-defined. We do start + # a CLI-defined VM, despite it's config has 'autostart' explicitly + # disabled, because CLI-defined is not auto! + for name in ${bhyve_autoskipvms}; do + [ -z "${_vmnames_clidefined}" ] || break + [ ${vmname} = ${name} ] && continue 2 + done + # Intermediately store filtered $bhyve_stopvms in original order + # (separate list to be able to determine the skip reason later) + for name in ${_filtered_stopvms}; do + [ ${vmname} = ${name} ] && continue # Don't add twice + done + _filtered_stopvms="${_filtered_stopvms}${_filtered_stopvms:+ }${vmname}" + done + + # $bhyve_existingvms will list variables to evaluate for the PID to send + # SIGTERM to after we invoked gather_bhyve_pids(). + gather_bhyve_pids + + for vmname in ${_filtered_stopvms}; do + # If $bhyve_existingvms from gather_bhyve_pids() doesn't list the VM + # name from $_filtered_stopvms, we can't kill it - cleanup journal only. 
+ for name in ${bhyve_existingvms:-logicallymustbenomatch}; do + [ ${vmname} = ${name} ] && name='' && break + done + # Don't add to bhyve_ordered_shutdown if not running (anymore) + if [ -n "${name}" ]; then + # VM is dead, so remove all lines from the startup journal which + # contain the name of the CLI-defined stop request. + # Record $vmname as lost first, if it was listed with PID in journal + command grep -qE \ + '^[[:blank:]]*[[:digit:]]+:'"${vmname}"'[[:blank:]]*$' \ + "${bhyve_startupjournal}" \ + && _lostvms="${_lostvms}${_lostvms:+ }${vmname}" + # Remove also lines without PID which list $vmname + command sed -i "" -E -e \ + '/^([[:blank:]]*[[:digit:]]+:)*'"${vmname}"'[[:blank:]]*$/d' \ + "${bhyve_startupjournal}" + continue + fi + bhyve_ordered_shutdown="${bhyve_ordered_shutdown}${bhyve_ordered_shutdown:+ }${vmname}" + done + + # If $bhyve_stop_any is enabled we need to check if $bhyve_existingvms lists + # more VMs than we are goint to stop. In case user also defined VM name(s) + # on CLI, we ignore the overkill switch. Otherwise (or if ALL was defined) + # we add all VM names found by gather_bhyve_pids() to be killed. 
+ # (post already defined $bhyve_ordered_shutdown) + if [ -z "${_vmnames_clidefined}" ] || [ -z "${bhyve_stopvms#ALL}" ] && + checkyesno bhyve_stop_any + then + for vmname in ${bhyve_existingvms}; do + for name in ${bhyve_ordered_shutdown}; do + [ ${vmname} = ${name} ] && continue + done + bhyve_ordered_shutdown="${bhyve_ordered_shutdown}${bhyve_ordered_shutdown:+ }${vmname}" + done + fi + + # Print an explanation if any CLI/rc.conf-defined VM name hasn't made it + # onto list of VMs to be stopped + for vmname in ${bhyve_stopvms}; do + # Keep silence if $bhyve_stopvms originally was AUTO + [ X"${stop_directive}" = XAUTO ] && continue + + for name in ${_filtered_stopvms:-logicallymustobenomatch}; do + [ ${vmname} = ${name} ] && name='' && break + done + if [ -n "${name}" ]; then + startmsg "Ignoring \"${vmname}\", 'autostart' is explicitly disabled!" + continue + fi + + for name in ${bhyve_ordered_shutdown:-logicallymustobenomatch}; do + [ ${vmname} = ${name} ] && break + name= + done + if [ -z "${name}" ]; then + for _match in ${_lostvms:-logicallymustobenomatch}; do + [ ${vmname} = ${_match} ] && break + _match='' + done + if [ -n "${_match}" ] || + [ -n "${stop_directive}" -a -z "${_vmnames_clidefined}" ] + then + startmsg "VM \"${vmname}\" is not running anymore, skipping." + else + startmsg "VM \"${vmname}\" was not started, ignoring." 
+ fi + unset _match + fi + done + + # Silently return if there are no VMs to stop + if [ -z "${bhyve_ordered_shutdown}" ]; then + return + fi + + startmsg "Sequentially stopping bhyve VMs: " + for vmname in ${bhyve_ordered_shutdown} + do + eval _pid=\$\{bhyve_pid_${vmname}\} + [ -n "${_pid}" ] && test "$_pid" -gt 1000 2>/dev/null \ + || { warn "VM \"${vmname}\" not started, skipping."; continue; } + + msglineappend "${vmname} (pid=${_pid}), waiting " + secondslapsed=0 + while [ $timeout -gt $secondslapsed ]; do + _wt=$(( $timeout - $secondslapsed )) + _bs=$(( ${#_wt} + 11 )) + if [ $secondslapsed -eq 0 ]; then + builtin kill -s TERM $_pid # Translates to ACPI poweroff + printf "${_wt} seconds..." + else + [ X$(command ps -o pid= $_pid) != X$_pid ] && break + while [ $_bs -gt 0 ]; do + : $((_bs-=1)) + # Delete ${#}-n times the number of digits + suffix + printf "\b" + done + printf "${_wt} seconds..." + fi + sleep 1 + : $((secondslapsed+=1)) + done + unset _wt _bs + if [ $timeout -gt $secondslapsed ]; then + msgline "\r${vmname} stopped sucessfully (after $secondslapsed" \ + "seconds)." + # Assuming VM-names will always have to be unique, remove all lines + # from the startup journal which contain the name of the just killed + # VM (journal isn't meant to reflect the state but just to maintain + # the proper shutdown order). + command sed -i "" -E -e \ + '/^([[:blank:]]*[[:digit:]]+:)*'"${vmname}"'[[:blank:]]*$/d' \ + "${bhyve_startupjournal}" + else + msgline "\r${vmname} not successfully shut down within" \ + "$secondslapsed seconds!" + # Don't remove VM-name from journal shut down wasn't confirmed to + # be successfull. Granting another attemt doesn't harm. 
+ fi + done + +} # sequential_stopvms() + +run_rc_command "$1" + diff --git a/share/examples/Makefile b/share/examples/Makefile --- a/share/examples/Makefile +++ b/share/examples/Makefile @@ -46,7 +46,9 @@ .if ${MK_BHYVE} != "no" LDIRS+= bhyve SE_DIRS+= bhyve -SE_BHYVE= vmrun.sh +SE_BHYVE= vmrun.sh \ + FreeBSD_guest.cfg \ + Windows_guest.cfg PACKAGE_bhyve/vmrun.sh= bhyve .endif .endif diff --git a/share/examples/bhyve/FreeBSD_guest.cfg b/share/examples/bhyve/FreeBSD_guest.cfg new file mode 100644 --- /dev/null +++ b/share/examples/bhyve/FreeBSD_guest.cfg @@ -0,0 +1,92 @@ +# /etc/bhyve.d/FreeBSD_guest.cfg: +# bhyve VM configuration set for an example guest (see below for description) +name=example0 +# Autostart is not part of bhyve_config, but evaluated by /etc/rc.d/bhyve +#autostart=0 +cpus=2 +sockets=1 +cores=2 +# If bhyveload(8) is used, make sure to use exactly the same RAM size. +# /etc/rc.d/bhyve parses this value and applies accordingly. +memory.size=2048 +# We have passthrough device(s), need to hard wire memory +memory.wired=true +# Your VM specific nullmodem serial port (connected to device lpc) +lpc.com1.path=/dev/nmdm0A +# If needed/supported by guest OS, we provide UEFI firmware (comment otherwise) +#lpc.bootrom=/usr/local/share/uefi-firmware/BHYVE_UEFI.fd + +# +# PCI devices +# ================ +# +# Obligatories: +# ------------------------- +# Slot0 PCI host CPU brigde, NetApp ven&pID=0x1275 (alternative: amd_hostbridge) +pci.0.0.0.device=hostbridge +# 82371SB PIIX3 ISA [Natoma/Triton II] ISA/COM emulation +pci.0.31.0.device=lpc +# ------------------------- +# optional emulated USB3 (Intel Series7 [PID_1E31] xHCI+Hub, picky Win drivers!) 
+# with absolute "HID mouse" pointing device +#pci.0.30.0.device=xhci +#pci.0.30.0.slot.1.device=tablet +# optional VGA+VESA compatible framebuffer device emulation +# (TO BE ADJUSTED: IP address for the VNC tcp socket +#pci.0.29.0.device=fbuf +#pci.0.29.0.vga=off +#pci.0.29.0.tcp=192.0.2.100:5901 +# optional, but almost always recommended: Virtio RNG +pci.0.28.0.device=virtio-rnd +# mass storage: HDD 0 +# (TO BE ADJUSTED: pool-component of path for HDD and optionally an ID-suffix if +# if more than one mass storage device is in use) +#pci.0.3.0.device=nvme +#pci.0.3.0.sectsz=4096 +pci.0.3.0.device=virtio-blk +pci.0.3.0.sectorsize=512/4096 +pci.0.3.0.path=/dev/zvol/%(vmdisk_default_pool)/%(vmdisk_default_dataset)/%(name) +# ------------------------------------------------------------------------------ +# +# Network Interface Cards +# +## pptdev 2/0/2: i350 port#3 (igb2) nic3pch: +## Parent for example jails' vimage NICs (vldmz, vlhsm, ...) +#pci.0.2.0.device=passthru +#pci.0.2.0.bus=2 +#pci.0.2.0.slot=0 +#pci.0.2.0.func=2 +## briged (brnic1dmz) to vid 224 child (nic1_dmz) of nic1bmc +#pci.0.1.1.device=virtio-net +#pci.0.1.1.mac=42:c9:f9:fc:82:01 +#pci.0.1.1.mtu=1500 +#pci.0.1.1.backend=netgraph +#pci.0.1.1.path=brnic1dmz: +#pci.0.1.1.peerhook=link2 +#pci.0.1.1.socket=%(name)-vnic1 +## Native management interface: +## briged (brnic1hsm) to vid 75 child (nic1_hsm) of nic1bmc +#pci.0.1.0.device=virtio-net +#pci.0.1.0.mac=42:c9:f9:fc:82:00 +#pci.0.1.0.mtu=1500 +#pci.0.1.0.backend=netgraph +#pci.0.1.0.path=brnic1hsm: +#pci.0.1.0.peerhook=link10 +#pci.0.1.0.socket=%(name)-vnic4 + +# +# misc +# -------- +# No UEFI-bootrom, so provide ACPI tables +acpi_tables=true +# Guest OS doesn't access unsupported MSRs (ModelSpecificRegister), be strict +x86.strictmsr=true +x86.vmexit_on_pause=true +x86.vmexit_on_hlt=true +rtc.use_localtime=false + +############################## +# Description block till EOF # +############################## +# This guest runs FreeBSD --/amd64 and is 
jail host for ... services: +# Your description diff --git a/share/examples/bhyve/Windows_guest.cfg b/share/examples/bhyve/Windows_guest.cfg new file mode 100644 --- /dev/null +++ b/share/examples/bhyve/Windows_guest.cfg @@ -0,0 +1,176 @@ +# /etc/bhyve.d/Windows_guest.cfg: +# bhyve VM configuration set for guest (Win 2k12R2) example: +# ======== +# (see bottom of this file for local hypervisor description and conventions) +# +# VMID=203 [search for VMID to get all VMID-counter specific settings] +name=dafi +cpus=4 +sockets=1 +cores=4 +# bhyveload(8) RAM must be equal (not relevant if lpc.bootrom=uefi-firmware) +# Our /etc/rc.local script parses this value and applies accordingly. +memory.size=16G +# memory needs to be hard wired only if passthrough devices are in use (ppt(4)). +memory.wired=false +# UUID for the guest's smbios System Information structure is generated from +# the host's hostname and vmname, if not defined different here (recommended for +# windows, retrievable by 'wmic path win32_computersystemproduct get uuid' +# [linux: sudo dmidecode -s system-uuid]) +uuid=F04442AC-B45B-8A35-839F-1610604F0D21 +# +# Private (non-bhyve, config-local) variable: +datadriveparentds=bhyveVOL/data +sysdriveparentds=bhyveVOL/sys +syszpool=osirisPsys +# Virtual ethernet nics get privatized and 8bit-shifted MAC address derived +# from the NIC's MAC address which the hypervisor uses as uplink interface. 
+# (see bottom of this file for hypervisor info) +brgl0_ulpfx=e2:ba:79:00:11 +# On primary: brgl0_ulpfx=e2:ba:80:00:11 + +# +# PCI bus enumeration +# ======================= +# +# Guest-OS independent standard/obligatory devices: +# ------------------------------------------------- +# Slot0 PCI host CPU bridge, NetApp ven&pID=0x1275 (alternative: amd_hostbridge) +pci.0.0.0.device=hostbridge +# 82371SB PIIX3 ISA [Natoma/Triton II] ISA/COM emulation +pci.0.31.0.device=lpc +# Guest-OS specific standard devices: +# ----------------------------------- +# USB / HID (always wanted for Windows): +# Intel Series7 [PID_1E31] xHCI+Hub (USB3) emulation (picky Win-driver if <8.1!) +pci.0.30.0.device=xhci +# Absolute "HID mouse" pointing device (tablet/digitizer) +pci.0.30.0.slot.1.device=tablet +# VGA/VESA (always wanted for Windows): +# Framebuffer device emulation (with VNC backend) +pci.0.29.0.device=fbuf +# Set to "io" if guest attempts to issue BIOS calls, otherwise to "off". +pci.0.29.0.vga=off +# guest-counter (non-VMID) based port for the VNC tcp socket (IP of hypervisor's mgnt-interface). +pci.0.29.0.tcp=172.17.8.10:5903 +#primary@polaris: pci.0.29.0.tcp=172.17.8.20:5904 +# VirtIO Random Number Generator, almost always wanted. 
+pci.0.28.0.device=virtio-rnd + +# +# OpticalDiskDrive (ODD) +# +#pci.0.6.0.device=ahci +#pci.0.6.0.port.0.type=cd +#pci.0.6.0.port.0.path=/zfs/.depot/virtio-win-0.1.185.iso + +# +# mass storage +# +# TO BE VERIFIED: path private variables defined above for HDD (and optionally +# an ID-suffix if more than one mass storage device is in use) +# HDD 1 +# "{EFF17CDA-A013-40F4-8B6E-E25E6E46A97D}" +pci.0.4.0.device=nvme +pci.0.4.0.path=/dev/zvol/%(syszpool)/%(datadriveparentds)/%(name)-pvs +#pci.0.4.0.maxq=16 +#pci.0.4.0.qsz=2058 +#pci.0.4.0.ioslots=8 +pci.0.4.0.sectsz=512/4096 +# HDD 0 +# "{7D470E63-52AA-4F92-BB22-D1EE5597A7F5}" +pci.0.3.0.device=nvme +pci.0.3.0.path=/dev/zvol/%(syszpool)/%(sysdriveparentds)/%(name) +#pci.0.3.0.maxq=16 +#pci.0.3.0.qsz=2058 +#pci.0.3.0.ioslots=8 +pci.0.3.0.sectsz=512/4096 +#pci.0.3.0.device=virtio-blk +#pci.0.3.0.sectorsize=512/4096 + +# +# Network Interface Cards (enumerate top-down) +# +## igb4@pci0:7:0:0, 82576 (Kawela), 00:00:00:00:00:ee, Port 5 (00:00:00:00:00:00@alice) +#pci.0.2.0.device=passthru +#pci.0.2.0.bus=7 +#pci.0.2.0.slot=0 +#pci.0.2.0.func=0 +### igb5@pci0:7:0:1, 82576 (Kawela), 00:00:00:00:00:ef, Port 6 (00:00:00:00:00:00@alice) +##pci.0.2.1.device=passthru +##pci.0.2.1.bus=7 +##pci.0.2.1.slot=0 +##pci.0.2.1.func=1 + +# NIC2 - SAN +pci.0.1.1.device=virtio-net +pci.0.1.1.backend=netgraph +pci.0.1.1.path=brgl0san: +# peerhook number is non-VMID related counter +pci.0.1.1.peerhook=link1 +pci.0.1.1.socket=%(name)-vnic1 +pci.0.1.1.mac=%(brgl0_ulpfx):01 +pci.0.1.1.mtu=9000 +# NIC1 - CML (guestlagg0 ng_bridge, link# matches mac tail) +pci.0.1.0.device=virtio-net +pci.0.1.0.backend=netgraph +pci.0.1.0.path=brgl0cml: +# peerhook number is non-VMID related counter +pci.0.1.0.peerhook=link1 +pci.0.1.0.socket=%(name)-vnic0 +pci.0.1.0.mac=%(brgl0_ulpfx):30 +#pci.0.1.0.mtu=1500 +# (see description block for MAC addressing convention) +# -------------------------------------------------- +# End of PCI bus enumeration +# 
############################# + +# VM lifetime is controlled by launcher-loop (rc.local), don't destroy on exit. +destroy_on_poweroff=false +# +# Guest OS-specific settings / misc +# ---------------------------------- +# UEFI-bootrom generates ACPI tables, let bhyve generate them otherwise. +acpi_tables=false +# Make sure to have TZ correctly defined on the hypervisor if you want your +# guest OS to use RTC with localtime (for Windows, set to true) +rtc.use_localtime=true +# For non-FreeBSD guests, we utilize UEFI firmware (comment otherwise). +lpc.bootrom=/usr/local/share/uefi-firmware/BHYVE_UEFI.fd +# guest-counter (non-VMID) based nullmodem serial port ('A'side for device lpc). +#lpc.com1.path=/dev/nmdmNA +# If guestOS doesn't access unsupported MSRs (ModelSpecificRegister) you can +# enable x86.strictmsr, otherwise (for Windows e.g.) set to false. +x86.strictmsr=false +# Enable vmexit_on_pause and vmexit_on_hlt for any guest OS: +x86.vmexit_on_pause=true +x86.vmexit_on_hlt=true + + +############################## +# Description block till EOF # +############################## + +# The 4 low-bits of this guest VM's last vnic MAC address byte (:_f) is a vm- +# specific counter and can correspond to the PCI numbering. +# The 4 high-bits (:f_) of each guest's last vnic MAC address byte (:ff) is +# the _hex value_ of the VMID (last two decimal digits only). +# (this convention limits the priority/guest counter to 16 VMs per host +# and needs to be replaced on hosts where not feasible) +# +# There are non-VMID based guest counters to be adjusted/traced per hypervisor! +# device=fbuf (tcp port) and lpc.comX.path e.g. - usually FreeBSD guests don't +# need framebuffer but nullmodem, while other OSes urgently want a framebuffer). + +# About this VM's host (example): +# ============================== +# Administrative VMID consists of primary hypervisor host number (1xx=alice, +# 2xx=bob) and a hypervisor/vhost-specific counter. 
+# +# nic4bmc has hardware MAC address 00:00:00:00:00:00 +# guestlagg0 has hardware MAC address 00:00:00:00:00:00 +# These parent (trunk) interfaces provide VLAN children exclusively for uplink- +# hooks in all vlan-specific netgraph bridges (ng_bridge(4), ngctl(8)). +# For virtio-net devices with .backend=netgraph (preferred over tap/vmnet), the +# '.peerhook=' link number is a non-VMID related counter. +