Index: stable/12/usr.sbin/bhyve/bhyverun.c =================================================================== --- stable/12/usr.sbin/bhyve/bhyverun.c (revision 358183) +++ stable/12/usr.sbin/bhyve/bhyverun.c (revision 358184) @@ -1,1222 +1,1224 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include "bhyverun.h" #include "acpi.h" #include "atkbdc.h" #include "inout.h" #include "dbgport.h" #include "fwctl.h" #include "gdb.h" #include "ioapic.h" #include "mem.h" #include "mevent.h" #include "mptbl.h" #include "pci_emul.h" #include "pci_irq.h" #include "pci_lpc.h" #include "smbiostbl.h" #include "xmsr.h" #include "spinup_ap.h" #include "rtc.h" #define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ #define MB (1024UL * 1024) #define GB (1024UL * MB) static const char * const vmx_exit_reason_desc[] = { [EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)", [EXIT_REASON_EXT_INTR] = "External interrupt", [EXIT_REASON_TRIPLE_FAULT] = "Triple fault", [EXIT_REASON_INIT] = "INIT signal", [EXIT_REASON_SIPI] = "Start-up IPI (SIPI)", [EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)", [EXIT_REASON_SMI] = "Other SMI", [EXIT_REASON_INTR_WINDOW] = "Interrupt window", [EXIT_REASON_NMI_WINDOW] = "NMI window", [EXIT_REASON_TASK_SWITCH] = "Task switch", [EXIT_REASON_CPUID] = "CPUID", [EXIT_REASON_GETSEC] = "GETSEC", [EXIT_REASON_HLT] = "HLT", [EXIT_REASON_INVD] = "INVD", [EXIT_REASON_INVLPG] = "INVLPG", [EXIT_REASON_RDPMC] = "RDPMC", [EXIT_REASON_RDTSC] = "RDTSC", [EXIT_REASON_RSM] = "RSM", [EXIT_REASON_VMCALL] = "VMCALL", [EXIT_REASON_VMCLEAR] = "VMCLEAR", [EXIT_REASON_VMLAUNCH] = "VMLAUNCH", [EXIT_REASON_VMPTRLD] = "VMPTRLD", [EXIT_REASON_VMPTRST] = "VMPTRST", [EXIT_REASON_VMREAD] = "VMREAD", [EXIT_REASON_VMRESUME] = "VMRESUME", [EXIT_REASON_VMWRITE] = "VMWRITE", [EXIT_REASON_VMXOFF] = "VMXOFF", [EXIT_REASON_VMXON] = "VMXON", 
[EXIT_REASON_CR_ACCESS] = "Control-register accesses", [EXIT_REASON_DR_ACCESS] = "MOV DR", [EXIT_REASON_INOUT] = "I/O instruction", [EXIT_REASON_RDMSR] = "RDMSR", [EXIT_REASON_WRMSR] = "WRMSR", [EXIT_REASON_INVAL_VMCS] = "VM-entry failure due to invalid guest state", [EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading", [EXIT_REASON_MWAIT] = "MWAIT", [EXIT_REASON_MTF] = "Monitor trap flag", [EXIT_REASON_MONITOR] = "MONITOR", [EXIT_REASON_PAUSE] = "PAUSE", [EXIT_REASON_MCE_DURING_ENTRY] = "VM-entry failure due to machine-check event", [EXIT_REASON_TPR] = "TPR below threshold", [EXIT_REASON_APIC_ACCESS] = "APIC access", [EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI", [EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR", [EXIT_REASON_LDTR_TR] = "Access to LDTR or TR", [EXIT_REASON_EPT_FAULT] = "EPT violation", [EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration", [EXIT_REASON_INVEPT] = "INVEPT", [EXIT_REASON_RDTSCP] = "RDTSCP", [EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired", [EXIT_REASON_INVVPID] = "INVVPID", [EXIT_REASON_WBINVD] = "WBINVD", [EXIT_REASON_XSETBV] = "XSETBV", [EXIT_REASON_APIC_WRITE] = "APIC write", [EXIT_REASON_RDRAND] = "RDRAND", [EXIT_REASON_INVPCID] = "INVPCID", [EXIT_REASON_VMFUNC] = "VMFUNC", [EXIT_REASON_ENCLS] = "ENCLS", [EXIT_REASON_RDSEED] = "RDSEED", [EXIT_REASON_PM_LOG_FULL] = "Page-modification log full", [EXIT_REASON_XSAVES] = "XSAVES", [EXIT_REASON_XRSTORS] = "XRSTORS" }; typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); char *vmname; int guest_ncpus; uint16_t cores, maxcpus, sockets, threads; char *guest_uuid_str; +int raw_stdio = 0; + static int guest_vmexit_on_hlt, guest_vmexit_on_pause; static int virtio_msix = 1; static int x2apic_mode = 0; /* default is xAPIC */ static int strictio; static int strictmsr = 1; static int acpi; static char *progname; static const int BSP = 0; static cpuset_t cpumask; static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); static struct vm_exit vmexit[VM_MAXCPU]; struct bhyvestats { uint64_t vmexit_bogus; uint64_t vmexit_reqidle; uint64_t vmexit_hlt; uint64_t vmexit_pause; uint64_t vmexit_mtrap; uint64_t vmexit_inst_emul; uint64_t cpu_switch_rotate; uint64_t cpu_switch_direct; } stats; struct mt_vmm_info { pthread_t mt_thr; struct vmctx *mt_ctx; int mt_vcpu; } mt_vmm_info[VM_MAXCPU]; static cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; static void usage(int code) { fprintf(stderr, "Usage: %s [-abehuwxACHPSWY]\n" " %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n" " %*s [-g ] [-l ]\n" " %*s [-m mem] [-p vcpu:hostcpu] [-s ] [-U uuid] \n" " -a: local apic is in xAPIC mode (deprecated)\n" " -A: create ACPI tables\n" " -c: number of cpus and/or topology specification\n" " -C: include guest memory in core file\n" " -e: exit on unhandled I/O access\n" " -g: gdb port\n" " -h: help\n" " -H: vmexit from the guest on hlt\n" " -l: LPC device configuration\n" " -m: memory size in MB\n" " -p: pin 'vcpu' to 'hostcpu'\n" " -P: vmexit from the guest on pause\n" " -s: PCI slot config\n" " -S: guest memory cannot be swapped\n" " -u: RTC keeps UTC time\n" " -U: uuid\n" " -w: ignore unimplemented MSRs\n" " -W: force virtio to use single-vector MSI\n" " -x: local apic is in x2APIC mode\n" " -Y: disable MPtable generation\n", progname, (int)strlen(progname), "", (int)strlen(progname), "", (int)strlen(progname), ""); exit(code); } /* * XXX This parser is known to have the following issues: * 1. 
It accepts null key=value tokens ",,". * 2. It accepts whitespace after = and before value. * 3. Values out of range of INT are silently wrapped. * 4. It doesn't check non-final values. * 5. The apparently bogus limits of UINT16_MAX are for future expansion. * * The acceptance of a null specification ('-c ""') is by design to match the * manual page syntax specification, this results in a topology of 1 vCPU. */ static int topology_parse(const char *opt) { uint64_t ncpus; int c, chk, n, s, t, tmp; char *cp, *str; bool ns, scts; c = 1, n = 1, s = 1, t = 1; ns = false, scts = false; str = strdup(opt); if (str == NULL) goto out; while ((cp = strsep(&str, ",")) != NULL) { if (sscanf(cp, "%i%n", &tmp, &chk) == 1) { n = tmp; ns = true; } else if (sscanf(cp, "cpus=%i%n", &tmp, &chk) == 1) { n = tmp; ns = true; } else if (sscanf(cp, "sockets=%i%n", &tmp, &chk) == 1) { s = tmp; scts = true; } else if (sscanf(cp, "cores=%i%n", &tmp, &chk) == 1) { c = tmp; scts = true; } else if (sscanf(cp, "threads=%i%n", &tmp, &chk) == 1) { t = tmp; scts = true; #ifdef notyet /* Do not expose this until vmm.ko implements it */ } else if (sscanf(cp, "maxcpus=%i%n", &tmp, &chk) == 1) { m = tmp; #endif /* Skip the empty argument case from -c "" */ } else if (cp[0] == '\0') continue; else goto out; /* Any trailing garbage causes an error */ if (cp[chk] != '\0') goto out; } free(str); str = NULL; /* * Range check 1 <= n <= UINT16_MAX all values */ if (n < 1 || s < 1 || c < 1 || t < 1 || n > UINT16_MAX || s > UINT16_MAX || c > UINT16_MAX || t > UINT16_MAX) return (-1); /* If only the cpus was specified, use that as sockets */ if (!scts) s = n; /* * Compute sockets * cores * threads avoiding overflow * The range check above insures these are 16 bit values * If n was specified check it against computed ncpus */ ncpus = (uint64_t)s * c * t; if (ncpus > UINT16_MAX || (ns && n != ncpus)) return (-1); guest_ncpus = ncpus; sockets = s; cores = c; threads = t; return(0); out: free(str); return (-1); } static int pincpu_parse(const char *opt) { int vcpu, pcpu; if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { fprintf(stderr, "invalid format: %s\n", opt); return (-1); } if (vcpu < 0 || vcpu >= VM_MAXCPU) { fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", vcpu, VM_MAXCPU - 1); return (-1); } if (pcpu < 0 || pcpu >= CPU_SETSIZE) { fprintf(stderr, "hostcpu '%d' outside valid range from " "0 to %d\n", pcpu, CPU_SETSIZE - 1); return (-1); } if (vcpumap[vcpu] == NULL) { if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) { perror("malloc"); return (-1); } CPU_ZERO(vcpumap[vcpu]); } CPU_SET(pcpu, vcpumap[vcpu]); return (0); } void vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, int errcode) { struct vmctx *ctx; int error, restart_instruction; ctx = arg; restart_instruction = 1; error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode, restart_instruction); assert(error == 0); } void * paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) { return (vm_map_gpa(ctx, gaddr, len)); } int fbsdrun_vmexit_on_pause(void) { return (guest_vmexit_on_pause); } int fbsdrun_vmexit_on_hlt(void) { return (guest_vmexit_on_hlt); } int fbsdrun_virtio_msix(void) { return (virtio_msix); } static void * fbsdrun_start_thread(void *param) { char tname[MAXCOMLEN + 1]; struct mt_vmm_info *mtp; int vcpu; mtp = param; vcpu = mtp->mt_vcpu; snprintf(tname, sizeof(tname), "vcpu %d", vcpu); pthread_set_name_np(mtp->mt_thr, tname); gdb_cpu_add(vcpu); vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); /* not reached */ 
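[Illustration, not part of the patch] topology_parse() above combines strsep() tokenizing with sscanf()'s %n conversion counter to reject trailing garbage, then range-checks each factor to 16 bits so the sockets*cores*threads product cannot overflow when computed in 64-bit arithmetic. A minimal standalone sketch of that validation pattern (hypothetical names; the bare "-c N" and "cpus=" forms accepted by the real parser are omitted):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parse "sockets=N,cores=N,threads=N"; returns 0 on success. */
static int
parse_topo(const char *opt, uint16_t *vcpus)
{
	int s = 1, c = 1, t = 1, tmp, chk;
	uint64_t n;
	char *cp, *str, *tofree;

	if ((tofree = str = strdup(opt)) == NULL)
		return (-1);
	while ((cp = strsep(&str, ",")) != NULL) {
		if (sscanf(cp, "sockets=%i%n", &tmp, &chk) == 1)
			s = tmp;
		else if (sscanf(cp, "cores=%i%n", &tmp, &chk) == 1)
			c = tmp;
		else if (sscanf(cp, "threads=%i%n", &tmp, &chk) == 1)
			t = tmp;
		else if (cp[0] == '\0')
			continue;	/* tolerate empty tokens, as -c "" does */
		else
			goto bad;
		if (cp[chk] != '\0')	/* trailing garbage after the number */
			goto bad;
	}
	free(tofree);
	/* 16-bit range checks make the 64-bit product overflow-free. */
	if (s < 1 || c < 1 || t < 1 ||
	    s > UINT16_MAX || c > UINT16_MAX || t > UINT16_MAX)
		return (-1);
	n = (uint64_t)s * c * t;
	if (n > UINT16_MAX)
		return (-1);
	*vcpus = (uint16_t)n;
	return (0);
bad:
	free(tofree);
	return (-1);
}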
exit(1); return (NULL); } void fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) { int error; assert(fromcpu == BSP); /* * The 'newcpu' must be activated in the context of 'fromcpu'. If * vm_activate_cpu() is delayed until newcpu's pthread starts running * then vmm.ko is out-of-sync with bhyve and this can create a race * with vm_suspend(). */ error = vm_activate_cpu(ctx, newcpu); if (error != 0) err(EX_OSERR, "could not activate CPU %d", newcpu); CPU_SET_ATOMIC(newcpu, &cpumask); /* * Set up the vmexit struct to allow execution to start * at the given RIP */ vmexit[newcpu].rip = rip; vmexit[newcpu].inst_length = 0; mt_vmm_info[newcpu].mt_ctx = ctx; mt_vmm_info[newcpu].mt_vcpu = newcpu; error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, fbsdrun_start_thread, &mt_vmm_info[newcpu]); assert(error == 0); } static int fbsdrun_deletecpu(struct vmctx *ctx, int vcpu) { if (!CPU_ISSET(vcpu, &cpumask)) { fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); exit(4); } CPU_CLR_ATOMIC(vcpu, &cpumask); return (CPU_EMPTY(&cpumask)); } static int vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, uint32_t eax) { #if BHYVE_DEBUG /* * put guest-driven debug here */ #endif return (VMEXIT_CONTINUE); } static int vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { int error; int bytes, port, in, out; int vcpu; vcpu = *pvcpu; port = vme->u.inout.port; bytes = vme->u.inout.bytes; in = vme->u.inout.in; out = !in; /* Extra-special case of host notifications */ if (out && port == GUEST_NIO_PORT) { error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); return (error); } error = emulate_inout(ctx, vcpu, vme, strictio); if (error) { fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", in ? "in" : "out", bytes == 1 ? 'b' : (bytes == 2 ? 
'w' : 'l'), port, vmexit->rip); return (VMEXIT_ABORT); } else { return (VMEXIT_CONTINUE); } } static int vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { uint64_t val; uint32_t eax, edx; int error; val = 0; error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); if (error != 0) { fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", vme->u.msr.code, *pvcpu); if (strictmsr) { vm_inject_gp(ctx, *pvcpu); return (VMEXIT_CONTINUE); } } eax = val; error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); assert(error == 0); edx = val >> 32; error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); assert(error == 0); return (VMEXIT_CONTINUE); } static int vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { int error; error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); if (error != 0) { fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", vme->u.msr.code, vme->u.msr.wval, *pvcpu); if (strictmsr) { vm_inject_gp(ctx, *pvcpu); return (VMEXIT_CONTINUE); } } return (VMEXIT_CONTINUE); } static int vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { (void)spinup_ap(ctx, *pvcpu, vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); return (VMEXIT_CONTINUE); } #define DEBUG_EPT_MISCONFIG #ifdef DEBUG_EPT_MISCONFIG #define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; static int ept_misconfig_ptenum; #endif static const char * vmexit_vmx_desc(uint32_t exit_reason) { if (exit_reason >= nitems(vmx_exit_reason_desc) || vmx_exit_reason_desc[exit_reason] == NULL) return ("Unknown"); return (vmx_exit_reason_desc[exit_reason]); } static int vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { fprintf(stderr, "vm exit[%d]\n", *pvcpu); fprintf(stderr, "\treason\t\tVMX\n"); fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); fprintf(stderr, "\texit_reason\t%u (%s)\n", vmexit->u.vmx.exit_reason, vmexit_vmx_desc(vmexit->u.vmx.exit_reason)); fprintf(stderr, "\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification); fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); #ifdef DEBUG_EPT_MISCONFIG if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { vm_get_register(ctx, *pvcpu, VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), &ept_misconfig_gpa); vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, &ept_misconfig_ptenum); fprintf(stderr, "\tEPT misconfiguration:\n"); fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", ept_misconfig_ptenum, ept_misconfig_pte[0], ept_misconfig_pte[1], ept_misconfig_pte[2], ept_misconfig_pte[3]); } #endif /* DEBUG_EPT_MISCONFIG */ return (VMEXIT_ABORT); } static int vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { fprintf(stderr, "vm exit[%d]\n", *pvcpu); fprintf(stderr, "\treason\t\tSVM\n"); fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode); fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1); fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2); return (VMEXIT_ABORT); } static int vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { assert(vmexit->inst_length == 0); stats.vmexit_bogus++; return 
(VMEXIT_CONTINUE); } static int vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { assert(vmexit->inst_length == 0); stats.vmexit_reqidle++; return (VMEXIT_CONTINUE); } static int vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { stats.vmexit_hlt++; /* * Just continue execution with the next instruction. We use * the HLT VM exit as a way to be friendly with the host * scheduler. */ return (VMEXIT_CONTINUE); } static int vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { stats.vmexit_pause++; return (VMEXIT_CONTINUE); } static int vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { assert(vmexit->inst_length == 0); stats.vmexit_mtrap++; gdb_cpu_mtrap(*pvcpu); return (VMEXIT_CONTINUE); } static int vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { int err, i; struct vie *vie; stats.vmexit_inst_emul++; vie = &vmexit->u.inst_emul.vie; err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, vie, &vmexit->u.inst_emul.paging); if (err) { if (err == ESRCH) { fprintf(stderr, "Unhandled memory access to 0x%lx\n", vmexit->u.inst_emul.gpa); } fprintf(stderr, "Failed to emulate instruction ["); for (i = 0; i < vie->num_valid; i++) { fprintf(stderr, "0x%02x%s", vie->inst[i], i != (vie->num_valid - 1) ? " " : ""); } fprintf(stderr, "] at 0x%lx\n", vmexit->rip); return (VMEXIT_ABORT); } return (VMEXIT_CONTINUE); } static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; static int vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { enum vm_suspend_how how; how = vmexit->u.suspended.how; fbsdrun_deletecpu(ctx, *pvcpu); if (*pvcpu != BSP) { pthread_mutex_lock(&resetcpu_mtx); pthread_cond_signal(&resetcpu_cond); pthread_mutex_unlock(&resetcpu_mtx); pthread_exit(NULL); } pthread_mutex_lock(&resetcpu_mtx); while (!CPU_EMPTY(&cpumask)) { pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); } pthread_mutex_unlock(&resetcpu_mtx); switch (how) { case VM_SUSPEND_RESET: exit(0); case VM_SUSPEND_POWEROFF: exit(1); case VM_SUSPEND_HALT: exit(2); case VM_SUSPEND_TRIPLEFAULT: exit(3); default: fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); exit(100); } return (0); /* NOTREACHED */ } static int vmexit_debug(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { gdb_cpu_suspend(*pvcpu); return (VMEXIT_CONTINUE); } static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_INOUT] = vmexit_inout, [VM_EXITCODE_INOUT_STR] = vmexit_inout, [VM_EXITCODE_VMX] = vmexit_vmx, [VM_EXITCODE_SVM] = vmexit_svm, [VM_EXITCODE_BOGUS] = vmexit_bogus, [VM_EXITCODE_REQIDLE] = vmexit_reqidle, [VM_EXITCODE_RDMSR] = vmexit_rdmsr, [VM_EXITCODE_WRMSR] = vmexit_wrmsr, [VM_EXITCODE_MTRAP] = vmexit_mtrap, [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, [VM_EXITCODE_SUSPENDED] = vmexit_suspend, [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, [VM_EXITCODE_DEBUG] = vmexit_debug, }; static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) { int error, rc; enum vm_exitcode exitcode; cpuset_t active_cpus; if (vcpumap[vcpu] != NULL) { error = pthread_setaffinity_np(pthread_self(), sizeof(cpuset_t), vcpumap[vcpu]); assert(error == 0); } error = vm_active_cpus(ctx, &active_cpus); assert(CPU_ISSET(vcpu, &active_cpus)); error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); assert(error == 0); while (1) { error = vm_run(ctx, vcpu, &vmexit[vcpu]); if (error != 0) break; exitcode = 
vmexit[vcpu].exitcode; if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", exitcode); exit(4); } rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); switch (rc) { case VMEXIT_CONTINUE: break; case VMEXIT_ABORT: abort(); default: exit(4); } } fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); } static int num_vcpus_allowed(struct vmctx *ctx) { int tmp, error; error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); /* * The guest is allowed to spinup more than one processor only if the * UNRESTRICTED_GUEST capability is available. */ if (error == 0) return (VM_MAXCPU); else return (1); } void fbsdrun_set_capabilities(struct vmctx *ctx, int cpu) { int err, tmp; if (fbsdrun_vmexit_on_hlt()) { err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); if (err < 0) { fprintf(stderr, "VM exit on HLT not supported\n"); exit(4); } vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); if (cpu == BSP) handler[VM_EXITCODE_HLT] = vmexit_hlt; } if (fbsdrun_vmexit_on_pause()) { /* * pause exit support required for this mode */ err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); if (err < 0) { fprintf(stderr, "SMP mux requested, no pause support\n"); exit(4); } vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); if (cpu == BSP) handler[VM_EXITCODE_PAUSE] = vmexit_pause; } if (x2apic_mode) err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); else err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); if (err) { fprintf(stderr, "Unable to set x2apic state (%d)\n", err); exit(4); } vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); } static struct vmctx * do_open(const char *vmname) { struct vmctx *ctx; int error; bool reinit, romboot; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; const cap_ioctl_t *cmds; size_t ncmds; #endif reinit = romboot = false; if (lpc_bootrom()) romboot = true; error = vm_create(vmname); if (error) { if (errno == EEXIST) { if (romboot) { reinit = true; } else { /* * The virtual machine has been setup by the * userspace bootloader. */ } } else { perror("vm_create"); exit(4); } } else { if (!romboot) { /* * If the virtual machine was just created then a * bootrom must be configured to boot it. 
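[Illustration, not part of the patch] vm_loop() above dispatches through handler[], an array of function pointers indexed by exit code and populated with designated initializers, bounds- and NULL-checking before the indirect call. A minimal sketch of the same pattern with hypothetical names:

#include <stdio.h>
#include <stdlib.h>

enum exitcode { EC_INOUT, EC_HLT, EC_MAX };

typedef int (*exit_handler_t)(int vcpu);

static int handle_inout(int vcpu) { printf("inout on vcpu %d\n", vcpu); return (0); }
static int handle_hlt(int vcpu)   { printf("hlt on vcpu %d\n", vcpu);   return (0); }

/* Designated initializers leave unlisted exit codes NULL. */
static exit_handler_t handlers[EC_MAX] = {
	[EC_INOUT] = handle_inout,
	[EC_HLT]   = handle_hlt,
};

static int
dispatch(enum exitcode ec, int vcpu)
{
	/* Bounds- and NULL-check before the indirect call, as vm_loop() does. */
	if (ec >= EC_MAX || handlers[ec] == NULL) {
		fprintf(stderr, "unexpected exitcode %d\n", ec);
		exit(4);
	}
	return (handlers[ec](vcpu));
}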
*/ fprintf(stderr, "virtual machine cannot be booted\n"); exit(4); } } ctx = vm_open(vmname); if (ctx == NULL) { perror("vm_open"); exit(4); } #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW); if (caph_rights_limit(vm_get_device_fd(ctx), &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); vm_get_ioctls(&ncmds); cmds = vm_get_ioctls(NULL); if (cmds == NULL) errx(EX_OSERR, "out of memory"); if (caph_ioctls_limit(vm_get_device_fd(ctx), cmds, ncmds) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); free((cap_ioctl_t *)cmds); #endif if (reinit) { error = vm_reinit(ctx); if (error) { perror("vm_reinit"); exit(4); } } error = vm_set_topology(ctx, sockets, cores, threads, maxcpus); if (error) errx(EX_OSERR, "vm_set_topology"); return (ctx); } int main(int argc, char *argv[]) { int c, error, dbg_port, gdb_port, err, bvmcons; int max_vcpus, mptgen, memflags; int rtc_localtime; bool gdb_stop; struct vmctx *ctx; uint64_t rip; size_t memsize; char *optstr; bvmcons = 0; progname = basename(argv[0]); dbg_port = 0; gdb_port = 0; gdb_stop = false; guest_ncpus = 1; sockets = cores = threads = 1; maxcpus = 0; memsize = 256 * MB; mptgen = 1; rtc_localtime = 1; memflags = 0; optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:U:"; while ((c = getopt(argc, argv, optstr)) != -1) { switch (c) { case 'a': x2apic_mode = 0; break; case 'A': acpi = 1; break; case 'b': bvmcons = 1; break; case 'p': if (pincpu_parse(optarg) != 0) { errx(EX_USAGE, "invalid vcpu pinning " "configuration '%s'", optarg); } break; case 'c': if (topology_parse(optarg) != 0) { errx(EX_USAGE, "invalid cpu topology " "'%s'", optarg); } break; case 'C': memflags |= VM_MEM_F_INCORE; break; case 'g': dbg_port = atoi(optarg); break; case 'G': if (optarg[0] == 'w') { gdb_stop = true; optarg++; } gdb_port = atoi(optarg); break; case 'l': if (strncmp(optarg, "help", strlen(optarg)) == 0) { lpc_print_supported_devices(); exit(0); } else if (lpc_device_parse(optarg) != 0) { errx(EX_USAGE, "invalid lpc device " "configuration '%s'", optarg); } break; case 's': if (strncmp(optarg, "help", strlen(optarg)) == 0) { pci_print_supported_devices(); exit(0); } else if (pci_parse_slot(optarg) != 0) exit(4); else break; case 'S': memflags |= VM_MEM_F_WIRED; break; case 'm': error = vm_parse_memsize(optarg, &memsize); if (error) errx(EX_USAGE, "invalid memsize '%s'", optarg); break; case 'H': guest_vmexit_on_hlt = 1; break; case 'I': /* * The "-I" option was used to add an ioapic to the * virtual machine. * * An ioapic is now provided unconditionally for each * virtual machine and this option is now deprecated. 
*/ break; case 'P': guest_vmexit_on_pause = 1; break; case 'e': strictio = 1; break; case 'u': rtc_localtime = 0; break; case 'U': guest_uuid_str = optarg; break; case 'w': strictmsr = 0; break; case 'W': virtio_msix = 0; break; case 'x': x2apic_mode = 1; break; case 'Y': mptgen = 0; break; case 'h': usage(0); default: usage(1); } } argc -= optind; argv += optind; if (argc != 1) usage(1); vmname = argv[0]; ctx = do_open(vmname); max_vcpus = num_vcpus_allowed(ctx); if (guest_ncpus > max_vcpus) { fprintf(stderr, "%d vCPUs requested but only %d available\n", guest_ncpus, max_vcpus); exit(4); } fbsdrun_set_capabilities(ctx, BSP); vm_set_memflags(ctx, memflags); err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); if (err) { fprintf(stderr, "Unable to setup memory (%d)\n", errno); exit(4); } error = init_msr(); if (error) { fprintf(stderr, "init_msr error %d", error); exit(4); } init_mem(); init_inout(); atkbdc_init(ctx); pci_irq_init(ctx); ioapic_init(ctx); rtc_init(ctx, rtc_localtime); sci_init(ctx); /* * Exit if a device emulation finds an error in its initilization */ if (init_pci(ctx) != 0) { perror("device emulation initialization error"); exit(4); } if (dbg_port != 0) init_dbgport(dbg_port); if (gdb_port != 0) init_gdb(ctx, gdb_port, gdb_stop); if (bvmcons) init_bvmcons(); if (lpc_bootrom()) { if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) { fprintf(stderr, "ROM boot failed: unrestricted guest " "capability not available\n"); exit(4); } error = vcpu_reset(ctx, BSP); assert(error == 0); } error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); assert(error == 0); /* * build the guest tables, MP etc. */ if (mptgen) { error = mptable_build(ctx, guest_ncpus); if (error) { perror("error to build the guest tables"); exit(4); } } error = smbios_build(ctx); assert(error == 0); if (acpi) { error = acpi_build(ctx, guest_ncpus); assert(error == 0); } if (lpc_bootrom()) fwctl_init(); /* * Change the proc title to include the VM name. */ setproctitle("%s", vmname); #ifndef WITHOUT_CAPSICUM caph_cache_catpages(); if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); if (caph_enter() == -1) errx(EX_OSERR, "cap_enter() failed"); #endif /* * Add CPU 0 */ fbsdrun_addcpu(ctx, BSP, BSP, rip); /* * Head off to the main event dispatch loop */ mevent_dispatch(); exit(4); } Index: stable/12/usr.sbin/bhyve/block_if.c =================================================================== --- stable/12/usr.sbin/bhyve/block_if.c (revision 358183) +++ stable/12/usr.sbin/bhyve/block_if.c (revision 358184) @@ -1,849 +1,850 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 Peter Grehan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
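[Illustration, not part of the patch] The tail of main() above shows the Capsicum sequence bhyve uses: limit the rights on the stdio descriptors first, then enter capability mode, after which only operations permitted by previously granted capabilities succeed. A minimal FreeBSD-only sketch of that ordering:

#include <sys/capsicum.h>
#include <capsicum_helpers.h>
#include <err.h>
#include <sysexits.h>

int
main(void)
{
	/* Limit the stdio descriptors first, then enter capability mode. */
	if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
	if (caph_enter() == -1)
		errx(EX_OSERR, "cap_enter() failed");
	/* From here on, only capability-safe operations succeed. */
	return (0);
}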
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" +#include "debug.h" #include "mevent.h" #include "block_if.h" #define BLOCKIF_SIG 0xb109b109 #define BLOCKIF_NUMTHR 8 #define BLOCKIF_MAXREQ (BLOCKIF_RING_MAX + BLOCKIF_NUMTHR) enum blockop { BOP_READ, BOP_WRITE, BOP_FLUSH, BOP_DELETE }; enum blockstat { BST_FREE, BST_BLOCK, BST_PEND, BST_BUSY, BST_DONE }; struct blockif_elem { TAILQ_ENTRY(blockif_elem) be_link; struct blockif_req *be_req; enum blockop be_op; enum blockstat be_status; pthread_t be_tid; off_t be_block; }; struct blockif_ctxt { int bc_magic; int bc_fd; int bc_ischr; int bc_isgeom; int bc_candelete; int bc_rdonly; off_t bc_size; int bc_sectsz; int bc_psectsz; int bc_psectoff; int bc_closing; pthread_t bc_btid[BLOCKIF_NUMTHR]; pthread_mutex_t bc_mtx; pthread_cond_t bc_cond; /* Request elements and free/pending/busy queues */ TAILQ_HEAD(, blockif_elem) bc_freeq; TAILQ_HEAD(, blockif_elem) bc_pendq; TAILQ_HEAD(, blockif_elem) bc_busyq; struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; }; static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; struct blockif_sig_elem { pthread_mutex_t bse_mtx; pthread_cond_t bse_cond; int bse_pending; struct blockif_sig_elem *bse_next; }; static struct blockif_sig_elem *blockif_bse_head; static int blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, enum blockop op) { struct blockif_elem *be, *tbe; off_t off; int i; be = TAILQ_FIRST(&bc->bc_freeq); assert(be != NULL); assert(be->be_status == BST_FREE); TAILQ_REMOVE(&bc->bc_freeq, be, be_link); be->be_req = breq; be->be_op = op; switch (op) { case BOP_READ: case BOP_WRITE: case BOP_DELETE: off = breq->br_offset; for (i = 0; i < breq->br_iovcnt; i++) off += breq->br_iov[i].iov_len; break; default: off = OFF_MAX; } be->be_block = off; TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { if (tbe->be_block == breq->br_offset) break; } if (tbe == NULL) { TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) { if (tbe->be_block == breq->br_offset) break; } } if (tbe == NULL) be->be_status = BST_PEND; else be->be_status = BST_BLOCK; TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); return (be->be_status == BST_PEND); } static int blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) { struct blockif_elem *be; TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { if (be->be_status == BST_PEND) break; assert(be->be_status == BST_BLOCK); } if (be == NULL) return (0); TAILQ_REMOVE(&bc->bc_pendq, be, be_link); be->be_status = BST_BUSY; be->be_tid = t; TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); *bep = be; return (1); } static void blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) { struct blockif_elem *tbe; if (be->be_status == BST_DONE || be->be_status == BST_BUSY) TAILQ_REMOVE(&bc->bc_busyq, be, be_link); else 
TAILQ_REMOVE(&bc->bc_pendq, be, be_link); TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { if (tbe->be_req->br_offset == be->be_block) tbe->be_status = BST_PEND; } be->be_tid = 0; be->be_status = BST_FREE; be->be_req = NULL; TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); } static void blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) { struct blockif_req *br; off_t arg[2]; ssize_t clen, len, off, boff, voff; int i, err; br = be->be_req; if (br->br_iovcnt <= 1) buf = NULL; err = 0; switch (be->be_op) { case BOP_READ: if (buf == NULL) { if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, br->br_offset)) < 0) err = errno; else br->br_resid -= len; break; } i = 0; off = voff = 0; while (br->br_resid > 0) { len = MIN(br->br_resid, MAXPHYS); if (pread(bc->bc_fd, buf, len, br->br_offset + off) < 0) { err = errno; break; } boff = 0; do { clen = MIN(len - boff, br->br_iov[i].iov_len - voff); memcpy(br->br_iov[i].iov_base + voff, buf + boff, clen); if (clen < br->br_iov[i].iov_len - voff) voff += clen; else { i++; voff = 0; } boff += clen; } while (boff < len); off += len; br->br_resid -= len; } break; case BOP_WRITE: if (bc->bc_rdonly) { err = EROFS; break; } if (buf == NULL) { if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, br->br_offset)) < 0) err = errno; else br->br_resid -= len; break; } i = 0; off = voff = 0; while (br->br_resid > 0) { len = MIN(br->br_resid, MAXPHYS); boff = 0; do { clen = MIN(len - boff, br->br_iov[i].iov_len - voff); memcpy(buf + boff, br->br_iov[i].iov_base + voff, clen); if (clen < br->br_iov[i].iov_len - voff) voff += clen; else { i++; voff = 0; } boff += clen; } while (boff < len); if (pwrite(bc->bc_fd, buf, len, br->br_offset + off) < 0) { err = errno; break; } off += len; br->br_resid -= len; } break; case BOP_FLUSH: if (bc->bc_ischr) { if (ioctl(bc->bc_fd, DIOCGFLUSH)) err = errno; } else if (fsync(bc->bc_fd)) err = errno; break; case BOP_DELETE: if (!bc->bc_candelete) err = EOPNOTSUPP; else if (bc->bc_rdonly) err = EROFS; else if (bc->bc_ischr) { arg[0] = br->br_offset; arg[1] = br->br_resid; if (ioctl(bc->bc_fd, DIOCGDELETE, arg)) err = errno; else br->br_resid = 0; } else err = EOPNOTSUPP; break; default: err = EINVAL; break; } be->be_status = BST_DONE; (*br->br_callback)(br, err); } static void * blockif_thr(void *arg) { struct blockif_ctxt *bc; struct blockif_elem *be; pthread_t t; uint8_t *buf; bc = arg; if (bc->bc_isgeom) buf = malloc(MAXPHYS); else buf = NULL; t = pthread_self(); pthread_mutex_lock(&bc->bc_mtx); for (;;) { while (blockif_dequeue(bc, t, &be)) { pthread_mutex_unlock(&bc->bc_mtx); blockif_proc(bc, be, buf); pthread_mutex_lock(&bc->bc_mtx); blockif_complete(bc, be); } /* Check ctxt status here to see if exit requested */ if (bc->bc_closing) break; pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); } pthread_mutex_unlock(&bc->bc_mtx); if (buf) free(buf); pthread_exit(NULL); return (NULL); } static void blockif_sigcont_handler(int signal, enum ev_type type, void *arg) { struct blockif_sig_elem *bse; for (;;) { /* * Process the entire list even if not intended for * this thread. 
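[Illustration, not part of the patch] blockif_thr() above is a classic worker-pool loop: each of the BLOCKIF_NUMTHR threads drains the pending queue under the mutex, drops the lock while doing the actual I/O, and sleeps on the condition variable when the queue is empty. A minimal sketch of that loop shape with hypothetical names:

#include <pthread.h>
#include <stdlib.h>
#include <sys/queue.h>

struct job {
	TAILQ_ENTRY(job) link;
	void (*fn)(void *);
	void *arg;
};

static TAILQ_HEAD(, job) pendq = TAILQ_HEAD_INITIALIZER(pendq);
static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int closing;

/* Worker loop in the style of blockif_thr(): drain, then sleep. */
static void *
worker(void *arg)
{
	struct job *j;

	(void)arg;
	pthread_mutex_lock(&mtx);
	for (;;) {
		while ((j = TAILQ_FIRST(&pendq)) != NULL) {
			TAILQ_REMOVE(&pendq, j, link);
			pthread_mutex_unlock(&mtx);
			j->fn(j->arg);		/* run without the lock held */
			free(j);
			pthread_mutex_lock(&mtx);
		}
		if (closing)
			break;
		pthread_cond_wait(&cond, &mtx);
	}
	pthread_mutex_unlock(&mtx);
	return (NULL);
}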
*/ do { bse = blockif_bse_head; if (bse == NULL) return; } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, (uintptr_t)bse, (uintptr_t)bse->bse_next)); pthread_mutex_lock(&bse->bse_mtx); bse->bse_pending = 0; pthread_cond_signal(&bse->bse_cond); pthread_mutex_unlock(&bse->bse_mtx); } } static void blockif_init(void) { mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); (void) signal(SIGCONT, SIG_IGN); } struct blockif_ctxt * blockif_open(const char *optstr, const char *ident) { char tname[MAXCOMLEN + 1]; char name[MAXPATHLEN]; char *nopt, *xopts, *cp; struct blockif_ctxt *bc; struct stat sbuf; struct diocgattr_arg arg; off_t size, psectsz, psectoff; int extra, fd, i, sectsz; int nocache, sync, ro, candelete, geom, ssopt, pssopt; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE }; #endif pthread_once(&blockif_once, blockif_init); fd = -1; ssopt = 0; nocache = 0; sync = 0; ro = 0; /* * The first element in the optstring is always a pathname. * Optional elements follow */ nopt = xopts = strdup(optstr); while (xopts != NULL) { cp = strsep(&xopts, ","); if (cp == nopt) /* file or device pathname */ continue; else if (!strcmp(cp, "nocache")) nocache = 1; else if (!strcmp(cp, "sync") || !strcmp(cp, "direct")) sync = 1; else if (!strcmp(cp, "ro")) ro = 1; else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2) ; else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1) pssopt = ssopt; else { - fprintf(stderr, "Invalid device option \"%s\"\n", cp); + EPRINTLN("Invalid device option \"%s\"", cp); goto err; } } extra = 0; if (nocache) extra |= O_DIRECT; if (sync) extra |= O_SYNC; fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); if (fd < 0 && !ro) { /* Attempt a r/w fail with a r/o open */ fd = open(nopt, O_RDONLY | extra); ro = 1; } if (fd < 0) { warn("Could not open backing file: %s", nopt); goto err; } if (fstat(fd, &sbuf) < 0) { warn("Could not stat backing file %s", nopt); goto err; } #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK, CAP_WRITE); if (ro) cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE); if (caph_rights_limit(fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif /* * Deal with raw devices */ size = sbuf.st_size; sectsz = DEV_BSIZE; psectsz = psectoff = 0; candelete = geom = 0; if (S_ISCHR(sbuf.st_mode)) { if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || ioctl(fd, DIOCGSECTORSIZE, §sz)) { perror("Could not fetch dev blk/sector size"); goto err; } assert(size != 0); assert(sectsz != 0); if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0) ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff); strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); arg.len = sizeof(arg.value.i); if (ioctl(fd, DIOCGATTR, &arg) == 0) candelete = arg.value.i; if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0) geom = 1; } else psectsz = sbuf.st_blksize; #ifndef WITHOUT_CAPSICUM if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif if (ssopt != 0) { if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 || ssopt > pssopt) { - fprintf(stderr, "Invalid sector size %d/%d\n", + EPRINTLN("Invalid sector size %d/%d", ssopt, pssopt); goto err; } /* * Some backend drivers (e.g. cd0, ada0) require that the I/O * size be a multiple of the device's sector size. * * Validate that the emulated sector size complies with this * requirement. 
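[Illustration, not part of the patch] blockif_sigcont_handler() above pops entries off a singly linked list lock-free with FreeBSD's atomic_cmpset_ptr(). A portable sketch of the same pop using C11 atomics (the real code tolerates the usual ABA caveat because each element is stack-allocated and waited on before reuse):

#include <stdatomic.h>
#include <stddef.h>

struct sig_elem {
	struct sig_elem *next;
};

static _Atomic(struct sig_elem *) head;

/* Lock-free pop in the style of blockif_sigcont_handler(). */
static struct sig_elem *
pop(void)
{
	struct sig_elem *e;

	e = atomic_load(&head);
	do {
		if (e == NULL)
			return (NULL);
		/* On failure, e is reloaded with the current head. */
	} while (!atomic_compare_exchange_weak(&head, &e, e->next));
	return (e);
}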
*/ if (S_ISCHR(sbuf.st_mode)) { if (ssopt < sectsz || (ssopt % sectsz) != 0) { - fprintf(stderr, "Sector size %d incompatible " - "with underlying device sector size %d\n", + EPRINTLN("Sector size %d incompatible " + "with underlying device sector size %d", ssopt, sectsz); goto err; } } sectsz = ssopt; psectsz = pssopt; psectoff = 0; } bc = calloc(1, sizeof(struct blockif_ctxt)); if (bc == NULL) { perror("calloc"); goto err; } bc->bc_magic = BLOCKIF_SIG; bc->bc_fd = fd; bc->bc_ischr = S_ISCHR(sbuf.st_mode); bc->bc_isgeom = geom; bc->bc_candelete = candelete; bc->bc_rdonly = ro; bc->bc_size = size; bc->bc_sectsz = sectsz; bc->bc_psectsz = psectsz; bc->bc_psectoff = psectoff; pthread_mutex_init(&bc->bc_mtx, NULL); pthread_cond_init(&bc->bc_cond, NULL); TAILQ_INIT(&bc->bc_freeq); TAILQ_INIT(&bc->bc_pendq); TAILQ_INIT(&bc->bc_busyq); for (i = 0; i < BLOCKIF_MAXREQ; i++) { bc->bc_reqs[i].be_status = BST_FREE; TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); } for (i = 0; i < BLOCKIF_NUMTHR; i++) { pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); pthread_set_name_np(bc->bc_btid[i], tname); } return (bc); err: if (fd >= 0) close(fd); return (NULL); } static int blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, enum blockop op) { int err; err = 0; pthread_mutex_lock(&bc->bc_mtx); if (!TAILQ_EMPTY(&bc->bc_freeq)) { /* * Enqueue and inform the block i/o thread * that there is work available */ if (blockif_enqueue(bc, breq, op)) pthread_cond_signal(&bc->bc_cond); } else { /* * Callers are not allowed to enqueue more than * the specified blockif queue limit. Return an * error to indicate that the queue length has been * exceeded. */ err = E2BIG; } pthread_mutex_unlock(&bc->bc_mtx); return (err); } int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_READ)); } int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_WRITE)); } int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_FLUSH)); } int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_DELETE)); } int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) { struct blockif_elem *be; assert(bc->bc_magic == BLOCKIF_SIG); pthread_mutex_lock(&bc->bc_mtx); /* * Check pending requests. */ TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { if (be->be_req == breq) break; } if (be != NULL) { /* * Found it. */ blockif_complete(bc, be); pthread_mutex_unlock(&bc->bc_mtx); return (0); } /* * Check in-flight requests. */ TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { if (be->be_req == breq) break; } if (be == NULL) { /* * Didn't find it. */ pthread_mutex_unlock(&bc->bc_mtx); return (EINVAL); } /* * Interrupt the processing thread to force it return * prematurely via it's normal callback path. 
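[Illustration, not part of the patch] The sector-size checks above reduce to a small predicate: both sizes must be powers of two, the logical size at least 512 and no larger than the physical size, and (for character devices) a multiple of the backing device's sector size. A sketch with hypothetical names:

#include <stdbool.h>

#define powerof2(x)	((((x) - 1) & (x)) == 0)	/* as in sys/param.h */

/*
 * Validate an emulated logical/physical sector-size pair against the
 * backing device's sector size, mirroring the checks in blockif_open().
 */
static bool
sectsz_ok(int ssopt, int pssopt, int devsectsz)
{
	if (!powerof2(ssopt) || !powerof2(pssopt) ||
	    ssopt < 512 || ssopt > pssopt)
		return (false);
	/* I/O must be a multiple of the device sector size. */
	if (ssopt < devsectsz || (ssopt % devsectsz) != 0)
		return (false);
	return (true);
}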
*/ while (be->be_status == BST_BUSY) { struct blockif_sig_elem bse, *old_head; pthread_mutex_init(&bse.bse_mtx, NULL); pthread_cond_init(&bse.bse_cond, NULL); bse.bse_pending = 1; do { old_head = blockif_bse_head; bse.bse_next = old_head; } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, (uintptr_t)old_head, (uintptr_t)&bse)); pthread_kill(be->be_tid, SIGCONT); pthread_mutex_lock(&bse.bse_mtx); while (bse.bse_pending) pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); pthread_mutex_unlock(&bse.bse_mtx); } pthread_mutex_unlock(&bc->bc_mtx); /* * The processing thread has been interrupted. Since it's not * clear if the callback has been invoked yet, return EBUSY. */ return (EBUSY); } int blockif_close(struct blockif_ctxt *bc) { void *jval; int i; assert(bc->bc_magic == BLOCKIF_SIG); /* * Stop the block i/o thread */ pthread_mutex_lock(&bc->bc_mtx); bc->bc_closing = 1; pthread_mutex_unlock(&bc->bc_mtx); pthread_cond_broadcast(&bc->bc_cond); for (i = 0; i < BLOCKIF_NUMTHR; i++) pthread_join(bc->bc_btid[i], &jval); /* XXX Cancel queued i/o's ??? */ /* * Release resources */ bc->bc_magic = 0; close(bc->bc_fd); free(bc); return (0); } /* * Return virtual C/H/S values for a given block. Use the algorithm * outlined in the VHD specification to calculate values. */ void blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) { off_t sectors; /* total sectors of the block dev */ off_t hcyl; /* cylinders times heads */ uint16_t secpt; /* sectors per track */ uint8_t heads; assert(bc->bc_magic == BLOCKIF_SIG); sectors = bc->bc_size / bc->bc_sectsz; /* Clamp the size to the largest possible with CHS */ if (sectors > 65535UL*16*255) sectors = 65535UL*16*255; if (sectors >= 65536UL*16*63) { secpt = 255; heads = 16; hcyl = sectors / secpt; } else { secpt = 17; hcyl = sectors / secpt; heads = (hcyl + 1023) / 1024; if (heads < 4) heads = 4; if (hcyl >= (heads * 1024) || heads > 16) { secpt = 31; heads = 16; hcyl = sectors / secpt; } if (hcyl >= (heads * 1024)) { secpt = 63; heads = 16; hcyl = sectors / secpt; } } *c = hcyl / heads; *h = heads; *s = secpt; } /* * Accessors */ off_t blockif_size(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_size); } int blockif_sectsz(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_sectsz); } void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) { assert(bc->bc_magic == BLOCKIF_SIG); *size = bc->bc_psectsz; *off = bc->bc_psectoff; } int blockif_queuesz(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (BLOCKIF_MAXREQ - 1); } int blockif_is_ro(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_rdonly); } int blockif_candelete(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_candelete); } Index: stable/12/usr.sbin/bhyve/bootrom.c =================================================================== --- stable/12/usr.sbin/bhyve/bootrom.c (revision 358183) +++ stable/12/usr.sbin/bhyve/bootrom.c (revision 358184) @@ -1,113 +1,114 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2015 Neel Natu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
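[Illustration, not part of the patch] blockif_cancel() above nudges the busy worker with SIGCONT and then sleeps on a per-request condition variable until the signal handler acknowledges. Stripped of the lock-free list, the handshake is a standard condvar pattern; a simplified sketch (hypothetical names, and without the real code's retry loop):

#include <pthread.h>
#include <signal.h>

struct handshake {
	pthread_mutex_t mtx;
	pthread_cond_t cond;
	int pending;
};

/* Canceller side: nudge the worker, then wait for the acknowledgement. */
static void
wait_for_ack(struct handshake *h, pthread_t worker)
{
	pthread_kill(worker, SIGCONT);
	pthread_mutex_lock(&h->mtx);
	while (h->pending)
		pthread_cond_wait(&h->cond, &h->mtx);
	pthread_mutex_unlock(&h->mtx);
}

/* Handler side: acknowledge and wake the canceller. */
static void
ack(struct handshake *h)
{
	pthread_mutex_lock(&h->mtx);
	h->pending = 0;
	pthread_cond_signal(&h->cond);
	pthread_mutex_unlock(&h->mtx);
}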
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "bootrom.h" +#include "debug.h" #define MAX_BOOTROM_SIZE (16 * 1024 * 1024) /* 16 MB */ int bootrom_init(struct vmctx *ctx, const char *romfile) { struct stat sbuf; vm_paddr_t gpa; ssize_t rlen; char *ptr; int fd, i, rv, prot; rv = -1; fd = open(romfile, O_RDONLY); if (fd < 0) { - fprintf(stderr, "Error opening bootrom \"%s\": %s\n", + EPRINTLN("Error opening bootrom \"%s\": %s", romfile, strerror(errno)); goto done; } if (fstat(fd, &sbuf) < 0) { - fprintf(stderr, "Could not fstat bootrom file \"%s\": %s\n", + EPRINTLN("Could not fstat bootrom file \"%s\": %s", romfile, strerror(errno)); goto done; } /* * Limit bootrom size to 16MB so it doesn't encroach into reserved * MMIO space (e.g. APIC, HPET, MSI). */ if (sbuf.st_size > MAX_BOOTROM_SIZE || sbuf.st_size < PAGE_SIZE) { - fprintf(stderr, "Invalid bootrom size %ld\n", sbuf.st_size); + EPRINTLN("Invalid bootrom size %ld", sbuf.st_size); goto done; } if (sbuf.st_size & PAGE_MASK) { - fprintf(stderr, "Bootrom size %ld is not a multiple of the " - "page size\n", sbuf.st_size); + EPRINTLN("Bootrom size %ld is not a multiple of the " + "page size", sbuf.st_size); goto done; } ptr = vm_create_devmem(ctx, VM_BOOTROM, "bootrom", sbuf.st_size); if (ptr == MAP_FAILED) goto done; /* Map the bootrom into the guest address space */ prot = PROT_READ | PROT_EXEC; gpa = (1ULL << 32) - sbuf.st_size; if (vm_mmap_memseg(ctx, gpa, VM_BOOTROM, 0, sbuf.st_size, prot) != 0) goto done; /* Read 'romfile' into the guest address space */ for (i = 0; i < sbuf.st_size / PAGE_SIZE; i++) { rlen = read(fd, ptr + i * PAGE_SIZE, PAGE_SIZE); if (rlen != PAGE_SIZE) { - fprintf(stderr, "Incomplete read of page %d of bootrom " - "file %s: %ld bytes\n", i, romfile, rlen); + EPRINTLN("Incomplete read of page %d of bootrom " + "file %s: %ld bytes", i, romfile, rlen); goto done; } } rv = 0; done: if (fd >= 0) close(fd); return (rv); } Index: stable/12/usr.sbin/bhyve/consport.c =================================================================== --- stable/12/usr.sbin/bhyve/consport.c (revision 358183) +++ stable/12/usr.sbin/bhyve/consport.c (revision 358184) @@ -1,176 +1,178 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
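[Illustration, not part of the patch] bootrom_init() above places the ROM so that it ends exactly at 4 GiB (the x86 reset vector sits just below that boundary), after rejecting images that are too large, smaller than a page, or not a page multiple. A sketch of just that size/placement arithmetic with hypothetical names:

#include <stdbool.h>
#include <stdint.h>

#define PAGE_SIZE	4096
#define PAGE_MASK	(PAGE_SIZE - 1)
#define MAX_BOOTROM_SIZE (16 * 1024 * 1024)	/* 16 MB, as in bootrom.c */

/*
 * Compute the guest-physical base for a boot ROM that must end exactly
 * at 4 GiB. Returns false if the size is unusable.
 */
static bool
bootrom_gpa(int64_t romsize, uint64_t *gpa)
{
	if (romsize > MAX_BOOTROM_SIZE || romsize < PAGE_SIZE)
		return (false);		/* keep clear of APIC/HPET/MSI MMIO */
	if (romsize & PAGE_MASK)
		return (false);		/* length must be page-aligned */
	*gpa = (1ULL << 32) - (uint64_t)romsize;
	return (true);
}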
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include "inout.h" #include "pci_lpc.h" +#include "debug.h" #define BVM_CONSOLE_PORT 0x220 #define BVM_CONS_SIG ('b' << 8 | 'v') static struct termios tio_orig, tio_new; static void ttyclose(void) { tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig); } static void ttyopen(void) { tcgetattr(STDIN_FILENO, &tio_orig); cfmakeraw(&tio_new); tcsetattr(STDIN_FILENO, TCSANOW, &tio_new); + raw_stdio = 1; atexit(ttyclose); } static bool tty_char_available(void) { fd_set rfds; struct timeval tv; FD_ZERO(&rfds); FD_SET(STDIN_FILENO, &rfds); tv.tv_sec = 0; tv.tv_usec = 0; if (select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv) > 0) { return (true); } else { return (false); } } static int ttyread(void) { char rb; if (tty_char_available()) { read(STDIN_FILENO, &rb, 1); return (rb & 0xff); } else { return (-1); } } static void ttywrite(unsigned char wb) { (void) write(STDOUT_FILENO, &wb, 1); } static int console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) { static int opened; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; cap_ioctl_t cmds[] = { TIOCGETA, TIOCSETA, TIOCGWINSZ }; #endif if (bytes == 2 && in) { *eax = BVM_CONS_SIG; return (0); } /* * Guests might probe this port to look for old ISA devices * using single-byte reads. Return 0xff for those. 
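[Illustration, not part of the patch] ttyopen() below is the reason this commit introduces raw_stdio: cfmakeraw() turns off output post-processing (ONLCR), so a bare '\n' no longer becomes "\r\n" and log lines would stair-step across the screen. Setting raw_stdio = 1 lets the new EPRINTLN macro emit an explicit "\r\n". A minimal sketch of entering raw mode and restoring it on exit:

#include <stdio.h>
#include <stdlib.h>
#include <termios.h>
#include <unistd.h>

static struct termios tio_orig;
static int raw_stdio;

static void
tty_restore(void)
{
	tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig);
}

/* Enter raw mode and arrange to restore it on exit, as ttyopen() does. */
static void
tty_raw(void)
{
	struct termios tio;

	tcgetattr(STDIN_FILENO, &tio_orig);
	tio = tio_orig;
	cfmakeraw(&tio);		/* disables ONLCR: '\n' stays '\n' */
	tcsetattr(STDIN_FILENO, TCSANOW, &tio);
	raw_stdio = 1;			/* tell EPRINTLN to emit "\r\n" */
	atexit(tty_restore);
}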
*/ if (bytes == 1 && in) { *eax = 0xff; return (0); } if (bytes != 4) return (-1); if (!opened) { #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_IOCTL, CAP_READ, CAP_WRITE); if (caph_rights_limit(STDIN_FILENO, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); if (caph_ioctls_limit(STDIN_FILENO, cmds, nitems(cmds)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif ttyopen(); opened = 1; } if (in) *eax = ttyread(); else ttywrite(*eax); return (0); } SYSRES_IO(BVM_CONSOLE_PORT, 4); static struct inout_port consport = { "bvmcons", BVM_CONSOLE_PORT, 1, IOPORT_F_INOUT, console_handler }; void init_bvmcons(void) { register_inout(&consport); } Index: stable/12/usr.sbin/bhyve/debug.h =================================================================== --- stable/12/usr.sbin/bhyve/debug.h (nonexistent) +++ stable/12/usr.sbin/bhyve/debug.h (revision 358184) @@ -0,0 +1,47 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Vincenzo Maffione + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _DEBUG_H_ +#define _DEBUG_H_ + + +extern int raw_stdio; + +#define FPRINTLN(filep, fmt, arg...) \ + do { \ + if (raw_stdio) \ + fprintf(filep, fmt "\r\n", ##arg); \ + else \ + fprintf(filep, fmt "\n", ##arg); \ + } while (0) + +#define PRINTLN(fmt, arg...) FPRINTLN(stdout, fmt, ##arg) +#define EPRINTLN(fmt, arg...) FPRINTLN(stderr, fmt, ##arg) + +#endif Property changes on: stable/12/usr.sbin/bhyve/debug.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/12/usr.sbin/bhyve/mptbl.c =================================================================== --- stable/12/usr.sbin/bhyve/mptbl.c (revision 358183) +++ stable/12/usr.sbin/bhyve/mptbl.c (revision 358184) @@ -1,379 +1,380 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 NetApp, Inc. * All rights reserved. 
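[Illustration, not part of the patch] The new debug.h above uses the GNU "fmt, arg..." variadic-macro form with ##arg so the trailing comma disappears when no arguments are given, and picks the line ending at runtime from raw_stdio. A hypothetical standalone usage sketch (it assumes debug.h is on the include path; the one real definition of raw_stdio lives in bhyverun.c in this patch):

#include <stdio.h>
#include "debug.h"	/* provides PRINTLN/EPRINTLN and declares raw_stdio */

int raw_stdio;		/* defined here only for this standalone demo */

int
main(void)
{
	PRINTLN("cooked mode: a plain newline is enough");
	raw_stdio = 1;
	EPRINTLN("raw mode: line %d still starts at column 0", 2);
	return (0);
}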
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "acpi.h" +#include "debug.h" #include "bhyverun.h" #include "mptbl.h" #include "pci_emul.h" #define MPTABLE_BASE 0xF0000 /* floating pointer length + maximum length of configuration table */ #define MPTABLE_MAX_LENGTH (65536 + 16) #define LAPIC_PADDR 0xFEE00000 #define LAPIC_VERSION 16 #define IOAPIC_PADDR 0xFEC00000 #define IOAPIC_VERSION 0x11 #define MP_SPECREV 4 #define MPFP_SIG "_MP_" /* Configuration header defines */ #define MPCH_SIG "PCMP" #define MPCH_OEMID "BHyVe " #define MPCH_OEMID_LEN 8 #define MPCH_PRODID "Hypervisor " #define MPCH_PRODID_LEN 12 /* Processor entry defines */ #define MPEP_SIG_FAMILY 6 /* XXX bhyve should supply this */ #define MPEP_SIG_MODEL 26 #define MPEP_SIG_STEPPING 5 #define MPEP_SIG \ ((MPEP_SIG_FAMILY << 8) | \ (MPEP_SIG_MODEL << 4) | \ (MPEP_SIG_STEPPING)) #define MPEP_FEATURES (0xBFEBFBFF) /* XXX Intel i7 */ /* Number of local intr entries */ #define MPEII_NUM_LOCAL_IRQ 2 /* Bus entry defines */ #define MPE_NUM_BUSES 2 #define MPE_BUSNAME_LEN 6 #define MPE_BUSNAME_ISA "ISA " #define MPE_BUSNAME_PCI "PCI " static void *oem_tbl_start; static int oem_tbl_size; static uint8_t mpt_compute_checksum(void *base, size_t len) { uint8_t *bytes; uint8_t sum; for(bytes = base, sum = 0; len > 0; len--) { sum += *bytes++; } return (256 - sum); } static void mpt_build_mpfp(mpfps_t mpfp, vm_paddr_t gpa) { memset(mpfp, 0, sizeof(*mpfp)); memcpy(mpfp->signature, MPFP_SIG, 4); mpfp->pap = gpa + sizeof(*mpfp); mpfp->length = 1; mpfp->spec_rev = MP_SPECREV; mpfp->checksum = mpt_compute_checksum(mpfp, sizeof(*mpfp)); } static void mpt_build_mpch(mpcth_t mpch) { memset(mpch, 0, sizeof(*mpch)); memcpy(mpch->signature, MPCH_SIG, 4); mpch->spec_rev = MP_SPECREV; memcpy(mpch->oem_id, MPCH_OEMID, MPCH_OEMID_LEN); memcpy(mpch->product_id, MPCH_PRODID, MPCH_PRODID_LEN); mpch->apic_address = LAPIC_PADDR; } static void mpt_build_proc_entries(proc_entry_ptr mpep, int ncpu) { int i; for (i = 0; i < ncpu; i++) { memset(mpep, 0, sizeof(*mpep)); mpep->type = MPCT_ENTRY_PROCESSOR; mpep->apic_id = i; // XXX mpep->apic_version = LAPIC_VERSION; mpep->cpu_flags = PROCENTRY_FLAG_EN; if (i == 0) mpep->cpu_flags |= PROCENTRY_FLAG_BP; mpep->cpu_signature = MPEP_SIG; 
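[Illustration, not part of the patch] mpt_compute_checksum() below returns 256 - sum in a uint8_t, i.e. the two's-complement negation of the byte sum, so that summing every byte of the table, checksum included, yields 0 mod 256 -- which is exactly what MP table consumers verify. A sketch of both sides:

#include <stddef.h>
#include <stdint.h>

static uint8_t
mpt_cksum(const void *base, size_t len)
{
	const uint8_t *p = base;
	uint8_t sum = 0;

	while (len-- > 0)
		sum += *p++;
	return (256 - sum);	/* uint8_t wrap: 256 - 0 truncates to 0 */
}

/* A consumer validates by summing every byte, checksum included. */
static int
mpt_cksum_ok(const void *base, size_t len)
{
	const uint8_t *p = base;
	uint8_t sum = 0;

	while (len-- > 0)
		sum += *p++;
	return (sum == 0);
}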
mpep->feature_flags = MPEP_FEATURES; mpep++; } } static void mpt_build_localint_entries(int_entry_ptr mpie) { /* Hardcode LINT0 as ExtINT on all CPUs. */ memset(mpie, 0, sizeof(*mpie)); mpie->type = MPCT_ENTRY_LOCAL_INT; mpie->int_type = INTENTRY_TYPE_EXTINT; mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM | INTENTRY_FLAGS_TRIGGER_CONFORM; mpie->dst_apic_id = 0xff; mpie->dst_apic_int = 0; mpie++; /* Hardcode LINT1 as NMI on all CPUs. */ memset(mpie, 0, sizeof(*mpie)); mpie->type = MPCT_ENTRY_LOCAL_INT; mpie->int_type = INTENTRY_TYPE_NMI; mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM | INTENTRY_FLAGS_TRIGGER_CONFORM; mpie->dst_apic_id = 0xff; mpie->dst_apic_int = 1; } static void mpt_build_bus_entries(bus_entry_ptr mpeb) { memset(mpeb, 0, sizeof(*mpeb)); mpeb->type = MPCT_ENTRY_BUS; mpeb->bus_id = 0; memcpy(mpeb->bus_type, MPE_BUSNAME_PCI, MPE_BUSNAME_LEN); mpeb++; memset(mpeb, 0, sizeof(*mpeb)); mpeb->type = MPCT_ENTRY_BUS; mpeb->bus_id = 1; memcpy(mpeb->bus_type, MPE_BUSNAME_ISA, MPE_BUSNAME_LEN); } static void mpt_build_ioapic_entries(io_apic_entry_ptr mpei, int id) { memset(mpei, 0, sizeof(*mpei)); mpei->type = MPCT_ENTRY_IOAPIC; mpei->apic_id = id; mpei->apic_version = IOAPIC_VERSION; mpei->apic_flags = IOAPICENTRY_FLAG_EN; mpei->apic_address = IOAPIC_PADDR; } static int mpt_count_ioint_entries(void) { int bus, count; count = 0; for (bus = 0; bus <= PCI_BUSMAX; bus++) count += pci_count_lintr(bus); /* * Always include entries for the first 16 pins along with a entry * for each active PCI INTx pin. */ return (16 + count); } static void mpt_generate_pci_int(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, void *arg) { int_entry_ptr *mpiep, mpie; mpiep = arg; mpie = *mpiep; memset(mpie, 0, sizeof(*mpie)); /* * This is always after another I/O interrupt entry, so cheat * and fetch the I/O APIC ID from the prior entry. */ mpie->type = MPCT_ENTRY_INT; mpie->int_type = INTENTRY_TYPE_INT; mpie->src_bus_id = bus; mpie->src_bus_irq = slot << 2 | (pin - 1); mpie->dst_apic_id = mpie[-1].dst_apic_id; mpie->dst_apic_int = ioapic_irq; *mpiep = mpie + 1; } static void mpt_build_ioint_entries(int_entry_ptr mpie, int id) { int pin, bus; /* * The following config is taken from kernel mptable.c * mptable_parse_default_config_ints(...), for now * just use the default config, tweek later if needed. */ /* First, generate the first 16 pins. */ for (pin = 0; pin < 16; pin++) { memset(mpie, 0, sizeof(*mpie)); mpie->type = MPCT_ENTRY_INT; mpie->src_bus_id = 1; mpie->dst_apic_id = id; /* * All default configs route IRQs from bus 0 to the first 16 * pins of the first I/O APIC with an APIC ID of 2. */ mpie->dst_apic_int = pin; switch (pin) { case 0: /* Pin 0 is an ExtINT pin. */ mpie->int_type = INTENTRY_TYPE_EXTINT; break; case 2: /* IRQ 0 is routed to pin 2. */ mpie->int_type = INTENTRY_TYPE_INT; mpie->src_bus_irq = 0; break; case SCI_INT: /* ACPI SCI is level triggered and active-lo. */ mpie->int_flags = INTENTRY_FLAGS_POLARITY_ACTIVELO | INTENTRY_FLAGS_TRIGGER_LEVEL; mpie->int_type = INTENTRY_TYPE_INT; mpie->src_bus_irq = SCI_INT; break; default: /* All other pins are identity mapped. */ mpie->int_type = INTENTRY_TYPE_INT; mpie->src_bus_irq = pin; break; } mpie++; } /* Next, generate entries for any PCI INTx interrupts. 
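The src_bus_irq encoding used by mpt_generate_pci_int() packs the PCI device number and the zero-based INTx pin into one byte: bits 7:2 carry the slot, bits 1:0 the pin (INTA#..INTD# as 0..3). A worked example with illustrative slot and pin values:

#include <stdio.h>

int
main(void)
{
	int slot = 5, pin = 2;			/* INTB# of device 5 */
	unsigned irq = slot << 2 | (pin - 1);	/* 0x15 */

	printf("src_bus_irq = 0x%02x (slot %u, pin %u)\n",
	    irq, irq >> 2, (irq & 0x3) + 1);
	return (0);
}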
*/ for (bus = 0; bus <= PCI_BUSMAX; bus++) pci_walk_lintr(bus, mpt_generate_pci_int, &mpie); } void mptable_add_oemtbl(void *tbl, int tblsz) { oem_tbl_start = tbl; oem_tbl_size = tblsz; } int mptable_build(struct vmctx *ctx, int ncpu) { mpcth_t mpch; bus_entry_ptr mpeb; io_apic_entry_ptr mpei; proc_entry_ptr mpep; mpfps_t mpfp; int_entry_ptr mpie; int ioints, bus; char *curraddr; char *startaddr; startaddr = paddr_guest2host(ctx, MPTABLE_BASE, MPTABLE_MAX_LENGTH); if (startaddr == NULL) { - fprintf(stderr, "mptable requires mapped mem\n"); + EPRINTLN("mptable requires mapped mem"); return (ENOMEM); } /* * There is no way to advertise multiple PCI hierarchies via MPtable * so require that there is no PCI hierarchy with a non-zero bus * number. */ for (bus = 1; bus <= PCI_BUSMAX; bus++) { if (pci_bus_configured(bus)) { - fprintf(stderr, "MPtable is incompatible with " - "multiple PCI hierarchies.\r\n"); - fprintf(stderr, "MPtable generation can be disabled " - "by passing the -Y option to bhyve(8).\r\n"); + EPRINTLN("MPtable is incompatible with " + "multiple PCI hierarchies."); + EPRINTLN("MPtable generation can be disabled " + "by passing the -Y option to bhyve(8)."); return (EINVAL); } } curraddr = startaddr; mpfp = (mpfps_t)curraddr; mpt_build_mpfp(mpfp, MPTABLE_BASE); curraddr += sizeof(*mpfp); mpch = (mpcth_t)curraddr; mpt_build_mpch(mpch); curraddr += sizeof(*mpch); mpep = (proc_entry_ptr)curraddr; mpt_build_proc_entries(mpep, ncpu); curraddr += sizeof(*mpep) * ncpu; mpch->entry_count += ncpu; mpeb = (bus_entry_ptr) curraddr; mpt_build_bus_entries(mpeb); curraddr += sizeof(*mpeb) * MPE_NUM_BUSES; mpch->entry_count += MPE_NUM_BUSES; mpei = (io_apic_entry_ptr)curraddr; mpt_build_ioapic_entries(mpei, 0); curraddr += sizeof(*mpei); mpch->entry_count++; mpie = (int_entry_ptr) curraddr; ioints = mpt_count_ioint_entries(); mpt_build_ioint_entries(mpie, 0); curraddr += sizeof(*mpie) * ioints; mpch->entry_count += ioints; mpie = (int_entry_ptr)curraddr; mpt_build_localint_entries(mpie); curraddr += sizeof(*mpie) * MPEII_NUM_LOCAL_IRQ; mpch->entry_count += MPEII_NUM_LOCAL_IRQ; if (oem_tbl_start) { mpch->oem_table_pointer = curraddr - startaddr + MPTABLE_BASE; mpch->oem_table_size = oem_tbl_size; memcpy(curraddr, oem_tbl_start, oem_tbl_size); } mpch->base_table_length = curraddr - (char *)mpch; mpch->checksum = mpt_compute_checksum(mpch, mpch->base_table_length); return (0); } Index: stable/12/usr.sbin/bhyve/net_backends.c =================================================================== --- stable/12/usr.sbin/bhyve/net_backends.c (revision 358183) +++ stable/12/usr.sbin/bhyve/net_backends.c (revision 358184) @@ -1,872 +1,873 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2019 Vincenzo Maffione * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * This file implements multiple network backends (tap, netmap, ...), * to be used by network frontends such as virtio-net and e1000. * The API to access the backend (e.g. send/receive packets, negotiate * features) is exported by net_backends.h. */ #include __FBSDID("$FreeBSD$"); #include /* u_short etc */ #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #define NETMAP_WITH_LIBS #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include "debug.h" #include "iov.h" #include "mevent.h" #include "net_backends.h" #include /* * Each network backend registers a set of function pointers that are * used to implement the net backends API. * This might need to be exposed if we implement backends in separate files. */ struct net_backend { const char *prefix; /* prefix matching this backend */ /* * Routines used to initialize and cleanup the resources needed * by a backend. The cleanup function is used internally, * and should not be called by the frontend. */ int (*init)(struct net_backend *be, const char *devname, net_be_rxeof_t cb, void *param); void (*cleanup)(struct net_backend *be); /* * Called to serve a guest transmit request. The scatter-gather * vector provided by the caller has 'iovcnt' elements and contains * the packet to send. */ ssize_t (*send)(struct net_backend *be, struct iovec *iov, int iovcnt); /* * Called to receive a packet from the backend. When the function * returns a positive value 'len', the scatter-gather vector * provided by the caller contains a packet with such length. * The function returns 0 if the backend doesn't have a new packet to * receive. */ ssize_t (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt); /* * Ask the backend to enable or disable receive operation in the * backend. On return from a disable operation, it is guaranteed * that the receive callback won't be called until receive is * enabled again. Note however that it is up to the caller to make * sure that netbe_recv() is not currently being executed by another * thread. */ void (*recv_enable)(struct net_backend *be); void (*recv_disable)(struct net_backend *be); /* * Ask the backend for the virtio-net features it is able to * support. Possible features are TSO, UFO and checksum offloading * in both rx and tx direction and for both IPv4 and IPv6. */ uint64_t (*get_cap)(struct net_backend *be); /* * Tell the backend to enable/disable the specified virtio-net * features (capabilities). */ int (*set_cap)(struct net_backend *be, uint64_t features, unsigned int vnet_hdr_len); struct pci_vtnet_softc *sc; int fd; /* * Length of the virtio-net header used by the backend and the * frontend, respectively. A zero value means that the header * is not used. */ unsigned int be_vnet_hdr_len; unsigned int fe_vnet_hdr_len; /* Size of backend-specific private data. 
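The private-data area described here is carved out with the trailing-array idiom: the instance is allocated as one block, header plus priv_size bytes, and the extra room is reached through a zero-length opaque[] member. A minimal sketch of the same idiom with a toy backend type (toy_backend and toy_priv are hypothetical names):

#include <stdlib.h>

struct toy_priv {
	int fd;
};

struct toy_backend {
	size_t priv_size;
	char opaque[0];		/* per-instance private data follows */
};

/* One calloc() covers the header and the private area, as netbe_init() does. */
static struct toy_backend *
toy_alloc(void)
{
	struct toy_backend *be;

	be = calloc(1, sizeof(*be) + sizeof(struct toy_priv));
	if (be == NULL)
		return (NULL);
	be->priv_size = sizeof(struct toy_priv);
	((struct toy_priv *)be->opaque)->fd = -1;
	return (be);
}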
*/ size_t priv_size; /* Room for backend-specific data. */ char opaque[0]; }; SET_DECLARE(net_backend_set, struct net_backend); #define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr) -#define WPRINTF(params) printf params +#define WPRINTF(params) PRINTLN params /* * The tap backend */ struct tap_priv { struct mevent *mevp; }; static void tap_cleanup(struct net_backend *be) { struct tap_priv *priv = (struct tap_priv *)be->opaque; if (priv->mevp) { mevent_delete(priv->mevp); } if (be->fd != -1) { close(be->fd); be->fd = -1; } } static int tap_init(struct net_backend *be, const char *devname, net_be_rxeof_t cb, void *param) { struct tap_priv *priv = (struct tap_priv *)be->opaque; char tbuf[80]; int opt = 1; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; #endif if (cb == NULL) { - WPRINTF(("TAP backend requires non-NULL callback\n")); + WPRINTF(("TAP backend requires non-NULL callback")); return (-1); } strcpy(tbuf, "/dev/"); strlcat(tbuf, devname, sizeof(tbuf)); be->fd = open(tbuf, O_RDWR); if (be->fd == -1) { - WPRINTF(("open of tap device %s failed\n", tbuf)); + WPRINTF(("open of tap device %s failed", tbuf)); goto error; } /* * Set non-blocking and register for read * notifications with the event loop */ if (ioctl(be->fd, FIONBIO, &opt) < 0) { - WPRINTF(("tap device O_NONBLOCK failed\n")); + WPRINTF(("tap device O_NONBLOCK failed")); goto error; } #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); if (caph_rights_limit(be->fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); if (priv->mevp == NULL) { - WPRINTF(("Could not register event\n")); + WPRINTF(("Could not register event")); goto error; } return (0); error: tap_cleanup(be); return (-1); } /* * Called to send a buffer chain out to the tap device */ static ssize_t tap_send(struct net_backend *be, struct iovec *iov, int iovcnt) { return (writev(be->fd, iov, iovcnt)); } static ssize_t tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt) { ssize_t ret; /* Should never be called without a valid tap fd */ assert(be->fd != -1); ret = readv(be->fd, iov, iovcnt); if (ret < 0 && errno == EWOULDBLOCK) { return (0); } return (ret); } static void tap_recv_enable(struct net_backend *be) { struct tap_priv *priv = (struct tap_priv *)be->opaque; mevent_enable(priv->mevp); } static void tap_recv_disable(struct net_backend *be) { struct tap_priv *priv = (struct tap_priv *)be->opaque; mevent_disable(priv->mevp); } static uint64_t tap_get_cap(struct net_backend *be) { return (0); /* no capabilities for now */ } static int tap_set_cap(struct net_backend *be, uint64_t features, unsigned vnet_hdr_len) { return ((features || vnet_hdr_len) ? -1 : 0); } static struct net_backend tap_backend = { .prefix = "tap", .priv_size = sizeof(struct tap_priv), .init = tap_init, .cleanup = tap_cleanup, .send = tap_send, .recv = tap_recv, .recv_enable = tap_recv_enable, .recv_disable = tap_recv_disable, .get_cap = tap_get_cap, .set_cap = tap_set_cap, }; /* A clone of the tap backend, with a different prefix. 
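tap_recv() leans on a convention that is easy to miss: the descriptor was put into non-blocking mode with FIONBIO, so an empty queue surfaces as EWOULDBLOCK, which the backend translates to 0 for the frontend. A sketch of that translation, with nb_recv as a hypothetical stand-in:

#include <sys/uio.h>
#include <errno.h>

/*
 * Hypothetical stand-in for tap_recv(): on a non-blocking fd,
 * "nothing to read yet" is EWOULDBLOCK and is reported as 0, so
 * only real failures reach the caller as negative values.
 */
static ssize_t
nb_recv(int fd, struct iovec *iov, int iovcnt)
{
	ssize_t ret;

	ret = readv(fd, iov, iovcnt);
	if (ret < 0 && errno == EWOULDBLOCK)
		return (0);
	return (ret);
}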
*/ static struct net_backend vmnet_backend = { .prefix = "vmnet", .priv_size = sizeof(struct tap_priv), .init = tap_init, .cleanup = tap_cleanup, .send = tap_send, .recv = tap_recv, .recv_enable = tap_recv_enable, .recv_disable = tap_recv_disable, .get_cap = tap_get_cap, .set_cap = tap_set_cap, }; DATA_SET(net_backend_set, tap_backend); DATA_SET(net_backend_set, vmnet_backend); /* * The netmap backend */ /* The virtio-net features supported by netmap. */ #define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \ VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \ VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \ VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO | \ VIRTIO_NET_F_MRG_RXBUF) struct netmap_priv { char ifname[IFNAMSIZ]; struct nm_desc *nmd; uint16_t memid; struct netmap_ring *rx; struct netmap_ring *tx; struct mevent *mevp; net_be_rxeof_t cb; void *cb_param; }; static void nmreq_init(struct nmreq *req, char *ifname) { memset(req, 0, sizeof(*req)); strlcpy(req->nr_name, ifname, sizeof(req->nr_name)); req->nr_version = NETMAP_API; } static int netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len) { int err; struct nmreq req; struct netmap_priv *priv = (struct netmap_priv *)be->opaque; nmreq_init(&req, priv->ifname); req.nr_cmd = NETMAP_BDG_VNET_HDR; req.nr_arg1 = vnet_hdr_len; err = ioctl(be->fd, NIOCREGIF, &req); if (err) { - WPRINTF(("Unable to set vnet header length %d\n", + WPRINTF(("Unable to set vnet header length %d", vnet_hdr_len)); return (err); } be->be_vnet_hdr_len = vnet_hdr_len; return (0); } static int netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len) { int prev_hdr_len = be->be_vnet_hdr_len; int ret; if (vnet_hdr_len == prev_hdr_len) { return (1); } ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len); if (ret) { return (0); } netmap_set_vnet_hdr_len(be, prev_hdr_len); return (1); } static uint64_t netmap_get_cap(struct net_backend *be) { return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ? 
NETMAP_FEATURES : 0); } static int netmap_set_cap(struct net_backend *be, uint64_t features, unsigned vnet_hdr_len) { return (netmap_set_vnet_hdr_len(be, vnet_hdr_len)); } static int netmap_init(struct net_backend *be, const char *devname, net_be_rxeof_t cb, void *param) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; strlcpy(priv->ifname, devname, sizeof(priv->ifname)); priv->ifname[sizeof(priv->ifname) - 1] = '\0'; priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL); if (priv->nmd == NULL) { - WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)\n", + WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)", devname, strerror(errno))); free(priv); return (-1); } priv->memid = priv->nmd->req.nr_arg2; priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0); priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0); priv->cb = cb; priv->cb_param = param; be->fd = priv->nmd->fd; priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); if (priv->mevp == NULL) { - WPRINTF(("Could not register event\n")); + WPRINTF(("Could not register event")); return (-1); } return (0); } static void netmap_cleanup(struct net_backend *be) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; if (priv->mevp) { mevent_delete(priv->mevp); } if (priv->nmd) { nm_close(priv->nmd); } be->fd = -1; } static ssize_t netmap_send(struct net_backend *be, struct iovec *iov, int iovcnt) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; struct netmap_ring *ring; ssize_t totlen = 0; int nm_buf_size; int nm_buf_len; uint32_t head; void *nm_buf; int j; ring = priv->tx; head = ring->head; if (head == ring->tail) { - WPRINTF(("No space, drop %zu bytes\n", count_iov(iov, iovcnt))); + WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt))); goto txsync; } nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); nm_buf_size = ring->nr_buf_size; nm_buf_len = 0; for (j = 0; j < iovcnt; j++) { int iov_frag_size = iov[j].iov_len; void *iov_frag_buf = iov[j].iov_base; totlen += iov_frag_size; /* * Split each iovec fragment over more netmap slots, if * necessary. */ for (;;) { int copylen; copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size; memcpy(nm_buf, iov_frag_buf, copylen); iov_frag_buf += copylen; iov_frag_size -= copylen; nm_buf += copylen; nm_buf_size -= copylen; nm_buf_len += copylen; if (iov_frag_size == 0) { break; } ring->slot[head].len = nm_buf_len; ring->slot[head].flags = NS_MOREFRAG; head = nm_ring_next(ring, head); if (head == ring->tail) { /* * We ran out of netmap slots while * splitting the iovec fragments. */ - WPRINTF(("No space, drop %zu bytes\n", + WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt))); goto txsync; } nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); nm_buf_size = ring->nr_buf_size; nm_buf_len = 0; } } /* Complete the last slot, which must not have NS_MOREFRAG set. */ ring->slot[head].len = nm_buf_len; ring->slot[head].flags = 0; head = nm_ring_next(ring, head); /* Now update ring->head and ring->cur. 
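netmap_send() splits one guest frame across however many slots it needs, chaining them with NS_MOREFRAG. The framing rule is small enough to isolate; a sketch with a toy slot type, assuming the NS_MOREFRAG value from net/netmap.h:

#include <stdint.h>
#include <stdio.h>

#define NS_MOREFRAG	0x20	/* assumed value, from net/netmap.h */

struct toy_slot {
	uint16_t len;
	uint16_t flags;
};

/*
 * Every slot of a multi-slot frame carries NS_MOREFRAG except the
 * last, whose flags are cleared so the consumer knows where the
 * frame ends.
 */
static void
mark_frags(struct toy_slot *s, int nslots)
{
	int i;

	for (i = 0; i < nslots - 1; i++)
		s[i].flags = NS_MOREFRAG;
	s[nslots - 1].flags = 0;
}

int
main(void)
{
	struct toy_slot slots[3] = { { 2048, 0 }, { 2048, 0 }, { 100, 0 } };
	int i;

	mark_frags(slots, 3);
	for (i = 0; i < 3; i++)
		printf("slot %d: len %u flags 0x%x\n", i, slots[i].len,
		    slots[i].flags);
	return (0);
}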
*/ ring->head = ring->cur = head; txsync: ioctl(be->fd, NIOCTXSYNC, NULL); return (totlen); } static ssize_t netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; struct netmap_slot *slot = NULL; struct netmap_ring *ring; void *iov_frag_buf; int iov_frag_size; ssize_t totlen = 0; uint32_t head; assert(iovcnt); ring = priv->rx; head = ring->head; iov_frag_buf = iov->iov_base; iov_frag_size = iov->iov_len; do { int nm_buf_len; void *nm_buf; if (head == ring->tail) { return (0); } slot = ring->slot + head; nm_buf = NETMAP_BUF(ring, slot->buf_idx); nm_buf_len = slot->len; for (;;) { int copylen = nm_buf_len < iov_frag_size ? nm_buf_len : iov_frag_size; memcpy(iov_frag_buf, nm_buf, copylen); nm_buf += copylen; nm_buf_len -= copylen; iov_frag_buf += copylen; iov_frag_size -= copylen; totlen += copylen; if (nm_buf_len == 0) { break; } iov++; iovcnt--; if (iovcnt == 0) { /* No space to receive. */ - WPRINTF(("Short iov, drop %zd bytes\n", + WPRINTF(("Short iov, drop %zd bytes", totlen)); return (-ENOSPC); } iov_frag_buf = iov->iov_base; iov_frag_size = iov->iov_len; } head = nm_ring_next(ring, head); } while (slot->flags & NS_MOREFRAG); /* Release slots to netmap. */ ring->head = ring->cur = head; return (totlen); } static void netmap_recv_enable(struct net_backend *be) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; mevent_enable(priv->mevp); } static void netmap_recv_disable(struct net_backend *be) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; mevent_disable(priv->mevp); } static struct net_backend netmap_backend = { .prefix = "netmap", .priv_size = sizeof(struct netmap_priv), .init = netmap_init, .cleanup = netmap_cleanup, .send = netmap_send, .recv = netmap_recv, .recv_enable = netmap_recv_enable, .recv_disable = netmap_recv_disable, .get_cap = netmap_get_cap, .set_cap = netmap_set_cap, }; /* A clone of the netmap backend, with a different prefix. */ static struct net_backend vale_backend = { .prefix = "vale", .priv_size = sizeof(struct netmap_priv), .init = netmap_init, .cleanup = netmap_cleanup, .send = netmap_send, .recv = netmap_recv, .recv_enable = netmap_recv_enable, .recv_disable = netmap_recv_disable, .get_cap = netmap_get_cap, .set_cap = netmap_set_cap, }; DATA_SET(net_backend_set, netmap_backend); DATA_SET(net_backend_set, vale_backend); /* * Initialize a backend and attach to the frontend. * This is called during frontend initialization. * @pbe is a pointer to the backend to be initialized * @devname is the backend-name as supplied on the command line, * e.g. -s 2:0,frontend-name,backend-name[,other-args] * @cb is the receive callback supplied by the frontend, * and it is invoked in the event loop when a receive * event is generated in the hypervisor, * @param is a pointer to the frontend, and normally used as * the argument for the callback. */ int netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb, void *param) { struct net_backend **pbe, *nbe, *tbe = NULL; int err; /* * Find the network backend that matches the user-provided * device name. net_backend_set is built using a linker set. 
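Linker sets are what let each backend register itself with a single DATA_SET() line and no central table: the linker gathers every object placed in the set's section, and SET_FOREACH() walks them at run time. A self-contained sketch using FreeBSD's <sys/linker_set.h> (greeter and greeter_set are illustrative names):

#include <sys/linker_set.h>
#include <stdio.h>

struct greeter {
	const char *name;
};

SET_DECLARE(greeter_set, struct greeter);

static struct greeter hello = { "hello" };
DATA_SET(greeter_set, hello);

int
main(void)
{
	struct greeter **g;

	/* Walk every object registered into the set at link time. */
	SET_FOREACH(g, greeter_set)
		printf("%s\n", (*g)->name);
	return (0);
}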
*/ SET_FOREACH(pbe, net_backend_set) { if (strncmp(devname, (*pbe)->prefix, strlen((*pbe)->prefix)) == 0) { tbe = *pbe; assert(tbe->init != NULL); assert(tbe->cleanup != NULL); assert(tbe->send != NULL); assert(tbe->recv != NULL); assert(tbe->get_cap != NULL); assert(tbe->set_cap != NULL); break; } } *ret = NULL; if (tbe == NULL) return (EINVAL); nbe = calloc(1, sizeof(*nbe) + tbe->priv_size); *nbe = *tbe; /* copy the template */ nbe->fd = -1; nbe->sc = param; nbe->be_vnet_hdr_len = 0; nbe->fe_vnet_hdr_len = 0; /* Initialize the backend. */ err = nbe->init(nbe, devname, cb, param); if (err) { free(nbe); return (err); } *ret = nbe; return (0); } void netbe_cleanup(struct net_backend *be) { if (be != NULL) { be->cleanup(be); free(be); } } uint64_t netbe_get_cap(struct net_backend *be) { assert(be != NULL); return (be->get_cap(be)); } int netbe_set_cap(struct net_backend *be, uint64_t features, unsigned vnet_hdr_len) { int ret; assert(be != NULL); /* There are only three valid lengths, i.e., 0, 10 and 12. */ if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN && vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t))) return (-1); be->fe_vnet_hdr_len = vnet_hdr_len; ret = be->set_cap(be, features, vnet_hdr_len); assert(be->be_vnet_hdr_len == 0 || be->be_vnet_hdr_len == be->fe_vnet_hdr_len); return (ret); } static __inline struct iovec * iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen) { struct iovec *riov; /* XXX short-cut: assume first segment is >= tlen */ assert(iov[0].iov_len >= tlen); iov[0].iov_len -= tlen; if (iov[0].iov_len == 0) { assert(*iovcnt > 1); *iovcnt -= 1; riov = &iov[1]; } else { iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen); riov = &iov[0]; } return (riov); } ssize_t netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt) { assert(be != NULL); if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) { /* * The frontend uses a virtio-net header, but the backend * does not. We ignore it (as it must be all zeroes) and * strip it. */ assert(be->be_vnet_hdr_len == 0); iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len); } return (be->send(be, iov, iovcnt)); } /* * Try to read a packet from the backend, without blocking. * If no packets are available, return 0. In case of success, return * the length of the packet just read. Return -1 in case of errors. */ ssize_t netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt) { /* Length of prepended virtio-net header. */ unsigned int hlen = be->fe_vnet_hdr_len; int ret; assert(be != NULL); if (hlen && hlen != be->be_vnet_hdr_len) { /* * The frontend uses a virtio-net header, but the backend * does not. We need to prepend a zeroed header. */ struct virtio_net_rxhdr *vh; assert(be->be_vnet_hdr_len == 0); /* * Get a pointer to the rx header, and use the * data immediately following it for the packet buffer. */ vh = iov[0].iov_base; iov = iov_trim(iov, &iovcnt, hlen); /* * The only valid field in the rx packet header is the * number of buffers if merged rx bufs were negotiated. */ memset(vh, 0, hlen); if (hlen == VNET_HDR_LEN) { vh->vrh_bufs = 1; } } ret = be->recv(be, iov, iovcnt); if (ret > 0) { ret += hlen; } return (ret); } /* * Read a packet from the backend and discard it. * Returns the size of the discarded packet or zero if no packet was available. * A negative error code is returned in case of read error. */ ssize_t netbe_rx_discard(struct net_backend *be) { /* * MP note: the dummybuf is only used to discard frames, * so there is no need for it to be per-vtnet or locked. 
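From a frontend's point of view the whole surface is netbe_init(), netbe_send(), netbe_recv() and the cleanup call. A sketch of the attach-and-transmit path, assuming bhyve's in-tree headers, an existing tap0 interface, and rx_ready as a placeholder callback (a real frontend drains packets with netbe_recv() from it):

#include <sys/uio.h>

#include "mevent.h"		/* enum ev_type */
#include "net_backends.h"	/* netbe_init() and friends */

static char pkt[64];		/* illustrative payload */

/* Placeholder rx callback; invoked from the mevent loop on readiness. */
static void
rx_ready(int fd, enum ev_type type, void *param)
{
}

int
main(void)
{
	struct net_backend *be;
	struct iovec iov = { .iov_base = pkt, .iov_len = sizeof(pkt) };

	if (netbe_init(&be, "tap0", rx_ready, NULL) != 0)	/* "tap0" assumed */
		return (1);
	(void) netbe_send(be, &iov, 1);
	netbe_cleanup(be);
	return (0);
}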
* We only make it large enough for TSO-sized segment. */ static uint8_t dummybuf[65536 + 64]; struct iovec iov; iov.iov_base = dummybuf; iov.iov_len = sizeof(dummybuf); return netbe_recv(be, &iov, 1); } void netbe_rx_disable(struct net_backend *be) { return be->recv_disable(be); } void netbe_rx_enable(struct net_backend *be) { return be->recv_enable(be); } Index: stable/12/usr.sbin/bhyve/net_utils.c =================================================================== --- stable/12/usr.sbin/bhyve/net_utils.c (revision 358183) +++ stable/12/usr.sbin/bhyve/net_utils.c (revision 358184) @@ -1,85 +1,89 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #include "net_utils.h" #include "bhyverun.h" #include #include #include #include #include +#include "bhyverun.h" +#include "debug.h" +#include "net_utils.h" + int net_parsemac(char *mac_str, uint8_t *mac_addr) { struct ether_addr *ea; char *tmpstr; char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 }; tmpstr = strsep(&mac_str,"="); if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) { ea = ether_aton(mac_str); if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) || memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) { - fprintf(stderr, "Invalid MAC %s\n", mac_str); + EPRINTLN("Invalid MAC %s", mac_str); return (EINVAL); } else memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN); } return (0); } void net_genmac(struct pci_devinst *pi, uint8_t *macaddr) { /* * The default MAC address is the standard NetApp OUI of 00-a0-98, * followed by an MD5 of the PCI slot/func number and dev name */ MD5_CTX mdctx; unsigned char digest[16]; char nstr[80]; snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot, pi->pi_func, vmname); MD5Init(&mdctx); MD5Update(&mdctx, nstr, (unsigned int)strlen(nstr)); MD5Final(digest, &mdctx); macaddr[0] = 0x00; macaddr[1] = 0xa0; macaddr[2] = 0x98; macaddr[3] = digest[0]; macaddr[4] = digest[1]; macaddr[5] = digest[2]; } Index: stable/12/usr.sbin/bhyve/pci_e82545.c =================================================================== --- stable/12/usr.sbin/bhyve/pci_e82545.c (revision 358183) +++ stable/12/usr.sbin/bhyve/pci_e82545.c (revision 358184) @@ -1,2369 +1,2370 @@ /* * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016 Alexander Motin * Copyright (c) 2015 Peter Grehan * Copyright (c) 2013 Jeremiah Lott, Avere Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include "e1000_regs.h" #include "e1000_defines.h" #include "mii.h" #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "mevent.h" #include "net_utils.h" #include "net_backends.h" /* Hardware/register definitions XXX: move some to common code. 
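net_genmac() derives a stable address: the NetApp OUI 00:a0:98 followed by the first three MD5 bytes of "slot-func-vmname", so the same VM configuration gets the same MAC across runs. A standalone sketch of that scheme using FreeBSD's libmd (link with -lmd); the slot, function, and VM name below are illustrative:

#include <md5.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void
derive_mac(int slot, int func, const char *vmname, uint8_t mac[6])
{
	MD5_CTX ctx;
	unsigned char digest[16];
	char nstr[80];

	snprintf(nstr, sizeof(nstr), "%d-%d-%s", slot, func, vmname);
	MD5Init(&ctx);
	MD5Update(&ctx, nstr, (unsigned int)strlen(nstr));
	MD5Final(digest, &ctx);
	mac[0] = 0x00; mac[1] = 0xa0; mac[2] = 0x98;	/* NetApp OUI */
	memcpy(&mac[3], digest, 3);
}

int
main(void)
{
	uint8_t mac[6];

	derive_mac(3, 0, "guest", mac);		/* hypothetical VM name */
	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	    mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
	return (0);
}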
*/ #define E82545_VENDOR_ID_INTEL 0x8086 #define E82545_DEV_ID_82545EM_COPPER 0x100F #define E82545_SUBDEV_ID 0x1008 #define E82545_REVISION_4 4 #define E82545_MDIC_DATA_MASK 0x0000FFFF #define E82545_MDIC_OP_MASK 0x0c000000 #define E82545_MDIC_IE 0x20000000 #define E82545_EECD_FWE_DIS 0x00000010 /* Flash writes disabled */ #define E82545_EECD_FWE_EN 0x00000020 /* Flash writes enabled */ #define E82545_EECD_FWE_MASK 0x00000030 /* Flash writes mask */ #define E82545_BAR_REGISTER 0 #define E82545_BAR_REGISTER_LEN (128*1024) #define E82545_BAR_FLASH 1 #define E82545_BAR_FLASH_LEN (64*1024) #define E82545_BAR_IO 2 #define E82545_BAR_IO_LEN 8 #define E82545_IOADDR 0x00000000 #define E82545_IODATA 0x00000004 #define E82545_IO_REGISTER_MAX 0x0001FFFF #define E82545_IO_FLASH_BASE 0x00080000 #define E82545_IO_FLASH_MAX 0x000FFFFF #define E82545_ARRAY_ENTRY(reg, offset) (reg + (offset<<2)) #define E82545_RAR_MAX 15 #define E82545_MTA_MAX 127 #define E82545_VFTA_MAX 127 /* Slightly modified from the driver versions, hardcoded for 3 opcode bits, * followed by 6 address bits. * TODO: make opcode bits and addr bits configurable? * NVM Commands - Microwire */ #define E82545_NVM_OPCODE_BITS 3 #define E82545_NVM_ADDR_BITS 6 #define E82545_NVM_DATA_BITS 16 #define E82545_NVM_OPADDR_BITS (E82545_NVM_OPCODE_BITS + E82545_NVM_ADDR_BITS) #define E82545_NVM_ADDR_MASK ((1 << E82545_NVM_ADDR_BITS)-1) #define E82545_NVM_OPCODE_MASK \ (((1 << E82545_NVM_OPCODE_BITS) - 1) << E82545_NVM_ADDR_BITS) #define E82545_NVM_OPCODE_READ (0x6 << E82545_NVM_ADDR_BITS) /* read */ #define E82545_NVM_OPCODE_WRITE (0x5 << E82545_NVM_ADDR_BITS) /* write */ #define E82545_NVM_OPCODE_ERASE (0x7 << E82545_NVM_ADDR_BITS) /* erase */ #define E82545_NVM_OPCODE_EWEN (0x4 << E82545_NVM_ADDR_BITS) /* wr-enable */ #define E82545_NVM_EEPROM_SIZE 64 /* 64 * 16-bit values == 128K */ #define E1000_ICR_SRPD 0x00010000 /* This is an arbitrary number. There is no hard limit on the chip. 
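The EEPROM emulation shifts in a 9-bit Microwire word, 3 opcode bits followed by 6 address bits, before switching to data mode. A worked example of the masks defined above, with an illustrative address:

#include <stdio.h>

#define NVM_OPCODE_BITS	3
#define NVM_ADDR_BITS	6
#define NVM_ADDR_MASK	((1 << NVM_ADDR_BITS) - 1)
#define NVM_OPCODE_READ	(0x6 << NVM_ADDR_BITS)

int
main(void)
{
	unsigned opaddr = NVM_OPCODE_READ | 0x2a;	/* READ word 0x2a */

	printf("op=0x%x addr=0x%x\n",
	    opaddr >> NVM_ADDR_BITS, opaddr & NVM_ADDR_MASK);
	return (0);
}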
*/ #define I82545_MAX_TXSEGS 64 /* Legacy receive descriptor */ struct e1000_rx_desc { uint64_t buffer_addr; /* Address of the descriptor's data buffer */ uint16_t length; /* Length of data DMAed into data buffer */ uint16_t csum; /* Packet checksum */ uint8_t status; /* Descriptor status */ uint8_t errors; /* Descriptor Errors */ uint16_t special; }; /* Transmit descriptor types */ #define E1000_TXD_MASK (E1000_TXD_CMD_DEXT | 0x00F00000) #define E1000_TXD_TYP_L (0) #define E1000_TXD_TYP_C (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_C) #define E1000_TXD_TYP_D (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D) /* Legacy transmit descriptor */ struct e1000_tx_desc { uint64_t buffer_addr; /* Address of the descriptor's data buffer */ union { uint32_t data; struct { uint16_t length; /* Data buffer length */ uint8_t cso; /* Checksum offset */ uint8_t cmd; /* Descriptor control */ } flags; } lower; union { uint32_t data; struct { uint8_t status; /* Descriptor status */ uint8_t css; /* Checksum start */ uint16_t special; } fields; } upper; }; /* Context descriptor */ struct e1000_context_desc { union { uint32_t ip_config; struct { uint8_t ipcss; /* IP checksum start */ uint8_t ipcso; /* IP checksum offset */ uint16_t ipcse; /* IP checksum end */ } ip_fields; } lower_setup; union { uint32_t tcp_config; struct { uint8_t tucss; /* TCP checksum start */ uint8_t tucso; /* TCP checksum offset */ uint16_t tucse; /* TCP checksum end */ } tcp_fields; } upper_setup; uint32_t cmd_and_length; union { uint32_t data; struct { uint8_t status; /* Descriptor status */ uint8_t hdr_len; /* Header length */ uint16_t mss; /* Maximum segment size */ } fields; } tcp_seg_setup; }; /* Data descriptor */ struct e1000_data_desc { uint64_t buffer_addr; /* Address of the descriptor's buffer address */ union { uint32_t data; struct { uint16_t length; /* Data buffer length */ uint8_t typ_len_ext; uint8_t cmd; } flags; } lower; union { uint32_t data; struct { uint8_t status; /* Descriptor status */ uint8_t popts; /* Packet Options */ uint16_t special; } fields; } upper; }; union e1000_tx_udesc { struct e1000_tx_desc td; struct e1000_context_desc cd; struct e1000_data_desc dd; }; /* Tx checksum info for a packet. */ struct ck_info { int ck_valid; /* ck_info is valid */ uint8_t ck_start; /* start byte of cksum calcuation */ uint8_t ck_off; /* offset of cksum insertion */ uint16_t ck_len; /* length of cksum calc: 0 is to packet-end */ }; /* * Debug printf */ static int e82545_debug = 0; -#define DPRINTF(msg,params...) if (e82545_debug) fprintf(stderr, "e82545: " msg, params) -#define WPRINTF(msg,params...) fprintf(stderr, "e82545: " msg, params) +#define WPRINTF(msg,params...) PRINTLN("e82545: " msg, params) +#define DPRINTF(msg,params...) 
if (e82545_debug) WPRINTF(msg, params) #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) /* s/w representation of the RAL/RAH regs */ struct eth_uni { int eu_valid; int eu_addrsel; struct ether_addr eu_eth; }; struct e82545_softc { struct pci_devinst *esc_pi; struct vmctx *esc_ctx; struct mevent *esc_mevpitr; pthread_mutex_t esc_mtx; struct ether_addr esc_mac; net_backend_t *esc_be; /* General */ uint32_t esc_CTRL; /* x0000 device ctl */ uint32_t esc_FCAL; /* x0028 flow ctl addr lo */ uint32_t esc_FCAH; /* x002C flow ctl addr hi */ uint32_t esc_FCT; /* x0030 flow ctl type */ uint32_t esc_VET; /* x0038 VLAN eth type */ uint32_t esc_FCTTV; /* x0170 flow ctl tx timer */ uint32_t esc_LEDCTL; /* x0E00 LED control */ uint32_t esc_PBA; /* x1000 pkt buffer allocation */ /* Interrupt control */ int esc_irq_asserted; uint32_t esc_ICR; /* x00C0 cause read/clear */ uint32_t esc_ITR; /* x00C4 intr throttling */ uint32_t esc_ICS; /* x00C8 cause set */ uint32_t esc_IMS; /* x00D0 mask set/read */ uint32_t esc_IMC; /* x00D8 mask clear */ /* Transmit */ union e1000_tx_udesc *esc_txdesc; struct e1000_context_desc esc_txctx; pthread_t esc_tx_tid; pthread_cond_t esc_tx_cond; int esc_tx_enabled; int esc_tx_active; uint32_t esc_TXCW; /* x0178 transmit config */ uint32_t esc_TCTL; /* x0400 transmit ctl */ uint32_t esc_TIPG; /* x0410 inter-packet gap */ uint16_t esc_AIT; /* x0458 Adaptive Interframe Throttle */ uint64_t esc_tdba; /* verified 64-bit desc table addr */ uint32_t esc_TDBAL; /* x3800 desc table addr, low bits */ uint32_t esc_TDBAH; /* x3804 desc table addr, hi 32-bits */ uint32_t esc_TDLEN; /* x3808 # descriptors in bytes */ uint16_t esc_TDH; /* x3810 desc table head idx */ uint16_t esc_TDHr; /* internal read version of TDH */ uint16_t esc_TDT; /* x3818 desc table tail idx */ uint32_t esc_TIDV; /* x3820 intr delay */ uint32_t esc_TXDCTL; /* x3828 desc control */ uint32_t esc_TADV; /* x382C intr absolute delay */ /* L2 frame acceptance */ struct eth_uni esc_uni[16]; /* 16 x unicast MAC addresses */ uint32_t esc_fmcast[128]; /* Multicast filter bit-match */ uint32_t esc_fvlan[128]; /* VLAN 4096-bit filter */ /* Receive */ struct e1000_rx_desc *esc_rxdesc; pthread_cond_t esc_rx_cond; int esc_rx_enabled; int esc_rx_active; int esc_rx_loopback; uint32_t esc_RCTL; /* x0100 receive ctl */ uint32_t esc_FCRTL; /* x2160 flow cntl thresh, low */ uint32_t esc_FCRTH; /* x2168 flow cntl thresh, hi */ uint64_t esc_rdba; /* verified 64-bit desc table addr */ uint32_t esc_RDBAL; /* x2800 desc table addr, low bits */ uint32_t esc_RDBAH; /* x2804 desc table addr, hi 32-bits*/ uint32_t esc_RDLEN; /* x2808 #descriptors */ uint16_t esc_RDH; /* x2810 desc table head idx */ uint16_t esc_RDT; /* x2818 desc table tail idx */ uint32_t esc_RDTR; /* x2820 intr delay */ uint32_t esc_RXDCTL; /* x2828 desc control */ uint32_t esc_RADV; /* x282C intr absolute delay */ uint32_t esc_RSRPD; /* x2C00 recv small packet detect */ uint32_t esc_RXCSUM; /* x5000 receive cksum ctl */ /* IO Port register access */ uint32_t io_addr; /* Shadow copy of MDIC */ uint32_t mdi_control; /* Shadow copy of EECD */ uint32_t eeprom_control; /* Latest NVM in/out */ uint16_t nvm_data; uint16_t nvm_opaddr; /* stats */ uint32_t missed_pkt_count; /* dropped for no room in rx queue */ uint32_t pkt_rx_by_size[6]; uint32_t pkt_tx_by_size[6]; uint32_t good_pkt_rx_count; uint32_t bcast_pkt_rx_count; uint32_t mcast_pkt_rx_count; uint32_t good_pkt_tx_count; uint32_t bcast_pkt_tx_count; uint32_t mcast_pkt_tx_count; uint32_t oversize_rx_count; 
uint32_t tso_tx_count; uint64_t good_octets_rx; uint64_t good_octets_tx; uint64_t missed_octets; /* counts missed and oversized */ uint8_t nvm_bits:6; /* number of bits remaining in/out */ uint8_t nvm_mode:2; #define E82545_NVM_MODE_OPADDR 0x0 #define E82545_NVM_MODE_DATAIN 0x1 #define E82545_NVM_MODE_DATAOUT 0x2 /* EEPROM data */ uint16_t eeprom_data[E82545_NVM_EEPROM_SIZE]; }; static void e82545_reset(struct e82545_softc *sc, int dev); static void e82545_rx_enable(struct e82545_softc *sc); static void e82545_rx_disable(struct e82545_softc *sc); static void e82545_rx_callback(int fd, enum ev_type type, void *param); static void e82545_tx_start(struct e82545_softc *sc); static void e82545_tx_enable(struct e82545_softc *sc); static void e82545_tx_disable(struct e82545_softc *sc); static inline int e82545_size_stat_index(uint32_t size) { if (size <= 64) { return 0; } else if (size >= 1024) { return 5; } else { /* should be 1-4 */ return (ffs(size) - 6); } } static void e82545_init_eeprom(struct e82545_softc *sc) { uint16_t checksum, i; /* mac addr */ sc->eeprom_data[NVM_MAC_ADDR] = ((uint16_t)sc->esc_mac.octet[0]) | (((uint16_t)sc->esc_mac.octet[1]) << 8); sc->eeprom_data[NVM_MAC_ADDR+1] = ((uint16_t)sc->esc_mac.octet[2]) | (((uint16_t)sc->esc_mac.octet[3]) << 8); sc->eeprom_data[NVM_MAC_ADDR+2] = ((uint16_t)sc->esc_mac.octet[4]) | (((uint16_t)sc->esc_mac.octet[5]) << 8); /* pci ids */ sc->eeprom_data[NVM_SUB_DEV_ID] = E82545_SUBDEV_ID; sc->eeprom_data[NVM_SUB_VEN_ID] = E82545_VENDOR_ID_INTEL; sc->eeprom_data[NVM_DEV_ID] = E82545_DEV_ID_82545EM_COPPER; sc->eeprom_data[NVM_VEN_ID] = E82545_VENDOR_ID_INTEL; /* fill in the checksum */ checksum = 0; for (i = 0; i < NVM_CHECKSUM_REG; i++) { checksum += sc->eeprom_data[i]; } checksum = NVM_SUM - checksum; sc->eeprom_data[NVM_CHECKSUM_REG] = checksum; - DPRINTF("eeprom checksum: 0x%x\r\n", checksum); + DPRINTF("eeprom checksum: 0x%x", checksum); } static void e82545_write_mdi(struct e82545_softc *sc, uint8_t reg_addr, uint8_t phy_addr, uint32_t data) { - DPRINTF("Write mdi reg:0x%x phy:0x%x data: 0x%x\r\n", reg_addr, phy_addr, data); + DPRINTF("Write mdi reg:0x%x phy:0x%x data: 0x%x", reg_addr, phy_addr, data); } static uint32_t e82545_read_mdi(struct e82545_softc *sc, uint8_t reg_addr, uint8_t phy_addr) { - //DPRINTF("Read mdi reg:0x%x phy:0x%x\r\n", reg_addr, phy_addr); + //DPRINTF("Read mdi reg:0x%x phy:0x%x", reg_addr, phy_addr); switch (reg_addr) { case PHY_STATUS: return (MII_SR_LINK_STATUS | MII_SR_AUTONEG_CAPS | MII_SR_AUTONEG_COMPLETE); case PHY_AUTONEG_ADV: return NWAY_AR_SELECTOR_FIELD; case PHY_LP_ABILITY: return 0; case PHY_1000T_STATUS: return (SR_1000T_LP_FD_CAPS | SR_1000T_REMOTE_RX_STATUS | SR_1000T_LOCAL_RX_STATUS); case PHY_ID1: return (M88E1011_I_PHY_ID >> 16) & 0xFFFF; case PHY_ID2: return (M88E1011_I_PHY_ID | E82545_REVISION_4) & 0xFFFF; default: - DPRINTF("Unknown mdi read reg:0x%x phy:0x%x\r\n", reg_addr, phy_addr); + DPRINTF("Unknown mdi read reg:0x%x phy:0x%x", reg_addr, phy_addr); return 0; } /* not reached */ } static void e82545_eecd_strobe(struct e82545_softc *sc) { /* Microwire state machine */ /* DPRINTF("eeprom state machine srtobe " - "0x%x 0x%x 0x%x 0x%x\r\n", + "0x%x 0x%x 0x%x 0x%x", sc->nvm_mode, sc->nvm_bits, sc->nvm_opaddr, sc->nvm_data);*/ if (sc->nvm_bits == 0) { DPRINTF("eeprom state machine not expecting data! 
" - "0x%x 0x%x 0x%x 0x%x\r\n", + "0x%x 0x%x 0x%x 0x%x", sc->nvm_mode, sc->nvm_bits, sc->nvm_opaddr, sc->nvm_data); return; } sc->nvm_bits--; if (sc->nvm_mode == E82545_NVM_MODE_DATAOUT) { /* shifting out */ if (sc->nvm_data & 0x8000) { sc->eeprom_control |= E1000_EECD_DO; } else { sc->eeprom_control &= ~E1000_EECD_DO; } sc->nvm_data <<= 1; if (sc->nvm_bits == 0) { /* read done, back to opcode mode. */ sc->nvm_opaddr = 0; sc->nvm_mode = E82545_NVM_MODE_OPADDR; sc->nvm_bits = E82545_NVM_OPADDR_BITS; } } else if (sc->nvm_mode == E82545_NVM_MODE_DATAIN) { /* shifting in */ sc->nvm_data <<= 1; if (sc->eeprom_control & E1000_EECD_DI) { sc->nvm_data |= 1; } if (sc->nvm_bits == 0) { /* eeprom write */ uint16_t op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK; uint16_t addr = sc->nvm_opaddr & E82545_NVM_ADDR_MASK; if (op != E82545_NVM_OPCODE_WRITE) { - DPRINTF("Illegal eeprom write op 0x%x\r\n", + DPRINTF("Illegal eeprom write op 0x%x", sc->nvm_opaddr); } else if (addr >= E82545_NVM_EEPROM_SIZE) { - DPRINTF("Illegal eeprom write addr 0x%x\r\n", + DPRINTF("Illegal eeprom write addr 0x%x", sc->nvm_opaddr); } else { - DPRINTF("eeprom write eeprom[0x%x] = 0x%x\r\n", + DPRINTF("eeprom write eeprom[0x%x] = 0x%x", addr, sc->nvm_data); sc->eeprom_data[addr] = sc->nvm_data; } /* back to opcode mode */ sc->nvm_opaddr = 0; sc->nvm_mode = E82545_NVM_MODE_OPADDR; sc->nvm_bits = E82545_NVM_OPADDR_BITS; } } else if (sc->nvm_mode == E82545_NVM_MODE_OPADDR) { sc->nvm_opaddr <<= 1; if (sc->eeprom_control & E1000_EECD_DI) { sc->nvm_opaddr |= 1; } if (sc->nvm_bits == 0) { uint16_t op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK; switch (op) { case E82545_NVM_OPCODE_EWEN: - DPRINTF("eeprom write enable: 0x%x\r\n", + DPRINTF("eeprom write enable: 0x%x", sc->nvm_opaddr); /* back to opcode mode */ sc->nvm_opaddr = 0; sc->nvm_mode = E82545_NVM_MODE_OPADDR; sc->nvm_bits = E82545_NVM_OPADDR_BITS; break; case E82545_NVM_OPCODE_READ: { uint16_t addr = sc->nvm_opaddr & E82545_NVM_ADDR_MASK; sc->nvm_mode = E82545_NVM_MODE_DATAOUT; sc->nvm_bits = E82545_NVM_DATA_BITS; if (addr < E82545_NVM_EEPROM_SIZE) { sc->nvm_data = sc->eeprom_data[addr]; - DPRINTF("eeprom read: eeprom[0x%x] = 0x%x\r\n", + DPRINTF("eeprom read: eeprom[0x%x] = 0x%x", addr, sc->nvm_data); } else { - DPRINTF("eeprom illegal read: 0x%x\r\n", + DPRINTF("eeprom illegal read: 0x%x", sc->nvm_opaddr); sc->nvm_data = 0; } break; } case E82545_NVM_OPCODE_WRITE: sc->nvm_mode = E82545_NVM_MODE_DATAIN; sc->nvm_bits = E82545_NVM_DATA_BITS; sc->nvm_data = 0; break; default: - DPRINTF("eeprom unknown op: 0x%x\r\r", + DPRINTF("eeprom unknown op: 0x%x", sc->nvm_opaddr); /* back to opcode mode */ sc->nvm_opaddr = 0; sc->nvm_mode = E82545_NVM_MODE_OPADDR; sc->nvm_bits = E82545_NVM_OPADDR_BITS; } } } else { DPRINTF("eeprom state machine wrong state! 
" - "0x%x 0x%x 0x%x 0x%x\r\n", + "0x%x 0x%x 0x%x 0x%x", sc->nvm_mode, sc->nvm_bits, sc->nvm_opaddr, sc->nvm_data); } } static void e82545_itr_callback(int fd, enum ev_type type, void *param) { uint32_t new; struct e82545_softc *sc = param; pthread_mutex_lock(&sc->esc_mtx); new = sc->esc_ICR & sc->esc_IMS; if (new && !sc->esc_irq_asserted) { - DPRINTF("itr callback: lintr assert %x\r\n", new); + DPRINTF("itr callback: lintr assert %x", new); sc->esc_irq_asserted = 1; pci_lintr_assert(sc->esc_pi); } else { mevent_delete(sc->esc_mevpitr); sc->esc_mevpitr = NULL; } pthread_mutex_unlock(&sc->esc_mtx); } static void e82545_icr_assert(struct e82545_softc *sc, uint32_t bits) { uint32_t new; - DPRINTF("icr assert: 0x%x\r\n", bits); + DPRINTF("icr assert: 0x%x", bits); /* * An interrupt is only generated if bits are set that * aren't already in the ICR, these bits are unmasked, * and there isn't an interrupt already pending. */ new = bits & ~sc->esc_ICR & sc->esc_IMS; sc->esc_ICR |= bits; if (new == 0) { - DPRINTF("icr assert: masked %x, ims %x\r\n", new, sc->esc_IMS); + DPRINTF("icr assert: masked %x, ims %x", new, sc->esc_IMS); } else if (sc->esc_mevpitr != NULL) { - DPRINTF("icr assert: throttled %x, ims %x\r\n", new, sc->esc_IMS); + DPRINTF("icr assert: throttled %x, ims %x", new, sc->esc_IMS); } else if (!sc->esc_irq_asserted) { - DPRINTF("icr assert: lintr assert %x\r\n", new); + DPRINTF("icr assert: lintr assert %x", new); sc->esc_irq_asserted = 1; pci_lintr_assert(sc->esc_pi); if (sc->esc_ITR != 0) { sc->esc_mevpitr = mevent_add( (sc->esc_ITR + 3905) / 3906, /* 256ns -> 1ms */ EVF_TIMER, e82545_itr_callback, sc); } } } static void e82545_ims_change(struct e82545_softc *sc, uint32_t bits) { uint32_t new; /* * Changing the mask may allow previously asserted * but masked interrupt requests to generate an interrupt. 
*/ new = bits & sc->esc_ICR & ~sc->esc_IMS; sc->esc_IMS |= bits; if (new == 0) { - DPRINTF("ims change: masked %x, ims %x\r\n", new, sc->esc_IMS); + DPRINTF("ims change: masked %x, ims %x", new, sc->esc_IMS); } else if (sc->esc_mevpitr != NULL) { - DPRINTF("ims change: throttled %x, ims %x\r\n", new, sc->esc_IMS); + DPRINTF("ims change: throttled %x, ims %x", new, sc->esc_IMS); } else if (!sc->esc_irq_asserted) { - DPRINTF("ims change: lintr assert %x\n\r", new); + DPRINTF("ims change: lintr assert %x", new); sc->esc_irq_asserted = 1; pci_lintr_assert(sc->esc_pi); if (sc->esc_ITR != 0) { sc->esc_mevpitr = mevent_add( (sc->esc_ITR + 3905) / 3906, /* 256ns -> 1ms */ EVF_TIMER, e82545_itr_callback, sc); } } } static void e82545_icr_deassert(struct e82545_softc *sc, uint32_t bits) { - DPRINTF("icr deassert: 0x%x\r\n", bits); + DPRINTF("icr deassert: 0x%x", bits); sc->esc_ICR &= ~bits; /* * If there are no longer any interrupt sources and there * was an asserted interrupt, clear it */ if (sc->esc_irq_asserted && !(sc->esc_ICR & sc->esc_IMS)) { - DPRINTF("icr deassert: lintr deassert %x\r\n", bits); + DPRINTF("icr deassert: lintr deassert %x", bits); pci_lintr_deassert(sc->esc_pi); sc->esc_irq_asserted = 0; } } static void e82545_intr_write(struct e82545_softc *sc, uint32_t offset, uint32_t value) { - DPRINTF("intr_write: off %x, val %x\n\r", offset, value); + DPRINTF("intr_write: off %x, val %x", offset, value); switch (offset) { case E1000_ICR: e82545_icr_deassert(sc, value); break; case E1000_ITR: sc->esc_ITR = value; break; case E1000_ICS: sc->esc_ICS = value; /* not used: store for debug */ e82545_icr_assert(sc, value); break; case E1000_IMS: e82545_ims_change(sc, value); break; case E1000_IMC: sc->esc_IMC = value; /* for debug */ sc->esc_IMS &= ~value; // XXX clear interrupts if all ICR bits now masked // and interrupt was pending ? break; default: break; } } static uint32_t e82545_intr_read(struct e82545_softc *sc, uint32_t offset) { uint32_t retval; retval = 0; - DPRINTF("intr_read: off %x\n\r", offset); + DPRINTF("intr_read: off %x", offset); switch (offset) { case E1000_ICR: retval = sc->esc_ICR; sc->esc_ICR = 0; e82545_icr_deassert(sc, ~0); break; case E1000_ITR: retval = sc->esc_ITR; break; case E1000_ICS: /* write-only register */ break; case E1000_IMS: retval = sc->esc_IMS; break; case E1000_IMC: /* write-only register */ break; default: break; } return (retval); } static void e82545_devctl(struct e82545_softc *sc, uint32_t val) { sc->esc_CTRL = val & ~E1000_CTRL_RST; if (val & E1000_CTRL_RST) { - DPRINTF("e1k: s/w reset, ctl %x\n", val); + DPRINTF("e1k: s/w reset, ctl %x", val); e82545_reset(sc, 1); } /* XXX check for phy reset ? */ } static void e82545_rx_update_rdba(struct e82545_softc *sc) { /* XXX verify desc base/len within phys mem range */ sc->esc_rdba = (uint64_t)sc->esc_RDBAH << 32 | sc->esc_RDBAL; /* Cache host mapping of guest descriptor array */ sc->esc_rxdesc = paddr_guest2host(sc->esc_ctx, sc->esc_rdba, sc->esc_RDLEN); } static void e82545_rx_ctl(struct e82545_softc *sc, uint32_t val) { int on; on = ((val & E1000_RCTL_EN) == E1000_RCTL_EN); /* Save RCTL after stripping reserved bits 31:27,24,21,14,11:10,0 */ sc->esc_RCTL = val & ~0xF9204c01; - DPRINTF("rx_ctl - %s RCTL %x, val %x\n", + DPRINTF("rx_ctl - %s RCTL %x, val %x", on ? 
"on" : "off", sc->esc_RCTL, val); /* state change requested */ if (on != sc->esc_rx_enabled) { if (on) { /* Catch disallowed/unimplemented settings */ //assert(!(val & E1000_RCTL_LBM_TCVR)); if (sc->esc_RCTL & E1000_RCTL_LBM_TCVR) { sc->esc_rx_loopback = 1; } else { sc->esc_rx_loopback = 0; } e82545_rx_update_rdba(sc); e82545_rx_enable(sc); } else { e82545_rx_disable(sc); sc->esc_rx_loopback = 0; sc->esc_rdba = 0; sc->esc_rxdesc = NULL; } } } static void e82545_tx_update_tdba(struct e82545_softc *sc) { /* XXX verify desc base/len within phys mem range */ sc->esc_tdba = (uint64_t)sc->esc_TDBAH << 32 | sc->esc_TDBAL; /* Cache host mapping of guest descriptor array */ sc->esc_txdesc = paddr_guest2host(sc->esc_ctx, sc->esc_tdba, sc->esc_TDLEN); } static void e82545_tx_ctl(struct e82545_softc *sc, uint32_t val) { int on; on = ((val & E1000_TCTL_EN) == E1000_TCTL_EN); /* ignore TCTL_EN settings that don't change state */ if (on == sc->esc_tx_enabled) return; if (on) { e82545_tx_update_tdba(sc); e82545_tx_enable(sc); } else { e82545_tx_disable(sc); sc->esc_tdba = 0; sc->esc_txdesc = NULL; } /* Save TCTL value after stripping reserved bits 31:25,23,2,0 */ sc->esc_TCTL = val & ~0xFE800005; } int e82545_bufsz(uint32_t rctl) { switch (rctl & (E1000_RCTL_BSEX | E1000_RCTL_SZ_256)) { case (E1000_RCTL_SZ_2048): return (2048); case (E1000_RCTL_SZ_1024): return (1024); case (E1000_RCTL_SZ_512): return (512); case (E1000_RCTL_SZ_256): return (256); case (E1000_RCTL_BSEX|E1000_RCTL_SZ_16384): return (16384); case (E1000_RCTL_BSEX|E1000_RCTL_SZ_8192): return (8192); case (E1000_RCTL_BSEX|E1000_RCTL_SZ_4096): return (4096); } return (256); /* Forbidden value. */ } /* XXX one packet at a time until this is debugged */ static void e82545_rx_callback(int fd, enum ev_type type, void *param) { struct e82545_softc *sc = param; struct e1000_rx_desc *rxd; struct iovec vec[64]; int left, len, lim, maxpktsz, maxpktdesc, bufsz, i, n, size; uint32_t cause = 0; uint16_t *tp, tag, head; pthread_mutex_lock(&sc->esc_mtx); - DPRINTF("rx_run: head %x, tail %x\r\n", sc->esc_RDH, sc->esc_RDT); + DPRINTF("rx_run: head %x, tail %x", sc->esc_RDH, sc->esc_RDT); if (!sc->esc_rx_enabled || sc->esc_rx_loopback) { - DPRINTF("rx disabled (!%d || %d) -- packet(s) dropped\r\n", + DPRINTF("rx disabled (!%d || %d) -- packet(s) dropped", sc->esc_rx_enabled, sc->esc_rx_loopback); while (netbe_rx_discard(sc->esc_be) > 0) { } goto done1; } bufsz = e82545_bufsz(sc->esc_RCTL); maxpktsz = (sc->esc_RCTL & E1000_RCTL_LPE) ? 16384 : 1522; maxpktdesc = (maxpktsz + bufsz - 1) / bufsz; size = sc->esc_RDLEN / 16; head = sc->esc_RDH; left = (size + sc->esc_RDT - head) % size; if (left < maxpktdesc) { - DPRINTF("rx overflow (%d < %d) -- packet(s) dropped\r\n", + DPRINTF("rx overflow (%d < %d) -- packet(s) dropped", left, maxpktdesc); while (netbe_rx_discard(sc->esc_be) > 0) { } goto done1; } sc->esc_rx_active = 1; pthread_mutex_unlock(&sc->esc_mtx); for (lim = size / 4; lim > 0 && left >= maxpktdesc; lim -= n) { /* Grab rx descriptor pointed to by the head pointer */ for (i = 0; i < maxpktdesc; i++) { rxd = &sc->esc_rxdesc[(head + i) % size]; vec[i].iov_base = paddr_guest2host(sc->esc_ctx, rxd->buffer_addr, bufsz); vec[i].iov_len = bufsz; } len = netbe_recv(sc->esc_be, vec, maxpktdesc); if (len <= 0) { - DPRINTF("netbe_recv() returned %d\n", len); + DPRINTF("netbe_recv() returned %d", len); goto done; } /* * Adjust the packet length based on whether the CRC needs * to be stripped or if the packet is less than the minimum * eth packet size. 
*/ if (len < ETHER_MIN_LEN - ETHER_CRC_LEN) len = ETHER_MIN_LEN - ETHER_CRC_LEN; if (!(sc->esc_RCTL & E1000_RCTL_SECRC)) len += ETHER_CRC_LEN; n = (len + bufsz - 1) / bufsz; - DPRINTF("packet read %d bytes, %d segs, head %d\r\n", + DPRINTF("packet read %d bytes, %d segs, head %d", len, n, head); /* Apply VLAN filter. */ tp = (uint16_t *)vec[0].iov_base + 6; if ((sc->esc_RCTL & E1000_RCTL_VFE) && (ntohs(tp[0]) == sc->esc_VET)) { tag = ntohs(tp[1]) & 0x0fff; if ((sc->esc_fvlan[tag >> 5] & (1 << (tag & 0x1f))) != 0) { - DPRINTF("known VLAN %d\r\n", tag); + DPRINTF("known VLAN %d", tag); } else { - DPRINTF("unknown VLAN %d\r\n", tag); + DPRINTF("unknown VLAN %d", tag); n = 0; continue; } } /* Update all consumed descriptors. */ for (i = 0; i < n - 1; i++) { rxd = &sc->esc_rxdesc[(head + i) % size]; rxd->length = bufsz; rxd->csum = 0; rxd->errors = 0; rxd->special = 0; rxd->status = E1000_RXD_STAT_DD; } rxd = &sc->esc_rxdesc[(head + i) % size]; rxd->length = len % bufsz; rxd->csum = 0; rxd->errors = 0; rxd->special = 0; /* XXX signal no checksum for now */ rxd->status = E1000_RXD_STAT_PIF | E1000_RXD_STAT_IXSM | E1000_RXD_STAT_EOP | E1000_RXD_STAT_DD; /* Schedule receive interrupts. */ if (len <= sc->esc_RSRPD) { cause |= E1000_ICR_SRPD | E1000_ICR_RXT0; } else { /* XXX: RDRT and RADV timers should be here. */ cause |= E1000_ICR_RXT0; } head = (head + n) % size; left -= n; } done: pthread_mutex_lock(&sc->esc_mtx); sc->esc_rx_active = 0; if (sc->esc_rx_enabled == 0) pthread_cond_signal(&sc->esc_rx_cond); sc->esc_RDH = head; /* Respect E1000_RCTL_RDMTS */ left = (size + sc->esc_RDT - head) % size; if (left < (size >> (((sc->esc_RCTL >> 8) & 3) + 1))) cause |= E1000_ICR_RXDMT0; /* Assert all accumulated interrupts. */ if (cause != 0) e82545_icr_assert(sc, cause); done1: - DPRINTF("rx_run done: head %x, tail %x\r\n", sc->esc_RDH, sc->esc_RDT); + DPRINTF("rx_run done: head %x, tail %x", sc->esc_RDH, sc->esc_RDT); pthread_mutex_unlock(&sc->esc_mtx); } static uint16_t e82545_carry(uint32_t sum) { sum = (sum & 0xFFFF) + (sum >> 16); if (sum > 0xFFFF) sum -= 0xFFFF; return (sum); } static uint16_t e82545_buf_checksum(uint8_t *buf, int len) { int i; uint32_t sum = 0; /* Checksum all the pairs of bytes first... */ for (i = 0; i < (len & ~1U); i += 2) sum += *((u_int16_t *)(buf + i)); /* * If there's a single byte left over, checksum it, too. * Network byte order is big-endian, so the remaining byte is * the high byte. */ if (i < len) sum += htons(buf[i] << 8); return (e82545_carry(sum)); } static uint16_t e82545_iov_checksum(struct iovec *iov, int iovcnt, int off, int len) { int now, odd; uint32_t sum = 0, s; /* Skip completely unneeded vectors. */ while (iovcnt > 0 && iov->iov_len <= off && off > 0) { off -= iov->iov_len; iov++; iovcnt--; } /* Calculate checksum of requested range. */ odd = 0; while (len > 0 && iovcnt > 0) { now = MIN(len, iov->iov_len - off); s = e82545_buf_checksum(iov->iov_base + off, now); sum += odd ? (s << 8) : s; odd ^= (now & 1); len -= now; off = 0; iov++; iovcnt--; } return (e82545_carry(sum)); } /* * Return the transmit descriptor type. */ int e82545_txdesc_type(uint32_t lower) { int type; type = 0; if (lower & E1000_TXD_CMD_DEXT) type = lower & E1000_TXD_MASK; return (type); } static void e82545_transmit_checksum(struct iovec *iov, int iovcnt, struct ck_info *ck) { uint16_t cksum; int cklen; - DPRINTF("tx cksum: iovcnt/s/off/len %d/%d/%d/%d\r\n", + DPRINTF("tx cksum: iovcnt/s/off/len %d/%d/%d/%d", iovcnt, ck->ck_start, ck->ck_off, ck->ck_len); cklen = ck->ck_len ? 
ck->ck_len - ck->ck_start + 1 : INT_MAX; cksum = e82545_iov_checksum(iov, iovcnt, ck->ck_start, cklen); *(uint16_t *)((uint8_t *)iov[0].iov_base + ck->ck_off) = ~cksum; } static void e82545_transmit_backend(struct e82545_softc *sc, struct iovec *iov, int iovcnt) { if (sc->esc_be == NULL) return; (void) netbe_send(sc->esc_be, iov, iovcnt); } static void e82545_transmit_done(struct e82545_softc *sc, uint16_t head, uint16_t tail, uint16_t dsize, int *tdwb) { union e1000_tx_udesc *dsc; for ( ; head != tail; head = (head + 1) % dsize) { dsc = &sc->esc_txdesc[head]; if (dsc->td.lower.data & E1000_TXD_CMD_RS) { dsc->td.upper.data |= E1000_TXD_STAT_DD; *tdwb = 1; } } } static int e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, uint16_t dsize, uint16_t *rhead, int *tdwb) { uint8_t *hdr, *hdrp; struct iovec iovb[I82545_MAX_TXSEGS + 2]; struct iovec tiov[I82545_MAX_TXSEGS + 2]; struct e1000_context_desc *cd; struct ck_info ckinfo[2]; struct iovec *iov; union e1000_tx_udesc *dsc; int desc, dtype, len, ntype, iovcnt, tlen, tcp, tso; int mss, paylen, seg, tiovcnt, left, now, nleft, nnow, pv, pvoff; unsigned hdrlen, vlen; uint32_t tcpsum, tcpseq; uint16_t ipcs, tcpcs, ipid, ohead; ckinfo[0].ck_valid = ckinfo[1].ck_valid = 0; iovcnt = 0; tlen = 0; ntype = 0; tso = 0; ohead = head; /* iovb[0/1] may be used for writable copy of headers. */ iov = &iovb[2]; for (desc = 0; ; desc++, head = (head + 1) % dsize) { if (head == tail) { *rhead = head; return (0); } dsc = &sc->esc_txdesc[head]; dtype = e82545_txdesc_type(dsc->td.lower.data); if (desc == 0) { switch (dtype) { case E1000_TXD_TYP_C: DPRINTF("tx ctxt desc idx %d: %016jx " - "%08x%08x\r\n", + "%08x%08x", head, dsc->td.buffer_addr, dsc->td.upper.data, dsc->td.lower.data); /* Save context and return */ sc->esc_txctx = dsc->cd; goto done; case E1000_TXD_TYP_L: - DPRINTF("tx legacy desc idx %d: %08x%08x\r\n", + DPRINTF("tx legacy desc idx %d: %08x%08x", head, dsc->td.upper.data, dsc->td.lower.data); /* * legacy cksum start valid in first descriptor */ ntype = dtype; ckinfo[0].ck_start = dsc->td.upper.fields.css; break; case E1000_TXD_TYP_D: - DPRINTF("tx data desc idx %d: %08x%08x\r\n", + DPRINTF("tx data desc idx %d: %08x%08x", head, dsc->td.upper.data, dsc->td.lower.data); ntype = dtype; break; default: break; } } else { /* Descriptor type must be consistent */ assert(dtype == ntype); - DPRINTF("tx next desc idx %d: %08x%08x\r\n", + DPRINTF("tx next desc idx %d: %08x%08x", head, dsc->td.upper.data, dsc->td.lower.data); } len = (dtype == E1000_TXD_TYP_L) ? dsc->td.lower.flags.length : dsc->dd.lower.data & 0xFFFFF; if (len > 0) { /* Strip checksum supplied by guest. */ if ((dsc->td.lower.data & E1000_TXD_CMD_EOP) != 0 && (dsc->td.lower.data & E1000_TXD_CMD_IFCS) == 0) len -= 2; tlen += len; if (iovcnt < I82545_MAX_TXSEGS) { iov[iovcnt].iov_base = paddr_guest2host( sc->esc_ctx, dsc->td.buffer_addr, len); iov[iovcnt].iov_len = len; } iovcnt++; } /* * Pull out info that is valid in the final descriptor * and exit descriptor loop. 
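
The checksum helpers earlier in this hunk (e82545_carry(), e82545_buf_checksum(), e82545_iov_checksum()) compute the standard Internet (RFC 1071) ones'-complement sum; e82545_carry() is the end-around-carry fold. A self-contained equivalent for a flat buffer of 16-bit words (illustrative only; like the code above, it returns the folded sum, and the caller stores its bitwise complement into the packet):

    #include <stddef.h>
    #include <stdint.h>

    static uint16_t
    inet_cksum16(const uint16_t *w, size_t nwords)
    {
            uint32_t sum = 0;

            while (nwords-- > 0)
                    sum += *w++;
            sum = (sum & 0xFFFF) + (sum >> 16);     /* fold carries once */
            sum = (sum & 0xFFFF) + (sum >> 16);     /* and any new carry */
            return ((uint16_t)sum);
    }
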
*/ if (dsc->td.lower.data & E1000_TXD_CMD_EOP) { if (dtype == E1000_TXD_TYP_L) { if (dsc->td.lower.data & E1000_TXD_CMD_IC) { ckinfo[0].ck_valid = 1; ckinfo[0].ck_off = dsc->td.lower.flags.cso; ckinfo[0].ck_len = 0; } } else { cd = &sc->esc_txctx; if (dsc->dd.lower.data & E1000_TXD_CMD_TSE) tso = 1; if (dsc->dd.upper.fields.popts & E1000_TXD_POPTS_IXSM) ckinfo[0].ck_valid = 1; if (dsc->dd.upper.fields.popts & E1000_TXD_POPTS_IXSM || tso) { ckinfo[0].ck_start = cd->lower_setup.ip_fields.ipcss; ckinfo[0].ck_off = cd->lower_setup.ip_fields.ipcso; ckinfo[0].ck_len = cd->lower_setup.ip_fields.ipcse; } if (dsc->dd.upper.fields.popts & E1000_TXD_POPTS_TXSM) ckinfo[1].ck_valid = 1; if (dsc->dd.upper.fields.popts & E1000_TXD_POPTS_TXSM || tso) { ckinfo[1].ck_start = cd->upper_setup.tcp_fields.tucss; ckinfo[1].ck_off = cd->upper_setup.tcp_fields.tucso; ckinfo[1].ck_len = cd->upper_setup.tcp_fields.tucse; } } break; } } if (iovcnt > I82545_MAX_TXSEGS) { - WPRINTF("tx too many descriptors (%d > %d) -- dropped\r\n", + WPRINTF("tx too many descriptors (%d > %d) -- dropped", iovcnt, I82545_MAX_TXSEGS); goto done; } hdrlen = vlen = 0; /* Estimate writable space for VLAN header insertion. */ if ((sc->esc_CTRL & E1000_CTRL_VME) && (dsc->td.lower.data & E1000_TXD_CMD_VLE)) { hdrlen = ETHER_ADDR_LEN*2; vlen = ETHER_VLAN_ENCAP_LEN; } if (!tso) { /* Estimate required writable space for checksums. */ if (ckinfo[0].ck_valid) hdrlen = MAX(hdrlen, ckinfo[0].ck_off + 2); if (ckinfo[1].ck_valid) hdrlen = MAX(hdrlen, ckinfo[1].ck_off + 2); /* Round up writable space to the first vector. */ if (hdrlen != 0 && iov[0].iov_len > hdrlen && iov[0].iov_len < hdrlen + 100) hdrlen = iov[0].iov_len; } else { /* In the TSO case, the header length is provided by software. */ hdrlen = sc->esc_txctx.tcp_seg_setup.fields.hdr_len; /* * Cap the header length at 240 based on 7.2.4.5 of * the Intel 82576EB (Rev 2.63) datasheet. */ if (hdrlen > 240) { - WPRINTF("TSO hdrlen too large: %d\r\n", hdrlen); + WPRINTF("TSO hdrlen too large: %d", hdrlen); goto done; } /* * If VLAN insertion is requested, ensure the header * at least holds the amount of data copied during * VLAN insertion below. * * XXX: Realistic packets will include a full Ethernet * header before the IP header at ckinfo[0].ck_start, * but this check is sufficient to prevent * out-of-bounds access below. */ if (vlen != 0 && hdrlen < ETHER_ADDR_LEN*2) { WPRINTF("TSO hdrlen too small for vlan insertion " - "(%d vs %d) -- dropped\r\n", hdrlen, + "(%d vs %d) -- dropped", hdrlen, ETHER_ADDR_LEN*2); goto done; } /* * Ensure that the header length covers the used fields * in the IP and TCP headers as well as the IP and TCP * checksums.
The following fields are accessed below: * * Header | Field | Offset | Length * -------+-------+--------+------- * IPv4 | len | 2 | 2 * IPv4 | ID | 4 | 2 * IPv6 | len | 4 | 2 * TCP | seq # | 4 | 4 * TCP | flags | 13 | 1 * UDP | len | 4 | 4 */ if (hdrlen < ckinfo[0].ck_start + 6 || hdrlen < ckinfo[0].ck_off + 2) { WPRINTF("TSO hdrlen too small for IP fields (%d) " - "-- dropped\r\n", hdrlen); + "-- dropped", hdrlen); goto done; } if (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_TCP) { if (hdrlen < ckinfo[1].ck_start + 14 || (ckinfo[1].ck_valid && hdrlen < ckinfo[1].ck_off + 2)) { WPRINTF("TSO hdrlen too small for TCP fields " - "(%d) -- dropped\r\n", hdrlen); + "(%d) -- dropped", hdrlen); goto done; } } else { if (hdrlen < ckinfo[1].ck_start + 8) { WPRINTF("TSO hdrlen too small for UDP fields " - "(%d) -- dropped\r\n", hdrlen); + "(%d) -- dropped", hdrlen); goto done; } } } /* Allocate, fill and prepend writable header vector. */ if (hdrlen != 0) { hdr = __builtin_alloca(hdrlen + vlen); hdr += vlen; for (left = hdrlen, hdrp = hdr; left > 0; left -= now, hdrp += now) { now = MIN(left, iov->iov_len); memcpy(hdrp, iov->iov_base, now); iov->iov_base += now; iov->iov_len -= now; if (iov->iov_len == 0) { iov++; iovcnt--; } } iov--; iovcnt++; iov->iov_base = hdr; iov->iov_len = hdrlen; } else hdr = NULL; /* Insert VLAN tag. */ if (vlen != 0) { hdr -= ETHER_VLAN_ENCAP_LEN; memmove(hdr, hdr + ETHER_VLAN_ENCAP_LEN, ETHER_ADDR_LEN*2); hdrlen += ETHER_VLAN_ENCAP_LEN; hdr[ETHER_ADDR_LEN*2 + 0] = sc->esc_VET >> 8; hdr[ETHER_ADDR_LEN*2 + 1] = sc->esc_VET & 0xff; hdr[ETHER_ADDR_LEN*2 + 2] = dsc->td.upper.fields.special >> 8; hdr[ETHER_ADDR_LEN*2 + 3] = dsc->td.upper.fields.special & 0xff; iov->iov_base = hdr; iov->iov_len += ETHER_VLAN_ENCAP_LEN; /* Correct checksum offsets after VLAN tag insertion. */ ckinfo[0].ck_start += ETHER_VLAN_ENCAP_LEN; ckinfo[0].ck_off += ETHER_VLAN_ENCAP_LEN; if (ckinfo[0].ck_len != 0) ckinfo[0].ck_len += ETHER_VLAN_ENCAP_LEN; ckinfo[1].ck_start += ETHER_VLAN_ENCAP_LEN; ckinfo[1].ck_off += ETHER_VLAN_ENCAP_LEN; if (ckinfo[1].ck_len != 0) ckinfo[1].ck_len += ETHER_VLAN_ENCAP_LEN; } /* Simple non-TSO case. */ if (!tso) { /* Calculate checksums and transmit. */ if (ckinfo[0].ck_valid) e82545_transmit_checksum(iov, iovcnt, &ckinfo[0]); if (ckinfo[1].ck_valid) e82545_transmit_checksum(iov, iovcnt, &ckinfo[1]); e82545_transmit_backend(sc, iov, iovcnt); goto done; } /* Doing TSO. */ tcp = (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_TCP) != 0; mss = sc->esc_txctx.tcp_seg_setup.fields.mss; paylen = (sc->esc_txctx.cmd_and_length & 0x000fffff); - DPRINTF("tx %s segmentation offload %d+%d/%d bytes %d iovs\r\n", + DPRINTF("tx %s segmentation offload %d+%d/%d bytes %d iovs", tcp ? "TCP" : "UDP", hdrlen, paylen, mss, iovcnt); ipid = ntohs(*(uint16_t *)&hdr[ckinfo[0].ck_start + 4]); tcpseq = 0; if (tcp) tcpseq = ntohl(*(uint32_t *)&hdr[ckinfo[1].ck_start + 4]); ipcs = *(uint16_t *)&hdr[ckinfo[0].ck_off]; tcpcs = 0; if (ckinfo[1].ck_valid) /* Save partial pseudo-header checksum. */ tcpcs = *(uint16_t *)&hdr[ckinfo[1].ck_off]; pv = 1; pvoff = 0; for (seg = 0, left = paylen; left > 0; seg++, left -= now) { now = MIN(left, mss); /* Construct IOVs for the segment. */ /* Include whole original header. */ tiov[0].iov_base = hdr; tiov[0].iov_len = hdrlen; tiovcnt = 1; /* Include respective part of payload IOV. 
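
The segmentation loop that follows replays one oversized packet as ceil(paylen / mss) segments, each prefixed with a rewritten copy of the original header. The per-segment payload accounting reduces to the following (hypothetical helper, not part of this change):

    /* Payload bytes carried by segment 'seg' of a TSO burst. */
    static inline int
    tso_seg_len(int paylen, int mss, int seg)
    {
            int off = seg * mss;

            if (off >= paylen)
                    return (0);
            return (paylen - off < mss ? paylen - off : mss);
    }
    /* e.g. paylen = 4000, mss = 1460 -> segments of 1460, 1460 and 1080. */
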
*/ for (nleft = now; pv < iovcnt && nleft > 0; nleft -= nnow) { nnow = MIN(nleft, iov[pv].iov_len - pvoff); tiov[tiovcnt].iov_base = iov[pv].iov_base + pvoff; tiov[tiovcnt++].iov_len = nnow; if (pvoff + nnow == iov[pv].iov_len) { pv++; pvoff = 0; } else pvoff += nnow; } - DPRINTF("tx segment %d %d+%d bytes %d iovs\r\n", + DPRINTF("tx segment %d %d+%d bytes %d iovs", seg, hdrlen, now, tiovcnt); /* Update IP header. */ if (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_IP) { /* IPv4 -- set length and ID */ *(uint16_t *)&hdr[ckinfo[0].ck_start + 2] = htons(hdrlen - ckinfo[0].ck_start + now); *(uint16_t *)&hdr[ckinfo[0].ck_start + 4] = htons(ipid + seg); } else { /* IPv6 -- set length */ *(uint16_t *)&hdr[ckinfo[0].ck_start + 4] = htons(hdrlen - ckinfo[0].ck_start - 40 + now); } /* Update pseudo-header checksum. */ tcpsum = tcpcs; tcpsum += htons(hdrlen - ckinfo[1].ck_start + now); /* Update TCP/UDP headers. */ if (tcp) { /* Update sequence number and FIN/PUSH flags. */ *(uint32_t *)&hdr[ckinfo[1].ck_start + 4] = htonl(tcpseq + paylen - left); if (now < left) { hdr[ckinfo[1].ck_start + 13] &= ~(TH_FIN | TH_PUSH); } } else { /* Update payload length. */ *(uint32_t *)&hdr[ckinfo[1].ck_start + 4] = hdrlen - ckinfo[1].ck_start + now; } /* Calculate checksums and transmit. */ if (ckinfo[0].ck_valid) { *(uint16_t *)&hdr[ckinfo[0].ck_off] = ipcs; e82545_transmit_checksum(tiov, tiovcnt, &ckinfo[0]); } if (ckinfo[1].ck_valid) { *(uint16_t *)&hdr[ckinfo[1].ck_off] = e82545_carry(tcpsum); e82545_transmit_checksum(tiov, tiovcnt, &ckinfo[1]); } e82545_transmit_backend(sc, tiov, tiovcnt); } done: head = (head + 1) % dsize; e82545_transmit_done(sc, ohead, head, dsize, tdwb); *rhead = head; return (desc + 1); } static void e82545_tx_run(struct e82545_softc *sc) { uint32_t cause; uint16_t head, rhead, tail, size; int lim, tdwb, sent; head = sc->esc_TDH; tail = sc->esc_TDT; size = sc->esc_TDLEN / 16; - DPRINTF("tx_run: head %x, rhead %x, tail %x\r\n", + DPRINTF("tx_run: head %x, rhead %x, tail %x", sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT); pthread_mutex_unlock(&sc->esc_mtx); rhead = head; tdwb = 0; for (lim = size / 4; sc->esc_tx_enabled && lim > 0; lim -= sent) { sent = e82545_transmit(sc, head, tail, size, &rhead, &tdwb); if (sent == 0) break; head = rhead; } pthread_mutex_lock(&sc->esc_mtx); sc->esc_TDH = head; sc->esc_TDHr = rhead; cause = 0; if (tdwb) cause |= E1000_ICR_TXDW; if (lim != size / 4 && sc->esc_TDH == sc->esc_TDT) cause |= E1000_ICR_TXQE; if (cause) e82545_icr_assert(sc, cause); - DPRINTF("tx_run done: head %x, rhead %x, tail %x\r\n", + DPRINTF("tx_run done: head %x, rhead %x, tail %x", sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT); } static _Noreturn void * e82545_tx_thread(void *param) { struct e82545_softc *sc = param; pthread_mutex_lock(&sc->esc_mtx); for (;;) { while (!sc->esc_tx_enabled || sc->esc_TDHr == sc->esc_TDT) { if (sc->esc_tx_enabled && sc->esc_TDHr != sc->esc_TDT) break; sc->esc_tx_active = 0; if (sc->esc_tx_enabled == 0) pthread_cond_signal(&sc->esc_tx_cond); pthread_cond_wait(&sc->esc_tx_cond, &sc->esc_mtx); } sc->esc_tx_active = 1; /* Process some tx descriptors. Lock dropped inside. 
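
e82545_tx_thread() above is the usual condition-variable worker: sleep while transmit is disabled or the ring is empty, mark itself active, and drop the softc mutex while walking the ring so guest register writes are not blocked behind transmission. A generic sketch of the pattern, simplified from the code above (in the real code the unlock happens inside e82545_tx_run()):

    #include <pthread.h>

    static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
    static int enabled, active, pending;

    static void *
    worker(void *arg)
    {
            (void)arg;
            pthread_mutex_lock(&mtx);
            for (;;) {
                    while (!enabled || pending == 0) {
                            active = 0;
                            pthread_cond_wait(&cond, &mtx); /* drops mtx while asleep */
                    }
                    active = 1;
                    pthread_mutex_unlock(&mtx);     /* long work without the lock */
                    /* ... process a batch of descriptors ... */
                    pthread_mutex_lock(&mtx);
                    pending = 0;
            }
            return (NULL);
    }
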
*/ e82545_tx_run(sc); } } static void e82545_tx_start(struct e82545_softc *sc) { if (sc->esc_tx_active == 0) pthread_cond_signal(&sc->esc_tx_cond); } static void e82545_tx_enable(struct e82545_softc *sc) { sc->esc_tx_enabled = 1; } static void e82545_tx_disable(struct e82545_softc *sc) { sc->esc_tx_enabled = 0; while (sc->esc_tx_active) pthread_cond_wait(&sc->esc_tx_cond, &sc->esc_mtx); } static void e82545_rx_enable(struct e82545_softc *sc) { sc->esc_rx_enabled = 1; } static void e82545_rx_disable(struct e82545_softc *sc) { sc->esc_rx_enabled = 0; while (sc->esc_rx_active) pthread_cond_wait(&sc->esc_rx_cond, &sc->esc_mtx); } static void e82545_write_ra(struct e82545_softc *sc, int reg, uint32_t wval) { struct eth_uni *eu; int idx; idx = reg >> 1; assert(idx < 15); eu = &sc->esc_uni[idx]; if (reg & 0x1) { /* RAH */ eu->eu_valid = ((wval & E1000_RAH_AV) == E1000_RAH_AV); eu->eu_addrsel = (wval >> 16) & 0x3; eu->eu_eth.octet[5] = wval >> 8; eu->eu_eth.octet[4] = wval; } else { /* RAL */ eu->eu_eth.octet[3] = wval >> 24; eu->eu_eth.octet[2] = wval >> 16; eu->eu_eth.octet[1] = wval >> 8; eu->eu_eth.octet[0] = wval; } } static uint32_t e82545_read_ra(struct e82545_softc *sc, int reg) { struct eth_uni *eu; uint32_t retval; int idx; idx = reg >> 1; assert(idx < 15); eu = &sc->esc_uni[idx]; if (reg & 0x1) { /* RAH */ retval = (eu->eu_valid << 31) | (eu->eu_addrsel << 16) | (eu->eu_eth.octet[5] << 8) | eu->eu_eth.octet[4]; } else { /* RAL */ retval = (eu->eu_eth.octet[3] << 24) | (eu->eu_eth.octet[2] << 16) | (eu->eu_eth.octet[1] << 8) | eu->eu_eth.octet[0]; } return (retval); } static void e82545_write_register(struct e82545_softc *sc, uint32_t offset, uint32_t value) { int ridx; if (offset & 0x3) { - DPRINTF("Unaligned register write offset:0x%x value:0x%x\r\n", offset, value); + DPRINTF("Unaligned register write offset:0x%x value:0x%x", offset, value); return; } - DPRINTF("Register write: 0x%x value: 0x%x\r\n", offset, value); + DPRINTF("Register write: 0x%x value: 0x%x", offset, value); switch (offset) { case E1000_CTRL: case E1000_CTRL_DUP: e82545_devctl(sc, value); break; case E1000_FCAL: sc->esc_FCAL = value; break; case E1000_FCAH: sc->esc_FCAH = value & ~0xFFFF0000; break; case E1000_FCT: sc->esc_FCT = value & ~0xFFFF0000; break; case E1000_VET: sc->esc_VET = value & ~0xFFFF0000; break; case E1000_FCTTV: sc->esc_FCTTV = value & ~0xFFFF0000; break; case E1000_LEDCTL: sc->esc_LEDCTL = value & ~0x30303000; break; case E1000_PBA: sc->esc_PBA = value & 0x0000FF80; break; case E1000_ICR: case E1000_ITR: case E1000_ICS: case E1000_IMS: case E1000_IMC: e82545_intr_write(sc, offset, value); break; case E1000_RCTL: e82545_rx_ctl(sc, value); break; case E1000_FCRTL: sc->esc_FCRTL = value & ~0xFFFF0007; break; case E1000_FCRTH: sc->esc_FCRTH = value & ~0xFFFF0007; break; case E1000_RDBAL(0): sc->esc_RDBAL = value & ~0xF; if (sc->esc_rx_enabled) { /* Apparently legal: update cached address */ e82545_rx_update_rdba(sc); } break; case E1000_RDBAH(0): assert(!sc->esc_rx_enabled); sc->esc_RDBAH = value; break; case E1000_RDLEN(0): assert(!sc->esc_rx_enabled); sc->esc_RDLEN = value & ~0xFFF0007F; break; case E1000_RDH(0): /* XXX should only ever be zero ? Range check ? */ sc->esc_RDH = value; break; case E1000_RDT(0): /* XXX if this opens up the rx ring, do something ? 
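
In e82545_write_ra()/e82545_read_ra() above, the six MAC octets straddle a RAL/RAH register pair: RAL carries octets 0-3 (octet 0 in the low byte), RAH carries octets 4-5 together with the Address Valid bit. Packing, for illustration (hypothetical helper):

    #include <stdint.h>

    static void
    mac_to_ra(const uint8_t mac[6], uint32_t *ral, uint32_t *rah)
    {
            *ral = (uint32_t)mac[3] << 24 | (uint32_t)mac[2] << 16 |
                (uint32_t)mac[1] << 8 | mac[0];
            *rah = (1U << 31) /* AV, cf. E1000_RAH_AV */ |
                (uint32_t)mac[5] << 8 | mac[4];
    }
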
*/ sc->esc_RDT = value; break; case E1000_RDTR: /* ignore FPD bit 31 */ sc->esc_RDTR = value & ~0xFFFF0000; break; case E1000_RXDCTL(0): sc->esc_RXDCTL = value & ~0xFEC0C0C0; break; case E1000_RADV: sc->esc_RADV = value & ~0xFFFF0000; break; case E1000_RSRPD: sc->esc_RSRPD = value & ~0xFFFFF000; break; case E1000_RXCSUM: sc->esc_RXCSUM = value & ~0xFFFFF800; break; case E1000_TXCW: sc->esc_TXCW = value & ~0x3FFF0000; break; case E1000_TCTL: e82545_tx_ctl(sc, value); break; case E1000_TIPG: sc->esc_TIPG = value; break; case E1000_AIT: sc->esc_AIT = value; break; case E1000_TDBAL(0): sc->esc_TDBAL = value & ~0xF; if (sc->esc_tx_enabled) { /* Apparently legal */ e82545_tx_update_tdba(sc); } break; case E1000_TDBAH(0): //assert(!sc->esc_tx_enabled); sc->esc_TDBAH = value; break; case E1000_TDLEN(0): //assert(!sc->esc_tx_enabled); sc->esc_TDLEN = value & ~0xFFF0007F; break; case E1000_TDH(0): //assert(!sc->esc_tx_enabled); /* XXX should only ever be zero ? Range check ? */ sc->esc_TDHr = sc->esc_TDH = value; break; case E1000_TDT(0): /* XXX range check ? */ sc->esc_TDT = value; if (sc->esc_tx_enabled) e82545_tx_start(sc); break; case E1000_TIDV: sc->esc_TIDV = value & ~0xFFFF0000; break; case E1000_TXDCTL(0): //assert(!sc->esc_tx_enabled); sc->esc_TXDCTL = value & ~0xC0C0C0; break; case E1000_TADV: sc->esc_TADV = value & ~0xFFFF0000; break; case E1000_RAL(0) ... E1000_RAH(15): /* convert to u32 offset */ ridx = (offset - E1000_RAL(0)) >> 2; e82545_write_ra(sc, ridx, value); break; case E1000_MTA ... (E1000_MTA + (127*4)): sc->esc_fmcast[(offset - E1000_MTA) >> 2] = value; break; case E1000_VFTA ... (E1000_VFTA + (127*4)): sc->esc_fvlan[(offset - E1000_VFTA) >> 2] = value; break; case E1000_EECD: { - //DPRINTF("EECD write 0x%x -> 0x%x\r\n", sc->eeprom_control, value); + //DPRINTF("EECD write 0x%x -> 0x%x", sc->eeprom_control, value); /* edge triggered low->high */ uint32_t eecd_strobe = ((sc->eeprom_control & E1000_EECD_SK) ? 0 : (value & E1000_EECD_SK)); uint32_t eecd_mask = (E1000_EECD_SK|E1000_EECD_CS| E1000_EECD_DI|E1000_EECD_REQ); sc->eeprom_control &= ~eecd_mask; sc->eeprom_control |= (value & eecd_mask); /* grant/revoke immediately */ if (value & E1000_EECD_REQ) { sc->eeprom_control |= E1000_EECD_GNT; } else { sc->eeprom_control &= ~E1000_EECD_GNT; } if (eecd_strobe && (sc->eeprom_control & E1000_EECD_CS)) { e82545_eecd_strobe(sc); } return; } case E1000_MDIC: { uint8_t reg_addr = (uint8_t)((value & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); uint8_t phy_addr = (uint8_t)((value & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT); sc->mdi_control = (value & ~(E1000_MDIC_ERROR|E1000_MDIC_DEST)); if ((value & E1000_MDIC_READY) != 0) { - DPRINTF("Incorrect MDIC ready bit: 0x%x\r\n", value); + DPRINTF("Incorrect MDIC ready bit: 0x%x", value); return; } switch (value & E82545_MDIC_OP_MASK) { case E1000_MDIC_OP_READ: sc->mdi_control &= ~E82545_MDIC_DATA_MASK; sc->mdi_control |= e82545_read_mdi(sc, reg_addr, phy_addr); break; case E1000_MDIC_OP_WRITE: e82545_write_mdi(sc, reg_addr, phy_addr, value & E82545_MDIC_DATA_MASK); break; default: - DPRINTF("Unknown MDIC op: 0x%x\r\n", value); + DPRINTF("Unknown MDIC op: 0x%x", value); return; } /* TODO: barrier? 
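
The MDIC case above cracks a single 32-bit register into its fields with the usual mask-and-shift idiom. Per the 8254x register layout (data in bits 15:0, register address in 20:16, PHY address in 25:21, opcode in 27:26), a condensed sketch:

    #include <stdint.h>

    static void
    mdic_decode(uint32_t mdic, uint8_t *phy, uint8_t *reg, uint16_t *data)
    {
            *data = mdic & 0xffff;          /* bits 15:0 */
            *reg = (mdic >> 16) & 0x1f;     /* bits 20:16 */
            *phy = (mdic >> 21) & 0x1f;     /* bits 25:21 */
    }
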
*/ sc->mdi_control |= E1000_MDIC_READY; if (value & E82545_MDIC_IE) { // TODO: generate interrupt } return; } case E1000_MANC: case E1000_STATUS: return; default: - DPRINTF("Unknown write register: 0x%x value:%x\r\n", offset, value); + DPRINTF("Unknown write register: 0x%x value:%x", offset, value); return; } } static uint32_t e82545_read_register(struct e82545_softc *sc, uint32_t offset) { uint32_t retval; int ridx; if (offset & 0x3) { - DPRINTF("Unaligned register read offset:0x%x\r\n", offset); + DPRINTF("Unaligned register read offset:0x%x", offset); return 0; } - DPRINTF("Register read: 0x%x\r\n", offset); + DPRINTF("Register read: 0x%x", offset); switch (offset) { case E1000_CTRL: retval = sc->esc_CTRL; break; case E1000_STATUS: retval = E1000_STATUS_FD | E1000_STATUS_LU | E1000_STATUS_SPEED_1000; break; case E1000_FCAL: retval = sc->esc_FCAL; break; case E1000_FCAH: retval = sc->esc_FCAH; break; case E1000_FCT: retval = sc->esc_FCT; break; case E1000_VET: retval = sc->esc_VET; break; case E1000_FCTTV: retval = sc->esc_FCTTV; break; case E1000_LEDCTL: retval = sc->esc_LEDCTL; break; case E1000_PBA: retval = sc->esc_PBA; break; case E1000_ICR: case E1000_ITR: case E1000_ICS: case E1000_IMS: case E1000_IMC: retval = e82545_intr_read(sc, offset); break; case E1000_RCTL: retval = sc->esc_RCTL; break; case E1000_FCRTL: retval = sc->esc_FCRTL; break; case E1000_FCRTH: retval = sc->esc_FCRTH; break; case E1000_RDBAL(0): retval = sc->esc_RDBAL; break; case E1000_RDBAH(0): retval = sc->esc_RDBAH; break; case E1000_RDLEN(0): retval = sc->esc_RDLEN; break; case E1000_RDH(0): retval = sc->esc_RDH; break; case E1000_RDT(0): retval = sc->esc_RDT; break; case E1000_RDTR: retval = sc->esc_RDTR; break; case E1000_RXDCTL(0): retval = sc->esc_RXDCTL; break; case E1000_RADV: retval = sc->esc_RADV; break; case E1000_RSRPD: retval = sc->esc_RSRPD; break; case E1000_RXCSUM: retval = sc->esc_RXCSUM; break; case E1000_TXCW: retval = sc->esc_TXCW; break; case E1000_TCTL: retval = sc->esc_TCTL; break; case E1000_TIPG: retval = sc->esc_TIPG; break; case E1000_AIT: retval = sc->esc_AIT; break; case E1000_TDBAL(0): retval = sc->esc_TDBAL; break; case E1000_TDBAH(0): retval = sc->esc_TDBAH; break; case E1000_TDLEN(0): retval = sc->esc_TDLEN; break; case E1000_TDH(0): retval = sc->esc_TDH; break; case E1000_TDT(0): retval = sc->esc_TDT; break; case E1000_TIDV: retval = sc->esc_TIDV; break; case E1000_TXDCTL(0): retval = sc->esc_TXDCTL; break; case E1000_TADV: retval = sc->esc_TADV; break; case E1000_RAL(0) ... E1000_RAH(15): /* convert to u32 offset */ ridx = (offset - E1000_RAL(0)) >> 2; retval = e82545_read_ra(sc, ridx); break; case E1000_MTA ... (E1000_MTA + (127*4)): retval = sc->esc_fmcast[(offset - E1000_MTA) >> 2]; break; case E1000_VFTA ... (E1000_VFTA + (127*4)): retval = sc->esc_fvlan[(offset - E1000_VFTA) >> 2]; break; case E1000_EECD: - //DPRINTF("EECD read %x\r\n", sc->eeprom_control); + //DPRINTF("EECD read %x", sc->eeprom_control); retval = sc->eeprom_control; break; case E1000_MDIC: retval = sc->mdi_control; break; case E1000_MANC: retval = 0; break; /* stats that we emulate. 
*/ case E1000_MPC: retval = sc->missed_pkt_count; break; case E1000_PRC64: retval = sc->pkt_rx_by_size[0]; break; case E1000_PRC127: retval = sc->pkt_rx_by_size[1]; break; case E1000_PRC255: retval = sc->pkt_rx_by_size[2]; break; case E1000_PRC511: retval = sc->pkt_rx_by_size[3]; break; case E1000_PRC1023: retval = sc->pkt_rx_by_size[4]; break; case E1000_PRC1522: retval = sc->pkt_rx_by_size[5]; break; case E1000_GPRC: retval = sc->good_pkt_rx_count; break; case E1000_BPRC: retval = sc->bcast_pkt_rx_count; break; case E1000_MPRC: retval = sc->mcast_pkt_rx_count; break; case E1000_GPTC: case E1000_TPT: retval = sc->good_pkt_tx_count; break; case E1000_GORCL: retval = (uint32_t)sc->good_octets_rx; break; case E1000_GORCH: retval = (uint32_t)(sc->good_octets_rx >> 32); break; case E1000_TOTL: case E1000_GOTCL: retval = (uint32_t)sc->good_octets_tx; break; case E1000_TOTH: case E1000_GOTCH: retval = (uint32_t)(sc->good_octets_tx >> 32); break; case E1000_ROC: retval = sc->oversize_rx_count; break; case E1000_TORL: retval = (uint32_t)(sc->good_octets_rx + sc->missed_octets); break; case E1000_TORH: retval = (uint32_t)((sc->good_octets_rx + sc->missed_octets) >> 32); break; case E1000_TPR: retval = sc->good_pkt_rx_count + sc->missed_pkt_count + sc->oversize_rx_count; break; case E1000_PTC64: retval = sc->pkt_tx_by_size[0]; break; case E1000_PTC127: retval = sc->pkt_tx_by_size[1]; break; case E1000_PTC255: retval = sc->pkt_tx_by_size[2]; break; case E1000_PTC511: retval = sc->pkt_tx_by_size[3]; break; case E1000_PTC1023: retval = sc->pkt_tx_by_size[4]; break; case E1000_PTC1522: retval = sc->pkt_tx_by_size[5]; break; case E1000_MPTC: retval = sc->mcast_pkt_tx_count; break; case E1000_BPTC: retval = sc->bcast_pkt_tx_count; break; case E1000_TSCTC: retval = sc->tso_tx_count; break; /* stats that are always 0. 
*/ case E1000_CRCERRS: case E1000_ALGNERRC: case E1000_SYMERRS: case E1000_RXERRC: case E1000_SCC: case E1000_ECOL: case E1000_MCC: case E1000_LATECOL: case E1000_COLC: case E1000_DC: case E1000_TNCRS: case E1000_SEC: case E1000_CEXTERR: case E1000_RLEC: case E1000_XONRXC: case E1000_XONTXC: case E1000_XOFFRXC: case E1000_XOFFTXC: case E1000_FCRUC: case E1000_RNBC: case E1000_RUC: case E1000_RFC: case E1000_RJC: case E1000_MGTPRC: case E1000_MGTPDC: case E1000_MGTPTC: case E1000_TSCTFC: retval = 0; break; default: - DPRINTF("Unknown read register: 0x%x\r\n", offset); + DPRINTF("Unknown read register: 0x%x", offset); retval = 0; break; } return (retval); } static void e82545_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { struct e82545_softc *sc; - //DPRINTF("Write bar:%d offset:0x%lx value:0x%lx size:%d\r\n", baridx, offset, value, size); + //DPRINTF("Write bar:%d offset:0x%lx value:0x%lx size:%d", baridx, offset, value, size); sc = pi->pi_arg; pthread_mutex_lock(&sc->esc_mtx); switch (baridx) { case E82545_BAR_IO: switch (offset) { case E82545_IOADDR: if (size != 4) { - DPRINTF("Wrong io addr write sz:%d value:0x%lx\r\n", size, value); + DPRINTF("Wrong io addr write sz:%d value:0x%lx", size, value); } else sc->io_addr = (uint32_t)value; break; case E82545_IODATA: if (size != 4) { - DPRINTF("Wrong io data write size:%d value:0x%lx\r\n", size, value); + DPRINTF("Wrong io data write size:%d value:0x%lx", size, value); } else if (sc->io_addr > E82545_IO_REGISTER_MAX) { - DPRINTF("Non-register io write addr:0x%x value:0x%lx\r\n", sc->io_addr, value); + DPRINTF("Non-register io write addr:0x%x value:0x%lx", sc->io_addr, value); } else e82545_write_register(sc, sc->io_addr, (uint32_t)value); break; default: - DPRINTF("Unknown io bar write offset:0x%lx value:0x%lx size:%d\r\n", offset, value, size); + DPRINTF("Unknown io bar write offset:0x%lx value:0x%lx size:%d", offset, value, size); break; } break; case E82545_BAR_REGISTER: if (size != 4) { - DPRINTF("Wrong register write size:%d offset:0x%lx value:0x%lx\r\n", size, offset, value); + DPRINTF("Wrong register write size:%d offset:0x%lx value:0x%lx", size, offset, value); } else e82545_write_register(sc, (uint32_t)offset, (uint32_t)value); break; default: - DPRINTF("Unknown write bar:%d off:0x%lx val:0x%lx size:%d\r\n", + DPRINTF("Unknown write bar:%d off:0x%lx val:0x%lx size:%d", baridx, offset, value, size); } pthread_mutex_unlock(&sc->esc_mtx); } static uint64_t e82545_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { struct e82545_softc *sc; uint64_t retval; - //DPRINTF("Read bar:%d offset:0x%lx size:%d\r\n", baridx, offset, size); + //DPRINTF("Read bar:%d offset:0x%lx size:%d", baridx, offset, size); sc = pi->pi_arg; retval = 0; pthread_mutex_lock(&sc->esc_mtx); switch (baridx) { case E82545_BAR_IO: switch (offset) { case E82545_IOADDR: if (size != 4) { - DPRINTF("Wrong io addr read sz:%d\r\n", size); + DPRINTF("Wrong io addr read sz:%d", size); } else retval = sc->io_addr; break; case E82545_IODATA: if (size != 4) { - DPRINTF("Wrong io data read sz:%d\r\n", size); + DPRINTF("Wrong io data read sz:%d", size); } if (sc->io_addr > E82545_IO_REGISTER_MAX) { - DPRINTF("Non-register io read addr:0x%x\r\n", + DPRINTF("Non-register io read addr:0x%x", sc->io_addr); } else retval = e82545_read_register(sc, sc->io_addr); break; default: - DPRINTF("Unknown io bar read offset:0x%lx size:%d\r\n", + DPRINTF("Unknown io bar read 
offset:0x%lx size:%d", offset, size); break; } break; case E82545_BAR_REGISTER: if (size != 4) { - DPRINTF("Wrong register read size:%d offset:0x%lx\r\n", + DPRINTF("Wrong register read size:%d offset:0x%lx", size, offset); } else retval = e82545_read_register(sc, (uint32_t)offset); break; default: - DPRINTF("Unknown read bar:%d offset:0x%lx size:%d\r\n", + DPRINTF("Unknown read bar:%d offset:0x%lx size:%d", baridx, offset, size); break; } pthread_mutex_unlock(&sc->esc_mtx); return (retval); } static void e82545_reset(struct e82545_softc *sc, int drvr) { int i; e82545_rx_disable(sc); e82545_tx_disable(sc); /* clear outstanding interrupts */ if (sc->esc_irq_asserted) pci_lintr_deassert(sc->esc_pi); /* misc */ if (!drvr) { sc->esc_FCAL = 0; sc->esc_FCAH = 0; sc->esc_FCT = 0; sc->esc_VET = 0; sc->esc_FCTTV = 0; } sc->esc_LEDCTL = 0x07061302; sc->esc_PBA = 0x00100030; /* start nvm in opcode mode. */ sc->nvm_opaddr = 0; sc->nvm_mode = E82545_NVM_MODE_OPADDR; sc->nvm_bits = E82545_NVM_OPADDR_BITS; sc->eeprom_control = E1000_EECD_PRES | E82545_EECD_FWE_EN; e82545_init_eeprom(sc); /* interrupt */ sc->esc_ICR = 0; sc->esc_ITR = 250; sc->esc_ICS = 0; sc->esc_IMS = 0; sc->esc_IMC = 0; /* L2 filters */ if (!drvr) { memset(sc->esc_fvlan, 0, sizeof(sc->esc_fvlan)); memset(sc->esc_fmcast, 0, sizeof(sc->esc_fmcast)); memset(sc->esc_uni, 0, sizeof(sc->esc_uni)); /* XXX not necessary on 82545 ?? */ sc->esc_uni[0].eu_valid = 1; memcpy(sc->esc_uni[0].eu_eth.octet, sc->esc_mac.octet, ETHER_ADDR_LEN); } else { /* Clear RAH valid bits */ for (i = 0; i < 16; i++) sc->esc_uni[i].eu_valid = 0; } /* receive */ if (!drvr) { sc->esc_RDBAL = 0; sc->esc_RDBAH = 0; } sc->esc_RCTL = 0; sc->esc_FCRTL = 0; sc->esc_FCRTH = 0; sc->esc_RDLEN = 0; sc->esc_RDH = 0; sc->esc_RDT = 0; sc->esc_RDTR = 0; sc->esc_RXDCTL = (1 << 24) | (1 << 16); /* default GRAN/WTHRESH */ sc->esc_RADV = 0; sc->esc_RXCSUM = 0; /* transmit */ if (!drvr) { sc->esc_TDBAL = 0; sc->esc_TDBAH = 0; sc->esc_TIPG = 0; sc->esc_AIT = 0; sc->esc_TIDV = 0; sc->esc_TADV = 0; } sc->esc_tdba = 0; sc->esc_txdesc = NULL; sc->esc_TXCW = 0; sc->esc_TCTL = 0; sc->esc_TDLEN = 0; sc->esc_TDT = 0; sc->esc_TDHr = sc->esc_TDH = 0; sc->esc_TXDCTL = 0; } static int e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { char nstr[80]; struct e82545_softc *sc; char *devname; char *vtopts; int mac_provided; - DPRINTF("Loading with options: %s\r\n", opts); + DPRINTF("Loading with options: %s", opts); /* Setup our softc */ sc = calloc(1, sizeof(*sc)); pi->pi_arg = sc; sc->esc_pi = pi; sc->esc_ctx = ctx; pthread_mutex_init(&sc->esc_mtx, NULL); pthread_cond_init(&sc->esc_rx_cond, NULL); pthread_cond_init(&sc->esc_tx_cond, NULL); pthread_create(&sc->esc_tx_tid, NULL, e82545_tx_thread, sc); snprintf(nstr, sizeof(nstr), "e82545-%d:%d tx", pi->pi_slot, pi->pi_func); pthread_set_name_np(sc->esc_tx_tid, nstr); pci_set_cfgdata16(pi, PCIR_DEVICE, E82545_DEV_ID_82545EM_COPPER); pci_set_cfgdata16(pi, PCIR_VENDOR, E82545_VENDOR_ID_INTEL); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_NETWORK_ETHERNET); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, E82545_SUBDEV_ID); pci_set_cfgdata16(pi, PCIR_SUBVEND_0, E82545_VENDOR_ID_INTEL); pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_NORMAL); pci_set_cfgdata8(pi, PCIR_INTPIN, 0x1); /* TODO: this card also supports msi, but the freebsd driver for it * does not, so I have not implemented it. 
*/ pci_lintr_request(pi); pci_emul_alloc_bar(pi, E82545_BAR_REGISTER, PCIBAR_MEM32, E82545_BAR_REGISTER_LEN); pci_emul_alloc_bar(pi, E82545_BAR_FLASH, PCIBAR_MEM32, E82545_BAR_FLASH_LEN); pci_emul_alloc_bar(pi, E82545_BAR_IO, PCIBAR_IO, E82545_BAR_IO_LEN); /* * Attempt to open the net backend and read the MAC address * if specified. Copied from virtio-net, slightly modified. */ mac_provided = 0; sc->esc_be = NULL; if (opts != NULL) { int err; devname = vtopts = strdup(opts); (void) strsep(&vtopts, ","); if (vtopts != NULL) { err = net_parsemac(vtopts, sc->esc_mac.octet); if (err != 0) { free(devname); return (err); } mac_provided = 1; } err = netbe_init(&sc->esc_be, devname, e82545_rx_callback, sc); free(devname); if (err) return (err); } if (!mac_provided) { net_genmac(pi, sc->esc_mac.octet); } netbe_rx_enable(sc->esc_be); /* H/w initiated reset */ e82545_reset(sc, 0); return (0); } struct pci_devemu pci_de_e82545 = { .pe_emu = "e1000", .pe_init = e82545_init, .pe_barwrite = e82545_write, .pe_barread = e82545_read }; PCI_EMUL_SET(pci_de_e82545); Index: stable/12/usr.sbin/bhyve/pci_emul.c =================================================================== --- stable/12/usr.sbin/bhyve/pci_emul.c (revision 358183) +++ stable/12/usr.sbin/bhyve/pci_emul.c (revision 358184) @@ -1,2141 +1,2142 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include "acpi.h" #include "bhyverun.h" +#include "debug.h" #include "inout.h" #include "ioapic.h" #include "mem.h" #include "pci_emul.h" #include "pci_irq.h" #include "pci_lpc.h" #define CONF1_ADDR_PORT 0x0cf8 #define CONF1_DATA_PORT 0x0cfc #define CONF1_ENABLE 0x80000000ul #define MAXBUSES (PCI_BUSMAX + 1) #define MAXSLOTS (PCI_SLOTMAX + 1) #define MAXFUNCS (PCI_FUNCMAX + 1) struct funcinfo { char *fi_name; char *fi_param; struct pci_devinst *fi_devi; }; struct intxinfo { int ii_count; int ii_pirq_pin; int ii_ioapic_irq; }; struct slotinfo { struct intxinfo si_intpins[4]; struct funcinfo si_funcs[MAXFUNCS]; }; struct businfo { uint16_t iobase, iolimit; /* I/O window */ uint32_t membase32, memlimit32; /* mmio window below 4GB */ uint64_t membase64, memlimit64; /* mmio window above 4GB */ struct slotinfo slotinfo[MAXSLOTS]; }; static struct businfo *pci_businfo[MAXBUSES]; SET_DECLARE(pci_devemu_set, struct pci_devemu); static uint64_t pci_emul_iobase; static uint64_t pci_emul_membase32; static uint64_t pci_emul_membase64; #define PCI_EMUL_IOBASE 0x2000 #define PCI_EMUL_IOLIMIT 0x10000 #define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */ #define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */ SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE); #define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE #define PCI_EMUL_MEMBASE64 0xD000000000UL #define PCI_EMUL_MEMLIMIT64 0xFD00000000UL static struct pci_devemu *pci_emul_finddev(char *name); static void pci_lintr_route(struct pci_devinst *pi); static void pci_lintr_update(struct pci_devinst *pi); static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, int coff, int bytes, uint32_t *val); static __inline void CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes) { if (bytes == 1) pci_set_cfgdata8(pi, coff, val); else if (bytes == 2) pci_set_cfgdata16(pi, coff, val); else pci_set_cfgdata32(pi, coff, val); } static __inline uint32_t CFGREAD(struct pci_devinst *pi, int coff, int bytes) { if (bytes == 1) return (pci_get_cfgdata8(pi, coff)); else if (bytes == 2) return (pci_get_cfgdata16(pi, coff)); else return (pci_get_cfgdata32(pi, coff)); } /* * I/O access */ /* * Slot options are in the form: * * ::,[,] * [:],[,] * * slot is 0..31 * func is 0..7 * emul is a string describing the type of PCI device e.g. virtio-net * config is an optional string, depending on the device, that can be * used for configuration. 
* Examples are: * 1,virtio-net,tap0 * 3:0,dummy */ static void pci_parse_slot_usage(char *aopt) { - fprintf(stderr, "Invalid PCI slot info field \"%s\"\n", aopt); + EPRINTLN("Invalid PCI slot info field \"%s\"", aopt); } int pci_parse_slot(char *opt) { struct businfo *bi; struct slotinfo *si; char *emul, *config, *str, *cp; int error, bnum, snum, fnum; error = -1; str = strdup(opt); emul = config = NULL; if ((cp = strchr(str, ',')) != NULL) { *cp = '\0'; emul = cp + 1; if ((cp = strchr(emul, ',')) != NULL) { *cp = '\0'; config = cp + 1; } } else { pci_parse_slot_usage(opt); goto done; } /* :: */ if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { bnum = 0; /* : */ if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { fnum = 0; /* */ if (sscanf(str, "%d", &snum) != 1) { snum = -1; } } } if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || fnum < 0 || fnum >= MAXFUNCS) { pci_parse_slot_usage(opt); goto done; } if (pci_businfo[bnum] == NULL) pci_businfo[bnum] = calloc(1, sizeof(struct businfo)); bi = pci_businfo[bnum]; si = &bi->slotinfo[snum]; if (si->si_funcs[fnum].fi_name != NULL) { - fprintf(stderr, "pci slot %d:%d already occupied!\n", + EPRINTLN("pci slot %d:%d already occupied!", snum, fnum); goto done; } if (pci_emul_finddev(emul) == NULL) { - fprintf(stderr, "pci slot %d:%d: unknown device \"%s\"\n", + EPRINTLN("pci slot %d:%d: unknown device \"%s\"", snum, fnum, emul); goto done; } error = 0; si->si_funcs[fnum].fi_name = emul; si->si_funcs[fnum].fi_param = config; done: if (error) free(str); return (error); } void pci_print_supported_devices() { struct pci_devemu **pdpp, *pdp; SET_FOREACH(pdpp, pci_devemu_set) { pdp = *pdpp; printf("%s\n", pdp->pe_emu); } } static int pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) { if (offset < pi->pi_msix.pba_offset) return (0); if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { return (0); } return (1); } int pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, uint64_t value) { int msix_entry_offset; int tab_index; char *dest; /* support only 4 or 8 byte writes */ if (size != 4 && size != 8) return (-1); /* * Return if table index is beyond what device supports */ tab_index = offset / MSIX_TABLE_ENTRY_SIZE; if (tab_index >= pi->pi_msix.table_count) return (-1); msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; /* support only aligned writes */ if ((msix_entry_offset % size) != 0) return (-1); dest = (char *)(pi->pi_msix.table + tab_index); dest += msix_entry_offset; if (size == 4) *((uint32_t *)dest) = value; else *((uint64_t *)dest) = value; return (0); } uint64_t pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) { char *dest; int msix_entry_offset; int tab_index; uint64_t retval = ~0; /* * The PCI standard only allows 4 and 8 byte accesses to the MSI-X * table but we also allow 1 byte access to accommodate reads from * ddb. 
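
pci_emul_msix_twrite() above (and its read twin just below) resolve a raw BAR offset into a table entry plus an offset within that 16-byte entry; the decomposition is a single div/mod. For illustration:

    #include <stdint.h>

    #define MSIX_ENTRY_SZ 16        /* bytes per MSI-X table entry */

    static void
    msix_split(uint64_t off, int *entry, int *within)
    {
            *entry = off / MSIX_ENTRY_SZ;
            *within = off % MSIX_ENTRY_SZ;
    }
    /* e.g. offset 0x34 -> entry 3, offset 4: the message upper-address dword. */
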
*/ if (size != 1 && size != 4 && size != 8) return (retval); msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; /* support only aligned reads */ if ((msix_entry_offset % size) != 0) { return (retval); } tab_index = offset / MSIX_TABLE_ENTRY_SIZE; if (tab_index < pi->pi_msix.table_count) { /* valid MSI-X Table access */ dest = (char *)(pi->pi_msix.table + tab_index); dest += msix_entry_offset; if (size == 1) retval = *((uint8_t *)dest); else if (size == 4) retval = *((uint32_t *)dest); else retval = *((uint64_t *)dest); } else if (pci_valid_pba_offset(pi, offset)) { /* return 0 for PBA access */ retval = 0; } return (retval); } int pci_msix_table_bar(struct pci_devinst *pi) { if (pi->pi_msix.table != NULL) return (pi->pi_msix.table_bar); else return (-1); } int pci_msix_pba_bar(struct pci_devinst *pi) { if (pi->pi_msix.table != NULL) return (pi->pi_msix.pba_bar); else return (-1); } static int pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) { struct pci_devinst *pdi = arg; struct pci_devemu *pe = pdi->pi_d; uint64_t offset; int i; for (i = 0; i <= PCI_BARMAX; i++) { if (pdi->pi_bar[i].type == PCIBAR_IO && port >= pdi->pi_bar[i].addr && port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { offset = port - pdi->pi_bar[i].addr; if (in) *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, offset, bytes); else (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, bytes, *eax); return (0); } } return (-1); } static int pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, int size, uint64_t *val, void *arg1, long arg2) { struct pci_devinst *pdi = arg1; struct pci_devemu *pe = pdi->pi_d; uint64_t offset; int bidx = (int) arg2; assert(bidx <= PCI_BARMAX); assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || pdi->pi_bar[bidx].type == PCIBAR_MEM64); assert(addr >= pdi->pi_bar[bidx].addr && addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); offset = addr - pdi->pi_bar[bidx].addr; if (dir == MEM_F_WRITE) { if (size == 8) { (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 4, *val & 0xffffffff); (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4, 4, *val >> 32); } else { (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, size, *val); } } else { if (size == 8) { *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset, 4); *val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset + 4, 4) << 32; } else { *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset, size); } } return (0); } static int pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, uint64_t *addr) { uint64_t base; assert((size & (size - 1)) == 0); /* must be a power of 2 */ base = roundup2(*baseptr, size); if (base + size <= limit) { *addr = base; *baseptr = base + size; return (0); } else return (-1); } int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, uint64_t size) { return (pci_emul_alloc_pbar(pdi, idx, 0, type, size)); } /* * Register (or unregister) the MMIO or I/O region associated with the BAR * register 'idx' of an emulated pci device. 
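
pci_emul_alloc_resource() above is a bump allocator that naturally aligns every BAR to its own (power-of-two) size, which is exactly the alignment BAR decoding requires. An equivalent sketch:

    #include <stdint.h>

    static int
    bump_alloc(uint64_t *baseptr, uint64_t limit, uint64_t size, uint64_t *addr)
    {
            /* Round the cursor up to a multiple of 'size' (a power of 2). */
            uint64_t base = (*baseptr + size - 1) & ~(size - 1);

            if (base + size > limit)
                    return (-1);
            *addr = base;
            *baseptr = base + size;
            return (0);
    }
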
*/ static void modify_bar_registration(struct pci_devinst *pi, int idx, int registration) { int error; struct inout_port iop; struct mem_range mr; switch (pi->pi_bar[idx].type) { case PCIBAR_IO: bzero(&iop, sizeof(struct inout_port)); iop.name = pi->pi_name; iop.port = pi->pi_bar[idx].addr; iop.size = pi->pi_bar[idx].size; if (registration) { iop.flags = IOPORT_F_INOUT; iop.handler = pci_emul_io_handler; iop.arg = pi; error = register_inout(&iop); } else error = unregister_inout(&iop); break; case PCIBAR_MEM32: case PCIBAR_MEM64: bzero(&mr, sizeof(struct mem_range)); mr.name = pi->pi_name; mr.base = pi->pi_bar[idx].addr; mr.size = pi->pi_bar[idx].size; if (registration) { mr.flags = MEM_F_RW; mr.handler = pci_emul_mem_handler; mr.arg1 = pi; mr.arg2 = idx; error = register_mem(&mr); } else error = unregister_mem(&mr); break; default: error = EINVAL; break; } assert(error == 0); } static void unregister_bar(struct pci_devinst *pi, int idx) { modify_bar_registration(pi, idx, 0); } static void register_bar(struct pci_devinst *pi, int idx) { modify_bar_registration(pi, idx, 1); } /* Are we decoding i/o port accesses for the emulated pci device? */ static int porten(struct pci_devinst *pi) { uint16_t cmd; cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); return (cmd & PCIM_CMD_PORTEN); } /* Are we decoding memory accesses for the emulated pci device? */ static int memen(struct pci_devinst *pi) { uint16_t cmd; cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); return (cmd & PCIM_CMD_MEMEN); } /* * Update the MMIO or I/O address that is decoded by the BAR register. * * If the pci device has enabled the address space decoding then intercept * the address range decoded by the BAR register. */ static void update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) { int decode; if (pi->pi_bar[idx].type == PCIBAR_IO) decode = porten(pi); else decode = memen(pi); if (decode) unregister_bar(pi, idx); switch (type) { case PCIBAR_IO: case PCIBAR_MEM32: pi->pi_bar[idx].addr = addr; break; case PCIBAR_MEM64: pi->pi_bar[idx].addr &= ~0xffffffffUL; pi->pi_bar[idx].addr |= addr; break; case PCIBAR_MEMHI64: pi->pi_bar[idx].addr &= 0xffffffff; pi->pi_bar[idx].addr |= addr; break; default: assert(0); } if (decode) register_bar(pi, idx); } int pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, enum pcibar_type type, uint64_t size) { int error; uint64_t *baseptr, limit, addr, mask, lobits, bar; uint16_t cmd, enbit; assert(idx >= 0 && idx <= PCI_BARMAX); if ((size & (size - 1)) != 0) size = 1UL << flsl(size); /* round up to a power of 2 */ /* Enforce minimum BAR sizes required by the PCI standard */ if (type == PCIBAR_IO) { if (size < 4) size = 4; } else { if (size < 16) size = 16; } switch (type) { case PCIBAR_NONE: baseptr = NULL; addr = mask = lobits = enbit = 0; break; case PCIBAR_IO: baseptr = &pci_emul_iobase; limit = PCI_EMUL_IOLIMIT; mask = PCIM_BAR_IO_BASE; lobits = PCIM_BAR_IO_SPACE; enbit = PCIM_CMD_PORTEN; break; case PCIBAR_MEM64: /* * XXX * Some drivers do not work well if the 64-bit BAR is allocated * above 4GB. Allow for this by allocating small requests under * 4GB unless the allocation size is larger than some arbitrary * number (32MB currently).
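
A note on the size normalization near the top of pci_emul_alloc_pbar() above: flsl() returns the 1-based index of the most significant set bit, so for a size that is not already a power of two, 1UL << flsl(size) is the next power of two up; the (size & (size - 1)) guard leaves exact powers of two unchanged. A minimal check (flsl() is in <strings.h> on FreeBSD):

    #include <strings.h>
    #include <stdint.h>

    static uint64_t
    bar_size_round(uint64_t size)
    {
            if ((size & (size - 1)) != 0)           /* not a power of 2 */
                    size = 1UL << flsl(size);       /* next power of 2 up */
            return (size);
    }
    /* bar_size_round(0x5000) == 0x8000; bar_size_round(0x4000) == 0x4000. */
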
*/ if (size > 32 * 1024 * 1024) { /* * XXX special case for device requiring peer-peer DMA */ if (size == 0x100000000UL) baseptr = &hostbase; else baseptr = &pci_emul_membase64; limit = PCI_EMUL_MEMLIMIT64; mask = PCIM_BAR_MEM_BASE; lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | PCIM_BAR_MEM_PREFETCH; } else { baseptr = &pci_emul_membase32; limit = PCI_EMUL_MEMLIMIT32; mask = PCIM_BAR_MEM_BASE; lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; } enbit = PCIM_CMD_MEMEN; break; case PCIBAR_MEM32: baseptr = &pci_emul_membase32; limit = PCI_EMUL_MEMLIMIT32; mask = PCIM_BAR_MEM_BASE; lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; enbit = PCIM_CMD_MEMEN; break; default: printf("pci_emul_alloc_base: invalid bar type %d\n", type); assert(0); } if (baseptr != NULL) { error = pci_emul_alloc_resource(baseptr, limit, size, &addr); if (error != 0) return (error); } pdi->pi_bar[idx].type = type; pdi->pi_bar[idx].addr = addr; pdi->pi_bar[idx].size = size; /* Initialize the BAR register in config space */ bar = (addr & mask) | lobits; pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); if (type == PCIBAR_MEM64) { assert(idx + 1 <= PCI_BARMAX); pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); } cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND); if ((cmd & enbit) != enbit) pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit); register_bar(pdi, idx); return (0); } #define CAP_START_OFFSET 0x40 static int pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) { int i, capoff, reallen; uint16_t sts; assert(caplen > 0); reallen = roundup2(caplen, 4); /* dword aligned */ sts = pci_get_cfgdata16(pi, PCIR_STATUS); if ((sts & PCIM_STATUS_CAPPRESENT) == 0) capoff = CAP_START_OFFSET; else capoff = pi->pi_capend + 1; /* Check if we have enough space */ if (capoff + reallen > PCI_REGMAX + 1) return (-1); /* Set the previous capability pointer */ if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); } else pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); /* Copy the capability */ for (i = 0; i < caplen; i++) pci_set_cfgdata8(pi, capoff + i, capdata[i]); /* Set the next capability pointer */ pci_set_cfgdata8(pi, capoff + 1, 0); pi->pi_prevcap = capoff; pi->pi_capend = capoff + reallen - 1; return (0); } static struct pci_devemu * pci_emul_finddev(char *name) { struct pci_devemu **pdpp, *pdp; SET_FOREACH(pdpp, pci_devemu_set) { pdp = *pdpp; if (!strcmp(pdp->pe_emu, name)) { return (pdp); } } return (NULL); } static int pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, int func, struct funcinfo *fi) { struct pci_devinst *pdi; int err; pdi = calloc(1, sizeof(struct pci_devinst)); pdi->pi_vmctx = ctx; pdi->pi_bus = bus; pdi->pi_slot = slot; pdi->pi_func = func; pthread_mutex_init(&pdi->pi_lintr.lock, NULL); pdi->pi_lintr.pin = 0; pdi->pi_lintr.state = IDLE; pdi->pi_lintr.pirq_pin = 0; pdi->pi_lintr.ioapic_irq = 0; pdi->pi_d = pde; snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); /* Disable legacy interrupts */ pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN); err = (*pde->pe_init)(ctx, pdi, fi->fi_param); if (err == 0) fi->fi_devi = pdi; else free(pdi); return (err); } void pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) { int mmc; /* Number of msi messages must be a power of 2 between 1 and 32 */ assert((msgnum & (msgnum - 1)) 
== 0 && msgnum >= 1 && msgnum <= 32); mmc = ffs(msgnum) - 1; bzero(msicap, sizeof(struct msicap)); msicap->capid = PCIY_MSI; msicap->nextptr = nextptr; msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); } int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) { struct msicap msicap; pci_populate_msicap(&msicap, msgnum, 0); return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); } static void pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, uint32_t msix_tab_size) { assert(msix_tab_size % 4096 == 0); bzero(msixcap, sizeof(struct msixcap)); msixcap->capid = PCIY_MSIX; /* * Message Control Register, all fields set to * zero except for the Table Size. * Note: Table size N is encoded as N-1 */ msixcap->msgctrl = msgnum - 1; /* * MSI-X BAR setup: * - MSI-X table start at offset 0 * - PBA table starts at a 4K aligned offset after the MSI-X table */ msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); } static void pci_msix_table_init(struct pci_devinst *pi, int table_entries) { int i, table_size; assert(table_entries > 0); assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; pi->pi_msix.table = calloc(1, table_size); /* set mask bit of vector control register */ for (i = 0; i < table_entries; i++) pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; } int pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) { uint32_t tab_size; struct msixcap msixcap; assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; /* Align table size to nearest 4K */ tab_size = roundup2(tab_size, 4096); pi->pi_msix.table_bar = barnum; pi->pi_msix.pba_bar = barnum; pi->pi_msix.table_offset = 0; pi->pi_msix.table_count = msgnum; pi->pi_msix.pba_offset = tab_size; pi->pi_msix.pba_size = PBA_SIZE(msgnum); pci_msix_table_init(pi, msgnum); pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); /* allocate memory for MSI-X Table and PBA */ pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, tab_size + pi->pi_msix.pba_size); return (pci_emul_add_capability(pi, (u_char *)&msixcap, sizeof(msixcap))); } void msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, int bytes, uint32_t val) { uint16_t msgctrl, rwmask; int off; off = offset - capoff; /* Message Control Register */ if (off == 2 && bytes == 2) { rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; msgctrl = pci_get_cfgdata16(pi, offset); msgctrl &= ~rwmask; msgctrl |= val & rwmask; val = msgctrl; pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; pci_lintr_update(pi); } CFGWRITE(pi, offset, val, bytes); } void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, int bytes, uint32_t val) { uint16_t msgctrl, rwmask, msgdata, mme; uint32_t addrlo; /* * If guest is writing to the message control register make sure * we do not overwrite read-only fields. 
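
pci_emul_add_msixcap() above packs the table and the PBA into a single BAR: the table starts at offset 0 and its size is rounded up to 4 KB, and the PBA (one pending bit per vector, in 8-byte chunks per the PBA_SIZE macro used above) follows at that 4 KB boundary. Worked through for a hypothetical 64-vector device:

    table:  64 entries * 16 bytes = 1024 -> rounded to 4096 (table_offset 0)
    PBA:    pba_offset 4096, pba_size = 64/64 * 8 = 8 bytes
    BAR:    4096 + 8 bytes requested, rounded up to a power of 2 (8 KB) at allocation
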
*/ if ((offset - capoff) == 2 && bytes == 2) { rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; msgctrl = pci_get_cfgdata16(pi, offset); msgctrl &= ~rwmask; msgctrl |= val & rwmask; val = msgctrl; addrlo = pci_get_cfgdata32(pi, capoff + 4); if (msgctrl & PCIM_MSICTRL_64BIT) msgdata = pci_get_cfgdata16(pi, capoff + 12); else msgdata = pci_get_cfgdata16(pi, capoff + 8); mme = msgctrl & PCIM_MSICTRL_MME_MASK; pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; if (pi->pi_msi.enabled) { pi->pi_msi.addr = addrlo; pi->pi_msi.msg_data = msgdata; pi->pi_msi.maxmsgnum = 1 << (mme >> 4); } else { pi->pi_msi.maxmsgnum = 0; } pci_lintr_update(pi); } CFGWRITE(pi, offset, val, bytes); } void pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, int bytes, uint32_t val) { /* XXX don't write to the readonly parts */ CFGWRITE(pi, offset, val, bytes); } #define PCIECAP_VERSION 0x2 int pci_emul_add_pciecap(struct pci_devinst *pi, int type) { int err; struct pciecap pciecap; bzero(&pciecap, sizeof(pciecap)); /* * Use the integrated endpoint type for endpoints on a root complex bus. * * NB: bhyve currently only supports a single PCI bus that is the root * complex bus, so all endpoints are integrated. */ if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0)) type = PCIEM_TYPE_ROOT_INT_EP; pciecap.capid = PCIY_EXPRESS; pciecap.pcie_capabilities = PCIECAP_VERSION | type; if (type != PCIEM_TYPE_ROOT_INT_EP) { pciecap.link_capabilities = 0x411; /* gen1, x1 */ pciecap.link_status = 0x11; /* gen1, x1 */ } err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); return (err); } /* * This function assumes that 'coff' is in the capabilities region of the * config space. */ static void pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val) { int capid; uint8_t capoff, nextoff; /* Do not allow un-aligned writes */ if ((offset & (bytes - 1)) != 0) return; /* Find the capability that we want to update */ capoff = CAP_START_OFFSET; while (1) { nextoff = pci_get_cfgdata8(pi, capoff + 1); if (nextoff == 0) break; if (offset >= capoff && offset < nextoff) break; capoff = nextoff; } assert(offset >= capoff); /* * Capability ID and Next Capability Pointer are readonly. * However, some o/s's do 4-byte writes that include these. * For this case, trim the write back to 2 bytes and adjust * the data. */ if (offset == capoff || offset == capoff + 1) { if (offset == capoff && bytes == 4) { bytes = 2; offset += 2; val >>= 16; } else return; } capid = pci_get_cfgdata8(pi, capoff); switch (capid) { case PCIY_MSI: msicap_cfgwrite(pi, capoff, offset, bytes, val); break; case PCIY_MSIX: msixcap_cfgwrite(pi, capoff, offset, bytes, val); break; case PCIY_EXPRESS: pciecap_cfgwrite(pi, capoff, offset, bytes, val); break; default: break; } } static int pci_emul_iscap(struct pci_devinst *pi, int offset) { uint16_t sts; sts = pci_get_cfgdata16(pi, PCIR_STATUS); if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) return (1); } return (0); } static int pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, int size, uint64_t *val, void *arg1, long arg2) { /* * Ignore writes; return 0xff's for reads. The mem read code * will take care of truncating to the correct size. 
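
pci_emul_capwrite() above must find the capability that owns the written offset; capabilities live in config space as a singly linked list rooted at PCIR_CAP_PTR (0x34), each node an (id, next) byte pair followed by its body. The classic ID-based walk over the same structure, as a generic sketch:

    #include <stdint.h>

    /* Return the offset of capability 'capid', or 0 if absent. */
    static uint8_t
    find_cap(const uint8_t cfg[256], uint8_t capid)
    {
            uint8_t off = cfg[0x34];                /* PCIR_CAP_PTR */

            while (off != 0) {
                    if (cfg[off] == capid)
                            return (off);
                    off = cfg[off + 1];             /* next capability */
            }
            return (0);
    }
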
*/ if (dir == MEM_F_READ) { *val = 0xffffffffffffffff; } return (0); } static int pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, int bytes, uint64_t *val, void *arg1, long arg2) { int bus, slot, func, coff, in; coff = addr & 0xfff; func = (addr >> 12) & 0x7; slot = (addr >> 15) & 0x1f; bus = (addr >> 20) & 0xff; in = (dir == MEM_F_READ); if (in) *val = ~0UL; pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, (uint32_t *)val); return (0); } uint64_t pci_ecfg_base(void) { return (PCI_EMUL_ECFG_BASE); } #define BUSIO_ROUNDUP 32 #define BUSMEM_ROUNDUP (1024 * 1024) int init_pci(struct vmctx *ctx) { struct mem_range mr; struct pci_devemu *pde; struct businfo *bi; struct slotinfo *si; struct funcinfo *fi; size_t lowmem; int bus, slot, func; int error; pci_emul_iobase = PCI_EMUL_IOBASE; pci_emul_membase32 = vm_get_lowmem_limit(ctx); pci_emul_membase64 = PCI_EMUL_MEMBASE64; for (bus = 0; bus < MAXBUSES; bus++) { if ((bi = pci_businfo[bus]) == NULL) continue; /* * Keep track of the i/o and memory resources allocated to * this bus. */ bi->iobase = pci_emul_iobase; bi->membase32 = pci_emul_membase32; bi->membase64 = pci_emul_membase64; for (slot = 0; slot < MAXSLOTS; slot++) { si = &bi->slotinfo[slot]; for (func = 0; func < MAXFUNCS; func++) { fi = &si->si_funcs[func]; if (fi->fi_name == NULL) continue; pde = pci_emul_finddev(fi->fi_name); assert(pde != NULL); error = pci_emul_init(ctx, pde, bus, slot, func, fi); if (error) return (error); } } /* * Add some slop to the I/O and memory resources decoded by * this bus to give a guest some flexibility if it wants to * reprogram the BARs. */ pci_emul_iobase += BUSIO_ROUNDUP; pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); bi->iolimit = pci_emul_iobase; pci_emul_membase32 += BUSMEM_ROUNDUP; pci_emul_membase32 = roundup2(pci_emul_membase32, BUSMEM_ROUNDUP); bi->memlimit32 = pci_emul_membase32; pci_emul_membase64 += BUSMEM_ROUNDUP; pci_emul_membase64 = roundup2(pci_emul_membase64, BUSMEM_ROUNDUP); bi->memlimit64 = pci_emul_membase64; } /* * PCI backends are initialized before routing INTx interrupts * so that LPC devices are able to reserve ISA IRQs before * routing PIRQ pins. */ for (bus = 0; bus < MAXBUSES; bus++) { if ((bi = pci_businfo[bus]) == NULL) continue; for (slot = 0; slot < MAXSLOTS; slot++) { si = &bi->slotinfo[slot]; for (func = 0; func < MAXFUNCS; func++) { fi = &si->si_funcs[func]; if (fi->fi_devi == NULL) continue; pci_lintr_route(fi->fi_devi); } } } lpc_pirq_routed(); /* * The guest physical memory map looks like the following: * [0, lowmem) guest system memory * [lowmem, lowmem_limit) memory hole (may be absent) * [lowmem_limit, 0xE0000000) PCI hole (32-bit BAR allocation) * [0xE0000000, 0xF0000000) PCI extended config window * [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware * [4GB, 4GB + highmem) */ /* * Accesses to memory addresses that are not allocated to system * memory or PCI devices return 0xff's. 
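
pci_emul_ecfg_handler() above follows the standard ECAM layout of one 4 KB config page per function, so the decode is pure bit slicing: register offset in bits 11:0, function in 14:12, slot in 19:15, bus in 27:20 of the offset from the ECAM window base. For illustration:

    #include <stdint.h>

    static void
    ecam_decode(uint64_t off, int *bus, int *slot, int *func, int *reg)
    {
            *reg = off & 0xfff;             /* 4 KB of config space per function */
            *func = (off >> 12) & 0x7;
            *slot = (off >> 15) & 0x1f;
            *bus = (off >> 20) & 0xff;
    }
    /* 0x00205008 -> bus 2, slot 0, func 5, register 0x008. */
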
*/ lowmem = vm_get_lowmem_size(ctx); bzero(&mr, sizeof(struct mem_range)); mr.name = "PCI hole"; mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; mr.base = lowmem; mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem; mr.handler = pci_emul_fallback_handler; error = register_mem_fallback(&mr); assert(error == 0); /* PCI extended config space */ bzero(&mr, sizeof(struct mem_range)); mr.name = "PCI ECFG"; mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; mr.base = PCI_EMUL_ECFG_BASE; mr.size = PCI_EMUL_ECFG_SIZE; mr.handler = pci_emul_ecfg_handler; error = register_mem(&mr); assert(error == 0); return (0); } static void pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, void *arg) { dsdt_line(" Package ()"); dsdt_line(" {"); dsdt_line(" 0x%X,", slot << 16 | 0xffff); dsdt_line(" 0x%02X,", pin - 1); dsdt_line(" Zero,"); dsdt_line(" 0x%X", ioapic_irq); dsdt_line(" },"); } static void pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, void *arg) { char *name; name = lpc_pirq_name(pirq_pin); if (name == NULL) return; dsdt_line(" Package ()"); dsdt_line(" {"); dsdt_line(" 0x%X,", slot << 16 | 0xffff); dsdt_line(" 0x%02X,", pin - 1); dsdt_line(" %s,", name); dsdt_line(" 0x00"); dsdt_line(" },"); free(name); } /* * A bhyve virtual machine has a flat PCI hierarchy with a root port * corresponding to each PCI bus. */ static void pci_bus_write_dsdt(int bus) { struct businfo *bi; struct slotinfo *si; struct pci_devinst *pi; int count, func, slot; /* * If there are no devices on this 'bus' then just return. */ if ((bi = pci_businfo[bus]) == NULL) { /* * Bus 0 is special because it decodes the I/O ports used * for PCI config space access even if there are no devices * on it. */ if (bus != 0) return; } dsdt_line(" Device (PC%02X)", bus); dsdt_line(" {"); dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); dsdt_line(" Method (_BBN, 0, NotSerialized)"); dsdt_line(" {"); dsdt_line(" Return (0x%08X)", bus); dsdt_line(" }"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " "MaxFixed, PosDecode,"); dsdt_line(" 0x0000, // Granularity"); dsdt_line(" 0x%04X, // Range Minimum", bus); dsdt_line(" 0x%04X, // Range Maximum", bus); dsdt_line(" 0x0000, // Translation Offset"); dsdt_line(" 0x0001, // Length"); dsdt_line(" ,, )"); if (bus == 0) { dsdt_indent(3); dsdt_fixed_ioport(0xCF8, 8); dsdt_unindent(3); dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " "PosDecode, EntireRange,"); dsdt_line(" 0x0000, // Granularity"); dsdt_line(" 0x0000, // Range Minimum"); dsdt_line(" 0x0CF7, // Range Maximum"); dsdt_line(" 0x0000, // Translation Offset"); dsdt_line(" 0x0CF8, // Length"); dsdt_line(" ,, , TypeStatic)"); dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " "PosDecode, EntireRange,"); dsdt_line(" 0x0000, // Granularity"); dsdt_line(" 0x0D00, // Range Minimum"); dsdt_line(" 0x%04X, // Range Maximum", PCI_EMUL_IOBASE - 1); dsdt_line(" 0x0000, // Translation Offset"); dsdt_line(" 0x%04X, // Length", PCI_EMUL_IOBASE - 0x0D00); dsdt_line(" ,, , TypeStatic)"); if (bi == NULL) { dsdt_line(" })"); goto done; } } assert(bi != NULL); /* i/o window */ dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " "PosDecode, EntireRange,"); dsdt_line(" 0x0000, // Granularity"); dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); dsdt_line(" 0x%04X, // Range Maximum", bi->iolimit - 1); dsdt_line(" 0x0000, // Translation Offset"); dsdt_line(" 0x%04X, // Length", bi->iolimit - bi->iobase); dsdt_line(" ,, , TypeStatic)"); /* mmio 
window (32-bit) */ dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); dsdt_line(" 0x00000000, // Granularity"); dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); dsdt_line(" 0x%08X, // Range Maximum\n", bi->memlimit32 - 1); dsdt_line(" 0x00000000, // Translation Offset"); dsdt_line(" 0x%08X, // Length\n", bi->memlimit32 - bi->membase32); dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); /* mmio window (64-bit) */ dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); dsdt_line(" 0x0000000000000000, // Granularity"); dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); dsdt_line(" 0x%016lX, // Range Maximum\n", bi->memlimit64 - 1); dsdt_line(" 0x0000000000000000, // Translation Offset"); dsdt_line(" 0x%016lX, // Length\n", bi->memlimit64 - bi->membase64); dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); dsdt_line(" })"); count = pci_count_lintr(bus); if (count != 0) { dsdt_indent(2); dsdt_line("Name (PPRT, Package ()"); dsdt_line("{"); pci_walk_lintr(bus, pci_pirq_prt_entry, NULL); dsdt_line("})"); dsdt_line("Name (APRT, Package ()"); dsdt_line("{"); pci_walk_lintr(bus, pci_apic_prt_entry, NULL); dsdt_line("})"); dsdt_line("Method (_PRT, 0, NotSerialized)"); dsdt_line("{"); dsdt_line(" If (PICM)"); dsdt_line(" {"); dsdt_line(" Return (APRT)"); dsdt_line(" }"); dsdt_line(" Else"); dsdt_line(" {"); dsdt_line(" Return (PPRT)"); dsdt_line(" }"); dsdt_line("}"); dsdt_unindent(2); } dsdt_indent(2); for (slot = 0; slot < MAXSLOTS; slot++) { si = &bi->slotinfo[slot]; for (func = 0; func < MAXFUNCS; func++) { pi = si->si_funcs[func].fi_devi; if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) pi->pi_d->pe_write_dsdt(pi); } } dsdt_unindent(2); done: dsdt_line(" }"); } void pci_write_dsdt(void) { int bus; dsdt_indent(1); dsdt_line("Name (PICM, 0x00)"); dsdt_line("Method (_PIC, 1, NotSerialized)"); dsdt_line("{"); dsdt_line(" Store (Arg0, PICM)"); dsdt_line("}"); dsdt_line(""); dsdt_line("Scope (_SB)"); dsdt_line("{"); for (bus = 0; bus < MAXBUSES; bus++) pci_bus_write_dsdt(bus); dsdt_line("}"); dsdt_unindent(1); } int pci_bus_configured(int bus) { assert(bus >= 0 && bus < MAXBUSES); return (pci_businfo[bus] != NULL); } int pci_msi_enabled(struct pci_devinst *pi) { return (pi->pi_msi.enabled); } int pci_msi_maxmsgnum(struct pci_devinst *pi) { if (pi->pi_msi.enabled) return (pi->pi_msi.maxmsgnum); else return (0); } int pci_msix_enabled(struct pci_devinst *pi) { return (pi->pi_msix.enabled && !pi->pi_msi.enabled); } void pci_generate_msix(struct pci_devinst *pi, int index) { struct msix_table_entry *mte; if (!pci_msix_enabled(pi)) return; if (pi->pi_msix.function_mask) return; if (index >= pi->pi_msix.table_count) return; mte = &pi->pi_msix.table[index]; if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { /* XXX Set PBA bit if interrupt is disabled */ vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data); } } void pci_generate_msi(struct pci_devinst *pi, int index) { if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, pi->pi_msi.msg_data + index); } } static bool pci_lintr_permitted(struct pci_devinst *pi) { uint16_t cmd; cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); return (!(pi->pi_msi.enabled || pi->pi_msix.enabled || (cmd & PCIM_CMD_INTxDIS))); } void pci_lintr_request(struct pci_devinst *pi) { struct businfo *bi; struct slotinfo *si; int bestpin, bestcount, pin; bi = pci_businfo[pi->pi_bus]; assert(bi != NULL); /* * Just 
allocate a pin from our slot. The pin will be * assigned IRQs later when interrupts are routed. */ si = &bi->slotinfo[pi->pi_slot]; bestpin = 0; bestcount = si->si_intpins[0].ii_count; for (pin = 1; pin < 4; pin++) { if (si->si_intpins[pin].ii_count < bestcount) { bestpin = pin; bestcount = si->si_intpins[pin].ii_count; } } si->si_intpins[bestpin].ii_count++; pi->pi_lintr.pin = bestpin + 1; pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); } static void pci_lintr_route(struct pci_devinst *pi) { struct businfo *bi; struct intxinfo *ii; if (pi->pi_lintr.pin == 0) return; bi = pci_businfo[pi->pi_bus]; assert(bi != NULL); ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1]; /* * Attempt to allocate an I/O APIC pin for this intpin if one * is not yet assigned. */ if (ii->ii_ioapic_irq == 0) ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi); assert(ii->ii_ioapic_irq > 0); /* * Attempt to allocate a PIRQ pin for this intpin if one is * not yet assigned. */ if (ii->ii_pirq_pin == 0) ii->ii_pirq_pin = pirq_alloc_pin(pi); assert(ii->ii_pirq_pin > 0); pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq; pi->pi_lintr.pirq_pin = ii->ii_pirq_pin; pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin)); } void pci_lintr_assert(struct pci_devinst *pi) { assert(pi->pi_lintr.pin > 0); pthread_mutex_lock(&pi->pi_lintr.lock); if (pi->pi_lintr.state == IDLE) { if (pci_lintr_permitted(pi)) { pi->pi_lintr.state = ASSERTED; pci_irq_assert(pi); } else pi->pi_lintr.state = PENDING; } pthread_mutex_unlock(&pi->pi_lintr.lock); } void pci_lintr_deassert(struct pci_devinst *pi) { assert(pi->pi_lintr.pin > 0); pthread_mutex_lock(&pi->pi_lintr.lock); if (pi->pi_lintr.state == ASSERTED) { pi->pi_lintr.state = IDLE; pci_irq_deassert(pi); } else if (pi->pi_lintr.state == PENDING) pi->pi_lintr.state = IDLE; pthread_mutex_unlock(&pi->pi_lintr.lock); } static void pci_lintr_update(struct pci_devinst *pi) { pthread_mutex_lock(&pi->pi_lintr.lock); if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { pci_irq_deassert(pi); pi->pi_lintr.state = PENDING; } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { pi->pi_lintr.state = ASSERTED; pci_irq_assert(pi); } pthread_mutex_unlock(&pi->pi_lintr.lock); } int pci_count_lintr(int bus) { int count, slot, pin; struct slotinfo *slotinfo; count = 0; if (pci_businfo[bus] != NULL) { for (slot = 0; slot < MAXSLOTS; slot++) { slotinfo = &pci_businfo[bus]->slotinfo[slot]; for (pin = 0; pin < 4; pin++) { if (slotinfo->si_intpins[pin].ii_count != 0) count++; } } } return (count); } void pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg) { struct businfo *bi; struct slotinfo *si; struct intxinfo *ii; int slot, pin; if ((bi = pci_businfo[bus]) == NULL) return; for (slot = 0; slot < MAXSLOTS; slot++) { si = &bi->slotinfo[slot]; for (pin = 0; pin < 4; pin++) { ii = &si->si_intpins[pin]; if (ii->ii_count != 0) cb(bus, slot, pin + 1, ii->ii_pirq_pin, ii->ii_ioapic_irq, arg); } } } /* * Return 1 if the emulated device in 'slot' is a multi-function device. * Return 0 otherwise. */ static int pci_emul_is_mfdev(int bus, int slot) { struct businfo *bi; struct slotinfo *si; int f, numfuncs; numfuncs = 0; if ((bi = pci_businfo[bus]) != NULL) { si = &bi->slotinfo[slot]; for (f = 0; f < MAXFUNCS; f++) { if (si->si_funcs[f].fi_devi != NULL) { numfuncs++; } } } return (numfuncs > 1); } /* * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on * whether or not is a multi-function being emulated in the pci 'slot'. 
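 *
 * E.g. a 1-byte read of PCIR_HDRTYPE on a slot carrying two or more
 * functions is patched up below as
 *
 *	*rv |= PCIM_MFDEV;		(bit 7 of the header type)
 *
 * while the same read on a single-function slot has the bit cleared.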
*/ static void pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) { int mfdev; if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { mfdev = pci_emul_is_mfdev(bus, slot); switch (bytes) { case 1: case 2: *rv &= ~PCIM_MFDEV; if (mfdev) { *rv |= PCIM_MFDEV; } break; case 4: *rv &= ~(PCIM_MFDEV << 16); if (mfdev) { *rv |= (PCIM_MFDEV << 16); } break; } } } /* * Update device state in response to changes to the PCI command * register. */ void pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old) { int i; uint16_t changed, new; new = pci_get_cfgdata16(pi, PCIR_COMMAND); changed = old ^ new; /* * If the MMIO or I/O address space decoding has changed then * register/unregister all BARs that decode that address space. */ for (i = 0; i <= PCI_BARMAX; i++) { switch (pi->pi_bar[i].type) { case PCIBAR_NONE: case PCIBAR_MEMHI64: break; case PCIBAR_IO: /* I/O address space decoding changed? */ if (changed & PCIM_CMD_PORTEN) { if (new & PCIM_CMD_PORTEN) register_bar(pi, i); else unregister_bar(pi, i); } break; case PCIBAR_MEM32: case PCIBAR_MEM64: /* MMIO address space decoding changed? */ if (changed & PCIM_CMD_MEMEN) { if (new & PCIM_CMD_MEMEN) register_bar(pi, i); else unregister_bar(pi, i); } break; default: assert(0); } } /* * If INTx has been unmasked and is pending, assert the * interrupt. */ pci_lintr_update(pi); } static void pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes) { int rshift; uint32_t cmd, old, readonly; cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ /* * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3. * * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are * 'write 1 to clear'. However these bits are not set to '1' by * any device emulation so it is simpler to treat them as readonly. */ rshift = (coff & 0x3) * 8; readonly = 0xFFFFF880 >> rshift; old = CFGREAD(pi, coff, bytes); new &= ~readonly; new |= (old & readonly); CFGWRITE(pi, coff, new, bytes); /* update config */ pci_emul_cmd_changed(pi, cmd); } static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, int coff, int bytes, uint32_t *eax) { struct businfo *bi; struct slotinfo *si; struct pci_devinst *pi; struct pci_devemu *pe; int idx, needcfg; uint64_t addr, bar, mask; if ((bi = pci_businfo[bus]) != NULL) { si = &bi->slotinfo[slot]; pi = si->si_funcs[func].fi_devi; } else pi = NULL; /* * Just return if there is no device at this slot:func or if the * the guest is doing an un-aligned access. */ if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) || (coff & (bytes - 1)) != 0) { if (in) *eax = 0xffffffff; return; } /* * Ignore all writes beyond the standard config space and return all * ones on reads. */ if (coff >= PCI_REGMAX + 1) { if (in) { *eax = 0xffffffff; /* * Extended capabilities begin at offset 256 in config * space. Absence of extended capabilities is signaled * with all 0s in the extended capability header at * offset 256. 
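 *
 * Concretely: a dword read at offset 0x100 returns 0x00000000, so
 * the guest sees an empty extended capability list, while any read
 * beyond that header dword returns the usual 0xffffffff for
 * unimplemented space.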
*/ if (coff <= PCI_REGMAX + 4) *eax = 0x00000000; } return; } pe = pi->pi_d; /* * Config read */ if (in) { /* Let the device emulation override the default handler */ if (pe->pe_cfgread != NULL) { needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes, eax); } else { needcfg = 1; } if (needcfg) *eax = CFGREAD(pi, coff, bytes); pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax); } else { /* Let the device emulation override the default handler */ if (pe->pe_cfgwrite != NULL && (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0) return; /* * Special handling for write to BAR registers */ if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) { /* * Ignore writes to BAR registers that are not * 4-byte aligned. */ if (bytes != 4 || (coff & 0x3) != 0) return; idx = (coff - PCIR_BAR(0)) / 4; mask = ~(pi->pi_bar[idx].size - 1); switch (pi->pi_bar[idx].type) { case PCIBAR_NONE: pi->pi_bar[idx].addr = bar = 0; break; case PCIBAR_IO: addr = *eax & mask; addr &= 0xffff; bar = addr | PCIM_BAR_IO_SPACE; /* * Register the new BAR value for interception */ if (addr != pi->pi_bar[idx].addr) { update_bar_address(pi, addr, idx, PCIBAR_IO); } break; case PCIBAR_MEM32: addr = bar = *eax & mask; bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; if (addr != pi->pi_bar[idx].addr) { update_bar_address(pi, addr, idx, PCIBAR_MEM32); } break; case PCIBAR_MEM64: addr = bar = *eax & mask; bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | PCIM_BAR_MEM_PREFETCH; if (addr != (uint32_t)pi->pi_bar[idx].addr) { update_bar_address(pi, addr, idx, PCIBAR_MEM64); } break; case PCIBAR_MEMHI64: mask = ~(pi->pi_bar[idx - 1].size - 1); addr = ((uint64_t)*eax << 32) & mask; bar = addr >> 32; if (bar != pi->pi_bar[idx - 1].addr >> 32) { update_bar_address(pi, addr, idx - 1, PCIBAR_MEMHI64); } break; default: assert(0); } pci_set_cfgdata32(pi, coff, bar); } else if (pci_emul_iscap(pi, coff)) { pci_emul_capwrite(pi, coff, bytes, *eax); } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) { pci_emul_cmdsts_write(pi, coff, *eax, bytes); } else { CFGWRITE(pi, coff, *eax, bytes); } } } static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; static int pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) { uint32_t x; if (bytes != 4) { if (in) *eax = (bytes == 2) ? 
0xffff : 0xff; return (0); } if (in) { x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff; if (cfgenable) x |= CONF1_ENABLE; *eax = x; } else { x = *eax; cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; cfgoff = x & PCI_REGMAX; cfgfunc = (x >> 8) & PCI_FUNCMAX; cfgslot = (x >> 11) & PCI_SLOTMAX; cfgbus = (x >> 16) & PCI_BUSMAX; } return (0); } INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); static int pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) { int coff; assert(bytes == 1 || bytes == 2 || bytes == 4); coff = cfgoff + (port - CONF1_DATA_PORT); if (cfgenable) { pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes, eax); } else { /* Ignore accesses to cfgdata if not enabled by cfgaddr */ if (in) *eax = 0xffffffff; } return (0); } INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); #define PCI_EMUL_TEST #ifdef PCI_EMUL_TEST /* * Define a dummy test device */ #define DIOSZ 8 #define DMEMSZ 4096 struct pci_emul_dsoftc { uint8_t ioregs[DIOSZ]; uint8_t memregs[2][DMEMSZ]; }; #define PCI_EMUL_MSI_MSGS 4 #define PCI_EMUL_MSIX_MSGS 16 static int pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { int error; struct pci_emul_dsoftc *sc; sc = calloc(1, sizeof(struct pci_emul_dsoftc)); pi->pi_arg = sc; pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); assert(error == 0); error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); assert(error == 0); error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); assert(error == 0); error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ); assert(error == 0); return (0); } static void pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { int i; struct pci_emul_dsoftc *sc = pi->pi_arg; if (baridx == 0) { if (offset + size > DIOSZ) { printf("diow: iow too large, offset %ld size %d\n", offset, size); return; } if (size == 1) { sc->ioregs[offset] = value & 0xff; } else if (size == 2) { *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; } else if (size == 4) { *(uint32_t *)&sc->ioregs[offset] = value; } else { printf("diow: iow unknown size %d\n", size); } /* * Special magic value to generate an interrupt */ if (offset == 4 && size == 4 && pci_msi_enabled(pi)) pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); if (value == 0xabcdef) { for (i = 0; i < pci_msi_maxmsgnum(pi); i++) pci_generate_msi(pi, i); } } if (baridx == 1 || baridx == 2) { if (offset + size > DMEMSZ) { printf("diow: memw too large, offset %ld size %d\n", offset, size); return; } i = baridx - 1; /* 'memregs' index */ if (size == 1) { sc->memregs[i][offset] = value; } else if (size == 2) { *(uint16_t *)&sc->memregs[i][offset] = value; } else if (size == 4) { *(uint32_t *)&sc->memregs[i][offset] = value; } else if (size == 8) { *(uint64_t *)&sc->memregs[i][offset] = value; } else { printf("diow: memw unknown size %d\n", size); } /* * magic interrupt ?? 
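 *
 * (Only the I/O BAR defines doorbells.  From the guest side, e.g.:
 *
 *	outl(iobase + 4, n);		raises MSI vector n % maxmsgnum
 *	outl(iobase + 0, 0xabcdef);	fires every allocated vector
 *
 * provided the MSI capability was enabled first.)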
*/ } if (baridx > 2 || baridx < 0) { printf("diow: unknown bar idx %d\n", baridx); } } static uint64_t pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { struct pci_emul_dsoftc *sc = pi->pi_arg; uint32_t value; int i; if (baridx == 0) { if (offset + size > DIOSZ) { printf("dior: ior too large, offset %ld size %d\n", offset, size); return (0); } value = 0; if (size == 1) { value = sc->ioregs[offset]; } else if (size == 2) { value = *(uint16_t *) &sc->ioregs[offset]; } else if (size == 4) { value = *(uint32_t *) &sc->ioregs[offset]; } else { printf("dior: ior unknown size %d\n", size); } } if (baridx == 1 || baridx == 2) { if (offset + size > DMEMSZ) { printf("dior: memr too large, offset %ld size %d\n", offset, size); return (0); } i = baridx - 1; /* 'memregs' index */ if (size == 1) { value = sc->memregs[i][offset]; } else if (size == 2) { value = *(uint16_t *) &sc->memregs[i][offset]; } else if (size == 4) { value = *(uint32_t *) &sc->memregs[i][offset]; } else if (size == 8) { value = *(uint64_t *) &sc->memregs[i][offset]; } else { printf("dior: ior unknown size %d\n", size); } } if (baridx > 2 || baridx < 0) { printf("dior: unknown bar idx %d\n", baridx); return (0); } return (value); } struct pci_devemu pci_dummy = { .pe_emu = "dummy", .pe_init = pci_emul_dinit, .pe_barwrite = pci_emul_diow, .pe_barread = pci_emul_dior }; PCI_EMUL_SET(pci_dummy); #endif /* PCI_EMUL_TEST */ Index: stable/12/usr.sbin/bhyve/pci_fbuf.c =================================================================== --- stable/12/usr.sbin/bhyve/pci_fbuf.c (revision 358183) +++ stable/12/usr.sbin/bhyve/pci_fbuf.c (revision 358184) @@ -1,449 +1,450 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2015 Nahanni Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include "bhyvegc.h" #include "bhyverun.h" +#include "debug.h" #include "console.h" #include "inout.h" #include "pci_emul.h" #include "rfb.h" #include "vga.h" /* * bhyve Framebuffer device emulation. * BAR0 points to the current mode information. * BAR1 is the 32-bit framebuffer address. 
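 * BAR0 is a small packed register file (struct memregs below):
 *
 *	0x00 fbsize      (uint32)
 *	0x04 width       (uint16)
 *	0x06 height      (uint16)
 *	0x08 depth       (uint16)
 *	0x0a refreshrate (uint16)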
* * -s ,fbuf,wait,vga=on|io|off,rfb=:port,w=width,h=height */ static int fbuf_debug = 1; #define DEBUG_INFO 1 #define DEBUG_VERBOSE 4 -#define DPRINTF(level, params) if (level <= fbuf_debug) printf params +#define DPRINTF(level, params) if (level <= fbuf_debug) PRINTLN params #define KB (1024UL) #define MB (1024 * 1024UL) #define DMEMSZ 128 #define FB_SIZE (16*MB) #define COLS_MAX 1920 #define ROWS_MAX 1200 #define COLS_DEFAULT 1024 #define ROWS_DEFAULT 768 #define COLS_MIN 640 #define ROWS_MIN 480 struct pci_fbuf_softc { struct pci_devinst *fsc_pi; struct { uint32_t fbsize; uint16_t width; uint16_t height; uint16_t depth; uint16_t refreshrate; uint8_t reserved[116]; } __packed memregs; /* rfb server */ char *rfb_host; char *rfb_password; int rfb_port; int rfb_wait; int vga_enabled; int vga_full; uint32_t fbaddr; char *fb_base; uint16_t gc_width; uint16_t gc_height; void *vgasc; struct bhyvegc_image *gc_image; }; static struct pci_fbuf_softc *fbuf_sc; #define PCI_FBUF_MSI_MSGS 4 static void pci_fbuf_usage(char *opt) { - fprintf(stderr, "Invalid fbuf emulation option \"%s\"\r\n", opt); - fprintf(stderr, "fbuf: {wait,}{vga=on|io|off,}rfb=:port" - "{,w=width}{,h=height}\r\n"); + EPRINTLN("Invalid fbuf emulation option \"%s\"", opt); + EPRINTLN("fbuf: {wait,}{vga=on|io|off,}rfb=:port" + "{,w=width}{,h=height}"); } static void pci_fbuf_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { struct pci_fbuf_softc *sc; uint8_t *p; assert(baridx == 0); sc = pi->pi_arg; DPRINTF(DEBUG_VERBOSE, ("fbuf wr: offset 0x%lx, size: %d, value: 0x%lx\n", offset, size, value)); if (offset + size > DMEMSZ) { printf("fbuf: write too large, offset %ld size %d\n", offset, size); return; } p = (uint8_t *)&sc->memregs + offset; switch (size) { case 1: *p = value; break; case 2: *(uint16_t *)p = value; break; case 4: *(uint32_t *)p = value; break; case 8: *(uint64_t *)p = value; break; default: printf("fbuf: write unknown size %d\n", size); break; } if (!sc->gc_image->vgamode && sc->memregs.width == 0 && sc->memregs.height == 0) { - DPRINTF(DEBUG_INFO, ("switching to VGA mode\r\n")); + DPRINTF(DEBUG_INFO, ("switching to VGA mode")); sc->gc_image->vgamode = 1; sc->gc_width = 0; sc->gc_height = 0; } else if (sc->gc_image->vgamode && sc->memregs.width != 0 && sc->memregs.height != 0) { - DPRINTF(DEBUG_INFO, ("switching to VESA mode\r\n")); + DPRINTF(DEBUG_INFO, ("switching to VESA mode")); sc->gc_image->vgamode = 0; } } uint64_t pci_fbuf_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { struct pci_fbuf_softc *sc; uint8_t *p; uint64_t value; assert(baridx == 0); sc = pi->pi_arg; if (offset + size > DMEMSZ) { printf("fbuf: read too large, offset %ld size %d\n", offset, size); return (0); } p = (uint8_t *)&sc->memregs + offset; value = 0; switch (size) { case 1: value = *p; break; case 2: value = *(uint16_t *)p; break; case 4: value = *(uint32_t *)p; break; case 8: value = *(uint64_t *)p; break; default: printf("fbuf: read unknown size %d\n", size); break; } DPRINTF(DEBUG_VERBOSE, ("fbuf rd: offset 0x%lx, size: %d, value: 0x%lx\n", offset, size, value)); return (value); } static int pci_fbuf_parse_opts(struct pci_fbuf_softc *sc, char *opts) { char *uopts, *xopts, *config; char *tmpstr; int ret; ret = 0; uopts = strdup(opts); for (xopts = strtok(uopts, ","); xopts != NULL; xopts = strtok(NULL, ",")) { if (strcmp(xopts, "wait") == 0) { sc->rfb_wait = 1; continue; } if ((config = strchr(xopts, '=')) == NULL) { 
pci_fbuf_usage(xopts); ret = -1; goto done; } *config++ = '\0'; - DPRINTF(DEBUG_VERBOSE, ("pci_fbuf option %s = %s\r\n", + DPRINTF(DEBUG_VERBOSE, ("pci_fbuf option %s = %s", xopts, config)); if (!strcmp(xopts, "tcp") || !strcmp(xopts, "rfb")) { /* * IPv4 -- host-ip:port * IPv6 -- [host-ip%zone]:port * XXX for now port is mandatory. */ tmpstr = strsep(&config, "]"); if (config) { if (tmpstr[0] == '[') tmpstr++; sc->rfb_host = tmpstr; if (config[0] == ':') config++; else { pci_fbuf_usage(xopts); ret = -1; goto done; } sc->rfb_port = atoi(config); } else { config = tmpstr; tmpstr = strsep(&config, ":"); if (!config) sc->rfb_port = atoi(tmpstr); else { sc->rfb_port = atoi(config); sc->rfb_host = tmpstr; } } } else if (!strcmp(xopts, "vga")) { if (!strcmp(config, "off")) { sc->vga_enabled = 0; } else if (!strcmp(config, "io")) { sc->vga_enabled = 1; sc->vga_full = 0; } else if (!strcmp(config, "on")) { sc->vga_enabled = 1; sc->vga_full = 1; } else { pci_fbuf_usage(xopts); ret = -1; goto done; } } else if (!strcmp(xopts, "w")) { sc->memregs.width = atoi(config); if (sc->memregs.width > COLS_MAX) { pci_fbuf_usage(xopts); ret = -1; goto done; } else if (sc->memregs.width == 0) sc->memregs.width = 1920; } else if (!strcmp(xopts, "h")) { sc->memregs.height = atoi(config); if (sc->memregs.height > ROWS_MAX) { pci_fbuf_usage(xopts); ret = -1; goto done; } else if (sc->memregs.height == 0) sc->memregs.height = 1080; } else if (!strcmp(xopts, "password")) { sc->rfb_password = config; } else { pci_fbuf_usage(xopts); ret = -1; goto done; } } done: return (ret); } extern void vga_render(struct bhyvegc *gc, void *arg); void pci_fbuf_render(struct bhyvegc *gc, void *arg) { struct pci_fbuf_softc *sc; sc = arg; if (sc->vga_full && sc->gc_image->vgamode) { /* TODO: mode switching to vga and vesa should use the special * EFI-bhyve protocol port. 
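 *
 * Until then the switch is inferred from the mode registers (see
 * pci_fbuf_write above), effectively:
 *
 *	vgamode = (memregs.width == 0 && memregs.height == 0);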
*/ vga_render(gc, sc->vgasc); return; } if (sc->gc_width != sc->memregs.width || sc->gc_height != sc->memregs.height) { bhyvegc_resize(gc, sc->memregs.width, sc->memregs.height); sc->gc_width = sc->memregs.width; sc->gc_height = sc->memregs.height; } return; } static int pci_fbuf_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { int error, prot; struct pci_fbuf_softc *sc; if (fbuf_sc != NULL) { - fprintf(stderr, "Only one frame buffer device is allowed.\n"); + EPRINTLN("Only one frame buffer device is allowed."); return (-1); } sc = calloc(1, sizeof(struct pci_fbuf_softc)); pi->pi_arg = sc; /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, 0x40FB); pci_set_cfgdata16(pi, PCIR_VENDOR, 0xFB5D); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_DISPLAY); pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_DISPLAY_VGA); error = pci_emul_alloc_bar(pi, 0, PCIBAR_MEM32, DMEMSZ); assert(error == 0); error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, FB_SIZE); assert(error == 0); error = pci_emul_add_msicap(pi, PCI_FBUF_MSI_MSGS); assert(error == 0); sc->fbaddr = pi->pi_bar[1].addr; sc->memregs.fbsize = FB_SIZE; sc->memregs.width = COLS_DEFAULT; sc->memregs.height = ROWS_DEFAULT; sc->memregs.depth = 32; sc->vga_enabled = 1; sc->vga_full = 0; sc->fsc_pi = pi; error = pci_fbuf_parse_opts(sc, opts); if (error != 0) goto done; /* XXX until VGA rendering is enabled */ if (sc->vga_full != 0) { - fprintf(stderr, "pci_fbuf: VGA rendering not enabled"); + EPRINTLN("pci_fbuf: VGA rendering not enabled"); goto done; } sc->fb_base = vm_create_devmem(ctx, VM_FRAMEBUFFER, "framebuffer", FB_SIZE); if (sc->fb_base == MAP_FAILED) { error = -1; goto done; } - DPRINTF(DEBUG_INFO, ("fbuf frame buffer base: %p [sz %lu]\r\n", + DPRINTF(DEBUG_INFO, ("fbuf frame buffer base: %p [sz %lu]", sc->fb_base, FB_SIZE)); /* * Map the framebuffer into the guest address space. * XXX This may fail if the BAR is different than a prior * run. In this case flag the error. This will be fixed * when a change_memseg api is available. */ prot = PROT_READ | PROT_WRITE; if (vm_mmap_memseg(ctx, sc->fbaddr, VM_FRAMEBUFFER, 0, FB_SIZE, prot) != 0) { - fprintf(stderr, "pci_fbuf: mapseg failed - try deleting VM and restarting\n"); + EPRINTLN("pci_fbuf: mapseg failed - try deleting VM and restarting"); error = -1; goto done; } console_init(sc->memregs.width, sc->memregs.height, sc->fb_base); console_fb_register(pci_fbuf_render, sc); if (sc->vga_enabled) sc->vgasc = vga_init(!sc->vga_full); sc->gc_image = console_get_image(); fbuf_sc = sc; memset((void *)sc->fb_base, 0, FB_SIZE); error = rfb_init(sc->rfb_host, sc->rfb_port, sc->rfb_wait, sc->rfb_password); done: if (error) free(sc); return (error); } struct pci_devemu pci_fbuf = { .pe_emu = "fbuf", .pe_init = pci_fbuf_init, .pe_barwrite = pci_fbuf_write, .pe_barread = pci_fbuf_read }; PCI_EMUL_SET(pci_fbuf); Index: stable/12/usr.sbin/bhyve/pci_lpc.c =================================================================== --- stable/12/usr.sbin/bhyve/pci_lpc.c (revision 358183) +++ stable/12/usr.sbin/bhyve/pci_lpc.c (revision 358184) @@ -1,462 +1,463 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 Neel Natu * Copyright (c) 2013 Tycho Nightingale * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "acpi.h" +#include "debug.h" #include "bootrom.h" #include "inout.h" #include "pci_emul.h" #include "pci_irq.h" #include "pci_lpc.h" #include "uart_emul.h" #define IO_ICU1 0x20 #define IO_ICU2 0xA0 SET_DECLARE(lpc_dsdt_set, struct lpc_dsdt); SET_DECLARE(lpc_sysres_set, struct lpc_sysres); #define ELCR_PORT 0x4d0 SYSRES_IO(ELCR_PORT, 2); #define IO_TIMER1_PORT 0x40 #define NMISC_PORT 0x61 SYSRES_IO(NMISC_PORT, 1); static struct pci_devinst *lpc_bridge; static const char *romfile; #define LPC_UART_NUM 2 static struct lpc_uart_softc { struct uart_softc *uart_softc; const char *opts; int iobase; int irq; int enabled; } lpc_uart_softc[LPC_UART_NUM]; static const char *lpc_uart_names[LPC_UART_NUM] = { "COM1", "COM2" }; /* * LPC device configuration is in the following form: * [,] * For e.g. "com1,stdio" or "bootrom,/var/romfile" */ int lpc_device_parse(const char *opts) { int unit, error; char *str, *cpy, *lpcdev; error = -1; str = cpy = strdup(opts); lpcdev = strsep(&str, ","); if (lpcdev != NULL) { if (strcasecmp(lpcdev, "bootrom") == 0) { romfile = str; error = 0; goto done; } for (unit = 0; unit < LPC_UART_NUM; unit++) { if (strcasecmp(lpcdev, lpc_uart_names[unit]) == 0) { lpc_uart_softc[unit].opts = str; error = 0; goto done; } } } done: if (error) free(cpy); return (error); } void lpc_print_supported_devices() { size_t i; printf("bootrom\n"); for (i = 0; i < LPC_UART_NUM; i++) printf("%s\n", lpc_uart_names[i]); } const char * lpc_bootrom(void) { return (romfile); } static void lpc_uart_intr_assert(void *arg) { struct lpc_uart_softc *sc = arg; assert(sc->irq >= 0); vm_isa_pulse_irq(lpc_bridge->pi_vmctx, sc->irq, sc->irq); } static void lpc_uart_intr_deassert(void *arg) { /* * The COM devices on the LPC bus generate edge triggered interrupts, * so nothing more to do here. 
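 *
 * The assert side above already models the edge by pulsing the line
 * in a single call:
 *
 *	vm_isa_pulse_irq(lpc_bridge->pi_vmctx, sc->irq, sc->irq);
 *
 * so there is no level to lower here.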
*/ } static int lpc_uart_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) { int offset; struct lpc_uart_softc *sc = arg; offset = port - sc->iobase; switch (bytes) { case 1: if (in) *eax = uart_read(sc->uart_softc, offset); else uart_write(sc->uart_softc, offset, *eax); break; case 2: if (in) { *eax = uart_read(sc->uart_softc, offset); *eax |= uart_read(sc->uart_softc, offset + 1) << 8; } else { uart_write(sc->uart_softc, offset, *eax); uart_write(sc->uart_softc, offset + 1, *eax >> 8); } break; default: return (-1); } return (0); } static int lpc_init(struct vmctx *ctx) { struct lpc_uart_softc *sc; struct inout_port iop; const char *name; int unit, error; if (romfile != NULL) { error = bootrom_init(ctx, romfile); if (error) return (error); } /* COM1 and COM2 */ for (unit = 0; unit < LPC_UART_NUM; unit++) { sc = &lpc_uart_softc[unit]; name = lpc_uart_names[unit]; if (uart_legacy_alloc(unit, &sc->iobase, &sc->irq) != 0) { - fprintf(stderr, "Unable to allocate resources for " - "LPC device %s\n", name); + EPRINTLN("Unable to allocate resources for " + "LPC device %s", name); return (-1); } pci_irq_reserve(sc->irq); sc->uart_softc = uart_init(lpc_uart_intr_assert, lpc_uart_intr_deassert, sc); if (uart_set_backend(sc->uart_softc, sc->opts) != 0) { - fprintf(stderr, "Unable to initialize backend '%s' " - "for LPC device %s\n", sc->opts, name); + EPRINTLN("Unable to initialize backend '%s' " + "for LPC device %s", sc->opts, name); return (-1); } bzero(&iop, sizeof(struct inout_port)); iop.name = name; iop.port = sc->iobase; iop.size = UART_IO_BAR_SIZE; iop.flags = IOPORT_F_INOUT; iop.handler = lpc_uart_io_handler; iop.arg = sc; error = register_inout(&iop); assert(error == 0); sc->enabled = 1; } return (0); } static void pci_lpc_write_dsdt(struct pci_devinst *pi) { struct lpc_dsdt **ldpp, *ldp; dsdt_line(""); dsdt_line("Device (ISA)"); dsdt_line("{"); dsdt_line(" Name (_ADR, 0x%04X%04X)", pi->pi_slot, pi->pi_func); dsdt_line(" OperationRegion (LPCR, PCI_Config, 0x00, 0x100)"); dsdt_line(" Field (LPCR, AnyAcc, NoLock, Preserve)"); dsdt_line(" {"); dsdt_line(" Offset (0x60),"); dsdt_line(" PIRA, 8,"); dsdt_line(" PIRB, 8,"); dsdt_line(" PIRC, 8,"); dsdt_line(" PIRD, 8,"); dsdt_line(" Offset (0x68),"); dsdt_line(" PIRE, 8,"); dsdt_line(" PIRF, 8,"); dsdt_line(" PIRG, 8,"); dsdt_line(" PIRH, 8"); dsdt_line(" }"); dsdt_line(""); dsdt_indent(1); SET_FOREACH(ldpp, lpc_dsdt_set) { ldp = *ldpp; ldp->handler(); } dsdt_line(""); dsdt_line("Device (PIC)"); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0000\"))"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); dsdt_fixed_ioport(IO_ICU1, 2); dsdt_fixed_ioport(IO_ICU2, 2); dsdt_fixed_irq(2); dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); dsdt_line(""); dsdt_line("Device (TIMR)"); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0100\"))"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); dsdt_fixed_ioport(IO_TIMER1_PORT, 4); dsdt_fixed_irq(0); dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); dsdt_unindent(1); dsdt_line("}"); } static void pci_lpc_sysres_dsdt(void) { struct lpc_sysres **lspp, *lsp; dsdt_line(""); dsdt_line("Device (SIO)"); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0C02\"))"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); SET_FOREACH(lspp, lpc_sysres_set) { lsp = *lspp; switch (lsp->type) { case LPC_SYSRES_IO: dsdt_fixed_ioport(lsp->base, lsp->length); break; case 
LPC_SYSRES_MEM: dsdt_fixed_mem32(lsp->base, lsp->length); break; } } dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); } LPC_DSDT(pci_lpc_sysres_dsdt); static void pci_lpc_uart_dsdt(void) { struct lpc_uart_softc *sc; int unit; for (unit = 0; unit < LPC_UART_NUM; unit++) { sc = &lpc_uart_softc[unit]; if (!sc->enabled) continue; dsdt_line(""); dsdt_line("Device (%s)", lpc_uart_names[unit]); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0501\"))"); dsdt_line(" Name (_UID, %d)", unit + 1); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); dsdt_fixed_ioport(sc->iobase, UART_IO_BAR_SIZE); dsdt_fixed_irq(sc->irq); dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); } } LPC_DSDT(pci_lpc_uart_dsdt); static int pci_lpc_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff, int bytes, uint32_t val) { int pirq_pin; if (bytes == 1) { pirq_pin = 0; if (coff >= 0x60 && coff <= 0x63) pirq_pin = coff - 0x60 + 1; if (coff >= 0x68 && coff <= 0x6b) pirq_pin = coff - 0x68 + 5; if (pirq_pin != 0) { pirq_write(ctx, pirq_pin, val); pci_set_cfgdata8(pi, coff, pirq_read(pirq_pin)); return (0); } } return (-1); } static void pci_lpc_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { } static uint64_t pci_lpc_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { return (0); } #define LPC_DEV 0x7000 #define LPC_VENDOR 0x8086 static int pci_lpc_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { /* * Do not allow more than one LPC bridge to be configured. */ if (lpc_bridge != NULL) { - fprintf(stderr, "Only one LPC bridge is allowed.\n"); + EPRINTLN("Only one LPC bridge is allowed."); return (-1); } /* * Enforce that the LPC can only be configured on bus 0. This * simplifies the ACPI DSDT because it can provide a decode for * all legacy i/o ports behind bus 0. 
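 *
 * In practice a configuration such as
 *
 *	-s 1,lpc -l com1,stdio
 *
 * is accepted, while placing the bridge on another bus with the
 * bus:slot:func form (e.g. "-s 4:0:0,lpc") fails at init.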
*/ if (pi->pi_bus != 0) { - fprintf(stderr, "LPC bridge can be present only on bus 0.\n"); + EPRINTLN("LPC bridge can be present only on bus 0."); return (-1); } if (lpc_init(ctx) != 0) return (-1); /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, LPC_DEV); pci_set_cfgdata16(pi, PCIR_VENDOR, LPC_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE); pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_ISA); lpc_bridge = pi; return (0); } char * lpc_pirq_name(int pin) { char *name; if (lpc_bridge == NULL) return (NULL); asprintf(&name, "\\_SB.PC00.ISA.LNK%c,", 'A' + pin - 1); return (name); } void lpc_pirq_routed(void) { int pin; if (lpc_bridge == NULL) return; for (pin = 0; pin < 4; pin++) pci_set_cfgdata8(lpc_bridge, 0x60 + pin, pirq_read(pin + 1)); for (pin = 0; pin < 4; pin++) pci_set_cfgdata8(lpc_bridge, 0x68 + pin, pirq_read(pin + 5)); } struct pci_devemu pci_de_lpc = { .pe_emu = "lpc", .pe_init = pci_lpc_init, .pe_write_dsdt = pci_lpc_write_dsdt, .pe_cfgwrite = pci_lpc_cfgwrite, .pe_barwrite = pci_lpc_write, .pe_barread = pci_lpc_read }; PCI_EMUL_SET(pci_de_lpc); Index: stable/12/usr.sbin/bhyve/pci_nvme.c =================================================================== --- stable/12/usr.sbin/bhyve/pci_nvme.c (revision 358183) +++ stable/12/usr.sbin/bhyve/pci_nvme.c (revision 358184) @@ -1,2033 +1,2034 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017 Shunsuke Mie * Copyright (c) 2018 Leon Dang * * Function crc16 Copyright (c) 2017, Fedor Uporov * Obtained from function ext2_crc16() in sys/fs/ext2fs/ext2_csum.c * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * bhyve PCIe-NVMe device emulation. 
* * options: * -s ,nvme,devpath,maxq=#,qsz=#,ioslots=#,sectsz=#,ser=A-Z,eui64=# * * accepted devpath: * /dev/blockdev * /path/to/image * ram=size_in_MiB * * maxq = max number of queues * qsz = max elements in each queue * ioslots = max number of concurrent io requests * sectsz = sector size (defaults to blockif sector size) * ser = serial number (20-chars max) * eui64 = IEEE Extended Unique Identifier (8 byte value) * */ /* TODO: - create async event for smart and log - intr coalesce */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "block_if.h" +#include "debug.h" #include "pci_emul.h" static int nvme_debug = 0; -#define DPRINTF(params) if (nvme_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (nvme_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params /* defaults; can be overridden */ #define NVME_MSIX_BAR 4 #define NVME_IOSLOTS 8 /* The NVMe spec defines bits 13:4 in BAR0 as reserved */ #define NVME_MMIO_SPACE_MIN (1 << 14) #define NVME_QUEUES 16 #define NVME_MAX_QENTRIES 2048 #define NVME_PRP2_ITEMS (PAGE_SIZE/sizeof(uint64_t)) #define NVME_MAX_BLOCKIOVS 512 /* This is a synthetic status code to indicate there is no status */ #define NVME_NO_STATUS 0xffff #define NVME_COMPLETION_VALID(c) ((c).status != NVME_NO_STATUS) /* helpers */ /* Convert a zero-based value into a one-based value */ #define ONE_BASED(zero) ((zero) + 1) /* Convert a one-based value into a zero-based value */ #define ZERO_BASED(one) ((one) - 1) /* Encode number of SQ's and CQ's for Set/Get Features */ #define NVME_FEATURE_NUM_QUEUES(sc) \ (ZERO_BASED((sc)->num_squeues) & 0xffff) | \ (ZERO_BASED((sc)->num_cqueues) & 0xffff) << 16; #define NVME_DOORBELL_OFFSET offsetof(struct nvme_registers, doorbell) enum nvme_controller_register_offsets { NVME_CR_CAP_LOW = 0x00, NVME_CR_CAP_HI = 0x04, NVME_CR_VS = 0x08, NVME_CR_INTMS = 0x0c, NVME_CR_INTMC = 0x10, NVME_CR_CC = 0x14, NVME_CR_CSTS = 0x1c, NVME_CR_NSSR = 0x20, NVME_CR_AQA = 0x24, NVME_CR_ASQ_LOW = 0x28, NVME_CR_ASQ_HI = 0x2c, NVME_CR_ACQ_LOW = 0x30, NVME_CR_ACQ_HI = 0x34, }; enum nvme_cmd_cdw11 { NVME_CMD_CDW11_PC = 0x0001, NVME_CMD_CDW11_IEN = 0x0002, NVME_CMD_CDW11_IV = 0xFFFF0000, }; #define NVME_CQ_INTEN 0x01 #define NVME_CQ_INTCOAL 0x02 struct nvme_completion_queue { struct nvme_completion *qbase; uint32_t size; uint16_t tail; /* nvme progress */ uint16_t head; /* guest progress */ uint16_t intr_vec; uint32_t intr_en; pthread_mutex_t mtx; }; struct nvme_submission_queue { struct nvme_command *qbase; uint32_t size; uint16_t head; /* nvme progress */ uint16_t tail; /* guest progress */ uint16_t cqid; /* completion queue id */ int busy; /* queue is being processed */ int qpriority; }; enum nvme_storage_type { NVME_STOR_BLOCKIF = 0, NVME_STOR_RAM = 1, }; struct pci_nvme_blockstore { enum nvme_storage_type type; void *ctx; uint64_t size; uint32_t sectsz; uint32_t sectsz_bits; uint64_t eui64; }; struct pci_nvme_ioreq { struct pci_nvme_softc *sc; struct pci_nvme_ioreq *next; struct nvme_submission_queue *nvme_sq; uint16_t sqid; /* command information */ uint16_t opc; uint16_t cid; uint32_t nsid; uint64_t prev_gpaddr; size_t prev_size; /* * lock if all iovs consumed (big IO); * complete transaction before continuing */ pthread_mutex_t mtx; pthread_cond_t cv; struct blockif_req io_req; /* pad to fit up to 512 page descriptors from guest IO request */ struct iovec 
iovpadding[NVME_MAX_BLOCKIOVS-BLOCKIF_IOV_MAX]; }; struct pci_nvme_softc { struct pci_devinst *nsc_pi; pthread_mutex_t mtx; struct nvme_registers regs; struct nvme_namespace_data nsdata; struct nvme_controller_data ctrldata; struct nvme_error_information_entry err_log; struct nvme_health_information_page health_log; struct nvme_firmware_page fw_log; struct pci_nvme_blockstore nvstore; uint16_t max_qentries; /* max entries per queue */ uint32_t max_queues; /* max number of IO SQ's or CQ's */ uint32_t num_cqueues; uint32_t num_squeues; struct pci_nvme_ioreq *ioreqs; struct pci_nvme_ioreq *ioreqs_free; /* free list of ioreqs */ uint32_t pending_ios; uint32_t ioslots; sem_t iosemlock; /* * Memory mapped Submission and Completion queues * Each array includes both Admin and IO queues */ struct nvme_completion_queue *compl_queues; struct nvme_submission_queue *submit_queues; /* controller features */ uint32_t intr_coales_aggr_time; /* 0x08: uS to delay intr */ uint32_t intr_coales_aggr_thresh; /* 0x08: compl-Q entries */ uint32_t async_ev_config; /* 0x0B: async event config */ }; static void pci_nvme_io_partial(struct blockif_req *br, int err); /* Controller Configuration utils */ #define NVME_CC_GET_EN(cc) \ ((cc) >> NVME_CC_REG_EN_SHIFT & NVME_CC_REG_EN_MASK) #define NVME_CC_GET_CSS(cc) \ ((cc) >> NVME_CC_REG_CSS_SHIFT & NVME_CC_REG_CSS_MASK) #define NVME_CC_GET_SHN(cc) \ ((cc) >> NVME_CC_REG_SHN_SHIFT & NVME_CC_REG_SHN_MASK) #define NVME_CC_GET_IOSQES(cc) \ ((cc) >> NVME_CC_REG_IOSQES_SHIFT & NVME_CC_REG_IOSQES_MASK) #define NVME_CC_GET_IOCQES(cc) \ ((cc) >> NVME_CC_REG_IOCQES_SHIFT & NVME_CC_REG_IOCQES_MASK) #define NVME_CC_WRITE_MASK \ ((NVME_CC_REG_EN_MASK << NVME_CC_REG_EN_SHIFT) | \ (NVME_CC_REG_IOSQES_MASK << NVME_CC_REG_IOSQES_SHIFT) | \ (NVME_CC_REG_IOCQES_MASK << NVME_CC_REG_IOCQES_SHIFT)) #define NVME_CC_NEN_WRITE_MASK \ ((NVME_CC_REG_CSS_MASK << NVME_CC_REG_CSS_SHIFT) | \ (NVME_CC_REG_MPS_MASK << NVME_CC_REG_MPS_SHIFT) | \ (NVME_CC_REG_AMS_MASK << NVME_CC_REG_AMS_SHIFT)) /* Controller Status utils */ #define NVME_CSTS_GET_RDY(sts) \ ((sts) >> NVME_CSTS_REG_RDY_SHIFT & NVME_CSTS_REG_RDY_MASK) #define NVME_CSTS_RDY (1 << NVME_CSTS_REG_RDY_SHIFT) /* Completion Queue status word utils */ #define NVME_STATUS_P (1 << NVME_STATUS_P_SHIFT) #define NVME_STATUS_MASK \ ((NVME_STATUS_SCT_MASK << NVME_STATUS_SCT_SHIFT) |\ (NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT)) static __inline void cpywithpad(char *dst, size_t dst_size, const char *src, char pad) { size_t len; len = strnlen(src, dst_size); memset(dst, pad, dst_size); memcpy(dst, src, len); } static __inline void pci_nvme_status_tc(uint16_t *status, uint16_t type, uint16_t code) { *status &= ~NVME_STATUS_MASK; *status |= (type & NVME_STATUS_SCT_MASK) << NVME_STATUS_SCT_SHIFT | (code & NVME_STATUS_SC_MASK) << NVME_STATUS_SC_SHIFT; } static __inline void pci_nvme_status_genc(uint16_t *status, uint16_t code) { pci_nvme_status_tc(status, NVME_SCT_GENERIC, code); } static __inline void pci_nvme_toggle_phase(uint16_t *status, int prev) { if (prev) *status &= ~NVME_STATUS_P; else *status |= NVME_STATUS_P; } static void pci_nvme_init_ctrldata(struct pci_nvme_softc *sc) { struct nvme_controller_data *cd = &sc->ctrldata; cd->vid = 0xFB5D; cd->ssvid = 0x0000; cpywithpad((char *)cd->mn, sizeof(cd->mn), "bhyve-NVMe", ' '); cpywithpad((char *)cd->fr, sizeof(cd->fr), "1.0", ' '); /* Num of submission commands that we can handle at a time (2^rab) */ cd->rab = 4; /* FreeBSD OUI */ cd->ieee[0] = 0x58; cd->ieee[1] = 0x9c; cd->ieee[2] = 0xfc; cd->mic = 0; 
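	/*
	 * CAP.MPSMIN is left at zero (4KB pages) in pci_nvme_reset_locked,
	 * so the mdts exponent below advertises transfers of up to
	 * 2^9 * 4KB = 2MB per command.
	 */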
cd->mdts = 9; /* max data transfer size (2^mdts * CAP.MPSMIN) */ cd->ver = 0x00010300; cd->oacs = 1 << NVME_CTRLR_DATA_OACS_FORMAT_SHIFT; cd->acl = 2; cd->aerl = 4; cd->lpa = 0; /* TODO: support some simple things like SMART */ cd->elpe = 0; /* max error log page entries */ cd->npss = 1; /* number of power states support */ /* Warning Composite Temperature Threshold */ cd->wctemp = 0x0157; cd->sqes = (6 << NVME_CTRLR_DATA_SQES_MAX_SHIFT) | (6 << NVME_CTRLR_DATA_SQES_MIN_SHIFT); cd->cqes = (4 << NVME_CTRLR_DATA_CQES_MAX_SHIFT) | (4 << NVME_CTRLR_DATA_CQES_MIN_SHIFT); cd->nn = 1; /* number of namespaces */ cd->fna = 0x03; cd->power_state[0].mp = 10; } /* * Calculate the CRC-16 of the given buffer * See copyright attribution at top of file */ static uint16_t crc16(uint16_t crc, const void *buffer, unsigned int len) { const unsigned char *cp = buffer; /* CRC table for the CRC-16. The poly is 0x8005 (x16 + x15 + x2 + 1). */ static uint16_t const crc16_table[256] = { 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 }; while (len--) crc = (((crc >> 8) & 0xffU) ^ crc16_table[(crc ^ *cp++) & 0xffU]) & 0x0000ffffU; return crc; } static void pci_nvme_init_nsdata(struct pci_nvme_softc *sc, struct nvme_namespace_data *nd, uint32_t nsid, uint64_t eui64) { nd->nsze = sc->nvstore.size / sc->nvstore.sectsz; nd->ncap = nd->nsze; nd->nuse = nd->nsze; /* Get LBA and backstore information from backing store */ nd->nlbaf = 0; /* NLBAF is a 0's based value (i.e. 
1 LBA Format) */ nd->flbas = 0; /* Create an EUI-64 if user did not provide one */ if (eui64 == 0) { char *data = NULL; asprintf(&data, "%s%u%u%u", vmname, sc->nsc_pi->pi_bus, sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func); if (data != NULL) { eui64 = OUI_FREEBSD_NVME_LOW | crc16(0, data, strlen(data)); free(data); } eui64 = (eui64 << 16) | (nsid & 0xffff); } be64enc(nd->eui64, eui64); /* LBA data-sz = 2^lbads */ nd->lbaf[0] = sc->nvstore.sectsz_bits << NVME_NS_DATA_LBAF_LBADS_SHIFT; } static void pci_nvme_init_logpages(struct pci_nvme_softc *sc) { memset(&sc->err_log, 0, sizeof(sc->err_log)); memset(&sc->health_log, 0, sizeof(sc->health_log)); memset(&sc->fw_log, 0, sizeof(sc->fw_log)); } static void pci_nvme_reset_locked(struct pci_nvme_softc *sc) { - DPRINTF(("%s\r\n", __func__)); + DPRINTF(("%s", __func__)); sc->regs.cap_lo = (ZERO_BASED(sc->max_qentries) & NVME_CAP_LO_REG_MQES_MASK) | (1 << NVME_CAP_LO_REG_CQR_SHIFT) | (60 << NVME_CAP_LO_REG_TO_SHIFT); sc->regs.cap_hi = 1 << NVME_CAP_HI_REG_CSS_NVM_SHIFT; sc->regs.vs = 0x00010300; /* NVMe v1.3 */ sc->regs.cc = 0; sc->regs.csts = 0; sc->num_cqueues = sc->num_squeues = sc->max_queues; if (sc->submit_queues != NULL) { for (int i = 0; i < sc->num_squeues + 1; i++) { /* * The Admin Submission Queue is at index 0. * It must not be changed at reset otherwise the * emulation will be out of sync with the guest. */ if (i != 0) { sc->submit_queues[i].qbase = NULL; sc->submit_queues[i].size = 0; sc->submit_queues[i].cqid = 0; } sc->submit_queues[i].tail = 0; sc->submit_queues[i].head = 0; sc->submit_queues[i].busy = 0; } } else sc->submit_queues = calloc(sc->num_squeues + 1, sizeof(struct nvme_submission_queue)); if (sc->compl_queues != NULL) { for (int i = 0; i < sc->num_cqueues + 1; i++) { /* See Admin Submission Queue note above */ if (i != 0) { sc->compl_queues[i].qbase = NULL; sc->compl_queues[i].size = 0; } sc->compl_queues[i].tail = 0; sc->compl_queues[i].head = 0; } } else { sc->compl_queues = calloc(sc->num_cqueues + 1, sizeof(struct nvme_completion_queue)); for (int i = 0; i < sc->num_cqueues + 1; i++) pthread_mutex_init(&sc->compl_queues[i].mtx, NULL); } } static void pci_nvme_reset(struct pci_nvme_softc *sc) { pthread_mutex_lock(&sc->mtx); pci_nvme_reset_locked(sc); pthread_mutex_unlock(&sc->mtx); } static void pci_nvme_init_controller(struct vmctx *ctx, struct pci_nvme_softc *sc) { uint16_t acqs, asqs; - DPRINTF(("%s\r\n", __func__)); + DPRINTF(("%s", __func__)); asqs = (sc->regs.aqa & NVME_AQA_REG_ASQS_MASK) + 1; sc->submit_queues[0].size = asqs; sc->submit_queues[0].qbase = vm_map_gpa(ctx, sc->regs.asq, sizeof(struct nvme_command) * asqs); - DPRINTF(("%s mapping Admin-SQ guest 0x%lx, host: %p\r\n", + DPRINTF(("%s mapping Admin-SQ guest 0x%lx, host: %p", __func__, sc->regs.asq, sc->submit_queues[0].qbase)); acqs = ((sc->regs.aqa >> NVME_AQA_REG_ACQS_SHIFT) & NVME_AQA_REG_ACQS_MASK) + 1; sc->compl_queues[0].size = acqs; sc->compl_queues[0].qbase = vm_map_gpa(ctx, sc->regs.acq, sizeof(struct nvme_completion) * acqs); - DPRINTF(("%s mapping Admin-CQ guest 0x%lx, host: %p\r\n", + DPRINTF(("%s mapping Admin-CQ guest 0x%lx, host: %p", __func__, sc->regs.acq, sc->compl_queues[0].qbase)); } static int nvme_prp_memcpy(struct vmctx *ctx, uint64_t prp1, uint64_t prp2, uint8_t *src, size_t len) { uint8_t *dst; size_t bytes; if (len > (8 * 1024)) { return (-1); } /* Copy from the start of prp1 to the end of the physical page */ bytes = PAGE_SIZE - (prp1 & PAGE_MASK); bytes = MIN(bytes, len); dst = vm_map_gpa(ctx, prp1, bytes); if (dst == NULL) { 
return (-1); } memcpy(dst, src, bytes); src += bytes; len -= bytes; if (len == 0) { return (0); } len = MIN(len, PAGE_SIZE); dst = vm_map_gpa(ctx, prp2, len); if (dst == NULL) { return (-1); } memcpy(dst, src, len); return (0); } static int nvme_opc_delete_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { uint16_t qid = command->cdw10 & 0xffff; - DPRINTF(("%s DELETE_IO_SQ %u\r\n", __func__, qid)); + DPRINTF(("%s DELETE_IO_SQ %u", __func__, qid)); if (qid == 0 || qid > sc->num_squeues) { - WPRINTF(("%s NOT PERMITTED queue id %u / num_squeues %u\r\n", + WPRINTF(("%s NOT PERMITTED queue id %u / num_squeues %u", __func__, qid, sc->num_squeues)); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); return (1); } sc->submit_queues[qid].qbase = NULL; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); } static int nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { if (command->cdw11 & NVME_CMD_CDW11_PC) { uint16_t qid = command->cdw10 & 0xffff; struct nvme_submission_queue *nsq; if ((qid == 0) || (qid > sc->num_squeues)) { - WPRINTF(("%s queue index %u > num_squeues %u\r\n", + WPRINTF(("%s queue index %u > num_squeues %u", __func__, qid, sc->num_squeues)); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); return (1); } nsq = &sc->submit_queues[qid]; nsq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff); nsq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(struct nvme_command) * (size_t)nsq->size); nsq->cqid = (command->cdw11 >> 16) & 0xffff; nsq->qpriority = (command->cdw11 >> 1) & 0x03; - DPRINTF(("%s sq %u size %u gaddr %p cqid %u\r\n", __func__, + DPRINTF(("%s sq %u size %u gaddr %p cqid %u", __func__, qid, nsq->size, nsq->qbase, nsq->cqid)); pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); - DPRINTF(("%s completed creating IOSQ qid %u\r\n", + DPRINTF(("%s completed creating IOSQ qid %u", __func__, qid)); } else { /* * Guest sent non-cont submission queue request. * This setting is unsupported by this emulation. 
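* (With CDW11.PC clear, PRP1 points to a PRP list rather than one * physically contiguous queue buffer.)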
*/ WPRINTF(("%s unsupported non-contig (list-based) " - "create i/o submission queue\r\n", __func__)); + "create i/o submission queue", __func__)); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); } return (1); } static int nvme_opc_delete_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { uint16_t qid = command->cdw10 & 0xffff; - DPRINTF(("%s DELETE_IO_CQ %u\r\n", __func__, qid)); + DPRINTF(("%s DELETE_IO_CQ %u", __func__, qid)); if (qid == 0 || qid > sc->num_cqueues) { - WPRINTF(("%s queue index %u / num_cqueues %u\r\n", + WPRINTF(("%s queue index %u / num_cqueues %u", __func__, qid, sc->num_cqueues)); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); return (1); } sc->compl_queues[qid].qbase = NULL; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); } static int nvme_opc_create_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { if (command->cdw11 & NVME_CMD_CDW11_PC) { uint16_t qid = command->cdw10 & 0xffff; struct nvme_completion_queue *ncq; if ((qid == 0) || (qid > sc->num_cqueues)) { - WPRINTF(("%s queue index %u > num_cqueues %u\r\n", + WPRINTF(("%s queue index %u > num_cqueues %u", __func__, qid, sc->num_cqueues)); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); return (1); } ncq = &sc->compl_queues[qid]; ncq->intr_en = (command->cdw11 & NVME_CMD_CDW11_IEN) >> 1; ncq->intr_vec = (command->cdw11 >> 16) & 0xffff; ncq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff); ncq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(struct nvme_command) * (size_t)ncq->size); pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); } else { /* * Non-contig completion queue unsupported. 
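* (The same CDW11.PC=0 case as for submission queues above.)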
*/ WPRINTF(("%s unsupported non-contig (list-based) " - "create i/o completion queue\r\n", + "create i/o completion queue", __func__)); /* 0x12 = Invalid Use of Controller Memory Buffer */ pci_nvme_status_genc(&compl->status, 0x12); } return (1); } static int nvme_opc_get_log_page(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { uint32_t logsize = (1 + ((command->cdw10 >> 16) & 0xFFF)) * 2; uint8_t logpage = command->cdw10 & 0xFF; - DPRINTF(("%s log page %u len %u\r\n", __func__, logpage, logsize)); + DPRINTF(("%s log page %u len %u", __func__, logpage, logsize)); pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); switch (logpage) { case NVME_LOG_ERROR: nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->err_log, logsize); break; case NVME_LOG_HEALTH_INFORMATION: /* TODO: present some smart info */ nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->health_log, logsize); break; case NVME_LOG_FIRMWARE_SLOT: nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->fw_log, logsize); break; default: - WPRINTF(("%s get log page %x command not supported\r\n", + WPRINTF(("%s get log page %x command not supported", __func__, logpage)); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_LOG_PAGE); } return (1); } static int nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { void *dest; - DPRINTF(("%s identify 0x%x nsid 0x%x\r\n", __func__, + DPRINTF(("%s identify 0x%x nsid 0x%x", __func__, command->cdw10 & 0xFF, command->nsid)); switch (command->cdw10 & 0xFF) { case 0x00: /* return Identify Namespace data structure */ nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->nsdata, sizeof(sc->nsdata)); break; case 0x01: /* return Identify Controller data structure */ nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->ctrldata, sizeof(sc->ctrldata)); break; case 0x02: /* list of 1024 active NSIDs > CDW1.NSID */ dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(uint32_t) * 1024); ((uint32_t *)dest)[0] = 1; ((uint32_t *)dest)[1] = 0; break; case 0x11: pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_NAMESPACE_OR_FORMAT); return (1); case 0x03: /* list of NSID structures in CDW1.NSID, 4096 bytes */ case 0x10: case 0x12: case 0x13: case 0x14: case 0x15: default: - DPRINTF(("%s unsupported identify command requested 0x%x\r\n", + DPRINTF(("%s unsupported identify command requested 0x%x", __func__, command->cdw10 & 0xFF)); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); } static int nvme_set_feature_queues(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { uint16_t nqr; /* Number of Queues Requested */ nqr = command->cdw11 & 0xFFFF; if (nqr == 0xffff) { - WPRINTF(("%s: Illegal NSQR value %#x\n", __func__, nqr)); + WPRINTF(("%s: Illegal NSQR value %#x", __func__, nqr)); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (-1); } sc->num_squeues = ONE_BASED(nqr); if (sc->num_squeues > sc->max_queues) { - DPRINTF(("NSQR=%u is greater than max %u\n", sc->num_squeues, + DPRINTF(("NSQR=%u is greater than max %u", sc->num_squeues, sc->max_queues)); sc->num_squeues = sc->max_queues; } nqr = (command->cdw11 >> 16) & 0xFFFF; if (nqr == 0xffff) { - WPRINTF(("%s: Illegal NCQR value 
%#x\n", __func__, nqr)); + WPRINTF(("%s: Illegal NCQR value %#x", __func__, nqr)); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (-1); } sc->num_cqueues = ONE_BASED(nqr); if (sc->num_cqueues > sc->max_queues) { - DPRINTF(("NCQR=%u is greater than max %u\n", sc->num_cqueues, + DPRINTF(("NCQR=%u is greater than max %u", sc->num_cqueues, sc->max_queues)); sc->num_cqueues = sc->max_queues; } compl->cdw0 = NVME_FEATURE_NUM_QUEUES(sc); return (0); } static int nvme_opc_set_features(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { int feature = command->cdw10 & 0xFF; uint32_t iv; - DPRINTF(("%s feature 0x%x\r\n", __func__, feature)); + DPRINTF(("%s feature 0x%x", __func__, feature)); compl->cdw0 = 0; switch (feature) { case NVME_FEAT_ARBITRATION: - DPRINTF((" arbitration 0x%x\r\n", command->cdw11)); + DPRINTF((" arbitration 0x%x", command->cdw11)); break; case NVME_FEAT_POWER_MANAGEMENT: - DPRINTF((" power management 0x%x\r\n", command->cdw11)); + DPRINTF((" power management 0x%x", command->cdw11)); break; case NVME_FEAT_LBA_RANGE_TYPE: - DPRINTF((" lba range 0x%x\r\n", command->cdw11)); + DPRINTF((" lba range 0x%x", command->cdw11)); break; case NVME_FEAT_TEMPERATURE_THRESHOLD: - DPRINTF((" temperature threshold 0x%x\r\n", command->cdw11)); + DPRINTF((" temperature threshold 0x%x", command->cdw11)); break; case NVME_FEAT_ERROR_RECOVERY: - DPRINTF((" error recovery 0x%x\r\n", command->cdw11)); + DPRINTF((" error recovery 0x%x", command->cdw11)); break; case NVME_FEAT_VOLATILE_WRITE_CACHE: - DPRINTF((" volatile write cache 0x%x\r\n", command->cdw11)); + DPRINTF((" volatile write cache 0x%x", command->cdw11)); break; case NVME_FEAT_NUMBER_OF_QUEUES: nvme_set_feature_queues(sc, command, compl); break; case NVME_FEAT_INTERRUPT_COALESCING: - DPRINTF((" interrupt coalescing 0x%x\r\n", command->cdw11)); + DPRINTF((" interrupt coalescing 0x%x", command->cdw11)); /* in uS */ sc->intr_coales_aggr_time = ((command->cdw11 >> 8) & 0xFF)*100; sc->intr_coales_aggr_thresh = command->cdw11 & 0xFF; break; case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION: iv = command->cdw11 & 0xFFFF; - DPRINTF((" interrupt vector configuration 0x%x\r\n", + DPRINTF((" interrupt vector configuration 0x%x", command->cdw11)); for (uint32_t i = 0; i < sc->num_cqueues + 1; i++) { if (sc->compl_queues[i].intr_vec == iv) { if (command->cdw11 & (1 << 16)) sc->compl_queues[i].intr_en |= NVME_CQ_INTCOAL; else sc->compl_queues[i].intr_en &= ~NVME_CQ_INTCOAL; } } break; case NVME_FEAT_WRITE_ATOMICITY: - DPRINTF((" write atomicity 0x%x\r\n", command->cdw11)); + DPRINTF((" write atomicity 0x%x", command->cdw11)); break; case NVME_FEAT_ASYNC_EVENT_CONFIGURATION: - DPRINTF((" async event configuration 0x%x\r\n", + DPRINTF((" async event configuration 0x%x", command->cdw11)); sc->async_ev_config = command->cdw11; break; case NVME_FEAT_SOFTWARE_PROGRESS_MARKER: - DPRINTF((" software progress marker 0x%x\r\n", + DPRINTF((" software progress marker 0x%x", command->cdw11)); break; case 0x0C: - DPRINTF((" autonomous power state transition 0x%x\r\n", + DPRINTF((" autonomous power state transition 0x%x", command->cdw11)); break; default: - WPRINTF(("%s invalid feature\r\n", __func__)); + WPRINTF(("%s invalid feature", __func__)); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); } static int nvme_opc_get_features(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { int 
feature = command->cdw10 & 0xFF; - DPRINTF(("%s feature 0x%x\r\n", __func__, feature)); + DPRINTF(("%s feature 0x%x", __func__, feature)); compl->cdw0 = 0; switch (feature) { case NVME_FEAT_ARBITRATION: - DPRINTF((" arbitration\r\n")); + DPRINTF((" arbitration")); break; case NVME_FEAT_POWER_MANAGEMENT: - DPRINTF((" power management\r\n")); + DPRINTF((" power management")); break; case NVME_FEAT_LBA_RANGE_TYPE: - DPRINTF((" lba range\r\n")); + DPRINTF((" lba range")); break; case NVME_FEAT_TEMPERATURE_THRESHOLD: - DPRINTF((" temperature threshold\r\n")); + DPRINTF((" temperature threshold")); switch ((command->cdw11 >> 20) & 0x3) { case 0: /* Over temp threshold */ compl->cdw0 = 0xFFFF; break; case 1: /* Under temp threshold */ compl->cdw0 = 0; break; default: - WPRINTF((" invalid threshold type select\r\n")); + WPRINTF((" invalid threshold type select")); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } break; case NVME_FEAT_ERROR_RECOVERY: - DPRINTF((" error recovery\r\n")); + DPRINTF((" error recovery")); break; case NVME_FEAT_VOLATILE_WRITE_CACHE: - DPRINTF((" volatile write cache\r\n")); + DPRINTF((" volatile write cache")); break; case NVME_FEAT_NUMBER_OF_QUEUES: compl->cdw0 = NVME_FEATURE_NUM_QUEUES(sc); - DPRINTF((" number of queues (submit %u, completion %u)\r\n", + DPRINTF((" number of queues (submit %u, completion %u)", compl->cdw0 & 0xFFFF, (compl->cdw0 >> 16) & 0xFFFF)); break; case NVME_FEAT_INTERRUPT_COALESCING: - DPRINTF((" interrupt coalescing\r\n")); + DPRINTF((" interrupt coalescing")); break; case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION: - DPRINTF((" interrupt vector configuration\r\n")); + DPRINTF((" interrupt vector configuration")); break; case NVME_FEAT_WRITE_ATOMICITY: - DPRINTF((" write atomicity\r\n")); + DPRINTF((" write atomicity")); break; case NVME_FEAT_ASYNC_EVENT_CONFIGURATION: - DPRINTF((" async event configuration\r\n")); + DPRINTF((" async event configuration")); sc->async_ev_config = command->cdw11; break; case NVME_FEAT_SOFTWARE_PROGRESS_MARKER: - DPRINTF((" software progress marker\r\n")); + DPRINTF((" software progress marker")); break; case 0x0C: - DPRINTF((" autonomous power state transition\r\n")); + DPRINTF((" autonomous power state transition")); break; default: - WPRINTF(("%s invalid feature 0x%x\r\n", __func__, feature)); + WPRINTF(("%s invalid feature 0x%x", __func__, feature)); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); } static int nvme_opc_abort(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { - DPRINTF(("%s submission queue %u, command ID 0x%x\r\n", __func__, + DPRINTF(("%s submission queue %u, command ID 0x%x", __func__, command->cdw10 & 0xFFFF, (command->cdw10 >> 16) & 0xFFFF)); /* TODO: search for the command ID and abort it */ compl->cdw0 = 1; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); } static int nvme_opc_async_event_req(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { - DPRINTF(("%s async event request 0x%x\r\n", __func__, command->cdw11)); + DPRINTF(("%s async event request 0x%x", __func__, command->cdw11)); /* * TODO: raise events when they happen based on the Set Features cmd. * These events happen async, so only set completion successful if * there is an event reflective of the request to get event. 
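* Until then, the request is failed below with a limit-exceeded status.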
*/ pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED); return (0); } static void pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, uint64_t value) { struct nvme_completion compl; struct nvme_command *cmd; struct nvme_submission_queue *sq; struct nvme_completion_queue *cq; int do_intr = 0; uint16_t sqhead; - DPRINTF(("%s index %u\r\n", __func__, (uint32_t)value)); + DPRINTF(("%s index %u", __func__, (uint32_t)value)); sq = &sc->submit_queues[0]; sqhead = atomic_load_acq_short(&sq->head); if (atomic_testandset_int(&sq->busy, 1)) { - DPRINTF(("%s SQ busy, head %u, tail %u\r\n", + DPRINTF(("%s SQ busy, head %u, tail %u", __func__, sqhead, sq->tail)); return; } - DPRINTF(("sqhead %u, tail %u\r\n", sqhead, sq->tail)); + DPRINTF(("sqhead %u, tail %u", sqhead, sq->tail)); while (sqhead != atomic_load_acq_short(&sq->tail)) { cmd = &(sq->qbase)[sqhead]; compl.status = 0; switch (cmd->opc) { case NVME_OPC_DELETE_IO_SQ: - DPRINTF(("%s command DELETE_IO_SQ\r\n", __func__)); + DPRINTF(("%s command DELETE_IO_SQ", __func__)); do_intr |= nvme_opc_delete_io_sq(sc, cmd, &compl); break; case NVME_OPC_CREATE_IO_SQ: - DPRINTF(("%s command CREATE_IO_SQ\r\n", __func__)); + DPRINTF(("%s command CREATE_IO_SQ", __func__)); do_intr |= nvme_opc_create_io_sq(sc, cmd, &compl); break; case NVME_OPC_DELETE_IO_CQ: - DPRINTF(("%s command DELETE_IO_CQ\r\n", __func__)); + DPRINTF(("%s command DELETE_IO_CQ", __func__)); do_intr |= nvme_opc_delete_io_cq(sc, cmd, &compl); break; case NVME_OPC_CREATE_IO_CQ: - DPRINTF(("%s command CREATE_IO_CQ\r\n", __func__)); + DPRINTF(("%s command CREATE_IO_CQ", __func__)); do_intr |= nvme_opc_create_io_cq(sc, cmd, &compl); break; case NVME_OPC_GET_LOG_PAGE: - DPRINTF(("%s command GET_LOG_PAGE\r\n", __func__)); + DPRINTF(("%s command GET_LOG_PAGE", __func__)); do_intr |= nvme_opc_get_log_page(sc, cmd, &compl); break; case NVME_OPC_IDENTIFY: - DPRINTF(("%s command IDENTIFY\r\n", __func__)); + DPRINTF(("%s command IDENTIFY", __func__)); do_intr |= nvme_opc_identify(sc, cmd, &compl); break; case NVME_OPC_ABORT: - DPRINTF(("%s command ABORT\r\n", __func__)); + DPRINTF(("%s command ABORT", __func__)); do_intr |= nvme_opc_abort(sc, cmd, &compl); break; case NVME_OPC_SET_FEATURES: - DPRINTF(("%s command SET_FEATURES\r\n", __func__)); + DPRINTF(("%s command SET_FEATURES", __func__)); do_intr |= nvme_opc_set_features(sc, cmd, &compl); break; case NVME_OPC_GET_FEATURES: - DPRINTF(("%s command GET_FEATURES\r\n", __func__)); + DPRINTF(("%s command GET_FEATURES", __func__)); do_intr |= nvme_opc_get_features(sc, cmd, &compl); break; case NVME_OPC_ASYNC_EVENT_REQUEST: - DPRINTF(("%s command ASYNC_EVENT_REQ\r\n", __func__)); + DPRINTF(("%s command ASYNC_EVENT_REQ", __func__)); /* XXX dont care, unhandled for now do_intr |= nvme_opc_async_event_req(sc, cmd, &compl); */ compl.status = NVME_NO_STATUS; break; default: - WPRINTF(("0x%x command is not implemented\r\n", + WPRINTF(("0x%x command is not implemented", cmd->opc)); pci_nvme_status_genc(&compl.status, NVME_SC_INVALID_OPCODE); do_intr |= 1; } if (NVME_COMPLETION_VALID(compl)) { struct nvme_completion *cp; int phase; cq = &sc->compl_queues[0]; cp = &(cq->qbase)[cq->tail]; cp->cdw0 = compl.cdw0; cp->sqid = 0; cp->sqhd = sqhead; cp->cid = cmd->cid; phase = NVME_STATUS_GET_P(cp->status); cp->status = compl.status; pci_nvme_toggle_phase(&cp->status, phase); cq->tail = (cq->tail + 1) % cq->size; } sqhead = (sqhead + 1) % sq->size; } - DPRINTF(("setting sqhead %u\r\n", sqhead)); + DPRINTF(("setting sqhead 
%u", sqhead)); atomic_store_short(&sq->head, sqhead); atomic_store_int(&sq->busy, 0); if (do_intr) pci_generate_msix(sc->nsc_pi, 0); } static int pci_nvme_append_iov_req(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req, uint64_t gpaddr, size_t size, int do_write, uint64_t lba) { int iovidx; if (req != NULL) { /* concatenate contig block-iovs to minimize number of iovs */ if ((req->prev_gpaddr + req->prev_size) == gpaddr) { iovidx = req->io_req.br_iovcnt - 1; req->io_req.br_iov[iovidx].iov_base = paddr_guest2host(req->sc->nsc_pi->pi_vmctx, req->prev_gpaddr, size); req->prev_size += size; req->io_req.br_resid += size; req->io_req.br_iov[iovidx].iov_len = req->prev_size; } else { pthread_mutex_lock(&req->mtx); iovidx = req->io_req.br_iovcnt; if (iovidx == NVME_MAX_BLOCKIOVS) { int err = 0; - DPRINTF(("large I/O, doing partial req\r\n")); + DPRINTF(("large I/O, doing partial req")); iovidx = 0; req->io_req.br_iovcnt = 0; req->io_req.br_callback = pci_nvme_io_partial; if (!do_write) err = blockif_read(sc->nvstore.ctx, &req->io_req); else err = blockif_write(sc->nvstore.ctx, &req->io_req); /* wait until req completes before cont */ if (err == 0) pthread_cond_wait(&req->cv, &req->mtx); } if (iovidx == 0) { req->io_req.br_offset = lba; req->io_req.br_resid = 0; req->io_req.br_param = req; } req->io_req.br_iov[iovidx].iov_base = paddr_guest2host(req->sc->nsc_pi->pi_vmctx, gpaddr, size); req->io_req.br_iov[iovidx].iov_len = size; req->prev_gpaddr = gpaddr; req->prev_size = size; req->io_req.br_resid += size; req->io_req.br_iovcnt++; pthread_mutex_unlock(&req->mtx); } } else { /* RAM buffer: read/write directly */ void *p = sc->nvstore.ctx; void *gptr; if ((lba + size) > sc->nvstore.size) { - WPRINTF(("%s write would overflow RAM\r\n", __func__)); + WPRINTF(("%s write would overflow RAM", __func__)); return (-1); } p = (void *)((uintptr_t)p + (uintptr_t)lba); gptr = paddr_guest2host(sc->nsc_pi->pi_vmctx, gpaddr, size); if (do_write) memcpy(p, gptr, size); else memcpy(gptr, p, size); } return (0); } static void pci_nvme_set_completion(struct pci_nvme_softc *sc, struct nvme_submission_queue *sq, int sqid, uint16_t cid, uint32_t cdw0, uint16_t status, int ignore_busy) { struct nvme_completion_queue *cq = &sc->compl_queues[sq->cqid]; struct nvme_completion *compl; int do_intr = 0; int phase; - DPRINTF(("%s sqid %d cqid %u cid %u status: 0x%x 0x%x\r\n", + DPRINTF(("%s sqid %d cqid %u cid %u status: 0x%x 0x%x", __func__, sqid, sq->cqid, cid, NVME_STATUS_GET_SCT(status), NVME_STATUS_GET_SC(status))); pthread_mutex_lock(&cq->mtx); assert(cq->qbase != NULL); compl = &cq->qbase[cq->tail]; compl->sqhd = atomic_load_acq_short(&sq->head); compl->sqid = sqid; compl->cid = cid; // toggle phase phase = NVME_STATUS_GET_P(compl->status); compl->status = status; pci_nvme_toggle_phase(&compl->status, phase); cq->tail = (cq->tail + 1) % cq->size; if (cq->intr_en & NVME_CQ_INTEN) do_intr = 1; pthread_mutex_unlock(&cq->mtx); if (ignore_busy || !atomic_load_acq_int(&sq->busy)) if (do_intr) pci_generate_msix(sc->nsc_pi, cq->intr_vec); } static void pci_nvme_release_ioreq(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req) { req->sc = NULL; req->nvme_sq = NULL; req->sqid = 0; pthread_mutex_lock(&sc->mtx); req->next = sc->ioreqs_free; sc->ioreqs_free = req; sc->pending_ios--; /* when no more IO pending, can set to ready if device reset/enabled */ if (sc->pending_ios == 0 && NVME_CC_GET_EN(sc->regs.cc) && !(NVME_CSTS_GET_RDY(sc->regs.csts))) sc->regs.csts |= NVME_CSTS_RDY; pthread_mutex_unlock(&sc->mtx); 
sem_post(&sc->iosemlock); } static struct pci_nvme_ioreq * pci_nvme_get_ioreq(struct pci_nvme_softc *sc) { struct pci_nvme_ioreq *req = NULL;; sem_wait(&sc->iosemlock); pthread_mutex_lock(&sc->mtx); req = sc->ioreqs_free; assert(req != NULL); sc->ioreqs_free = req->next; req->next = NULL; req->sc = sc; sc->pending_ios++; pthread_mutex_unlock(&sc->mtx); req->io_req.br_iovcnt = 0; req->io_req.br_offset = 0; req->io_req.br_resid = 0; req->io_req.br_param = req; req->prev_gpaddr = 0; req->prev_size = 0; return req; } static void pci_nvme_io_done(struct blockif_req *br, int err) { struct pci_nvme_ioreq *req = br->br_param; struct nvme_submission_queue *sq = req->nvme_sq; uint16_t code, status; - DPRINTF(("%s error %d %s\r\n", __func__, err, strerror(err))); + DPRINTF(("%s error %d %s", __func__, err, strerror(err))); /* TODO return correct error */ code = err ? NVME_SC_DATA_TRANSFER_ERROR : NVME_SC_SUCCESS; pci_nvme_status_genc(&status, code); pci_nvme_set_completion(req->sc, sq, req->sqid, req->cid, 0, status, 0); pci_nvme_release_ioreq(req->sc, req); } static void pci_nvme_io_partial(struct blockif_req *br, int err) { struct pci_nvme_ioreq *req = br->br_param; - DPRINTF(("%s error %d %s\r\n", __func__, err, strerror(err))); + DPRINTF(("%s error %d %s", __func__, err, strerror(err))); pthread_cond_signal(&req->cv); } static void pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx) { struct nvme_submission_queue *sq; uint16_t status; uint16_t sqhead; int err; /* handle all submissions up to sq->tail index */ sq = &sc->submit_queues[idx]; if (atomic_testandset_int(&sq->busy, 1)) { - DPRINTF(("%s sqid %u busy\r\n", __func__, idx)); + DPRINTF(("%s sqid %u busy", __func__, idx)); return; } sqhead = atomic_load_acq_short(&sq->head); - DPRINTF(("nvme_handle_io qid %u head %u tail %u cmdlist %p\r\n", + DPRINTF(("nvme_handle_io qid %u head %u tail %u cmdlist %p", idx, sqhead, sq->tail, sq->qbase)); while (sqhead != atomic_load_acq_short(&sq->tail)) { struct nvme_command *cmd; struct pci_nvme_ioreq *req = NULL; uint64_t lba; uint64_t nblocks, bytes, size, cpsz; /* TODO: support scatter gather list handling */ cmd = &sq->qbase[sqhead]; sqhead = (sqhead + 1) % sq->size; lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10; if (cmd->opc == NVME_OPC_FLUSH) { pci_nvme_status_genc(&status, NVME_SC_SUCCESS); pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, status, 1); continue; } else if (cmd->opc == 0x08) { /* TODO: write zeroes */ - WPRINTF(("%s write zeroes lba 0x%lx blocks %u\r\n", + WPRINTF(("%s write zeroes lba 0x%lx blocks %u", __func__, lba, cmd->cdw12 & 0xFFFF)); pci_nvme_status_genc(&status, NVME_SC_SUCCESS); pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, status, 1); continue; } nblocks = (cmd->cdw12 & 0xFFFF) + 1; bytes = nblocks * sc->nvstore.sectsz; if (sc->nvstore.type == NVME_STOR_BLOCKIF) { req = pci_nvme_get_ioreq(sc); req->nvme_sq = sq; req->sqid = idx; } /* * If data starts mid-page and flows into the next page, then * increase page count */ DPRINTF(("[h%u:t%u:n%u] %s starting LBA 0x%lx blocks %lu " - "(%lu-bytes)\r\n", + "(%lu-bytes)", sqhead==0 ? sq->size-1 : sqhead-1, sq->tail, sq->size, cmd->opc == NVME_OPC_WRITE ? 
"WRITE" : "READ", lba, nblocks, bytes)); cmd->prp1 &= ~(0x03UL); cmd->prp2 &= ~(0x03UL); - DPRINTF((" prp1 0x%lx prp2 0x%lx\r\n", cmd->prp1, cmd->prp2)); + DPRINTF((" prp1 0x%lx prp2 0x%lx", cmd->prp1, cmd->prp2)); size = bytes; lba *= sc->nvstore.sectsz; cpsz = PAGE_SIZE - (cmd->prp1 % PAGE_SIZE); if (cpsz > bytes) cpsz = bytes; if (req != NULL) { req->io_req.br_offset = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10; req->opc = cmd->opc; req->cid = cmd->cid; req->nsid = cmd->nsid; } err = pci_nvme_append_iov_req(sc, req, cmd->prp1, cpsz, cmd->opc == NVME_OPC_WRITE, lba); lba += cpsz; size -= cpsz; if (size == 0) goto iodone; if (size <= PAGE_SIZE) { /* prp2 is second (and final) page in transfer */ err = pci_nvme_append_iov_req(sc, req, cmd->prp2, size, cmd->opc == NVME_OPC_WRITE, lba); } else { uint64_t *prp_list; int i; /* prp2 is pointer to a physical region page list */ prp_list = paddr_guest2host(sc->nsc_pi->pi_vmctx, cmd->prp2, PAGE_SIZE); i = 0; while (size != 0) { cpsz = MIN(size, PAGE_SIZE); /* * Move to linked physical region page list * in last item. */ if (i == (NVME_PRP2_ITEMS-1) && size > PAGE_SIZE) { assert((prp_list[i] & (PAGE_SIZE-1)) == 0); prp_list = paddr_guest2host( sc->nsc_pi->pi_vmctx, prp_list[i], PAGE_SIZE); i = 0; } if (prp_list[i] == 0) { - WPRINTF(("PRP2[%d] = 0 !!!\r\n", i)); + WPRINTF(("PRP2[%d] = 0 !!!", i)); err = 1; break; } err = pci_nvme_append_iov_req(sc, req, prp_list[i], cpsz, cmd->opc == NVME_OPC_WRITE, lba); if (err) break; lba += cpsz; size -= cpsz; i++; } } iodone: if (sc->nvstore.type == NVME_STOR_RAM) { uint16_t code, status; code = err ? NVME_SC_LBA_OUT_OF_RANGE : NVME_SC_SUCCESS; pci_nvme_status_genc(&status, code); pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, status, 1); continue; } if (err) goto do_error; req->io_req.br_callback = pci_nvme_io_done; err = 0; switch (cmd->opc) { case NVME_OPC_READ: err = blockif_read(sc->nvstore.ctx, &req->io_req); break; case NVME_OPC_WRITE: err = blockif_write(sc->nvstore.ctx, &req->io_req); break; default: - WPRINTF(("%s unhandled io command 0x%x\r\n", + WPRINTF(("%s unhandled io command 0x%x", __func__, cmd->opc)); err = 1; } do_error: if (err) { uint16_t status; pci_nvme_status_genc(&status, NVME_SC_DATA_TRANSFER_ERROR); pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, status, 1); pci_nvme_release_ioreq(sc, req); } } atomic_store_short(&sq->head, sqhead); atomic_store_int(&sq->busy, 0); } static void pci_nvme_handle_doorbell(struct vmctx *ctx, struct pci_nvme_softc* sc, uint64_t idx, int is_sq, uint64_t value) { - DPRINTF(("nvme doorbell %lu, %s, val 0x%lx\r\n", + DPRINTF(("nvme doorbell %lu, %s, val 0x%lx", idx, is_sq ? "SQ" : "CQ", value & 0xFFFF)); if (is_sq) { atomic_store_short(&sc->submit_queues[idx].tail, (uint16_t)value); if (idx == 0) { pci_nvme_handle_admin_cmd(sc, value); } else { /* submission queue; handle new entries in SQ */ if (idx > sc->num_squeues) { WPRINTF(("%s SQ index %lu overflow from " - "guest (max %u)\r\n", + "guest (max %u)", __func__, idx, sc->num_squeues)); return; } pci_nvme_handle_io_cmd(sc, (uint16_t)idx); } } else { if (idx > sc->num_cqueues) { WPRINTF(("%s queue index %lu overflow from " - "guest (max %u)\r\n", + "guest (max %u)", __func__, idx, sc->num_cqueues)); return; } sc->compl_queues[idx].head = (uint16_t)value; } } static void pci_nvme_bar0_reg_dumps(const char *func, uint64_t offset, int iswrite) { const char *s = iswrite ? 
"WRITE" : "READ"; switch (offset) { case NVME_CR_CAP_LOW: - DPRINTF(("%s %s NVME_CR_CAP_LOW\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_CAP_LOW", func, s)); break; case NVME_CR_CAP_HI: - DPRINTF(("%s %s NVME_CR_CAP_HI\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_CAP_HI", func, s)); break; case NVME_CR_VS: - DPRINTF(("%s %s NVME_CR_VS\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_VS", func, s)); break; case NVME_CR_INTMS: - DPRINTF(("%s %s NVME_CR_INTMS\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_INTMS", func, s)); break; case NVME_CR_INTMC: - DPRINTF(("%s %s NVME_CR_INTMC\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_INTMC", func, s)); break; case NVME_CR_CC: - DPRINTF(("%s %s NVME_CR_CC\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_CC", func, s)); break; case NVME_CR_CSTS: - DPRINTF(("%s %s NVME_CR_CSTS\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_CSTS", func, s)); break; case NVME_CR_NSSR: - DPRINTF(("%s %s NVME_CR_NSSR\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_NSSR", func, s)); break; case NVME_CR_AQA: - DPRINTF(("%s %s NVME_CR_AQA\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_AQA", func, s)); break; case NVME_CR_ASQ_LOW: - DPRINTF(("%s %s NVME_CR_ASQ_LOW\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_ASQ_LOW", func, s)); break; case NVME_CR_ASQ_HI: - DPRINTF(("%s %s NVME_CR_ASQ_HI\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_ASQ_HI", func, s)); break; case NVME_CR_ACQ_LOW: - DPRINTF(("%s %s NVME_CR_ACQ_LOW\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_ACQ_LOW", func, s)); break; case NVME_CR_ACQ_HI: - DPRINTF(("%s %s NVME_CR_ACQ_HI\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_ACQ_HI", func, s)); break; default: - DPRINTF(("unknown nvme bar-0 offset 0x%lx\r\n", offset)); + DPRINTF(("unknown nvme bar-0 offset 0x%lx", offset)); } } static void pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc, uint64_t offset, int size, uint64_t value) { uint32_t ccreg; if (offset >= NVME_DOORBELL_OFFSET) { uint64_t belloffset = offset - NVME_DOORBELL_OFFSET; uint64_t idx = belloffset / 8; /* door bell size = 2*int */ int is_sq = (belloffset % 8) < 4; if (belloffset > ((sc->max_queues+1) * 8 - 4)) { WPRINTF(("guest attempted an overflow write offset " "0x%lx, val 0x%lx in %s", offset, value, __func__)); return; } pci_nvme_handle_doorbell(ctx, sc, idx, is_sq, value); return; } - DPRINTF(("nvme-write offset 0x%lx, size %d, value 0x%lx\r\n", + DPRINTF(("nvme-write offset 0x%lx, size %d, value 0x%lx", offset, size, value)); if (size != 4) { WPRINTF(("guest wrote invalid size %d (offset 0x%lx, " "val 0x%lx) to bar0 in %s", size, offset, value, __func__)); /* TODO: shutdown device */ return; } pci_nvme_bar0_reg_dumps(__func__, offset, 1); pthread_mutex_lock(&sc->mtx); switch (offset) { case NVME_CR_CAP_LOW: case NVME_CR_CAP_HI: /* readonly */ break; case NVME_CR_VS: /* readonly */ break; case NVME_CR_INTMS: /* MSI-X, so ignore */ break; case NVME_CR_INTMC: /* MSI-X, so ignore */ break; case NVME_CR_CC: ccreg = (uint32_t)value; DPRINTF(("%s NVME_CR_CC en %x css %x shn %x iosqes %u " - "iocqes %u\r\n", + "iocqes %u", __func__, NVME_CC_GET_EN(ccreg), NVME_CC_GET_CSS(ccreg), NVME_CC_GET_SHN(ccreg), NVME_CC_GET_IOSQES(ccreg), NVME_CC_GET_IOCQES(ccreg))); if (NVME_CC_GET_SHN(ccreg)) { /* perform shutdown - flush out data to backend */ sc->regs.csts &= ~(NVME_CSTS_REG_SHST_MASK << NVME_CSTS_REG_SHST_SHIFT); sc->regs.csts |= NVME_SHST_COMPLETE << NVME_CSTS_REG_SHST_SHIFT; } if (NVME_CC_GET_EN(ccreg) != NVME_CC_GET_EN(sc->regs.cc)) { if (NVME_CC_GET_EN(ccreg) == 0) /* transition 1-> causes controller reset */ 
pci_nvme_reset_locked(sc); else pci_nvme_init_controller(ctx, sc); } /* Insert the iocqes, iosqes and en bits from the write */ sc->regs.cc &= ~NVME_CC_WRITE_MASK; sc->regs.cc |= ccreg & NVME_CC_WRITE_MASK; if (NVME_CC_GET_EN(ccreg) == 0) { /* Insert the ams, mps and css bit fields */ sc->regs.cc &= ~NVME_CC_NEN_WRITE_MASK; sc->regs.cc |= ccreg & NVME_CC_NEN_WRITE_MASK; sc->regs.csts &= ~NVME_CSTS_RDY; } else if (sc->pending_ios == 0) { sc->regs.csts |= NVME_CSTS_RDY; } break; case NVME_CR_CSTS: break; case NVME_CR_NSSR: /* ignore writes; don't support subsystem reset */ break; case NVME_CR_AQA: sc->regs.aqa = (uint32_t)value; break; case NVME_CR_ASQ_LOW: sc->regs.asq = (sc->regs.asq & (0xFFFFFFFF00000000)) | (0xFFFFF000 & value); break; case NVME_CR_ASQ_HI: sc->regs.asq = (sc->regs.asq & (0x00000000FFFFFFFF)) | (value << 32); break; case NVME_CR_ACQ_LOW: sc->regs.acq = (sc->regs.acq & (0xFFFFFFFF00000000)) | (0xFFFFF000 & value); break; case NVME_CR_ACQ_HI: sc->regs.acq = (sc->regs.acq & (0x00000000FFFFFFFF)) | (value << 32); break; default: - DPRINTF(("%s unknown offset 0x%lx, value 0x%lx size %d\r\n", + DPRINTF(("%s unknown offset 0x%lx, value 0x%lx size %d", __func__, offset, value, size)); } pthread_mutex_unlock(&sc->mtx); } static void pci_nvme_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { struct pci_nvme_softc* sc = pi->pi_arg; if (baridx == pci_msix_table_bar(pi) || baridx == pci_msix_pba_bar(pi)) { DPRINTF(("nvme-write baridx %d, msix: off 0x%lx, size %d, " - " value 0x%lx\r\n", baridx, offset, size, value)); + " value 0x%lx", baridx, offset, size, value)); pci_emul_msix_twrite(pi, offset, size, value); return; } switch (baridx) { case 0: pci_nvme_write_bar_0(ctx, sc, offset, size, value); break; default: - DPRINTF(("%s unknown baridx %d, val 0x%lx\r\n", + DPRINTF(("%s unknown baridx %d, val 0x%lx", __func__, baridx, value)); } } static uint64_t pci_nvme_read_bar_0(struct pci_nvme_softc* sc, uint64_t offset, int size) { uint64_t value; pci_nvme_bar0_reg_dumps(__func__, offset, 0); if (offset < NVME_DOORBELL_OFFSET) { void *p = &(sc->regs); pthread_mutex_lock(&sc->mtx); memcpy(&value, (void *)((uintptr_t)p + offset), size); pthread_mutex_unlock(&sc->mtx); } else { value = 0; - WPRINTF(("pci_nvme: read invalid offset %ld\r\n", offset)); + WPRINTF(("pci_nvme: read invalid offset %ld", offset)); } switch (size) { case 1: value &= 0xFF; break; case 2: value &= 0xFFFF; break; case 4: value &= 0xFFFFFFFF; break; } - DPRINTF((" nvme-read offset 0x%lx, size %d -> value 0x%x\r\n", + DPRINTF((" nvme-read offset 0x%lx, size %d -> value 0x%x", offset, size, (uint32_t)value)); return (value); } static uint64_t pci_nvme_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { struct pci_nvme_softc* sc = pi->pi_arg; if (baridx == pci_msix_table_bar(pi) || baridx == pci_msix_pba_bar(pi)) { - DPRINTF(("nvme-read bar: %d, msix: regoff 0x%lx, size %d\r\n", + DPRINTF(("nvme-read bar: %d, msix: regoff 0x%lx, size %d", baridx, offset, size)); return pci_emul_msix_tread(pi, offset, size); } switch (baridx) { case 0: return pci_nvme_read_bar_0(sc, offset, size); default: - DPRINTF(("unknown bar %d, 0x%lx\r\n", baridx, offset)); + DPRINTF(("unknown bar %d, 0x%lx", baridx, offset)); } return (0); } static int pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *opts) { char bident[sizeof("XX:X:X")]; char *uopt, *xopts, *config; uint32_t sectsz; int optidx; sc->max_queues = NVME_QUEUES; 
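/* defaults; the maxq, qsz, and ioslots options parsed below may override these */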
sc->max_qentries = NVME_MAX_QENTRIES; sc->ioslots = NVME_IOSLOTS; sc->num_squeues = sc->max_queues; sc->num_cqueues = sc->max_queues; sectsz = 0; uopt = strdup(opts); optidx = 0; snprintf(sc->ctrldata.sn, sizeof(sc->ctrldata.sn), "NVME-%d-%d", sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func); for (xopts = strtok(uopt, ","); xopts != NULL; xopts = strtok(NULL, ",")) { if ((config = strchr(xopts, '=')) != NULL) *config++ = '\0'; if (!strcmp("maxq", xopts)) { sc->max_queues = atoi(config); } else if (!strcmp("qsz", xopts)) { sc->max_qentries = atoi(config); } else if (!strcmp("ioslots", xopts)) { sc->ioslots = atoi(config); } else if (!strcmp("sectsz", xopts)) { sectsz = atoi(config); } else if (!strcmp("ser", xopts)) { /* * This field indicates the Product Serial Number in * 7-bit ASCII, unused bytes should be space characters. * Ref: NVMe v1.3c. */ cpywithpad((char *)sc->ctrldata.sn, sizeof(sc->ctrldata.sn), config, ' '); } else if (!strcmp("ram", xopts)) { uint64_t sz = strtoull(&xopts[4], NULL, 10); sc->nvstore.type = NVME_STOR_RAM; sc->nvstore.size = sz * 1024 * 1024; sc->nvstore.ctx = calloc(1, sc->nvstore.size); sc->nvstore.sectsz = 4096; sc->nvstore.sectsz_bits = 12; if (sc->nvstore.ctx == NULL) { perror("Unable to allocate RAM"); free(uopt); return (-1); } } else if (!strcmp("eui64", xopts)) { sc->nvstore.eui64 = htobe64(strtoull(config, NULL, 0)); } else if (optidx == 0) { snprintf(bident, sizeof(bident), "%d:%d", sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func); sc->nvstore.ctx = blockif_open(xopts, bident); if (sc->nvstore.ctx == NULL) { perror("Could not open backing file"); free(uopt); return (-1); } sc->nvstore.type = NVME_STOR_BLOCKIF; sc->nvstore.size = blockif_size(sc->nvstore.ctx); } else { - fprintf(stderr, "Invalid option %s\n", xopts); + EPRINTLN("Invalid option %s", xopts); free(uopt); return (-1); } optidx++; } free(uopt); if (sc->nvstore.ctx == NULL || sc->nvstore.size == 0) { - fprintf(stderr, "backing store not specified\n"); + EPRINTLN("backing store not specified"); return (-1); } if (sectsz == 512 || sectsz == 4096 || sectsz == 8192) sc->nvstore.sectsz = sectsz; else if (sc->nvstore.type != NVME_STOR_RAM) sc->nvstore.sectsz = blockif_sectsz(sc->nvstore.ctx); for (sc->nvstore.sectsz_bits = 9; (1 << sc->nvstore.sectsz_bits) < sc->nvstore.sectsz; sc->nvstore.sectsz_bits++); if (sc->max_queues <= 0 || sc->max_queues > NVME_QUEUES) sc->max_queues = NVME_QUEUES; if (sc->max_qentries <= 0) { - fprintf(stderr, "Invalid qsz option\n"); + EPRINTLN("Invalid qsz option"); return (-1); } if (sc->ioslots <= 0) { - fprintf(stderr, "Invalid ioslots option\n"); + EPRINTLN("Invalid ioslots option"); return (-1); } return (0); } static int pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { struct pci_nvme_softc *sc; uint32_t pci_membar_sz; int error; error = 0; sc = calloc(1, sizeof(struct pci_nvme_softc)); pi->pi_arg = sc; sc->nsc_pi = pi; error = pci_nvme_parse_opts(sc, opts); if (error < 0) goto done; else error = 0; sc->ioreqs = calloc(sc->ioslots, sizeof(struct pci_nvme_ioreq)); for (int i = 0; i < sc->ioslots; i++) { if (i < (sc->ioslots-1)) sc->ioreqs[i].next = &sc->ioreqs[i+1]; pthread_mutex_init(&sc->ioreqs[i].mtx, NULL); pthread_cond_init(&sc->ioreqs[i].cv, NULL); } sc->ioreqs_free = sc->ioreqs; sc->intr_coales_aggr_thresh = 1; pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0A0A); pci_set_cfgdata16(pi, PCIR_VENDOR, 0xFB5D); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_NVM); pci_set_cfgdata8(pi, PCIR_PROGIF, 
PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0); /* * Allocate size of NVMe registers + doorbell space for all queues. * * The specification requires a minimum memory I/O window size of 16K. * The Windows driver will refuse to start a device with a smaller * window. */ pci_membar_sz = sizeof(struct nvme_registers) + 2 * sizeof(uint32_t) * (sc->max_queues + 1); pci_membar_sz = MAX(pci_membar_sz, NVME_MMIO_SPACE_MIN); - DPRINTF(("nvme membar size: %u\r\n", pci_membar_sz)); + DPRINTF(("nvme membar size: %u", pci_membar_sz)); error = pci_emul_alloc_bar(pi, 0, PCIBAR_MEM64, pci_membar_sz); if (error) { - WPRINTF(("%s pci alloc mem bar failed\r\n", __func__)); + WPRINTF(("%s pci alloc mem bar failed", __func__)); goto done; } error = pci_emul_add_msixcap(pi, sc->max_queues + 1, NVME_MSIX_BAR); if (error) { - WPRINTF(("%s pci add msixcap failed\r\n", __func__)); + WPRINTF(("%s pci add msixcap failed", __func__)); goto done; } error = pci_emul_add_pciecap(pi, PCIEM_TYPE_ROOT_INT_EP); if (error) { - WPRINTF(("%s pci add Express capability failed\r\n", __func__)); + WPRINTF(("%s pci add Express capability failed", __func__)); goto done; } pthread_mutex_init(&sc->mtx, NULL); sem_init(&sc->iosemlock, 0, sc->ioslots); pci_nvme_reset(sc); pci_nvme_init_ctrldata(sc); pci_nvme_init_nsdata(sc, &sc->nsdata, 1, sc->nvstore.eui64); pci_nvme_init_logpages(sc); pci_lintr_request(pi); done: return (error); } struct pci_devemu pci_de_nvme = { .pe_emu = "nvme", .pe_init = pci_nvme_init, .pe_barwrite = pci_nvme_write, .pe_barread = pci_nvme_read }; PCI_EMUL_SET(pci_de_nvme); Index: stable/12/usr.sbin/bhyve/pci_uart.c =================================================================== --- stable/12/usr.sbin/bhyve/pci_uart.c (revision 358183) +++ stable/12/usr.sbin/bhyve/pci_uart.c (revision 358184) @@ -1,121 +1,122 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "uart_emul.h" /* * Pick a PCI vid/did of a chip with a single uart at * BAR0, that most versions of FreeBSD can understand: * Siig CyberSerial 1-port. 
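* (PCI vendor 0x131f, device 0x2000, defined below)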
*/ #define COM_VENDOR 0x131f #define COM_DEV 0x2000 static void pci_uart_intr_assert(void *arg) { struct pci_devinst *pi = arg; pci_lintr_assert(pi); } static void pci_uart_intr_deassert(void *arg) { struct pci_devinst *pi = arg; pci_lintr_deassert(pi); } static void pci_uart_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { assert(baridx == 0); assert(size == 1); uart_write(pi->pi_arg, offset, value); } uint64_t pci_uart_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { uint8_t val; assert(baridx == 0); assert(size == 1); val = uart_read(pi->pi_arg, offset); return (val); } static int pci_uart_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { struct uart_softc *sc; pci_emul_alloc_bar(pi, 0, PCIBAR_IO, UART_IO_BAR_SIZE); pci_lintr_request(pi); /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, COM_DEV); pci_set_cfgdata16(pi, PCIR_VENDOR, COM_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_SIMPLECOMM); sc = uart_init(pci_uart_intr_assert, pci_uart_intr_deassert, pi); pi->pi_arg = sc; if (uart_set_backend(sc, opts) != 0) { - fprintf(stderr, "Unable to initialize backend '%s' for " - "pci uart at %d:%d\n", opts, pi->pi_slot, pi->pi_func); + EPRINTLN("Unable to initialize backend '%s' for " + "pci uart at %d:%d", opts, pi->pi_slot, pi->pi_func); return (-1); } return (0); } struct pci_devemu pci_de_com = { .pe_emu = "uart", .pe_init = pci_uart_init, .pe_barwrite = pci_uart_write, .pe_barread = pci_uart_read }; PCI_EMUL_SET(pci_de_com); Index: stable/12/usr.sbin/bhyve/pci_virtio_block.c =================================================================== --- stable/12/usr.sbin/bhyve/pci_virtio_block.c (revision 358183) +++ stable/12/usr.sbin/bhyve/pci_virtio_block.c (revision 358184) @@ -1,424 +1,425 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * Copyright (c) 2019 Joyent, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "virtio.h" #include "block_if.h" #define VTBLK_RINGSZ 128 _Static_assert(VTBLK_RINGSZ <= BLOCKIF_RING_MAX, "Each ring entry must be able to queue a request"); #define VTBLK_S_OK 0 #define VTBLK_S_IOERR 1 #define VTBLK_S_UNSUPP 2 #define VTBLK_BLK_ID_BYTES 20 + 1 /* Capability bits */ #define VTBLK_F_SEG_MAX (1 << 2) /* Maximum request segments */ #define VTBLK_F_BLK_SIZE (1 << 6) /* cfg block size valid */ #define VTBLK_F_FLUSH (1 << 9) /* Cache flush support */ #define VTBLK_F_TOPOLOGY (1 << 10) /* Optimal I/O alignment */ /* * Host capabilities */ #define VTBLK_S_HOSTCAPS \ ( VTBLK_F_SEG_MAX | \ VTBLK_F_BLK_SIZE | \ VTBLK_F_FLUSH | \ VTBLK_F_TOPOLOGY | \ VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */ /* * Config space "registers" */ struct vtblk_config { uint64_t vbc_capacity; uint32_t vbc_size_max; uint32_t vbc_seg_max; struct { uint16_t cylinders; uint8_t heads; uint8_t sectors; } vbc_geometry; uint32_t vbc_blk_size; struct { uint8_t physical_block_exp; uint8_t alignment_offset; uint16_t min_io_size; uint32_t opt_io_size; } vbc_topology; uint8_t vbc_writeback; } __packed; /* * Fixed-size block header */ struct virtio_blk_hdr { #define VBH_OP_READ 0 #define VBH_OP_WRITE 1 #define VBH_OP_FLUSH 4 #define VBH_OP_FLUSH_OUT 5 #define VBH_OP_IDENT 8 #define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */ uint32_t vbh_type; uint32_t vbh_ioprio; uint64_t vbh_sector; } __packed; /* * Debug printf */ static int pci_vtblk_debug; -#define DPRINTF(params) if (pci_vtblk_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (pci_vtblk_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params struct pci_vtblk_ioreq { struct blockif_req io_req; struct pci_vtblk_softc *io_sc; uint8_t *io_status; uint16_t io_idx; }; /* * Per-device softc */ struct pci_vtblk_softc { struct virtio_softc vbsc_vs; pthread_mutex_t vsc_mtx; struct vqueue_info vbsc_vq; struct vtblk_config vbsc_cfg; struct blockif_ctxt *bc; char vbsc_ident[VTBLK_BLK_ID_BYTES]; struct pci_vtblk_ioreq vbsc_ios[VTBLK_RINGSZ]; }; static void pci_vtblk_reset(void *); static void pci_vtblk_notify(void *, struct vqueue_info *); static int pci_vtblk_cfgread(void *, int, int, uint32_t *); static int pci_vtblk_cfgwrite(void *, int, int, uint32_t); static struct virtio_consts vtblk_vi_consts = { "vtblk", /* our name */ 1, /* we support 1 virtqueue */ sizeof(struct vtblk_config), /* config reg size */ pci_vtblk_reset, /* reset */ pci_vtblk_notify, /* device-wide qnotify */ pci_vtblk_cfgread, /* read PCI config */ pci_vtblk_cfgwrite, /* write PCI config */ NULL, /* apply negotiated features */ VTBLK_S_HOSTCAPS, /* our capabilities */ }; static void pci_vtblk_reset(void *vsc) { struct pci_vtblk_softc *sc = vsc; - DPRINTF(("vtblk: device reset requested !\n")); + DPRINTF(("vtblk: device reset requested !")); vi_reset_dev(&sc->vbsc_vs); } static void pci_vtblk_done(struct blockif_req *br, int err) { struct pci_vtblk_ioreq *io = br->br_param; struct pci_vtblk_softc *sc = io->io_sc; /* convert errno into a virtio block error return */ if (err == EOPNOTSUPP || err == ENOSYS) *io->io_status = VTBLK_S_UNSUPP; else if (err != 0) *io->io_status = VTBLK_S_IOERR; else *io->io_status = VTBLK_S_OK; /* * Return the descriptor back to the host. 
* We wrote 1 byte (our status) to host. */ pthread_mutex_lock(&sc->vsc_mtx); vq_relchain(&sc->vbsc_vq, io->io_idx, 1); vq_endchains(&sc->vbsc_vq, 0); pthread_mutex_unlock(&sc->vsc_mtx); } static void pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) { struct virtio_blk_hdr *vbh; struct pci_vtblk_ioreq *io; int i, n; int err; ssize_t iolen; int writeop, type; struct iovec iov[BLOCKIF_IOV_MAX + 2]; uint16_t idx, flags[BLOCKIF_IOV_MAX + 2]; n = vq_getchain(vq, &idx, iov, BLOCKIF_IOV_MAX + 2, flags); /* * The first descriptor will be the read-only fixed header, * and the last is for status (hence +2 above and below). * The remaining iov's are the actual data I/O vectors. * * XXX - note - this fails on crash dump, which does a * VIRTIO_BLK_T_FLUSH with a zero transfer length */ assert(n >= 2 && n <= BLOCKIF_IOV_MAX + 2); io = &sc->vbsc_ios[idx]; assert((flags[0] & VRING_DESC_F_WRITE) == 0); assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr)); vbh = iov[0].iov_base; memcpy(&io->io_req.br_iov, &iov[1], sizeof(struct iovec) * (n - 2)); io->io_req.br_iovcnt = n - 2; io->io_req.br_offset = vbh->vbh_sector * DEV_BSIZE; io->io_status = iov[--n].iov_base; assert(iov[n].iov_len == 1); assert(flags[n] & VRING_DESC_F_WRITE); /* * XXX * The guest should not be setting the BARRIER flag because * we don't advertise the capability. */ type = vbh->vbh_type & ~VBH_FLAG_BARRIER; writeop = (type == VBH_OP_WRITE); iolen = 0; for (i = 1; i < n; i++) { /* * - write op implies read-only descriptor, * - read/ident op implies write-only descriptor, * therefore test the inverse of the descriptor bit * to the op. */ assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop); iolen += iov[i].iov_len; } io->io_req.br_resid = iolen; - DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld\n\r", + DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld", writeop ? "write" : "read/ident", iolen, i - 1, io->io_req.br_offset)); switch (type) { case VBH_OP_READ: err = blockif_read(sc->bc, &io->io_req); break; case VBH_OP_WRITE: err = blockif_write(sc->bc, &io->io_req); break; case VBH_OP_FLUSH: case VBH_OP_FLUSH_OUT: err = blockif_flush(sc->bc, &io->io_req); break; case VBH_OP_IDENT: /* Assume a single buffer */ /* An s/n that fills the entire buffer is not zero-terminated.
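* The buffer is cleared first, so a shorter ident reads as zero-padded.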
*/ memset(iov[1].iov_base, 0, iov[1].iov_len); strncpy(iov[1].iov_base, sc->vbsc_ident, MIN(iov[1].iov_len, sizeof(sc->vbsc_ident))); pci_vtblk_done(&io->io_req, 0); return; default: pci_vtblk_done(&io->io_req, EOPNOTSUPP); return; } assert(err == 0); } static void pci_vtblk_notify(void *vsc, struct vqueue_info *vq) { struct pci_vtblk_softc *sc = vsc; while (vq_has_descs(vq)) pci_vtblk_proc(sc, vq); } static int pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { char bident[sizeof("XX:X:X")]; struct blockif_ctxt *bctxt; MD5_CTX mdctx; u_char digest[16]; struct pci_vtblk_softc *sc; off_t size; int i, sectsz, sts, sto; if (opts == NULL) { - printf("virtio-block: backing device required\n"); + WPRINTF(("virtio-block: backing device required")); return (1); } /* * The supplied backing file has to exist */ snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func); bctxt = blockif_open(opts, bident); if (bctxt == NULL) { perror("Could not open backing file"); return (1); } size = blockif_size(bctxt); sectsz = blockif_sectsz(bctxt); blockif_psectsz(bctxt, &sts, &sto); sc = calloc(1, sizeof(struct pci_vtblk_softc)); sc->bc = bctxt; for (i = 0; i < VTBLK_RINGSZ; i++) { struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i]; io->io_req.br_callback = pci_vtblk_done; io->io_req.br_param = io; io->io_sc = sc; io->io_idx = i; } pthread_mutex_init(&sc->vsc_mtx, NULL); /* init virtio softc and virtqueues */ vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq); sc->vbsc_vs.vs_mtx = &sc->vsc_mtx; sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ; /* sc->vbsc_vq.vq_notify = we have no per-queue notify */ /* * Create an identifier for the backing file. Use parts of the * md5 sum of the filename */ MD5Init(&mdctx); MD5Update(&mdctx, opts, strlen(opts)); MD5Final(digest, &mdctx); snprintf(sc->vbsc_ident, VTBLK_BLK_ID_BYTES, "BHYVE-%02X%02X-%02X%02X-%02X%02X", digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]); /* setup virtio block config space */ sc->vbsc_cfg.vbc_capacity = size / DEV_BSIZE; /* 512-byte units */ sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */ /* * If Linux is presented with a seg_max greater than the virtio queue * size, it can stumble into situations where it violates its own * invariants and panics. For safety, we keep seg_max clamped, paying * heed to the two extra descriptors needed for the header and status * of a request. */ sc->vbsc_cfg.vbc_seg_max = MIN(VTBLK_RINGSZ - 2, BLOCKIF_IOV_MAX); sc->vbsc_cfg.vbc_geometry.cylinders = 0; /* no geometry */ sc->vbsc_cfg.vbc_geometry.heads = 0; sc->vbsc_cfg.vbc_geometry.sectors = 0; sc->vbsc_cfg.vbc_blk_size = sectsz; sc->vbsc_cfg.vbc_topology.physical_block_exp = (sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0; sc->vbsc_cfg.vbc_topology.alignment_offset = (sto != 0) ? ((sts - sto) / sectsz) : 0; sc->vbsc_cfg.vbc_topology.min_io_size = 0; sc->vbsc_cfg.vbc_topology.opt_io_size = 0; sc->vbsc_cfg.vbc_writeback = 0; /* * Should we move some of this into virtio.c? Could * have the device, class, and subdev_0 as fields in * the virtio constants structure. 
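* For now each device sets them explicitly, as done below.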
*/ pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK); pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) { blockif_close(sc->bc); free(sc); return (1); } vi_set_io_bar(&sc->vbsc_vs, 0); return (0); } static int pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value) { - DPRINTF(("vtblk: write to readonly reg %d\n\r", offset)); + DPRINTF(("vtblk: write to readonly reg %d", offset)); return (1); } static int pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval) { struct pci_vtblk_softc *sc = vsc; void *ptr; /* our caller has already verified offset and size */ ptr = (uint8_t *)&sc->vbsc_cfg + offset; memcpy(retval, ptr, size); return (0); } struct pci_devemu pci_de_vblk = { .pe_emu = "virtio-blk", .pe_init = pci_vtblk_init, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read }; PCI_EMUL_SET(pci_de_vblk); Index: stable/12/usr.sbin/bhyve/pci_virtio_console.c =================================================================== --- stable/12/usr.sbin/bhyve/pci_virtio_console.c (revision 358183) +++ stable/12/usr.sbin/bhyve/pci_virtio_console.c (revision 358184) @@ -1,678 +1,679 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016 iXsystems Inc. * All rights reserved. * * This software was developed by Jakub Klama * under sponsorship from iXsystems Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "virtio.h" #include "mevent.h" #include "sockstream.h" #define VTCON_RINGSZ 64 #define VTCON_MAXPORTS 16 #define VTCON_MAXQ (VTCON_MAXPORTS * 2 + 2) #define VTCON_DEVICE_READY 0 #define VTCON_DEVICE_ADD 1 #define VTCON_DEVICE_REMOVE 2 #define VTCON_PORT_READY 3 #define VTCON_CONSOLE_PORT 4 #define VTCON_CONSOLE_RESIZE 5 #define VTCON_PORT_OPEN 6 #define VTCON_PORT_NAME 7 #define VTCON_F_SIZE 0 #define VTCON_F_MULTIPORT 1 #define VTCON_F_EMERG_WRITE 2 #define VTCON_S_HOSTCAPS \ (VTCON_F_SIZE | VTCON_F_MULTIPORT | VTCON_F_EMERG_WRITE) static int pci_vtcon_debug; -#define DPRINTF(params) if (pci_vtcon_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (pci_vtcon_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params struct pci_vtcon_softc; struct pci_vtcon_port; struct pci_vtcon_config; typedef void (pci_vtcon_cb_t)(struct pci_vtcon_port *, void *, struct iovec *, int); struct pci_vtcon_port { struct pci_vtcon_softc * vsp_sc; int vsp_id; const char * vsp_name; bool vsp_enabled; bool vsp_console; bool vsp_rx_ready; bool vsp_open; int vsp_rxq; int vsp_txq; void * vsp_arg; pci_vtcon_cb_t * vsp_cb; }; struct pci_vtcon_sock { struct pci_vtcon_port * vss_port; const char * vss_path; struct mevent * vss_server_evp; struct mevent * vss_conn_evp; int vss_server_fd; int vss_conn_fd; bool vss_open; }; struct pci_vtcon_softc { struct virtio_softc vsc_vs; struct vqueue_info vsc_queues[VTCON_MAXQ]; pthread_mutex_t vsc_mtx; uint64_t vsc_cfg; uint64_t vsc_features; char * vsc_rootdir; int vsc_kq; int vsc_nports; bool vsc_ready; struct pci_vtcon_port vsc_control_port; struct pci_vtcon_port vsc_ports[VTCON_MAXPORTS]; struct pci_vtcon_config *vsc_config; }; struct pci_vtcon_config { uint16_t cols; uint16_t rows; uint32_t max_nr_ports; uint32_t emerg_wr; } __attribute__((packed)); struct pci_vtcon_control { uint32_t id; uint16_t event; uint16_t value; } __attribute__((packed)); struct pci_vtcon_console_resize { uint16_t cols; uint16_t rows; } __attribute__((packed)); static void pci_vtcon_reset(void *); static void pci_vtcon_notify_rx(void *, struct vqueue_info *); static void pci_vtcon_notify_tx(void *, struct vqueue_info *); static int pci_vtcon_cfgread(void *, int, int, uint32_t *); static int pci_vtcon_cfgwrite(void *, int, int, uint32_t); static void pci_vtcon_neg_features(void *, uint64_t); static void pci_vtcon_sock_accept(int, enum ev_type, void *); static void pci_vtcon_sock_rx(int, enum ev_type, void *); static void pci_vtcon_sock_tx(struct pci_vtcon_port *, void *, struct iovec *, int); static void pci_vtcon_control_send(struct pci_vtcon_softc *, struct pci_vtcon_control *, const void *, size_t); static void pci_vtcon_announce_port(struct pci_vtcon_port *); static void pci_vtcon_open_port(struct pci_vtcon_port *, bool); static struct virtio_consts vtcon_vi_consts = { "vtcon", /* our name */ VTCON_MAXQ, /* we support VTCON_MAXQ virtqueues */ sizeof(struct pci_vtcon_config), /* config reg size */ pci_vtcon_reset, /* reset */ NULL, /* device-wide qnotify */ pci_vtcon_cfgread, /* read virtio config */ pci_vtcon_cfgwrite, /* write virtio config */ pci_vtcon_neg_features, /* apply negotiated features */ 
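	/*
	 * Virtqueue layout behind VTCON_MAXQ, summarized from
	 * pci_vtcon_vq_to_port() and pci_vtcon_port_add() below: vqs 0/1
	 * serve port 0 (vsp_txq = 0, vsp_rxq = 1), vqs 2/3 serve the
	 * control port, and port N (N >= 1) uses vqs (N + 1) * 2 and
	 * (N + 1) * 2 + 1.  Each port is backed by a UNIX socket; a
	 * hypothetical invocation and host-side connection:
	 *
	 *	bhyve ... -s 5,virtio-console,port1=/tmp/port1.sock vm0
	 *	nc -U /tmp/port1.sock
	 */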
VTCON_S_HOSTCAPS, /* our capabilities */ }; static void pci_vtcon_reset(void *vsc) { struct pci_vtcon_softc *sc; sc = vsc; - DPRINTF(("vtcon: device reset requested!\n")); + DPRINTF(("vtcon: device reset requested!")); vi_reset_dev(&sc->vsc_vs); } static void pci_vtcon_neg_features(void *vsc, uint64_t negotiated_features) { struct pci_vtcon_softc *sc = vsc; sc->vsc_features = negotiated_features; } static int pci_vtcon_cfgread(void *vsc, int offset, int size, uint32_t *retval) { struct pci_vtcon_softc *sc = vsc; void *ptr; ptr = (uint8_t *)sc->vsc_config + offset; memcpy(retval, ptr, size); return (0); } static int pci_vtcon_cfgwrite(void *vsc, int offset, int size, uint32_t val) { return (0); } static inline struct pci_vtcon_port * pci_vtcon_vq_to_port(struct pci_vtcon_softc *sc, struct vqueue_info *vq) { uint16_t num = vq->vq_num; if (num == 0 || num == 1) return (&sc->vsc_ports[0]); if (num == 2 || num == 3) return (&sc->vsc_control_port); return (&sc->vsc_ports[(num / 2) - 1]); } static inline struct vqueue_info * pci_vtcon_port_to_vq(struct pci_vtcon_port *port, bool tx_queue) { int qnum; qnum = tx_queue ? port->vsp_txq : port->vsp_rxq; return (&port->vsp_sc->vsc_queues[qnum]); } static struct pci_vtcon_port * pci_vtcon_port_add(struct pci_vtcon_softc *sc, const char *name, pci_vtcon_cb_t *cb, void *arg) { struct pci_vtcon_port *port; if (sc->vsc_nports == VTCON_MAXPORTS) { errno = EBUSY; return (NULL); } port = &sc->vsc_ports[sc->vsc_nports++]; port->vsp_id = sc->vsc_nports - 1; port->vsp_sc = sc; port->vsp_name = name; port->vsp_cb = cb; port->vsp_arg = arg; if (port->vsp_id == 0) { /* port0 */ port->vsp_txq = 0; port->vsp_rxq = 1; } else { port->vsp_txq = sc->vsc_nports * 2; port->vsp_rxq = port->vsp_txq + 1; } port->vsp_enabled = true; return (port); } static int pci_vtcon_sock_add(struct pci_vtcon_softc *sc, const char *name, const char *path) { struct pci_vtcon_sock *sock; struct sockaddr_un sun; char *pathcopy; int s = -1, fd = -1, error = 0; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; #endif sock = calloc(1, sizeof(struct pci_vtcon_sock)); if (sock == NULL) { error = -1; goto out; } s = socket(AF_UNIX, SOCK_STREAM, 0); if (s < 0) { error = -1; goto out; } pathcopy = strdup(path); if (pathcopy == NULL) { error = -1; goto out; } fd = open(dirname(pathcopy), O_RDONLY | O_DIRECTORY); if (fd < 0) { free(pathcopy); error = -1; goto out; } sun.sun_family = AF_UNIX; sun.sun_len = sizeof(struct sockaddr_un); strcpy(pathcopy, path); strlcpy(sun.sun_path, basename(pathcopy), sizeof(sun.sun_path)); free(pathcopy); if (bindat(fd, s, (struct sockaddr *)&sun, sun.sun_len) < 0) { error = -1; goto out; } if (fcntl(s, F_SETFL, O_NONBLOCK) < 0) { error = -1; goto out; } if (listen(s, 1) < 0) { error = -1; goto out; } #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE); if (caph_rights_limit(s, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif sock->vss_port = pci_vtcon_port_add(sc, name, pci_vtcon_sock_tx, sock); if (sock->vss_port == NULL) { error = -1; goto out; } sock->vss_open = false; sock->vss_conn_fd = -1; sock->vss_server_fd = s; sock->vss_server_evp = mevent_add(s, EVF_READ, pci_vtcon_sock_accept, sock); if (sock->vss_server_evp == NULL) { error = -1; goto out; } out: if (fd != -1) close(fd); if (error != 0 && s != -1) close(s); return (error); } static void pci_vtcon_sock_accept(int fd __unused, enum ev_type t __unused, void *arg) { struct pci_vtcon_sock *sock = (struct pci_vtcon_sock *)arg; int s; s = 
accept(sock->vss_server_fd, NULL, NULL); if (s < 0) return; if (sock->vss_open) { close(s); return; } sock->vss_open = true; sock->vss_conn_fd = s; sock->vss_conn_evp = mevent_add(s, EVF_READ, pci_vtcon_sock_rx, sock); pci_vtcon_open_port(sock->vss_port, true); } static void pci_vtcon_sock_rx(int fd __unused, enum ev_type t __unused, void *arg) { struct pci_vtcon_port *port; struct pci_vtcon_sock *sock = (struct pci_vtcon_sock *)arg; struct vqueue_info *vq; struct iovec iov; static char dummybuf[2048]; int len, n; uint16_t idx; port = sock->vss_port; vq = pci_vtcon_port_to_vq(port, true); if (!sock->vss_open || !port->vsp_rx_ready) { len = read(sock->vss_conn_fd, dummybuf, sizeof(dummybuf)); if (len == 0) goto close; return; } if (!vq_has_descs(vq)) { len = read(sock->vss_conn_fd, dummybuf, sizeof(dummybuf)); vq_endchains(vq, 1); if (len == 0) goto close; return; } do { n = vq_getchain(vq, &idx, &iov, 1, NULL); len = readv(sock->vss_conn_fd, &iov, n); if (len == 0 || (len < 0 && errno == EWOULDBLOCK)) { vq_retchains(vq, 1); vq_endchains(vq, 0); if (len == 0) goto close; return; } vq_relchain(vq, idx, len); } while (vq_has_descs(vq)); vq_endchains(vq, 1); close: mevent_delete_close(sock->vss_conn_evp); sock->vss_conn_fd = -1; sock->vss_open = false; } static void pci_vtcon_sock_tx(struct pci_vtcon_port *port, void *arg, struct iovec *iov, int niov) { struct pci_vtcon_sock *sock; int i, ret; sock = (struct pci_vtcon_sock *)arg; if (sock->vss_conn_fd == -1) return; for (i = 0; i < niov; i++) { ret = stream_write(sock->vss_conn_fd, iov[i].iov_base, iov[i].iov_len); if (ret <= 0) break; } if (ret <= 0) { mevent_delete_close(sock->vss_conn_evp); sock->vss_conn_fd = -1; sock->vss_open = false; } } static void pci_vtcon_control_tx(struct pci_vtcon_port *port, void *arg, struct iovec *iov, int niov) { struct pci_vtcon_softc *sc; struct pci_vtcon_port *tmp; struct pci_vtcon_control resp, *ctrl; int i; assert(niov == 1); sc = port->vsp_sc; ctrl = (struct pci_vtcon_control *)iov->iov_base; switch (ctrl->event) { case VTCON_DEVICE_READY: sc->vsc_ready = true; /* set port ready events for registered ports */ for (i = 0; i < VTCON_MAXPORTS; i++) { tmp = &sc->vsc_ports[i]; if (tmp->vsp_enabled) pci_vtcon_announce_port(tmp); if (tmp->vsp_open) pci_vtcon_open_port(tmp, true); } break; case VTCON_PORT_READY: if (ctrl->id >= sc->vsc_nports) { - WPRINTF(("VTCON_PORT_READY event for unknown port %d\n", + WPRINTF(("VTCON_PORT_READY event for unknown port %d", ctrl->id)); return; } tmp = &sc->vsc_ports[ctrl->id]; if (tmp->vsp_console) { resp.event = VTCON_CONSOLE_PORT; resp.id = ctrl->id; resp.value = 1; pci_vtcon_control_send(sc, &resp, NULL, 0); } break; } } static void pci_vtcon_announce_port(struct pci_vtcon_port *port) { struct pci_vtcon_control event; event.id = port->vsp_id; event.event = VTCON_DEVICE_ADD; event.value = 1; pci_vtcon_control_send(port->vsp_sc, &event, NULL, 0); event.event = VTCON_PORT_NAME; pci_vtcon_control_send(port->vsp_sc, &event, port->vsp_name, strlen(port->vsp_name)); } static void pci_vtcon_open_port(struct pci_vtcon_port *port, bool open) { struct pci_vtcon_control event; if (!port->vsp_sc->vsc_ready) { port->vsp_open = true; return; } event.id = port->vsp_id; event.event = VTCON_PORT_OPEN; event.value = (int)open; pci_vtcon_control_send(port->vsp_sc, &event, NULL, 0); } static void pci_vtcon_control_send(struct pci_vtcon_softc *sc, struct pci_vtcon_control *ctrl, const void *payload, size_t len) { struct vqueue_info *vq; struct iovec iov; uint16_t idx; int n; vq = 
pci_vtcon_port_to_vq(&sc->vsc_control_port, true); if (!vq_has_descs(vq)) return; n = vq_getchain(vq, &idx, &iov, 1, NULL); assert(n == 1); memcpy(iov.iov_base, ctrl, sizeof(struct pci_vtcon_control)); if (payload != NULL && len > 0) memcpy(iov.iov_base + sizeof(struct pci_vtcon_control), payload, len); vq_relchain(vq, idx, sizeof(struct pci_vtcon_control) + len); vq_endchains(vq, 1); } static void pci_vtcon_notify_tx(void *vsc, struct vqueue_info *vq) { struct pci_vtcon_softc *sc; struct pci_vtcon_port *port; struct iovec iov[1]; uint16_t idx, n; uint16_t flags[8]; sc = vsc; port = pci_vtcon_vq_to_port(sc, vq); while (vq_has_descs(vq)) { n = vq_getchain(vq, &idx, iov, 1, flags); assert(n >= 1); if (port != NULL) port->vsp_cb(port, port->vsp_arg, iov, 1); /* * Release this chain and handle more */ vq_relchain(vq, idx, 0); } vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ } static void pci_vtcon_notify_rx(void *vsc, struct vqueue_info *vq) { struct pci_vtcon_softc *sc; struct pci_vtcon_port *port; sc = vsc; port = pci_vtcon_vq_to_port(sc, vq); if (!port->vsp_rx_ready) { port->vsp_rx_ready = 1; vq_kick_disable(vq); } } static int pci_vtcon_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { struct pci_vtcon_softc *sc; char *portname = NULL; char *portpath = NULL; char *opt; int i; sc = calloc(1, sizeof(struct pci_vtcon_softc)); sc->vsc_config = calloc(1, sizeof(struct pci_vtcon_config)); sc->vsc_config->max_nr_ports = VTCON_MAXPORTS; sc->vsc_config->cols = 80; sc->vsc_config->rows = 25; vi_softc_linkup(&sc->vsc_vs, &vtcon_vi_consts, sc, pi, sc->vsc_queues); sc->vsc_vs.vs_mtx = &sc->vsc_mtx; for (i = 0; i < VTCON_MAXQ; i++) { sc->vsc_queues[i].vq_qsize = VTCON_RINGSZ; sc->vsc_queues[i].vq_notify = i % 2 == 0 ? pci_vtcon_notify_rx : pci_vtcon_notify_tx; } /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_CONSOLE); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_SIMPLECOMM); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_CONSOLE); pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) return (1); vi_set_io_bar(&sc->vsc_vs, 0); /* create control port */ sc->vsc_control_port.vsp_sc = sc; sc->vsc_control_port.vsp_txq = 2; sc->vsc_control_port.vsp_rxq = 3; sc->vsc_control_port.vsp_cb = pci_vtcon_control_tx; sc->vsc_control_port.vsp_enabled = true; while ((opt = strsep(&opts, ",")) != NULL) { portname = strsep(&opt, "="); portpath = opt; /* create port */ if (pci_vtcon_sock_add(sc, portname, portpath) < 0) { - fprintf(stderr, "cannot create port %s: %s\n", + EPRINTLN("cannot create port %s: %s", portname, strerror(errno)); return (1); } } return (0); } struct pci_devemu pci_de_vcon = { .pe_emu = "virtio-console", .pe_init = pci_vtcon_init, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read }; PCI_EMUL_SET(pci_de_vcon); Index: stable/12/usr.sbin/bhyve/pci_virtio_net.c =================================================================== --- stable/12/usr.sbin/bhyve/pci_virtio_net.c (revision 358183) +++ stable/12/usr.sbin/bhyve/pci_virtio_net.c (revision 358184) @@ -1,602 +1,603 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include /* IFNAMSIZ */ #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "mevent.h" #include "virtio.h" #include "net_utils.h" #include "net_backends.h" #include "iov.h" #define VTNET_RINGSZ 1024 #define VTNET_MAXSEGS 256 #define VTNET_MAX_PKT_LEN (65536 + 64) #define VTNET_S_HOSTCAPS \ ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \ VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC) /* * PCI config-space "registers" */ struct virtio_net_config { uint8_t mac[6]; uint16_t status; } __packed; /* * Queue definitions. 
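 *
 * The order is fixed by the virtio-net spec: the receive queue comes
 * first, then transmit, then (when negotiated) the control queue, i.e.
 *
 *	vq 0 -> VTNET_RXQ, vq 1 -> VTNET_TXQ, vq 2 -> VTNET_CTLQ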
*/ #define VTNET_RXQ 0 #define VTNET_TXQ 1 #define VTNET_CTLQ 2 /* NB: not yet supported */ #define VTNET_MAXQ 3 /* * Debug printf */ static int pci_vtnet_debug; -#define DPRINTF(params) if (pci_vtnet_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (pci_vtnet_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params /* * Per-device softc */ struct pci_vtnet_softc { struct virtio_softc vsc_vs; struct vqueue_info vsc_queues[VTNET_MAXQ - 1]; pthread_mutex_t vsc_mtx; net_backend_t *vsc_be; int resetting; /* protected by tx_mtx */ uint64_t vsc_features; /* negotiated features */ pthread_mutex_t rx_mtx; int rx_merge; /* merged rx bufs in use */ pthread_t tx_tid; pthread_mutex_t tx_mtx; pthread_cond_t tx_cond; int tx_in_progress; struct virtio_net_config vsc_config; struct virtio_consts vsc_consts; }; static void pci_vtnet_reset(void *); /* static void pci_vtnet_notify(void *, struct vqueue_info *); */ static int pci_vtnet_cfgread(void *, int, int, uint32_t *); static int pci_vtnet_cfgwrite(void *, int, int, uint32_t); static void pci_vtnet_neg_features(void *, uint64_t); static struct virtio_consts vtnet_vi_consts = { "vtnet", /* our name */ VTNET_MAXQ - 1, /* we currently support 2 virtqueues */ sizeof(struct virtio_net_config), /* config reg size */ pci_vtnet_reset, /* reset */ NULL, /* device-wide qnotify -- not used */ pci_vtnet_cfgread, /* read PCI config */ pci_vtnet_cfgwrite, /* write PCI config */ pci_vtnet_neg_features, /* apply negotiated features */ VTNET_S_HOSTCAPS, /* our capabilities */ }; static void pci_vtnet_reset(void *vsc) { struct pci_vtnet_softc *sc = vsc; - DPRINTF(("vtnet: device reset requested !\n")); + DPRINTF(("vtnet: device reset requested !")); /* Acquire the RX lock to block RX processing. */ pthread_mutex_lock(&sc->rx_mtx); /* * Make sure receive operation is disabled at least until we * re-negotiate the features, since receive operation depends * on the value of sc->rx_merge and the header length, which * are both set in pci_vtnet_neg_features(). * Receive operation will be enabled again once the guest adds * the first receive buffers and kicks us. */ netbe_rx_disable(sc->vsc_be); /* Set sc->resetting and give a chance to the TX thread to stop. */ pthread_mutex_lock(&sc->tx_mtx); sc->resetting = 1; while (sc->tx_in_progress) { pthread_mutex_unlock(&sc->tx_mtx); usleep(10000); pthread_mutex_lock(&sc->tx_mtx); } /* * Now reset rings, MSI-X vectors, and negotiated capabilities. * Do that with the TX lock held, since we need to reset * sc->resetting. */ vi_reset_dev(&sc->vsc_vs); sc->resetting = 0; pthread_mutex_unlock(&sc->tx_mtx); pthread_mutex_unlock(&sc->rx_mtx); } struct virtio_mrg_rxbuf_info { uint16_t idx; uint16_t pad; uint32_t len; }; static void pci_vtnet_rx(struct pci_vtnet_softc *sc) { struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS]; struct iovec iov[VTNET_MAXSEGS + 1]; struct vqueue_info *vq; uint32_t cur_iov_bytes; struct iovec *cur_iov; uint16_t cur_iov_len; uint32_t ulen; int n_chains; int len; vq = &sc->vsc_queues[VTNET_RXQ]; for (;;) { /* * Get a descriptor chain to store the next ingress * packet. In case of mergeable rx buffers, get as * many chains as necessary in order to make room * for a maximum sized LRO packet. */ cur_iov_bytes = 0; cur_iov_len = 0; cur_iov = iov; n_chains = 0; do { int n = vq_getchain(vq, &info[n_chains].idx, cur_iov, VTNET_MAXSEGS - cur_iov_len, NULL); if (n == 0) { /* * No rx buffers. Enable RX kicks and double * check. 
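 * Kicks were disabled while buffers were plentiful, so a buffer the
 * guest posted between our emptiness test and vq_kick_enable() would
 * never raise a notification; re-checking after enabling kicks closes
 * that window.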
*/ vq_kick_enable(vq); if (!vq_has_descs(vq)) { /* * Still no buffers. Return the unused * chains (if any), interrupt if needed * (including for NOTIFY_ON_EMPTY), and * disable the backend until the next * kick. */ vq_retchains(vq, n_chains); vq_endchains(vq, /*used_all_avail=*/1); netbe_rx_disable(sc->vsc_be); return; } /* More rx buffers found, so keep going. */ vq_kick_disable(vq); continue; } assert(n >= 1 && cur_iov_len + n <= VTNET_MAXSEGS); cur_iov_len += n; if (!sc->rx_merge) { n_chains = 1; break; } info[n_chains].len = (uint32_t)count_iov(cur_iov, n); cur_iov_bytes += info[n_chains].len; cur_iov += n; n_chains++; } while (cur_iov_bytes < VTNET_MAX_PKT_LEN && cur_iov_len < VTNET_MAXSEGS); len = netbe_recv(sc->vsc_be, iov, cur_iov_len); if (len <= 0) { /* * No more packets (len == 0), or backend errored * (err < 0). Return unused available buffers * and stop. */ vq_retchains(vq, n_chains); /* Interrupt if needed/appropriate and stop. */ vq_endchains(vq, /*used_all_avail=*/0); return; } ulen = (uint32_t)len; /* avoid too many casts below */ /* Publish the used buffers to the guest. */ if (!sc->rx_merge) { vq_relchain(vq, info[0].idx, ulen); } else { struct virtio_net_rxhdr *hdr = iov[0].iov_base; uint32_t iolen; int i = 0; assert(iov[0].iov_len >= sizeof(*hdr)); do { iolen = info[i].len; if (iolen > ulen) { iolen = ulen; } vq_relchain_prepare(vq, info[i].idx, iolen); ulen -= iolen; i++; assert(i <= n_chains); } while (ulen > 0); hdr->vrh_bufs = i; vq_relchain_publish(vq); vq_retchains(vq, n_chains - i); } } } /* * Called when there is read activity on the backend file descriptor. * Each buffer posted by the guest is assumed to be able to contain * an entire ethernet frame + rx header. */ static void pci_vtnet_rx_callback(int fd, enum ev_type type, void *param) { struct pci_vtnet_softc *sc = param; pthread_mutex_lock(&sc->rx_mtx); pci_vtnet_rx(sc); pthread_mutex_unlock(&sc->rx_mtx); } /* Called on RX kick. */ static void pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) { struct pci_vtnet_softc *sc = vsc; /* * A qnotify means that the rx process can now begin. */ pthread_mutex_lock(&sc->rx_mtx); vq_kick_disable(vq); netbe_rx_enable(sc->vsc_be); pthread_mutex_unlock(&sc->rx_mtx); } /* TX virtqueue processing, called by the TX thread. */ static void pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq) { struct iovec iov[VTNET_MAXSEGS + 1]; uint16_t idx; ssize_t len; int n; /* * Obtain chain of descriptors. The first descriptor also * contains the virtio-net header. */ n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); assert(n >= 1 && n <= VTNET_MAXSEGS); len = netbe_send(sc->vsc_be, iov, n); /* chain is processed, release it and set len */ vq_relchain(vq, idx, len > 0 ? len : 0); } /* Called on TX kick. */ static void pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq) { struct pci_vtnet_softc *sc = vsc; /* * Any ring entries to process? 
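 * If not, the kick was spurious and we simply return.  Note the signal
 * below is only issued when tx_in_progress is zero: that is the only
 * state in which the TX thread is parked on tx_cond rather than
 * already draining the ring.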
*/ if (!vq_has_descs(vq)) return; /* Signal the tx thread for processing */ pthread_mutex_lock(&sc->tx_mtx); vq_kick_disable(vq); if (sc->tx_in_progress == 0) pthread_cond_signal(&sc->tx_cond); pthread_mutex_unlock(&sc->tx_mtx); } /* * Thread which will handle processing of TX desc */ static void * pci_vtnet_tx_thread(void *param) { struct pci_vtnet_softc *sc = param; struct vqueue_info *vq; int error; vq = &sc->vsc_queues[VTNET_TXQ]; /* * Let us wait till the tx queue pointers get initialised & * first tx signaled */ pthread_mutex_lock(&sc->tx_mtx); error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); assert(error == 0); for (;;) { /* note - tx mutex is locked here */ while (sc->resetting || !vq_has_descs(vq)) { vq_kick_enable(vq); if (!sc->resetting && vq_has_descs(vq)) break; sc->tx_in_progress = 0; error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); assert(error == 0); } vq_kick_disable(vq); sc->tx_in_progress = 1; pthread_mutex_unlock(&sc->tx_mtx); do { /* * Run through entries, placing them into * iovecs and sending when an end-of-packet * is found */ pci_vtnet_proctx(sc, vq); } while (vq_has_descs(vq)); /* * Generate an interrupt if needed. */ vq_endchains(vq, /*used_all_avail=*/1); pthread_mutex_lock(&sc->tx_mtx); } } #ifdef notyet static void pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq) { - DPRINTF(("vtnet: control qnotify!\n\r")); + DPRINTF(("vtnet: control qnotify!")); } #endif static int pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { struct pci_vtnet_softc *sc; char tname[MAXCOMLEN + 1]; int mac_provided; /* * Allocate data structures for further virtio initializations. * sc also contains a copy of vtnet_vi_consts, since capabilities * change depending on the backend. */ sc = calloc(1, sizeof(struct pci_vtnet_softc)); sc->vsc_consts = vtnet_vi_consts; pthread_mutex_init(&sc->vsc_mtx, NULL); sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ; sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq; sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ; sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq; #ifdef notyet sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ; sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq; #endif /* * Attempt to open the backend device and read the MAC address * if specified. */ mac_provided = 0; if (opts != NULL) { char *devname; char *vtopts; int err; devname = vtopts = strdup(opts); (void) strsep(&vtopts, ","); if (vtopts != NULL) { err = net_parsemac(vtopts, sc->vsc_config.mac); if (err != 0) { free(devname); free(sc); return (err); } mac_provided = 1; } err = netbe_init(&sc->vsc_be, devname, pci_vtnet_rx_callback, sc); free(devname); if (err) { free(sc); return (err); } sc->vsc_consts.vc_hv_caps |= netbe_get_cap(sc->vsc_be); } if (!mac_provided) { net_genmac(pi, sc->vsc_config.mac); } /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); /* Link is up if we managed to open backend device. 
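 * Bit 0 of the status field is VIRTIO_NET_S_LINK_UP; reporting it is
 * meaningful here because VIRTIO_NET_F_STATUS is part of
 * VTNET_S_HOSTCAPS above.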
*/ sc->vsc_config.status = (opts == NULL || sc->vsc_be); vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues); sc->vsc_vs.vs_mtx = &sc->vsc_mtx; /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */ if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) { free(sc); return (1); } /* use BAR 0 to map config regs in IO space */ vi_set_io_bar(&sc->vsc_vs, 0); sc->resetting = 0; sc->rx_merge = 0; pthread_mutex_init(&sc->rx_mtx, NULL); /* * Initialize tx semaphore & spawn TX processing thread. * As of now, only one thread for TX desc processing is * spawned. */ sc->tx_in_progress = 0; pthread_mutex_init(&sc->tx_mtx, NULL); pthread_cond_init(&sc->tx_cond, NULL); pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc); snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot, pi->pi_func); pthread_set_name_np(sc->tx_tid, tname); return (0); } static int pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value) { struct pci_vtnet_softc *sc = vsc; void *ptr; if (offset < (int)sizeof(sc->vsc_config.mac)) { assert(offset + size <= (int)sizeof(sc->vsc_config.mac)); /* * The driver is allowed to change the MAC address */ ptr = &sc->vsc_config.mac[offset]; memcpy(ptr, &value, size); } else { /* silently ignore other writes */ - DPRINTF(("vtnet: write to readonly reg %d\n\r", offset)); + DPRINTF(("vtnet: write to readonly reg %d", offset)); } return (0); } static int pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval) { struct pci_vtnet_softc *sc = vsc; void *ptr; ptr = (uint8_t *)&sc->vsc_config + offset; memcpy(retval, ptr, size); return (0); } static void pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features) { struct pci_vtnet_softc *sc = vsc; unsigned int rx_vhdrlen; sc->vsc_features = negotiated_features; if (negotiated_features & VIRTIO_NET_F_MRG_RXBUF) { rx_vhdrlen = sizeof(struct virtio_net_rxhdr); sc->rx_merge = 1; } else { /* * Without mergeable rx buffers, virtio-net header is 2 * bytes shorter than sizeof(struct virtio_net_rxhdr). */ rx_vhdrlen = sizeof(struct virtio_net_rxhdr) - 2; sc->rx_merge = 0; } /* Tell the backend to enable some capabilities it has advertised. */ netbe_set_cap(sc->vsc_be, negotiated_features, rx_vhdrlen); } static struct pci_devemu pci_de_vnet = { .pe_emu = "virtio-net", .pe_init = pci_vtnet_init, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read }; PCI_EMUL_SET(pci_de_vnet); Index: stable/12/usr.sbin/bhyve/pci_virtio_rnd.c =================================================================== --- stable/12/usr.sbin/bhyve/pci_virtio_rnd.c (revision 358183) +++ stable/12/usr.sbin/bhyve/pci_virtio_rnd.c (revision 358184) @@ -1,208 +1,210 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2014 Nahanni Systems Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * virtio entropy device emulation. * Randomness is sourced from /dev/random which does not block * once it has been seeded at bootup. */ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "virtio.h" #define VTRND_RINGSZ 64 static int pci_vtrnd_debug; -#define DPRINTF(params) if (pci_vtrnd_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (pci_vtrnd_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params /* * Per-device softc */ struct pci_vtrnd_softc { struct virtio_softc vrsc_vs; struct vqueue_info vrsc_vq; pthread_mutex_t vrsc_mtx; uint64_t vrsc_cfg; int vrsc_fd; }; static void pci_vtrnd_reset(void *); static void pci_vtrnd_notify(void *, struct vqueue_info *); static struct virtio_consts vtrnd_vi_consts = { "vtrnd", /* our name */ 1, /* we support 1 virtqueue */ 0, /* config reg size */ pci_vtrnd_reset, /* reset */ pci_vtrnd_notify, /* device-wide qnotify */ NULL, /* read virtio config */ NULL, /* write virtio config */ NULL, /* apply negotiated features */ 0, /* our capabilities */ }; static void pci_vtrnd_reset(void *vsc) { struct pci_vtrnd_softc *sc; sc = vsc; - DPRINTF(("vtrnd: device reset requested !\n")); + DPRINTF(("vtrnd: device reset requested !")); vi_reset_dev(&sc->vrsc_vs); } static void pci_vtrnd_notify(void *vsc, struct vqueue_info *vq) { struct iovec iov; struct pci_vtrnd_softc *sc; int len; uint16_t idx; sc = vsc; if (sc->vrsc_fd < 0) { vq_endchains(vq, 0); return; } while (vq_has_descs(vq)) { vq_getchain(vq, &idx, &iov, 1, NULL); len = read(sc->vrsc_fd, iov.iov_base, iov.iov_len); - DPRINTF(("vtrnd: vtrnd_notify(): %d\r\n", len)); + DPRINTF(("vtrnd: vtrnd_notify(): %d", len)); /* Catastrophe if unable to read from /dev/random */ assert(len > 0); /* * Release this chain and handle more */ vq_relchain(vq, idx, len); } vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ } static int pci_vtrnd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { struct pci_vtrnd_softc *sc; int fd; int len; uint8_t v; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; #endif /* * Should always be able to open /dev/random. */ fd = open("/dev/random", O_RDONLY | O_NONBLOCK); assert(fd >= 0); #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_READ); if (caph_rights_limit(fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif /* * Check that device is seeded and non-blocking. 
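 * A one-byte O_NONBLOCK read distinguishes the cases: it returns 1 once
 * the entropy pool has been seeded, and fails (typically -1 with
 * EAGAIN) while it has not, which the <= 0 test below turns into an
 * init failure.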
*/ len = read(fd, &v, sizeof(v)); if (len <= 0) { WPRINTF(("vtrnd: /dev/random not ready, read(): %d", len)); + close(fd); return (1); } sc = calloc(1, sizeof(struct pci_vtrnd_softc)); vi_softc_linkup(&sc->vrsc_vs, &vtrnd_vi_consts, sc, pi, &sc->vrsc_vq); sc->vrsc_vs.vs_mtx = &sc->vrsc_mtx; sc->vrsc_vq.vq_qsize = VTRND_RINGSZ; /* keep /dev/random opened while emulating */ sc->vrsc_fd = fd; /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_RANDOM); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_CRYPTO); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_ENTROPY); pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); if (vi_intr_init(&sc->vrsc_vs, 1, fbsdrun_virtio_msix())) return (1); vi_set_io_bar(&sc->vrsc_vs, 0); return (0); } struct pci_devemu pci_de_vrnd = { .pe_emu = "virtio-rnd", .pe_init = pci_vtrnd_init, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read }; PCI_EMUL_SET(pci_de_vrnd); Index: stable/12/usr.sbin/bhyve/pci_virtio_scsi.c =================================================================== --- stable/12/usr.sbin/bhyve/pci_virtio_scsi.c (revision 358183) +++ stable/12/usr.sbin/bhyve/pci_virtio_scsi.c (revision 358184) @@ -1,738 +1,739 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016 Jakub Klama . * Copyright (c) 2018 Marcelo Araujo . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "virtio.h" #include "iov.h" #define VTSCSI_RINGSZ 64 #define VTSCSI_REQUESTQ 1 #define VTSCSI_THR_PER_Q 16 #define VTSCSI_MAXQ (VTSCSI_REQUESTQ + 2) #define VTSCSI_MAXSEG 64 #define VTSCSI_IN_HEADER_LEN(_sc) \ (sizeof(struct pci_vtscsi_req_cmd_rd) + _sc->vss_config.cdb_size) #define VTSCSI_OUT_HEADER_LEN(_sc) \ (sizeof(struct pci_vtscsi_req_cmd_wr) + _sc->vss_config.sense_size) #define VIRTIO_SCSI_MAX_CHANNEL 0 #define VIRTIO_SCSI_MAX_TARGET 0 #define VIRTIO_SCSI_MAX_LUN 16383 #define VIRTIO_SCSI_F_INOUT (1 << 0) #define VIRTIO_SCSI_F_HOTPLUG (1 << 1) #define VIRTIO_SCSI_F_CHANGE (1 << 2) static int pci_vtscsi_debug = 0; -#define DPRINTF(params) if (pci_vtscsi_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (pci_vtscsi_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params struct pci_vtscsi_config { uint32_t num_queues; uint32_t seg_max; uint32_t max_sectors; uint32_t cmd_per_lun; uint32_t event_info_size; uint32_t sense_size; uint32_t cdb_size; uint16_t max_channel; uint16_t max_target; uint32_t max_lun; } __attribute__((packed)); struct pci_vtscsi_queue { struct pci_vtscsi_softc * vsq_sc; struct vqueue_info * vsq_vq; pthread_mutex_t vsq_mtx; pthread_mutex_t vsq_qmtx; pthread_cond_t vsq_cv; STAILQ_HEAD(, pci_vtscsi_request) vsq_requests; LIST_HEAD(, pci_vtscsi_worker) vsq_workers; }; struct pci_vtscsi_worker { struct pci_vtscsi_queue * vsw_queue; pthread_t vsw_thread; bool vsw_exiting; LIST_ENTRY(pci_vtscsi_worker) vsw_link; }; struct pci_vtscsi_request { struct pci_vtscsi_queue * vsr_queue; struct iovec vsr_iov_in[VTSCSI_MAXSEG]; int vsr_niov_in; struct iovec vsr_iov_out[VTSCSI_MAXSEG]; int vsr_niov_out; uint32_t vsr_idx; STAILQ_ENTRY(pci_vtscsi_request) vsr_link; }; /* * Per-device softc */ struct pci_vtscsi_softc { struct virtio_softc vss_vs; struct vqueue_info vss_vq[VTSCSI_MAXQ]; struct pci_vtscsi_queue vss_queues[VTSCSI_REQUESTQ]; pthread_mutex_t vss_mtx; int vss_iid; int vss_ctl_fd; uint32_t vss_features; struct pci_vtscsi_config vss_config; }; #define VIRTIO_SCSI_T_TMF 0 #define VIRTIO_SCSI_T_TMF_ABORT_TASK 0 #define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1 #define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2 #define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3 #define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4 #define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5 #define VIRTIO_SCSI_T_TMF_QUERY_TASK 6 #define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7 /* command-specific response values */ #define VIRTIO_SCSI_S_FUNCTION_COMPLETE 0 #define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10 #define VIRTIO_SCSI_S_FUNCTION_REJECTED 11 struct pci_vtscsi_ctrl_tmf { uint32_t type; uint32_t subtype; uint8_t lun[8]; uint64_t id; uint8_t response; } __attribute__((packed)); #define VIRTIO_SCSI_T_AN_QUERY 1 #define VIRTIO_SCSI_EVT_ASYNC_OPERATIONAL_CHANGE 2 #define VIRTIO_SCSI_EVT_ASYNC_POWER_MGMT 4 #define VIRTIO_SCSI_EVT_ASYNC_EXTERNAL_REQUEST 8 #define VIRTIO_SCSI_EVT_ASYNC_MEDIA_CHANGE 16 #define VIRTIO_SCSI_EVT_ASYNC_MULTI_HOST 32 #define VIRTIO_SCSI_EVT_ASYNC_DEVICE_BUSY 64 struct pci_vtscsi_ctrl_an { uint32_t type; uint8_t lun[8]; uint32_t event_requested; uint32_t event_actual; uint8_t response; } __attribute__((packed)); /* command-specific response values */ #define 
VIRTIO_SCSI_S_OK 0 #define VIRTIO_SCSI_S_OVERRUN 1 #define VIRTIO_SCSI_S_ABORTED 2 #define VIRTIO_SCSI_S_BAD_TARGET 3 #define VIRTIO_SCSI_S_RESET 4 #define VIRTIO_SCSI_S_BUSY 5 #define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6 #define VIRTIO_SCSI_S_TARGET_FAILURE 7 #define VIRTIO_SCSI_S_NEXUS_FAILURE 8 #define VIRTIO_SCSI_S_FAILURE 9 #define VIRTIO_SCSI_S_INCORRECT_LUN 12 /* task_attr */ #define VIRTIO_SCSI_S_SIMPLE 0 #define VIRTIO_SCSI_S_ORDERED 1 #define VIRTIO_SCSI_S_HEAD 2 #define VIRTIO_SCSI_S_ACA 3 struct pci_vtscsi_event { uint32_t event; uint8_t lun[8]; uint32_t reason; } __attribute__((packed)); struct pci_vtscsi_req_cmd_rd { uint8_t lun[8]; uint64_t id; uint8_t task_attr; uint8_t prio; uint8_t crn; uint8_t cdb[]; } __attribute__((packed)); struct pci_vtscsi_req_cmd_wr { uint32_t sense_len; uint32_t residual; uint16_t status_qualifier; uint8_t status; uint8_t response; uint8_t sense[]; } __attribute__((packed)); static void *pci_vtscsi_proc(void *); static void pci_vtscsi_reset(void *); static void pci_vtscsi_neg_features(void *, uint64_t); static int pci_vtscsi_cfgread(void *, int, int, uint32_t *); static int pci_vtscsi_cfgwrite(void *, int, int, uint32_t); static inline int pci_vtscsi_get_lun(uint8_t *); static int pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, size_t); static int pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *, struct pci_vtscsi_ctrl_tmf *); static int pci_vtscsi_an_handle(struct pci_vtscsi_softc *, struct pci_vtscsi_ctrl_an *); static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *, struct iovec *, int, struct iovec *, int); static void pci_vtscsi_controlq_notify(void *, struct vqueue_info *); static void pci_vtscsi_eventq_notify(void *, struct vqueue_info *); static void pci_vtscsi_requestq_notify(void *, struct vqueue_info *); static int pci_vtscsi_init_queue(struct pci_vtscsi_softc *, struct pci_vtscsi_queue *, int); static int pci_vtscsi_init(struct vmctx *, struct pci_devinst *, char *); static struct virtio_consts vtscsi_vi_consts = { "vtscsi", /* our name */ VTSCSI_MAXQ, /* we support 2+n virtqueues */ sizeof(struct pci_vtscsi_config), /* config reg size */ pci_vtscsi_reset, /* reset */ NULL, /* device-wide qnotify */ pci_vtscsi_cfgread, /* read virtio config */ pci_vtscsi_cfgwrite, /* write virtio config */ pci_vtscsi_neg_features, /* apply negotiated features */ 0, /* our capabilities */ }; static void * pci_vtscsi_proc(void *arg) { struct pci_vtscsi_worker *worker = (struct pci_vtscsi_worker *)arg; struct pci_vtscsi_queue *q = worker->vsw_queue; struct pci_vtscsi_request *req; int iolen; for (;;) { pthread_mutex_lock(&q->vsq_mtx); while (STAILQ_EMPTY(&q->vsq_requests) && !worker->vsw_exiting) pthread_cond_wait(&q->vsq_cv, &q->vsq_mtx); if (worker->vsw_exiting) break; req = STAILQ_FIRST(&q->vsq_requests); STAILQ_REMOVE_HEAD(&q->vsq_requests, vsr_link); pthread_mutex_unlock(&q->vsq_mtx); iolen = pci_vtscsi_request_handle(q, req->vsr_iov_in, req->vsr_niov_in, req->vsr_iov_out, req->vsr_niov_out); pthread_mutex_lock(&q->vsq_qmtx); vq_relchain(q->vsq_vq, req->vsr_idx, iolen); vq_endchains(q->vsq_vq, 0); pthread_mutex_unlock(&q->vsq_qmtx); - DPRINTF(("virtio-scsi: request completed\n", + DPRINTF(("virtio-scsi: request %u completed", req->vsr_idx)); free(req); } pthread_mutex_unlock(&q->vsq_mtx); return (NULL); } static void pci_vtscsi_reset(void *vsc) { struct pci_vtscsi_softc *sc; sc = vsc; - DPRINTF(("vtscsi: device reset requested\n")); + DPRINTF(("vtscsi: device reset requested")); vi_reset_dev(&sc->vss_vs); /* initialize config structure
*/ sc->vss_config = (struct pci_vtscsi_config){ .num_queues = VTSCSI_REQUESTQ, /* Leave room for the request and the response. */ .seg_max = VTSCSI_MAXSEG - 2, .max_sectors = 2, .cmd_per_lun = 1, .event_info_size = sizeof(struct pci_vtscsi_event), .sense_size = 96, .cdb_size = 32, .max_channel = VIRTIO_SCSI_MAX_CHANNEL, .max_target = VIRTIO_SCSI_MAX_TARGET, .max_lun = VIRTIO_SCSI_MAX_LUN }; } static void pci_vtscsi_neg_features(void *vsc, uint64_t negotiated_features) { struct pci_vtscsi_softc *sc = vsc; sc->vss_features = negotiated_features; } static int pci_vtscsi_cfgread(void *vsc, int offset, int size, uint32_t *retval) { struct pci_vtscsi_softc *sc = vsc; void *ptr; ptr = (uint8_t *)&sc->vss_config + offset; memcpy(retval, ptr, size); return (0); } static int pci_vtscsi_cfgwrite(void *vsc, int offset, int size, uint32_t val) { return (0); } static inline int pci_vtscsi_get_lun(uint8_t *lun) { return (((lun[2] << 8) | lun[3]) & 0x3fff); } static int pci_vtscsi_control_handle(struct pci_vtscsi_softc *sc, void *buf, size_t bufsize) { struct pci_vtscsi_ctrl_tmf *tmf; struct pci_vtscsi_ctrl_an *an; uint32_t type; type = *(uint32_t *)buf; if (type == VIRTIO_SCSI_T_TMF) { tmf = (struct pci_vtscsi_ctrl_tmf *)buf; return (pci_vtscsi_tmf_handle(sc, tmf)); } if (type == VIRTIO_SCSI_T_AN_QUERY) { an = (struct pci_vtscsi_ctrl_an *)buf; return (pci_vtscsi_an_handle(sc, an)); } return (0); } static int pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc, struct pci_vtscsi_ctrl_tmf *tmf) { union ctl_io *io; int err; io = ctl_scsi_alloc_io(sc->vss_iid); ctl_scsi_zero_io(io); io->io_hdr.io_type = CTL_IO_TASK; io->io_hdr.nexus.initid = sc->vss_iid; io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(tmf->lun); io->taskio.tag_type = CTL_TAG_SIMPLE; io->taskio.tag_num = (uint32_t)tmf->id; switch (tmf->subtype) { case VIRTIO_SCSI_T_TMF_ABORT_TASK: io->taskio.task_action = CTL_TASK_ABORT_TASK; break; case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET: io->taskio.task_action = CTL_TASK_ABORT_TASK_SET; break; case VIRTIO_SCSI_T_TMF_CLEAR_ACA: io->taskio.task_action = CTL_TASK_CLEAR_ACA; break; case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: io->taskio.task_action = CTL_TASK_CLEAR_TASK_SET; break; case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET; break; case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: io->taskio.task_action = CTL_TASK_LUN_RESET; break; case VIRTIO_SCSI_T_TMF_QUERY_TASK: io->taskio.task_action = CTL_TASK_QUERY_TASK; break; case VIRTIO_SCSI_T_TMF_QUERY_TASK_SET: io->taskio.task_action = CTL_TASK_QUERY_TASK_SET; break; } if (pci_vtscsi_debug) { struct sbuf *sb = sbuf_new_auto(); ctl_io_sbuf(io, sb); sbuf_finish(sb); DPRINTF(("pci_virtio_scsi: %s", sbuf_data(sb))); sbuf_delete(sb); } err = ioctl(sc->vss_ctl_fd, CTL_IO, io); if (err != 0) - WPRINTF(("CTL_IO: err=%d (%s)\n", errno, strerror(errno))); + WPRINTF(("CTL_IO: err=%d (%s)", errno, strerror(errno))); tmf->response = io->taskio.task_status; ctl_scsi_free_io(io); return (1); } static int pci_vtscsi_an_handle(struct pci_vtscsi_softc *sc, struct pci_vtscsi_ctrl_an *an) { return (0); } static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, int niov_in, struct iovec *iov_out, int niov_out) { struct pci_vtscsi_softc *sc = q->vsq_sc; struct pci_vtscsi_req_cmd_rd *cmd_rd = NULL; struct pci_vtscsi_req_cmd_wr *cmd_wr; struct iovec data_iov_in[VTSCSI_MAXSEG], data_iov_out[VTSCSI_MAXSEG]; union ctl_io *io; int data_niov_in, data_niov_out; void *ext_data_ptr = NULL; uint32_t ext_data_len = 0, ext_sg_entries = 0; int 
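	/*
	 * cmd_rd->lun arrives in the 8-byte virtio-scsi LUN format per the
	 * spec: lun[0] = 1, lun[1] = target, bytes 2-3 = 0x4000 | lun
	 * (flat addressing), which pci_vtscsi_get_lun() above strips back
	 * out.  Worked example for LUN 5: lun[2] = 0x40, lun[3] = 0x05,
	 * so ((0x40 << 8) | 0x05) & 0x3fff == 5.
	 */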
err, iolen; seek_iov(iov_in, niov_in, data_iov_in, &data_niov_in, VTSCSI_IN_HEADER_LEN(sc)); seek_iov(iov_out, niov_out, data_iov_out, &data_niov_out, VTSCSI_OUT_HEADER_LEN(sc)); truncate_iov(iov_in, &niov_in, VTSCSI_IN_HEADER_LEN(sc)); truncate_iov(iov_out, &niov_out, VTSCSI_OUT_HEADER_LEN(sc)); iov_to_buf(iov_in, niov_in, (void **)&cmd_rd); cmd_wr = malloc(VTSCSI_OUT_HEADER_LEN(sc)); io = ctl_scsi_alloc_io(sc->vss_iid); ctl_scsi_zero_io(io); io->io_hdr.nexus.initid = sc->vss_iid; io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(cmd_rd->lun); io->io_hdr.io_type = CTL_IO_SCSI; if (data_niov_in > 0) { ext_data_ptr = (void *)data_iov_in; ext_sg_entries = data_niov_in; ext_data_len = count_iov(data_iov_in, data_niov_in); io->io_hdr.flags |= CTL_FLAG_DATA_OUT; } else if (data_niov_out > 0) { ext_data_ptr = (void *)data_iov_out; ext_sg_entries = data_niov_out; ext_data_len = count_iov(data_iov_out, data_niov_out); io->io_hdr.flags |= CTL_FLAG_DATA_IN; } io->scsiio.sense_len = sc->vss_config.sense_size; io->scsiio.tag_num = (uint32_t)cmd_rd->id; switch (cmd_rd->task_attr) { case VIRTIO_SCSI_S_ORDERED: io->scsiio.tag_type = CTL_TAG_ORDERED; break; case VIRTIO_SCSI_S_HEAD: io->scsiio.tag_type = CTL_TAG_HEAD_OF_QUEUE; break; case VIRTIO_SCSI_S_ACA: io->scsiio.tag_type = CTL_TAG_ACA; break; case VIRTIO_SCSI_S_SIMPLE: default: io->scsiio.tag_type = CTL_TAG_SIMPLE; break; } io->scsiio.ext_sg_entries = ext_sg_entries; io->scsiio.ext_data_ptr = ext_data_ptr; io->scsiio.ext_data_len = ext_data_len; io->scsiio.ext_data_filled = 0; io->scsiio.cdb_len = sc->vss_config.cdb_size; memcpy(io->scsiio.cdb, cmd_rd->cdb, sc->vss_config.cdb_size); if (pci_vtscsi_debug) { struct sbuf *sb = sbuf_new_auto(); ctl_io_sbuf(io, sb); sbuf_finish(sb); DPRINTF(("pci_virtio_scsi: %s", sbuf_data(sb))); sbuf_delete(sb); } err = ioctl(sc->vss_ctl_fd, CTL_IO, io); if (err != 0) { - WPRINTF(("CTL_IO: err=%d (%s)\n", errno, strerror(errno))); + WPRINTF(("CTL_IO: err=%d (%s)", errno, strerror(errno))); cmd_wr->response = VIRTIO_SCSI_S_FAILURE; } else { cmd_wr->sense_len = MIN(io->scsiio.sense_len, sc->vss_config.sense_size); cmd_wr->residual = io->scsiio.residual; cmd_wr->status = io->scsiio.scsi_status; cmd_wr->response = VIRTIO_SCSI_S_OK; memcpy(&cmd_wr->sense, &io->scsiio.sense_data, cmd_wr->sense_len); } buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out, 0); /* compute the completion length before the ctl_io is freed */ iolen = VTSCSI_OUT_HEADER_LEN(sc) + io->scsiio.ext_data_filled; free(cmd_rd); free(cmd_wr); ctl_scsi_free_io(io); return (iolen); } static void pci_vtscsi_controlq_notify(void *vsc, struct vqueue_info *vq) { struct pci_vtscsi_softc *sc; struct iovec iov[VTSCSI_MAXSEG]; uint16_t idx, n; void *buf = NULL; size_t bufsize; int iolen; sc = vsc; while (vq_has_descs(vq)) { n = vq_getchain(vq, &idx, iov, VTSCSI_MAXSEG, NULL); bufsize = iov_to_buf(iov, n, &buf); iolen = pci_vtscsi_control_handle(sc, buf, bufsize); buf_to_iov(buf + bufsize - iolen, iolen, iov, n, bufsize - iolen); /* * Release this chain and handle more */ vq_relchain(vq, idx, iolen); } vq_endchains(vq, 1); /* Generate interrupt if appropriate.
*/ free(buf); } static void pci_vtscsi_eventq_notify(void *vsc, struct vqueue_info *vq) { vq_kick_disable(vq); } static void pci_vtscsi_requestq_notify(void *vsc, struct vqueue_info *vq) { struct pci_vtscsi_softc *sc; struct pci_vtscsi_queue *q; struct pci_vtscsi_request *req; struct iovec iov[VTSCSI_MAXSEG]; uint16_t flags[VTSCSI_MAXSEG]; uint16_t idx, n, i; int readable; sc = vsc; q = &sc->vss_queues[vq->vq_num - 2]; while (vq_has_descs(vq)) { readable = 0; n = vq_getchain(vq, &idx, iov, VTSCSI_MAXSEG, flags); /* Count readable descriptors */ for (i = 0; i < n; i++) { if (flags[i] & VRING_DESC_F_WRITE) break; readable++; } req = calloc(1, sizeof(struct pci_vtscsi_request)); req->vsr_idx = idx; req->vsr_queue = q; req->vsr_niov_in = readable; req->vsr_niov_out = n - readable; memcpy(req->vsr_iov_in, iov, req->vsr_niov_in * sizeof(struct iovec)); memcpy(req->vsr_iov_out, iov + readable, req->vsr_niov_out * sizeof(struct iovec)); pthread_mutex_lock(&q->vsq_mtx); STAILQ_INSERT_TAIL(&q->vsq_requests, req, vsr_link); pthread_cond_signal(&q->vsq_cv); pthread_mutex_unlock(&q->vsq_mtx); - DPRINTF(("virtio-scsi: request enqueued\n", idx)); + DPRINTF(("virtio-scsi: request %d enqueued", idx)); } } static int pci_vtscsi_init_queue(struct pci_vtscsi_softc *sc, struct pci_vtscsi_queue *queue, int num) { struct pci_vtscsi_worker *worker; char tname[MAXCOMLEN + 1]; int i; queue->vsq_sc = sc; queue->vsq_vq = &sc->vss_vq[num + 2]; pthread_mutex_init(&queue->vsq_mtx, NULL); pthread_mutex_init(&queue->vsq_qmtx, NULL); pthread_cond_init(&queue->vsq_cv, NULL); STAILQ_INIT(&queue->vsq_requests); LIST_INIT(&queue->vsq_workers); for (i = 0; i < VTSCSI_THR_PER_Q; i++) { worker = calloc(1, sizeof(struct pci_vtscsi_worker)); worker->vsw_queue = queue; pthread_create(&worker->vsw_thread, NULL, &pci_vtscsi_proc, (void *)worker); snprintf(tname, sizeof(tname), "vtscsi:%d-%d", num, i); pthread_set_name_np(worker->vsw_thread, tname); LIST_INSERT_HEAD(&queue->vsq_workers, worker, vsw_link); } return (0); } static int pci_vtscsi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { struct pci_vtscsi_softc *sc; char *opt, *optname; const char *devname; int i, optidx = 0; sc = calloc(1, sizeof(struct pci_vtscsi_softc)); devname = "/dev/cam/ctl"; while ((opt = strsep(&opts, ",")) != NULL) { optname = strsep(&opt, "="); if (opt == NULL && optidx == 0) { if (optname[0] != 0) devname = optname; } else if (strcmp(optname, "dev") == 0 && opt != NULL) { devname = opt; } else if (strcmp(optname, "iid") == 0 && opt != NULL) { sc->vss_iid = strtoul(opt, NULL, 10); } else { - fprintf(stderr, "Invalid option %s\n", optname); + EPRINTLN("Invalid option %s", optname); free(sc); return (1); } optidx++; } sc->vss_ctl_fd = open(devname, O_RDWR); if (sc->vss_ctl_fd < 0) { - WPRINTF(("cannot open %s: %s\n", devname, strerror(errno))); + WPRINTF(("cannot open %s: %s", devname, strerror(errno))); free(sc); return (1); } vi_softc_linkup(&sc->vss_vs, &vtscsi_vi_consts, sc, pi, sc->vss_vq); sc->vss_vs.vs_mtx = &sc->vss_mtx; /* controlq */ sc->vss_vq[0].vq_qsize = VTSCSI_RINGSZ; sc->vss_vq[0].vq_notify = pci_vtscsi_controlq_notify; /* eventq */ sc->vss_vq[1].vq_qsize = VTSCSI_RINGSZ; sc->vss_vq[1].vq_notify = pci_vtscsi_eventq_notify; /* request queues */ for (i = 2; i < VTSCSI_MAXQ; i++) { sc->vss_vq[i].vq_qsize = VTSCSI_RINGSZ; sc->vss_vq[i].vq_notify = pci_vtscsi_requestq_notify; pci_vtscsi_init_queue(sc, &sc->vss_queues[i - 2], i - 2); } /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_SCSI);
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_SCSI); pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); if (vi_intr_init(&sc->vss_vs, 1, fbsdrun_virtio_msix())) return (1); vi_set_io_bar(&sc->vss_vs, 0); return (0); } struct pci_devemu pci_de_vscsi = { .pe_emu = "virtio-scsi", .pe_init = pci_vtscsi_init, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read }; PCI_EMUL_SET(pci_de_vscsi); Index: stable/12/usr.sbin/bhyve/pci_xhci.c =================================================================== --- stable/12/usr.sbin/bhyve/pci_xhci.c (revision 358183) +++ stable/12/usr.sbin/bhyve/pci_xhci.c (revision 358184) @@ -1,2864 +1,2866 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2014 Leon Dang * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* XHCI options: -s ,xhci,{devices} devices: tablet USB tablet mouse */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "pci_xhci.h" #include "usb_emul.h" static int xhci_debug = 0; -#define DPRINTF(params) if (xhci_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (xhci_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params #define XHCI_NAME "xhci" #define XHCI_MAX_DEVS 8 /* 4 USB3 + 4 USB2 devs */ #define XHCI_MAX_SLOTS 64 /* min allowed by Windows drivers */ /* * XHCI data structures can be up to 64k, but limit paddr_guest2host mapping * to 4k to avoid going over the guest physical memory barrier. 
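 * XHCI_GADDR() below therefore maps only up to the end of the 4K page
 * containing the requested address; e.g. for a = 0x1f80 it asks for
 * XHCI_PADDR_SZ - (a & (XHCI_PADDR_SZ - 1)) = 4096 - 0xf80 = 128 bytes.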
*/ #define XHCI_PADDR_SZ 4096 /* paddr_guest2host max size */ #define XHCI_ERST_MAX 0 /* max 2^entries event ring seg tbl */ #define XHCI_CAPLEN (4*8) /* offset of op register space */ #define XHCI_HCCPRAMS2 0x1C /* offset of HCCPARAMS2 register */ #define XHCI_PORTREGS_START 0x400 #define XHCI_DOORBELL_MAX 256 #define XHCI_STREAMS_MAX 1 /* 4-15 in XHCI spec */ /* caplength and hci-version registers */ #define XHCI_SET_CAPLEN(x) ((x) & 0xFF) #define XHCI_SET_HCIVERSION(x) (((x) & 0xFFFF) << 16) #define XHCI_GET_HCIVERSION(x) (((x) >> 16) & 0xFFFF) /* hcsparams1 register */ #define XHCI_SET_HCSP1_MAXSLOTS(x) ((x) & 0xFF) #define XHCI_SET_HCSP1_MAXINTR(x) (((x) & 0x7FF) << 8) #define XHCI_SET_HCSP1_MAXPORTS(x) (((x) & 0xFF) << 24) /* hcsparams2 register */ #define XHCI_SET_HCSP2_IST(x) ((x) & 0x0F) #define XHCI_SET_HCSP2_ERSTMAX(x) (((x) & 0x0F) << 4) #define XHCI_SET_HCSP2_MAXSCRATCH_HI(x) (((x) & 0x1F) << 21) #define XHCI_SET_HCSP2_MAXSCRATCH_LO(x) (((x) & 0x1F) << 27) /* hcsparams3 register */ #define XHCI_SET_HCSP3_U1EXITLATENCY(x) ((x) & 0xFF) #define XHCI_SET_HCSP3_U2EXITLATENCY(x) (((x) & 0xFFFF) << 16) /* hccparams1 register */ #define XHCI_SET_HCCP1_AC64(x) ((x) & 0x01) #define XHCI_SET_HCCP1_BNC(x) (((x) & 0x01) << 1) #define XHCI_SET_HCCP1_CSZ(x) (((x) & 0x01) << 2) #define XHCI_SET_HCCP1_PPC(x) (((x) & 0x01) << 3) #define XHCI_SET_HCCP1_PIND(x) (((x) & 0x01) << 4) #define XHCI_SET_HCCP1_LHRC(x) (((x) & 0x01) << 5) #define XHCI_SET_HCCP1_LTC(x) (((x) & 0x01) << 6) #define XHCI_SET_HCCP1_NSS(x) (((x) & 0x01) << 7) #define XHCI_SET_HCCP1_PAE(x) (((x) & 0x01) << 8) #define XHCI_SET_HCCP1_SPC(x) (((x) & 0x01) << 9) #define XHCI_SET_HCCP1_SEC(x) (((x) & 0x01) << 10) #define XHCI_SET_HCCP1_CFC(x) (((x) & 0x01) << 11) #define XHCI_SET_HCCP1_MAXPSA(x) (((x) & 0x0F) << 12) #define XHCI_SET_HCCP1_XECP(x) (((x) & 0xFFFF) << 16) /* hccparams2 register */ #define XHCI_SET_HCCP2_U3C(x) ((x) & 0x01) #define XHCI_SET_HCCP2_CMC(x) (((x) & 0x01) << 1) #define XHCI_SET_HCCP2_FSC(x) (((x) & 0x01) << 2) #define XHCI_SET_HCCP2_CTC(x) (((x) & 0x01) << 3) #define XHCI_SET_HCCP2_LEC(x) (((x) & 0x01) << 4) #define XHCI_SET_HCCP2_CIC(x) (((x) & 0x01) << 5) /* other registers */ #define XHCI_SET_DOORBELL(x) ((x) & ~0x03) #define XHCI_SET_RTSOFFSET(x) ((x) & ~0x0F) /* register masks */ #define XHCI_PS_PLS_MASK (0xF << 5) /* port link state */ #define XHCI_PS_SPEED_MASK (0xF << 10) /* port speed */ #define XHCI_PS_PIC_MASK (0x3 << 14) /* port indicator */ /* port register set */ #define XHCI_PORTREGS_BASE 0x400 /* base offset */ #define XHCI_PORTREGS_PORT0 0x3F0 #define XHCI_PORTREGS_SETSZ 0x10 /* size of a set */ #define MASK_64_HI(x) ((x) & ~0xFFFFFFFFULL) #define MASK_64_LO(x) ((x) & 0xFFFFFFFFULL) #define FIELD_REPLACE(a,b,m,s) (((a) & ~((m) << (s))) | \ (((b) & (m)) << (s))) #define FIELD_COPY(a,b,m,s) (((a) & ~((m) << (s))) | \ (((b) & ((m) << (s))))) struct pci_xhci_trb_ring { uint64_t ringaddr; /* current dequeue guest address */ uint32_t ccs; /* consumer cycle state */ }; /* device endpoint transfer/stream rings */ struct pci_xhci_dev_ep { union { struct xhci_trb *_epu_tr; struct xhci_stream_ctx *_epu_sctx; } _ep_trbsctx; #define ep_tr _ep_trbsctx._epu_tr #define ep_sctx _ep_trbsctx._epu_sctx union { struct pci_xhci_trb_ring _epu_trb; struct pci_xhci_trb_ring *_epu_sctx_trbs; } _ep_trb_rings; #define ep_ringaddr _ep_trb_rings._epu_trb.ringaddr #define ep_ccs _ep_trb_rings._epu_trb.ccs #define ep_sctx_trbs _ep_trb_rings._epu_sctx_trbs struct usb_data_xfer *ep_xfer; /* transfer chain */ }; /* device 
context base address array: maps slot->device context */ struct xhci_dcbaa { uint64_t dcba[USB_MAX_DEVICES+1]; /* xhci_dev_ctx ptrs */ }; /* port status registers */ struct pci_xhci_portregs { uint32_t portsc; /* port status and control */ uint32_t portpmsc; /* port pwr mgmt status & control */ uint32_t portli; /* port link info */ uint32_t porthlpmc; /* port hardware LPM control */ } __packed; #define XHCI_PS_SPEED_SET(x) (((x) & 0xF) << 10) /* xHC operational registers */ struct pci_xhci_opregs { uint32_t usbcmd; /* usb command */ uint32_t usbsts; /* usb status */ uint32_t pgsz; /* page size */ uint32_t dnctrl; /* device notification control */ uint64_t crcr; /* command ring control */ uint64_t dcbaap; /* device ctx base addr array ptr */ uint32_t config; /* configure */ /* guest mapped addresses: */ struct xhci_trb *cr_p; /* crcr dequeue */ struct xhci_dcbaa *dcbaa_p; /* dev ctx array ptr */ }; /* xHC runtime registers */ struct pci_xhci_rtsregs { uint32_t mfindex; /* microframe index */ struct { /* interrupter register set */ uint32_t iman; /* interrupter management */ uint32_t imod; /* interrupter moderation */ uint32_t erstsz; /* event ring segment table size */ uint32_t rsvd; uint64_t erstba; /* event ring seg-tbl base addr */ uint64_t erdp; /* event ring dequeue ptr */ } intrreg __packed; /* guest mapped addresses */ struct xhci_event_ring_seg *erstba_p; struct xhci_trb *erst_p; /* event ring segment tbl */ int er_deq_seg; /* event ring dequeue segment */ int er_enq_idx; /* event ring enqueue index - xHCI */ int er_enq_seg; /* event ring enqueue segment */ uint32_t er_events_cnt; /* number of events in ER */ uint32_t event_pcs; /* producer cycle state flag */ }; struct pci_xhci_softc; /* * USB device emulation container. * This is referenced from usb_hci->hci_sc; 1 pci_xhci_dev_emu for each * emulated device instance. 
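 *
 * The embedded usb_hci's hci_sc member points back at this container;
 * that is how pci_xhci_dev_intr() and pci_xhci_dev_event() recover the
 * device instance (and, via xsc, the controller softc) when a device
 * model calls up into the host controller.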
*/ struct pci_xhci_dev_emu { struct pci_xhci_softc *xsc; /* XHCI contexts */ struct xhci_dev_ctx *dev_ctx; struct pci_xhci_dev_ep eps[XHCI_MAX_ENDPOINTS]; int dev_slotstate; struct usb_devemu *dev_ue; /* USB emulated dev */ void *dev_sc; /* device's softc */ struct usb_hci hci; }; struct pci_xhci_softc { struct pci_devinst *xsc_pi; pthread_mutex_t mtx; uint32_t caplength; /* caplen & hciversion */ uint32_t hcsparams1; /* structural parameters 1 */ uint32_t hcsparams2; /* structural parameters 2 */ uint32_t hcsparams3; /* structural parameters 3 */ uint32_t hccparams1; /* capability parameters 1 */ uint32_t dboff; /* doorbell offset */ uint32_t rtsoff; /* runtime register space offset */ uint32_t hccparams2; /* capability parameters 2 */ uint32_t regsend; /* end of configuration registers */ struct pci_xhci_opregs opregs; struct pci_xhci_rtsregs rtsregs; struct pci_xhci_portregs *portregs; struct pci_xhci_dev_emu **devices; /* XHCI[port] = device */ struct pci_xhci_dev_emu **slots; /* slots assigned from 1 */ int ndevices; int usb2_port_start; int usb3_port_start; }; /* portregs and devices arrays are set up to start from idx=1 */ #define XHCI_PORTREG_PTR(x,n) &(x)->portregs[(n)] #define XHCI_DEVINST_PTR(x,n) (x)->devices[(n)] #define XHCI_SLOTDEV_PTR(x,n) (x)->slots[(n)] #define XHCI_HALTED(sc) ((sc)->opregs.usbsts & XHCI_STS_HCH) #define XHCI_GADDR(sc,a) paddr_guest2host((sc)->xsc_pi->pi_vmctx, \ (a), \ XHCI_PADDR_SZ - ((a) & (XHCI_PADDR_SZ-1))) static int xhci_in_use; /* map USB errors to XHCI */ static const int xhci_usb_errors[USB_ERR_MAX] = { [USB_ERR_NORMAL_COMPLETION] = XHCI_TRB_ERROR_SUCCESS, [USB_ERR_PENDING_REQUESTS] = XHCI_TRB_ERROR_RESOURCE, [USB_ERR_NOT_STARTED] = XHCI_TRB_ERROR_ENDP_NOT_ON, [USB_ERR_INVAL] = XHCI_TRB_ERROR_INVALID, [USB_ERR_NOMEM] = XHCI_TRB_ERROR_RESOURCE, [USB_ERR_CANCELLED] = XHCI_TRB_ERROR_STOPPED, [USB_ERR_BAD_ADDRESS] = XHCI_TRB_ERROR_PARAMETER, [USB_ERR_BAD_BUFSIZE] = XHCI_TRB_ERROR_PARAMETER, [USB_ERR_BAD_FLAG] = XHCI_TRB_ERROR_PARAMETER, [USB_ERR_NO_CALLBACK] = XHCI_TRB_ERROR_STALL, [USB_ERR_IN_USE] = XHCI_TRB_ERROR_RESOURCE, [USB_ERR_NO_ADDR] = XHCI_TRB_ERROR_RESOURCE, [USB_ERR_NO_PIPE] = XHCI_TRB_ERROR_RESOURCE, [USB_ERR_ZERO_NFRAMES] = XHCI_TRB_ERROR_UNDEFINED, [USB_ERR_ZERO_MAXP] = XHCI_TRB_ERROR_UNDEFINED, [USB_ERR_SET_ADDR_FAILED] = XHCI_TRB_ERROR_RESOURCE, [USB_ERR_NO_POWER] = XHCI_TRB_ERROR_ENDP_NOT_ON, [USB_ERR_TOO_DEEP] = XHCI_TRB_ERROR_RESOURCE, [USB_ERR_IOERROR] = XHCI_TRB_ERROR_TRB, [USB_ERR_NOT_CONFIGURED] = XHCI_TRB_ERROR_ENDP_NOT_ON, [USB_ERR_TIMEOUT] = XHCI_TRB_ERROR_CMD_ABORTED, [USB_ERR_SHORT_XFER] = XHCI_TRB_ERROR_SHORT_PKT, [USB_ERR_STALLED] = XHCI_TRB_ERROR_STALL, [USB_ERR_INTERRUPTED] = XHCI_TRB_ERROR_CMD_ABORTED, [USB_ERR_DMA_LOAD_FAILED] = XHCI_TRB_ERROR_DATA_BUF, [USB_ERR_BAD_CONTEXT] = XHCI_TRB_ERROR_TRB, [USB_ERR_NO_ROOT_HUB] = XHCI_TRB_ERROR_UNDEFINED, [USB_ERR_NO_INTR_THREAD] = XHCI_TRB_ERROR_UNDEFINED, [USB_ERR_NOT_LOCKED] = XHCI_TRB_ERROR_UNDEFINED, }; #define USB_TO_XHCI_ERR(e) ((e) < USB_ERR_MAX ? 
xhci_usb_errors[(e)] : \ XHCI_TRB_ERROR_INVALID) static int pci_xhci_insert_event(struct pci_xhci_softc *sc, struct xhci_trb *evtrb, int do_intr); static void pci_xhci_dump_trb(struct xhci_trb *trb); static void pci_xhci_assert_interrupt(struct pci_xhci_softc *sc); static void pci_xhci_reset_slot(struct pci_xhci_softc *sc, int slot); static void pci_xhci_reset_port(struct pci_xhci_softc *sc, int portn, int warm); static void pci_xhci_update_ep_ring(struct pci_xhci_softc *sc, struct pci_xhci_dev_emu *dev, struct pci_xhci_dev_ep *devep, struct xhci_endp_ctx *ep_ctx, uint32_t streamid, uint64_t ringaddr, int ccs); static void pci_xhci_set_evtrb(struct xhci_trb *evtrb, uint64_t port, uint32_t errcode, uint32_t evtype) { evtrb->qwTrb0 = port << 24; evtrb->dwTrb2 = XHCI_TRB_2_ERROR_SET(errcode); evtrb->dwTrb3 = XHCI_TRB_3_TYPE_SET(evtype); } /* controller reset */ static void pci_xhci_reset(struct pci_xhci_softc *sc) { int i; sc->rtsregs.er_enq_idx = 0; sc->rtsregs.er_events_cnt = 0; sc->rtsregs.event_pcs = 1; for (i = 1; i <= XHCI_MAX_SLOTS; i++) { pci_xhci_reset_slot(sc, i); } } static uint32_t pci_xhci_usbcmd_write(struct pci_xhci_softc *sc, uint32_t cmd) { int do_intr = 0; int i; if (cmd & XHCI_CMD_RS) { do_intr = (sc->opregs.usbcmd & XHCI_CMD_RS) == 0; sc->opregs.usbcmd |= XHCI_CMD_RS; sc->opregs.usbsts &= ~XHCI_STS_HCH; sc->opregs.usbsts |= XHCI_STS_PCD; /* Queue port change event on controller run from stop */ if (do_intr) for (i = 1; i <= XHCI_MAX_DEVS; i++) { struct pci_xhci_dev_emu *dev; struct pci_xhci_portregs *port; struct xhci_trb evtrb; if ((dev = XHCI_DEVINST_PTR(sc, i)) == NULL) continue; port = XHCI_PORTREG_PTR(sc, i); port->portsc |= XHCI_PS_CSC | XHCI_PS_CCS; port->portsc &= ~XHCI_PS_PLS_MASK; /* * XHCI 4.19.3 USB2 RxDetect->Polling, * USB3 Polling->U0 */ if (dev->dev_ue->ue_usbver == 2) port->portsc |= XHCI_PS_PLS_SET(UPS_PORT_LS_POLL); else port->portsc |= XHCI_PS_PLS_SET(UPS_PORT_LS_U0); pci_xhci_set_evtrb(&evtrb, i, XHCI_TRB_ERROR_SUCCESS, XHCI_TRB_EVENT_PORT_STS_CHANGE); if (pci_xhci_insert_event(sc, &evtrb, 0) != XHCI_TRB_ERROR_SUCCESS) break; } } else { sc->opregs.usbcmd &= ~XHCI_CMD_RS; sc->opregs.usbsts |= XHCI_STS_HCH; sc->opregs.usbsts &= ~XHCI_STS_PCD; } /* start execution of schedule; stop when set to 0 */ cmd |= sc->opregs.usbcmd & XHCI_CMD_RS; if (cmd & XHCI_CMD_HCRST) { /* reset controller */ pci_xhci_reset(sc); cmd &= ~XHCI_CMD_HCRST; } cmd &= ~(XHCI_CMD_CSS | XHCI_CMD_CRS); if (do_intr) pci_xhci_assert_interrupt(sc); return (cmd); } static void pci_xhci_portregs_write(struct pci_xhci_softc *sc, uint64_t offset, uint64_t value) { struct xhci_trb evtrb; struct pci_xhci_portregs *p; int port; uint32_t oldpls, newpls; if (sc->portregs == NULL) return; port = (offset - XHCI_PORTREGS_PORT0) / XHCI_PORTREGS_SETSZ; offset = (offset - XHCI_PORTREGS_PORT0) % XHCI_PORTREGS_SETSZ; - DPRINTF(("pci_xhci: portregs wr offset 0x%lx, port %u: 0x%lx\r\n", + DPRINTF(("pci_xhci: portregs wr offset 0x%lx, port %u: 0x%lx", offset, port, value)); assert(port >= 0); if (port > XHCI_MAX_DEVS) { - DPRINTF(("pci_xhci: portregs_write port %d > ndevices\r\n", + DPRINTF(("pci_xhci: portregs_write port %d > ndevices", port)); return; } if (XHCI_DEVINST_PTR(sc, port) == NULL) { - DPRINTF(("pci_xhci: portregs_write to unattached port %d\r\n", + DPRINTF(("pci_xhci: portregs_write to unattached port %d", port)); } p = XHCI_PORTREG_PTR(sc, port); switch (offset) { case 0: /* port reset or warm reset */ if (value & (XHCI_PS_PR | XHCI_PS_WPR)) { pci_xhci_reset_port(sc, port, value & XHCI_PS_WPR); 
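			/*
			 * A (warm) port reset supersedes the rest of this
			 * PORTSC write: once the reset has been dispatched,
			 * the remaining bits in 'value' are ignored.
			 */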
break; } if ((p->portsc & XHCI_PS_PP) == 0) { WPRINTF(("pci_xhci: portregs_write to unpowered " - "port %d\r\n", port)); + "port %d", port)); break; } /* Port status and control register */ oldpls = XHCI_PS_PLS_GET(p->portsc); newpls = XHCI_PS_PLS_GET(value); p->portsc &= XHCI_PS_PED | XHCI_PS_PLS_MASK | XHCI_PS_SPEED_MASK | XHCI_PS_PIC_MASK; if (XHCI_DEVINST_PTR(sc, port)) p->portsc |= XHCI_PS_CCS; p->portsc |= (value & ~(XHCI_PS_OCA | XHCI_PS_PR | XHCI_PS_PED | XHCI_PS_PLS_MASK | /* link state */ XHCI_PS_SPEED_MASK | XHCI_PS_PIC_MASK | /* port indicator */ XHCI_PS_LWS | XHCI_PS_DR | XHCI_PS_WPR)); /* clear control bits */ p->portsc &= ~(value & (XHCI_PS_CSC | XHCI_PS_PEC | XHCI_PS_WRC | XHCI_PS_OCC | XHCI_PS_PRC | XHCI_PS_PLC | XHCI_PS_CEC | XHCI_PS_CAS)); /* port disable request; for USB3, don't care */ if (value & XHCI_PS_PED) - DPRINTF(("Disable port %d request\r\n", port)); + DPRINTF(("Disable port %d request", port)); if (!(value & XHCI_PS_LWS)) break; - DPRINTF(("Port new PLS: %d\r\n", newpls)); + DPRINTF(("Port new PLS: %d", newpls)); switch (newpls) { case 0: /* U0 */ case 3: /* U3 */ if (oldpls != newpls) { p->portsc &= ~XHCI_PS_PLS_MASK; p->portsc |= XHCI_PS_PLS_SET(newpls) | XHCI_PS_PLC; if (oldpls != 0 && newpls == 0) { pci_xhci_set_evtrb(&evtrb, port, XHCI_TRB_ERROR_SUCCESS, XHCI_TRB_EVENT_PORT_STS_CHANGE); pci_xhci_insert_event(sc, &evtrb, 1); } } break; default: - DPRINTF(("Unhandled change port %d PLS %u\r\n", + DPRINTF(("Unhandled change port %d PLS %u", port, newpls)); break; } break; case 4: /* Port power management status and control register */ p->portpmsc = value; break; case 8: /* Port link information register */ - DPRINTF(("pci_xhci attempted write to PORTLI, port %d\r\n", + DPRINTF(("pci_xhci attempted write to PORTLI, port %d", port)); break; case 12: /* * Port hardware LPM control register. * For USB3, this register is reserved. 
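	 * The emulation implements no LPM behaviour; the written value is
	 * simply latched so that subsequent reads return it.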
*/ p->porthlpmc = value; break; } } struct xhci_dev_ctx * pci_xhci_get_dev_ctx(struct pci_xhci_softc *sc, uint32_t slot) { uint64_t devctx_addr; struct xhci_dev_ctx *devctx; assert(slot > 0 && slot <= sc->ndevices); assert(sc->opregs.dcbaa_p != NULL); devctx_addr = sc->opregs.dcbaa_p->dcba[slot]; if (devctx_addr == 0) { - DPRINTF(("get_dev_ctx devctx_addr == 0\r\n")); + DPRINTF(("get_dev_ctx devctx_addr == 0")); return (NULL); } - DPRINTF(("pci_xhci: get dev ctx, slot %u devctx addr %016lx\r\n", + DPRINTF(("pci_xhci: get dev ctx, slot %u devctx addr %016lx", slot, devctx_addr)); devctx = XHCI_GADDR(sc, devctx_addr & ~0x3FUL); return (devctx); } struct xhci_trb * pci_xhci_trb_next(struct pci_xhci_softc *sc, struct xhci_trb *curtrb, uint64_t *guestaddr) { struct xhci_trb *next; assert(curtrb != NULL); if (XHCI_TRB_3_TYPE_GET(curtrb->dwTrb3) == XHCI_TRB_TYPE_LINK) { if (guestaddr) *guestaddr = curtrb->qwTrb0 & ~0xFUL; next = XHCI_GADDR(sc, curtrb->qwTrb0 & ~0xFUL); } else { if (guestaddr) *guestaddr += sizeof(struct xhci_trb) & ~0xFUL; next = curtrb + 1; } return (next); } static void pci_xhci_assert_interrupt(struct pci_xhci_softc *sc) { sc->rtsregs.intrreg.erdp |= XHCI_ERDP_LO_BUSY; sc->rtsregs.intrreg.iman |= XHCI_IMAN_INTR_PEND; sc->opregs.usbsts |= XHCI_STS_EINT; /* only trigger interrupt if permitted */ if ((sc->opregs.usbcmd & XHCI_CMD_INTE) && (sc->rtsregs.intrreg.iman & XHCI_IMAN_INTR_ENA)) { if (pci_msi_enabled(sc->xsc_pi)) pci_generate_msi(sc->xsc_pi, 0); else pci_lintr_assert(sc->xsc_pi); } } static void pci_xhci_deassert_interrupt(struct pci_xhci_softc *sc) { if (!pci_msi_enabled(sc->xsc_pi)) pci_lintr_assert(sc->xsc_pi); } static void pci_xhci_init_ep(struct pci_xhci_dev_emu *dev, int epid) { struct xhci_dev_ctx *dev_ctx; struct pci_xhci_dev_ep *devep; struct xhci_endp_ctx *ep_ctx; uint32_t pstreams; int i; dev_ctx = dev->dev_ctx; ep_ctx = &dev_ctx->ctx_ep[epid]; devep = &dev->eps[epid]; pstreams = XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0); if (pstreams > 0) { - DPRINTF(("init_ep %d with pstreams %d\r\n", epid, pstreams)); + DPRINTF(("init_ep %d with pstreams %d", epid, pstreams)); assert(devep->ep_sctx_trbs == NULL); devep->ep_sctx = XHCI_GADDR(dev->xsc, ep_ctx->qwEpCtx2 & XHCI_EPCTX_2_TR_DQ_PTR_MASK); devep->ep_sctx_trbs = calloc(pstreams, sizeof(struct pci_xhci_trb_ring)); for (i = 0; i < pstreams; i++) { devep->ep_sctx_trbs[i].ringaddr = devep->ep_sctx[i].qwSctx0 & XHCI_SCTX_0_TR_DQ_PTR_MASK; devep->ep_sctx_trbs[i].ccs = XHCI_SCTX_0_DCS_GET(devep->ep_sctx[i].qwSctx0); } } else { - DPRINTF(("init_ep %d with no pstreams\r\n", epid)); + DPRINTF(("init_ep %d with no pstreams", epid)); devep->ep_ringaddr = ep_ctx->qwEpCtx2 & XHCI_EPCTX_2_TR_DQ_PTR_MASK; devep->ep_ccs = XHCI_EPCTX_2_DCS_GET(ep_ctx->qwEpCtx2); devep->ep_tr = XHCI_GADDR(dev->xsc, devep->ep_ringaddr); - DPRINTF(("init_ep tr DCS %x\r\n", devep->ep_ccs)); + DPRINTF(("init_ep tr DCS %x", devep->ep_ccs)); } if (devep->ep_xfer == NULL) { devep->ep_xfer = malloc(sizeof(struct usb_data_xfer)); USB_DATA_XFER_INIT(devep->ep_xfer); } } static void pci_xhci_disable_ep(struct pci_xhci_dev_emu *dev, int epid) { struct xhci_dev_ctx *dev_ctx; struct pci_xhci_dev_ep *devep; struct xhci_endp_ctx *ep_ctx; - DPRINTF(("pci_xhci disable_ep %d\r\n", epid)); + DPRINTF(("pci_xhci disable_ep %d", epid)); dev_ctx = dev->dev_ctx; ep_ctx = &dev_ctx->ctx_ep[epid]; ep_ctx->dwEpCtx0 = (ep_ctx->dwEpCtx0 & ~0x7) | XHCI_ST_EPCTX_DISABLED; devep = &dev->eps[epid]; if (XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0) > 0 && devep->ep_sctx_trbs != 
NULL) free(devep->ep_sctx_trbs); if (devep->ep_xfer != NULL) { free(devep->ep_xfer); devep->ep_xfer = NULL; } memset(devep, 0, sizeof(struct pci_xhci_dev_ep)); } /* reset device at slot and data structures related to it */ static void pci_xhci_reset_slot(struct pci_xhci_softc *sc, int slot) { struct pci_xhci_dev_emu *dev; dev = XHCI_SLOTDEV_PTR(sc, slot); if (!dev) { - DPRINTF(("xhci reset unassigned slot (%d)?\r\n", slot)); + DPRINTF(("xhci reset unassigned slot (%d)?", slot)); } else { dev->dev_slotstate = XHCI_ST_DISABLED; } /* TODO: reset ring buffer pointers */ } static int pci_xhci_insert_event(struct pci_xhci_softc *sc, struct xhci_trb *evtrb, int do_intr) { struct pci_xhci_rtsregs *rts; uint64_t erdp; int erdp_idx; int err; struct xhci_trb *evtrbptr; err = XHCI_TRB_ERROR_SUCCESS; rts = &sc->rtsregs; erdp = rts->intrreg.erdp & ~0xF; erdp_idx = (erdp - rts->erstba_p[rts->er_deq_seg].qwEvrsTablePtr) / sizeof(struct xhci_trb); - DPRINTF(("pci_xhci: insert event 0[%lx] 2[%x] 3[%x]\r\n" - "\terdp idx %d/seg %d, enq idx %d/seg %d, pcs %u\r\n" - "\t(erdp=0x%lx, erst=0x%lx, tblsz=%u, do_intr %d)\r\n", - evtrb->qwTrb0, evtrb->dwTrb2, evtrb->dwTrb3, + DPRINTF(("pci_xhci: insert event 0[%lx] 2[%x] 3[%x]", + evtrb->qwTrb0, evtrb->dwTrb2, evtrb->dwTrb3)); + DPRINTF(("\terdp idx %d/seg %d, enq idx %d/seg %d, pcs %u", erdp_idx, rts->er_deq_seg, rts->er_enq_idx, - rts->er_enq_seg, - rts->event_pcs, erdp, rts->erstba_p->qwEvrsTablePtr, + rts->er_enq_seg, rts->event_pcs)); + DPRINTF(("\t(erdp=0x%lx, erst=0x%lx, tblsz=%u, do_intr %d)", + erdp, rts->erstba_p->qwEvrsTablePtr, rts->erstba_p->dwEvrsTableSize, do_intr)); evtrbptr = &rts->erst_p[rts->er_enq_idx]; /* TODO: multi-segment table */ if (rts->er_events_cnt >= rts->erstba_p->dwEvrsTableSize) { - DPRINTF(("pci_xhci[%d] cannot insert event; ring full\r\n", + DPRINTF(("pci_xhci[%d] cannot insert event; ring full", __LINE__)); err = XHCI_TRB_ERROR_EV_RING_FULL; goto done; } if (rts->er_events_cnt == rts->erstba_p->dwEvrsTableSize - 1) { struct xhci_trb errev; if ((evtrbptr->dwTrb3 & 0x1) == (rts->event_pcs & 0x1)) { - DPRINTF(("pci_xhci[%d] insert evt err: ring full\r\n", + DPRINTF(("pci_xhci[%d] insert evt err: ring full", __LINE__)); errev.qwTrb0 = 0; errev.dwTrb2 = XHCI_TRB_2_ERROR_SET( XHCI_TRB_ERROR_EV_RING_FULL); errev.dwTrb3 = XHCI_TRB_3_TYPE_SET( XHCI_TRB_EVENT_HOST_CTRL) | rts->event_pcs; rts->er_events_cnt++; memcpy(&rts->erst_p[rts->er_enq_idx], &errev, sizeof(struct xhci_trb)); rts->er_enq_idx = (rts->er_enq_idx + 1) % rts->erstba_p->dwEvrsTableSize; err = XHCI_TRB_ERROR_EV_RING_FULL; do_intr = 1; goto done; } } else { rts->er_events_cnt++; } evtrb->dwTrb3 &= ~XHCI_TRB_3_CYCLE_BIT; evtrb->dwTrb3 |= rts->event_pcs; memcpy(&rts->erst_p[rts->er_enq_idx], evtrb, sizeof(struct xhci_trb)); rts->er_enq_idx = (rts->er_enq_idx + 1) % rts->erstba_p->dwEvrsTableSize; if (rts->er_enq_idx == 0) rts->event_pcs ^= 1; done: if (do_intr) pci_xhci_assert_interrupt(sc); return (err); } static uint32_t pci_xhci_cmd_enable_slot(struct pci_xhci_softc *sc, uint32_t *slot) { struct pci_xhci_dev_emu *dev; uint32_t cmderr; int i; cmderr = XHCI_TRB_ERROR_NO_SLOTS; if (sc->portregs != NULL) for (i = 1; i <= XHCI_MAX_SLOTS; i++) { dev = XHCI_SLOTDEV_PTR(sc, i); if (dev && dev->dev_slotstate == XHCI_ST_DISABLED) { *slot = i; dev->dev_slotstate = XHCI_ST_ENABLED; cmderr = XHCI_TRB_ERROR_SUCCESS; dev->hci.hci_address = i; break; } } - DPRINTF(("pci_xhci enable slot (error=%d) slot %u\r\n", + DPRINTF(("pci_xhci enable slot (error=%d) slot %u", cmderr != 
XHCI_TRB_ERROR_SUCCESS, *slot)); return (cmderr); } static uint32_t pci_xhci_cmd_disable_slot(struct pci_xhci_softc *sc, uint32_t slot) { struct pci_xhci_dev_emu *dev; uint32_t cmderr; - DPRINTF(("pci_xhci disable slot %u\r\n", slot)); + DPRINTF(("pci_xhci disable slot %u", slot)); cmderr = XHCI_TRB_ERROR_NO_SLOTS; if (sc->portregs == NULL) goto done; if (slot > sc->ndevices) { cmderr = XHCI_TRB_ERROR_SLOT_NOT_ON; goto done; } dev = XHCI_SLOTDEV_PTR(sc, slot); if (dev) { if (dev->dev_slotstate == XHCI_ST_DISABLED) { cmderr = XHCI_TRB_ERROR_SLOT_NOT_ON; } else { dev->dev_slotstate = XHCI_ST_DISABLED; cmderr = XHCI_TRB_ERROR_SUCCESS; /* TODO: reset events and endpoints */ } } done: return (cmderr); } static uint32_t pci_xhci_cmd_reset_device(struct pci_xhci_softc *sc, uint32_t slot) { struct pci_xhci_dev_emu *dev; struct xhci_dev_ctx *dev_ctx; struct xhci_endp_ctx *ep_ctx; uint32_t cmderr; int i; cmderr = XHCI_TRB_ERROR_NO_SLOTS; if (sc->portregs == NULL) goto done; - DPRINTF(("pci_xhci reset device slot %u\r\n", slot)); + DPRINTF(("pci_xhci reset device slot %u", slot)); dev = XHCI_SLOTDEV_PTR(sc, slot); if (!dev || dev->dev_slotstate == XHCI_ST_DISABLED) cmderr = XHCI_TRB_ERROR_SLOT_NOT_ON; else { dev->dev_slotstate = XHCI_ST_DEFAULT; dev->hci.hci_address = 0; dev_ctx = pci_xhci_get_dev_ctx(sc, slot); /* slot state */ dev_ctx->ctx_slot.dwSctx3 = FIELD_REPLACE( dev_ctx->ctx_slot.dwSctx3, XHCI_ST_SLCTX_DEFAULT, 0x1F, 27); /* number of contexts */ dev_ctx->ctx_slot.dwSctx0 = FIELD_REPLACE( dev_ctx->ctx_slot.dwSctx0, 1, 0x1F, 27); /* reset all eps other than ep-0 */ for (i = 2; i <= 31; i++) { ep_ctx = &dev_ctx->ctx_ep[i]; ep_ctx->dwEpCtx0 = FIELD_REPLACE( ep_ctx->dwEpCtx0, XHCI_ST_EPCTX_DISABLED, 0x7, 0); } cmderr = XHCI_TRB_ERROR_SUCCESS; } pci_xhci_reset_slot(sc, slot); done: return (cmderr); } static uint32_t pci_xhci_cmd_address_device(struct pci_xhci_softc *sc, uint32_t slot, struct xhci_trb *trb) { struct pci_xhci_dev_emu *dev; struct xhci_input_dev_ctx *input_ctx; struct xhci_slot_ctx *islot_ctx; struct xhci_dev_ctx *dev_ctx; struct xhci_endp_ctx *ep0_ctx; uint32_t cmderr; input_ctx = XHCI_GADDR(sc, trb->qwTrb0 & ~0xFUL); islot_ctx = &input_ctx->ctx_slot; ep0_ctx = &input_ctx->ctx_ep[1]; cmderr = XHCI_TRB_ERROR_SUCCESS; - DPRINTF(("pci_xhci: address device, input ctl: D 0x%08x A 0x%08x,\r\n" - " slot %08x %08x %08x %08x\r\n" - " ep0 %08x %08x %016lx %08x\r\n", - input_ctx->ctx_input.dwInCtx0, input_ctx->ctx_input.dwInCtx1, + DPRINTF(("pci_xhci: address device, input ctl: D 0x%08x A 0x%08x,", + input_ctx->ctx_input.dwInCtx0, input_ctx->ctx_input.dwInCtx1)); + DPRINTF((" slot %08x %08x %08x %08x", islot_ctx->dwSctx0, islot_ctx->dwSctx1, - islot_ctx->dwSctx2, islot_ctx->dwSctx3, + islot_ctx->dwSctx2, islot_ctx->dwSctx3)); + DPRINTF((" ep0 %08x %08x %016lx %08x", ep0_ctx->dwEpCtx0, ep0_ctx->dwEpCtx1, ep0_ctx->qwEpCtx2, ep0_ctx->dwEpCtx4)); /* when setting address: drop-ctx=0, add-ctx=slot+ep0 */ if ((input_ctx->ctx_input.dwInCtx0 != 0) || (input_ctx->ctx_input.dwInCtx1 & 0x03) != 0x03) { - DPRINTF(("pci_xhci: address device, input ctl invalid\r\n")); + DPRINTF(("pci_xhci: address device, input ctl invalid")); cmderr = XHCI_TRB_ERROR_TRB; goto done; } /* assign address to slot */ dev_ctx = pci_xhci_get_dev_ctx(sc, slot); - DPRINTF(("pci_xhci: address device, dev ctx\r\n" - " slot %08x %08x %08x %08x\r\n", + DPRINTF(("pci_xhci: address device, dev ctx")); + DPRINTF((" slot %08x %08x %08x %08x", dev_ctx->ctx_slot.dwSctx0, dev_ctx->ctx_slot.dwSctx1, dev_ctx->ctx_slot.dwSctx2, 
dev_ctx->ctx_slot.dwSctx3)); dev = XHCI_SLOTDEV_PTR(sc, slot); assert(dev != NULL); dev->hci.hci_address = slot; dev->dev_ctx = dev_ctx; if (dev->dev_ue->ue_reset == NULL || dev->dev_ue->ue_reset(dev->dev_sc) < 0) { cmderr = XHCI_TRB_ERROR_ENDP_NOT_ON; goto done; } memcpy(&dev_ctx->ctx_slot, islot_ctx, sizeof(struct xhci_slot_ctx)); dev_ctx->ctx_slot.dwSctx3 = XHCI_SCTX_3_SLOT_STATE_SET(XHCI_ST_SLCTX_ADDRESSED) | XHCI_SCTX_3_DEV_ADDR_SET(slot); memcpy(&dev_ctx->ctx_ep[1], ep0_ctx, sizeof(struct xhci_endp_ctx)); ep0_ctx = &dev_ctx->ctx_ep[1]; ep0_ctx->dwEpCtx0 = (ep0_ctx->dwEpCtx0 & ~0x7) | XHCI_EPCTX_0_EPSTATE_SET(XHCI_ST_EPCTX_RUNNING); pci_xhci_init_ep(dev, 1); dev->dev_slotstate = XHCI_ST_ADDRESSED; - DPRINTF(("pci_xhci: address device, output ctx\r\n" - " slot %08x %08x %08x %08x\r\n" - " ep0 %08x %08x %016lx %08x\r\n", + DPRINTF(("pci_xhci: address device, output ctx")); + DPRINTF((" slot %08x %08x %08x %08x", dev_ctx->ctx_slot.dwSctx0, dev_ctx->ctx_slot.dwSctx1, - dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3, + dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3)); + DPRINTF((" ep0 %08x %08x %016lx %08x", ep0_ctx->dwEpCtx0, ep0_ctx->dwEpCtx1, ep0_ctx->qwEpCtx2, ep0_ctx->dwEpCtx4)); done: return (cmderr); } static uint32_t pci_xhci_cmd_config_ep(struct pci_xhci_softc *sc, uint32_t slot, struct xhci_trb *trb) { struct xhci_input_dev_ctx *input_ctx; struct pci_xhci_dev_emu *dev; struct xhci_dev_ctx *dev_ctx; struct xhci_endp_ctx *ep_ctx, *iep_ctx; uint32_t cmderr; int i; cmderr = XHCI_TRB_ERROR_SUCCESS; - DPRINTF(("pci_xhci config_ep slot %u\r\n", slot)); + DPRINTF(("pci_xhci config_ep slot %u", slot)); dev = XHCI_SLOTDEV_PTR(sc, slot); assert(dev != NULL); if ((trb->dwTrb3 & XHCI_TRB_3_DCEP_BIT) != 0) { - DPRINTF(("pci_xhci config_ep - deconfigure ep slot %u\r\n", + DPRINTF(("pci_xhci config_ep - deconfigure ep slot %u", slot)); if (dev->dev_ue->ue_stop != NULL) dev->dev_ue->ue_stop(dev->dev_sc); dev->dev_slotstate = XHCI_ST_ADDRESSED; dev->hci.hci_address = 0; dev_ctx = pci_xhci_get_dev_ctx(sc, slot); /* number of contexts */ dev_ctx->ctx_slot.dwSctx0 = FIELD_REPLACE( dev_ctx->ctx_slot.dwSctx0, 1, 0x1F, 27); /* slot state */ dev_ctx->ctx_slot.dwSctx3 = FIELD_REPLACE( dev_ctx->ctx_slot.dwSctx3, XHCI_ST_SLCTX_ADDRESSED, 0x1F, 27); /* disable endpoints */ for (i = 2; i < 32; i++) pci_xhci_disable_ep(dev, i); cmderr = XHCI_TRB_ERROR_SUCCESS; goto done; } if (dev->dev_slotstate < XHCI_ST_ADDRESSED) { - DPRINTF(("pci_xhci: config_ep slotstate x%x != addressed\r\n", + DPRINTF(("pci_xhci: config_ep slotstate x%x != addressed", dev->dev_slotstate)); cmderr = XHCI_TRB_ERROR_SLOT_NOT_ON; goto done; } /* In addressed/configured state; * for each drop endpoint ctx flag: * ep->state = DISABLED * for each add endpoint ctx flag: * cp(ep-in, ep-out) * ep->state = RUNNING * for each drop+add endpoint flag: * reset ep resources * cp(ep-in, ep-out) * ep->state = RUNNING * if input->DisabledCtx[2-31] < 30: (at least 1 ep not disabled) * slot->state = configured */ input_ctx = XHCI_GADDR(sc, trb->qwTrb0 & ~0xFUL); dev_ctx = dev->dev_ctx; - DPRINTF(("pci_xhci: config_ep inputctx: D:x%08x A:x%08x 7:x%08x\r\n", + DPRINTF(("pci_xhci: config_ep inputctx: D:x%08x A:x%08x 7:x%08x", input_ctx->ctx_input.dwInCtx0, input_ctx->ctx_input.dwInCtx1, input_ctx->ctx_input.dwInCtx7)); for (i = 2; i <= 31; i++) { ep_ctx = &dev_ctx->ctx_ep[i]; if (input_ctx->ctx_input.dwInCtx0 & XHCI_INCTX_0_DROP_MASK(i)) { - DPRINTF((" config ep - dropping ep %d\r\n", i)); + DPRINTF((" config ep - dropping ep %d", i)); 
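			/*
			 * Drop flag set: tear down the endpoint's rings and
			 * transfer state and mark it disabled, as outlined
			 * in the comment above.
			 */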
pci_xhci_disable_ep(dev, i); } if (input_ctx->ctx_input.dwInCtx1 & XHCI_INCTX_1_ADD_MASK(i)) { iep_ctx = &input_ctx->ctx_ep[i]; - DPRINTF((" enable ep[%d] %08x %08x %016lx %08x\r\n", + DPRINTF((" enable ep[%d] %08x %08x %016lx %08x", i, iep_ctx->dwEpCtx0, iep_ctx->dwEpCtx1, iep_ctx->qwEpCtx2, iep_ctx->dwEpCtx4)); memcpy(ep_ctx, iep_ctx, sizeof(struct xhci_endp_ctx)); pci_xhci_init_ep(dev, i); /* ep state */ ep_ctx->dwEpCtx0 = FIELD_REPLACE( ep_ctx->dwEpCtx0, XHCI_ST_EPCTX_RUNNING, 0x7, 0); } } /* slot state to configured */ dev_ctx->ctx_slot.dwSctx3 = FIELD_REPLACE( dev_ctx->ctx_slot.dwSctx3, XHCI_ST_SLCTX_CONFIGURED, 0x1F, 27); dev_ctx->ctx_slot.dwSctx0 = FIELD_COPY( dev_ctx->ctx_slot.dwSctx0, input_ctx->ctx_slot.dwSctx0, 0x1F, 27); dev->dev_slotstate = XHCI_ST_CONFIGURED; DPRINTF(("EP configured; slot %u [0]=0x%08x [1]=0x%08x [2]=0x%08x " - "[3]=0x%08x\r\n", + "[3]=0x%08x", slot, dev_ctx->ctx_slot.dwSctx0, dev_ctx->ctx_slot.dwSctx1, dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3)); done: return (cmderr); } static uint32_t pci_xhci_cmd_reset_ep(struct pci_xhci_softc *sc, uint32_t slot, struct xhci_trb *trb) { struct pci_xhci_dev_emu *dev; struct pci_xhci_dev_ep *devep; struct xhci_dev_ctx *dev_ctx; struct xhci_endp_ctx *ep_ctx; uint32_t cmderr, epid; uint32_t type; epid = XHCI_TRB_3_EP_GET(trb->dwTrb3); - DPRINTF(("pci_xhci: reset ep %u: slot %u\r\n", epid, slot)); + DPRINTF(("pci_xhci: reset ep %u: slot %u", epid, slot)); cmderr = XHCI_TRB_ERROR_SUCCESS; type = XHCI_TRB_3_TYPE_GET(trb->dwTrb3); dev = XHCI_SLOTDEV_PTR(sc, slot); assert(dev != NULL); if (type == XHCI_TRB_TYPE_STOP_EP && (trb->dwTrb3 & XHCI_TRB_3_SUSP_EP_BIT) != 0) { /* XXX suspend endpoint for 10ms */ } if (epid < 1 || epid > 31) { - DPRINTF(("pci_xhci: reset ep: invalid epid %u\r\n", epid)); + DPRINTF(("pci_xhci: reset ep: invalid epid %u", epid)); cmderr = XHCI_TRB_ERROR_TRB; goto done; } devep = &dev->eps[epid]; if (devep->ep_xfer != NULL) USB_DATA_XFER_RESET(devep->ep_xfer); dev_ctx = dev->dev_ctx; assert(dev_ctx != NULL); ep_ctx = &dev_ctx->ctx_ep[epid]; ep_ctx->dwEpCtx0 = (ep_ctx->dwEpCtx0 & ~0x7) | XHCI_ST_EPCTX_STOPPED; if (XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0) == 0) ep_ctx->qwEpCtx2 = devep->ep_ringaddr | devep->ep_ccs; - DPRINTF(("pci_xhci: reset ep[%u] %08x %08x %016lx %08x\r\n", + DPRINTF(("pci_xhci: reset ep[%u] %08x %08x %016lx %08x", epid, ep_ctx->dwEpCtx0, ep_ctx->dwEpCtx1, ep_ctx->qwEpCtx2, ep_ctx->dwEpCtx4)); if (type == XHCI_TRB_TYPE_RESET_EP && (dev->dev_ue->ue_reset == NULL || dev->dev_ue->ue_reset(dev->dev_sc) < 0)) { cmderr = XHCI_TRB_ERROR_ENDP_NOT_ON; goto done; } done: return (cmderr); } static uint32_t pci_xhci_find_stream(struct pci_xhci_softc *sc, struct xhci_endp_ctx *ep, uint32_t streamid, struct xhci_stream_ctx **osctx) { struct xhci_stream_ctx *sctx; uint32_t maxpstreams; maxpstreams = XHCI_EPCTX_0_MAXP_STREAMS_GET(ep->dwEpCtx0); if (maxpstreams == 0) return (XHCI_TRB_ERROR_TRB); if (maxpstreams > XHCI_STREAMS_MAX) return (XHCI_TRB_ERROR_INVALID_SID); if (XHCI_EPCTX_0_LSA_GET(ep->dwEpCtx0) == 0) { - DPRINTF(("pci_xhci: find_stream; LSA bit not set\r\n")); + DPRINTF(("pci_xhci: find_stream; LSA bit not set")); return (XHCI_TRB_ERROR_INVALID_SID); } /* only support primary stream */ if (streamid > maxpstreams) return (XHCI_TRB_ERROR_STREAM_TYPE); sctx = XHCI_GADDR(sc, ep->qwEpCtx2 & ~0xFUL) + streamid; if (!XHCI_SCTX_0_SCT_GET(sctx->qwSctx0)) return (XHCI_TRB_ERROR_STREAM_TYPE); *osctx = sctx; return (XHCI_TRB_ERROR_SUCCESS); } static uint32_t pci_xhci_cmd_set_tr(struct 
pci_xhci_softc *sc, uint32_t slot, struct xhci_trb *trb) { struct pci_xhci_dev_emu *dev; struct pci_xhci_dev_ep *devep; struct xhci_dev_ctx *dev_ctx; struct xhci_endp_ctx *ep_ctx; uint32_t cmderr, epid; uint32_t streamid; cmderr = XHCI_TRB_ERROR_SUCCESS; dev = XHCI_SLOTDEV_PTR(sc, slot); assert(dev != NULL); - DPRINTF(("pci_xhci set_tr: new-tr x%016lx, SCT %u DCS %u\r\n" - " stream-id %u, slot %u, epid %u, C %u\r\n", + DPRINTF(("pci_xhci set_tr: new-tr x%016lx, SCT %u DCS %u", (trb->qwTrb0 & ~0xF), (uint32_t)((trb->qwTrb0 >> 1) & 0x7), - (uint32_t)(trb->qwTrb0 & 0x1), (trb->dwTrb2 >> 16) & 0xFFFF, + (uint32_t)(trb->qwTrb0 & 0x1))); + DPRINTF((" stream-id %u, slot %u, epid %u, C %u", + (trb->dwTrb2 >> 16) & 0xFFFF, XHCI_TRB_3_SLOT_GET(trb->dwTrb3), XHCI_TRB_3_EP_GET(trb->dwTrb3), trb->dwTrb3 & 0x1)); epid = XHCI_TRB_3_EP_GET(trb->dwTrb3); if (epid < 1 || epid > 31) { - DPRINTF(("pci_xhci: set_tr_deq: invalid epid %u\r\n", epid)); + DPRINTF(("pci_xhci: set_tr_deq: invalid epid %u", epid)); cmderr = XHCI_TRB_ERROR_TRB; goto done; } dev_ctx = dev->dev_ctx; assert(dev_ctx != NULL); ep_ctx = &dev_ctx->ctx_ep[epid]; devep = &dev->eps[epid]; switch (XHCI_EPCTX_0_EPSTATE_GET(ep_ctx->dwEpCtx0)) { case XHCI_ST_EPCTX_STOPPED: case XHCI_ST_EPCTX_ERROR: break; default: - DPRINTF(("pci_xhci cmd set_tr invalid state %x\r\n", + DPRINTF(("pci_xhci cmd set_tr invalid state %x", XHCI_EPCTX_0_EPSTATE_GET(ep_ctx->dwEpCtx0))); cmderr = XHCI_TRB_ERROR_CONTEXT_STATE; goto done; } streamid = XHCI_TRB_2_STREAM_GET(trb->dwTrb2); if (XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0) > 0) { struct xhci_stream_ctx *sctx; sctx = NULL; cmderr = pci_xhci_find_stream(sc, ep_ctx, streamid, &sctx); if (sctx != NULL) { assert(devep->ep_sctx != NULL); devep->ep_sctx[streamid].qwSctx0 = trb->qwTrb0; devep->ep_sctx_trbs[streamid].ringaddr = trb->qwTrb0 & ~0xF; devep->ep_sctx_trbs[streamid].ccs = XHCI_EPCTX_2_DCS_GET(trb->qwTrb0); } } else { if (streamid != 0) { - DPRINTF(("pci_xhci cmd set_tr streamid %x != 0\r\n", + DPRINTF(("pci_xhci cmd set_tr streamid %x != 0", streamid)); } ep_ctx->qwEpCtx2 = trb->qwTrb0 & ~0xFUL; devep->ep_ringaddr = ep_ctx->qwEpCtx2 & ~0xFUL; devep->ep_ccs = trb->qwTrb0 & 0x1; devep->ep_tr = XHCI_GADDR(sc, devep->ep_ringaddr); - DPRINTF(("pci_xhci set_tr first TRB:\r\n")); + DPRINTF(("pci_xhci set_tr first TRB:")); pci_xhci_dump_trb(devep->ep_tr); } ep_ctx->dwEpCtx0 = (ep_ctx->dwEpCtx0 & ~0x7) | XHCI_ST_EPCTX_STOPPED; done: return (cmderr); } static uint32_t pci_xhci_cmd_eval_ctx(struct pci_xhci_softc *sc, uint32_t slot, struct xhci_trb *trb) { struct xhci_input_dev_ctx *input_ctx; struct xhci_slot_ctx *islot_ctx; struct xhci_dev_ctx *dev_ctx; struct xhci_endp_ctx *ep0_ctx; uint32_t cmderr; input_ctx = XHCI_GADDR(sc, trb->qwTrb0 & ~0xFUL); islot_ctx = &input_ctx->ctx_slot; ep0_ctx = &input_ctx->ctx_ep[1]; cmderr = XHCI_TRB_ERROR_SUCCESS; - DPRINTF(("pci_xhci: eval ctx, input ctl: D 0x%08x A 0x%08x,\r\n" - " slot %08x %08x %08x %08x\r\n" - " ep0 %08x %08x %016lx %08x\r\n", - input_ctx->ctx_input.dwInCtx0, input_ctx->ctx_input.dwInCtx1, + DPRINTF(("pci_xhci: eval ctx, input ctl: D 0x%08x A 0x%08x,", + input_ctx->ctx_input.dwInCtx0, input_ctx->ctx_input.dwInCtx1)); + DPRINTF((" slot %08x %08x %08x %08x", islot_ctx->dwSctx0, islot_ctx->dwSctx1, - islot_ctx->dwSctx2, islot_ctx->dwSctx3, + islot_ctx->dwSctx2, islot_ctx->dwSctx3)); + DPRINTF((" ep0 %08x %08x %016lx %08x", ep0_ctx->dwEpCtx0, ep0_ctx->dwEpCtx1, ep0_ctx->qwEpCtx2, ep0_ctx->dwEpCtx4)); /* this command expects drop-ctx=0 & add-ctx=slot+ep0 */ if 
((input_ctx->ctx_input.dwInCtx0 != 0) || (input_ctx->ctx_input.dwInCtx1 & 0x03) == 0) { - DPRINTF(("pci_xhci: eval ctx, input ctl invalid\r\n")); + DPRINTF(("pci_xhci: eval ctx, input ctl invalid")); cmderr = XHCI_TRB_ERROR_TRB; goto done; } /* assign address to slot; in this emulation, slot_id = address */ dev_ctx = pci_xhci_get_dev_ctx(sc, slot); - DPRINTF(("pci_xhci: eval ctx, dev ctx\r\n" - " slot %08x %08x %08x %08x\r\n", + DPRINTF(("pci_xhci: eval ctx, dev ctx")); + DPRINTF((" slot %08x %08x %08x %08x", dev_ctx->ctx_slot.dwSctx0, dev_ctx->ctx_slot.dwSctx1, dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3)); if (input_ctx->ctx_input.dwInCtx1 & 0x01) { /* slot ctx */ /* set max exit latency */ dev_ctx->ctx_slot.dwSctx1 = FIELD_COPY( dev_ctx->ctx_slot.dwSctx1, input_ctx->ctx_slot.dwSctx1, 0xFFFF, 0); /* set interrupter target */ dev_ctx->ctx_slot.dwSctx2 = FIELD_COPY( dev_ctx->ctx_slot.dwSctx2, input_ctx->ctx_slot.dwSctx2, 0x3FF, 22); } if (input_ctx->ctx_input.dwInCtx1 & 0x02) { /* control ctx */ /* set max packet size */ dev_ctx->ctx_ep[1].dwEpCtx1 = FIELD_COPY( dev_ctx->ctx_ep[1].dwEpCtx1, ep0_ctx->dwEpCtx1, 0xFFFF, 16); ep0_ctx = &dev_ctx->ctx_ep[1]; } - DPRINTF(("pci_xhci: eval ctx, output ctx\r\n" - " slot %08x %08x %08x %08x\r\n" - " ep0 %08x %08x %016lx %08x\r\n", + DPRINTF(("pci_xhci: eval ctx, output ctx")); + DPRINTF((" slot %08x %08x %08x %08x", dev_ctx->ctx_slot.dwSctx0, dev_ctx->ctx_slot.dwSctx1, - dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3, + dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3)); + DPRINTF((" ep0 %08x %08x %016lx %08x", ep0_ctx->dwEpCtx0, ep0_ctx->dwEpCtx1, ep0_ctx->qwEpCtx2, ep0_ctx->dwEpCtx4)); done: return (cmderr); } static int pci_xhci_complete_commands(struct pci_xhci_softc *sc) { struct xhci_trb evtrb; struct xhci_trb *trb; uint64_t crcr; uint32_t ccs; /* cycle state (XHCI 4.9.2) */ uint32_t type; uint32_t slot; uint32_t cmderr; int error; error = 0; sc->opregs.crcr |= XHCI_CRCR_LO_CRR; trb = sc->opregs.cr_p; ccs = sc->opregs.crcr & XHCI_CRCR_LO_RCS; crcr = sc->opregs.crcr & ~0xF; while (1) { sc->opregs.cr_p = trb; type = XHCI_TRB_3_TYPE_GET(trb->dwTrb3); if ((trb->dwTrb3 & XHCI_TRB_3_CYCLE_BIT) != (ccs & XHCI_TRB_3_CYCLE_BIT)) break; DPRINTF(("pci_xhci: cmd type 0x%x, Trb0 x%016lx dwTrb2 x%08x" - " dwTrb3 x%08x, TRB_CYCLE %u/ccs %u\r\n", + " dwTrb3 x%08x, TRB_CYCLE %u/ccs %u", type, trb->qwTrb0, trb->dwTrb2, trb->dwTrb3, trb->dwTrb3 & XHCI_TRB_3_CYCLE_BIT, ccs)); cmderr = XHCI_TRB_ERROR_SUCCESS; evtrb.dwTrb2 = 0; evtrb.dwTrb3 = (ccs & XHCI_TRB_3_CYCLE_BIT) | XHCI_TRB_3_TYPE_SET(XHCI_TRB_EVENT_CMD_COMPLETE); slot = 0; switch (type) { case XHCI_TRB_TYPE_LINK: /* 0x06 */ if (trb->dwTrb3 & XHCI_TRB_3_TC_BIT) ccs ^= XHCI_CRCR_LO_RCS; break; case XHCI_TRB_TYPE_ENABLE_SLOT: /* 0x09 */ cmderr = pci_xhci_cmd_enable_slot(sc, &slot); break; case XHCI_TRB_TYPE_DISABLE_SLOT: /* 0x0A */ slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_disable_slot(sc, slot); break; case XHCI_TRB_TYPE_ADDRESS_DEVICE: /* 0x0B */ slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_address_device(sc, slot, trb); break; case XHCI_TRB_TYPE_CONFIGURE_EP: /* 0x0C */ slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_config_ep(sc, slot, trb); break; case XHCI_TRB_TYPE_EVALUATE_CTX: /* 0x0D */ slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_eval_ctx(sc, slot, trb); break; case XHCI_TRB_TYPE_RESET_EP: /* 0x0E */ - DPRINTF(("Reset Endpoint on slot %d\r\n", slot)); + DPRINTF(("Reset Endpoint on slot %d", slot)); slot = 
XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_reset_ep(sc, slot, trb); break; case XHCI_TRB_TYPE_STOP_EP: /* 0x0F */ - DPRINTF(("Stop Endpoint on slot %d\r\n", slot)); + DPRINTF(("Stop Endpoint on slot %d", slot)); slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_reset_ep(sc, slot, trb); break; case XHCI_TRB_TYPE_SET_TR_DEQUEUE: /* 0x10 */ slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_set_tr(sc, slot, trb); break; case XHCI_TRB_TYPE_RESET_DEVICE: /* 0x11 */ slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_reset_device(sc, slot); break; case XHCI_TRB_TYPE_FORCE_EVENT: /* 0x12 */ /* TODO: */ break; case XHCI_TRB_TYPE_NEGOTIATE_BW: /* 0x13 */ break; case XHCI_TRB_TYPE_SET_LATENCY_TOL: /* 0x14 */ break; case XHCI_TRB_TYPE_GET_PORT_BW: /* 0x15 */ break; case XHCI_TRB_TYPE_FORCE_HEADER: /* 0x16 */ break; case XHCI_TRB_TYPE_NOOP_CMD: /* 0x17 */ break; default: - DPRINTF(("pci_xhci: unsupported cmd %x\r\n", type)); + DPRINTF(("pci_xhci: unsupported cmd %x", type)); break; } if (type != XHCI_TRB_TYPE_LINK) { /* * insert command completion event and assert intr */ evtrb.qwTrb0 = crcr; evtrb.dwTrb2 |= XHCI_TRB_2_ERROR_SET(cmderr); evtrb.dwTrb3 |= XHCI_TRB_3_SLOT_SET(slot); - DPRINTF(("pci_xhci: command 0x%x result: 0x%x\r\n", + DPRINTF(("pci_xhci: command 0x%x result: 0x%x", type, cmderr)); pci_xhci_insert_event(sc, &evtrb, 1); } trb = pci_xhci_trb_next(sc, trb, &crcr); } sc->opregs.crcr = crcr | (sc->opregs.crcr & XHCI_CRCR_LO_CA) | ccs; sc->opregs.crcr &= ~XHCI_CRCR_LO_CRR; return (error); } static void pci_xhci_dump_trb(struct xhci_trb *trb) { static const char *trbtypes[] = { "RESERVED", "NORMAL", "SETUP_STAGE", "DATA_STAGE", "STATUS_STAGE", "ISOCH", "LINK", "EVENT_DATA", "NOOP", "ENABLE_SLOT", "DISABLE_SLOT", "ADDRESS_DEVICE", "CONFIGURE_EP", "EVALUATE_CTX", "RESET_EP", "STOP_EP", "SET_TR_DEQUEUE", "RESET_DEVICE", "FORCE_EVENT", "NEGOTIATE_BW", "SET_LATENCY_TOL", "GET_PORT_BW", "FORCE_HEADER", "NOOP_CMD" }; uint32_t type; type = XHCI_TRB_3_TYPE_GET(trb->dwTrb3); - DPRINTF(("pci_xhci: trb[@%p] type x%02x %s 0:x%016lx 2:x%08x 3:x%08x\r\n", + DPRINTF(("pci_xhci: trb[@%p] type x%02x %s 0:x%016lx 2:x%08x 3:x%08x", trb, type, type <= XHCI_TRB_TYPE_NOOP_CMD ? trbtypes[type] : "INVALID", trb->qwTrb0, trb->dwTrb2, trb->dwTrb3)); } static int pci_xhci_xfer_complete(struct pci_xhci_softc *sc, struct usb_data_xfer *xfer, uint32_t slot, uint32_t epid, int *do_intr) { struct pci_xhci_dev_emu *dev; struct pci_xhci_dev_ep *devep; struct xhci_dev_ctx *dev_ctx; struct xhci_endp_ctx *ep_ctx; struct xhci_trb *trb; struct xhci_trb evtrb; uint32_t trbflags; uint32_t edtla; int i, err; dev = XHCI_SLOTDEV_PTR(sc, slot); devep = &dev->eps[epid]; dev_ctx = pci_xhci_get_dev_ctx(sc, slot); assert(dev_ctx != NULL); ep_ctx = &dev_ctx->ctx_ep[epid]; err = XHCI_TRB_ERROR_SUCCESS; *do_intr = 0; edtla = 0; /* go through list of TRBs and insert event(s) */ for (i = xfer->head; xfer->ndata > 0; ) { evtrb.qwTrb0 = (uint64_t)xfer->data[i].hci_data; trb = XHCI_GADDR(sc, evtrb.qwTrb0); trbflags = trb->dwTrb3; DPRINTF(("pci_xhci: xfer[%d] done?%u:%d trb %x %016lx %x " - "(err %d) IOC?%d\r\n", + "(err %d) IOC?%d", i, xfer->data[i].processed, xfer->data[i].blen, XHCI_TRB_3_TYPE_GET(trbflags), evtrb.qwTrb0, trbflags, err, trb->dwTrb3 & XHCI_TRB_3_IOC_BIT ? 
1 : 0)); if (!xfer->data[i].processed) { xfer->head = i; break; } xfer->ndata--; edtla += xfer->data[i].bdone; trb->dwTrb3 = (trb->dwTrb3 & ~0x1) | (xfer->data[i].ccs); pci_xhci_update_ep_ring(sc, dev, devep, ep_ctx, xfer->data[i].streamid, xfer->data[i].trbnext, xfer->data[i].ccs); /* Only interrupt if IOC or short packet */ if (!(trb->dwTrb3 & XHCI_TRB_3_IOC_BIT) && !((err == XHCI_TRB_ERROR_SHORT_PKT) && (trb->dwTrb3 & XHCI_TRB_3_ISP_BIT))) { i = (i + 1) % USB_MAX_XFER_BLOCKS; continue; } evtrb.dwTrb2 = XHCI_TRB_2_ERROR_SET(err) | XHCI_TRB_2_REM_SET(xfer->data[i].blen); evtrb.dwTrb3 = XHCI_TRB_3_TYPE_SET(XHCI_TRB_EVENT_TRANSFER) | XHCI_TRB_3_SLOT_SET(slot) | XHCI_TRB_3_EP_SET(epid); if (XHCI_TRB_3_TYPE_GET(trbflags) == XHCI_TRB_TYPE_EVENT_DATA) { - DPRINTF(("pci_xhci EVENT_DATA edtla %u\r\n", edtla)); + DPRINTF(("pci_xhci EVENT_DATA edtla %u", edtla)); evtrb.qwTrb0 = trb->qwTrb0; evtrb.dwTrb2 = (edtla & 0xFFFFF) | XHCI_TRB_2_ERROR_SET(err); evtrb.dwTrb3 |= XHCI_TRB_3_ED_BIT; edtla = 0; } *do_intr = 1; err = pci_xhci_insert_event(sc, &evtrb, 0); if (err != XHCI_TRB_ERROR_SUCCESS) { break; } i = (i + 1) % USB_MAX_XFER_BLOCKS; } return (err); } static void pci_xhci_update_ep_ring(struct pci_xhci_softc *sc, struct pci_xhci_dev_emu *dev, struct pci_xhci_dev_ep *devep, struct xhci_endp_ctx *ep_ctx, uint32_t streamid, uint64_t ringaddr, int ccs) { if (XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0) != 0) { devep->ep_sctx[streamid].qwSctx0 = (ringaddr & ~0xFUL) | (ccs & 0x1); devep->ep_sctx_trbs[streamid].ringaddr = ringaddr & ~0xFUL; devep->ep_sctx_trbs[streamid].ccs = ccs & 0x1; ep_ctx->qwEpCtx2 = (ep_ctx->qwEpCtx2 & ~0x1) | (ccs & 0x1); - DPRINTF(("xhci update ep-ring stream %d, addr %lx\r\n", + DPRINTF(("xhci update ep-ring stream %d, addr %lx", streamid, devep->ep_sctx[streamid].qwSctx0)); } else { devep->ep_ringaddr = ringaddr & ~0xFUL; devep->ep_ccs = ccs & 0x1; devep->ep_tr = XHCI_GADDR(sc, ringaddr & ~0xFUL); ep_ctx->qwEpCtx2 = (ringaddr & ~0xFUL) | (ccs & 0x1); - DPRINTF(("xhci update ep-ring, addr %lx\r\n", + DPRINTF(("xhci update ep-ring, addr %lx", (devep->ep_ringaddr | devep->ep_ccs))); } } /* * Outstanding transfer still in progress (device NAK'd earlier) so retry * the transfer again to see if it succeeds. */ static int pci_xhci_try_usb_xfer(struct pci_xhci_softc *sc, struct pci_xhci_dev_emu *dev, struct pci_xhci_dev_ep *devep, struct xhci_endp_ctx *ep_ctx, uint32_t slot, uint32_t epid) { struct usb_data_xfer *xfer; int err; int do_intr; ep_ctx->dwEpCtx0 = FIELD_REPLACE( ep_ctx->dwEpCtx0, XHCI_ST_EPCTX_RUNNING, 0x7, 0); err = 0; do_intr = 0; xfer = devep->ep_xfer; USB_DATA_XFER_LOCK(xfer); /* outstanding requests queued up */ if (dev->dev_ue->ue_data != NULL) { err = dev->dev_ue->ue_data(dev->dev_sc, xfer, epid & 0x1 ? USB_XFER_IN : USB_XFER_OUT, epid/2); if (err == USB_ERR_CANCELLED) { if (USB_DATA_GET_ERRCODE(&xfer->data[xfer->head]) == USB_NAK) err = XHCI_TRB_ERROR_SUCCESS; } else { err = pci_xhci_xfer_complete(sc, xfer, slot, epid, &do_intr); if (err == XHCI_TRB_ERROR_SUCCESS && do_intr) { pci_xhci_assert_interrupt(sc); } /* XXX should not do it if error? 
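			 * Resetting unconditionally discards any blocks that
			 * were queued but never completed; whether that is
			 * correct after an insert-event failure remains an
			 * open question.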
*/ USB_DATA_XFER_RESET(xfer); } } USB_DATA_XFER_UNLOCK(xfer); return (err); } static int pci_xhci_handle_transfer(struct pci_xhci_softc *sc, struct pci_xhci_dev_emu *dev, struct pci_xhci_dev_ep *devep, struct xhci_endp_ctx *ep_ctx, struct xhci_trb *trb, uint32_t slot, uint32_t epid, uint64_t addr, uint32_t ccs, uint32_t streamid) { struct xhci_trb *setup_trb; struct usb_data_xfer *xfer; struct usb_data_xfer_block *xfer_block; uint64_t val; uint32_t trbflags; int do_intr, err; int do_retry; ep_ctx->dwEpCtx0 = FIELD_REPLACE(ep_ctx->dwEpCtx0, XHCI_ST_EPCTX_RUNNING, 0x7, 0); xfer = devep->ep_xfer; USB_DATA_XFER_LOCK(xfer); - DPRINTF(("pci_xhci handle_transfer slot %u\r\n", slot)); + DPRINTF(("pci_xhci handle_transfer slot %u", slot)); retry: err = 0; do_retry = 0; do_intr = 0; setup_trb = NULL; while (1) { pci_xhci_dump_trb(trb); trbflags = trb->dwTrb3; if (XHCI_TRB_3_TYPE_GET(trbflags) != XHCI_TRB_TYPE_LINK && (trbflags & XHCI_TRB_3_CYCLE_BIT) != (ccs & XHCI_TRB_3_CYCLE_BIT)) { - DPRINTF(("Cycle-bit changed trbflags %x, ccs %x\r\n", + DPRINTF(("Cycle-bit changed trbflags %x, ccs %x", trbflags & XHCI_TRB_3_CYCLE_BIT, ccs)); break; } xfer_block = NULL; switch (XHCI_TRB_3_TYPE_GET(trbflags)) { case XHCI_TRB_TYPE_LINK: if (trb->dwTrb3 & XHCI_TRB_3_TC_BIT) ccs ^= 0x1; xfer_block = usb_data_xfer_append(xfer, NULL, 0, (void *)addr, ccs); xfer_block->processed = 1; break; case XHCI_TRB_TYPE_SETUP_STAGE: if ((trbflags & XHCI_TRB_3_IDT_BIT) == 0 || XHCI_TRB_2_BYTES_GET(trb->dwTrb2) != 8) { - DPRINTF(("pci_xhci: invalid setup trb\r\n")); + DPRINTF(("pci_xhci: invalid setup trb")); err = XHCI_TRB_ERROR_TRB; goto errout; } setup_trb = trb; val = trb->qwTrb0; if (!xfer->ureq) xfer->ureq = malloc( sizeof(struct usb_device_request)); memcpy(xfer->ureq, &val, sizeof(struct usb_device_request)); xfer_block = usb_data_xfer_append(xfer, NULL, 0, (void *)addr, ccs); xfer_block->processed = 1; break; case XHCI_TRB_TYPE_NORMAL: case XHCI_TRB_TYPE_ISOCH: if (setup_trb != NULL) { DPRINTF(("pci_xhci: trb not supposed to be in " - "ctl scope\r\n")); + "ctl scope")); err = XHCI_TRB_ERROR_TRB; goto errout; } /* fall through */ case XHCI_TRB_TYPE_DATA_STAGE: xfer_block = usb_data_xfer_append(xfer, (void *)(trbflags & XHCI_TRB_3_IDT_BIT ? 
&trb->qwTrb0 : XHCI_GADDR(sc, trb->qwTrb0)), trb->dwTrb2 & 0x1FFFF, (void *)addr, ccs); break; case XHCI_TRB_TYPE_STATUS_STAGE: xfer_block = usb_data_xfer_append(xfer, NULL, 0, (void *)addr, ccs); break; case XHCI_TRB_TYPE_NOOP: xfer_block = usb_data_xfer_append(xfer, NULL, 0, (void *)addr, ccs); xfer_block->processed = 1; break; case XHCI_TRB_TYPE_EVENT_DATA: xfer_block = usb_data_xfer_append(xfer, NULL, 0, (void *)addr, ccs); if ((epid > 1) && (trbflags & XHCI_TRB_3_IOC_BIT)) { xfer_block->processed = 1; } break; default: DPRINTF(("pci_xhci: handle xfer unexpected trb type " - "0x%x\r\n", + "0x%x", XHCI_TRB_3_TYPE_GET(trbflags))); err = XHCI_TRB_ERROR_TRB; goto errout; } trb = pci_xhci_trb_next(sc, trb, &addr); - DPRINTF(("pci_xhci: next trb: 0x%lx\r\n", (uint64_t)trb)); + DPRINTF(("pci_xhci: next trb: 0x%lx", (uint64_t)trb)); if (xfer_block) { xfer_block->trbnext = addr; xfer_block->streamid = streamid; } if (!setup_trb && !(trbflags & XHCI_TRB_3_CHAIN_BIT) && XHCI_TRB_3_TYPE_GET(trbflags) != XHCI_TRB_TYPE_LINK) { break; } /* handle current batch that requires interrupt on complete */ if (trbflags & XHCI_TRB_3_IOC_BIT) { - DPRINTF(("pci_xhci: trb IOC bit set\r\n")); + DPRINTF(("pci_xhci: trb IOC bit set")); if (epid == 1) do_retry = 1; break; } } - DPRINTF(("pci_xhci[%d]: xfer->ndata %u\r\n", __LINE__, xfer->ndata)); + DPRINTF(("pci_xhci[%d]: xfer->ndata %u", __LINE__, xfer->ndata)); if (epid == 1) { err = USB_ERR_NOT_STARTED; if (dev->dev_ue->ue_request != NULL) err = dev->dev_ue->ue_request(dev->dev_sc, xfer); setup_trb = NULL; } else { /* handle data transfer */ pci_xhci_try_usb_xfer(sc, dev, devep, ep_ctx, slot, epid); err = XHCI_TRB_ERROR_SUCCESS; goto errout; } err = USB_TO_XHCI_ERR(err); if ((err == XHCI_TRB_ERROR_SUCCESS) || (err == XHCI_TRB_ERROR_SHORT_PKT)) { err = pci_xhci_xfer_complete(sc, xfer, slot, epid, &do_intr); if (err != XHCI_TRB_ERROR_SUCCESS) do_retry = 0; } errout: if (err == XHCI_TRB_ERROR_EV_RING_FULL) - DPRINTF(("pci_xhci[%d]: event ring full\r\n", __LINE__)); + DPRINTF(("pci_xhci[%d]: event ring full", __LINE__)); if (!do_retry) USB_DATA_XFER_UNLOCK(xfer); if (do_intr) pci_xhci_assert_interrupt(sc); if (do_retry) { USB_DATA_XFER_RESET(xfer); - DPRINTF(("pci_xhci[%d]: retry:continuing with next TRBs\r\n", + DPRINTF(("pci_xhci[%d]: retry:continuing with next TRBs", __LINE__)); goto retry; } if (epid == 1) USB_DATA_XFER_RESET(xfer); return (err); } static void pci_xhci_device_doorbell(struct pci_xhci_softc *sc, uint32_t slot, uint32_t epid, uint32_t streamid) { struct pci_xhci_dev_emu *dev; struct pci_xhci_dev_ep *devep; struct xhci_dev_ctx *dev_ctx; struct xhci_endp_ctx *ep_ctx; struct pci_xhci_trb_ring *sctx_tr; struct xhci_trb *trb; uint64_t ringaddr; uint32_t ccs; - DPRINTF(("pci_xhci doorbell slot %u epid %u stream %u\r\n", + DPRINTF(("pci_xhci doorbell slot %u epid %u stream %u", slot, epid, streamid)); if (slot == 0 || slot > sc->ndevices) { - DPRINTF(("pci_xhci: invalid doorbell slot %u\r\n", slot)); + DPRINTF(("pci_xhci: invalid doorbell slot %u", slot)); return; } if (epid == 0 || epid >= XHCI_MAX_ENDPOINTS) { - DPRINTF(("pci_xhci: invalid endpoint %u\r\n", epid)); + DPRINTF(("pci_xhci: invalid endpoint %u", epid)); return; } dev = XHCI_SLOTDEV_PTR(sc, slot); devep = &dev->eps[epid]; dev_ctx = pci_xhci_get_dev_ctx(sc, slot); if (!dev_ctx) { return; } ep_ctx = &dev_ctx->ctx_ep[epid]; sctx_tr = NULL; - DPRINTF(("pci_xhci: device doorbell ep[%u] %08x %08x %016lx %08x\r\n", + DPRINTF(("pci_xhci: device doorbell ep[%u] %08x %08x %016lx %08x", epid, 
ep_ctx->dwEpCtx0, ep_ctx->dwEpCtx1, ep_ctx->qwEpCtx2, ep_ctx->dwEpCtx4)); if (ep_ctx->qwEpCtx2 == 0) return; /* handle pending transfers */ if (devep->ep_xfer->ndata > 0) { pci_xhci_try_usb_xfer(sc, dev, devep, ep_ctx, slot, epid); return; } /* get next trb work item */ if (XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0) != 0) { struct xhci_stream_ctx *sctx; /* * Stream IDs of 0, 65535 (any stream), and 65534 * (prime) are invalid. */ if (streamid == 0 || streamid == 65534 || streamid == 65535) { - DPRINTF(("pci_xhci: invalid stream %u\r\n", streamid)); + DPRINTF(("pci_xhci: invalid stream %u", streamid)); return; } sctx = NULL; pci_xhci_find_stream(sc, ep_ctx, streamid, &sctx); if (sctx == NULL) { - DPRINTF(("pci_xhci: invalid stream %u\r\n", streamid)); + DPRINTF(("pci_xhci: invalid stream %u", streamid)); return; } sctx_tr = &devep->ep_sctx_trbs[streamid]; ringaddr = sctx_tr->ringaddr; ccs = sctx_tr->ccs; trb = XHCI_GADDR(sc, sctx_tr->ringaddr & ~0xFUL); - DPRINTF(("doorbell, stream %u, ccs %lx, trb ccs %x\r\n", + DPRINTF(("doorbell, stream %u, ccs %lx, trb ccs %x", streamid, ep_ctx->qwEpCtx2 & XHCI_TRB_3_CYCLE_BIT, trb->dwTrb3 & XHCI_TRB_3_CYCLE_BIT)); } else { if (streamid != 0) { - DPRINTF(("pci_xhci: invalid stream %u\r\n", streamid)); + DPRINTF(("pci_xhci: invalid stream %u", streamid)); return; } ringaddr = devep->ep_ringaddr; ccs = devep->ep_ccs; trb = devep->ep_tr; - DPRINTF(("doorbell, ccs %lx, trb ccs %x\r\n", + DPRINTF(("doorbell, ccs %lx, trb ccs %x", ep_ctx->qwEpCtx2 & XHCI_TRB_3_CYCLE_BIT, trb->dwTrb3 & XHCI_TRB_3_CYCLE_BIT)); } if (XHCI_TRB_3_TYPE_GET(trb->dwTrb3) == 0) { - DPRINTF(("pci_xhci: ring %lx trb[%lx] EP %u is RESERVED?\r\n", + DPRINTF(("pci_xhci: ring %lx trb[%lx] EP %u is RESERVED?", ep_ctx->qwEpCtx2, devep->ep_ringaddr, epid)); return; } pci_xhci_handle_transfer(sc, dev, devep, ep_ctx, trb, slot, epid, ringaddr, ccs, streamid); } static void pci_xhci_dbregs_write(struct pci_xhci_softc *sc, uint64_t offset, uint64_t value) { offset = (offset - sc->dboff) / sizeof(uint32_t); - DPRINTF(("pci_xhci: doorbell write offset 0x%lx: 0x%lx\r\n", + DPRINTF(("pci_xhci: doorbell write offset 0x%lx: 0x%lx", offset, value)); if (XHCI_HALTED(sc)) { - DPRINTF(("pci_xhci: controller halted\r\n")); + DPRINTF(("pci_xhci: controller halted")); return; } if (offset == 0) pci_xhci_complete_commands(sc); else if (sc->portregs != NULL) pci_xhci_device_doorbell(sc, offset, XHCI_DB_TARGET_GET(value), XHCI_DB_SID_GET(value)); } static void pci_xhci_rtsregs_write(struct pci_xhci_softc *sc, uint64_t offset, uint64_t value) { struct pci_xhci_rtsregs *rts; offset -= sc->rtsoff; if (offset == 0) { - DPRINTF(("pci_xhci attempted write to MFINDEX\r\n")); + DPRINTF(("pci_xhci attempted write to MFINDEX")); return; } - DPRINTF(("pci_xhci: runtime regs write offset 0x%lx: 0x%lx\r\n", + DPRINTF(("pci_xhci: runtime regs write offset 0x%lx: 0x%lx", offset, value)); offset -= 0x20; /* start of intrreg */ rts = &sc->rtsregs; switch (offset) { case 0x00: if (value & XHCI_IMAN_INTR_PEND) rts->intrreg.iman &= ~XHCI_IMAN_INTR_PEND; rts->intrreg.iman = (value & XHCI_IMAN_INTR_ENA) | (rts->intrreg.iman & XHCI_IMAN_INTR_PEND); if (!(value & XHCI_IMAN_INTR_ENA)) pci_xhci_deassert_interrupt(sc); break; case 0x04: rts->intrreg.imod = value; break; case 0x08: rts->intrreg.erstsz = value & 0xFFFF; break; case 0x10: /* ERSTBA low bits */ rts->intrreg.erstba = MASK_64_HI(sc->rtsregs.intrreg.erstba) | (value & ~0x3F); break; case 0x14: /* ERSTBA high bits */ rts->intrreg.erstba = (value << 32) | 
MASK_64_LO(sc->rtsregs.intrreg.erstba); rts->erstba_p = XHCI_GADDR(sc, sc->rtsregs.intrreg.erstba & ~0x3FUL); rts->erst_p = XHCI_GADDR(sc, sc->rtsregs.erstba_p->qwEvrsTablePtr & ~0x3FUL); rts->er_enq_idx = 0; rts->er_events_cnt = 0; - DPRINTF(("pci_xhci: wr erstba erst (%p) ptr 0x%lx, sz %u\r\n", + DPRINTF(("pci_xhci: wr erstba erst (%p) ptr 0x%lx, sz %u", rts->erstba_p, rts->erstba_p->qwEvrsTablePtr, rts->erstba_p->dwEvrsTableSize)); break; case 0x18: /* ERDP low bits */ rts->intrreg.erdp = MASK_64_HI(sc->rtsregs.intrreg.erdp) | (rts->intrreg.erdp & XHCI_ERDP_LO_BUSY) | (value & ~0xF); if (value & XHCI_ERDP_LO_BUSY) { rts->intrreg.erdp &= ~XHCI_ERDP_LO_BUSY; rts->intrreg.iman &= ~XHCI_IMAN_INTR_PEND; } rts->er_deq_seg = XHCI_ERDP_LO_SINDEX(value); break; case 0x1C: /* ERDP high bits */ rts->intrreg.erdp = (value << 32) | MASK_64_LO(sc->rtsregs.intrreg.erdp); if (rts->er_events_cnt > 0) { uint64_t erdp; uint32_t erdp_i; erdp = rts->intrreg.erdp & ~0xF; erdp_i = (erdp - rts->erstba_p->qwEvrsTablePtr) / sizeof(struct xhci_trb); if (erdp_i <= rts->er_enq_idx) rts->er_events_cnt = rts->er_enq_idx - erdp_i; else rts->er_events_cnt = rts->erstba_p->dwEvrsTableSize - (erdp_i - rts->er_enq_idx); - DPRINTF(("pci_xhci: erdp 0x%lx, events cnt %u\r\n", + DPRINTF(("pci_xhci: erdp 0x%lx, events cnt %u", erdp, rts->er_events_cnt)); } break; default: - DPRINTF(("pci_xhci attempted write to RTS offset 0x%lx\r\n", + DPRINTF(("pci_xhci attempted write to RTS offset 0x%lx", offset)); break; } } static uint64_t pci_xhci_portregs_read(struct pci_xhci_softc *sc, uint64_t offset) { int port; uint32_t *p; if (sc->portregs == NULL) return (0); port = (offset - 0x3F0) / 0x10; if (port > XHCI_MAX_DEVS) { - DPRINTF(("pci_xhci: portregs_read port %d >= XHCI_MAX_DEVS\r\n", + DPRINTF(("pci_xhci: portregs_read port %d >= XHCI_MAX_DEVS", port)); /* return default value for unused port */ return (XHCI_PS_SPEED_SET(3)); } offset = (offset - 0x3F0) % 0x10; p = &sc->portregs[port].portsc; p += offset / sizeof(uint32_t); - DPRINTF(("pci_xhci: portregs read offset 0x%lx port %u -> 0x%x\r\n", + DPRINTF(("pci_xhci: portregs read offset 0x%lx port %u -> 0x%x", offset, port, *p)); return (*p); } static void pci_xhci_hostop_write(struct pci_xhci_softc *sc, uint64_t offset, uint64_t value) { offset -= XHCI_CAPLEN; if (offset < 0x400) - DPRINTF(("pci_xhci: hostop write offset 0x%lx: 0x%lx\r\n", + DPRINTF(("pci_xhci: hostop write offset 0x%lx: 0x%lx", offset, value)); switch (offset) { case XHCI_USBCMD: sc->opregs.usbcmd = pci_xhci_usbcmd_write(sc, value & 0x3F0F); break; case XHCI_USBSTS: /* clear bits on write */ sc->opregs.usbsts &= ~(value & (XHCI_STS_HSE|XHCI_STS_EINT|XHCI_STS_PCD|XHCI_STS_SSS| XHCI_STS_RSS|XHCI_STS_SRE|XHCI_STS_CNR)); break; case XHCI_PAGESIZE: /* read only */ break; case XHCI_DNCTRL: sc->opregs.dnctrl = value & 0xFFFF; break; case XHCI_CRCR_LO: if (sc->opregs.crcr & XHCI_CRCR_LO_CRR) { sc->opregs.crcr &= ~(XHCI_CRCR_LO_CS|XHCI_CRCR_LO_CA); sc->opregs.crcr |= value & (XHCI_CRCR_LO_CS|XHCI_CRCR_LO_CA); } else { sc->opregs.crcr = MASK_64_HI(sc->opregs.crcr) | (value & (0xFFFFFFC0 | XHCI_CRCR_LO_RCS)); } break; case XHCI_CRCR_HI: if (!(sc->opregs.crcr & XHCI_CRCR_LO_CRR)) { sc->opregs.crcr = MASK_64_LO(sc->opregs.crcr) | (value << 32); sc->opregs.cr_p = XHCI_GADDR(sc, sc->opregs.crcr & ~0xF); } if (sc->opregs.crcr & XHCI_CRCR_LO_CS) { /* Stop operation of Command Ring */ } if (sc->opregs.crcr & XHCI_CRCR_LO_CA) { /* Abort command */ } break; case XHCI_DCBAAP_LO: sc->opregs.dcbaap = MASK_64_HI(sc->opregs.dcbaap) | 
(value & 0xFFFFFFC0); break; case XHCI_DCBAAP_HI: sc->opregs.dcbaap = MASK_64_LO(sc->opregs.dcbaap) | (value << 32); sc->opregs.dcbaa_p = XHCI_GADDR(sc, sc->opregs.dcbaap & ~0x3FUL); - DPRINTF(("pci_xhci: opregs dcbaap = 0x%lx (vaddr 0x%lx)\r\n", + DPRINTF(("pci_xhci: opregs dcbaap = 0x%lx (vaddr 0x%lx)", sc->opregs.dcbaap, (uint64_t)sc->opregs.dcbaa_p)); break; case XHCI_CONFIG: sc->opregs.config = value & 0x03FF; break; default: if (offset >= 0x400) pci_xhci_portregs_write(sc, offset, value); break; } } static void pci_xhci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { struct pci_xhci_softc *sc; sc = pi->pi_arg; assert(baridx == 0); pthread_mutex_lock(&sc->mtx); if (offset < XHCI_CAPLEN) /* read only registers */ - WPRINTF(("pci_xhci: write RO-CAPs offset %ld\r\n", offset)); + WPRINTF(("pci_xhci: write RO-CAPs offset %ld", offset)); else if (offset < sc->dboff) pci_xhci_hostop_write(sc, offset, value); else if (offset < sc->rtsoff) pci_xhci_dbregs_write(sc, offset, value); else if (offset < sc->regsend) pci_xhci_rtsregs_write(sc, offset, value); else - WPRINTF(("pci_xhci: write invalid offset %ld\r\n", offset)); + WPRINTF(("pci_xhci: write invalid offset %ld", offset)); pthread_mutex_unlock(&sc->mtx); } static uint64_t pci_xhci_hostcap_read(struct pci_xhci_softc *sc, uint64_t offset) { uint64_t value; switch (offset) { case XHCI_CAPLENGTH: /* 0x00 */ value = sc->caplength; break; case XHCI_HCSPARAMS1: /* 0x04 */ value = sc->hcsparams1; break; case XHCI_HCSPARAMS2: /* 0x08 */ value = sc->hcsparams2; break; case XHCI_HCSPARAMS3: /* 0x0C */ value = sc->hcsparams3; break; case XHCI_HCSPARAMS0: /* 0x10 */ value = sc->hccparams1; break; case XHCI_DBOFF: /* 0x14 */ value = sc->dboff; break; case XHCI_RTSOFF: /* 0x18 */ value = sc->rtsoff; break; case XHCI_HCCPRAMS2: /* 0x1C */ value = sc->hccparams2; break; default: value = 0; break; } - DPRINTF(("pci_xhci: hostcap read offset 0x%lx -> 0x%lx\r\n", + DPRINTF(("pci_xhci: hostcap read offset 0x%lx -> 0x%lx", offset, value)); return (value); } static uint64_t pci_xhci_hostop_read(struct pci_xhci_softc *sc, uint64_t offset) { uint64_t value; offset = (offset - XHCI_CAPLEN); switch (offset) { case XHCI_USBCMD: /* 0x00 */ value = sc->opregs.usbcmd; break; case XHCI_USBSTS: /* 0x04 */ value = sc->opregs.usbsts; break; case XHCI_PAGESIZE: /* 0x08 */ value = sc->opregs.pgsz; break; case XHCI_DNCTRL: /* 0x14 */ value = sc->opregs.dnctrl; break; case XHCI_CRCR_LO: /* 0x18 */ value = sc->opregs.crcr & XHCI_CRCR_LO_CRR; break; case XHCI_CRCR_HI: /* 0x1C */ value = 0; break; case XHCI_DCBAAP_LO: /* 0x30 */ value = sc->opregs.dcbaap & 0xFFFFFFFF; break; case XHCI_DCBAAP_HI: /* 0x34 */ value = (sc->opregs.dcbaap >> 32) & 0xFFFFFFFF; break; case XHCI_CONFIG: /* 0x38 */ value = sc->opregs.config; break; default: if (offset >= 0x400) value = pci_xhci_portregs_read(sc, offset); else value = 0; break; } if (offset < 0x400) - DPRINTF(("pci_xhci: hostop read offset 0x%lx -> 0x%lx\r\n", + DPRINTF(("pci_xhci: hostop read offset 0x%lx -> 0x%lx", offset, value)); return (value); } static uint64_t pci_xhci_dbregs_read(struct pci_xhci_softc *sc, uint64_t offset) { /* read doorbell always returns 0 */ return (0); } static uint64_t pci_xhci_rtsregs_read(struct pci_xhci_softc *sc, uint64_t offset) { uint32_t value; offset -= sc->rtsoff; value = 0; if (offset == XHCI_MFINDEX) { value = sc->rtsregs.mfindex; } else if (offset >= 0x20) { int item; uint32_t *p; offset -= 0x20; item = offset % 32; assert(offset < 
sizeof(sc->rtsregs.intrreg)); p = &sc->rtsregs.intrreg.iman; p += item / sizeof(uint32_t); value = *p; } - DPRINTF(("pci_xhci: rtsregs read offset 0x%lx -> 0x%x\r\n", + DPRINTF(("pci_xhci: rtsregs read offset 0x%lx -> 0x%x", offset, value)); return (value); } static uint64_t pci_xhci_xecp_read(struct pci_xhci_softc *sc, uint64_t offset) { uint32_t value; offset -= sc->regsend; value = 0; switch (offset) { case 0: /* rev major | rev minor | next-cap | cap-id */ value = (0x02 << 24) | (4 << 8) | XHCI_ID_PROTOCOLS; break; case 4: /* name string = "USB" */ value = 0x20425355; break; case 8: /* psic | proto-defined | compat # | compat offset */ value = ((XHCI_MAX_DEVS/2) << 8) | sc->usb2_port_start; break; case 12: break; case 16: /* rev major | rev minor | next-cap | cap-id */ value = (0x03 << 24) | XHCI_ID_PROTOCOLS; break; case 20: /* name string = "USB" */ value = 0x20425355; break; case 24: /* psic | proto-defined | compat # | compat offset */ value = ((XHCI_MAX_DEVS/2) << 8) | sc->usb3_port_start; break; case 28: break; default: - DPRINTF(("pci_xhci: xecp invalid offset 0x%lx\r\n", offset)); + DPRINTF(("pci_xhci: xecp invalid offset 0x%lx", offset)); break; } - DPRINTF(("pci_xhci: xecp read offset 0x%lx -> 0x%x\r\n", + DPRINTF(("pci_xhci: xecp read offset 0x%lx -> 0x%x", offset, value)); return (value); } static uint64_t pci_xhci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { struct pci_xhci_softc *sc; uint32_t value; sc = pi->pi_arg; assert(baridx == 0); pthread_mutex_lock(&sc->mtx); if (offset < XHCI_CAPLEN) value = pci_xhci_hostcap_read(sc, offset); else if (offset < sc->dboff) value = pci_xhci_hostop_read(sc, offset); else if (offset < sc->rtsoff) value = pci_xhci_dbregs_read(sc, offset); else if (offset < sc->regsend) value = pci_xhci_rtsregs_read(sc, offset); else if (offset < (sc->regsend + 4*32)) value = pci_xhci_xecp_read(sc, offset); else { value = 0; - WPRINTF(("pci_xhci: read invalid offset %ld\r\n", offset)); + WPRINTF(("pci_xhci: read invalid offset %ld", offset)); } pthread_mutex_unlock(&sc->mtx); switch (size) { case 1: value &= 0xFF; break; case 2: value &= 0xFFFF; break; case 4: value &= 0xFFFFFFFF; break; } return (value); } static void pci_xhci_reset_port(struct pci_xhci_softc *sc, int portn, int warm) { struct pci_xhci_portregs *port; struct pci_xhci_dev_emu *dev; struct xhci_trb evtrb; int error; assert(portn <= XHCI_MAX_DEVS); - DPRINTF(("xhci reset port %d\r\n", portn)); + DPRINTF(("xhci reset port %d", portn)); port = XHCI_PORTREG_PTR(sc, portn); dev = XHCI_DEVINST_PTR(sc, portn); if (dev) { port->portsc &= ~(XHCI_PS_PLS_MASK | XHCI_PS_PR | XHCI_PS_PRC); port->portsc |= XHCI_PS_PED | XHCI_PS_SPEED_SET(dev->dev_ue->ue_usbspeed); if (warm && dev->dev_ue->ue_usbver == 3) { port->portsc |= XHCI_PS_WRC; } if ((port->portsc & XHCI_PS_PRC) == 0) { port->portsc |= XHCI_PS_PRC; pci_xhci_set_evtrb(&evtrb, portn, XHCI_TRB_ERROR_SUCCESS, XHCI_TRB_EVENT_PORT_STS_CHANGE); error = pci_xhci_insert_event(sc, &evtrb, 1); if (error != XHCI_TRB_ERROR_SUCCESS) DPRINTF(("xhci reset port insert event " - "failed\r\n")); + "failed")); } } } static void pci_xhci_init_port(struct pci_xhci_softc *sc, int portn) { struct pci_xhci_portregs *port; struct pci_xhci_dev_emu *dev; port = XHCI_PORTREG_PTR(sc, portn); dev = XHCI_DEVINST_PTR(sc, portn); if (dev) { port->portsc = XHCI_PS_CCS | /* connected */ XHCI_PS_PP; /* port power */ if (dev->dev_ue->ue_usbver == 2) { port->portsc |= XHCI_PS_PLS_SET(UPS_PORT_LS_POLL) | 
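/*
 * Illustrative note: a USB2 device is presented in the Polling link
 * state and must complete a port reset before the guest sees it
 * enabled, whereas the USB3 branch below starts the port in U0 with
 * XHCI_PS_PED already set.
 */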
XHCI_PS_SPEED_SET(dev->dev_ue->ue_usbspeed); } else { port->portsc |= XHCI_PS_PLS_SET(UPS_PORT_LS_U0) | XHCI_PS_PED | /* enabled */ XHCI_PS_SPEED_SET(dev->dev_ue->ue_usbspeed); } - DPRINTF(("Init port %d 0x%x\n", portn, port->portsc)); + DPRINTF(("Init port %d 0x%x", portn, port->portsc)); } else { port->portsc = XHCI_PS_PLS_SET(UPS_PORT_LS_RX_DET) | XHCI_PS_PP; - DPRINTF(("Init empty port %d 0x%x\n", portn, port->portsc)); + DPRINTF(("Init empty port %d 0x%x", portn, port->portsc)); } } static int pci_xhci_dev_intr(struct usb_hci *hci, int epctx) { struct pci_xhci_dev_emu *dev; struct xhci_dev_ctx *dev_ctx; struct xhci_trb evtrb; struct pci_xhci_softc *sc; struct pci_xhci_portregs *p; struct xhci_endp_ctx *ep_ctx; int error = 0; int dir_in; int epid; dir_in = epctx & 0x80; epid = epctx & ~0x80; /* HW endpoint contexts are 0-15; convert to epid based on dir */ epid = (epid * 2) + (dir_in ? 1 : 0); assert(epid >= 1 && epid <= 31); dev = hci->hci_sc; sc = dev->xsc; /* check if device is ready; OS has to initialise it */ if (sc->rtsregs.erstba_p == NULL || (sc->opregs.usbcmd & XHCI_CMD_RS) == 0 || dev->dev_ctx == NULL) return (0); p = XHCI_PORTREG_PTR(sc, hci->hci_port); /* raise event if link U3 (suspended) state */ if (XHCI_PS_PLS_GET(p->portsc) == 3) { p->portsc &= ~XHCI_PS_PLS_MASK; p->portsc |= XHCI_PS_PLS_SET(UPS_PORT_LS_RESUME); if ((p->portsc & XHCI_PS_PLC) != 0) return (0); p->portsc |= XHCI_PS_PLC; pci_xhci_set_evtrb(&evtrb, hci->hci_port, XHCI_TRB_ERROR_SUCCESS, XHCI_TRB_EVENT_PORT_STS_CHANGE); error = pci_xhci_insert_event(sc, &evtrb, 0); if (error != XHCI_TRB_ERROR_SUCCESS) goto done; } dev_ctx = dev->dev_ctx; ep_ctx = &dev_ctx->ctx_ep[epid]; if ((ep_ctx->dwEpCtx0 & 0x7) == XHCI_ST_EPCTX_DISABLED) { - DPRINTF(("xhci device interrupt on disabled endpoint %d\r\n", + DPRINTF(("xhci device interrupt on disabled endpoint %d", epid)); return (0); } - DPRINTF(("xhci device interrupt on endpoint %d\r\n", epid)); + DPRINTF(("xhci device interrupt on endpoint %d", epid)); pci_xhci_device_doorbell(sc, hci->hci_port, epid, 0); done: return (error); } static int pci_xhci_dev_event(struct usb_hci *hci, enum hci_usbev evid, void *param) { - DPRINTF(("xhci device event port %d\r\n", hci->hci_port)); + DPRINTF(("xhci device event port %d", hci->hci_port)); return (0); } static void pci_xhci_device_usage(char *opt) { - fprintf(stderr, "Invalid USB emulation \"%s\"\r\n", opt); + EPRINTLN("Invalid USB emulation \"%s\"", opt); } static int pci_xhci_parse_opts(struct pci_xhci_softc *sc, char *opts) { struct pci_xhci_dev_emu **devices; struct pci_xhci_dev_emu *dev; struct usb_devemu *ue; void *devsc; char *uopt, *xopts, *config; int usb3_port, usb2_port, i; uopt = NULL; usb3_port = sc->usb3_port_start - 1; usb2_port = sc->usb2_port_start - 1; devices = NULL; if (opts == NULL) goto portsfinal; devices = calloc(XHCI_MAX_DEVS, sizeof(struct pci_xhci_dev_emu *)); sc->slots = calloc(XHCI_MAX_SLOTS, sizeof(struct pci_xhci_dev_emu *)); sc->devices = devices; sc->ndevices = 0; uopt = strdup(opts); for (xopts = strtok(uopt, ","); xopts != NULL; xopts = strtok(NULL, ",")) { if (usb2_port == ((sc->usb2_port_start-1) + XHCI_MAX_DEVS/2) || usb3_port == ((sc->usb3_port_start-1) + XHCI_MAX_DEVS/2)) { WPRINTF(("pci_xhci max number of USB 2 or 3 " - "devices reached, max %d\r\n", XHCI_MAX_DEVS/2)); + "devices reached, max %d", XHCI_MAX_DEVS/2)); usb2_port = usb3_port = -1; goto done; } /* device[=] */ if ((config = strchr(xopts, '=')) == NULL) config = ""; /* no config */ else *config++ = '\0'; ue = 
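/*
 * Illustrative example: for a bhyve invocation carrying
 * "-s 4,xhci,tablet", xopts is "tablet" and config is the empty
 * string here, and the lookup below is expected to resolve it to the
 * registered tablet emulation.
 */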
usb_emu_finddev(xopts); if (ue == NULL) { pci_xhci_device_usage(xopts); - DPRINTF(("pci_xhci device not found %s\r\n", xopts)); + DPRINTF(("pci_xhci device not found %s", xopts)); usb2_port = usb3_port = -1; goto done; } - DPRINTF(("pci_xhci adding device %s, opts \"%s\"\r\n", + DPRINTF(("pci_xhci adding device %s, opts \"%s\"", xopts, config)); dev = calloc(1, sizeof(struct pci_xhci_dev_emu)); dev->xsc = sc; dev->hci.hci_sc = dev; dev->hci.hci_intr = pci_xhci_dev_intr; dev->hci.hci_event = pci_xhci_dev_event; if (ue->ue_usbver == 2) { dev->hci.hci_port = usb2_port + 1; devices[usb2_port] = dev; usb2_port++; } else { dev->hci.hci_port = usb3_port + 1; devices[usb3_port] = dev; usb3_port++; } dev->hci.hci_address = 0; devsc = ue->ue_init(&dev->hci, config); if (devsc == NULL) { pci_xhci_device_usage(xopts); usb2_port = usb3_port = -1; goto done; } dev->dev_ue = ue; dev->dev_sc = devsc; /* assign slot number to device */ sc->slots[sc->ndevices] = dev; sc->ndevices++; } portsfinal: sc->portregs = calloc(XHCI_MAX_DEVS, sizeof(struct pci_xhci_portregs)); if (sc->ndevices > 0) { /* port and slot numbering start from 1 */ sc->devices--; sc->portregs--; sc->slots--; for (i = 1; i <= XHCI_MAX_DEVS; i++) { pci_xhci_init_port(sc, i); } } else { - WPRINTF(("pci_xhci no USB devices configured\r\n")); + WPRINTF(("pci_xhci no USB devices configured")); sc->ndevices = 1; } done: if (devices != NULL) { if (usb2_port <= 0 && usb3_port <= 0) { sc->devices = NULL; for (i = 0; devices[i] != NULL; i++) free(devices[i]); sc->ndevices = -1; free(devices); } } free(uopt); return (sc->ndevices); } static int pci_xhci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { struct pci_xhci_softc *sc; int error; if (xhci_in_use) { - WPRINTF(("pci_xhci controller already defined\r\n")); + WPRINTF(("pci_xhci controller already defined")); return (-1); } xhci_in_use = 1; sc = calloc(1, sizeof(struct pci_xhci_softc)); pi->pi_arg = sc; sc->xsc_pi = pi; sc->usb2_port_start = (XHCI_MAX_DEVS/2) + 1; sc->usb3_port_start = 1; /* discover devices */ error = pci_xhci_parse_opts(sc, opts); if (error < 0) goto done; else error = 0; sc->caplength = XHCI_SET_CAPLEN(XHCI_CAPLEN) | XHCI_SET_HCIVERSION(0x0100); sc->hcsparams1 = XHCI_SET_HCSP1_MAXPORTS(XHCI_MAX_DEVS) | XHCI_SET_HCSP1_MAXINTR(1) | /* interrupters */ XHCI_SET_HCSP1_MAXSLOTS(XHCI_MAX_SLOTS); sc->hcsparams2 = XHCI_SET_HCSP2_ERSTMAX(XHCI_ERST_MAX) | XHCI_SET_HCSP2_IST(0x04); sc->hcsparams3 = 0; /* no latency */ sc->hccparams1 = XHCI_SET_HCCP1_NSS(1) | /* no 2nd-streams */ XHCI_SET_HCCP1_SPC(1) | /* short packet */ XHCI_SET_HCCP1_MAXPSA(XHCI_STREAMS_MAX); sc->hccparams2 = XHCI_SET_HCCP2_LEC(1) | XHCI_SET_HCCP2_U3C(1); sc->dboff = XHCI_SET_DOORBELL(XHCI_CAPLEN + XHCI_PORTREGS_START + XHCI_MAX_DEVS * sizeof(struct pci_xhci_portregs)); /* dboff must be 32-bit aligned */ if (sc->dboff & 0x3) sc->dboff = (sc->dboff + 0x3) & ~0x3; /* rtsoff must be 32-bytes aligned */ sc->rtsoff = XHCI_SET_RTSOFFSET(sc->dboff + (XHCI_MAX_SLOTS+1) * 32); if (sc->rtsoff & 0x1F) sc->rtsoff = (sc->rtsoff + 0x1F) & ~0x1F; - DPRINTF(("pci_xhci dboff: 0x%x, rtsoff: 0x%x\r\n", sc->dboff, + DPRINTF(("pci_xhci dboff: 0x%x, rtsoff: 0x%x", sc->dboff, sc->rtsoff)); sc->opregs.usbsts = XHCI_STS_HCH; sc->opregs.pgsz = XHCI_PAGESIZE_4K; pci_xhci_reset(sc); sc->regsend = sc->rtsoff + 0x20 + 32; /* only 1 intrpter */ /* * Set extended capabilities pointer to be after regsend; * value of xecp field is 32-bit offset. 
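 *
 * Worked example: if regsend were 0x1040, XHCI_SET_HCCP1_XECP() below
 * would be handed 0x1040 / 4 = 0x410, because the capability pointer
 * is counted in 32-bit words rather than bytes.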
*/ sc->hccparams1 |= XHCI_SET_HCCP1_XECP(sc->regsend/4); pci_set_cfgdata16(pi, PCIR_DEVICE, 0x1E31); pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_SERIALBUS); pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_SERIALBUS_USB); pci_set_cfgdata8(pi, PCIR_PROGIF,PCIP_SERIALBUS_USB_XHCI); pci_set_cfgdata8(pi, PCI_USBREV, PCI_USB_REV_3_0); pci_emul_add_msicap(pi, 1); /* regsend + xecp registers */ pci_emul_alloc_bar(pi, 0, PCIBAR_MEM32, sc->regsend + 4*32); - DPRINTF(("pci_xhci pci_emu_alloc: %d\r\n", sc->regsend + 4*32)); + DPRINTF(("pci_xhci pci_emu_alloc: %d", sc->regsend + 4*32)); pci_lintr_request(pi); pthread_mutex_init(&sc->mtx, NULL); done: if (error) { free(sc); } return (error); } struct pci_devemu pci_de_xhci = { .pe_emu = "xhci", .pe_init = pci_xhci_init, .pe_barwrite = pci_xhci_write, .pe_barread = pci_xhci_read }; PCI_EMUL_SET(pci_de_xhci); Index: stable/12/usr.sbin/bhyve/ps2kbd.c =================================================================== --- stable/12/usr.sbin/bhyve/ps2kbd.c (revision 358183) +++ stable/12/usr.sbin/bhyve/ps2kbd.c (revision 358184) @@ -1,383 +1,384 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2015 Tycho Nightingale * Copyright (c) 2015 Nahanni Systems Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "atkbdc.h" +#include "debug.h" #include "console.h" /* keyboard device commands */ #define PS2KC_RESET_DEV 0xff #define PS2KC_DISABLE 0xf5 #define PS2KC_ENABLE 0xf4 #define PS2KC_SET_TYPEMATIC 0xf3 #define PS2KC_SEND_DEV_ID 0xf2 #define PS2KC_SET_SCANCODE_SET 0xf0 #define PS2KC_ECHO 0xee #define PS2KC_SET_LEDS 0xed #define PS2KC_BAT_SUCCESS 0xaa #define PS2KC_ACK 0xfa #define PS2KBD_FIFOSZ 16 struct fifo { uint8_t buf[PS2KBD_FIFOSZ]; int rindex; /* index to read from */ int windex; /* index to write to */ int num; /* number of bytes in the fifo */ int size; /* size of the fifo */ }; struct ps2kbd_softc { struct atkbdc_softc *atkbdc_sc; pthread_mutex_t mtx; bool enabled; struct fifo fifo; uint8_t curcmd; /* current command for next byte */ }; #define SCANCODE_E0_PREFIX 1 struct extended_translation { uint32_t keysym; uint8_t scancode; int flags; }; /* * FIXME: Pause/break and Print Screen/SysRq require special handling. */ static const struct extended_translation extended_translations[] = { {0xff08, 0x66}, /* Back space */ {0xff09, 0x0d}, /* Tab */ {0xff0d, 0x5a}, /* Return */ {0xff1b, 0x76}, /* Escape */ {0xff50, 0x6c, SCANCODE_E0_PREFIX}, /* Home */ {0xff51, 0x6b, SCANCODE_E0_PREFIX}, /* Left arrow */ {0xff52, 0x75, SCANCODE_E0_PREFIX}, /* Up arrow */ {0xff53, 0x74, SCANCODE_E0_PREFIX}, /* Right arrow */ {0xff54, 0x72, SCANCODE_E0_PREFIX}, /* Down arrow */ {0xff55, 0x7d, SCANCODE_E0_PREFIX}, /* PgUp */ {0xff56, 0x7a, SCANCODE_E0_PREFIX}, /* PgDown */ {0xff57, 0x69, SCANCODE_E0_PREFIX}, /* End */ {0xff63, 0x70, SCANCODE_E0_PREFIX}, /* Ins */ {0xff8d, 0x5a, SCANCODE_E0_PREFIX}, /* Keypad Enter */ {0xffe1, 0x12}, /* Left shift */ {0xffe2, 0x59}, /* Right shift */ {0xffe3, 0x14}, /* Left control */ {0xffe4, 0x14, SCANCODE_E0_PREFIX}, /* Right control */ /* {0xffe7, XXX}, Left meta */ /* {0xffe8, XXX}, Right meta */ {0xffe9, 0x11}, /* Left alt */ {0xfe03, 0x11, SCANCODE_E0_PREFIX}, /* AltGr */ {0xffea, 0x11, SCANCODE_E0_PREFIX}, /* Right alt */ {0xffeb, 0x1f, SCANCODE_E0_PREFIX}, /* Left Windows */ {0xffec, 0x27, SCANCODE_E0_PREFIX}, /* Right Windows */ {0xffbe, 0x05}, /* F1 */ {0xffbf, 0x06}, /* F2 */ {0xffc0, 0x04}, /* F3 */ {0xffc1, 0x0c}, /* F4 */ {0xffc2, 0x03}, /* F5 */ {0xffc3, 0x0b}, /* F6 */ {0xffc4, 0x83}, /* F7 */ {0xffc5, 0x0a}, /* F8 */ {0xffc6, 0x01}, /* F9 */ {0xffc7, 0x09}, /* F10 */ {0xffc8, 0x78}, /* F11 */ {0xffc9, 0x07}, /* F12 */ {0xffff, 0x71, SCANCODE_E0_PREFIX}, /* Del */ {0xff14, 0x7e}, /* ScrollLock */ /* NumLock and Keypads*/ {0xff7f, 0x77}, /* NumLock */ {0xffaf, 0x4a, SCANCODE_E0_PREFIX}, /* Keypad slash */ {0xffaa, 0x7c}, /* Keypad asterisk */ {0xffad, 0x7b}, /* Keypad minus */ {0xffab, 0x79}, /* Keypad plus */ {0xffb7, 0x6c}, /* Keypad 7 */ {0xff95, 0x6c}, /* Keypad home */ {0xffb8, 0x75}, /* Keypad 8 */ {0xff97, 0x75}, /* Keypad up arrow */ {0xffb9, 0x7d}, /* Keypad 9 */ {0xff9a, 0x7d}, /* Keypad PgUp */ {0xffb4, 0x6b}, /* Keypad 4 */ {0xff96, 0x6b}, /* Keypad left arrow */ {0xffb5, 0x73}, /* Keypad 5 */ {0xff9d, 0x73}, /* Keypad empty */ {0xffb6, 0x74}, /* Keypad 6 */ {0xff98, 0x74}, /* Keypad right arrow */ {0xffb1, 0x69}, /* Keypad 1 */ {0xff9c, 0x69}, /* Keypad end */ {0xffb2, 0x72}, /* Keypad 2 */ {0xff99, 0x72}, /* Keypad down arrow */ {0xffb3, 0x7a}, /* Keypad 3 */ {0xff9b, 0x7a}, /* Keypad PgDown */ {0xffb0, 0x70}, /* Keypad 0 */ {0xff9e, 0x70}, /* Keypad ins */ {0xffae, 0x71}, /* Keypad . 
*/ {0xff9f, 0x71}, /* Keypad del */ {0, 0, 0} /* Terminator */ }; /* ASCII to type 2 scancode lookup table */ static const uint8_t ascii_translations[128] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x16, 0x52, 0x26, 0x25, 0x2e, 0x3d, 0x52, 0x46, 0x45, 0x3e, 0x55, 0x41, 0x4e, 0x49, 0x4a, 0x45, 0x16, 0x1e, 0x26, 0x25, 0x2e, 0x36, 0x3d, 0x3e, 0x46, 0x4c, 0x4c, 0x41, 0x55, 0x49, 0x4a, 0x1e, 0x1c, 0x32, 0x21, 0x23, 0x24, 0x2b, 0x34, 0x33, 0x43, 0x3b, 0x42, 0x4b, 0x3a, 0x31, 0x44, 0x4d, 0x15, 0x2d, 0x1b, 0x2c, 0x3c, 0x2a, 0x1d, 0x22, 0x35, 0x1a, 0x54, 0x5d, 0x5b, 0x36, 0x4e, 0x0e, 0x1c, 0x32, 0x21, 0x23, 0x24, 0x2b, 0x34, 0x33, 0x43, 0x3b, 0x42, 0x4b, 0x3a, 0x31, 0x44, 0x4d, 0x15, 0x2d, 0x1b, 0x2c, 0x3c, 0x2a, 0x1d, 0x22, 0x35, 0x1a, 0x54, 0x5d, 0x5b, 0x0e, 0x00, }; static void fifo_init(struct ps2kbd_softc *sc) { struct fifo *fifo; fifo = &sc->fifo; fifo->size = sizeof(((struct fifo *)0)->buf); } static void fifo_reset(struct ps2kbd_softc *sc) { struct fifo *fifo; fifo = &sc->fifo; bzero(fifo, sizeof(struct fifo)); fifo->size = sizeof(((struct fifo *)0)->buf); } static void fifo_put(struct ps2kbd_softc *sc, uint8_t val) { struct fifo *fifo; fifo = &sc->fifo; if (fifo->num < fifo->size) { fifo->buf[fifo->windex] = val; fifo->windex = (fifo->windex + 1) % fifo->size; fifo->num++; } } static int fifo_get(struct ps2kbd_softc *sc, uint8_t *val) { struct fifo *fifo; fifo = &sc->fifo; if (fifo->num > 0) { *val = fifo->buf[fifo->rindex]; fifo->rindex = (fifo->rindex + 1) % fifo->size; fifo->num--; return (0); } return (-1); } int ps2kbd_read(struct ps2kbd_softc *sc, uint8_t *val) { int retval; pthread_mutex_lock(&sc->mtx); retval = fifo_get(sc, val); pthread_mutex_unlock(&sc->mtx); return (retval); } void ps2kbd_write(struct ps2kbd_softc *sc, uint8_t val) { pthread_mutex_lock(&sc->mtx); if (sc->curcmd) { switch (sc->curcmd) { case PS2KC_SET_TYPEMATIC: fifo_put(sc, PS2KC_ACK); break; case PS2KC_SET_SCANCODE_SET: fifo_put(sc, PS2KC_ACK); break; case PS2KC_SET_LEDS: fifo_put(sc, PS2KC_ACK); break; default: - fprintf(stderr, "Unhandled ps2 keyboard current " - "command byte 0x%02x\n", val); + EPRINTLN("Unhandled ps2 keyboard current " + "command byte 0x%02x", val); break; } sc->curcmd = 0; } else { switch (val) { case 0x00: fifo_put(sc, PS2KC_ACK); break; case PS2KC_RESET_DEV: fifo_reset(sc); fifo_put(sc, PS2KC_ACK); fifo_put(sc, PS2KC_BAT_SUCCESS); break; case PS2KC_DISABLE: sc->enabled = false; fifo_put(sc, PS2KC_ACK); break; case PS2KC_ENABLE: sc->enabled = true; fifo_reset(sc); fifo_put(sc, PS2KC_ACK); break; case PS2KC_SET_TYPEMATIC: sc->curcmd = val; fifo_put(sc, PS2KC_ACK); break; case PS2KC_SEND_DEV_ID: fifo_put(sc, PS2KC_ACK); fifo_put(sc, 0xab); fifo_put(sc, 0x83); break; case PS2KC_SET_SCANCODE_SET: sc->curcmd = val; fifo_put(sc, PS2KC_ACK); break; case PS2KC_ECHO: fifo_put(sc, PS2KC_ECHO); break; case PS2KC_SET_LEDS: sc->curcmd = val; fifo_put(sc, PS2KC_ACK); break; default: - fprintf(stderr, "Unhandled ps2 keyboard command " - "0x%02x\n", val); + EPRINTLN("Unhandled ps2 keyboard command " + "0x%02x", val); break; } } pthread_mutex_unlock(&sc->mtx); } /* * Translate keysym to type 2 scancode and insert into keyboard buffer. 
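 *
 * Worked example: with the tables above, pressing the Up arrow
 * (keysym 0xff52) queues the E0-prefixed make code 0xe0 0x75, and
 * releasing it queues the break sequence 0xe0 0xf0 0x75.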
*/ static void ps2kbd_keysym_queue(struct ps2kbd_softc *sc, int down, uint32_t keysym) { assert(pthread_mutex_isowned_np(&sc->mtx)); int e0_prefix, found; uint8_t code; const struct extended_translation *trans; found = 0; if (keysym < 0x80) { code = ascii_translations[keysym]; e0_prefix = 0; found = 1; } else { for (trans = &(extended_translations[0]); trans->keysym != 0; trans++) { if (keysym == trans->keysym) { code = trans->scancode; e0_prefix = trans->flags & SCANCODE_E0_PREFIX; found = 1; break; } } } if (!found) { - fprintf(stderr, "Unhandled ps2 keyboard keysym 0x%x\n", keysym); + EPRINTLN("Unhandled ps2 keyboard keysym 0x%x", keysym); return; } if (e0_prefix) fifo_put(sc, 0xe0); if (!down) fifo_put(sc, 0xf0); fifo_put(sc, code); } static void ps2kbd_event(int down, uint32_t keysym, void *arg) { struct ps2kbd_softc *sc = arg; int fifo_full; pthread_mutex_lock(&sc->mtx); if (!sc->enabled) { pthread_mutex_unlock(&sc->mtx); return; } fifo_full = sc->fifo.num == PS2KBD_FIFOSZ; ps2kbd_keysym_queue(sc, down, keysym); pthread_mutex_unlock(&sc->mtx); if (!fifo_full) atkbdc_event(sc->atkbdc_sc, 1); } struct ps2kbd_softc * ps2kbd_init(struct atkbdc_softc *atkbdc_sc) { struct ps2kbd_softc *sc; sc = calloc(1, sizeof (struct ps2kbd_softc)); pthread_mutex_init(&sc->mtx, NULL); fifo_init(sc); sc->atkbdc_sc = atkbdc_sc; console_kbd_register(ps2kbd_event, sc, 1); return (sc); } Index: stable/12/usr.sbin/bhyve/ps2mouse.c =================================================================== --- stable/12/usr.sbin/bhyve/ps2mouse.c (revision 358183) +++ stable/12/usr.sbin/bhyve/ps2mouse.c (revision 358184) @@ -1,418 +1,419 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2015 Tycho Nightingale * Copyright (c) 2015 Nahanni Systems Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "atkbdc.h" +#include "debug.h" #include "console.h" /* mouse device commands */ #define PS2MC_RESET_DEV 0xff #define PS2MC_SET_DEFAULTS 0xf6 #define PS2MC_DISABLE 0xf5 #define PS2MC_ENABLE 0xf4 #define PS2MC_SET_SAMPLING_RATE 0xf3 #define PS2MC_SEND_DEV_ID 0xf2 #define PS2MC_SET_REMOTE_MODE 0xf0 #define PS2MC_SEND_DEV_DATA 0xeb #define PS2MC_SET_STREAM_MODE 0xea #define PS2MC_SEND_DEV_STATUS 0xe9 #define PS2MC_SET_RESOLUTION 0xe8 #define PS2MC_SET_SCALING1 0xe7 #define PS2MC_SET_SCALING2 0xe6 #define PS2MC_BAT_SUCCESS 0xaa #define PS2MC_ACK 0xfa /* mouse device id */ #define PS2MOUSE_DEV_ID 0x0 /* mouse data bits */ #define PS2M_DATA_Y_OFLOW 0x80 #define PS2M_DATA_X_OFLOW 0x40 #define PS2M_DATA_Y_SIGN 0x20 #define PS2M_DATA_X_SIGN 0x10 #define PS2M_DATA_AONE 0x08 #define PS2M_DATA_MID_BUTTON 0x04 #define PS2M_DATA_RIGHT_BUTTON 0x02 #define PS2M_DATA_LEFT_BUTTON 0x01 /* mouse status bits */ #define PS2M_STS_REMOTE_MODE 0x40 #define PS2M_STS_ENABLE_DEV 0x20 #define PS2M_STS_SCALING_21 0x10 #define PS2M_STS_MID_BUTTON 0x04 #define PS2M_STS_RIGHT_BUTTON 0x02 #define PS2M_STS_LEFT_BUTTON 0x01 #define PS2MOUSE_FIFOSZ 16 struct fifo { uint8_t buf[PS2MOUSE_FIFOSZ]; int rindex; /* index to read from */ int windex; /* index to write to */ int num; /* number of bytes in the fifo */ int size; /* size of the fifo */ }; struct ps2mouse_softc { struct atkbdc_softc *atkbdc_sc; pthread_mutex_t mtx; uint8_t status; uint8_t resolution; uint8_t sampling_rate; int ctrlenable; struct fifo fifo; uint8_t curcmd; /* current command for next byte */ int cur_x, cur_y; int delta_x, delta_y; }; static void fifo_init(struct ps2mouse_softc *sc) { struct fifo *fifo; fifo = &sc->fifo; fifo->size = sizeof(((struct fifo *)0)->buf); } static void fifo_reset(struct ps2mouse_softc *sc) { struct fifo *fifo; fifo = &sc->fifo; bzero(fifo, sizeof(struct fifo)); fifo->size = sizeof(((struct fifo *)0)->buf); } static void fifo_put(struct ps2mouse_softc *sc, uint8_t val) { struct fifo *fifo; fifo = &sc->fifo; if (fifo->num < fifo->size) { fifo->buf[fifo->windex] = val; fifo->windex = (fifo->windex + 1) % fifo->size; fifo->num++; } } static int fifo_get(struct ps2mouse_softc *sc, uint8_t *val) { struct fifo *fifo; fifo = &sc->fifo; if (fifo->num > 0) { *val = fifo->buf[fifo->rindex]; fifo->rindex = (fifo->rindex + 1) % fifo->size; fifo->num--; return (0); } return (-1); } static void movement_reset(struct ps2mouse_softc *sc) { assert(pthread_mutex_isowned_np(&sc->mtx)); sc->delta_x = 0; sc->delta_y = 0; } static void movement_update(struct ps2mouse_softc *sc, int x, int y) { sc->delta_x += x - sc->cur_x; sc->delta_y += sc->cur_y - y; sc->cur_x = x; sc->cur_y = y; } static void movement_get(struct ps2mouse_softc *sc) { uint8_t val0, val1, val2; assert(pthread_mutex_isowned_np(&sc->mtx)); val0 = PS2M_DATA_AONE; val0 |= sc->status & (PS2M_DATA_LEFT_BUTTON | PS2M_DATA_RIGHT_BUTTON | PS2M_DATA_MID_BUTTON); if (sc->delta_x >= 0) { if (sc->delta_x > 255) { val0 |= PS2M_DATA_X_OFLOW; val1 = 255; } else val1 = sc->delta_x; } else { val0 |= PS2M_DATA_X_SIGN; if (sc->delta_x < -255) { val0 |= PS2M_DATA_X_OFLOW; val1 = 255; } else val1 = sc->delta_x; } sc->delta_x = 0; if (sc->delta_y >= 0) { if (sc->delta_y > 255) { val0 |= PS2M_DATA_Y_OFLOW; val2 = 255; } else val2 = sc->delta_y; } else { val0 |= PS2M_DATA_Y_SIGN; if (sc->delta_y < -255) { val0 |= PS2M_DATA_Y_OFLOW; val2 = 255; } else val2 = sc->delta_y; } sc->delta_y = 0; if 
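/*
 * Illustrative note: the three bytes queued below form one standard
 * PS/2 stream-mode packet (button/overflow flags, then X and Y deltas
 * clamped to +/-255 above), and the packet is only queued when the
 * FIFO has room for all three bytes.
 */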
(sc->fifo.num < (sc->fifo.size - 3)) { fifo_put(sc, val0); fifo_put(sc, val1); fifo_put(sc, val2); } } static void ps2mouse_reset(struct ps2mouse_softc *sc) { assert(pthread_mutex_isowned_np(&sc->mtx)); fifo_reset(sc); movement_reset(sc); sc->status = PS2M_STS_ENABLE_DEV; sc->resolution = 4; sc->sampling_rate = 100; sc->cur_x = 0; sc->cur_y = 0; sc->delta_x = 0; sc->delta_y = 0; } int ps2mouse_read(struct ps2mouse_softc *sc, uint8_t *val) { int retval; pthread_mutex_lock(&sc->mtx); retval = fifo_get(sc, val); pthread_mutex_unlock(&sc->mtx); return (retval); } int ps2mouse_fifocnt(struct ps2mouse_softc *sc) { return (sc->fifo.num); } void ps2mouse_toggle(struct ps2mouse_softc *sc, int enable) { pthread_mutex_lock(&sc->mtx); if (enable) sc->ctrlenable = 1; else { sc->ctrlenable = 0; sc->fifo.rindex = 0; sc->fifo.windex = 0; sc->fifo.num = 0; } pthread_mutex_unlock(&sc->mtx); } void ps2mouse_write(struct ps2mouse_softc *sc, uint8_t val, int insert) { pthread_mutex_lock(&sc->mtx); fifo_reset(sc); if (sc->curcmd) { switch (sc->curcmd) { case PS2MC_SET_SAMPLING_RATE: sc->sampling_rate = val; fifo_put(sc, PS2MC_ACK); break; case PS2MC_SET_RESOLUTION: sc->resolution = val; fifo_put(sc, PS2MC_ACK); break; default: - fprintf(stderr, "Unhandled ps2 mouse current " - "command byte 0x%02x\n", val); + EPRINTLN("Unhandled ps2 mouse current " + "command byte 0x%02x", val); break; } sc->curcmd = 0; } else if (insert) { fifo_put(sc, val); } else { switch (val) { case 0x00: fifo_put(sc, PS2MC_ACK); break; case PS2MC_RESET_DEV: ps2mouse_reset(sc); fifo_put(sc, PS2MC_ACK); fifo_put(sc, PS2MC_BAT_SUCCESS); fifo_put(sc, PS2MOUSE_DEV_ID); break; case PS2MC_SET_DEFAULTS: ps2mouse_reset(sc); fifo_put(sc, PS2MC_ACK); break; case PS2MC_DISABLE: fifo_reset(sc); sc->status &= ~PS2M_STS_ENABLE_DEV; fifo_put(sc, PS2MC_ACK); break; case PS2MC_ENABLE: fifo_reset(sc); sc->status |= PS2M_STS_ENABLE_DEV; fifo_put(sc, PS2MC_ACK); break; case PS2MC_SET_SAMPLING_RATE: sc->curcmd = val; fifo_put(sc, PS2MC_ACK); break; case PS2MC_SEND_DEV_ID: fifo_put(sc, PS2MC_ACK); fifo_put(sc, PS2MOUSE_DEV_ID); break; case PS2MC_SET_REMOTE_MODE: sc->status |= PS2M_STS_REMOTE_MODE; fifo_put(sc, PS2MC_ACK); break; case PS2MC_SEND_DEV_DATA: fifo_put(sc, PS2MC_ACK); movement_get(sc); break; case PS2MC_SET_STREAM_MODE: sc->status &= ~PS2M_STS_REMOTE_MODE; fifo_put(sc, PS2MC_ACK); break; case PS2MC_SEND_DEV_STATUS: fifo_put(sc, PS2MC_ACK); fifo_put(sc, sc->status); fifo_put(sc, sc->resolution); fifo_put(sc, sc->sampling_rate); break; case PS2MC_SET_RESOLUTION: sc->curcmd = val; fifo_put(sc, PS2MC_ACK); break; case PS2MC_SET_SCALING1: case PS2MC_SET_SCALING2: fifo_put(sc, PS2MC_ACK); break; default: fifo_put(sc, PS2MC_ACK); - fprintf(stderr, "Unhandled ps2 mouse command " - "0x%02x\n", val); + EPRINTLN("Unhandled ps2 mouse command " + "0x%02x", val); break; } } pthread_mutex_unlock(&sc->mtx); } static void ps2mouse_event(uint8_t button, int x, int y, void *arg) { struct ps2mouse_softc *sc = arg; pthread_mutex_lock(&sc->mtx); movement_update(sc, x, y); sc->status &= ~(PS2M_STS_LEFT_BUTTON | PS2M_STS_RIGHT_BUTTON | PS2M_STS_MID_BUTTON); if (button & (1 << 0)) sc->status |= PS2M_STS_LEFT_BUTTON; if (button & (1 << 1)) sc->status |= PS2M_STS_MID_BUTTON; if (button & (1 << 2)) sc->status |= PS2M_STS_RIGHT_BUTTON; if ((sc->status & PS2M_STS_ENABLE_DEV) == 0 || !sc->ctrlenable) { /* no data reporting */ pthread_mutex_unlock(&sc->mtx); return; } movement_get(sc); pthread_mutex_unlock(&sc->mtx); if (sc->fifo.num > 0) atkbdc_event(sc->atkbdc_sc, 0); } struct 
ps2mouse_softc * ps2mouse_init(struct atkbdc_softc *atkbdc_sc) { struct ps2mouse_softc *sc; sc = calloc(1, sizeof (struct ps2mouse_softc)); pthread_mutex_init(&sc->mtx, NULL); fifo_init(sc); sc->atkbdc_sc = atkbdc_sc; pthread_mutex_lock(&sc->mtx); ps2mouse_reset(sc); pthread_mutex_unlock(&sc->mtx); console_ptr_register(ps2mouse_event, sc, 1); return (sc); } Index: stable/12/usr.sbin/bhyve/rfb.c =================================================================== --- stable/12/usr.sbin/bhyve/rfb.c (revision 358183) +++ stable/12/usr.sbin/bhyve/rfb.c (revision 358184) @@ -1,1056 +1,1057 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2015 Tycho Nightingale * Copyright (c) 2015 Leon Dang * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyvegc.h" +#include "debug.h" #include "console.h" #include "rfb.h" #include "sockstream.h" #ifndef NO_OPENSSL #include #endif static int rfb_debug = 0; -#define DPRINTF(params) if (rfb_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (rfb_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params #define AUTH_LENGTH 16 #define PASSWD_LENGTH 8 #define SECURITY_TYPE_NONE 1 #define SECURITY_TYPE_VNC_AUTH 2 #define AUTH_FAILED_UNAUTH 1 #define AUTH_FAILED_ERROR 2 struct rfb_softc { int sfd; pthread_t tid; int cfd; int width, height; char *password; bool enc_raw_ok; bool enc_zlib_ok; bool enc_resize_ok; z_stream zstream; uint8_t *zbuf; int zbuflen; int conn_wait; int sending; pthread_mutex_t mtx; pthread_cond_t cond; int hw_crc; uint32_t *crc; /* WxH crc cells */ uint32_t *crc_tmp; /* buffer to store single crc row */ int crc_width, crc_height; }; struct rfb_pixfmt { uint8_t bpp; uint8_t depth; uint8_t bigendian; uint8_t truecolor; uint16_t red_max; uint16_t green_max; uint16_t blue_max; uint8_t red_shift; uint8_t green_shift; uint8_t blue_shift; uint8_t pad[3]; }; struct rfb_srvr_info { uint16_t width; uint16_t height; struct rfb_pixfmt pixfmt; uint32_t namelen; }; struct rfb_pixfmt_msg { uint8_t type; uint8_t pad[3]; struct rfb_pixfmt pixfmt; }; #define RFB_ENCODING_RAW 0 #define RFB_ENCODING_ZLIB 6 #define RFB_ENCODING_RESIZE -223 #define RFB_MAX_WIDTH 2000 #define RFB_MAX_HEIGHT 1200 #define RFB_ZLIB_BUFSZ RFB_MAX_WIDTH*RFB_MAX_HEIGHT*4 /* percentage changes to screen before sending the entire screen */ #define RFB_SEND_ALL_THRESH 25 struct rfb_enc_msg { uint8_t type; uint8_t pad; uint16_t numencs; }; struct rfb_updt_msg { uint8_t type; uint8_t incremental; uint16_t x; uint16_t y; uint16_t width; uint16_t height; }; struct rfb_key_msg { uint8_t type; uint8_t down; uint16_t pad; uint32_t code; }; struct rfb_ptr_msg { uint8_t type; uint8_t button; uint16_t x; uint16_t y; }; struct rfb_srvr_updt_msg { uint8_t type; uint8_t pad; uint16_t numrects; }; struct rfb_srvr_rect_hdr { uint16_t x; uint16_t y; uint16_t width; uint16_t height; uint32_t encoding; }; struct rfb_cuttext_msg { uint8_t type; uint8_t padding[3]; uint32_t length; }; static void rfb_send_server_init_msg(int cfd) { struct bhyvegc_image *gc_image; struct rfb_srvr_info sinfo; gc_image = console_get_image(); sinfo.width = htons(gc_image->width); sinfo.height = htons(gc_image->height); sinfo.pixfmt.bpp = 32; sinfo.pixfmt.depth = 32; sinfo.pixfmt.bigendian = 0; sinfo.pixfmt.truecolor = 1; sinfo.pixfmt.red_max = htons(255); sinfo.pixfmt.green_max = htons(255); sinfo.pixfmt.blue_max = htons(255); sinfo.pixfmt.red_shift = 16; sinfo.pixfmt.green_shift = 8; sinfo.pixfmt.blue_shift = 0; sinfo.namelen = htonl(strlen("bhyve")); (void)stream_write(cfd, &sinfo, sizeof(sinfo)); (void)stream_write(cfd, "bhyve", strlen("bhyve")); } static void rfb_send_resize_update_msg(struct rfb_softc *rc, int cfd) { struct rfb_srvr_updt_msg supdt_msg; struct rfb_srvr_rect_hdr srect_hdr; /* Number of rectangles: 1 */ supdt_msg.type = 0; supdt_msg.pad = 0; supdt_msg.numrects = htons(1); stream_write(cfd, &supdt_msg, sizeof(struct rfb_srvr_updt_msg)); /* Rectangle header */ srect_hdr.x = 
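/*
 * Illustrative note: a DesktopSize pseudo-rectangle carries no pixel
 * payload; x and y stay zero and width/height announce the new
 * framebuffer geometry, which is why only the two headers are sent.
 */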
htons(0); srect_hdr.y = htons(0); srect_hdr.width = htons(rc->width); srect_hdr.height = htons(rc->height); srect_hdr.encoding = htonl(RFB_ENCODING_RESIZE); stream_write(cfd, &srect_hdr, sizeof(struct rfb_srvr_rect_hdr)); } static void rfb_recv_set_pixfmt_msg(struct rfb_softc *rc, int cfd) { struct rfb_pixfmt_msg pixfmt_msg; (void)stream_read(cfd, ((void *)&pixfmt_msg)+1, sizeof(pixfmt_msg)-1); } static void rfb_recv_set_encodings_msg(struct rfb_softc *rc, int cfd) { struct rfb_enc_msg enc_msg; int i; uint32_t encoding; assert((sizeof(enc_msg) - 1) == 3); (void)stream_read(cfd, ((void *)&enc_msg)+1, sizeof(enc_msg)-1); for (i = 0; i < htons(enc_msg.numencs); i++) { (void)stream_read(cfd, &encoding, sizeof(encoding)); switch (htonl(encoding)) { case RFB_ENCODING_RAW: rc->enc_raw_ok = true; break; case RFB_ENCODING_ZLIB: if (!rc->enc_zlib_ok) { deflateInit(&rc->zstream, Z_BEST_SPEED); rc->enc_zlib_ok = true; } break; case RFB_ENCODING_RESIZE: rc->enc_resize_ok = true; break; } } } /* * Calculate CRC32 using SSE4.2; Intel or AMD Bulldozer+ CPUs only */ static __inline uint32_t fast_crc32(void *buf, int len, uint32_t crcval) { uint32_t q = len / sizeof(uint32_t); uint32_t *p = (uint32_t *)buf; while (q--) { asm volatile ( ".byte 0xf2, 0xf, 0x38, 0xf1, 0xf1;" :"=S" (crcval) :"0" (crcval), "c" (*p) ); p++; } return (crcval); } static int rfb_send_rect(struct rfb_softc *rc, int cfd, struct bhyvegc_image *gc, int x, int y, int w, int h) { struct rfb_srvr_updt_msg supdt_msg; struct rfb_srvr_rect_hdr srect_hdr; unsigned long zlen; ssize_t nwrite, total; int err; uint32_t *p; uint8_t *zbufp; /* * Send a single rectangle of the given x, y, w h dimensions. */ /* Number of rectangles: 1 */ supdt_msg.type = 0; supdt_msg.pad = 0; supdt_msg.numrects = htons(1); nwrite = stream_write(cfd, &supdt_msg, sizeof(struct rfb_srvr_updt_msg)); if (nwrite <= 0) return (nwrite); /* Rectangle header */ srect_hdr.x = htons(x); srect_hdr.y = htons(y); srect_hdr.width = htons(w); srect_hdr.height = htons(h); h = y + h; w *= sizeof(uint32_t); if (rc->enc_zlib_ok) { zbufp = rc->zbuf; rc->zstream.total_in = 0; rc->zstream.total_out = 0; for (p = &gc->data[y * gc->width + x]; y < h; y++) { rc->zstream.next_in = (Bytef *)p; rc->zstream.avail_in = w; rc->zstream.next_out = (Bytef *)zbufp; rc->zstream.avail_out = RFB_ZLIB_BUFSZ + 16 - rc->zstream.total_out; rc->zstream.data_type = Z_BINARY; /* Compress with zlib */ err = deflate(&rc->zstream, Z_SYNC_FLUSH); if (err != Z_OK) { - WPRINTF(("zlib[rect] deflate err: %d\n", err)); + WPRINTF(("zlib[rect] deflate err: %d", err)); rc->enc_zlib_ok = false; deflateEnd(&rc->zstream); goto doraw; } zbufp = rc->zbuf + rc->zstream.total_out; p += gc->width; } srect_hdr.encoding = htonl(RFB_ENCODING_ZLIB); nwrite = stream_write(cfd, &srect_hdr, sizeof(struct rfb_srvr_rect_hdr)); if (nwrite <= 0) return (nwrite); zlen = htonl(rc->zstream.total_out); nwrite = stream_write(cfd, &zlen, sizeof(uint32_t)); if (nwrite <= 0) return (nwrite); return (stream_write(cfd, rc->zbuf, rc->zstream.total_out)); } doraw: total = 0; zbufp = rc->zbuf; for (p = &gc->data[y * gc->width + x]; y < h; y++) { memcpy(zbufp, p, w); zbufp += w; total += w; p += gc->width; } srect_hdr.encoding = htonl(RFB_ENCODING_RAW); nwrite = stream_write(cfd, &srect_hdr, sizeof(struct rfb_srvr_rect_hdr)); if (nwrite <= 0) return (nwrite); total = stream_write(cfd, rc->zbuf, total); return (total); } static int rfb_send_all(struct rfb_softc *rc, int cfd, struct bhyvegc_image *gc) { struct rfb_srvr_updt_msg supdt_msg; struct 
rfb_srvr_rect_hdr srect_hdr; ssize_t nwrite; unsigned long zlen; int err; /* * Send the whole thing */ /* Number of rectangles: 1 */ supdt_msg.type = 0; supdt_msg.pad = 0; supdt_msg.numrects = htons(1); nwrite = stream_write(cfd, &supdt_msg, sizeof(struct rfb_srvr_updt_msg)); if (nwrite <= 0) return (nwrite); /* Rectangle header */ srect_hdr.x = 0; srect_hdr.y = 0; srect_hdr.width = htons(gc->width); srect_hdr.height = htons(gc->height); if (rc->enc_zlib_ok) { rc->zstream.next_in = (Bytef *)gc->data; rc->zstream.avail_in = gc->width * gc->height * sizeof(uint32_t); rc->zstream.next_out = (Bytef *)rc->zbuf; rc->zstream.avail_out = RFB_ZLIB_BUFSZ + 16; rc->zstream.data_type = Z_BINARY; rc->zstream.total_in = 0; rc->zstream.total_out = 0; /* Compress with zlib */ err = deflate(&rc->zstream, Z_SYNC_FLUSH); if (err != Z_OK) { - WPRINTF(("zlib deflate err: %d\n", err)); + WPRINTF(("zlib deflate err: %d", err)); rc->enc_zlib_ok = false; deflateEnd(&rc->zstream); goto doraw; } srect_hdr.encoding = htonl(RFB_ENCODING_ZLIB); nwrite = stream_write(cfd, &srect_hdr, sizeof(struct rfb_srvr_rect_hdr)); if (nwrite <= 0) return (nwrite); zlen = htonl(rc->zstream.total_out); nwrite = stream_write(cfd, &zlen, sizeof(uint32_t)); if (nwrite <= 0) return (nwrite); return (stream_write(cfd, rc->zbuf, rc->zstream.total_out)); } doraw: srect_hdr.encoding = htonl(RFB_ENCODING_RAW); nwrite = stream_write(cfd, &srect_hdr, sizeof(struct rfb_srvr_rect_hdr)); if (nwrite <= 0) return (nwrite); nwrite = stream_write(cfd, gc->data, gc->width * gc->height * sizeof(uint32_t)); return (nwrite); } #define PIX_PER_CELL 32 #define PIXCELL_SHIFT 5 #define PIXCELL_MASK 0x1F static int rfb_send_screen(struct rfb_softc *rc, int cfd, int all) { struct bhyvegc_image *gc_image; ssize_t nwrite; int x, y; int celly, cellwidth; int xcells, ycells; int w, h; uint32_t *p; int rem_x, rem_y; /* remainder for resolutions not x32 pixels ratio */ int retval; uint32_t *crc_p, *orig_crc; int changes; console_refresh(); gc_image = console_get_image(); pthread_mutex_lock(&rc->mtx); if (rc->sending) { pthread_mutex_unlock(&rc->mtx); return (1); } rc->sending = 1; pthread_mutex_unlock(&rc->mtx); retval = 0; if (all) { retval = rfb_send_all(rc, cfd, gc_image); goto done; } /* * Calculate the checksum for each 32x32 cell. Send each that * has changed since the last scan. */ /* Resolution changed */ rc->crc_width = gc_image->width; rc->crc_height = gc_image->height; w = rc->crc_width; h = rc->crc_height; xcells = howmany(rc->crc_width, PIX_PER_CELL); ycells = howmany(rc->crc_height, PIX_PER_CELL); rem_x = w & PIXCELL_MASK; rem_y = h & PIXCELL_MASK; if (!rem_y) rem_y = PIX_PER_CELL; p = gc_image->data; /* * Go through all cells and calculate crc. If significant number * of changes, then send entire screen. * crc_tmp is dual purpose: to store the new crc and to flag as * a cell that has changed. 
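 *
 * Worked example, assuming a 1024x768 guest framebuffer: the screen
 * hashes into 32 x 24 = 768 cells, and the RFB_SEND_ALL_THRESH check
 * further down resends the entire framebuffer once at least 25% of
 * them (192 cells) change within a single scan.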
*/ crc_p = rc->crc_tmp - xcells; orig_crc = rc->crc - xcells; changes = 0; memset(rc->crc_tmp, 0, sizeof(uint32_t) * xcells * ycells); for (y = 0; y < h; y++) { if ((y & PIXCELL_MASK) == 0) { crc_p += xcells; orig_crc += xcells; } for (x = 0; x < xcells; x++) { if (x == (xcells - 1) && rem_x > 0) cellwidth = rem_x; else cellwidth = PIX_PER_CELL; if (rc->hw_crc) crc_p[x] = fast_crc32(p, cellwidth * sizeof(uint32_t), crc_p[x]); else crc_p[x] = (uint32_t)crc32(crc_p[x], (Bytef *)p, cellwidth * sizeof(uint32_t)); p += cellwidth; /* check for crc delta if last row in cell */ if ((y & PIXCELL_MASK) == PIXCELL_MASK || y == (h-1)) { if (orig_crc[x] != crc_p[x]) { orig_crc[x] = crc_p[x]; crc_p[x] = 1; changes++; } else { crc_p[x] = 0; } } } } /* If number of changes is > THRESH percent, send the whole screen */ if (((changes * 100) / (xcells * ycells)) >= RFB_SEND_ALL_THRESH) { retval = rfb_send_all(rc, cfd, gc_image); goto done; } /* Go through all cells, and send only changed ones */ crc_p = rc->crc_tmp; for (y = 0; y < h; y += PIX_PER_CELL) { /* previous cell's row */ celly = (y >> PIXCELL_SHIFT); /* Delta check crc to previous set */ for (x = 0; x < xcells; x++) { if (*crc_p++ == 0) continue; if (x == (xcells - 1) && rem_x > 0) cellwidth = rem_x; else cellwidth = PIX_PER_CELL; nwrite = rfb_send_rect(rc, cfd, gc_image, x * PIX_PER_CELL, celly * PIX_PER_CELL, cellwidth, y + PIX_PER_CELL >= h ? rem_y : PIX_PER_CELL); if (nwrite <= 0) { retval = nwrite; goto done; } } } retval = 1; done: pthread_mutex_lock(&rc->mtx); rc->sending = 0; pthread_mutex_unlock(&rc->mtx); return (retval); } static void rfb_recv_update_msg(struct rfb_softc *rc, int cfd, int discardonly) { struct rfb_updt_msg updt_msg; struct bhyvegc_image *gc_image; (void)stream_read(cfd, ((void *)&updt_msg) + 1 , sizeof(updt_msg) - 1); console_refresh(); gc_image = console_get_image(); updt_msg.x = htons(updt_msg.x); updt_msg.y = htons(updt_msg.y); updt_msg.width = htons(updt_msg.width); updt_msg.height = htons(updt_msg.height); if (updt_msg.width != gc_image->width || updt_msg.height != gc_image->height) { rc->width = gc_image->width; rc->height = gc_image->height; if (rc->enc_resize_ok) rfb_send_resize_update_msg(rc, cfd); } if (discardonly) return; rfb_send_screen(rc, cfd, 1); } static void rfb_recv_key_msg(struct rfb_softc *rc, int cfd) { struct rfb_key_msg key_msg; (void)stream_read(cfd, ((void *)&key_msg) + 1, sizeof(key_msg) - 1); console_key_event(key_msg.down, htonl(key_msg.code)); } static void rfb_recv_ptr_msg(struct rfb_softc *rc, int cfd) { struct rfb_ptr_msg ptr_msg; (void)stream_read(cfd, ((void *)&ptr_msg) + 1, sizeof(ptr_msg) - 1); console_ptr_event(ptr_msg.button, htons(ptr_msg.x), htons(ptr_msg.y)); } static void rfb_recv_cuttext_msg(struct rfb_softc *rc, int cfd) { struct rfb_cuttext_msg ct_msg; unsigned char buf[32]; int len; len = stream_read(cfd, ((void *)&ct_msg) + 1, sizeof(ct_msg) - 1); ct_msg.length = htonl(ct_msg.length); while (ct_msg.length > 0) { len = stream_read(cfd, buf, ct_msg.length > sizeof(buf) ? 
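/*
 * Illustrative note: the client's cut-text payload is drained in
 * sizeof(buf)-sized chunks and discarded, as no guest clipboard is
 * implemented here.
 */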
sizeof(buf) : ct_msg.length); ct_msg.length -= len; } } static int64_t timeval_delta(struct timeval *prev, struct timeval *now) { int64_t n1, n2; n1 = now->tv_sec * 1000000 + now->tv_usec; n2 = prev->tv_sec * 1000000 + prev->tv_usec; return (n1 - n2); } static void * rfb_wr_thr(void *arg) { struct rfb_softc *rc; fd_set rfds; struct timeval tv; struct timeval prev_tv; int64_t tdiff; int cfd; int err; rc = arg; cfd = rc->cfd; prev_tv.tv_sec = 0; prev_tv.tv_usec = 0; while (rc->cfd >= 0) { FD_ZERO(&rfds); FD_SET(cfd, &rfds); tv.tv_sec = 0; tv.tv_usec = 10000; err = select(cfd+1, &rfds, NULL, NULL, &tv); if (err < 0) return (NULL); /* Determine if its time to push screen; ~24hz */ gettimeofday(&tv, NULL); tdiff = timeval_delta(&prev_tv, &tv); if (tdiff > 40000) { prev_tv.tv_sec = tv.tv_sec; prev_tv.tv_usec = tv.tv_usec; if (rfb_send_screen(rc, cfd, 0) <= 0) { return (NULL); } } else { /* sleep */ usleep(40000 - tdiff); } } return (NULL); } void rfb_handle(struct rfb_softc *rc, int cfd) { const char *vbuf = "RFB 003.008\n"; unsigned char buf[80]; unsigned char *message = NULL; #ifndef NO_OPENSSL unsigned char challenge[AUTH_LENGTH]; unsigned char keystr[PASSWD_LENGTH]; unsigned char crypt_expected[AUTH_LENGTH]; DES_key_schedule ks; int i; #endif pthread_t tid; uint32_t sres = 0; int len; int perror = 1; rc->cfd = cfd; /* 1a. Send server version */ stream_write(cfd, vbuf, strlen(vbuf)); /* 1b. Read client version */ len = read(cfd, buf, sizeof(buf)); /* 2a. Send security type */ buf[0] = 1; #ifndef NO_OPENSSL if (rc->password) buf[1] = SECURITY_TYPE_VNC_AUTH; else buf[1] = SECURITY_TYPE_NONE; #else buf[1] = SECURITY_TYPE_NONE; #endif stream_write(cfd, buf, 2); /* 2b. Read agreed security type */ len = stream_read(cfd, buf, 1); /* 2c. Do VNC authentication */ switch (buf[0]) { case SECURITY_TYPE_NONE: sres = 0; break; case SECURITY_TYPE_VNC_AUTH: /* * The client encrypts the challenge with DES, using a password * supplied by the user as the key. * To form the key, the password is truncated to * eight characters, or padded with null bytes on the right. * The client then sends the resulting 16-bytes response. */ #ifndef NO_OPENSSL strncpy(keystr, rc->password, PASSWD_LENGTH); /* VNC clients encrypts the challenge with all the bit fields * in each byte of the password mirrored. * Here we flip each byte of the keystr. */ for (i = 0; i < PASSWD_LENGTH; i++) { keystr[i] = (keystr[i] & 0xF0) >> 4 | (keystr[i] & 0x0F) << 4; keystr[i] = (keystr[i] & 0xCC) >> 2 | (keystr[i] & 0x33) << 2; keystr[i] = (keystr[i] & 0xAA) >> 1 | (keystr[i] & 0x55) << 1; } /* Initialize a 16-byte random challenge */ arc4random_buf(challenge, sizeof(challenge)); stream_write(cfd, challenge, AUTH_LENGTH); /* Receive the 16-byte challenge response */ stream_read(cfd, buf, AUTH_LENGTH); memcpy(crypt_expected, challenge, AUTH_LENGTH); /* Encrypt the Challenge with DES */ DES_set_key((const_DES_cblock *)keystr, &ks); DES_ecb_encrypt((const_DES_cblock *)challenge, (const_DES_cblock *)crypt_expected, &ks, DES_ENCRYPT); DES_ecb_encrypt((const_DES_cblock *)(challenge + PASSWD_LENGTH), (const_DES_cblock *)(crypt_expected + PASSWD_LENGTH), &ks, DES_ENCRYPT); if (memcmp(crypt_expected, buf, AUTH_LENGTH) != 0) { message = "Auth Failed: Invalid Password."; sres = htonl(1); } else sres = 0; #else sres = 0; WPRINTF(("Auth not supported, no OpenSSL in your system")); #endif break; } /* 2d. 
Write back a status */ stream_write(cfd, &sres, 4); if (sres) { be32enc(buf, strlen(message)); stream_write(cfd, buf, 4); stream_write(cfd, message, strlen(message)); goto done; } /* 3a. Read client shared-flag byte */ len = stream_read(cfd, buf, 1); /* 4a. Write server-init info */ rfb_send_server_init_msg(cfd); if (!rc->zbuf) { rc->zbuf = malloc(RFB_ZLIB_BUFSZ + 16); assert(rc->zbuf != NULL); } rfb_send_screen(rc, cfd, 1); perror = pthread_create(&tid, NULL, rfb_wr_thr, rc); if (perror == 0) pthread_set_name_np(tid, "rfbout"); /* Now read in client requests. 1st byte identifies type */ for (;;) { len = read(cfd, buf, 1); if (len <= 0) { - DPRINTF(("rfb client exiting\r\n")); + DPRINTF(("rfb client exiting")); break; } switch (buf[0]) { case 0: rfb_recv_set_pixfmt_msg(rc, cfd); break; case 2: rfb_recv_set_encodings_msg(rc, cfd); break; case 3: rfb_recv_update_msg(rc, cfd, 1); break; case 4: rfb_recv_key_msg(rc, cfd); break; case 5: rfb_recv_ptr_msg(rc, cfd); break; case 6: rfb_recv_cuttext_msg(rc, cfd); break; default: - WPRINTF(("rfb unknown cli-code %d!\n", buf[0] & 0xff)); + WPRINTF(("rfb unknown cli-code %d!", buf[0] & 0xff)); goto done; } } done: rc->cfd = -1; if (perror == 0) pthread_join(tid, NULL); if (rc->enc_zlib_ok) deflateEnd(&rc->zstream); } static void * rfb_thr(void *arg) { struct rfb_softc *rc; sigset_t set; int cfd; rc = arg; sigemptyset(&set); sigaddset(&set, SIGPIPE); if (pthread_sigmask(SIG_BLOCK, &set, NULL) != 0) { perror("pthread_sigmask"); return (NULL); } for (;;) { rc->enc_raw_ok = false; rc->enc_zlib_ok = false; rc->enc_resize_ok = false; cfd = accept(rc->sfd, NULL, NULL); if (rc->conn_wait) { pthread_mutex_lock(&rc->mtx); pthread_cond_signal(&rc->cond); pthread_mutex_unlock(&rc->mtx); rc->conn_wait = 0; } rfb_handle(rc, cfd); close(cfd); } /* NOTREACHED */ return (NULL); } static int sse42_supported(void) { u_int cpu_registers[4], ecx; do_cpuid(1, cpu_registers); ecx = cpu_registers[2]; return ((ecx & CPUID2_SSE42) != 0); } int rfb_init(char *hostname, int port, int wait, char *password) { int e; char servname[6]; struct rfb_softc *rc; struct addrinfo *ai; struct addrinfo hints; int on = 1; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; #endif rc = calloc(1, sizeof(struct rfb_softc)); rc->crc = calloc(howmany(RFB_MAX_WIDTH * RFB_MAX_HEIGHT, 32), sizeof(uint32_t)); rc->crc_tmp = calloc(howmany(RFB_MAX_WIDTH * RFB_MAX_HEIGHT, 32), sizeof(uint32_t)); rc->crc_width = RFB_MAX_WIDTH; rc->crc_height = RFB_MAX_HEIGHT; rc->password = password; snprintf(servname, sizeof(servname), "%d", port ? 
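/*
 * Illustrative note: 5900 is the conventional base TCP port for
 * RFB/VNC displays; it is used whenever the caller passes port 0.
 */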
port : 5900); if (!hostname || strlen(hostname) == 0) #if defined(INET) hostname = "127.0.0.1"; #elif defined(INET6) hostname = "[::1]"; #endif memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_UNSPEC; hints.ai_socktype = SOCK_STREAM; hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV | AI_PASSIVE; if ((e = getaddrinfo(hostname, servname, &hints, &ai)) != 0) { - fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(e)); + EPRINTLN("getaddrinfo: %s", gai_strerror(e)); return(-1); } rc->sfd = socket(ai->ai_family, ai->ai_socktype, 0); if (rc->sfd < 0) { perror("socket"); freeaddrinfo(ai); return (-1); } setsockopt(rc->sfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); if (bind(rc->sfd, ai->ai_addr, ai->ai_addrlen) < 0) { perror("bind"); freeaddrinfo(ai); return (-1); } if (listen(rc->sfd, 1) < 0) { perror("listen"); freeaddrinfo(ai); return (-1); } #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE); if (caph_rights_limit(rc->sfd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif rc->hw_crc = sse42_supported(); rc->conn_wait = wait; if (wait) { pthread_mutex_init(&rc->mtx, NULL); pthread_cond_init(&rc->cond, NULL); } pthread_create(&rc->tid, NULL, rfb_thr, rc); pthread_set_name_np(rc->tid, "rfb"); if (wait) { - DPRINTF(("Waiting for rfb client...\n")); + DPRINTF(("Waiting for rfb client...")); pthread_mutex_lock(&rc->mtx); pthread_cond_wait(&rc->cond, &rc->mtx); pthread_mutex_unlock(&rc->mtx); } freeaddrinfo(ai); return (0); } Index: stable/12/usr.sbin/bhyve/smbiostbl.c =================================================================== --- stable/12/usr.sbin/bhyve/smbiostbl.c (revision 358183) +++ stable/12/usr.sbin/bhyve/smbiostbl.c (revision 358184) @@ -1,839 +1,840 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2014 Tycho Nightingale * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" +#include "debug.h" #include "smbiostbl.h" #define MB (1024*1024) #define GB (1024ULL*1024*1024) #define SMBIOS_BASE 0xF1000 /* BHYVE_ACPI_BASE - SMBIOS_BASE) */ #define SMBIOS_MAX_LENGTH (0xF2400 - 0xF1000) #define SMBIOS_TYPE_BIOS 0 #define SMBIOS_TYPE_SYSTEM 1 #define SMBIOS_TYPE_CHASSIS 3 #define SMBIOS_TYPE_PROCESSOR 4 #define SMBIOS_TYPE_MEMARRAY 16 #define SMBIOS_TYPE_MEMDEVICE 17 #define SMBIOS_TYPE_MEMARRAYMAP 19 #define SMBIOS_TYPE_BOOT 32 #define SMBIOS_TYPE_EOT 127 struct smbios_structure { uint8_t type; uint8_t length; uint16_t handle; } __packed; typedef int (*initializer_func_t)(struct smbios_structure *template_entry, const char **template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size); struct smbios_template_entry { struct smbios_structure *entry; const char **strings; initializer_func_t initializer; }; /* * SMBIOS Structure Table Entry Point */ #define SMBIOS_ENTRY_EANCHOR "_SM_" #define SMBIOS_ENTRY_EANCHORLEN 4 #define SMBIOS_ENTRY_IANCHOR "_DMI_" #define SMBIOS_ENTRY_IANCHORLEN 5 struct smbios_entry_point { char eanchor[4]; /* anchor tag */ uint8_t echecksum; /* checksum of entry point structure */ uint8_t eplen; /* length in bytes of entry point */ uint8_t major; /* major version of the SMBIOS spec */ uint8_t minor; /* minor version of the SMBIOS spec */ uint16_t maxssize; /* maximum size in bytes of a struct */ uint8_t revision; /* entry point structure revision */ uint8_t format[5]; /* entry point rev-specific data */ char ianchor[5]; /* intermediate anchor tag */ uint8_t ichecksum; /* intermediate checksum */ uint16_t stlen; /* len in bytes of structure table */ uint32_t staddr; /* physical addr of structure table */ uint16_t stnum; /* number of structure table entries */ uint8_t bcdrev; /* BCD value representing DMI ver */ } __packed; /* * BIOS Information */ #define SMBIOS_FL_ISA 0x00000010 /* ISA is supported */ #define SMBIOS_FL_PCI 0x00000080 /* PCI is supported */ #define SMBIOS_FL_SHADOW 0x00001000 /* BIOS shadowing is allowed */ #define SMBIOS_FL_CDBOOT 0x00008000 /* Boot from CD is supported */ #define SMBIOS_FL_SELBOOT 0x00010000 /* Selectable Boot supported */ #define SMBIOS_FL_EDD 0x00080000 /* EDD Spec is supported */ #define SMBIOS_XB1_FL_ACPI 0x00000001 /* ACPI is supported */ #define SMBIOS_XB2_FL_BBS 0x00000001 /* BIOS Boot Specification */ #define SMBIOS_XB2_FL_VM 0x00000010 /* Virtual Machine */ struct smbios_table_type0 { struct smbios_structure header; uint8_t vendor; /* vendor string */ uint8_t version; /* version string */ uint16_t segment; /* address segment location */ uint8_t rel_date; /* release date */ uint8_t size; /* rom size */ uint64_t cflags; /* characteristics */ uint8_t xc_bytes[2]; /* characteristics ext bytes */ uint8_t sb_major_rel; /* system bios version */ uint8_t sb_minor_rele; uint8_t ecfw_major_rel; /* embedded ctrl fw version */ uint8_t ecfw_minor_rel; } __packed; /* * System Information */ #define SMBIOS_WAKEUP_SWITCH 0x06 /* power switch */ struct smbios_table_type1 { struct smbios_structure header; uint8_t manufacturer; /* manufacturer string */ uint8_t product; /* product name string */ uint8_t version; /* version string */ uint8_t serial; /* serial number string */ uint8_t uuid[16]; /* uuid byte array */ uint8_t wakeup; /* wake-up event */ uint8_t sku; /* sku number string */ uint8_t family; /* family name string */ } __packed; /* * System Enclosure 
or Chassis */ #define SMBIOS_CHT_UNKNOWN 0x02 /* unknown */ #define SMBIOS_CHST_SAFE 0x03 /* safe */ #define SMBIOS_CHSC_NONE 0x03 /* none */ struct smbios_table_type3 { struct smbios_structure header; uint8_t manufacturer; /* manufacturer string */ uint8_t type; /* type */ uint8_t version; /* version string */ uint8_t serial; /* serial number string */ uint8_t asset; /* asset tag string */ uint8_t bustate; /* boot-up state */ uint8_t psstate; /* power supply state */ uint8_t tstate; /* thermal state */ uint8_t security; /* security status */ uint8_t uheight; /* height in 'u's */ uint8_t cords; /* number of power cords */ uint8_t elems; /* number of element records */ uint8_t elemlen; /* length of records */ uint8_t sku; /* sku number string */ } __packed; /* * Processor Information */ #define SMBIOS_PRT_CENTRAL 0x03 /* central processor */ #define SMBIOS_PRF_OTHER 0x01 /* other */ #define SMBIOS_PRS_PRESENT 0x40 /* socket is populated */ #define SMBIOS_PRS_ENABLED 0x1 /* enabled */ #define SMBIOS_PRU_NONE 0x06 /* none */ #define SMBIOS_PFL_64B 0x04 /* 64-bit capable */ struct smbios_table_type4 { struct smbios_structure header; uint8_t socket; /* socket designation string */ uint8_t type; /* processor type */ uint8_t family; /* processor family */ uint8_t manufacturer; /* manufacturer string */ uint64_t cpuid; /* processor cpuid */ uint8_t version; /* version string */ uint8_t voltage; /* voltage */ uint16_t clkspeed; /* ext clock speed in mhz */ uint16_t maxspeed; /* maximum speed in mhz */ uint16_t curspeed; /* current speed in mhz */ uint8_t status; /* status */ uint8_t upgrade; /* upgrade */ uint16_t l1handle; /* l1 cache handle */ uint16_t l2handle; /* l2 cache handle */ uint16_t l3handle; /* l3 cache handle */ uint8_t serial; /* serial number string */ uint8_t asset; /* asset tag string */ uint8_t part; /* part number string */ uint8_t cores; /* cores per socket */ uint8_t ecores; /* enabled cores */ uint8_t threads; /* threads per socket */ uint16_t cflags; /* processor characteristics */ uint16_t family2; /* processor family 2 */ } __packed; /* * Physical Memory Array */ #define SMBIOS_MAL_SYSMB 0x03 /* system board or motherboard */ #define SMBIOS_MAU_SYSTEM 0x03 /* system memory */ #define SMBIOS_MAE_NONE 0x03 /* none */ struct smbios_table_type16 { struct smbios_structure header; uint8_t location; /* physical device location */ uint8_t use; /* device functional purpose */ uint8_t ecc; /* err detect/correct method */ uint32_t size; /* max mem capacity in kb */ uint16_t errhand; /* handle of error (if any) */ uint16_t ndevs; /* num of slots or sockets */ uint64_t xsize; /* max mem capacity in bytes */ } __packed; /* * Memory Device */ #define SMBIOS_MDFF_UNKNOWN 0x02 /* unknown */ #define SMBIOS_MDT_UNKNOWN 0x02 /* unknown */ #define SMBIOS_MDF_UNKNOWN 0x0004 /* unknown */ struct smbios_table_type17 { struct smbios_structure header; uint16_t arrayhand; /* handle of physl mem array */ uint16_t errhand; /* handle of mem error data */ uint16_t twidth; /* total width in bits */ uint16_t dwidth; /* data width in bits */ uint16_t size; /* size in bytes */ uint8_t form; /* form factor */ uint8_t set; /* set */ uint8_t dloc; /* device locator string */ uint8_t bloc; /* phys bank locator string */ uint8_t type; /* memory type */ uint16_t flags; /* memory characteristics */ uint16_t maxspeed; /* maximum speed in mhz */ uint8_t manufacturer; /* manufacturer string */ uint8_t serial; /* serial number string */ uint8_t asset; /* asset tag string */ uint8_t part; /* part number string */ 
uint8_t attributes; /* attributes */ uint32_t xsize; /* extended size in mbs */ uint16_t curspeed; /* current speed in mhz */ uint16_t minvoltage; /* minimum voltage */ uint16_t maxvoltage; /* maximum voltage */ uint16_t curvoltage; /* configured voltage */ } __packed; /* * Memory Array Mapped Address */ struct smbios_table_type19 { struct smbios_structure header; uint32_t saddr; /* start phys addr in kb */ uint32_t eaddr; /* end phys addr in kb */ uint16_t arrayhand; /* physical mem array handle */ uint8_t width; /* num of dev in row */ uint64_t xsaddr; /* start phys addr in bytes */ uint64_t xeaddr; /* end phys addr in bytes */ } __packed; /* * System Boot Information */ #define SMBIOS_BOOT_NORMAL 0 /* no errors detected */ struct smbios_table_type32 { struct smbios_structure header; uint8_t reserved[6]; uint8_t status; /* boot status */ } __packed; /* * End-of-Table */ struct smbios_table_type127 { struct smbios_structure header; } __packed; struct smbios_table_type0 smbios_type0_template = { { SMBIOS_TYPE_BIOS, sizeof (struct smbios_table_type0), 0 }, 1, /* bios vendor string */ 2, /* bios version string */ 0xF000, /* bios address segment location */ 3, /* bios release date */ 0x0, /* bios size (64k * (n + 1) is the size in bytes) */ SMBIOS_FL_ISA | SMBIOS_FL_PCI | SMBIOS_FL_SHADOW | SMBIOS_FL_CDBOOT | SMBIOS_FL_EDD, { SMBIOS_XB1_FL_ACPI, SMBIOS_XB2_FL_BBS | SMBIOS_XB2_FL_VM }, 0x0, /* bios major release */ 0x0, /* bios minor release */ 0xff, /* embedded controller firmware major release */ 0xff /* embedded controller firmware minor release */ }; const char *smbios_type0_strings[] = { "BHYVE", /* vendor string */ "1.00", /* bios version string */ "03/14/2014", /* bios release date string */ NULL }; struct smbios_table_type1 smbios_type1_template = { { SMBIOS_TYPE_SYSTEM, sizeof (struct smbios_table_type1), 0 }, 1, /* manufacturer string */ 2, /* product string */ 3, /* version string */ 4, /* serial number string */ { 0 }, SMBIOS_WAKEUP_SWITCH, 5, /* sku string */ 6 /* family string */ }; static int smbios_type1_initializer(struct smbios_structure *template_entry, const char **template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size); const char *smbios_type1_strings[] = { " ", /* manufacturer string */ "BHYVE", /* product name string */ "1.0", /* version string */ "None", /* serial number string */ "None", /* sku string */ " ", /* family name string */ NULL }; struct smbios_table_type3 smbios_type3_template = { { SMBIOS_TYPE_CHASSIS, sizeof (struct smbios_table_type3), 0 }, 1, /* manufacturer string */ SMBIOS_CHT_UNKNOWN, 2, /* version string */ 3, /* serial number string */ 4, /* asset tag string */ SMBIOS_CHST_SAFE, SMBIOS_CHST_SAFE, SMBIOS_CHST_SAFE, SMBIOS_CHSC_NONE, 0, /* height in 'u's (0=enclosure height unspecified) */ 0, /* number of power cords (0=number unspecified) */ 0, /* number of contained element records */ 0, /* length of records */ 5 /* sku number string */ }; const char *smbios_type3_strings[] = { " ", /* manufacturer string */ "1.0", /* version string */ "None", /* serial number string */ "None", /* asset tag string */ "None", /* sku number string */ NULL }; struct smbios_table_type4 smbios_type4_template = { { SMBIOS_TYPE_PROCESSOR, sizeof (struct smbios_table_type4), 0 }, 1, /* socket designation string */ SMBIOS_PRT_CENTRAL, SMBIOS_PRF_OTHER, 2, /* manufacturer string */ 0, /* cpuid */ 3, /* version string */ 0, /* voltage */ 0, /* external clock frequency in mhz (0=unknown) */ 0, /* maximum frequency in mhz (0=unknown) */ 0, /* current 
frequency in mhz (0=unknown) */ SMBIOS_PRS_PRESENT | SMBIOS_PRS_ENABLED, SMBIOS_PRU_NONE, -1, /* l1 cache handle */ -1, /* l2 cache handle */ -1, /* l3 cache handle */ 4, /* serial number string */ 5, /* asset tag string */ 6, /* part number string */ 0, /* cores per socket (0=unknown) */ 0, /* enabled cores per socket (0=unknown) */ 0, /* threads per socket (0=unknown) */ SMBIOS_PFL_64B, SMBIOS_PRF_OTHER }; const char *smbios_type4_strings[] = { " ", /* socket designation string */ " ", /* manufacturer string */ " ", /* version string */ "None", /* serial number string */ "None", /* asset tag string */ "None", /* part number string */ NULL }; static int smbios_type4_initializer(struct smbios_structure *template_entry, const char **template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size); struct smbios_table_type16 smbios_type16_template = { { SMBIOS_TYPE_MEMARRAY, sizeof (struct smbios_table_type16), 0 }, SMBIOS_MAL_SYSMB, SMBIOS_MAU_SYSTEM, SMBIOS_MAE_NONE, 0x80000000, /* max mem capacity in kb (0x80000000=use extended) */ -1, /* handle of error (if any) */ 0, /* number of slots or sockets (TBD) */ 0 /* extended maximum memory capacity in bytes (TBD) */ }; static int smbios_type16_initializer(struct smbios_structure *template_entry, const char **template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size); struct smbios_table_type17 smbios_type17_template = { { SMBIOS_TYPE_MEMDEVICE, sizeof (struct smbios_table_type17), 0 }, -1, /* handle of physical memory array */ -1, /* handle of memory error data */ 64, /* total width in bits including ecc */ 64, /* data width in bits */ 0x7fff, /* size in bytes (0x7fff=use extended)*/ SMBIOS_MDFF_UNKNOWN, 0, /* set (0x00=none, 0xff=unknown) */ 1, /* device locator string */ 2, /* physical bank locator string */ SMBIOS_MDT_UNKNOWN, SMBIOS_MDF_UNKNOWN, 0, /* maximum memory speed in mhz (0=unknown) */ 3, /* manufacturer string */ 4, /* serial number string */ 5, /* asset tag string */ 6, /* part number string */ 0, /* attributes (0=unknown rank information) */ 0, /* extended size in mb (TBD) */ 0, /* current speed in mhz (0=unknown) */ 0, /* minimum voltage in mv (0=unknown) */ 0, /* maximum voltage in mv (0=unknown) */ 0 /* configured voltage in mv (0=unknown) */ }; const char *smbios_type17_strings[] = { " ", /* device locator string */ " ", /* physical bank locator string */ " ", /* manufacturer string */ "None", /* serial number string */ "None", /* asset tag string */ "None", /* part number string */ NULL }; static int smbios_type17_initializer(struct smbios_structure *template_entry, const char **template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size); struct smbios_table_type19 smbios_type19_template = { { SMBIOS_TYPE_MEMARRAYMAP, sizeof (struct smbios_table_type19), 0 }, 0xffffffff, /* starting phys addr in kb (0xffffffff=use ext) */ 0xffffffff, /* ending phys addr in kb (0xffffffff=use ext) */ -1, /* physical memory array handle */ 1, /* number of devices that form a row */ 0, /* extended starting phys addr in bytes (TDB) */ 0 /* extended ending phys addr in bytes (TDB) */ }; static int smbios_type19_initializer(struct smbios_structure *template_entry, const char **template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size); struct smbios_table_type32 smbios_type32_template = { { SMBIOS_TYPE_BOOT, sizeof (struct smbios_table_type32), 0 }, { 0, 0, 0, 0, 0, 0 }, SMBIOS_BOOT_NORMAL }; struct smbios_table_type127 smbios_type127_template = { { SMBIOS_TYPE_EOT, sizeof 
(struct smbios_table_type127), 0 } }; static int smbios_generic_initializer(struct smbios_structure *template_entry, const char **template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size); static struct smbios_template_entry smbios_template[] = { { (struct smbios_structure *)&smbios_type0_template, smbios_type0_strings, smbios_generic_initializer }, { (struct smbios_structure *)&smbios_type1_template, smbios_type1_strings, smbios_type1_initializer }, { (struct smbios_structure *)&smbios_type3_template, smbios_type3_strings, smbios_generic_initializer }, { (struct smbios_structure *)&smbios_type4_template, smbios_type4_strings, smbios_type4_initializer }, { (struct smbios_structure *)&smbios_type16_template, NULL, smbios_type16_initializer }, { (struct smbios_structure *)&smbios_type17_template, smbios_type17_strings, smbios_type17_initializer }, { (struct smbios_structure *)&smbios_type19_template, NULL, smbios_type19_initializer }, { (struct smbios_structure *)&smbios_type32_template, NULL, smbios_generic_initializer }, { (struct smbios_structure *)&smbios_type127_template, NULL, smbios_generic_initializer }, { NULL,NULL, NULL } }; static uint64_t guest_lomem, guest_himem; static uint16_t type16_handle; static int smbios_generic_initializer(struct smbios_structure *template_entry, const char **template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size) { struct smbios_structure *entry; memcpy(curaddr, template_entry, template_entry->length); entry = (struct smbios_structure *)curaddr; entry->handle = *n + 1; curaddr += entry->length; if (template_strings != NULL) { int i; for (i = 0; template_strings[i] != NULL; i++) { const char *string; int len; string = template_strings[i]; len = strlen(string) + 1; memcpy(curaddr, string, len); curaddr += len; } *curaddr = '\0'; curaddr++; } else { /* Minimum string section is double nul */ *curaddr = '\0'; curaddr++; *curaddr = '\0'; curaddr++; } (*n)++; *endaddr = curaddr; return (0); } static int smbios_type1_initializer(struct smbios_structure *template_entry, const char **template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size) { struct smbios_table_type1 *type1; smbios_generic_initializer(template_entry, template_strings, curaddr, endaddr, n, size); type1 = (struct smbios_table_type1 *)curaddr; if (guest_uuid_str != NULL) { uuid_t uuid; uint32_t status; uuid_from_string(guest_uuid_str, &uuid, &status); if (status != uuid_s_ok) return (-1); uuid_enc_le(&type1->uuid, &uuid); } else { MD5_CTX mdctx; u_char digest[16]; char hostname[MAXHOSTNAMELEN]; /* * Universally unique and yet reproducible are an * oxymoron, however reproducible is desirable in * this case. */ if (gethostname(hostname, sizeof(hostname))) return (-1); MD5Init(&mdctx); MD5Update(&mdctx, vmname, strlen(vmname)); MD5Update(&mdctx, hostname, sizeof(hostname)); MD5Final(digest, &mdctx); /* * Set the variant and version number. 
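 * Per RFC 4122, octet 6 carries the UUID version in its high nibble
 * (3 denotes a name-based UUID derived from MD5, as here) and the top
 * two bits of octet 8 carry the variant (binary 10); the masking below
 * lays the digest out exactly that way.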
*/ digest[6] &= 0x0F; digest[6] |= 0x30; /* version 3 */ digest[8] &= 0x3F; digest[8] |= 0x80; memcpy(&type1->uuid, digest, sizeof (digest)); } return (0); } static int smbios_type4_initializer(struct smbios_structure *template_entry, const char **template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size) { int i; for (i = 0; i < sockets; i++) { struct smbios_table_type4 *type4; char *p; int nstrings, len; smbios_generic_initializer(template_entry, template_strings, curaddr, endaddr, n, size); type4 = (struct smbios_table_type4 *)curaddr; p = curaddr + sizeof (struct smbios_table_type4); nstrings = 0; while (p < *endaddr - 1) { if (*p++ == '\0') nstrings++; } len = sprintf(*endaddr - 1, "CPU #%d", i) + 1; *endaddr += len - 1; *(*endaddr) = '\0'; (*endaddr)++; type4->socket = nstrings + 1; /* Revise cores and threads after update to smbios 3.0 */ if (cores > 254) type4->cores = 0; else type4->cores = cores; /* This threads is total threads in a socket */ if ((cores * threads) > 254) type4->threads = 0; else type4->threads = (cores * threads); curaddr = *endaddr; } return (0); } static int smbios_type16_initializer(struct smbios_structure *template_entry, const char **template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size) { struct smbios_table_type16 *type16; type16_handle = *n; smbios_generic_initializer(template_entry, template_strings, curaddr, endaddr, n, size); type16 = (struct smbios_table_type16 *)curaddr; type16->xsize = guest_lomem + guest_himem; type16->ndevs = guest_himem > 0 ? 2 : 1; return (0); } static int smbios_type17_initializer(struct smbios_structure *template_entry, const char **template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size) { struct smbios_table_type17 *type17; smbios_generic_initializer(template_entry, template_strings, curaddr, endaddr, n, size); type17 = (struct smbios_table_type17 *)curaddr; type17->arrayhand = type16_handle; type17->xsize = guest_lomem; if (guest_himem > 0) { curaddr = *endaddr; smbios_generic_initializer(template_entry, template_strings, curaddr, endaddr, n, size); type17 = (struct smbios_table_type17 *)curaddr; type17->arrayhand = type16_handle; type17->xsize = guest_himem; } return (0); } static int smbios_type19_initializer(struct smbios_structure *template_entry, const char **template_strings, char *curaddr, char **endaddr, uint16_t *n, uint16_t *size) { struct smbios_table_type19 *type19; smbios_generic_initializer(template_entry, template_strings, curaddr, endaddr, n, size); type19 = (struct smbios_table_type19 *)curaddr; type19->arrayhand = type16_handle; type19->xsaddr = 0; type19->xeaddr = guest_lomem; if (guest_himem > 0) { curaddr = *endaddr; smbios_generic_initializer(template_entry, template_strings, curaddr, endaddr, n, size); type19 = (struct smbios_table_type19 *)curaddr; type19->arrayhand = type16_handle; type19->xsaddr = 4*GB; type19->xeaddr = guest_himem; } return (0); } static void smbios_ep_initializer(struct smbios_entry_point *smbios_ep, uint32_t staddr) { memset(smbios_ep, 0, sizeof(*smbios_ep)); memcpy(smbios_ep->eanchor, SMBIOS_ENTRY_EANCHOR, SMBIOS_ENTRY_EANCHORLEN); smbios_ep->eplen = 0x1F; assert(sizeof (struct smbios_entry_point) == smbios_ep->eplen); smbios_ep->major = 2; smbios_ep->minor = 6; smbios_ep->revision = 0; memcpy(smbios_ep->ianchor, SMBIOS_ENTRY_IANCHOR, SMBIOS_ENTRY_IANCHORLEN); smbios_ep->staddr = staddr; smbios_ep->bcdrev = 0x24; } static void smbios_ep_finalizer(struct smbios_entry_point *smbios_ep, uint16_t len, uint16_t num, 
uint16_t maxssize) { uint8_t checksum; int i; smbios_ep->maxssize = maxssize; smbios_ep->stlen = len; smbios_ep->stnum = num; checksum = 0; for (i = 0x10; i < 0x1f; i++) { checksum -= ((uint8_t *)smbios_ep)[i]; } smbios_ep->ichecksum = checksum; checksum = 0; for (i = 0; i < 0x1f; i++) { checksum -= ((uint8_t *)smbios_ep)[i]; } smbios_ep->echecksum = checksum; } int smbios_build(struct vmctx *ctx) { struct smbios_entry_point *smbios_ep; uint16_t n; uint16_t maxssize; char *curaddr, *startaddr, *ststartaddr; int i; int err; guest_lomem = vm_get_lowmem_size(ctx); guest_himem = vm_get_highmem_size(ctx); startaddr = paddr_guest2host(ctx, SMBIOS_BASE, SMBIOS_MAX_LENGTH); if (startaddr == NULL) { - fprintf(stderr, "smbios table requires mapped mem\n"); + EPRINTLN("smbios table requires mapped mem"); return (ENOMEM); } curaddr = startaddr; smbios_ep = (struct smbios_entry_point *)curaddr; smbios_ep_initializer(smbios_ep, SMBIOS_BASE + sizeof(struct smbios_entry_point)); curaddr += sizeof(struct smbios_entry_point); ststartaddr = curaddr; n = 0; maxssize = 0; for (i = 0; smbios_template[i].entry != NULL; i++) { struct smbios_structure *entry; const char **strings; initializer_func_t initializer; char *endaddr; uint16_t size; entry = smbios_template[i].entry; strings = smbios_template[i].strings; initializer = smbios_template[i].initializer; err = (*initializer)(entry, strings, curaddr, &endaddr, &n, &size); if (err != 0) return (err); if (size > maxssize) maxssize = size; curaddr = endaddr; } assert(curaddr - startaddr < SMBIOS_MAX_LENGTH); smbios_ep_finalizer(smbios_ep, curaddr - ststartaddr, n, maxssize); return (0); } Index: stable/12/usr.sbin/bhyve/task_switch.c =================================================================== --- stable/12/usr.sbin/bhyve/task_switch.c (revision 358183) +++ stable/12/usr.sbin/bhyve/task_switch.c (revision 358184) @@ -1,941 +1,942 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2014 Neel Natu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" +#include "debug.h" /* * Using 'struct i386tss' is tempting but causes myriad sign extension * issues because all of its fields are defined as signed integers. */ struct tss32 { uint16_t tss_link; uint16_t rsvd1; uint32_t tss_esp0; uint16_t tss_ss0; uint16_t rsvd2; uint32_t tss_esp1; uint16_t tss_ss1; uint16_t rsvd3; uint32_t tss_esp2; uint16_t tss_ss2; uint16_t rsvd4; uint32_t tss_cr3; uint32_t tss_eip; uint32_t tss_eflags; uint32_t tss_eax; uint32_t tss_ecx; uint32_t tss_edx; uint32_t tss_ebx; uint32_t tss_esp; uint32_t tss_ebp; uint32_t tss_esi; uint32_t tss_edi; uint16_t tss_es; uint16_t rsvd5; uint16_t tss_cs; uint16_t rsvd6; uint16_t tss_ss; uint16_t rsvd7; uint16_t tss_ds; uint16_t rsvd8; uint16_t tss_fs; uint16_t rsvd9; uint16_t tss_gs; uint16_t rsvd10; uint16_t tss_ldt; uint16_t rsvd11; uint16_t tss_trap; uint16_t tss_iomap; }; static_assert(sizeof(struct tss32) == 104, "compile-time assertion failed"); #define SEL_START(sel) (((sel) & ~0x7)) #define SEL_LIMIT(sel) (((sel) | 0x7)) #define TSS_BUSY(type) (((type) & 0x2) != 0) static uint64_t GETREG(struct vmctx *ctx, int vcpu, int reg) { uint64_t val; int error; error = vm_get_register(ctx, vcpu, reg, &val); assert(error == 0); return (val); } static void SETREG(struct vmctx *ctx, int vcpu, int reg, uint64_t val) { int error; error = vm_set_register(ctx, vcpu, reg, val); assert(error == 0); } static struct seg_desc usd_to_seg_desc(struct user_segment_descriptor *usd) { struct seg_desc seg_desc; seg_desc.base = (u_int)USD_GETBASE(usd); if (usd->sd_gran) seg_desc.limit = (u_int)(USD_GETLIMIT(usd) << 12) | 0xfff; else seg_desc.limit = (u_int)USD_GETLIMIT(usd); seg_desc.access = usd->sd_type | usd->sd_dpl << 5 | usd->sd_p << 7; seg_desc.access |= usd->sd_xx << 12; seg_desc.access |= usd->sd_def32 << 14; seg_desc.access |= usd->sd_gran << 15; return (seg_desc); } /* * Inject an exception with an error code that is a segment selector. * The format of the error code is described in section 6.13, "Error Code", * Intel SDM volume 3. * * Bit 0 (EXT) denotes whether the exception occurred during delivery * of an external event like an interrupt. * * Bit 1 (IDT) indicates whether the selector points to a gate descriptor * in the IDT. * * Bit 2(GDT/LDT) has the usual interpretation of Table Indicator (TI). */ static void sel_exception(struct vmctx *ctx, int vcpu, int vector, uint16_t sel, int ext) { /* * Bit 2 from the selector is retained as-is in the error code. * * Bit 1 can be safely cleared because none of the selectors * encountered during task switch emulation refer to a task * gate in the IDT. * * Bit 0 is set depending on the value of 'ext'. */ sel &= ~0x3; if (ext) sel |= 0x1; vm_inject_fault(ctx, vcpu, vector, 1, sel); } /* * Return 0 if the selector 'sel' in within the limits of the GDT/LDT * and non-zero otherwise. */ static int desc_table_limit_check(struct vmctx *ctx, int vcpu, uint16_t sel) { uint64_t base; uint32_t limit, access; int error, reg; reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR; error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access); assert(error == 0); if (reg == VM_REG_GUEST_LDTR) { if (SEG_DESC_UNUSABLE(access) || !SEG_DESC_PRESENT(access)) return (-1); } if (limit < SEL_LIMIT(sel)) return (-1); else return (0); } /* * Read/write the segment descriptor 'desc' into the GDT/LDT slot referenced * by the selector 'sel'. 
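 *
 * A selector encodes a 13-bit table index in bits 15:3, a table
 * indicator (TI, bit 2: 0 = GDT, 1 = LDT) and a requested privilege
 * level in bits 1:0, so SEL_START() above masks off TI/RPL to obtain
 * the byte offset of the 8-byte descriptor within its table, and
 * SEL_LIMIT() gives the offset of that descriptor's last byte.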
* * Returns 0 on success. * Returns 1 if an exception was injected into the guest. * Returns -1 otherwise. */ static int desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, uint16_t sel, struct user_segment_descriptor *desc, bool doread, int *faultptr) { struct iovec iov[2]; uint64_t base; uint32_t limit, access; int error, reg; reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR; error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access); assert(error == 0); assert(limit >= SEL_LIMIT(sel)); error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel), sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov), faultptr); if (error || *faultptr) return (error); if (doread) vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc)); else vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc)); return (0); } static int desc_table_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, uint16_t sel, struct user_segment_descriptor *desc, int *faultptr) { return (desc_table_rw(ctx, vcpu, paging, sel, desc, true, faultptr)); } static int desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, uint16_t sel, struct user_segment_descriptor *desc, int *faultptr) { return (desc_table_rw(ctx, vcpu, paging, sel, desc, false, faultptr)); } /* * Read the TSS descriptor referenced by 'sel' into 'desc'. * * Returns 0 on success. * Returns 1 if an exception was injected into the guest. * Returns -1 otherwise. */ static int read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, uint16_t sel, struct user_segment_descriptor *desc, int *faultptr) { struct vm_guest_paging sup_paging; int error; assert(!ISLDT(sel)); assert(IDXSEL(sel) != 0); /* Fetch the new TSS descriptor */ if (desc_table_limit_check(ctx, vcpu, sel)) { if (ts->reason == TSR_IRET) sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); else sel_exception(ctx, vcpu, IDT_GP, sel, ts->ext); return (1); } sup_paging = ts->paging; sup_paging.cpl = 0; /* implicit supervisor mode */ error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc, faultptr); return (error); } static bool code_desc(int sd_type) { /* code descriptor */ return ((sd_type & 0x18) == 0x18); } static bool stack_desc(int sd_type) { /* writable data descriptor */ return ((sd_type & 0x1A) == 0x12); } static bool data_desc(int sd_type) { /* data descriptor or a readable code descriptor */ return ((sd_type & 0x18) == 0x10 || (sd_type & 0x1A) == 0x1A); } static bool ldt_desc(int sd_type) { return (sd_type == SDT_SYSLDT); } /* * Validate the descriptor 'seg_desc' associated with 'segment'. 
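 *
 * The checks below mirror what the CPU itself performs on a task
 * switch: the selector must lie within the GDT/LDT limit, a NULL
 * selector is tolerated only for data segments and LDTR, the
 * descriptor type must match the segment register being loaded, the
 * segment must be present, and the usual RPL/CPL versus DPL rules
 * apply.  Any failure injects #TS, #SS or #NP with the offending
 * selector as the error code.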
*/ static int validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, int segment, struct seg_desc *seg_desc, int *faultptr) { struct vm_guest_paging sup_paging; struct user_segment_descriptor usd; int error, idtvec; int cpl, dpl, rpl; uint16_t sel, cs; bool ldtseg, codeseg, stackseg, dataseg, conforming; ldtseg = codeseg = stackseg = dataseg = false; switch (segment) { case VM_REG_GUEST_LDTR: ldtseg = true; break; case VM_REG_GUEST_CS: codeseg = true; break; case VM_REG_GUEST_SS: stackseg = true; break; case VM_REG_GUEST_DS: case VM_REG_GUEST_ES: case VM_REG_GUEST_FS: case VM_REG_GUEST_GS: dataseg = true; break; default: assert(0); } /* Get the segment selector */ sel = GETREG(ctx, vcpu, segment); /* LDT selector must point into the GDT */ if (ldtseg && ISLDT(sel)) { sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); return (1); } /* Descriptor table limit check */ if (desc_table_limit_check(ctx, vcpu, sel)) { sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); return (1); } /* NULL selector */ if (IDXSEL(sel) == 0) { /* Code and stack segment selectors cannot be NULL */ if (codeseg || stackseg) { sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); return (1); } seg_desc->base = 0; seg_desc->limit = 0; seg_desc->access = 0x10000; /* unusable */ return (0); } /* Read the descriptor from the GDT/LDT */ sup_paging = ts->paging; sup_paging.cpl = 0; /* implicit supervisor mode */ error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd, faultptr); if (error || *faultptr) return (error); /* Verify that the descriptor type is compatible with the segment */ if ((ldtseg && !ldt_desc(usd.sd_type)) || (codeseg && !code_desc(usd.sd_type)) || (dataseg && !data_desc(usd.sd_type)) || (stackseg && !stack_desc(usd.sd_type))) { sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); return (1); } /* Segment must be marked present */ if (!usd.sd_p) { if (ldtseg) idtvec = IDT_TS; else if (stackseg) idtvec = IDT_SS; else idtvec = IDT_NP; sel_exception(ctx, vcpu, idtvec, sel, ts->ext); return (1); } cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS); cpl = cs & SEL_RPL_MASK; rpl = sel & SEL_RPL_MASK; dpl = usd.sd_dpl; if (stackseg && (rpl != cpl || dpl != cpl)) { sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); return (1); } if (codeseg) { conforming = (usd.sd_type & 0x4) ? true : false; if ((conforming && (cpl < dpl)) || (!conforming && (cpl != dpl))) { sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); return (1); } } if (dataseg) { /* * A data segment is always non-conforming except when it's * descriptor is a readable, conforming code segment. 
*/ if (code_desc(usd.sd_type) && (usd.sd_type & 0x4) != 0) conforming = true; else conforming = false; if (!conforming && (rpl > dpl || cpl > dpl)) { sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); return (1); } } *seg_desc = usd_to_seg_desc(&usd); return (0); } static void tss32_save(struct vmctx *ctx, int vcpu, struct vm_task_switch *task_switch, uint32_t eip, struct tss32 *tss, struct iovec *iov) { /* General purpose registers */ tss->tss_eax = GETREG(ctx, vcpu, VM_REG_GUEST_RAX); tss->tss_ecx = GETREG(ctx, vcpu, VM_REG_GUEST_RCX); tss->tss_edx = GETREG(ctx, vcpu, VM_REG_GUEST_RDX); tss->tss_ebx = GETREG(ctx, vcpu, VM_REG_GUEST_RBX); tss->tss_esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP); tss->tss_ebp = GETREG(ctx, vcpu, VM_REG_GUEST_RBP); tss->tss_esi = GETREG(ctx, vcpu, VM_REG_GUEST_RSI); tss->tss_edi = GETREG(ctx, vcpu, VM_REG_GUEST_RDI); /* Segment selectors */ tss->tss_es = GETREG(ctx, vcpu, VM_REG_GUEST_ES); tss->tss_cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS); tss->tss_ss = GETREG(ctx, vcpu, VM_REG_GUEST_SS); tss->tss_ds = GETREG(ctx, vcpu, VM_REG_GUEST_DS); tss->tss_fs = GETREG(ctx, vcpu, VM_REG_GUEST_FS); tss->tss_gs = GETREG(ctx, vcpu, VM_REG_GUEST_GS); /* eflags and eip */ tss->tss_eflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS); if (task_switch->reason == TSR_IRET) tss->tss_eflags &= ~PSL_NT; tss->tss_eip = eip; /* Copy updated old TSS into guest memory */ vm_copyout(ctx, vcpu, tss, iov, sizeof(struct tss32)); } static void update_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *sd) { int error; error = vm_set_desc(ctx, vcpu, reg, sd->base, sd->limit, sd->access); assert(error == 0); } /* * Update the vcpu registers to reflect the state of the new task. */ static int tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, uint16_t ot_sel, struct tss32 *tss, struct iovec *iov, int *faultptr) { struct seg_desc seg_desc, seg_desc2; uint64_t *pdpte, maxphyaddr, reserved; uint32_t eflags; int error, i; bool nested; nested = false; if (ts->reason != TSR_IRET && ts->reason != TSR_JMP) { tss->tss_link = ot_sel; nested = true; } eflags = tss->tss_eflags; if (nested) eflags |= PSL_NT; /* LDTR */ SETREG(ctx, vcpu, VM_REG_GUEST_LDTR, tss->tss_ldt); /* PBDR */ if (ts->paging.paging_mode != PAGING_MODE_FLAT) { if (ts->paging.paging_mode == PAGING_MODE_PAE) { /* * XXX Assuming 36-bit MAXPHYADDR. */ maxphyaddr = (1UL << 36) - 1; pdpte = paddr_guest2host(ctx, tss->tss_cr3 & ~0x1f, 32); for (i = 0; i < 4; i++) { /* Check reserved bits if the PDPTE is valid */ if (!(pdpte[i] & 0x1)) continue; /* * Bits 2:1, 8:5 and bits above the processor's * maximum physical address are reserved. 
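 * With the 36-bit MAXPHYADDR assumed above, the mask below works out
 * to ~((1UL << 36) - 1) | 0x1E6 == 0xfffffff000000000 | 0x1e6, i.e.
 * bits 63:36 plus bits 8:5 and 2:1 (0x1e6 == binary 111100110).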
*/ reserved = ~maxphyaddr | 0x1E6; if (pdpte[i] & reserved) { vm_inject_gp(ctx, vcpu); return (1); } } SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE0, pdpte[0]); SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE1, pdpte[1]); SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE2, pdpte[2]); SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE3, pdpte[3]); } SETREG(ctx, vcpu, VM_REG_GUEST_CR3, tss->tss_cr3); ts->paging.cr3 = tss->tss_cr3; } /* eflags and eip */ SETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS, eflags); SETREG(ctx, vcpu, VM_REG_GUEST_RIP, tss->tss_eip); /* General purpose registers */ SETREG(ctx, vcpu, VM_REG_GUEST_RAX, tss->tss_eax); SETREG(ctx, vcpu, VM_REG_GUEST_RCX, tss->tss_ecx); SETREG(ctx, vcpu, VM_REG_GUEST_RDX, tss->tss_edx); SETREG(ctx, vcpu, VM_REG_GUEST_RBX, tss->tss_ebx); SETREG(ctx, vcpu, VM_REG_GUEST_RSP, tss->tss_esp); SETREG(ctx, vcpu, VM_REG_GUEST_RBP, tss->tss_ebp); SETREG(ctx, vcpu, VM_REG_GUEST_RSI, tss->tss_esi); SETREG(ctx, vcpu, VM_REG_GUEST_RDI, tss->tss_edi); /* Segment selectors */ SETREG(ctx, vcpu, VM_REG_GUEST_ES, tss->tss_es); SETREG(ctx, vcpu, VM_REG_GUEST_CS, tss->tss_cs); SETREG(ctx, vcpu, VM_REG_GUEST_SS, tss->tss_ss); SETREG(ctx, vcpu, VM_REG_GUEST_DS, tss->tss_ds); SETREG(ctx, vcpu, VM_REG_GUEST_FS, tss->tss_fs); SETREG(ctx, vcpu, VM_REG_GUEST_GS, tss->tss_gs); /* * If this is a nested task then write out the new TSS to update * the previous link field. */ if (nested) vm_copyout(ctx, vcpu, tss, iov, sizeof(*tss)); /* Validate segment descriptors */ error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc, faultptr); if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &seg_desc); /* * Section "Checks on Guest Segment Registers", Intel SDM, Vol 3. * * The SS and CS attribute checks on VM-entry are inter-dependent so * we need to make sure that both segments are valid before updating * either of them. This ensures that the VMCS state can pass the * VM-entry checks so the guest can handle any exception injected * during task switch emulation. */ error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc, faultptr); if (error || *faultptr) return (error); error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2, faultptr); if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_CS, &seg_desc); update_seg_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc2); ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK; error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc, faultptr); if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_DS, &seg_desc); error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc, faultptr); if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_ES, &seg_desc); error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc, faultptr); if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_FS, &seg_desc); error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc, faultptr); if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_GS, &seg_desc); return (0); } /* * Push an error code on the stack of the new task. This is needed if the * task switch was triggered by a hardware exception that causes an error * code to be saved (e.g. #PF). 
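 *
 * Only a handful of exceptions carry an error code (#DF, #TS, #NP,
 * #SS, #GP, #PF and #AC); for those this function replays the
 * hardware's push on the new task's stack, honouring the stack
 * segment's B flag and the alignment-check rules.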
*/ static int push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, int task_type, uint32_t errcode, int *faultptr) { struct iovec iov[2]; struct seg_desc seg_desc; int stacksize, bytes, error; uint64_t gla, cr0, rflags; uint32_t esp; uint16_t stacksel; *faultptr = 0; cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0); rflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS); stacksel = GETREG(ctx, vcpu, VM_REG_GUEST_SS); error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc.base, &seg_desc.limit, &seg_desc.access); assert(error == 0); /* * Section "Error Code" in the Intel SDM vol 3: the error code is * pushed on the stack as a doubleword or word (depending on the * default interrupt, trap or task gate size). */ if (task_type == SDT_SYS386BSY || task_type == SDT_SYS386TSS) bytes = 4; else bytes = 2; /* * PUSH instruction from Intel SDM vol 2: the 'B' flag in the * stack-segment descriptor determines the size of the stack * pointer outside of 64-bit mode. */ if (SEG_DESC_DEF32(seg_desc.access)) stacksize = 4; else stacksize = 2; esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP); esp -= bytes; if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) { sel_exception(ctx, vcpu, IDT_SS, stacksel, 1); *faultptr = 1; return (0); } if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) { vm_inject_ac(ctx, vcpu, 1); *faultptr = 1; return (0); } error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE, iov, nitems(iov), faultptr); if (error || *faultptr) return (error); vm_copyout(ctx, vcpu, &errcode, iov, bytes); SETREG(ctx, vcpu, VM_REG_GUEST_RSP, esp); return (0); } /* * Evaluate return value from helper functions and potentially return to * the VM run loop. */ #define CHKERR(error,fault) \ do { \ assert((error == 0) || (error == EFAULT)); \ if (error) \ return (VMEXIT_ABORT); \ else if (fault) \ return (VMEXIT_CONTINUE); \ } while (0) int vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { struct seg_desc nt; struct tss32 oldtss, newtss; struct vm_task_switch *task_switch; struct vm_guest_paging *paging, sup_paging; struct user_segment_descriptor nt_desc, ot_desc; struct iovec nt_iov[2], ot_iov[2]; uint64_t cr0, ot_base; uint32_t eip, ot_lim, access; int error, ext, fault, minlimit, nt_type, ot_type, vcpu; enum task_switch_reason reason; uint16_t nt_sel, ot_sel; task_switch = &vmexit->u.task_switch; nt_sel = task_switch->tsssel; ext = vmexit->u.task_switch.ext; reason = vmexit->u.task_switch.reason; paging = &vmexit->u.task_switch.paging; vcpu = *pvcpu; assert(paging->cpu_mode == CPU_MODE_PROTECTED); /* * Calculate the instruction pointer to store in the old TSS. */ eip = vmexit->rip + vmexit->inst_length; /* * Section 4.6, "Access Rights" in Intel SDM Vol 3. 
* The following page table accesses are implicitly supervisor mode: * - accesses to GDT or LDT to load segment descriptors * - accesses to the task state segment during task switch */ sup_paging = *paging; sup_paging.cpl = 0; /* implicit supervisor mode */ /* Fetch the new TSS descriptor */ error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc, &fault); CHKERR(error, fault); nt = usd_to_seg_desc(&nt_desc); /* Verify the type of the new TSS */ nt_type = SEG_DESC_TYPE(nt.access); if (nt_type != SDT_SYS386BSY && nt_type != SDT_SYS386TSS && nt_type != SDT_SYS286BSY && nt_type != SDT_SYS286TSS) { sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext); goto done; } /* TSS descriptor must have present bit set */ if (!SEG_DESC_PRESENT(nt.access)) { sel_exception(ctx, vcpu, IDT_NP, nt_sel, ext); goto done; } /* * TSS must have a minimum length of 104 bytes for a 32-bit TSS and * 44 bytes for a 16-bit TSS. */ if (nt_type == SDT_SYS386BSY || nt_type == SDT_SYS386TSS) minlimit = 104 - 1; else if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) minlimit = 44 - 1; else minlimit = 0; assert(minlimit > 0); if (nt.limit < minlimit) { sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext); goto done; } /* TSS must be busy if task switch is due to IRET */ if (reason == TSR_IRET && !TSS_BUSY(nt_type)) { sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext); goto done; } /* * TSS must be available (not busy) if task switch reason is * CALL, JMP, exception or interrupt. */ if (reason != TSR_IRET && TSS_BUSY(nt_type)) { sel_exception(ctx, vcpu, IDT_GP, nt_sel, ext); goto done; } /* Fetch the new TSS */ error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1, PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov), &fault); CHKERR(error, fault); vm_copyin(ctx, vcpu, nt_iov, &newtss, minlimit + 1); /* Get the old TSS selector from the guest's task register */ ot_sel = GETREG(ctx, vcpu, VM_REG_GUEST_TR); if (ISLDT(ot_sel) || IDXSEL(ot_sel) == 0) { /* * This might happen if a task switch was attempted without * ever loading the task register with LTR. In this case the * TR would contain the values from power-on: * (sel = 0, base = 0, limit = 0xffff). */ sel_exception(ctx, vcpu, IDT_TS, ot_sel, task_switch->ext); goto done; } /* Get the old TSS base and limit from the guest's task register */ error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, &ot_base, &ot_lim, &access); assert(error == 0); assert(!SEG_DESC_UNUSABLE(access) && SEG_DESC_PRESENT(access)); ot_type = SEG_DESC_TYPE(access); assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY); /* Fetch the old TSS descriptor */ error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc, &fault); CHKERR(error, fault); /* Get the old TSS */ error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1, PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov), &fault); CHKERR(error, fault); vm_copyin(ctx, vcpu, ot_iov, &oldtss, minlimit + 1); /* * Clear the busy bit in the old TSS descriptor if the task switch * due to an IRET or JMP instruction. 
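 * The busy bit is bit 1 of the descriptor type -- SDT_SYS386TSS and
 * SDT_SYS386BSY differ only in that bit -- which is what TSS_BUSY()
 * tests and what the '&= ~0x2' / '|= 0x2' updates below toggle.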
*/ if (reason == TSR_IRET || reason == TSR_JMP) { ot_desc.sd_type &= ~0x2; error = desc_table_write(ctx, vcpu, &sup_paging, ot_sel, &ot_desc, &fault); CHKERR(error, fault); } if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) { - fprintf(stderr, "Task switch to 16-bit TSS not supported\n"); + EPRINTLN("Task switch to 16-bit TSS not supported"); return (VMEXIT_ABORT); } /* Save processor state in old TSS */ tss32_save(ctx, vcpu, task_switch, eip, &oldtss, ot_iov); /* * If the task switch was triggered for any reason other than IRET * then set the busy bit in the new TSS descriptor. */ if (reason != TSR_IRET) { nt_desc.sd_type |= 0x2; error = desc_table_write(ctx, vcpu, &sup_paging, nt_sel, &nt_desc, &fault); CHKERR(error, fault); } /* Update task register to point at the new TSS */ SETREG(ctx, vcpu, VM_REG_GUEST_TR, nt_sel); /* Update the hidden descriptor state of the task register */ nt = usd_to_seg_desc(&nt_desc); update_seg_desc(ctx, vcpu, VM_REG_GUEST_TR, &nt); /* Set CR0.TS */ cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0); SETREG(ctx, vcpu, VM_REG_GUEST_CR0, cr0 | CR0_TS); /* * We are now committed to the task switch. Any exceptions encountered * after this point will be handled in the context of the new task and * the saved instruction pointer will belong to the new task. */ error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, newtss.tss_eip); assert(error == 0); /* Load processor state from new TSS */ error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov, &fault); CHKERR(error, fault); /* * Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception * caused an error code to be generated, this error code is copied * to the stack of the new task. */ if (task_switch->errcode_valid) { assert(task_switch->ext); assert(task_switch->reason == TSR_IDT_GATE); error = push_errcode(ctx, vcpu, &task_switch->paging, nt_type, task_switch->errcode, &fault); CHKERR(error, fault); } /* * Treatment of virtual-NMI blocking if NMI is delivered through * a task gate. * * Section "Architectural State Before A VM Exit", Intel SDM, Vol3: * If the virtual NMIs VM-execution control is 1, VM entry injects * an NMI, and delivery of the NMI causes a task switch that causes * a VM exit, virtual-NMI blocking is in effect before the VM exit * commences. * * Thus, virtual-NMI blocking is in effect at the time of the task * switch VM exit. */ /* * Treatment of virtual-NMI unblocking on IRET from NMI handler task. * * Section "Changes to Instruction Behavior in VMX Non-Root Operation" * If "virtual NMIs" control is 1 IRET removes any virtual-NMI blocking. * This unblocking of virtual-NMI occurs even if IRET causes a fault. * * Thus, virtual-NMI blocking is cleared at the time of the task switch * VM exit. */ /* * If the task switch was triggered by an event delivered through * the IDT then extinguish the pending event from the vcpu's * exitintinfo. */ if (task_switch->reason == TSR_IDT_GATE) { error = vm_set_intinfo(ctx, vcpu, 0); assert(error == 0); } /* * XXX should inject debug exception if 'T' bit is 1 */ done: return (VMEXIT_CONTINUE); } Index: stable/12/usr.sbin/bhyve/uart_emul.c =================================================================== --- stable/12/usr.sbin/bhyve/uart_emul.c (revision 358183) +++ stable/12/usr.sbin/bhyve/uart_emul.c (revision 358184) @@ -1,711 +1,713 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 NetApp, Inc. * Copyright (c) 2013 Neel Natu * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #ifndef WITHOUT_CAPSICUM #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "mevent.h" #include "uart_emul.h" +#include "debug.h" #define COM1_BASE 0x3F8 #define COM1_IRQ 4 #define COM2_BASE 0x2F8 #define COM2_IRQ 3 #define DEFAULT_RCLK 1843200 #define DEFAULT_BAUD 9600 #define FCR_RX_MASK 0xC0 #define MCR_OUT1 0x04 #define MCR_OUT2 0x08 #define MSR_DELTA_MASK 0x0f #ifndef REG_SCR #define REG_SCR com_scr #endif #define FIFOSZ 16 static bool uart_stdio; /* stdio in use for i/o */ static struct termios tio_stdio_orig; static struct { int baseaddr; int irq; bool inuse; } uart_lres[] = { { COM1_BASE, COM1_IRQ, false}, { COM2_BASE, COM2_IRQ, false}, }; #define UART_NLDEVS (sizeof(uart_lres) / sizeof(uart_lres[0])) struct fifo { uint8_t buf[FIFOSZ]; int rindex; /* index to read from */ int windex; /* index to write to */ int num; /* number of characters in the fifo */ int size; /* size of the fifo */ }; struct ttyfd { bool opened; int rfd; /* fd for reading */ int wfd; /* fd for writing, may be == rfd */ }; struct uart_softc { pthread_mutex_t mtx; /* protects all softc elements */ uint8_t data; /* Data register (R/W) */ uint8_t ier; /* Interrupt enable register (R/W) */ uint8_t lcr; /* Line control register (R/W) */ uint8_t mcr; /* Modem control register (R/W) */ uint8_t lsr; /* Line status register (R/W) */ uint8_t msr; /* Modem status register (R/W) */ uint8_t fcr; /* FIFO control register (W) */ uint8_t scr; /* Scratch register (R/W) */ uint8_t dll; /* Baudrate divisor latch LSB */ uint8_t dlh; /* Baudrate divisor latch MSB */ struct fifo rxfifo; struct mevent *mev; struct ttyfd tty; bool thre_int_pending; /* THRE interrupt pending */ void *arg; uart_intr_func_t intr_assert; uart_intr_func_t intr_deassert; }; static void uart_drain(int fd, enum ev_type ev, void *arg); static void ttyclose(void) { tcsetattr(STDIN_FILENO, TCSANOW, &tio_stdio_orig); } static void ttyopen(struct ttyfd *tf) { struct termios orig, new; tcgetattr(tf->rfd, &orig); new = orig; cfmakeraw(&new); new.c_cflag |= CLOCAL; tcsetattr(tf->rfd, TCSANOW, &new); if (uart_stdio) { tio_stdio_orig = orig; atexit(ttyclose); } + raw_stdio = 1; } static int 
ttyread(struct ttyfd *tf) { unsigned char rb; if (read(tf->rfd, &rb, 1) == 1) return (rb); else return (-1); } static void ttywrite(struct ttyfd *tf, unsigned char wb) { (void)write(tf->wfd, &wb, 1); } static void rxfifo_reset(struct uart_softc *sc, int size) { char flushbuf[32]; struct fifo *fifo; ssize_t nread; int error; fifo = &sc->rxfifo; bzero(fifo, sizeof(struct fifo)); fifo->size = size; if (sc->tty.opened) { /* * Flush any unread input from the tty buffer. */ while (1) { nread = read(sc->tty.rfd, flushbuf, sizeof(flushbuf)); if (nread != sizeof(flushbuf)) break; } /* * Enable mevent to trigger when new characters are available * on the tty fd. */ error = mevent_enable(sc->mev); assert(error == 0); } } static int rxfifo_available(struct uart_softc *sc) { struct fifo *fifo; fifo = &sc->rxfifo; return (fifo->num < fifo->size); } static int rxfifo_putchar(struct uart_softc *sc, uint8_t ch) { struct fifo *fifo; int error; fifo = &sc->rxfifo; if (fifo->num < fifo->size) { fifo->buf[fifo->windex] = ch; fifo->windex = (fifo->windex + 1) % fifo->size; fifo->num++; if (!rxfifo_available(sc)) { if (sc->tty.opened) { /* * Disable mevent callback if the FIFO is full. */ error = mevent_disable(sc->mev); assert(error == 0); } } return (0); } else return (-1); } static int rxfifo_getchar(struct uart_softc *sc) { struct fifo *fifo; int c, error, wasfull; wasfull = 0; fifo = &sc->rxfifo; if (fifo->num > 0) { if (!rxfifo_available(sc)) wasfull = 1; c = fifo->buf[fifo->rindex]; fifo->rindex = (fifo->rindex + 1) % fifo->size; fifo->num--; if (wasfull) { if (sc->tty.opened) { error = mevent_enable(sc->mev); assert(error == 0); } } return (c); } else return (-1); } static int rxfifo_numchars(struct uart_softc *sc) { struct fifo *fifo = &sc->rxfifo; return (fifo->num); } static void uart_opentty(struct uart_softc *sc) { ttyopen(&sc->tty); sc->mev = mevent_add(sc->tty.rfd, EVF_READ, uart_drain, sc); assert(sc->mev != NULL); } static uint8_t modem_status(uint8_t mcr) { uint8_t msr; if (mcr & MCR_LOOPBACK) { /* * In the loopback mode certain bits from the MCR are * reflected back into MSR. */ msr = 0; if (mcr & MCR_RTS) msr |= MSR_CTS; if (mcr & MCR_DTR) msr |= MSR_DSR; if (mcr & MCR_OUT1) msr |= MSR_RI; if (mcr & MCR_OUT2) msr |= MSR_DCD; } else { /* * Always assert DCD and DSR so tty open doesn't block * even if CLOCAL is turned off. */ msr = MSR_DCD | MSR_DSR; } assert((msr & MSR_DELTA_MASK) == 0); return (msr); } /* * The IIR returns a prioritized interrupt reason: * - receive data available * - transmit holding register empty * - modem status change * * Return an interrupt reason if one is available. */ static int uart_intr_reason(struct uart_softc *sc) { if ((sc->lsr & LSR_OE) != 0 && (sc->ier & IER_ERLS) != 0) return (IIR_RLS); else if (rxfifo_numchars(sc) > 0 && (sc->ier & IER_ERXRDY) != 0) return (IIR_RXTOUT); else if (sc->thre_int_pending && (sc->ier & IER_ETXRDY) != 0) return (IIR_TXRDY); else if ((sc->msr & MSR_DELTA_MASK) != 0 && (sc->ier & IER_EMSC) != 0) return (IIR_MLSC); else return (IIR_NOPEND); } static void uart_reset(struct uart_softc *sc) { uint16_t divisor; divisor = DEFAULT_RCLK / DEFAULT_BAUD / 16; sc->dll = divisor; sc->dlh = divisor >> 16; sc->msr = modem_status(sc->mcr); rxfifo_reset(sc, 1); /* no fifo until enabled by software */ } /* * Toggle the COM port's intr pin depending on whether or not we have an * interrupt condition to report to the processor. 
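 *
 * The emulated line is effectively level-triggered: every register
 * read or write finishes by re-evaluating uart_intr_reason() and
 * asserting or deasserting the pin to match, rather than latching an
 * edge.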
*/ static void uart_toggle_intr(struct uart_softc *sc) { uint8_t intr_reason; intr_reason = uart_intr_reason(sc); if (intr_reason == IIR_NOPEND) (*sc->intr_deassert)(sc->arg); else (*sc->intr_assert)(sc->arg); } static void uart_drain(int fd, enum ev_type ev, void *arg) { struct uart_softc *sc; int ch; sc = arg; assert(fd == sc->tty.rfd); assert(ev == EVF_READ); /* * This routine is called in the context of the mevent thread * to take out the softc lock to protect against concurrent * access from a vCPU i/o exit */ pthread_mutex_lock(&sc->mtx); if ((sc->mcr & MCR_LOOPBACK) != 0) { (void) ttyread(&sc->tty); } else { while (rxfifo_available(sc) && ((ch = ttyread(&sc->tty)) != -1)) { rxfifo_putchar(sc, ch); } uart_toggle_intr(sc); } pthread_mutex_unlock(&sc->mtx); } void uart_write(struct uart_softc *sc, int offset, uint8_t value) { int fifosz; uint8_t msr; pthread_mutex_lock(&sc->mtx); /* * Take care of the special case DLAB accesses first */ if ((sc->lcr & LCR_DLAB) != 0) { if (offset == REG_DLL) { sc->dll = value; goto done; } if (offset == REG_DLH) { sc->dlh = value; goto done; } } switch (offset) { case REG_DATA: if (sc->mcr & MCR_LOOPBACK) { if (rxfifo_putchar(sc, value) != 0) sc->lsr |= LSR_OE; } else if (sc->tty.opened) { ttywrite(&sc->tty, value); } /* else drop on floor */ sc->thre_int_pending = true; break; case REG_IER: /* * Apply mask so that bits 4-7 are 0 * Also enables bits 0-3 only if they're 1 */ sc->ier = value & 0x0F; break; case REG_FCR: /* * When moving from FIFO and 16450 mode and vice versa, * the FIFO contents are reset. */ if ((sc->fcr & FCR_ENABLE) ^ (value & FCR_ENABLE)) { fifosz = (value & FCR_ENABLE) ? FIFOSZ : 1; rxfifo_reset(sc, fifosz); } /* * The FCR_ENABLE bit must be '1' for the programming * of other FCR bits to be effective. */ if ((value & FCR_ENABLE) == 0) { sc->fcr = 0; } else { if ((value & FCR_RCV_RST) != 0) rxfifo_reset(sc, FIFOSZ); sc->fcr = value & (FCR_ENABLE | FCR_DMA | FCR_RX_MASK); } break; case REG_LCR: sc->lcr = value; break; case REG_MCR: /* Apply mask so that bits 5-7 are 0 */ sc->mcr = value & 0x1F; msr = modem_status(sc->mcr); /* * Detect if there has been any change between the * previous and the new value of MSR. If there is * then assert the appropriate MSR delta bit. */ if ((msr & MSR_CTS) ^ (sc->msr & MSR_CTS)) sc->msr |= MSR_DCTS; if ((msr & MSR_DSR) ^ (sc->msr & MSR_DSR)) sc->msr |= MSR_DDSR; if ((msr & MSR_DCD) ^ (sc->msr & MSR_DCD)) sc->msr |= MSR_DDCD; if ((sc->msr & MSR_RI) != 0 && (msr & MSR_RI) == 0) sc->msr |= MSR_TERI; /* * Update the value of MSR while retaining the delta * bits. */ sc->msr &= MSR_DELTA_MASK; sc->msr |= msr; break; case REG_LSR: /* * Line status register is not meant to be written to * during normal operation. */ break; case REG_MSR: /* * As far as I can tell MSR is a read-only register. */ break; case REG_SCR: sc->scr = value; break; default: break; } done: uart_toggle_intr(sc); pthread_mutex_unlock(&sc->mtx); } uint8_t uart_read(struct uart_softc *sc, int offset) { uint8_t iir, intr_reason, reg; pthread_mutex_lock(&sc->mtx); /* * Take care of the special case DLAB accesses first */ if ((sc->lcr & LCR_DLAB) != 0) { if (offset == REG_DLL) { reg = sc->dll; goto done; } if (offset == REG_DLH) { reg = sc->dlh; goto done; } } switch (offset) { case REG_DATA: reg = rxfifo_getchar(sc); break; case REG_IER: reg = sc->ier; break; case REG_IIR: iir = (sc->fcr & FCR_ENABLE) ? 
IIR_FIFO_MASK : 0; intr_reason = uart_intr_reason(sc); /* * Deal with side effects of reading the IIR register */ if (intr_reason == IIR_TXRDY) sc->thre_int_pending = false; iir |= intr_reason; reg = iir; break; case REG_LCR: reg = sc->lcr; break; case REG_MCR: reg = sc->mcr; break; case REG_LSR: /* Transmitter is always ready for more data */ sc->lsr |= LSR_TEMT | LSR_THRE; /* Check for new receive data */ if (rxfifo_numchars(sc) > 0) sc->lsr |= LSR_RXRDY; else sc->lsr &= ~LSR_RXRDY; reg = sc->lsr; /* The LSR_OE bit is cleared on LSR read */ sc->lsr &= ~LSR_OE; break; case REG_MSR: /* * MSR delta bits are cleared on read */ reg = sc->msr; sc->msr &= ~MSR_DELTA_MASK; break; case REG_SCR: reg = sc->scr; break; default: reg = 0xFF; break; } done: uart_toggle_intr(sc); pthread_mutex_unlock(&sc->mtx); return (reg); } int uart_legacy_alloc(int which, int *baseaddr, int *irq) { if (which < 0 || which >= UART_NLDEVS || uart_lres[which].inuse) return (-1); uart_lres[which].inuse = true; *baseaddr = uart_lres[which].baseaddr; *irq = uart_lres[which].irq; return (0); } struct uart_softc * uart_init(uart_intr_func_t intr_assert, uart_intr_func_t intr_deassert, void *arg) { struct uart_softc *sc; sc = calloc(1, sizeof(struct uart_softc)); sc->arg = arg; sc->intr_assert = intr_assert; sc->intr_deassert = intr_deassert; pthread_mutex_init(&sc->mtx, NULL); uart_reset(sc); return (sc); } static int uart_stdio_backend(struct uart_softc *sc) { #ifndef WITHOUT_CAPSICUM cap_rights_t rights; cap_ioctl_t cmds[] = { TIOCGETA, TIOCSETA, TIOCGWINSZ }; #endif if (uart_stdio) return (-1); sc->tty.rfd = STDIN_FILENO; sc->tty.wfd = STDOUT_FILENO; sc->tty.opened = true; if (fcntl(sc->tty.rfd, F_SETFL, O_NONBLOCK) != 0) return (-1); if (fcntl(sc->tty.wfd, F_SETFL, O_NONBLOCK) != 0) return (-1); #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_IOCTL, CAP_READ); if (caph_rights_limit(sc->tty.rfd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); if (caph_ioctls_limit(sc->tty.rfd, cmds, nitems(cmds)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif uart_stdio = true; return (0); } static int uart_tty_backend(struct uart_softc *sc, const char *opts) { #ifndef WITHOUT_CAPSICUM cap_rights_t rights; cap_ioctl_t cmds[] = { TIOCGETA, TIOCSETA, TIOCGWINSZ }; #endif int fd; fd = open(opts, O_RDWR | O_NONBLOCK); if (fd < 0 || !isatty(fd)) return (-1); sc->tty.rfd = sc->tty.wfd = fd; sc->tty.opened = true; #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_IOCTL, CAP_READ, CAP_WRITE); if (caph_rights_limit(fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif return (0); } int uart_set_backend(struct uart_softc *sc, const char *opts) { int retval; if (opts == NULL) return (0); if (strcmp("stdio", opts) == 0) retval = uart_stdio_backend(sc); else retval = uart_tty_backend(sc, opts); if (retval == 0) uart_opentty(sc); return (retval); } Index: stable/12/usr.sbin/bhyve/usb_mouse.c =================================================================== --- stable/12/usr.sbin/bhyve/usb_mouse.c (revision 358183) +++ stable/12/usr.sbin/bhyve/usb_mouse.c (revision 358184) @@ -1,802 +1,803 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2014 Leon Dang * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include "usb_emul.h" #include "console.h" #include "bhyvegc.h" +#include "debug.h" static int umouse_debug = 0; -#define DPRINTF(params) if (umouse_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (umouse_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params /* USB endpoint context (1-15) for reporting mouse data events*/ #define UMOUSE_INTR_ENDPT 1 #define UMOUSE_REPORT_DESC_TYPE 0x22 #define UMOUSE_GET_REPORT 0x01 #define UMOUSE_GET_IDLE 0x02 #define UMOUSE_GET_PROTOCOL 0x03 #define UMOUSE_SET_REPORT 0x09 #define UMOUSE_SET_IDLE 0x0A #define UMOUSE_SET_PROTOCOL 0x0B #define HSETW(ptr, val) ptr = { (uint8_t)(val), (uint8_t)((val) >> 8) } enum { UMSTR_LANG, UMSTR_MANUFACTURER, UMSTR_PRODUCT, UMSTR_SERIAL, UMSTR_CONFIG, UMSTR_MAX }; static const char *umouse_desc_strings[] = { "\x04\x09", "BHYVE", "HID Tablet", "01", "HID Tablet Device", }; struct umouse_hid_descriptor { uint8_t bLength; uint8_t bDescriptorType; uint8_t bcdHID[2]; uint8_t bCountryCode; uint8_t bNumDescriptors; uint8_t bReportDescriptorType; uint8_t wItemLength[2]; } __packed; struct umouse_config_desc { struct usb_config_descriptor confd; struct usb_interface_descriptor ifcd; struct umouse_hid_descriptor hidd; struct usb_endpoint_descriptor endpd; struct usb_endpoint_ss_comp_descriptor sscompd; } __packed; #define MOUSE_MAX_X 0x8000 #define MOUSE_MAX_Y 0x8000 static const uint8_t umouse_report_desc[] = { 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ 0x09, 0x02, /* USAGE (Mouse) */ 0xa1, 0x01, /* COLLECTION (Application) */ 0x09, 0x01, /* USAGE (Pointer) */ 0xa1, 0x00, /* COLLECTION (Physical) */ 0x05, 0x09, /* USAGE_PAGE (Button) */ 0x19, 0x01, /* USAGE_MINIMUM (Button 1) */ 0x29, 0x03, /* USAGE_MAXIMUM (Button 3) */ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ 0x75, 0x01, /* REPORT_SIZE (1) */ 0x95, 0x03, /* REPORT_COUNT (3) */ 0x81, 0x02, /* INPUT (Data,Var,Abs); 3 buttons */ 0x75, 0x05, /* REPORT_SIZE (5) */ 0x95, 0x01, /* REPORT_COUNT (1) */ 0x81, 0x03, /* INPUT (Cnst,Var,Abs); padding */ 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ 0x09, 0x30, /* USAGE (X) */ 0x09, 0x31, /* USAGE (Y) */ 0x35, 0x00, /* 
PHYSICAL_MINIMUM (0) */ 0x46, 0xff, 0x7f, /* PHYSICAL_MAXIMUM (0x7fff) */ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ 0x26, 0xff, 0x7f, /* LOGICAL_MAXIMUM (0x7fff) */ 0x75, 0x10, /* REPORT_SIZE (16) */ 0x95, 0x02, /* REPORT_COUNT (2) */ 0x81, 0x02, /* INPUT (Data,Var,Abs) */ 0x05, 0x01, /* USAGE Page (Generic Desktop) */ 0x09, 0x38, /* USAGE (Wheel) */ 0x35, 0x00, /* PHYSICAL_MINIMUM (0) */ 0x45, 0x00, /* PHYSICAL_MAXIMUM (0) */ 0x15, 0x81, /* LOGICAL_MINIMUM (-127) */ 0x25, 0x7f, /* LOGICAL_MAXIMUM (127) */ 0x75, 0x08, /* REPORT_SIZE (8) */ 0x95, 0x01, /* REPORT_COUNT (1) */ 0x81, 0x06, /* INPUT (Data,Var,Rel) */ 0xc0, /* END_COLLECTION */ 0xc0 /* END_COLLECTION */ }; struct umouse_report { uint8_t buttons; /* bits: 0 left, 1 right, 2 middle */ int16_t x; /* x position */ int16_t y; /* y position */ int8_t z; /* z wheel position */ } __packed; #define MSETW(ptr, val) ptr = { (uint8_t)(val), (uint8_t)((val) >> 8) } static struct usb_device_descriptor umouse_dev_desc = { .bLength = sizeof(umouse_dev_desc), .bDescriptorType = UDESC_DEVICE, MSETW(.bcdUSB, UD_USB_3_0), .bMaxPacketSize = 8, /* max packet size */ MSETW(.idVendor, 0xFB5D), /* vendor */ MSETW(.idProduct, 0x0001), /* product */ MSETW(.bcdDevice, 0), /* device version */ .iManufacturer = UMSTR_MANUFACTURER, .iProduct = UMSTR_PRODUCT, .iSerialNumber = UMSTR_SERIAL, .bNumConfigurations = 1, }; static struct umouse_config_desc umouse_confd = { .confd = { .bLength = sizeof(umouse_confd.confd), .bDescriptorType = UDESC_CONFIG, .wTotalLength[0] = sizeof(umouse_confd), .bNumInterface = 1, .bConfigurationValue = 1, .iConfiguration = UMSTR_CONFIG, .bmAttributes = UC_BUS_POWERED | UC_REMOTE_WAKEUP, .bMaxPower = 0, }, .ifcd = { .bLength = sizeof(umouse_confd.ifcd), .bDescriptorType = UDESC_INTERFACE, .bNumEndpoints = 1, .bInterfaceClass = UICLASS_HID, .bInterfaceSubClass = UISUBCLASS_BOOT, .bInterfaceProtocol = UIPROTO_MOUSE, }, .hidd = { .bLength = sizeof(umouse_confd.hidd), .bDescriptorType = 0x21, .bcdHID = { 0x01, 0x10 }, .bCountryCode = 0, .bNumDescriptors = 1, .bReportDescriptorType = UMOUSE_REPORT_DESC_TYPE, .wItemLength = { sizeof(umouse_report_desc), 0 }, }, .endpd = { .bLength = sizeof(umouse_confd.endpd), .bDescriptorType = UDESC_ENDPOINT, .bEndpointAddress = UE_DIR_IN | UMOUSE_INTR_ENDPT, .bmAttributes = UE_INTERRUPT, .wMaxPacketSize[0] = 8, .bInterval = 0xA, }, .sscompd = { .bLength = sizeof(umouse_confd.sscompd), .bDescriptorType = UDESC_ENDPOINT_SS_COMP, .bMaxBurst = 0, .bmAttributes = 0, MSETW(.wBytesPerInterval, 0), }, }; struct umouse_bos_desc { struct usb_bos_descriptor bosd; struct usb_devcap_ss_descriptor usbssd; } __packed; struct umouse_bos_desc umouse_bosd = { .bosd = { .bLength = sizeof(umouse_bosd.bosd), .bDescriptorType = UDESC_BOS, HSETW(.wTotalLength, sizeof(umouse_bosd)), .bNumDeviceCaps = 1, }, .usbssd = { .bLength = sizeof(umouse_bosd.usbssd), .bDescriptorType = UDESC_DEVICE_CAPABILITY, .bDevCapabilityType = 3, .bmAttributes = 0, HSETW(.wSpeedsSupported, 0x08), .bFunctionalitySupport = 3, .bU1DevExitLat = 0xa, /* dummy - not used */ .wU2DevExitLat = { 0x20, 0x00 }, } }; struct umouse_softc { struct usb_hci *hci; char *opt; struct umouse_report um_report; int newdata; struct { uint8_t idle; uint8_t protocol; uint8_t feature; } hid; pthread_mutex_t mtx; pthread_mutex_t ev_mtx; int polling; struct timeval prev_evt; }; static void umouse_event(uint8_t button, int x, int y, void *arg) { struct umouse_softc *sc; struct bhyvegc_image *gc; gc = console_get_image(); if (gc == NULL) { /* not ready */ return; } sc = arg; 
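	/*
	 * Assemble the HID report under the softc lock. The console's
	 * button bits (0 = left, 1 = middle, 2 = right, 3/4 = wheel
	 * up/down) are remapped to the report's button layout (0 left,
	 * 1 right, 2 middle), and the pixel coordinates are rescaled
	 * to the absolute range advertised by the report descriptor:
	 * e.g. x = 512 on a 1024-pixel-wide framebuffer becomes
	 * 0x8000 * 512 / 1024 = 0x4000.
	 */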
pthread_mutex_lock(&sc->mtx); sc->um_report.buttons = 0; sc->um_report.z = 0; if (button & 0x01) sc->um_report.buttons |= 0x01; /* left */ if (button & 0x02) sc->um_report.buttons |= 0x04; /* middle */ if (button & 0x04) sc->um_report.buttons |= 0x02; /* right */ if (button & 0x8) sc->um_report.z = 1; if (button & 0x10) sc->um_report.z = -1; /* scale coords to mouse resolution */ sc->um_report.x = MOUSE_MAX_X * x / gc->width; sc->um_report.y = MOUSE_MAX_Y * y / gc->height; sc->newdata = 1; pthread_mutex_unlock(&sc->mtx); pthread_mutex_lock(&sc->ev_mtx); sc->hci->hci_intr(sc->hci, UE_DIR_IN | UMOUSE_INTR_ENDPT); pthread_mutex_unlock(&sc->ev_mtx); } static void * umouse_init(struct usb_hci *hci, char *opt) { struct umouse_softc *sc; sc = calloc(1, sizeof(struct umouse_softc)); sc->hci = hci; sc->hid.protocol = 1; /* REPORT protocol */ sc->opt = strdup(opt); pthread_mutex_init(&sc->mtx, NULL); pthread_mutex_init(&sc->ev_mtx, NULL); console_ptr_register(umouse_event, sc, 10); return (sc); } #define UREQ(x,y) ((x) | ((y) << 8)) static int umouse_request(void *scarg, struct usb_data_xfer *xfer) { struct umouse_softc *sc; struct usb_data_xfer_block *data; const char *str; uint16_t value; uint16_t index; uint16_t len; uint16_t slen; uint8_t *udata; int err; int i, idx; int eshort; sc = scarg; data = NULL; udata = NULL; idx = xfer->head; for (i = 0; i < xfer->ndata; i++) { xfer->data[idx].bdone = 0; if (data == NULL && USB_DATA_OK(xfer,i)) { data = &xfer->data[idx]; udata = data->buf; } xfer->data[idx].processed = 1; idx = (idx + 1) % USB_MAX_XFER_BLOCKS; } err = USB_ERR_NORMAL_COMPLETION; eshort = 0; if (!xfer->ureq) { - DPRINTF(("umouse_request: port %d\r\n", sc->hci->hci_port)); + DPRINTF(("umouse_request: port %d", sc->hci->hci_port)); goto done; } value = UGETW(xfer->ureq->wValue); index = UGETW(xfer->ureq->wIndex); len = UGETW(xfer->ureq->wLength); DPRINTF(("umouse_request: port %d, type 0x%x, req 0x%x, val 0x%x, " - "idx 0x%x, len %u\r\n", + "idx 0x%x, len %u", sc->hci->hci_port, xfer->ureq->bmRequestType, xfer->ureq->bRequest, value, index, len)); switch (UREQ(xfer->ureq->bRequest, xfer->ureq->bmRequestType)) { case UREQ(UR_GET_CONFIG, UT_READ_DEVICE): - DPRINTF(("umouse: (UR_GET_CONFIG, UT_READ_DEVICE)\r\n")); + DPRINTF(("umouse: (UR_GET_CONFIG, UT_READ_DEVICE)")); if (!data) break; *udata = umouse_confd.confd.bConfigurationValue; data->blen = len > 0 ? 
len - 1 : 0; eshort = data->blen > 0; data->bdone += 1; break; case UREQ(UR_GET_DESCRIPTOR, UT_READ_DEVICE): - DPRINTF(("umouse: (UR_GET_DESCRIPTOR, UT_READ_DEVICE) val %x\r\n", + DPRINTF(("umouse: (UR_GET_DESCRIPTOR, UT_READ_DEVICE) val %x", value >> 8)); if (!data) break; switch (value >> 8) { case UDESC_DEVICE: DPRINTF(("umouse: (->UDESC_DEVICE) len %u ?= " - "sizeof(umouse_dev_desc) %lu\r\n", + "sizeof(umouse_dev_desc) %lu", len, sizeof(umouse_dev_desc))); if ((value & 0xFF) != 0) { err = USB_ERR_IOERROR; goto done; } if (len > sizeof(umouse_dev_desc)) { data->blen = len - sizeof(umouse_dev_desc); len = sizeof(umouse_dev_desc); } else data->blen = 0; memcpy(data->buf, &umouse_dev_desc, len); data->bdone += len; break; case UDESC_CONFIG: - DPRINTF(("umouse: (->UDESC_CONFIG)\r\n")); + DPRINTF(("umouse: (->UDESC_CONFIG)")); if ((value & 0xFF) != 0) { err = USB_ERR_IOERROR; goto done; } if (len > sizeof(umouse_confd)) { data->blen = len - sizeof(umouse_confd); len = sizeof(umouse_confd); } else data->blen = 0; memcpy(data->buf, &umouse_confd, len); data->bdone += len; break; case UDESC_STRING: - DPRINTF(("umouse: (->UDESC_STRING)\r\n")); + DPRINTF(("umouse: (->UDESC_STRING)")); str = NULL; if ((value & 0xFF) < UMSTR_MAX) str = umouse_desc_strings[value & 0xFF]; else goto done; if ((value & 0xFF) == UMSTR_LANG) { udata[0] = 4; udata[1] = UDESC_STRING; data->blen = len - 2; len -= 2; data->bdone += 2; if (len >= 2) { udata[2] = str[0]; udata[3] = str[1]; data->blen -= 2; data->bdone += 2; } else data->blen = 0; goto done; } slen = 2 + strlen(str) * 2; udata[0] = slen; udata[1] = UDESC_STRING; if (len > slen) { data->blen = len - slen; len = slen; } else data->blen = 0; for (i = 2; i < len; i += 2) { udata[i] = *str++; udata[i+1] = '\0'; } data->bdone += slen; break; case UDESC_BOS: - DPRINTF(("umouse: USB3 BOS\r\n")); + DPRINTF(("umouse: USB3 BOS")); if (len > sizeof(umouse_bosd)) { data->blen = len - sizeof(umouse_bosd); len = sizeof(umouse_bosd); } else data->blen = 0; memcpy(udata, &umouse_bosd, len); data->bdone += len; break; default: - DPRINTF(("umouse: unknown(%d)->ERROR\r\n", value >> 8)); + DPRINTF(("umouse: unknown(%d)->ERROR", value >> 8)); err = USB_ERR_IOERROR; goto done; } eshort = data->blen > 0; break; case UREQ(UR_GET_DESCRIPTOR, UT_READ_INTERFACE): DPRINTF(("umouse: (UR_GET_DESCRIPTOR, UT_READ_INTERFACE) " - "0x%x\r\n", (value >> 8))); + "0x%x", (value >> 8))); if (!data) break; switch (value >> 8) { case UMOUSE_REPORT_DESC_TYPE: if (len > sizeof(umouse_report_desc)) { data->blen = len - sizeof(umouse_report_desc); len = sizeof(umouse_report_desc); } else data->blen = 0; memcpy(data->buf, umouse_report_desc, len); data->bdone += len; break; default: - DPRINTF(("umouse: IO ERROR\r\n")); + DPRINTF(("umouse: IO ERROR")); err = USB_ERR_IOERROR; goto done; } eshort = data->blen > 0; break; case UREQ(UR_GET_INTERFACE, UT_READ_INTERFACE): - DPRINTF(("umouse: (UR_GET_INTERFACE, UT_READ_INTERFACE)\r\n")); + DPRINTF(("umouse: (UR_GET_INTERFACE, UT_READ_INTERFACE)")); if (index != 0) { - DPRINTF(("umouse get_interface, invalid index %d\r\n", + DPRINTF(("umouse get_interface, invalid index %d", index)); err = USB_ERR_IOERROR; goto done; } if (!data) break; if (len > 0) { *udata = 0; data->blen = len - 1; } eshort = data->blen > 0; data->bdone += 1; break; case UREQ(UR_GET_STATUS, UT_READ_DEVICE): - DPRINTF(("umouse: (UR_GET_STATUS, UT_READ_DEVICE)\r\n")); + DPRINTF(("umouse: (UR_GET_STATUS, UT_READ_DEVICE)")); if (data != NULL && len > 1) { if (sc->hid.feature == 
UF_DEVICE_REMOTE_WAKEUP) USETW(udata, UDS_REMOTE_WAKEUP); else USETW(udata, 0); data->blen = len - 2; data->bdone += 2; } eshort = data->blen > 0; break; case UREQ(UR_GET_STATUS, UT_READ_INTERFACE): case UREQ(UR_GET_STATUS, UT_READ_ENDPOINT): - DPRINTF(("umouse: (UR_GET_STATUS, UT_READ_INTERFACE)\r\n")); + DPRINTF(("umouse: (UR_GET_STATUS, UT_READ_INTERFACE)")); if (data != NULL && len > 1) { USETW(udata, 0); data->blen = len - 2; data->bdone += 2; } eshort = data->blen > 0; break; case UREQ(UR_SET_ADDRESS, UT_WRITE_DEVICE): /* XXX Controller should've handled this */ - DPRINTF(("umouse set address %u\r\n", value)); + DPRINTF(("umouse set address %u", value)); break; case UREQ(UR_SET_CONFIG, UT_WRITE_DEVICE): - DPRINTF(("umouse set config %u\r\n", value)); + DPRINTF(("umouse set config %u", value)); break; case UREQ(UR_SET_DESCRIPTOR, UT_WRITE_DEVICE): - DPRINTF(("umouse set descriptor %u\r\n", value)); + DPRINTF(("umouse set descriptor %u", value)); break; case UREQ(UR_CLEAR_FEATURE, UT_WRITE_DEVICE): - DPRINTF(("umouse: (UR_SET_FEATURE, UT_WRITE_DEVICE) %x\r\n", value)); + DPRINTF(("umouse: (UR_SET_FEATURE, UT_WRITE_DEVICE) %x", value)); if (value == UF_DEVICE_REMOTE_WAKEUP) sc->hid.feature = 0; break; case UREQ(UR_SET_FEATURE, UT_WRITE_DEVICE): - DPRINTF(("umouse: (UR_SET_FEATURE, UT_WRITE_DEVICE) %x\r\n", value)); + DPRINTF(("umouse: (UR_SET_FEATURE, UT_WRITE_DEVICE) %x", value)); if (value == UF_DEVICE_REMOTE_WAKEUP) sc->hid.feature = UF_DEVICE_REMOTE_WAKEUP; break; case UREQ(UR_CLEAR_FEATURE, UT_WRITE_INTERFACE): case UREQ(UR_CLEAR_FEATURE, UT_WRITE_ENDPOINT): case UREQ(UR_SET_FEATURE, UT_WRITE_INTERFACE): case UREQ(UR_SET_FEATURE, UT_WRITE_ENDPOINT): - DPRINTF(("umouse: (UR_CLEAR_FEATURE, UT_WRITE_INTERFACE)\r\n")); + DPRINTF(("umouse: (UR_CLEAR_FEATURE, UT_WRITE_INTERFACE)")); err = USB_ERR_IOERROR; goto done; case UREQ(UR_SET_INTERFACE, UT_WRITE_INTERFACE): - DPRINTF(("umouse set interface %u\r\n", value)); + DPRINTF(("umouse set interface %u", value)); break; case UREQ(UR_ISOCH_DELAY, UT_WRITE_DEVICE): - DPRINTF(("umouse set isoch delay %u\r\n", value)); + DPRINTF(("umouse set isoch delay %u", value)); break; case UREQ(UR_SET_SEL, 0): - DPRINTF(("umouse set sel\r\n")); + DPRINTF(("umouse set sel")); break; case UREQ(UR_SYNCH_FRAME, UT_WRITE_ENDPOINT): - DPRINTF(("umouse synch frame\r\n")); + DPRINTF(("umouse synch frame")); break; /* HID device requests */ case UREQ(UMOUSE_GET_REPORT, UT_READ_CLASS_INTERFACE): DPRINTF(("umouse: (UMOUSE_GET_REPORT, UT_READ_CLASS_INTERFACE) " - "0x%x\r\n", (value >> 8))); + "0x%x", (value >> 8))); if (!data) break; if ((value >> 8) == 0x01 && len >= sizeof(sc->um_report)) { /* TODO read from backend */ if (len > sizeof(sc->um_report)) { data->blen = len - sizeof(sc->um_report); len = sizeof(sc->um_report); } else data->blen = 0; memcpy(data->buf, &sc->um_report, len); data->bdone += len; } else { err = USB_ERR_IOERROR; goto done; } eshort = data->blen > 0; break; case UREQ(UMOUSE_GET_IDLE, UT_READ_CLASS_INTERFACE): if (data != NULL && len > 0) { *udata = sc->hid.idle; data->blen = len - 1; data->bdone += 1; } eshort = data->blen > 0; break; case UREQ(UMOUSE_GET_PROTOCOL, UT_READ_CLASS_INTERFACE): if (data != NULL && len > 0) { *udata = sc->hid.protocol; data->blen = len - 1; data->bdone += 1; } eshort = data->blen > 0; break; case UREQ(UMOUSE_SET_REPORT, UT_WRITE_CLASS_INTERFACE): - DPRINTF(("umouse: (UMOUSE_SET_REPORT, UT_WRITE_CLASS_INTERFACE) ignored\r\n")); + DPRINTF(("umouse: (UMOUSE_SET_REPORT, UT_WRITE_CLASS_INTERFACE) ignored")); break; 
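	/*
	 * For the HID SET_IDLE request handled below, the interesting
	 * parameter travels in the high byte of wValue (the idle
	 * duration in 4 ms units, with the report ID in the low byte),
	 * which is why the handler extracts it as
	 * UGETW(xfer->ureq->wValue) >> 8; the SET_PROTOCOL handler
	 * pulls its value out of the same byte.
	 */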
case UREQ(UMOUSE_SET_IDLE, UT_WRITE_CLASS_INTERFACE): sc->hid.idle = UGETW(xfer->ureq->wValue) >> 8; - DPRINTF(("umouse: (UMOUSE_SET_IDLE, UT_WRITE_CLASS_INTERFACE) %x\r\n", + DPRINTF(("umouse: (UMOUSE_SET_IDLE, UT_WRITE_CLASS_INTERFACE) %x", sc->hid.idle)); break; case UREQ(UMOUSE_SET_PROTOCOL, UT_WRITE_CLASS_INTERFACE): sc->hid.protocol = UGETW(xfer->ureq->wValue) >> 8; - DPRINTF(("umouse: (UR_CLEAR_FEATURE, UT_WRITE_CLASS_INTERFACE) %x\r\n", + DPRINTF(("umouse: (UR_CLEAR_FEATURE, UT_WRITE_CLASS_INTERFACE) %x", sc->hid.protocol)); break; default: - DPRINTF(("**** umouse request unhandled\r\n")); + DPRINTF(("**** umouse request unhandled")); err = USB_ERR_IOERROR; break; } done: if (xfer->ureq && (xfer->ureq->bmRequestType & UT_WRITE) && (err == USB_ERR_NORMAL_COMPLETION) && (data != NULL)) data->blen = 0; else if (eshort) err = USB_ERR_SHORT_XFER; - DPRINTF(("umouse request error code %d (0=ok), blen %u txlen %u\r\n", + DPRINTF(("umouse request error code %d (0=ok), blen %u txlen %u", err, (data ? data->blen : 0), (data ? data->bdone : 0))); return (err); } static int umouse_data_handler(void *scarg, struct usb_data_xfer *xfer, int dir, int epctx) { struct umouse_softc *sc; struct usb_data_xfer_block *data; uint8_t *udata; int len, i, idx; int err; - DPRINTF(("umouse handle data - DIR=%s|EP=%d, blen %d\r\n", + DPRINTF(("umouse handle data - DIR=%s|EP=%d, blen %d", dir ? "IN" : "OUT", epctx, xfer->data[0].blen)); /* find buffer to add data */ udata = NULL; err = USB_ERR_NORMAL_COMPLETION; /* handle xfer at first unprocessed item with buffer */ data = NULL; idx = xfer->head; for (i = 0; i < xfer->ndata; i++) { data = &xfer->data[idx]; if (data->buf != NULL && data->blen != 0) { break; } else { data->processed = 1; data = NULL; } idx = (idx + 1) % USB_MAX_XFER_BLOCKS; } if (!data) goto done; udata = data->buf; len = data->blen; if (udata == NULL) { - DPRINTF(("umouse no buffer provided for input\r\n")); + DPRINTF(("umouse no buffer provided for input")); err = USB_ERR_NOMEM; goto done; } sc = scarg; if (dir) { pthread_mutex_lock(&sc->mtx); if (!sc->newdata) { err = USB_ERR_CANCELLED; USB_DATA_SET_ERRCODE(&xfer->data[xfer->head], USB_NAK); pthread_mutex_unlock(&sc->mtx); goto done; } if (sc->polling) { err = USB_ERR_STALLED; USB_DATA_SET_ERRCODE(data, USB_STALL); pthread_mutex_unlock(&sc->mtx); goto done; } sc->polling = 1; if (len > 0) { sc->newdata = 0; data->processed = 1; data->bdone += 6; memcpy(udata, &sc->um_report, 6); data->blen = len - 6; if (data->blen > 0) err = USB_ERR_SHORT_XFER; } sc->polling = 0; pthread_mutex_unlock(&sc->mtx); } else { USB_DATA_SET_ERRCODE(data, USB_STALL); err = USB_ERR_STALLED; } done: return (err); } static int umouse_reset(void *scarg) { struct umouse_softc *sc; sc = scarg; sc->newdata = 0; return (0); } static int umouse_remove(void *scarg) { return (0); } static int umouse_stop(void *scarg) { return (0); } struct usb_devemu ue_mouse = { .ue_emu = "tablet", .ue_usbver = 3, .ue_usbspeed = USB_SPEED_HIGH, .ue_init = umouse_init, .ue_request = umouse_request, .ue_data = umouse_data_handler, .ue_reset = umouse_reset, .ue_remove = umouse_remove, .ue_stop = umouse_stop }; USB_EMUL_SET(ue_mouse); Index: stable/12/usr.sbin/bhyve/virtio.c =================================================================== --- stable/12/usr.sbin/bhyve/virtio.c (revision 358183) +++ stable/12/usr.sbin/bhyve/virtio.c (revision 358184) @@ -1,807 +1,808 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 Chris Torek * All rights reserved. 
* Copyright (c) 2019 Joyent, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "virtio.h" /* * Functions for dealing with generalized "virtual devices" as * defined by */ /* * In case we decide to relax the "virtio softc comes at the * front of virtio-based device softc" constraint, let's use * this to convert. */ #define DEV_SOFTC(vs) ((void *)(vs)) /* * Link a virtio_softc to its constants, the device softc, and * the PCI emulation. */ void vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc, void *dev_softc, struct pci_devinst *pi, struct vqueue_info *queues) { int i; /* vs and dev_softc addresses must match */ assert((void *)vs == dev_softc); vs->vs_vc = vc; vs->vs_pi = pi; pi->pi_arg = vs; vs->vs_queues = queues; for (i = 0; i < vc->vc_nvq; i++) { queues[i].vq_vs = vs; queues[i].vq_num = i; } } /* * Reset device (device-wide). This erases all queues, i.e., * all the queues become invalid (though we don't wipe out the * internal pointers, we just clear the VQ_ALLOC flag). * * It resets negotiated features to "none". * * If MSI-X is enabled, this also resets all the vectors to NO_VECTOR. */ void vi_reset_dev(struct virtio_softc *vs) { struct vqueue_info *vq; int i, nvq; if (vs->vs_mtx) assert(pthread_mutex_isowned_np(vs->vs_mtx)); nvq = vs->vs_vc->vc_nvq; for (vq = vs->vs_queues, i = 0; i < nvq; vq++, i++) { vq->vq_flags = 0; vq->vq_last_avail = 0; vq->vq_next_used = 0; vq->vq_save_used = 0; vq->vq_pfn = 0; vq->vq_msix_idx = VIRTIO_MSI_NO_VECTOR; } vs->vs_negotiated_caps = 0; vs->vs_curq = 0; /* vs->vs_status = 0; -- redundant */ if (vs->vs_isr) pci_lintr_deassert(vs->vs_pi); vs->vs_isr = 0; vs->vs_msix_cfg_idx = VIRTIO_MSI_NO_VECTOR; } /* * Set I/O BAR (usually 0) to map PCI config registers. */ void vi_set_io_bar(struct virtio_softc *vs, int barnum) { size_t size; /* * ??? should we use CFG0 if MSI-X is disabled? * Existing code did not... */ size = VTCFG_R_CFG1 + vs->vs_vc->vc_cfgsize; pci_emul_alloc_bar(vs->vs_pi, barnum, PCIBAR_IO, size); } /* * Initialize MSI-X vector capabilities if we're to use MSI-X, * or MSI capabilities if not. 
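 *
 * (Whichever flavor the guest ends up using, the legacy INTx line is
 * still requested below, since these legacy virtio devices must keep
 * it available as a fallback.)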
* * We assume we want one MSI-X vector per queue, here, plus one * for the config vec. */ int vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix) { int nvec; if (use_msix) { vs->vs_flags |= VIRTIO_USE_MSIX; VS_LOCK(vs); vi_reset_dev(vs); /* set all vectors to NO_VECTOR */ VS_UNLOCK(vs); nvec = vs->vs_vc->vc_nvq + 1; if (pci_emul_add_msixcap(vs->vs_pi, nvec, barnum)) return (1); } else vs->vs_flags &= ~VIRTIO_USE_MSIX; /* Only 1 MSI vector for bhyve */ pci_emul_add_msicap(vs->vs_pi, 1); /* Legacy interrupts are mandatory for virtio devices */ pci_lintr_request(vs->vs_pi); return (0); } /* * Initialize the currently-selected virtio queue (vs->vs_curq). * The guest just gave us a page frame number, from which we can * calculate the addresses of the queue. */ void vi_vq_init(struct virtio_softc *vs, uint32_t pfn) { struct vqueue_info *vq; uint64_t phys; size_t size; char *base; vq = &vs->vs_queues[vs->vs_curq]; vq->vq_pfn = pfn; phys = (uint64_t)pfn << VRING_PFN; size = vring_size(vq->vq_qsize); base = paddr_guest2host(vs->vs_pi->pi_vmctx, phys, size); /* First page(s) are descriptors... */ vq->vq_desc = (struct virtio_desc *)base; base += vq->vq_qsize * sizeof(struct virtio_desc); /* ... immediately followed by "avail" ring (entirely uint16_t's) */ vq->vq_avail = (struct vring_avail *)base; base += (2 + vq->vq_qsize + 1) * sizeof(uint16_t); /* Then it's rounded up to the next page... */ base = (char *)roundup2((uintptr_t)base, VRING_ALIGN); /* ... and the last page(s) are the used ring. */ vq->vq_used = (struct vring_used *)base; /* Mark queue as allocated, and start at 0 when we use it. */ vq->vq_flags = VQ_ALLOC; vq->vq_last_avail = 0; vq->vq_next_used = 0; vq->vq_save_used = 0; } /* * Helper inline for vq_getchain(): record the i'th "real" * descriptor. */ static inline void _vq_record(int i, volatile struct virtio_desc *vd, struct vmctx *ctx, struct iovec *iov, int n_iov, uint16_t *flags) { if (i >= n_iov) return; iov[i].iov_base = paddr_guest2host(ctx, vd->vd_addr, vd->vd_len); iov[i].iov_len = vd->vd_len; if (flags != NULL) flags[i] = vd->vd_flags; } #define VQ_MAX_DESCRIPTORS 512 /* see below */ /* * Examine the chain of descriptors starting at the "next one" to * make sure that they describe a sensible request. If so, return * the number of "real" descriptors that would be needed/used in * acting on this request. This may be smaller than the number of * available descriptors, e.g., if there are two available but * they are two separate requests, this just returns 1. Or, it * may be larger: if there are indirect descriptors involved, * there may only be one descriptor available but it may be an * indirect pointing to eight more. We return 8 in this case, * i.e., we do not count the indirect descriptors, only the "real" * ones. * * Basically, this vets the vd_flags and vd_next field of each * descriptor and tells you how many are involved. Since some may * be indirect, this also needs the vmctx (in the pci_devinst * at vs->vs_pi) so that it can find indirect descriptors. * * As we process each descriptor, we copy and adjust it (guest to * host address wise, also using the vmtctx) into the given iov[] * array (of the given size). If the array overflows, we stop * placing values into the array but keep processing descriptors, * up to VQ_MAX_DESCRIPTORS, before giving up and returning -1. 
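 *
 * For example, on a queue of size 8, if the avail ring points at
 * descriptor 2 and the chain runs 2 -> 5 -> 6 (VRING_DESC_F_NEXT set
 * on 2 and 5 but clear on 6), this returns 3 and fills iov[0..2];
 * a single avail entry naming an indirect table of eight descriptors
 * returns 8.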
* So you, the caller, must not assume that iov[] is as big as the * return value (you can process the same thing twice to allocate * a larger iov array if needed, or supply a zero length to find * out how much space is needed). * * If you want to verify the WRITE flag on each descriptor, pass a * non-NULL "flags" pointer to an array of "uint16_t" of the same size * as n_iov and we'll copy each vd_flags field after unwinding any * indirects. * * If some descriptor(s) are invalid, this prints a diagnostic message * and returns -1. If no descriptors are ready now it simply returns 0. * * You are assumed to have done a vq_ring_ready() if needed (note * that vq_has_descs() does one). */ int vq_getchain(struct vqueue_info *vq, uint16_t *pidx, struct iovec *iov, int n_iov, uint16_t *flags) { int i; u_int ndesc, n_indir; u_int idx, next; volatile struct virtio_desc *vdir, *vindir, *vp; struct vmctx *ctx; struct virtio_softc *vs; const char *name; vs = vq->vq_vs; name = vs->vs_vc->vc_name; /* * Note: it's the responsibility of the guest not to * update vq->vq_avail->va_idx until all of the descriptors * the guest has written are valid (including all their * vd_next fields and vd_flags). * * Compute (va_idx - last_avail) in integers mod 2**16. This is * the number of descriptors the device has made available * since the last time we updated vq->vq_last_avail. * * We just need to do the subtraction as an unsigned int, * then trim off excess bits. */ idx = vq->vq_last_avail; ndesc = (uint16_t)((u_int)vq->vq_avail->va_idx - idx); if (ndesc == 0) return (0); if (ndesc > vq->vq_qsize) { /* XXX need better way to diagnose issues */ - fprintf(stderr, - "%s: ndesc (%u) out of range, driver confused?\r\n", + EPRINTLN( + "%s: ndesc (%u) out of range, driver confused?", name, (u_int)ndesc); return (-1); } /* * Now count/parse "involved" descriptors starting from * the head of the chain. * * To prevent loops, we could be more complicated and * check whether we're re-visiting a previously visited * index, but we just abort if the count gets excessive. */ ctx = vs->vs_pi->pi_vmctx; *pidx = next = vq->vq_avail->va_ring[idx & (vq->vq_qsize - 1)]; vq->vq_last_avail++; for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->vd_next) { if (next >= vq->vq_qsize) { - fprintf(stderr, + EPRINTLN( "%s: descriptor index %u out of range, " - "driver confused?\r\n", + "driver confused?", name, next); return (-1); } vdir = &vq->vq_desc[next]; if ((vdir->vd_flags & VRING_DESC_F_INDIRECT) == 0) { _vq_record(i, vdir, ctx, iov, n_iov, flags); i++; } else if ((vs->vs_vc->vc_hv_caps & VIRTIO_RING_F_INDIRECT_DESC) == 0) { - fprintf(stderr, + EPRINTLN( "%s: descriptor has forbidden INDIRECT flag, " - "driver confused?\r\n", + "driver confused?", name); return (-1); } else { n_indir = vdir->vd_len / 16; if ((vdir->vd_len & 0xf) || n_indir == 0) { - fprintf(stderr, + EPRINTLN( "%s: invalid indir len 0x%x, " - "driver confused?\r\n", + "driver confused?", name, (u_int)vdir->vd_len); return (-1); } vindir = paddr_guest2host(ctx, vdir->vd_addr, vdir->vd_len); /* * Indirects start at the 0th, then follow * their own embedded "next"s until those run * out. Each one's indirect flag must be off * (we don't really have to check, could just * ignore errors...). 
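 *
 * An indirect table is simply an array of 16-byte struct virtio_desc
 * entries in guest memory, which is why n_indir is computed as
 * vd_len / 16 and any vd_len that is not a multiple of 16 is
 * rejected above.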
*/ next = 0; for (;;) { vp = &vindir[next]; if (vp->vd_flags & VRING_DESC_F_INDIRECT) { - fprintf(stderr, + EPRINTLN( "%s: indirect desc has INDIR flag," - " driver confused?\r\n", + " driver confused?", name); return (-1); } _vq_record(i, vp, ctx, iov, n_iov, flags); if (++i > VQ_MAX_DESCRIPTORS) goto loopy; if ((vp->vd_flags & VRING_DESC_F_NEXT) == 0) break; next = vp->vd_next; if (next >= n_indir) { - fprintf(stderr, + EPRINTLN( "%s: invalid next %u > %u, " - "driver confused?\r\n", + "driver confused?", name, (u_int)next, n_indir); return (-1); } } } if ((vdir->vd_flags & VRING_DESC_F_NEXT) == 0) return (i); } loopy: - fprintf(stderr, - "%s: descriptor loop? count > %d - driver confused?\r\n", + EPRINTLN( + "%s: descriptor loop? count > %d - driver confused?", name, i); return (-1); } /* * Return the first n_chain request chains back to the available queue. * * (These chains are the ones you handled when you called vq_getchain() * and used its positive return value.) */ void vq_retchains(struct vqueue_info *vq, uint16_t n_chains) { vq->vq_last_avail -= n_chains; } void vq_relchain_prepare(struct vqueue_info *vq, uint16_t idx, uint32_t iolen) { volatile struct vring_used *vuh; volatile struct virtio_used *vue; uint16_t mask; /* * Notes: * - mask is N-1 where N is a power of 2 so computes x % N * - vuh points to the "used" data shared with guest * - vue points to the "used" ring entry we want to update * * (I apologize for the two fields named vu_idx; the * virtio spec calls the one that vue points to, "id"...) */ mask = vq->vq_qsize - 1; vuh = vq->vq_used; vue = &vuh->vu_ring[vq->vq_next_used++ & mask]; vue->vu_idx = idx; vue->vu_tlen = iolen; } void vq_relchain_publish(struct vqueue_info *vq) { /* * Ensure the used descriptor is visible before updating the index. * This is necessary on ISAs with memory ordering less strict than x86 * (and even on x86 to act as a compiler barrier). */ atomic_thread_fence_rel(); vq->vq_used->vu_idx = vq->vq_next_used; } /* * Return specified request chain to the guest, setting its I/O length * to the provided value. * * (This chain is the one you handled when you called vq_getchain() * and used its positive return value.) */ void vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen) { vq_relchain_prepare(vq, idx, iolen); vq_relchain_publish(vq); } /* * Driver has finished processing "available" chains and calling * vq_relchain on each one. If driver used all the available * chains, used_all should be set. * * If the "used" index moved we may need to inform the guest, i.e., * deliver an interrupt. Even if the used index did NOT move we * may need to deliver an interrupt, if the avail ring is empty and * we are supposed to interrupt on empty. * * Note that used_all_avail is provided by the caller because it's * a snapshot of the ring state when he decided to finish interrupt * processing -- it's possible that descriptors became available after * that point. (It's also typically a constant 1/True as well.) */ void vq_endchains(struct vqueue_info *vq, int used_all_avail) { struct virtio_softc *vs; uint16_t event_idx, new_idx, old_idx; int intr; /* * Interrupt generation: if we're using EVENT_IDX, * interrupt if we've crossed the event threshold. * Otherwise interrupt is generated if we added "used" entries, * but suppressed by VRING_AVAIL_F_NO_INTERRUPT. * * In any case, though, if NOTIFY_ON_EMPTY is set and the * entire avail was processed, we need to interrupt always. 
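 *
 * The EVENT_IDX test below is the usual wraparound-safe threshold
 * check: with old_idx = 10, new_idx = 12 and event_idx = 10,
 * (uint16_t)(new_idx - event_idx - 1) = 1 is less than
 * (uint16_t)(new_idx - old_idx) = 2, so an interrupt is raised;
 * with event_idx = 12 the left-hand side wraps to 65535 and the
 * interrupt is suppressed.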
*/ vs = vq->vq_vs; old_idx = vq->vq_save_used; vq->vq_save_used = new_idx = vq->vq_used->vu_idx; /* * Use full memory barrier between vu_idx store from preceding * vq_relchain() call and the loads from VQ_USED_EVENT_IDX() or * va_flags below. */ atomic_thread_fence_seq_cst(); if (used_all_avail && (vs->vs_negotiated_caps & VIRTIO_F_NOTIFY_ON_EMPTY)) intr = 1; else if (vs->vs_negotiated_caps & VIRTIO_RING_F_EVENT_IDX) { event_idx = VQ_USED_EVENT_IDX(vq); /* * This calculation is per docs and the kernel * (see src/sys/dev/virtio/virtio_ring.h). */ intr = (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old_idx); } else { intr = new_idx != old_idx && !(vq->vq_avail->va_flags & VRING_AVAIL_F_NO_INTERRUPT); } if (intr) vq_interrupt(vs, vq); } /* Note: these are in sorted order to make for a fast search */ static struct config_reg { uint16_t cr_offset; /* register offset */ uint8_t cr_size; /* size (bytes) */ uint8_t cr_ro; /* true => reg is read only */ const char *cr_name; /* name of reg */ } config_regs[] = { { VTCFG_R_HOSTCAP, 4, 1, "HOSTCAP" }, { VTCFG_R_GUESTCAP, 4, 0, "GUESTCAP" }, { VTCFG_R_PFN, 4, 0, "PFN" }, { VTCFG_R_QNUM, 2, 1, "QNUM" }, { VTCFG_R_QSEL, 2, 0, "QSEL" }, { VTCFG_R_QNOTIFY, 2, 0, "QNOTIFY" }, { VTCFG_R_STATUS, 1, 0, "STATUS" }, { VTCFG_R_ISR, 1, 0, "ISR" }, { VTCFG_R_CFGVEC, 2, 0, "CFGVEC" }, { VTCFG_R_QVEC, 2, 0, "QVEC" }, }; static inline struct config_reg * vi_find_cr(int offset) { u_int hi, lo, mid; struct config_reg *cr; lo = 0; hi = sizeof(config_regs) / sizeof(*config_regs) - 1; while (hi >= lo) { mid = (hi + lo) >> 1; cr = &config_regs[mid]; if (cr->cr_offset == offset) return (cr); if (cr->cr_offset < offset) lo = mid + 1; else hi = mid - 1; } return (NULL); } /* * Handle pci config space reads. * If it's to the MSI-X info, do that. * If it's part of the virtio standard stuff, do that. * Otherwise dispatch to the actual driver. */ uint64_t vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { struct virtio_softc *vs = pi->pi_arg; struct virtio_consts *vc; struct config_reg *cr; uint64_t virtio_config_size, max; const char *name; uint32_t newoff; uint32_t value; int error; if (vs->vs_flags & VIRTIO_USE_MSIX) { if (baridx == pci_msix_table_bar(pi) || baridx == pci_msix_pba_bar(pi)) { return (pci_emul_msix_tread(pi, offset, size)); } } /* XXX probably should do something better than just assert() */ assert(baridx == 0); if (vs->vs_mtx) pthread_mutex_lock(vs->vs_mtx); vc = vs->vs_vc; name = vc->vc_name; value = size == 1 ? 0xff : size == 2 ? 0xffff : 0xffffffff; if (size != 1 && size != 2 && size != 4) goto bad; if (pci_msix_enabled(pi)) virtio_config_size = VTCFG_R_CFG1; else virtio_config_size = VTCFG_R_CFG0; if (offset >= virtio_config_size) { /* * Subtract off the standard size (including MSI-X * registers if enabled) and dispatch to underlying driver. * If that fails, fall into general code. */ newoff = offset - virtio_config_size; max = vc->vc_cfgsize ? 
vc->vc_cfgsize : 0x100000000; if (newoff + size > max) goto bad; error = (*vc->vc_cfgread)(DEV_SOFTC(vs), newoff, size, &value); if (!error) goto done; } bad: cr = vi_find_cr(offset); if (cr == NULL || cr->cr_size != size) { if (cr != NULL) { /* offset must be OK, so size must be bad */ - fprintf(stderr, - "%s: read from %s: bad size %d\r\n", + EPRINTLN( + "%s: read from %s: bad size %d", name, cr->cr_name, size); } else { - fprintf(stderr, - "%s: read from bad offset/size %jd/%d\r\n", + EPRINTLN( + "%s: read from bad offset/size %jd/%d", name, (uintmax_t)offset, size); } goto done; } switch (offset) { case VTCFG_R_HOSTCAP: value = vc->vc_hv_caps; break; case VTCFG_R_GUESTCAP: value = vs->vs_negotiated_caps; break; case VTCFG_R_PFN: if (vs->vs_curq < vc->vc_nvq) value = vs->vs_queues[vs->vs_curq].vq_pfn; break; case VTCFG_R_QNUM: value = vs->vs_curq < vc->vc_nvq ? vs->vs_queues[vs->vs_curq].vq_qsize : 0; break; case VTCFG_R_QSEL: value = vs->vs_curq; break; case VTCFG_R_QNOTIFY: value = 0; /* XXX */ break; case VTCFG_R_STATUS: value = vs->vs_status; break; case VTCFG_R_ISR: value = vs->vs_isr; vs->vs_isr = 0; /* a read clears this flag */ if (value) pci_lintr_deassert(pi); break; case VTCFG_R_CFGVEC: value = vs->vs_msix_cfg_idx; break; case VTCFG_R_QVEC: value = vs->vs_curq < vc->vc_nvq ? vs->vs_queues[vs->vs_curq].vq_msix_idx : VIRTIO_MSI_NO_VECTOR; break; } done: if (vs->vs_mtx) pthread_mutex_unlock(vs->vs_mtx); return (value); } /* * Handle pci config space writes. * If it's to the MSI-X info, do that. * If it's part of the virtio standard stuff, do that. * Otherwise dispatch to the actual driver. */ void vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { struct virtio_softc *vs = pi->pi_arg; struct vqueue_info *vq; struct virtio_consts *vc; struct config_reg *cr; uint64_t virtio_config_size, max; const char *name; uint32_t newoff; int error; if (vs->vs_flags & VIRTIO_USE_MSIX) { if (baridx == pci_msix_table_bar(pi) || baridx == pci_msix_pba_bar(pi)) { pci_emul_msix_twrite(pi, offset, size, value); return; } } /* XXX probably should do something better than just assert() */ assert(baridx == 0); if (vs->vs_mtx) pthread_mutex_lock(vs->vs_mtx); vc = vs->vs_vc; name = vc->vc_name; if (size != 1 && size != 2 && size != 4) goto bad; if (pci_msix_enabled(pi)) virtio_config_size = VTCFG_R_CFG1; else virtio_config_size = VTCFG_R_CFG0; if (offset >= virtio_config_size) { /* * Subtract off the standard size (including MSI-X * registers if enabled) and dispatch to underlying driver. */ newoff = offset - virtio_config_size; max = vc->vc_cfgsize ? 
vc->vc_cfgsize : 0x100000000; if (newoff + size > max) goto bad; error = (*vc->vc_cfgwrite)(DEV_SOFTC(vs), newoff, size, value); if (!error) goto done; } bad: cr = vi_find_cr(offset); if (cr == NULL || cr->cr_size != size || cr->cr_ro) { if (cr != NULL) { /* offset must be OK, wrong size and/or reg is R/O */ if (cr->cr_size != size) - fprintf(stderr, - "%s: write to %s: bad size %d\r\n", + EPRINTLN( + "%s: write to %s: bad size %d", name, cr->cr_name, size); if (cr->cr_ro) - fprintf(stderr, - "%s: write to read-only reg %s\r\n", + EPRINTLN( + "%s: write to read-only reg %s", name, cr->cr_name); } else { - fprintf(stderr, - "%s: write to bad offset/size %jd/%d\r\n", + EPRINTLN( + "%s: write to bad offset/size %jd/%d", name, (uintmax_t)offset, size); } goto done; } switch (offset) { case VTCFG_R_GUESTCAP: vs->vs_negotiated_caps = value & vc->vc_hv_caps; if (vc->vc_apply_features) (*vc->vc_apply_features)(DEV_SOFTC(vs), vs->vs_negotiated_caps); break; case VTCFG_R_PFN: if (vs->vs_curq >= vc->vc_nvq) goto bad_qindex; vi_vq_init(vs, value); break; case VTCFG_R_QSEL: /* * Note that the guest is allowed to select an * invalid queue; we just need to return a QNUM * of 0 while the bad queue is selected. */ vs->vs_curq = value; break; case VTCFG_R_QNOTIFY: if (value >= vc->vc_nvq) { - fprintf(stderr, "%s: queue %d notify out of range\r\n", + EPRINTLN("%s: queue %d notify out of range", name, (int)value); goto done; } vq = &vs->vs_queues[value]; if (vq->vq_notify) (*vq->vq_notify)(DEV_SOFTC(vs), vq); else if (vc->vc_qnotify) (*vc->vc_qnotify)(DEV_SOFTC(vs), vq); else - fprintf(stderr, - "%s: qnotify queue %d: missing vq/vc notify\r\n", + EPRINTLN( + "%s: qnotify queue %d: missing vq/vc notify", name, (int)value); break; case VTCFG_R_STATUS: vs->vs_status = value; if (value == 0) (*vc->vc_reset)(DEV_SOFTC(vs)); break; case VTCFG_R_CFGVEC: vs->vs_msix_cfg_idx = value; break; case VTCFG_R_QVEC: if (vs->vs_curq >= vc->vc_nvq) goto bad_qindex; vq = &vs->vs_queues[vs->vs_curq]; vq->vq_msix_idx = value; break; } goto done; bad_qindex: - fprintf(stderr, - "%s: write config reg %s: curq %d >= max %d\r\n", + EPRINTLN( + "%s: write config reg %s: curq %d >= max %d", name, cr->cr_name, vs->vs_curq, vc->vc_nvq); done: if (vs->vs_mtx) pthread_mutex_unlock(vs->vs_mtx); } Index: stable/12/usr.sbin/bhyve/xmsr.c =================================================================== --- stable/12/usr.sbin/bhyve/xmsr.c (revision 358183) +++ stable/12/usr.sbin/bhyve/xmsr.c (revision 358184) @@ -1,236 +1,237 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include +#include "debug.h" #include "xmsr.h" static int cpu_vendor_intel, cpu_vendor_amd, cpu_vendor_hygon; int emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t val) { if (cpu_vendor_intel) { switch (num) { case 0xd04: /* Sandy Bridge uncore PMCs */ case 0xc24: return (0); case MSR_BIOS_UPDT_TRIG: return (0); case MSR_BIOS_SIGN: return (0); default: break; } } else if (cpu_vendor_amd || cpu_vendor_hygon) { switch (num) { case MSR_HWCR: /* * Ignore writes to hardware configuration MSR. */ return (0); case MSR_NB_CFG1: case MSR_LS_CFG: case MSR_IC_CFG: return (0); /* Ignore writes */ case MSR_PERFEVSEL0: case MSR_PERFEVSEL1: case MSR_PERFEVSEL2: case MSR_PERFEVSEL3: /* Ignore writes to the PerfEvtSel MSRs */ return (0); case MSR_K7_PERFCTR0: case MSR_K7_PERFCTR1: case MSR_K7_PERFCTR2: case MSR_K7_PERFCTR3: /* Ignore writes to the PerfCtr MSRs */ return (0); case MSR_P_STATE_CONTROL: /* Ignore write to change the P-state */ return (0); default: break; } } return (-1); } int emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t *val) { int error = 0; if (cpu_vendor_intel) { switch (num) { case MSR_BIOS_SIGN: case MSR_IA32_PLATFORM_ID: case MSR_PKG_ENERGY_STATUS: case MSR_PP0_ENERGY_STATUS: case MSR_PP1_ENERGY_STATUS: case MSR_DRAM_ENERGY_STATUS: *val = 0; break; case MSR_RAPL_POWER_UNIT: /* * Use the default value documented in section * "RAPL Interfaces" in Intel SDM vol3. */ *val = 0x000a1003; break; default: error = -1; break; } } else if (cpu_vendor_amd || cpu_vendor_hygon) { switch (num) { case MSR_BIOS_SIGN: *val = 0; break; case MSR_HWCR: /* * Bios and Kernel Developer's Guides for AMD Families * 12H, 14H, 15H and 16H. */ *val = 0x01000010; /* Reset value */ *val |= 1 << 9; /* MONITOR/MWAIT disable */ break; case MSR_NB_CFG1: case MSR_LS_CFG: case MSR_IC_CFG: /* * The reset value is processor family dependent so * just return 0. */ *val = 0; break; case MSR_PERFEVSEL0: case MSR_PERFEVSEL1: case MSR_PERFEVSEL2: case MSR_PERFEVSEL3: /* * PerfEvtSel MSRs are not properly virtualized so just * return zero. */ *val = 0; break; case MSR_K7_PERFCTR0: case MSR_K7_PERFCTR1: case MSR_K7_PERFCTR2: case MSR_K7_PERFCTR3: /* * PerfCtr MSRs are not properly virtualized so just * return zero. */ *val = 0; break; case MSR_SMM_ADDR: case MSR_SMM_MASK: /* * Return the reset value defined in the AMD Bios and * Kernel Developer's Guide. */ *val = 0; break; case MSR_P_STATE_LIMIT: case MSR_P_STATE_CONTROL: case MSR_P_STATE_STATUS: case MSR_P_STATE_CONFIG(0): /* P0 configuration */ *val = 0; break; /* * OpenBSD guests test bit 0 of this MSR to detect if the * workaround for erratum 721 is already applied. 
* https://support.amd.com/TechDocs/41322_10h_Rev_Gd.pdf */ case 0xC0011029: *val = 1; break; default: error = -1; break; } } else { error = -1; } return (error); } int init_msr(void) { int error; u_int regs[4]; char cpu_vendor[13]; do_cpuid(0, regs); ((u_int *)&cpu_vendor)[0] = regs[1]; ((u_int *)&cpu_vendor)[1] = regs[3]; ((u_int *)&cpu_vendor)[2] = regs[2]; cpu_vendor[12] = '\0'; error = 0; if (strcmp(cpu_vendor, "AuthenticAMD") == 0) { cpu_vendor_amd = 1; } else if (strcmp(cpu_vendor, "HygonGenuine") == 0) { cpu_vendor_hygon = 1; } else if (strcmp(cpu_vendor, "GenuineIntel") == 0) { cpu_vendor_intel = 1; } else { - fprintf(stderr, "Unknown cpu vendor \"%s\"\n", cpu_vendor); + EPRINTLN("Unknown cpu vendor \"%s\"", cpu_vendor); error = -1; } return (error); }
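
A side note on the vendor probe in init_msr() above: CPUID leaf 0 returns the 12-byte vendor string split across EBX, EDX and ECX (regs[1], regs[3] and regs[2] in do_cpuid()'s output array), which is why the assembly order looks shuffled. Below is a minimal standalone sketch of the same probe; the demo main() is hypothetical, it assumes an x86 FreeBSD host where <machine/cpufunc.h> provides do_cpuid(), and it uses memcpy() instead of the pointer cast to sidestep strict-aliasing concerns.

#include <stdio.h>
#include <string.h>

#include <sys/types.h>
#include <machine/cpufunc.h>	/* do_cpuid() */

int
main(void)
{
	u_int regs[4];		/* EAX, EBX, ECX, EDX for CPUID leaf 0 */
	char vendor[13];

	do_cpuid(0, regs);

	/* The vendor string comes back as EBX:EDX:ECX, in that order. */
	memcpy(vendor + 0, &regs[1], 4);	/* EBX, e.g. "Genu" */
	memcpy(vendor + 4, &regs[3], 4);	/* EDX, e.g. "ineI" */
	memcpy(vendor + 8, &regs[2], 4);	/* ECX, e.g. "ntel" */
	vendor[12] = '\0';

	printf("cpu vendor: %s\n", vendor);
	return (0);
}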