diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c index 580bb900dd43..95c65ecc15d2 100644 --- a/usr.sbin/bhyve/acpi.c +++ b/usr.sbin/bhyve/acpi.c @@ -1,1003 +1,1003 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * bhyve ACPI table generator. * * Create the minimal set of ACPI tables required to boot FreeBSD (and * hopefully other o/s's) by writing out ASL template files for each of * the tables and the compiling them to AML with the Intel iasl compiler. * The AML files are then read into guest memory. * * The tables are placed in the guest's ROM area just below 1MB physical, * above the MPTable. * * Layout (No longer correct at FADT and beyond due to properly * calculating the size of the MADT to allow for changes to * VM_MAXCPU above 21 which overflows this layout.) 
* ------ * RSDP -> 0xf2400 (36 bytes fixed) * RSDT -> 0xf2440 (36 bytes + 4*7 table addrs, 4 used) * XSDT -> 0xf2480 (36 bytes + 8*7 table addrs, 4 used) * MADT -> 0xf2500 (depends on #CPUs) * FADT -> 0xf2600 (268 bytes) * HPET -> 0xf2740 (56 bytes) * MCFG -> 0xf2780 (60 bytes) * FACS -> 0xf27C0 (64 bytes) * DSDT -> 0xf2800 (variable - can go up to 0x100000) */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "acpi.h" #include "pci_emul.h" #include "vmgenc.h" /* - * Define the base address of the ACPI tables, the sizes of some tables, + * Define the base address of the ACPI tables, the sizes of some tables, * and the offsets to the individual tables, */ #define BHYVE_ACPI_BASE 0xf2400 #define RSDT_OFFSET 0x040 #define XSDT_OFFSET 0x080 #define MADT_OFFSET 0x100 /* * The MADT consists of: * 44 Fixed Header * 8 * maxcpu Processor Local APIC entries * 12 I/O APIC entry * 2 * 10 Interrupt Source Override entires * 6 Local APIC NMI entry */ #define MADT_SIZE (44 + VM_MAXCPU*8 + 12 + 2*10 + 6) #define FADT_OFFSET (MADT_OFFSET + MADT_SIZE) #define FADT_SIZE 0x140 #define HPET_OFFSET (FADT_OFFSET + FADT_SIZE) #define HPET_SIZE 0x40 #define MCFG_OFFSET (HPET_OFFSET + HPET_SIZE) #define MCFG_SIZE 0x40 #define FACS_OFFSET (MCFG_OFFSET + MCFG_SIZE) #define FACS_SIZE 0x40 #define DSDT_OFFSET (FACS_OFFSET + FACS_SIZE) #define BHYVE_ASL_TEMPLATE "bhyve.XXXXXXX" #define BHYVE_ASL_SUFFIX ".aml" #define BHYVE_ASL_COMPILER "/usr/sbin/iasl" static int basl_keep_temps; static int basl_verbose_iasl; static int basl_ncpu; static uint32_t basl_acpi_base = BHYVE_ACPI_BASE; static uint32_t hpet_capabilities; /* * Contains the full pathname of the template to be passed * to mkstemp/mktemps(3) */ static char basl_template[MAXPATHLEN]; static char basl_stemplate[MAXPATHLEN]; /* * State for dsdt_line(), dsdt_indent(), and dsdt_unindent(). */ static FILE *dsdt_fp; static int dsdt_indent_level; static int dsdt_error; struct basl_fio { int fd; FILE *fp; char f_name[MAXPATHLEN]; }; #define EFPRINTF(...) 
\ if (fprintf(__VA_ARGS__) < 0) goto err_exit; #define EFFLUSH(x) \ if (fflush(x) != 0) goto err_exit; static int basl_fwrite_rsdp(FILE *fp) { EFPRINTF(fp, "/*\n"); EFPRINTF(fp, " * bhyve RSDP template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "[0008]\t\tSignature : \"RSD PTR \"\n"); EFPRINTF(fp, "[0001]\t\tChecksum : 43\n"); EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); EFPRINTF(fp, "[0001]\t\tRevision : 02\n"); EFPRINTF(fp, "[0004]\t\tRSDT Address : %08X\n", basl_acpi_base + RSDT_OFFSET); EFPRINTF(fp, "[0004]\t\tLength : 00000024\n"); EFPRINTF(fp, "[0008]\t\tXSDT Address : 00000000%08X\n", basl_acpi_base + XSDT_OFFSET); EFPRINTF(fp, "[0001]\t\tExtended Checksum : 00\n"); EFPRINTF(fp, "[0003]\t\tReserved : 000000\n"); EFFLUSH(fp); return (0); err_exit: return (errno); } static int basl_fwrite_rsdt(FILE *fp) { EFPRINTF(fp, "/*\n"); EFPRINTF(fp, " * bhyve RSDT template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "[0004]\t\tSignature : \"RSDT\"\n"); EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVRSDT \"\n"); EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); /* iasl will fill in the compiler ID/revision fields */ EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); EFPRINTF(fp, "\n"); /* Add in pointers to the MADT, FADT and HPET */ EFPRINTF(fp, "[0004]\t\tACPI Table Address 0 : %08X\n", basl_acpi_base + MADT_OFFSET); EFPRINTF(fp, "[0004]\t\tACPI Table Address 1 : %08X\n", basl_acpi_base + FADT_OFFSET); EFPRINTF(fp, "[0004]\t\tACPI Table Address 2 : %08X\n", basl_acpi_base + HPET_OFFSET); EFPRINTF(fp, "[0004]\t\tACPI Table Address 3 : %08X\n", basl_acpi_base + MCFG_OFFSET); EFFLUSH(fp); return (0); err_exit: return (errno); } static int basl_fwrite_xsdt(FILE *fp) { EFPRINTF(fp, "/*\n"); EFPRINTF(fp, " * bhyve XSDT template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "[0004]\t\tSignature : \"XSDT\"\n"); EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVXSDT \"\n"); EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); /* iasl will fill in the compiler ID/revision fields */ EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); EFPRINTF(fp, "\n"); /* Add in pointers to the MADT, FADT and HPET */ EFPRINTF(fp, "[0004]\t\tACPI Table Address 0 : 00000000%08X\n", basl_acpi_base + MADT_OFFSET); EFPRINTF(fp, "[0004]\t\tACPI Table Address 1 : 00000000%08X\n", basl_acpi_base + FADT_OFFSET); EFPRINTF(fp, "[0004]\t\tACPI Table Address 2 : 00000000%08X\n", basl_acpi_base + HPET_OFFSET); EFPRINTF(fp, "[0004]\t\tACPI Table Address 3 : 00000000%08X\n", basl_acpi_base + MCFG_OFFSET); EFFLUSH(fp); return (0); err_exit: return (errno); } static int basl_fwrite_madt(FILE *fp) { int i; EFPRINTF(fp, "/*\n"); EFPRINTF(fp, " * bhyve MADT template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "[0004]\t\tSignature : \"APIC\"\n"); EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVMADT \"\n"); EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); /* iasl will fill in 
the compiler ID/revision fields */ EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0004]\t\tLocal Apic Address : FEE00000\n"); EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n"); EFPRINTF(fp, "\t\t\tPC-AT Compatibility : 1\n"); EFPRINTF(fp, "\n"); /* Add a Processor Local APIC entry for each CPU */ for (i = 0; i < basl_ncpu; i++) { EFPRINTF(fp, "[0001]\t\tSubtable Type : 00\n"); EFPRINTF(fp, "[0001]\t\tLength : 08\n"); /* iasl expects hex values for the proc and apic id's */ EFPRINTF(fp, "[0001]\t\tProcessor ID : %02x\n", i); EFPRINTF(fp, "[0001]\t\tLocal Apic ID : %02x\n", i); EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n"); EFPRINTF(fp, "\t\t\tProcessor Enabled : 1\n"); EFPRINTF(fp, "\t\t\tRuntime Online Capable : 0\n"); EFPRINTF(fp, "\n"); } /* Always a single IOAPIC entry, with ID 0 */ EFPRINTF(fp, "[0001]\t\tSubtable Type : 01\n"); EFPRINTF(fp, "[0001]\t\tLength : 0C\n"); /* iasl expects a hex value for the i/o apic id */ EFPRINTF(fp, "[0001]\t\tI/O Apic ID : %02x\n", 0); EFPRINTF(fp, "[0001]\t\tReserved : 00\n"); EFPRINTF(fp, "[0004]\t\tAddress : fec00000\n"); EFPRINTF(fp, "[0004]\t\tInterrupt : 00000000\n"); EFPRINTF(fp, "\n"); /* Legacy IRQ0 is connected to pin 2 of the IOAPIC */ EFPRINTF(fp, "[0001]\t\tSubtable Type : 02\n"); EFPRINTF(fp, "[0001]\t\tLength : 0A\n"); EFPRINTF(fp, "[0001]\t\tBus : 00\n"); EFPRINTF(fp, "[0001]\t\tSource : 00\n"); EFPRINTF(fp, "[0004]\t\tInterrupt : 00000002\n"); EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0005\n"); EFPRINTF(fp, "\t\t\tPolarity : 1\n"); EFPRINTF(fp, "\t\t\tTrigger Mode : 1\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0001]\t\tSubtable Type : 02\n"); EFPRINTF(fp, "[0001]\t\tLength : 0A\n"); EFPRINTF(fp, "[0001]\t\tBus : 00\n"); EFPRINTF(fp, "[0001]\t\tSource : %02X\n", SCI_INT); EFPRINTF(fp, "[0004]\t\tInterrupt : %08X\n", SCI_INT); EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0000\n"); EFPRINTF(fp, "\t\t\tPolarity : 3\n"); EFPRINTF(fp, "\t\t\tTrigger Mode : 3\n"); EFPRINTF(fp, "\n"); /* Local APIC NMI is connected to LINT 1 on all CPUs */ EFPRINTF(fp, "[0001]\t\tSubtable Type : 04\n"); EFPRINTF(fp, "[0001]\t\tLength : 06\n"); EFPRINTF(fp, "[0001]\t\tProcessor ID : FF\n"); EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0005\n"); EFPRINTF(fp, "\t\t\tPolarity : 1\n"); EFPRINTF(fp, "\t\t\tTrigger Mode : 1\n"); EFPRINTF(fp, "[0001]\t\tInterrupt Input LINT : 01\n"); EFPRINTF(fp, "\n"); EFFLUSH(fp); return (0); err_exit: return (errno); } static int basl_fwrite_fadt(FILE *fp) { EFPRINTF(fp, "/*\n"); EFPRINTF(fp, " * bhyve FADT template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "[0004]\t\tSignature : \"FACP\"\n"); EFPRINTF(fp, "[0004]\t\tTable Length : 0000010C\n"); EFPRINTF(fp, "[0001]\t\tRevision : 05\n"); EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVFACP \"\n"); EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); /* iasl will fill in the compiler ID/revision fields */ EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0004]\t\tFACS Address : %08X\n", basl_acpi_base + FACS_OFFSET); EFPRINTF(fp, "[0004]\t\tDSDT Address : %08X\n", basl_acpi_base + DSDT_OFFSET); EFPRINTF(fp, "[0001]\t\tModel : 01\n"); EFPRINTF(fp, "[0001]\t\tPM Profile : 00 [Unspecified]\n"); EFPRINTF(fp, "[0002]\t\tSCI Interrupt : %04X\n", SCI_INT); 
EFPRINTF(fp, "[0004]\t\tSMI Command Port : %08X\n", SMI_CMD); EFPRINTF(fp, "[0001]\t\tACPI Enable Value : %02X\n", BHYVE_ACPI_ENABLE); EFPRINTF(fp, "[0001]\t\tACPI Disable Value : %02X\n", BHYVE_ACPI_DISABLE); EFPRINTF(fp, "[0001]\t\tS4BIOS Command : 00\n"); EFPRINTF(fp, "[0001]\t\tP-State Control : 00\n"); EFPRINTF(fp, "[0004]\t\tPM1A Event Block Address : %08X\n", PM1A_EVT_ADDR); EFPRINTF(fp, "[0004]\t\tPM1B Event Block Address : 00000000\n"); EFPRINTF(fp, "[0004]\t\tPM1A Control Block Address : %08X\n", PM1A_CNT_ADDR); EFPRINTF(fp, "[0004]\t\tPM1B Control Block Address : 00000000\n"); EFPRINTF(fp, "[0004]\t\tPM2 Control Block Address : 00000000\n"); EFPRINTF(fp, "[0004]\t\tPM Timer Block Address : %08X\n", IO_PMTMR); EFPRINTF(fp, "[0004]\t\tGPE0 Block Address : %08X\n", IO_GPE0_BLK); EFPRINTF(fp, "[0004]\t\tGPE1 Block Address : 00000000\n"); EFPRINTF(fp, "[0001]\t\tPM1 Event Block Length : 04\n"); EFPRINTF(fp, "[0001]\t\tPM1 Control Block Length : 02\n"); EFPRINTF(fp, "[0001]\t\tPM2 Control Block Length : 00\n"); EFPRINTF(fp, "[0001]\t\tPM Timer Block Length : 04\n"); EFPRINTF(fp, "[0001]\t\tGPE0 Block Length : %02x\n", IO_GPE0_LEN); EFPRINTF(fp, "[0001]\t\tGPE1 Block Length : 00\n"); EFPRINTF(fp, "[0001]\t\tGPE1 Base Offset : 00\n"); EFPRINTF(fp, "[0001]\t\t_CST Support : 00\n"); EFPRINTF(fp, "[0002]\t\tC2 Latency : 0000\n"); EFPRINTF(fp, "[0002]\t\tC3 Latency : 0000\n"); EFPRINTF(fp, "[0002]\t\tCPU Cache Size : 0000\n"); EFPRINTF(fp, "[0002]\t\tCache Flush Stride : 0000\n"); EFPRINTF(fp, "[0001]\t\tDuty Cycle Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tDuty Cycle Width : 00\n"); EFPRINTF(fp, "[0001]\t\tRTC Day Alarm Index : 00\n"); EFPRINTF(fp, "[0001]\t\tRTC Month Alarm Index : 00\n"); EFPRINTF(fp, "[0001]\t\tRTC Century Index : 32\n"); EFPRINTF(fp, "[0002]\t\tBoot Flags (decoded below) : 0000\n"); EFPRINTF(fp, "\t\t\tLegacy Devices Supported (V2) : 0\n"); EFPRINTF(fp, "\t\t\t8042 Present on ports 60/64 (V2) : 0\n"); EFPRINTF(fp, "\t\t\tVGA Not Present (V4) : 1\n"); EFPRINTF(fp, "\t\t\tMSI Not Supported (V4) : 0\n"); EFPRINTF(fp, "\t\t\tPCIe ASPM Not Supported (V4) : 1\n"); EFPRINTF(fp, "\t\t\tCMOS RTC Not Present (V5) : 0\n"); EFPRINTF(fp, "[0001]\t\tReserved : 00\n"); EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000000\n"); EFPRINTF(fp, "\t\t\tWBINVD instruction is operational (V1) : 1\n"); EFPRINTF(fp, "\t\t\tWBINVD flushes all caches (V1) : 0\n"); EFPRINTF(fp, "\t\t\tAll CPUs support C1 (V1) : 1\n"); EFPRINTF(fp, "\t\t\tC2 works on MP system (V1) : 0\n"); EFPRINTF(fp, "\t\t\tControl Method Power Button (V1) : 0\n"); EFPRINTF(fp, "\t\t\tControl Method Sleep Button (V1) : 1\n"); EFPRINTF(fp, "\t\t\tRTC wake not in fixed reg space (V1) : 0\n"); EFPRINTF(fp, "\t\t\tRTC can wake system from S4 (V1) : 0\n"); EFPRINTF(fp, "\t\t\t32-bit PM Timer (V1) : 1\n"); EFPRINTF(fp, "\t\t\tDocking Supported (V1) : 0\n"); EFPRINTF(fp, "\t\t\tReset Register Supported (V2) : 1\n"); EFPRINTF(fp, "\t\t\tSealed Case (V3) : 0\n"); EFPRINTF(fp, "\t\t\tHeadless - No Video (V3) : 1\n"); EFPRINTF(fp, "\t\t\tUse native instr after SLP_TYPx (V3) : 0\n"); EFPRINTF(fp, "\t\t\tPCIEXP_WAK Bits Supported (V4) : 0\n"); EFPRINTF(fp, "\t\t\tUse Platform Timer (V4) : 0\n"); EFPRINTF(fp, "\t\t\tRTC_STS valid on S4 wake (V4) : 0\n"); EFPRINTF(fp, "\t\t\tRemote Power-on capable (V4) : 0\n"); EFPRINTF(fp, "\t\t\tUse APIC Cluster Model (V4) : 0\n"); EFPRINTF(fp, "\t\t\tUse APIC Physical Destination Mode (V4) : 1\n"); EFPRINTF(fp, "\t\t\tHardware Reduced (V5) : 0\n"); EFPRINTF(fp, "\t\t\tLow Power S0 Idle (V5) : 0\n"); 
EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tReset Register : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 08\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000CF9\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0001]\t\tValue to cause reset : 06\n"); EFPRINTF(fp, "[0002]\t\tARM Flags (decoded below): 0000\n"); EFPRINTF(fp, "\t\t\tPSCI Compliant : 0\n"); EFPRINTF(fp, "\t\t\tMust use HVC for PSCI : 0\n"); EFPRINTF(fp, "[0001]\t\tFADT Minor Revision : 01\n"); EFPRINTF(fp, "[0008]\t\tFACS Address : 00000000%08X\n", basl_acpi_base + FACS_OFFSET); EFPRINTF(fp, "[0008]\t\tDSDT Address : 00000000%08X\n", basl_acpi_base + DSDT_OFFSET); EFPRINTF(fp, "[0012]\t\tPM1A Event Block : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 20\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 02 [Word Access:16]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n", PM1A_EVT_ADDR); EFPRINTF(fp, "\n"); - + EFPRINTF(fp, "[0012]\t\tPM1B Event Block : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tPM1A Control Block : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 10\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 02 [Word Access:16]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n", PM1A_CNT_ADDR); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tPM1B Control Block : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tPM2 Control Block : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 08\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); EFPRINTF(fp, "\n"); /* Valid for bhyve */ EFPRINTF(fp, "[0012]\t\tPM Timer Block : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 20\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 03 [DWord Access:32]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n", IO_PMTMR); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tGPE0 Block : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : %02x\n", IO_GPE0_LEN * 8); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); EFPRINTF(fp, "[0008]\t\tAddress : %016X\n", IO_GPE0_BLK); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tGPE1 Block : [Generic Address Structure]\n"); EFPRINTF(fp, 
"[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tSleep Control Register : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 08\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tSleep Status Register : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 08\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); EFFLUSH(fp); return (0); err_exit: return (errno); } static int basl_fwrite_hpet(FILE *fp) { EFPRINTF(fp, "/*\n"); EFPRINTF(fp, " * bhyve HPET template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "[0004]\t\tSignature : \"HPET\"\n"); EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVHPET \"\n"); EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); /* iasl will fill in the compiler ID/revision fields */ EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0004]\t\tHardware Block ID : %08X\n", hpet_capabilities); EFPRINTF(fp, "[0012]\t\tTimer Block Register : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 00 [SystemMemory]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 00000000FED00000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0001]\t\tSequence Number : 00\n"); EFPRINTF(fp, "[0002]\t\tMinimum Clock Ticks : 0000\n"); EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n"); EFPRINTF(fp, "\t\t\t4K Page Protect : 1\n"); EFPRINTF(fp, "\t\t\t64K Page Protect : 0\n"); EFPRINTF(fp, "\n"); EFFLUSH(fp); return (0); err_exit: return (errno); } static int basl_fwrite_mcfg(FILE *fp) { EFPRINTF(fp, "/*\n"); EFPRINTF(fp, " * bhyve MCFG template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "[0004]\t\tSignature : \"MCFG\"\n"); EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVMCFG \"\n"); EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); /* iasl will fill in the compiler ID/revision fields */ EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); EFPRINTF(fp, "[0008]\t\tReserved : 0\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0008]\t\tBase Address : %016lX\n", pci_ecfg_base()); EFPRINTF(fp, "[0002]\t\tSegment Group Number : 0000\n"); EFPRINTF(fp, "[0001]\t\tStart Bus Number : 00\n"); EFPRINTF(fp, "[0001]\t\tEnd Bus Number : FF\n"); EFPRINTF(fp, "[0004]\t\tReserved : 0\n"); EFFLUSH(fp); return (0); err_exit: return (errno); } static int 
basl_fwrite_facs(FILE *fp) { EFPRINTF(fp, "/*\n"); EFPRINTF(fp, " * bhyve FACS template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "[0004]\t\tSignature : \"FACS\"\n"); EFPRINTF(fp, "[0004]\t\tLength : 00000040\n"); EFPRINTF(fp, "[0004]\t\tHardware Signature : 00000000\n"); EFPRINTF(fp, "[0004]\t\t32 Firmware Waking Vector : 00000000\n"); EFPRINTF(fp, "[0004]\t\tGlobal Lock : 00000000\n"); EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000000\n"); EFPRINTF(fp, "\t\t\tS4BIOS Support Present : 0\n"); EFPRINTF(fp, "\t\t\t64-bit Wake Supported (V2) : 0\n"); EFPRINTF(fp, "[0008]\t\t64 Firmware Waking Vector : 0000000000000000\n"); EFPRINTF(fp, "[0001]\t\tVersion : 02\n"); EFPRINTF(fp, "[0003]\t\tReserved : 000000\n"); EFPRINTF(fp, "[0004]\t\tOspmFlags (decoded below) : 00000000\n"); EFPRINTF(fp, "\t\t\t64-bit Wake Env Required (V2) : 0\n"); EFFLUSH(fp); return (0); - + err_exit: return (errno); } /* * Helper routines for writing to the DSDT from other modules. */ void dsdt_line(const char *fmt, ...) { va_list ap; if (dsdt_error != 0) return; if (strcmp(fmt, "") != 0) { if (dsdt_indent_level != 0) EFPRINTF(dsdt_fp, "%*c", dsdt_indent_level * 2, ' '); va_start(ap, fmt); if (vfprintf(dsdt_fp, fmt, ap) < 0) { va_end(ap); goto err_exit; } va_end(ap); } EFPRINTF(dsdt_fp, "\n"); return; err_exit: dsdt_error = errno; } void dsdt_indent(int levels) { dsdt_indent_level += levels; assert(dsdt_indent_level >= 0); } void dsdt_unindent(int levels) { assert(dsdt_indent_level >= levels); dsdt_indent_level -= levels; } void dsdt_fixed_ioport(uint16_t iobase, uint16_t length) { dsdt_line("IO (Decode16,"); dsdt_line(" 0x%04X, // Range Minimum", iobase); dsdt_line(" 0x%04X, // Range Maximum", iobase); dsdt_line(" 0x01, // Alignment"); dsdt_line(" 0x%02X, // Length", length); dsdt_line(" )"); } void dsdt_fixed_irq(uint8_t irq) { dsdt_line("IRQNoFlags ()"); dsdt_line(" {%d}", irq); } void dsdt_fixed_mem32(uint32_t base, uint32_t length) { dsdt_line("Memory32Fixed (ReadWrite,"); dsdt_line(" 0x%08X, // Address Base", base); dsdt_line(" 0x%08X, // Address Length", length); dsdt_line(" )"); } static int basl_fwrite_dsdt(FILE *fp) { dsdt_fp = fp; dsdt_error = 0; dsdt_indent_level = 0; dsdt_line("/*"); dsdt_line(" * bhyve DSDT template"); dsdt_line(" */"); dsdt_line("DefinitionBlock (\"bhyve_dsdt.aml\", \"DSDT\", 2," "\"BHYVE \", \"BVDSDT \", 0x00000001)"); dsdt_line("{"); dsdt_line(" Name (_S5, Package ()"); dsdt_line(" {"); dsdt_line(" 0x05,"); dsdt_line(" Zero,"); dsdt_line(" })"); pci_write_dsdt(); dsdt_line(""); dsdt_line(" Scope (_SB.PC00)"); dsdt_line(" {"); dsdt_line(" Device (HPET)"); dsdt_line(" {"); dsdt_line(" Name (_HID, EISAID(\"PNP0103\"))"); dsdt_line(" Name (_UID, 0)"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(4); dsdt_fixed_mem32(0xFED00000, 0x400); dsdt_unindent(4); dsdt_line(" })"); dsdt_line(" }"); dsdt_line(" }"); vmgenc_write_dsdt(); dsdt_line("}"); if (dsdt_error != 0) return (dsdt_error); EFFLUSH(fp); return (0); err_exit: return (errno); } static int basl_open(struct basl_fio *bf, int suffix) { int err; err = 0; if (suffix) { strlcpy(bf->f_name, basl_stemplate, MAXPATHLEN); bf->fd = mkstemps(bf->f_name, strlen(BHYVE_ASL_SUFFIX)); } else { strlcpy(bf->f_name, basl_template, MAXPATHLEN); bf->fd = mkstemp(bf->f_name); } if (bf->fd > 0) { bf->fp = fdopen(bf->fd, "w+"); if (bf->fp == NULL) { unlink(bf->f_name); close(bf->fd); } } else { err = 1; } return (err); } static void basl_close(struct basl_fio *bf) { if (!basl_keep_temps) unlink(bf->f_name); fclose(bf->fp); } 
static int basl_start(struct basl_fio *in, struct basl_fio *out) { int err; err = basl_open(in, 0); if (!err) { err = basl_open(out, 1); if (err) { basl_close(in); } } return (err); } static void basl_end(struct basl_fio *in, struct basl_fio *out) { basl_close(in); basl_close(out); } static int basl_load(struct vmctx *ctx, int fd, uint64_t off) { struct stat sb; void *gaddr; if (fstat(fd, &sb) < 0) return (errno); - + gaddr = paddr_guest2host(ctx, basl_acpi_base + off, sb.st_size); if (gaddr == NULL) return (EFAULT); if (read(fd, gaddr, sb.st_size) < 0) return (errno); return (0); } static int basl_compile(struct vmctx *ctx, int (*fwrite_section)(FILE *), uint64_t offset) { struct basl_fio io[2]; static char iaslbuf[3*MAXPATHLEN + 10]; char *fmt; int err; err = basl_start(&io[0], &io[1]); if (!err) { err = (*fwrite_section)(io[0].fp); if (!err) { /* * iasl sends the results of the compilation to * stdout. Shut this down by using the shell to * redirect stdout to /dev/null, unless the user * has requested verbose output for debugging * purposes */ fmt = basl_verbose_iasl ? "%s -p %s %s" : "/bin/sh -c \"%s -p %s %s\" 1> /dev/null"; - + snprintf(iaslbuf, sizeof(iaslbuf), fmt, BHYVE_ASL_COMPILER, io[1].f_name, io[0].f_name); err = system(iaslbuf); if (!err) { /* * Copy the aml output file into guest * memory at the specified location */ err = basl_load(ctx, io[1].fd, offset); } } basl_end(&io[0], &io[1]); } return (err); } static int basl_make_templates(void) { const char *tmpdir; int err; int len; err = 0; /* - * + * */ if ((tmpdir = getenv("BHYVE_TMPDIR")) == NULL || *tmpdir == '\0' || (tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') { tmpdir = _PATH_TMP; } len = strlen(tmpdir); if ((len + sizeof(BHYVE_ASL_TEMPLATE) + 1) < MAXPATHLEN) { strcpy(basl_template, tmpdir); while (len > 0 && basl_template[len - 1] == '/') len--; basl_template[len] = '/'; strcpy(&basl_template[len + 1], BHYVE_ASL_TEMPLATE); } else err = E2BIG; if (!err) { /* * len has been intialized (and maybe adjusted) above */ if ((len + sizeof(BHYVE_ASL_TEMPLATE) + 1 + sizeof(BHYVE_ASL_SUFFIX)) < MAXPATHLEN) { strcpy(basl_stemplate, tmpdir); basl_stemplate[len] = '/'; strcpy(&basl_stemplate[len + 1], BHYVE_ASL_TEMPLATE); len = strlen(basl_stemplate); strcpy(&basl_stemplate[len], BHYVE_ASL_SUFFIX); } else err = E2BIG; } return (err); } static struct { int (*wsect)(FILE *fp); uint64_t offset; } basl_ftables[] = { { basl_fwrite_rsdp, 0}, { basl_fwrite_rsdt, RSDT_OFFSET }, { basl_fwrite_xsdt, XSDT_OFFSET }, { basl_fwrite_madt, MADT_OFFSET }, { basl_fwrite_fadt, FADT_OFFSET }, { basl_fwrite_hpet, HPET_OFFSET }, { basl_fwrite_mcfg, MCFG_OFFSET }, { basl_fwrite_facs, FACS_OFFSET }, { basl_fwrite_dsdt, DSDT_OFFSET }, { NULL } }; int acpi_build(struct vmctx *ctx, int ncpu) { int err; int i; basl_ncpu = ncpu; err = vm_get_hpet_capabilities(ctx, &hpet_capabilities); if (err != 0) return (err); /* * For debug, allow the user to have iasl compiler output sent * to stdout rather than /dev/null */ if (getenv("BHYVE_ACPI_VERBOSE_IASL")) basl_verbose_iasl = 1; /* * Allow the user to keep the generated ASL files for debugging * instead of deleting them following use */ if (getenv("BHYVE_ACPI_KEEPTMPS")) basl_keep_temps = 1; i = 0; err = basl_make_templates(); /* * Run through all the ASL files, compiling them and * copying them into guest memory */ while (!err && basl_ftables[i].wsect != NULL) { err = basl_compile(ctx, basl_ftables[i].wsect, basl_ftables[i].offset); i++; } return (err); } diff --git a/usr.sbin/bhyve/audio.h 
b/usr.sbin/bhyve/audio.h index 2b559a43e5df..88f4dc8709c4 100644 --- a/usr.sbin/bhyve/audio.h +++ b/usr.sbin/bhyve/audio.h @@ -1,88 +1,88 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016 Alex Teaca * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ -#ifndef _AUDIO_EMUL_H_ +#ifndef _AUDIO_EMUL_H_ #define _AUDIO_EMUL_H_ #include #include /* * Audio Player data structures */ struct audio; struct audio_params { int channels; int format; int rate; }; /* * Audio Player API */ /* * audio_init - initialize an instance of audio player * @dev_name - the backend sound device used to play / capture * @dir - dir = 1 for write mode, dir = 0 for read mode * Returns NULL on error and the address of the audio player instance */ struct audio *audio_init(const char *dev_name, uint8_t dir); /* * audio_set_params - reset the sound device and set the audio params * @aud - the audio player to be configured * @params - the audio parameters to be set * Returns -1 on error and 0 on success */ int audio_set_params(struct audio *aud, struct audio_params *params); /* * audio_playback - plays samples to the sound device using blocking operations * @aud - the audio player used to play the samples * @buf - the buffer containing the samples * @count - the number of bytes in buffer * Returns -1 on error and 0 on success */ int audio_playback(struct audio *aud, const void *buf, size_t count); /* * audio_record - records samples from the sound device using blocking * operations. * @aud - the audio player used to capture the samples * @buf - the buffer to receive the samples * @count - the number of bytes to capture in buffer * Returns -1 on error and 0 on success */ int audio_record(struct audio *aud, void *buf, size_t count); #endif /* _AUDIO_EMUL_H_ */ diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index c38a4ee84056..9d2b9704ecec 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -1,1600 +1,1600 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #ifdef BHYVE_SNAPSHOT #include #include #endif #include #ifdef BHYVE_SNAPSHOT #include #endif #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #ifdef BHYVE_SNAPSHOT #include #endif #include #include #include #include #include #include #include #include #ifdef BHYVE_SNAPSHOT #include #include #include #endif #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include "bhyverun.h" #include "acpi.h" #include "atkbdc.h" #include "bootrom.h" #include "config.h" #include "inout.h" #include "debug.h" #include "fwctl.h" #include "gdb.h" #include "ioapic.h" #include "kernemu_dev.h" #include "mem.h" #include "mevent.h" #include "mptbl.h" #include "pci_emul.h" #include "pci_irq.h" #include "pci_lpc.h" #include "smbiostbl.h" #ifdef BHYVE_SNAPSHOT #include "snapshot.h" #endif #include "xmsr.h" #include "spinup_ap.h" #include "rtc.h" #include "vmgenc.h" #define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ #define MB (1024UL * 1024) #define GB (1024UL * MB) static const char * const vmx_exit_reason_desc[] = { [EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)", [EXIT_REASON_EXT_INTR] = "External interrupt", [EXIT_REASON_TRIPLE_FAULT] = "Triple fault", [EXIT_REASON_INIT] = "INIT signal", [EXIT_REASON_SIPI] = "Start-up IPI (SIPI)", [EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)", [EXIT_REASON_SMI] = "Other SMI", [EXIT_REASON_INTR_WINDOW] = "Interrupt window", [EXIT_REASON_NMI_WINDOW] = "NMI window", [EXIT_REASON_TASK_SWITCH] = "Task switch", [EXIT_REASON_CPUID] = "CPUID", [EXIT_REASON_GETSEC] = "GETSEC", [EXIT_REASON_HLT] = "HLT", [EXIT_REASON_INVD] = "INVD", [EXIT_REASON_INVLPG] = "INVLPG", [EXIT_REASON_RDPMC] = "RDPMC", [EXIT_REASON_RDTSC] = "RDTSC", [EXIT_REASON_RSM] = "RSM", [EXIT_REASON_VMCALL] = "VMCALL", [EXIT_REASON_VMCLEAR] = "VMCLEAR", [EXIT_REASON_VMLAUNCH] = "VMLAUNCH", [EXIT_REASON_VMPTRLD] = "VMPTRLD", [EXIT_REASON_VMPTRST] = "VMPTRST", [EXIT_REASON_VMREAD] = "VMREAD", [EXIT_REASON_VMRESUME] = "VMRESUME", [EXIT_REASON_VMWRITE] = "VMWRITE", [EXIT_REASON_VMXOFF] = "VMXOFF", [EXIT_REASON_VMXON] = "VMXON", [EXIT_REASON_CR_ACCESS] = "Control-register accesses", [EXIT_REASON_DR_ACCESS] = "MOV DR", [EXIT_REASON_INOUT] = "I/O instruction", [EXIT_REASON_RDMSR] = "RDMSR", [EXIT_REASON_WRMSR] = "WRMSR", 
[EXIT_REASON_INVAL_VMCS] = "VM-entry failure due to invalid guest state", [EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading", [EXIT_REASON_MWAIT] = "MWAIT", [EXIT_REASON_MTF] = "Monitor trap flag", [EXIT_REASON_MONITOR] = "MONITOR", [EXIT_REASON_PAUSE] = "PAUSE", [EXIT_REASON_MCE_DURING_ENTRY] = "VM-entry failure due to machine-check event", [EXIT_REASON_TPR] = "TPR below threshold", [EXIT_REASON_APIC_ACCESS] = "APIC access", [EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI", [EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR", [EXIT_REASON_LDTR_TR] = "Access to LDTR or TR", [EXIT_REASON_EPT_FAULT] = "EPT violation", [EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration", [EXIT_REASON_INVEPT] = "INVEPT", [EXIT_REASON_RDTSCP] = "RDTSCP", [EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired", [EXIT_REASON_INVVPID] = "INVVPID", [EXIT_REASON_WBINVD] = "WBINVD", [EXIT_REASON_XSETBV] = "XSETBV", [EXIT_REASON_APIC_WRITE] = "APIC write", [EXIT_REASON_RDRAND] = "RDRAND", [EXIT_REASON_INVPCID] = "INVPCID", [EXIT_REASON_VMFUNC] = "VMFUNC", [EXIT_REASON_ENCLS] = "ENCLS", [EXIT_REASON_RDSEED] = "RDSEED", [EXIT_REASON_PM_LOG_FULL] = "Page-modification log full", [EXIT_REASON_XSAVES] = "XSAVES", [EXIT_REASON_XRSTORS] = "XRSTORS" }; typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); int guest_ncpus; uint16_t cores, maxcpus, sockets, threads; int raw_stdio = 0; static char *progname; static const int BSP = 0; static cpuset_t cpumask; static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); static struct vm_exit vmexit[VM_MAXCPU]; struct bhyvestats { uint64_t vmexit_bogus; uint64_t vmexit_reqidle; uint64_t vmexit_hlt; uint64_t vmexit_pause; uint64_t vmexit_mtrap; uint64_t vmexit_inst_emul; uint64_t cpu_switch_rotate; uint64_t cpu_switch_direct; } stats; struct mt_vmm_info { pthread_t mt_thr; struct vmctx *mt_ctx; - int mt_vcpu; + int mt_vcpu; } mt_vmm_info[VM_MAXCPU]; static cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; static void usage(int code) { fprintf(stderr, "Usage: %s [-AaCDeHhPSuWwxY]\n" " %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n" " %*s [-G port] [-k config_file] [-l lpc] [-m mem] [-o var=value]\n" " %*s [-p vcpu:hostcpu] [-r file] [-s pci] [-U uuid] vmname\n" " -A: create ACPI tables\n" " -a: local apic is in xAPIC mode (deprecated)\n" " -C: include guest memory in core file\n" " -c: number of CPUs and/or topology specification\n" " -D: destroy on power-off\n" " -e: exit on unhandled I/O access\n" " -G: start a debug server\n" " -H: vmexit from the guest on HLT\n" " -h: help\n" " -k: key=value flat config file\n" " -l: LPC device configuration\n" " -m: memory size in MB\n" " -o: set config 'var' to 'value'\n" " -P: vmexit from the guest on pause\n" " -p: pin 'vcpu' to 'hostcpu'\n" #ifdef BHYVE_SNAPSHOT " -r: path to checkpoint file\n" #endif " -S: guest memory cannot be swapped\n" " -s: PCI slot config\n" " -U: UUID\n" " -u: RTC keeps UTC time\n" " -W: force virtio to use single-vector MSI\n" " -w: ignore unimplemented MSRs\n" " -x: local APIC is in x2APIC mode\n" " -Y: disable MPtable generation\n", progname, (int)strlen(progname), "", (int)strlen(progname), "", (int)strlen(progname), ""); exit(code); } /* * XXX This parser is known to have the following issues: * 1. It accepts null key=value tokens ",," as setting "cpus" to an * empty string. 
* * The acceptance of a null specification ('-c ""') is by design to match the * manual page syntax specification, this results in a topology of 1 vCPU. */ static int topology_parse(const char *opt) { char *cp, *str; if (*opt == '\0') { set_config_value("sockets", "1"); set_config_value("cores", "1"); set_config_value("threads", "1"); set_config_value("cpus", "1"); return (0); } str = strdup(opt); if (str == NULL) errx(4, "Failed to allocate memory"); while ((cp = strsep(&str, ",")) != NULL) { if (strncmp(cp, "cpus=", strlen("cpus=")) == 0) set_config_value("cpus", cp + strlen("cpus=")); else if (strncmp(cp, "sockets=", strlen("sockets=")) == 0) set_config_value("sockets", cp + strlen("sockets=")); else if (strncmp(cp, "cores=", strlen("cores=")) == 0) set_config_value("cores", cp + strlen("cores=")); else if (strncmp(cp, "threads=", strlen("threads=")) == 0) set_config_value("threads", cp + strlen("threads=")); #ifdef notyet /* Do not expose this until vmm.ko implements it */ else if (strncmp(cp, "maxcpus=", strlen("maxcpus=")) == 0) set_config_value("maxcpus", cp + strlen("maxcpus=")); #endif else if (strchr(cp, '=') != NULL) goto out; else set_config_value("cpus", cp); } free(str); return (0); out: free(str); return (-1); } static int parse_int_value(const char *key, const char *value, int minval, int maxval) { char *cp; long lval; errno = 0; lval = strtol(value, &cp, 0); if (errno != 0 || *cp != '\0' || cp == value || lval < minval || lval > maxval) errx(4, "Invalid value for %s: '%s'", key, value); return (lval); } /* * Set the sockets, cores, threads, and guest_cpus variables based on * the configured topology. * * The limits of UINT16_MAX are due to the types passed to * vm_set_topology(). vmm.ko may enforce tighter limits. */ static void calc_topolopgy(void) { const char *value; bool explicit_cpus; uint64_t ncpus; value = get_config_value("cpus"); if (value != NULL) { guest_ncpus = parse_int_value("cpus", value, 1, UINT16_MAX); explicit_cpus = true; } else { guest_ncpus = 1; explicit_cpus = false; } value = get_config_value("cores"); if (value != NULL) cores = parse_int_value("cores", value, 1, UINT16_MAX); else cores = 1; value = get_config_value("threads"); if (value != NULL) threads = parse_int_value("threads", value, 1, UINT16_MAX); else threads = 1; value = get_config_value("sockets"); if (value != NULL) sockets = parse_int_value("sockets", value, 1, UINT16_MAX); else sockets = guest_ncpus; /* * Compute sockets * cores * threads avoiding overflow. The * range check above insures these are 16 bit values. */ ncpus = (uint64_t)sockets * cores * threads; if (ncpus > UINT16_MAX) errx(4, "Computed number of vCPUs too high: %ju", (uintmax_t)ncpus); if (explicit_cpus) { if (guest_ncpus != ncpus) errx(4, "Topology (%d sockets, %d cores, %d threads) " "does not match %d vCPUs", sockets, cores, threads, guest_ncpus); } else guest_ncpus = ncpus; } static int pincpu_parse(const char *opt) { const char *value; char *newval; char key[16]; int vcpu, pcpu; if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { fprintf(stderr, "invalid format: %s\n", opt); return (-1); } if (vcpu < 0 || vcpu >= VM_MAXCPU) { fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", vcpu, VM_MAXCPU - 1); return (-1); } if (pcpu < 0 || pcpu >= CPU_SETSIZE) { fprintf(stderr, "hostcpu '%d' outside valid range from " "0 to %d\n", pcpu, CPU_SETSIZE - 1); return (-1); } snprintf(key, sizeof(key), "vcpu.%d.cpuset", vcpu); value = get_config_value(key); if (asprintf(&newval, "%s%s%d", value != NULL ? 
value : "", value != NULL ? "," : "", pcpu) == -1) { perror("failed to build new cpuset string"); return (-1); } set_config_value(key, newval); free(newval); return (0); } static void parse_cpuset(int vcpu, const char *list, cpuset_t *set) { char *cp, *token; int pcpu, start; CPU_ZERO(set); start = -1; token = __DECONST(char *, list); for (;;) { pcpu = strtoul(token, &cp, 0); if (cp == token) errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list); if (pcpu < 0 || pcpu >= CPU_SETSIZE) errx(4, "hostcpu '%d' outside valid range from 0 to %d", pcpu, CPU_SETSIZE - 1); switch (*cp) { case ',': case '\0': if (start >= 0) { if (start > pcpu) errx(4, "Invalid hostcpu range %d-%d", start, pcpu); while (start < pcpu) { CPU_SET(start, vcpumap[vcpu]); start++; } start = -1; } CPU_SET(pcpu, vcpumap[vcpu]); break; case '-': if (start >= 0) errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list); start = pcpu; break; default: errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list); } if (*cp == '\0') break; token = cp + 1; } } static void build_vcpumaps(void) { char key[16]; const char *value; int vcpu; for (vcpu = 0; vcpu < guest_ncpus; vcpu++) { snprintf(key, sizeof(key), "vcpu.%d.cpuset", vcpu); value = get_config_value(key); if (value == NULL) continue; vcpumap[vcpu] = malloc(sizeof(cpuset_t)); if (vcpumap[vcpu] == NULL) err(4, "Failed to allocate cpuset for vcpu %d", vcpu); parse_cpuset(vcpu, value, vcpumap[vcpu]); } } void vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, int errcode) { struct vmctx *ctx; int error, restart_instruction; ctx = arg; restart_instruction = 1; error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode, restart_instruction); assert(error == 0); } void * paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) { return (vm_map_gpa(ctx, gaddr, len)); } #ifdef BHYVE_SNAPSHOT uintptr_t paddr_host2guest(struct vmctx *ctx, void *addr) { return (vm_rev_map_gpa(ctx, addr)); } #endif int fbsdrun_virtio_msix(void) { return (get_config_bool_default("virtio_msix", true)); } static void * fbsdrun_start_thread(void *param) { char tname[MAXCOMLEN + 1]; struct mt_vmm_info *mtp; int vcpu; mtp = param; vcpu = mtp->mt_vcpu; snprintf(tname, sizeof(tname), "vcpu %d", vcpu); pthread_set_name_np(mtp->mt_thr, tname); #ifdef BHYVE_SNAPSHOT checkpoint_cpu_add(vcpu); #endif gdb_cpu_add(vcpu); vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); /* not reached */ exit(1); return (NULL); } void fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) { int error; assert(fromcpu == BSP); /* * The 'newcpu' must be activated in the context of 'fromcpu'. If * vm_activate_cpu() is delayed until newcpu's pthread starts running * then vmm.ko is out-of-sync with bhyve and this can create a race * with vm_suspend(). 
*/ error = vm_activate_cpu(ctx, newcpu); if (error != 0) err(EX_OSERR, "could not activate CPU %d", newcpu); CPU_SET_ATOMIC(newcpu, &cpumask); /* * Set up the vmexit struct to allow execution to start * at the given RIP */ vmexit[newcpu].rip = rip; vmexit[newcpu].inst_length = 0; mt_vmm_info[newcpu].mt_ctx = ctx; mt_vmm_info[newcpu].mt_vcpu = newcpu; error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, fbsdrun_start_thread, &mt_vmm_info[newcpu]); assert(error == 0); } static int fbsdrun_deletecpu(struct vmctx *ctx, int vcpu) { if (!CPU_ISSET(vcpu, &cpumask)) { fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); exit(4); } CPU_CLR_ATOMIC(vcpu, &cpumask); return (CPU_EMPTY(&cpumask)); } static int vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, uint32_t eax) { #if BHYVE_DEBUG /* * put guest-driven debug here */ #endif return (VMEXIT_CONTINUE); } static int vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { int error; int bytes, port, in, out; int vcpu; vcpu = *pvcpu; port = vme->u.inout.port; bytes = vme->u.inout.bytes; in = vme->u.inout.in; out = !in; /* Extra-special case of host notifications */ if (out && port == GUEST_NIO_PORT) { error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); return (error); } error = emulate_inout(ctx, vcpu, vme); if (error) { fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", in ? "in" : "out", bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port, vmexit->rip); return (VMEXIT_ABORT); } else { return (VMEXIT_CONTINUE); } } static int vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { uint64_t val; uint32_t eax, edx; int error; val = 0; error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); if (error != 0) { fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", vme->u.msr.code, *pvcpu); if (get_config_bool("x86.strictmsr")) { vm_inject_gp(ctx, *pvcpu); return (VMEXIT_CONTINUE); } } eax = val; error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); assert(error == 0); edx = val >> 32; error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); assert(error == 0); return (VMEXIT_CONTINUE); } static int vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { int error; error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); if (error != 0) { fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", vme->u.msr.code, vme->u.msr.wval, *pvcpu); if (get_config_bool("x86.strictmsr")) { vm_inject_gp(ctx, *pvcpu); return (VMEXIT_CONTINUE); } } return (VMEXIT_CONTINUE); } static int vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { (void)spinup_ap(ctx, *pvcpu, vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); return (VMEXIT_CONTINUE); } #define DEBUG_EPT_MISCONFIG #ifdef DEBUG_EPT_MISCONFIG #define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; static int ept_misconfig_ptenum; #endif static const char * vmexit_vmx_desc(uint32_t exit_reason) { if (exit_reason >= nitems(vmx_exit_reason_desc) || vmx_exit_reason_desc[exit_reason] == NULL) return ("Unknown"); return (vmx_exit_reason_desc[exit_reason]); } static int vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { fprintf(stderr, "vm exit[%d]\n", *pvcpu); fprintf(stderr, "\treason\t\tVMX\n"); fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); fprintf(stderr, "\texit_reason\t%u (%s)\n", 
vmexit->u.vmx.exit_reason, vmexit_vmx_desc(vmexit->u.vmx.exit_reason)); fprintf(stderr, "\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification); fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); #ifdef DEBUG_EPT_MISCONFIG if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { vm_get_register(ctx, *pvcpu, VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), &ept_misconfig_gpa); vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, &ept_misconfig_ptenum); fprintf(stderr, "\tEPT misconfiguration:\n"); fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", ept_misconfig_ptenum, ept_misconfig_pte[0], ept_misconfig_pte[1], ept_misconfig_pte[2], ept_misconfig_pte[3]); } #endif /* DEBUG_EPT_MISCONFIG */ return (VMEXIT_ABORT); } static int vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { fprintf(stderr, "vm exit[%d]\n", *pvcpu); fprintf(stderr, "\treason\t\tSVM\n"); fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode); fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1); fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2); return (VMEXIT_ABORT); } static int vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { assert(vmexit->inst_length == 0); stats.vmexit_bogus++; return (VMEXIT_CONTINUE); } static int vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { assert(vmexit->inst_length == 0); stats.vmexit_reqidle++; return (VMEXIT_CONTINUE); } static int vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { stats.vmexit_hlt++; /* * Just continue execution with the next instruction. We use * the HLT VM exit as a way to be friendly with the host * scheduler. */ return (VMEXIT_CONTINUE); } static int vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { stats.vmexit_pause++; return (VMEXIT_CONTINUE); } static int vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { assert(vmexit->inst_length == 0); stats.vmexit_mtrap++; #ifdef BHYVE_SNAPSHOT checkpoint_cpu_suspend(*pvcpu); #endif gdb_cpu_mtrap(*pvcpu); #ifdef BHYVE_SNAPSHOT checkpoint_cpu_resume(*pvcpu); #endif return (VMEXIT_CONTINUE); } static int vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { int err, i, cs_d; struct vie *vie; enum vm_cpu_mode mode; stats.vmexit_inst_emul++; vie = &vmexit->u.inst_emul.vie; if (!vie->decoded) { /* * Attempt to decode in userspace as a fallback. This allows * updating instruction decode in bhyve without rebooting the * kernel (rapid prototyping), albeit with much slower * emulation. 
*/ vie_restart(vie); mode = vmexit->u.inst_emul.paging.cpu_mode; cs_d = vmexit->u.inst_emul.cs_d; if (vmm_decode_instruction(mode, cs_d, vie) != 0) goto fail; if (vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RIP, vmexit->rip + vie->num_processed) != 0) goto fail; } err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, vie, &vmexit->u.inst_emul.paging); if (err) { if (err == ESRCH) { EPRINTLN("Unhandled memory access to 0x%lx\n", vmexit->u.inst_emul.gpa); } goto fail; } return (VMEXIT_CONTINUE); fail: fprintf(stderr, "Failed to emulate instruction sequence [ "); for (i = 0; i < vie->num_valid; i++) fprintf(stderr, "%02x", vie->inst[i]); FPRINTLN(stderr, " ] at 0x%lx", vmexit->rip); return (VMEXIT_ABORT); } static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; static int vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { enum vm_suspend_how how; how = vmexit->u.suspended.how; fbsdrun_deletecpu(ctx, *pvcpu); if (*pvcpu != BSP) { pthread_mutex_lock(&resetcpu_mtx); pthread_cond_signal(&resetcpu_cond); pthread_mutex_unlock(&resetcpu_mtx); pthread_exit(NULL); } pthread_mutex_lock(&resetcpu_mtx); while (!CPU_EMPTY(&cpumask)) { pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); } pthread_mutex_unlock(&resetcpu_mtx); switch (how) { case VM_SUSPEND_RESET: exit(0); case VM_SUSPEND_POWEROFF: if (get_config_bool_default("destroy_on_poweroff", false)) vm_destroy(ctx); exit(1); case VM_SUSPEND_HALT: exit(2); case VM_SUSPEND_TRIPLEFAULT: exit(3); default: fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); exit(100); } return (0); /* NOTREACHED */ } static int vmexit_debug(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { #ifdef BHYVE_SNAPSHOT checkpoint_cpu_suspend(*pvcpu); #endif gdb_cpu_suspend(*pvcpu); #ifdef BHYVE_SNAPSHOT checkpoint_cpu_resume(*pvcpu); #endif return (VMEXIT_CONTINUE); } static int vmexit_breakpoint(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { gdb_cpu_breakpoint(*pvcpu, vmexit); return (VMEXIT_CONTINUE); } static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_INOUT] = vmexit_inout, [VM_EXITCODE_INOUT_STR] = vmexit_inout, [VM_EXITCODE_VMX] = vmexit_vmx, [VM_EXITCODE_SVM] = vmexit_svm, [VM_EXITCODE_BOGUS] = vmexit_bogus, [VM_EXITCODE_REQIDLE] = vmexit_reqidle, [VM_EXITCODE_RDMSR] = vmexit_rdmsr, [VM_EXITCODE_WRMSR] = vmexit_wrmsr, [VM_EXITCODE_MTRAP] = vmexit_mtrap, [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, [VM_EXITCODE_SUSPENDED] = vmexit_suspend, [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, [VM_EXITCODE_DEBUG] = vmexit_debug, [VM_EXITCODE_BPT] = vmexit_breakpoint, }; static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) { int error, rc; enum vm_exitcode exitcode; cpuset_t active_cpus; if (vcpumap[vcpu] != NULL) { error = pthread_setaffinity_np(pthread_self(), sizeof(cpuset_t), vcpumap[vcpu]); assert(error == 0); } error = vm_active_cpus(ctx, &active_cpus); assert(CPU_ISSET(vcpu, &active_cpus)); error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); assert(error == 0); while (1) { error = vm_run(ctx, vcpu, &vmexit[vcpu]); if (error != 0) break; exitcode = vmexit[vcpu].exitcode; if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", exitcode); exit(4); } rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); switch (rc) { case VMEXIT_CONTINUE: break; case VMEXIT_ABORT: abort(); default: exit(4); } } fprintf(stderr, "vm_run 
error %d, errno %d\n", error, errno); } static int num_vcpus_allowed(struct vmctx *ctx) { int tmp, error; error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); /* * The guest is allowed to spinup more than one processor only if the * UNRESTRICTED_GUEST capability is available. */ if (error == 0) return (VM_MAXCPU); else return (1); } void fbsdrun_set_capabilities(struct vmctx *ctx, int cpu) { int err, tmp; if (get_config_bool_default("x86.vmexit_on_hlt", false)) { err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); if (err < 0) { fprintf(stderr, "VM exit on HLT not supported\n"); exit(4); } vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); if (cpu == BSP) handler[VM_EXITCODE_HLT] = vmexit_hlt; } if (get_config_bool_default("x86.vmexit_on_pause", false)) { /* * pause exit support required for this mode */ err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); if (err < 0) { fprintf(stderr, "SMP mux requested, no pause support\n"); exit(4); } vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); if (cpu == BSP) handler[VM_EXITCODE_PAUSE] = vmexit_pause; } if (get_config_bool_default("x86.x2apic", false)) err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); else err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); if (err) { fprintf(stderr, "Unable to set x2apic state (%d)\n", err); exit(4); } vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); } static struct vmctx * do_open(const char *vmname) { struct vmctx *ctx; int error; bool reinit, romboot; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; - const cap_ioctl_t *cmds; + const cap_ioctl_t *cmds; size_t ncmds; #endif reinit = romboot = false; if (lpc_bootrom()) romboot = true; error = vm_create(vmname); if (error) { if (errno == EEXIST) { if (romboot) { reinit = true; } else { /* * The virtual machine has been setup by the * userspace bootloader. */ } } else { perror("vm_create"); exit(4); } } else { if (!romboot) { /* * If the virtual machine was just created then a * bootrom must be configured to boot it. 
*/ fprintf(stderr, "virtual machine cannot be booted\n"); exit(4); } } ctx = vm_open(vmname); if (ctx == NULL) { perror("vm_open"); exit(4); } #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW); - if (caph_rights_limit(vm_get_device_fd(ctx), &rights) == -1) + if (caph_rights_limit(vm_get_device_fd(ctx), &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); vm_get_ioctls(&ncmds); cmds = vm_get_ioctls(NULL); if (cmds == NULL) errx(EX_OSERR, "out of memory"); if (caph_ioctls_limit(vm_get_device_fd(ctx), cmds, ncmds) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); free((cap_ioctl_t *)cmds); #endif - + if (reinit) { error = vm_reinit(ctx); if (error) { perror("vm_reinit"); exit(4); } } error = vm_set_topology(ctx, sockets, cores, threads, maxcpus); if (error) errx(EX_OSERR, "vm_set_topology"); return (ctx); } void spinup_vcpu(struct vmctx *ctx, int vcpu) { int error; uint64_t rip; error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); assert(error == 0); fbsdrun_set_capabilities(ctx, vcpu); error = vm_set_capability(ctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, 1); assert(error == 0); fbsdrun_addcpu(ctx, BSP, vcpu, rip); } static bool parse_config_option(const char *option) { const char *value; char *path; value = strchr(option, '='); if (value == NULL || value[1] == '\0') return (false); path = strndup(option, value - option); if (path == NULL) err(4, "Failed to allocate memory"); set_config_value(path, value + 1); return (true); } static void parse_simple_config_file(const char *path) { FILE *fp; char *line, *cp; size_t linecap; unsigned int lineno; fp = fopen(path, "r"); if (fp == NULL) err(4, "Failed to open configuration file %s", path); line = NULL; linecap = 0; lineno = 1; for (lineno = 1; getline(&line, &linecap, fp) > 0; lineno++) { if (*line == '#' || *line == '\n') continue; cp = strchr(line, '\n'); if (cp != NULL) *cp = '\0'; if (!parse_config_option(line)) errx(4, "%s line %u: invalid config option '%s'", path, lineno, line); } free(line); fclose(fp); } static void parse_gdb_options(char *optarg) { const char *sport; char *colon; if (optarg[0] == 'w') { set_config_bool("gdb.wait", true); optarg++; } colon = strrchr(optarg, ':'); if (colon == NULL) { sport = optarg; } else { *colon = '\0'; colon++; sport = colon; set_config_value("gdb.address", optarg); } set_config_value("gdb.port", sport); } static void set_defaults(void) { set_config_bool("acpi_tables", false); set_config_value("memory.size", "256M"); set_config_bool("x86.strictmsr", true); } int main(int argc, char *argv[]) { int c, error, err; int max_vcpus, memflags; struct vmctx *ctx; uint64_t rip; size_t memsize; const char *value, *vmname; char *optstr; #ifdef BHYVE_SNAPSHOT char *restore_file; struct restore_state rstate; int vcpu; restore_file = NULL; #endif init_config(); set_defaults(); progname = basename(argv[0]); #ifdef BHYVE_SNAPSHOT optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:U:r:"; #else optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:U:"; #endif while ((c = getopt(argc, argv, optstr)) != -1) { switch (c) { case 'a': set_config_bool("x86.x2apic", false); break; case 'A': set_config_bool("acpi_tables", true); break; case 'D': set_config_bool("destroy_on_poweroff", true); break; case 'p': if (pincpu_parse(optarg) != 0) { errx(EX_USAGE, "invalid vcpu pinning " "configuration '%s'", optarg); } break; case 'c': if (topology_parse(optarg) != 0) { errx(EX_USAGE, "invalid cpu topology " "'%s'", optarg); } break; case 'C': set_config_bool("memory.guest_in_core", true); break; case 
'G': parse_gdb_options(optarg); break; case 'k': parse_simple_config_file(optarg); break; case 'l': if (strncmp(optarg, "help", strlen(optarg)) == 0) { lpc_print_supported_devices(); exit(0); } else if (lpc_device_parse(optarg) != 0) { errx(EX_USAGE, "invalid lpc device " "configuration '%s'", optarg); } break; #ifdef BHYVE_SNAPSHOT case 'r': restore_file = optarg; break; #endif case 's': if (strncmp(optarg, "help", strlen(optarg)) == 0) { pci_print_supported_devices(); exit(0); } else if (pci_parse_slot(optarg) != 0) exit(4); else break; case 'S': set_config_bool("memory.wired", true); break; case 'm': set_config_value("memory.size", optarg); break; case 'o': if (!parse_config_option(optarg)) errx(EX_USAGE, "invalid configuration option '%s'", optarg); break; case 'H': set_config_bool("x86.vmexit_on_hlt", true); break; case 'I': /* * The "-I" option was used to add an ioapic to the * virtual machine. * * An ioapic is now provided unconditionally for each * virtual machine and this option is now deprecated. */ break; case 'P': set_config_bool("x86.vmexit_on_pause", true); break; case 'e': set_config_bool("x86.strictio", true); break; case 'u': set_config_bool("rtc.use_localtime", false); break; case 'U': set_config_value("uuid", optarg); break; case 'w': set_config_bool("x86.strictmsr", false); break; case 'W': set_config_bool("virtio_msix", false); break; case 'x': set_config_bool("x86.x2apic", true); break; case 'Y': set_config_bool("x86.mptable", false); break; case 'h': - usage(0); + usage(0); default: usage(1); } } argc -= optind; argv += optind; if (argc > 1) usage(1); #ifdef BHYVE_SNAPSHOT if (restore_file != NULL) { error = load_restore_file(restore_file, &rstate); if (error) { fprintf(stderr, "Failed to read checkpoint info from " "file: '%s'.\n", restore_file); exit(1); } vmname = lookup_vmname(&rstate); if (vmname != NULL) set_config_value("name", vmname); } #endif if (argc == 1) set_config_value("name", argv[0]); vmname = get_config_value("name"); if (vmname == NULL) usage(1); if (get_config_bool_default("config.dump", false)) { dump_config(); exit(1); } calc_topolopgy(); build_vcpumaps(); value = get_config_value("memory.size"); error = vm_parse_memsize(value, &memsize); if (error) errx(EX_USAGE, "invalid memsize '%s'", value); ctx = do_open(vmname); #ifdef BHYVE_SNAPSHOT if (restore_file != NULL) { guest_ncpus = lookup_guest_ncpus(&rstate); memflags = lookup_memflags(&rstate); memsize = lookup_memsize(&rstate); } if (guest_ncpus < 1) { fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); exit(1); } #endif max_vcpus = num_vcpus_allowed(ctx); if (guest_ncpus > max_vcpus) { fprintf(stderr, "%d vCPUs requested but only %d available\n", guest_ncpus, max_vcpus); exit(4); } fbsdrun_set_capabilities(ctx, BSP); memflags = 0; if (get_config_bool_default("memory.wired", false)) memflags |= VM_MEM_F_WIRED; if (get_config_bool_default("memory.guest_in_core", false)) memflags |= VM_MEM_F_INCORE; vm_set_memflags(ctx, memflags); err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); if (err) { fprintf(stderr, "Unable to setup memory (%d)\n", errno); exit(4); } error = init_msr(); if (error) { fprintf(stderr, "init_msr error %d", error); exit(4); } init_mem(); init_inout(); kernemu_dev_init(); init_bootrom(ctx); atkbdc_init(ctx); pci_irq_init(ctx); ioapic_init(ctx); rtc_init(ctx); sci_init(ctx); /* * Exit if a device emulation finds an error in its initilization */ if (init_pci(ctx) != 0) { perror("device emulation initialization error"); exit(4); } /* * Initialize after PCI, to allow a 
bootrom file to reserve the high * region. */ if (get_config_bool("acpi_tables")) vmgenc_init(ctx); init_gdb(ctx); if (lpc_bootrom()) { if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) { fprintf(stderr, "ROM boot failed: unrestricted guest " "capability not available\n"); exit(4); } error = vcpu_reset(ctx, BSP); assert(error == 0); } #ifdef BHYVE_SNAPSHOT if (restore_file != NULL) { fprintf(stdout, "Pausing pci devs...\r\n"); if (vm_pause_user_devs(ctx) != 0) { fprintf(stderr, "Failed to pause PCI device state.\n"); exit(1); } fprintf(stdout, "Restoring vm mem...\r\n"); if (restore_vm_mem(ctx, &rstate) != 0) { fprintf(stderr, "Failed to restore VM memory.\n"); exit(1); } fprintf(stdout, "Restoring pci devs...\r\n"); if (vm_restore_user_devs(ctx, &rstate) != 0) { fprintf(stderr, "Failed to restore PCI device state.\n"); exit(1); } fprintf(stdout, "Restoring kernel structs...\r\n"); if (vm_restore_kern_structs(ctx, &rstate) != 0) { fprintf(stderr, "Failed to restore kernel structs.\n"); exit(1); } fprintf(stdout, "Resuming pci devs...\r\n"); if (vm_resume_user_devs(ctx) != 0) { fprintf(stderr, "Failed to resume PCI device state.\n"); exit(1); } } #endif error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); assert(error == 0); /* * build the guest tables, MP etc. */ if (get_config_bool_default("x86.mptable", true)) { error = mptable_build(ctx, guest_ncpus); if (error) { perror("error to build the guest tables"); exit(4); } } error = smbios_build(ctx); assert(error == 0); if (get_config_bool("acpi_tables")) { error = acpi_build(ctx, guest_ncpus); assert(error == 0); } if (lpc_bootrom()) fwctl_init(); /* * Change the proc title to include the VM name. */ setproctitle("%s", vmname); #ifndef WITHOUT_CAPSICUM caph_cache_catpages(); if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); if (caph_enter() == -1) errx(EX_OSERR, "cap_enter() failed"); #endif #ifdef BHYVE_SNAPSHOT if (restore_file != NULL) destroy_restore_state(&rstate); /* initialize mutex/cond variables */ init_snapshot(); /* * checkpointing thread for communication with bhyvectl */ if (init_checkpoint_thread(ctx) < 0) printf("Failed to start checkpoint thread!\r\n"); if (restore_file != NULL) vm_restore_time(ctx); #endif /* * Add CPU 0 */ fbsdrun_addcpu(ctx, BSP, BSP, rip); #ifdef BHYVE_SNAPSHOT /* * If we restore a VM, start all vCPUs now (including APs), otherwise, * let the guest OS to spin them up later via vmexits. */ if (restore_file != NULL) { for (vcpu = 0; vcpu < guest_ncpus; vcpu++) { if (vcpu == BSP) continue; fprintf(stdout, "spinning up vcpu no %d...\r\n", vcpu); spinup_vcpu(ctx, vcpu); } } #endif /* * Head off to the main event dispatch loop */ mevent_dispatch(); exit(4); } diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c index d9813f30037f..4003bc438a58 100644 --- a/usr.sbin/bhyve/block_if.c +++ b/usr.sbin/bhyve/block_if.c @@ -1,1089 +1,1089 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 Peter Grehan * All rights reserved. * Copyright 2020 Joyent, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "config.h" #include "debug.h" #include "mevent.h" #include "pci_emul.h" #include "block_if.h" #define BLOCKIF_SIG 0xb109b109 #define BLOCKIF_NUMTHR 8 #define BLOCKIF_MAXREQ (BLOCKIF_RING_MAX + BLOCKIF_NUMTHR) enum blockop { BOP_READ, BOP_WRITE, BOP_FLUSH, BOP_DELETE }; enum blockstat { BST_FREE, BST_BLOCK, BST_PEND, BST_BUSY, BST_DONE }; struct blockif_elem { TAILQ_ENTRY(blockif_elem) be_link; struct blockif_req *be_req; enum blockop be_op; enum blockstat be_status; pthread_t be_tid; off_t be_block; }; struct blockif_ctxt { int bc_magic; int bc_fd; int bc_ischr; int bc_isgeom; int bc_candelete; int bc_rdonly; off_t bc_size; int bc_sectsz; int bc_psectsz; int bc_psectoff; int bc_closing; int bc_paused; int bc_work_count; pthread_t bc_btid[BLOCKIF_NUMTHR]; pthread_mutex_t bc_mtx; pthread_cond_t bc_cond; pthread_cond_t bc_paused_cond; pthread_cond_t bc_work_done_cond; blockif_resize_cb *bc_resize_cb; void *bc_resize_cb_arg; struct mevent *bc_resize_event; /* Request elements and free/pending/busy queues */ - TAILQ_HEAD(, blockif_elem) bc_freeq; + TAILQ_HEAD(, blockif_elem) bc_freeq; TAILQ_HEAD(, blockif_elem) bc_pendq; TAILQ_HEAD(, blockif_elem) bc_busyq; struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; }; static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; struct blockif_sig_elem { pthread_mutex_t bse_mtx; pthread_cond_t bse_cond; int bse_pending; struct blockif_sig_elem *bse_next; }; static struct blockif_sig_elem *blockif_bse_head; static int blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, enum blockop op) { struct blockif_elem *be, *tbe; off_t off; int i; be = TAILQ_FIRST(&bc->bc_freeq); assert(be != NULL); assert(be->be_status == BST_FREE); TAILQ_REMOVE(&bc->bc_freeq, be, be_link); be->be_req = breq; be->be_op = op; switch (op) { case BOP_READ: case BOP_WRITE: case BOP_DELETE: off = breq->br_offset; for (i = 0; i < breq->br_iovcnt; i++) off += breq->br_iov[i].iov_len; break; default: off = OFF_MAX; } be->be_block = off; TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { if (tbe->be_block == breq->br_offset) break; } if (tbe == NULL) { TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) { if (tbe->be_block == breq->br_offset) break; } } if (tbe == NULL) be->be_status = BST_PEND; else be->be_status = BST_BLOCK; 
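	/*
	 * Descriptive note on the status chosen above: a new request that
	 * begins exactly where an earlier pending or in-flight request ends
	 * (tbe->be_block holds that request's ending offset) is queued as
	 * BST_BLOCK and is skipped by blockif_dequeue() until
	 * blockif_complete() promotes it back to BST_PEND.  The return value
	 * below reports whether the element is immediately runnable, which
	 * blockif_request() uses to decide whether a worker thread must be
	 * woken via bc_cond.
	 */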
TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); return (be->be_status == BST_PEND); } static int blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) { struct blockif_elem *be; TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { if (be->be_status == BST_PEND) break; assert(be->be_status == BST_BLOCK); } if (be == NULL) return (0); TAILQ_REMOVE(&bc->bc_pendq, be, be_link); be->be_status = BST_BUSY; be->be_tid = t; TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); *bep = be; return (1); } static void blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) { struct blockif_elem *tbe; if (be->be_status == BST_DONE || be->be_status == BST_BUSY) TAILQ_REMOVE(&bc->bc_busyq, be, be_link); else TAILQ_REMOVE(&bc->bc_pendq, be, be_link); TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { if (tbe->be_req->br_offset == be->be_block) tbe->be_status = BST_PEND; } be->be_tid = 0; be->be_status = BST_FREE; be->be_req = NULL; TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); } static int blockif_flush_bc(struct blockif_ctxt *bc) { if (bc->bc_ischr) { if (ioctl(bc->bc_fd, DIOCGFLUSH)) return (errno); } else if (fsync(bc->bc_fd)) return (errno); return (0); } static void blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) { struct blockif_req *br; off_t arg[2]; ssize_t clen, len, off, boff, voff; int i, err; struct spacectl_range range; br = be->be_req; if (br->br_iovcnt <= 1) buf = NULL; err = 0; switch (be->be_op) { case BOP_READ: if (buf == NULL) { if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, br->br_offset)) < 0) err = errno; else br->br_resid -= len; break; } i = 0; off = voff = 0; while (br->br_resid > 0) { len = MIN(br->br_resid, MAXPHYS); if (pread(bc->bc_fd, buf, len, br->br_offset + off) < 0) { err = errno; break; } boff = 0; do { clen = MIN(len - boff, br->br_iov[i].iov_len - voff); memcpy(br->br_iov[i].iov_base + voff, buf + boff, clen); if (clen < br->br_iov[i].iov_len - voff) voff += clen; else { i++; voff = 0; } boff += clen; } while (boff < len); off += len; br->br_resid -= len; } break; case BOP_WRITE: if (bc->bc_rdonly) { err = EROFS; break; } if (buf == NULL) { if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, br->br_offset)) < 0) err = errno; else br->br_resid -= len; break; } i = 0; off = voff = 0; while (br->br_resid > 0) { len = MIN(br->br_resid, MAXPHYS); boff = 0; do { clen = MIN(len - boff, br->br_iov[i].iov_len - voff); memcpy(buf + boff, br->br_iov[i].iov_base + voff, clen); if (clen < br->br_iov[i].iov_len - voff) voff += clen; else { i++; voff = 0; } boff += clen; } while (boff < len); if (pwrite(bc->bc_fd, buf, len, br->br_offset + off) < 0) { err = errno; break; } off += len; br->br_resid -= len; } break; case BOP_FLUSH: err = blockif_flush_bc(bc); break; case BOP_DELETE: if (!bc->bc_candelete) err = EOPNOTSUPP; else if (bc->bc_rdonly) err = EROFS; else if (bc->bc_ischr) { arg[0] = br->br_offset; arg[1] = br->br_resid; if (ioctl(bc->bc_fd, DIOCGDELETE, arg)) err = errno; else br->br_resid = 0; } else { range.r_offset = br->br_offset; range.r_len = br->br_resid; while (range.r_len > 0) { if (fspacectl(bc->bc_fd, SPACECTL_DEALLOC, &range, 0, &range) != 0) { err = errno; break; } } if (err == 0) br->br_resid = 0; } break; default: err = EINVAL; break; } be->be_status = BST_DONE; (*br->br_callback)(br, err); } static void * blockif_thr(void *arg) { struct blockif_ctxt *bc; struct blockif_elem *be; pthread_t t; uint8_t *buf; bc = arg; if (bc->bc_isgeom) buf = malloc(MAXPHYS); else buf = NULL; t = pthread_self(); 
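	/*
	 * Worker loop shared by the BLOCKIF_NUMTHR threads: runnable
	 * (BST_PEND) elements are pulled off bc_pendq with bc_mtx held, the
	 * mutex is dropped around the actual I/O in blockif_proc(), and the
	 * element is returned to bc_freeq by blockif_complete().
	 * bc_work_count counts threads currently in this processing phase;
	 * blockif_pause() waits on bc_work_done_cond until it drops to zero.
	 * For GEOM-backed devices, 'buf' is used by blockif_proc() to bounce
	 * multi-iovec transfers in MAXPHYS-sized chunks.
	 */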
pthread_mutex_lock(&bc->bc_mtx); for (;;) { bc->bc_work_count++; /* We cannot process work if the interface is paused */ while (!bc->bc_paused && blockif_dequeue(bc, t, &be)) { pthread_mutex_unlock(&bc->bc_mtx); blockif_proc(bc, be, buf); pthread_mutex_lock(&bc->bc_mtx); blockif_complete(bc, be); } bc->bc_work_count--; /* If none of the workers are busy, notify the main thread */ if (bc->bc_work_count == 0) pthread_cond_broadcast(&bc->bc_work_done_cond); /* Check ctxt status here to see if exit requested */ if (bc->bc_closing) break; /* Make all worker threads wait here if the device is paused */ while (bc->bc_paused) pthread_cond_wait(&bc->bc_paused_cond, &bc->bc_mtx); pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); } pthread_mutex_unlock(&bc->bc_mtx); if (buf) free(buf); pthread_exit(NULL); return (NULL); } static void blockif_sigcont_handler(int signal, enum ev_type type, void *arg) { struct blockif_sig_elem *bse; for (;;) { /* * Process the entire list even if not intended for * this thread. */ do { bse = blockif_bse_head; if (bse == NULL) return; } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, (uintptr_t)bse, (uintptr_t)bse->bse_next)); pthread_mutex_lock(&bse->bse_mtx); bse->bse_pending = 0; pthread_cond_signal(&bse->bse_cond); pthread_mutex_unlock(&bse->bse_mtx); } } static void blockif_init(void) { mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); (void) signal(SIGCONT, SIG_IGN); } int blockif_legacy_config(nvlist_t *nvl, const char *opts) { char *cp, *path; if (opts == NULL) return (0); cp = strchr(opts, ','); if (cp == NULL) { set_config_value_node(nvl, "path", opts); return (0); } path = strndup(opts, cp - opts); set_config_value_node(nvl, "path", path); free(path); return (pci_parse_legacy_config(nvl, cp + 1)); } struct blockif_ctxt * blockif_open(nvlist_t *nvl, const char *ident) { char tname[MAXCOMLEN + 1]; char name[MAXPATHLEN]; const char *path, *pssval, *ssval; char *cp; struct blockif_ctxt *bc; struct stat sbuf; struct diocgattr_arg arg; off_t size, psectsz, psectoff; int extra, fd, i, sectsz; int ro, candelete, geom, ssopt, pssopt; int nodelete; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE }; #endif pthread_once(&blockif_once, blockif_init); fd = -1; extra = 0; ssopt = 0; ro = 0; nodelete = 0; if (get_config_bool_node_default(nvl, "nocache", false)) extra |= O_DIRECT; if (get_config_bool_node_default(nvl, "nodelete", false)) nodelete = 1; if (get_config_bool_node_default(nvl, "sync", false) || get_config_bool_node_default(nvl, "direct", false)) extra |= O_SYNC; if (get_config_bool_node_default(nvl, "ro", false)) ro = 1; ssval = get_config_value_node(nvl, "sectorsize"); if (ssval != NULL) { ssopt = strtol(ssval, &cp, 10); if (cp == ssval) { EPRINTLN("Invalid sector size \"%s\"", ssval); goto err; } if (*cp == '\0') { pssopt = ssopt; } else if (*cp == '/') { pssval = cp + 1; pssopt = strtol(pssval, &cp, 10); if (cp == pssval || *cp != '\0') { EPRINTLN("Invalid sector size \"%s\"", ssval); goto err; } } else { EPRINTLN("Invalid sector size \"%s\"", ssval); goto err; } } path = get_config_value_node(nvl, "path"); if (path == NULL) { EPRINTLN("Missing \"path\" for block device."); goto err; } fd = open(path, (ro ? 
O_RDONLY : O_RDWR) | extra); if (fd < 0 && !ro) { /* Attempt a r/w fail with a r/o open */ fd = open(path, O_RDONLY | extra); ro = 1; } if (fd < 0) { warn("Could not open backing file: %s", path); goto err; } if (fstat(fd, &sbuf) < 0) { warn("Could not stat backing file %s", path); goto err; } #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK, CAP_WRITE, CAP_FSTAT, CAP_EVENT, CAP_FPATHCONF); if (ro) cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE); if (caph_rights_limit(fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif /* * Deal with raw devices */ size = sbuf.st_size; sectsz = DEV_BSIZE; psectsz = psectoff = 0; candelete = geom = 0; if (S_ISCHR(sbuf.st_mode)) { if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || ioctl(fd, DIOCGSECTORSIZE, §sz)) { perror("Could not fetch dev blk/sector size"); goto err; } assert(size != 0); assert(sectsz != 0); if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0) ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff); strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); arg.len = sizeof(arg.value.i); if (nodelete == 0 && ioctl(fd, DIOCGATTR, &arg) == 0) candelete = arg.value.i; if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0) geom = 1; } else { psectsz = sbuf.st_blksize; /* Avoid fallback implementation */ candelete = fpathconf(fd, _PC_DEALLOC_PRESENT) == 1; } #ifndef WITHOUT_CAPSICUM if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif if (ssopt != 0) { if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 || ssopt > pssopt) { EPRINTLN("Invalid sector size %d/%d", ssopt, pssopt); goto err; } /* * Some backend drivers (e.g. cd0, ada0) require that the I/O * size be a multiple of the device's sector size. * * Validate that the emulated sector size complies with this * requirement. 
*/ if (S_ISCHR(sbuf.st_mode)) { if (ssopt < sectsz || (ssopt % sectsz) != 0) { EPRINTLN("Sector size %d incompatible " "with underlying device sector size %d", ssopt, sectsz); goto err; } } sectsz = ssopt; psectsz = pssopt; psectoff = 0; } bc = calloc(1, sizeof(struct blockif_ctxt)); if (bc == NULL) { perror("calloc"); goto err; } bc->bc_magic = BLOCKIF_SIG; bc->bc_fd = fd; bc->bc_ischr = S_ISCHR(sbuf.st_mode); bc->bc_isgeom = geom; bc->bc_candelete = candelete; bc->bc_rdonly = ro; bc->bc_size = size; bc->bc_sectsz = sectsz; bc->bc_psectsz = psectsz; bc->bc_psectoff = psectoff; pthread_mutex_init(&bc->bc_mtx, NULL); pthread_cond_init(&bc->bc_cond, NULL); bc->bc_paused = 0; bc->bc_work_count = 0; pthread_cond_init(&bc->bc_paused_cond, NULL); pthread_cond_init(&bc->bc_work_done_cond, NULL); TAILQ_INIT(&bc->bc_freeq); TAILQ_INIT(&bc->bc_pendq); TAILQ_INIT(&bc->bc_busyq); for (i = 0; i < BLOCKIF_MAXREQ; i++) { bc->bc_reqs[i].be_status = BST_FREE; TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); } for (i = 0; i < BLOCKIF_NUMTHR; i++) { pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); pthread_set_name_np(bc->bc_btid[i], tname); } return (bc); err: if (fd >= 0) close(fd); return (NULL); } static void blockif_resized(int fd, enum ev_type type, void *arg) { struct blockif_ctxt *bc; struct stat sb; if (fstat(fd, &sb) != 0) return; bc = arg; pthread_mutex_lock(&bc->bc_mtx); if (sb.st_size != bc->bc_size) { bc->bc_size = sb.st_size; bc->bc_resize_cb(bc, bc->bc_resize_cb_arg, bc->bc_size); } pthread_mutex_unlock(&bc->bc_mtx); } int blockif_register_resize_callback(struct blockif_ctxt *bc, blockif_resize_cb *cb, void *cb_arg) { struct stat sb; int err; if (cb == NULL) return (EINVAL); pthread_mutex_lock(&bc->bc_mtx); if (bc->bc_resize_cb != NULL) { err = EBUSY; goto out; } assert(bc->bc_closing == 0); if (fstat(bc->bc_fd, &sb) != 0) { err = errno; goto out; } bc->bc_resize_event = mevent_add_flags(bc->bc_fd, EVF_VNODE, EVFF_ATTRIB, blockif_resized, bc); if (bc->bc_resize_event == NULL) { err = ENXIO; goto out; } bc->bc_resize_cb = cb; bc->bc_resize_cb_arg = cb_arg; out: pthread_mutex_unlock(&bc->bc_mtx); return (err); } static int blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, enum blockop op) { int err; err = 0; pthread_mutex_lock(&bc->bc_mtx); if (!TAILQ_EMPTY(&bc->bc_freeq)) { /* * Enqueue and inform the block i/o thread * that there is work available */ if (blockif_enqueue(bc, breq, op)) pthread_cond_signal(&bc->bc_cond); } else { /* * Callers are not allowed to enqueue more than * the specified blockif queue limit. Return an * error to indicate that the queue length has been * exceeded. 
*/ err = E2BIG; } pthread_mutex_unlock(&bc->bc_mtx); return (err); } int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_READ)); } int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_WRITE)); } int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_FLUSH)); } int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_DELETE)); } int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) { struct blockif_elem *be; assert(bc->bc_magic == BLOCKIF_SIG); pthread_mutex_lock(&bc->bc_mtx); /* XXX: not waiting while paused */ /* * Check pending requests. */ TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { if (be->be_req == breq) break; } if (be != NULL) { /* * Found it. */ blockif_complete(bc, be); pthread_mutex_unlock(&bc->bc_mtx); return (0); } /* * Check in-flight requests. */ TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { if (be->be_req == breq) break; } if (be == NULL) { /* * Didn't find it. */ pthread_mutex_unlock(&bc->bc_mtx); return (EINVAL); } /* * Interrupt the processing thread to force it return * prematurely via it's normal callback path. */ while (be->be_status == BST_BUSY) { struct blockif_sig_elem bse, *old_head; pthread_mutex_init(&bse.bse_mtx, NULL); pthread_cond_init(&bse.bse_cond, NULL); bse.bse_pending = 1; do { old_head = blockif_bse_head; bse.bse_next = old_head; } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, (uintptr_t)old_head, (uintptr_t)&bse)); pthread_kill(be->be_tid, SIGCONT); pthread_mutex_lock(&bse.bse_mtx); while (bse.bse_pending) pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); pthread_mutex_unlock(&bse.bse_mtx); } pthread_mutex_unlock(&bc->bc_mtx); /* * The processing thread has been interrupted. Since it's not * clear if the callback has been invoked yet, return EBUSY. */ return (EBUSY); } int blockif_close(struct blockif_ctxt *bc) { void *jval; int i; assert(bc->bc_magic == BLOCKIF_SIG); /* * Stop the block i/o thread */ pthread_mutex_lock(&bc->bc_mtx); bc->bc_closing = 1; if (bc->bc_resize_event != NULL) mevent_disable(bc->bc_resize_event); pthread_mutex_unlock(&bc->bc_mtx); pthread_cond_broadcast(&bc->bc_cond); for (i = 0; i < BLOCKIF_NUMTHR; i++) pthread_join(bc->bc_btid[i], &jval); /* XXX Cancel queued i/o's ??? */ /* * Release resources */ bc->bc_magic = 0; close(bc->bc_fd); free(bc); return (0); } /* * Return virtual C/H/S values for a given block. Use the algorithm * outlined in the VHD specification to calculate values. 
*/ void blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) { off_t sectors; /* total sectors of the block dev */ off_t hcyl; /* cylinders times heads */ uint16_t secpt; /* sectors per track */ uint8_t heads; assert(bc->bc_magic == BLOCKIF_SIG); sectors = bc->bc_size / bc->bc_sectsz; /* Clamp the size to the largest possible with CHS */ if (sectors > 65535UL*16*255) sectors = 65535UL*16*255; if (sectors >= 65536UL*16*63) { secpt = 255; heads = 16; hcyl = sectors / secpt; } else { secpt = 17; hcyl = sectors / secpt; heads = (hcyl + 1023) / 1024; if (heads < 4) heads = 4; if (hcyl >= (heads * 1024) || heads > 16) { secpt = 31; heads = 16; hcyl = sectors / secpt; } if (hcyl >= (heads * 1024)) { secpt = 63; heads = 16; hcyl = sectors / secpt; } } *c = hcyl / heads; *h = heads; *s = secpt; } /* * Accessors */ off_t blockif_size(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_size); } int blockif_sectsz(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_sectsz); } void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) { assert(bc->bc_magic == BLOCKIF_SIG); *size = bc->bc_psectsz; *off = bc->bc_psectoff; } int blockif_queuesz(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (BLOCKIF_MAXREQ - 1); } int blockif_is_ro(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_rdonly); } int blockif_candelete(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_candelete); } #ifdef BHYVE_SNAPSHOT void blockif_pause(struct blockif_ctxt *bc) { assert(bc != NULL); assert(bc->bc_magic == BLOCKIF_SIG); pthread_mutex_lock(&bc->bc_mtx); bc->bc_paused = 1; /* The interface is paused. Wait for workers to finish their work */ while (bc->bc_work_count) pthread_cond_wait(&bc->bc_work_done_cond, &bc->bc_mtx); pthread_mutex_unlock(&bc->bc_mtx); if (blockif_flush_bc(bc)) fprintf(stderr, "%s: [WARN] failed to flush backing file.\r\n", __func__); } void blockif_resume(struct blockif_ctxt *bc) { assert(bc != NULL); assert(bc->bc_magic == BLOCKIF_SIG); pthread_mutex_lock(&bc->bc_mtx); bc->bc_paused = 0; /* resume the threads waiting for paused */ pthread_cond_broadcast(&bc->bc_paused_cond); /* kick the threads after restore */ pthread_cond_broadcast(&bc->bc_cond); pthread_mutex_unlock(&bc->bc_mtx); } int blockif_snapshot_req(struct blockif_req *br, struct vm_snapshot_meta *meta) { int i; struct iovec *iov; int ret; SNAPSHOT_VAR_OR_LEAVE(br->br_iovcnt, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(br->br_offset, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(br->br_resid, meta, ret, done); /* * XXX: The callback and parameter must be filled by the virtualized * device that uses the interface, during its init; we're not touching * them here. */ /* Snapshot the iovecs. */ for (i = 0; i < br->br_iovcnt; i++) { iov = &br->br_iov[i]; SNAPSHOT_VAR_OR_LEAVE(iov->iov_len, meta, ret, done); /* We assume the iov is a guest-mapped address. 
*/ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(iov->iov_base, iov->iov_len, false, meta, ret, done); } done: return (ret); } int blockif_snapshot(struct blockif_ctxt *bc, struct vm_snapshot_meta *meta) { int ret; if (bc->bc_paused == 0) { fprintf(stderr, "%s: Snapshot failed: " "interface not paused.\r\n", __func__); return (ENXIO); } pthread_mutex_lock(&bc->bc_mtx); SNAPSHOT_VAR_OR_LEAVE(bc->bc_magic, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(bc->bc_ischr, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(bc->bc_isgeom, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(bc->bc_candelete, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(bc->bc_rdonly, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(bc->bc_size, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(bc->bc_sectsz, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectsz, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectoff, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(bc->bc_closing, meta, ret, done); done: pthread_mutex_unlock(&bc->bc_mtx); return (ret); } #endif diff --git a/usr.sbin/bhyve/config.c b/usr.sbin/bhyve/config.c index 1df750e9576c..c9ac9f0a1905 100644 --- a/usr.sbin/bhyve/config.c +++ b/usr.sbin/bhyve/config.c @@ -1,431 +1,431 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2021 John H. Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "config.h" static nvlist_t *config_root; void init_config(void) { config_root = nvlist_create(0); if (config_root == NULL) err(4, "Failed to create configuration root nvlist"); } static nvlist_t * _lookup_config_node(nvlist_t *parent, const char *path, bool create) { char *copy, *name, *tofree; nvlist_t *nvl, *new_nvl; copy = strdup(path); if (copy == NULL) errx(4, "Failed to allocate memory"); tofree = copy; nvl = parent; while ((name = strsep(©, ".")) != NULL) { if (*name == '\0') { warnx("Invalid configuration node: %s", path); nvl = NULL; break; } if (nvlist_exists_nvlist(nvl, name)) nvl = (nvlist_t *)nvlist_get_nvlist(nvl, name); else if (nvlist_exists(nvl, name)) { for (copy = tofree; copy < name; copy++) if (*copy == '\0') *copy = '.'; warnx( "Configuration node %s is a child of existing variable %s", path, tofree); nvl = NULL; break; } else if (create) { new_nvl = nvlist_create(0); if (new_nvl == NULL) errx(4, "Failed to allocate memory"); nvlist_move_nvlist(nvl, name, new_nvl); nvl = new_nvl; } else { nvl = NULL; break; } } free(tofree); return (nvl); } nvlist_t * create_config_node(const char *path) { return (_lookup_config_node(config_root, path, true)); } nvlist_t * find_config_node(const char *path) { return (_lookup_config_node(config_root, path, false)); } nvlist_t * create_relative_config_node(nvlist_t *parent, const char *path) { return (_lookup_config_node(parent, path, true)); } nvlist_t * find_relative_config_node(nvlist_t *parent, const char *path) { return (_lookup_config_node(parent, path, false)); } void set_config_value_node(nvlist_t *parent, const char *name, const char *value) { if (strchr(name, '.') != NULL) errx(4, "Invalid config node name %s", name); if (parent == NULL) parent = config_root; if (nvlist_exists_string(parent, name)) nvlist_free_string(parent, name); else if (nvlist_exists(parent, name)) errx(4, "Attemping to add value %s to existing node %s of list %p", value, name, parent); nvlist_add_string(parent, name, value); } void set_config_value(const char *path, const char *value) { const char *name; char *node_name; nvlist_t *nvl; /* Look for last separator. */ name = strrchr(path, '.'); if (name == NULL) { nvl = config_root; name = path; } else { node_name = strndup(path, name - path); if (node_name == NULL) errx(4, "Failed to allocate memory"); nvl = create_config_node(node_name); if (nvl == NULL) errx(4, "Failed to create configuration node %s", node_name); free(node_name); /* Skip over '.'. */ name++; } if (nvlist_exists_nvlist(nvl, name)) errx(4, "Attempting to add value %s to existing node %s", value, path); set_config_value_node(nvl, name, value); } static const char * get_raw_config_value(const char *path) { const char *name; char *node_name; nvlist_t *nvl; /* Look for last separator. */ name = strrchr(path, '.'); if (name == NULL) { nvl = config_root; name = path; } else { node_name = strndup(path, name - path); if (node_name == NULL) errx(4, "Failed to allocate memory"); nvl = find_config_node(node_name); free(node_name); if (nvl == NULL) return (NULL); /* Skip over '.'. 
*/ name++; } if (nvlist_exists_string(nvl, name)) return (nvlist_get_string(nvl, name)); if (nvlist_exists_nvlist(nvl, name)) warnx("Attempting to fetch value of node %s", path); return (NULL); } static char * _expand_config_value(const char *value, int depth) { FILE *valfp; const char *cp, *vp; char *nestedval, *path, *valbuf; size_t valsize; valfp = open_memstream(&valbuf, &valsize); if (valfp == NULL) errx(4, "Failed to allocate memory"); vp = value; while (*vp != '\0') { switch (*vp) { case '%': if (depth > 15) { warnx( "Too many recursive references in configuration value"); fputc('%', valfp); vp++; break; - } + } if (vp[1] != '(' || vp[2] == '\0') cp = NULL; else cp = strchr(vp + 2, ')'); if (cp == NULL) { warnx( "Invalid reference in configuration value \"%s\"", value); fputc('%', valfp); vp++; break; } vp += 2; if (cp == vp) { warnx( "Empty reference in configuration value \"%s\"", value); vp++; break; } /* Allocate a C string holding the path. */ path = strndup(vp, cp - vp); if (path == NULL) errx(4, "Failed to allocate memory"); /* Advance 'vp' past the reference. */ vp = cp + 1; /* Fetch the referenced value. */ cp = get_raw_config_value(path); if (cp == NULL) warnx( "Failed to fetch referenced configuration variable %s", path); else { nestedval = _expand_config_value(cp, depth + 1); fputs(nestedval, valfp); free(nestedval); } free(path); break; case '\\': vp++; if (*vp == '\0') { warnx( "Trailing \\ in configuration value \"%s\"", value); break; } /* FALLTHROUGH */ default: fputc(*vp, valfp); vp++; break; } } fclose(valfp); return (valbuf); } const char * expand_config_value(const char *value) { static char *valbuf; if (strchr(value, '%') == NULL) return (value); free(valbuf); valbuf = _expand_config_value(value, 0); return (valbuf); } const char * get_config_value(const char *path) { const char *value; value = get_raw_config_value(path); if (value == NULL) return (NULL); return (expand_config_value(value)); } const char * get_config_value_node(const nvlist_t *parent, const char *name) { if (strchr(name, '.') != NULL) errx(4, "Invalid config node name %s", name); if (parent == NULL) parent = config_root; if (nvlist_exists_nvlist(parent, name)) warnx("Attempt to fetch value of node %s of list %p", name, parent); if (!nvlist_exists_string(parent, name)) return (NULL); return (expand_config_value(nvlist_get_string(parent, name))); } bool _bool_value(const char *name, const char *value) { if (strcasecmp(value, "true") == 0 || strcasecmp(value, "on") == 0 || strcasecmp(value, "yes") == 0 || strcmp(value, "1") == 0) return (true); if (strcasecmp(value, "false") == 0 || strcasecmp(value, "off") == 0 || strcasecmp(value, "no") == 0 || strcmp(value, "0") == 0) return (false); err(4, "Invalid value %s for boolean variable %s", value, name); } bool get_config_bool(const char *path) { const char *value; value = get_config_value(path); if (value == NULL) err(4, "Failed to fetch boolean variable %s", path); return (_bool_value(path, value)); } bool get_config_bool_default(const char *path, bool def) { const char *value; value = get_config_value(path); if (value == NULL) return (def); return (_bool_value(path, value)); } bool get_config_bool_node(const nvlist_t *parent, const char *name) { const char *value; value = get_config_value_node(parent, name); if (value == NULL) err(4, "Failed to fetch boolean variable %s", name); return (_bool_value(name, value)); } bool get_config_bool_node_default(const nvlist_t *parent, const char *name, bool def) { const char *value; value = 
get_config_value_node(parent, name); if (value == NULL) return (def); return (_bool_value(name, value)); } void set_config_bool(const char *path, bool value) { set_config_value(path, value ? "true" : "false"); } void set_config_bool_node(nvlist_t *parent, const char *name, bool value) { set_config_value_node(parent, name, value ? "true" : "false"); } static void dump_tree(const char *prefix, const nvlist_t *nvl) { const char *name; void *cookie; int type; cookie = NULL; while ((name = nvlist_next(nvl, &type, &cookie)) != NULL) { if (type == NV_TYPE_NVLIST) { char *new_prefix; asprintf(&new_prefix, "%s%s.", prefix, name); dump_tree(new_prefix, nvlist_get_nvlist(nvl, name)); free(new_prefix); } else { assert(type == NV_TYPE_STRING); printf("%s%s=%s\n", prefix, name, nvlist_get_string(nvl, name)); } } } void dump_config(void) { dump_tree("", config_root); } diff --git a/usr.sbin/bhyve/config.h b/usr.sbin/bhyve/config.h index 9f82afb267b2..574da966df74 100644 --- a/usr.sbin/bhyve/config.h +++ b/usr.sbin/bhyve/config.h @@ -1,119 +1,119 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2021 John H. Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __CONFIG_H__ #define __CONFIG_H__ #include /*- * Manages a configuration database backed by an nv(9) list. * * The database only stores string values. Callers should parse * values into other types if needed. String values can reference * other configuration variables using a '%(name)' syntax. In this * case, the name must be the the full path of the configuration * variable. The % character can be escaped with a preceding \ to * avoid expansion. Any \ characters must be escaped. * * Configuration variables are stored in a tree. The full path of a * variable is specified as a dot-separated name similar to sysctl(8) * OIDs. - */ + */ /* * Fetches the value of a configuration variable. If the "raw" value * contains references to other configuration variables, this function * expands those references and returns a pointer to the parsed * string. The string's storage is only stable until the next call to * this function. * * If no node is found, returns NULL. * * If 'parent' is NULL, 'name' is assumed to be a top-level variable. 
*/ const char *get_config_value_node(const nvlist_t *parent, const char *name); /* * Similar to get_config_value_node but expects a full path to the * leaf node. */ const char *get_config_value(const char *path); /* Initializes the tree to an empty state. */ void init_config(void); /* * Creates an existing configuration node via a dot-separated OID * path. Will fail if the path names an existing leaf configuration * variable. If the node already exists, this returns a pointer to * the existing node. */ nvlist_t *create_config_node(const char *path); /* * Looks for an existing configuration node via a dot-separated OID * path. Will fail if the path names an existing leaf configuration * variable. */ nvlist_t *find_config_node(const char *path); /* * Similar to the above, but treats the path relative to an existing * 'parent' node rather than as an absolute path. */ nvlist_t *create_relative_config_node(nvlist_t *parent, const char *path); nvlist_t *find_relative_config_node(nvlist_t *parent, const char *path); /* * Adds or replaces the value of the specified variable. * * If 'parent' is NULL, 'name' is assumed to be a top-level variable. */ void set_config_value_node(nvlist_t *parent, const char *name, const char *value); /* * Similar to set_config_value_node but expects a full path to the * leaf node. */ void set_config_value(const char *path, const char *value); /* Convenience wrappers for boolean variables. */ bool get_config_bool(const char *path); bool get_config_bool_node(const nvlist_t *parent, const char *name); bool get_config_bool_default(const char *path, bool def); bool get_config_bool_node_default(const nvlist_t *parent, const char *name, bool def); void set_config_bool(const char *path, bool value); void set_config_bool_node(nvlist_t *parent, const char *name, bool value); void dump_config(void); #endif /* !__CONFIG_H__ */ diff --git a/usr.sbin/bhyve/fwctl.c b/usr.sbin/bhyve/fwctl.c index 0640bc28ba2b..a91f3d14fb78 100644 --- a/usr.sbin/bhyve/fwctl.c +++ b/usr.sbin/bhyve/fwctl.c @@ -1,552 +1,552 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2015 Peter Grehan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Guest firmware interface. Uses i/o ports x510/x511 as Qemu does, * but with a request/response messaging protocol. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "inout.h" #include "fwctl.h" /* * Messaging protocol base operations */ #define OP_NULL 1 #define OP_ECHO 2 #define OP_GET 3 #define OP_GET_LEN 4 #define OP_SET 5 #define OP_MAX OP_SET /* I/O ports */ #define FWCTL_OUT 0x510 #define FWCTL_IN 0x511 /* * Back-end state-machine */ enum state { DORMANT, IDENT_WAIT, IDENT_SEND, REQ, RESP } be_state = DORMANT; static uint8_t sig[] = { 'B', 'H', 'Y', 'V' }; static u_int ident_idx; struct op_info { int op; int (*op_start)(uint32_t len); void (*op_data)(uint32_t data, uint32_t len); int (*op_result)(struct iovec **data); void (*op_done)(struct iovec *data); }; static struct op_info *ops[OP_MAX+1]; /* Return 0-padded uint32_t */ static uint32_t fwctl_send_rest(uint32_t *data, size_t len) { union { uint8_t c[4]; uint32_t w; } u; uint8_t *cdata; int i; cdata = (uint8_t *) data; - u.w = 0; + u.w = 0; for (i = 0, u.w = 0; i < len; i++) u.c[i] = *cdata++; return (u.w); } /* * error op dummy proto - drop all data sent and return an error */ static int errop_code; static void errop_set(int err) { errop_code = err; } static int errop_start(uint32_t len) { errop_code = ENOENT; /* accept any length */ return (errop_code); } static void errop_data(uint32_t data, uint32_t len) { /* ignore */ } static int errop_result(struct iovec **data) { /* no data to send back; always successful */ *data = NULL; return (errop_code); } static void errop_done(struct iovec *data) { /* assert data is NULL */ } static struct op_info errop_info = { .op_start = errop_start, .op_data = errop_data, .op_result = errop_result, .op_done = errop_done }; /* OID search */ SET_DECLARE(ctl_set, struct ctl); CTL_NODE("hw.ncpu", &guest_ncpus, sizeof(guest_ncpus)); static struct ctl * ctl_locate(const char *str, int maxlen) { struct ctl *cp, **cpp; SET_FOREACH(cpp, ctl_set) { cp = *cpp; if (!strncmp(str, cp->c_oid, maxlen)) return (cp); } return (NULL); } /* uefi-sysctl get-len */ #define FGET_STRSZ 80 static struct iovec fget_biov[2]; static char fget_str[FGET_STRSZ]; static struct { size_t f_sz; uint32_t f_data[1024]; } fget_buf; static int fget_cnt; static size_t fget_size; static int fget_start(uint32_t len) { if (len > FGET_STRSZ) return(E2BIG); fget_cnt = 0; return (0); } static void fget_data(uint32_t data, uint32_t len) { *((uint32_t *) &fget_str[fget_cnt]) = data; fget_cnt += sizeof(uint32_t); } static int fget_result(struct iovec **data, int val) { struct ctl *cp; int err; err = 0; /* Locate the OID */ cp = ctl_locate(fget_str, fget_cnt); if (cp == NULL) { *data = NULL; err = ENOENT; } else { if (val) { /* For now, copy the len/data into a buffer */ memset(&fget_buf, 0, sizeof(fget_buf)); fget_buf.f_sz = cp->c_len; memcpy(fget_buf.f_data, cp->c_data, cp->c_len); fget_biov[0].iov_base = (char *)&fget_buf; fget_biov[0].iov_len = sizeof(fget_buf.f_sz) + cp->c_len; } else { fget_size = cp->c_len; fget_biov[0].iov_base = (char *)&fget_size; fget_biov[0].iov_len = sizeof(fget_size); } fget_biov[1].iov_base = NULL; fget_biov[1].iov_len = 0; *data = fget_biov; } return (err); } static void fget_done(struct iovec *data) { /* nothing needs to be freed */ } static int fget_len_result(struct iovec **data) { return (fget_result(data, 0)); } static int fget_val_result(struct iovec **data) { return (fget_result(data, 1)); } static struct op_info fgetlen_info = { .op_start = fget_start, .op_data = fget_data, .op_result = fget_len_result, .op_done = fget_done }; 
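/*
 * Request/response framing, as handled by fwctl_request() and
 * fwctl_response() below: after the ident handshake (a 16-bit write of 0
 * to FWCTL_OUT followed by reading back the 'BHYV' signature byte by
 * byte), a request is a sequence of 32-bit writes to FWCTL_OUT:
 *
 *     word 0   total message size in bytes, including this 12-byte header
 *     word 1   operation (OP_GET, OP_GET_LEN, ...)
 *     word 2   transaction id, echoed back in the response
 *     word 3.. payload, e.g. the NUL-terminated OID string for OP_GET_LEN
 *
 * and the response is read back as 32-bit words: total length, operation,
 * transaction id, errno-style status, then the payload (the value length
 * for OP_GET_LEN, or { length, data } for OP_GET).
 *
 * The fragment below is only an illustrative sketch of the guest side of
 * an OP_GET_LEN request; it is not part of bhyve and assumes FreeBSD-style
 * outl(), MIN() and roundup() helpers in the guest environment.
 */
#if 0
static void
fwctl_guest_send_get_len(const char *oid, uint32_t txid)
{
	uint32_t w;
	size_t i, len;

	len = strlen(oid) + 1;				/* include the NUL */
	outl(FWCTL_OUT, 12 + roundup(len, 4));		/* total size */
	outl(FWCTL_OUT, OP_GET_LEN);
	outl(FWCTL_OUT, txid);
	for (i = 0; i < len; i += sizeof(w)) {
		w = 0;
		memcpy(&w, oid + i, MIN(sizeof(w), len - i));
		outl(FWCTL_OUT, w);
	}
}
#endif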
static struct op_info fgetval_info = { .op_start = fget_start, .op_data = fget_data, .op_result = fget_val_result, .op_done = fget_done }; static struct req_info { int req_error; u_int req_count; uint32_t req_size; uint32_t req_type; uint32_t req_txid; struct op_info *req_op; int resp_error; int resp_count; size_t resp_size; size_t resp_off; struct iovec *resp_biov; } rinfo; static void fwctl_response_done(void) { (*rinfo.req_op->op_done)(rinfo.resp_biov); /* reinit the req data struct */ memset(&rinfo, 0, sizeof(rinfo)); } static void fwctl_request_done(void) { rinfo.resp_error = (*rinfo.req_op->op_result)(&rinfo.resp_biov); /* XXX only a single vector supported at the moment */ rinfo.resp_off = 0; if (rinfo.resp_biov == NULL) { rinfo.resp_size = 0; } else { rinfo.resp_size = rinfo.resp_biov[0].iov_len; } } static int fwctl_request_start(void) { int err; /* Data size doesn't include header */ rinfo.req_size -= 12; rinfo.req_op = &errop_info; if (rinfo.req_type <= OP_MAX && ops[rinfo.req_type] != NULL) rinfo.req_op = ops[rinfo.req_type]; err = (*rinfo.req_op->op_start)(rinfo.req_size); if (err) { errop_set(err); rinfo.req_op = &errop_info; } /* Catch case of zero-length message here */ if (rinfo.req_size == 0) { fwctl_request_done(); return (1); } return (0); } static int fwctl_request_data(uint32_t value) { /* Make sure remaining size is >= 0 */ if (rinfo.req_size <= sizeof(uint32_t)) rinfo.req_size = 0; else rinfo.req_size -= sizeof(uint32_t); (*rinfo.req_op->op_data)(value, rinfo.req_size); if (rinfo.req_size < sizeof(uint32_t)) { fwctl_request_done(); return (1); } return (0); } static int fwctl_request(uint32_t value) { int ret; ret = 0; switch (rinfo.req_count) { case 0: /* Verify size */ if (value < 12) { printf("msg size error"); exit(4); } rinfo.req_size = value; rinfo.req_count = 1; break; case 1: rinfo.req_type = value; rinfo.req_count++; break; case 2: rinfo.req_txid = value; rinfo.req_count++; ret = fwctl_request_start(); break; default: ret = fwctl_request_data(value); break; } return (ret); } static int fwctl_response(uint32_t *retval) { uint32_t *dp; ssize_t remlen; switch(rinfo.resp_count) { case 0: /* 4 x u32 header len + data */ *retval = 4*sizeof(uint32_t) + roundup(rinfo.resp_size, sizeof(uint32_t)); rinfo.resp_count++; break; case 1: *retval = rinfo.req_type; rinfo.resp_count++; break; case 2: *retval = rinfo.req_txid; rinfo.resp_count++; break; case 3: *retval = rinfo.resp_error; rinfo.resp_count++; break; default: remlen = rinfo.resp_size - rinfo.resp_off; dp = (uint32_t *) ((uint8_t *)rinfo.resp_biov->iov_base + rinfo.resp_off); if (remlen >= sizeof(uint32_t)) { *retval = *dp; } else if (remlen > 0) { *retval = fwctl_send_rest(dp, remlen); } rinfo.resp_off += sizeof(uint32_t); break; } if (rinfo.resp_count > 3 && rinfo.resp_off >= rinfo.resp_size) { fwctl_response_done(); return (1); } return (0); } /* * i/o port handling. 
*/ static uint8_t fwctl_inb(void) { uint8_t retval; retval = 0xff; switch (be_state) { case IDENT_SEND: retval = sig[ident_idx++]; if (ident_idx >= sizeof(sig)) be_state = REQ; break; default: break; } return (retval); } static void fwctl_outw(uint16_t val) { switch (be_state) { case IDENT_WAIT: if (val == 0) { be_state = IDENT_SEND; ident_idx = 0; } break; default: /* ignore */ break; } } static uint32_t fwctl_inl(void) { uint32_t retval; switch (be_state) { case RESP: if (fwctl_response(&retval)) be_state = REQ; break; default: retval = 0xffffffff; break; } return (retval); } static void fwctl_outl(uint32_t val) { switch (be_state) { case REQ: if (fwctl_request(val)) be_state = RESP; default: break; } } static int fwctl_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) { if (in) { if (bytes == 1) *eax = fwctl_inb(); else if (bytes == 4) *eax = fwctl_inl(); else *eax = 0xffff; } else { if (bytes == 2) fwctl_outw(*eax); else if (bytes == 4) fwctl_outl(*eax); } return (0); } INOUT_PORT(fwctl_wreg, FWCTL_OUT, IOPORT_F_INOUT, fwctl_handler); INOUT_PORT(fwctl_rreg, FWCTL_IN, IOPORT_F_IN, fwctl_handler); void fwctl_init(void) { ops[OP_GET_LEN] = &fgetlen_info; ops[OP_GET] = &fgetval_info; be_state = IDENT_WAIT; } diff --git a/usr.sbin/bhyve/hda_reg.h b/usr.sbin/bhyve/hda_reg.h index b3034bf9f417..c309056c6f5d 100644 --- a/usr.sbin/bhyve/hda_reg.h +++ b/usr.sbin/bhyve/hda_reg.h @@ -1,1369 +1,1369 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 Stephane E. Potvin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _HDA_REG_H_ #define _HDA_REG_H_ /**************************************************************************** * HDA Device Verbs ****************************************************************************/ /* HDA Command */ #define HDA_CMD_VERB_MASK 0x000fffff #define HDA_CMD_VERB_SHIFT 0 #define HDA_CMD_NID_MASK 0x0ff00000 #define HDA_CMD_NID_SHIFT 20 #define HDA_CMD_CAD_MASK 0xf0000000 #define HDA_CMD_CAD_SHIFT 28 #define HDA_CMD_VERB_4BIT_SHIFT 16 #define HDA_CMD_VERB_12BIT_SHIFT 8 #define HDA_CMD_VERB_4BIT(verb, payload) \ (((verb) << HDA_CMD_VERB_4BIT_SHIFT) | (payload)) #define HDA_CMD_4BIT(cad, nid, verb, payload) \ (((cad) << HDA_CMD_CAD_SHIFT) | \ ((nid) << HDA_CMD_NID_SHIFT) | \ (HDA_CMD_VERB_4BIT((verb), (payload)))) #define HDA_CMD_VERB_12BIT(verb, payload) \ (((verb) << HDA_CMD_VERB_12BIT_SHIFT) | (payload)) #define HDA_CMD_12BIT(cad, nid, verb, payload) \ (((cad) << HDA_CMD_CAD_SHIFT) | \ ((nid) << HDA_CMD_NID_SHIFT) | \ (HDA_CMD_VERB_12BIT((verb), (payload)))) /* Get Parameter */ #define HDA_CMD_VERB_GET_PARAMETER 0xf00 #define HDA_CMD_GET_PARAMETER(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_PARAMETER, (payload))) /* Connection Select Control */ #define HDA_CMD_VERB_GET_CONN_SELECT_CONTROL 0xf01 #define HDA_CMD_VERB_SET_CONN_SELECT_CONTROL 0x701 #define HDA_CMD_GET_CONN_SELECT_CONTROL(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_CONN_SELECT_CONTROL, 0x0)) #define HDA_CMD_SET_CONNECTION_SELECT_CONTROL(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_CONN_SELECT_CONTROL, (payload))) /* Connection List Entry */ #define HDA_CMD_VERB_GET_CONN_LIST_ENTRY 0xf02 #define HDA_CMD_GET_CONN_LIST_ENTRY(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_CONN_LIST_ENTRY, (payload))) #define HDA_CMD_GET_CONN_LIST_ENTRY_SIZE_SHORT 1 #define HDA_CMD_GET_CONN_LIST_ENTRY_SIZE_LONG 2 /* Processing State */ #define HDA_CMD_VERB_GET_PROCESSING_STATE 0xf03 #define HDA_CMD_VERB_SET_PROCESSING_STATE 0x703 #define HDA_CMD_GET_PROCESSING_STATE(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_PROCESSING_STATE, 0x0)) #define HDA_CMD_SET_PROCESSING_STATE(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_PROCESSING_STATE, (payload))) #define HDA_CMD_GET_PROCESSING_STATE_STATE_OFF 0x00 #define HDA_CMD_GET_PROCESSING_STATE_STATE_ON 0x01 #define HDA_CMD_GET_PROCESSING_STATE_STATE_BENIGN 0x02 /* Coefficient Index */ #define HDA_CMD_VERB_GET_COEFF_INDEX 0xd #define HDA_CMD_VERB_SET_COEFF_INDEX 0x5 #define HDA_CMD_GET_COEFF_INDEX(cad, nid) \ (HDA_CMD_4BIT((cad), (nid), \ HDA_CMD_VERB_GET_COEFF_INDEX, 0x0)) #define HDA_CMD_SET_COEFF_INDEX(cad, nid, payload) \ (HDA_CMD_4BIT((cad), (nid), \ HDA_CMD_VERB_SET_COEFF_INDEX, (payload))) /* Processing Coefficient */ #define HDA_CMD_VERB_GET_PROCESSING_COEFF 0xc #define HDA_CMD_VERB_SET_PROCESSING_COEFF 0x4 #define HDA_CMD_GET_PROCESSING_COEFF(cad, nid) \ (HDA_CMD_4BIT((cad), (nid), \ HDA_CMD_VERB_GET_PROCESSING_COEFF, 0x0)) #define HDA_CMD_SET_PROCESSING_COEFF(cad, nid, payload) \ (HDA_CMD_4BIT((cad), (nid), \ HDA_CMD_VERB_SET_PROCESSING_COEFF, (payload))) /* Amplifier Gain/Mute */ #define HDA_CMD_VERB_GET_AMP_GAIN_MUTE 0xb #define HDA_CMD_VERB_SET_AMP_GAIN_MUTE 0x3 #define HDA_CMD_GET_AMP_GAIN_MUTE(cad, nid, payload) \ (HDA_CMD_4BIT((cad), (nid), \ HDA_CMD_VERB_GET_AMP_GAIN_MUTE, (payload))) #define HDA_CMD_SET_AMP_GAIN_MUTE(cad, nid, payload) \ (HDA_CMD_4BIT((cad), (nid), \ HDA_CMD_VERB_SET_AMP_GAIN_MUTE, (payload))) #define 
HDA_CMD_GET_AMP_GAIN_MUTE_INPUT 0x0000 #define HDA_CMD_GET_AMP_GAIN_MUTE_OUTPUT 0x8000 #define HDA_CMD_GET_AMP_GAIN_MUTE_RIGHT 0x0000 #define HDA_CMD_GET_AMP_GAIN_MUTE_LEFT 0x2000 #define HDA_CMD_GET_AMP_GAIN_MUTE_MUTE_MASK 0x00000008 #define HDA_CMD_GET_AMP_GAIN_MUTE_MUTE_SHIFT 7 #define HDA_CMD_GET_AMP_GAIN_MUTE_GAIN_MASK 0x00000007 #define HDA_CMD_GET_AMP_GAIN_MUTE_GAIN_SHIFT 0 #define HDA_CMD_GET_AMP_GAIN_MUTE_MUTE(rsp) \ (((rsp) & HDA_CMD_GET_AMP_GAIN_MUTE_MUTE_MASK) >> \ HDA_CMD_GET_AMP_GAIN_MUTE_MUTE_SHIFT) #define HDA_CMD_GET_AMP_GAIN_MUTE_GAIN(rsp) \ (((rsp) & HDA_CMD_GET_AMP_GAIN_MUTE_GAIN_MASK) >> \ HDA_CMD_GET_AMP_GAIN_MUTE_GAIN_SHIFT) #define HDA_CMD_SET_AMP_GAIN_MUTE_OUTPUT 0x8000 #define HDA_CMD_SET_AMP_GAIN_MUTE_INPUT 0x4000 #define HDA_CMD_SET_AMP_GAIN_MUTE_LEFT 0x2000 #define HDA_CMD_SET_AMP_GAIN_MUTE_RIGHT 0x1000 #define HDA_CMD_SET_AMP_GAIN_MUTE_INDEX_MASK 0x0f00 #define HDA_CMD_SET_AMP_GAIN_MUTE_INDEX_SHIFT 8 #define HDA_CMD_SET_AMP_GAIN_MUTE_MUTE 0x0080 #define HDA_CMD_SET_AMP_GAIN_MUTE_GAIN_MASK 0x0007 #define HDA_CMD_SET_AMP_GAIN_MUTE_GAIN_SHIFT 0 #define HDA_CMD_SET_AMP_GAIN_MUTE_INDEX(index) \ (((index) << HDA_CMD_SET_AMP_GAIN_MUTE_INDEX_SHIFT) & \ HDA_CMD_SET_AMP_GAIN_MUTE_INDEX_MASK) #define HDA_CMD_SET_AMP_GAIN_MUTE_GAIN(index) \ (((index) << HDA_CMD_SET_AMP_GAIN_MUTE_GAIN_SHIFT) & \ HDA_CMD_SET_AMP_GAIN_MUTE_GAIN_MASK) /* Converter format */ #define HDA_CMD_VERB_GET_CONV_FMT 0xa #define HDA_CMD_VERB_SET_CONV_FMT 0x2 #define HDA_CMD_GET_CONV_FMT(cad, nid) \ (HDA_CMD_4BIT((cad), (nid), \ HDA_CMD_VERB_GET_CONV_FMT, 0x0)) #define HDA_CMD_SET_CONV_FMT(cad, nid, payload) \ (HDA_CMD_4BIT((cad), (nid), \ HDA_CMD_VERB_SET_CONV_FMT, (payload))) /* Digital Converter Control */ #define HDA_CMD_VERB_GET_DIGITAL_CONV_FMT1 0xf0d #define HDA_CMD_VERB_GET_DIGITAL_CONV_FMT2 0xf0e #define HDA_CMD_VERB_SET_DIGITAL_CONV_FMT1 0x70d #define HDA_CMD_VERB_SET_DIGITAL_CONV_FMT2 0x70e #define HDA_CMD_GET_DIGITAL_CONV_FMT(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_DIGITAL_CONV_FMT1, 0x0)) #define HDA_CMD_SET_DIGITAL_CONV_FMT1(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_DIGITAL_CONV_FMT1, (payload))) #define HDA_CMD_SET_DIGITAL_CONV_FMT2(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_DIGITAL_CONV_FMT2, (payload))) #define HDA_CMD_GET_DIGITAL_CONV_FMT_CC_MASK 0x7f00 #define HDA_CMD_GET_DIGITAL_CONV_FMT_CC_SHIFT 8 #define HDA_CMD_GET_DIGITAL_CONV_FMT_L_MASK 0x0080 #define HDA_CMD_GET_DIGITAL_CONV_FMT_L_SHIFT 7 #define HDA_CMD_GET_DIGITAL_CONV_FMT_PRO_MASK 0x0040 #define HDA_CMD_GET_DIGITAL_CONV_FMT_PRO_SHIFT 6 #define HDA_CMD_GET_DIGITAL_CONV_FMT_NAUDIO_MASK 0x0020 #define HDA_CMD_GET_DIGITAL_CONV_FMT_NAUDIO_SHIFT 5 #define HDA_CMD_GET_DIGITAL_CONV_FMT_COPY_MASK 0x0010 #define HDA_CMD_GET_DIGITAL_CONV_FMT_COPY_SHIFT 4 #define HDA_CMD_GET_DIGITAL_CONV_FMT_PRE_MASK 0x0008 #define HDA_CMD_GET_DIGITAL_CONV_FMT_PRE_SHIFT 3 #define HDA_CMD_GET_DIGITAL_CONV_FMT_VCFG_MASK 0x0004 #define HDA_CMD_GET_DIGITAL_CONV_FMT_VCFG_SHIFT 2 #define HDA_CMD_GET_DIGITAL_CONV_FMT_V_MASK 0x0002 #define HDA_CMD_GET_DIGITAL_CONV_FMT_V_SHIFT 1 #define HDA_CMD_GET_DIGITAL_CONV_FMT_DIGEN_MASK 0x0001 #define HDA_CMD_GET_DIGITAL_CONV_FMT_DIGEN_SHIFT 0 #define HDA_CMD_GET_DIGITAL_CONV_FMT_CC(rsp) \ (((rsp) & HDA_CMD_GET_DIGITAL_CONV_FMT_CC_MASK) >> \ HDA_CMD_GET_DIGITAL_CONV_FMT_CC_SHIFT) #define HDA_CMD_GET_DIGITAL_CONV_FMT_L(rsp) \ (((rsp) & HDA_CMD_GET_DIGITAL_CONV_FMT_L_MASK) >> \ HDA_CMD_GET_DIGITAL_CONV_FMT_L_SHIFT) #define 
HDA_CMD_GET_DIGITAL_CONV_FMT_PRO(rsp) \ (((rsp) & HDA_CMD_GET_DIGITAL_CONV_FMT_PRO_MASK) >> \ HDA_CMD_GET_DIGITAL_CONV_FMT_PRO_SHIFT) #define HDA_CMD_GET_DIGITAL_CONV_FMT_NAUDIO(rsp) \ (((rsp) & HDA_CMD_GET_DIGITAL_CONV_FMT_NAUDIO_MASK) >> \ HDA_CMD_GET_DIGITAL_CONV_FMT_NAUDIO_SHIFT) #define HDA_CMD_GET_DIGITAL_CONV_FMT_COPY(rsp) \ (((rsp) & HDA_CMD_GET_DIGITAL_CONV_FMT_COPY_MASK) >> \ HDA_CMD_GET_DIGITAL_CONV_FMT_COPY_SHIFT) #define HDA_CMD_GET_DIGITAL_CONV_FMT_PRE(rsp) \ (((rsp) & HDA_CMD_GET_DIGITAL_CONV_FMT_PRE_MASK) >> \ HDA_CMD_GET_DIGITAL_CONV_FMT_PRE_SHIFT) #define HDA_CMD_GET_DIGITAL_CONV_FMT_VCFG(rsp) \ (((rsp) & HDA_CMD_GET_DIGITAL_CONV_FMT_VCFG_MASK) >> \ HDA_CMD_GET_DIGITAL_CONV_FMT_VCFG_SHIFT) #define HDA_CMD_GET_DIGITAL_CONV_FMT_V(rsp) \ (((rsp) & HDA_CMD_GET_DIGITAL_CONV_FMT_V_MASK) >> \ HDA_CMD_GET_DIGITAL_CONV_FMT_V_SHIFT) #define HDA_CMD_GET_DIGITAL_CONV_FMT_DIGEN(rsp) \ (((rsp) & HDA_CMD_GET_DIGITAL_CONV_FMT_DIGEN_MASK) >> \ HDA_CMD_GET_DIGITAL_CONV_FMT_DIGEN_SHIFT) #define HDA_CMD_SET_DIGITAL_CONV_FMT1_L 0x80 #define HDA_CMD_SET_DIGITAL_CONV_FMT1_PRO 0x40 #define HDA_CMD_SET_DIGITAL_CONV_FMT1_NAUDIO 0x20 #define HDA_CMD_SET_DIGITAL_CONV_FMT1_COPY 0x10 #define HDA_CMD_SET_DIGITAL_CONV_FMT1_PRE 0x08 #define HDA_CMD_SET_DIGITAL_CONV_FMT1_VCFG 0x04 #define HDA_CMD_SET_DIGITAL_CONV_FMT1_V 0x02 #define HDA_CMD_SET_DIGITAL_CONV_FMT1_DIGEN 0x01 /* Power State */ #define HDA_CMD_VERB_GET_POWER_STATE 0xf05 #define HDA_CMD_VERB_SET_POWER_STATE 0x705 #define HDA_CMD_GET_POWER_STATE(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_POWER_STATE, 0x0)) #define HDA_CMD_SET_POWER_STATE(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_POWER_STATE, (payload))) #define HDA_CMD_POWER_STATE_D0 0x00 #define HDA_CMD_POWER_STATE_D1 0x01 #define HDA_CMD_POWER_STATE_D2 0x02 #define HDA_CMD_POWER_STATE_D3 0x03 #define HDA_CMD_POWER_STATE_ACT_MASK 0x000000f0 #define HDA_CMD_POWER_STATE_ACT_SHIFT 4 #define HDA_CMD_POWER_STATE_SET_MASK 0x0000000f #define HDA_CMD_POWER_STATE_SET_SHIFT 0 #define HDA_CMD_GET_POWER_STATE_ACT(rsp) \ (((rsp) & HDA_CMD_POWER_STATE_ACT_MASK) >> \ HDA_CMD_POWER_STATE_ACT_SHIFT) #define HDA_CMD_GET_POWER_STATE_SET(rsp) \ (((rsp) & HDA_CMD_POWER_STATE_SET_MASK) >> \ HDA_CMD_POWER_STATE_SET_SHIFT) #define HDA_CMD_SET_POWER_STATE_ACT(ps) \ (((ps) << HDA_CMD_POWER_STATE_ACT_SHIFT) & \ HDA_CMD_POWER_STATE_ACT_MASK) #define HDA_CMD_SET_POWER_STATE_SET(ps) \ (((ps) << HDA_CMD_POWER_STATE_SET_SHIFT) & \ HDA_CMD_POWER_STATE_ACT_MASK) /* Converter Stream, Channel */ #define HDA_CMD_VERB_GET_CONV_STREAM_CHAN 0xf06 #define HDA_CMD_VERB_SET_CONV_STREAM_CHAN 0x706 #define HDA_CMD_GET_CONV_STREAM_CHAN(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_CONV_STREAM_CHAN, 0x0)) #define HDA_CMD_SET_CONV_STREAM_CHAN(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_CONV_STREAM_CHAN, (payload))) #define HDA_CMD_CONV_STREAM_CHAN_STREAM_MASK 0x000000f0 #define HDA_CMD_CONV_STREAM_CHAN_STREAM_SHIFT 4 #define HDA_CMD_CONV_STREAM_CHAN_CHAN_MASK 0x0000000f #define HDA_CMD_CONV_STREAM_CHAN_CHAN_SHIFT 0 #define HDA_CMD_GET_CONV_STREAM_CHAN_STREAM(rsp) \ (((rsp) & HDA_CMD_CONV_STREAM_CHAN_STREAM_MASK) >> \ HDA_CMD_CONV_STREAM_CHAN_STREAM_SHIFT) #define HDA_CMD_GET_CONV_STREAM_CHAN_CHAN(rsp) \ (((rsp) & HDA_CMD_CONV_STREAM_CHAN_CHAN_MASK) >> \ HDA_CMD_CONV_STREAM_CHAN_CHAN_SHIFT) #define HDA_CMD_SET_CONV_STREAM_CHAN_STREAM(param) \ (((param) << HDA_CMD_CONV_STREAM_CHAN_STREAM_SHIFT) & \ HDA_CMD_CONV_STREAM_CHAN_STREAM_MASK) #define 
HDA_CMD_SET_CONV_STREAM_CHAN_CHAN(param) \ (((param) << HDA_CMD_CONV_STREAM_CHAN_CHAN_SHIFT) & \ HDA_CMD_CONV_STREAM_CHAN_CHAN_MASK) /* Input Converter SDI Select */ #define HDA_CMD_VERB_GET_INPUT_CONVERTER_SDI_SELECT 0xf04 #define HDA_CMD_VERB_SET_INPUT_CONVERTER_SDI_SELECT 0x704 #define HDA_CMD_GET_INPUT_CONVERTER_SDI_SELECT(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_INPUT_CONVERTER_SDI_SELECT, 0x0)) #define HDA_CMD_SET_INPUT_CONVERTER_SDI_SELECT(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_INPUT_CONVERTER_SDI_SELECT, (payload))) /* Pin Widget Control */ #define HDA_CMD_VERB_GET_PIN_WIDGET_CTRL 0xf07 #define HDA_CMD_VERB_SET_PIN_WIDGET_CTRL 0x707 #define HDA_CMD_GET_PIN_WIDGET_CTRL(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_PIN_WIDGET_CTRL, 0x0)) #define HDA_CMD_SET_PIN_WIDGET_CTRL(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_PIN_WIDGET_CTRL, (payload))) #define HDA_CMD_GET_PIN_WIDGET_CTRL_HPHN_ENABLE_MASK 0x00000080 #define HDA_CMD_GET_PIN_WIDGET_CTRL_HPHN_ENABLE_SHIFT 7 #define HDA_CMD_GET_PIN_WIDGET_CTRL_OUT_ENABLE_MASK 0x00000040 #define HDA_CMD_GET_PIN_WIDGET_CTRL_OUT_ENABLE_SHIFT 6 #define HDA_CMD_GET_PIN_WIDGET_CTRL_IN_ENABLE_MASK 0x00000020 #define HDA_CMD_GET_PIN_WIDGET_CTRL_IN_ENABLE_SHIFT 5 #define HDA_CMD_GET_PIN_WIDGET_CTRL_VREF_ENABLE_MASK 0x00000007 #define HDA_CMD_GET_PIN_WIDGET_CTRL_VREF_ENABLE_SHIFT 0 #define HDA_CMD_GET_PIN_WIDGET_CTRL_HPHN_ENABLE(rsp) \ (((rsp) & HDA_CMD_GET_PIN_WIDGET_CTRL_HPHN_ENABLE_MASK) >> \ HDA_CMD_GET_PIN_WIDGET_CTRL_HPHN_ENABLE_SHIFT) #define HDA_CMD_GET_PIN_WIDGET_CTRL_OUT_ENABLE(rsp) \ (((rsp) & HDA_CMD_GET_PIN_WIDGET_CTRL_OUT_ENABLE_MASK) >> \ HDA_GET_CMD_PIN_WIDGET_CTRL_OUT_ENABLE_SHIFT) #define HDA_CMD_GET_PIN_WIDGET_CTRL_IN_ENABLE(rsp) \ (((rsp) & HDA_CMD_GET_PIN_WIDGET_CTRL_IN_ENABLE_MASK) >> \ HDA_CMD_GET_PIN_WIDGET_CTRL_IN_ENABLE_SHIFT) #define HDA_CMD_GET_PIN_WIDGET_CTRL_VREF_ENABLE(rsp) \ (((rsp) & HDA_CMD_GET_PIN_WIDGET_CTRL_VREF_ENABLE_MASK) >> \ HDA_CMD_GET_PIN_WIDGET_CTRL_VREF_ENABLE_SHIFT) #define HDA_CMD_SET_PIN_WIDGET_CTRL_HPHN_ENABLE 0x80 #define HDA_CMD_SET_PIN_WIDGET_CTRL_OUT_ENABLE 0x40 #define HDA_CMD_SET_PIN_WIDGET_CTRL_IN_ENABLE 0x20 #define HDA_CMD_SET_PIN_WIDGET_CTRL_VREF_ENABLE_MASK 0x07 #define HDA_CMD_SET_PIN_WIDGET_CTRL_VREF_ENABLE_SHIFT 0 #define HDA_CMD_SET_PIN_WIDGET_CTRL_VREF_ENABLE(param) \ (((param) << HDA_CMD_SET_PIN_WIDGET_CTRL_VREF_ENABLE_SHIFT) & \ HDA_CMD_SET_PIN_WIDGET_CTRL_VREF_ENABLE_MASK) #define HDA_CMD_PIN_WIDGET_CTRL_VREF_ENABLE_HIZ 0 #define HDA_CMD_PIN_WIDGET_CTRL_VREF_ENABLE_50 1 #define HDA_CMD_PIN_WIDGET_CTRL_VREF_ENABLE_GROUND 2 #define HDA_CMD_PIN_WIDGET_CTRL_VREF_ENABLE_80 4 #define HDA_CMD_PIN_WIDGET_CTRL_VREF_ENABLE_100 5 /* Unsolicited Response */ #define HDA_CMD_VERB_GET_UNSOLICITED_RESPONSE 0xf08 #define HDA_CMD_VERB_SET_UNSOLICITED_RESPONSE 0x708 #define HDA_CMD_GET_UNSOLICITED_RESPONSE(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_UNSOLICITED_RESPONSE, 0x0)) #define HDA_CMD_SET_UNSOLICITED_RESPONSE(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_UNSOLICITED_RESPONSE, (payload))) #define HDA_CMD_GET_UNSOLICITED_RESPONSE_ENABLE_MASK 0x00000080 #define HDA_CMD_GET_UNSOLICITED_RESPONSE_ENABLE_SHIFT 7 #define HDA_CMD_GET_UNSOLICITED_RESPONSE_TAG_MASK 0x0000001f #define HDA_CMD_GET_UNSOLICITED_RESPONSE_TAG_SHIFT 0 #define HDA_CMD_GET_UNSOLICITED_RESPONSE_ENABLE(rsp) \ (((rsp) & HDA_CMD_GET_UNSOLICITED_RESPONSE_ENABLE_MASK) >> \ 
HDA_CMD_GET_UNSOLICITED_RESPONSE_ENABLE_SHIFT) #define HDA_CMD_GET_UNSOLICITED_RESPONSE_TAG(rsp) \ (((rsp) & HDA_CMD_GET_UNSOLICITED_RESPONSE_TAG_MASK) >> \ HDA_CMD_GET_UNSOLICITED_RESPONSE_TAG_SHIFT) #define HDA_CMD_SET_UNSOLICITED_RESPONSE_ENABLE 0x80 #define HDA_CMD_SET_UNSOLICITED_RESPONSE_TAG_MASK 0x3f #define HDA_CMD_SET_UNSOLICITED_RESPONSE_TAG_SHIFT 0 #define HDA_CMD_SET_UNSOLICITED_RESPONSE_TAG(param) \ (((param) << HDA_CMD_SET_UNSOLICITED_RESPONSE_TAG_SHIFT) & \ HDA_CMD_SET_UNSOLICITED_RESPONSE_TAG_MASK) /* Pin Sense */ #define HDA_CMD_VERB_GET_PIN_SENSE 0xf09 #define HDA_CMD_VERB_SET_PIN_SENSE 0x709 #define HDA_CMD_GET_PIN_SENSE(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_PIN_SENSE, 0x0)) #define HDA_CMD_SET_PIN_SENSE(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_PIN_SENSE, (payload))) #define HDA_CMD_GET_PIN_SENSE_PRESENCE_DETECT 0x80000000 #define HDA_CMD_GET_PIN_SENSE_ELD_VALID 0x40000000 #define HDA_CMD_GET_PIN_SENSE_IMP_SENSE_MASK 0x7fffffff #define HDA_CMD_GET_PIN_SENSE_IMP_SENSE_SHIFT 0 #define HDA_CMD_GET_PIN_SENSE_IMP_SENSE(rsp) \ (((rsp) & HDA_CMD_GET_PIN_SENSE_IMP_SENSE_MASK) >> \ HDA_CMD_GET_PIN_SENSE_IMP_SENSE_SHIFT) #define HDA_CMD_GET_PIN_SENSE_IMP_SENSE_INVALID 0x7fffffff #define HDA_CMD_SET_PIN_SENSE_LEFT_CHANNEL 0x00 #define HDA_CMD_SET_PIN_SENSE_RIGHT_CHANNEL 0x01 /* EAPD/BTL Enable */ #define HDA_CMD_VERB_GET_EAPD_BTL_ENABLE 0xf0c #define HDA_CMD_VERB_SET_EAPD_BTL_ENABLE 0x70c #define HDA_CMD_GET_EAPD_BTL_ENABLE(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_EAPD_BTL_ENABLE, 0x0)) #define HDA_CMD_SET_EAPD_BTL_ENABLE(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_EAPD_BTL_ENABLE, (payload))) #define HDA_CMD_GET_EAPD_BTL_ENABLE_LR_SWAP_MASK 0x00000004 #define HDA_CMD_GET_EAPD_BTL_ENABLE_LR_SWAP_SHIFT 2 #define HDA_CMD_GET_EAPD_BTL_ENABLE_EAPD_MASK 0x00000002 #define HDA_CMD_GET_EAPD_BTL_ENABLE_EAPD_SHIFT 1 #define HDA_CMD_GET_EAPD_BTL_ENABLE_BTL_MASK 0x00000001 #define HDA_CMD_GET_EAPD_BTL_ENABLE_BTL_SHIFT 0 #define HDA_CMD_GET_EAPD_BTL_ENABLE_LR_SWAP(rsp) \ (((rsp) & HDA_CMD_GET_EAPD_BTL_ENABLE_LR_SWAP_MASK) >> \ HDA_CMD_GET_EAPD_BTL_ENABLE_LR_SWAP_SHIFT) #define HDA_CMD_GET_EAPD_BTL_ENABLE_EAPD(rsp) \ (((rsp) & HDA_CMD_GET_EAPD_BTL_ENABLE_EAPD_MASK) >> \ HDA_CMD_GET_EAPD_BTL_ENABLE_EAPD_SHIFT) #define HDA_CMD_GET_EAPD_BTL_ENABLE_BTL(rsp) \ (((rsp) & HDA_CMD_GET_EAPD_BTL_ENABLE_BTL_MASK) >> \ HDA_CMD_GET_EAPD_BTL_ENABLE_BTL_SHIFT) #define HDA_CMD_SET_EAPD_BTL_ENABLE_LR_SWAP 0x04 #define HDA_CMD_SET_EAPD_BTL_ENABLE_EAPD 0x02 #define HDA_CMD_SET_EAPD_BTL_ENABLE_BTL 0x01 /* GPI Data */ #define HDA_CMD_VERB_GET_GPI_DATA 0xf10 #define HDA_CMD_VERB_SET_GPI_DATA 0x710 #define HDA_CMD_GET_GPI_DATA(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_GPI_DATA, 0x0)) #define HDA_CMD_SET_GPI_DATA(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_GPI_DATA, (payload))) /* GPI Wake Enable Mask */ #define HDA_CMD_VERB_GET_GPI_WAKE_ENABLE_MASK 0xf11 #define HDA_CMD_VERB_SET_GPI_WAKE_ENABLE_MASK 0x711 #define HDA_CMD_GET_GPI_WAKE_ENABLE_MASK(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_GPI_WAKE_ENABLE_MASK, 0x0)) #define HDA_CMD_SET_GPI_WAKE_ENABLE_MASK(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_GPI_WAKE_ENABLE_MASK, (payload))) /* GPI Unsolicited Enable Mask */ #define HDA_CMD_VERB_GET_GPI_UNSOLICITED_ENABLE_MASK 0xf12 #define HDA_CMD_VERB_SET_GPI_UNSOLICITED_ENABLE_MASK 0x712 #define HDA_CMD_GET_GPI_UNSOLICITED_ENABLE_MASK(cad, nid) \ 
(HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_GPI_UNSOLICITED_ENABLE_MASK, 0x0)) #define HDA_CMD_SET_GPI_UNSOLICITED_ENABLE_MASK(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_GPI_UNSOLICITED_ENABLE_MASK, (payload))) /* GPI Sticky Mask */ #define HDA_CMD_VERB_GET_GPI_STICKY_MASK 0xf13 #define HDA_CMD_VERB_SET_GPI_STICKY_MASK 0x713 #define HDA_CMD_GET_GPI_STICKY_MASK(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_GPI_STICKY_MASK, 0x0)) #define HDA_CMD_SET_GPI_STICKY_MASK(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_GPI_STICKY_MASK, (payload))) /* GPO Data */ #define HDA_CMD_VERB_GET_GPO_DATA 0xf14 #define HDA_CMD_VERB_SET_GPO_DATA 0x714 #define HDA_CMD_GET_GPO_DATA(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_GPO_DATA, 0x0)) #define HDA_CMD_SET_GPO_DATA(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_GPO_DATA, (payload))) /* GPIO Data */ #define HDA_CMD_VERB_GET_GPIO_DATA 0xf15 #define HDA_CMD_VERB_SET_GPIO_DATA 0x715 #define HDA_CMD_GET_GPIO_DATA(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_GPIO_DATA, 0x0)) #define HDA_CMD_SET_GPIO_DATA(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_GPIO_DATA, (payload))) /* GPIO Enable Mask */ #define HDA_CMD_VERB_GET_GPIO_ENABLE_MASK 0xf16 #define HDA_CMD_VERB_SET_GPIO_ENABLE_MASK 0x716 #define HDA_CMD_GET_GPIO_ENABLE_MASK(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_GPIO_ENABLE_MASK, 0x0)) #define HDA_CMD_SET_GPIO_ENABLE_MASK(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_GPIO_ENABLE_MASK, (payload))) /* GPIO Direction */ #define HDA_CMD_VERB_GET_GPIO_DIRECTION 0xf17 #define HDA_CMD_VERB_SET_GPIO_DIRECTION 0x717 #define HDA_CMD_GET_GPIO_DIRECTION(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_GPIO_DIRECTION, 0x0)) #define HDA_CMD_SET_GPIO_DIRECTION(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_GPIO_DIRECTION, (payload))) /* GPIO Wake Enable Mask */ #define HDA_CMD_VERB_GET_GPIO_WAKE_ENABLE_MASK 0xf18 #define HDA_CMD_VERB_SET_GPIO_WAKE_ENABLE_MASK 0x718 #define HDA_CMD_GET_GPIO_WAKE_ENABLE_MASK(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_GPIO_WAKE_ENABLE_MASK, 0x0)) #define HDA_CMD_SET_GPIO_WAKE_ENABLE_MASK(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_GPIO_WAKE_ENABLE_MASK, (payload))) /* GPIO Unsolicited Enable Mask */ #define HDA_CMD_VERB_GET_GPIO_UNSOLICITED_ENABLE_MASK 0xf19 #define HDA_CMD_VERB_SET_GPIO_UNSOLICITED_ENABLE_MASK 0x719 #define HDA_CMD_GET_GPIO_UNSOLICITED_ENABLE_MASK(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_GPIO_UNSOLICITED_ENABLE_MASK, 0x0)) #define HDA_CMD_SET_GPIO_UNSOLICITED_ENABLE_MASK(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_GPIO_UNSOLICITED_ENABLE_MASK, (payload))) /* GPIO_STICKY_MASK */ #define HDA_CMD_VERB_GET_GPIO_STICKY_MASK 0xf1a #define HDA_CMD_VERB_SET_GPIO_STICKY_MASK 0x71a #define HDA_CMD_GET_GPIO_STICKY_MASK(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_GPIO_STICKY_MASK, 0x0)) #define HDA_CMD_SET_GPIO_STICKY_MASK(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_GPIO_STICKY_MASK, (payload))) /* Beep Generation */ #define HDA_CMD_VERB_GET_BEEP_GENERATION 0xf0a #define HDA_CMD_VERB_SET_BEEP_GENERATION 0x70a #define HDA_CMD_GET_BEEP_GENERATION(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_BEEP_GENERATION, 0x0)) #define HDA_CMD_SET_BEEP_GENERATION(cad, nid, payload) \ 
(HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_BEEP_GENERATION, (payload))) /* Volume Knob */ #define HDA_CMD_VERB_GET_VOLUME_KNOB 0xf0f #define HDA_CMD_VERB_SET_VOLUME_KNOB 0x70f #define HDA_CMD_GET_VOLUME_KNOB(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_VOLUME_KNOB, 0x0)) #define HDA_CMD_SET_VOLUME_KNOB(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_VOLUME_KNOB, (payload))) /* Subsystem ID */ #define HDA_CMD_VERB_GET_SUBSYSTEM_ID 0xf20 #define HDA_CMD_VERB_SET_SUSBYSTEM_ID1 0x720 #define HDA_CMD_VERB_SET_SUBSYSTEM_ID2 0x721 #define HDA_CMD_VERB_SET_SUBSYSTEM_ID3 0x722 #define HDA_CMD_VERB_SET_SUBSYSTEM_ID4 0x723 #define HDA_CMD_GET_SUBSYSTEM_ID(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_SUBSYSTEM_ID, 0x0)) #define HDA_CMD_SET_SUBSYSTEM_ID1(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_SUSBYSTEM_ID1, (payload))) #define HDA_CMD_SET_SUBSYSTEM_ID2(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_SUSBYSTEM_ID2, (payload))) #define HDA_CMD_SET_SUBSYSTEM_ID3(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_SUSBYSTEM_ID3, (payload))) #define HDA_CMD_SET_SUBSYSTEM_ID4(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_SUSBYSTEM_ID4, (payload))) /* Configuration Default */ #define HDA_CMD_VERB_GET_CONFIGURATION_DEFAULT 0xf1c #define HDA_CMD_VERB_SET_CONFIGURATION_DEFAULT1 0x71c #define HDA_CMD_VERB_SET_CONFIGURATION_DEFAULT2 0x71d #define HDA_CMD_VERB_SET_CONFIGURATION_DEFAULT3 0x71e #define HDA_CMD_VERB_SET_CONFIGURATION_DEFAULT4 0x71f #define HDA_CMD_GET_CONFIGURATION_DEFAULT(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_CONFIGURATION_DEFAULT, 0x0)) #define HDA_CMD_SET_CONFIGURATION_DEFAULT1(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_CONFIGURATION_DEFAULT1, (payload))) #define HDA_CMD_SET_CONFIGURATION_DEFAULT2(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_CONFIGURATION_DEFAULT2, (payload))) #define HDA_CMD_SET_CONFIGURATION_DEFAULT3(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_CONFIGURATION_DEFAULT3, (payload))) #define HDA_CMD_SET_CONFIGURATION_DEFAULT4(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_CONFIGURATION_DEFAULT4, (payload))) /* Stripe Control */ #define HDA_CMD_VERB_GET_STRIPE_CONTROL 0xf24 #define HDA_CMD_VERB_SET_STRIPE_CONTROL 0x724 #define HDA_CMD_GET_STRIPE_CONTROL(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_STRIPE_CONTROL, 0x0)) #define HDA_CMD_SET_STRIPE_CONTROL(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_STRIPE_CONTROL, (payload))) /* Channel Count Control */ #define HDA_CMD_VERB_GET_CONV_CHAN_COUNT 0xf2d -#define HDA_CMD_VERB_SET_CONV_CHAN_COUNT 0x72d +#define HDA_CMD_VERB_SET_CONV_CHAN_COUNT 0x72d #define HDA_CMD_GET_CONV_CHAN_COUNT(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_CONV_CHAN_COUNT, 0x0)) #define HDA_CMD_SET_CONV_CHAN_COUNT(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_CONV_CHAN_COUNT, (payload))) -#define HDA_CMD_VERB_GET_HDMI_DIP_SIZE 0xf2e +#define HDA_CMD_VERB_GET_HDMI_DIP_SIZE 0xf2e #define HDA_CMD_GET_HDMI_DIP_SIZE(cad, nid, arg) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_HDMI_DIP_SIZE, (arg))) -#define HDA_CMD_VERB_GET_HDMI_ELDD 0xf2f +#define HDA_CMD_VERB_GET_HDMI_ELDD 0xf2f #define HDA_CMD_GET_HDMI_ELDD(cad, nid, off) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_HDMI_ELDD, (off))) -#define HDA_CMD_VERB_GET_HDMI_DIP_INDEX 
0xf30 -#define HDA_CMD_VERB_SET_HDMI_DIP_INDEX 0x730 +#define HDA_CMD_VERB_GET_HDMI_DIP_INDEX 0xf30 +#define HDA_CMD_VERB_SET_HDMI_DIP_INDEX 0x730 #define HDA_CMD_GET_HDMI_DIP_INDEX(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_HDMI_DIP_INDEX, 0x0)) #define HDA_CMD_SET_HDMI_DIP_INDEX(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_HDMI_DIP_INDEX, (payload))) -#define HDA_CMD_VERB_GET_HDMI_DIP_DATA 0xf31 -#define HDA_CMD_VERB_SET_HDMI_DIP_DATA 0x731 +#define HDA_CMD_VERB_GET_HDMI_DIP_DATA 0xf31 +#define HDA_CMD_VERB_SET_HDMI_DIP_DATA 0x731 #define HDA_CMD_GET_HDMI_DIP_DATA(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_HDMI_DIP_DATA, 0x0)) #define HDA_CMD_SET_HDMI_DIP_DATA(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_HDMI_DIP_DATA, (payload))) -#define HDA_CMD_VERB_GET_HDMI_DIP_XMIT 0xf32 -#define HDA_CMD_VERB_SET_HDMI_DIP_XMIT 0x732 +#define HDA_CMD_VERB_GET_HDMI_DIP_XMIT 0xf32 +#define HDA_CMD_VERB_SET_HDMI_DIP_XMIT 0x732 #define HDA_CMD_GET_HDMI_DIP_XMIT(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_HDMI_DIP_XMIT, 0x0)) #define HDA_CMD_SET_HDMI_DIP_XMIT(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_HDMI_DIP_XMIT, (payload))) -#define HDA_CMD_VERB_GET_HDMI_CP_CTRL 0xf33 -#define HDA_CMD_VERB_SET_HDMI_CP_CTRL 0x733 +#define HDA_CMD_VERB_GET_HDMI_CP_CTRL 0xf33 +#define HDA_CMD_VERB_SET_HDMI_CP_CTRL 0x733 -#define HDA_CMD_VERB_GET_HDMI_CHAN_SLOT 0xf34 -#define HDA_CMD_VERB_SET_HDMI_CHAN_SLOT 0x734 +#define HDA_CMD_VERB_GET_HDMI_CHAN_SLOT 0xf34 +#define HDA_CMD_VERB_SET_HDMI_CHAN_SLOT 0x734 #define HDA_CMD_GET_HDMI_CHAN_SLOT(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_GET_HDMI_CHAN_SLOT, 0x0)) #define HDA_CMD_SET_HDMI_CHAN_SLOT(cad, nid, payload) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_SET_HDMI_CHAN_SLOT, (payload))) #define HDA_HDMI_CODING_TYPE_REF_STREAM_HEADER 0 #define HDA_HDMI_CODING_TYPE_LPCM 1 #define HDA_HDMI_CODING_TYPE_AC3 2 #define HDA_HDMI_CODING_TYPE_MPEG1 3 #define HDA_HDMI_CODING_TYPE_MP3 4 #define HDA_HDMI_CODING_TYPE_MPEG2 5 #define HDA_HDMI_CODING_TYPE_AACLC 6 #define HDA_HDMI_CODING_TYPE_DTS 7 #define HDA_HDMI_CODING_TYPE_ATRAC 8 #define HDA_HDMI_CODING_TYPE_SACD 9 #define HDA_HDMI_CODING_TYPE_EAC3 10 #define HDA_HDMI_CODING_TYPE_DTS_HD 11 #define HDA_HDMI_CODING_TYPE_MLP 12 #define HDA_HDMI_CODING_TYPE_DST 13 #define HDA_HDMI_CODING_TYPE_WMAPRO 14 #define HDA_HDMI_CODING_TYPE_REF_CTX 15 /* Function Reset */ #define HDA_CMD_VERB_FUNCTION_RESET 0x7ff #define HDA_CMD_FUNCTION_RESET(cad, nid) \ (HDA_CMD_12BIT((cad), (nid), \ HDA_CMD_VERB_FUNCTION_RESET, 0x0)) /**************************************************************************** * HDA Device Parameters ****************************************************************************/ /* Vendor ID */ #define HDA_PARAM_VENDOR_ID 0x00 #define HDA_PARAM_VENDOR_ID_VENDOR_ID_MASK 0xffff0000 #define HDA_PARAM_VENDOR_ID_VENDOR_ID_SHIFT 16 #define HDA_PARAM_VENDOR_ID_DEVICE_ID_MASK 0x0000ffff #define HDA_PARAM_VENDOR_ID_DEVICE_ID_SHIFT 0 #define HDA_PARAM_VENDOR_ID_VENDOR_ID(param) \ (((param) & HDA_PARAM_VENDOR_ID_VENDOR_ID_MASK) >> \ HDA_PARAM_VENDOR_ID_VENDOR_ID_SHIFT) #define HDA_PARAM_VENDOR_ID_DEVICE_ID(param) \ (((param) & HDA_PARAM_VENDOR_ID_DEVICE_ID_MASK) >> \ HDA_PARAM_VENDOR_ID_DEVICE_ID_SHIFT) /* Revision ID */ #define HDA_PARAM_REVISION_ID 0x02 #define HDA_PARAM_REVISION_ID_MAJREV_MASK 0x00f00000 #define HDA_PARAM_REVISION_ID_MAJREV_SHIFT 20 #define HDA_PARAM_REVISION_ID_MINREV_MASK 0x000f0000 
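/*
 * Example use of the verb builders and parameter field accessors above
 * (a minimal sketch only; the codec address, node id and response value
 * are placeholders, not values taken from this change):
 *
 *	uint32_t cmd, resp;
 *	uint16_t vendor, device;
 *
 *	cmd = HDA_CMD_GET_PARAMETER(0, 0x01, HDA_PARAM_VENDOR_ID);
 *	resp = 0x83847680;				   // placeholder response
 *	vendor = HDA_PARAM_VENDOR_ID_VENDOR_ID(resp);	   // 0x8384
 *	device = HDA_PARAM_VENDOR_ID_DEVICE_ID(resp);	   // 0x7680
 */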
#define HDA_PARAM_REVISION_ID_MINREV_SHIFT 16 #define HDA_PARAM_REVISION_ID_REVISION_ID_MASK 0x0000ff00 #define HDA_PARAM_REVISION_ID_REVISION_ID_SHIFT 8 #define HDA_PARAM_REVISION_ID_STEPPING_ID_MASK 0x000000ff #define HDA_PARAM_REVISION_ID_STEPPING_ID_SHIFT 0 #define HDA_PARAM_REVISION_ID_MAJREV(param) \ (((param) & HDA_PARAM_REVISION_ID_MAJREV_MASK) >> \ HDA_PARAM_REVISION_ID_MAJREV_SHIFT) #define HDA_PARAM_REVISION_ID_MINREV(param) \ (((param) & HDA_PARAM_REVISION_ID_MINREV_MASK) >> \ HDA_PARAM_REVISION_ID_MINREV_SHIFT) #define HDA_PARAM_REVISION_ID_REVISION_ID(param) \ (((param) & HDA_PARAM_REVISION_ID_REVISION_ID_MASK) >> \ HDA_PARAM_REVISION_ID_REVISION_ID_SHIFT) #define HDA_PARAM_REVISION_ID_STEPPING_ID(param) \ (((param) & HDA_PARAM_REVISION_ID_STEPPING_ID_MASK) >> \ HDA_PARAM_REVISION_ID_STEPPING_ID_SHIFT) /* Subordinate Node Cound */ #define HDA_PARAM_SUB_NODE_COUNT 0x04 #define HDA_PARAM_SUB_NODE_COUNT_START_MASK 0x00ff0000 #define HDA_PARAM_SUB_NODE_COUNT_START_SHIFT 16 #define HDA_PARAM_SUB_NODE_COUNT_TOTAL_MASK 0x000000ff #define HDA_PARAM_SUB_NODE_COUNT_TOTAL_SHIFT 0 #define HDA_PARAM_SUB_NODE_COUNT_START(param) \ (((param) & HDA_PARAM_SUB_NODE_COUNT_START_MASK) >> \ HDA_PARAM_SUB_NODE_COUNT_START_SHIFT) #define HDA_PARAM_SUB_NODE_COUNT_TOTAL(param) \ (((param) & HDA_PARAM_SUB_NODE_COUNT_TOTAL_MASK) >> \ HDA_PARAM_SUB_NODE_COUNT_TOTAL_SHIFT) /* Function Group Type */ #define HDA_PARAM_FCT_GRP_TYPE 0x05 #define HDA_PARAM_FCT_GRP_TYPE_UNSOL_MASK 0x00000100 #define HDA_PARAM_FCT_GRP_TYPE_UNSOL_SHIFT 8 #define HDA_PARAM_FCT_GRP_TYPE_NODE_TYPE_MASK 0x000000ff #define HDA_PARAM_FCT_GRP_TYPE_NODE_TYPE_SHIFT 0 #define HDA_PARAM_FCT_GRP_TYPE_UNSOL(param) \ (((param) & HDA_PARAM_FCT_GRP_TYPE_UNSOL_MASK) >> \ HDA_PARAM_FCT_GROUP_TYPE_UNSOL_SHIFT) #define HDA_PARAM_FCT_GRP_TYPE_NODE_TYPE(param) \ (((param) & HDA_PARAM_FCT_GRP_TYPE_NODE_TYPE_MASK) >> \ HDA_PARAM_FCT_GRP_TYPE_NODE_TYPE_SHIFT) #define HDA_PARAM_FCT_GRP_TYPE_NODE_TYPE_AUDIO 0x01 #define HDA_PARAM_FCT_GRP_TYPE_NODE_TYPE_MODEM 0x02 /* Audio Function Group Capabilities */ #define HDA_PARAM_AUDIO_FCT_GRP_CAP 0x08 #define HDA_PARAM_AUDIO_FCT_GRP_CAP_BEEP_GEN_MASK 0x00010000 #define HDA_PARAM_AUDIO_FCT_GRP_CAP_BEEP_GEN_SHIFT 16 #define HDA_PARAM_AUDIO_FCT_GRP_CAP_INPUT_DELAY_MASK 0x00000f00 #define HDA_PARAM_AUDIO_FCT_GRP_CAP_INPUT_DELAY_SHIFT 8 #define HDA_PARAM_AUDIO_FCT_GRP_CAP_OUTPUT_DELAY_MASK 0x0000000f #define HDA_PARAM_AUDIO_FCT_GRP_CAP_OUTPUT_DELAY_SHIFT 0 #define HDA_PARAM_AUDIO_FCT_GRP_CAP_BEEP_GEN(param) \ (((param) & HDA_PARAM_AUDIO_FCT_GRP_CAP_BEEP_GEN_MASK) >> \ HDA_PARAM_AUDIO_FCT_GRP_CAP_BEEP_GEN_SHIFT) #define HDA_PARAM_AUDIO_FCT_GRP_CAP_INPUT_DELAY(param) \ (((param) & HDA_PARAM_AUDIO_FCT_GRP_CAP_INPUT_DELAY_MASK) >> \ HDA_PARAM_AUDIO_FCT_GRP_CAP_INPUT_DELAY_SHIFT) #define HDA_PARAM_AUDIO_FCT_GRP_CAP_OUTPUT_DELAY(param) \ (((param) & HDA_PARAM_AUDIO_FCT_GRP_CAP_OUTPUT_DELAY_MASK) >> \ HDA_PARAM_AUDIO_FCT_GRP_CAP_OUTPUT_DELAY_SHIFT) /* Audio Widget Capabilities */ #define HDA_PARAM_AUDIO_WIDGET_CAP 0x09 #define HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_MASK 0x00f00000 #define HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_SHIFT 20 #define HDA_PARAM_AUDIO_WIDGET_CAP_DELAY_MASK 0x000f0000 #define HDA_PARAM_AUDIO_WIDGET_CAP_DELAY_SHIFT 16 #define HDA_PARAM_AUDIO_WIDGET_CAP_CC_EXT_MASK 0x0000e000 #define HDA_PARAM_AUDIO_WIDGET_CAP_CC_EXT_SHIFT 13 #define HDA_PARAM_AUDIO_WIDGET_CAP_CP_MASK 0x00001000 #define HDA_PARAM_AUDIO_WIDGET_CAP_CP_SHIFT 12 #define HDA_PARAM_AUDIO_WIDGET_CAP_LR_SWAP_MASK 0x00000800 #define 
HDA_PARAM_AUDIO_WIDGET_CAP_LR_SWAP_SHIFT 11 #define HDA_PARAM_AUDIO_WIDGET_CAP_POWER_CTRL_MASK 0x00000400 #define HDA_PARAM_AUDIO_WIDGET_CAP_POWER_CTRL_SHIFT 10 #define HDA_PARAM_AUDIO_WIDGET_CAP_DIGITAL_MASK 0x00000200 #define HDA_PARAM_AUDIO_WIDGET_CAP_DIGITAL_SHIFT 9 #define HDA_PARAM_AUDIO_WIDGET_CAP_CONN_LIST_MASK 0x00000100 #define HDA_PARAM_AUDIO_WIDGET_CAP_CONN_LIST_SHIFT 8 #define HDA_PARAM_AUDIO_WIDGET_CAP_UNSOL_CAP_MASK 0x00000080 #define HDA_PARAM_AUDIO_WIDGET_CAP_UNSOL_CAP_SHIFT 7 #define HDA_PARAM_AUDIO_WIDGET_CAP_PROC_WIDGET_MASK 0x00000040 #define HDA_PARAM_AUDIO_WIDGET_CAP_PROC_WIDGET_SHIFT 6 #define HDA_PARAM_AUDIO_WIDGET_CAP_STRIPE_MASK 0x00000020 #define HDA_PARAM_AUDIO_WIDGET_CAP_STRIPE_SHIFT 5 #define HDA_PARAM_AUDIO_WIDGET_CAP_FORMAT_OVR_MASK 0x00000010 #define HDA_PARAM_AUDIO_WIDGET_CAP_FORMAT_OVR_SHIFT 4 #define HDA_PARAM_AUDIO_WIDGET_CAP_AMP_OVR_MASK 0x00000008 #define HDA_PARAM_AUDIO_WIDGET_CAP_AMP_OVR_SHIFT 3 #define HDA_PARAM_AUDIO_WIDGET_CAP_OUT_AMP_MASK 0x00000004 #define HDA_PARAM_AUDIO_WIDGET_CAP_OUT_AMP_SHIFT 2 #define HDA_PARAM_AUDIO_WIDGET_CAP_IN_AMP_MASK 0x00000002 #define HDA_PARAM_AUDIO_WIDGET_CAP_IN_AMP_SHIFT 1 #define HDA_PARAM_AUDIO_WIDGET_CAP_STEREO_MASK 0x00000001 #define HDA_PARAM_AUDIO_WIDGET_CAP_STEREO_SHIFT 0 #define HDA_PARAM_AUDIO_WIDGET_CAP_TYPE(param) \ (((param) & HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_MASK) >> \ HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_SHIFT) #define HDA_PARAM_AUDIO_WIDGET_CAP_DELAY(param) \ (((param) & HDA_PARAM_AUDIO_WIDGET_CAP_DELAY_MASK) >> \ HDA_PARAM_AUDIO_WIDGET_CAP_DELAY_SHIFT) #define HDA_PARAM_AUDIO_WIDGET_CAP_CC(param) \ ((((param) & HDA_PARAM_AUDIO_WIDGET_CAP_CC_EXT_MASK) >> \ (HDA_PARAM_AUDIO_WIDGET_CAP_CC_EXT_SHIFT - 1)) | \ (((param) & HDA_PARAM_AUDIO_WIDGET_CAP_STEREO_MASK) >> \ HDA_PARAM_AUDIO_WIDGET_CAP_STEREO_SHIFT)) #define HDA_PARAM_AUDIO_WIDGET_CAP_CP(param) \ (((param) & HDA_PARAM_AUDIO_WIDGET_CAP_CP_MASK) >> \ HDA_PARAM_AUDIO_WIDGET_CAP_CP_SHIFT) #define HDA_PARAM_AUDIO_WIDGET_CAP_LR_SWAP(param) \ (((param) & HDA_PARAM_AUDIO_WIDGET_CAP_LR_SWAP_MASK) >> \ HDA_PARAM_AUDIO_WIDGET_CAP_LR_SWAP_SHIFT) #define HDA_PARAM_AUDIO_WIDGET_CAP_POWER_CTRL(param) \ (((param) & HDA_PARAM_AUDIO_WIDGET_CAP_POWER_CTRL_MASK) >> \ HDA_PARAM_AUDIO_WIDGET_CAP_POWER_CTRL_SHIFT) #define HDA_PARAM_AUDIO_WIDGET_CAP_DIGITAL(param) \ (((param) & HDA_PARAM_AUDIO_WIDGET_CAP_DIGITAL_MASK) >> \ HDA_PARAM_AUDIO_WIDGET_CAP_DIGITAL_SHIFT) #define HDA_PARAM_AUDIO_WIDGET_CAP_CONN_LIST(param) \ (((param) & HDA_PARAM_AUDIO_WIDGET_CAP_CONN_LIST_MASK) >> \ HDA_PARAM_AUDIO_WIDGET_CAP_CONN_LIST_SHIFT) #define HDA_PARAM_AUDIO_WIDGET_CAP_UNSOL_CAP(param) \ (((param) & HDA_PARAM_AUDIO_WIDGET_CAP_UNSOL_CAP_MASK) >> \ HDA_PARAM_AUDIO_WIDGET_CAP_UNSOL_CAP_SHIFT) #define HDA_PARAM_AUDIO_WIDGET_CAP_PROC_WIDGET(param) \ (((param) & HDA_PARAM_AUDIO_WIDGET_CAP_PROC_WIDGET_MASK) >> \ HDA_PARAM_AUDIO_WIDGET_CAP_PROC_WIDGET_SHIFT) #define HDA_PARAM_AUDIO_WIDGET_CAP_STRIPE(param) \ (((param) & HDA_PARAM_AUDIO_WIDGET_CAP_STRIPE_MASK) >> \ HDA_PARAM_AUDIO_WIDGET_CAP_STRIPE_SHIFT) #define HDA_PARAM_AUDIO_WIDGET_CAP_FORMAT_OVR(param) \ (((param) & HDA_PARAM_AUDIO_WIDGET_CAP_FORMAT_OVR_MASK) >> \ HDA_PARAM_AUDIO_WIDGET_CAP_FORMAT_OVR_SHIFT) #define HDA_PARAM_AUDIO_WIDGET_CAP_AMP_OVR(param) \ (((param) & HDA_PARAM_AUDIO_WIDGET_CAP_AMP_OVR_MASK) >> \ HDA_PARAM_AUDIO_WIDGET_CAP_AMP_OVR_SHIFT) #define HDA_PARAM_AUDIO_WIDGET_CAP_OUT_AMP(param) \ (((param) & HDA_PARAM_AUDIO_WIDGET_CAP_OUT_AMP_MASK) >> \ HDA_PARAM_AUDIO_WIDGET_CAP_OUT_AMP_SHIFT) #define 
HDA_PARAM_AUDIO_WIDGET_CAP_IN_AMP(param) \ (((param) & HDA_PARAM_AUDIO_WIDGET_CAP_IN_AMP_MASK) >> \ HDA_PARAM_AUDIO_WIDGET_CAP_IN_AMP_SHIFT) #define HDA_PARAM_AUDIO_WIDGET_CAP_STEREO(param) \ (((param) & HDA_PARAM_AUDIO_WIDGET_CAP_STEREO_MASK) >> \ HDA_PARAM_AUDIO_WIDGET_CAP_STEREO_SHIFT) #define HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_OUTPUT 0x0 #define HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_INPUT 0x1 #define HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_MIXER 0x2 #define HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_AUDIO_SELECTOR 0x3 #define HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_PIN_COMPLEX 0x4 #define HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_POWER_WIDGET 0x5 #define HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_VOLUME_WIDGET 0x6 #define HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_BEEP_WIDGET 0x7 #define HDA_PARAM_AUDIO_WIDGET_CAP_TYPE_VENDOR_WIDGET 0xf /* Supported PCM Size, Rates */ #define HDA_PARAM_SUPP_PCM_SIZE_RATE 0x0a #define HDA_PARAM_SUPP_PCM_SIZE_RATE_32BIT_MASK 0x00100000 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_32BIT_SHIFT 20 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_24BIT_MASK 0x00080000 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_24BIT_SHIFT 19 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_20BIT_MASK 0x00040000 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_20BIT_SHIFT 18 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_16BIT_MASK 0x00020000 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_16BIT_SHIFT 17 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_8BIT_MASK 0x00010000 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_8BIT_SHIFT 16 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_8KHZ_MASK 0x00000001 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_8KHZ_SHIFT 0 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_11KHZ_MASK 0x00000002 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_11KHZ_SHIFT 1 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_16KHZ_MASK 0x00000004 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_16KHZ_SHIFT 2 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_22KHZ_MASK 0x00000008 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_22KHZ_SHIFT 3 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_32KHZ_MASK 0x00000010 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_32KHZ_SHIFT 4 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_44KHZ_MASK 0x00000020 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_44KHZ_SHIFT 5 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_48KHZ_MASK 0x00000040 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_48KHZ_SHIFT 6 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_88KHZ_MASK 0x00000080 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_88KHZ_SHIFT 7 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_96KHZ_MASK 0x00000100 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_96KHZ_SHIFT 8 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_176KHZ_MASK 0x00000200 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_176KHZ_SHIFT 9 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_192KHZ_MASK 0x00000400 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_192KHZ_SHIFT 10 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_384KHZ_MASK 0x00000800 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_384KHZ_SHIFT 11 #define HDA_PARAM_SUPP_PCM_SIZE_RATE_32BIT(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_32BIT_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_32BIT_SHIFT) #define HDA_PARAM_SUPP_PCM_SIZE_RATE_24BIT(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_24BIT_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_24BIT_SHIFT) #define HDA_PARAM_SUPP_PCM_SIZE_RATE_20BIT(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_20BIT_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_20BIT_SHIFT) #define HDA_PARAM_SUPP_PCM_SIZE_RATE_16BIT(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_16BIT_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_16BIT_SHIFT) #define HDA_PARAM_SUPP_PCM_SIZE_RATE_8BIT(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_8BIT_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_8BIT_SHIFT) #define 
HDA_PARAM_SUPP_PCM_SIZE_RATE_8KHZ(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_8KHZ_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_8KHZ_SHIFT) #define HDA_PARAM_SUPP_PCM_SIZE_RATE_11KHZ(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_11KHZ_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_11KHZ_SHIFT) #define HDA_PARAM_SUPP_PCM_SIZE_RATE_16KHZ(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_16KHZ_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_16KHZ_SHIFT) #define HDA_PARAM_SUPP_PCM_SIZE_RATE_22KHZ(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_22KHZ_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_22KHZ_SHIFT) #define HDA_PARAM_SUPP_PCM_SIZE_RATE_32KHZ(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_32KHZ_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_32KHZ_SHIFT) #define HDA_PARAM_SUPP_PCM_SIZE_RATE_44KHZ(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_44KHZ_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_44KHZ_SHIFT) #define HDA_PARAM_SUPP_PCM_SIZE_RATE_48KHZ(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_48KHZ_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_48KHZ_SHIFT) #define HDA_PARAM_SUPP_PCM_SIZE_RATE_88KHZ(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_88KHZ_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_88KHZ_SHIFT) #define HDA_PARAM_SUPP_PCM_SIZE_RATE_96KHZ(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_96KHZ_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_96KHZ_SHIFT) #define HDA_PARAM_SUPP_PCM_SIZE_RATE_176KHZ(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_176KHZ_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_176KHZ_SHIFT) #define HDA_PARAM_SUPP_PCM_SIZE_RATE_192KHZ(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_192KHZ_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_192KHZ_SHIFT) #define HDA_PARAM_SUPP_PCM_SIZE_RATE_384KHZ(param) \ (((param) & HDA_PARAM_SUPP_PCM_SIZE_RATE_384KHZ_MASK) >> \ HDA_PARAM_SUPP_PCM_SIZE_RATE_384KHZ_SHIFT) /* Supported Stream Formats */ #define HDA_PARAM_SUPP_STREAM_FORMATS 0x0b #define HDA_PARAM_SUPP_STREAM_FORMATS_AC3_MASK 0x00000004 #define HDA_PARAM_SUPP_STREAM_FORMATS_AC3_SHIFT 2 #define HDA_PARAM_SUPP_STREAM_FORMATS_FLOAT32_MASK 0x00000002 #define HDA_PARAM_SUPP_STREAM_FORMATS_FLOAT32_SHIFT 1 #define HDA_PARAM_SUPP_STREAM_FORMATS_PCM_MASK 0x00000001 #define HDA_PARAM_SUPP_STREAM_FORMATS_PCM_SHIFT 0 #define HDA_PARAM_SUPP_STREAM_FORMATS_AC3(param) \ (((param) & HDA_PARAM_SUPP_STREAM_FORMATS_AC3_MASK) >> \ HDA_PARAM_SUPP_STREAM_FORMATS_AC3_SHIFT) #define HDA_PARAM_SUPP_STREAM_FORMATS_FLOAT32(param) \ (((param) & HDA_PARAM_SUPP_STREAM_FORMATS_FLOAT32_MASK) >> \ HDA_PARAM_SUPP_STREAM_FORMATS_FLOAT32_SHIFT) #define HDA_PARAM_SUPP_STREAM_FORMATS_PCM(param) \ (((param) & HDA_PARAM_SUPP_STREAM_FORMATS_PCM_MASK) >> \ HDA_PARAM_SUPP_STREAM_FORMATS_PCM_SHIFT) /* Pin Capabilities */ #define HDA_PARAM_PIN_CAP 0x0c #define HDA_PARAM_PIN_CAP_HBR_MASK 0x08000000 #define HDA_PARAM_PIN_CAP_HBR_SHIFT 27 #define HDA_PARAM_PIN_CAP_DP_MASK 0x01000000 #define HDA_PARAM_PIN_CAP_DP_SHIFT 24 #define HDA_PARAM_PIN_CAP_EAPD_CAP_MASK 0x00010000 #define HDA_PARAM_PIN_CAP_EAPD_CAP_SHIFT 16 #define HDA_PARAM_PIN_CAP_VREF_CTRL_MASK 0x0000ff00 #define HDA_PARAM_PIN_CAP_VREF_CTRL_SHIFT 8 #define HDA_PARAM_PIN_CAP_VREF_CTRL_100_MASK 0x00002000 #define HDA_PARAM_PIN_CAP_VREF_CTRL_100_SHIFT 13 #define HDA_PARAM_PIN_CAP_VREF_CTRL_80_MASK 0x00001000 #define HDA_PARAM_PIN_CAP_VREF_CTRL_80_SHIFT 12 #define HDA_PARAM_PIN_CAP_VREF_CTRL_GROUND_MASK 0x00000400 #define HDA_PARAM_PIN_CAP_VREF_CTRL_GROUND_SHIFT 10 #define HDA_PARAM_PIN_CAP_VREF_CTRL_50_MASK 0x00000200 #define HDA_PARAM_PIN_CAP_VREF_CTRL_50_SHIFT 9 #define 
HDA_PARAM_PIN_CAP_VREF_CTRL_HIZ_MASK 0x00000100 #define HDA_PARAM_PIN_CAP_VREF_CTRL_HIZ_SHIFT 8 #define HDA_PARAM_PIN_CAP_HDMI_MASK 0x00000080 #define HDA_PARAM_PIN_CAP_HDMI_SHIFT 7 #define HDA_PARAM_PIN_CAP_BALANCED_IO_PINS_MASK 0x00000040 #define HDA_PARAM_PIN_CAP_BALANCED_IO_PINS_SHIFT 6 #define HDA_PARAM_PIN_CAP_INPUT_CAP_MASK 0x00000020 #define HDA_PARAM_PIN_CAP_INPUT_CAP_SHIFT 5 #define HDA_PARAM_PIN_CAP_OUTPUT_CAP_MASK 0x00000010 #define HDA_PARAM_PIN_CAP_OUTPUT_CAP_SHIFT 4 #define HDA_PARAM_PIN_CAP_HEADPHONE_CAP_MASK 0x00000008 #define HDA_PARAM_PIN_CAP_HEADPHONE_CAP_SHIFT 3 #define HDA_PARAM_PIN_CAP_PRESENCE_DETECT_CAP_MASK 0x00000004 #define HDA_PARAM_PIN_CAP_PRESENCE_DETECT_CAP_SHIFT 2 #define HDA_PARAM_PIN_CAP_TRIGGER_REQD_MASK 0x00000002 #define HDA_PARAM_PIN_CAP_TRIGGER_REQD_SHIFT 1 #define HDA_PARAM_PIN_CAP_IMP_SENSE_CAP_MASK 0x00000001 #define HDA_PARAM_PIN_CAP_IMP_SENSE_CAP_SHIFT 0 #define HDA_PARAM_PIN_CAP_HBR(param) \ (((param) & HDA_PARAM_PIN_CAP_HBR_MASK) >> \ HDA_PARAM_PIN_CAP_HBR_SHIFT) #define HDA_PARAM_PIN_CAP_DP(param) \ (((param) & HDA_PARAM_PIN_CAP_DP_MASK) >> \ HDA_PARAM_PIN_CAP_DP_SHIFT) #define HDA_PARAM_PIN_CAP_EAPD_CAP(param) \ (((param) & HDA_PARAM_PIN_CAP_EAPD_CAP_MASK) >> \ HDA_PARAM_PIN_CAP_EAPD_CAP_SHIFT) #define HDA_PARAM_PIN_CAP_VREF_CTRL(param) \ (((param) & HDA_PARAM_PIN_CAP_VREF_CTRL_MASK) >> \ HDA_PARAM_PIN_CAP_VREF_CTRL_SHIFT) #define HDA_PARAM_PIN_CAP_VREF_CTRL_100(param) \ (((param) & HDA_PARAM_PIN_CAP_VREF_CTRL_100_MASK) >> \ HDA_PARAM_PIN_CAP_VREF_CTRL_100_SHIFT) #define HDA_PARAM_PIN_CAP_VREF_CTRL_80(param) \ (((param) & HDA_PARAM_PIN_CAP_VREF_CTRL_80_MASK) >> \ HDA_PARAM_PIN_CAP_VREF_CTRL_80_SHIFT) #define HDA_PARAM_PIN_CAP_VREF_CTRL_GROUND(param) \ (((param) & HDA_PARAM_PIN_CAP_VREF_CTRL_GROUND_MASK) >> \ HDA_PARAM_PIN_CAP_VREF_CTRL_GROUND_SHIFT) #define HDA_PARAM_PIN_CAP_VREF_CTRL_50(param) \ (((param) & HDA_PARAM_PIN_CAP_VREF_CTRL_50_MASK) >> \ HDA_PARAM_PIN_CAP_VREF_CTRL_50_SHIFT) #define HDA_PARAM_PIN_CAP_VREF_CTRL_HIZ(param) \ (((param) & HDA_PARAM_PIN_CAP_VREF_CTRL_HIZ_MASK) >> \ HDA_PARAM_PIN_CAP_VREF_CTRL_HIZ_SHIFT) #define HDA_PARAM_PIN_CAP_HDMI(param) \ (((param) & HDA_PARAM_PIN_CAP_HDMI_MASK) >> \ HDA_PARAM_PIN_CAP_HDMI_SHIFT) #define HDA_PARAM_PIN_CAP_BALANCED_IO_PINS(param) \ (((param) & HDA_PARAM_PIN_CAP_BALANCED_IO_PINS_MASK) >> \ HDA_PARAM_PIN_CAP_BALANCED_IO_PINS_SHIFT) #define HDA_PARAM_PIN_CAP_INPUT_CAP(param) \ (((param) & HDA_PARAM_PIN_CAP_INPUT_CAP_MASK) >> \ HDA_PARAM_PIN_CAP_INPUT_CAP_SHIFT) #define HDA_PARAM_PIN_CAP_OUTPUT_CAP(param) \ (((param) & HDA_PARAM_PIN_CAP_OUTPUT_CAP_MASK) >> \ HDA_PARAM_PIN_CAP_OUTPUT_CAP_SHIFT) #define HDA_PARAM_PIN_CAP_HEADPHONE_CAP(param) \ (((param) & HDA_PARAM_PIN_CAP_HEADPHONE_CAP_MASK) >> \ HDA_PARAM_PIN_CAP_HEADPHONE_CAP_SHIFT) #define HDA_PARAM_PIN_CAP_PRESENCE_DETECT_CAP(param) \ (((param) & HDA_PARAM_PIN_CAP_PRESENCE_DETECT_CAP_MASK) >> \ HDA_PARAM_PIN_CAP_PRESENCE_DETECT_CAP_SHIFT) #define HDA_PARAM_PIN_CAP_TRIGGER_REQD(param) \ (((param) & HDA_PARAM_PIN_CAP_TRIGGER_REQD_MASK) >> \ HDA_PARAM_PIN_CAP_TRIGGER_REQD_SHIFT) #define HDA_PARAM_PIN_CAP_IMP_SENSE_CAP(param) \ (((param) & HDA_PARAM_PIN_CAP_IMP_SENSE_CAP_MASK) >> \ HDA_PARAM_PIN_CAP_IMP_SENSE_CAP_SHIFT) /* Input Amplifier Capabilities */ #define HDA_PARAM_INPUT_AMP_CAP 0x0d #define HDA_PARAM_INPUT_AMP_CAP_MUTE_CAP_MASK 0x80000000 #define HDA_PARAM_INPUT_AMP_CAP_MUTE_CAP_SHIFT 31 #define HDA_PARAM_INPUT_AMP_CAP_STEPSIZE_MASK 0x007f0000 #define HDA_PARAM_INPUT_AMP_CAP_STEPSIZE_SHIFT 16 #define 
HDA_PARAM_INPUT_AMP_CAP_NUMSTEPS_MASK 0x00007f00 #define HDA_PARAM_INPUT_AMP_CAP_NUMSTEPS_SHIFT 8 #define HDA_PARAM_INPUT_AMP_CAP_OFFSET_MASK 0x0000007f #define HDA_PARAM_INPUT_AMP_CAP_OFFSET_SHIFT 0 #define HDA_PARAM_INPUT_AMP_CAP_MUTE_CAP(param) \ (((param) & HDA_PARAM_INPUT_AMP_CAP_MUTE_CAP_MASK) >> \ HDA_PARAM_INPUT_AMP_CAP_MUTE_CAP_SHIFT) #define HDA_PARAM_INPUT_AMP_CAP_STEPSIZE(param) \ (((param) & HDA_PARAM_INPUT_AMP_CAP_STEPSIZE_MASK) >> \ HDA_PARAM_INPUT_AMP_CAP_STEPSIZE_SHIFT) #define HDA_PARAM_INPUT_AMP_CAP_NUMSTEPS(param) \ (((param) & HDA_PARAM_INPUT_AMP_CAP_NUMSTEPS_MASK) >> \ HDA_PARAM_INPUT_AMP_CAP_NUMSTEPS_SHIFT) #define HDA_PARAM_INPUT_AMP_CAP_OFFSET(param) \ (((param) & HDA_PARAM_INPUT_AMP_CAP_OFFSET_MASK) >> \ HDA_PARAM_INPUT_AMP_CAP_OFFSET_SHIFT) /* Output Amplifier Capabilities */ #define HDA_PARAM_OUTPUT_AMP_CAP 0x12 #define HDA_PARAM_OUTPUT_AMP_CAP_MUTE_CAP_MASK 0x80000000 #define HDA_PARAM_OUTPUT_AMP_CAP_MUTE_CAP_SHIFT 31 #define HDA_PARAM_OUTPUT_AMP_CAP_STEPSIZE_MASK 0x007f0000 #define HDA_PARAM_OUTPUT_AMP_CAP_STEPSIZE_SHIFT 16 #define HDA_PARAM_OUTPUT_AMP_CAP_NUMSTEPS_MASK 0x00007f00 #define HDA_PARAM_OUTPUT_AMP_CAP_NUMSTEPS_SHIFT 8 #define HDA_PARAM_OUTPUT_AMP_CAP_OFFSET_MASK 0x0000007f #define HDA_PARAM_OUTPUT_AMP_CAP_OFFSET_SHIFT 0 #define HDA_PARAM_OUTPUT_AMP_CAP_MUTE_CAP(param) \ (((param) & HDA_PARAM_OUTPUT_AMP_CAP_MUTE_CAP_MASK) >> \ HDA_PARAM_OUTPUT_AMP_CAP_MUTE_CAP_SHIFT) #define HDA_PARAM_OUTPUT_AMP_CAP_STEPSIZE(param) \ (((param) & HDA_PARAM_OUTPUT_AMP_CAP_STEPSIZE_MASK) >> \ HDA_PARAM_OUTPUT_AMP_CAP_STEPSIZE_SHIFT) #define HDA_PARAM_OUTPUT_AMP_CAP_NUMSTEPS(param) \ (((param) & HDA_PARAM_OUTPUT_AMP_CAP_NUMSTEPS_MASK) >> \ HDA_PARAM_OUTPUT_AMP_CAP_NUMSTEPS_SHIFT) #define HDA_PARAM_OUTPUT_AMP_CAP_OFFSET(param) \ (((param) & HDA_PARAM_OUTPUT_AMP_CAP_OFFSET_MASK) >> \ HDA_PARAM_OUTPUT_AMP_CAP_OFFSET_SHIFT) /* Connection List Length */ #define HDA_PARAM_CONN_LIST_LENGTH 0x0e #define HDA_PARAM_CONN_LIST_LENGTH_LONG_FORM_MASK 0x00000080 #define HDA_PARAM_CONN_LIST_LENGTH_LONG_FORM_SHIFT 7 #define HDA_PARAM_CONN_LIST_LENGTH_LIST_LENGTH_MASK 0x0000007f #define HDA_PARAM_CONN_LIST_LENGTH_LIST_LENGTH_SHIFT 0 #define HDA_PARAM_CONN_LIST_LENGTH_LONG_FORM(param) \ (((param) & HDA_PARAM_CONN_LIST_LENGTH_LONG_FORM_MASK) >> \ HDA_PARAM_CONN_LIST_LENGTH_LONG_FORM_SHIFT) #define HDA_PARAM_CONN_LIST_LENGTH_LIST_LENGTH(param) \ (((param) & HDA_PARAM_CONN_LIST_LENGTH_LIST_LENGTH_MASK) >> \ HDA_PARAM_CONN_LIST_LENGTH_LIST_LENGTH_SHIFT) /* Supported Power States */ #define HDA_PARAM_SUPP_POWER_STATES 0x0f #define HDA_PARAM_SUPP_POWER_STATES_D3_MASK 0x00000008 #define HDA_PARAM_SUPP_POWER_STATES_D3_SHIFT 3 #define HDA_PARAM_SUPP_POWER_STATES_D2_MASK 0x00000004 #define HDA_PARAM_SUPP_POWER_STATES_D2_SHIFT 2 #define HDA_PARAM_SUPP_POWER_STATES_D1_MASK 0x00000002 #define HDA_PARAM_SUPP_POWER_STATES_D1_SHIFT 1 #define HDA_PARAM_SUPP_POWER_STATES_D0_MASK 0x00000001 #define HDA_PARAM_SUPP_POWER_STATES_D0_SHIFT 0 #define HDA_PARAM_SUPP_POWER_STATES_D3(param) \ (((param) & HDA_PARAM_SUPP_POWER_STATES_D3_MASK) >> \ HDA_PARAM_SUPP_POWER_STATES_D3_SHIFT) #define HDA_PARAM_SUPP_POWER_STATES_D2(param) \ (((param) & HDA_PARAM_SUPP_POWER_STATES_D2_MASK) >> \ HDA_PARAM_SUPP_POWER_STATES_D2_SHIFT) #define HDA_PARAM_SUPP_POWER_STATES_D1(param) \ (((param) & HDA_PARAM_SUPP_POWER_STATES_D1_MASK) >> \ HDA_PARAM_SUPP_POWER_STATES_D1_SHIFT) #define HDA_PARAM_SUPP_POWER_STATES_D0(param) \ (((param) & HDA_PARAM_SUPP_POWER_STATES_D0_MASK) >> \ HDA_PARAM_SUPP_POWER_STATES_D0_SHIFT) /* Processing 
Capabilities */ #define HDA_PARAM_PROCESSING_CAP 0x10 #define HDA_PARAM_PROCESSING_CAP_NUMCOEFF_MASK 0x0000ff00 #define HDA_PARAM_PROCESSING_CAP_NUMCOEFF_SHIFT 8 #define HDA_PARAM_PROCESSING_CAP_BENIGN_MASK 0x00000001 #define HDA_PARAM_PROCESSING_CAP_BENIGN_SHIFT 0 #define HDA_PARAM_PROCESSING_CAP_NUMCOEFF(param) \ (((param) & HDA_PARAM_PROCESSING_CAP_NUMCOEFF_MASK) >> \ HDA_PARAM_PROCESSING_CAP_NUMCOEFF_SHIFT) #define HDA_PARAM_PROCESSING_CAP_BENIGN(param) \ (((param) & HDA_PARAM_PROCESSING_CAP_BENIGN_MASK) >> \ HDA_PARAM_PROCESSING_CAP_BENIGN_SHIFT) /* GPIO Count */ #define HDA_PARAM_GPIO_COUNT 0x11 #define HDA_PARAM_GPIO_COUNT_GPI_WAKE_MASK 0x80000000 #define HDA_PARAM_GPIO_COUNT_GPI_WAKE_SHIFT 31 #define HDA_PARAM_GPIO_COUNT_GPI_UNSOL_MASK 0x40000000 #define HDA_PARAM_GPIO_COUNT_GPI_UNSOL_SHIFT 30 #define HDA_PARAM_GPIO_COUNT_NUM_GPI_MASK 0x00ff0000 #define HDA_PARAM_GPIO_COUNT_NUM_GPI_SHIFT 16 #define HDA_PARAM_GPIO_COUNT_NUM_GPO_MASK 0x0000ff00 #define HDA_PARAM_GPIO_COUNT_NUM_GPO_SHIFT 8 #define HDA_PARAM_GPIO_COUNT_NUM_GPIO_MASK 0x000000ff #define HDA_PARAM_GPIO_COUNT_NUM_GPIO_SHIFT 0 #define HDA_PARAM_GPIO_COUNT_GPI_WAKE(param) \ (((param) & HDA_PARAM_GPIO_COUNT_GPI_WAKE_MASK) >> \ HDA_PARAM_GPIO_COUNT_GPI_WAKE_SHIFT) #define HDA_PARAM_GPIO_COUNT_GPI_UNSOL(param) \ (((param) & HDA_PARAM_GPIO_COUNT_GPI_UNSOL_MASK) >> \ HDA_PARAM_GPIO_COUNT_GPI_UNSOL_SHIFT) #define HDA_PARAM_GPIO_COUNT_NUM_GPI(param) \ (((param) & HDA_PARAM_GPIO_COUNT_NUM_GPI_MASK) >> \ HDA_PARAM_GPIO_COUNT_NUM_GPI_SHIFT) #define HDA_PARAM_GPIO_COUNT_NUM_GPO(param) \ (((param) & HDA_PARAM_GPIO_COUNT_NUM_GPO_MASK) >> \ HDA_PARAM_GPIO_COUNT_NUM_GPO_SHIFT) #define HDA_PARAM_GPIO_COUNT_NUM_GPIO(param) \ (((param) & HDA_PARAM_GPIO_COUNT_NUM_GPIO_MASK) >> \ HDA_PARAM_GPIO_COUNT_NUM_GPIO_SHIFT) /* Volume Knob Capabilities */ #define HDA_PARAM_VOLUME_KNOB_CAP 0x13 #define HDA_PARAM_VOLUME_KNOB_CAP_DELTA_MASK 0x00000080 #define HDA_PARAM_VOLUME_KNOB_CAP_DELTA_SHIFT 7 #define HDA_PARAM_VOLUME_KNOB_CAP_NUM_STEPS_MASK 0x0000007f #define HDA_PARAM_VOLUME_KNOB_CAP_NUM_STEPS_SHIFT 0 #define HDA_PARAM_VOLUME_KNOB_CAP_DELTA(param) \ (((param) & HDA_PARAM_VOLUME_KNOB_CAP_DELTA_MASK) >> \ HDA_PARAM_VOLUME_KNOB_CAP_DELTA_SHIFT) #define HDA_PARAM_VOLUME_KNOB_CAP_NUM_STEPS(param) \ (((param) & HDA_PARAM_VOLUME_KNOB_CAP_NUM_STEPS_MASK) >> \ HDA_PARAM_VOLUME_KNOB_CAP_NUM_STEPS_SHIFT) #define HDA_CONFIG_DEFAULTCONF_SEQUENCE_MASK 0x0000000f #define HDA_CONFIG_DEFAULTCONF_SEQUENCE_SHIFT 0 #define HDA_CONFIG_DEFAULTCONF_ASSOCIATION_MASK 0x000000f0 #define HDA_CONFIG_DEFAULTCONF_ASSOCIATION_SHIFT 4 #define HDA_CONFIG_DEFAULTCONF_MISC_MASK 0x00000f00 #define HDA_CONFIG_DEFAULTCONF_MISC_SHIFT 8 #define HDA_CONFIG_DEFAULTCONF_COLOR_MASK 0x0000f000 #define HDA_CONFIG_DEFAULTCONF_COLOR_SHIFT 12 #define HDA_CONFIG_DEFAULTCONF_CONNECTION_TYPE_MASK 0x000f0000 #define HDA_CONFIG_DEFAULTCONF_CONNECTION_TYPE_SHIFT 16 #define HDA_CONFIG_DEFAULTCONF_DEVICE_MASK 0x00f00000 #define HDA_CONFIG_DEFAULTCONF_DEVICE_SHIFT 20 #define HDA_CONFIG_DEFAULTCONF_LOCATION_MASK 0x3f000000 #define HDA_CONFIG_DEFAULTCONF_LOCATION_SHIFT 24 #define HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_MASK 0xc0000000 #define HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_SHIFT 30 #define HDA_CONFIG_DEFAULTCONF_SEQUENCE(conf) \ (((conf) & HDA_CONFIG_DEFAULTCONF_SEQUENCE_MASK) >> \ HDA_CONFIG_DEFAULTCONF_SEQUENCE_SHIFT) #define HDA_CONFIG_DEFAULTCONF_ASSOCIATION(conf) \ (((conf) & HDA_CONFIG_DEFAULTCONF_ASSOCIATION_MASK) >> \ HDA_CONFIG_DEFAULTCONF_ASSOCIATION_SHIFT) #define 
HDA_CONFIG_DEFAULTCONF_MISC(conf) \ (((conf) & HDA_CONFIG_DEFAULTCONF_MISC_MASK) >> \ HDA_CONFIG_DEFAULTCONF_MISC_SHIFT) #define HDA_CONFIG_DEFAULTCONF_COLOR(conf) \ (((conf) & HDA_CONFIG_DEFAULTCONF_COLOR_MASK) >> \ HDA_CONFIG_DEFAULTCONF_COLOR_SHIFT) #define HDA_CONFIG_DEFAULTCONF_CONNECTION_TYPE(conf) \ (((conf) & HDA_CONFIG_DEFAULTCONF_CONNECTION_TYPE_MASK) >> \ HDA_CONFIG_DEFAULTCONF_CONNECTION_TYPE_SHIFT) #define HDA_CONFIG_DEFAULTCONF_DEVICE(conf) \ (((conf) & HDA_CONFIG_DEFAULTCONF_DEVICE_MASK) >> \ HDA_CONFIG_DEFAULTCONF_DEVICE_SHIFT) #define HDA_CONFIG_DEFAULTCONF_LOCATION(conf) \ (((conf) & HDA_CONFIG_DEFAULTCONF_LOCATION_MASK) >> \ HDA_CONFIG_DEFAULTCONF_LOCATION_SHIFT) #define HDA_CONFIG_DEFAULTCONF_CONNECTIVITY(conf) \ (((conf) & HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_MASK) >> \ HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_SHIFT) #define HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_JACK (0<<30) #define HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_NONE (1<<30) #define HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_FIXED (2<<30) #define HDA_CONFIG_DEFAULTCONF_CONNECTIVITY_BOTH (3<<30) #define HDA_CONFIG_DEFAULTCONF_DEVICE_LINE_OUT (0<<20) #define HDA_CONFIG_DEFAULTCONF_DEVICE_SPEAKER (1<<20) #define HDA_CONFIG_DEFAULTCONF_DEVICE_HP_OUT (2<<20) #define HDA_CONFIG_DEFAULTCONF_DEVICE_CD (3<<20) #define HDA_CONFIG_DEFAULTCONF_DEVICE_SPDIF_OUT (4<<20) #define HDA_CONFIG_DEFAULTCONF_DEVICE_DIGITAL_OTHER_OUT (5<<20) #define HDA_CONFIG_DEFAULTCONF_DEVICE_MODEM_LINE (6<<20) #define HDA_CONFIG_DEFAULTCONF_DEVICE_MODEM_HANDSET (7<<20) #define HDA_CONFIG_DEFAULTCONF_DEVICE_LINE_IN (8<<20) #define HDA_CONFIG_DEFAULTCONF_DEVICE_AUX (9<<20) #define HDA_CONFIG_DEFAULTCONF_DEVICE_MIC_IN (10<<20) #define HDA_CONFIG_DEFAULTCONF_DEVICE_TELEPHONY (11<<20) #define HDA_CONFIG_DEFAULTCONF_DEVICE_SPDIF_IN (12<<20) #define HDA_CONFIG_DEFAULTCONF_DEVICE_DIGITAL_OTHER_IN (13<<20) #define HDA_CONFIG_DEFAULTCONF_DEVICE_OTHER (15<<20) #endif /* _HDA_REG_H_ */ diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c index 1fba90945afb..b65d273f4320 100644 --- a/usr.sbin/bhyve/inout.c +++ b/usr.sbin/bhyve/inout.c @@ -1,301 +1,301 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "config.h" #include "inout.h" SET_DECLARE(inout_port_set, struct inout_port); #define MAX_IOPORTS (1 << 16) #define VERIFY_IOPORT(port, size) \ assert((port) >= 0 && (size) > 0 && ((port) + (size)) <= MAX_IOPORTS) static struct { const char *name; int flags; inout_func_t handler; void *arg; } inout_handlers[MAX_IOPORTS]; static int default_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) { if (in) { switch (bytes) { case 4: *eax = 0xffffffff; break; case 2: *eax = 0xffff; break; case 1: *eax = 0xff; break; } } return (0); } -static void +static void register_default_iohandler(int start, int size) { struct inout_port iop; - + VERIFY_IOPORT(start, size); bzero(&iop, sizeof(iop)); iop.name = "default"; iop.port = start; iop.size = size; iop.flags = IOPORT_F_INOUT | IOPORT_F_DEFAULT; iop.handler = default_inout; register_inout(&iop); } int emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit) { int addrsize, bytes, flags, in, port, prot, rep; uint32_t eax, val; inout_func_t handler; void *arg; int error, fault, retval; enum vm_reg_name idxreg; uint64_t gla, index, iterations, count; struct vm_inout_str *vis; struct iovec iov[2]; bytes = vmexit->u.inout.bytes; in = vmexit->u.inout.in; port = vmexit->u.inout.port; assert(port < MAX_IOPORTS); assert(bytes == 1 || bytes == 2 || bytes == 4); handler = inout_handlers[port].handler; if (handler == default_inout && get_config_bool_default("x86.strictio", false)) return (-1); flags = inout_handlers[port].flags; arg = inout_handlers[port].arg; if (in) { if (!(flags & IOPORT_F_IN)) return (-1); } else { if (!(flags & IOPORT_F_OUT)) return (-1); } retval = 0; if (vmexit->u.inout.string) { vis = &vmexit->u.inout_str; rep = vis->inout.rep; addrsize = vis->addrsize; prot = in ? PROT_WRITE : PROT_READ; assert(addrsize == 2 || addrsize == 4 || addrsize == 8); /* Index register */ idxreg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI; index = vis->index & vie_size2mask(addrsize); /* Count register */ count = vis->count & vie_size2mask(addrsize); /* Limit number of back-to-back in/out emulations to 16 */ iterations = MIN(count, 16); while (iterations > 0) { assert(retval == 0); if (vie_calculate_gla(vis->paging.cpu_mode, vis->seg_name, &vis->seg_desc, index, bytes, addrsize, prot, &gla)) { vm_inject_gp(ctx, vcpu); break; } error = vm_copy_setup(ctx, vcpu, &vis->paging, gla, bytes, prot, iov, nitems(iov), &fault); if (error) { retval = -1; /* Unrecoverable error */ break; } else if (fault) { retval = 0; /* Resume guest to handle fault */ break; } if (vie_alignment_check(vis->paging.cpl, bytes, vis->cr0, vis->rflags, gla)) { vm_inject_ac(ctx, vcpu, 0); break; } val = 0; if (!in) vm_copyin(ctx, vcpu, iov, &val, bytes); retval = handler(ctx, vcpu, in, port, bytes, &val, arg); if (retval != 0) break; if (in) vm_copyout(ctx, vcpu, &val, iov, bytes); /* Update index */ if (vis->rflags & PSL_D) index -= bytes; else index += bytes; count--; iterations--; } /* Update index register */ error = vie_update_register(ctx, vcpu, idxreg, index, addrsize); assert(error == 0); /* * Update count register only if the instruction had a repeat * prefix. 
*/ if (rep) { error = vie_update_register(ctx, vcpu, VM_REG_GUEST_RCX, count, addrsize); assert(error == 0); } /* Restart the instruction if more iterations remain */ if (retval == 0 && count != 0) { error = vm_restart_instruction(ctx, vcpu); assert(error == 0); } } else { eax = vmexit->u.inout.eax; val = eax & vie_size2mask(bytes); retval = handler(ctx, vcpu, in, port, bytes, &val, arg); if (retval == 0 && in) { eax &= ~vie_size2mask(bytes); eax |= val & vie_size2mask(bytes); error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax); assert(error == 0); } } return (retval); } void init_inout(void) { struct inout_port **iopp, *iop; /* * Set up the default handler for all ports */ register_default_iohandler(0, MAX_IOPORTS); /* * Overwrite with specified handlers */ SET_FOREACH(iopp, inout_port_set) { iop = *iopp; assert(iop->port < MAX_IOPORTS); inout_handlers[iop->port].name = iop->name; inout_handlers[iop->port].flags = iop->flags; inout_handlers[iop->port].handler = iop->handler; inout_handlers[iop->port].arg = NULL; } } int register_inout(struct inout_port *iop) { int i; VERIFY_IOPORT(iop->port, iop->size); /* * Verify that the new registration is not overwriting an already * allocated i/o range. */ if ((iop->flags & IOPORT_F_DEFAULT) == 0) { for (i = iop->port; i < iop->port + iop->size; i++) { if ((inout_handlers[i].flags & IOPORT_F_DEFAULT) == 0) return (-1); } } for (i = iop->port; i < iop->port + iop->size; i++) { inout_handlers[i].name = iop->name; inout_handlers[i].flags = iop->flags; inout_handlers[i].handler = iop->handler; inout_handlers[i].arg = iop->arg; } return (0); } int unregister_inout(struct inout_port *iop) { VERIFY_IOPORT(iop->port, iop->size); assert(inout_handlers[iop->port].name == iop->name); register_default_iohandler(iop->port, iop->size); return (0); } diff --git a/usr.sbin/bhyve/inout.h b/usr.sbin/bhyve/inout.h index afbae34c2dd0..19b2975b94e5 100644 --- a/usr.sbin/bhyve/inout.h +++ b/usr.sbin/bhyve/inout.h @@ -1,79 +1,79 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _INOUT_H_ #define _INOUT_H_ #include struct vmctx; struct vm_exit; /* * inout emulation handlers return 0 on success and -1 on failure. 
*/ typedef int (*inout_func_t)(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg); struct inout_port { const char *name; int port; int size; int flags; inout_func_t handler; void *arg; }; #define IOPORT_F_IN 0x1 #define IOPORT_F_OUT 0x2 #define IOPORT_F_INOUT (IOPORT_F_IN | IOPORT_F_OUT) /* * The following flags are used internally and must not be used by * device models. */ #define IOPORT_F_DEFAULT 0x80000000 /* claimed by default handler */ #define INOUT_PORT(name, port, flags, handler) \ static struct inout_port __CONCAT(__inout_port, __LINE__) = { \ #name, \ (port), \ 1, \ (flags), \ (handler), \ 0 \ }; \ DATA_SET(inout_port_set, __CONCAT(__inout_port, __LINE__)) - + void init_inout(void); int emulate_inout(struct vmctx *, int vcpu, struct vm_exit *vmexit); int register_inout(struct inout_port *iop); int unregister_inout(struct inout_port *iop); #endif /* _INOUT_H_ */ diff --git a/usr.sbin/bhyve/mem.c b/usr.sbin/bhyve/mem.c index 55f851590128..49a468305769 100644 --- a/usr.sbin/bhyve/mem.c +++ b/usr.sbin/bhyve/mem.c @@ -1,376 +1,376 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Memory ranges are represented with an RB tree. On insertion, the range * is checked for overlaps. On lookup, the key has the same base and limit * so it can be searched within the range. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "mem.h" struct mmio_rb_range { RB_ENTRY(mmio_rb_range) mr_link; /* RB tree links */ struct mem_range mr_param; uint64_t mr_base; uint64_t mr_end; }; struct mmio_rb_tree; RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback; /* * Per-vCPU cache. Since most accesses from a vCPU will be to * consecutive addresses in a range, it makes sense to cache the * result of a lookup. 
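With the header above in view, a device model claims a port either statically with the INOUT_PORT() macro or at runtime with register_inout(). A hedged sketch of both patterns; the handler name, port number and range size below are hypothetical and only illustrate the API:

#include <sys/linker_set.h>
#include <stdint.h>
#include <strings.h>

#include "inout.h"

#define EXAMPLE_PORT    0x220   /* hypothetical port number */

static int
example_port_handler(struct vmctx *ctx, int vcpu, int in, int port,
    int bytes, uint32_t *eax, void *arg)
{
        if (in)
                *eax = 0;       /* reads return 0 */
        /* writes are ignored */
        return (0);             /* 0 == handled successfully */
}

/* Static registration: picked up by init_inout() via the linker set. */
INOUT_PORT(example, EXAMPLE_PORT, IOPORT_F_INOUT, example_port_handler);

/* Dynamic registration of a multi-port range at attach time. */
static int
example_attach(void)
{
        struct inout_port iop;

        bzero(&iop, sizeof(iop));
        iop.name = "example";
        iop.port = EXAMPLE_PORT;
        iop.size = 4;
        iop.flags = IOPORT_F_INOUT;
        iop.handler = example_port_handler;
        return (register_inout(&iop));  /* fails if the range is taken */
}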
*/ static struct mmio_rb_range *mmio_hint[VM_MAXCPU]; static pthread_rwlock_t mmio_rwlock; static int mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b) { if (a->mr_end < b->mr_base) return (-1); else if (a->mr_base > b->mr_end) return (1); return (0); } static int mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr, struct mmio_rb_range **entry) { struct mmio_rb_range find, *res; find.mr_base = find.mr_end = addr; res = RB_FIND(mmio_rb_tree, rbt, &find); if (res != NULL) { *entry = res; return (0); } - + return (ENOENT); } static int mmio_rb_add(struct mmio_rb_tree *rbt, struct mmio_rb_range *new) { struct mmio_rb_range *overlap; overlap = RB_INSERT(mmio_rb_tree, rbt, new); if (overlap != NULL) { #ifdef RB_DEBUG printf("overlap detected: new %lx:%lx, tree %lx:%lx, '%s' " "claims region already claimed for '%s'\n", new->mr_base, new->mr_end, overlap->mr_base, overlap->mr_end, new->mr_param.name, overlap->mr_param.name); #endif return (EEXIST); } return (0); } #if 0 static void mmio_rb_dump(struct mmio_rb_tree *rbt) { int perror; struct mmio_rb_range *np; pthread_rwlock_rdlock(&mmio_rwlock); RB_FOREACH(np, mmio_rb_tree, rbt) { printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end, np->mr_param.name); } perror = pthread_rwlock_unlock(&mmio_rwlock); assert(perror == 0); } #endif RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); typedef int (mem_cb_t)(struct vmctx *ctx, int vcpu, uint64_t gpa, struct mem_range *mr, void *arg); static int mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg) { int error; struct mem_range *mr = arg; error = (*mr->handler)(ctx, vcpu, MEM_F_READ, gpa, size, rval, mr->arg1, mr->arg2); return (error); } static int mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg) { int error; struct mem_range *mr = arg; error = (*mr->handler)(ctx, vcpu, MEM_F_WRITE, gpa, size, &wval, mr->arg1, mr->arg2); return (error); } static int access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb, void *arg) { struct mmio_rb_range *entry; int err, perror, immutable; - + pthread_rwlock_rdlock(&mmio_rwlock); /* * First check the per-vCPU cache */ if (mmio_hint[vcpu] && paddr >= mmio_hint[vcpu]->mr_base && paddr <= mmio_hint[vcpu]->mr_end) { entry = mmio_hint[vcpu]; } else entry = NULL; if (entry == NULL) { if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) { /* Update the per-vCPU cache */ - mmio_hint[vcpu] = entry; + mmio_hint[vcpu] = entry; } else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) { perror = pthread_rwlock_unlock(&mmio_rwlock); assert(perror == 0); return (ESRCH); } } assert(entry != NULL); /* * An 'immutable' memory range is guaranteed to be never removed * so there is no need to hold 'mmio_rwlock' while calling the * handler. * * XXX writes to the PCIR_COMMAND register can cause register_mem() * to be called. If the guest is using PCI extended config space * to modify the PCIR_COMMAND register then register_mem() can * deadlock on 'mmio_rwlock'. However by registering the extended * config space window as 'immutable' the deadlock can be avoided. 
*/ immutable = (entry->mr_param.flags & MEM_F_IMMUTABLE); if (immutable) { perror = pthread_rwlock_unlock(&mmio_rwlock); assert(perror == 0); } err = cb(ctx, vcpu, paddr, &entry->mr_param, arg); if (!immutable) { perror = pthread_rwlock_unlock(&mmio_rwlock); assert(perror == 0); } return (err); } struct emulate_mem_args { struct vie *vie; struct vm_guest_paging *paging; }; static int emulate_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr, struct mem_range *mr, void *arg) { struct emulate_mem_args *ema; ema = arg; return (vmm_emulate_instruction(ctx, vcpu, paddr, ema->vie, ema->paging, mem_read, mem_write, mr)); } int emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie, struct vm_guest_paging *paging) { struct emulate_mem_args ema; ema.vie = vie; ema.paging = paging; return (access_memory(ctx, vcpu, paddr, emulate_mem_cb, &ema)); } struct rw_mem_args { uint64_t *val; int size; int operation; }; static int rw_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr, struct mem_range *mr, void *arg) { struct rw_mem_args *rma; rma = arg; return (mr->handler(ctx, vcpu, rma->operation, paddr, rma->size, rma->val, mr->arg1, mr->arg2)); } int read_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size) { struct rw_mem_args rma; rma.val = rval; rma.size = size; rma.operation = MEM_F_READ; return (access_memory(ctx, vcpu, gpa, rw_mem_cb, &rma)); } int write_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size) { struct rw_mem_args rma; rma.val = &wval; rma.size = size; rma.operation = MEM_F_WRITE; return (access_memory(ctx, vcpu, gpa, rw_mem_cb, &rma)); } static int register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp) { struct mmio_rb_range *entry, *mrp; int err, perror; err = 0; mrp = malloc(sizeof(struct mmio_rb_range)); if (mrp == NULL) { warn("%s: couldn't allocate memory for mrp\n", __func__); err = ENOMEM; } else { mrp->mr_param = *memp; mrp->mr_base = memp->base; mrp->mr_end = memp->base + memp->size - 1; pthread_rwlock_wrlock(&mmio_rwlock); if (mmio_rb_lookup(rbt, memp->base, &entry) != 0) err = mmio_rb_add(rbt, mrp); perror = pthread_rwlock_unlock(&mmio_rwlock); assert(perror == 0); if (err) free(mrp); } return (err); } int register_mem(struct mem_range *memp) { return (register_mem_int(&mmio_rb_root, memp)); } int register_mem_fallback(struct mem_range *memp) { return (register_mem_int(&mmio_rb_fallback, memp)); } -int +int unregister_mem(struct mem_range *memp) { struct mem_range *mr; struct mmio_rb_range *entry = NULL; int err, perror, i; - + pthread_rwlock_wrlock(&mmio_rwlock); err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry); if (err == 0) { mr = &entry->mr_param; assert(mr->name == memp->name); - assert(mr->base == memp->base && mr->size == memp->size); + assert(mr->base == memp->base && mr->size == memp->size); assert((mr->flags & MEM_F_IMMUTABLE) == 0); RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry); - /* flush Per-vCPU cache */ + /* flush Per-vCPU cache */ for (i=0; i < VM_MAXCPU; i++) { if (mmio_hint[i] == entry) mmio_hint[i] = NULL; } } perror = pthread_rwlock_unlock(&mmio_rwlock); assert(perror == 0); if (entry) free(entry); - + return (err); } void init_mem(void) { RB_INIT(&mmio_rb_root); RB_INIT(&mmio_rb_fallback); pthread_rwlock_init(&mmio_rwlock, NULL); } diff --git a/usr.sbin/bhyve/mevent.c b/usr.sbin/bhyve/mevent.c index 5170251e008b..b7749ed67745 100644 --- a/usr.sbin/bhyve/mevent.c +++ b/usr.sbin/bhyve/mevent.c @@ -1,555 +1,555 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 
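MMIO emulation uses the same registration idea: a device model fills in a struct mem_range and calls register_mem(), and the handler is invoked for guest reads and writes that land in the range. A minimal sketch, assuming the mem_func_t signature and MEM_F_* flags from mem.h; the device name, base address and register are made up:

#include <stdint.h>
#include <string.h>

#include "mem.h"

static uint64_t example_reg;    /* hypothetical 8-byte device register */

static int
example_mmio_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
    int size, uint64_t *val, void *arg1, long arg2)
{
        /* Width of the access is ignored for brevity. */
        if (dir == MEM_F_READ)
                *val = example_reg;
        else
                example_reg = *val;
        return (0);
}

static int
example_mmio_attach(void)
{
        struct mem_range mr;

        memset(&mr, 0, sizeof(mr));
        mr.name = "example-mmio";
        mr.base = 0xd0000000UL;         /* hypothetical guest-physical base */
        mr.size = 0x1000;
        mr.flags = MEM_F_RW;
        mr.handler = example_mmio_handler;
        return (register_mem(&mr));     /* fails if the range overlaps */
}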
(c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* - * Micro event library for FreeBSD, designed for a single i/o thread + * Micro event library for FreeBSD, designed for a single i/o thread * using kqueue, and having events be persistent by default. */ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include "mevent.h" #define MEVENT_MAX 64 static pthread_t mevent_tid; static pthread_once_t mevent_once = PTHREAD_ONCE_INIT; static int mevent_timid = 43; static int mevent_pipefd[2]; static int mfd; static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER; struct mevent { void (*me_func)(int, enum ev_type, void *); #define me_msecs me_fd int me_fd; int me_timid; enum ev_type me_type; void *me_param; int me_cq; int me_state; /* Desired kevent flags. */ int me_closefd; int me_fflags; LIST_ENTRY(mevent) me_list; }; static LIST_HEAD(listhead, mevent) global_head, change_head; static void mevent_qlock(void) { pthread_mutex_lock(&mevent_lmutex); } static void mevent_qunlock(void) { pthread_mutex_unlock(&mevent_lmutex); } static void mevent_pipe_read(int fd, enum ev_type type, void *param) { char buf[MEVENT_MAX]; int status; /* * Drain the pipe read side. The fd is non-blocking so this is * safe to do. */ do { status = read(fd, buf, sizeof(buf)); } while (status == MEVENT_MAX); } static void mevent_notify(void) { char c = '\0'; - + /* * If calling from outside the i/o thread, write a byte on the * pipe to force the i/o thread to exit the blocking kevent call. 
*/ if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) { write(mevent_pipefd[1], &c, 1); } } static void mevent_init(void) { #ifndef WITHOUT_CAPSICUM cap_rights_t rights; #endif mfd = kqueue(); assert(mfd > 0); #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_KQUEUE); if (caph_rights_limit(mfd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif LIST_INIT(&change_head); LIST_INIT(&global_head); } static int mevent_kq_filter(struct mevent *mevp) { int retval; retval = 0; if (mevp->me_type == EVF_READ) retval = EVFILT_READ; if (mevp->me_type == EVF_WRITE) retval = EVFILT_WRITE; if (mevp->me_type == EVF_TIMER) retval = EVFILT_TIMER; if (mevp->me_type == EVF_SIGNAL) retval = EVFILT_SIGNAL; if (mevp->me_type == EVF_VNODE) retval = EVFILT_VNODE; return (retval); } static int mevent_kq_flags(struct mevent *mevp) { int retval; retval = mevp->me_state; if (mevp->me_type == EVF_VNODE) retval |= EV_CLEAR; return (retval); } static int mevent_kq_fflags(struct mevent *mevp) { int retval; retval = 0; switch (mevp->me_type) { case EVF_VNODE: if ((mevp->me_fflags & EVFF_ATTRIB) != 0) retval |= NOTE_ATTRIB; break; case EVF_READ: case EVF_WRITE: case EVF_TIMER: case EVF_SIGNAL: break; } return (retval); } static void mevent_populate(struct mevent *mevp, struct kevent *kev) { if (mevp->me_type == EVF_TIMER) { kev->ident = mevp->me_timid; kev->data = mevp->me_msecs; } else { kev->ident = mevp->me_fd; kev->data = 0; } kev->filter = mevent_kq_filter(mevp); kev->flags = mevent_kq_flags(mevp); kev->fflags = mevent_kq_fflags(mevp); kev->udata = mevp; } static int mevent_build(struct kevent *kev) { struct mevent *mevp, *tmpp; int i; i = 0; mevent_qlock(); LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) { if (mevp->me_closefd) { /* * A close of the file descriptor will remove the * event */ close(mevp->me_fd); } else { assert((mevp->me_state & EV_ADD) == 0); mevent_populate(mevp, &kev[i]); i++; } mevp->me_cq = 0; LIST_REMOVE(mevp, me_list); if (mevp->me_state & EV_DELETE) { free(mevp); } else { LIST_INSERT_HEAD(&global_head, mevp, me_list); } assert(i < MEVENT_MAX); } mevent_qunlock(); return (i); } static void mevent_handle(struct kevent *kev, int numev) { struct mevent *mevp; int i; for (i = 0; i < numev; i++) { mevp = kev[i].udata; /* XXX check for EV_ERROR ? */ (*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param); } } static struct mevent * mevent_add_state(int tfd, enum ev_type type, void (*func)(int, enum ev_type, void *), void *param, int state, int fflags) { struct kevent kev; struct mevent *lp, *mevp; int ret; if (tfd < 0 || func == NULL) { return (NULL); } mevp = NULL; pthread_once(&mevent_once, mevent_init); mevent_qlock(); /* * Verify that the fd/type tuple is not present in any list */ LIST_FOREACH(lp, &global_head, me_list) { if (type != EVF_TIMER && lp->me_fd == tfd && lp->me_type == type) { goto exit; } } LIST_FOREACH(lp, &change_head, me_list) { if (type != EVF_TIMER && lp->me_fd == tfd && lp->me_type == type) { goto exit; } } /* * Allocate an entry and populate it. */ mevp = calloc(1, sizeof(struct mevent)); if (mevp == NULL) { goto exit; } if (type == EVF_TIMER) { mevp->me_msecs = tfd; mevp->me_timid = mevent_timid++; } else mevp->me_fd = tfd; mevp->me_type = type; mevp->me_func = func; mevp->me_param = param; mevp->me_state = state; mevp->me_fflags = fflags; /* * Try to add the event. If this fails, report the failure to * the caller. 
*/ mevent_populate(mevp, &kev); ret = kevent(mfd, &kev, 1, NULL, 0, NULL); if (ret == -1) { free(mevp); mevp = NULL; goto exit; } mevp->me_state &= ~EV_ADD; LIST_INSERT_HEAD(&global_head, mevp, me_list); exit: mevent_qunlock(); return (mevp); } struct mevent * mevent_add(int tfd, enum ev_type type, void (*func)(int, enum ev_type, void *), void *param) { return (mevent_add_state(tfd, type, func, param, EV_ADD, 0)); } struct mevent * mevent_add_flags(int tfd, enum ev_type type, int fflags, void (*func)(int, enum ev_type, void *), void *param) { return (mevent_add_state(tfd, type, func, param, EV_ADD, fflags)); } struct mevent * mevent_add_disabled(int tfd, enum ev_type type, void (*func)(int, enum ev_type, void *), void *param) { return (mevent_add_state(tfd, type, func, param, EV_ADD | EV_DISABLE, 0)); } static int mevent_update(struct mevent *evp, bool enable) { int newstate; mevent_qlock(); /* * It's not possible to enable/disable a deleted event */ assert((evp->me_state & EV_DELETE) == 0); newstate = evp->me_state; if (enable) { newstate |= EV_ENABLE; newstate &= ~EV_DISABLE; } else { newstate |= EV_DISABLE; newstate &= ~EV_ENABLE; } /* * No update needed if state isn't changing */ if (evp->me_state != newstate) { evp->me_state = newstate; /* * Place the entry onto the changed list if not * already there. */ if (evp->me_cq == 0) { evp->me_cq = 1; LIST_REMOVE(evp, me_list); LIST_INSERT_HEAD(&change_head, evp, me_list); mevent_notify(); } } mevent_qunlock(); return (0); } int mevent_enable(struct mevent *evp) { return (mevent_update(evp, true)); } int mevent_disable(struct mevent *evp) { return (mevent_update(evp, false)); } static int mevent_delete_event(struct mevent *evp, int closefd) { mevent_qlock(); /* * Place the entry onto the changed list if not already there, and * mark as to be deleted. */ if (evp->me_cq == 0) { evp->me_cq = 1; LIST_REMOVE(evp, me_list); LIST_INSERT_HEAD(&change_head, evp, me_list); mevent_notify(); } evp->me_state = EV_DELETE; if (closefd) evp->me_closefd = 1; mevent_qunlock(); return (0); } int mevent_delete(struct mevent *evp) { return (mevent_delete_event(evp, 0)); } int mevent_delete_close(struct mevent *evp) { return (mevent_delete_event(evp, 1)); } static void mevent_set_name(void) { pthread_set_name_np(mevent_tid, "mevent"); } void mevent_dispatch(void) { struct kevent changelist[MEVENT_MAX]; struct kevent eventlist[MEVENT_MAX]; struct mevent *pipev; int numev; int ret; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; #endif mevent_tid = pthread_self(); mevent_set_name(); pthread_once(&mevent_once, mevent_init); /* * Open the pipe that will be used for other threads to force * the blocking kqueue call to exit by writing to it. Set the * descriptor to non-blocking. */ ret = pipe(mevent_pipefd); if (ret < 0) { perror("pipe"); exit(0); } #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); if (caph_rights_limit(mevent_pipefd[0], &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); if (caph_rights_limit(mevent_pipefd[1], &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif /* * Add internal event handler for the pipe write fd */ pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL); assert(pipev != NULL); for (;;) { /* * Build changelist if required. * XXX the changelist can be put into the blocking call * to eliminate the extra syscall. Currently better for * debug. 
*/ numev = mevent_build(changelist); if (numev) { ret = kevent(mfd, changelist, numev, NULL, 0, NULL); if (ret == -1) { perror("Error return from kevent change"); } } /* * Block awaiting events */ ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL); if (ret == -1 && errno != EINTR) { perror("Error return from kevent monitor"); } - + /* * Handle reported events */ mevent_handle(eventlist, ret); - } + } } diff --git a/usr.sbin/bhyve/mevent.h b/usr.sbin/bhyve/mevent.h index a26293867a09..8c3db9db54a0 100644 --- a/usr.sbin/bhyve/mevent.h +++ b/usr.sbin/bhyve/mevent.h @@ -1,63 +1,63 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MEVENT_H_ #define _MEVENT_H_ enum ev_type { EVF_READ, EVF_WRITE, EVF_TIMER, EVF_SIGNAL, EVF_VNODE, }; /* Filter flags for EVF_VNODE */ #define EVFF_ATTRIB 0x0001 struct mevent; -struct mevent *mevent_add(int fd, enum ev_type type, +struct mevent *mevent_add(int fd, enum ev_type type, void (*func)(int, enum ev_type, void *), void *param); struct mevent *mevent_add_flags(int fd, enum ev_type type, int fflags, void (*func)(int, enum ev_type, void *), void *param); struct mevent *mevent_add_disabled(int fd, enum ev_type type, void (*func)(int, enum ev_type, void *), void *param); int mevent_enable(struct mevent *evp); int mevent_disable(struct mevent *evp); int mevent_delete(struct mevent *evp); int mevent_delete_close(struct mevent *evp); void mevent_dispatch(void); #endif /* _MEVENT_H_ */ diff --git a/usr.sbin/bhyve/mevent_test.c b/usr.sbin/bhyve/mevent_test.c index 3a7aac98a65c..eda1c100e8a3 100644 --- a/usr.sbin/bhyve/mevent_test.c +++ b/usr.sbin/bhyve/mevent_test.c @@ -1,256 +1,256 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
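The mevent API boils down to mevent_add()/mevent_enable()/mevent_delete() plus a single mevent_dispatch() loop. A small self-contained sketch of a periodic timer, mirroring the timer usage in the test program that follows; the period and callback are illustrative:

#include <stdio.h>

#include "mevent.h"

static void
tick(int fd, enum ev_type type, void *param)
{
        /*
         * For EVF_TIMER events the first callback argument carries the
         * timer period, not a file descriptor.
         */
        printf("timer fired\n");
}

int
main(void)
{
        struct mevent *evp;

        /* For EVF_TIMER the first argument is the period in milliseconds. */
        evp = mevent_add(500, EVF_TIMER, tick, NULL);
        if (evp == NULL)
                return (1);

        /*
         * mevent_dispatch() runs the kqueue loop on the calling thread
         * and does not return.
         */
        mevent_dispatch();
        return (0);
}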
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Test program for the micro event library. Set up a simple TCP echo * service. * * cc mevent_test.c mevent.c -lpthread */ #include #include #include #include #include #include #include #include #include #include #include "mevent.h" #define TEST_PORT 4321 static pthread_mutex_t accept_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t accept_condvar = PTHREAD_COND_INITIALIZER; static struct mevent *tevp; #define MEVENT_ECHO /* Number of timer events to capture */ #define TEVSZ 4096 uint64_t tevbuf[TEVSZ]; static void timer_print(void) { uint64_t min, max, diff, sum, tsc_freq; size_t len; int j; min = UINT64_MAX; max = 0; sum = 0; len = sizeof(tsc_freq); sysctlbyname("machdep.tsc_freq", &tsc_freq, &len, NULL, 0); for (j = 1; j < TEVSZ; j++) { /* Convert a tsc diff into microseconds */ diff = (tevbuf[j] - tevbuf[j-1]) * 1000000 / tsc_freq; sum += diff; if (min > diff) min = diff; if (max < diff) max = diff; } printf("timers done: usecs, min %ld, max %ld, mean %ld\n", min, max, sum/(TEVSZ - 1)); } static void timer_callback(int fd, enum ev_type type, void *param) { static int i; if (i >= TEVSZ) abort(); tevbuf[i++] = rdtsc(); if (i == TEVSZ) { mevent_delete(tevp); timer_print(); } } #ifdef MEVENT_ECHO struct esync { pthread_mutex_t e_mt; - pthread_cond_t e_cond; + pthread_cond_t e_cond; }; static void echoer_callback(int fd, enum ev_type type, void *param) { struct esync *sync = param; pthread_mutex_lock(&sync->e_mt); pthread_cond_signal(&sync->e_cond); pthread_mutex_unlock(&sync->e_mt); } static void * echoer(void *param) { struct esync sync; struct mevent *mev; char buf[128]; int fd = (int)(uintptr_t) param; int len; pthread_mutex_init(&sync.e_mt, NULL); pthread_cond_init(&sync.e_cond, NULL); pthread_mutex_lock(&sync.e_mt); mev = mevent_add(fd, EVF_READ, echoer_callback, &sync); if (mev == NULL) { printf("Could not allocate echoer event\n"); exit(4); } while (!pthread_cond_wait(&sync.e_cond, &sync.e_mt)) { len = read(fd, buf, sizeof(buf)); if (len > 0) { write(fd, buf, len); write(0, buf, len); } else { break; } } mevent_delete_close(mev); pthread_mutex_unlock(&sync.e_mt); pthread_mutex_destroy(&sync.e_mt); pthread_cond_destroy(&sync.e_cond); return (NULL); } #else static void * echoer(void *param) { char buf[128]; int fd = (int)(uintptr_t) param; int len; while ((len = read(fd, buf, sizeof(buf))) > 0) { write(1, buf, len); } return (NULL); } #endif /* MEVENT_ECHO */ static void acceptor_callback(int fd, enum ev_type type, void *param) { pthread_mutex_lock(&accept_mutex); pthread_cond_signal(&accept_condvar); 
pthread_mutex_unlock(&accept_mutex); } static void * acceptor(void *param) { struct sockaddr_in sin; pthread_t tid; int news; int s; static int first; if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) { perror("cannot create socket"); exit(4); } sin.sin_len = sizeof(sin); sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl(INADDR_ANY); sin.sin_port = htons(TEST_PORT); if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) { perror("cannot bind socket"); exit(4); } if (listen(s, 1) < 0) { perror("cannot listen socket"); exit(4); } (void) mevent_add(s, EVF_READ, acceptor_callback, NULL); pthread_mutex_lock(&accept_mutex); while (!pthread_cond_wait(&accept_condvar, &accept_mutex)) { news = accept(s, NULL, NULL); if (news < 0) { perror("accept error"); } else { static int first = 1; if (first) { /* * Start a timer */ first = 0; tevp = mevent_add(1, EVF_TIMER, timer_callback, NULL); } printf("incoming connection, spawning thread\n"); pthread_create(&tid, NULL, echoer, (void *)(uintptr_t)news); } } return (NULL); } main() { pthread_t tid; pthread_create(&tid, NULL, acceptor, NULL); mevent_dispatch(); } diff --git a/usr.sbin/bhyve/mptbl.c b/usr.sbin/bhyve/mptbl.c index e57c6daf9dd0..56582256a857 100644 --- a/usr.sbin/bhyve/mptbl.c +++ b/usr.sbin/bhyve/mptbl.c @@ -1,380 +1,380 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "acpi.h" #include "debug.h" #include "bhyverun.h" #include "mptbl.h" #include "pci_emul.h" #define MPTABLE_BASE 0xF0000 /* floating pointer length + maximum length of configuration table */ #define MPTABLE_MAX_LENGTH (65536 + 16) #define LAPIC_PADDR 0xFEE00000 #define LAPIC_VERSION 16 #define IOAPIC_PADDR 0xFEC00000 #define IOAPIC_VERSION 0x11 #define MP_SPECREV 4 #define MPFP_SIG "_MP_" /* Configuration header defines */ #define MPCH_SIG "PCMP" #define MPCH_OEMID "BHyVe " #define MPCH_OEMID_LEN 8 #define MPCH_PRODID "Hypervisor " #define MPCH_PRODID_LEN 12 /* Processor entry defines */ #define MPEP_SIG_FAMILY 6 /* XXX bhyve should supply this */ #define MPEP_SIG_MODEL 26 #define MPEP_SIG_STEPPING 5 #define MPEP_SIG \ ((MPEP_SIG_FAMILY << 8) | \ (MPEP_SIG_MODEL << 4) | \ (MPEP_SIG_STEPPING)) #define MPEP_FEATURES (0xBFEBFBFF) /* XXX Intel i7 */ /* Number of local intr entries */ #define MPEII_NUM_LOCAL_IRQ 2 /* Bus entry defines */ #define MPE_NUM_BUSES 2 #define MPE_BUSNAME_LEN 6 #define MPE_BUSNAME_ISA "ISA " #define MPE_BUSNAME_PCI "PCI " static void *oem_tbl_start; static int oem_tbl_size; static uint8_t mpt_compute_checksum(void *base, size_t len) { uint8_t *bytes; uint8_t sum; for(bytes = base, sum = 0; len > 0; len--) { sum += *bytes++; } return (256 - sum); } static void mpt_build_mpfp(mpfps_t mpfp, vm_paddr_t gpa) { memset(mpfp, 0, sizeof(*mpfp)); memcpy(mpfp->signature, MPFP_SIG, 4); mpfp->pap = gpa + sizeof(*mpfp); mpfp->length = 1; mpfp->spec_rev = MP_SPECREV; mpfp->checksum = mpt_compute_checksum(mpfp, sizeof(*mpfp)); } static void mpt_build_mpch(mpcth_t mpch) { memset(mpch, 0, sizeof(*mpch)); memcpy(mpch->signature, MPCH_SIG, 4); mpch->spec_rev = MP_SPECREV; memcpy(mpch->oem_id, MPCH_OEMID, MPCH_OEMID_LEN); memcpy(mpch->product_id, MPCH_PRODID, MPCH_PRODID_LEN); mpch->apic_address = LAPIC_PADDR; } static void mpt_build_proc_entries(proc_entry_ptr mpep, int ncpu) { int i; for (i = 0; i < ncpu; i++) { memset(mpep, 0, sizeof(*mpep)); mpep->type = MPCT_ENTRY_PROCESSOR; mpep->apic_id = i; // XXX mpep->apic_version = LAPIC_VERSION; mpep->cpu_flags = PROCENTRY_FLAG_EN; if (i == 0) mpep->cpu_flags |= PROCENTRY_FLAG_BP; mpep->cpu_signature = MPEP_SIG; mpep->feature_flags = MPEP_FEATURES; mpep++; } } static void mpt_build_localint_entries(int_entry_ptr mpie) { /* Hardcode LINT0 as ExtINT on all CPUs. */ memset(mpie, 0, sizeof(*mpie)); mpie->type = MPCT_ENTRY_LOCAL_INT; mpie->int_type = INTENTRY_TYPE_EXTINT; mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM | INTENTRY_FLAGS_TRIGGER_CONFORM; mpie->dst_apic_id = 0xff; mpie->dst_apic_int = 0; mpie++; /* Hardcode LINT1 as NMI on all CPUs. 
*/ memset(mpie, 0, sizeof(*mpie)); mpie->type = MPCT_ENTRY_LOCAL_INT; mpie->int_type = INTENTRY_TYPE_NMI; mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM | INTENTRY_FLAGS_TRIGGER_CONFORM; mpie->dst_apic_id = 0xff; mpie->dst_apic_int = 1; } static void mpt_build_bus_entries(bus_entry_ptr mpeb) { memset(mpeb, 0, sizeof(*mpeb)); mpeb->type = MPCT_ENTRY_BUS; mpeb->bus_id = 0; memcpy(mpeb->bus_type, MPE_BUSNAME_PCI, MPE_BUSNAME_LEN); mpeb++; memset(mpeb, 0, sizeof(*mpeb)); mpeb->type = MPCT_ENTRY_BUS; - mpeb->bus_id = 1; + mpeb->bus_id = 1; memcpy(mpeb->bus_type, MPE_BUSNAME_ISA, MPE_BUSNAME_LEN); } static void mpt_build_ioapic_entries(io_apic_entry_ptr mpei, int id) { memset(mpei, 0, sizeof(*mpei)); mpei->type = MPCT_ENTRY_IOAPIC; mpei->apic_id = id; mpei->apic_version = IOAPIC_VERSION; mpei->apic_flags = IOAPICENTRY_FLAG_EN; mpei->apic_address = IOAPIC_PADDR; } static int mpt_count_ioint_entries(void) { int bus, count; count = 0; for (bus = 0; bus <= PCI_BUSMAX; bus++) count += pci_count_lintr(bus); /* * Always include entries for the first 16 pins along with a entry * for each active PCI INTx pin. */ return (16 + count); } static void mpt_generate_pci_int(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, void *arg) { int_entry_ptr *mpiep, mpie; mpiep = arg; mpie = *mpiep; memset(mpie, 0, sizeof(*mpie)); /* * This is always after another I/O interrupt entry, so cheat * and fetch the I/O APIC ID from the prior entry. */ mpie->type = MPCT_ENTRY_INT; mpie->int_type = INTENTRY_TYPE_INT; mpie->src_bus_id = bus; mpie->src_bus_irq = slot << 2 | (pin - 1); mpie->dst_apic_id = mpie[-1].dst_apic_id; mpie->dst_apic_int = ioapic_irq; *mpiep = mpie + 1; } static void mpt_build_ioint_entries(int_entry_ptr mpie, int id) { int pin, bus; /* * The following config is taken from kernel mptable.c - * mptable_parse_default_config_ints(...), for now + * mptable_parse_default_config_ints(...), for now * just use the default config, tweek later if needed. */ /* First, generate the first 16 pins. */ for (pin = 0; pin < 16; pin++) { memset(mpie, 0, sizeof(*mpie)); mpie->type = MPCT_ENTRY_INT; mpie->src_bus_id = 1; mpie->dst_apic_id = id; /* * All default configs route IRQs from bus 0 to the first 16 * pins of the first I/O APIC with an APIC ID of 2. */ mpie->dst_apic_int = pin; switch (pin) { case 0: /* Pin 0 is an ExtINT pin. */ mpie->int_type = INTENTRY_TYPE_EXTINT; break; case 2: /* IRQ 0 is routed to pin 2. */ mpie->int_type = INTENTRY_TYPE_INT; mpie->src_bus_irq = 0; break; case SCI_INT: /* ACPI SCI is level triggered and active-lo. */ mpie->int_flags = INTENTRY_FLAGS_POLARITY_ACTIVELO | INTENTRY_FLAGS_TRIGGER_LEVEL; mpie->int_type = INTENTRY_TYPE_INT; mpie->src_bus_irq = SCI_INT; break; default: /* All other pins are identity mapped. */ mpie->int_type = INTENTRY_TYPE_INT; mpie->src_bus_irq = pin; break; } mpie++; } /* Next, generate entries for any PCI INTx interrupts. 
*/ for (bus = 0; bus <= PCI_BUSMAX; bus++) - pci_walk_lintr(bus, mpt_generate_pci_int, &mpie); + pci_walk_lintr(bus, mpt_generate_pci_int, &mpie); } void mptable_add_oemtbl(void *tbl, int tblsz) { oem_tbl_start = tbl; oem_tbl_size = tblsz; } int mptable_build(struct vmctx *ctx, int ncpu) { mpcth_t mpch; bus_entry_ptr mpeb; io_apic_entry_ptr mpei; proc_entry_ptr mpep; mpfps_t mpfp; int_entry_ptr mpie; int ioints, bus; char *curraddr; char *startaddr; startaddr = paddr_guest2host(ctx, MPTABLE_BASE, MPTABLE_MAX_LENGTH); if (startaddr == NULL) { EPRINTLN("mptable requires mapped mem"); return (ENOMEM); } /* * There is no way to advertise multiple PCI hierarchies via MPtable * so require that there is no PCI hierarchy with a non-zero bus * number. */ for (bus = 1; bus <= PCI_BUSMAX; bus++) { if (pci_bus_configured(bus)) { EPRINTLN("MPtable is incompatible with " "multiple PCI hierarchies."); EPRINTLN("MPtable generation can be disabled " "by passing the -Y option to bhyve(8)."); return (EINVAL); } } curraddr = startaddr; mpfp = (mpfps_t)curraddr; mpt_build_mpfp(mpfp, MPTABLE_BASE); curraddr += sizeof(*mpfp); mpch = (mpcth_t)curraddr; mpt_build_mpch(mpch); curraddr += sizeof(*mpch); mpep = (proc_entry_ptr)curraddr; mpt_build_proc_entries(mpep, ncpu); curraddr += sizeof(*mpep) * ncpu; mpch->entry_count += ncpu; mpeb = (bus_entry_ptr) curraddr; mpt_build_bus_entries(mpeb); curraddr += sizeof(*mpeb) * MPE_NUM_BUSES; mpch->entry_count += MPE_NUM_BUSES; mpei = (io_apic_entry_ptr)curraddr; mpt_build_ioapic_entries(mpei, 0); curraddr += sizeof(*mpei); mpch->entry_count++; mpie = (int_entry_ptr) curraddr; ioints = mpt_count_ioint_entries(); mpt_build_ioint_entries(mpie, 0); curraddr += sizeof(*mpie) * ioints; mpch->entry_count += ioints; mpie = (int_entry_ptr)curraddr; mpt_build_localint_entries(mpie); curraddr += sizeof(*mpie) * MPEII_NUM_LOCAL_IRQ; mpch->entry_count += MPEII_NUM_LOCAL_IRQ; if (oem_tbl_start) { mpch->oem_table_pointer = curraddr - startaddr + MPTABLE_BASE; mpch->oem_table_size = oem_tbl_size; memcpy(curraddr, oem_tbl_start, oem_tbl_size); } mpch->base_table_length = curraddr - (char *)mpch; mpch->checksum = mpt_compute_checksum(mpch, mpch->base_table_length); return (0); } diff --git a/usr.sbin/bhyve/net_backends.c b/usr.sbin/bhyve/net_backends.c index cb1730fc77df..ca4c96b10239 100644 --- a/usr.sbin/bhyve/net_backends.c +++ b/usr.sbin/bhyve/net_backends.c @@ -1,1145 +1,1145 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2019 Vincenzo Maffione * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
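The checksum convention used throughout the MP structures is that the checksum byte makes all bytes of the structure sum to zero modulo 256, which is exactly what mpt_compute_checksum() returns. A standalone restatement with a toy 16-byte table; the field offset and contents are invented for the demonstration:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/*
 * Same convention as mpt_compute_checksum() above: choose the checksum
 * byte so that all bytes of the structure sum to 0 modulo 256.
 */
static uint8_t
mp_checksum(const void *base, size_t len)
{
        const uint8_t *p = base;
        uint8_t sum = 0;

        while (len-- > 0)
                sum += *p++;
        return (256 - sum);
}

int
main(void)
{
        uint8_t tbl[16] = { 0x5f, 0x4d, 0x50, 0x5f, 1, 4 };     /* toy data */
        uint8_t sum = 0;
        size_t i;

        tbl[10] = mp_checksum(tbl, sizeof(tbl)); /* pretend byte 10 is the checksum field */
        for (i = 0; i < sizeof(tbl); i++)
                sum += tbl[i];
        assert(sum == 0);       /* a consumer validates the table this way */
        return (0);
}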
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * This file implements multiple network backends (tap, netmap, ...), * to be used by network frontends such as virtio-net and e1000. * The API to access the backend (e.g. send/receive packets, negotiate * features) is exported by net_backends.h. */ #include __FBSDID("$FreeBSD$"); #include /* u_short etc */ #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #if defined(INET6) || defined(INET) #include #endif #include #include #define NETMAP_WITH_LIBS #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NETGRAPH #include #include #include #endif #include "config.h" #include "debug.h" #include "iov.h" #include "mevent.h" #include "net_backends.h" #include "pci_emul.h" #include /* * Each network backend registers a set of function pointers that are * used to implement the net backends API. * This might need to be exposed if we implement backends in separate files. */ struct net_backend { const char *prefix; /* prefix matching this backend */ /* * Routines used to initialize and cleanup the resources needed * by a backend. The cleanup function is used internally, * and should not be called by the frontend. */ int (*init)(struct net_backend *be, const char *devname, nvlist_t *nvl, net_be_rxeof_t cb, void *param); void (*cleanup)(struct net_backend *be); /* * Called to serve a guest transmit request. The scatter-gather * vector provided by the caller has 'iovcnt' elements and contains * the packet to send. */ ssize_t (*send)(struct net_backend *be, const struct iovec *iov, int iovcnt); /* * Get the length of the next packet that can be received from * the backend. If no packets are currently available, this * function returns 0. */ ssize_t (*peek_recvlen)(struct net_backend *be); /* * Called to receive a packet from the backend. When the function * returns a positive value 'len', the scatter-gather vector * provided by the caller contains a packet with such length. * The function returns 0 if the backend doesn't have a new packet to * receive. */ ssize_t (*recv)(struct net_backend *be, const struct iovec *iov, int iovcnt); /* * Ask the backend to enable or disable receive operation in the * backend. On return from a disable operation, it is guaranteed * that the receive callback won't be called until receive is * enabled again. Note however that it is up to the caller to make * sure that netbe_recv() is not currently being executed by another * thread. */ void (*recv_enable)(struct net_backend *be); void (*recv_disable)(struct net_backend *be); /* * Ask the backend for the virtio-net features it is able to * support. Possible features are TSO, UFO and checksum offloading * in both rx and tx direction and for both IPv4 and IPv6. */ uint64_t (*get_cap)(struct net_backend *be); /* * Tell the backend to enable/disable the specified virtio-net * features (capabilities). 
*/ int (*set_cap)(struct net_backend *be, uint64_t features, unsigned int vnet_hdr_len); struct pci_vtnet_softc *sc; int fd; /* * Length of the virtio-net header used by the backend and the * frontend, respectively. A zero value means that the header * is not used. */ unsigned int be_vnet_hdr_len; unsigned int fe_vnet_hdr_len; /* Size of backend-specific private data. */ size_t priv_size; /* Room for backend-specific data. */ char opaque[0]; }; SET_DECLARE(net_backend_set, struct net_backend); #define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr) #define WPRINTF(params) PRINTLN params /* * The tap backend */ #if defined(INET6) || defined(INET) const int pf_list[] = { #if defined(INET6) PF_INET6, #endif #if defined(INET) PF_INET, #endif }; #endif struct tap_priv { struct mevent *mevp; /* * A bounce buffer that allows us to implement the peek_recvlen * callback. In the future we may get the same information from * the kevent data. */ char bbuf[1 << 16]; ssize_t bbuflen; }; static void tap_cleanup(struct net_backend *be) { struct tap_priv *priv = (struct tap_priv *)be->opaque; if (priv->mevp) { mevent_delete(priv->mevp); } if (be->fd != -1) { close(be->fd); be->fd = -1; } } static int tap_init(struct net_backend *be, const char *devname, nvlist_t *nvl, net_be_rxeof_t cb, void *param) { struct tap_priv *priv = (struct tap_priv *)be->opaque; char tbuf[80]; int opt = 1; #if defined(INET6) || defined(INET) struct ifreq ifrq; int i, s; #endif #ifndef WITHOUT_CAPSICUM cap_rights_t rights; #endif if (cb == NULL) { WPRINTF(("TAP backend requires non-NULL callback")); return (-1); } strcpy(tbuf, "/dev/"); strlcat(tbuf, devname, sizeof(tbuf)); be->fd = open(tbuf, O_RDWR); if (be->fd == -1) { WPRINTF(("open of tap device %s failed", tbuf)); goto error; } /* * Set non-blocking and register for read * notifications with the event loop */ if (ioctl(be->fd, FIONBIO, &opt) < 0) { WPRINTF(("tap device O_NONBLOCK failed")); goto error; } #if defined(INET6) || defined(INET) /* * Try to UP the interface rather than relying on * net.link.tap.up_on_open. */ bzero(&ifrq, sizeof(ifrq)); if (ioctl(be->fd, TAPGIFNAME, &ifrq) < 0) { WPRINTF(("Could not get interface name")); goto error; } s = -1; for (i = 0; s == -1 && i < nitems(pf_list); i++) s = socket(pf_list[i], SOCK_DGRAM, 0); if (s == -1) { WPRINTF(("Could open socket")); goto error; } if (ioctl(s, SIOCGIFFLAGS, &ifrq) < 0) { (void)close(s); WPRINTF(("Could not get interface flags")); goto error; } ifrq.ifr_flags |= IFF_UP; if (ioctl(s, SIOCSIFFLAGS, &ifrq) < 0) { (void)close(s); WPRINTF(("Could not set interface flags")); goto error; } (void)close(s); #endif #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); if (caph_rights_limit(be->fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif memset(priv->bbuf, 0, sizeof(priv->bbuf)); priv->bbuflen = 0; priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); if (priv->mevp == NULL) { WPRINTF(("Could not register event")); goto error; } return (0); error: tap_cleanup(be); return (-1); } /* * Called to send a buffer chain out to the tap device */ static ssize_t tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt) { return (writev(be->fd, iov, iovcnt)); } static ssize_t tap_peek_recvlen(struct net_backend *be) { struct tap_priv *priv = (struct tap_priv *)be->opaque; ssize_t ret; if (priv->bbuflen > 0) { /* * We already have a packet in the bounce buffer. * Just return its length. 
*/ return priv->bbuflen; } /* * Read the next packet (if any) into the bounce buffer, so * that we get to know its length and we can return that * to the caller. */ ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf)); if (ret < 0 && errno == EWOULDBLOCK) { return (0); } if (ret > 0) priv->bbuflen = ret; return (ret); } static ssize_t tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) { struct tap_priv *priv = (struct tap_priv *)be->opaque; ssize_t ret; if (priv->bbuflen > 0) { /* * A packet is available in the bounce buffer, so * we read it from there. */ ret = buf_to_iov(priv->bbuf, priv->bbuflen, iov, iovcnt, 0); /* Mark the bounce buffer as empty. */ priv->bbuflen = 0; return (ret); } ret = readv(be->fd, iov, iovcnt); if (ret < 0 && errno == EWOULDBLOCK) { return (0); } return (ret); } static void tap_recv_enable(struct net_backend *be) { struct tap_priv *priv = (struct tap_priv *)be->opaque; mevent_enable(priv->mevp); } static void tap_recv_disable(struct net_backend *be) { struct tap_priv *priv = (struct tap_priv *)be->opaque; mevent_disable(priv->mevp); } static uint64_t tap_get_cap(struct net_backend *be) { return (0); /* no capabilities for now */ } static int tap_set_cap(struct net_backend *be, uint64_t features, unsigned vnet_hdr_len) { return ((features || vnet_hdr_len) ? -1 : 0); } static struct net_backend tap_backend = { .prefix = "tap", .priv_size = sizeof(struct tap_priv), .init = tap_init, .cleanup = tap_cleanup, .send = tap_send, .peek_recvlen = tap_peek_recvlen, .recv = tap_recv, .recv_enable = tap_recv_enable, .recv_disable = tap_recv_disable, .get_cap = tap_get_cap, .set_cap = tap_set_cap, }; /* A clone of the tap backend, with a different prefix. */ static struct net_backend vmnet_backend = { .prefix = "vmnet", .priv_size = sizeof(struct tap_priv), .init = tap_init, .cleanup = tap_cleanup, .send = tap_send, .peek_recvlen = tap_peek_recvlen, .recv = tap_recv, .recv_enable = tap_recv_enable, .recv_disable = tap_recv_disable, .get_cap = tap_get_cap, .set_cap = tap_set_cap, }; DATA_SET(net_backend_set, tap_backend); DATA_SET(net_backend_set, vmnet_backend); #ifdef NETGRAPH /* * Netgraph backend */ #define NG_SBUF_MAX_SIZE (4 * 1024 * 1024) static int ng_init(struct net_backend *be, const char *devname, nvlist_t *nvl, net_be_rxeof_t cb, void *param) { struct tap_priv *p = (struct tap_priv *)be->opaque; struct ngm_connect ngc; const char *value, *nodename; int sbsz; int ctrl_sock; int flags; unsigned long maxsbsz; size_t msbsz; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; #endif if (cb == NULL) { WPRINTF(("Netgraph backend requires non-NULL callback")); return (-1); } be->fd = -1; memset(&ngc, 0, sizeof(ngc)); value = get_config_value_node(nvl, "path"); if (value == NULL) { WPRINTF(("path must be provided")); return (-1); } strncpy(ngc.path, value, NG_PATHSIZ - 1); value = get_config_value_node(nvl, "hook"); if (value == NULL) value = "vmlink"; strncpy(ngc.ourhook, value, NG_HOOKSIZ - 1); value = get_config_value_node(nvl, "peerhook"); if (value == NULL) { WPRINTF(("peer hook must be provided")); return (-1); } strncpy(ngc.peerhook, value, NG_HOOKSIZ - 1); nodename = get_config_value_node(nvl, "socket"); if (NgMkSockNode(nodename, &ctrl_sock, &be->fd) < 0) { WPRINTF(("can't get Netgraph sockets")); return (-1); } if (NgSendMsg(ctrl_sock, ".", NGM_GENERIC_COOKIE, NGM_CONNECT, &ngc, sizeof(ngc)) < 0) { WPRINTF(("can't connect to node")); close(ctrl_sock); goto error; } close(ctrl_sock); flags = fcntl(be->fd, F_GETFL); if (flags < 0) { WPRINTF(("can't get 
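New backends are added by filling in a struct net_backend template and placing it in the net_backend_set linker set, exactly as the tap/vmnet entries above do. A hedged sketch of a hypothetical do-nothing "null" backend; it assumes the code sits inside net_backends.c, where the struct definition, SET_DECLARE() and the required headers are already visible:

/* Hypothetical backend: transmits are dropped, nothing is ever received. */
static int
null_init(struct net_backend *be, const char *devname, nvlist_t *nvl,
    net_be_rxeof_t cb, void *param)
{
        be->fd = -1;    /* no underlying descriptor */
        return (0);
}

static void
null_cleanup(struct net_backend *be)
{
}

static ssize_t
null_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
        ssize_t len = 0;
        int i;

        for (i = 0; i < iovcnt; i++)
                len += iov[i].iov_len;
        return (len);           /* pretend the whole packet went out */
}

static ssize_t
null_peek_recvlen(struct net_backend *be)
{
        return (0);             /* never has a packet pending */
}

static ssize_t
null_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
        return (0);
}

static void
null_recv_enable(struct net_backend *be)
{
}

static void
null_recv_disable(struct net_backend *be)
{
}

static uint64_t
null_get_cap(struct net_backend *be)
{
        return (0);
}

static int
null_set_cap(struct net_backend *be, uint64_t features,
    unsigned int vnet_hdr_len)
{
        return ((features || vnet_hdr_len) ? -1 : 0);
}

static struct net_backend null_backend = {
        .prefix = "null",
        .priv_size = 0,
        .init = null_init,
        .cleanup = null_cleanup,
        .send = null_send,
        .peek_recvlen = null_peek_recvlen,
        .recv = null_recv,
        .recv_enable = null_recv_enable,
        .recv_disable = null_recv_disable,
        .get_cap = null_get_cap,
        .set_cap = null_set_cap,
};
DATA_SET(net_backend_set, null_backend);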
socket flags")); goto error; } if (fcntl(be->fd, F_SETFL, flags | O_NONBLOCK) < 0) { WPRINTF(("can't set O_NONBLOCK flag")); goto error; } /* * The default ng_socket(4) buffer's size is too low. * Calculate the minimum value between NG_SBUF_MAX_SIZE - * and kern.ipc.maxsockbuf. + * and kern.ipc.maxsockbuf. */ msbsz = sizeof(maxsbsz); if (sysctlbyname("kern.ipc.maxsockbuf", &maxsbsz, &msbsz, NULL, 0) < 0) { WPRINTF(("can't get 'kern.ipc.maxsockbuf' value")); goto error; } /* * We can't set the socket buffer size to kern.ipc.maxsockbuf value, * as it takes into account the mbuf(9) overhead. */ maxsbsz = maxsbsz * MCLBYTES / (MSIZE + MCLBYTES); sbsz = MIN(NG_SBUF_MAX_SIZE, maxsbsz); if (setsockopt(be->fd, SOL_SOCKET, SO_SNDBUF, &sbsz, sizeof(sbsz)) < 0) { WPRINTF(("can't set TX buffer size")); goto error; } if (setsockopt(be->fd, SOL_SOCKET, SO_RCVBUF, &sbsz, sizeof(sbsz)) < 0) { WPRINTF(("can't set RX buffer size")); goto error; } #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); if (caph_rights_limit(be->fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif memset(p->bbuf, 0, sizeof(p->bbuf)); p->bbuflen = 0; p->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); if (p->mevp == NULL) { WPRINTF(("Could not register event")); goto error; } return (0); error: tap_cleanup(be); return (-1); } static struct net_backend ng_backend = { .prefix = "netgraph", .priv_size = sizeof(struct tap_priv), .init = ng_init, .cleanup = tap_cleanup, .send = tap_send, .peek_recvlen = tap_peek_recvlen, .recv = tap_recv, .recv_enable = tap_recv_enable, .recv_disable = tap_recv_disable, .get_cap = tap_get_cap, .set_cap = tap_set_cap, }; DATA_SET(net_backend_set, ng_backend); #endif /* NETGRAPH */ /* * The netmap backend */ /* The virtio-net features supported by netmap. */ #define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \ VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \ VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \ VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO) struct netmap_priv { char ifname[IFNAMSIZ]; struct nm_desc *nmd; uint16_t memid; struct netmap_ring *rx; struct netmap_ring *tx; struct mevent *mevp; net_be_rxeof_t cb; void *cb_param; }; static void nmreq_init(struct nmreq *req, char *ifname) { memset(req, 0, sizeof(*req)); strlcpy(req->nr_name, ifname, sizeof(req->nr_name)); req->nr_version = NETMAP_API; } static int netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len) { int err; struct nmreq req; struct netmap_priv *priv = (struct netmap_priv *)be->opaque; nmreq_init(&req, priv->ifname); req.nr_cmd = NETMAP_BDG_VNET_HDR; req.nr_arg1 = vnet_hdr_len; err = ioctl(be->fd, NIOCREGIF, &req); if (err) { WPRINTF(("Unable to set vnet header length %d", vnet_hdr_len)); return (err); } be->be_vnet_hdr_len = vnet_hdr_len; return (0); } static int netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len) { int prev_hdr_len = be->be_vnet_hdr_len; int ret; if (vnet_hdr_len == prev_hdr_len) { return (1); } ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len); if (ret) { return (0); } netmap_set_vnet_hdr_len(be, prev_hdr_len); return (1); } static uint64_t netmap_get_cap(struct net_backend *be) { return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ? 
NETMAP_FEATURES : 0); } static int netmap_set_cap(struct net_backend *be, uint64_t features, unsigned vnet_hdr_len) { return (netmap_set_vnet_hdr_len(be, vnet_hdr_len)); } static int netmap_init(struct net_backend *be, const char *devname, nvlist_t *nvl, net_be_rxeof_t cb, void *param) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; strlcpy(priv->ifname, devname, sizeof(priv->ifname)); priv->ifname[sizeof(priv->ifname) - 1] = '\0'; priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL); if (priv->nmd == NULL) { WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)", devname, strerror(errno))); free(priv); return (-1); } priv->memid = priv->nmd->req.nr_arg2; priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0); priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0); priv->cb = cb; priv->cb_param = param; be->fd = priv->nmd->fd; priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); if (priv->mevp == NULL) { WPRINTF(("Could not register event")); return (-1); } return (0); } static void netmap_cleanup(struct net_backend *be) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; if (priv->mevp) { mevent_delete(priv->mevp); } if (priv->nmd) { nm_close(priv->nmd); } be->fd = -1; } static ssize_t netmap_send(struct net_backend *be, const struct iovec *iov, int iovcnt) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; struct netmap_ring *ring; ssize_t totlen = 0; int nm_buf_size; int nm_buf_len; uint32_t head; void *nm_buf; int j; ring = priv->tx; head = ring->head; if (head == ring->tail) { WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt))); goto txsync; } nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); nm_buf_size = ring->nr_buf_size; nm_buf_len = 0; for (j = 0; j < iovcnt; j++) { int iov_frag_size = iov[j].iov_len; void *iov_frag_buf = iov[j].iov_base; totlen += iov_frag_size; /* * Split each iovec fragment over more netmap slots, if * necessary. */ for (;;) { int copylen; copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size; memcpy(nm_buf, iov_frag_buf, copylen); iov_frag_buf += copylen; iov_frag_size -= copylen; nm_buf += copylen; nm_buf_size -= copylen; nm_buf_len += copylen; if (iov_frag_size == 0) { break; } ring->slot[head].len = nm_buf_len; ring->slot[head].flags = NS_MOREFRAG; head = nm_ring_next(ring, head); if (head == ring->tail) { /* * We ran out of netmap slots while * splitting the iovec fragments. */ WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt))); goto txsync; } nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); nm_buf_size = ring->nr_buf_size; nm_buf_len = 0; } } /* Complete the last slot, which must not have NS_MOREFRAG set. */ ring->slot[head].len = nm_buf_len; ring->slot[head].flags = 0; head = nm_ring_next(ring, head); /* Now update ring->head and ring->cur. 
*/ ring->head = ring->cur = head; txsync: ioctl(be->fd, NIOCTXSYNC, NULL); return (totlen); } static ssize_t netmap_peek_recvlen(struct net_backend *be) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; struct netmap_ring *ring = priv->rx; uint32_t head = ring->head; ssize_t totlen = 0; while (head != ring->tail) { struct netmap_slot *slot = ring->slot + head; totlen += slot->len; if ((slot->flags & NS_MOREFRAG) == 0) break; head = nm_ring_next(ring, head); } return (totlen); } static ssize_t netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; struct netmap_slot *slot = NULL; struct netmap_ring *ring; void *iov_frag_buf; int iov_frag_size; ssize_t totlen = 0; uint32_t head; assert(iovcnt); ring = priv->rx; head = ring->head; iov_frag_buf = iov->iov_base; iov_frag_size = iov->iov_len; do { int nm_buf_len; void *nm_buf; if (head == ring->tail) { return (0); } slot = ring->slot + head; nm_buf = NETMAP_BUF(ring, slot->buf_idx); nm_buf_len = slot->len; for (;;) { int copylen = nm_buf_len < iov_frag_size ? nm_buf_len : iov_frag_size; memcpy(iov_frag_buf, nm_buf, copylen); nm_buf += copylen; nm_buf_len -= copylen; iov_frag_buf += copylen; iov_frag_size -= copylen; totlen += copylen; if (nm_buf_len == 0) { break; } iov++; iovcnt--; if (iovcnt == 0) { /* No space to receive. */ WPRINTF(("Short iov, drop %zd bytes", totlen)); return (-ENOSPC); } iov_frag_buf = iov->iov_base; iov_frag_size = iov->iov_len; } head = nm_ring_next(ring, head); } while (slot->flags & NS_MOREFRAG); /* Release slots to netmap. */ ring->head = ring->cur = head; return (totlen); } static void netmap_recv_enable(struct net_backend *be) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; mevent_enable(priv->mevp); } static void netmap_recv_disable(struct net_backend *be) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; mevent_disable(priv->mevp); } static struct net_backend netmap_backend = { .prefix = "netmap", .priv_size = sizeof(struct netmap_priv), .init = netmap_init, .cleanup = netmap_cleanup, .send = netmap_send, .peek_recvlen = netmap_peek_recvlen, .recv = netmap_recv, .recv_enable = netmap_recv_enable, .recv_disable = netmap_recv_disable, .get_cap = netmap_get_cap, .set_cap = netmap_set_cap, }; /* A clone of the netmap backend, with a different prefix. */ static struct net_backend vale_backend = { .prefix = "vale", .priv_size = sizeof(struct netmap_priv), .init = netmap_init, .cleanup = netmap_cleanup, .send = netmap_send, .peek_recvlen = netmap_peek_recvlen, .recv = netmap_recv, .recv_enable = netmap_recv_enable, .recv_disable = netmap_recv_disable, .get_cap = netmap_get_cap, .set_cap = netmap_set_cap, }; DATA_SET(net_backend_set, netmap_backend); DATA_SET(net_backend_set, vale_backend); int netbe_legacy_config(nvlist_t *nvl, const char *opts) { char *backend, *cp; if (opts == NULL) return (0); cp = strchr(opts, ','); if (cp == NULL) { set_config_value_node(nvl, "backend", opts); return (0); } backend = strndup(opts, cp - opts); set_config_value_node(nvl, "backend", backend); free(backend); return (pci_parse_legacy_config(nvl, cp + 1)); } /* * Initialize a backend and attach to the frontend. * This is called during frontend initialization. * @ret is a pointer to the backend to be initialized * @devname is the backend-name as supplied on the command line, * e.g. 
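netmap_send() above has to split a guest packet across fixed-size netmap slots, marking every slot except the last with NS_MOREFRAG. A deliberately simplified, flat-buffer toy model of that splitting; this is not netmap code, and the structure and sizes are invented:

#include <stdbool.h>
#include <stddef.h>
#include <string.h>

#define SLOT_SIZE       2048

struct toy_slot {
        char buf[SLOT_SIZE];
        size_t len;
        bool morefrag;          /* stands in for NS_MOREFRAG */
};

/* Copy a packet into consecutive slots; only the last slot clears morefrag. */
static int
fill_slots(const char *pkt, size_t pktlen, struct toy_slot *slots, int nslots)
{
        int used = 0;

        while (pktlen > 0 && used < nslots) {
                size_t chunk = pktlen < SLOT_SIZE ? pktlen : SLOT_SIZE;

                memcpy(slots[used].buf, pkt, chunk);
                slots[used].len = chunk;
                slots[used].morefrag = (pktlen > chunk);
                pkt += chunk;
                pktlen -= chunk;
                used++;
        }
        return (pktlen == 0 ? used : -1);       /* -1: ran out of slots */
}

int
main(void)
{
        static char pkt[5000];
        struct toy_slot slots[4];

        /* 5000 bytes -> 3 slots: 2048 + 2048 + 904, last one without morefrag. */
        return (fill_slots(pkt, sizeof(pkt), slots, 4) == 3 ? 0 : 1);
}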
-s 2:0,frontend-name,backend-name[,other-args] * @cb is the receive callback supplied by the frontend, * and it is invoked in the event loop when a receive * event is generated in the hypervisor, * @param is a pointer to the frontend, and normally used as * the argument for the callback. */ int netbe_init(struct net_backend **ret, nvlist_t *nvl, net_be_rxeof_t cb, void *param) { struct net_backend **pbe, *nbe, *tbe = NULL; const char *value; char *devname; int err; value = get_config_value_node(nvl, "backend"); if (value == NULL) { return (-1); } devname = strdup(value); /* * Find the network backend that matches the user-provided * device name. net_backend_set is built using a linker set. */ SET_FOREACH(pbe, net_backend_set) { if (strncmp(devname, (*pbe)->prefix, strlen((*pbe)->prefix)) == 0) { tbe = *pbe; assert(tbe->init != NULL); assert(tbe->cleanup != NULL); assert(tbe->send != NULL); assert(tbe->recv != NULL); assert(tbe->get_cap != NULL); assert(tbe->set_cap != NULL); break; } } *ret = NULL; if (tbe == NULL) { free(devname); return (EINVAL); } nbe = calloc(1, sizeof(*nbe) + tbe->priv_size); *nbe = *tbe; /* copy the template */ nbe->fd = -1; nbe->sc = param; nbe->be_vnet_hdr_len = 0; nbe->fe_vnet_hdr_len = 0; /* Initialize the backend. */ err = nbe->init(nbe, devname, nvl, cb, param); if (err) { free(devname); free(nbe); return (err); } *ret = nbe; free(devname); return (0); } void netbe_cleanup(struct net_backend *be) { if (be != NULL) { be->cleanup(be); free(be); } } uint64_t netbe_get_cap(struct net_backend *be) { assert(be != NULL); return (be->get_cap(be)); } int netbe_set_cap(struct net_backend *be, uint64_t features, unsigned vnet_hdr_len) { int ret; assert(be != NULL); /* There are only three valid lengths, i.e., 0, 10 and 12. */ if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN && vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t))) return (-1); be->fe_vnet_hdr_len = vnet_hdr_len; ret = be->set_cap(be, features, vnet_hdr_len); assert(be->be_vnet_hdr_len == 0 || be->be_vnet_hdr_len == be->fe_vnet_hdr_len); return (ret); } ssize_t netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt) { return (be->send(be, iov, iovcnt)); } ssize_t netbe_peek_recvlen(struct net_backend *be) { return (be->peek_recvlen(be)); } /* * Try to read a packet from the backend, without blocking. * If no packets are available, return 0. In case of success, return * the length of the packet just read. Return -1 in case of errors. */ ssize_t netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) { return (be->recv(be, iov, iovcnt)); } /* * Read a packet from the backend and discard it. * Returns the size of the discarded packet or zero if no packet was available. * A negative error code is returned in case of read error. */ ssize_t netbe_rx_discard(struct net_backend *be) { /* * MP note: the dummybuf is only used to discard frames, * so there is no need for it to be per-vtnet or locked. * We only make it large enough for TSO-sized segment. 
*/ static uint8_t dummybuf[65536 + 64]; struct iovec iov; iov.iov_base = dummybuf; iov.iov_len = sizeof(dummybuf); return netbe_recv(be, &iov, 1); } void netbe_rx_disable(struct net_backend *be) { return be->recv_disable(be); } void netbe_rx_enable(struct net_backend *be) { return be->recv_enable(be); } size_t netbe_get_vnet_hdr_len(struct net_backend *be) { return (be->be_vnet_hdr_len); } diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c index 38ec87c34eed..316fbcf0f18d 100644 --- a/usr.sbin/bhyve/pci_ahci.c +++ b/usr.sbin/bhyve/pci_ahci.c @@ -1,2869 +1,2869 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 Zhixiang Yu * Copyright (c) 2015-2016 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "config.h" #include "debug.h" #include "pci_emul.h" #include "ahci.h" #include "block_if.h" #define DEF_PORTS 6 /* Intel ICH8 AHCI supports 6 ports */ #define MAX_PORTS 32 /* AHCI supports 32 ports */ #define PxSIG_ATA 0x00000101 /* ATA drive */ #define PxSIG_ATAPI 0xeb140101 /* ATAPI drive */ enum sata_fis_type { FIS_TYPE_REGH2D = 0x27, /* Register FIS - host to device */ FIS_TYPE_REGD2H = 0x34, /* Register FIS - device to host */ FIS_TYPE_DMAACT = 0x39, /* DMA activate FIS - device to host */ FIS_TYPE_DMASETUP = 0x41, /* DMA setup FIS - bidirectional */ FIS_TYPE_DATA = 0x46, /* Data FIS - bidirectional */ FIS_TYPE_BIST = 0x58, /* BIST activate FIS - bidirectional */ FIS_TYPE_PIOSETUP = 0x5F, /* PIO setup FIS - device to host */ FIS_TYPE_SETDEVBITS = 0xA1, /* Set dev bits FIS - device to host */ }; /* * SCSI opcodes */ #define TEST_UNIT_READY 0x00 #define REQUEST_SENSE 0x03 #define INQUIRY 0x12 #define START_STOP_UNIT 0x1B #define PREVENT_ALLOW 0x1E #define READ_CAPACITY 0x25 #define READ_10 0x28 #define POSITION_TO_ELEMENT 0x2B #define READ_TOC 0x43 #define GET_EVENT_STATUS_NOTIFICATION 0x4A #define MODE_SENSE_10 0x5A #define REPORT_LUNS 0xA0 #define READ_12 0xA8 #define READ_CD 0xBE /* * SCSI mode page codes */ #define MODEPAGE_RW_ERROR_RECOVERY 0x01 #define MODEPAGE_CD_CAPABILITIES 0x2A /* * ATA commands */ #define ATA_SF_ENAB_SATA_SF 0x10 #define ATA_SATA_SF_AN 0x05 #define ATA_SF_DIS_SATA_SF 0x90 /* * Debug printf */ #ifdef AHCI_DEBUG static FILE *dbg; #define DPRINTF(format, arg...) do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0) #else #define DPRINTF(format, arg...) #endif #define WPRINTF(format, arg...) 
printf(format, ##arg) #define AHCI_PORT_IDENT 20 + 1 struct ahci_ioreq { struct blockif_req io_req; struct ahci_port *io_pr; STAILQ_ENTRY(ahci_ioreq) io_flist; TAILQ_ENTRY(ahci_ioreq) io_blist; uint8_t *cfis; uint32_t len; uint32_t done; int slot; int more; int readop; }; struct ahci_port { struct blockif_ctxt *bctx; struct pci_ahci_softc *pr_sc; struct ata_params ata_ident; uint8_t *cmd_lst; uint8_t *rfis; int port; int atapi; int reset; int waitforclear; int mult_sectors; uint8_t xfermode; uint8_t err_cfis[20]; uint8_t sense_key; uint8_t asc; u_int ccs; uint32_t pending; uint32_t clb; uint32_t clbu; uint32_t fb; uint32_t fbu; uint32_t is; uint32_t ie; uint32_t cmd; uint32_t unused0; uint32_t tfd; uint32_t sig; uint32_t ssts; uint32_t sctl; uint32_t serr; uint32_t sact; uint32_t ci; uint32_t sntf; uint32_t fbs; /* * i/o request info */ struct ahci_ioreq *ioreq; int ioqsz; STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd; TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd; }; struct ahci_cmd_hdr { uint16_t flags; uint16_t prdtl; uint32_t prdbc; uint64_t ctba; uint32_t reserved[4]; }; struct ahci_prdt_entry { uint64_t dba; uint32_t reserved; #define DBCMASK 0x3fffff uint32_t dbc; }; struct pci_ahci_softc { struct pci_devinst *asc_pi; pthread_mutex_t mtx; int ports; uint32_t cap; uint32_t ghc; uint32_t is; uint32_t pi; uint32_t vs; uint32_t ccc_ctl; uint32_t ccc_pts; uint32_t em_loc; uint32_t em_ctl; uint32_t cap2; uint32_t bohc; uint32_t lintr; struct ahci_port port[MAX_PORTS]; }; #define ahci_ctx(sc) ((sc)->asc_pi->pi_vmctx) static void ahci_handle_port(struct ahci_port *p); static inline void lba_to_msf(uint8_t *buf, int lba) { lba += 150; buf[0] = (lba / 75) / 60; buf[1] = (lba / 75) % 60; buf[2] = lba % 75; } /* * Generate HBA interrupts on global IS register write. */ static void ahci_generate_intr(struct pci_ahci_softc *sc, uint32_t mask) { struct pci_devinst *pi = sc->asc_pi; struct ahci_port *p; int i, nmsg; uint32_t mmask; /* Update global IS from PxIS/PxIE. */ for (i = 0; i < sc->ports; i++) { p = &sc->port[i]; if (p->is & p->ie) sc->is |= (1 << i); } DPRINTF("%s(%08x) %08x", __func__, mask, sc->is); /* If there is nothing enabled -- clear legacy interrupt and exit. */ if (sc->is == 0 || (sc->ghc & AHCI_GHC_IE) == 0) { if (sc->lintr) { pci_lintr_deassert(pi); sc->lintr = 0; } return; } /* If there is anything and no MSI -- assert legacy interrupt. */ nmsg = pci_msi_maxmsgnum(pi); if (nmsg == 0) { if (!sc->lintr) { sc->lintr = 1; pci_lintr_assert(pi); } return; } /* Assert respective MSIs for ports that were touched. */ for (i = 0; i < nmsg; i++) { if (sc->ports <= nmsg || i < nmsg - 1) mmask = 1 << i; else mmask = 0xffffffff << i; if (sc->is & mask && mmask & mask) pci_generate_msi(pi, i); } } /* * Generate HBA interrupt on specific port event. */ static void ahci_port_intr(struct ahci_port *p) { struct pci_ahci_softc *sc = p->pr_sc; struct pci_devinst *pi = sc->asc_pi; int nmsg; DPRINTF("%s(%d) %08x/%08x %08x", __func__, p->port, p->is, p->ie, sc->is); /* If there is nothing enabled -- we are done. */ if ((p->is & p->ie) == 0) return; /* In case of non-shared MSI always generate interrupt. */ nmsg = pci_msi_maxmsgnum(pi); if (sc->ports <= nmsg || p->port < nmsg - 1) { sc->is |= (1 << p->port); if ((sc->ghc & AHCI_GHC_IE) == 0) return; pci_generate_msi(pi, p->port); return; } /* If IS for this port is already set -- do nothing. */ if (sc->is & (1 << p->port)) return; sc->is |= (1 << p->port); /* If interrupts are enabled -- generate one. 
*/ if ((sc->ghc & AHCI_GHC_IE) == 0) return; if (nmsg > 0) { pci_generate_msi(pi, nmsg - 1); } else if (!sc->lintr) { sc->lintr = 1; pci_lintr_assert(pi); } } static void ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis) { int offset, len, irq; if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE)) return; switch (ft) { case FIS_TYPE_REGD2H: offset = 0x40; len = 20; irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0; break; case FIS_TYPE_SETDEVBITS: offset = 0x58; len = 8; irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0; break; case FIS_TYPE_PIOSETUP: offset = 0x20; len = 20; irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0; break; default: WPRINTF("unsupported fis type %d", ft); return; } if (fis[2] & ATA_S_ERROR) { p->waitforclear = 1; irq |= AHCI_P_IX_TFE; } memcpy(p->rfis + offset, fis, len); if (irq) { if (~p->is & irq) { p->is |= irq; ahci_port_intr(p); } } } static void ahci_write_fis_piosetup(struct ahci_port *p) { uint8_t fis[20]; memset(fis, 0, sizeof(fis)); fis[0] = FIS_TYPE_PIOSETUP; ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis); } static void ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd) { uint8_t fis[8]; uint8_t error; error = (tfd >> 8) & 0xff; tfd &= 0x77; memset(fis, 0, sizeof(fis)); fis[0] = FIS_TYPE_SETDEVBITS; fis[1] = (1 << 6); fis[2] = tfd; fis[3] = error; if (fis[2] & ATA_S_ERROR) { p->err_cfis[0] = slot; p->err_cfis[2] = tfd; p->err_cfis[3] = error; memcpy(&p->err_cfis[4], cfis + 4, 16); } else { *(uint32_t *)(fis + 4) = (1 << slot); p->sact &= ~(1 << slot); } p->tfd &= ~0x77; p->tfd |= tfd; ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis); } static void ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd) { uint8_t fis[20]; uint8_t error; error = (tfd >> 8) & 0xff; memset(fis, 0, sizeof(fis)); fis[0] = FIS_TYPE_REGD2H; fis[1] = (1 << 6); fis[2] = tfd & 0xff; fis[3] = error; fis[4] = cfis[4]; fis[5] = cfis[5]; fis[6] = cfis[6]; fis[7] = cfis[7]; fis[8] = cfis[8]; fis[9] = cfis[9]; fis[10] = cfis[10]; fis[11] = cfis[11]; fis[12] = cfis[12]; fis[13] = cfis[13]; if (fis[2] & ATA_S_ERROR) { p->err_cfis[0] = 0x80; p->err_cfis[2] = tfd & 0xff; p->err_cfis[3] = error; memcpy(&p->err_cfis[4], cfis + 4, 16); } else p->ci &= ~(1 << slot); p->tfd = tfd; ahci_write_fis(p, FIS_TYPE_REGD2H, fis); } static void ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot) { uint8_t fis[20]; p->tfd = ATA_S_READY | ATA_S_DSC; memset(fis, 0, sizeof(fis)); fis[0] = FIS_TYPE_REGD2H; fis[1] = 0; /* No interrupt */ fis[2] = p->tfd; /* Status */ fis[3] = 0; /* No error */ p->ci &= ~(1 << slot); ahci_write_fis(p, FIS_TYPE_REGD2H, fis); } static void ahci_write_reset_fis_d2h(struct ahci_port *p) { uint8_t fis[20]; memset(fis, 0, sizeof(fis)); fis[0] = FIS_TYPE_REGD2H; fis[3] = 1; fis[4] = 1; if (p->atapi) { fis[5] = 0x14; fis[6] = 0xeb; } fis[12] = 1; ahci_write_fis(p, FIS_TYPE_REGD2H, fis); } static void ahci_check_stopped(struct ahci_port *p) { /* * If we are no longer processing the command list and nothing * is in-flight, clear the running bit, the current command * slot, the command issue and active bits. */ if (!(p->cmd & AHCI_P_CMD_ST)) { if (p->pending == 0) { p->ccs = 0; p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK); p->ci = 0; p->sact = 0; p->waitforclear = 0; } } } static void ahci_port_stop(struct ahci_port *p) { struct ahci_ioreq *aior; uint8_t *cfis; int slot; int error; assert(pthread_mutex_isowned_np(&p->pr_sc->mtx)); TAILQ_FOREACH(aior, &p->iobhd, io_blist) { /* * Try to cancel the outstanding blockif request. 
*/ error = blockif_cancel(p->bctx, &aior->io_req); if (error != 0) continue; slot = aior->slot; cfis = aior->cfis; if (cfis[2] == ATA_WRITE_FPDMA_QUEUED || cfis[2] == ATA_READ_FPDMA_QUEUED || cfis[2] == ATA_SEND_FPDMA_QUEUED) p->sact &= ~(1 << slot); /* NCQ */ else p->ci &= ~(1 << slot); /* * This command is now done. */ p->pending &= ~(1 << slot); /* * Delete the blockif request from the busy list */ TAILQ_REMOVE(&p->iobhd, aior, io_blist); /* * Move the blockif request back to the free list */ STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist); } ahci_check_stopped(p); } static void ahci_port_reset(struct ahci_port *pr) { pr->serr = 0; pr->sact = 0; pr->xfermode = ATA_UDMA6; pr->mult_sectors = 128; if (!pr->bctx) { pr->ssts = ATA_SS_DET_NO_DEVICE; pr->sig = 0xFFFFFFFF; pr->tfd = 0x7F; return; } pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE; if (pr->sctl & ATA_SC_SPD_MASK) pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK); else pr->ssts |= ATA_SS_SPD_GEN3; pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA; if (!pr->atapi) { pr->sig = PxSIG_ATA; pr->tfd |= ATA_S_READY; } else pr->sig = PxSIG_ATAPI; ahci_write_reset_fis_d2h(pr); } static void ahci_reset(struct pci_ahci_softc *sc) { int i; sc->ghc = AHCI_GHC_AE; sc->is = 0; if (sc->lintr) { pci_lintr_deassert(sc->asc_pi); sc->lintr = 0; } for (i = 0; i < sc->ports; i++) { sc->port[i].ie = 0; sc->port[i].is = 0; sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD); if (sc->port[i].bctx) sc->port[i].cmd |= AHCI_P_CMD_CPS; sc->port[i].sctl = 0; ahci_port_reset(&sc->port[i]); } } static void ata_string(uint8_t *dest, const char *src, int len) { int i; for (i = 0; i < len; i++) { if (*src) dest[i ^ 1] = *src++; else dest[i ^ 1] = ' '; } } static void atapi_string(uint8_t *dest, const char *src, int len) { int i; for (i = 0; i < len; i++) { if (*src) dest[i] = *src++; else dest[i] = ' '; } } /* * Build up the iovec based on the PRDT, 'done' and 'len'. */ static void ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior, struct ahci_prdt_entry *prdt, uint16_t prdtl) { struct blockif_req *breq = &aior->io_req; int i, j, skip, todo, left, extra; uint32_t dbcsz; /* Copy part of PRDT between 'done' and 'len' bytes into the iov. */ skip = aior->done; left = aior->len - aior->done; todo = 0; for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0; i++, prdt++) { dbcsz = (prdt->dbc & DBCMASK) + 1; /* Skip already done part of the PRDT */ if (dbcsz <= skip) { skip -= dbcsz; continue; } dbcsz -= skip; if (dbcsz > left) dbcsz = left; breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba + skip, dbcsz); breq->br_iov[j].iov_len = dbcsz; todo += dbcsz; left -= dbcsz; skip = 0; j++; } /* If we got limited by IOV length, round I/O down to sector size. 
*/ if (j == BLOCKIF_IOV_MAX) { extra = todo % blockif_sectsz(p->bctx); todo -= extra; assert(todo > 0); while (extra > 0) { if (breq->br_iov[j - 1].iov_len > extra) { breq->br_iov[j - 1].iov_len -= extra; break; } extra -= breq->br_iov[j - 1].iov_len; j--; } } breq->br_iovcnt = j; breq->br_resid = todo; aior->done += todo; aior->more = (aior->done < aior->len && i < prdtl); } static void ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done) { struct ahci_ioreq *aior; struct blockif_req *breq; struct ahci_prdt_entry *prdt; struct ahci_cmd_hdr *hdr; uint64_t lba; uint32_t len; int err, first, ncq, readop; prdt = (struct ahci_prdt_entry *)(cfis + 0x80); hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); ncq = 0; readop = 1; first = (done == 0); if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 || cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 || cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 || cfis[2] == ATA_WRITE_FPDMA_QUEUED) readop = 0; if (cfis[2] == ATA_WRITE_FPDMA_QUEUED || cfis[2] == ATA_READ_FPDMA_QUEUED) { lba = ((uint64_t)cfis[10] << 40) | ((uint64_t)cfis[9] << 32) | ((uint64_t)cfis[8] << 24) | ((uint64_t)cfis[6] << 16) | ((uint64_t)cfis[5] << 8) | cfis[4]; len = cfis[11] << 8 | cfis[3]; if (!len) len = 65536; ncq = 1; } else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 || cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 || cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) { lba = ((uint64_t)cfis[10] << 40) | ((uint64_t)cfis[9] << 32) | ((uint64_t)cfis[8] << 24) | ((uint64_t)cfis[6] << 16) | ((uint64_t)cfis[5] << 8) | cfis[4]; len = cfis[13] << 8 | cfis[12]; if (!len) len = 65536; } else { lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) | (cfis[5] << 8) | cfis[4]; len = cfis[12]; if (!len) len = 256; } lba *= blockif_sectsz(p->bctx); len *= blockif_sectsz(p->bctx); /* Pull request off free list */ aior = STAILQ_FIRST(&p->iofhd); assert(aior != NULL); STAILQ_REMOVE_HEAD(&p->iofhd, io_flist); aior->cfis = cfis; aior->slot = slot; aior->len = len; aior->done = done; aior->readop = readop; breq = &aior->io_req; breq->br_offset = lba + done; ahci_build_iov(p, aior, prdt, hdr->prdtl); /* Mark this command in-flight. */ p->pending |= 1 << slot; /* Stuff request onto busy list. */ TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); if (ncq && first) ahci_write_fis_d2h_ncq(p, slot); if (readop) err = blockif_read(p->bctx, breq); else err = blockif_write(p->bctx, breq); assert(err == 0); } static void ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis) { struct ahci_ioreq *aior; struct blockif_req *breq; int err; /* * Pull request off free list */ aior = STAILQ_FIRST(&p->iofhd); assert(aior != NULL); STAILQ_REMOVE_HEAD(&p->iofhd, io_flist); aior->cfis = cfis; aior->slot = slot; aior->len = 0; aior->done = 0; aior->more = 0; breq = &aior->io_req; /* * Mark this command in-flight. 
*/ p->pending |= 1 << slot; /* * Stuff request onto busy list */ TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); err = blockif_flush(p->bctx, breq); assert(err == 0); } static inline void read_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf, int size) { struct ahci_cmd_hdr *hdr; struct ahci_prdt_entry *prdt; void *to; int i, len; hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); len = size; to = buf; prdt = (struct ahci_prdt_entry *)(cfis + 0x80); for (i = 0; i < hdr->prdtl && len; i++) { uint8_t *ptr; uint32_t dbcsz; int sublen; dbcsz = (prdt->dbc & DBCMASK) + 1; ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz); sublen = MIN(len, dbcsz); memcpy(to, ptr, sublen); len -= sublen; to += sublen; prdt++; } } static void ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done) { struct ahci_ioreq *aior; struct blockif_req *breq; uint8_t *entry; uint64_t elba; uint32_t len, elen; int err, first, ncq; uint8_t buf[512]; first = (done == 0); if (cfis[2] == ATA_DATA_SET_MANAGEMENT) { len = (uint16_t)cfis[13] << 8 | cfis[12]; len *= 512; ncq = 0; } else { /* ATA_SEND_FPDMA_QUEUED */ len = (uint16_t)cfis[11] << 8 | cfis[3]; len *= 512; ncq = 1; } read_prdt(p, slot, cfis, buf, sizeof(buf)); next: entry = &buf[done]; elba = ((uint64_t)entry[5] << 40) | ((uint64_t)entry[4] << 32) | ((uint64_t)entry[3] << 24) | ((uint64_t)entry[2] << 16) | ((uint64_t)entry[1] << 8) | entry[0]; elen = (uint16_t)entry[7] << 8 | entry[6]; done += 8; if (elen == 0) { if (done >= len) { if (ncq) { if (first) ahci_write_fis_d2h_ncq(p, slot); ahci_write_fis_sdb(p, slot, cfis, ATA_S_READY | ATA_S_DSC); } else { ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); } p->pending &= ~(1 << slot); ahci_check_stopped(p); if (!first) ahci_handle_port(p); return; } goto next; } /* * Pull request off free list */ aior = STAILQ_FIRST(&p->iofhd); assert(aior != NULL); STAILQ_REMOVE_HEAD(&p->iofhd, io_flist); aior->cfis = cfis; aior->slot = slot; aior->len = len; aior->done = done; aior->more = (len != done); breq = &aior->io_req; breq->br_offset = elba * blockif_sectsz(p->bctx); breq->br_resid = elen * blockif_sectsz(p->bctx); /* * Mark this command in-flight. 
*/ p->pending |= 1 << slot; /* * Stuff request onto busy list */ TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); if (ncq && first) ahci_write_fis_d2h_ncq(p, slot); err = blockif_delete(p->bctx, breq); assert(err == 0); } static inline void write_prdt(struct ahci_port *p, int slot, uint8_t *cfis, void *buf, int size) { struct ahci_cmd_hdr *hdr; struct ahci_prdt_entry *prdt; void *from; int i, len; hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); len = size; from = buf; prdt = (struct ahci_prdt_entry *)(cfis + 0x80); for (i = 0; i < hdr->prdtl && len; i++) { uint8_t *ptr; uint32_t dbcsz; int sublen; dbcsz = (prdt->dbc & DBCMASK) + 1; ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz); sublen = MIN(len, dbcsz); memcpy(ptr, from, sublen); len -= sublen; from += sublen; prdt++; } hdr->prdbc = size - len; } static void ahci_checksum(uint8_t *buf, int size) { int i; uint8_t sum = 0; for (i = 0; i < size - 1; i++) sum += buf[i]; buf[size - 1] = 0x100 - sum; } static void ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis) { struct ahci_cmd_hdr *hdr; uint32_t buf[128]; uint8_t *buf8 = (uint8_t *)buf; uint16_t *buf16 = (uint16_t *)buf; hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); if (p->atapi || hdr->prdtl == 0 || cfis[5] != 0 || cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) { ahci_write_fis_d2h(p, slot, cfis, (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); return; } memset(buf, 0, sizeof(buf)); if (cfis[4] == 0x00) { /* Log directory */ buf16[0x00] = 1; /* Version -- 1 */ buf16[0x10] = 1; /* NCQ Command Error Log -- 1 page */ buf16[0x13] = 1; /* SATA NCQ Send and Receive Log -- 1 page */ } else if (cfis[4] == 0x10) { /* NCQ Command Error Log */ memcpy(buf8, p->err_cfis, sizeof(p->err_cfis)); ahci_checksum(buf8, sizeof(buf)); } else if (cfis[4] == 0x13) { /* SATA NCQ Send and Receive Log */ if (blockif_candelete(p->bctx) && !blockif_is_ro(p->bctx)) { buf[0x00] = 1; /* SFQ DSM supported */ buf[0x01] = 1; /* SFQ DSM TRIM supported */ } } else { ahci_write_fis_d2h(p, slot, cfis, (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); return; } if (cfis[2] == ATA_READ_LOG_EXT) ahci_write_fis_piosetup(p); write_prdt(p, slot, cfis, (void *)buf, sizeof(buf)); ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY); } static void handle_identify(struct ahci_port *p, int slot, uint8_t *cfis) { struct ahci_cmd_hdr *hdr; hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); if (p->atapi || hdr->prdtl == 0) { ahci_write_fis_d2h(p, slot, cfis, (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); } else { ahci_write_fis_piosetup(p); write_prdt(p, slot, cfis, (void*)&p->ata_ident, sizeof(struct ata_params)); ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY); } } static void ata_identify_init(struct ahci_port* p, int atapi) { struct ata_params* ata_ident = &p->ata_ident; if (atapi) { ata_ident->config = ATA_PROTO_ATAPI | ATA_ATAPI_TYPE_CDROM | ATA_ATAPI_REMOVABLE | ATA_DRQ_FAST; ata_ident->capabilities1 = ATA_SUPPORT_LBA | ATA_SUPPORT_DMA; ata_ident->capabilities2 = (1 << 14 | 1); ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88; ata_ident->obsolete62 = 0x3f; ata_ident->mwdmamodes = 7; if (p->xfermode & ATA_WDMA0) ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8)); ata_ident->apiomodes = 3; ata_ident->mwdmamin = 0x0078; ata_ident->mwdmarec = 0x0078; ata_ident->pioblind = 0x0078; ata_ident->pioiordy = 0x0078; ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3); ata_ident->satacapabilities2 = ((p->ssts & 
ATA_SS_SPD_MASK) >> 3); ata_ident->satasupport = ATA_SUPPORT_NCQ_STREAM; ata_ident->version_major = 0x3f0; ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET | ATA_SUPPORT_RESET | ATA_SUPPORT_NOP); ata_ident->support.command2 = (1 << 14); ata_ident->support.extension = (1 << 14); ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET | ATA_SUPPORT_RESET | ATA_SUPPORT_NOP); ata_ident->enabled.extension = (1 << 14); ata_ident->udmamodes = 0x7f; if (p->xfermode & ATA_UDMA0) ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8)); ata_ident->transport_major = 0x1020; ata_ident->integrity = 0x00a5; } else { uint64_t sectors; int sectsz, psectsz, psectoff, candelete, ro; uint16_t cyl; uint8_t sech, heads; ro = blockif_is_ro(p->bctx); candelete = blockif_candelete(p->bctx); sectsz = blockif_sectsz(p->bctx); sectors = blockif_size(p->bctx) / sectsz; blockif_chs(p->bctx, &cyl, &heads, &sech); blockif_psectsz(p->bctx, &psectsz, &psectoff); ata_ident->config = ATA_DRQ_FAST; ata_ident->cylinders = cyl; ata_ident->heads = heads; ata_ident->sectors = sech; ata_ident->sectors_intr = (0x8000 | 128); ata_ident->tcg = 0; ata_ident->capabilities1 = ATA_SUPPORT_DMA | ATA_SUPPORT_LBA | ATA_SUPPORT_IORDY; ata_ident->capabilities2 = (1 << 14); ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88; if (p->mult_sectors) ata_ident->multi = (ATA_MULTI_VALID | p->mult_sectors); if (sectors <= 0x0fffffff) { ata_ident->lba_size_1 = sectors; ata_ident->lba_size_2 = (sectors >> 16); } else { ata_ident->lba_size_1 = 0xffff; ata_ident->lba_size_2 = 0x0fff; } ata_ident->mwdmamodes = 0x7; if (p->xfermode & ATA_WDMA0) ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8)); ata_ident->apiomodes = 0x3; ata_ident->mwdmamin = 0x0078; ata_ident->mwdmarec = 0x0078; ata_ident->pioblind = 0x0078; ata_ident->pioiordy = 0x0078; ata_ident->support3 = 0; ata_ident->queue = 31; ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 | ATA_SUPPORT_NCQ); ata_ident->satacapabilities2 = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED | (p->ssts & ATA_SS_SPD_MASK) >> 3); ata_ident->version_major = 0x3f0; ata_ident->version_minor = 0x28; ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE | ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP); ata_ident->support.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE | ATA_SUPPORT_FLUSHCACHE48 | 1 << 14); ata_ident->support.extension = (1 << 14); ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE | ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP); ata_ident->enabled.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE | ATA_SUPPORT_FLUSHCACHE48 | 1 << 15); ata_ident->enabled.extension = (1 << 14); ata_ident->udmamodes = 0x7f; if (p->xfermode & ATA_UDMA0) ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8)); ata_ident->lba_size48_1 = sectors; ata_ident->lba_size48_2 = (sectors >> 16); ata_ident->lba_size48_3 = (sectors >> 32); ata_ident->lba_size48_4 = (sectors >> 48); if (candelete && !ro) { ata_ident->support3 |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT; ata_ident->max_dsm_blocks = 1; ata_ident->support_dsm = ATA_SUPPORT_DSM_TRIM; } ata_ident->pss = ATA_PSS_VALID_VALUE; ata_ident->lsalign = 0x4000; if (psectsz > sectsz) { ata_ident->pss |= ATA_PSS_MULTLS; ata_ident->pss |= ffsl(psectsz / sectsz) - 1; ata_ident->lsalign |= (psectoff / sectsz); } if (sectsz > 512) { ata_ident->pss |= ATA_PSS_LSSABOVE512; ata_ident->lss_1 = sectsz / 2; ata_ident->lss_2 = ((sectsz / 2) >> 16); } ata_ident->support2 = 
(ATA_SUPPORT_RWLOGDMAEXT | 1 << 14); ata_ident->enabled2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14); ata_ident->transport_major = 0x1020; ata_ident->integrity = 0x00a5; } ahci_checksum((uint8_t*)ata_ident, sizeof(struct ata_params)); } static void handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis) { if (!p->atapi) { ahci_write_fis_d2h(p, slot, cfis, (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); } else { ahci_write_fis_piosetup(p); write_prdt(p, slot, cfis, (void *)&p->ata_ident, sizeof(struct ata_params)); ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY); } } static void atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis) { uint8_t buf[36]; uint8_t *acmd; int len; uint32_t tfd; acmd = cfis + 0x40; if (acmd[1] & 1) { /* VPD */ if (acmd[2] == 0) { /* Supported VPD pages */ buf[0] = 0x05; buf[1] = 0; buf[2] = 0; buf[3] = 1; buf[4] = 0; len = 4 + buf[3]; } else { p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; p->asc = 0x24; tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; ahci_write_fis_d2h(p, slot, cfis, tfd); return; } } else { buf[0] = 0x05; buf[1] = 0x80; buf[2] = 0x00; buf[3] = 0x21; buf[4] = 31; buf[5] = 0; buf[6] = 0; buf[7] = 0; atapi_string(buf + 8, "BHYVE", 8); atapi_string(buf + 16, "BHYVE DVD-ROM", 16); atapi_string(buf + 32, "001", 4); len = sizeof(buf); } if (len > acmd[4]) len = acmd[4]; cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; write_prdt(p, slot, cfis, buf, len); ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); } static void atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis) { uint8_t buf[8]; uint64_t sectors; sectors = blockif_size(p->bctx) / 2048; be32enc(buf, sectors - 1); be32enc(buf + 4, 2048); cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; write_prdt(p, slot, cfis, buf, sizeof(buf)); ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); } static void atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis) { uint8_t *acmd; uint8_t format; int len; acmd = cfis + 0x40; len = be16dec(acmd + 7); format = acmd[9] >> 6; switch (format) { case 0: { int msf, size; uint64_t sectors; uint8_t start_track, buf[20], *bp; msf = (acmd[1] >> 1) & 1; start_track = acmd[6]; if (start_track > 1 && start_track != 0xaa) { uint32_t tfd; p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; p->asc = 0x24; tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; ahci_write_fis_d2h(p, slot, cfis, tfd); return; } bp = buf + 2; *bp++ = 1; *bp++ = 1; if (start_track <= 1) { *bp++ = 0; *bp++ = 0x14; *bp++ = 1; *bp++ = 0; if (msf) { *bp++ = 0; lba_to_msf(bp, 0); bp += 3; } else { *bp++ = 0; *bp++ = 0; *bp++ = 0; *bp++ = 0; } } *bp++ = 0; *bp++ = 0x14; *bp++ = 0xaa; *bp++ = 0; sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx); sectors >>= 2; if (msf) { *bp++ = 0; lba_to_msf(bp, sectors); bp += 3; } else { be32enc(bp, sectors); bp += 4; } size = bp - buf; be16enc(buf, size - 2); if (len > size) len = size; write_prdt(p, slot, cfis, buf, len); cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); break; } case 1: { uint8_t buf[12]; memset(buf, 0, sizeof(buf)); buf[1] = 0xa; buf[2] = 0x1; buf[3] = 0x1; if (len > sizeof(buf)) len = sizeof(buf); write_prdt(p, slot, cfis, buf, len); cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); break; } case 2: { int msf, size; uint64_t sectors; uint8_t *bp, buf[50]; msf = (acmd[1] >> 1) 
& 1; bp = buf + 2; *bp++ = 1; *bp++ = 1; *bp++ = 1; *bp++ = 0x14; *bp++ = 0; *bp++ = 0xa0; *bp++ = 0; *bp++ = 0; *bp++ = 0; *bp++ = 0; *bp++ = 1; *bp++ = 0; *bp++ = 0; *bp++ = 1; *bp++ = 0x14; *bp++ = 0; *bp++ = 0xa1; *bp++ = 0; *bp++ = 0; *bp++ = 0; *bp++ = 0; *bp++ = 1; *bp++ = 0; *bp++ = 0; *bp++ = 1; *bp++ = 0x14; *bp++ = 0; *bp++ = 0xa2; *bp++ = 0; *bp++ = 0; *bp++ = 0; sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx); sectors >>= 2; if (msf) { *bp++ = 0; lba_to_msf(bp, sectors); bp += 3; } else { be32enc(bp, sectors); bp += 4; } *bp++ = 1; *bp++ = 0x14; *bp++ = 0; *bp++ = 1; *bp++ = 0; *bp++ = 0; *bp++ = 0; if (msf) { *bp++ = 0; lba_to_msf(bp, 0); bp += 3; } else { *bp++ = 0; *bp++ = 0; *bp++ = 0; *bp++ = 0; } size = bp - buf; be16enc(buf, size - 2); if (len > size) len = size; write_prdt(p, slot, cfis, buf, len); cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); break; } default: { uint32_t tfd; p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; p->asc = 0x24; tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; ahci_write_fis_d2h(p, slot, cfis, tfd); break; } } } static void atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis) { uint8_t buf[16]; memset(buf, 0, sizeof(buf)); buf[3] = 8; cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; write_prdt(p, slot, cfis, buf, sizeof(buf)); ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); } static void atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done) { struct ahci_ioreq *aior; struct ahci_cmd_hdr *hdr; struct ahci_prdt_entry *prdt; struct blockif_req *breq; uint8_t *acmd; uint64_t lba; uint32_t len; int err; acmd = cfis + 0x40; hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); prdt = (struct ahci_prdt_entry *)(cfis + 0x80); lba = be32dec(acmd + 2); if (acmd[0] == READ_10) len = be16dec(acmd + 7); else len = be32dec(acmd + 6); if (len == 0) { cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); } lba *= 2048; len *= 2048; /* * Pull request off free list */ aior = STAILQ_FIRST(&p->iofhd); assert(aior != NULL); STAILQ_REMOVE_HEAD(&p->iofhd, io_flist); aior->cfis = cfis; aior->slot = slot; aior->len = len; aior->done = done; aior->readop = 1; breq = &aior->io_req; breq->br_offset = lba + done; ahci_build_iov(p, aior, prdt, hdr->prdtl); /* Mark this command in-flight. */ p->pending |= 1 << slot; /* Stuff request onto busy list. 
*/ TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); err = blockif_read(p->bctx, breq); assert(err == 0); } static void atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis) { uint8_t buf[64]; uint8_t *acmd; int len; acmd = cfis + 0x40; len = acmd[4]; if (len > sizeof(buf)) len = sizeof(buf); memset(buf, 0, len); buf[0] = 0x70 | (1 << 7); buf[2] = p->sense_key; buf[7] = 10; buf[12] = p->asc; write_prdt(p, slot, cfis, buf, len); cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); } static void atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis) { uint8_t *acmd = cfis + 0x40; uint32_t tfd; switch (acmd[4] & 3) { case 0: case 1: case 3: cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; tfd = ATA_S_READY | ATA_S_DSC; break; case 2: /* TODO eject media */ cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; p->asc = 0x53; tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; break; } ahci_write_fis_d2h(p, slot, cfis, tfd); } static void atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis) { uint8_t *acmd; uint32_t tfd; uint8_t pc, code; int len; acmd = cfis + 0x40; len = be16dec(acmd + 7); pc = acmd[2] >> 6; code = acmd[2] & 0x3f; switch (pc) { case 0: switch (code) { case MODEPAGE_RW_ERROR_RECOVERY: { uint8_t buf[16]; if (len > sizeof(buf)) len = sizeof(buf); memset(buf, 0, sizeof(buf)); be16enc(buf, 16 - 2); buf[2] = 0x70; buf[8] = 0x01; buf[9] = 16 - 10; buf[11] = 0x05; write_prdt(p, slot, cfis, buf, len); tfd = ATA_S_READY | ATA_S_DSC; break; } case MODEPAGE_CD_CAPABILITIES: { uint8_t buf[30]; if (len > sizeof(buf)) len = sizeof(buf); memset(buf, 0, sizeof(buf)); be16enc(buf, 30 - 2); buf[2] = 0x70; buf[8] = 0x2A; buf[9] = 30 - 10; buf[10] = 0x08; buf[12] = 0x71; be16enc(&buf[18], 2); be16enc(&buf[20], 512); write_prdt(p, slot, cfis, buf, len); tfd = ATA_S_READY | ATA_S_DSC; break; } default: goto error; break; } break; case 3: p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; p->asc = 0x39; tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; break; error: case 1: case 2: p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; p->asc = 0x24; tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; break; } cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; ahci_write_fis_d2h(p, slot, cfis, tfd); } static void atapi_get_event_status_notification(struct ahci_port *p, int slot, uint8_t *cfis) { uint8_t *acmd; uint32_t tfd; acmd = cfis + 0x40; /* we don't support asynchronous operation */ if (!(acmd[1] & 1)) { p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; p->asc = 0x24; tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; } else { uint8_t buf[8]; int len; len = be16dec(acmd + 7); if (len > sizeof(buf)) len = sizeof(buf); memset(buf, 0, sizeof(buf)); be16enc(buf, 8 - 2); buf[2] = 0x04; buf[3] = 0x10; buf[5] = 0x02; write_prdt(p, slot, cfis, buf, len); tfd = ATA_S_READY | ATA_S_DSC; } cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; ahci_write_fis_d2h(p, slot, cfis, tfd); } static void handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis) { uint8_t *acmd; acmd = cfis + 0x40; #ifdef AHCI_DEBUG { int i; DPRINTF("ACMD:"); for (i = 0; i < 16; i++) DPRINTF("%02x ", acmd[i]); DPRINTF(""); } #endif switch (acmd[0]) { case TEST_UNIT_READY: cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); break; case INQUIRY: atapi_inquiry(p, slot, cfis); break; case READ_CAPACITY: atapi_read_capacity(p, slot, cfis); break; case PREVENT_ALLOW: /* 
TODO */ cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); break; case READ_TOC: atapi_read_toc(p, slot, cfis); break; case REPORT_LUNS: atapi_report_luns(p, slot, cfis); break; case READ_10: case READ_12: atapi_read(p, slot, cfis, 0); break; case REQUEST_SENSE: atapi_request_sense(p, slot, cfis); break; case START_STOP_UNIT: atapi_start_stop_unit(p, slot, cfis); break; case MODE_SENSE_10: atapi_mode_sense(p, slot, cfis); break; case GET_EVENT_STATUS_NOTIFICATION: atapi_get_event_status_notification(p, slot, cfis); break; default: cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; p->asc = 0x20; ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR); break; } } static void ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis) { p->tfd |= ATA_S_BUSY; switch (cfis[2]) { case ATA_ATA_IDENTIFY: handle_identify(p, slot, cfis); break; case ATA_SETFEATURES: { switch (cfis[3]) { case ATA_SF_ENAB_SATA_SF: switch (cfis[12]) { case ATA_SATA_SF_AN: p->tfd = ATA_S_DSC | ATA_S_READY; break; default: p->tfd = ATA_S_ERROR | ATA_S_READY; p->tfd |= (ATA_ERROR_ABORT << 8); break; } break; case ATA_SF_ENAB_WCACHE: case ATA_SF_DIS_WCACHE: case ATA_SF_ENAB_RCACHE: case ATA_SF_DIS_RCACHE: p->tfd = ATA_S_DSC | ATA_S_READY; break; case ATA_SF_SETXFER: { switch (cfis[12] & 0xf8) { case ATA_PIO: case ATA_PIO0: break; case ATA_WDMA0: case ATA_UDMA0: p->xfermode = (cfis[12] & 0x7); break; } p->tfd = ATA_S_DSC | ATA_S_READY; break; } default: p->tfd = ATA_S_ERROR | ATA_S_READY; p->tfd |= (ATA_ERROR_ABORT << 8); break; } ahci_write_fis_d2h(p, slot, cfis, p->tfd); break; } case ATA_SET_MULTI: if (cfis[12] != 0 && (cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) { p->tfd = ATA_S_ERROR | ATA_S_READY; p->tfd |= (ATA_ERROR_ABORT << 8); } else { p->mult_sectors = cfis[12]; p->tfd = ATA_S_DSC | ATA_S_READY; } ahci_write_fis_d2h(p, slot, cfis, p->tfd); break; case ATA_READ: case ATA_WRITE: case ATA_READ48: case ATA_WRITE48: case ATA_READ_MUL: case ATA_WRITE_MUL: case ATA_READ_MUL48: case ATA_WRITE_MUL48: case ATA_READ_DMA: case ATA_WRITE_DMA: case ATA_READ_DMA48: case ATA_WRITE_DMA48: case ATA_READ_FPDMA_QUEUED: case ATA_WRITE_FPDMA_QUEUED: ahci_handle_rw(p, slot, cfis, 0); break; case ATA_FLUSHCACHE: case ATA_FLUSHCACHE48: ahci_handle_flush(p, slot, cfis); break; case ATA_DATA_SET_MANAGEMENT: if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM && cfis[13] == 0 && cfis[12] == 1) { ahci_handle_dsm_trim(p, slot, cfis, 0); break; } ahci_write_fis_d2h(p, slot, cfis, (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); break; case ATA_SEND_FPDMA_QUEUED: if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM && cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM && cfis[11] == 0 && cfis[3] == 1) { ahci_handle_dsm_trim(p, slot, cfis, 0); break; } ahci_write_fis_d2h(p, slot, cfis, (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); break; case ATA_READ_LOG_EXT: case ATA_READ_LOG_DMA_EXT: ahci_handle_read_log(p, slot, cfis); break; case ATA_SECURITY_FREEZE_LOCK: case ATA_SMART_CMD: case ATA_NOP: ahci_write_fis_d2h(p, slot, cfis, (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); break; case ATA_CHECK_POWER_MODE: cfis[12] = 0xff; /* always on */ ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); break; case ATA_STANDBY_CMD: case ATA_STANDBY_IMMEDIATE: case ATA_IDLE_CMD: case ATA_IDLE_IMMEDIATE: case ATA_SLEEP: case ATA_READ_VERIFY: case ATA_READ_VERIFY48: ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); break; case 
ATA_ATAPI_IDENTIFY: handle_atapi_identify(p, slot, cfis); break; case ATA_PACKET_CMD: if (!p->atapi) { ahci_write_fis_d2h(p, slot, cfis, (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); } else handle_packet_cmd(p, slot, cfis); break; default: WPRINTF("Unsupported cmd:%02x", cfis[2]); ahci_write_fis_d2h(p, slot, cfis, (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); break; } } static void ahci_handle_slot(struct ahci_port *p, int slot) { struct ahci_cmd_hdr *hdr; #ifdef AHCI_DEBUG struct ahci_prdt_entry *prdt; #endif struct pci_ahci_softc *sc; uint8_t *cfis; #ifdef AHCI_DEBUG int cfl, i; #endif sc = p->pr_sc; hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); #ifdef AHCI_DEBUG cfl = (hdr->flags & 0x1f) * 4; #endif cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba, 0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry)); #ifdef AHCI_DEBUG prdt = (struct ahci_prdt_entry *)(cfis + 0x80); DPRINTF("cfis:"); for (i = 0; i < cfl; i++) { if (i % 10 == 0) DPRINTF(""); DPRINTF("%02x ", cfis[i]); } DPRINTF(""); for (i = 0; i < hdr->prdtl; i++) { DPRINTF("%d@%08"PRIx64"", prdt->dbc & 0x3fffff, prdt->dba); prdt++; } #endif if (cfis[0] != FIS_TYPE_REGH2D) { WPRINTF("Not a H2D FIS:%02x", cfis[0]); return; } if (cfis[1] & 0x80) { ahci_handle_cmd(p, slot, cfis); } else { if (cfis[15] & (1 << 2)) p->reset = 1; else if (p->reset) { p->reset = 0; ahci_port_reset(p); } p->ci &= ~(1 << slot); } } static void ahci_handle_port(struct ahci_port *p) { if (!(p->cmd & AHCI_P_CMD_ST)) return; /* * Search for any new commands to issue ignoring those that * are already in-flight. Stop if device is busy or in error. */ for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) { if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0) break; if (p->waitforclear) break; if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) { p->cmd &= ~AHCI_P_CMD_CCS_MASK; p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT; ahci_handle_slot(p, p->ccs); } } } /* * blockif callback routine - this runs in the context of the blockif * i/o thread, so the mutex needs to be acquired. */ static void ata_ioreq_cb(struct blockif_req *br, int err) { struct ahci_cmd_hdr *hdr; struct ahci_ioreq *aior; struct ahci_port *p; struct pci_ahci_softc *sc; uint32_t tfd; uint8_t *cfis; int slot, ncq, dsm; DPRINTF("%s %d", __func__, err); ncq = dsm = 0; aior = br->br_param; p = aior->io_pr; cfis = aior->cfis; slot = aior->slot; sc = p->pr_sc; hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); if (cfis[2] == ATA_WRITE_FPDMA_QUEUED || cfis[2] == ATA_READ_FPDMA_QUEUED || cfis[2] == ATA_SEND_FPDMA_QUEUED) ncq = 1; if (cfis[2] == ATA_DATA_SET_MANAGEMENT || (cfis[2] == ATA_SEND_FPDMA_QUEUED && (cfis[13] & 0x1f) == ATA_SFPDMA_DSM)) dsm = 1; pthread_mutex_lock(&sc->mtx); /* * Delete the blockif request from the busy list */ TAILQ_REMOVE(&p->iobhd, aior, io_blist); /* * Move the blockif request back to the free list */ STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist); if (!err) hdr->prdbc = aior->done; if (!err && aior->more) { if (dsm) ahci_handle_dsm_trim(p, slot, cfis, aior->done); - else + else ahci_handle_rw(p, slot, cfis, aior->done); goto out; } if (!err) tfd = ATA_S_READY | ATA_S_DSC; else tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR; if (ncq) ahci_write_fis_sdb(p, slot, cfis, tfd); else ahci_write_fis_d2h(p, slot, cfis, tfd); /* * This command is now complete. 
*/ p->pending &= ~(1 << slot); ahci_check_stopped(p); ahci_handle_port(p); out: pthread_mutex_unlock(&sc->mtx); DPRINTF("%s exit", __func__); } static void atapi_ioreq_cb(struct blockif_req *br, int err) { struct ahci_cmd_hdr *hdr; struct ahci_ioreq *aior; struct ahci_port *p; struct pci_ahci_softc *sc; uint8_t *cfis; uint32_t tfd; int slot; DPRINTF("%s %d", __func__, err); aior = br->br_param; p = aior->io_pr; cfis = aior->cfis; slot = aior->slot; sc = p->pr_sc; hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE); pthread_mutex_lock(&sc->mtx); /* * Delete the blockif request from the busy list */ TAILQ_REMOVE(&p->iobhd, aior, io_blist); /* * Move the blockif request back to the free list */ STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist); if (!err) hdr->prdbc = aior->done; if (!err && aior->more) { atapi_read(p, slot, cfis, aior->done); goto out; } if (!err) { tfd = ATA_S_READY | ATA_S_DSC; } else { p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; p->asc = 0x21; tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; } cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; ahci_write_fis_d2h(p, slot, cfis, tfd); /* * This command is now complete. */ p->pending &= ~(1 << slot); ahci_check_stopped(p); ahci_handle_port(p); out: pthread_mutex_unlock(&sc->mtx); DPRINTF("%s exit", __func__); } static void pci_ahci_ioreq_init(struct ahci_port *pr) { struct ahci_ioreq *vr; int i; pr->ioqsz = blockif_queuesz(pr->bctx); pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq)); STAILQ_INIT(&pr->iofhd); /* * Add all i/o request entries to the free queue */ for (i = 0; i < pr->ioqsz; i++) { vr = &pr->ioreq[i]; vr->io_pr = pr; if (!pr->atapi) vr->io_req.br_callback = ata_ioreq_cb; else vr->io_req.br_callback = atapi_ioreq_cb; vr->io_req.br_param = vr; STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist); } TAILQ_INIT(&pr->iobhd); } static void pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value) { int port = (offset - AHCI_OFFSET) / AHCI_STEP; offset = (offset - AHCI_OFFSET) % AHCI_STEP; struct ahci_port *p = &sc->port[port]; DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"", port, offset, value); switch (offset) { case AHCI_P_CLB: p->clb = value; break; case AHCI_P_CLBU: p->clbu = value; break; case AHCI_P_FB: p->fb = value; break; case AHCI_P_FBU: p->fbu = value; break; case AHCI_P_IS: p->is &= ~value; ahci_port_intr(p); break; case AHCI_P_IE: p->ie = value & 0xFDC000FF; ahci_port_intr(p); break; case AHCI_P_CMD: { p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD | AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE | AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE | AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK); p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD | AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE | AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE | AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value; if (!(value & AHCI_P_CMD_ST)) { ahci_port_stop(p); } else { uint64_t clb; p->cmd |= AHCI_P_CMD_CR; clb = (uint64_t)p->clbu << 32 | p->clb; p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb, AHCI_CL_SIZE * AHCI_MAX_SLOTS); } if (value & AHCI_P_CMD_FRE) { uint64_t fb; p->cmd |= AHCI_P_CMD_FR; fb = (uint64_t)p->fbu << 32 | p->fb; /* we don't support FBSCP, so rfis size is 256Bytes */ p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256); } else { p->cmd &= ~AHCI_P_CMD_FR; } if (value & AHCI_P_CMD_CLO) { p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ); p->cmd &= ~AHCI_P_CMD_CLO; } if (value & AHCI_P_CMD_ICC_MASK) { p->cmd &= ~AHCI_P_CMD_ICC_MASK; } 
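/*
 * Descriptive note: with the new PxCMD value in place, the port handler
 * below is invoked so that any commands made issuable by this write
 * (for instance, ST being set while PxCI bits were already pending)
 * are dispatched immediately rather than waiting for a later PxCI write.
 */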
ahci_handle_port(p); break; } case AHCI_P_TFD: case AHCI_P_SIG: case AHCI_P_SSTS: WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"", offset); break; case AHCI_P_SCTL: p->sctl = value; if (!(p->cmd & AHCI_P_CMD_ST)) { if (value & ATA_SC_DET_RESET) ahci_port_reset(p); } break; case AHCI_P_SERR: p->serr &= ~value; break; case AHCI_P_SACT: p->sact |= value; break; case AHCI_P_CI: p->ci |= value; ahci_handle_port(p); break; case AHCI_P_SNTF: case AHCI_P_FBS: default: break; } } static void pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value) { DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"", offset, value); switch (offset) { case AHCI_CAP: case AHCI_PI: case AHCI_VS: case AHCI_CAP2: DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"", offset); break; case AHCI_GHC: if (value & AHCI_GHC_HR) { ahci_reset(sc); break; } if (value & AHCI_GHC_IE) sc->ghc |= AHCI_GHC_IE; else sc->ghc &= ~AHCI_GHC_IE; ahci_generate_intr(sc, 0xffffffff); break; case AHCI_IS: sc->is &= ~value; ahci_generate_intr(sc, value); break; default: break; } } static void pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { struct pci_ahci_softc *sc = pi->pi_arg; assert(baridx == 5); assert((offset % 4) == 0 && size == 4); pthread_mutex_lock(&sc->mtx); if (offset < AHCI_OFFSET) pci_ahci_host_write(sc, offset, value); else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP) pci_ahci_port_write(sc, offset, value); else WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"", offset); pthread_mutex_unlock(&sc->mtx); } static uint64_t pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset) { uint32_t value; switch (offset) { case AHCI_CAP: case AHCI_GHC: case AHCI_IS: case AHCI_PI: case AHCI_VS: case AHCI_CCCC: case AHCI_CCCP: case AHCI_EM_LOC: case AHCI_EM_CTL: case AHCI_CAP2: { uint32_t *p = &sc->cap; p += (offset - AHCI_CAP) / sizeof(uint32_t); value = *p; break; } default: value = 0; break; } DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x", offset, value); return (value); } static uint64_t pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset) { uint32_t value; int port = (offset - AHCI_OFFSET) / AHCI_STEP; offset = (offset - AHCI_OFFSET) % AHCI_STEP; switch (offset) { case AHCI_P_CLB: case AHCI_P_CLBU: case AHCI_P_FB: case AHCI_P_FBU: case AHCI_P_IS: case AHCI_P_IE: case AHCI_P_CMD: case AHCI_P_TFD: case AHCI_P_SIG: case AHCI_P_SSTS: case AHCI_P_SCTL: case AHCI_P_SERR: case AHCI_P_SACT: case AHCI_P_CI: case AHCI_P_SNTF: case AHCI_P_FBS: { uint32_t *p= &sc->port[port].clb; p += (offset - AHCI_P_CLB) / sizeof(uint32_t); value = *p; break; } default: value = 0; break; } DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x", port, offset, value); return value; } static uint64_t pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t regoff, int size) { struct pci_ahci_softc *sc = pi->pi_arg; uint64_t offset; uint32_t value; assert(baridx == 5); assert(size == 1 || size == 2 || size == 4); assert((regoff & (size - 1)) == 0); pthread_mutex_lock(&sc->mtx); offset = regoff & ~0x3; /* round down to a multiple of 4 bytes */ if (offset < AHCI_OFFSET) value = pci_ahci_host_read(sc, offset); else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP) value = pci_ahci_port_read(sc, offset); else { value = 0; WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"", regoff); } value >>= 8 * (regoff & 0x3); pthread_mutex_unlock(&sc->mtx); return (value); } 
/* * Each AHCI controller has a "port" node which contains nodes for * each port named after the decimal number of the port (no leading * zeroes). Port nodes contain a "type" ("hd" or "cd"), as well as * options for blockif. For example: * * pci.0.1.0 * .device="ahci" * .port * .0 * .type="hd" * .path="/path/to/image" */ static int pci_ahci_legacy_config_port(nvlist_t *nvl, int port, const char *type, const char *opts) { char node_name[sizeof("XX")]; nvlist_t *port_nvl; snprintf(node_name, sizeof(node_name), "%d", port); port_nvl = create_relative_config_node(nvl, node_name); set_config_value_node(port_nvl, "type", type); return (blockif_legacy_config(port_nvl, opts)); } static int pci_ahci_legacy_config(nvlist_t *nvl, const char *opts) { nvlist_t *ports_nvl; const char *type; char *next, *next2, *str, *tofree; int p, ret; if (opts == NULL) return (0); ports_nvl = create_relative_config_node(nvl, "port"); ret = 1; tofree = str = strdup(opts); for (p = 0; p < MAX_PORTS && str != NULL; p++, str = next) { /* Identify and cut off type of present port. */ if (strncmp(str, "hd:", 3) == 0) { type = "hd"; str += 3; } else if (strncmp(str, "cd:", 3) == 0) { type = "cd"; str += 3; } else type = NULL; /* Find and cut off the next port options. */ next = strstr(str, ",hd:"); next2 = strstr(str, ",cd:"); if (next == NULL || (next2 != NULL && next2 < next)) next = next2; if (next != NULL) { next[0] = 0; next++; } if (str[0] == 0) continue; if (type == NULL) { EPRINTLN("Missing or invalid type for port %d: \"%s\"", p, str); goto out; } if (pci_ahci_legacy_config_port(ports_nvl, p, type, str) != 0) goto out; } ret = 0; out: free(tofree); return (ret); } static int pci_ahci_cd_legacy_config(nvlist_t *nvl, const char *opts) { nvlist_t *ports_nvl; ports_nvl = create_relative_config_node(nvl, "port"); return (pci_ahci_legacy_config_port(ports_nvl, 0, "cd", opts)); } static int pci_ahci_hd_legacy_config(nvlist_t *nvl, const char *opts) { nvlist_t *ports_nvl; ports_nvl = create_relative_config_node(nvl, "port"); return (pci_ahci_legacy_config_port(ports_nvl, 0, "hd", opts)); } static int pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) { char bident[sizeof("XX:XX:XX")]; char node_name[sizeof("XX")]; struct blockif_ctxt *bctxt; struct pci_ahci_softc *sc; int atapi, ret, slots, p; MD5_CTX mdctx; u_char digest[16]; const char *path, *type, *value; nvlist_t *ports_nvl, *port_nvl; ret = 0; #ifdef AHCI_DEBUG dbg = fopen("/tmp/log", "w+"); #endif sc = calloc(1, sizeof(struct pci_ahci_softc)); pi->pi_arg = sc; sc->asc_pi = pi; pthread_mutex_init(&sc->mtx, NULL); sc->ports = 0; sc->pi = 0; slots = 32; ports_nvl = find_relative_config_node(nvl, "port"); for (p = 0; p < MAX_PORTS; p++) { struct ata_params *ata_ident = &sc->port[p].ata_ident; char ident[AHCI_PORT_IDENT]; snprintf(node_name, sizeof(node_name), "%d", p); port_nvl = find_relative_config_node(ports_nvl, node_name); if (port_nvl == NULL) continue; type = get_config_value_node(port_nvl, "type"); if (type == NULL) continue; if (strcmp(type, "hd") == 0) atapi = 0; else atapi = 1; /* * Attempt to open the backing image. Use the PCI slot/func * and the port number for the identifier string. */ snprintf(bident, sizeof(bident), "%d:%d:%d", pi->pi_slot, pi->pi_func, p); bctxt = blockif_open(port_nvl, bident); if (bctxt == NULL) { sc->ports = p; ret = 1; goto open_fail; - } + } sc->port[p].bctx = bctxt; sc->port[p].pr_sc = sc; sc->port[p].port = p; sc->port[p].atapi = atapi; /* * Create an identifier for the backing file. 
* Use parts of the md5 sum of the filename */ path = get_config_value_node(port_nvl, "path"); MD5Init(&mdctx); MD5Update(&mdctx, path, strlen(path)); MD5Final(digest, &mdctx); snprintf(ident, AHCI_PORT_IDENT, "BHYVE-%02X%02X-%02X%02X-%02X%02X", digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]); memset(ata_ident, 0, sizeof(struct ata_params)); ata_string((uint8_t*)&ata_ident->serial, ident, 20); ata_string((uint8_t*)&ata_ident->revision, "001", 8); if (atapi) ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DVD ROM", 40); else ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DISK", 40); value = get_config_value_node(port_nvl, "nmrr"); if (value != NULL) ata_ident->media_rotation_rate = atoi(value); value = get_config_value_node(port_nvl, "ser"); if (value != NULL) ata_string((uint8_t*)(&ata_ident->serial), value, 20); value = get_config_value_node(port_nvl, "rev"); if (value != NULL) ata_string((uint8_t*)(&ata_ident->revision), value, 8); value = get_config_value_node(port_nvl, "model"); if (value != NULL) ata_string((uint8_t*)(&ata_ident->model), value, 40); ata_identify_init(&sc->port[p], atapi); /* * Allocate blockif request structures and add them * to the free list */ pci_ahci_ioreq_init(&sc->port[p]); sc->pi |= (1 << p); if (sc->port[p].ioqsz < slots) slots = sc->port[p].ioqsz; } sc->ports = p; /* Intel ICH8 AHCI */ --slots; if (sc->ports < DEF_PORTS) sc->ports = DEF_PORTS; sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF | AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP | AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)| AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC | (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1); sc->vs = 0x10300; sc->cap2 = AHCI_CAP2_APST; ahci_reset(sc); pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821); pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA); pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0); p = MIN(sc->ports, 16); p = flsl(p) - ((p & (p - 1)) ? 0 : 1); pci_emul_add_msicap(pi, 1 << p); pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32, AHCI_OFFSET + sc->ports * AHCI_STEP); pci_lintr_request(pi); open_fail: if (ret) { for (p = 0; p < sc->ports; p++) { if (sc->port[p].bctx != NULL) blockif_close(sc->port[p].bctx); } free(sc); } return (ret); } #ifdef BHYVE_SNAPSHOT static int pci_ahci_snapshot_save_queues(struct ahci_port *port, struct vm_snapshot_meta *meta) { int ret; int idx; struct ahci_ioreq *ioreq; STAILQ_FOREACH(ioreq, &port->iofhd, io_flist) { idx = ((void *) ioreq - (void *) port->ioreq) / sizeof(*ioreq); SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done); } idx = -1; SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done); TAILQ_FOREACH(ioreq, &port->iobhd, io_blist) { idx = ((void *) ioreq - (void *) port->ioreq) / sizeof(*ioreq); SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done); /* * Snapshot only the busy requests; other requests are * not valid. */ ret = blockif_snapshot_req(&ioreq->io_req, meta); if (ret != 0) { fprintf(stderr, "%s: failed to snapshot req\r\n", __func__); goto done; } } idx = -1; SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done); done: return (ret); } static int pci_ahci_snapshot_restore_queues(struct ahci_port *port, struct vm_snapshot_meta *meta) { int ret; int idx; struct ahci_ioreq *ioreq; /* Empty the free queue before restoring. */ while (!STAILQ_EMPTY(&port->iofhd)) STAILQ_REMOVE_HEAD(&port->iofhd, io_flist); /* Restore the free queue. 
*/ while (1) { SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done); if (idx == -1) break; STAILQ_INSERT_TAIL(&port->iofhd, &port->ioreq[idx], io_flist); } /* Restore the busy queue. */ while (1) { SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done); if (idx == -1) break; ioreq = &port->ioreq[idx]; TAILQ_INSERT_TAIL(&port->iobhd, ioreq, io_blist); /* * Restore only the busy requests; other requests are * not valid. */ ret = blockif_snapshot_req(&ioreq->io_req, meta); if (ret != 0) { fprintf(stderr, "%s: failed to restore request\r\n", __func__); goto done; } /* Re-enqueue the requests in the block interface. */ if (ioreq->readop) ret = blockif_read(port->bctx, &ioreq->io_req); else ret = blockif_write(port->bctx, &ioreq->io_req); if (ret != 0) { fprintf(stderr, "%s: failed to re-enqueue request\r\n", __func__); goto done; } } done: return (ret); } static int pci_ahci_snapshot(struct vm_snapshot_meta *meta) { int i, j, ret; void *bctx; struct pci_devinst *pi; struct pci_ahci_softc *sc; struct ahci_port *port; struct ahci_cmd_hdr *hdr; struct ahci_ioreq *ioreq; pi = meta->dev_data; sc = pi->pi_arg; /* TODO: add mtx lock/unlock */ SNAPSHOT_VAR_OR_LEAVE(sc->ports, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->cap, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->ghc, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->is, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->pi, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->vs, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->ccc_ctl, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->ccc_pts, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->em_loc, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->em_ctl, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->cap2, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->bohc, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->lintr, meta, ret, done); for (i = 0; i < MAX_PORTS; i++) { port = &sc->port[i]; if (meta->op == VM_SNAPSHOT_SAVE) bctx = port->bctx; SNAPSHOT_VAR_OR_LEAVE(bctx, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->port, meta, ret, done); /* Mostly for restore; save is ensured by the lines above. 
*/ if (((bctx == NULL) && (port->bctx != NULL)) || ((bctx != NULL) && (port->bctx == NULL))) { fprintf(stderr, "%s: ports not matching\r\n", __func__); ret = EINVAL; goto done; } if (port->bctx == NULL) continue; if (port->port != i) { fprintf(stderr, "%s: ports not matching: " "actual: %d expected: %d\r\n", __func__, port->port, i); ret = EINVAL; goto done; } SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->cmd_lst, AHCI_CL_SIZE * AHCI_MAX_SLOTS, false, meta, ret, done); SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->rfis, 256, false, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->ata_ident, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->atapi, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->reset, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->waitforclear, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->mult_sectors, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->xfermode, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->err_cfis, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->sense_key, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->asc, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->ccs, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->pending, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->clb, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->clbu, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->fb, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->fbu, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->ie, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->cmd, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->unused0, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->tfd, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->sig, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->ssts, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->sctl, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->serr, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->sact, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->ci, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->sntf, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done); for (j = 0; j < port->ioqsz; j++) { ioreq = &port->ioreq[j]; /* blockif_req snapshot done only for busy requests. */ hdr = (struct ahci_cmd_hdr *)(port->cmd_lst + ioreq->slot * AHCI_CL_SIZE); SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ioreq->cfis, 0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry), false, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(ioreq->len, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(ioreq->done, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(ioreq->slot, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(ioreq->more, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(ioreq->readop, meta, ret, done); } /* Perform save / restore specific operations. 
*/ if (meta->op == VM_SNAPSHOT_SAVE) { ret = pci_ahci_snapshot_save_queues(port, meta); if (ret != 0) goto done; } else if (meta->op == VM_SNAPSHOT_RESTORE) { ret = pci_ahci_snapshot_restore_queues(port, meta); if (ret != 0) goto done; } else { ret = EINVAL; goto done; } ret = blockif_snapshot(port->bctx, meta); if (ret != 0) { fprintf(stderr, "%s: failed to restore blockif\r\n", __func__); goto done; } } done: return (ret); } static int pci_ahci_pause(struct vmctx *ctx, struct pci_devinst *pi) { struct pci_ahci_softc *sc; struct blockif_ctxt *bctxt; int i; sc = pi->pi_arg; for (i = 0; i < MAX_PORTS; i++) { bctxt = sc->port[i].bctx; if (bctxt == NULL) continue; blockif_pause(bctxt); } return (0); } static int pci_ahci_resume(struct vmctx *ctx, struct pci_devinst *pi) { struct pci_ahci_softc *sc; struct blockif_ctxt *bctxt; int i; sc = pi->pi_arg; for (i = 0; i < MAX_PORTS; i++) { bctxt = sc->port[i].bctx; if (bctxt == NULL) continue; blockif_resume(bctxt); } return (0); } #endif /* * Use separate emulation names to distinguish drive and atapi devices */ struct pci_devemu pci_de_ahci = { .pe_emu = "ahci", .pe_init = pci_ahci_init, .pe_legacy_config = pci_ahci_legacy_config, .pe_barwrite = pci_ahci_write, .pe_barread = pci_ahci_read, #ifdef BHYVE_SNAPSHOT .pe_snapshot = pci_ahci_snapshot, .pe_pause = pci_ahci_pause, .pe_resume = pci_ahci_resume, #endif }; PCI_EMUL_SET(pci_de_ahci); struct pci_devemu pci_de_ahci_hd = { .pe_emu = "ahci-hd", .pe_legacy_config = pci_ahci_hd_legacy_config, .pe_alias = "ahci", }; PCI_EMUL_SET(pci_de_ahci_hd); struct pci_devemu pci_de_ahci_cd = { .pe_emu = "ahci-cd", .pe_legacy_config = pci_ahci_cd_legacy_config, .pe_alias = "ahci", }; PCI_EMUL_SET(pci_de_ahci_cd); diff --git a/usr.sbin/bhyve/pci_e82545.c b/usr.sbin/bhyve/pci_e82545.c index 2bca6753e6a0..85c02889ad51 100644 --- a/usr.sbin/bhyve/pci_e82545.c +++ b/usr.sbin/bhyve/pci_e82545.c @@ -1,2511 +1,2511 @@ /* * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016 Alexander Motin * Copyright (c) 2015 Peter Grehan * Copyright (c) 2013 Jeremiah Lott, Avere Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000_regs.h" #include "e1000_defines.h" #include "mii.h" #include "bhyverun.h" #include "config.h" #include "debug.h" #include "pci_emul.h" #include "mevent.h" #include "net_utils.h" #include "net_backends.h" /* Hardware/register definitions XXX: move some to common code. */ #define E82545_VENDOR_ID_INTEL 0x8086 #define E82545_DEV_ID_82545EM_COPPER 0x100F #define E82545_SUBDEV_ID 0x1008 #define E82545_REVISION_4 4 #define E82545_MDIC_DATA_MASK 0x0000FFFF #define E82545_MDIC_OP_MASK 0x0c000000 #define E82545_MDIC_IE 0x20000000 #define E82545_EECD_FWE_DIS 0x00000010 /* Flash writes disabled */ #define E82545_EECD_FWE_EN 0x00000020 /* Flash writes enabled */ #define E82545_EECD_FWE_MASK 0x00000030 /* Flash writes mask */ #define E82545_BAR_REGISTER 0 #define E82545_BAR_REGISTER_LEN (128*1024) #define E82545_BAR_FLASH 1 #define E82545_BAR_FLASH_LEN (64*1024) #define E82545_BAR_IO 2 #define E82545_BAR_IO_LEN 8 #define E82545_IOADDR 0x00000000 #define E82545_IODATA 0x00000004 #define E82545_IO_REGISTER_MAX 0x0001FFFF #define E82545_IO_FLASH_BASE 0x00080000 #define E82545_IO_FLASH_MAX 0x000FFFFF #define E82545_ARRAY_ENTRY(reg, offset) (reg + (offset<<2)) #define E82545_RAR_MAX 15 #define E82545_MTA_MAX 127 #define E82545_VFTA_MAX 127 /* Slightly modified from the driver versions, hardcoded for 3 opcode bits, * followed by 6 address bits. * TODO: make opcode bits and addr bits configurable? * NVM Commands - Microwire */ #define E82545_NVM_OPCODE_BITS 3 #define E82545_NVM_ADDR_BITS 6 #define E82545_NVM_DATA_BITS 16 #define E82545_NVM_OPADDR_BITS (E82545_NVM_OPCODE_BITS + E82545_NVM_ADDR_BITS) #define E82545_NVM_ADDR_MASK ((1 << E82545_NVM_ADDR_BITS)-1) #define E82545_NVM_OPCODE_MASK \ (((1 << E82545_NVM_OPCODE_BITS) - 1) << E82545_NVM_ADDR_BITS) #define E82545_NVM_OPCODE_READ (0x6 << E82545_NVM_ADDR_BITS) /* read */ #define E82545_NVM_OPCODE_WRITE (0x5 << E82545_NVM_ADDR_BITS) /* write */ #define E82545_NVM_OPCODE_ERASE (0x7 << E82545_NVM_ADDR_BITS) /* erase */ #define E82545_NVM_OPCODE_EWEN (0x4 << E82545_NVM_ADDR_BITS) /* wr-enable */ #define E82545_NVM_EEPROM_SIZE 64 /* 64 * 16-bit values == 128K */ #define E1000_ICR_SRPD 0x00010000 /* This is an arbitrary number. There is no hard limit on the chip. 
*/ #define I82545_MAX_TXSEGS 64 /* Legacy receive descriptor */ struct e1000_rx_desc { uint64_t buffer_addr; /* Address of the descriptor's data buffer */ uint16_t length; /* Length of data DMAed into data buffer */ uint16_t csum; /* Packet checksum */ uint8_t status; /* Descriptor status */ uint8_t errors; /* Descriptor Errors */ uint16_t special; }; /* Transmit descriptor types */ #define E1000_TXD_MASK (E1000_TXD_CMD_DEXT | 0x00F00000) #define E1000_TXD_TYP_L (0) #define E1000_TXD_TYP_C (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_C) #define E1000_TXD_TYP_D (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D) /* Legacy transmit descriptor */ struct e1000_tx_desc { uint64_t buffer_addr; /* Address of the descriptor's data buffer */ union { uint32_t data; struct { uint16_t length; /* Data buffer length */ uint8_t cso; /* Checksum offset */ uint8_t cmd; /* Descriptor control */ } flags; } lower; union { uint32_t data; struct { uint8_t status; /* Descriptor status */ uint8_t css; /* Checksum start */ uint16_t special; } fields; } upper; }; /* Context descriptor */ struct e1000_context_desc { union { uint32_t ip_config; struct { uint8_t ipcss; /* IP checksum start */ uint8_t ipcso; /* IP checksum offset */ uint16_t ipcse; /* IP checksum end */ } ip_fields; } lower_setup; union { uint32_t tcp_config; struct { uint8_t tucss; /* TCP checksum start */ uint8_t tucso; /* TCP checksum offset */ uint16_t tucse; /* TCP checksum end */ } tcp_fields; } upper_setup; uint32_t cmd_and_length; union { uint32_t data; struct { uint8_t status; /* Descriptor status */ uint8_t hdr_len; /* Header length */ uint16_t mss; /* Maximum segment size */ } fields; } tcp_seg_setup; }; /* Data descriptor */ struct e1000_data_desc { uint64_t buffer_addr; /* Address of the descriptor's buffer address */ union { uint32_t data; struct { uint16_t length; /* Data buffer length */ uint8_t typ_len_ext; uint8_t cmd; } flags; } lower; union { uint32_t data; struct { uint8_t status; /* Descriptor status */ uint8_t popts; /* Packet Options */ uint16_t special; } fields; } upper; }; union e1000_tx_udesc { struct e1000_tx_desc td; struct e1000_context_desc cd; struct e1000_data_desc dd; }; /* Tx checksum info for a packet. */ struct ck_info { int ck_valid; /* ck_info is valid */ uint8_t ck_start; /* start byte of cksum calcuation */ uint8_t ck_off; /* offset of cksum insertion */ uint16_t ck_len; /* length of cksum calc: 0 is to packet-end */ }; /* * Debug printf */ static int e82545_debug = 0; #define WPRINTF(msg,params...) PRINTLN("e82545: " msg, params) #define DPRINTF(msg,params...) 
if (e82545_debug) WPRINTF(msg, params) #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) /* s/w representation of the RAL/RAH regs */ struct eth_uni { int eu_valid; int eu_addrsel; struct ether_addr eu_eth; }; struct e82545_softc { struct pci_devinst *esc_pi; struct vmctx *esc_ctx; struct mevent *esc_mevpitr; pthread_mutex_t esc_mtx; struct ether_addr esc_mac; net_backend_t *esc_be; /* General */ uint32_t esc_CTRL; /* x0000 device ctl */ uint32_t esc_FCAL; /* x0028 flow ctl addr lo */ uint32_t esc_FCAH; /* x002C flow ctl addr hi */ uint32_t esc_FCT; /* x0030 flow ctl type */ uint32_t esc_VET; /* x0038 VLAN eth type */ uint32_t esc_FCTTV; /* x0170 flow ctl tx timer */ uint32_t esc_LEDCTL; /* x0E00 LED control */ uint32_t esc_PBA; /* x1000 pkt buffer allocation */ - + /* Interrupt control */ int esc_irq_asserted; uint32_t esc_ICR; /* x00C0 cause read/clear */ uint32_t esc_ITR; /* x00C4 intr throttling */ uint32_t esc_ICS; /* x00C8 cause set */ uint32_t esc_IMS; /* x00D0 mask set/read */ uint32_t esc_IMC; /* x00D8 mask clear */ /* Transmit */ union e1000_tx_udesc *esc_txdesc; struct e1000_context_desc esc_txctx; pthread_t esc_tx_tid; pthread_cond_t esc_tx_cond; int esc_tx_enabled; int esc_tx_active; uint32_t esc_TXCW; /* x0178 transmit config */ uint32_t esc_TCTL; /* x0400 transmit ctl */ uint32_t esc_TIPG; /* x0410 inter-packet gap */ uint16_t esc_AIT; /* x0458 Adaptive Interframe Throttle */ uint64_t esc_tdba; /* verified 64-bit desc table addr */ uint32_t esc_TDBAL; /* x3800 desc table addr, low bits */ uint32_t esc_TDBAH; /* x3804 desc table addr, hi 32-bits */ uint32_t esc_TDLEN; /* x3808 # descriptors in bytes */ uint16_t esc_TDH; /* x3810 desc table head idx */ uint16_t esc_TDHr; /* internal read version of TDH */ uint16_t esc_TDT; /* x3818 desc table tail idx */ uint32_t esc_TIDV; /* x3820 intr delay */ uint32_t esc_TXDCTL; /* x3828 desc control */ uint32_t esc_TADV; /* x382C intr absolute delay */ - + /* L2 frame acceptance */ struct eth_uni esc_uni[16]; /* 16 x unicast MAC addresses */ uint32_t esc_fmcast[128]; /* Multicast filter bit-match */ uint32_t esc_fvlan[128]; /* VLAN 4096-bit filter */ - + /* Receive */ struct e1000_rx_desc *esc_rxdesc; pthread_cond_t esc_rx_cond; int esc_rx_enabled; int esc_rx_active; int esc_rx_loopback; uint32_t esc_RCTL; /* x0100 receive ctl */ uint32_t esc_FCRTL; /* x2160 flow cntl thresh, low */ uint32_t esc_FCRTH; /* x2168 flow cntl thresh, hi */ uint64_t esc_rdba; /* verified 64-bit desc table addr */ uint32_t esc_RDBAL; /* x2800 desc table addr, low bits */ uint32_t esc_RDBAH; /* x2804 desc table addr, hi 32-bits*/ uint32_t esc_RDLEN; /* x2808 #descriptors */ uint16_t esc_RDH; /* x2810 desc table head idx */ uint16_t esc_RDT; /* x2818 desc table tail idx */ uint32_t esc_RDTR; /* x2820 intr delay */ uint32_t esc_RXDCTL; /* x2828 desc control */ uint32_t esc_RADV; /* x282C intr absolute delay */ uint32_t esc_RSRPD; /* x2C00 recv small packet detect */ uint32_t esc_RXCSUM; /* x5000 receive cksum ctl */ - + /* IO Port register access */ uint32_t io_addr; /* Shadow copy of MDIC */ uint32_t mdi_control; /* Shadow copy of EECD */ uint32_t eeprom_control; /* Latest NVM in/out */ uint16_t nvm_data; uint16_t nvm_opaddr; /* stats */ uint32_t missed_pkt_count; /* dropped for no room in rx queue */ uint32_t pkt_rx_by_size[6]; uint32_t pkt_tx_by_size[6]; uint32_t good_pkt_rx_count; uint32_t bcast_pkt_rx_count; uint32_t mcast_pkt_rx_count; uint32_t good_pkt_tx_count; uint32_t bcast_pkt_tx_count; uint32_t mcast_pkt_tx_count; uint32_t 
oversize_rx_count; uint32_t tso_tx_count; uint64_t good_octets_rx; uint64_t good_octets_tx; uint64_t missed_octets; /* counts missed and oversized */ uint8_t nvm_bits:6; /* number of bits remaining in/out */ uint8_t nvm_mode:2; #define E82545_NVM_MODE_OPADDR 0x0 #define E82545_NVM_MODE_DATAIN 0x1 #define E82545_NVM_MODE_DATAOUT 0x2 /* EEPROM data */ uint16_t eeprom_data[E82545_NVM_EEPROM_SIZE]; }; static void e82545_reset(struct e82545_softc *sc, int dev); static void e82545_rx_enable(struct e82545_softc *sc); static void e82545_rx_disable(struct e82545_softc *sc); static void e82545_rx_callback(int fd, enum ev_type type, void *param); static void e82545_tx_start(struct e82545_softc *sc); static void e82545_tx_enable(struct e82545_softc *sc); static void e82545_tx_disable(struct e82545_softc *sc); static inline int e82545_size_stat_index(uint32_t size) { if (size <= 64) { return 0; } else if (size >= 1024) { return 5; } else { /* should be 1-4 */ return (ffs(size) - 6); } } static void e82545_init_eeprom(struct e82545_softc *sc) { uint16_t checksum, i; /* mac addr */ sc->eeprom_data[NVM_MAC_ADDR] = ((uint16_t)sc->esc_mac.octet[0]) | (((uint16_t)sc->esc_mac.octet[1]) << 8); sc->eeprom_data[NVM_MAC_ADDR+1] = ((uint16_t)sc->esc_mac.octet[2]) | (((uint16_t)sc->esc_mac.octet[3]) << 8); sc->eeprom_data[NVM_MAC_ADDR+2] = ((uint16_t)sc->esc_mac.octet[4]) | (((uint16_t)sc->esc_mac.octet[5]) << 8); /* pci ids */ sc->eeprom_data[NVM_SUB_DEV_ID] = E82545_SUBDEV_ID; sc->eeprom_data[NVM_SUB_VEN_ID] = E82545_VENDOR_ID_INTEL; sc->eeprom_data[NVM_DEV_ID] = E82545_DEV_ID_82545EM_COPPER; sc->eeprom_data[NVM_VEN_ID] = E82545_VENDOR_ID_INTEL; /* fill in the checksum */ checksum = 0; for (i = 0; i < NVM_CHECKSUM_REG; i++) { checksum += sc->eeprom_data[i]; } checksum = NVM_SUM - checksum; sc->eeprom_data[NVM_CHECKSUM_REG] = checksum; DPRINTF("eeprom checksum: 0x%x", checksum); } static void e82545_write_mdi(struct e82545_softc *sc, uint8_t reg_addr, uint8_t phy_addr, uint32_t data) { DPRINTF("Write mdi reg:0x%x phy:0x%x data: 0x%x", reg_addr, phy_addr, data); } static uint32_t e82545_read_mdi(struct e82545_softc *sc, uint8_t reg_addr, uint8_t phy_addr) { //DPRINTF("Read mdi reg:0x%x phy:0x%x", reg_addr, phy_addr); switch (reg_addr) { case PHY_STATUS: return (MII_SR_LINK_STATUS | MII_SR_AUTONEG_CAPS | MII_SR_AUTONEG_COMPLETE); case PHY_AUTONEG_ADV: return NWAY_AR_SELECTOR_FIELD; case PHY_LP_ABILITY: return 0; case PHY_1000T_STATUS: return (SR_1000T_LP_FD_CAPS | SR_1000T_REMOTE_RX_STATUS | SR_1000T_LOCAL_RX_STATUS); case PHY_ID1: return (M88E1011_I_PHY_ID >> 16) & 0xFFFF; case PHY_ID2: return (M88E1011_I_PHY_ID | E82545_REVISION_4) & 0xFFFF; default: DPRINTF("Unknown mdi read reg:0x%x phy:0x%x", reg_addr, phy_addr); return 0; } /* not reached */ } static void e82545_eecd_strobe(struct e82545_softc *sc) { /* Microwire state machine */ /* DPRINTF("eeprom state machine srtobe " "0x%x 0x%x 0x%x 0x%x", sc->nvm_mode, sc->nvm_bits, sc->nvm_opaddr, sc->nvm_data);*/ if (sc->nvm_bits == 0) { DPRINTF("eeprom state machine not expecting data! " "0x%x 0x%x 0x%x 0x%x", sc->nvm_mode, sc->nvm_bits, sc->nvm_opaddr, sc->nvm_data); return; } sc->nvm_bits--; if (sc->nvm_mode == E82545_NVM_MODE_DATAOUT) { /* shifting out */ if (sc->nvm_data & 0x8000) { sc->eeprom_control |= E1000_EECD_DO; } else { sc->eeprom_control &= ~E1000_EECD_DO; } sc->nvm_data <<= 1; if (sc->nvm_bits == 0) { /* read done, back to opcode mode. 
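 * As a concrete (hypothetical) example of the Microwire exchange handled
 * here: a READ of word 0x12 first clocks in the 9 opcode/address bits
 * 110 010010 (opcode 0x6, address 0x12), after which the 16 bits of
 * eeprom_data[0x12] are shifted out MSB-first on EECD_DO; once the last
 * bit has been shifted out, the state machine drops back to
 * opcode/address mode below.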
*/ sc->nvm_opaddr = 0; sc->nvm_mode = E82545_NVM_MODE_OPADDR; sc->nvm_bits = E82545_NVM_OPADDR_BITS; } } else if (sc->nvm_mode == E82545_NVM_MODE_DATAIN) { /* shifting in */ sc->nvm_data <<= 1; if (sc->eeprom_control & E1000_EECD_DI) { sc->nvm_data |= 1; } if (sc->nvm_bits == 0) { /* eeprom write */ uint16_t op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK; uint16_t addr = sc->nvm_opaddr & E82545_NVM_ADDR_MASK; if (op != E82545_NVM_OPCODE_WRITE) { DPRINTF("Illegal eeprom write op 0x%x", sc->nvm_opaddr); } else if (addr >= E82545_NVM_EEPROM_SIZE) { DPRINTF("Illegal eeprom write addr 0x%x", sc->nvm_opaddr); } else { DPRINTF("eeprom write eeprom[0x%x] = 0x%x", addr, sc->nvm_data); sc->eeprom_data[addr] = sc->nvm_data; } /* back to opcode mode */ sc->nvm_opaddr = 0; sc->nvm_mode = E82545_NVM_MODE_OPADDR; sc->nvm_bits = E82545_NVM_OPADDR_BITS; } } else if (sc->nvm_mode == E82545_NVM_MODE_OPADDR) { sc->nvm_opaddr <<= 1; if (sc->eeprom_control & E1000_EECD_DI) { sc->nvm_opaddr |= 1; } if (sc->nvm_bits == 0) { uint16_t op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK; switch (op) { case E82545_NVM_OPCODE_EWEN: DPRINTF("eeprom write enable: 0x%x", sc->nvm_opaddr); /* back to opcode mode */ sc->nvm_opaddr = 0; sc->nvm_mode = E82545_NVM_MODE_OPADDR; sc->nvm_bits = E82545_NVM_OPADDR_BITS; break; case E82545_NVM_OPCODE_READ: { uint16_t addr = sc->nvm_opaddr & E82545_NVM_ADDR_MASK; sc->nvm_mode = E82545_NVM_MODE_DATAOUT; sc->nvm_bits = E82545_NVM_DATA_BITS; if (addr < E82545_NVM_EEPROM_SIZE) { sc->nvm_data = sc->eeprom_data[addr]; DPRINTF("eeprom read: eeprom[0x%x] = 0x%x", addr, sc->nvm_data); } else { DPRINTF("eeprom illegal read: 0x%x", sc->nvm_opaddr); sc->nvm_data = 0; } break; } case E82545_NVM_OPCODE_WRITE: sc->nvm_mode = E82545_NVM_MODE_DATAIN; sc->nvm_bits = E82545_NVM_DATA_BITS; sc->nvm_data = 0; break; default: DPRINTF("eeprom unknown op: 0x%x", sc->nvm_opaddr); /* back to opcode mode */ sc->nvm_opaddr = 0; sc->nvm_mode = E82545_NVM_MODE_OPADDR; sc->nvm_bits = E82545_NVM_OPADDR_BITS; } } } else { DPRINTF("eeprom state machine wrong state! " "0x%x 0x%x 0x%x 0x%x", sc->nvm_mode, sc->nvm_bits, sc->nvm_opaddr, sc->nvm_data); } } static void e82545_itr_callback(int fd, enum ev_type type, void *param) { uint32_t new; struct e82545_softc *sc = param; pthread_mutex_lock(&sc->esc_mtx); new = sc->esc_ICR & sc->esc_IMS; if (new && !sc->esc_irq_asserted) { DPRINTF("itr callback: lintr assert %x", new); sc->esc_irq_asserted = 1; pci_lintr_assert(sc->esc_pi); } else { mevent_delete(sc->esc_mevpitr); sc->esc_mevpitr = NULL; } pthread_mutex_unlock(&sc->esc_mtx); } static void e82545_icr_assert(struct e82545_softc *sc, uint32_t bits) { uint32_t new; DPRINTF("icr assert: 0x%x", bits); - + /* * An interrupt is only generated if bits are set that * aren't already in the ICR, these bits are unmasked, * and there isn't an interrupt already pending. 
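 * For instance (hypothetical register state): with ICR = 0x04 and
 * IMS = 0x84, asserting bits = 0x80 gives new = 0x80 & ~0x04 & 0x84 = 0x80,
 * so the legacy INTx line is raised; asserting bits = 0x04 instead gives
 * new = 0 and only the ICR latch is updated.  When ITR is non-zero, the
 * next assertion is deferred by a timer of (ITR + 3905) / 3906 ms, i.e.
 * the 256 ns ITR units rounded up to whole milliseconds; even the reset
 * default of ITR = 250 (64 us) therefore throttles to a 1 ms tick in
 * this emulation.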
*/ new = bits & ~sc->esc_ICR & sc->esc_IMS; sc->esc_ICR |= bits; if (new == 0) { DPRINTF("icr assert: masked %x, ims %x", new, sc->esc_IMS); } else if (sc->esc_mevpitr != NULL) { DPRINTF("icr assert: throttled %x, ims %x", new, sc->esc_IMS); } else if (!sc->esc_irq_asserted) { DPRINTF("icr assert: lintr assert %x", new); sc->esc_irq_asserted = 1; pci_lintr_assert(sc->esc_pi); if (sc->esc_ITR != 0) { sc->esc_mevpitr = mevent_add( (sc->esc_ITR + 3905) / 3906, /* 256ns -> 1ms */ EVF_TIMER, e82545_itr_callback, sc); } } } static void e82545_ims_change(struct e82545_softc *sc, uint32_t bits) { uint32_t new; /* * Changing the mask may allow previously asserted * but masked interrupt requests to generate an interrupt. */ new = bits & sc->esc_ICR & ~sc->esc_IMS; sc->esc_IMS |= bits; if (new == 0) { DPRINTF("ims change: masked %x, ims %x", new, sc->esc_IMS); } else if (sc->esc_mevpitr != NULL) { DPRINTF("ims change: throttled %x, ims %x", new, sc->esc_IMS); } else if (!sc->esc_irq_asserted) { DPRINTF("ims change: lintr assert %x", new); sc->esc_irq_asserted = 1; pci_lintr_assert(sc->esc_pi); if (sc->esc_ITR != 0) { sc->esc_mevpitr = mevent_add( (sc->esc_ITR + 3905) / 3906, /* 256ns -> 1ms */ EVF_TIMER, e82545_itr_callback, sc); } } } static void e82545_icr_deassert(struct e82545_softc *sc, uint32_t bits) { DPRINTF("icr deassert: 0x%x", bits); sc->esc_ICR &= ~bits; /* * If there are no longer any interrupt sources and there * was an asserted interrupt, clear it */ if (sc->esc_irq_asserted && !(sc->esc_ICR & sc->esc_IMS)) { DPRINTF("icr deassert: lintr deassert %x", bits); pci_lintr_deassert(sc->esc_pi); sc->esc_irq_asserted = 0; } } static void e82545_intr_write(struct e82545_softc *sc, uint32_t offset, uint32_t value) { DPRINTF("intr_write: off %x, val %x", offset, value); - + switch (offset) { case E1000_ICR: e82545_icr_deassert(sc, value); break; case E1000_ITR: sc->esc_ITR = value; break; case E1000_ICS: sc->esc_ICS = value; /* not used: store for debug */ e82545_icr_assert(sc, value); break; case E1000_IMS: e82545_ims_change(sc, value); break; case E1000_IMC: sc->esc_IMC = value; /* for debug */ sc->esc_IMS &= ~value; // XXX clear interrupts if all ICR bits now masked // and interrupt was pending ? break; default: break; } } static uint32_t e82545_intr_read(struct e82545_softc *sc, uint32_t offset) { uint32_t retval; retval = 0; DPRINTF("intr_read: off %x", offset); - + switch (offset) { case E1000_ICR: retval = sc->esc_ICR; sc->esc_ICR = 0; e82545_icr_deassert(sc, ~0); break; case E1000_ITR: retval = sc->esc_ITR; break; case E1000_ICS: /* write-only register */ break; case E1000_IMS: retval = sc->esc_IMS; break; case E1000_IMC: /* write-only register */ break; default: break; } return (retval); } static void e82545_devctl(struct e82545_softc *sc, uint32_t val) { sc->esc_CTRL = val & ~E1000_CTRL_RST; if (val & E1000_CTRL_RST) { DPRINTF("e1k: s/w reset, ctl %x", val); e82545_reset(sc, 1); } /* XXX check for phy reset ? 
*/ } static void e82545_rx_update_rdba(struct e82545_softc *sc) { /* XXX verify desc base/len within phys mem range */ sc->esc_rdba = (uint64_t)sc->esc_RDBAH << 32 | sc->esc_RDBAL; - + /* Cache host mapping of guest descriptor array */ sc->esc_rxdesc = paddr_guest2host(sc->esc_ctx, - sc->esc_rdba, sc->esc_RDLEN); + sc->esc_rdba, sc->esc_RDLEN); } static void e82545_rx_ctl(struct e82545_softc *sc, uint32_t val) { int on; on = ((val & E1000_RCTL_EN) == E1000_RCTL_EN); /* Save RCTL after stripping reserved bits 31:27,24,21,14,11:10,0 */ sc->esc_RCTL = val & ~0xF9204c01; DPRINTF("rx_ctl - %s RCTL %x, val %x", on ? "on" : "off", sc->esc_RCTL, val); /* state change requested */ if (on != sc->esc_rx_enabled) { if (on) { /* Catch disallowed/unimplemented settings */ //assert(!(val & E1000_RCTL_LBM_TCVR)); if (sc->esc_RCTL & E1000_RCTL_LBM_TCVR) { sc->esc_rx_loopback = 1; } else { sc->esc_rx_loopback = 0; } e82545_rx_update_rdba(sc); e82545_rx_enable(sc); } else { e82545_rx_disable(sc); sc->esc_rx_loopback = 0; sc->esc_rdba = 0; sc->esc_rxdesc = NULL; } } } static void e82545_tx_update_tdba(struct e82545_softc *sc) { /* XXX verify desc base/len within phys mem range */ sc->esc_tdba = (uint64_t)sc->esc_TDBAH << 32 | sc->esc_TDBAL; /* Cache host mapping of guest descriptor array */ sc->esc_txdesc = paddr_guest2host(sc->esc_ctx, sc->esc_tdba, sc->esc_TDLEN); } static void e82545_tx_ctl(struct e82545_softc *sc, uint32_t val) { int on; - + on = ((val & E1000_TCTL_EN) == E1000_TCTL_EN); /* ignore TCTL_EN settings that don't change state */ if (on == sc->esc_tx_enabled) return; if (on) { e82545_tx_update_tdba(sc); e82545_tx_enable(sc); } else { e82545_tx_disable(sc); sc->esc_tdba = 0; sc->esc_txdesc = NULL; } /* Save TCTL value after stripping reserved bits 31:25,23,2,0 */ sc->esc_TCTL = val & ~0xFE800005; } int e82545_bufsz(uint32_t rctl) { switch (rctl & (E1000_RCTL_BSEX | E1000_RCTL_SZ_256)) { case (E1000_RCTL_SZ_2048): return (2048); case (E1000_RCTL_SZ_1024): return (1024); case (E1000_RCTL_SZ_512): return (512); case (E1000_RCTL_SZ_256): return (256); case (E1000_RCTL_BSEX|E1000_RCTL_SZ_16384): return (16384); case (E1000_RCTL_BSEX|E1000_RCTL_SZ_8192): return (8192); case (E1000_RCTL_BSEX|E1000_RCTL_SZ_4096): return (4096); } return (256); /* Forbidden value. */ } /* XXX one packet at a time until this is debugged */ static void e82545_rx_callback(int fd, enum ev_type type, void *param) { struct e82545_softc *sc = param; struct e1000_rx_desc *rxd; struct iovec vec[64]; int left, len, lim, maxpktsz, maxpktdesc, bufsz, i, n, size; uint32_t cause = 0; uint16_t *tp, tag, head; pthread_mutex_lock(&sc->esc_mtx); DPRINTF("rx_run: head %x, tail %x", sc->esc_RDH, sc->esc_RDT); if (!sc->esc_rx_enabled || sc->esc_rx_loopback) { DPRINTF("rx disabled (!%d || %d) -- packet(s) dropped", sc->esc_rx_enabled, sc->esc_rx_loopback); while (netbe_rx_discard(sc->esc_be) > 0) { } goto done1; } bufsz = e82545_bufsz(sc->esc_RCTL); maxpktsz = (sc->esc_RCTL & E1000_RCTL_LPE) ? 
16384 : 1522; maxpktdesc = (maxpktsz + bufsz - 1) / bufsz; size = sc->esc_RDLEN / 16; head = sc->esc_RDH; left = (size + sc->esc_RDT - head) % size; if (left < maxpktdesc) { DPRINTF("rx overflow (%d < %d) -- packet(s) dropped", left, maxpktdesc); while (netbe_rx_discard(sc->esc_be) > 0) { } goto done1; } sc->esc_rx_active = 1; pthread_mutex_unlock(&sc->esc_mtx); for (lim = size / 4; lim > 0 && left >= maxpktdesc; lim -= n) { /* Grab rx descriptor pointed to by the head pointer */ for (i = 0; i < maxpktdesc; i++) { rxd = &sc->esc_rxdesc[(head + i) % size]; vec[i].iov_base = paddr_guest2host(sc->esc_ctx, rxd->buffer_addr, bufsz); vec[i].iov_len = bufsz; } len = netbe_recv(sc->esc_be, vec, maxpktdesc); if (len <= 0) { DPRINTF("netbe_recv() returned %d", len); goto done; } /* * Adjust the packet length based on whether the CRC needs * to be stripped or if the packet is less than the minimum * eth packet size. */ if (len < ETHER_MIN_LEN - ETHER_CRC_LEN) len = ETHER_MIN_LEN - ETHER_CRC_LEN; if (!(sc->esc_RCTL & E1000_RCTL_SECRC)) len += ETHER_CRC_LEN; n = (len + bufsz - 1) / bufsz; DPRINTF("packet read %d bytes, %d segs, head %d", len, n, head); /* Apply VLAN filter. */ tp = (uint16_t *)vec[0].iov_base + 6; if ((sc->esc_RCTL & E1000_RCTL_VFE) && (ntohs(tp[0]) == sc->esc_VET)) { tag = ntohs(tp[1]) & 0x0fff; if ((sc->esc_fvlan[tag >> 5] & (1 << (tag & 0x1f))) != 0) { DPRINTF("known VLAN %d", tag); } else { DPRINTF("unknown VLAN %d", tag); n = 0; continue; } } /* Update all consumed descriptors. */ for (i = 0; i < n - 1; i++) { rxd = &sc->esc_rxdesc[(head + i) % size]; rxd->length = bufsz; rxd->csum = 0; rxd->errors = 0; rxd->special = 0; rxd->status = E1000_RXD_STAT_DD; } rxd = &sc->esc_rxdesc[(head + i) % size]; rxd->length = len % bufsz; rxd->csum = 0; rxd->errors = 0; rxd->special = 0; /* XXX signal no checksum for now */ rxd->status = E1000_RXD_STAT_PIF | E1000_RXD_STAT_IXSM | E1000_RXD_STAT_EOP | E1000_RXD_STAT_DD; /* Schedule receive interrupts. */ if (len <= sc->esc_RSRPD) { cause |= E1000_ICR_SRPD | E1000_ICR_RXT0; } else { /* XXX: RDRT and RADV timers should be here. */ cause |= E1000_ICR_RXT0; } head = (head + n) % size; left -= n; } done: pthread_mutex_lock(&sc->esc_mtx); sc->esc_rx_active = 0; if (sc->esc_rx_enabled == 0) pthread_cond_signal(&sc->esc_rx_cond); sc->esc_RDH = head; /* Respect E1000_RCTL_RDMTS */ left = (size + sc->esc_RDT - head) % size; if (left < (size >> (((sc->esc_RCTL >> 8) & 3) + 1))) cause |= E1000_ICR_RXDMT0; /* Assert all accumulated interrupts. */ if (cause != 0) e82545_icr_assert(sc, cause); done1: DPRINTF("rx_run done: head %x, tail %x", sc->esc_RDH, sc->esc_RDT); pthread_mutex_unlock(&sc->esc_mtx); } static uint16_t e82545_carry(uint32_t sum) { sum = (sum & 0xFFFF) + (sum >> 16); if (sum > 0xFFFF) sum -= 0xFFFF; return (sum); } static uint16_t e82545_buf_checksum(uint8_t *buf, int len) { int i; uint32_t sum = 0; /* Checksum all the pairs of bytes first... */ for (i = 0; i < (len & ~1U); i += 2) sum += *((u_int16_t *)(buf + i)); /* * If there's a single byte left over, checksum it, too. * Network byte order is big-endian, so the remaining byte is * the high byte. */ if (i < len) sum += htons(buf[i] << 8); return (e82545_carry(sum)); } static uint16_t e82545_iov_checksum(struct iovec *iov, int iovcnt, int off, int len) { int now, odd; uint32_t sum = 0, s; /* Skip completely unneeded vectors. */ while (iovcnt > 0 && iov->iov_len <= off && off > 0) { off -= iov->iov_len; iov++; iovcnt--; } /* Calculate checksum of requested range. 
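 * This is the usual 16-bit one's-complement sum with end-around carry:
 * e.g. partial sums 0xFFFD + 0x0006 = 0x10003 fold to 0x0003 + 0x0001 =
 * 0x0004.  The "odd" toggle below handles segments that end on an odd
 * byte: the next segment's partial sum is shifted by 8 (s << 8), which,
 * once the carries are folded, is equivalent to byte-swapping its
 * contribution so every byte keeps its correct even/odd position in the
 * overall 16-bit stream.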
*/ odd = 0; while (len > 0 && iovcnt > 0) { now = MIN(len, iov->iov_len - off); s = e82545_buf_checksum(iov->iov_base + off, now); sum += odd ? (s << 8) : s; odd ^= (now & 1); len -= now; off = 0; iov++; iovcnt--; } return (e82545_carry(sum)); } /* * Return the transmit descriptor type. */ int e82545_txdesc_type(uint32_t lower) { int type; type = 0; - + if (lower & E1000_TXD_CMD_DEXT) type = lower & E1000_TXD_MASK; return (type); } static void e82545_transmit_checksum(struct iovec *iov, int iovcnt, struct ck_info *ck) { uint16_t cksum; int cklen; DPRINTF("tx cksum: iovcnt/s/off/len %d/%d/%d/%d", iovcnt, ck->ck_start, ck->ck_off, ck->ck_len); cklen = ck->ck_len ? ck->ck_len - ck->ck_start + 1 : INT_MAX; cksum = e82545_iov_checksum(iov, iovcnt, ck->ck_start, cklen); *(uint16_t *)((uint8_t *)iov[0].iov_base + ck->ck_off) = ~cksum; } static void e82545_transmit_backend(struct e82545_softc *sc, struct iovec *iov, int iovcnt) { if (sc->esc_be == NULL) return; (void) netbe_send(sc->esc_be, iov, iovcnt); } static void e82545_transmit_done(struct e82545_softc *sc, uint16_t head, uint16_t tail, uint16_t dsize, int *tdwb) { union e1000_tx_udesc *dsc; for ( ; head != tail; head = (head + 1) % dsize) { dsc = &sc->esc_txdesc[head]; if (dsc->td.lower.data & E1000_TXD_CMD_RS) { dsc->td.upper.data |= E1000_TXD_STAT_DD; *tdwb = 1; } } } static int e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, uint16_t dsize, uint16_t *rhead, int *tdwb) { uint8_t *hdr, *hdrp; struct iovec iovb[I82545_MAX_TXSEGS + 2]; struct iovec tiov[I82545_MAX_TXSEGS + 2]; struct e1000_context_desc *cd; struct ck_info ckinfo[2]; struct iovec *iov; union e1000_tx_udesc *dsc; int desc, dtype, len, ntype, iovcnt, tcp, tso; int mss, paylen, seg, tiovcnt, left, now, nleft, nnow, pv, pvoff; unsigned hdrlen, vlen; uint32_t tcpsum, tcpseq; uint16_t ipcs, tcpcs, ipid, ohead; ckinfo[0].ck_valid = ckinfo[1].ck_valid = 0; iovcnt = 0; ntype = 0; tso = 0; ohead = head; /* iovb[0/1] may be used for writable copy of headers. */ iov = &iovb[2]; for (desc = 0; ; desc++, head = (head + 1) % dsize) { if (head == tail) { *rhead = head; return (0); } dsc = &sc->esc_txdesc[head]; dtype = e82545_txdesc_type(dsc->td.lower.data); if (desc == 0) { switch (dtype) { case E1000_TXD_TYP_C: DPRINTF("tx ctxt desc idx %d: %016jx " "%08x%08x", head, dsc->td.buffer_addr, dsc->td.upper.data, dsc->td.lower.data); /* Save context and return */ sc->esc_txctx = dsc->cd; goto done; case E1000_TXD_TYP_L: DPRINTF("tx legacy desc idx %d: %08x%08x", head, dsc->td.upper.data, dsc->td.lower.data); /* * legacy cksum start valid in first descriptor */ ntype = dtype; ckinfo[0].ck_start = dsc->td.upper.fields.css; break; case E1000_TXD_TYP_D: DPRINTF("tx data desc idx %d: %08x%08x", head, dsc->td.upper.data, dsc->td.lower.data); ntype = dtype; break; default: break; } } else { /* Descriptor type must be consistent */ assert(dtype == ntype); DPRINTF("tx next desc idx %d: %08x%08x", head, dsc->td.upper.data, dsc->td.lower.data); } len = (dtype == E1000_TXD_TYP_L) ? dsc->td.lower.flags.length : dsc->dd.lower.data & 0xFFFFF; if (len > 0) { /* Strip checksum supplied by guest. */ if ((dsc->td.lower.data & E1000_TXD_CMD_EOP) != 0 && (dsc->td.lower.data & E1000_TXD_CMD_IFCS) == 0) len -= 2; if (iovcnt < I82545_MAX_TXSEGS) { iov[iovcnt].iov_base = paddr_guest2host( sc->esc_ctx, dsc->td.buffer_addr, len); iov[iovcnt].iov_len = len; } iovcnt++; } /* * Pull out info that is valid in the final descriptor * and exit descriptor loop. 
*/ if (dsc->td.lower.data & E1000_TXD_CMD_EOP) { if (dtype == E1000_TXD_TYP_L) { if (dsc->td.lower.data & E1000_TXD_CMD_IC) { ckinfo[0].ck_valid = 1; ckinfo[0].ck_off = dsc->td.lower.flags.cso; ckinfo[0].ck_len = 0; } } else { cd = &sc->esc_txctx; if (dsc->dd.lower.data & E1000_TXD_CMD_TSE) tso = 1; if (dsc->dd.upper.fields.popts & E1000_TXD_POPTS_IXSM) ckinfo[0].ck_valid = 1; if (dsc->dd.upper.fields.popts & E1000_TXD_POPTS_IXSM || tso) { ckinfo[0].ck_start = cd->lower_setup.ip_fields.ipcss; ckinfo[0].ck_off = cd->lower_setup.ip_fields.ipcso; ckinfo[0].ck_len = cd->lower_setup.ip_fields.ipcse; } if (dsc->dd.upper.fields.popts & E1000_TXD_POPTS_TXSM) ckinfo[1].ck_valid = 1; if (dsc->dd.upper.fields.popts & E1000_TXD_POPTS_TXSM || tso) { ckinfo[1].ck_start = cd->upper_setup.tcp_fields.tucss; ckinfo[1].ck_off = cd->upper_setup.tcp_fields.tucso; ckinfo[1].ck_len = cd->upper_setup.tcp_fields.tucse; } } break; } } if (iovcnt > I82545_MAX_TXSEGS) { WPRINTF("tx too many descriptors (%d > %d) -- dropped", iovcnt, I82545_MAX_TXSEGS); goto done; } hdrlen = vlen = 0; /* Estimate writable space for VLAN header insertion. */ if ((sc->esc_CTRL & E1000_CTRL_VME) && (dsc->td.lower.data & E1000_TXD_CMD_VLE)) { hdrlen = ETHER_ADDR_LEN*2; vlen = ETHER_VLAN_ENCAP_LEN; } if (!tso) { /* Estimate required writable space for checksums. */ if (ckinfo[0].ck_valid) hdrlen = MAX(hdrlen, ckinfo[0].ck_off + 2); if (ckinfo[1].ck_valid) hdrlen = MAX(hdrlen, ckinfo[1].ck_off + 2); /* Round up writable space to the first vector. */ if (hdrlen != 0 && iov[0].iov_len > hdrlen && iov[0].iov_len < hdrlen + 100) hdrlen = iov[0].iov_len; } else { /* In case of TSO header length provided by software. */ hdrlen = sc->esc_txctx.tcp_seg_setup.fields.hdr_len; /* * Cap the header length at 240 based on 7.2.4.5 of * the Intel 82576EB (Rev 2.63) datasheet. */ if (hdrlen > 240) { WPRINTF("TSO hdrlen too large: %d", hdrlen); goto done; } /* * If VLAN insertion is requested, ensure the header * at least holds the amount of data copied during * VLAN insertion below. * * XXX: Realistic packets will include a full Ethernet * header before the IP header at ckinfo[0].ck_start, * but this check is sufficient to prevent * out-of-bounds access below. */ if (vlen != 0 && hdrlen < ETHER_ADDR_LEN*2) { WPRINTF("TSO hdrlen too small for vlan insertion " "(%d vs %d) -- dropped", hdrlen, ETHER_ADDR_LEN*2); goto done; } /* * Ensure that the header length covers the used fields * in the IP and TCP headers as well as the IP and TCP * checksums. The following fields are accessed below: * * Header | Field | Offset | Length * -------+-------+--------+------- * IPv4 | len | 2 | 2 * IPv4 | ID | 4 | 2 * IPv6 | len | 4 | 2 * TCP | seq # | 4 | 4 * TCP | flags | 13 | 1 * UDP | len | 4 | 4 */ if (hdrlen < ckinfo[0].ck_start + 6 || hdrlen < ckinfo[0].ck_off + 2) { WPRINTF("TSO hdrlen too small for IP fields (%d) " "-- dropped", hdrlen); goto done; } if (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_TCP) { if (hdrlen < ckinfo[1].ck_start + 14 || (ckinfo[1].ck_valid && hdrlen < ckinfo[1].ck_off + 2)) { WPRINTF("TSO hdrlen too small for TCP fields " "(%d) -- dropped", hdrlen); goto done; } } else { if (hdrlen < ckinfo[1].ck_start + 8) { WPRINTF("TSO hdrlen too small for UDP fields " "(%d) -- dropped", hdrlen); goto done; } } } /* Allocate, fill and prepend writable header vector. 
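 * The original header bytes live in guest memory, so a private copy is
 * built here; checksums, the optional VLAN tag and the per-segment TSO
 * fields are patched into this copy rather than into the guest buffers.
 * As a worked (hypothetical) TSO example: hdrlen = 54 (14 Ethernet +
 * 20 IP + 20 TCP), paylen = 3000 and mss = 1460 produce three segments
 * of 1460, 1460 and 80 payload bytes; for each one the IPv4 total length
 * becomes hdrlen - ipcss + now (54 - 14 + 1460 = 1500), the IP ID is
 * incremented per segment, and the TCP sequence number is advanced by
 * the payload already sent (0, 1460, 2920).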
*/ if (hdrlen != 0) { hdr = __builtin_alloca(hdrlen + vlen); hdr += vlen; for (left = hdrlen, hdrp = hdr; left > 0; left -= now, hdrp += now) { now = MIN(left, iov->iov_len); memcpy(hdrp, iov->iov_base, now); iov->iov_base += now; iov->iov_len -= now; if (iov->iov_len == 0) { iov++; iovcnt--; } } iov--; iovcnt++; iov->iov_base = hdr; iov->iov_len = hdrlen; } else hdr = NULL; /* Insert VLAN tag. */ if (vlen != 0) { hdr -= ETHER_VLAN_ENCAP_LEN; memmove(hdr, hdr + ETHER_VLAN_ENCAP_LEN, ETHER_ADDR_LEN*2); hdrlen += ETHER_VLAN_ENCAP_LEN; hdr[ETHER_ADDR_LEN*2 + 0] = sc->esc_VET >> 8; hdr[ETHER_ADDR_LEN*2 + 1] = sc->esc_VET & 0xff; hdr[ETHER_ADDR_LEN*2 + 2] = dsc->td.upper.fields.special >> 8; hdr[ETHER_ADDR_LEN*2 + 3] = dsc->td.upper.fields.special & 0xff; iov->iov_base = hdr; iov->iov_len += ETHER_VLAN_ENCAP_LEN; /* Correct checksum offsets after VLAN tag insertion. */ ckinfo[0].ck_start += ETHER_VLAN_ENCAP_LEN; ckinfo[0].ck_off += ETHER_VLAN_ENCAP_LEN; if (ckinfo[0].ck_len != 0) ckinfo[0].ck_len += ETHER_VLAN_ENCAP_LEN; ckinfo[1].ck_start += ETHER_VLAN_ENCAP_LEN; ckinfo[1].ck_off += ETHER_VLAN_ENCAP_LEN; if (ckinfo[1].ck_len != 0) ckinfo[1].ck_len += ETHER_VLAN_ENCAP_LEN; } /* Simple non-TSO case. */ if (!tso) { /* Calculate checksums and transmit. */ if (ckinfo[0].ck_valid) e82545_transmit_checksum(iov, iovcnt, &ckinfo[0]); if (ckinfo[1].ck_valid) e82545_transmit_checksum(iov, iovcnt, &ckinfo[1]); e82545_transmit_backend(sc, iov, iovcnt); goto done; } /* Doing TSO. */ tcp = (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_TCP) != 0; mss = sc->esc_txctx.tcp_seg_setup.fields.mss; paylen = (sc->esc_txctx.cmd_and_length & 0x000fffff); DPRINTF("tx %s segmentation offload %d+%d/%d bytes %d iovs", tcp ? "TCP" : "UDP", hdrlen, paylen, mss, iovcnt); ipid = ntohs(*(uint16_t *)&hdr[ckinfo[0].ck_start + 4]); tcpseq = 0; if (tcp) tcpseq = ntohl(*(uint32_t *)&hdr[ckinfo[1].ck_start + 4]); ipcs = *(uint16_t *)&hdr[ckinfo[0].ck_off]; tcpcs = 0; if (ckinfo[1].ck_valid) /* Save partial pseudo-header checksum. */ tcpcs = *(uint16_t *)&hdr[ckinfo[1].ck_off]; pv = 1; pvoff = 0; for (seg = 0, left = paylen; left > 0; seg++, left -= now) { now = MIN(left, mss); /* Construct IOVs for the segment. */ /* Include whole original header. */ tiov[0].iov_base = hdr; tiov[0].iov_len = hdrlen; tiovcnt = 1; /* Include respective part of payload IOV. */ for (nleft = now; pv < iovcnt && nleft > 0; nleft -= nnow) { nnow = MIN(nleft, iov[pv].iov_len - pvoff); tiov[tiovcnt].iov_base = iov[pv].iov_base + pvoff; tiov[tiovcnt++].iov_len = nnow; if (pvoff + nnow == iov[pv].iov_len) { pv++; pvoff = 0; } else pvoff += nnow; } DPRINTF("tx segment %d %d+%d bytes %d iovs", seg, hdrlen, now, tiovcnt); /* Update IP header. */ if (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_IP) { /* IPv4 -- set length and ID */ *(uint16_t *)&hdr[ckinfo[0].ck_start + 2] = htons(hdrlen - ckinfo[0].ck_start + now); *(uint16_t *)&hdr[ckinfo[0].ck_start + 4] = htons(ipid + seg); } else { /* IPv6 -- set length */ *(uint16_t *)&hdr[ckinfo[0].ck_start + 4] = htons(hdrlen - ckinfo[0].ck_start - 40 + now); } /* Update pseudo-header checksum. */ tcpsum = tcpcs; tcpsum += htons(hdrlen - ckinfo[1].ck_start + now); /* Update TCP/UDP headers. */ if (tcp) { /* Update sequence number and FIN/PUSH flags. */ *(uint32_t *)&hdr[ckinfo[1].ck_start + 4] = htonl(tcpseq + paylen - left); if (now < left) { hdr[ckinfo[1].ck_start + 13] &= ~(TH_FIN | TH_PUSH); } } else { /* Update payload length. 
*/ *(uint32_t *)&hdr[ckinfo[1].ck_start + 4] = hdrlen - ckinfo[1].ck_start + now; } /* Calculate checksums and transmit. */ if (ckinfo[0].ck_valid) { *(uint16_t *)&hdr[ckinfo[0].ck_off] = ipcs; e82545_transmit_checksum(tiov, tiovcnt, &ckinfo[0]); } if (ckinfo[1].ck_valid) { *(uint16_t *)&hdr[ckinfo[1].ck_off] = e82545_carry(tcpsum); e82545_transmit_checksum(tiov, tiovcnt, &ckinfo[1]); } e82545_transmit_backend(sc, tiov, tiovcnt); } done: head = (head + 1) % dsize; e82545_transmit_done(sc, ohead, head, dsize, tdwb); *rhead = head; return (desc + 1); } static void e82545_tx_run(struct e82545_softc *sc) { uint32_t cause; uint16_t head, rhead, tail, size; int lim, tdwb, sent; head = sc->esc_TDH; tail = sc->esc_TDT; size = sc->esc_TDLEN / 16; DPRINTF("tx_run: head %x, rhead %x, tail %x", sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT); pthread_mutex_unlock(&sc->esc_mtx); rhead = head; tdwb = 0; for (lim = size / 4; sc->esc_tx_enabled && lim > 0; lim -= sent) { sent = e82545_transmit(sc, head, tail, size, &rhead, &tdwb); if (sent == 0) break; head = rhead; } pthread_mutex_lock(&sc->esc_mtx); sc->esc_TDH = head; sc->esc_TDHr = rhead; cause = 0; if (tdwb) cause |= E1000_ICR_TXDW; if (lim != size / 4 && sc->esc_TDH == sc->esc_TDT) cause |= E1000_ICR_TXQE; if (cause) e82545_icr_assert(sc, cause); DPRINTF("tx_run done: head %x, rhead %x, tail %x", sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT); } static _Noreturn void * e82545_tx_thread(void *param) { struct e82545_softc *sc = param; pthread_mutex_lock(&sc->esc_mtx); for (;;) { while (!sc->esc_tx_enabled || sc->esc_TDHr == sc->esc_TDT) { if (sc->esc_tx_enabled && sc->esc_TDHr != sc->esc_TDT) break; sc->esc_tx_active = 0; if (sc->esc_tx_enabled == 0) pthread_cond_signal(&sc->esc_tx_cond); pthread_cond_wait(&sc->esc_tx_cond, &sc->esc_mtx); } sc->esc_tx_active = 1; /* Process some tx descriptors. Lock dropped inside. 
*/ e82545_tx_run(sc); } } static void e82545_tx_start(struct e82545_softc *sc) { if (sc->esc_tx_active == 0) pthread_cond_signal(&sc->esc_tx_cond); } static void e82545_tx_enable(struct e82545_softc *sc) { sc->esc_tx_enabled = 1; } static void e82545_tx_disable(struct e82545_softc *sc) { sc->esc_tx_enabled = 0; while (sc->esc_tx_active) pthread_cond_wait(&sc->esc_tx_cond, &sc->esc_mtx); } static void e82545_rx_enable(struct e82545_softc *sc) { sc->esc_rx_enabled = 1; } static void e82545_rx_disable(struct e82545_softc *sc) { sc->esc_rx_enabled = 0; while (sc->esc_rx_active) pthread_cond_wait(&sc->esc_rx_cond, &sc->esc_mtx); } static void e82545_write_ra(struct e82545_softc *sc, int reg, uint32_t wval) { struct eth_uni *eu; int idx; idx = reg >> 1; assert(idx < 15); eu = &sc->esc_uni[idx]; if (reg & 0x1) { /* RAH */ eu->eu_valid = ((wval & E1000_RAH_AV) == E1000_RAH_AV); eu->eu_addrsel = (wval >> 16) & 0x3; eu->eu_eth.octet[5] = wval >> 8; eu->eu_eth.octet[4] = wval; } else { /* RAL */ eu->eu_eth.octet[3] = wval >> 24; eu->eu_eth.octet[2] = wval >> 16; eu->eu_eth.octet[1] = wval >> 8; eu->eu_eth.octet[0] = wval; } } static uint32_t e82545_read_ra(struct e82545_softc *sc, int reg) { struct eth_uni *eu; uint32_t retval; int idx; idx = reg >> 1; assert(idx < 15); eu = &sc->esc_uni[idx]; if (reg & 0x1) { /* RAH */ retval = (eu->eu_valid << 31) | (eu->eu_addrsel << 16) | (eu->eu_eth.octet[5] << 8) | eu->eu_eth.octet[4]; } else { /* RAL */ retval = (eu->eu_eth.octet[3] << 24) | (eu->eu_eth.octet[2] << 16) | (eu->eu_eth.octet[1] << 8) | eu->eu_eth.octet[0]; } - return (retval); + return (retval); } static void e82545_write_register(struct e82545_softc *sc, uint32_t offset, uint32_t value) { int ridx; - + if (offset & 0x3) { DPRINTF("Unaligned register write offset:0x%x value:0x%x", offset, value); return; } DPRINTF("Register write: 0x%x value: 0x%x", offset, value); switch (offset) { case E1000_CTRL: case E1000_CTRL_DUP: e82545_devctl(sc, value); break; case E1000_FCAL: sc->esc_FCAL = value; break; case E1000_FCAH: sc->esc_FCAH = value & ~0xFFFF0000; break; case E1000_FCT: sc->esc_FCT = value & ~0xFFFF0000; break; case E1000_VET: sc->esc_VET = value & ~0xFFFF0000; break; case E1000_FCTTV: sc->esc_FCTTV = value & ~0xFFFF0000; break; case E1000_LEDCTL: sc->esc_LEDCTL = value & ~0x30303000; break; case E1000_PBA: sc->esc_PBA = value & 0x0000FF80; break; case E1000_ICR: case E1000_ITR: case E1000_ICS: case E1000_IMS: case E1000_IMC: e82545_intr_write(sc, offset, value); break; case E1000_RCTL: e82545_rx_ctl(sc, value); break; case E1000_FCRTL: sc->esc_FCRTL = value & ~0xFFFF0007; break; case E1000_FCRTH: sc->esc_FCRTH = value & ~0xFFFF0007; break; case E1000_RDBAL(0): sc->esc_RDBAL = value & ~0xF; if (sc->esc_rx_enabled) { /* Apparently legal: update cached address */ e82545_rx_update_rdba(sc); } break; case E1000_RDBAH(0): assert(!sc->esc_rx_enabled); sc->esc_RDBAH = value; break; case E1000_RDLEN(0): assert(!sc->esc_rx_enabled); sc->esc_RDLEN = value & ~0xFFF0007F; break; case E1000_RDH(0): /* XXX should only ever be zero ? Range check ? */ sc->esc_RDH = value; break; case E1000_RDT(0): /* XXX if this opens up the rx ring, do something ? 
*/ sc->esc_RDT = value; break; case E1000_RDTR: /* ignore FPD bit 31 */ sc->esc_RDTR = value & ~0xFFFF0000; break; case E1000_RXDCTL(0): sc->esc_RXDCTL = value & ~0xFEC0C0C0; break; case E1000_RADV: sc->esc_RADV = value & ~0xFFFF0000; break; case E1000_RSRPD: sc->esc_RSRPD = value & ~0xFFFFF000; break; case E1000_RXCSUM: sc->esc_RXCSUM = value & ~0xFFFFF800; break; case E1000_TXCW: sc->esc_TXCW = value & ~0x3FFF0000; break; case E1000_TCTL: e82545_tx_ctl(sc, value); break; case E1000_TIPG: sc->esc_TIPG = value; break; case E1000_AIT: sc->esc_AIT = value; break; case E1000_TDBAL(0): sc->esc_TDBAL = value & ~0xF; if (sc->esc_tx_enabled) e82545_tx_update_tdba(sc); break; case E1000_TDBAH(0): sc->esc_TDBAH = value; if (sc->esc_tx_enabled) e82545_tx_update_tdba(sc); break; case E1000_TDLEN(0): sc->esc_TDLEN = value & ~0xFFF0007F; if (sc->esc_tx_enabled) e82545_tx_update_tdba(sc); break; case E1000_TDH(0): //assert(!sc->esc_tx_enabled); /* XXX should only ever be zero ? Range check ? */ sc->esc_TDHr = sc->esc_TDH = value; break; case E1000_TDT(0): /* XXX range check ? */ sc->esc_TDT = value; if (sc->esc_tx_enabled) e82545_tx_start(sc); break; case E1000_TIDV: sc->esc_TIDV = value & ~0xFFFF0000; break; case E1000_TXDCTL(0): //assert(!sc->esc_tx_enabled); sc->esc_TXDCTL = value & ~0xC0C0C0; break; case E1000_TADV: sc->esc_TADV = value & ~0xFFFF0000; break; case E1000_RAL(0) ... E1000_RAH(15): /* convert to u32 offset */ ridx = (offset - E1000_RAL(0)) >> 2; e82545_write_ra(sc, ridx, value); break; case E1000_MTA ... (E1000_MTA + (127*4)): sc->esc_fmcast[(offset - E1000_MTA) >> 2] = value; break; case E1000_VFTA ... (E1000_VFTA + (127*4)): sc->esc_fvlan[(offset - E1000_VFTA) >> 2] = value; - break; + break; case E1000_EECD: { //DPRINTF("EECD write 0x%x -> 0x%x", sc->eeprom_control, value); /* edge triggered low->high */ uint32_t eecd_strobe = ((sc->eeprom_control & E1000_EECD_SK) ? 0 : (value & E1000_EECD_SK)); uint32_t eecd_mask = (E1000_EECD_SK|E1000_EECD_CS| E1000_EECD_DI|E1000_EECD_REQ); sc->eeprom_control &= ~eecd_mask; sc->eeprom_control |= (value & eecd_mask); /* grant/revoke immediately */ if (value & E1000_EECD_REQ) { sc->eeprom_control |= E1000_EECD_GNT; } else { sc->eeprom_control &= ~E1000_EECD_GNT; } if (eecd_strobe && (sc->eeprom_control & E1000_EECD_CS)) { e82545_eecd_strobe(sc); } return; } case E1000_MDIC: { uint8_t reg_addr = (uint8_t)((value & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); uint8_t phy_addr = (uint8_t)((value & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT); sc->mdi_control = (value & ~(E1000_MDIC_ERROR|E1000_MDIC_DEST)); if ((value & E1000_MDIC_READY) != 0) { DPRINTF("Incorrect MDIC ready bit: 0x%x", value); return; } switch (value & E82545_MDIC_OP_MASK) { case E1000_MDIC_OP_READ: sc->mdi_control &= ~E82545_MDIC_DATA_MASK; sc->mdi_control |= e82545_read_mdi(sc, reg_addr, phy_addr); break; case E1000_MDIC_OP_WRITE: e82545_write_mdi(sc, reg_addr, phy_addr, value & E82545_MDIC_DATA_MASK); break; default: DPRINTF("Unknown MDIC op: 0x%x", value); return; } /* TODO: barrier? 
*/ sc->mdi_control |= E1000_MDIC_READY; if (value & E82545_MDIC_IE) { // TODO: generate interrupt } return; } case E1000_MANC: - case E1000_STATUS: + case E1000_STATUS: return; default: DPRINTF("Unknown write register: 0x%x value:%x", offset, value); return; } } static uint32_t e82545_read_register(struct e82545_softc *sc, uint32_t offset) { uint32_t retval; int ridx; if (offset & 0x3) { DPRINTF("Unaligned register read offset:0x%x", offset); return 0; } DPRINTF("Register read: 0x%x", offset); switch (offset) { case E1000_CTRL: retval = sc->esc_CTRL; break; case E1000_STATUS: retval = E1000_STATUS_FD | E1000_STATUS_LU | E1000_STATUS_SPEED_1000; break; case E1000_FCAL: retval = sc->esc_FCAL; break; case E1000_FCAH: retval = sc->esc_FCAH; break; case E1000_FCT: retval = sc->esc_FCT; break; case E1000_VET: retval = sc->esc_VET; break; case E1000_FCTTV: retval = sc->esc_FCTTV; break; case E1000_LEDCTL: retval = sc->esc_LEDCTL; break; case E1000_PBA: retval = sc->esc_PBA; break; case E1000_ICR: case E1000_ITR: case E1000_ICS: case E1000_IMS: case E1000_IMC: retval = e82545_intr_read(sc, offset); break; case E1000_RCTL: retval = sc->esc_RCTL; break; case E1000_FCRTL: retval = sc->esc_FCRTL; break; case E1000_FCRTH: retval = sc->esc_FCRTH; break; case E1000_RDBAL(0): retval = sc->esc_RDBAL; break; case E1000_RDBAH(0): retval = sc->esc_RDBAH; break; case E1000_RDLEN(0): retval = sc->esc_RDLEN; break; case E1000_RDH(0): retval = sc->esc_RDH; break; case E1000_RDT(0): retval = sc->esc_RDT; break; case E1000_RDTR: retval = sc->esc_RDTR; break; case E1000_RXDCTL(0): retval = sc->esc_RXDCTL; break; case E1000_RADV: retval = sc->esc_RADV; break; case E1000_RSRPD: retval = sc->esc_RSRPD; break; - case E1000_RXCSUM: + case E1000_RXCSUM: retval = sc->esc_RXCSUM; break; case E1000_TXCW: retval = sc->esc_TXCW; break; case E1000_TCTL: retval = sc->esc_TCTL; break; case E1000_TIPG: retval = sc->esc_TIPG; break; case E1000_AIT: retval = sc->esc_AIT; break; case E1000_TDBAL(0): retval = sc->esc_TDBAL; break; case E1000_TDBAH(0): retval = sc->esc_TDBAH; break; case E1000_TDLEN(0): retval = sc->esc_TDLEN; break; case E1000_TDH(0): retval = sc->esc_TDH; break; case E1000_TDT(0): retval = sc->esc_TDT; break; case E1000_TIDV: retval = sc->esc_TIDV; break; case E1000_TXDCTL(0): retval = sc->esc_TXDCTL; break; case E1000_TADV: retval = sc->esc_TADV; break; case E1000_RAL(0) ... E1000_RAH(15): /* convert to u32 offset */ ridx = (offset - E1000_RAL(0)) >> 2; retval = e82545_read_ra(sc, ridx); break; case E1000_MTA ... (E1000_MTA + (127*4)): retval = sc->esc_fmcast[(offset - E1000_MTA) >> 2]; break; case E1000_VFTA ... (E1000_VFTA + (127*4)): retval = sc->esc_fvlan[(offset - E1000_VFTA) >> 2]; - break; + break; case E1000_EECD: //DPRINTF("EECD read %x", sc->eeprom_control); retval = sc->eeprom_control; break; case E1000_MDIC: retval = sc->mdi_control; break; case E1000_MANC: retval = 0; break; /* stats that we emulate. 
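 * The 64-bit octet counters are exposed as low/high 32-bit register
 * pairs (GORCL/GORCH, GOTCL/GOTCH, TORL/TORH): e.g. a hypothetical
 * good_octets_rx of 0x0000000123456789 reads back as GORCL = 0x23456789
 * and GORCH = 0x00000001.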
*/ case E1000_MPC: retval = sc->missed_pkt_count; break; case E1000_PRC64: retval = sc->pkt_rx_by_size[0]; break; case E1000_PRC127: retval = sc->pkt_rx_by_size[1]; break; case E1000_PRC255: retval = sc->pkt_rx_by_size[2]; break; case E1000_PRC511: retval = sc->pkt_rx_by_size[3]; break; case E1000_PRC1023: retval = sc->pkt_rx_by_size[4]; break; case E1000_PRC1522: retval = sc->pkt_rx_by_size[5]; break; case E1000_GPRC: retval = sc->good_pkt_rx_count; break; case E1000_BPRC: retval = sc->bcast_pkt_rx_count; break; case E1000_MPRC: retval = sc->mcast_pkt_rx_count; break; case E1000_GPTC: case E1000_TPT: retval = sc->good_pkt_tx_count; break; case E1000_GORCL: retval = (uint32_t)sc->good_octets_rx; break; case E1000_GORCH: retval = (uint32_t)(sc->good_octets_rx >> 32); break; case E1000_TOTL: case E1000_GOTCL: retval = (uint32_t)sc->good_octets_tx; break; case E1000_TOTH: case E1000_GOTCH: retval = (uint32_t)(sc->good_octets_tx >> 32); break; case E1000_ROC: retval = sc->oversize_rx_count; break; case E1000_TORL: retval = (uint32_t)(sc->good_octets_rx + sc->missed_octets); break; case E1000_TORH: retval = (uint32_t)((sc->good_octets_rx + sc->missed_octets) >> 32); break; case E1000_TPR: retval = sc->good_pkt_rx_count + sc->missed_pkt_count + sc->oversize_rx_count; break; case E1000_PTC64: retval = sc->pkt_tx_by_size[0]; break; case E1000_PTC127: retval = sc->pkt_tx_by_size[1]; break; case E1000_PTC255: retval = sc->pkt_tx_by_size[2]; break; case E1000_PTC511: retval = sc->pkt_tx_by_size[3]; break; case E1000_PTC1023: retval = sc->pkt_tx_by_size[4]; break; case E1000_PTC1522: retval = sc->pkt_tx_by_size[5]; break; case E1000_MPTC: retval = sc->mcast_pkt_tx_count; break; case E1000_BPTC: retval = sc->bcast_pkt_tx_count; break; case E1000_TSCTC: retval = sc->tso_tx_count; break; /* stats that are always 0. 
*/ case E1000_CRCERRS: case E1000_ALGNERRC: case E1000_SYMERRS: case E1000_RXERRC: case E1000_SCC: case E1000_ECOL: case E1000_MCC: case E1000_LATECOL: case E1000_COLC: case E1000_DC: case E1000_TNCRS: case E1000_SEC: case E1000_CEXTERR: case E1000_RLEC: case E1000_XONRXC: case E1000_XONTXC: case E1000_XOFFRXC: case E1000_XOFFTXC: case E1000_FCRUC: case E1000_RNBC: case E1000_RUC: case E1000_RFC: case E1000_RJC: case E1000_MGTPRC: case E1000_MGTPDC: case E1000_MGTPTC: case E1000_TSCTFC: retval = 0; break; default: DPRINTF("Unknown read register: 0x%x", offset); retval = 0; break; } return (retval); } static void e82545_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { struct e82545_softc *sc; //DPRINTF("Write bar:%d offset:0x%lx value:0x%lx size:%d", baridx, offset, value, size); sc = pi->pi_arg; pthread_mutex_lock(&sc->esc_mtx); switch (baridx) { case E82545_BAR_IO: switch (offset) { case E82545_IOADDR: if (size != 4) { DPRINTF("Wrong io addr write sz:%d value:0x%lx", size, value); } else sc->io_addr = (uint32_t)value; break; case E82545_IODATA: if (size != 4) { DPRINTF("Wrong io data write size:%d value:0x%lx", size, value); } else if (sc->io_addr > E82545_IO_REGISTER_MAX) { DPRINTF("Non-register io write addr:0x%x value:0x%lx", sc->io_addr, value); } else e82545_write_register(sc, sc->io_addr, (uint32_t)value); break; default: DPRINTF("Unknown io bar write offset:0x%lx value:0x%lx size:%d", offset, value, size); break; } break; case E82545_BAR_REGISTER: if (size != 4) { DPRINTF("Wrong register write size:%d offset:0x%lx value:0x%lx", size, offset, value); } else e82545_write_register(sc, (uint32_t)offset, (uint32_t)value); break; default: DPRINTF("Unknown write bar:%d off:0x%lx val:0x%lx size:%d", baridx, offset, value, size); } pthread_mutex_unlock(&sc->esc_mtx); } static uint64_t e82545_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { struct e82545_softc *sc; uint64_t retval; - + //DPRINTF("Read bar:%d offset:0x%lx size:%d", baridx, offset, size); sc = pi->pi_arg; retval = 0; pthread_mutex_lock(&sc->esc_mtx); switch (baridx) { case E82545_BAR_IO: switch (offset) { case E82545_IOADDR: if (size != 4) { DPRINTF("Wrong io addr read sz:%d", size); } else retval = sc->io_addr; break; case E82545_IODATA: if (size != 4) { DPRINTF("Wrong io data read sz:%d", size); } if (sc->io_addr > E82545_IO_REGISTER_MAX) { DPRINTF("Non-register io read addr:0x%x", sc->io_addr); } else retval = e82545_read_register(sc, sc->io_addr); break; default: DPRINTF("Unknown io bar read offset:0x%lx size:%d", offset, size); break; } break; case E82545_BAR_REGISTER: if (size != 4) { DPRINTF("Wrong register read size:%d offset:0x%lx", size, offset); } else retval = e82545_read_register(sc, (uint32_t)offset); break; default: DPRINTF("Unknown read bar:%d offset:0x%lx size:%d", baridx, offset, size); break; } pthread_mutex_unlock(&sc->esc_mtx); return (retval); } static void e82545_reset(struct e82545_softc *sc, int drvr) { int i; e82545_rx_disable(sc); e82545_tx_disable(sc); /* clear outstanding interrupts */ if (sc->esc_irq_asserted) pci_lintr_deassert(sc->esc_pi); /* misc */ if (!drvr) { sc->esc_FCAL = 0; sc->esc_FCAH = 0; sc->esc_FCT = 0; sc->esc_VET = 0; sc->esc_FCTTV = 0; } sc->esc_LEDCTL = 0x07061302; sc->esc_PBA = 0x00100030; - + /* start nvm in opcode mode. 
*/ sc->nvm_opaddr = 0; sc->nvm_mode = E82545_NVM_MODE_OPADDR; sc->nvm_bits = E82545_NVM_OPADDR_BITS; sc->eeprom_control = E1000_EECD_PRES | E82545_EECD_FWE_EN; e82545_init_eeprom(sc); /* interrupt */ sc->esc_ICR = 0; sc->esc_ITR = 250; sc->esc_ICS = 0; sc->esc_IMS = 0; sc->esc_IMC = 0; - + /* L2 filters */ if (!drvr) { memset(sc->esc_fvlan, 0, sizeof(sc->esc_fvlan)); memset(sc->esc_fmcast, 0, sizeof(sc->esc_fmcast)); memset(sc->esc_uni, 0, sizeof(sc->esc_uni)); /* XXX not necessary on 82545 ?? */ sc->esc_uni[0].eu_valid = 1; memcpy(sc->esc_uni[0].eu_eth.octet, sc->esc_mac.octet, ETHER_ADDR_LEN); } else { /* Clear RAH valid bits */ for (i = 0; i < 16; i++) sc->esc_uni[i].eu_valid = 0; } - + /* receive */ if (!drvr) { sc->esc_RDBAL = 0; sc->esc_RDBAH = 0; } sc->esc_RCTL = 0; sc->esc_FCRTL = 0; sc->esc_FCRTH = 0; sc->esc_RDLEN = 0; sc->esc_RDH = 0; sc->esc_RDT = 0; sc->esc_RDTR = 0; sc->esc_RXDCTL = (1 << 24) | (1 << 16); /* default GRAN/WTHRESH */ sc->esc_RADV = 0; sc->esc_RXCSUM = 0; /* transmit */ if (!drvr) { sc->esc_TDBAL = 0; sc->esc_TDBAH = 0; sc->esc_TIPG = 0; sc->esc_AIT = 0; sc->esc_TIDV = 0; sc->esc_TADV = 0; } sc->esc_tdba = 0; sc->esc_txdesc = NULL; sc->esc_TXCW = 0; sc->esc_TCTL = 0; sc->esc_TDLEN = 0; sc->esc_TDT = 0; sc->esc_TDHr = sc->esc_TDH = 0; sc->esc_TXDCTL = 0; } static int e82545_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) { char nstr[80]; struct e82545_softc *sc; const char *mac; int err; /* Setup our softc */ sc = calloc(1, sizeof(*sc)); pi->pi_arg = sc; sc->esc_pi = pi; sc->esc_ctx = ctx; pthread_mutex_init(&sc->esc_mtx, NULL); pthread_cond_init(&sc->esc_rx_cond, NULL); pthread_cond_init(&sc->esc_tx_cond, NULL); pthread_create(&sc->esc_tx_tid, NULL, e82545_tx_thread, sc); snprintf(nstr, sizeof(nstr), "e82545-%d:%d tx", pi->pi_slot, pi->pi_func); pthread_set_name_np(sc->esc_tx_tid, nstr); pci_set_cfgdata16(pi, PCIR_DEVICE, E82545_DEV_ID_82545EM_COPPER); pci_set_cfgdata16(pi, PCIR_VENDOR, E82545_VENDOR_ID_INTEL); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_NETWORK_ETHERNET); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, E82545_SUBDEV_ID); pci_set_cfgdata16(pi, PCIR_SUBVEND_0, E82545_VENDOR_ID_INTEL); pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_NORMAL); pci_set_cfgdata8(pi, PCIR_INTPIN, 0x1); - + /* TODO: this card also supports msi, but the freebsd driver for it * does not, so I have not implemented it. */ pci_lintr_request(pi); pci_emul_alloc_bar(pi, E82545_BAR_REGISTER, PCIBAR_MEM32, E82545_BAR_REGISTER_LEN); pci_emul_alloc_bar(pi, E82545_BAR_FLASH, PCIBAR_MEM32, E82545_BAR_FLASH_LEN); pci_emul_alloc_bar(pi, E82545_BAR_IO, PCIBAR_IO, E82545_BAR_IO_LEN); mac = get_config_value_node(nvl, "mac"); if (mac != NULL) { err = net_parsemac(mac, sc->esc_mac.octet); if (err) { free(sc); return (err); } } else net_genmac(pi, sc->esc_mac.octet); err = netbe_init(&sc->esc_be, nvl, e82545_rx_callback, sc); if (err) { free(sc); return (err); } netbe_rx_enable(sc->esc_be); /* H/w initiated reset */ e82545_reset(sc, 0); return (0); } #ifdef BHYVE_SNAPSHOT static int e82545_snapshot(struct vm_snapshot_meta *meta) { int i; int ret; struct e82545_softc *sc; struct pci_devinst *pi; uint64_t bitmap_value; pi = meta->dev_data; sc = pi->pi_arg; /* esc_mevp and esc_mevpitr should be reinitiated at init. 
*/ SNAPSHOT_VAR_OR_LEAVE(sc->esc_mac, meta, ret, done); /* General */ SNAPSHOT_VAR_OR_LEAVE(sc->esc_CTRL, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCAL, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCAH, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCT, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_VET, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCTTV, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_LEDCTL, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_PBA, meta, ret, done); /* Interrupt control */ SNAPSHOT_VAR_OR_LEAVE(sc->esc_irq_asserted, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_ICR, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_ITR, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_ICS, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_IMS, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_IMC, meta, ret, done); /* * Transmit * * The fields in the unions are in superposition to access certain * bytes in the larger uint variables. * e.g., ip_config = [ipcss|ipcso|ipcse0|ipcse1] */ SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.lower_setup.ip_config, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.upper_setup.tcp_config, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.cmd_and_length, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.tcp_seg_setup.data, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_tx_enabled, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_tx_active, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_TXCW, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_TCTL, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_TIPG, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_AIT, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_tdba, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDBAL, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDBAH, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDLEN, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDH, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDHr, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDT, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_TIDV, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_TXDCTL, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_TADV, meta, ret, done); /* Has dependency on esc_TDLEN; reoreder of fields from struct. 
*/ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->esc_txdesc, sc->esc_TDLEN, true, meta, ret, done); /* L2 frame acceptance */ for (i = 0; i < nitems(sc->esc_uni); i++) { SNAPSHOT_VAR_OR_LEAVE(sc->esc_uni[i].eu_valid, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_uni[i].eu_addrsel, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_uni[i].eu_eth, meta, ret, done); } SNAPSHOT_BUF_OR_LEAVE(sc->esc_fmcast, sizeof(sc->esc_fmcast), meta, ret, done); SNAPSHOT_BUF_OR_LEAVE(sc->esc_fvlan, sizeof(sc->esc_fvlan), meta, ret, done); /* Receive */ SNAPSHOT_VAR_OR_LEAVE(sc->esc_rx_enabled, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_rx_active, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_rx_loopback, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_RCTL, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCRTL, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCRTH, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_rdba, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDBAL, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDBAH, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDLEN, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDH, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDT, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDTR, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_RXDCTL, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_RADV, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_RSRPD, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->esc_RXCSUM, meta, ret, done); /* Has dependency on esc_RDLEN; reoreder of fields from struct. */ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->esc_rxdesc, sc->esc_TDLEN, true, meta, ret, done); /* IO Port register access */ SNAPSHOT_VAR_OR_LEAVE(sc->io_addr, meta, ret, done); /* Shadow copy of MDIC */ SNAPSHOT_VAR_OR_LEAVE(sc->mdi_control, meta, ret, done); /* Shadow copy of EECD */ SNAPSHOT_VAR_OR_LEAVE(sc->eeprom_control, meta, ret, done); /* Latest NVM in/out */ SNAPSHOT_VAR_OR_LEAVE(sc->nvm_data, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->nvm_opaddr, meta, ret, done); /* Stats */ SNAPSHOT_VAR_OR_LEAVE(sc->missed_pkt_count, meta, ret, done); SNAPSHOT_BUF_OR_LEAVE(sc->pkt_rx_by_size, sizeof(sc->pkt_rx_by_size), meta, ret, done); SNAPSHOT_BUF_OR_LEAVE(sc->pkt_tx_by_size, sizeof(sc->pkt_tx_by_size), meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->good_pkt_rx_count, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->bcast_pkt_rx_count, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->mcast_pkt_rx_count, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->good_pkt_tx_count, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->bcast_pkt_tx_count, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->mcast_pkt_tx_count, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->oversize_rx_count, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->tso_tx_count, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->good_octets_rx, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->good_octets_tx, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->missed_octets, meta, ret, done); if (meta->op == VM_SNAPSHOT_SAVE) bitmap_value = sc->nvm_bits; SNAPSHOT_VAR_OR_LEAVE(bitmap_value, meta, ret, done); if (meta->op == VM_SNAPSHOT_RESTORE) sc->nvm_bits = bitmap_value; if (meta->op == VM_SNAPSHOT_SAVE) bitmap_value = sc->nvm_bits; SNAPSHOT_VAR_OR_LEAVE(bitmap_value, meta, ret, done); if (meta->op == VM_SNAPSHOT_RESTORE) sc->nvm_bits = bitmap_value; /* EEPROM data */ SNAPSHOT_BUF_OR_LEAVE(sc->eeprom_data, sizeof(sc->eeprom_data), meta, ret, done); done: return (ret); } #endif struct pci_devemu pci_de_e82545 = { .pe_emu = "e1000", .pe_init = e82545_init, .pe_legacy_config = 
netbe_legacy_config, .pe_barwrite = e82545_write, .pe_barread = e82545_read, #ifdef BHYVE_SNAPSHOT .pe_snapshot = e82545_snapshot, #endif }; PCI_EMUL_SET(pci_de_e82545); diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index 0b46bcc6200e..92eaa80e8cf6 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -1,2433 +1,2433 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "acpi.h" #include "bhyverun.h" #include "config.h" #include "debug.h" #include "inout.h" #include "ioapic.h" #include "mem.h" #include "pci_emul.h" #include "pci_irq.h" #include "pci_lpc.h" #define CONF1_ADDR_PORT 0x0cf8 #define CONF1_DATA_PORT 0x0cfc #define CONF1_ENABLE 0x80000000ul #define MAXBUSES (PCI_BUSMAX + 1) #define MAXSLOTS (PCI_SLOTMAX + 1) #define MAXFUNCS (PCI_FUNCMAX + 1) #define GB (1024 * 1024 * 1024UL) struct funcinfo { nvlist_t *fi_config; struct pci_devemu *fi_pde; struct pci_devinst *fi_devi; }; struct intxinfo { int ii_count; int ii_pirq_pin; int ii_ioapic_irq; }; struct slotinfo { struct intxinfo si_intpins[4]; struct funcinfo si_funcs[MAXFUNCS]; }; struct businfo { uint16_t iobase, iolimit; /* I/O window */ uint32_t membase32, memlimit32; /* mmio window below 4GB */ uint64_t membase64, memlimit64; /* mmio window above 4GB */ struct slotinfo slotinfo[MAXSLOTS]; }; static struct businfo *pci_businfo[MAXBUSES]; SET_DECLARE(pci_devemu_set, struct pci_devemu); static uint64_t pci_emul_iobase; static uint64_t pci_emul_membase32; static uint64_t pci_emul_membase64; static uint64_t pci_emul_memlim64; #define PCI_EMUL_IOBASE 0x2000 #define PCI_EMUL_IOLIMIT 0x10000 #define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */ #define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */ SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE); /* * OVMF always uses 0xC0000000 as base address for 32 bit PCI MMIO. Don't * change this address without changing it in OVMF. 
*/ #define PCI_EMUL_MEMBASE32 0xC0000000 #define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE #define PCI_EMUL_MEMSIZE64 (32*GB) static struct pci_devemu *pci_emul_finddev(const char *name); static void pci_lintr_route(struct pci_devinst *pi); static void pci_lintr_update(struct pci_devinst *pi); static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, int coff, int bytes, uint32_t *val); static __inline void CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes) { if (bytes == 1) pci_set_cfgdata8(pi, coff, val); else if (bytes == 2) pci_set_cfgdata16(pi, coff, val); else pci_set_cfgdata32(pi, coff, val); } static __inline uint32_t CFGREAD(struct pci_devinst *pi, int coff, int bytes) { if (bytes == 1) return (pci_get_cfgdata8(pi, coff)); else if (bytes == 2) return (pci_get_cfgdata16(pi, coff)); else return (pci_get_cfgdata32(pi, coff)); } /* * I/O access */ /* * Slot options are in the form: * * ::,[,] * [:],[,] * * slot is 0..31 * func is 0..7 * emul is a string describing the type of PCI device e.g. virtio-net * config is an optional string, depending on the device, that can be * used for configuration. * Examples are: * 1,virtio-net,tap0 * 3:0,dummy */ static void pci_parse_slot_usage(char *aopt) { EPRINTLN("Invalid PCI slot info field \"%s\"", aopt); } /* * Helper function to parse a list of comma-separated options where * each option is formatted as "name[=value]". If no value is * provided, the option is treated as a boolean and is given a value * of true. */ int pci_parse_legacy_config(nvlist_t *nvl, const char *opt) { char *config, *name, *tofree, *value; if (opt == NULL) return (0); config = tofree = strdup(opt); while ((name = strsep(&config, ",")) != NULL) { value = strchr(name, '='); if (value != NULL) { *value = '\0'; value++; set_config_value_node(nvl, name, value); } else set_config_bool_node(nvl, name, true); } free(tofree); return (0); } /* * PCI device configuration is stored in MIBs that encode the device's * location: * * pci... * * Where "bus", "slot", and "func" are all decimal values without * leading zeroes. Each valid device must have a "device" node which * identifies the driver model of the device. * * Device backends can provide a parser for the "config" string. If * a custom parser is not provided, pci_parse_legacy_config() is used * to parse the string. 
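 *
 * As a concrete illustration of the two pieces above: the legacy option
 * string "mac=00:a0:98:01:02:03,wol" is parsed by
 * pci_parse_legacy_config() into a "mac" string value plus a boolean
 * "wol" node ("wol" is only a placeholder option name), and a device
 * node can be populated by hand like this (sketch for illustration
 * only):
 *
 *	nvlist_t *nvl = create_config_node("pci.0.4.0");
 *
 *	set_config_value_node(nvl, "device", "dummy");
 *	pci_parse_legacy_config(nvl, "mac=00:a0:98:01:02:03,wol");
 *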
*/ int pci_parse_slot(char *opt) { char node_name[sizeof("pci.XXX.XX.X")]; struct pci_devemu *pde; char *emul, *config, *str, *cp; int error, bnum, snum, fnum; nvlist_t *nvl; error = -1; str = strdup(opt); emul = config = NULL; if ((cp = strchr(str, ',')) != NULL) { *cp = '\0'; emul = cp + 1; if ((cp = strchr(emul, ',')) != NULL) { *cp = '\0'; config = cp + 1; } } else { pci_parse_slot_usage(opt); goto done; } /* :: */ if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { bnum = 0; /* : */ if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { fnum = 0; /* */ if (sscanf(str, "%d", &snum) != 1) { snum = -1; } } } if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || fnum < 0 || fnum >= MAXFUNCS) { pci_parse_slot_usage(opt); goto done; } pde = pci_emul_finddev(emul); if (pde == NULL) { EPRINTLN("pci slot %d:%d:%d: unknown device \"%s\"", bnum, snum, fnum, emul); goto done; } snprintf(node_name, sizeof(node_name), "pci.%d.%d.%d", bnum, snum, fnum); nvl = find_config_node(node_name); if (nvl != NULL) { EPRINTLN("pci slot %d:%d:%d already occupied!", bnum, snum, fnum); goto done; } nvl = create_config_node(node_name); if (pde->pe_alias != NULL) set_config_value_node(nvl, "device", pde->pe_alias); else set_config_value_node(nvl, "device", pde->pe_emu); if (pde->pe_legacy_config != NULL) error = pde->pe_legacy_config(nvl, config); else error = pci_parse_legacy_config(nvl, config); done: free(str); return (error); } void pci_print_supported_devices() { struct pci_devemu **pdpp, *pdp; SET_FOREACH(pdpp, pci_devemu_set) { pdp = *pdpp; printf("%s\n", pdp->pe_emu); } } static int pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) { if (offset < pi->pi_msix.pba_offset) return (0); if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { return (0); } return (1); } int pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, uint64_t value) { int msix_entry_offset; int tab_index; char *dest; /* support only 4 or 8 byte writes */ if (size != 4 && size != 8) return (-1); /* * Return if table index is beyond what device supports */ tab_index = offset / MSIX_TABLE_ENTRY_SIZE; if (tab_index >= pi->pi_msix.table_count) return (-1); msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; /* support only aligned writes */ if ((msix_entry_offset % size) != 0) return (-1); dest = (char *)(pi->pi_msix.table + tab_index); dest += msix_entry_offset; if (size == 4) *((uint32_t *)dest) = value; else *((uint64_t *)dest) = value; return (0); } uint64_t pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) { char *dest; int msix_entry_offset; int tab_index; uint64_t retval = ~0; /* * The PCI standard only allows 4 and 8 byte accesses to the MSI-X * table but we also allow 1 byte access to accommodate reads from * ddb. 
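 *
 * Each MSI-X table entry occupies MSIX_TABLE_ENTRY_SIZE (16) bytes: a
 * 64-bit message address, a 32-bit message data word and a 32-bit
 * vector control word.  The offset decomposition used here and in
 * pci_emul_msix_twrite() above therefore maps, for example, a 4-byte
 * access at table offset 0x34 onto entry 3, byte 4 of that entry (the
 * upper half of its message address):
 *
 *	tab_index         = 0x34 / MSIX_TABLE_ENTRY_SIZE;	-> 3
 *	msix_entry_offset = 0x34 % MSIX_TABLE_ENTRY_SIZE;	-> 4
 *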
*/ if (size != 1 && size != 4 && size != 8) return (retval); msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; /* support only aligned reads */ if ((msix_entry_offset % size) != 0) { return (retval); } tab_index = offset / MSIX_TABLE_ENTRY_SIZE; if (tab_index < pi->pi_msix.table_count) { /* valid MSI-X Table access */ dest = (char *)(pi->pi_msix.table + tab_index); dest += msix_entry_offset; if (size == 1) retval = *((uint8_t *)dest); else if (size == 4) retval = *((uint32_t *)dest); else retval = *((uint64_t *)dest); } else if (pci_valid_pba_offset(pi, offset)) { /* return 0 for PBA access */ retval = 0; } return (retval); } int pci_msix_table_bar(struct pci_devinst *pi) { if (pi->pi_msix.table != NULL) return (pi->pi_msix.table_bar); else return (-1); } int pci_msix_pba_bar(struct pci_devinst *pi) { if (pi->pi_msix.table != NULL) return (pi->pi_msix.pba_bar); else return (-1); } static int pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) { struct pci_devinst *pdi = arg; struct pci_devemu *pe = pdi->pi_d; uint64_t offset; int i; for (i = 0; i <= PCI_BARMAX; i++) { if (pdi->pi_bar[i].type == PCIBAR_IO && port >= pdi->pi_bar[i].addr && port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { offset = port - pdi->pi_bar[i].addr; if (in) *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, offset, bytes); else (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, bytes, *eax); return (0); } } return (-1); } static int pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, int size, uint64_t *val, void *arg1, long arg2) { struct pci_devinst *pdi = arg1; struct pci_devemu *pe = pdi->pi_d; uint64_t offset; int bidx = (int) arg2; assert(bidx <= PCI_BARMAX); assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || pdi->pi_bar[bidx].type == PCIBAR_MEM64); assert(addr >= pdi->pi_bar[bidx].addr && addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); offset = addr - pdi->pi_bar[bidx].addr; if (dir == MEM_F_WRITE) { if (size == 8) { (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, 4, *val & 0xffffffff); (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4, 4, *val >> 32); } else { (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, size, *val); } } else { if (size == 8) { *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset, 4); *val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset + 4, 4) << 32; } else { *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset, size); } } return (0); } static int pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, uint64_t *addr) { uint64_t base; assert((size & (size - 1)) == 0); /* must be a power of 2 */ base = roundup2(*baseptr, size); if (base + size <= limit) { *addr = base; *baseptr = base + size; return (0); } else return (-1); } /* * Register (or unregister) the MMIO or I/O region associated with the BAR * register 'idx' of an emulated pci device. 
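 *
 * For reference, pci_emul_alloc_resource() above hands out naturally
 * aligned, power-of-2 sized ranges by rounding the allocation cursor up
 * to the requested size.  A small worked example, assuming the 32-bit
 * MMIO cursor currently sits at 0xC0001000 and a 16KB BAR is requested
 * (illustrative numbers only):
 *
 *	base     = roundup2(0xC0001000, 0x4000);	-> 0xC0004000
 *	*addr    = base;				-> BAR placed here
 *	*baseptr = base + 0x4000;			-> next cursor 0xC0008000
 *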
*/ static void modify_bar_registration(struct pci_devinst *pi, int idx, int registration) { struct pci_devemu *pe; int error; struct inout_port iop; struct mem_range mr; pe = pi->pi_d; switch (pi->pi_bar[idx].type) { case PCIBAR_IO: bzero(&iop, sizeof(struct inout_port)); iop.name = pi->pi_name; iop.port = pi->pi_bar[idx].addr; iop.size = pi->pi_bar[idx].size; if (registration) { iop.flags = IOPORT_F_INOUT; iop.handler = pci_emul_io_handler; iop.arg = pi; error = register_inout(&iop); } else error = unregister_inout(&iop); if (pe->pe_baraddr != NULL) (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, pi->pi_bar[idx].addr); break; case PCIBAR_MEM32: case PCIBAR_MEM64: bzero(&mr, sizeof(struct mem_range)); mr.name = pi->pi_name; mr.base = pi->pi_bar[idx].addr; mr.size = pi->pi_bar[idx].size; if (registration) { mr.flags = MEM_F_RW; mr.handler = pci_emul_mem_handler; mr.arg1 = pi; mr.arg2 = idx; error = register_mem(&mr); } else error = unregister_mem(&mr); if (pe->pe_baraddr != NULL) (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, pi->pi_bar[idx].addr); break; default: error = EINVAL; break; } assert(error == 0); } static void unregister_bar(struct pci_devinst *pi, int idx) { modify_bar_registration(pi, idx, 0); } static void register_bar(struct pci_devinst *pi, int idx) { modify_bar_registration(pi, idx, 1); } /* Are we decoding i/o port accesses for the emulated pci device? */ static int porten(struct pci_devinst *pi) { uint16_t cmd; cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); return (cmd & PCIM_CMD_PORTEN); } /* Are we decoding memory accesses for the emulated pci device? */ static int memen(struct pci_devinst *pi) { uint16_t cmd; cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); return (cmd & PCIM_CMD_MEMEN); } /* * Update the MMIO or I/O address that is decoded by the BAR register. * * If the pci device has enabled the address space decoding then intercept * the address range decoded by the BAR register. */ static void update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) { int decode; if (pi->pi_bar[idx].type == PCIBAR_IO) decode = porten(pi); else decode = memen(pi); if (decode) unregister_bar(pi, idx); switch (type) { case PCIBAR_IO: case PCIBAR_MEM32: pi->pi_bar[idx].addr = addr; break; case PCIBAR_MEM64: pi->pi_bar[idx].addr &= ~0xffffffffUL; pi->pi_bar[idx].addr |= addr; break; case PCIBAR_MEMHI64: pi->pi_bar[idx].addr &= 0xffffffff; pi->pi_bar[idx].addr |= addr; break; default: assert(0); } if (decode) register_bar(pi, idx); } int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, uint64_t size) { int error; uint64_t *baseptr, limit, addr, mask, lobits, bar; uint16_t cmd, enbit; assert(idx >= 0 && idx <= PCI_BARMAX); if ((size & (size - 1)) != 0) size = 1UL << flsl(size); /* round up to a power of 2 */ /* Enforce minimum BAR sizes required by the PCI standard */ if (type == PCIBAR_IO) { if (size < 4) size = 4; } else { if (size < 16) size = 16; } switch (type) { case PCIBAR_NONE: baseptr = NULL; addr = mask = lobits = enbit = 0; break; case PCIBAR_IO: baseptr = &pci_emul_iobase; limit = PCI_EMUL_IOLIMIT; mask = PCIM_BAR_IO_BASE; lobits = PCIM_BAR_IO_SPACE; enbit = PCIM_CMD_PORTEN; break; case PCIBAR_MEM64: /* * XXX * Some drivers do not work well if the 64-bit BAR is allocated * above 4GB. Allow for this by allocating small requests under * 4GB unless then allocation size is larger than some arbitrary * number (128MB currently). 
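 *
 * Note that a 64-bit BAR spans two consecutive config dwords, which is
 * why update_bar_address() above merges partial updates rather than
 * overwriting the whole address.  Sketch of the two halves, with
 * bar_addr/lo/hi as placeholder names (illustration only):
 *
 *	bar_addr = (bar_addr & ~0xffffffffUL) | lo;			low dword write
 *	bar_addr = (bar_addr & 0xffffffffUL) | ((uint64_t)hi << 32);	high dword write
 *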
*/ if (size > 128 * 1024 * 1024) { baseptr = &pci_emul_membase64; limit = pci_emul_memlim64; mask = PCIM_BAR_MEM_BASE; lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | PCIM_BAR_MEM_PREFETCH; } else { baseptr = &pci_emul_membase32; limit = PCI_EMUL_MEMLIMIT32; mask = PCIM_BAR_MEM_BASE; lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; } enbit = PCIM_CMD_MEMEN; break; case PCIBAR_MEM32: baseptr = &pci_emul_membase32; limit = PCI_EMUL_MEMLIMIT32; mask = PCIM_BAR_MEM_BASE; lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; enbit = PCIM_CMD_MEMEN; break; default: printf("pci_emul_alloc_base: invalid bar type %d\n", type); assert(0); } if (baseptr != NULL) { error = pci_emul_alloc_resource(baseptr, limit, size, &addr); if (error != 0) return (error); } pdi->pi_bar[idx].type = type; pdi->pi_bar[idx].addr = addr; pdi->pi_bar[idx].size = size; /* * passthru devices are using same lobits as physical device they set * this property */ if (pdi->pi_bar[idx].lobits != 0) { lobits = pdi->pi_bar[idx].lobits; } else { pdi->pi_bar[idx].lobits = lobits; } /* Initialize the BAR register in config space */ bar = (addr & mask) | lobits; pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); if (type == PCIBAR_MEM64) { assert(idx + 1 <= PCI_BARMAX); pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); } cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND); if ((cmd & enbit) != enbit) pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit); register_bar(pdi, idx); return (0); } #define CAP_START_OFFSET 0x40 static int pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) { int i, capoff, reallen; uint16_t sts; assert(caplen > 0); reallen = roundup2(caplen, 4); /* dword aligned */ sts = pci_get_cfgdata16(pi, PCIR_STATUS); if ((sts & PCIM_STATUS_CAPPRESENT) == 0) capoff = CAP_START_OFFSET; else capoff = pi->pi_capend + 1; /* Check if we have enough space */ if (capoff + reallen > PCI_REGMAX + 1) return (-1); /* Set the previous capability pointer */ if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); } else pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); /* Copy the capability */ for (i = 0; i < caplen; i++) pci_set_cfgdata8(pi, capoff + i, capdata[i]); /* Set the next capability pointer */ pci_set_cfgdata8(pi, capoff + 1, 0); pi->pi_prevcap = capoff; pi->pi_capend = capoff + reallen - 1; return (0); } static struct pci_devemu * pci_emul_finddev(const char *name) { struct pci_devemu **pdpp, *pdp; SET_FOREACH(pdpp, pci_devemu_set) { pdp = *pdpp; if (!strcmp(pdp->pe_emu, name)) { return (pdp); } } return (NULL); } static int pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, int func, struct funcinfo *fi) { struct pci_devinst *pdi; int err; pdi = calloc(1, sizeof(struct pci_devinst)); pdi->pi_vmctx = ctx; pdi->pi_bus = bus; pdi->pi_slot = slot; pdi->pi_func = func; pthread_mutex_init(&pdi->pi_lintr.lock, NULL); pdi->pi_lintr.pin = 0; pdi->pi_lintr.state = IDLE; pdi->pi_lintr.pirq_pin = 0; pdi->pi_lintr.ioapic_irq = 0; pdi->pi_d = pde; snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); /* Disable legacy interrupts */ pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN); err = (*pde->pe_init)(ctx, pdi, fi->fi_config); if (err == 0) fi->fi_devi = pdi; else free(pdi); return (err); } void pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) { int 
mmc; /* Number of msi messages must be a power of 2 between 1 and 32 */ assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); mmc = ffs(msgnum) - 1; bzero(msicap, sizeof(struct msicap)); msicap->capid = PCIY_MSI; msicap->nextptr = nextptr; msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); } int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) { struct msicap msicap; pci_populate_msicap(&msicap, msgnum, 0); return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); } static void pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, uint32_t msix_tab_size) { assert(msix_tab_size % 4096 == 0); bzero(msixcap, sizeof(struct msixcap)); msixcap->capid = PCIY_MSIX; /* * Message Control Register, all fields set to * zero except for the Table Size. * Note: Table size N is encoded as N-1 */ msixcap->msgctrl = msgnum - 1; /* * MSI-X BAR setup: * - MSI-X table start at offset 0 * - PBA table starts at a 4K aligned offset after the MSI-X table */ msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); } static void pci_msix_table_init(struct pci_devinst *pi, int table_entries) { int i, table_size; assert(table_entries > 0); assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; pi->pi_msix.table = calloc(1, table_size); /* set mask bit of vector control register */ for (i = 0; i < table_entries; i++) pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; } int pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) { uint32_t tab_size; struct msixcap msixcap; assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; /* Align table size to nearest 4K */ tab_size = roundup2(tab_size, 4096); pi->pi_msix.table_bar = barnum; pi->pi_msix.pba_bar = barnum; pi->pi_msix.table_offset = 0; pi->pi_msix.table_count = msgnum; pi->pi_msix.pba_offset = tab_size; pi->pi_msix.pba_size = PBA_SIZE(msgnum); pci_msix_table_init(pi, msgnum); pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); /* allocate memory for MSI-X Table and PBA */ pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, tab_size + pi->pi_msix.pba_size); return (pci_emul_add_capability(pi, (u_char *)&msixcap, sizeof(msixcap))); } static void msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, int bytes, uint32_t val) { uint16_t msgctrl, rwmask; int off; off = offset - capoff; /* Message Control Register */ if (off == 2 && bytes == 2) { rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; msgctrl = pci_get_cfgdata16(pi, offset); msgctrl &= ~rwmask; msgctrl |= val & rwmask; val = msgctrl; pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; pci_lintr_update(pi); } CFGWRITE(pi, offset, val, bytes); } static void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, int bytes, uint32_t val) { uint16_t msgctrl, rwmask, msgdata, mme; uint32_t addrlo; /* * If guest is writing to the message control register make sure * we do not overwrite read-only fields. 
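 *
 * Worked example: if the guest writes 0x0021 to the message control
 * word (MSI enable plus a Multiple Message Enable field of 2), only the
 * bits covered by rwmask below survive, and the device ends up with
 *
 *	pi->pi_msi.enabled   = 1;
 *	pi->pi_msi.maxmsgnum = 1 << (0x20 >> 4);	-> 4 messages
 *
 * All other message control bits keep their emulated read-only values.
 *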
*/ if ((offset - capoff) == 2 && bytes == 2) { rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; msgctrl = pci_get_cfgdata16(pi, offset); msgctrl &= ~rwmask; msgctrl |= val & rwmask; val = msgctrl; } CFGWRITE(pi, offset, val, bytes); msgctrl = pci_get_cfgdata16(pi, capoff + 2); addrlo = pci_get_cfgdata32(pi, capoff + 4); if (msgctrl & PCIM_MSICTRL_64BIT) msgdata = pci_get_cfgdata16(pi, capoff + 12); else msgdata = pci_get_cfgdata16(pi, capoff + 8); mme = msgctrl & PCIM_MSICTRL_MME_MASK; pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; if (pi->pi_msi.enabled) { pi->pi_msi.addr = addrlo; pi->pi_msi.msg_data = msgdata; pi->pi_msi.maxmsgnum = 1 << (mme >> 4); } else { pi->pi_msi.maxmsgnum = 0; } pci_lintr_update(pi); } void pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, int bytes, uint32_t val) { /* XXX don't write to the readonly parts */ CFGWRITE(pi, offset, val, bytes); } #define PCIECAP_VERSION 0x2 int pci_emul_add_pciecap(struct pci_devinst *pi, int type) { int err; struct pciecap pciecap; bzero(&pciecap, sizeof(pciecap)); /* * Use the integrated endpoint type for endpoints on a root complex bus. * * NB: bhyve currently only supports a single PCI bus that is the root * complex bus, so all endpoints are integrated. */ if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0)) type = PCIEM_TYPE_ROOT_INT_EP; pciecap.capid = PCIY_EXPRESS; pciecap.pcie_capabilities = PCIECAP_VERSION | type; if (type != PCIEM_TYPE_ROOT_INT_EP) { pciecap.link_capabilities = 0x411; /* gen1, x1 */ pciecap.link_status = 0x11; /* gen1, x1 */ } err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); return (err); } /* * This function assumes that 'coff' is in the capabilities region of the * config space. A capoff parameter of zero will force a search for the * offset and type. */ void pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val, uint8_t capoff, int capid) { uint8_t nextoff; /* Do not allow un-aligned writes */ if ((offset & (bytes - 1)) != 0) return; if (capoff == 0) { /* Find the capability that we want to update */ capoff = CAP_START_OFFSET; while (1) { nextoff = pci_get_cfgdata8(pi, capoff + 1); if (nextoff == 0) break; if (offset >= capoff && offset < nextoff) break; capoff = nextoff; } assert(offset >= capoff); capid = pci_get_cfgdata8(pi, capoff); } /* * Capability ID and Next Capability Pointer are readonly. * However, some o/s's do 4-byte writes that include these. * For this case, trim the write back to 2 bytes and adjust * the data. */ if (offset == capoff || offset == capoff + 1) { if (offset == capoff && bytes == 4) { bytes = 2; offset += 2; val >>= 16; } else return; } switch (capid) { case PCIY_MSI: msicap_cfgwrite(pi, capoff, offset, bytes, val); break; case PCIY_MSIX: msixcap_cfgwrite(pi, capoff, offset, bytes, val); break; case PCIY_EXPRESS: pciecap_cfgwrite(pi, capoff, offset, bytes, val); break; default: break; } } static int pci_emul_iscap(struct pci_devinst *pi, int offset) { uint16_t sts; sts = pci_get_cfgdata16(pi, PCIR_STATUS); if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) return (1); } return (0); } static int pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, int size, uint64_t *val, void *arg1, long arg2) { /* * Ignore writes; return 0xff's for reads. The mem read code * will take care of truncating to the correct size. 
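 *
 * pci_emul_ecfg_handler() below splits a memory-mapped (ECAM) config
 * access into bus/slot/function/offset using the standard
 * bus<<20 | slot<<15 | func<<12 | offset layout.  Worked example for an
 * access at guest physical address 0xE0018010, i.e. PCI_EMUL_ECFG_BASE
 * + 0x18010 (illustrative address):
 *
 *	coff = 0xE0018010 & 0xfff;		-> 0x10
 *	func = (0xE0018010 >> 12) & 0x7;	-> 0
 *	slot = (0xE0018010 >> 15) & 0x1f;	-> 3
 *	bus  = (0xE0018010 >> 20) & 0xff;	-> 0
 *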
*/ if (dir == MEM_F_READ) { *val = 0xffffffffffffffff; } return (0); } static int pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, int bytes, uint64_t *val, void *arg1, long arg2) { int bus, slot, func, coff, in; coff = addr & 0xfff; func = (addr >> 12) & 0x7; slot = (addr >> 15) & 0x1f; bus = (addr >> 20) & 0xff; in = (dir == MEM_F_READ); if (in) *val = ~0UL; pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, (uint32_t *)val); return (0); } uint64_t pci_ecfg_base(void) { return (PCI_EMUL_ECFG_BASE); } #define BUSIO_ROUNDUP 32 #define BUSMEM_ROUNDUP (1024 * 1024) int init_pci(struct vmctx *ctx) { char node_name[sizeof("pci.XXX.XX.X")]; struct mem_range mr; struct pci_devemu *pde; struct businfo *bi; struct slotinfo *si; struct funcinfo *fi; nvlist_t *nvl; const char *emul; size_t lowmem; int bus, slot, func; int error; if (vm_get_lowmem_limit(ctx) > PCI_EMUL_MEMBASE32) errx(EX_OSERR, "Invalid lowmem limit"); pci_emul_iobase = PCI_EMUL_IOBASE; pci_emul_membase32 = PCI_EMUL_MEMBASE32; pci_emul_membase64 = 4*GB + vm_get_highmem_size(ctx); pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64); pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64; for (bus = 0; bus < MAXBUSES; bus++) { snprintf(node_name, sizeof(node_name), "pci.%d", bus); nvl = find_config_node(node_name); if (nvl == NULL) continue; pci_businfo[bus] = calloc(1, sizeof(struct businfo)); bi = pci_businfo[bus]; /* * Keep track of the i/o and memory resources allocated to * this bus. */ bi->iobase = pci_emul_iobase; bi->membase32 = pci_emul_membase32; bi->membase64 = pci_emul_membase64; for (slot = 0; slot < MAXSLOTS; slot++) { si = &bi->slotinfo[slot]; for (func = 0; func < MAXFUNCS; func++) { fi = &si->si_funcs[func]; snprintf(node_name, sizeof(node_name), "pci.%d.%d.%d", bus, slot, func); nvl = find_config_node(node_name); if (nvl == NULL) continue; fi->fi_config = nvl; emul = get_config_value_node(nvl, "device"); if (emul == NULL) { EPRINTLN("pci slot %d:%d:%d: missing " "\"device\" value", bus, slot, func); return (EINVAL); } pde = pci_emul_finddev(emul); if (pde == NULL) { EPRINTLN("pci slot %d:%d:%d: unknown " "device \"%s\"", bus, slot, func, emul); return (EINVAL); } if (pde->pe_alias != NULL) { EPRINTLN("pci slot %d:%d:%d: legacy " "device \"%s\", use \"%s\" instead", bus, slot, func, emul, pde->pe_alias); return (EINVAL); } fi->fi_pde = pde; error = pci_emul_init(ctx, pde, bus, slot, func, fi); if (error) return (error); } } /* * Add some slop to the I/O and memory resources decoded by * this bus to give a guest some flexibility if it wants to * reprogram the BARs. */ pci_emul_iobase += BUSIO_ROUNDUP; pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); bi->iolimit = pci_emul_iobase; pci_emul_membase32 += BUSMEM_ROUNDUP; pci_emul_membase32 = roundup2(pci_emul_membase32, BUSMEM_ROUNDUP); bi->memlimit32 = pci_emul_membase32; pci_emul_membase64 += BUSMEM_ROUNDUP; pci_emul_membase64 = roundup2(pci_emul_membase64, BUSMEM_ROUNDUP); bi->memlimit64 = pci_emul_membase64; } /* * PCI backends are initialized before routing INTx interrupts * so that LPC devices are able to reserve ISA IRQs before * routing PIRQ pins. 
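 *
 * For reference, the 64-bit MMIO window set up in init_pci() above
 * starts at 4GB plus the guest's high memory, rounded up to
 * PCI_EMUL_MEMSIZE64 (32GB).  Worked example for a guest with 16GB of
 * RAM above 4GB (illustrative numbers):
 *
 *	pci_emul_membase64 = roundup2(4GB + 16GB, 32GB);	-> 32GB
 *	pci_emul_memlim64  = 32GB + 32GB;			-> 64GB
 *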
*/ for (bus = 0; bus < MAXBUSES; bus++) { if ((bi = pci_businfo[bus]) == NULL) continue; for (slot = 0; slot < MAXSLOTS; slot++) { si = &bi->slotinfo[slot]; for (func = 0; func < MAXFUNCS; func++) { fi = &si->si_funcs[func]; if (fi->fi_devi == NULL) continue; pci_lintr_route(fi->fi_devi); } } } lpc_pirq_routed(); /* * The guest physical memory map looks like the following: * [0, lowmem) guest system memory * [lowmem, 0xC0000000) memory hole (may be absent) * [0xC0000000, 0xE0000000) PCI hole (32-bit BAR allocation) * [0xE0000000, 0xF0000000) PCI extended config window * [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware * [4GB, 4GB + highmem) */ /* * Accesses to memory addresses that are not allocated to system * memory or PCI devices return 0xff's. */ lowmem = vm_get_lowmem_size(ctx); bzero(&mr, sizeof(struct mem_range)); mr.name = "PCI hole"; mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; mr.base = lowmem; mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem; mr.handler = pci_emul_fallback_handler; error = register_mem_fallback(&mr); assert(error == 0); /* PCI extended config space */ bzero(&mr, sizeof(struct mem_range)); mr.name = "PCI ECFG"; mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; mr.base = PCI_EMUL_ECFG_BASE; mr.size = PCI_EMUL_ECFG_SIZE; mr.handler = pci_emul_ecfg_handler; error = register_mem(&mr); assert(error == 0); return (0); } static void pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, void *arg) { dsdt_line(" Package ()"); dsdt_line(" {"); dsdt_line(" 0x%X,", slot << 16 | 0xffff); dsdt_line(" 0x%02X,", pin - 1); dsdt_line(" Zero,"); dsdt_line(" 0x%X", ioapic_irq); dsdt_line(" },"); } static void pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, void *arg) { char *name; name = lpc_pirq_name(pirq_pin); if (name == NULL) return; dsdt_line(" Package ()"); dsdt_line(" {"); dsdt_line(" 0x%X,", slot << 16 | 0xffff); dsdt_line(" 0x%02X,", pin - 1); dsdt_line(" %s,", name); dsdt_line(" 0x00"); dsdt_line(" },"); free(name); } /* * A bhyve virtual machine has a flat PCI hierarchy with a root port * corresponding to each PCI bus. */ static void pci_bus_write_dsdt(int bus) { struct businfo *bi; struct slotinfo *si; struct pci_devinst *pi; int count, func, slot; /* * If there are no devices on this 'bus' then just return. */ if ((bi = pci_businfo[bus]) == NULL) { /* * Bus 0 is special because it decodes the I/O ports used * for PCI config space access even if there are no devices * on it. 
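 *
 * For reference, each _PRT entry emitted by pci_apic_prt_entry() above
 * ties one slot/pin pair to a fixed I/O APIC input.  For a device in
 * slot 3 whose INTA# was routed to I/O APIC IRQ 16 (hypothetical
 * numbers), the generated package would look roughly like:
 *
 *	Package ()
 *	{
 *	    0x3FFFF,
 *	    0x00,
 *	    Zero,
 *	    0x10
 *	},
 *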
*/ if (bus != 0) return; } dsdt_line(" Device (PC%02X)", bus); dsdt_line(" {"); dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); dsdt_line(" Method (_BBN, 0, NotSerialized)"); dsdt_line(" {"); dsdt_line(" Return (0x%08X)", bus); dsdt_line(" }"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " "MaxFixed, PosDecode,"); dsdt_line(" 0x0000, // Granularity"); dsdt_line(" 0x%04X, // Range Minimum", bus); dsdt_line(" 0x%04X, // Range Maximum", bus); dsdt_line(" 0x0000, // Translation Offset"); dsdt_line(" 0x0001, // Length"); dsdt_line(" ,, )"); if (bus == 0) { dsdt_indent(3); dsdt_fixed_ioport(0xCF8, 8); dsdt_unindent(3); dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " "PosDecode, EntireRange,"); dsdt_line(" 0x0000, // Granularity"); dsdt_line(" 0x0000, // Range Minimum"); dsdt_line(" 0x0CF7, // Range Maximum"); dsdt_line(" 0x0000, // Translation Offset"); dsdt_line(" 0x0CF8, // Length"); dsdt_line(" ,, , TypeStatic)"); dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " "PosDecode, EntireRange,"); dsdt_line(" 0x0000, // Granularity"); dsdt_line(" 0x0D00, // Range Minimum"); dsdt_line(" 0x%04X, // Range Maximum", PCI_EMUL_IOBASE - 1); dsdt_line(" 0x0000, // Translation Offset"); dsdt_line(" 0x%04X, // Length", PCI_EMUL_IOBASE - 0x0D00); dsdt_line(" ,, , TypeStatic)"); if (bi == NULL) { dsdt_line(" })"); goto done; } } assert(bi != NULL); /* i/o window */ dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " "PosDecode, EntireRange,"); dsdt_line(" 0x0000, // Granularity"); dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); dsdt_line(" 0x%04X, // Range Maximum", bi->iolimit - 1); dsdt_line(" 0x0000, // Translation Offset"); dsdt_line(" 0x%04X, // Length", bi->iolimit - bi->iobase); dsdt_line(" ,, , TypeStatic)"); /* mmio window (32-bit) */ dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); dsdt_line(" 0x00000000, // Granularity"); dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); dsdt_line(" 0x%08X, // Range Maximum\n", bi->memlimit32 - 1); dsdt_line(" 0x00000000, // Translation Offset"); dsdt_line(" 0x%08X, // Length\n", bi->memlimit32 - bi->membase32); dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); /* mmio window (64-bit) */ dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); dsdt_line(" 0x0000000000000000, // Granularity"); dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); dsdt_line(" 0x%016lX, // Range Maximum\n", bi->memlimit64 - 1); dsdt_line(" 0x0000000000000000, // Translation Offset"); dsdt_line(" 0x%016lX, // Length\n", bi->memlimit64 - bi->membase64); dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); dsdt_line(" })"); count = pci_count_lintr(bus); if (count != 0) { dsdt_indent(2); dsdt_line("Name (PPRT, Package ()"); dsdt_line("{"); pci_walk_lintr(bus, pci_pirq_prt_entry, NULL); dsdt_line("})"); dsdt_line("Name (APRT, Package ()"); dsdt_line("{"); pci_walk_lintr(bus, pci_apic_prt_entry, NULL); dsdt_line("})"); dsdt_line("Method (_PRT, 0, NotSerialized)"); dsdt_line("{"); dsdt_line(" If (PICM)"); dsdt_line(" {"); dsdt_line(" Return (APRT)"); dsdt_line(" }"); dsdt_line(" Else"); dsdt_line(" {"); dsdt_line(" Return (PPRT)"); dsdt_line(" }"); dsdt_line("}"); dsdt_unindent(2); } dsdt_indent(2); for (slot = 0; slot < MAXSLOTS; slot++) { si = &bi->slotinfo[slot]; for (func = 0; func < MAXFUNCS; func++) { pi = si->si_funcs[func].fi_devi; if (pi != NULL && 
pi->pi_d->pe_write_dsdt != NULL) pi->pi_d->pe_write_dsdt(pi); } } dsdt_unindent(2); done: dsdt_line(" }"); } void pci_write_dsdt(void) { int bus; dsdt_indent(1); dsdt_line("Name (PICM, 0x00)"); dsdt_line("Method (_PIC, 1, NotSerialized)"); dsdt_line("{"); dsdt_line(" Store (Arg0, PICM)"); dsdt_line("}"); dsdt_line(""); dsdt_line("Scope (_SB)"); dsdt_line("{"); for (bus = 0; bus < MAXBUSES; bus++) pci_bus_write_dsdt(bus); dsdt_line("}"); dsdt_unindent(1); } int pci_bus_configured(int bus) { assert(bus >= 0 && bus < MAXBUSES); return (pci_businfo[bus] != NULL); } int pci_msi_enabled(struct pci_devinst *pi) { return (pi->pi_msi.enabled); } int pci_msi_maxmsgnum(struct pci_devinst *pi) { if (pi->pi_msi.enabled) return (pi->pi_msi.maxmsgnum); else return (0); } int pci_msix_enabled(struct pci_devinst *pi) { return (pi->pi_msix.enabled && !pi->pi_msi.enabled); } void pci_generate_msix(struct pci_devinst *pi, int index) { struct msix_table_entry *mte; if (!pci_msix_enabled(pi)) return; if (pi->pi_msix.function_mask) return; if (index >= pi->pi_msix.table_count) return; mte = &pi->pi_msix.table[index]; if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { /* XXX Set PBA bit if interrupt is disabled */ vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data); } } void pci_generate_msi(struct pci_devinst *pi, int index) { if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, pi->pi_msi.msg_data + index); } } static bool pci_lintr_permitted(struct pci_devinst *pi) { uint16_t cmd; cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); return (!(pi->pi_msi.enabled || pi->pi_msix.enabled || (cmd & PCIM_CMD_INTxDIS))); } void pci_lintr_request(struct pci_devinst *pi) { struct businfo *bi; struct slotinfo *si; int bestpin, bestcount, pin; bi = pci_businfo[pi->pi_bus]; assert(bi != NULL); /* * Just allocate a pin from our slot. The pin will be * assigned IRQs later when interrupts are routed. */ si = &bi->slotinfo[pi->pi_slot]; bestpin = 0; bestcount = si->si_intpins[0].ii_count; for (pin = 1; pin < 4; pin++) { if (si->si_intpins[pin].ii_count < bestcount) { bestpin = pin; bestcount = si->si_intpins[pin].ii_count; } } si->si_intpins[bestpin].ii_count++; pi->pi_lintr.pin = bestpin + 1; pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); } static void pci_lintr_route(struct pci_devinst *pi) { struct businfo *bi; struct intxinfo *ii; if (pi->pi_lintr.pin == 0) return; bi = pci_businfo[pi->pi_bus]; assert(bi != NULL); ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1]; /* * Attempt to allocate an I/O APIC pin for this intpin if one * is not yet assigned. */ if (ii->ii_ioapic_irq == 0) ii->ii_ioapic_irq = ioapic_pci_alloc_irq(pi); assert(ii->ii_ioapic_irq > 0); /* * Attempt to allocate a PIRQ pin for this intpin if one is * not yet assigned. 
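 *
 * As a worked example of the pin balancing in pci_lintr_request()
 * above: the first function placed in a slot finds all four intpin
 * counts at zero and is given INTA#, while a second function in the
 * same slot sees
 *
 *	counts = { 1, 0, 0, 0 }	-> bestpin = 1	-> INTB# (pin 2)
 *
 * so multi-function slots spread their interrupts across the pins.
 *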
*/ if (ii->ii_pirq_pin == 0) ii->ii_pirq_pin = pirq_alloc_pin(pi); assert(ii->ii_pirq_pin > 0); pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq; pi->pi_lintr.pirq_pin = ii->ii_pirq_pin; pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin)); } void pci_lintr_assert(struct pci_devinst *pi) { assert(pi->pi_lintr.pin > 0); pthread_mutex_lock(&pi->pi_lintr.lock); if (pi->pi_lintr.state == IDLE) { if (pci_lintr_permitted(pi)) { pi->pi_lintr.state = ASSERTED; pci_irq_assert(pi); } else pi->pi_lintr.state = PENDING; } pthread_mutex_unlock(&pi->pi_lintr.lock); } void pci_lintr_deassert(struct pci_devinst *pi) { assert(pi->pi_lintr.pin > 0); pthread_mutex_lock(&pi->pi_lintr.lock); if (pi->pi_lintr.state == ASSERTED) { pi->pi_lintr.state = IDLE; pci_irq_deassert(pi); } else if (pi->pi_lintr.state == PENDING) pi->pi_lintr.state = IDLE; pthread_mutex_unlock(&pi->pi_lintr.lock); } static void pci_lintr_update(struct pci_devinst *pi) { pthread_mutex_lock(&pi->pi_lintr.lock); if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { pci_irq_deassert(pi); pi->pi_lintr.state = PENDING; } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { pi->pi_lintr.state = ASSERTED; pci_irq_assert(pi); } pthread_mutex_unlock(&pi->pi_lintr.lock); } int pci_count_lintr(int bus) { int count, slot, pin; struct slotinfo *slotinfo; count = 0; if (pci_businfo[bus] != NULL) { for (slot = 0; slot < MAXSLOTS; slot++) { slotinfo = &pci_businfo[bus]->slotinfo[slot]; for (pin = 0; pin < 4; pin++) { if (slotinfo->si_intpins[pin].ii_count != 0) count++; } } } return (count); } void pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg) { struct businfo *bi; struct slotinfo *si; struct intxinfo *ii; int slot, pin; if ((bi = pci_businfo[bus]) == NULL) return; for (slot = 0; slot < MAXSLOTS; slot++) { si = &bi->slotinfo[slot]; for (pin = 0; pin < 4; pin++) { ii = &si->si_intpins[pin]; if (ii->ii_count != 0) cb(bus, slot, pin + 1, ii->ii_pirq_pin, ii->ii_ioapic_irq, arg); } } } /* * Return 1 if the emulated device in 'slot' is a multi-function device. * Return 0 otherwise. */ static int pci_emul_is_mfdev(int bus, int slot) { struct businfo *bi; struct slotinfo *si; int f, numfuncs; numfuncs = 0; if ((bi = pci_businfo[bus]) != NULL) { si = &bi->slotinfo[slot]; for (f = 0; f < MAXFUNCS; f++) { if (si->si_funcs[f].fi_devi != NULL) { numfuncs++; } } } return (numfuncs > 1); } /* * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on * whether or not is a multi-function being emulated in the pci 'slot'. */ static void pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) { int mfdev; if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { mfdev = pci_emul_is_mfdev(bus, slot); switch (bytes) { case 1: case 2: *rv &= ~PCIM_MFDEV; if (mfdev) { *rv |= PCIM_MFDEV; } break; case 4: *rv &= ~(PCIM_MFDEV << 16); if (mfdev) { *rv |= (PCIM_MFDEV << 16); } break; } } } /* * Update device state in response to changes to the PCI command * register. */ void pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old) { int i; uint16_t changed, new; new = pci_get_cfgdata16(pi, PCIR_COMMAND); changed = old ^ new; /* * If the MMIO or I/O address space decoding has changed then * register/unregister all BARs that decode that address space. */ for (i = 0; i <= PCI_BARMAX; i++) { switch (pi->pi_bar[i].type) { case PCIBAR_NONE: case PCIBAR_MEMHI64: break; case PCIBAR_IO: /* I/O address space decoding changed? 
*/ if (changed & PCIM_CMD_PORTEN) { if (new & PCIM_CMD_PORTEN) register_bar(pi, i); else unregister_bar(pi, i); } break; case PCIBAR_MEM32: case PCIBAR_MEM64: /* MMIO address space decoding changed? */ if (changed & PCIM_CMD_MEMEN) { if (new & PCIM_CMD_MEMEN) register_bar(pi, i); else unregister_bar(pi, i); } break; default: assert(0); } } /* * If INTx has been unmasked and is pending, assert the * interrupt. */ pci_lintr_update(pi); } static void pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes) { int rshift; uint32_t cmd, old, readonly; cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ /* * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3. * * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are * 'write 1 to clear'. However these bits are not set to '1' by * any device emulation so it is simpler to treat them as readonly. */ rshift = (coff & 0x3) * 8; readonly = 0xFFFFF880 >> rshift; old = CFGREAD(pi, coff, bytes); new &= ~readonly; new |= (old & readonly); CFGWRITE(pi, coff, new, bytes); /* update config */ pci_emul_cmd_changed(pi, cmd); } static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, int coff, int bytes, uint32_t *eax) { struct businfo *bi; struct slotinfo *si; struct pci_devinst *pi; struct pci_devemu *pe; int idx, needcfg; uint64_t addr, bar, mask; if ((bi = pci_businfo[bus]) != NULL) { si = &bi->slotinfo[slot]; pi = si->si_funcs[func].fi_devi; } else pi = NULL; /* * Just return if there is no device at this slot:func or if the * the guest is doing an un-aligned access. */ if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) || (coff & (bytes - 1)) != 0) { if (in) *eax = 0xffffffff; return; } /* * Ignore all writes beyond the standard config space and return all * ones on reads. */ if (coff >= PCI_REGMAX + 1) { if (in) { *eax = 0xffffffff; /* * Extended capabilities begin at offset 256 in config * space. Absence of extended capabilities is signaled * with all 0s in the extended capability header at * offset 256. */ if (coff <= PCI_REGMAX + 4) *eax = 0x00000000; } return; } pe = pi->pi_d; /* * Config read */ if (in) { /* Let the device emulation override the default handler */ if (pe->pe_cfgread != NULL) { needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes, eax); } else { needcfg = 1; } if (needcfg) *eax = CFGREAD(pi, coff, bytes); pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax); } else { /* Let the device emulation override the default handler */ if (pe->pe_cfgwrite != NULL && (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0) return; /* * Special handling for write to BAR registers */ if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) { /* * Ignore writes to BAR registers that are not * 4-byte aligned. 
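 *
 * This is also the path that implements BAR sizing: when the guest
 * writes all ones to a BAR, only the address bits above the BAR size
 * survive the mask computed below, so the value read back reveals the
 * size.  Worked example for a 4KB 32-bit memory BAR, ignoring the low
 * attribute bits in lobits (illustration only):
 *
 *	mask = ~(0x1000 - 1);			-> 0xfffff000
 *	bar  = 0xffffffff & mask;		-> 0xfffff000
 *	size = ~(bar & ~0xf) + 1;		-> 0x1000 (guest-side math)
 *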
*/ if (bytes != 4 || (coff & 0x3) != 0) return; idx = (coff - PCIR_BAR(0)) / 4; mask = ~(pi->pi_bar[idx].size - 1); switch (pi->pi_bar[idx].type) { case PCIBAR_NONE: pi->pi_bar[idx].addr = bar = 0; break; case PCIBAR_IO: addr = *eax & mask; addr &= 0xffff; bar = addr | pi->pi_bar[idx].lobits; /* * Register the new BAR value for interception */ if (addr != pi->pi_bar[idx].addr) { update_bar_address(pi, addr, idx, PCIBAR_IO); } break; case PCIBAR_MEM32: addr = bar = *eax & mask; bar |= pi->pi_bar[idx].lobits; if (addr != pi->pi_bar[idx].addr) { update_bar_address(pi, addr, idx, PCIBAR_MEM32); } break; case PCIBAR_MEM64: addr = bar = *eax & mask; bar |= pi->pi_bar[idx].lobits; if (addr != (uint32_t)pi->pi_bar[idx].addr) { update_bar_address(pi, addr, idx, PCIBAR_MEM64); } break; case PCIBAR_MEMHI64: mask = ~(pi->pi_bar[idx - 1].size - 1); addr = ((uint64_t)*eax << 32) & mask; bar = addr >> 32; if (bar != pi->pi_bar[idx - 1].addr >> 32) { update_bar_address(pi, addr, idx - 1, PCIBAR_MEMHI64); } break; default: assert(0); } pci_set_cfgdata32(pi, coff, bar); } else if (pci_emul_iscap(pi, coff)) { pci_emul_capwrite(pi, coff, bytes, *eax, 0, 0); } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) { pci_emul_cmdsts_write(pi, coff, *eax, bytes); } else { CFGWRITE(pi, coff, *eax, bytes); } } } static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; static int pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) { uint32_t x; if (bytes != 4) { if (in) *eax = (bytes == 2) ? 0xffff : 0xff; return (0); } if (in) { x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff; if (cfgenable) x |= CONF1_ENABLE; *eax = x; } else { x = *eax; cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; cfgoff = (x & PCI_REGMAX) & ~0x03; cfgfunc = (x >> 8) & PCI_FUNCMAX; cfgslot = (x >> 11) & PCI_SLOTMAX; cfgbus = (x >> 16) & PCI_BUSMAX; } return (0); } INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); static int pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) { int coff; assert(bytes == 1 || bytes == 2 || bytes == 4); coff = cfgoff + (port - CONF1_DATA_PORT); if (cfgenable) { pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes, eax); } else { /* Ignore accesses to cfgdata if not enabled by cfgaddr */ if (in) *eax = 0xffffffff; } return (0); } INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); #ifdef BHYVE_SNAPSHOT /* * Saves/restores PCI device emulated state. Returns 0 on success. 
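 *
 * For reference, the legacy config mechanism handled by
 * pci_emul_cfgaddr()/pci_emul_cfgdata() above encodes the target in the
 * dword written to port 0xCF8.  Worked example for bus 0, slot 3,
 * function 0, register 0x08, with outl()/inl() as placeholder
 * guest-side port accessors (illustration only):
 *
 *	outl(0xCF8, 0x80000000 | (0 << 16) | (3 << 11) | (0 << 8) | 0x08);
 *	val = inl(0xCFC);	-> class/revision dword of device 0:3:0
 *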
*/ static int pci_snapshot_pci_dev(struct vm_snapshot_meta *meta) { struct pci_devinst *pi; int i; int ret; pi = meta->dev_data; SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.enabled, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.addr, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.msg_data, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.maxmsgnum, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.enabled, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_bar, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_bar, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_offset, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_count, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_offset, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_size, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.function_mask, meta, ret, done); SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata), meta, ret, done); for (i = 0; i < nitems(pi->pi_bar); i++) { SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done); } /* Restore MSI-X table. */ for (i = 0; i < pi->pi_msix.table_count; i++) { SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control, meta, ret, done); } done: return (ret); } static int pci_find_slotted_dev(const char *dev_name, struct pci_devemu **pde, struct pci_devinst **pdi) { struct businfo *bi; struct slotinfo *si; struct funcinfo *fi; int bus, slot, func; assert(dev_name != NULL); assert(pde != NULL); assert(pdi != NULL); for (bus = 0; bus < MAXBUSES; bus++) { if ((bi = pci_businfo[bus]) == NULL) continue; for (slot = 0; slot < MAXSLOTS; slot++) { si = &bi->slotinfo[slot]; for (func = 0; func < MAXFUNCS; func++) { fi = &si->si_funcs[func]; if (fi->fi_pde == NULL) continue; if (strcmp(dev_name, fi->fi_pde->pe_emu) != 0) continue; *pde = fi->fi_pde; *pdi = fi->fi_devi; return (0); } } } return (EINVAL); } int pci_snapshot(struct vm_snapshot_meta *meta) { struct pci_devemu *pde; struct pci_devinst *pdi; int ret; assert(meta->dev_name != NULL); ret = pci_find_slotted_dev(meta->dev_name, &pde, &pdi); if (ret != 0) { fprintf(stderr, "%s: no such name: %s\r\n", __func__, meta->dev_name); memset(meta->buffer.buf_start, 0, meta->buffer.buf_size); return (0); } meta->dev_data = pdi; if (pde->pe_snapshot == NULL) { fprintf(stderr, "%s: not implemented yet for: %s\r\n", __func__, meta->dev_name); return (-1); } ret = pci_snapshot_pci_dev(meta); if (ret != 0) { fprintf(stderr, "%s: failed to snapshot pci dev\r\n", __func__); return (-1); } ret = (*pde->pe_snapshot)(meta); return (ret); } int pci_pause(struct vmctx *ctx, const char *dev_name) { struct pci_devemu *pde; struct pci_devinst *pdi; int ret; assert(dev_name != NULL); ret = pci_find_slotted_dev(dev_name, &pde, &pdi); if (ret != 0) { /* * It is possible to call this function without * checking that the device is inserted first. */ fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name); return (0); } if (pde->pe_pause == NULL) { /* The pause/resume functionality is optional. 
*/ fprintf(stderr, "%s: not implemented for: %s\n", __func__, dev_name); return (0); } return (*pde->pe_pause)(ctx, pdi); } int pci_resume(struct vmctx *ctx, const char *dev_name) { struct pci_devemu *pde; struct pci_devinst *pdi; int ret; assert(dev_name != NULL); ret = pci_find_slotted_dev(dev_name, &pde, &pdi); if (ret != 0) { /* * It is possible to call this function without * checking that the device is inserted first. */ fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name); return (0); } if (pde->pe_resume == NULL) { /* The pause/resume functionality is optional. */ fprintf(stderr, "%s: not implemented for: %s\n", __func__, dev_name); return (0); } return (*pde->pe_resume)(ctx, pdi); } #endif #define PCI_EMUL_TEST #ifdef PCI_EMUL_TEST /* * Define a dummy test device */ #define DIOSZ 8 #define DMEMSZ 4096 struct pci_emul_dsoftc { uint8_t ioregs[DIOSZ]; uint8_t memregs[2][DMEMSZ]; }; #define PCI_EMUL_MSI_MSGS 4 #define PCI_EMUL_MSIX_MSGS 16 static int pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) { int error; struct pci_emul_dsoftc *sc; sc = calloc(1, sizeof(struct pci_emul_dsoftc)); pi->pi_arg = sc; pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); assert(error == 0); error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); assert(error == 0); error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); assert(error == 0); error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ); assert(error == 0); return (0); } static void pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { int i; struct pci_emul_dsoftc *sc = pi->pi_arg; if (baridx == 0) { if (offset + size > DIOSZ) { printf("diow: iow too large, offset %ld size %d\n", offset, size); return; } if (size == 1) { sc->ioregs[offset] = value & 0xff; } else if (size == 2) { *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; } else if (size == 4) { *(uint32_t *)&sc->ioregs[offset] = value; } else { printf("diow: iow unknown size %d\n", size); } /* * Special magic value to generate an interrupt */ if (offset == 4 && size == 4 && pci_msi_enabled(pi)) pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); if (value == 0xabcdef) { for (i = 0; i < pci_msi_maxmsgnum(pi); i++) pci_generate_msi(pi, i); } } if (baridx == 1 || baridx == 2) { if (offset + size > DMEMSZ) { printf("diow: memw too large, offset %ld size %d\n", offset, size); return; } i = baridx - 1; /* 'memregs' index */ if (size == 1) { sc->memregs[i][offset] = value; } else if (size == 2) { *(uint16_t *)&sc->memregs[i][offset] = value; } else if (size == 4) { *(uint32_t *)&sc->memregs[i][offset] = value; } else if (size == 8) { *(uint64_t *)&sc->memregs[i][offset] = value; } else { printf("diow: memw unknown size %d\n", size); } - + /* * magic interrupt ?? 
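 * (No interrupt is actually raised from the memory BARs; only the I/O BAR
 * path above generates MSIs, either on the offset-4 write or when the
 * 0xabcdef broadcast value is written.)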
*/ } if (baridx > 2 || baridx < 0) { printf("diow: unknown bar idx %d\n", baridx); } } static uint64_t pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { struct pci_emul_dsoftc *sc = pi->pi_arg; uint32_t value; int i; if (baridx == 0) { if (offset + size > DIOSZ) { printf("dior: ior too large, offset %ld size %d\n", offset, size); return (0); } - + value = 0; if (size == 1) { value = sc->ioregs[offset]; } else if (size == 2) { value = *(uint16_t *) &sc->ioregs[offset]; } else if (size == 4) { value = *(uint32_t *) &sc->ioregs[offset]; } else { printf("dior: ior unknown size %d\n", size); } } if (baridx == 1 || baridx == 2) { if (offset + size > DMEMSZ) { printf("dior: memr too large, offset %ld size %d\n", offset, size); return (0); } - + i = baridx - 1; /* 'memregs' index */ if (size == 1) { value = sc->memregs[i][offset]; } else if (size == 2) { value = *(uint16_t *) &sc->memregs[i][offset]; } else if (size == 4) { value = *(uint32_t *) &sc->memregs[i][offset]; } else if (size == 8) { value = *(uint64_t *) &sc->memregs[i][offset]; } else { printf("dior: ior unknown size %d\n", size); } } if (baridx > 2 || baridx < 0) { printf("dior: unknown bar idx %d\n", baridx); return (0); } return (value); } #ifdef BHYVE_SNAPSHOT int pci_emul_snapshot(struct vm_snapshot_meta *meta) { return (0); } #endif struct pci_devemu pci_dummy = { .pe_emu = "dummy", .pe_init = pci_emul_dinit, .pe_barwrite = pci_emul_diow, .pe_barread = pci_emul_dior, #ifdef BHYVE_SNAPSHOT .pe_snapshot = pci_emul_snapshot, #endif }; PCI_EMUL_SET(pci_dummy); #endif /* PCI_EMUL_TEST */ diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h index 6eac0720f09f..651a0a52f6ec 100644 --- a/usr.sbin/bhyve/pci_emul.h +++ b/usr.sbin/bhyve/pci_emul.h @@ -1,308 +1,308 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _PCI_EMUL_H_ #define _PCI_EMUL_H_ #include #include #include #include #include #include #include #define PCI_BARMAX PCIR_MAX_BAR_0 /* BAR registers in a Type 0 header */ struct vmctx; struct pci_devinst; struct memory_region; struct vm_snapshot_meta; struct pci_devemu { char *pe_emu; /* Name of device emulation */ /* instance creation */ int (*pe_init)(struct vmctx *, struct pci_devinst *, nvlist_t *); int (*pe_legacy_config)(nvlist_t *, const char *); const char *pe_alias; /* ACPI DSDT enumeration */ void (*pe_write_dsdt)(struct pci_devinst *); /* config space read/write callbacks */ int (*pe_cfgwrite)(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int offset, int bytes, uint32_t val); int (*pe_cfgread)(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int offset, int bytes, uint32_t *retval); /* BAR read/write callbacks */ void (*pe_barwrite)(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value); uint64_t (*pe_barread)(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size); void (*pe_baraddr)(struct vmctx *ctx, struct pci_devinst *pi, int baridx, int enabled, uint64_t address); /* Save/restore device state */ int (*pe_snapshot)(struct vm_snapshot_meta *meta); int (*pe_pause)(struct vmctx *ctx, struct pci_devinst *pi); int (*pe_resume)(struct vmctx *ctx, struct pci_devinst *pi); }; #define PCI_EMUL_SET(x) DATA_SET(pci_devemu_set, x); enum pcibar_type { PCIBAR_NONE, PCIBAR_IO, PCIBAR_MEM32, PCIBAR_MEM64, PCIBAR_MEMHI64 }; struct pcibar { enum pcibar_type type; /* io or memory */ uint64_t size; uint64_t addr; uint8_t lobits; }; #define PI_NAMESZ 40 struct msix_table_entry { uint64_t addr; uint32_t msg_data; uint32_t vector_control; } __packed; -/* +/* * In case the structure is modified to hold extra information, use a define * for the size that should be emulated. 
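 * Today the emulated size and the structure size coincide (8 + 4 + 4 packed
 * bytes).  A compile-time check of that assumption, in the style of the
 * other static_assert()s in this header, could read:
 *
 *	static_assert(sizeof(struct msix_table_entry) == MSIX_TABLE_ENTRY_SIZE,
 *	    "compile-time assertion failed");
 *
 * and would need revisiting if extra bookkeeping fields were ever added.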
*/ #define MSIX_TABLE_ENTRY_SIZE 16 #define MAX_MSIX_TABLE_ENTRIES 2048 #define PBA_SIZE(msgnum) (roundup2((msgnum), 64) / 8) enum lintr_stat { IDLE, ASSERTED, PENDING }; struct pci_devinst { struct pci_devemu *pi_d; struct vmctx *pi_vmctx; uint8_t pi_bus, pi_slot, pi_func; char pi_name[PI_NAMESZ]; int pi_bar_getsize; int pi_prevcap; int pi_capend; struct { int8_t pin; enum lintr_stat state; int pirq_pin; int ioapic_irq; pthread_mutex_t lock; } pi_lintr; struct { int enabled; uint64_t addr; uint64_t msg_data; int maxmsgnum; } pi_msi; struct { int enabled; int table_bar; int pba_bar; uint32_t table_offset; int table_count; uint32_t pba_offset; int pba_size; - int function_mask; + int function_mask; struct msix_table_entry *table; /* allocated at runtime */ uint8_t *mapped_addr; size_t mapped_size; } pi_msix; void *pi_arg; /* devemu-private data */ u_char pi_cfgdata[PCI_REGMAX + 1]; struct pcibar pi_bar[PCI_BARMAX + 1]; }; struct msicap { uint8_t capid; uint8_t nextptr; uint16_t msgctrl; uint32_t addrlo; uint32_t addrhi; uint16_t msgdata; } __packed; static_assert(sizeof(struct msicap) == 14, "compile-time assertion failed"); struct msixcap { uint8_t capid; uint8_t nextptr; uint16_t msgctrl; uint32_t table_info; /* bar index and offset within it */ uint32_t pba_info; /* bar index and offset within it */ } __packed; static_assert(sizeof(struct msixcap) == 12, "compile-time assertion failed"); struct pciecap { uint8_t capid; uint8_t nextptr; uint16_t pcie_capabilities; uint32_t dev_capabilities; /* all devices */ uint16_t dev_control; uint16_t dev_status; uint32_t link_capabilities; /* devices with links */ uint16_t link_control; uint16_t link_status; uint32_t slot_capabilities; /* ports with slots */ uint16_t slot_control; uint16_t slot_status; uint16_t root_control; /* root ports */ uint16_t root_capabilities; uint32_t root_status; uint32_t dev_capabilities2; /* all devices */ uint16_t dev_control2; uint16_t dev_status2; uint32_t link_capabilities2; /* devices with links */ uint16_t link_control2; uint16_t link_status2; uint32_t slot_capabilities2; /* ports with slots */ uint16_t slot_control2; uint16_t slot_status2; } __packed; static_assert(sizeof(struct pciecap) == 60, "compile-time assertion failed"); typedef void (*pci_lintr_cb)(int b, int s, int pin, int pirq_pin, int ioapic_irq, void *arg); int init_pci(struct vmctx *ctx); void pci_callback(void); int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, uint64_t size); int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum); int pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type); void pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val, uint8_t capoff, int capid); void pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old); void pci_generate_msi(struct pci_devinst *pi, int msgnum); void pci_generate_msix(struct pci_devinst *pi, int msgnum); void pci_lintr_assert(struct pci_devinst *pi); void pci_lintr_deassert(struct pci_devinst *pi); void pci_lintr_request(struct pci_devinst *pi); int pci_msi_enabled(struct pci_devinst *pi); int pci_msix_enabled(struct pci_devinst *pi); int pci_msix_table_bar(struct pci_devinst *pi); int pci_msix_pba_bar(struct pci_devinst *pi); int pci_msi_maxmsgnum(struct pci_devinst *pi); int pci_parse_legacy_config(nvlist_t *nvl, const char *opt); int pci_parse_slot(char *opt); void pci_print_supported_devices(); void pci_populate_msicap(struct msicap *cap, int msgs, int nextptr); int pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, 
int barnum); int pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, uint64_t value); uint64_t pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size); int pci_count_lintr(int bus); void pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg); void pci_write_dsdt(void); uint64_t pci_ecfg_base(void); int pci_bus_configured(int bus); #ifdef BHYVE_SNAPSHOT int pci_snapshot(struct vm_snapshot_meta *meta); int pci_pause(struct vmctx *ctx, const char *dev_name); int pci_resume(struct vmctx *ctx, const char *dev_name); #endif -static __inline void +static __inline void pci_set_cfgdata8(struct pci_devinst *pi, int offset, uint8_t val) { assert(offset <= PCI_REGMAX); *(uint8_t *)(pi->pi_cfgdata + offset) = val; } -static __inline void +static __inline void pci_set_cfgdata16(struct pci_devinst *pi, int offset, uint16_t val) { assert(offset <= (PCI_REGMAX - 1) && (offset & 1) == 0); *(uint16_t *)(pi->pi_cfgdata + offset) = val; } -static __inline void +static __inline void pci_set_cfgdata32(struct pci_devinst *pi, int offset, uint32_t val) { assert(offset <= (PCI_REGMAX - 3) && (offset & 3) == 0); *(uint32_t *)(pi->pi_cfgdata + offset) = val; } static __inline uint8_t pci_get_cfgdata8(struct pci_devinst *pi, int offset) { assert(offset <= PCI_REGMAX); return (*(uint8_t *)(pi->pi_cfgdata + offset)); } static __inline uint16_t pci_get_cfgdata16(struct pci_devinst *pi, int offset) { assert(offset <= (PCI_REGMAX - 1) && (offset & 1) == 0); return (*(uint16_t *)(pi->pi_cfgdata + offset)); } static __inline uint32_t pci_get_cfgdata32(struct pci_devinst *pi, int offset) { assert(offset <= (PCI_REGMAX - 3) && (offset & 3) == 0); return (*(uint32_t *)(pi->pi_cfgdata + offset)); } #endif /* _PCI_EMUL_H_ */ diff --git a/usr.sbin/bhyve/pci_fbuf.c b/usr.sbin/bhyve/pci_fbuf.c index 4bf64e3f2adc..e0d8167a5928 100644 --- a/usr.sbin/bhyve/pci_fbuf.c +++ b/usr.sbin/bhyve/pci_fbuf.c @@ -1,477 +1,477 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2015 Nahanni Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "bhyvegc.h" #include "bhyverun.h" #include "config.h" #include "debug.h" #include "console.h" #include "inout.h" #include "pci_emul.h" #include "rfb.h" #include "vga.h" /* * bhyve Framebuffer device emulation. * BAR0 points to the current mode information. * BAR1 is the 32-bit framebuffer address. * * -s ,fbuf,wait,vga=on|io|off,rfb=:port,w=width,h=height */ static int fbuf_debug = 1; #define DEBUG_INFO 1 #define DEBUG_VERBOSE 4 #define DPRINTF(level, params) if (level <= fbuf_debug) PRINTLN params #define KB (1024UL) #define MB (1024 * 1024UL) #define DMEMSZ 128 #define FB_SIZE (16*MB) #define COLS_MAX 1920 #define ROWS_MAX 1200 #define COLS_DEFAULT 1024 #define ROWS_DEFAULT 768 #define COLS_MIN 640 #define ROWS_MIN 480 struct pci_fbuf_softc { struct pci_devinst *fsc_pi; struct { uint32_t fbsize; uint16_t width; uint16_t height; uint16_t depth; uint16_t refreshrate; uint8_t reserved[116]; } __packed memregs; /* rfb server */ char *rfb_host; char *rfb_password; int rfb_port; int rfb_wait; int vga_enabled; int vga_full; uint32_t fbaddr; char *fb_base; uint16_t gc_width; uint16_t gc_height; void *vgasc; struct bhyvegc_image *gc_image; }; static struct pci_fbuf_softc *fbuf_sc; #define PCI_FBUF_MSI_MSGS 4 static void pci_fbuf_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { struct pci_fbuf_softc *sc; uint8_t *p; assert(baridx == 0); sc = pi->pi_arg; DPRINTF(DEBUG_VERBOSE, ("fbuf wr: offset 0x%lx, size: %d, value: 0x%lx", offset, size, value)); if (offset + size > DMEMSZ) { printf("fbuf: write too large, offset %ld size %d\n", offset, size); return; } p = (uint8_t *)&sc->memregs + offset; switch (size) { case 1: *p = value; break; case 2: *(uint16_t *)p = value; break; case 4: *(uint32_t *)p = value; break; case 8: *(uint64_t *)p = value; break; default: printf("fbuf: write unknown size %d\n", size); break; } if (!sc->gc_image->vgamode && sc->memregs.width == 0 && sc->memregs.height == 0) { DPRINTF(DEBUG_INFO, ("switching to VGA mode")); sc->gc_image->vgamode = 1; sc->gc_width = 0; sc->gc_height = 0; } else if (sc->gc_image->vgamode && sc->memregs.width != 0 && sc->memregs.height != 0) { DPRINTF(DEBUG_INFO, ("switching to VESA mode")); sc->gc_image->vgamode = 0; } } uint64_t pci_fbuf_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { struct pci_fbuf_softc *sc; uint8_t *p; uint64_t value; assert(baridx == 0); sc = pi->pi_arg; if (offset + size > DMEMSZ) { printf("fbuf: read too large, offset %ld size %d\n", offset, size); return (0); } p = (uint8_t *)&sc->memregs + offset; value = 0; switch (size) { case 1: value = *p; break; case 2: value = *(uint16_t *)p; break; case 4: value = *(uint32_t *)p; break; case 8: value = *(uint64_t *)p; break; default: printf("fbuf: read unknown size %d\n", size); break; } DPRINTF(DEBUG_VERBOSE, ("fbuf rd: offset 0x%lx, size: %d, value: 0x%lx", offset, size, value)); return (value); } static void pci_fbuf_baraddr(struct vmctx *ctx, struct pci_devinst *pi, int baridx, int enabled, uint64_t address) { struct pci_fbuf_softc *sc; int prot; if (baridx != 1) return; sc = pi->pi_arg; if (!enabled) { if (vm_munmap_memseg(ctx, sc->fbaddr, FB_SIZE) != 0) EPRINTLN("pci_fbuf: munmap_memseg failed"); sc->fbaddr = 0; } else { prot = PROT_READ | PROT_WRITE; if (vm_mmap_memseg(ctx, address, VM_FRAMEBUFFER, 0, FB_SIZE, 
prot) != 0) EPRINTLN("pci_fbuf: mmap_memseg failed"); sc->fbaddr = address; } } static int pci_fbuf_parse_config(struct pci_fbuf_softc *sc, nvlist_t *nvl) { const char *value; char *cp; sc->rfb_wait = get_config_bool_node_default(nvl, "wait", false); /* Prefer "rfb" to "tcp". */ value = get_config_value_node(nvl, "rfb"); if (value == NULL) value = get_config_value_node(nvl, "tcp"); if (value != NULL) { /* * IPv4 -- host-ip:port * IPv6 -- [host-ip%zone]:port * XXX for now port is mandatory for IPv4. */ if (value[0] == '[') { cp = strchr(value + 1, ']'); if (cp == NULL || cp == value + 1) { EPRINTLN("fbuf: Invalid IPv6 address: \"%s\"", value); return (-1); } sc->rfb_host = strndup(value + 1, cp - (value + 1)); cp++; if (*cp == ':') { cp++; if (*cp == '\0') { EPRINTLN( "fbuf: Missing port number: \"%s\"", value); return (-1); } sc->rfb_port = atoi(cp); } else if (*cp != '\0') { EPRINTLN("fbuf: Invalid IPv6 address: \"%s\"", value); return (-1); } } else { cp = strchr(value, ':'); if (cp == NULL) { sc->rfb_port = atoi(value); } else { sc->rfb_host = strndup(value, cp - value); cp++; if (*cp == '\0') { EPRINTLN( "fbuf: Missing port number: \"%s\"", value); return (-1); } sc->rfb_port = atoi(cp); } } } value = get_config_value_node(nvl, "vga"); if (value != NULL) { if (strcmp(value, "off") == 0) { sc->vga_enabled = 0; } else if (strcmp(value, "io") == 0) { sc->vga_enabled = 1; sc->vga_full = 0; } else if (strcmp(value, "on") == 0) { sc->vga_enabled = 1; sc->vga_full = 1; } else { EPRINTLN("fbuf: Invalid vga setting: \"%s\"", value); return (-1); } } value = get_config_value_node(nvl, "w"); if (value != NULL) { sc->memregs.width = atoi(value); if (sc->memregs.width > COLS_MAX) { EPRINTLN("fbuf: width %d too large", sc->memregs.width); return (-1); } if (sc->memregs.width == 0) sc->memregs.width = 1920; } value = get_config_value_node(nvl, "h"); if (value != NULL) { sc->memregs.height = atoi(value); if (sc->memregs.height > ROWS_MAX) { EPRINTLN("fbuf: height %d too large", sc->memregs.height); return (-1); } if (sc->memregs.height == 0) sc->memregs.height = 1080; } value = get_config_value_node(nvl, "password"); if (value != NULL) sc->rfb_password = strdup(value); return (0); } extern void vga_render(struct bhyvegc *gc, void *arg); void pci_fbuf_render(struct bhyvegc *gc, void *arg) { struct pci_fbuf_softc *sc; sc = arg; if (sc->vga_full && sc->gc_image->vgamode) { /* TODO: mode switching to vga and vesa should use the special * EFI-bhyve protocol port. 
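 * For now the switch is inferred in pci_fbuf_write(): the guest zeroing
 * both the width and height registers drops the console back to VGA mode,
 * and writing non-zero values switches it to the VESA framebuffer.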
*/ vga_render(gc, sc->vgasc); return; } if (sc->gc_width != sc->memregs.width || sc->gc_height != sc->memregs.height) { bhyvegc_resize(gc, sc->memregs.width, sc->memregs.height); sc->gc_width = sc->memregs.width; sc->gc_height = sc->memregs.height; } return; } static int pci_fbuf_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) { int error; struct pci_fbuf_softc *sc; - + if (fbuf_sc != NULL) { EPRINTLN("Only one frame buffer device is allowed."); return (-1); } sc = calloc(1, sizeof(struct pci_fbuf_softc)); pi->pi_arg = sc; /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, 0x40FB); pci_set_cfgdata16(pi, PCIR_VENDOR, 0xFB5D); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_DISPLAY); pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_DISPLAY_VGA); sc->fb_base = vm_create_devmem( ctx, VM_FRAMEBUFFER, "framebuffer", FB_SIZE); if (sc->fb_base == MAP_FAILED) { error = -1; goto done; } error = pci_emul_alloc_bar(pi, 0, PCIBAR_MEM32, DMEMSZ); assert(error == 0); error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, FB_SIZE); assert(error == 0); error = pci_emul_add_msicap(pi, PCI_FBUF_MSI_MSGS); assert(error == 0); sc->memregs.fbsize = FB_SIZE; sc->memregs.width = COLS_DEFAULT; sc->memregs.height = ROWS_DEFAULT; sc->memregs.depth = 32; sc->vga_enabled = 1; sc->vga_full = 0; sc->fsc_pi = pi; error = pci_fbuf_parse_config(sc, nvl); if (error != 0) goto done; /* XXX until VGA rendering is enabled */ if (sc->vga_full != 0) { EPRINTLN("pci_fbuf: VGA rendering not enabled"); goto done; } DPRINTF(DEBUG_INFO, ("fbuf frame buffer base: %p [sz %lu]", sc->fb_base, FB_SIZE)); console_init(sc->memregs.width, sc->memregs.height, sc->fb_base); console_fb_register(pci_fbuf_render, sc); if (sc->vga_enabled) sc->vgasc = vga_init(!sc->vga_full); sc->gc_image = console_get_image(); fbuf_sc = sc; memset((void *)sc->fb_base, 0, FB_SIZE); error = rfb_init(sc->rfb_host, sc->rfb_port, sc->rfb_wait, sc->rfb_password); done: if (error) free(sc); return (error); } #ifdef BHYVE_SNAPSHOT static int pci_fbuf_snapshot(struct vm_snapshot_meta *meta) { int ret; SNAPSHOT_BUF_OR_LEAVE(fbuf_sc->fb_base, FB_SIZE, meta, ret, err); err: return (ret); } #endif struct pci_devemu pci_fbuf = { .pe_emu = "fbuf", .pe_init = pci_fbuf_init, .pe_barwrite = pci_fbuf_write, .pe_barread = pci_fbuf_read, .pe_baraddr = pci_fbuf_baraddr, #ifdef BHYVE_SNAPSHOT .pe_snapshot = pci_fbuf_snapshot, #endif }; PCI_EMUL_SET(pci_fbuf); diff --git a/usr.sbin/bhyve/pci_hda.h b/usr.sbin/bhyve/pci_hda.h index 65a85f6d603d..a34366dedc5a 100644 --- a/usr.sbin/bhyve/pci_hda.h +++ b/usr.sbin/bhyve/pci_hda.h @@ -1,92 +1,92 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016 Alex Teaca * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ -#ifndef _HDA_EMUL_H_ +#ifndef _HDA_EMUL_H_ #define _HDA_EMUL_H_ #include #include #include #include #include #include #include #include #include "hda_reg.h" /* * HDA Debug Log */ #define DEBUG_HDA 1 #if DEBUG_HDA == 1 extern FILE *dbg; #define DPRINTF(fmt, arg...) \ do {fprintf(dbg, "%s-%d: " fmt "\n", __func__, __LINE__, ##arg); \ fflush(dbg); } while (0) #else #define DPRINTF(fmt, arg...) #endif #define HDA_FIFO_SIZE 0x100 struct hda_softc; struct hda_codec_class; struct hda_codec_inst { uint8_t cad; struct hda_codec_class *codec; struct hda_softc *hda; struct hda_ops *hops; void *priv; }; struct hda_codec_class { char *name; int (*init)(struct hda_codec_inst *hci, const char *play, const char *rec); int (*reset)(struct hda_codec_inst *hci); int (*command)(struct hda_codec_inst *hci, uint32_t cmd_data); int (*notify)(struct hda_codec_inst *hci, uint8_t run, uint8_t stream, uint8_t dir); }; struct hda_ops { int (*signal)(struct hda_codec_inst *hci); int (*response)(struct hda_codec_inst *hci, uint32_t response, uint8_t unsol); int (*transfer)(struct hda_codec_inst *hci, uint8_t stream, uint8_t dir, void *buf, size_t count); }; #define HDA_EMUL_SET(x) DATA_SET(hda_codec_class_set, x); #endif /* _HDA_EMUL_H_ */ diff --git a/usr.sbin/bhyve/pci_lpc.c b/usr.sbin/bhyve/pci_lpc.c index 8cc536fef112..42a6e5c2f78c 100644 --- a/usr.sbin/bhyve/pci_lpc.c +++ b/usr.sbin/bhyve/pci_lpc.c @@ -1,516 +1,516 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 Neel Natu * Copyright (c) 2013 Tycho Nightingale * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include "acpi.h" #include "debug.h" #include "bootrom.h" #include "config.h" #include "inout.h" #include "pci_emul.h" #include "pci_irq.h" #include "pci_lpc.h" #include "pctestdev.h" #include "uart_emul.h" #define IO_ICU1 0x20 #define IO_ICU2 0xA0 SET_DECLARE(lpc_dsdt_set, struct lpc_dsdt); SET_DECLARE(lpc_sysres_set, struct lpc_sysres); #define ELCR_PORT 0x4d0 SYSRES_IO(ELCR_PORT, 2); #define IO_TIMER1_PORT 0x40 #define NMISC_PORT 0x61 SYSRES_IO(NMISC_PORT, 1); static struct pci_devinst *lpc_bridge; #define LPC_UART_NUM 4 static struct lpc_uart_softc { struct uart_softc *uart_softc; int iobase; int irq; int enabled; } lpc_uart_softc[LPC_UART_NUM]; static const char *lpc_uart_names[LPC_UART_NUM] = { "com1", "com2", "com3", "com4" }; static const char *lpc_uart_acpi_names[LPC_UART_NUM] = { "COM1", "COM2", "COM3", "COM4" }; /* * LPC device configuration is in the following form: * [,] * For e.g. "com1,stdio" or "bootrom,/var/romfile" */ int lpc_device_parse(const char *opts) { int unit, error; char *str, *cpy, *lpcdev, *node_name; error = -1; str = cpy = strdup(opts); lpcdev = strsep(&str, ","); if (lpcdev != NULL) { if (strcasecmp(lpcdev, "bootrom") == 0) { set_config_value("lpc.bootrom", str); error = 0; goto done; } for (unit = 0; unit < LPC_UART_NUM; unit++) { if (strcasecmp(lpcdev, lpc_uart_names[unit]) == 0) { asprintf(&node_name, "lpc.%s.path", lpc_uart_names[unit]); set_config_value(node_name, str); free(node_name); error = 0; goto done; } } if (strcasecmp(lpcdev, pctestdev_getname()) == 0) { asprintf(&node_name, "lpc.%s", pctestdev_getname()); set_config_bool(node_name, true); free(node_name); error = 0; goto done; } } done: free(cpy); return (error); } void lpc_print_supported_devices() { size_t i; printf("bootrom\n"); for (i = 0; i < LPC_UART_NUM; i++) printf("%s\n", lpc_uart_names[i]); printf("%s\n", pctestdev_getname()); } const char * lpc_bootrom(void) { return (get_config_value("lpc.bootrom")); } static void lpc_uart_intr_assert(void *arg) { struct lpc_uart_softc *sc = arg; assert(sc->irq >= 0); vm_isa_pulse_irq(lpc_bridge->pi_vmctx, sc->irq, sc->irq); } static void lpc_uart_intr_deassert(void *arg) { - /* + /* * The COM devices on the LPC bus generate edge triggered interrupts, * so nothing more to do here. 
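 * A level-triggered source would have to track its assertion state,
 * raising the line with vm_isa_assert_irq() and dropping it again here
 * with vm_isa_deassert_irq(); the edge-triggered UART instead just pulses
 * the line in lpc_uart_intr_assert() via vm_isa_pulse_irq().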
*/ } static int lpc_uart_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) { int offset; struct lpc_uart_softc *sc = arg; offset = port - sc->iobase; switch (bytes) { case 1: if (in) *eax = uart_read(sc->uart_softc, offset); else uart_write(sc->uart_softc, offset, *eax); break; case 2: if (in) { *eax = uart_read(sc->uart_softc, offset); *eax |= uart_read(sc->uart_softc, offset + 1) << 8; } else { uart_write(sc->uart_softc, offset, *eax); uart_write(sc->uart_softc, offset + 1, *eax >> 8); } break; default: return (-1); } return (0); } static int lpc_init(struct vmctx *ctx) { struct lpc_uart_softc *sc; struct inout_port iop; const char *backend, *name, *romfile; char *node_name; int unit, error; romfile = get_config_value("lpc.bootrom"); if (romfile != NULL) { error = bootrom_loadrom(ctx, romfile); if (error) return (error); } /* COM1 and COM2 */ for (unit = 0; unit < LPC_UART_NUM; unit++) { sc = &lpc_uart_softc[unit]; name = lpc_uart_names[unit]; if (uart_legacy_alloc(unit, &sc->iobase, &sc->irq) != 0) { EPRINTLN("Unable to allocate resources for " "LPC device %s", name); return (-1); } pci_irq_reserve(sc->irq); sc->uart_softc = uart_init(lpc_uart_intr_assert, lpc_uart_intr_deassert, sc); asprintf(&node_name, "lpc.%s.path", name); backend = get_config_value(node_name); free(node_name); if (uart_set_backend(sc->uart_softc, backend) != 0) { EPRINTLN("Unable to initialize backend '%s' " "for LPC device %s", backend, name); return (-1); } bzero(&iop, sizeof(struct inout_port)); iop.name = name; iop.port = sc->iobase; iop.size = UART_IO_BAR_SIZE; iop.flags = IOPORT_F_INOUT; iop.handler = lpc_uart_io_handler; iop.arg = sc; error = register_inout(&iop); assert(error == 0); sc->enabled = 1; } /* pc-testdev */ asprintf(&node_name, "lpc.%s", pctestdev_getname()); if (get_config_bool_default(node_name, false)) { error = pctestdev_init(ctx); if (error) return (error); } free(node_name); return (0); } static void pci_lpc_write_dsdt(struct pci_devinst *pi) { struct lpc_dsdt **ldpp, *ldp; dsdt_line(""); dsdt_line("Device (ISA)"); dsdt_line("{"); dsdt_line(" Name (_ADR, 0x%04X%04X)", pi->pi_slot, pi->pi_func); dsdt_line(" OperationRegion (LPCR, PCI_Config, 0x00, 0x100)"); dsdt_line(" Field (LPCR, AnyAcc, NoLock, Preserve)"); dsdt_line(" {"); dsdt_line(" Offset (0x60),"); dsdt_line(" PIRA, 8,"); dsdt_line(" PIRB, 8,"); dsdt_line(" PIRC, 8,"); dsdt_line(" PIRD, 8,"); dsdt_line(" Offset (0x68),"); dsdt_line(" PIRE, 8,"); dsdt_line(" PIRF, 8,"); dsdt_line(" PIRG, 8,"); dsdt_line(" PIRH, 8"); dsdt_line(" }"); dsdt_line(""); dsdt_indent(1); SET_FOREACH(ldpp, lpc_dsdt_set) { ldp = *ldpp; ldp->handler(); } dsdt_line(""); dsdt_line("Device (PIC)"); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0000\"))"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); dsdt_fixed_ioport(IO_ICU1, 2); dsdt_fixed_ioport(IO_ICU2, 2); dsdt_fixed_irq(2); dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); dsdt_line(""); dsdt_line("Device (TIMR)"); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0100\"))"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); dsdt_fixed_ioport(IO_TIMER1_PORT, 4); dsdt_fixed_irq(0); dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); dsdt_unindent(1); dsdt_line("}"); } static void pci_lpc_sysres_dsdt(void) { struct lpc_sysres **lspp, *lsp; dsdt_line(""); dsdt_line("Device (SIO)"); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0C02\"))"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); 
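	/*
	 * Illustrative: with the SYSRES_IO() registrations in this file the
	 * loop below emits ASL along the lines of
	 *	IO (Decode16, 0x04D0, 0x04D0, 0x01, 0x02)	// ELCR_PORT
	 *	IO (Decode16, 0x0061, 0x0061, 0x01, 0x01)	// NMISC_PORT
	 * inside the SIO device's _CRS.
	 */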
dsdt_line(" {"); dsdt_indent(2); SET_FOREACH(lspp, lpc_sysres_set) { lsp = *lspp; switch (lsp->type) { case LPC_SYSRES_IO: dsdt_fixed_ioport(lsp->base, lsp->length); break; case LPC_SYSRES_MEM: dsdt_fixed_mem32(lsp->base, lsp->length); break; } } dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); } LPC_DSDT(pci_lpc_sysres_dsdt); static void pci_lpc_uart_dsdt(void) { struct lpc_uart_softc *sc; int unit; for (unit = 0; unit < LPC_UART_NUM; unit++) { sc = &lpc_uart_softc[unit]; if (!sc->enabled) continue; dsdt_line(""); dsdt_line("Device (%s)", lpc_uart_acpi_names[unit]); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0501\"))"); dsdt_line(" Name (_UID, %d)", unit + 1); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); dsdt_fixed_ioport(sc->iobase, UART_IO_BAR_SIZE); dsdt_fixed_irq(sc->irq); dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); } } LPC_DSDT(pci_lpc_uart_dsdt); static int pci_lpc_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff, int bytes, uint32_t val) { int pirq_pin; if (bytes == 1) { pirq_pin = 0; if (coff >= 0x60 && coff <= 0x63) pirq_pin = coff - 0x60 + 1; if (coff >= 0x68 && coff <= 0x6b) pirq_pin = coff - 0x68 + 5; if (pirq_pin != 0) { pirq_write(ctx, pirq_pin, val); pci_set_cfgdata8(pi, coff, pirq_read(pirq_pin)); return (0); } } return (-1); } static void pci_lpc_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { } static uint64_t pci_lpc_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { return (0); } #define LPC_DEV 0x7000 #define LPC_VENDOR 0x8086 static int pci_lpc_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) { /* * Do not allow more than one LPC bridge to be configured. */ if (lpc_bridge != NULL) { EPRINTLN("Only one LPC bridge is allowed."); return (-1); } /* * Enforce that the LPC can only be configured on bus 0. This * simplifies the ACPI DSDT because it can provide a decode for * all legacy i/o ports behind bus 0. 
*/ if (pi->pi_bus != 0) { EPRINTLN("LPC bridge can be present only on bus 0."); return (-1); } if (lpc_init(ctx) != 0) return (-1); /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, LPC_DEV); pci_set_cfgdata16(pi, PCIR_VENDOR, LPC_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE); pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_ISA); lpc_bridge = pi; return (0); } char * lpc_pirq_name(int pin) { char *name; if (lpc_bridge == NULL) return (NULL); asprintf(&name, "\\_SB.PC00.ISA.LNK%c,", 'A' + pin - 1); return (name); } void lpc_pirq_routed(void) { int pin; if (lpc_bridge == NULL) return; for (pin = 0; pin < 4; pin++) pci_set_cfgdata8(lpc_bridge, 0x60 + pin, pirq_read(pin + 1)); for (pin = 0; pin < 4; pin++) pci_set_cfgdata8(lpc_bridge, 0x68 + pin, pirq_read(pin + 5)); } #ifdef BHYVE_SNAPSHOT static int pci_lpc_snapshot(struct vm_snapshot_meta *meta) { int unit, ret; struct uart_softc *sc; for (unit = 0; unit < LPC_UART_NUM; unit++) { sc = lpc_uart_softc[unit].uart_softc; ret = uart_snapshot(sc, meta); if (ret != 0) goto done; } done: return (ret); } #endif struct pci_devemu pci_de_lpc = { .pe_emu = "lpc", .pe_init = pci_lpc_init, .pe_write_dsdt = pci_lpc_write_dsdt, .pe_cfgwrite = pci_lpc_cfgwrite, .pe_barwrite = pci_lpc_write, .pe_barread = pci_lpc_read, #ifdef BHYVE_SNAPSHOT .pe_snapshot = pci_lpc_snapshot, #endif }; PCI_EMUL_SET(pci_de_lpc); diff --git a/usr.sbin/bhyve/pci_nvme.c b/usr.sbin/bhyve/pci_nvme.c index 07b079ecd4a1..2410366de197 100644 --- a/usr.sbin/bhyve/pci_nvme.c +++ b/usr.sbin/bhyve/pci_nvme.c @@ -1,3129 +1,3129 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017 Shunsuke Mie * Copyright (c) 2018 Leon Dang * Copyright (c) 2020 Chuck Tuffli * - * Function crc16 Copyright (c) 2017, Fedor Uporov + * Function crc16 Copyright (c) 2017, Fedor Uporov * Obtained from function ext2_crc16() in sys/fs/ext2fs/ext2_csum.c * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * bhyve PCIe-NVMe device emulation. 
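 *
 * A typical slot configuration might look like (illustrative values):
 *	-s 4,nvme,/dev/zvol/tank/nvme0,maxq=8,qsz=512,ioslots=16,ser=BHYVE0001
 * or, with a RAM-backed namespace:
 *	-s 4,nvme,ram=1024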
* * options: * -s ,nvme,devpath,maxq=#,qsz=#,ioslots=#,sectsz=#,ser=A-Z,eui64=#,dsm= * * accepted devpath: * /dev/blockdev * /path/to/image * ram=size_in_MiB * * maxq = max number of queues * qsz = max elements in each queue * ioslots = max number of concurrent io requests * sectsz = sector size (defaults to blockif sector size) * ser = serial number (20-chars max) * eui64 = IEEE Extended Unique Identifier (8 byte value) * dsm = DataSet Management support. Option is one of auto, enable,disable * */ /* TODO: - create async event for smart and log - intr coalesce */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "block_if.h" #include "config.h" #include "debug.h" #include "pci_emul.h" static int nvme_debug = 0; #define DPRINTF(fmt, args...) if (nvme_debug) PRINTLN(fmt, ##args) #define WPRINTF(fmt, args...) PRINTLN(fmt, ##args) /* defaults; can be overridden */ #define NVME_MSIX_BAR 4 #define NVME_IOSLOTS 8 /* The NVMe spec defines bits 13:4 in BAR0 as reserved */ #define NVME_MMIO_SPACE_MIN (1 << 14) #define NVME_QUEUES 16 #define NVME_MAX_QENTRIES 2048 /* Memory Page size Minimum reported in CAP register */ #define NVME_MPSMIN 0 /* MPSMIN converted to bytes */ #define NVME_MPSMIN_BYTES (1 << (12 + NVME_MPSMIN)) #define NVME_PRP2_ITEMS (PAGE_SIZE/sizeof(uint64_t)) #define NVME_MDTS 9 /* Note the + 1 allows for the initial descriptor to not be page aligned */ #define NVME_MAX_IOVEC ((1 << NVME_MDTS) + 1) #define NVME_MAX_DATA_SIZE ((1 << NVME_MDTS) * NVME_MPSMIN_BYTES) /* This is a synthetic status code to indicate there is no status */ #define NVME_NO_STATUS 0xffff #define NVME_COMPLETION_VALID(c) ((c).status != NVME_NO_STATUS) /* helpers */ /* Convert a zero-based value into a one-based value */ #define ONE_BASED(zero) ((zero) + 1) /* Convert a one-based value into a zero-based value */ #define ZERO_BASED(one) ((one) - 1) /* Encode number of SQ's and CQ's for Set/Get Features */ #define NVME_FEATURE_NUM_QUEUES(sc) \ (ZERO_BASED((sc)->num_squeues) & 0xffff) | \ (ZERO_BASED((sc)->num_cqueues) & 0xffff) << 16; #define NVME_DOORBELL_OFFSET offsetof(struct nvme_registers, doorbell) enum nvme_controller_register_offsets { NVME_CR_CAP_LOW = 0x00, NVME_CR_CAP_HI = 0x04, NVME_CR_VS = 0x08, NVME_CR_INTMS = 0x0c, NVME_CR_INTMC = 0x10, NVME_CR_CC = 0x14, NVME_CR_CSTS = 0x1c, NVME_CR_NSSR = 0x20, NVME_CR_AQA = 0x24, NVME_CR_ASQ_LOW = 0x28, NVME_CR_ASQ_HI = 0x2c, NVME_CR_ACQ_LOW = 0x30, NVME_CR_ACQ_HI = 0x34, }; enum nvme_cmd_cdw11 { NVME_CMD_CDW11_PC = 0x0001, NVME_CMD_CDW11_IEN = 0x0002, NVME_CMD_CDW11_IV = 0xFFFF0000, }; enum nvme_copy_dir { NVME_COPY_TO_PRP, NVME_COPY_FROM_PRP, }; #define NVME_CQ_INTEN 0x01 #define NVME_CQ_INTCOAL 0x02 struct nvme_completion_queue { struct nvme_completion *qbase; pthread_mutex_t mtx; uint32_t size; uint16_t tail; /* nvme progress */ uint16_t head; /* guest progress */ uint16_t intr_vec; uint32_t intr_en; }; struct nvme_submission_queue { struct nvme_command *qbase; pthread_mutex_t mtx; uint32_t size; uint16_t head; /* nvme progress */ uint16_t tail; /* guest progress */ uint16_t cqid; /* completion queue id */ int qpriority; }; enum nvme_storage_type { NVME_STOR_BLOCKIF = 0, NVME_STOR_RAM = 1, }; struct pci_nvme_blockstore { enum nvme_storage_type type; void *ctx; uint64_t size; uint32_t sectsz; uint32_t sectsz_bits; uint64_t eui64; uint32_t deallocate:1; }; /* * Calculate the number of additional 
page descriptors for guest IO requests * based on the advertised Max Data Transfer (MDTS) and given the number of * default iovec's in a struct blockif_req. */ #define MDTS_PAD_SIZE \ ( NVME_MAX_IOVEC > BLOCKIF_IOV_MAX ? \ NVME_MAX_IOVEC - BLOCKIF_IOV_MAX : \ 0 ) struct pci_nvme_ioreq { struct pci_nvme_softc *sc; STAILQ_ENTRY(pci_nvme_ioreq) link; struct nvme_submission_queue *nvme_sq; uint16_t sqid; /* command information */ uint16_t opc; uint16_t cid; uint32_t nsid; uint64_t prev_gpaddr; size_t prev_size; size_t bytes; struct blockif_req io_req; struct iovec iovpadding[MDTS_PAD_SIZE]; }; enum nvme_dsm_type { /* Dataset Management bit in ONCS reflects backing storage capability */ NVME_DATASET_MANAGEMENT_AUTO, /* Unconditionally set Dataset Management bit in ONCS */ NVME_DATASET_MANAGEMENT_ENABLE, /* Unconditionally clear Dataset Management bit in ONCS */ NVME_DATASET_MANAGEMENT_DISABLE, }; struct pci_nvme_softc; struct nvme_feature_obj; typedef void (*nvme_feature_cb)(struct pci_nvme_softc *, struct nvme_feature_obj *, struct nvme_command *, struct nvme_completion *); struct nvme_feature_obj { uint32_t cdw11; nvme_feature_cb set; nvme_feature_cb get; bool namespace_specific; }; #define NVME_FID_MAX (NVME_FEAT_ENDURANCE_GROUP_EVENT_CONFIGURATION + 1) typedef enum { PCI_NVME_AE_TYPE_ERROR = 0, PCI_NVME_AE_TYPE_SMART, PCI_NVME_AE_TYPE_NOTICE, PCI_NVME_AE_TYPE_IO_CMD = 6, PCI_NVME_AE_TYPE_VENDOR = 7, PCI_NVME_AE_TYPE_MAX /* Must be last */ } pci_nvme_async_type; /* Asynchronous Event Requests */ struct pci_nvme_aer { STAILQ_ENTRY(pci_nvme_aer) link; uint16_t cid; /* Command ID of the submitted AER */ }; typedef enum { PCI_NVME_AE_INFO_NS_ATTR_CHANGED = 0, PCI_NVME_AE_INFO_FW_ACTIVATION, PCI_NVME_AE_INFO_TELEMETRY_CHANGE, PCI_NVME_AE_INFO_ANA_CHANGE, PCI_NVME_AE_INFO_PREDICT_LATENCY_CHANGE, PCI_NVME_AE_INFO_LBA_STATUS_ALERT, PCI_NVME_AE_INFO_ENDURANCE_GROUP_CHANGE, PCI_NVME_AE_INFO_MAX, } pci_nvme_async_info; /* Asynchronous Event Notifications */ struct pci_nvme_aen { pci_nvme_async_type atype; uint32_t event_data; bool posted; }; struct pci_nvme_softc { struct pci_devinst *nsc_pi; pthread_mutex_t mtx; struct nvme_registers regs; struct nvme_namespace_data nsdata; struct nvme_controller_data ctrldata; struct nvme_error_information_entry err_log; struct nvme_health_information_page health_log; struct nvme_firmware_page fw_log; struct nvme_ns_list ns_log; struct pci_nvme_blockstore nvstore; uint16_t max_qentries; /* max entries per queue */ uint32_t max_queues; /* max number of IO SQ's or CQ's */ uint32_t num_cqueues; uint32_t num_squeues; bool num_q_is_set; /* Has host set Number of Queues */ struct pci_nvme_ioreq *ioreqs; STAILQ_HEAD(, pci_nvme_ioreq) ioreqs_free; /* free list of ioreqs */ uint32_t pending_ios; uint32_t ioslots; sem_t iosemlock; /* * Memory mapped Submission and Completion queues * Each array includes both Admin and IO queues */ struct nvme_completion_queue *compl_queues; struct nvme_submission_queue *submit_queues; struct nvme_feature_obj feat[NVME_FID_MAX]; enum nvme_dsm_type dataset_management; /* Accounting for SMART data */ __uint128_t read_data_units; __uint128_t write_data_units; __uint128_t read_commands; __uint128_t write_commands; uint32_t read_dunits_remainder; uint32_t write_dunits_remainder; STAILQ_HEAD(, pci_nvme_aer) aer_list; pthread_mutex_t aer_mtx; uint32_t aer_count; struct pci_nvme_aen aen[PCI_NVME_AE_TYPE_MAX]; pthread_t aen_tid; pthread_mutex_t aen_mtx; pthread_cond_t aen_cond; }; static void pci_nvme_cq_update(struct pci_nvme_softc *sc, struct 
nvme_completion_queue *cq, uint32_t cdw0, uint16_t cid, uint16_t sqid, uint16_t status); static struct pci_nvme_ioreq *pci_nvme_get_ioreq(struct pci_nvme_softc *); static void pci_nvme_release_ioreq(struct pci_nvme_softc *, struct pci_nvme_ioreq *); static void pci_nvme_io_done(struct blockif_req *, int); /* Controller Configuration utils */ #define NVME_CC_GET_EN(cc) \ ((cc) >> NVME_CC_REG_EN_SHIFT & NVME_CC_REG_EN_MASK) #define NVME_CC_GET_CSS(cc) \ ((cc) >> NVME_CC_REG_CSS_SHIFT & NVME_CC_REG_CSS_MASK) #define NVME_CC_GET_SHN(cc) \ ((cc) >> NVME_CC_REG_SHN_SHIFT & NVME_CC_REG_SHN_MASK) #define NVME_CC_GET_IOSQES(cc) \ ((cc) >> NVME_CC_REG_IOSQES_SHIFT & NVME_CC_REG_IOSQES_MASK) #define NVME_CC_GET_IOCQES(cc) \ ((cc) >> NVME_CC_REG_IOCQES_SHIFT & NVME_CC_REG_IOCQES_MASK) #define NVME_CC_WRITE_MASK \ ((NVME_CC_REG_EN_MASK << NVME_CC_REG_EN_SHIFT) | \ (NVME_CC_REG_IOSQES_MASK << NVME_CC_REG_IOSQES_SHIFT) | \ (NVME_CC_REG_IOCQES_MASK << NVME_CC_REG_IOCQES_SHIFT)) #define NVME_CC_NEN_WRITE_MASK \ ((NVME_CC_REG_CSS_MASK << NVME_CC_REG_CSS_SHIFT) | \ (NVME_CC_REG_MPS_MASK << NVME_CC_REG_MPS_SHIFT) | \ (NVME_CC_REG_AMS_MASK << NVME_CC_REG_AMS_SHIFT)) /* Controller Status utils */ #define NVME_CSTS_GET_RDY(sts) \ ((sts) >> NVME_CSTS_REG_RDY_SHIFT & NVME_CSTS_REG_RDY_MASK) #define NVME_CSTS_RDY (1 << NVME_CSTS_REG_RDY_SHIFT) /* Completion Queue status word utils */ #define NVME_STATUS_P (1 << NVME_STATUS_P_SHIFT) #define NVME_STATUS_MASK \ ((NVME_STATUS_SCT_MASK << NVME_STATUS_SCT_SHIFT) |\ (NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT)) #define NVME_ONCS_DSM (NVME_CTRLR_DATA_ONCS_DSM_MASK << \ NVME_CTRLR_DATA_ONCS_DSM_SHIFT) static void nvme_feature_invalid_cb(struct pci_nvme_softc *, struct nvme_feature_obj *, struct nvme_command *, struct nvme_completion *); static void nvme_feature_num_queues(struct pci_nvme_softc *, struct nvme_feature_obj *, struct nvme_command *, struct nvme_completion *); static void nvme_feature_iv_config(struct pci_nvme_softc *, struct nvme_feature_obj *, struct nvme_command *, struct nvme_completion *); static void *aen_thr(void *arg); static __inline void cpywithpad(char *dst, size_t dst_size, const char *src, char pad) { size_t len; len = strnlen(src, dst_size); memset(dst, pad, dst_size); memcpy(dst, src, len); } static __inline void pci_nvme_status_tc(uint16_t *status, uint16_t type, uint16_t code) { *status &= ~NVME_STATUS_MASK; *status |= (type & NVME_STATUS_SCT_MASK) << NVME_STATUS_SCT_SHIFT | (code & NVME_STATUS_SC_MASK) << NVME_STATUS_SC_SHIFT; } static __inline void pci_nvme_status_genc(uint16_t *status, uint16_t code) { pci_nvme_status_tc(status, NVME_SCT_GENERIC, code); } /* * Initialize the requested number or IO Submission and Completion Queues. * Admin queues are allocated implicitly. 
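 * (Hence the "+ 1" in the calloc() calls below: element 0 of each array is
 * reserved for the Admin queue, elements 1..n for the IO queues.)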
*/ static void pci_nvme_init_queues(struct pci_nvme_softc *sc, uint32_t nsq, uint32_t ncq) { uint32_t i; /* * Allocate and initialize the Submission Queues */ if (nsq > NVME_QUEUES) { WPRINTF("%s: clamping number of SQ from %u to %u", __func__, nsq, NVME_QUEUES); nsq = NVME_QUEUES; } sc->num_squeues = nsq; sc->submit_queues = calloc(sc->num_squeues + 1, sizeof(struct nvme_submission_queue)); if (sc->submit_queues == NULL) { WPRINTF("%s: SQ allocation failed", __func__); sc->num_squeues = 0; } else { struct nvme_submission_queue *sq = sc->submit_queues; for (i = 0; i < sc->num_squeues; i++) pthread_mutex_init(&sq[i].mtx, NULL); } /* * Allocate and initialize the Completion Queues */ if (ncq > NVME_QUEUES) { WPRINTF("%s: clamping number of CQ from %u to %u", __func__, ncq, NVME_QUEUES); ncq = NVME_QUEUES; } sc->num_cqueues = ncq; sc->compl_queues = calloc(sc->num_cqueues + 1, sizeof(struct nvme_completion_queue)); if (sc->compl_queues == NULL) { WPRINTF("%s: CQ allocation failed", __func__); sc->num_cqueues = 0; } else { struct nvme_completion_queue *cq = sc->compl_queues; for (i = 0; i < sc->num_cqueues; i++) pthread_mutex_init(&cq[i].mtx, NULL); } } static void pci_nvme_init_ctrldata(struct pci_nvme_softc *sc) { struct nvme_controller_data *cd = &sc->ctrldata; cd->vid = 0xFB5D; cd->ssvid = 0x0000; cpywithpad((char *)cd->mn, sizeof(cd->mn), "bhyve-NVMe", ' '); cpywithpad((char *)cd->fr, sizeof(cd->fr), "1.0", ' '); /* Num of submission commands that we can handle at a time (2^rab) */ cd->rab = 4; /* FreeBSD OUI */ cd->ieee[0] = 0x58; cd->ieee[1] = 0x9c; cd->ieee[2] = 0xfc; cd->mic = 0; cd->mdts = NVME_MDTS; /* max data transfer size (2^mdts * CAP.MPSMIN) */ cd->ver = 0x00010300; cd->oacs = 1 << NVME_CTRLR_DATA_OACS_FORMAT_SHIFT; cd->acl = 2; cd->aerl = 4; /* Advertise 1, Read-only firmware slot */ cd->frmw = NVME_CTRLR_DATA_FRMW_SLOT1_RO_MASK | (1 << NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT); cd->lpa = 0; /* TODO: support some simple things like SMART */ cd->elpe = 0; /* max error log page entries */ cd->npss = 1; /* number of power states support */ /* Warning Composite Temperature Threshold */ cd->wctemp = 0x0157; cd->sqes = (6 << NVME_CTRLR_DATA_SQES_MAX_SHIFT) | (6 << NVME_CTRLR_DATA_SQES_MIN_SHIFT); cd->cqes = (4 << NVME_CTRLR_DATA_CQES_MAX_SHIFT) | (4 << NVME_CTRLR_DATA_CQES_MIN_SHIFT); cd->nn = 1; /* number of namespaces */ cd->oncs = 0; switch (sc->dataset_management) { case NVME_DATASET_MANAGEMENT_AUTO: if (sc->nvstore.deallocate) cd->oncs |= NVME_ONCS_DSM; break; case NVME_DATASET_MANAGEMENT_ENABLE: cd->oncs |= NVME_ONCS_DSM; break; default: break; } cd->fna = 0x03; cd->power_state[0].mp = 10; } /* * Calculate the CRC-16 of the given buffer * See copyright attribution at top of file */ static uint16_t crc16(uint16_t crc, const void *buffer, unsigned int len) { const unsigned char *cp = buffer; /* CRC table for the CRC-16. The poly is 0x8005 (x16 + x15 + x2 + 1). 
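 * Since the algorithm is reflected, each table entry can be regenerated
 * with the reversed polynomial 0xA001 (illustrative sketch):
 *
 *	uint16_t e = i;
 *	for (int b = 0; b < 8; b++)
 *		e = (e >> 1) ^ ((e & 1) ? 0xA001 : 0);
 *
 * which reproduces crc16_table[i] verbatim.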
*/ static uint16_t const crc16_table[256] = { 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 }; while (len--) crc = (((crc >> 8) & 0xffU) ^ crc16_table[(crc ^ *cp++) & 0xffU]) & 0x0000ffffU; return crc; } static void pci_nvme_init_nsdata_size(struct pci_nvme_blockstore *nvstore, struct nvme_namespace_data *nd) { /* Get capacity and block size information from backing store */ nd->nsze = nvstore->size / nvstore->sectsz; nd->ncap = nd->nsze; nd->nuse = nd->nsze; } static void pci_nvme_init_nsdata(struct pci_nvme_softc *sc, struct nvme_namespace_data *nd, uint32_t nsid, struct pci_nvme_blockstore *nvstore) { pci_nvme_init_nsdata_size(nvstore, nd); if (nvstore->type == NVME_STOR_BLOCKIF) nvstore->deallocate = blockif_candelete(nvstore->ctx); nd->nlbaf = 0; /* NLBAF is a 0's based value (i.e. 
1 LBA Format) */ nd->flbas = 0; /* Create an EUI-64 if user did not provide one */ if (nvstore->eui64 == 0) { char *data = NULL; uint64_t eui64 = nvstore->eui64; asprintf(&data, "%s%u%u%u", get_config_value("name"), sc->nsc_pi->pi_bus, sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func); if (data != NULL) { eui64 = OUI_FREEBSD_NVME_LOW | crc16(0, data, strlen(data)); free(data); } nvstore->eui64 = (eui64 << 16) | (nsid & 0xffff); } be64enc(nd->eui64, nvstore->eui64); /* LBA data-sz = 2^lbads */ nd->lbaf[0] = nvstore->sectsz_bits << NVME_NS_DATA_LBAF_LBADS_SHIFT; } static void pci_nvme_init_logpages(struct pci_nvme_softc *sc) { memset(&sc->err_log, 0, sizeof(sc->err_log)); memset(&sc->health_log, 0, sizeof(sc->health_log)); memset(&sc->fw_log, 0, sizeof(sc->fw_log)); memset(&sc->ns_log, 0, sizeof(sc->ns_log)); /* Set read/write remainder to round up according to spec */ sc->read_dunits_remainder = 999; sc->write_dunits_remainder = 999; /* Set nominal Health values checked by implementations */ sc->health_log.temperature = 310; sc->health_log.available_spare = 100; sc->health_log.available_spare_threshold = 10; } static void pci_nvme_init_features(struct pci_nvme_softc *sc) { sc->feat[0].set = nvme_feature_invalid_cb; sc->feat[0].get = nvme_feature_invalid_cb; sc->feat[NVME_FEAT_LBA_RANGE_TYPE].namespace_specific = true; sc->feat[NVME_FEAT_ERROR_RECOVERY].namespace_specific = true; sc->feat[NVME_FEAT_NUMBER_OF_QUEUES].set = nvme_feature_num_queues; sc->feat[NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION].set = nvme_feature_iv_config; /* Enable all AENs by default */ sc->feat[NVME_FEAT_ASYNC_EVENT_CONFIGURATION].cdw11 = 0x31f; sc->feat[NVME_FEAT_PREDICTABLE_LATENCY_MODE_CONFIG].get = nvme_feature_invalid_cb; sc->feat[NVME_FEAT_PREDICTABLE_LATENCY_MODE_WINDOW].get = nvme_feature_invalid_cb; } static void pci_nvme_aer_reset(struct pci_nvme_softc *sc) { STAILQ_INIT(&sc->aer_list); sc->aer_count = 0; } static void pci_nvme_aer_init(struct pci_nvme_softc *sc) { pthread_mutex_init(&sc->aer_mtx, NULL); pci_nvme_aer_reset(sc); } static void pci_nvme_aer_destroy(struct pci_nvme_softc *sc) { struct pci_nvme_aer *aer = NULL; pthread_mutex_lock(&sc->aer_mtx); while (!STAILQ_EMPTY(&sc->aer_list)) { aer = STAILQ_FIRST(&sc->aer_list); STAILQ_REMOVE_HEAD(&sc->aer_list, link); free(aer); } pthread_mutex_unlock(&sc->aer_mtx); pci_nvme_aer_reset(sc); } static bool pci_nvme_aer_available(struct pci_nvme_softc *sc) { return (sc->aer_count != 0); } static bool pci_nvme_aer_limit_reached(struct pci_nvme_softc *sc) { struct nvme_controller_data *cd = &sc->ctrldata; /* AERL is a zero based value while aer_count is one's based */ return (sc->aer_count == (cd->aerl + 1)); } /* * Add an Async Event Request * * Stores an AER to be returned later if the Controller needs to notify the * host of an event. * Note that while the NVMe spec doesn't require Controllers to return AER's * in order, this implementation does preserve the order. */ static int pci_nvme_aer_add(struct pci_nvme_softc *sc, uint16_t cid) { struct pci_nvme_aer *aer = NULL; if (pci_nvme_aer_limit_reached(sc)) return (-1); aer = calloc(1, sizeof(struct pci_nvme_aer)); if (aer == NULL) return (-1); /* Save the Command ID for use in the completion message */ aer->cid = cid; pthread_mutex_lock(&sc->aer_mtx); sc->aer_count++; STAILQ_INSERT_TAIL(&sc->aer_list, aer, link); pthread_mutex_unlock(&sc->aer_mtx); return (0); } /* * Get an Async Event Request structure * * Returns a pointer to an AER previously submitted by the host or NULL if * no AER's exist. 
Caller is responsible for freeing the returned struct. */ static struct pci_nvme_aer * pci_nvme_aer_get(struct pci_nvme_softc *sc) { struct pci_nvme_aer *aer = NULL; pthread_mutex_lock(&sc->aer_mtx); aer = STAILQ_FIRST(&sc->aer_list); if (aer != NULL) { STAILQ_REMOVE_HEAD(&sc->aer_list, link); sc->aer_count--; } pthread_mutex_unlock(&sc->aer_mtx); - + return (aer); } static void pci_nvme_aen_reset(struct pci_nvme_softc *sc) { uint32_t atype; memset(sc->aen, 0, PCI_NVME_AE_TYPE_MAX * sizeof(struct pci_nvme_aen)); for (atype = 0; atype < PCI_NVME_AE_TYPE_MAX; atype++) { sc->aen[atype].atype = atype; } } static void pci_nvme_aen_init(struct pci_nvme_softc *sc) { char nstr[80]; pci_nvme_aen_reset(sc); pthread_mutex_init(&sc->aen_mtx, NULL); pthread_create(&sc->aen_tid, NULL, aen_thr, sc); snprintf(nstr, sizeof(nstr), "nvme-aen-%d:%d", sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func); pthread_set_name_np(sc->aen_tid, nstr); } static void pci_nvme_aen_destroy(struct pci_nvme_softc *sc) { pci_nvme_aen_reset(sc); } /* Notify the AEN thread of pending work */ static void pci_nvme_aen_notify(struct pci_nvme_softc *sc) { pthread_cond_signal(&sc->aen_cond); } /* * Post an Asynchronous Event Notification */ static int32_t pci_nvme_aen_post(struct pci_nvme_softc *sc, pci_nvme_async_type atype, uint32_t event_data) { struct pci_nvme_aen *aen; if (atype >= PCI_NVME_AE_TYPE_MAX) { return(EINVAL); } pthread_mutex_lock(&sc->aen_mtx); aen = &sc->aen[atype]; /* Has the controller already posted an event of this type? */ if (aen->posted) { pthread_mutex_unlock(&sc->aen_mtx); return(EALREADY); } aen->event_data = event_data; aen->posted = true; pthread_mutex_unlock(&sc->aen_mtx); pci_nvme_aen_notify(sc); return(0); } static void pci_nvme_aen_process(struct pci_nvme_softc *sc) { struct pci_nvme_aer *aer; struct pci_nvme_aen *aen; pci_nvme_async_type atype; uint32_t mask; uint16_t status; uint8_t lid; assert(pthread_mutex_isowned_np(&sc->aen_mtx)); for (atype = 0; atype < PCI_NVME_AE_TYPE_MAX; atype++) { aen = &sc->aen[atype]; /* Previous iterations may have depleted the available AER's */ if (!pci_nvme_aer_available(sc)) { DPRINTF("%s: no AER", __func__); break; } if (!aen->posted) { DPRINTF("%s: no AEN posted for atype=%#x", __func__, atype); continue; } status = NVME_SC_SUCCESS; /* Is the event masked? 
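 * The Async Event Configuration feature value keeps the SMART/health
 * warning enables in its low byte and the Notice enables starting at
 * bit 8, which is why the cases below mask or shift it accordingly.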
*/ mask = sc->feat[NVME_FEAT_ASYNC_EVENT_CONFIGURATION].cdw11; DPRINTF("%s: atype=%#x mask=%#x event_data=%#x", __func__, atype, mask, aen->event_data); switch (atype) { case PCI_NVME_AE_TYPE_ERROR: lid = NVME_LOG_ERROR; break; case PCI_NVME_AE_TYPE_SMART: mask &= 0xff; if ((mask & aen->event_data) == 0) continue; lid = NVME_LOG_HEALTH_INFORMATION; break; case PCI_NVME_AE_TYPE_NOTICE: if (aen->event_data >= PCI_NVME_AE_INFO_MAX) { EPRINTLN("%s unknown AEN notice type %u", __func__, aen->event_data); status = NVME_SC_INTERNAL_DEVICE_ERROR; break; } mask >>= 8; if (((1 << aen->event_data) & mask) == 0) continue; switch (aen->event_data) { case PCI_NVME_AE_INFO_NS_ATTR_CHANGED: lid = NVME_LOG_CHANGED_NAMESPACE; break; case PCI_NVME_AE_INFO_FW_ACTIVATION: lid = NVME_LOG_FIRMWARE_SLOT; break; case PCI_NVME_AE_INFO_TELEMETRY_CHANGE: lid = NVME_LOG_TELEMETRY_CONTROLLER_INITIATED; break; case PCI_NVME_AE_INFO_ANA_CHANGE: lid = NVME_LOG_ASYMMETRIC_NAMESPAVE_ACCESS; //TODO spelling break; case PCI_NVME_AE_INFO_PREDICT_LATENCY_CHANGE: lid = NVME_LOG_PREDICTABLE_LATENCY_EVENT_AGGREGATE; break; case PCI_NVME_AE_INFO_LBA_STATUS_ALERT: lid = NVME_LOG_LBA_STATUS_INFORMATION; break; case PCI_NVME_AE_INFO_ENDURANCE_GROUP_CHANGE: lid = NVME_LOG_ENDURANCE_GROUP_EVENT_AGGREGATE; break; default: lid = 0; } break; default: /* bad type?!? */ EPRINTLN("%s unknown AEN type %u", __func__, atype); status = NVME_SC_INTERNAL_DEVICE_ERROR; break; } aer = pci_nvme_aer_get(sc); assert(aer != NULL); DPRINTF("%s: CID=%#x CDW0=%#x", __func__, aer->cid, (lid << 16) | (aen->event_data << 8) | atype); pci_nvme_cq_update(sc, &sc->compl_queues[0], (lid << 16) | (aen->event_data << 8) | atype, /* cdw0 */ aer->cid, 0, /* SQID */ status); aen->event_data = 0; aen->posted = false; pci_generate_msix(sc->nsc_pi, 0); } } static void * aen_thr(void *arg) { struct pci_nvme_softc *sc; sc = arg; pthread_mutex_lock(&sc->aen_mtx); for (;;) { pci_nvme_aen_process(sc); pthread_cond_wait(&sc->aen_cond, &sc->aen_mtx); } pthread_mutex_unlock(&sc->aen_mtx); pthread_exit(NULL); return (NULL); } static void pci_nvme_reset_locked(struct pci_nvme_softc *sc) { uint32_t i; DPRINTF("%s", __func__); sc->regs.cap_lo = (ZERO_BASED(sc->max_qentries) & NVME_CAP_LO_REG_MQES_MASK) | (1 << NVME_CAP_LO_REG_CQR_SHIFT) | (60 << NVME_CAP_LO_REG_TO_SHIFT); sc->regs.cap_hi = 1 << NVME_CAP_HI_REG_CSS_NVM_SHIFT; sc->regs.vs = 0x00010300; /* NVMe v1.3 */ sc->regs.cc = 0; sc->regs.csts = 0; assert(sc->submit_queues != NULL); for (i = 0; i < sc->num_squeues + 1; i++) { sc->submit_queues[i].qbase = NULL; sc->submit_queues[i].size = 0; sc->submit_queues[i].cqid = 0; sc->submit_queues[i].tail = 0; sc->submit_queues[i].head = 0; } assert(sc->compl_queues != NULL); for (i = 0; i < sc->num_cqueues + 1; i++) { sc->compl_queues[i].qbase = NULL; sc->compl_queues[i].size = 0; sc->compl_queues[i].tail = 0; sc->compl_queues[i].head = 0; } sc->num_q_is_set = false; pci_nvme_aer_destroy(sc); pci_nvme_aen_destroy(sc); } static void pci_nvme_reset(struct pci_nvme_softc *sc) { pthread_mutex_lock(&sc->mtx); pci_nvme_reset_locked(sc); pthread_mutex_unlock(&sc->mtx); } static void pci_nvme_init_controller(struct vmctx *ctx, struct pci_nvme_softc *sc) { uint16_t acqs, asqs; DPRINTF("%s", __func__); asqs = (sc->regs.aqa & NVME_AQA_REG_ASQS_MASK) + 1; sc->submit_queues[0].size = asqs; sc->submit_queues[0].qbase = vm_map_gpa(ctx, sc->regs.asq, sizeof(struct nvme_command) * asqs); DPRINTF("%s mapping Admin-SQ guest 0x%lx, host: %p", __func__, sc->regs.asq, sc->submit_queues[0].qbase); - acqs = 
((sc->regs.aqa >> NVME_AQA_REG_ACQS_SHIFT) & + acqs = ((sc->regs.aqa >> NVME_AQA_REG_ACQS_SHIFT) & NVME_AQA_REG_ACQS_MASK) + 1; sc->compl_queues[0].size = acqs; sc->compl_queues[0].qbase = vm_map_gpa(ctx, sc->regs.acq, sizeof(struct nvme_completion) * acqs); sc->compl_queues[0].intr_en = NVME_CQ_INTEN; DPRINTF("%s mapping Admin-CQ guest 0x%lx, host: %p", __func__, sc->regs.acq, sc->compl_queues[0].qbase); } static int nvme_prp_memcpy(struct vmctx *ctx, uint64_t prp1, uint64_t prp2, uint8_t *b, size_t len, enum nvme_copy_dir dir) { uint8_t *p; size_t bytes; if (len > (8 * 1024)) { return (-1); } /* Copy from the start of prp1 to the end of the physical page */ bytes = PAGE_SIZE - (prp1 & PAGE_MASK); bytes = MIN(bytes, len); p = vm_map_gpa(ctx, prp1, bytes); if (p == NULL) { return (-1); } if (dir == NVME_COPY_TO_PRP) memcpy(p, b, bytes); else memcpy(b, p, bytes); b += bytes; len -= bytes; if (len == 0) { return (0); } len = MIN(len, PAGE_SIZE); p = vm_map_gpa(ctx, prp2, len); if (p == NULL) { return (-1); } if (dir == NVME_COPY_TO_PRP) memcpy(p, b, len); else memcpy(b, p, len); return (0); } /* * Write a Completion Queue Entry update * * Write the completion and update the doorbell value */ static void pci_nvme_cq_update(struct pci_nvme_softc *sc, struct nvme_completion_queue *cq, uint32_t cdw0, uint16_t cid, uint16_t sqid, uint16_t status) { struct nvme_submission_queue *sq = &sc->submit_queues[sqid]; struct nvme_completion *cqe; assert(cq->qbase != NULL); pthread_mutex_lock(&cq->mtx); cqe = &cq->qbase[cq->tail]; /* Flip the phase bit */ status |= (cqe->status ^ NVME_STATUS_P) & NVME_STATUS_P_MASK; cqe->cdw0 = cdw0; cqe->sqhd = sq->head; cqe->sqid = sqid; cqe->cid = cid; cqe->status = status; cq->tail++; if (cq->tail >= cq->size) { cq->tail = 0; } pthread_mutex_unlock(&cq->mtx); } static int nvme_opc_delete_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { uint16_t qid = command->cdw10 & 0xffff; DPRINTF("%s DELETE_IO_SQ %u", __func__, qid); if (qid == 0 || qid > sc->num_squeues || (sc->submit_queues[qid].qbase == NULL)) { WPRINTF("%s NOT PERMITTED queue id %u / num_squeues %u", __func__, qid, sc->num_squeues); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); return (1); } sc->submit_queues[qid].qbase = NULL; sc->submit_queues[qid].cqid = 0; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); } static int nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { if (command->cdw11 & NVME_CMD_CDW11_PC) { uint16_t qid = command->cdw10 & 0xffff; struct nvme_submission_queue *nsq; if ((qid == 0) || (qid > sc->num_squeues) || (sc->submit_queues[qid].qbase != NULL)) { WPRINTF("%s queue index %u > num_squeues %u", __func__, qid, sc->num_squeues); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); return (1); } nsq = &sc->submit_queues[qid]; nsq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff); DPRINTF("%s size=%u (max=%u)", __func__, nsq->size, sc->max_qentries); if ((nsq->size < 2) || (nsq->size > sc->max_qentries)) { /* * Queues must specify at least two entries * NOTE: "MAXIMUM QUEUE SIZE EXCEEDED" was renamed to * "INVALID QUEUE SIZE" in the NVM Express 1.3 Spec */ pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED); return (1); } nsq->head = nsq->tail = 0; nsq->cqid = (command->cdw11 >> 16) & 0xffff; if ((nsq->cqid == 0) || (nsq->cqid > 
sc->num_cqueues)) { pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); return (1); } if (sc->compl_queues[nsq->cqid].qbase == NULL) { pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_COMPLETION_QUEUE_INVALID); return (1); } nsq->qpriority = (command->cdw11 >> 1) & 0x03; nsq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(struct nvme_command) * (size_t)nsq->size); DPRINTF("%s sq %u size %u gaddr %p cqid %u", __func__, qid, nsq->size, nsq->qbase, nsq->cqid); pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); DPRINTF("%s completed creating IOSQ qid %u", __func__, qid); } else { - /* + /* * Guest sent non-cont submission queue request. * This setting is unsupported by this emulation. */ WPRINTF("%s unsupported non-contig (list-based) " "create i/o submission queue", __func__); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); } return (1); } static int nvme_opc_delete_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { uint16_t qid = command->cdw10 & 0xffff; uint16_t sqid; DPRINTF("%s DELETE_IO_CQ %u", __func__, qid); if (qid == 0 || qid > sc->num_cqueues || (sc->compl_queues[qid].qbase == NULL)) { WPRINTF("%s queue index %u / num_cqueues %u", __func__, qid, sc->num_cqueues); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); return (1); } /* Deleting an Active CQ is an error */ for (sqid = 1; sqid < sc->num_squeues + 1; sqid++) if (sc->submit_queues[sqid].cqid == qid) { pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_DELETION); return (1); } sc->compl_queues[qid].qbase = NULL; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); } static int nvme_opc_create_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { struct nvme_completion_queue *ncq; uint16_t qid = command->cdw10 & 0xffff; /* Only support Physically Contiguous queues */ if ((command->cdw11 & NVME_CMD_CDW11_PC) == 0) { WPRINTF("%s unsupported non-contig (list-based) " "create i/o completion queue", __func__); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } if ((qid == 0) || (qid > sc->num_cqueues) || (sc->compl_queues[qid].qbase != NULL)) { WPRINTF("%s queue index %u > num_cqueues %u", __func__, qid, sc->num_cqueues); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); return (1); } ncq = &sc->compl_queues[qid]; ncq->intr_en = (command->cdw11 & NVME_CMD_CDW11_IEN) >> 1; ncq->intr_vec = (command->cdw11 >> 16) & 0xffff; if (ncq->intr_vec > (sc->max_queues + 1)) { pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_INTERRUPT_VECTOR); return (1); } ncq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff); if ((ncq->size < 2) || (ncq->size > sc->max_qentries)) { /* * Queues must specify at least two entries * NOTE: "MAXIMUM QUEUE SIZE EXCEEDED" was renamed to * "INVALID QUEUE SIZE" in the NVM Express 1.3 Spec */ pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED); return (1); } ncq->head = ncq->tail = 0; ncq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(struct nvme_command) * (size_t)ncq->size); pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); } static int nvme_opc_get_log_page(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { uint32_t logsize; 
uint8_t logpage = command->cdw10 & 0xFF; DPRINTF("%s log page %u len %u", __func__, logpage, logsize); pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); /* * Command specifies the number of dwords to return in fields NUMDU * and NUMDL. This is a zero-based value. */ logsize = ((command->cdw11 << 16) | (command->cdw10 >> 16)) + 1; logsize *= sizeof(uint32_t); switch (logpage) { case NVME_LOG_ERROR: nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->err_log, MIN(logsize, sizeof(sc->err_log)), NVME_COPY_TO_PRP); break; case NVME_LOG_HEALTH_INFORMATION: pthread_mutex_lock(&sc->mtx); memcpy(&sc->health_log.data_units_read, &sc->read_data_units, sizeof(sc->health_log.data_units_read)); memcpy(&sc->health_log.data_units_written, &sc->write_data_units, sizeof(sc->health_log.data_units_written)); memcpy(&sc->health_log.host_read_commands, &sc->read_commands, sizeof(sc->health_log.host_read_commands)); memcpy(&sc->health_log.host_write_commands, &sc->write_commands, sizeof(sc->health_log.host_write_commands)); pthread_mutex_unlock(&sc->mtx); nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->health_log, MIN(logsize, sizeof(sc->health_log)), NVME_COPY_TO_PRP); break; case NVME_LOG_FIRMWARE_SLOT: nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->fw_log, MIN(logsize, sizeof(sc->fw_log)), NVME_COPY_TO_PRP); break; case NVME_LOG_CHANGED_NAMESPACE: nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->ns_log, MIN(logsize, sizeof(sc->ns_log)), NVME_COPY_TO_PRP); memset(&sc->ns_log, 0, sizeof(sc->ns_log)); break; default: DPRINTF("%s get log page %x command not supported", __func__, logpage); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_LOG_PAGE); } return (1); } static int nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { void *dest; uint16_t status; DPRINTF("%s identify 0x%x nsid 0x%x", __func__, command->cdw10 & 0xFF, command->nsid); pci_nvme_status_genc(&status, NVME_SC_SUCCESS); switch (command->cdw10 & 0xFF) { case 0x00: /* return Identify Namespace data structure */ nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->nsdata, sizeof(sc->nsdata), NVME_COPY_TO_PRP); break; case 0x01: /* return Identify Controller data structure */ nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->ctrldata, sizeof(sc->ctrldata), NVME_COPY_TO_PRP); break; case 0x02: /* list of 1024 active NSIDs > CDW1.NSID */ dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(uint32_t) * 1024); /* All unused entries shall be zero */ bzero(dest, sizeof(uint32_t) * 1024); ((uint32_t *)dest)[0] = 1; break; case 0x03: /* list of NSID structures in CDW1.NSID, 4096 bytes */ if (command->nsid != 1) { pci_nvme_status_genc(&status, NVME_SC_INVALID_NAMESPACE_OR_FORMAT); break; } dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(uint32_t) * 1024); /* All bytes after the descriptor shall be zero */ bzero(dest, sizeof(uint32_t) * 1024); /* Return NIDT=1 (i.e. 
EUI64) descriptor */ ((uint8_t *)dest)[0] = 1; ((uint8_t *)dest)[1] = sizeof(uint64_t); bcopy(sc->nsdata.eui64, ((uint8_t *)dest) + 4, sizeof(uint64_t)); break; default: DPRINTF("%s unsupported identify command requested 0x%x", __func__, command->cdw10 & 0xFF); pci_nvme_status_genc(&status, NVME_SC_INVALID_FIELD); break; } compl->status = status; return (1); } static const char * nvme_fid_to_name(uint8_t fid) { const char *name; switch (fid) { case NVME_FEAT_ARBITRATION: name = "Arbitration"; break; case NVME_FEAT_POWER_MANAGEMENT: name = "Power Management"; break; case NVME_FEAT_LBA_RANGE_TYPE: name = "LBA Range Type"; break; case NVME_FEAT_TEMPERATURE_THRESHOLD: name = "Temperature Threshold"; break; case NVME_FEAT_ERROR_RECOVERY: name = "Error Recovery"; break; case NVME_FEAT_VOLATILE_WRITE_CACHE: name = "Volatile Write Cache"; break; case NVME_FEAT_NUMBER_OF_QUEUES: name = "Number of Queues"; break; case NVME_FEAT_INTERRUPT_COALESCING: name = "Interrupt Coalescing"; break; case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION: name = "Interrupt Vector Configuration"; break; case NVME_FEAT_WRITE_ATOMICITY: name = "Write Atomicity Normal"; break; case NVME_FEAT_ASYNC_EVENT_CONFIGURATION: name = "Asynchronous Event Configuration"; break; case NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION: name = "Autonomous Power State Transition"; break; case NVME_FEAT_HOST_MEMORY_BUFFER: name = "Host Memory Buffer"; break; case NVME_FEAT_TIMESTAMP: name = "Timestamp"; break; case NVME_FEAT_KEEP_ALIVE_TIMER: name = "Keep Alive Timer"; break; case NVME_FEAT_HOST_CONTROLLED_THERMAL_MGMT: name = "Host Controlled Thermal Management"; break; case NVME_FEAT_NON_OP_POWER_STATE_CONFIG: name = "Non-Operation Power State Config"; break; case NVME_FEAT_READ_RECOVERY_LEVEL_CONFIG: name = "Read Recovery Level Config"; break; case NVME_FEAT_PREDICTABLE_LATENCY_MODE_CONFIG: name = "Predictable Latency Mode Config"; break; case NVME_FEAT_PREDICTABLE_LATENCY_MODE_WINDOW: name = "Predictable Latency Mode Window"; break; case NVME_FEAT_LBA_STATUS_INFORMATION_ATTRIBUTES: name = "LBA Status Information Report Interval"; break; case NVME_FEAT_HOST_BEHAVIOR_SUPPORT: name = "Host Behavior Support"; break; case NVME_FEAT_SANITIZE_CONFIG: name = "Sanitize Config"; break; case NVME_FEAT_ENDURANCE_GROUP_EVENT_CONFIGURATION: name = "Endurance Group Event Configuration"; break; case NVME_FEAT_SOFTWARE_PROGRESS_MARKER: name = "Software Progress Marker"; break; case NVME_FEAT_HOST_IDENTIFIER: name = "Host Identifier"; break; case NVME_FEAT_RESERVATION_NOTIFICATION_MASK: name = "Reservation Notification Mask"; break; case NVME_FEAT_RESERVATION_PERSISTENCE: name = "Reservation Persistence"; break; case NVME_FEAT_NAMESPACE_WRITE_PROTECTION_CONFIG: name = "Namespace Write Protection Config"; break; default: name = "Unknown"; break; } return (name); } static void nvme_feature_invalid_cb(struct pci_nvme_softc *sc, struct nvme_feature_obj *feat, struct nvme_command *command, struct nvme_completion *compl) { pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); } static void nvme_feature_iv_config(struct pci_nvme_softc *sc, struct nvme_feature_obj *feat, struct nvme_command *command, struct nvme_completion *compl) { uint32_t i; uint32_t cdw11 = command->cdw11; uint16_t iv; bool cd; pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); iv = cdw11 & 0xffff; cd = cdw11 & (1 << 16); if (iv > (sc->max_queues + 1)) { return; } /* No Interrupt Coalescing (i.e. 
not Coalescing Disable) for Admin Q */ if ((iv == 0) && !cd) return; /* Requested Interrupt Vector must be used by a CQ */ for (i = 0; i < sc->num_cqueues + 1; i++) { if (sc->compl_queues[i].intr_vec == iv) { pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); } } } static void nvme_feature_num_queues(struct pci_nvme_softc *sc, struct nvme_feature_obj *feat, struct nvme_command *command, struct nvme_completion *compl) { uint16_t nqr; /* Number of Queues Requested */ if (sc->num_q_is_set) { WPRINTF("%s: Number of Queues already set", __func__); pci_nvme_status_genc(&compl->status, NVME_SC_COMMAND_SEQUENCE_ERROR); return; } nqr = command->cdw11 & 0xFFFF; if (nqr == 0xffff) { WPRINTF("%s: Illegal NSQR value %#x", __func__, nqr); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return; } sc->num_squeues = ONE_BASED(nqr); if (sc->num_squeues > sc->max_queues) { DPRINTF("NSQR=%u is greater than max %u", sc->num_squeues, sc->max_queues); sc->num_squeues = sc->max_queues; } nqr = (command->cdw11 >> 16) & 0xFFFF; if (nqr == 0xffff) { WPRINTF("%s: Illegal NCQR value %#x", __func__, nqr); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return; } sc->num_cqueues = ONE_BASED(nqr); if (sc->num_cqueues > sc->max_queues) { DPRINTF("NCQR=%u is greater than max %u", sc->num_cqueues, sc->max_queues); sc->num_cqueues = sc->max_queues; } /* Patch the command value which will be saved on callback's return */ command->cdw11 = NVME_FEATURE_NUM_QUEUES(sc); compl->cdw0 = NVME_FEATURE_NUM_QUEUES(sc); sc->num_q_is_set = true; } static int nvme_opc_set_features(struct pci_nvme_softc *sc, struct nvme_command *command, struct nvme_completion *compl) { struct nvme_feature_obj *feat; uint32_t nsid = command->nsid; uint8_t fid = command->cdw10 & 0xFF; DPRINTF("%s: Feature ID 0x%x (%s)", __func__, fid, nvme_fid_to_name(fid)); if (fid >= NVME_FID_MAX) { DPRINTF("%s invalid feature 0x%x", __func__, fid); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } feat = &sc->feat[fid]; if (!feat->namespace_specific && !((nsid == 0) || (nsid == NVME_GLOBAL_NAMESPACE_TAG))) { pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_FEATURE_NOT_NS_SPECIFIC); return (1); } compl->cdw0 = 0; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); if (feat->set) feat->set(sc, feat, command, compl); DPRINTF("%s: status=%#x cdw11=%#x", __func__, compl->status, command->cdw11); if (compl->status == NVME_SC_SUCCESS) { feat->cdw11 = command->cdw11; if ((fid == NVME_FEAT_ASYNC_EVENT_CONFIGURATION) && (command->cdw11 != 0)) pci_nvme_aen_notify(sc); } return (0); } static int nvme_opc_get_features(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { struct nvme_feature_obj *feat; uint8_t fid = command->cdw10 & 0xFF; DPRINTF("%s: Feature ID 0x%x (%s)", __func__, fid, nvme_fid_to_name(fid)); if (fid >= NVME_FID_MAX) { DPRINTF("%s invalid feature 0x%x", __func__, fid); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } compl->cdw0 = 0; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); feat = &sc->feat[fid]; if (feat->get) { feat->get(sc, feat, command, compl); } if (compl->status == NVME_SC_SUCCESS) { compl->cdw0 = feat->cdw11; } return (0); } static int nvme_opc_format_nvm(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { uint8_t ses, lbaf, pi; /* Only supports Secure Erase Setting - User Data Erase */ ses = (command->cdw10 >> 9) & 0x7; if (ses > 0x1) { 
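/* Cryptographic Erase and reserved SES values are not supported by this emulation */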
pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } /* Only supports a single LBA Format */ lbaf = command->cdw10 & 0xf; if (lbaf != 0) { pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_FORMAT); return (1); } /* Doesn't support Protection Infomation */ pi = (command->cdw10 >> 5) & 0x7; if (pi != 0) { pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } if (sc->nvstore.type == NVME_STOR_RAM) { if (sc->nvstore.ctx) free(sc->nvstore.ctx); sc->nvstore.ctx = calloc(1, sc->nvstore.size); pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); } else { struct pci_nvme_ioreq *req; int err; req = pci_nvme_get_ioreq(sc); if (req == NULL) { pci_nvme_status_genc(&compl->status, NVME_SC_INTERNAL_DEVICE_ERROR); WPRINTF("%s: unable to allocate IO req", __func__); return (1); } req->nvme_sq = &sc->submit_queues[0]; req->sqid = 0; req->opc = command->opc; req->cid = command->cid; req->nsid = command->nsid; req->io_req.br_offset = 0; req->io_req.br_resid = sc->nvstore.size; req->io_req.br_callback = pci_nvme_io_done; err = blockif_delete(sc->nvstore.ctx, &req->io_req); if (err) { pci_nvme_status_genc(&compl->status, NVME_SC_INTERNAL_DEVICE_ERROR); pci_nvme_release_ioreq(sc, req); } } return (1); } static int nvme_opc_abort(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { DPRINTF("%s submission queue %u, command ID 0x%x", __func__, command->cdw10 & 0xFFFF, (command->cdw10 >> 16) & 0xFFFF); /* TODO: search for the command ID and abort it */ compl->cdw0 = 1; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); } static int nvme_opc_async_event_req(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { DPRINTF("%s async event request count=%u aerl=%u cid=%#x", __func__, sc->aer_count, sc->ctrldata.aerl, command->cid); /* Don't exceed the Async Event Request Limit (AERL). */ if (pci_nvme_aer_limit_reached(sc)) { pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED); return (1); } if (pci_nvme_aer_add(sc, command->cid)) { pci_nvme_status_tc(&compl->status, NVME_SCT_GENERIC, NVME_SC_INTERNAL_DEVICE_ERROR); return (1); } /* * Raise events when they happen based on the Set Features cmd. * These events happen async, so only set completion successful if * there is an event reflective of the request to get event. 
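 * Returning NVME_NO_STATUS leaves the AER outstanding; the AEN thread
 * posts the completion later from pci_nvme_aen_process().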
*/ compl->status = NVME_NO_STATUS; pci_nvme_aen_notify(sc); return (0); } static void pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, uint64_t value) { struct nvme_completion compl; struct nvme_command *cmd; struct nvme_submission_queue *sq; struct nvme_completion_queue *cq; uint16_t sqhead; DPRINTF("%s index %u", __func__, (uint32_t)value); sq = &sc->submit_queues[0]; cq = &sc->compl_queues[0]; pthread_mutex_lock(&sq->mtx); sqhead = sq->head; DPRINTF("sqhead %u, tail %u", sqhead, sq->tail); - + while (sqhead != atomic_load_acq_short(&sq->tail)) { cmd = &(sq->qbase)[sqhead]; compl.cdw0 = 0; compl.status = 0; switch (cmd->opc) { case NVME_OPC_DELETE_IO_SQ: DPRINTF("%s command DELETE_IO_SQ", __func__); nvme_opc_delete_io_sq(sc, cmd, &compl); break; case NVME_OPC_CREATE_IO_SQ: DPRINTF("%s command CREATE_IO_SQ", __func__); nvme_opc_create_io_sq(sc, cmd, &compl); break; case NVME_OPC_DELETE_IO_CQ: DPRINTF("%s command DELETE_IO_CQ", __func__); nvme_opc_delete_io_cq(sc, cmd, &compl); break; case NVME_OPC_CREATE_IO_CQ: DPRINTF("%s command CREATE_IO_CQ", __func__); nvme_opc_create_io_cq(sc, cmd, &compl); break; case NVME_OPC_GET_LOG_PAGE: DPRINTF("%s command GET_LOG_PAGE", __func__); nvme_opc_get_log_page(sc, cmd, &compl); break; case NVME_OPC_IDENTIFY: DPRINTF("%s command IDENTIFY", __func__); nvme_opc_identify(sc, cmd, &compl); break; case NVME_OPC_ABORT: DPRINTF("%s command ABORT", __func__); nvme_opc_abort(sc, cmd, &compl); break; case NVME_OPC_SET_FEATURES: DPRINTF("%s command SET_FEATURES", __func__); nvme_opc_set_features(sc, cmd, &compl); break; case NVME_OPC_GET_FEATURES: DPRINTF("%s command GET_FEATURES", __func__); nvme_opc_get_features(sc, cmd, &compl); break; case NVME_OPC_FIRMWARE_ACTIVATE: DPRINTF("%s command FIRMWARE_ACTIVATE", __func__); pci_nvme_status_tc(&compl.status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_FIRMWARE_SLOT); break; case NVME_OPC_ASYNC_EVENT_REQUEST: DPRINTF("%s command ASYNC_EVENT_REQ", __func__); nvme_opc_async_event_req(sc, cmd, &compl); break; case NVME_OPC_FORMAT_NVM: DPRINTF("%s command FORMAT_NVM", __func__); if ((sc->ctrldata.oacs & (1 << NVME_CTRLR_DATA_OACS_FORMAT_SHIFT)) == 0) { pci_nvme_status_genc(&compl.status, NVME_SC_INVALID_OPCODE); } compl.status = NVME_NO_STATUS; nvme_opc_format_nvm(sc, cmd, &compl); break; default: DPRINTF("0x%x command is not implemented", cmd->opc); pci_nvme_status_genc(&compl.status, NVME_SC_INVALID_OPCODE); } sqhead = (sqhead + 1) % sq->size; if (NVME_COMPLETION_VALID(compl)) { pci_nvme_cq_update(sc, &sc->compl_queues[0], compl.cdw0, cmd->cid, 0, /* SQID */ compl.status); } } DPRINTF("setting sqhead %u", sqhead); sq->head = sqhead; if (cq->head != cq->tail) pci_generate_msix(sc->nsc_pi, 0); pthread_mutex_unlock(&sq->mtx); } /* * Update the Write and Read statistics reported in SMART data * * NVMe defines "data unit" as thousand's of 512 byte blocks and is rounded up. * E.g. 1 data unit is 1 - 1,000 512 byte blocks. 3 data units are 2,001 - 3,000 * 512 byte blocks. Rounding up is acheived by initializing the remainder to 999. 
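 * e.g. a single 4 KiB write adds 8 blocks to the 999 remainder, which crosses
 * 1000 and is therefore counted as one full data unit.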
*/ static void pci_nvme_stats_write_read_update(struct pci_nvme_softc *sc, uint8_t opc, size_t bytes, uint16_t status) { pthread_mutex_lock(&sc->mtx); switch (opc) { case NVME_OPC_WRITE: sc->write_commands++; if (status != NVME_SC_SUCCESS) break; sc->write_dunits_remainder += (bytes / 512); while (sc->write_dunits_remainder >= 1000) { sc->write_data_units++; sc->write_dunits_remainder -= 1000; } break; case NVME_OPC_READ: sc->read_commands++; if (status != NVME_SC_SUCCESS) break; sc->read_dunits_remainder += (bytes / 512); while (sc->read_dunits_remainder >= 1000) { sc->read_data_units++; sc->read_dunits_remainder -= 1000; } break; default: DPRINTF("%s: Invalid OPC 0x%02x for stats", __func__, opc); break; } pthread_mutex_unlock(&sc->mtx); } /* * Check if the combination of Starting LBA (slba) and Number of Logical * Blocks (nlb) exceeds the range of the underlying storage. * * Because NVMe specifies the SLBA in blocks as a uint64_t and blockif stores * the capacity in bytes as a uint64_t, care must be taken to avoid integer * overflow. */ static bool pci_nvme_out_of_range(struct pci_nvme_blockstore *nvstore, uint64_t slba, uint32_t nlb) { size_t offset, bytes; /* Overflow check of multiplying Starting LBA by the sector size */ if (slba >> (64 - nvstore->sectsz_bits)) return (true); offset = slba << nvstore->sectsz_bits; bytes = nlb << nvstore->sectsz_bits; /* Overflow check of Number of Logical Blocks */ if ((nvstore->size - offset) < bytes) return (true); return (false); } static int pci_nvme_append_iov_req(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req, uint64_t gpaddr, size_t size, int do_write, uint64_t lba) { int iovidx; if (req == NULL) return (-1); if (req->io_req.br_iovcnt == NVME_MAX_IOVEC) { return (-1); } /* concatenate contig block-iovs to minimize number of iovs */ if ((req->prev_gpaddr + req->prev_size) == gpaddr) { iovidx = req->io_req.br_iovcnt - 1; req->io_req.br_iov[iovidx].iov_base = paddr_guest2host(req->sc->nsc_pi->pi_vmctx, req->prev_gpaddr, size); req->prev_size += size; req->io_req.br_resid += size; req->io_req.br_iov[iovidx].iov_len = req->prev_size; } else { iovidx = req->io_req.br_iovcnt; if (iovidx == 0) { req->io_req.br_offset = lba; req->io_req.br_resid = 0; req->io_req.br_param = req; } req->io_req.br_iov[iovidx].iov_base = paddr_guest2host(req->sc->nsc_pi->pi_vmctx, gpaddr, size); req->io_req.br_iov[iovidx].iov_len = size; req->prev_gpaddr = gpaddr; req->prev_size = size; req->io_req.br_resid += size; req->io_req.br_iovcnt++; } return (0); } static void pci_nvme_set_completion(struct pci_nvme_softc *sc, struct nvme_submission_queue *sq, int sqid, uint16_t cid, uint32_t cdw0, uint16_t status) { struct nvme_completion_queue *cq = &sc->compl_queues[sq->cqid]; DPRINTF("%s sqid %d cqid %u cid %u status: 0x%x 0x%x", __func__, sqid, sq->cqid, cid, NVME_STATUS_GET_SCT(status), NVME_STATUS_GET_SC(status)); pci_nvme_cq_update(sc, cq, 0, /* CDW0 */ cid, sqid, status); if (cq->head != cq->tail) { if (cq->intr_en & NVME_CQ_INTEN) { pci_generate_msix(sc->nsc_pi, cq->intr_vec); } else { DPRINTF("%s: CQ%u interrupt disabled", __func__, sq->cqid); } } } static void pci_nvme_release_ioreq(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req) { req->sc = NULL; req->nvme_sq = NULL; req->sqid = 0; pthread_mutex_lock(&sc->mtx); STAILQ_INSERT_TAIL(&sc->ioreqs_free, req, link); sc->pending_ios--; /* when no more IO pending, can set to ready if device reset/enabled */ if (sc->pending_ios == 0 && NVME_CC_GET_EN(sc->regs.cc) && !(NVME_CSTS_GET_RDY(sc->regs.csts))) 
sc->regs.csts |= NVME_CSTS_RDY; pthread_mutex_unlock(&sc->mtx); sem_post(&sc->iosemlock); } static struct pci_nvme_ioreq * pci_nvme_get_ioreq(struct pci_nvme_softc *sc) { struct pci_nvme_ioreq *req = NULL; sem_wait(&sc->iosemlock); pthread_mutex_lock(&sc->mtx); req = STAILQ_FIRST(&sc->ioreqs_free); assert(req != NULL); STAILQ_REMOVE_HEAD(&sc->ioreqs_free, link); req->sc = sc; sc->pending_ios++; pthread_mutex_unlock(&sc->mtx); req->io_req.br_iovcnt = 0; req->io_req.br_offset = 0; req->io_req.br_resid = 0; req->io_req.br_param = req; req->prev_gpaddr = 0; req->prev_size = 0; return req; } static void pci_nvme_io_done(struct blockif_req *br, int err) { struct pci_nvme_ioreq *req = br->br_param; struct nvme_submission_queue *sq = req->nvme_sq; uint16_t code, status; DPRINTF("%s error %d %s", __func__, err, strerror(err)); /* TODO return correct error */ code = err ? NVME_SC_DATA_TRANSFER_ERROR : NVME_SC_SUCCESS; pci_nvme_status_genc(&status, code); pci_nvme_set_completion(req->sc, sq, req->sqid, req->cid, 0, status); pci_nvme_stats_write_read_update(req->sc, req->opc, req->bytes, status); pci_nvme_release_ioreq(req->sc, req); } /* * Implements the Flush command. The specification states: * If a volatile write cache is not present, Flush commands complete * successfully and have no effect * in the description of the Volatile Write Cache (VWC) field of the Identify * Controller data. Therefore, set status to Success if the command is * not supported (i.e. RAM or as indicated by the blockif). */ static bool nvme_opc_flush(struct pci_nvme_softc *sc, struct nvme_command *cmd, struct pci_nvme_blockstore *nvstore, struct pci_nvme_ioreq *req, uint16_t *status) { bool pending = false; if (nvstore->type == NVME_STOR_RAM) { pci_nvme_status_genc(status, NVME_SC_SUCCESS); } else { int err; req->io_req.br_callback = pci_nvme_io_done; err = blockif_flush(nvstore->ctx, &req->io_req); switch (err) { case 0: pending = true; break; case EOPNOTSUPP: pci_nvme_status_genc(status, NVME_SC_SUCCESS); break; default: pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); } } return (pending); } static uint16_t nvme_write_read_ram(struct pci_nvme_softc *sc, struct pci_nvme_blockstore *nvstore, uint64_t prp1, uint64_t prp2, size_t offset, uint64_t bytes, bool is_write) { uint8_t *buf = nvstore->ctx; enum nvme_copy_dir dir; uint16_t status; if (is_write) dir = NVME_COPY_TO_PRP; else dir = NVME_COPY_FROM_PRP; if (nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, prp1, prp2, buf + offset, bytes, dir)) pci_nvme_status_genc(&status, NVME_SC_DATA_TRANSFER_ERROR); else pci_nvme_status_genc(&status, NVME_SC_SUCCESS); return (status); } static uint16_t nvme_write_read_blockif(struct pci_nvme_softc *sc, struct pci_nvme_blockstore *nvstore, struct pci_nvme_ioreq *req, uint64_t prp1, uint64_t prp2, size_t offset, uint64_t bytes, bool is_write) { uint64_t size; int err; uint16_t status = NVME_NO_STATUS; size = MIN(PAGE_SIZE - (prp1 % PAGE_SIZE), bytes); if (pci_nvme_append_iov_req(sc, req, prp1, size, is_write, offset)) { pci_nvme_status_genc(&status, NVME_SC_DATA_TRANSFER_ERROR); goto out; } offset += size; bytes -= size; if (bytes == 0) { ; } else if (bytes <= PAGE_SIZE) { size = bytes; if (pci_nvme_append_iov_req(sc, req, prp2, size, is_write, offset)) { pci_nvme_status_genc(&status, NVME_SC_DATA_TRANSFER_ERROR); goto out; } } else { void *vmctx = sc->nsc_pi->pi_vmctx; uint64_t *prp_list = &prp2; uint64_t *last = prp_list; /* PRP2 is pointer to a physical region page list */ while (bytes) { /* Last entry in list points to the next list 
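 * of PRP entries, so prp_list is remapped from guest memory whenever the
 * current pointer reaches that last slot and more data remains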
*/ if ((prp_list == last) && (bytes > PAGE_SIZE)) { uint64_t prp = *prp_list; prp_list = paddr_guest2host(vmctx, prp, PAGE_SIZE - (prp % PAGE_SIZE)); last = prp_list + (NVME_PRP2_ITEMS - 1); } size = MIN(bytes, PAGE_SIZE); if (pci_nvme_append_iov_req(sc, req, *prp_list, size, is_write, offset)) { pci_nvme_status_genc(&status, NVME_SC_DATA_TRANSFER_ERROR); goto out; } offset += size; bytes -= size; prp_list++; } } req->io_req.br_callback = pci_nvme_io_done; if (is_write) err = blockif_write(nvstore->ctx, &req->io_req); else err = blockif_read(nvstore->ctx, &req->io_req); if (err) pci_nvme_status_genc(&status, NVME_SC_DATA_TRANSFER_ERROR); out: return (status); } static bool nvme_opc_write_read(struct pci_nvme_softc *sc, struct nvme_command *cmd, struct pci_nvme_blockstore *nvstore, struct pci_nvme_ioreq *req, uint16_t *status) { uint64_t lba, nblocks, bytes; size_t offset; bool is_write = cmd->opc == NVME_OPC_WRITE; bool pending = false; lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10; nblocks = (cmd->cdw12 & 0xFFFF) + 1; if (pci_nvme_out_of_range(nvstore, lba, nblocks)) { WPRINTF("%s command would exceed LBA range", __func__); pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE); goto out; } bytes = nblocks << nvstore->sectsz_bits; if (bytes > NVME_MAX_DATA_SIZE) { WPRINTF("%s command would exceed MDTS", __func__); pci_nvme_status_genc(status, NVME_SC_INVALID_FIELD); goto out; } offset = lba << nvstore->sectsz_bits; req->bytes = bytes; req->io_req.br_offset = lba; /* PRP bits 1:0 must be zero */ cmd->prp1 &= ~0x3UL; cmd->prp2 &= ~0x3UL; if (nvstore->type == NVME_STOR_RAM) { *status = nvme_write_read_ram(sc, nvstore, cmd->prp1, cmd->prp2, offset, bytes, is_write); } else { *status = nvme_write_read_blockif(sc, nvstore, req, cmd->prp1, cmd->prp2, offset, bytes, is_write); if (*status == NVME_NO_STATUS) pending = true; } out: if (!pending) pci_nvme_stats_write_read_update(sc, cmd->opc, bytes, *status); return (pending); } static void pci_nvme_dealloc_sm(struct blockif_req *br, int err) { struct pci_nvme_ioreq *req = br->br_param; struct pci_nvme_softc *sc = req->sc; bool done = true; uint16_t status; if (err) { pci_nvme_status_genc(&status, NVME_SC_INTERNAL_DEVICE_ERROR); } else if ((req->prev_gpaddr + 1) == (req->prev_size)) { pci_nvme_status_genc(&status, NVME_SC_SUCCESS); } else { struct iovec *iov = req->io_req.br_iov; req->prev_gpaddr++; iov += req->prev_gpaddr; /* The iov_* values already include the sector size */ req->io_req.br_offset = (off_t)iov->iov_base; req->io_req.br_resid = iov->iov_len; if (blockif_delete(sc->nvstore.ctx, &req->io_req)) { pci_nvme_status_genc(&status, NVME_SC_INTERNAL_DEVICE_ERROR); } else done = false; } if (done) { pci_nvme_set_completion(sc, req->nvme_sq, req->sqid, req->cid, 0, status); pci_nvme_release_ioreq(sc, req); } } static bool nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc, struct nvme_command *cmd, struct pci_nvme_blockstore *nvstore, struct pci_nvme_ioreq *req, uint16_t *status) { struct nvme_dsm_range *range; uint32_t nr, r, non_zero, dr; int err; bool pending = false; if ((sc->ctrldata.oncs & NVME_ONCS_DSM) == 0) { pci_nvme_status_genc(status, NVME_SC_INVALID_OPCODE); goto out; } nr = cmd->cdw10 & 0xff; /* copy locally because a range entry could straddle PRPs */ range = calloc(1, NVME_MAX_DSM_TRIM); if (range == NULL) { pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); goto out; } nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2, (uint8_t *)range, NVME_MAX_DSM_TRIM, NVME_COPY_FROM_PRP); /* Check for invalid ranges and 
the number of non-zero lengths */ non_zero = 0; for (r = 0; r <= nr; r++) { if (pci_nvme_out_of_range(nvstore, range[r].starting_lba, range[r].length)) { pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE); goto out; } if (range[r].length != 0) non_zero++; } if (cmd->cdw11 & NVME_DSM_ATTR_DEALLOCATE) { size_t offset, bytes; int sectsz_bits = sc->nvstore.sectsz_bits; /* * DSM calls are advisory only, and compliant controllers * may choose to take no actions (i.e. return Success). */ if (!nvstore->deallocate) { pci_nvme_status_genc(status, NVME_SC_SUCCESS); goto out; } /* If all ranges have a zero length, return Success */ if (non_zero == 0) { pci_nvme_status_genc(status, NVME_SC_SUCCESS); goto out; } if (req == NULL) { pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); goto out; } offset = range[0].starting_lba << sectsz_bits; bytes = range[0].length << sectsz_bits; /* * If the request is for more than a single range, store * the ranges in the br_iov. Optimize for the common case * of a single range. * * Note that NVMe Number of Ranges is a zero based value */ req->io_req.br_iovcnt = 0; req->io_req.br_offset = offset; req->io_req.br_resid = bytes; if (nr == 0) { req->io_req.br_callback = pci_nvme_io_done; } else { struct iovec *iov = req->io_req.br_iov; for (r = 0, dr = 0; r <= nr; r++) { offset = range[r].starting_lba << sectsz_bits; bytes = range[r].length << sectsz_bits; if (bytes == 0) continue; if ((nvstore->size - offset) < bytes) { pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE); goto out; } iov[dr].iov_base = (void *)offset; iov[dr].iov_len = bytes; dr++; } req->io_req.br_callback = pci_nvme_dealloc_sm; /* * Use prev_gpaddr to track the current entry and * prev_size to track the number of entries */ req->prev_gpaddr = 0; req->prev_size = dr; } err = blockif_delete(nvstore->ctx, &req->io_req); if (err) pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); else pending = true; } out: free(range); return (pending); } static void pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx) { struct nvme_submission_queue *sq; uint16_t status; uint16_t sqhead; /* handle all submissions up to sq->tail index */ sq = &sc->submit_queues[idx]; pthread_mutex_lock(&sq->mtx); sqhead = sq->head; DPRINTF("nvme_handle_io qid %u head %u tail %u cmdlist %p", idx, sqhead, sq->tail, sq->qbase); while (sqhead != atomic_load_acq_short(&sq->tail)) { struct nvme_command *cmd; struct pci_nvme_ioreq *req; uint32_t nsid; bool pending; pending = false; req = NULL; status = 0; cmd = &sq->qbase[sqhead]; sqhead = (sqhead + 1) % sq->size; nsid = le32toh(cmd->nsid); if ((nsid == 0) || (nsid > sc->ctrldata.nn)) { pci_nvme_status_genc(&status, NVME_SC_INVALID_NAMESPACE_OR_FORMAT); status |= NVME_STATUS_DNR_MASK << NVME_STATUS_DNR_SHIFT; goto complete; } req = pci_nvme_get_ioreq(sc); if (req == NULL) { pci_nvme_status_genc(&status, NVME_SC_INTERNAL_DEVICE_ERROR); WPRINTF("%s: unable to allocate IO req", __func__); goto complete; } req->nvme_sq = sq; req->sqid = idx; req->opc = cmd->opc; req->cid = cmd->cid; req->nsid = cmd->nsid; switch (cmd->opc) { case NVME_OPC_FLUSH: pending = nvme_opc_flush(sc, cmd, &sc->nvstore, req, &status); break; case NVME_OPC_WRITE: case NVME_OPC_READ: pending = nvme_opc_write_read(sc, cmd, &sc->nvstore, req, &status); break; case NVME_OPC_WRITE_ZEROES: /* TODO: write zeroes WPRINTF("%s write zeroes lba 0x%lx blocks %u", __func__, lba, cmd->cdw12 & 0xFFFF); */ pci_nvme_status_genc(&status, NVME_SC_SUCCESS); break; case NVME_OPC_DATASET_MANAGEMENT: pending = 
nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore, req, &status); break; default: WPRINTF("%s unhandled io command 0x%x", __func__, cmd->opc); pci_nvme_status_genc(&status, NVME_SC_INVALID_OPCODE); } complete: if (!pending) { pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, status); if (req != NULL) pci_nvme_release_ioreq(sc, req); } } sq->head = sqhead; pthread_mutex_unlock(&sq->mtx); } static void pci_nvme_handle_doorbell(struct vmctx *ctx, struct pci_nvme_softc* sc, uint64_t idx, int is_sq, uint64_t value) { DPRINTF("nvme doorbell %lu, %s, val 0x%lx", idx, is_sq ? "SQ" : "CQ", value & 0xFFFF); if (is_sq) { if (idx > sc->num_squeues) { WPRINTF("%s queue index %lu overflow from " "guest (max %u)", __func__, idx, sc->num_squeues); return; } atomic_store_short(&sc->submit_queues[idx].tail, (uint16_t)value); if (idx == 0) { pci_nvme_handle_admin_cmd(sc, value); } else { /* submission queue; handle new entries in SQ */ if (idx > sc->num_squeues) { WPRINTF("%s SQ index %lu overflow from " "guest (max %u)", __func__, idx, sc->num_squeues); return; } pci_nvme_handle_io_cmd(sc, (uint16_t)idx); } } else { if (idx > sc->num_cqueues) { WPRINTF("%s queue index %lu overflow from " "guest (max %u)", __func__, idx, sc->num_cqueues); return; } atomic_store_short(&sc->compl_queues[idx].head, (uint16_t)value); } } static void pci_nvme_bar0_reg_dumps(const char *func, uint64_t offset, int iswrite) { const char *s = iswrite ? "WRITE" : "READ"; switch (offset) { case NVME_CR_CAP_LOW: DPRINTF("%s %s NVME_CR_CAP_LOW", func, s); break; case NVME_CR_CAP_HI: DPRINTF("%s %s NVME_CR_CAP_HI", func, s); break; case NVME_CR_VS: DPRINTF("%s %s NVME_CR_VS", func, s); break; case NVME_CR_INTMS: DPRINTF("%s %s NVME_CR_INTMS", func, s); break; case NVME_CR_INTMC: DPRINTF("%s %s NVME_CR_INTMC", func, s); break; case NVME_CR_CC: DPRINTF("%s %s NVME_CR_CC", func, s); break; case NVME_CR_CSTS: DPRINTF("%s %s NVME_CR_CSTS", func, s); break; case NVME_CR_NSSR: DPRINTF("%s %s NVME_CR_NSSR", func, s); break; case NVME_CR_AQA: DPRINTF("%s %s NVME_CR_AQA", func, s); break; case NVME_CR_ASQ_LOW: DPRINTF("%s %s NVME_CR_ASQ_LOW", func, s); break; case NVME_CR_ASQ_HI: DPRINTF("%s %s NVME_CR_ASQ_HI", func, s); break; case NVME_CR_ACQ_LOW: DPRINTF("%s %s NVME_CR_ACQ_LOW", func, s); break; case NVME_CR_ACQ_HI: DPRINTF("%s %s NVME_CR_ACQ_HI", func, s); break; default: DPRINTF("unknown nvme bar-0 offset 0x%lx", offset); } } static void pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc, uint64_t offset, int size, uint64_t value) { uint32_t ccreg; if (offset >= NVME_DOORBELL_OFFSET) { uint64_t belloffset = offset - NVME_DOORBELL_OFFSET; uint64_t idx = belloffset / 8; /* door bell size = 2*int */ int is_sq = (belloffset % 8) < 4; if (belloffset > ((sc->max_queues+1) * 8 - 4)) { WPRINTF("guest attempted an overflow write offset " "0x%lx, val 0x%lx in %s", offset, value, __func__); return; } pci_nvme_handle_doorbell(ctx, sc, idx, is_sq, value); return; } DPRINTF("nvme-write offset 0x%lx, size %d, value 0x%lx", offset, size, value); if (size != 4) { WPRINTF("guest wrote invalid size %d (offset 0x%lx, " "val 0x%lx) to bar0 in %s", size, offset, value, __func__); /* TODO: shutdown device */ return; } pci_nvme_bar0_reg_dumps(__func__, offset, 1); pthread_mutex_lock(&sc->mtx); switch (offset) { case NVME_CR_CAP_LOW: case NVME_CR_CAP_HI: /* readonly */ break; case NVME_CR_VS: /* readonly */ break; case NVME_CR_INTMS: /* MSI-X, so ignore */ break; case NVME_CR_INTMC: /* MSI-X, so ignore */ break; case NVME_CR_CC: ccreg = (uint32_t)value; 
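/* SHN and EN transitions handled below drive shutdown status, controller reset, or controller init */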
DPRINTF("%s NVME_CR_CC en %x css %x shn %x iosqes %u " "iocqes %u", __func__, NVME_CC_GET_EN(ccreg), NVME_CC_GET_CSS(ccreg), NVME_CC_GET_SHN(ccreg), NVME_CC_GET_IOSQES(ccreg), NVME_CC_GET_IOCQES(ccreg)); if (NVME_CC_GET_SHN(ccreg)) { /* perform shutdown - flush out data to backend */ sc->regs.csts &= ~(NVME_CSTS_REG_SHST_MASK << NVME_CSTS_REG_SHST_SHIFT); sc->regs.csts |= NVME_SHST_COMPLETE << NVME_CSTS_REG_SHST_SHIFT; } if (NVME_CC_GET_EN(ccreg) != NVME_CC_GET_EN(sc->regs.cc)) { if (NVME_CC_GET_EN(ccreg) == 0) /* transition 1-> causes controller reset */ pci_nvme_reset_locked(sc); else pci_nvme_init_controller(ctx, sc); } /* Insert the iocqes, iosqes and en bits from the write */ sc->regs.cc &= ~NVME_CC_WRITE_MASK; sc->regs.cc |= ccreg & NVME_CC_WRITE_MASK; if (NVME_CC_GET_EN(ccreg) == 0) { /* Insert the ams, mps and css bit fields */ sc->regs.cc &= ~NVME_CC_NEN_WRITE_MASK; sc->regs.cc |= ccreg & NVME_CC_NEN_WRITE_MASK; sc->regs.csts &= ~NVME_CSTS_RDY; } else if (sc->pending_ios == 0) { sc->regs.csts |= NVME_CSTS_RDY; } break; case NVME_CR_CSTS: break; case NVME_CR_NSSR: /* ignore writes; don't support subsystem reset */ break; case NVME_CR_AQA: sc->regs.aqa = (uint32_t)value; break; case NVME_CR_ASQ_LOW: sc->regs.asq = (sc->regs.asq & (0xFFFFFFFF00000000)) | (0xFFFFF000 & value); break; case NVME_CR_ASQ_HI: sc->regs.asq = (sc->regs.asq & (0x00000000FFFFFFFF)) | (value << 32); break; case NVME_CR_ACQ_LOW: sc->regs.acq = (sc->regs.acq & (0xFFFFFFFF00000000)) | (0xFFFFF000 & value); break; case NVME_CR_ACQ_HI: sc->regs.acq = (sc->regs.acq & (0x00000000FFFFFFFF)) | (value << 32); break; default: DPRINTF("%s unknown offset 0x%lx, value 0x%lx size %d", __func__, offset, value, size); } pthread_mutex_unlock(&sc->mtx); } static void pci_nvme_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { struct pci_nvme_softc* sc = pi->pi_arg; if (baridx == pci_msix_table_bar(pi) || baridx == pci_msix_pba_bar(pi)) { DPRINTF("nvme-write baridx %d, msix: off 0x%lx, size %d, " " value 0x%lx", baridx, offset, size, value); pci_emul_msix_twrite(pi, offset, size, value); return; } switch (baridx) { case 0: pci_nvme_write_bar_0(ctx, sc, offset, size, value); break; default: DPRINTF("%s unknown baridx %d, val 0x%lx", __func__, baridx, value); } } static uint64_t pci_nvme_read_bar_0(struct pci_nvme_softc* sc, uint64_t offset, int size) { uint64_t value; pci_nvme_bar0_reg_dumps(__func__, offset, 0); if (offset < NVME_DOORBELL_OFFSET) { void *p = &(sc->regs); pthread_mutex_lock(&sc->mtx); memcpy(&value, (void *)((uintptr_t)p + offset), size); pthread_mutex_unlock(&sc->mtx); } else { value = 0; WPRINTF("pci_nvme: read invalid offset %ld", offset); } switch (size) { case 1: value &= 0xFF; break; case 2: value &= 0xFFFF; break; case 4: value &= 0xFFFFFFFF; break; } DPRINTF(" nvme-read offset 0x%lx, size %d -> value 0x%x", offset, size, (uint32_t)value); return (value); } static uint64_t pci_nvme_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { struct pci_nvme_softc* sc = pi->pi_arg; if (baridx == pci_msix_table_bar(pi) || baridx == pci_msix_pba_bar(pi)) { DPRINTF("nvme-read bar: %d, msix: regoff 0x%lx, size %d", baridx, offset, size); return pci_emul_msix_tread(pi, offset, size); } switch (baridx) { case 0: return pci_nvme_read_bar_0(sc, offset, size); default: DPRINTF("unknown bar %d, 0x%lx", baridx, offset); } return (0); } static int pci_nvme_parse_config(struct pci_nvme_softc *sc, nvlist_t *nvl) { char 
bident[sizeof("XX:X:X")]; const char *value; uint32_t sectsz; sc->max_queues = NVME_QUEUES; sc->max_qentries = NVME_MAX_QENTRIES; sc->ioslots = NVME_IOSLOTS; sc->num_squeues = sc->max_queues; sc->num_cqueues = sc->max_queues; sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO; sectsz = 0; snprintf(sc->ctrldata.sn, sizeof(sc->ctrldata.sn), "NVME-%d-%d", sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func); value = get_config_value_node(nvl, "maxq"); if (value != NULL) sc->max_queues = atoi(value); value = get_config_value_node(nvl, "qsz"); if (value != NULL) { sc->max_qentries = atoi(value); if (sc->max_qentries <= 0) { EPRINTLN("nvme: Invalid qsz option %d", sc->max_qentries); return (-1); } } value = get_config_value_node(nvl, "ioslots"); if (value != NULL) { sc->ioslots = atoi(value); if (sc->ioslots <= 0) { EPRINTLN("Invalid ioslots option %d", sc->ioslots); return (-1); } } value = get_config_value_node(nvl, "sectsz"); if (value != NULL) sectsz = atoi(value); value = get_config_value_node(nvl, "ser"); if (value != NULL) { /* * This field indicates the Product Serial Number in * 7-bit ASCII, unused bytes should be space characters. * Ref: NVMe v1.3c. */ cpywithpad((char *)sc->ctrldata.sn, sizeof(sc->ctrldata.sn), value, ' '); } value = get_config_value_node(nvl, "eui64"); if (value != NULL) sc->nvstore.eui64 = htobe64(strtoull(value, NULL, 0)); value = get_config_value_node(nvl, "dsm"); if (value != NULL) { if (strcmp(value, "auto") == 0) sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO; else if (strcmp(value, "enable") == 0) sc->dataset_management = NVME_DATASET_MANAGEMENT_ENABLE; else if (strcmp(value, "disable") == 0) sc->dataset_management = NVME_DATASET_MANAGEMENT_DISABLE; } value = get_config_value_node(nvl, "ram"); if (value != NULL) { uint64_t sz = strtoull(value, NULL, 10); sc->nvstore.type = NVME_STOR_RAM; sc->nvstore.size = sz * 1024 * 1024; sc->nvstore.ctx = calloc(1, sc->nvstore.size); sc->nvstore.sectsz = 4096; sc->nvstore.sectsz_bits = 12; if (sc->nvstore.ctx == NULL) { EPRINTLN("nvme: Unable to allocate RAM"); return (-1); } } else { snprintf(bident, sizeof(bident), "%d:%d", sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func); sc->nvstore.ctx = blockif_open(nvl, bident); if (sc->nvstore.ctx == NULL) { EPRINTLN("nvme: Could not open backing file: %s", strerror(errno)); return (-1); } sc->nvstore.type = NVME_STOR_BLOCKIF; sc->nvstore.size = blockif_size(sc->nvstore.ctx); } if (sectsz == 512 || sectsz == 4096 || sectsz == 8192) sc->nvstore.sectsz = sectsz; else if (sc->nvstore.type != NVME_STOR_RAM) sc->nvstore.sectsz = blockif_sectsz(sc->nvstore.ctx); for (sc->nvstore.sectsz_bits = 9; (1 << sc->nvstore.sectsz_bits) < sc->nvstore.sectsz; sc->nvstore.sectsz_bits++); if (sc->max_queues <= 0 || sc->max_queues > NVME_QUEUES) sc->max_queues = NVME_QUEUES; return (0); } static void pci_nvme_resized(struct blockif_ctxt *bctxt, void *arg, size_t new_size) { struct pci_nvme_softc *sc; struct pci_nvme_blockstore *nvstore; struct nvme_namespace_data *nd; sc = arg; nvstore = &sc->nvstore; nd = &sc->nsdata; nvstore->size = new_size; pci_nvme_init_nsdata_size(nvstore, nd); /* Add changed NSID to list */ sc->ns_log.ns[0] = 1; sc->ns_log.ns[1] = 0; pci_nvme_aen_post(sc, PCI_NVME_AE_TYPE_NOTICE, PCI_NVME_AE_INFO_NS_ATTR_CHANGED); } static int pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) { struct pci_nvme_softc *sc; uint32_t pci_membar_sz; int error; error = 0; sc = calloc(1, sizeof(struct pci_nvme_softc)); pi->pi_arg = sc; sc->nsc_pi = pi; error = pci_nvme_parse_config(sc, 
nvl); if (error < 0) goto done; else error = 0; STAILQ_INIT(&sc->ioreqs_free); sc->ioreqs = calloc(sc->ioslots, sizeof(struct pci_nvme_ioreq)); for (int i = 0; i < sc->ioslots; i++) { STAILQ_INSERT_TAIL(&sc->ioreqs_free, &sc->ioreqs[i], link); } pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0A0A); pci_set_cfgdata16(pi, PCIR_VENDOR, 0xFB5D); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_NVM); pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0); /* * Allocate size of NVMe registers + doorbell space for all queues. * * The specification requires a minimum memory I/O window size of 16K. * The Windows driver will refuse to start a device with a smaller * window. */ pci_membar_sz = sizeof(struct nvme_registers) + 2 * sizeof(uint32_t) * (sc->max_queues + 1); pci_membar_sz = MAX(pci_membar_sz, NVME_MMIO_SPACE_MIN); DPRINTF("nvme membar size: %u", pci_membar_sz); error = pci_emul_alloc_bar(pi, 0, PCIBAR_MEM64, pci_membar_sz); if (error) { WPRINTF("%s pci alloc mem bar failed", __func__); goto done; } error = pci_emul_add_msixcap(pi, sc->max_queues + 1, NVME_MSIX_BAR); if (error) { WPRINTF("%s pci add msixcap failed", __func__); goto done; } error = pci_emul_add_pciecap(pi, PCIEM_TYPE_ROOT_INT_EP); if (error) { WPRINTF("%s pci add Express capability failed", __func__); goto done; } pthread_mutex_init(&sc->mtx, NULL); sem_init(&sc->iosemlock, 0, sc->ioslots); blockif_register_resize_callback(sc->nvstore.ctx, pci_nvme_resized, sc); pci_nvme_init_queues(sc, sc->max_queues, sc->max_queues); /* * Controller data depends on Namespace data so initialize Namespace * data first. */ pci_nvme_init_nsdata(sc, &sc->nsdata, 1, &sc->nvstore); pci_nvme_init_ctrldata(sc); pci_nvme_init_logpages(sc); pci_nvme_init_features(sc); pci_nvme_aer_init(sc); pci_nvme_aen_init(sc); pci_nvme_reset(sc); pci_lintr_request(pi); done: return (error); } static int pci_nvme_legacy_config(nvlist_t *nvl, const char *opts) { char *cp, *ram; if (opts == NULL) return (0); if (strncmp(opts, "ram=", 4) == 0) { cp = strchr(opts, ','); if (cp == NULL) { set_config_value_node(nvl, "ram", opts + 4); return (0); } ram = strndup(opts + 4, cp - opts - 4); set_config_value_node(nvl, "ram", ram); free(ram); return (pci_parse_legacy_config(nvl, cp + 1)); } else return (blockif_legacy_config(nvl, opts)); } struct pci_devemu pci_de_nvme = { .pe_emu = "nvme", .pe_init = pci_nvme_init, .pe_legacy_config = pci_nvme_legacy_config, .pe_barwrite = pci_nvme_write, .pe_barread = pci_nvme_read }; PCI_EMUL_SET(pci_de_nvme); diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c index 769d7c160037..240571f209e3 100644 --- a/usr.sbin/bhyve/pci_passthru.c +++ b/usr.sbin/bhyve/pci_passthru.c @@ -1,1021 +1,1021 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include "config.h" #include "debug.h" #include "pci_emul.h" #include "mem.h" #ifndef _PATH_DEVPCI #define _PATH_DEVPCI "/dev/pci" #endif #define LEGACY_SUPPORT 1 #define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1) #define MSIX_CAPLEN 12 static int pcifd = -1; struct passthru_softc { struct pci_devinst *psc_pi; struct pcibar psc_bar[PCI_BARMAX + 1]; struct { int capoff; int msgctrl; int emulated; } psc_msi; struct { int capoff; } psc_msix; struct pcisel psc_sel; }; static int msi_caplen(int msgctrl) { int len; - + len = 10; /* minimum length of msi capability */ if (msgctrl & PCIM_MSICTRL_64BIT) len += 4; #if 0 /* * Ignore the 'mask' and 'pending' bits in the MSI capability. * We'll let the guest manipulate them directly. */ if (msgctrl & PCIM_MSICTRL_VECTOR) len += 10; #endif return (len); } static uint32_t read_config(const struct pcisel *sel, long reg, int width) { struct pci_io pi; bzero(&pi, sizeof(pi)); pi.pi_sel = *sel; pi.pi_reg = reg; pi.pi_width = width; if (ioctl(pcifd, PCIOCREAD, &pi) < 0) return (0); /* XXX */ else return (pi.pi_data); } static void write_config(const struct pcisel *sel, long reg, int width, uint32_t data) { struct pci_io pi; bzero(&pi, sizeof(pi)); pi.pi_sel = *sel; pi.pi_reg = reg; pi.pi_width = width; pi.pi_data = data; (void)ioctl(pcifd, PCIOCWRITE, &pi); /* XXX */ } #ifdef LEGACY_SUPPORT static int passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr) { int capoff, i; struct msicap msicap; u_char *capdata; pci_populate_msicap(&msicap, msgnum, nextptr); /* * XXX * Copy the msi capability structure in the last 16 bytes of the * config space. This is wrong because it could shadow something * useful to the device. */ capoff = 256 - roundup(sizeof(msicap), 4); capdata = (u_char *)&msicap; for (i = 0; i < sizeof(msicap); i++) pci_set_cfgdata8(pi, capoff + i, capdata[i]); return (capoff); } #endif /* LEGACY_SUPPORT */ static int cfginitmsi(struct passthru_softc *sc) { int i, ptr, capptr, cap, sts, caplen, table_size; uint32_t u32; struct pcisel sel; struct pci_devinst *pi; struct msixcap msixcap; uint32_t *msixcap_ptr; pi = sc->psc_pi; sel = sc->psc_sel; /* * Parse the capabilities and cache the location of the MSI * and MSI-X capabilities. 
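 * The MSI and MSI-X capability dwords are also copied verbatim into the
 * emulated config space so the guest sees the device's own layout.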
*/ sts = read_config(&sel, PCIR_STATUS, 2); if (sts & PCIM_STATUS_CAPPRESENT) { ptr = read_config(&sel, PCIR_CAP_PTR, 1); while (ptr != 0 && ptr != 0xff) { cap = read_config(&sel, ptr + PCICAP_ID, 1); if (cap == PCIY_MSI) { /* * Copy the MSI capability into the config * space of the emulated pci device */ sc->psc_msi.capoff = ptr; sc->psc_msi.msgctrl = read_config(&sel, ptr + 2, 2); sc->psc_msi.emulated = 0; caplen = msi_caplen(sc->psc_msi.msgctrl); capptr = ptr; while (caplen > 0) { u32 = read_config(&sel, capptr, 4); pci_set_cfgdata32(pi, capptr, u32); caplen -= 4; capptr += 4; } } else if (cap == PCIY_MSIX) { /* - * Copy the MSI-X capability + * Copy the MSI-X capability */ sc->psc_msix.capoff = ptr; caplen = 12; msixcap_ptr = (uint32_t*) &msixcap; capptr = ptr; while (caplen > 0) { u32 = read_config(&sel, capptr, 4); *msixcap_ptr = u32; pci_set_cfgdata32(pi, capptr, u32); caplen -= 4; capptr += 4; msixcap_ptr++; } } ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1); } } if (sc->psc_msix.capoff != 0) { pi->pi_msix.pba_bar = msixcap.pba_info & PCIM_MSIX_BIR_MASK; pi->pi_msix.pba_offset = msixcap.pba_info & ~PCIM_MSIX_BIR_MASK; pi->pi_msix.table_bar = msixcap.table_info & PCIM_MSIX_BIR_MASK; pi->pi_msix.table_offset = msixcap.table_info & ~PCIM_MSIX_BIR_MASK; pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl); pi->pi_msix.pba_size = PBA_SIZE(pi->pi_msix.table_count); /* Allocate the emulated MSI-X table array */ table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; pi->pi_msix.table = calloc(1, table_size); /* Mask all table entries */ for (i = 0; i < pi->pi_msix.table_count; i++) { pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; } } #ifdef LEGACY_SUPPORT /* * If the passthrough device does not support MSI then craft a * MSI capability for it. We link the new MSI capability at the * head of the list of capabilities. 
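The table and PBA locations cached just above are decoded from two dwords of the MSI-X capability: the low three bits select the BAR (the BIR) and the remaining bits give the byte offset within it. A small illustration with hypothetical register values; the mask matches PCIM_MSIX_BIR_MASK.

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_MSIX_BIR_MASK	0x7u		/* same mask as PCIM_MSIX_BIR_MASK */

int
main(void)
{
	/* Hypothetical capability dwords: table at 0x2000 of BAR 1, PBA at 0x3000 of BAR 1. */
	uint32_t table_info = 0x2000u | 1;
	uint32_t pba_info = 0x3000u | 1;

	printf("table: BAR %u, offset %#x\n",
	    (unsigned)(table_info & EXAMPLE_MSIX_BIR_MASK),
	    (unsigned)(table_info & ~EXAMPLE_MSIX_BIR_MASK));
	printf("PBA:   BAR %u, offset %#x\n",
	    (unsigned)(pba_info & EXAMPLE_MSIX_BIR_MASK),
	    (unsigned)(pba_info & ~EXAMPLE_MSIX_BIR_MASK));
	return (0);
}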
*/ if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) { int origptr, msiptr; origptr = read_config(&sel, PCIR_CAP_PTR, 1); msiptr = passthru_add_msicap(pi, 1, origptr); sc->psc_msi.capoff = msiptr; sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2); sc->psc_msi.emulated = 1; pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr); } #endif /* Make sure one of the capabilities is present */ - if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0) + if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0) return (-1); else return (0); } static uint64_t msix_table_read(struct passthru_softc *sc, uint64_t offset, int size) { struct pci_devinst *pi; struct msix_table_entry *entry; uint8_t *src8; uint16_t *src16; uint32_t *src32; uint64_t *src64; uint64_t data; size_t entry_offset; uint32_t table_offset; int index, table_count; pi = sc->psc_pi; table_offset = pi->pi_msix.table_offset; table_count = pi->pi_msix.table_count; if (offset < table_offset || offset >= table_offset + table_count * MSIX_TABLE_ENTRY_SIZE) { switch (size) { case 1: src8 = (uint8_t *)(pi->pi_msix.mapped_addr + offset); data = *src8; break; case 2: src16 = (uint16_t *)(pi->pi_msix.mapped_addr + offset); data = *src16; break; case 4: src32 = (uint32_t *)(pi->pi_msix.mapped_addr + offset); data = *src32; break; case 8: src64 = (uint64_t *)(pi->pi_msix.mapped_addr + offset); data = *src64; break; default: return (-1); } return (data); } offset -= table_offset; index = offset / MSIX_TABLE_ENTRY_SIZE; assert(index < table_count); entry = &pi->pi_msix.table[index]; entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; switch (size) { case 1: src8 = (uint8_t *)((uint8_t *)entry + entry_offset); data = *src8; break; case 2: src16 = (uint16_t *)((uint8_t *)entry + entry_offset); data = *src16; break; case 4: src32 = (uint32_t *)((uint8_t *)entry + entry_offset); data = *src32; break; case 8: src64 = (uint64_t *)((uint8_t *)entry + entry_offset); data = *src64; break; default: return (-1); } return (data); } static void msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc, uint64_t offset, int size, uint64_t data) { struct pci_devinst *pi; struct msix_table_entry *entry; uint8_t *dest8; uint16_t *dest16; uint32_t *dest32; uint64_t *dest64; size_t entry_offset; uint32_t table_offset, vector_control; int index, table_count; pi = sc->psc_pi; table_offset = pi->pi_msix.table_offset; table_count = pi->pi_msix.table_count; if (offset < table_offset || offset >= table_offset + table_count * MSIX_TABLE_ENTRY_SIZE) { switch (size) { case 1: dest8 = (uint8_t *)(pi->pi_msix.mapped_addr + offset); *dest8 = data; break; case 2: dest16 = (uint16_t *)(pi->pi_msix.mapped_addr + offset); *dest16 = data; break; case 4: dest32 = (uint32_t *)(pi->pi_msix.mapped_addr + offset); *dest32 = data; break; case 8: dest64 = (uint64_t *)(pi->pi_msix.mapped_addr + offset); *dest64 = data; break; } return; } offset -= table_offset; index = offset / MSIX_TABLE_ENTRY_SIZE; assert(index < table_count); entry = &pi->pi_msix.table[index]; entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; /* Only 4 byte naturally-aligned writes are supported */ assert(size == 4); assert(entry_offset % 4 == 0); vector_control = entry->vector_control; dest32 = (uint32_t *)((void *)entry + entry_offset); *dest32 = data; /* If MSI-X hasn't been enabled, do nothing */ if (pi->pi_msix.enabled) { /* If the entry is masked, don't set it up */ if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 || (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 
(void)vm_setup_pptdev_msix(ctx, vcpu, sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, sc->psc_sel.pc_func, index, entry->addr, entry->msg_data, entry->vector_control); } } } static int init_msix_table(struct vmctx *ctx, struct passthru_softc *sc) { struct pci_devinst *pi = sc->psc_pi; struct pci_bar_mmap pbm; int b, s, f; uint32_t table_size, table_offset; assert(pci_msix_table_bar(pi) >= 0 && pci_msix_pba_bar(pi) >= 0); b = sc->psc_sel.pc_bus; s = sc->psc_sel.pc_dev; f = sc->psc_sel.pc_func; /* * Map the region of the BAR containing the MSI-X table. This is * necessary for two reasons: * 1. The PBA may reside in the first or last page containing the MSI-X * table. * 2. While PCI devices are not supposed to use the page(s) containing * the MSI-X table for other purposes, some do in practice. */ memset(&pbm, 0, sizeof(pbm)); pbm.pbm_sel = sc->psc_sel; pbm.pbm_flags = PCIIO_BAR_MMAP_RW; pbm.pbm_reg = PCIR_BAR(pi->pi_msix.pba_bar); pbm.pbm_memattr = VM_MEMATTR_DEVICE; if (ioctl(pcifd, PCIOCBARMMAP, &pbm) != 0) { warn("Failed to map MSI-X table BAR on %d/%d/%d", b, s, f); return (-1); } assert(pbm.pbm_bar_off == 0); pi->pi_msix.mapped_addr = (uint8_t *)(uintptr_t)pbm.pbm_map_base; pi->pi_msix.mapped_size = pbm.pbm_map_length; table_offset = rounddown2(pi->pi_msix.table_offset, 4096); table_size = pi->pi_msix.table_offset - table_offset; table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; table_size = roundup2(table_size, 4096); /* * Unmap any pages not covered by the table, we do not need to emulate * accesses to them. Avoid releasing address space to help ensure that * a buggy out-of-bounds access causes a crash. */ if (table_offset != 0) if (mprotect(pi->pi_msix.mapped_addr, table_offset, PROT_NONE) != 0) warn("Failed to unmap MSI-X table BAR region"); if (table_offset + table_size != pi->pi_msix.mapped_size) if (mprotect(pi->pi_msix.mapped_addr, pi->pi_msix.mapped_size - (table_offset + table_size), PROT_NONE) != 0) warn("Failed to unmap MSI-X table BAR region"); return (0); } static int cfginitbar(struct vmctx *ctx, struct passthru_softc *sc) { int i, error; struct pci_devinst *pi; struct pci_bar_io bar; enum pcibar_type bartype; uint64_t base, size; pi = sc->psc_pi; /* * Initialize BAR registers */ for (i = 0; i <= PCI_BARMAX; i++) { bzero(&bar, sizeof(bar)); bar.pbi_sel = sc->psc_sel; bar.pbi_reg = PCIR_BAR(i); if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0) continue; if (PCI_BAR_IO(bar.pbi_base)) { bartype = PCIBAR_IO; base = bar.pbi_base & PCIM_BAR_IO_BASE; } else { switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) { case PCIM_BAR_MEM_64: bartype = PCIBAR_MEM64; break; default: bartype = PCIBAR_MEM32; break; } base = bar.pbi_base & PCIM_BAR_MEM_BASE; } size = bar.pbi_length; if (bartype != PCIBAR_IO) { if (((base | size) & PAGE_MASK) != 0) { warnx("passthru device %d/%d/%d BAR %d: " "base %#lx or size %#lx not page aligned\n", sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, sc->psc_sel.pc_func, i, base, size); return (-1); } } /* Cache information about the "real" BAR */ sc->psc_bar[i].type = bartype; sc->psc_bar[i].size = size; sc->psc_bar[i].addr = base; sc->psc_bar[i].lobits = 0; /* Allocate the BAR in the guest I/O or MMIO space */ error = pci_emul_alloc_bar(pi, i, bartype, size); if (error) return (-1); /* Use same lobits as physical bar */ uint8_t lobits = read_config(&sc->psc_sel, PCIR_BAR(i), 0x01); if (bartype == PCIBAR_MEM32 || bartype == PCIBAR_MEM64) { lobits &= ~PCIM_BAR_MEM_BASE; } else { lobits &= ~PCIM_BAR_IO_BASE; } sc->psc_bar[i].lobits = lobits; pi->pi_bar[i].lobits = lobits; /* * 
64-bit BAR takes up two slots so skip the next one. */ if (bartype == PCIBAR_MEM64) { i++; assert(i <= PCI_BARMAX); sc->psc_bar[i].type = PCIBAR_MEMHI64; } } return (0); } static int cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func) { int error; struct passthru_softc *sc; error = 1; sc = pi->pi_arg; bzero(&sc->psc_sel, sizeof(struct pcisel)); sc->psc_sel.pc_bus = bus; sc->psc_sel.pc_dev = slot; sc->psc_sel.pc_func = func; if (cfginitmsi(sc) != 0) { warnx("failed to initialize MSI for PCI %d/%d/%d", bus, slot, func); goto done; } if (cfginitbar(ctx, sc) != 0) { warnx("failed to initialize BARs for PCI %d/%d/%d", bus, slot, func); goto done; } write_config(&sc->psc_sel, PCIR_COMMAND, 2, pci_get_cfgdata16(pi, PCIR_COMMAND)); /* * We need to do this after PCIR_COMMAND got possibly updated, e.g., * a BAR was enabled, as otherwise the PCIOCBARMMAP might fail on us. */ error = init_msix_table(ctx, sc); if (error != 0) { warnx("failed to initialize MSI-X table for PCI %d/%d/%d: %d", bus, slot, func, error); goto done; } done: return (error); } static int passthru_legacy_config(nvlist_t *nvl, const char *opts) { char value[16]; int bus, slot, func; if (opts == NULL) return (0); if (sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3) { EPRINTLN("passthru: invalid options \"%s\"", opts); return (-1); } snprintf(value, sizeof(value), "%d", bus); set_config_value_node(nvl, "bus", value); snprintf(value, sizeof(value), "%d", slot); set_config_value_node(nvl, "slot", value); snprintf(value, sizeof(value), "%d", func); set_config_value_node(nvl, "func", value); return (0); } static int passthru_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) { int bus, slot, func, error, memflags; struct passthru_softc *sc; const char *value; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; cap_ioctl_t pci_ioctls[] = { PCIOCREAD, PCIOCWRITE, PCIOCGETBAR, PCIOCBARIO, PCIOCBARMMAP }; #endif sc = NULL; error = 1; #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_IOCTL, CAP_READ, CAP_WRITE); #endif memflags = vm_get_memflags(ctx); if (!(memflags & VM_MEM_F_WIRED)) { warnx("passthru requires guest memory to be wired"); return (error); } if (pcifd < 0) { pcifd = open(_PATH_DEVPCI, O_RDWR, 0); if (pcifd < 0) { warn("failed to open %s", _PATH_DEVPCI); return (error); } } #ifndef WITHOUT_CAPSICUM if (caph_rights_limit(pcifd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); if (caph_ioctls_limit(pcifd, pci_ioctls, nitems(pci_ioctls)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif #define GET_INT_CONFIG(var, name) do { \ value = get_config_value_node(nvl, name); \ if (value == NULL) { \ EPRINTLN("passthru: missing required %s setting", name); \ return (error); \ } \ var = atoi(value); \ } while (0) GET_INT_CONFIG(bus, "bus"); GET_INT_CONFIG(slot, "slot"); GET_INT_CONFIG(func, "func"); if (vm_assign_pptdev(ctx, bus, slot, func) != 0) { warnx("PCI device at %d/%d/%d is not using the ppt(4) driver", bus, slot, func); goto done; } sc = calloc(1, sizeof(struct passthru_softc)); pi->pi_arg = sc; sc->psc_pi = pi; /* initialize config space */ error = cfginit(ctx, pi, bus, slot, func); done: if (error) { free(sc); vm_unassign_pptdev(ctx, bus, slot, func); } return (error); } static int bar_access(int coff) { if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) return (1); else return (0); } static int msicap_access(struct passthru_softc *sc, int coff) { int caplen; if (sc->psc_msi.capoff == 0) return (0); caplen = msi_caplen(sc->psc_msi.msgctrl); 
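init_msix_table() above rounds the emulated MSI-X window out to page boundaries: the table offset is rounded down to 4096 and the window length (the offset slack plus 16 bytes per vector) is rounded back up. A standalone version of that arithmetic with a hypothetical table placement:

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_ENTRY_SIZE	16u	/* one MSI-X vector is 16 bytes, as in MSIX_TABLE_ENTRY_SIZE */

int
main(void)
{
	uint32_t table_offset = 0x1800u;	/* hypothetical offset within the BAR */
	uint32_t table_count = 33;		/* hypothetical number of vectors */
	uint32_t page_off, size;

	page_off = table_offset & ~4095u;		/* rounddown2(table_offset, 4096) */
	size = (table_offset - page_off) + table_count * EXAMPLE_ENTRY_SIZE;
	size = (size + 4095u) & ~4095u;			/* roundup2(size, 4096) */

	printf("emulated window: offset %#x, size %#x\n",
	    (unsigned)page_off, (unsigned)size);
	return (0);
}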
if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen) return (1); else return (0); } -static int +static int msixcap_access(struct passthru_softc *sc, int coff) { - if (sc->psc_msix.capoff == 0) + if (sc->psc_msix.capoff == 0) return (0); - return (coff >= sc->psc_msix.capoff && + return (coff >= sc->psc_msix.capoff && coff < sc->psc_msix.capoff + MSIX_CAPLEN); } static int passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff, int bytes, uint32_t *rv) { struct passthru_softc *sc; sc = pi->pi_arg; /* * PCI BARs and MSI capability is emulated. */ if (bar_access(coff) || msicap_access(sc, coff) || msixcap_access(sc, coff)) return (-1); #ifdef LEGACY_SUPPORT /* * Emulate PCIR_CAP_PTR if this device does not support MSI capability * natively. */ if (sc->psc_msi.emulated) { if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4) return (-1); } #endif /* * Emulate the command register. If a single read reads both the * command and status registers, read the status register from the * device's config space. */ if (coff == PCIR_COMMAND) { if (bytes <= 2) return (-1); *rv = read_config(&sc->psc_sel, PCIR_STATUS, 2) << 16 | pci_get_cfgdata16(pi, PCIR_COMMAND); return (0); } /* Everything else just read from the device's config space */ *rv = read_config(&sc->psc_sel, coff, bytes); return (0); } static int passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff, int bytes, uint32_t val) { int error, msix_table_entries, i; struct passthru_softc *sc; uint16_t cmd_old; sc = pi->pi_arg; /* * PCI BARs are emulated */ if (bar_access(coff)) return (-1); /* * MSI capability is emulated */ if (msicap_access(sc, coff)) { pci_emul_capwrite(pi, coff, bytes, val, sc->psc_msi.capoff, PCIY_MSI); error = vm_setup_pptdev_msi(ctx, vcpu, sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, sc->psc_sel.pc_func, pi->pi_msi.addr, pi->pi_msi.msg_data, pi->pi_msi.maxmsgnum); if (error != 0) err(1, "vm_setup_pptdev_msi"); return (0); } if (msixcap_access(sc, coff)) { pci_emul_capwrite(pi, coff, bytes, val, sc->psc_msix.capoff, PCIY_MSIX); if (pi->pi_msix.enabled) { msix_table_entries = pi->pi_msix.table_count; for (i = 0; i < msix_table_entries; i++) { error = vm_setup_pptdev_msix(ctx, vcpu, - sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, - sc->psc_sel.pc_func, i, + sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, + sc->psc_sel.pc_func, i, pi->pi_msix.table[i].addr, pi->pi_msix.table[i].msg_data, pi->pi_msix.table[i].vector_control); - + if (error) err(1, "vm_setup_pptdev_msix"); } } else { error = vm_disable_pptdev_msix(ctx, sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, sc->psc_sel.pc_func); if (error) err(1, "vm_disable_pptdev_msix"); } return (0); } #ifdef LEGACY_SUPPORT /* * If this device does not support MSI natively then we cannot let * the guest disable legacy interrupts from the device. It is the * legacy interrupt that is triggering the virtual MSI to the guest. 
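For a 32-bit guest read at PCIR_COMMAND, passthru_cfgread() above merges the live status register read from the device with the emulated command value; the composition is only a shift and an OR, illustrated here with hypothetical register values.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint16_t status = 0x0010;	/* hypothetical value read from the real device */
	uint16_t command = 0x0007;	/* hypothetical emulated command register */
	uint32_t rv;

	/* status lands in the high 16 bits, command in the low 16 bits */
	rv = (uint32_t)status << 16 | command;
	printf("dword at PCIR_COMMAND: %#010x\n", (unsigned)rv);
	return (0);
}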
*/ if (sc->psc_msi.emulated && pci_msi_enabled(pi)) { if (coff == PCIR_COMMAND && bytes == 2) val &= ~PCIM_CMD_INTxDIS; } #endif write_config(&sc->psc_sel, coff, bytes, val); if (coff == PCIR_COMMAND) { cmd_old = pci_get_cfgdata16(pi, PCIR_COMMAND); if (bytes == 1) pci_set_cfgdata8(pi, PCIR_COMMAND, val); else if (bytes == 2) pci_set_cfgdata16(pi, PCIR_COMMAND, val); pci_emul_cmd_changed(pi, cmd_old); } return (0); } static void passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { struct passthru_softc *sc; struct pci_bar_ioreq pio; sc = pi->pi_arg; if (baridx == pci_msix_table_bar(pi)) { msix_table_write(ctx, vcpu, sc, offset, size, value); } else { assert(pi->pi_bar[baridx].type == PCIBAR_IO); assert(size == 1 || size == 2 || size == 4); assert(offset <= UINT32_MAX && offset + size <= UINT32_MAX); bzero(&pio, sizeof(pio)); pio.pbi_sel = sc->psc_sel; pio.pbi_op = PCIBARIO_WRITE; pio.pbi_bar = baridx; pio.pbi_offset = (uint32_t)offset; pio.pbi_width = size; pio.pbi_value = (uint32_t)value; (void)ioctl(pcifd, PCIOCBARIO, &pio); } } static uint64_t passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { struct passthru_softc *sc; struct pci_bar_ioreq pio; uint64_t val; sc = pi->pi_arg; if (baridx == pci_msix_table_bar(pi)) { val = msix_table_read(sc, offset, size); } else { assert(pi->pi_bar[baridx].type == PCIBAR_IO); assert(size == 1 || size == 2 || size == 4); assert(offset <= UINT32_MAX && offset + size <= UINT32_MAX); bzero(&pio, sizeof(pio)); pio.pbi_sel = sc->psc_sel; pio.pbi_op = PCIBARIO_READ; pio.pbi_bar = baridx; pio.pbi_offset = (uint32_t)offset; pio.pbi_width = size; (void)ioctl(pcifd, PCIOCBARIO, &pio); val = pio.pbi_value; } return (val); } static void passthru_msix_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx, int enabled, uint64_t address) { struct passthru_softc *sc; size_t remaining; uint32_t table_size, table_offset; sc = pi->pi_arg; table_offset = rounddown2(pi->pi_msix.table_offset, 4096); if (table_offset > 0) { if (!enabled) { if (vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, sc->psc_sel.pc_func, address, table_offset) != 0) warnx("pci_passthru: unmap_pptdev_mmio failed"); } else { if (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, sc->psc_sel.pc_func, address, table_offset, sc->psc_bar[baridx].addr) != 0) warnx("pci_passthru: map_pptdev_mmio failed"); } } table_size = pi->pi_msix.table_offset - table_offset; table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; table_size = roundup2(table_size, 4096); remaining = pi->pi_bar[baridx].size - table_offset - table_size; if (remaining > 0) { address += table_offset + table_size; if (!enabled) { if (vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, sc->psc_sel.pc_func, address, remaining) != 0) warnx("pci_passthru: unmap_pptdev_mmio failed"); } else { if (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, sc->psc_sel.pc_func, address, remaining, sc->psc_bar[baridx].addr + table_offset + table_size) != 0) warnx("pci_passthru: map_pptdev_mmio failed"); } } } static void passthru_mmio_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx, int enabled, uint64_t address) { struct passthru_softc *sc; sc = pi->pi_arg; if (!enabled) { if (vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, sc->psc_sel.pc_func, address, sc->psc_bar[baridx].size) != 0) warnx("pci_passthru: unmap_pptdev_mmio failed"); } 
else { if (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, sc->psc_sel.pc_func, address, sc->psc_bar[baridx].size, sc->psc_bar[baridx].addr) != 0) warnx("pci_passthru: map_pptdev_mmio failed"); } } static void passthru_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx, int enabled, uint64_t address) { if (pi->pi_bar[baridx].type == PCIBAR_IO) return; if (baridx == pci_msix_table_bar(pi)) passthru_msix_addr(ctx, pi, baridx, enabled, address); else passthru_mmio_addr(ctx, pi, baridx, enabled, address); } struct pci_devemu passthru = { .pe_emu = "passthru", .pe_init = passthru_init, .pe_legacy_config = passthru_legacy_config, .pe_cfgwrite = passthru_cfgwrite, .pe_cfgread = passthru_cfgread, .pe_barwrite = passthru_write, .pe_barread = passthru_read, .pe_baraddr = passthru_addr, }; PCI_EMUL_SET(passthru); diff --git a/usr.sbin/bhyve/pci_virtio_9p.c b/usr.sbin/bhyve/pci_virtio_9p.c index 70f3eba4bc34..3ec7eaa06c39 100644 --- a/usr.sbin/bhyve/pci_virtio_9p.c +++ b/usr.sbin/bhyve/pci_virtio_9p.c @@ -1,353 +1,353 @@ /*- * Copyright (c) 2015 iXsystems Inc. * Copyright (c) 2017-2018 Jakub Klama * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * VirtIO filesystem passthrough using 9p protocol. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "config.h" #include "debug.h" #include "pci_emul.h" #include "virtio.h" #define VT9P_MAX_IOV 128 #define VT9P_RINGSZ 256 #define VT9P_MAXTAGSZ 256 #define VT9P_CONFIGSPACESZ (VT9P_MAXTAGSZ + sizeof(uint16_t)) static int pci_vt9p_debug; #define DPRINTF(params) if (pci_vt9p_debug) printf params #define WPRINTF(params) printf params /* * Per-device softc */ struct pci_vt9p_softc { struct virtio_softc vsc_vs; struct vqueue_info vsc_vq; pthread_mutex_t vsc_mtx; uint64_t vsc_cfg; uint64_t vsc_features; char * vsc_rootpath; struct pci_vt9p_config * vsc_config; struct l9p_backend * vsc_fs_backend; struct l9p_server * vsc_server; struct l9p_connection * vsc_conn; }; struct pci_vt9p_request { struct pci_vt9p_softc * vsr_sc; struct iovec * vsr_iov; size_t vsr_niov; size_t vsr_respidx; size_t vsr_iolen; uint16_t vsr_idx; }; struct pci_vt9p_config { uint16_t tag_len; char tag[0]; } __attribute__((packed)); static int pci_vt9p_send(struct l9p_request *, const struct iovec *, const size_t, const size_t, void *); static void pci_vt9p_drop(struct l9p_request *, const struct iovec *, size_t, void *); static void pci_vt9p_reset(void *); static void pci_vt9p_notify(void *, struct vqueue_info *); static int pci_vt9p_cfgread(void *, int, int, uint32_t *); static void pci_vt9p_neg_features(void *, uint64_t); static struct virtio_consts vt9p_vi_consts = { "vt9p", /* our name */ 1, /* we support 1 virtqueue */ VT9P_CONFIGSPACESZ, /* config reg size */ pci_vt9p_reset, /* reset */ pci_vt9p_notify, /* device-wide qnotify */ pci_vt9p_cfgread, /* read virtio config */ NULL, /* write virtio config */ pci_vt9p_neg_features, /* apply negotiated features */ (1 << 0), /* our capabilities */ }; static void pci_vt9p_reset(void *vsc) { struct pci_vt9p_softc *sc; sc = vsc; DPRINTF(("vt9p: device reset requested !\n")); vi_reset_dev(&sc->vsc_vs); } static void pci_vt9p_neg_features(void *vsc, uint64_t negotiated_features) { struct pci_vt9p_softc *sc = vsc; sc->vsc_features = negotiated_features; } static int pci_vt9p_cfgread(void *vsc, int offset, int size, uint32_t *retval) { struct pci_vt9p_softc *sc = vsc; void *ptr; ptr = (uint8_t *)sc->vsc_config + offset; memcpy(retval, ptr, size); return (0); } static int pci_vt9p_get_buffer(struct l9p_request *req, struct iovec *iov, size_t *niov, void *arg) { struct pci_vt9p_request *preq = req->lr_aux; size_t n = preq->vsr_niov - preq->vsr_respidx; - + memcpy(iov, preq->vsr_iov + preq->vsr_respidx, n * sizeof(struct iovec)); *niov = n; return (0); } static int pci_vt9p_send(struct l9p_request *req, const struct iovec *iov, const size_t niov, const size_t iolen, void *arg) { struct pci_vt9p_request *preq = req->lr_aux; struct pci_vt9p_softc *sc = preq->vsr_sc; preq->vsr_iolen = iolen; pthread_mutex_lock(&sc->vsc_mtx); vq_relchain(&sc->vsc_vq, preq->vsr_idx, preq->vsr_iolen); vq_endchains(&sc->vsc_vq, 1); pthread_mutex_unlock(&sc->vsc_mtx); free(preq); return (0); } static void pci_vt9p_drop(struct l9p_request *req, const struct iovec *iov, size_t niov, void *arg) { struct pci_vt9p_request *preq = req->lr_aux; struct pci_vt9p_softc *sc = preq->vsr_sc; pthread_mutex_lock(&sc->vsc_mtx); vq_relchain(&sc->vsc_vq, preq->vsr_idx, 0); vq_endchains(&sc->vsc_vq, 1); pthread_mutex_unlock(&sc->vsc_mtx); free(preq); } static void pci_vt9p_notify(void *vsc, struct vqueue_info *vq) { struct iovec 
iov[VT9P_MAX_IOV]; struct pci_vt9p_softc *sc; struct pci_vt9p_request *preq; struct vi_req req; int n; sc = vsc; while (vq_has_descs(vq)) { n = vq_getchain(vq, iov, VT9P_MAX_IOV, &req); assert(n >= 1 && n <= VT9P_MAX_IOV); preq = calloc(1, sizeof(struct pci_vt9p_request)); preq->vsr_sc = sc; preq->vsr_idx = req.idx; preq->vsr_iov = iov; preq->vsr_niov = n; preq->vsr_respidx = req.readable; for (int i = 0; i < n; i++) { DPRINTF(("vt9p: vt9p_notify(): desc%d base=%p, " "len=%zu\r\n", i, iov[i].iov_base, iov[i].iov_len)); } l9p_connection_recv(sc->vsc_conn, iov, preq->vsr_respidx, preq); } } static int pci_vt9p_legacy_config(nvlist_t *nvl, const char *opts) { char *sharename = NULL, *tofree, *token, *tokens; if (opts == NULL) return (0); tokens = tofree = strdup(opts); while ((token = strsep(&tokens, ",")) != NULL) { if (strchr(token, '=') != NULL) { if (sharename != NULL) { EPRINTLN( "virtio-9p: more than one share name given"); return (-1); } sharename = strsep(&token, "="); set_config_value_node(nvl, "sharename", sharename); set_config_value_node(nvl, "path", token); } else set_config_bool_node(nvl, token, true); } free(tofree); return (0); } static int pci_vt9p_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) { struct pci_vt9p_softc *sc; const char *value; const char *sharename; int rootfd; bool ro; cap_rights_t rootcap; ro = get_config_bool_node_default(nvl, "ro", false); value = get_config_value_node(nvl, "path"); if (value == NULL) { EPRINTLN("virtio-9p: path required"); return (1); } rootfd = open(value, O_DIRECTORY); if (rootfd < 0) { EPRINTLN("virtio-9p: failed to open '%s': %s", value, strerror(errno)); return (-1); } sharename = get_config_value_node(nvl, "sharename"); if (sharename == NULL) { EPRINTLN("virtio-9p: share name required"); return (1); } if (strlen(sharename) > VT9P_MAXTAGSZ) { EPRINTLN("virtio-9p: share name too long"); return (1); } sc = calloc(1, sizeof(struct pci_vt9p_softc)); sc->vsc_config = calloc(1, sizeof(struct pci_vt9p_config) + VT9P_MAXTAGSZ); pthread_mutex_init(&sc->vsc_mtx, NULL); cap_rights_init(&rootcap, CAP_LOOKUP, CAP_ACL_CHECK, CAP_ACL_DELETE, CAP_ACL_GET, CAP_ACL_SET, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSTAT, CAP_CREATE, CAP_FCHMODAT, CAP_FCHOWNAT, CAP_FTRUNCATE, CAP_LINKAT_SOURCE, CAP_LINKAT_TARGET, CAP_MKDIRAT, CAP_MKNODAT, CAP_PREAD, CAP_PWRITE, CAP_RENAMEAT_SOURCE, CAP_RENAMEAT_TARGET, CAP_SEEK, CAP_SYMLINKAT, CAP_UNLINKAT, CAP_EXTATTR_DELETE, CAP_EXTATTR_GET, CAP_EXTATTR_LIST, CAP_EXTATTR_SET, CAP_FUTIMES, CAP_FSTATFS, CAP_FSYNC, CAP_FPATHCONF); if (cap_rights_limit(rootfd, &rootcap) != 0) return (1); sc->vsc_config->tag_len = (uint16_t)strlen(sharename); memcpy(sc->vsc_config->tag, sharename, sc->vsc_config->tag_len); - + if (l9p_backend_fs_init(&sc->vsc_fs_backend, rootfd, ro) != 0) { errno = ENXIO; return (1); } if (l9p_server_init(&sc->vsc_server, sc->vsc_fs_backend) != 0) { errno = ENXIO; return (1); } if (l9p_connection_init(sc->vsc_server, &sc->vsc_conn) != 0) { errno = EIO; return (1); } sc->vsc_conn->lc_msize = L9P_MAX_IOV * PAGE_SIZE; sc->vsc_conn->lc_lt.lt_get_response_buffer = pci_vt9p_get_buffer; sc->vsc_conn->lc_lt.lt_send_response = pci_vt9p_send; sc->vsc_conn->lc_lt.lt_drop_response = pci_vt9p_drop; vi_softc_linkup(&sc->vsc_vs, &vt9p_vi_consts, sc, pi, &sc->vsc_vq); sc->vsc_vs.vs_mtx = &sc->vsc_mtx; sc->vsc_vq.vq_qsize = VT9P_RINGSZ; /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_9P); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); 
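The guest discovers the 9p mount tag through virtio config space, laid out as a 16-bit length followed by the tag bytes (struct pci_vt9p_config above, capped at VT9P_MAXTAGSZ). A self-contained sketch of that packing with a hypothetical share name:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define EXAMPLE_MAXTAGSZ	256	/* same limit as VT9P_MAXTAGSZ */

struct example_9p_config {
	uint16_t tag_len;
	char tag[EXAMPLE_MAXTAGSZ];
} __attribute__((packed));

int
main(void)
{
	struct example_9p_config cfg;
	const char *sharename = "shared";	/* hypothetical share name */

	memset(&cfg, 0, sizeof(cfg));
	cfg.tag_len = (uint16_t)strlen(sharename);
	memcpy(cfg.tag, sharename, cfg.tag_len);	/* the tag is not NUL-terminated */

	printf("config space carries a %u-byte tag: \"%.*s\"\n",
	    (unsigned)cfg.tag_len, (int)cfg.tag_len, cfg.tag);
	return (0);
}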
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_9P); pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) return (1); vi_set_io_bar(&sc->vsc_vs, 0); return (0); } struct pci_devemu pci_de_v9p = { .pe_emu = "virtio-9p", .pe_legacy_config = pci_vt9p_legacy_config, .pe_init = pci_vt9p_init, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read }; PCI_EMUL_SET(pci_de_v9p); diff --git a/usr.sbin/bhyve/pci_virtio_console.c b/usr.sbin/bhyve/pci_virtio_console.c index 71c3375a57ac..0414fc540c2b 100644 --- a/usr.sbin/bhyve/pci_virtio_console.c +++ b/usr.sbin/bhyve/pci_virtio_console.c @@ -1,759 +1,759 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016 iXsystems Inc. * All rights reserved. * * This software was developed by Jakub Klama * under sponsorship from iXsystems Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #ifndef WITHOUT_CAPSICUM #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "config.h" #include "debug.h" #include "pci_emul.h" #include "virtio.h" #include "mevent.h" #include "sockstream.h" #define VTCON_RINGSZ 64 #define VTCON_MAXPORTS 16 #define VTCON_MAXQ (VTCON_MAXPORTS * 2 + 2) #define VTCON_DEVICE_READY 0 #define VTCON_DEVICE_ADD 1 #define VTCON_DEVICE_REMOVE 2 #define VTCON_PORT_READY 3 #define VTCON_CONSOLE_PORT 4 #define VTCON_CONSOLE_RESIZE 5 #define VTCON_PORT_OPEN 6 #define VTCON_PORT_NAME 7 #define VTCON_F_SIZE 0 #define VTCON_F_MULTIPORT 1 #define VTCON_F_EMERG_WRITE 2 #define VTCON_S_HOSTCAPS \ (VTCON_F_SIZE | VTCON_F_MULTIPORT | VTCON_F_EMERG_WRITE) static int pci_vtcon_debug; #define DPRINTF(params) if (pci_vtcon_debug) PRINTLN params #define WPRINTF(params) PRINTLN params struct pci_vtcon_softc; struct pci_vtcon_port; struct pci_vtcon_config; typedef void (pci_vtcon_cb_t)(struct pci_vtcon_port *, void *, struct iovec *, int); struct pci_vtcon_port { struct pci_vtcon_softc * vsp_sc; int vsp_id; const char * vsp_name; bool vsp_enabled; bool vsp_console; bool vsp_rx_ready; bool vsp_open; int vsp_rxq; int vsp_txq; void * vsp_arg; pci_vtcon_cb_t * vsp_cb; }; struct pci_vtcon_sock { struct pci_vtcon_port * vss_port; const char * vss_path; struct mevent * vss_server_evp; struct mevent * vss_conn_evp; int vss_server_fd; int vss_conn_fd; bool vss_open; }; struct pci_vtcon_softc { struct virtio_softc vsc_vs; struct vqueue_info vsc_queues[VTCON_MAXQ]; pthread_mutex_t vsc_mtx; uint64_t vsc_cfg; uint64_t vsc_features; char * vsc_rootdir; int vsc_kq; bool vsc_ready; struct pci_vtcon_port vsc_control_port; struct pci_vtcon_port vsc_ports[VTCON_MAXPORTS]; struct pci_vtcon_config *vsc_config; }; struct pci_vtcon_config { uint16_t cols; uint16_t rows; uint32_t max_nr_ports; uint32_t emerg_wr; } __attribute__((packed)); struct pci_vtcon_control { uint32_t id; uint16_t event; uint16_t value; } __attribute__((packed)); struct pci_vtcon_console_resize { uint16_t cols; uint16_t rows; } __attribute__((packed)); static void pci_vtcon_reset(void *); static void pci_vtcon_notify_rx(void *, struct vqueue_info *); static void pci_vtcon_notify_tx(void *, struct vqueue_info *); static int pci_vtcon_cfgread(void *, int, int, uint32_t *); static int pci_vtcon_cfgwrite(void *, int, int, uint32_t); static void pci_vtcon_neg_features(void *, uint64_t); static void pci_vtcon_sock_accept(int, enum ev_type, void *); static void pci_vtcon_sock_rx(int, enum ev_type, void *); static void pci_vtcon_sock_tx(struct pci_vtcon_port *, void *, struct iovec *, int); static void pci_vtcon_control_send(struct pci_vtcon_softc *, struct pci_vtcon_control *, const void *, size_t); static void pci_vtcon_announce_port(struct pci_vtcon_port *); static void pci_vtcon_open_port(struct pci_vtcon_port *, bool); static struct virtio_consts vtcon_vi_consts = { "vtcon", /* our name */ VTCON_MAXQ, /* we support VTCON_MAXQ virtqueues */ sizeof(struct pci_vtcon_config), /* config reg size */ pci_vtcon_reset, /* reset */ NULL, /* device-wide qnotify */ pci_vtcon_cfgread, /* read virtio config */ pci_vtcon_cfgwrite, /* write virtio config */ pci_vtcon_neg_features, /* apply negotiated features */ VTCON_S_HOSTCAPS, /* our capabilities */ }; static void pci_vtcon_reset(void *vsc) { struct pci_vtcon_softc *sc; 
sc = vsc; DPRINTF(("vtcon: device reset requested!")); vi_reset_dev(&sc->vsc_vs); } static void pci_vtcon_neg_features(void *vsc, uint64_t negotiated_features) { struct pci_vtcon_softc *sc = vsc; sc->vsc_features = negotiated_features; } static int pci_vtcon_cfgread(void *vsc, int offset, int size, uint32_t *retval) { struct pci_vtcon_softc *sc = vsc; void *ptr; ptr = (uint8_t *)sc->vsc_config + offset; memcpy(retval, ptr, size); return (0); } static int pci_vtcon_cfgwrite(void *vsc, int offset, int size, uint32_t val) { return (0); } static inline struct pci_vtcon_port * pci_vtcon_vq_to_port(struct pci_vtcon_softc *sc, struct vqueue_info *vq) { uint16_t num = vq->vq_num; if (num == 0 || num == 1) return (&sc->vsc_ports[0]); if (num == 2 || num == 3) return (&sc->vsc_control_port); return (&sc->vsc_ports[(num / 2) - 1]); } static inline struct vqueue_info * pci_vtcon_port_to_vq(struct pci_vtcon_port *port, bool tx_queue) { int qnum; qnum = tx_queue ? port->vsp_txq : port->vsp_rxq; return (&port->vsp_sc->vsc_queues[qnum]); } static struct pci_vtcon_port * pci_vtcon_port_add(struct pci_vtcon_softc *sc, int port_id, const char *name, pci_vtcon_cb_t *cb, void *arg) { struct pci_vtcon_port *port; port = &sc->vsc_ports[port_id]; if (port->vsp_enabled) { errno = EBUSY; return (NULL); } port->vsp_id = port_id; port->vsp_sc = sc; port->vsp_name = name; port->vsp_cb = cb; port->vsp_arg = arg; if (port->vsp_id == 0) { /* port0 */ port->vsp_txq = 0; port->vsp_rxq = 1; } else { port->vsp_txq = (port_id + 1) * 2; port->vsp_rxq = port->vsp_txq + 1; } port->vsp_enabled = true; return (port); } static int pci_vtcon_sock_add(struct pci_vtcon_softc *sc, const char *port_name, const nvlist_t *nvl) { struct pci_vtcon_sock *sock; struct sockaddr_un sun; const char *name, *path; char *cp, *pathcopy; long port; int s = -1, fd = -1, error = 0; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; #endif port = strtol(port_name, &cp, 0); if (*cp != '\0' || port < 0 || port >= VTCON_MAXPORTS) { EPRINTLN("vtcon: Invalid port %s", port_name); error = -1; goto out; } path = get_config_value_node(nvl, "path"); if (path == NULL) { EPRINTLN("vtcon: required path missing for port %ld", port); error = -1; goto out; } sock = calloc(1, sizeof(struct pci_vtcon_sock)); if (sock == NULL) { error = -1; goto out; } s = socket(AF_UNIX, SOCK_STREAM, 0); if (s < 0) { error = -1; goto out; } pathcopy = strdup(path); if (pathcopy == NULL) { error = -1; goto out; } fd = open(dirname(pathcopy), O_RDONLY | O_DIRECTORY); if (fd < 0) { free(pathcopy); error = -1; goto out; } sun.sun_family = AF_UNIX; sun.sun_len = sizeof(struct sockaddr_un); strcpy(pathcopy, path); strlcpy(sun.sun_path, basename(pathcopy), sizeof(sun.sun_path)); free(pathcopy); if (bindat(fd, s, (struct sockaddr *)&sun, sun.sun_len) < 0) { error = -1; goto out; } if (fcntl(s, F_SETFL, O_NONBLOCK) < 0) { error = -1; goto out; } if (listen(s, 1) < 0) { error = -1; goto out; } #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE); if (caph_rights_limit(s, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif name = get_config_value_node(nvl, "name"); if (name == NULL) { EPRINTLN("vtcon: required name missing for port %ld", port); error = -1; goto out; } sock->vss_port = pci_vtcon_port_add(sc, port, name, pci_vtcon_sock_tx, sock); if (sock->vss_port == NULL) { error = -1; goto out; } sock->vss_open = false; sock->vss_conn_fd = -1; sock->vss_server_fd = s; sock->vss_server_evp = mevent_add(s, EVF_READ, pci_vtcon_sock_accept, 
sock); if (sock->vss_server_evp == NULL) { error = -1; goto out; } out: if (fd != -1) close(fd); if (error != 0) { if (s != -1) close(s); free(sock); } return (error); } static void pci_vtcon_sock_accept(int fd __unused, enum ev_type t __unused, void *arg) { struct pci_vtcon_sock *sock = (struct pci_vtcon_sock *)arg; int s; s = accept(sock->vss_server_fd, NULL, NULL); if (s < 0) return; if (sock->vss_open) { close(s); return; } sock->vss_open = true; sock->vss_conn_fd = s; sock->vss_conn_evp = mevent_add(s, EVF_READ, pci_vtcon_sock_rx, sock); pci_vtcon_open_port(sock->vss_port, true); } static void pci_vtcon_sock_rx(int fd __unused, enum ev_type t __unused, void *arg) { struct pci_vtcon_port *port; struct pci_vtcon_sock *sock = (struct pci_vtcon_sock *)arg; struct vqueue_info *vq; struct vi_req req; struct iovec iov; static char dummybuf[2048]; int len, n; port = sock->vss_port; vq = pci_vtcon_port_to_vq(port, true); if (!sock->vss_open || !port->vsp_rx_ready) { len = read(sock->vss_conn_fd, dummybuf, sizeof(dummybuf)); if (len == 0) goto close; return; } if (!vq_has_descs(vq)) { len = read(sock->vss_conn_fd, dummybuf, sizeof(dummybuf)); vq_endchains(vq, 1); if (len == 0) goto close; return; } do { n = vq_getchain(vq, &iov, 1, &req); assert(n == 1); len = readv(sock->vss_conn_fd, &iov, n); if (len == 0 || (len < 0 && errno == EWOULDBLOCK)) { vq_retchains(vq, 1); vq_endchains(vq, 0); if (len == 0) goto close; return; } vq_relchain(vq, req.idx, len); } while (vq_has_descs(vq)); vq_endchains(vq, 1); close: mevent_delete_close(sock->vss_conn_evp); sock->vss_conn_fd = -1; sock->vss_open = false; } static void pci_vtcon_sock_tx(struct pci_vtcon_port *port, void *arg, struct iovec *iov, int niov) { struct pci_vtcon_sock *sock; int i, ret; sock = (struct pci_vtcon_sock *)arg; if (sock->vss_conn_fd == -1) return; for (i = 0; i < niov; i++) { ret = stream_write(sock->vss_conn_fd, iov[i].iov_base, iov[i].iov_len); if (ret <= 0) break; } if (ret <= 0) { mevent_delete_close(sock->vss_conn_evp); sock->vss_conn_fd = -1; sock->vss_open = false; } } static void pci_vtcon_control_tx(struct pci_vtcon_port *port, void *arg, struct iovec *iov, int niov) { struct pci_vtcon_softc *sc; struct pci_vtcon_port *tmp; struct pci_vtcon_control resp, *ctrl; int i; assert(niov == 1); sc = port->vsp_sc; ctrl = (struct pci_vtcon_control *)iov->iov_base; switch (ctrl->event) { case VTCON_DEVICE_READY: sc->vsc_ready = true; /* set port ready events for registered ports */ for (i = 0; i < VTCON_MAXPORTS; i++) { tmp = &sc->vsc_ports[i]; if (tmp->vsp_enabled) pci_vtcon_announce_port(tmp); if (tmp->vsp_open) pci_vtcon_open_port(tmp, true); } break; case VTCON_PORT_READY: tmp = &sc->vsc_ports[ctrl->id]; if (ctrl->id >= VTCON_MAXPORTS || !tmp->vsp_enabled) { WPRINTF(("VTCON_PORT_READY event for unknown port %d", ctrl->id)); return; } if (tmp->vsp_console) { resp.event = VTCON_CONSOLE_PORT; resp.id = ctrl->id; resp.value = 1; pci_vtcon_control_send(sc, &resp, NULL, 0); } break; } } static void pci_vtcon_announce_port(struct pci_vtcon_port *port) { struct pci_vtcon_control event; event.id = port->vsp_id; event.event = VTCON_DEVICE_ADD; event.value = 1; pci_vtcon_control_send(port->vsp_sc, &event, NULL, 0); event.event = VTCON_PORT_NAME; pci_vtcon_control_send(port->vsp_sc, &event, port->vsp_name, strlen(port->vsp_name)); } static void pci_vtcon_open_port(struct pci_vtcon_port *port, bool open) { struct pci_vtcon_control event; if (!port->vsp_sc->vsc_ready) { port->vsp_open = true; return; } event.id = port->vsp_id; event.event = 
VTCON_PORT_OPEN; event.value = (int)open; pci_vtcon_control_send(port->vsp_sc, &event, NULL, 0); } static void pci_vtcon_control_send(struct pci_vtcon_softc *sc, struct pci_vtcon_control *ctrl, const void *payload, size_t len) { struct vqueue_info *vq; struct vi_req req; struct iovec iov; int n; vq = pci_vtcon_port_to_vq(&sc->vsc_control_port, true); if (!vq_has_descs(vq)) return; n = vq_getchain(vq, &iov, 1, &req); assert(n == 1); memcpy(iov.iov_base, ctrl, sizeof(struct pci_vtcon_control)); if (payload != NULL && len > 0) memcpy(iov.iov_base + sizeof(struct pci_vtcon_control), payload, len); vq_relchain(vq, req.idx, sizeof(struct pci_vtcon_control) + len); vq_endchains(vq, 1); } - + static void pci_vtcon_notify_tx(void *vsc, struct vqueue_info *vq) { struct pci_vtcon_softc *sc; struct pci_vtcon_port *port; struct iovec iov[1]; struct vi_req req; int n; sc = vsc; port = pci_vtcon_vq_to_port(sc, vq); while (vq_has_descs(vq)) { n = vq_getchain(vq, iov, 1, &req); assert(n == 1); if (port != NULL) port->vsp_cb(port, port->vsp_arg, iov, 1); /* * Release this chain and handle more */ vq_relchain(vq, req.idx, 0); } vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ } static void pci_vtcon_notify_rx(void *vsc, struct vqueue_info *vq) { struct pci_vtcon_softc *sc; struct pci_vtcon_port *port; sc = vsc; port = pci_vtcon_vq_to_port(sc, vq); if (!port->vsp_rx_ready) { port->vsp_rx_ready = 1; vq_kick_disable(vq); } } /* * Each console device has a "port" node which contains nodes for * each port. Ports are numbered starting at 0. */ static int pci_vtcon_legacy_config_port(nvlist_t *nvl, int port, char *opt) { char *name, *path; char node_name[sizeof("XX")]; nvlist_t *port_nvl; name = strsep(&opt, "="); path = opt; if (path == NULL) { EPRINTLN("vtcon: port %s requires a path", name); return (-1); } if (port >= VTCON_MAXPORTS) { EPRINTLN("vtcon: too many ports"); return (-1); } snprintf(node_name, sizeof(node_name), "%d", port); port_nvl = create_relative_config_node(nvl, node_name); set_config_value_node(port_nvl, "name", name); set_config_value_node(port_nvl, "path", path); return (0); } static int pci_vtcon_legacy_config(nvlist_t *nvl, const char *opts) { char *opt, *str, *tofree; nvlist_t *ports_nvl; int error, port; ports_nvl = create_relative_config_node(nvl, "port"); tofree = str = strdup(opts); error = 0; port = 0; while ((opt = strsep(&str, ",")) != NULL) { error = pci_vtcon_legacy_config_port(ports_nvl, port, opt); if (error) break; port++; } free(tofree); return (error); } static int pci_vtcon_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) { struct pci_vtcon_softc *sc; nvlist_t *ports_nvl; int i; sc = calloc(1, sizeof(struct pci_vtcon_softc)); sc->vsc_config = calloc(1, sizeof(struct pci_vtcon_config)); sc->vsc_config->max_nr_ports = VTCON_MAXPORTS; sc->vsc_config->cols = 80; - sc->vsc_config->rows = 25; + sc->vsc_config->rows = 25; vi_softc_linkup(&sc->vsc_vs, &vtcon_vi_consts, sc, pi, sc->vsc_queues); sc->vsc_vs.vs_mtx = &sc->vsc_mtx; for (i = 0; i < VTCON_MAXQ; i++) { sc->vsc_queues[i].vq_qsize = VTCON_RINGSZ; sc->vsc_queues[i].vq_notify = i % 2 == 0 ? 
pci_vtcon_notify_rx : pci_vtcon_notify_tx; } /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_CONSOLE); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_SIMPLECOMM); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_CONSOLE); pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) return (1); vi_set_io_bar(&sc->vsc_vs, 0); /* create control port */ sc->vsc_control_port.vsp_sc = sc; sc->vsc_control_port.vsp_txq = 2; sc->vsc_control_port.vsp_rxq = 3; sc->vsc_control_port.vsp_cb = pci_vtcon_control_tx; sc->vsc_control_port.vsp_enabled = true; ports_nvl = find_relative_config_node(nvl, "port"); if (ports_nvl != NULL) { const char *name; void *cookie; int type; cookie = NULL; while ((name = nvlist_next(ports_nvl, &type, &cookie)) != NULL) { if (type != NV_TYPE_NVLIST) continue; if (pci_vtcon_sock_add(sc, name, nvlist_get_nvlist(ports_nvl, name)) < 0) { EPRINTLN("cannot create port %s: %s", name, strerror(errno)); return (1); } } } return (0); } struct pci_devemu pci_de_vcon = { .pe_emu = "virtio-console", .pe_init = pci_vtcon_init, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read }; PCI_EMUL_SET(pci_de_vcon); diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c index d253b081d13a..9d3c8aa6d541 100644 --- a/usr.sbin/bhyve/pci_virtio_net.c +++ b/usr.sbin/bhyve/pci_virtio_net.c @@ -1,819 +1,819 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include /* IFNAMSIZ */ #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "config.h" #include "debug.h" #include "pci_emul.h" #include "mevent.h" #include "virtio.h" #include "net_utils.h" #include "net_backends.h" #include "iov.h" #define VTNET_RINGSZ 1024 #define VTNET_MAXSEGS 256 #define VTNET_MAX_PKT_LEN (65536 + 64) #define VTNET_MIN_MTU ETHERMIN #define VTNET_MAX_MTU 65535 #define VTNET_S_HOSTCAPS \ ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \ VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC) /* * PCI config-space "registers" */ struct virtio_net_config { uint8_t mac[6]; uint16_t status; uint16_t max_virtqueue_pairs; uint16_t mtu; } __packed; /* * Queue definitions. */ #define VTNET_RXQ 0 #define VTNET_TXQ 1 #define VTNET_CTLQ 2 /* NB: not yet supported */ #define VTNET_MAXQ 3 /* * Debug printf */ static int pci_vtnet_debug; #define DPRINTF(params) if (pci_vtnet_debug) PRINTLN params #define WPRINTF(params) PRINTLN params /* * Per-device softc */ struct pci_vtnet_softc { struct virtio_softc vsc_vs; struct vqueue_info vsc_queues[VTNET_MAXQ - 1]; pthread_mutex_t vsc_mtx; net_backend_t *vsc_be; bool features_negotiated; /* protected by rx_mtx */ int resetting; /* protected by tx_mtx */ uint64_t vsc_features; /* negotiated features */ - + pthread_mutex_t rx_mtx; int rx_merge; /* merged rx bufs in use */ pthread_t tx_tid; pthread_mutex_t tx_mtx; pthread_cond_t tx_cond; int tx_in_progress; size_t vhdrlen; size_t be_vhdrlen; struct virtio_net_config vsc_config; struct virtio_consts vsc_consts; }; static void pci_vtnet_reset(void *); /* static void pci_vtnet_notify(void *, struct vqueue_info *); */ static int pci_vtnet_cfgread(void *, int, int, uint32_t *); static int pci_vtnet_cfgwrite(void *, int, int, uint32_t); static void pci_vtnet_neg_features(void *, uint64_t); #ifdef BHYVE_SNAPSHOT static void pci_vtnet_pause(void *); static void pci_vtnet_resume(void *); static int pci_vtnet_snapshot(void *, struct vm_snapshot_meta *); #endif static struct virtio_consts vtnet_vi_consts = { "vtnet", /* our name */ VTNET_MAXQ - 1, /* we currently support 2 virtqueues */ sizeof(struct virtio_net_config), /* config reg size */ pci_vtnet_reset, /* reset */ NULL, /* device-wide qnotify -- not used */ pci_vtnet_cfgread, /* read PCI config */ pci_vtnet_cfgwrite, /* write PCI config */ pci_vtnet_neg_features, /* apply negotiated features */ VTNET_S_HOSTCAPS, /* our capabilities */ #ifdef BHYVE_SNAPSHOT pci_vtnet_pause, /* pause rx/tx threads */ pci_vtnet_resume, /* resume rx/tx threads */ pci_vtnet_snapshot, /* save / restore device state */ #endif }; static void pci_vtnet_reset(void *vsc) { struct pci_vtnet_softc *sc = vsc; DPRINTF(("vtnet: device reset requested !")); /* Acquire the RX lock to block RX processing. */ pthread_mutex_lock(&sc->rx_mtx); /* * Make sure receive operation is disabled at least until we * re-negotiate the features, since receive operation depends * on the value of sc->rx_merge and the header length, which * are both set in pci_vtnet_neg_features(). * Receive operation will be enabled again once the guest adds * the first receive buffers and kicks us. */ sc->features_negotiated = false; netbe_rx_disable(sc->vsc_be); /* Set sc->resetting and give a chance to the TX thread to stop. 
*/ pthread_mutex_lock(&sc->tx_mtx); sc->resetting = 1; while (sc->tx_in_progress) { pthread_mutex_unlock(&sc->tx_mtx); usleep(10000); pthread_mutex_lock(&sc->tx_mtx); } /* * Now reset rings, MSI-X vectors, and negotiated capabilities. * Do that with the TX lock held, since we need to reset * sc->resetting. */ vi_reset_dev(&sc->vsc_vs); sc->resetting = 0; pthread_mutex_unlock(&sc->tx_mtx); pthread_mutex_unlock(&sc->rx_mtx); } static __inline struct iovec * iov_trim_hdr(struct iovec *iov, int *iovcnt, unsigned int hlen) { struct iovec *riov; if (iov[0].iov_len < hlen) { /* * Not enough header space in the first fragment. * That's not ok for us. */ return NULL; } iov[0].iov_len -= hlen; if (iov[0].iov_len == 0) { *iovcnt -= 1; if (*iovcnt == 0) { /* * Only space for the header. That's not * enough for us. */ return NULL; } riov = &iov[1]; } else { iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + hlen); riov = &iov[0]; } return (riov); } struct virtio_mrg_rxbuf_info { uint16_t idx; uint16_t pad; uint32_t len; }; static void pci_vtnet_rx(struct pci_vtnet_softc *sc) { int prepend_hdr_len = sc->vhdrlen - sc->be_vhdrlen; struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS]; struct iovec iov[VTNET_MAXSEGS + 1]; struct vqueue_info *vq; struct vi_req req; vq = &sc->vsc_queues[VTNET_RXQ]; /* Features must be negotiated */ if (!sc->features_negotiated) { return; } for (;;) { struct virtio_net_rxhdr *hdr; uint32_t riov_bytes; struct iovec *riov; uint32_t ulen; int riov_len; int n_chains; ssize_t rlen; ssize_t plen; plen = netbe_peek_recvlen(sc->vsc_be); if (plen <= 0) { /* * No more packets (plen == 0), or backend errored * (plen < 0). Interrupt if needed and stop. */ vq_endchains(vq, /*used_all_avail=*/0); return; } plen += prepend_hdr_len; /* * Get a descriptor chain to store the next ingress * packet. In case of mergeable rx buffers, get as * many chains as necessary in order to make room * for plen bytes. */ riov_bytes = 0; riov_len = 0; riov = iov; n_chains = 0; do { int n = vq_getchain(vq, riov, VTNET_MAXSEGS - riov_len, &req); info[n_chains].idx = req.idx; if (n == 0) { /* * No rx buffers. Enable RX kicks and double * check. */ vq_kick_enable(vq); if (!vq_has_descs(vq)) { /* * Still no buffers. Return the unused * chains (if any), interrupt if needed * (including for NOTIFY_ON_EMPTY), and * disable the backend until the next * kick. */ vq_retchains(vq, n_chains); vq_endchains(vq, /*used_all_avail=*/1); netbe_rx_disable(sc->vsc_be); return; } /* More rx buffers found, so keep going. */ vq_kick_disable(vq); continue; } assert(n >= 1 && riov_len + n <= VTNET_MAXSEGS); riov_len += n; if (!sc->rx_merge) { n_chains = 1; break; } info[n_chains].len = (uint32_t)count_iov(riov, n); riov_bytes += info[n_chains].len; riov += n; n_chains++; } while (riov_bytes < plen && riov_len < VTNET_MAXSEGS); riov = iov; hdr = riov[0].iov_base; if (prepend_hdr_len > 0) { /* * The frontend uses a virtio-net header, but the * backend does not. We need to prepend a zeroed * header. */ riov = iov_trim_hdr(riov, &riov_len, prepend_hdr_len); if (riov == NULL) { /* * The first collected chain is nonsensical, * as it is not even enough to store the * virtio-net header. Just drop it. */ vq_relchain(vq, info[0].idx, 0); vq_retchains(vq, n_chains - 1); continue; } memset(hdr, 0, prepend_hdr_len); } rlen = netbe_recv(sc->vsc_be, riov, riov_len); if (rlen != plen - prepend_hdr_len) { /* * If this happens it means there is something * wrong with the backend (e.g., some other * process is stealing our packets). 
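The header trimming done by iov_trim_hdr() above only adjusts the first element of the descriptor chain: it either shrinks it, skips it entirely, or rejects a chain too small to hold the header. A standalone sketch of the same logic with hypothetical buffer sizes:

#include <stdint.h>
#include <stdio.h>
#include <sys/uio.h>

/* Mirrors the trim logic above: drop hlen bytes from the front of iov[0]. */
static struct iovec *
example_trim_hdr(struct iovec *iov, int *iovcnt, unsigned int hlen)
{
	if (iov[0].iov_len < hlen)
		return (NULL);			/* chain cannot even hold the header */
	iov[0].iov_len -= hlen;
	if (iov[0].iov_len == 0) {
		if (--(*iovcnt) == 0)
			return (NULL);		/* chain held only the header */
		return (&iov[1]);
	}
	iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + hlen);
	return (&iov[0]);
}

int
main(void)
{
	char hdr[12], payload[1500];		/* hypothetical guest buffers */
	struct iovec iov[2] = {
		{ .iov_base = hdr, .iov_len = sizeof(hdr) },
		{ .iov_base = payload, .iov_len = sizeof(payload) },
	};
	int cnt = 2;
	struct iovec *riov = example_trim_hdr(iov, &cnt, 12);

	printf("payload starts at element %td, %d element(s) remain\n",
	    riov - iov, cnt);
	return (0);
}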
*/ WPRINTF(("netbe_recv: expected %zd bytes, " "got %zd", plen - prepend_hdr_len, rlen)); vq_retchains(vq, n_chains); continue; } ulen = (uint32_t)plen; /* * Publish the used buffers to the guest, reporting the * number of bytes that we wrote. */ if (!sc->rx_merge) { vq_relchain(vq, info[0].idx, ulen); } else { uint32_t iolen; int i = 0; do { iolen = info[i].len; if (iolen > ulen) { iolen = ulen; } vq_relchain_prepare(vq, info[i].idx, iolen); ulen -= iolen; i++; } while (ulen > 0); hdr->vrh_bufs = i; vq_relchain_publish(vq); assert(i == n_chains); } } } /* * Called when there is read activity on the backend file descriptor. * Each buffer posted by the guest is assumed to be able to contain * an entire ethernet frame + rx header. */ static void pci_vtnet_rx_callback(int fd, enum ev_type type, void *param) { struct pci_vtnet_softc *sc = param; pthread_mutex_lock(&sc->rx_mtx); pci_vtnet_rx(sc); pthread_mutex_unlock(&sc->rx_mtx); } /* Called on RX kick. */ static void pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) { struct pci_vtnet_softc *sc = vsc; /* * A qnotify means that the rx process can now begin. * Enable RX only if features are negotiated. */ pthread_mutex_lock(&sc->rx_mtx); if (!sc->features_negotiated) { pthread_mutex_unlock(&sc->rx_mtx); return; } vq_kick_disable(vq); netbe_rx_enable(sc->vsc_be); pthread_mutex_unlock(&sc->rx_mtx); } /* TX virtqueue processing, called by the TX thread. */ static void pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq) { struct iovec iov[VTNET_MAXSEGS + 1]; struct iovec *siov = iov; struct vi_req req; ssize_t len; int n; /* * Obtain chain of descriptors. The first descriptor also * contains the virtio-net header. */ n = vq_getchain(vq, iov, VTNET_MAXSEGS, &req); assert(n >= 1 && n <= VTNET_MAXSEGS); if (sc->vhdrlen != sc->be_vhdrlen) { /* * The frontend uses a virtio-net header, but the backend * does not. We simply strip the header and ignore it, as * it should be zero-filled. */ siov = iov_trim_hdr(siov, &n, sc->vhdrlen); } if (siov == NULL) { /* The chain is nonsensical. Just drop it. */ len = 0; } else { len = netbe_send(sc->vsc_be, siov, n); if (len < 0) { /* * If send failed, report that 0 bytes * were read. */ len = 0; } } /* * Return the processed chain to the guest, reporting * the number of bytes that we read. */ vq_relchain(vq, req.idx, len); } /* Called on TX kick. */ static void pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq) { struct pci_vtnet_softc *sc = vsc; /* * Any ring entries to process? 
*/ if (!vq_has_descs(vq)) return; /* Signal the tx thread for processing */ pthread_mutex_lock(&sc->tx_mtx); vq_kick_disable(vq); if (sc->tx_in_progress == 0) pthread_cond_signal(&sc->tx_cond); pthread_mutex_unlock(&sc->tx_mtx); } /* * Thread which will handle processing of TX desc */ static void * pci_vtnet_tx_thread(void *param) { struct pci_vtnet_softc *sc = param; struct vqueue_info *vq; int error; vq = &sc->vsc_queues[VTNET_TXQ]; /* * Let us wait till the tx queue pointers get initialised & * first tx signaled */ pthread_mutex_lock(&sc->tx_mtx); error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); assert(error == 0); for (;;) { /* note - tx mutex is locked here */ while (sc->resetting || !vq_has_descs(vq)) { vq_kick_enable(vq); if (!sc->resetting && vq_has_descs(vq)) break; sc->tx_in_progress = 0; error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); assert(error == 0); } vq_kick_disable(vq); sc->tx_in_progress = 1; pthread_mutex_unlock(&sc->tx_mtx); do { /* * Run through entries, placing them into * iovecs and sending when an end-of-packet * is found */ pci_vtnet_proctx(sc, vq); } while (vq_has_descs(vq)); /* * Generate an interrupt if needed. */ vq_endchains(vq, /*used_all_avail=*/1); pthread_mutex_lock(&sc->tx_mtx); } } #ifdef notyet static void pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq) { DPRINTF(("vtnet: control qnotify!")); } #endif static int pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) { struct pci_vtnet_softc *sc; const char *value; char tname[MAXCOMLEN + 1]; unsigned long mtu = ETHERMTU; int err; /* * Allocate data structures for further virtio initializations. * sc also contains a copy of vtnet_vi_consts, since capabilities * change depending on the backend. */ sc = calloc(1, sizeof(struct pci_vtnet_softc)); sc->vsc_consts = vtnet_vi_consts; pthread_mutex_init(&sc->vsc_mtx, NULL); sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ; sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq; sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ; sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq; #ifdef notyet sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ; sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq; #endif value = get_config_value_node(nvl, "mac"); if (value != NULL) { err = net_parsemac(value, sc->vsc_config.mac); if (err) { free(sc); return (err); } } else net_genmac(pi, sc->vsc_config.mac); value = get_config_value_node(nvl, "mtu"); if (value != NULL) { err = net_parsemtu(value, &mtu); if (err) { free(sc); return (err); } if (mtu < VTNET_MIN_MTU || mtu > VTNET_MAX_MTU) { err = EINVAL; errno = EINVAL; free(sc); return (err); } sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MTU; } sc->vsc_config.mtu = mtu; /* Permit interfaces without a configured backend. */ if (get_config_value_node(nvl, "backend") != NULL) { err = netbe_init(&sc->vsc_be, nvl, pci_vtnet_rx_callback, sc); if (err) { free(sc); return (err); } } sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MRG_RXBUF | netbe_get_cap(sc->vsc_be); - /* + /* * Since we do not actually support multiqueue, - * set the maximum virtqueue pairs to 1. + * set the maximum virtqueue pairs to 1. */ sc->vsc_config.max_virtqueue_pairs = 1; /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_NETWORK); pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); /* Link is always up. 
*/ sc->vsc_config.status = 1; - + vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues); sc->vsc_vs.vs_mtx = &sc->vsc_mtx; /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */ if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) { free(sc); return (1); } /* use BAR 0 to map config regs in IO space */ vi_set_io_bar(&sc->vsc_vs, 0); sc->resetting = 0; sc->rx_merge = 0; sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2; - pthread_mutex_init(&sc->rx_mtx, NULL); + pthread_mutex_init(&sc->rx_mtx, NULL); - /* + /* * Initialize tx semaphore & spawn TX processing thread. * As of now, only one thread for TX desc processing is - * spawned. + * spawned. */ sc->tx_in_progress = 0; pthread_mutex_init(&sc->tx_mtx, NULL); pthread_cond_init(&sc->tx_cond, NULL); pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc); snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot, pi->pi_func); pthread_set_name_np(sc->tx_tid, tname); return (0); } static int pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value) { struct pci_vtnet_softc *sc = vsc; void *ptr; if (offset < (int)sizeof(sc->vsc_config.mac)) { assert(offset + size <= (int)sizeof(sc->vsc_config.mac)); /* * The driver is allowed to change the MAC address */ ptr = &sc->vsc_config.mac[offset]; memcpy(ptr, &value, size); } else { /* silently ignore other writes */ DPRINTF(("vtnet: write to readonly reg %d", offset)); } return (0); } static int pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval) { struct pci_vtnet_softc *sc = vsc; void *ptr; ptr = (uint8_t *)&sc->vsc_config + offset; memcpy(retval, ptr, size); return (0); } static void pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features) { struct pci_vtnet_softc *sc = vsc; sc->vsc_features = negotiated_features; if (negotiated_features & VIRTIO_NET_F_MRG_RXBUF) { sc->vhdrlen = sizeof(struct virtio_net_rxhdr); sc->rx_merge = 1; } else { /* * Without mergeable rx buffers, virtio-net header is 2 * bytes shorter than sizeof(struct virtio_net_rxhdr). */ sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2; sc->rx_merge = 0; } /* Tell the backend to enable some capabilities it has advertised. */ netbe_set_cap(sc->vsc_be, negotiated_features, sc->vhdrlen); sc->be_vhdrlen = netbe_get_vnet_hdr_len(sc->vsc_be); assert(sc->be_vhdrlen == 0 || sc->be_vhdrlen == sc->vhdrlen); pthread_mutex_lock(&sc->rx_mtx); sc->features_negotiated = true; pthread_mutex_unlock(&sc->rx_mtx); } #ifdef BHYVE_SNAPSHOT static void pci_vtnet_pause(void *vsc) { struct pci_vtnet_softc *sc = vsc; DPRINTF(("vtnet: device pause requested !\n")); /* Acquire the RX lock to block RX processing. */ pthread_mutex_lock(&sc->rx_mtx); /* Wait for the transmit thread to finish its processing. */ pthread_mutex_lock(&sc->tx_mtx); while (sc->tx_in_progress) { pthread_mutex_unlock(&sc->tx_mtx); usleep(10000); pthread_mutex_lock(&sc->tx_mtx); } } static void pci_vtnet_resume(void *vsc) { struct pci_vtnet_softc *sc = vsc; DPRINTF(("vtnet: device resume requested !\n")); pthread_mutex_unlock(&sc->tx_mtx); /* The RX lock should have been acquired in vtnet_pause. */ pthread_mutex_unlock(&sc->rx_mtx); } static int pci_vtnet_snapshot(void *vsc, struct vm_snapshot_meta *meta) { int ret; struct pci_vtnet_softc *sc = vsc; DPRINTF(("vtnet: device snapshot requested !\n")); /* * Queues and consts should have been saved by the more generic * vi_pci_snapshot function. We need to save only our features and * config. 
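 * On restore, the saved feature bits are fed back through
 * pci_vtnet_neg_features() so that rx_merge and the header lengths
 * are recomputed before the backend is re-enabled for RX.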
*/ SNAPSHOT_VAR_OR_LEAVE(sc->vsc_features, meta, ret, done); /* Force reapply negociated features at restore time */ if (meta->op == VM_SNAPSHOT_RESTORE) { pci_vtnet_neg_features(sc, sc->vsc_features); netbe_rx_enable(sc->vsc_be); } SNAPSHOT_VAR_OR_LEAVE(sc->vsc_config, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rx_merge, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->vhdrlen, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->be_vhdrlen, meta, ret, done); done: return (ret); } #endif static struct pci_devemu pci_de_vnet = { .pe_emu = "virtio-net", .pe_init = pci_vtnet_init, .pe_legacy_config = netbe_legacy_config, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read, #ifdef BHYVE_SNAPSHOT .pe_snapshot = vi_pci_snapshot, .pe_pause = vi_pci_pause, .pe_resume = vi_pci_resume, #endif }; PCI_EMUL_SET(pci_de_vnet); diff --git a/usr.sbin/bhyve/pci_virtio_scsi.c b/usr.sbin/bhyve/pci_virtio_scsi.c index f4a701c0e25e..b080749f7931 100644 --- a/usr.sbin/bhyve/pci_virtio_scsi.c +++ b/usr.sbin/bhyve/pci_virtio_scsi.c @@ -1,743 +1,743 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016 Jakub Klama . * Copyright (c) 2018 Marcelo Araujo . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "config.h" #include "debug.h" #include "pci_emul.h" #include "virtio.h" #include "iov.h" #define VTSCSI_RINGSZ 64 #define VTSCSI_REQUESTQ 1 #define VTSCSI_THR_PER_Q 16 #define VTSCSI_MAXQ (VTSCSI_REQUESTQ + 2) #define VTSCSI_MAXSEG 64 #define VTSCSI_IN_HEADER_LEN(_sc) \ (sizeof(struct pci_vtscsi_req_cmd_rd) + _sc->vss_config.cdb_size) #define VTSCSI_OUT_HEADER_LEN(_sc) \ (sizeof(struct pci_vtscsi_req_cmd_wr) + _sc->vss_config.sense_size) #define VIRTIO_SCSI_MAX_CHANNEL 0 #define VIRTIO_SCSI_MAX_TARGET 0 #define VIRTIO_SCSI_MAX_LUN 16383 #define VIRTIO_SCSI_F_INOUT (1 << 0) #define VIRTIO_SCSI_F_HOTPLUG (1 << 1) #define VIRTIO_SCSI_F_CHANGE (1 << 2) static int pci_vtscsi_debug = 0; #define DPRINTF(params) if (pci_vtscsi_debug) PRINTLN params #define WPRINTF(params) PRINTLN params struct pci_vtscsi_config { uint32_t num_queues; uint32_t seg_max; uint32_t max_sectors; uint32_t cmd_per_lun; uint32_t event_info_size; uint32_t sense_size; uint32_t cdb_size; uint16_t max_channel; uint16_t max_target; uint32_t max_lun; } __attribute__((packed)); struct pci_vtscsi_queue { struct pci_vtscsi_softc * vsq_sc; struct vqueue_info * vsq_vq; pthread_mutex_t vsq_mtx; pthread_mutex_t vsq_qmtx; pthread_cond_t vsq_cv; STAILQ_HEAD(, pci_vtscsi_request) vsq_requests; LIST_HEAD(, pci_vtscsi_worker) vsq_workers; }; struct pci_vtscsi_worker { struct pci_vtscsi_queue * vsw_queue; pthread_t vsw_thread; bool vsw_exiting; LIST_ENTRY(pci_vtscsi_worker) vsw_link; }; struct pci_vtscsi_request { struct pci_vtscsi_queue * vsr_queue; struct iovec vsr_iov_in[VTSCSI_MAXSEG]; int vsr_niov_in; struct iovec vsr_iov_out[VTSCSI_MAXSEG]; int vsr_niov_out; uint32_t vsr_idx; STAILQ_ENTRY(pci_vtscsi_request) vsr_link; }; /* * Per-device softc */ struct pci_vtscsi_softc { struct virtio_softc vss_vs; struct vqueue_info vss_vq[VTSCSI_MAXQ]; struct pci_vtscsi_queue vss_queues[VTSCSI_REQUESTQ]; pthread_mutex_t vss_mtx; int vss_iid; int vss_ctl_fd; uint32_t vss_features; struct pci_vtscsi_config vss_config; }; #define VIRTIO_SCSI_T_TMF 0 #define VIRTIO_SCSI_T_TMF_ABORT_TASK 0 #define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1 #define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2 #define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3 #define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4 #define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5 #define VIRTIO_SCSI_T_TMF_QUERY_TASK 6 #define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7 /* command-specific response values */ #define VIRTIO_SCSI_S_FUNCTION_COMPLETE 0 #define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10 #define VIRTIO_SCSI_S_FUNCTION_REJECTED 11 struct pci_vtscsi_ctrl_tmf { uint32_t type; uint32_t subtype; uint8_t lun[8]; uint64_t id; uint8_t response; } __attribute__((packed)); #define VIRTIO_SCSI_T_AN_QUERY 1 #define VIRTIO_SCSI_EVT_ASYNC_OPERATIONAL_CHANGE 2 #define VIRTIO_SCSI_EVT_ASYNC_POWER_MGMT 4 #define VIRTIO_SCSI_EVT_ASYNC_EXTERNAL_REQUEST 8 #define VIRTIO_SCSI_EVT_ASYNC_MEDIA_CHANGE 16 #define VIRTIO_SCSI_EVT_ASYNC_MULTI_HOST 32 #define VIRTIO_SCSI_EVT_ASYNC_DEVICE_BUSY 64 struct pci_vtscsi_ctrl_an { uint32_t type; uint8_t lun[8]; uint32_t event_requested; uint32_t event_actual; uint8_t response; } __attribute__((packed)); /* command-specific response values */ #define VIRTIO_SCSI_S_OK 0 #define VIRTIO_SCSI_S_OVERRUN 1 #define VIRTIO_SCSI_S_ABORTED 2 
#define VIRTIO_SCSI_S_BAD_TARGET 3 #define VIRTIO_SCSI_S_RESET 4 #define VIRTIO_SCSI_S_BUSY 5 #define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6 #define VIRTIO_SCSI_S_TARGET_FAILURE 7 #define VIRTIO_SCSI_S_NEXUS_FAILURE 8 #define VIRTIO_SCSI_S_FAILURE 9 #define VIRTIO_SCSI_S_INCORRECT_LUN 12 /* task_attr */ #define VIRTIO_SCSI_S_SIMPLE 0 #define VIRTIO_SCSI_S_ORDERED 1 #define VIRTIO_SCSI_S_HEAD 2 #define VIRTIO_SCSI_S_ACA 3 struct pci_vtscsi_event { uint32_t event; uint8_t lun[8]; uint32_t reason; } __attribute__((packed)); struct pci_vtscsi_req_cmd_rd { uint8_t lun[8]; uint64_t id; uint8_t task_attr; uint8_t prio; uint8_t crn; uint8_t cdb[]; } __attribute__((packed)); struct pci_vtscsi_req_cmd_wr { uint32_t sense_len; uint32_t residual; uint16_t status_qualifier; uint8_t status; uint8_t response; uint8_t sense[]; } __attribute__((packed)); static void *pci_vtscsi_proc(void *); static void pci_vtscsi_reset(void *); static void pci_vtscsi_neg_features(void *, uint64_t); static int pci_vtscsi_cfgread(void *, int, int, uint32_t *); static int pci_vtscsi_cfgwrite(void *, int, int, uint32_t); static inline int pci_vtscsi_get_lun(uint8_t *); static int pci_vtscsi_control_handle(struct pci_vtscsi_softc *, void *, size_t); static int pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *, struct pci_vtscsi_ctrl_tmf *); static int pci_vtscsi_an_handle(struct pci_vtscsi_softc *, struct pci_vtscsi_ctrl_an *); static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *, struct iovec *, int, struct iovec *, int); static void pci_vtscsi_controlq_notify(void *, struct vqueue_info *); static void pci_vtscsi_eventq_notify(void *, struct vqueue_info *); static void pci_vtscsi_requestq_notify(void *, struct vqueue_info *); static int pci_vtscsi_init_queue(struct pci_vtscsi_softc *, struct pci_vtscsi_queue *, int); static int pci_vtscsi_init(struct vmctx *, struct pci_devinst *, nvlist_t *); static struct virtio_consts vtscsi_vi_consts = { "vtscsi", /* our name */ VTSCSI_MAXQ, /* we support 2+n virtqueues */ sizeof(struct pci_vtscsi_config), /* config reg size */ pci_vtscsi_reset, /* reset */ NULL, /* device-wide qnotify */ pci_vtscsi_cfgread, /* read virtio config */ pci_vtscsi_cfgwrite, /* write virtio config */ pci_vtscsi_neg_features, /* apply negotiated features */ 0, /* our capabilities */ }; static void * pci_vtscsi_proc(void *arg) { struct pci_vtscsi_worker *worker = (struct pci_vtscsi_worker *)arg; struct pci_vtscsi_queue *q = worker->vsw_queue; struct pci_vtscsi_request *req; int iolen; for (;;) { pthread_mutex_lock(&q->vsq_mtx); while (STAILQ_EMPTY(&q->vsq_requests) && !worker->vsw_exiting) pthread_cond_wait(&q->vsq_cv, &q->vsq_mtx); if (worker->vsw_exiting) break; req = STAILQ_FIRST(&q->vsq_requests); STAILQ_REMOVE_HEAD(&q->vsq_requests, vsr_link); pthread_mutex_unlock(&q->vsq_mtx); iolen = pci_vtscsi_request_handle(q, req->vsr_iov_in, req->vsr_niov_in, req->vsr_iov_out, req->vsr_niov_out); pthread_mutex_lock(&q->vsq_qmtx); vq_relchain(q->vsq_vq, req->vsr_idx, iolen); vq_endchains(q->vsq_vq, 0); pthread_mutex_unlock(&q->vsq_qmtx); DPRINTF(("virtio-scsi: request completed", req->vsr_idx)); free(req); } pthread_mutex_unlock(&q->vsq_mtx); return (NULL); } static void pci_vtscsi_reset(void *vsc) { struct pci_vtscsi_softc *sc; sc = vsc; DPRINTF(("vtscsi: device reset requested")); vi_reset_dev(&sc->vss_vs); /* initialize config structure */ sc->vss_config = (struct pci_vtscsi_config){ .num_queues = VTSCSI_REQUESTQ, /* Leave room for the request and the response. 
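 * (seg_max counts only data descriptors, so two of the VTSCSI_MAXSEG
 * slots are reserved for the request and response headers.)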
*/ .seg_max = VTSCSI_MAXSEG - 2, .max_sectors = 2, .cmd_per_lun = 1, .event_info_size = sizeof(struct pci_vtscsi_event), .sense_size = 96, .cdb_size = 32, .max_channel = VIRTIO_SCSI_MAX_CHANNEL, .max_target = VIRTIO_SCSI_MAX_TARGET, .max_lun = VIRTIO_SCSI_MAX_LUN }; } static void pci_vtscsi_neg_features(void *vsc, uint64_t negotiated_features) { struct pci_vtscsi_softc *sc = vsc; sc->vss_features = negotiated_features; } static int pci_vtscsi_cfgread(void *vsc, int offset, int size, uint32_t *retval) { struct pci_vtscsi_softc *sc = vsc; void *ptr; ptr = (uint8_t *)&sc->vss_config + offset; memcpy(retval, ptr, size); return (0); } static int pci_vtscsi_cfgwrite(void *vsc, int offset, int size, uint32_t val) { return (0); } static inline int pci_vtscsi_get_lun(uint8_t *lun) { return (((lun[2] << 8) | lun[3]) & 0x3fff); } static int pci_vtscsi_control_handle(struct pci_vtscsi_softc *sc, void *buf, size_t bufsize) { struct pci_vtscsi_ctrl_tmf *tmf; struct pci_vtscsi_ctrl_an *an; uint32_t type; type = *(uint32_t *)buf; if (type == VIRTIO_SCSI_T_TMF) { tmf = (struct pci_vtscsi_ctrl_tmf *)buf; return (pci_vtscsi_tmf_handle(sc, tmf)); } if (type == VIRTIO_SCSI_T_AN_QUERY) { an = (struct pci_vtscsi_ctrl_an *)buf; return (pci_vtscsi_an_handle(sc, an)); } return (0); } static int pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc, struct pci_vtscsi_ctrl_tmf *tmf) { union ctl_io *io; int err; io = ctl_scsi_alloc_io(sc->vss_iid); ctl_scsi_zero_io(io); io->io_hdr.io_type = CTL_IO_TASK; io->io_hdr.nexus.initid = sc->vss_iid; io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(tmf->lun); io->taskio.tag_type = CTL_TAG_SIMPLE; io->taskio.tag_num = (uint32_t)tmf->id; switch (tmf->subtype) { case VIRTIO_SCSI_T_TMF_ABORT_TASK: io->taskio.task_action = CTL_TASK_ABORT_TASK; break; case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET: io->taskio.task_action = CTL_TASK_ABORT_TASK_SET; break; case VIRTIO_SCSI_T_TMF_CLEAR_ACA: io->taskio.task_action = CTL_TASK_CLEAR_ACA; break; case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: io->taskio.task_action = CTL_TASK_CLEAR_TASK_SET; break; case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET; break; case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: io->taskio.task_action = CTL_TASK_LUN_RESET; break; case VIRTIO_SCSI_T_TMF_QUERY_TASK: io->taskio.task_action = CTL_TASK_QUERY_TASK; break; case VIRTIO_SCSI_T_TMF_QUERY_TASK_SET: io->taskio.task_action = CTL_TASK_QUERY_TASK_SET; break; } if (pci_vtscsi_debug) { struct sbuf *sb = sbuf_new_auto(); ctl_io_sbuf(io, sb); sbuf_finish(sb); DPRINTF(("pci_virtio_scsi: %s", sbuf_data(sb))); sbuf_delete(sb); } err = ioctl(sc->vss_ctl_fd, CTL_IO, io); if (err != 0) WPRINTF(("CTL_IO: err=%d (%s)", errno, strerror(errno))); tmf->response = io->taskio.task_status; ctl_scsi_free_io(io); return (1); } static int pci_vtscsi_an_handle(struct pci_vtscsi_softc *sc, struct pci_vtscsi_ctrl_an *an) { return (0); } static int pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, int niov_in, struct iovec *iov_out, int niov_out) { struct pci_vtscsi_softc *sc = q->vsq_sc; struct pci_vtscsi_req_cmd_rd *cmd_rd = NULL; struct pci_vtscsi_req_cmd_wr *cmd_wr; struct iovec data_iov_in[VTSCSI_MAXSEG], data_iov_out[VTSCSI_MAXSEG]; union ctl_io *io; int data_niov_in, data_niov_out; void *ext_data_ptr = NULL; uint32_t ext_data_len = 0, ext_sg_entries = 0; int err, nxferred; seek_iov(iov_in, niov_in, data_iov_in, &data_niov_in, VTSCSI_IN_HEADER_LEN(sc)); seek_iov(iov_out, niov_out, data_iov_out, &data_niov_out, VTSCSI_OUT_HEADER_LEN(sc)); 
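/*
 * The readable chain is laid out as [request header + CDB | data-out]
 * and the writable chain as [response header + sense | data-in].
 * The seek_iov() calls above built data-only views that skip the
 * fixed-size headers; the truncate_iov() calls below shrink iov_in
 * and iov_out so that they describe the headers only.
 */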
truncate_iov(iov_in, &niov_in, VTSCSI_IN_HEADER_LEN(sc)); truncate_iov(iov_out, &niov_out, VTSCSI_OUT_HEADER_LEN(sc)); iov_to_buf(iov_in, niov_in, (void **)&cmd_rd); cmd_wr = malloc(VTSCSI_OUT_HEADER_LEN(sc)); io = ctl_scsi_alloc_io(sc->vss_iid); ctl_scsi_zero_io(io); io->io_hdr.nexus.initid = sc->vss_iid; io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(cmd_rd->lun); io->io_hdr.io_type = CTL_IO_SCSI; if (data_niov_in > 0) { ext_data_ptr = (void *)data_iov_in; ext_sg_entries = data_niov_in; ext_data_len = count_iov(data_iov_in, data_niov_in); io->io_hdr.flags |= CTL_FLAG_DATA_OUT; } else if (data_niov_out > 0) { ext_data_ptr = (void *)data_iov_out; ext_sg_entries = data_niov_out; ext_data_len = count_iov(data_iov_out, data_niov_out); io->io_hdr.flags |= CTL_FLAG_DATA_IN; } io->scsiio.sense_len = sc->vss_config.sense_size; io->scsiio.tag_num = (uint32_t)cmd_rd->id; switch (cmd_rd->task_attr) { case VIRTIO_SCSI_S_ORDERED: io->scsiio.tag_type = CTL_TAG_ORDERED; break; case VIRTIO_SCSI_S_HEAD: io->scsiio.tag_type = CTL_TAG_HEAD_OF_QUEUE; break; case VIRTIO_SCSI_S_ACA: io->scsiio.tag_type = CTL_TAG_ACA; break; case VIRTIO_SCSI_S_SIMPLE: default: io->scsiio.tag_type = CTL_TAG_SIMPLE; break; } io->scsiio.ext_sg_entries = ext_sg_entries; io->scsiio.ext_data_ptr = ext_data_ptr; io->scsiio.ext_data_len = ext_data_len; io->scsiio.ext_data_filled = 0; io->scsiio.cdb_len = sc->vss_config.cdb_size; memcpy(io->scsiio.cdb, cmd_rd->cdb, sc->vss_config.cdb_size); if (pci_vtscsi_debug) { struct sbuf *sb = sbuf_new_auto(); ctl_io_sbuf(io, sb); sbuf_finish(sb); DPRINTF(("pci_virtio_scsi: %s", sbuf_data(sb))); sbuf_delete(sb); } err = ioctl(sc->vss_ctl_fd, CTL_IO, io); if (err != 0) { WPRINTF(("CTL_IO: err=%d (%s)", errno, strerror(errno))); cmd_wr->response = VIRTIO_SCSI_S_FAILURE; } else { cmd_wr->sense_len = MIN(io->scsiio.sense_len, sc->vss_config.sense_size); cmd_wr->residual = io->scsiio.residual; cmd_wr->status = io->scsiio.scsi_status; cmd_wr->response = VIRTIO_SCSI_S_OK; memcpy(&cmd_wr->sense, &io->scsiio.sense_data, cmd_wr->sense_len); } buf_to_iov(cmd_wr, VTSCSI_OUT_HEADER_LEN(sc), iov_out, niov_out, 0); nxferred = VTSCSI_OUT_HEADER_LEN(sc) + io->scsiio.ext_data_filled; free(cmd_rd); free(cmd_wr); ctl_scsi_free_io(io); return (nxferred); } static void pci_vtscsi_controlq_notify(void *vsc, struct vqueue_info *vq) { struct pci_vtscsi_softc *sc; struct iovec iov[VTSCSI_MAXSEG]; struct vi_req req; void *buf = NULL; size_t bufsize; int iolen, n; sc = vsc; while (vq_has_descs(vq)) { n = vq_getchain(vq, iov, VTSCSI_MAXSEG, &req); assert(n >= 1 && n <= VTSCSI_MAXSEG); bufsize = iov_to_buf(iov, n, &buf); iolen = pci_vtscsi_control_handle(sc, buf, bufsize); buf_to_iov(buf + bufsize - iolen, iolen, iov, n, bufsize - iolen); /* * Release this chain and handle more */ vq_relchain(vq, req.idx, iolen); } vq_endchains(vq, 1); /* Generate interrupt if appropriate. 
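 * (The second argument to vq_endchains() is used_all_avail; it is set
 * because the loop above consumed every posted control descriptor.)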
*/ free(buf); } static void pci_vtscsi_eventq_notify(void *vsc, struct vqueue_info *vq) { vq_kick_disable(vq); } static void pci_vtscsi_requestq_notify(void *vsc, struct vqueue_info *vq) { struct pci_vtscsi_softc *sc; struct pci_vtscsi_queue *q; struct pci_vtscsi_request *req; struct iovec iov[VTSCSI_MAXSEG]; struct vi_req vireq; int n; sc = vsc; q = &sc->vss_queues[vq->vq_num - 2]; while (vq_has_descs(vq)) { n = vq_getchain(vq, iov, VTSCSI_MAXSEG, &vireq); assert(n >= 1 && n <= VTSCSI_MAXSEG); req = calloc(1, sizeof(struct pci_vtscsi_request)); req->vsr_idx = vireq.idx; req->vsr_queue = q; req->vsr_niov_in = vireq.readable; req->vsr_niov_out = vireq.writable; memcpy(req->vsr_iov_in, iov, req->vsr_niov_in * sizeof(struct iovec)); memcpy(req->vsr_iov_out, iov + vireq.readable, req->vsr_niov_out * sizeof(struct iovec)); pthread_mutex_lock(&q->vsq_mtx); STAILQ_INSERT_TAIL(&q->vsq_requests, req, vsr_link); pthread_cond_signal(&q->vsq_cv); pthread_mutex_unlock(&q->vsq_mtx); DPRINTF(("virtio-scsi: request enqueued", vireq.idx)); } } static int -pci_vtscsi_init_queue(struct pci_vtscsi_softc *sc, +pci_vtscsi_init_queue(struct pci_vtscsi_softc *sc, struct pci_vtscsi_queue *queue, int num) { struct pci_vtscsi_worker *worker; char tname[MAXCOMLEN + 1]; int i; queue->vsq_sc = sc; queue->vsq_vq = &sc->vss_vq[num + 2]; pthread_mutex_init(&queue->vsq_mtx, NULL); pthread_mutex_init(&queue->vsq_qmtx, NULL); pthread_cond_init(&queue->vsq_cv, NULL); STAILQ_INIT(&queue->vsq_requests); LIST_INIT(&queue->vsq_workers); for (i = 0; i < VTSCSI_THR_PER_Q; i++) { worker = calloc(1, sizeof(struct pci_vtscsi_worker)); worker->vsw_queue = queue; pthread_create(&worker->vsw_thread, NULL, &pci_vtscsi_proc, (void *)worker); snprintf(tname, sizeof(tname), "vtscsi:%d-%d", num, i); pthread_set_name_np(worker->vsw_thread, tname); LIST_INSERT_HEAD(&queue->vsq_workers, worker, vsw_link); } return (0); } static int pci_vtscsi_legacy_config(nvlist_t *nvl, const char *opts) { char *cp, *devname; if (opts == NULL) return (0); cp = strchr(opts, ','); if (cp == NULL) { set_config_value_node(nvl, "dev", opts); return (0); } devname = strndup(opts, cp - opts); set_config_value_node(nvl, "dev", devname); free(devname); return (pci_parse_legacy_config(nvl, cp + 1)); } static int pci_vtscsi_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) { struct pci_vtscsi_softc *sc; const char *devname, *value; int i; sc = calloc(1, sizeof(struct pci_vtscsi_softc)); value = get_config_value_node(nvl, "iid"); if (value != NULL) sc->vss_iid = strtoul(value, NULL, 10); devname = get_config_value_node(nvl, "dev"); if (devname == NULL) devname = "/dev/cam/ctl"; sc->vss_ctl_fd = open(devname, O_RDWR); if (sc->vss_ctl_fd < 0) { WPRINTF(("cannot open %s: %s", devname, strerror(errno))); free(sc); return (1); } vi_softc_linkup(&sc->vss_vs, &vtscsi_vi_consts, sc, pi, sc->vss_vq); sc->vss_vs.vs_mtx = &sc->vss_mtx; /* controlq */ sc->vss_vq[0].vq_qsize = VTSCSI_RINGSZ; sc->vss_vq[0].vq_notify = pci_vtscsi_controlq_notify; /* eventq */ sc->vss_vq[1].vq_qsize = VTSCSI_RINGSZ; sc->vss_vq[1].vq_notify = pci_vtscsi_eventq_notify; /* request queues */ for (i = 2; i < VTSCSI_MAXQ; i++) { sc->vss_vq[i].vq_qsize = VTSCSI_RINGSZ; sc->vss_vq[i].vq_notify = pci_vtscsi_requestq_notify; pci_vtscsi_init_queue(sc, &sc->vss_queues[i - 2], i - 2); } /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_SCSI); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, 
VIRTIO_ID_SCSI); pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); if (vi_intr_init(&sc->vss_vs, 1, fbsdrun_virtio_msix())) return (1); vi_set_io_bar(&sc->vss_vs, 0); return (0); } struct pci_devemu pci_de_vscsi = { .pe_emu = "virtio-scsi", .pe_init = pci_vtscsi_init, .pe_legacy_config = pci_vtscsi_legacy_config, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read }; PCI_EMUL_SET(pci_de_vscsi); diff --git a/usr.sbin/bhyve/pci_xhci.c b/usr.sbin/bhyve/pci_xhci.c index b71d66337048..c3dd5d4c3601 100644 --- a/usr.sbin/bhyve/pci_xhci.c +++ b/usr.sbin/bhyve/pci_xhci.c @@ -1,3193 +1,3193 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2014 Leon Dang * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* XHCI options: -s ,xhci,{devices} devices: tablet USB tablet mouse */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "config.h" #include "debug.h" #include "pci_emul.h" #include "pci_xhci.h" #include "usb_emul.h" static int xhci_debug = 0; #define DPRINTF(params) if (xhci_debug) PRINTLN params #define WPRINTF(params) PRINTLN params #define XHCI_NAME "xhci" #define XHCI_MAX_DEVS 8 /* 4 USB3 + 4 USB2 devs */ #define XHCI_MAX_SLOTS 64 /* min allowed by Windows drivers */ /* * XHCI data structures can be up to 64k, but limit paddr_guest2host mapping * to 4k to avoid going over the guest physical memory barrier. 
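 * Guest addresses are therefore mapped only up to the end of the
 * containing 4KB page; see XHCI_GADDR_SIZE() and XHCI_GADDR() below.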
*/ #define XHCI_PADDR_SZ 4096 /* paddr_guest2host max size */ #define XHCI_ERST_MAX 0 /* max 2^entries event ring seg tbl */ #define XHCI_CAPLEN (4*8) /* offset of op register space */ #define XHCI_HCCPRAMS2 0x1C /* offset of HCCPARAMS2 register */ #define XHCI_PORTREGS_START 0x400 #define XHCI_DOORBELL_MAX 256 #define XHCI_STREAMS_MAX 1 /* 4-15 in XHCI spec */ /* caplength and hci-version registers */ #define XHCI_SET_CAPLEN(x) ((x) & 0xFF) #define XHCI_SET_HCIVERSION(x) (((x) & 0xFFFF) << 16) #define XHCI_GET_HCIVERSION(x) (((x) >> 16) & 0xFFFF) /* hcsparams1 register */ #define XHCI_SET_HCSP1_MAXSLOTS(x) ((x) & 0xFF) #define XHCI_SET_HCSP1_MAXINTR(x) (((x) & 0x7FF) << 8) #define XHCI_SET_HCSP1_MAXPORTS(x) (((x) & 0xFF) << 24) /* hcsparams2 register */ #define XHCI_SET_HCSP2_IST(x) ((x) & 0x0F) #define XHCI_SET_HCSP2_ERSTMAX(x) (((x) & 0x0F) << 4) #define XHCI_SET_HCSP2_MAXSCRATCH_HI(x) (((x) & 0x1F) << 21) #define XHCI_SET_HCSP2_MAXSCRATCH_LO(x) (((x) & 0x1F) << 27) /* hcsparams3 register */ #define XHCI_SET_HCSP3_U1EXITLATENCY(x) ((x) & 0xFF) #define XHCI_SET_HCSP3_U2EXITLATENCY(x) (((x) & 0xFFFF) << 16) /* hccparams1 register */ #define XHCI_SET_HCCP1_AC64(x) ((x) & 0x01) #define XHCI_SET_HCCP1_BNC(x) (((x) & 0x01) << 1) #define XHCI_SET_HCCP1_CSZ(x) (((x) & 0x01) << 2) #define XHCI_SET_HCCP1_PPC(x) (((x) & 0x01) << 3) #define XHCI_SET_HCCP1_PIND(x) (((x) & 0x01) << 4) #define XHCI_SET_HCCP1_LHRC(x) (((x) & 0x01) << 5) #define XHCI_SET_HCCP1_LTC(x) (((x) & 0x01) << 6) #define XHCI_SET_HCCP1_NSS(x) (((x) & 0x01) << 7) #define XHCI_SET_HCCP1_PAE(x) (((x) & 0x01) << 8) #define XHCI_SET_HCCP1_SPC(x) (((x) & 0x01) << 9) #define XHCI_SET_HCCP1_SEC(x) (((x) & 0x01) << 10) #define XHCI_SET_HCCP1_CFC(x) (((x) & 0x01) << 11) #define XHCI_SET_HCCP1_MAXPSA(x) (((x) & 0x0F) << 12) #define XHCI_SET_HCCP1_XECP(x) (((x) & 0xFFFF) << 16) /* hccparams2 register */ #define XHCI_SET_HCCP2_U3C(x) ((x) & 0x01) #define XHCI_SET_HCCP2_CMC(x) (((x) & 0x01) << 1) #define XHCI_SET_HCCP2_FSC(x) (((x) & 0x01) << 2) #define XHCI_SET_HCCP2_CTC(x) (((x) & 0x01) << 3) #define XHCI_SET_HCCP2_LEC(x) (((x) & 0x01) << 4) #define XHCI_SET_HCCP2_CIC(x) (((x) & 0x01) << 5) /* other registers */ #define XHCI_SET_DOORBELL(x) ((x) & ~0x03) #define XHCI_SET_RTSOFFSET(x) ((x) & ~0x0F) /* register masks */ #define XHCI_PS_PLS_MASK (0xF << 5) /* port link state */ #define XHCI_PS_SPEED_MASK (0xF << 10) /* port speed */ #define XHCI_PS_PIC_MASK (0x3 << 14) /* port indicator */ /* port register set */ #define XHCI_PORTREGS_BASE 0x400 /* base offset */ #define XHCI_PORTREGS_PORT0 0x3F0 #define XHCI_PORTREGS_SETSZ 0x10 /* size of a set */ #define MASK_64_HI(x) ((x) & ~0xFFFFFFFFULL) #define MASK_64_LO(x) ((x) & 0xFFFFFFFFULL) #define FIELD_REPLACE(a,b,m,s) (((a) & ~((m) << (s))) | \ (((b) & (m)) << (s))) #define FIELD_COPY(a,b,m,s) (((a) & ~((m) << (s))) | \ (((b) & ((m) << (s))))) #define SNAP_DEV_NAME_LEN 128 struct pci_xhci_trb_ring { uint64_t ringaddr; /* current dequeue guest address */ uint32_t ccs; /* consumer cycle state */ }; /* device endpoint transfer/stream rings */ struct pci_xhci_dev_ep { union { struct xhci_trb *_epu_tr; struct xhci_stream_ctx *_epu_sctx; } _ep_trbsctx; #define ep_tr _ep_trbsctx._epu_tr #define ep_sctx _ep_trbsctx._epu_sctx union { struct pci_xhci_trb_ring _epu_trb; struct pci_xhci_trb_ring *_epu_sctx_trbs; } _ep_trb_rings; #define ep_ringaddr _ep_trb_rings._epu_trb.ringaddr #define ep_ccs _ep_trb_rings._epu_trb.ccs #define ep_sctx_trbs _ep_trb_rings._epu_sctx_trbs struct usb_data_xfer *ep_xfer; /* 
transfer chain */ }; /* device context base address array: maps slot->device context */ struct xhci_dcbaa { uint64_t dcba[USB_MAX_DEVICES+1]; /* xhci_dev_ctx ptrs */ }; /* port status registers */ struct pci_xhci_portregs { uint32_t portsc; /* port status and control */ uint32_t portpmsc; /* port pwr mgmt status & control */ uint32_t portli; /* port link info */ uint32_t porthlpmc; /* port hardware LPM control */ } __packed; #define XHCI_PS_SPEED_SET(x) (((x) & 0xF) << 10) /* xHC operational registers */ struct pci_xhci_opregs { uint32_t usbcmd; /* usb command */ uint32_t usbsts; /* usb status */ uint32_t pgsz; /* page size */ uint32_t dnctrl; /* device notification control */ uint64_t crcr; /* command ring control */ uint64_t dcbaap; /* device ctx base addr array ptr */ uint32_t config; /* configure */ /* guest mapped addresses: */ struct xhci_trb *cr_p; /* crcr dequeue */ struct xhci_dcbaa *dcbaa_p; /* dev ctx array ptr */ }; /* xHC runtime registers */ struct pci_xhci_rtsregs { uint32_t mfindex; /* microframe index */ struct { /* interrupter register set */ uint32_t iman; /* interrupter management */ uint32_t imod; /* interrupter moderation */ uint32_t erstsz; /* event ring segment table size */ uint32_t rsvd; uint64_t erstba; /* event ring seg-tbl base addr */ uint64_t erdp; /* event ring dequeue ptr */ } intrreg __packed; /* guest mapped addresses */ struct xhci_event_ring_seg *erstba_p; struct xhci_trb *erst_p; /* event ring segment tbl */ int er_deq_seg; /* event ring dequeue segment */ int er_enq_idx; /* event ring enqueue index - xHCI */ int er_enq_seg; /* event ring enqueue segment */ uint32_t er_events_cnt; /* number of events in ER */ uint32_t event_pcs; /* producer cycle state flag */ }; struct pci_xhci_softc; /* * USB device emulation container. * This is referenced from usb_hci->hci_sc; 1 pci_xhci_dev_emu for each * emulated device instance. 
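 * It pairs the controller-visible state (device context pointer,
 * per-endpoint transfer rings, slot state) with the usb_devemu
 * backend (dev_ue/dev_sc) that services the actual transfers.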
*/ struct pci_xhci_dev_emu { struct pci_xhci_softc *xsc; /* XHCI contexts */ struct xhci_dev_ctx *dev_ctx; struct pci_xhci_dev_ep eps[XHCI_MAX_ENDPOINTS]; int dev_slotstate; struct usb_devemu *dev_ue; /* USB emulated dev */ void *dev_sc; /* device's softc */ struct usb_hci hci; }; struct pci_xhci_softc { struct pci_devinst *xsc_pi; pthread_mutex_t mtx; uint32_t caplength; /* caplen & hciversion */ uint32_t hcsparams1; /* structural parameters 1 */ uint32_t hcsparams2; /* structural parameters 2 */ uint32_t hcsparams3; /* structural parameters 3 */ uint32_t hccparams1; /* capability parameters 1 */ uint32_t dboff; /* doorbell offset */ uint32_t rtsoff; /* runtime register space offset */ uint32_t hccparams2; /* capability parameters 2 */ uint32_t regsend; /* end of configuration registers */ struct pci_xhci_opregs opregs; struct pci_xhci_rtsregs rtsregs; struct pci_xhci_portregs *portregs; struct pci_xhci_dev_emu **devices; /* XHCI[port] = device */ struct pci_xhci_dev_emu **slots; /* slots assigned from 1 */ int usb2_port_start; int usb3_port_start; }; /* portregs and devices arrays are set up to start from idx=1 */ #define XHCI_PORTREG_PTR(x,n) &(x)->portregs[(n)] #define XHCI_DEVINST_PTR(x,n) (x)->devices[(n)] #define XHCI_SLOTDEV_PTR(x,n) (x)->slots[(n)] #define XHCI_HALTED(sc) ((sc)->opregs.usbsts & XHCI_STS_HCH) #define XHCI_GADDR_SIZE(a) (XHCI_PADDR_SZ - \ (((uint64_t) (a)) & (XHCI_PADDR_SZ - 1))) #define XHCI_GADDR(sc,a) paddr_guest2host((sc)->xsc_pi->pi_vmctx, \ (a), XHCI_GADDR_SIZE(a)) static int xhci_in_use; /* map USB errors to XHCI */ static const int xhci_usb_errors[USB_ERR_MAX] = { [USB_ERR_NORMAL_COMPLETION] = XHCI_TRB_ERROR_SUCCESS, [USB_ERR_PENDING_REQUESTS] = XHCI_TRB_ERROR_RESOURCE, [USB_ERR_NOT_STARTED] = XHCI_TRB_ERROR_ENDP_NOT_ON, [USB_ERR_INVAL] = XHCI_TRB_ERROR_INVALID, [USB_ERR_NOMEM] = XHCI_TRB_ERROR_RESOURCE, [USB_ERR_CANCELLED] = XHCI_TRB_ERROR_STOPPED, [USB_ERR_BAD_ADDRESS] = XHCI_TRB_ERROR_PARAMETER, [USB_ERR_BAD_BUFSIZE] = XHCI_TRB_ERROR_PARAMETER, [USB_ERR_BAD_FLAG] = XHCI_TRB_ERROR_PARAMETER, [USB_ERR_NO_CALLBACK] = XHCI_TRB_ERROR_STALL, [USB_ERR_IN_USE] = XHCI_TRB_ERROR_RESOURCE, [USB_ERR_NO_ADDR] = XHCI_TRB_ERROR_RESOURCE, [USB_ERR_NO_PIPE] = XHCI_TRB_ERROR_RESOURCE, [USB_ERR_ZERO_NFRAMES] = XHCI_TRB_ERROR_UNDEFINED, [USB_ERR_ZERO_MAXP] = XHCI_TRB_ERROR_UNDEFINED, [USB_ERR_SET_ADDR_FAILED] = XHCI_TRB_ERROR_RESOURCE, [USB_ERR_NO_POWER] = XHCI_TRB_ERROR_ENDP_NOT_ON, [USB_ERR_TOO_DEEP] = XHCI_TRB_ERROR_RESOURCE, [USB_ERR_IOERROR] = XHCI_TRB_ERROR_TRB, [USB_ERR_NOT_CONFIGURED] = XHCI_TRB_ERROR_ENDP_NOT_ON, [USB_ERR_TIMEOUT] = XHCI_TRB_ERROR_CMD_ABORTED, [USB_ERR_SHORT_XFER] = XHCI_TRB_ERROR_SHORT_PKT, [USB_ERR_STALLED] = XHCI_TRB_ERROR_STALL, [USB_ERR_INTERRUPTED] = XHCI_TRB_ERROR_CMD_ABORTED, [USB_ERR_DMA_LOAD_FAILED] = XHCI_TRB_ERROR_DATA_BUF, [USB_ERR_BAD_CONTEXT] = XHCI_TRB_ERROR_TRB, [USB_ERR_NO_ROOT_HUB] = XHCI_TRB_ERROR_UNDEFINED, [USB_ERR_NO_INTR_THREAD] = XHCI_TRB_ERROR_UNDEFINED, [USB_ERR_NOT_LOCKED] = XHCI_TRB_ERROR_UNDEFINED, }; #define USB_TO_XHCI_ERR(e) ((e) < USB_ERR_MAX ? 
xhci_usb_errors[(e)] : \ XHCI_TRB_ERROR_INVALID) static int pci_xhci_insert_event(struct pci_xhci_softc *sc, struct xhci_trb *evtrb, int do_intr); static void pci_xhci_dump_trb(struct xhci_trb *trb); static void pci_xhci_assert_interrupt(struct pci_xhci_softc *sc); static void pci_xhci_reset_slot(struct pci_xhci_softc *sc, int slot); static void pci_xhci_reset_port(struct pci_xhci_softc *sc, int portn, int warm); static void pci_xhci_update_ep_ring(struct pci_xhci_softc *sc, struct pci_xhci_dev_emu *dev, struct pci_xhci_dev_ep *devep, struct xhci_endp_ctx *ep_ctx, uint32_t streamid, uint64_t ringaddr, int ccs); static void pci_xhci_set_evtrb(struct xhci_trb *evtrb, uint64_t port, uint32_t errcode, uint32_t evtype) { evtrb->qwTrb0 = port << 24; evtrb->dwTrb2 = XHCI_TRB_2_ERROR_SET(errcode); evtrb->dwTrb3 = XHCI_TRB_3_TYPE_SET(evtype); } /* controller reset */ static void pci_xhci_reset(struct pci_xhci_softc *sc) { int i; sc->rtsregs.er_enq_idx = 0; sc->rtsregs.er_events_cnt = 0; sc->rtsregs.event_pcs = 1; for (i = 1; i <= XHCI_MAX_SLOTS; i++) { pci_xhci_reset_slot(sc, i); } } static uint32_t pci_xhci_usbcmd_write(struct pci_xhci_softc *sc, uint32_t cmd) { int do_intr = 0; int i; if (cmd & XHCI_CMD_RS) { do_intr = (sc->opregs.usbcmd & XHCI_CMD_RS) == 0; sc->opregs.usbcmd |= XHCI_CMD_RS; sc->opregs.usbsts &= ~XHCI_STS_HCH; sc->opregs.usbsts |= XHCI_STS_PCD; /* Queue port change event on controller run from stop */ if (do_intr) for (i = 1; i <= XHCI_MAX_DEVS; i++) { struct pci_xhci_dev_emu *dev; struct pci_xhci_portregs *port; struct xhci_trb evtrb; if ((dev = XHCI_DEVINST_PTR(sc, i)) == NULL) continue; port = XHCI_PORTREG_PTR(sc, i); port->portsc |= XHCI_PS_CSC | XHCI_PS_CCS; port->portsc &= ~XHCI_PS_PLS_MASK; /* * XHCI 4.19.3 USB2 RxDetect->Polling, * USB3 Polling->U0 */ if (dev->dev_ue->ue_usbver == 2) port->portsc |= XHCI_PS_PLS_SET(UPS_PORT_LS_POLL); else port->portsc |= XHCI_PS_PLS_SET(UPS_PORT_LS_U0); pci_xhci_set_evtrb(&evtrb, i, XHCI_TRB_ERROR_SUCCESS, XHCI_TRB_EVENT_PORT_STS_CHANGE); if (pci_xhci_insert_event(sc, &evtrb, 0) != XHCI_TRB_ERROR_SUCCESS) break; } } else { sc->opregs.usbcmd &= ~XHCI_CMD_RS; sc->opregs.usbsts |= XHCI_STS_HCH; sc->opregs.usbsts &= ~XHCI_STS_PCD; } /* start execution of schedule; stop when set to 0 */ cmd |= sc->opregs.usbcmd & XHCI_CMD_RS; if (cmd & XHCI_CMD_HCRST) { /* reset controller */ pci_xhci_reset(sc); cmd &= ~XHCI_CMD_HCRST; } cmd &= ~(XHCI_CMD_CSS | XHCI_CMD_CRS); if (do_intr) pci_xhci_assert_interrupt(sc); return (cmd); } static void pci_xhci_portregs_write(struct pci_xhci_softc *sc, uint64_t offset, uint64_t value) { struct xhci_trb evtrb; struct pci_xhci_portregs *p; int port; uint32_t oldpls, newpls; if (sc->portregs == NULL) return; port = (offset - XHCI_PORTREGS_PORT0) / XHCI_PORTREGS_SETSZ; offset = (offset - XHCI_PORTREGS_PORT0) % XHCI_PORTREGS_SETSZ; DPRINTF(("pci_xhci: portregs wr offset 0x%lx, port %u: 0x%lx", offset, port, value)); assert(port >= 0); if (port > XHCI_MAX_DEVS) { DPRINTF(("pci_xhci: portregs_write port %d > ndevices", port)); return; } if (XHCI_DEVINST_PTR(sc, port) == NULL) { DPRINTF(("pci_xhci: portregs_write to unattached port %d", port)); } p = XHCI_PORTREG_PTR(sc, port); switch (offset) { case 0: /* port reset or warm reset */ if (value & (XHCI_PS_PR | XHCI_PS_WPR)) { pci_xhci_reset_port(sc, port, value & XHCI_PS_WPR); break; } if ((p->portsc & XHCI_PS_PP) == 0) { WPRINTF(("pci_xhci: portregs_write to unpowered " "port %d", port)); break; } /* Port status and control register */ oldpls = XHCI_PS_PLS_GET(p->portsc); 
newpls = XHCI_PS_PLS_GET(value); p->portsc &= XHCI_PS_PED | XHCI_PS_PLS_MASK | XHCI_PS_SPEED_MASK | XHCI_PS_PIC_MASK; - + if (XHCI_DEVINST_PTR(sc, port)) p->portsc |= XHCI_PS_CCS; p->portsc |= (value & ~(XHCI_PS_OCA | XHCI_PS_PR | XHCI_PS_PED | XHCI_PS_PLS_MASK | /* link state */ XHCI_PS_SPEED_MASK | XHCI_PS_PIC_MASK | /* port indicator */ XHCI_PS_LWS | XHCI_PS_DR | XHCI_PS_WPR)); /* clear control bits */ p->portsc &= ~(value & (XHCI_PS_CSC | XHCI_PS_PEC | XHCI_PS_WRC | XHCI_PS_OCC | XHCI_PS_PRC | XHCI_PS_PLC | XHCI_PS_CEC | XHCI_PS_CAS)); /* port disable request; for USB3, don't care */ if (value & XHCI_PS_PED) DPRINTF(("Disable port %d request", port)); if (!(value & XHCI_PS_LWS)) break; DPRINTF(("Port new PLS: %d", newpls)); switch (newpls) { case 0: /* U0 */ case 3: /* U3 */ if (oldpls != newpls) { p->portsc &= ~XHCI_PS_PLS_MASK; p->portsc |= XHCI_PS_PLS_SET(newpls) | XHCI_PS_PLC; if (oldpls != 0 && newpls == 0) { pci_xhci_set_evtrb(&evtrb, port, XHCI_TRB_ERROR_SUCCESS, XHCI_TRB_EVENT_PORT_STS_CHANGE); pci_xhci_insert_event(sc, &evtrb, 1); } } break; default: DPRINTF(("Unhandled change port %d PLS %u", port, newpls)); break; } break; - case 4: + case 4: /* Port power management status and control register */ p->portpmsc = value; break; case 8: /* Port link information register */ DPRINTF(("pci_xhci attempted write to PORTLI, port %d", port)); break; case 12: /* * Port hardware LPM control register. * For USB3, this register is reserved. */ p->porthlpmc = value; break; } } struct xhci_dev_ctx * pci_xhci_get_dev_ctx(struct pci_xhci_softc *sc, uint32_t slot) { uint64_t devctx_addr; struct xhci_dev_ctx *devctx; assert(slot > 0 && slot <= XHCI_MAX_DEVS); assert(XHCI_SLOTDEV_PTR(sc, slot) != NULL); assert(sc->opregs.dcbaa_p != NULL); devctx_addr = sc->opregs.dcbaa_p->dcba[slot]; if (devctx_addr == 0) { DPRINTF(("get_dev_ctx devctx_addr == 0")); return (NULL); } DPRINTF(("pci_xhci: get dev ctx, slot %u devctx addr %016lx", slot, devctx_addr)); devctx = XHCI_GADDR(sc, devctx_addr & ~0x3FUL); return (devctx); } struct xhci_trb * pci_xhci_trb_next(struct pci_xhci_softc *sc, struct xhci_trb *curtrb, uint64_t *guestaddr) { struct xhci_trb *next; assert(curtrb != NULL); if (XHCI_TRB_3_TYPE_GET(curtrb->dwTrb3) == XHCI_TRB_TYPE_LINK) { if (guestaddr) *guestaddr = curtrb->qwTrb0 & ~0xFUL; - + next = XHCI_GADDR(sc, curtrb->qwTrb0 & ~0xFUL); } else { if (guestaddr) *guestaddr += sizeof(struct xhci_trb) & ~0xFUL; next = curtrb + 1; } return (next); } static void pci_xhci_assert_interrupt(struct pci_xhci_softc *sc) { sc->rtsregs.intrreg.erdp |= XHCI_ERDP_LO_BUSY; sc->rtsregs.intrreg.iman |= XHCI_IMAN_INTR_PEND; sc->opregs.usbsts |= XHCI_STS_EINT; /* only trigger interrupt if permitted */ if ((sc->opregs.usbcmd & XHCI_CMD_INTE) && (sc->rtsregs.intrreg.iman & XHCI_IMAN_INTR_ENA)) { if (pci_msi_enabled(sc->xsc_pi)) pci_generate_msi(sc->xsc_pi, 0); else pci_lintr_assert(sc->xsc_pi); } } static void pci_xhci_deassert_interrupt(struct pci_xhci_softc *sc) { if (!pci_msi_enabled(sc->xsc_pi)) pci_lintr_assert(sc->xsc_pi); } static void pci_xhci_init_ep(struct pci_xhci_dev_emu *dev, int epid) { struct xhci_dev_ctx *dev_ctx; struct pci_xhci_dev_ep *devep; struct xhci_endp_ctx *ep_ctx; uint32_t pstreams; int i; dev_ctx = dev->dev_ctx; ep_ctx = &dev_ctx->ctx_ep[epid]; devep = &dev->eps[epid]; pstreams = XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0); if (pstreams > 0) { DPRINTF(("init_ep %d with pstreams %d", epid, pstreams)); assert(devep->ep_sctx_trbs == NULL); devep->ep_sctx = XHCI_GADDR(dev->xsc, ep_ctx->qwEpCtx2 
& XHCI_EPCTX_2_TR_DQ_PTR_MASK); devep->ep_sctx_trbs = calloc(pstreams, sizeof(struct pci_xhci_trb_ring)); for (i = 0; i < pstreams; i++) { devep->ep_sctx_trbs[i].ringaddr = devep->ep_sctx[i].qwSctx0 & XHCI_SCTX_0_TR_DQ_PTR_MASK; devep->ep_sctx_trbs[i].ccs = XHCI_SCTX_0_DCS_GET(devep->ep_sctx[i].qwSctx0); } } else { DPRINTF(("init_ep %d with no pstreams", epid)); devep->ep_ringaddr = ep_ctx->qwEpCtx2 & XHCI_EPCTX_2_TR_DQ_PTR_MASK; devep->ep_ccs = XHCI_EPCTX_2_DCS_GET(ep_ctx->qwEpCtx2); devep->ep_tr = XHCI_GADDR(dev->xsc, devep->ep_ringaddr); DPRINTF(("init_ep tr DCS %x", devep->ep_ccs)); } if (devep->ep_xfer == NULL) { devep->ep_xfer = malloc(sizeof(struct usb_data_xfer)); USB_DATA_XFER_INIT(devep->ep_xfer); } } static void pci_xhci_disable_ep(struct pci_xhci_dev_emu *dev, int epid) { struct xhci_dev_ctx *dev_ctx; struct pci_xhci_dev_ep *devep; struct xhci_endp_ctx *ep_ctx; DPRINTF(("pci_xhci disable_ep %d", epid)); dev_ctx = dev->dev_ctx; ep_ctx = &dev_ctx->ctx_ep[epid]; ep_ctx->dwEpCtx0 = (ep_ctx->dwEpCtx0 & ~0x7) | XHCI_ST_EPCTX_DISABLED; devep = &dev->eps[epid]; if (XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0) > 0 && devep->ep_sctx_trbs != NULL) free(devep->ep_sctx_trbs); if (devep->ep_xfer != NULL) { free(devep->ep_xfer); devep->ep_xfer = NULL; } memset(devep, 0, sizeof(struct pci_xhci_dev_ep)); } /* reset device at slot and data structures related to it */ static void pci_xhci_reset_slot(struct pci_xhci_softc *sc, int slot) { struct pci_xhci_dev_emu *dev; dev = XHCI_SLOTDEV_PTR(sc, slot); if (!dev) { DPRINTF(("xhci reset unassigned slot (%d)?", slot)); } else { dev->dev_slotstate = XHCI_ST_DISABLED; } /* TODO: reset ring buffer pointers */ } static int pci_xhci_insert_event(struct pci_xhci_softc *sc, struct xhci_trb *evtrb, int do_intr) { struct pci_xhci_rtsregs *rts; uint64_t erdp; int erdp_idx; int err; struct xhci_trb *evtrbptr; err = XHCI_TRB_ERROR_SUCCESS; rts = &sc->rtsregs; erdp = rts->intrreg.erdp & ~0xF; erdp_idx = (erdp - rts->erstba_p[rts->er_deq_seg].qwEvrsTablePtr) / sizeof(struct xhci_trb); DPRINTF(("pci_xhci: insert event 0[%lx] 2[%x] 3[%x]", evtrb->qwTrb0, evtrb->dwTrb2, evtrb->dwTrb3)); DPRINTF(("\terdp idx %d/seg %d, enq idx %d/seg %d, pcs %u", erdp_idx, rts->er_deq_seg, rts->er_enq_idx, rts->er_enq_seg, rts->event_pcs)); DPRINTF(("\t(erdp=0x%lx, erst=0x%lx, tblsz=%u, do_intr %d)", erdp, rts->erstba_p->qwEvrsTablePtr, rts->erstba_p->dwEvrsTableSize, do_intr)); evtrbptr = &rts->erst_p[rts->er_enq_idx]; /* TODO: multi-segment table */ if (rts->er_events_cnt >= rts->erstba_p->dwEvrsTableSize) { DPRINTF(("pci_xhci[%d] cannot insert event; ring full", __LINE__)); err = XHCI_TRB_ERROR_EV_RING_FULL; goto done; } if (rts->er_events_cnt == rts->erstba_p->dwEvrsTableSize - 1) { struct xhci_trb errev; if ((evtrbptr->dwTrb3 & 0x1) == (rts->event_pcs & 0x1)) { DPRINTF(("pci_xhci[%d] insert evt err: ring full", __LINE__)); errev.qwTrb0 = 0; errev.dwTrb2 = XHCI_TRB_2_ERROR_SET( XHCI_TRB_ERROR_EV_RING_FULL); errev.dwTrb3 = XHCI_TRB_3_TYPE_SET( XHCI_TRB_EVENT_HOST_CTRL) | rts->event_pcs; rts->er_events_cnt++; memcpy(&rts->erst_p[rts->er_enq_idx], &errev, sizeof(struct xhci_trb)); rts->er_enq_idx = (rts->er_enq_idx + 1) % rts->erstba_p->dwEvrsTableSize; err = XHCI_TRB_ERROR_EV_RING_FULL; do_intr = 1; goto done; } } else { rts->er_events_cnt++; } evtrb->dwTrb3 &= ~XHCI_TRB_3_CYCLE_BIT; evtrb->dwTrb3 |= rts->event_pcs; memcpy(&rts->erst_p[rts->er_enq_idx], evtrb, sizeof(struct xhci_trb)); rts->er_enq_idx = (rts->er_enq_idx + 1) % rts->erstba_p->dwEvrsTableSize; if (rts->er_enq_idx == 
0) rts->event_pcs ^= 1; done: if (do_intr) pci_xhci_assert_interrupt(sc); return (err); } static uint32_t pci_xhci_cmd_enable_slot(struct pci_xhci_softc *sc, uint32_t *slot) { struct pci_xhci_dev_emu *dev; uint32_t cmderr; int i; cmderr = XHCI_TRB_ERROR_NO_SLOTS; if (sc->portregs != NULL) for (i = 1; i <= XHCI_MAX_SLOTS; i++) { dev = XHCI_SLOTDEV_PTR(sc, i); if (dev && dev->dev_slotstate == XHCI_ST_DISABLED) { *slot = i; dev->dev_slotstate = XHCI_ST_ENABLED; cmderr = XHCI_TRB_ERROR_SUCCESS; dev->hci.hci_address = i; break; } } DPRINTF(("pci_xhci enable slot (error=%d) slot %u", cmderr != XHCI_TRB_ERROR_SUCCESS, *slot)); return (cmderr); } static uint32_t pci_xhci_cmd_disable_slot(struct pci_xhci_softc *sc, uint32_t slot) { struct pci_xhci_dev_emu *dev; uint32_t cmderr; DPRINTF(("pci_xhci disable slot %u", slot)); cmderr = XHCI_TRB_ERROR_NO_SLOTS; if (sc->portregs == NULL) goto done; if (slot > XHCI_MAX_SLOTS) { cmderr = XHCI_TRB_ERROR_SLOT_NOT_ON; goto done; } dev = XHCI_SLOTDEV_PTR(sc, slot); if (dev) { if (dev->dev_slotstate == XHCI_ST_DISABLED) { cmderr = XHCI_TRB_ERROR_SLOT_NOT_ON; } else { dev->dev_slotstate = XHCI_ST_DISABLED; cmderr = XHCI_TRB_ERROR_SUCCESS; /* TODO: reset events and endpoints */ } } else cmderr = XHCI_TRB_ERROR_SLOT_NOT_ON; done: return (cmderr); } static uint32_t pci_xhci_cmd_reset_device(struct pci_xhci_softc *sc, uint32_t slot) { struct pci_xhci_dev_emu *dev; struct xhci_dev_ctx *dev_ctx; struct xhci_endp_ctx *ep_ctx; uint32_t cmderr; int i; cmderr = XHCI_TRB_ERROR_NO_SLOTS; if (sc->portregs == NULL) goto done; DPRINTF(("pci_xhci reset device slot %u", slot)); dev = XHCI_SLOTDEV_PTR(sc, slot); if (!dev || dev->dev_slotstate == XHCI_ST_DISABLED) cmderr = XHCI_TRB_ERROR_SLOT_NOT_ON; else { dev->dev_slotstate = XHCI_ST_DEFAULT; dev->hci.hci_address = 0; dev_ctx = pci_xhci_get_dev_ctx(sc, slot); /* slot state */ dev_ctx->ctx_slot.dwSctx3 = FIELD_REPLACE( dev_ctx->ctx_slot.dwSctx3, XHCI_ST_SLCTX_DEFAULT, 0x1F, 27); /* number of contexts */ dev_ctx->ctx_slot.dwSctx0 = FIELD_REPLACE( dev_ctx->ctx_slot.dwSctx0, 1, 0x1F, 27); /* reset all eps other than ep-0 */ for (i = 2; i <= 31; i++) { ep_ctx = &dev_ctx->ctx_ep[i]; ep_ctx->dwEpCtx0 = FIELD_REPLACE( ep_ctx->dwEpCtx0, XHCI_ST_EPCTX_DISABLED, 0x7, 0); } cmderr = XHCI_TRB_ERROR_SUCCESS; } pci_xhci_reset_slot(sc, slot); done: return (cmderr); } static uint32_t pci_xhci_cmd_address_device(struct pci_xhci_softc *sc, uint32_t slot, struct xhci_trb *trb) { struct pci_xhci_dev_emu *dev; struct xhci_input_dev_ctx *input_ctx; struct xhci_slot_ctx *islot_ctx; struct xhci_dev_ctx *dev_ctx; struct xhci_endp_ctx *ep0_ctx; uint32_t cmderr; input_ctx = XHCI_GADDR(sc, trb->qwTrb0 & ~0xFUL); islot_ctx = &input_ctx->ctx_slot; ep0_ctx = &input_ctx->ctx_ep[1]; cmderr = XHCI_TRB_ERROR_SUCCESS; DPRINTF(("pci_xhci: address device, input ctl: D 0x%08x A 0x%08x,", input_ctx->ctx_input.dwInCtx0, input_ctx->ctx_input.dwInCtx1)); DPRINTF((" slot %08x %08x %08x %08x", islot_ctx->dwSctx0, islot_ctx->dwSctx1, islot_ctx->dwSctx2, islot_ctx->dwSctx3)); DPRINTF((" ep0 %08x %08x %016lx %08x", ep0_ctx->dwEpCtx0, ep0_ctx->dwEpCtx1, ep0_ctx->qwEpCtx2, ep0_ctx->dwEpCtx4)); /* when setting address: drop-ctx=0, add-ctx=slot+ep0 */ if ((input_ctx->ctx_input.dwInCtx0 != 0) || (input_ctx->ctx_input.dwInCtx1 & 0x03) != 0x03) { DPRINTF(("pci_xhci: address device, input ctl invalid")); cmderr = XHCI_TRB_ERROR_TRB; goto done; } /* assign address to slot */ dev_ctx = pci_xhci_get_dev_ctx(sc, slot); DPRINTF(("pci_xhci: address device, dev ctx")); DPRINTF((" slot %08x 
%08x %08x %08x", dev_ctx->ctx_slot.dwSctx0, dev_ctx->ctx_slot.dwSctx1, dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3)); dev = XHCI_SLOTDEV_PTR(sc, slot); assert(dev != NULL); dev->hci.hci_address = slot; dev->dev_ctx = dev_ctx; if (dev->dev_ue->ue_reset == NULL || dev->dev_ue->ue_reset(dev->dev_sc) < 0) { cmderr = XHCI_TRB_ERROR_ENDP_NOT_ON; goto done; } memcpy(&dev_ctx->ctx_slot, islot_ctx, sizeof(struct xhci_slot_ctx)); dev_ctx->ctx_slot.dwSctx3 = XHCI_SCTX_3_SLOT_STATE_SET(XHCI_ST_SLCTX_ADDRESSED) | XHCI_SCTX_3_DEV_ADDR_SET(slot); memcpy(&dev_ctx->ctx_ep[1], ep0_ctx, sizeof(struct xhci_endp_ctx)); ep0_ctx = &dev_ctx->ctx_ep[1]; ep0_ctx->dwEpCtx0 = (ep0_ctx->dwEpCtx0 & ~0x7) | XHCI_EPCTX_0_EPSTATE_SET(XHCI_ST_EPCTX_RUNNING); pci_xhci_init_ep(dev, 1); dev->dev_slotstate = XHCI_ST_ADDRESSED; DPRINTF(("pci_xhci: address device, output ctx")); DPRINTF((" slot %08x %08x %08x %08x", dev_ctx->ctx_slot.dwSctx0, dev_ctx->ctx_slot.dwSctx1, dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3)); DPRINTF((" ep0 %08x %08x %016lx %08x", ep0_ctx->dwEpCtx0, ep0_ctx->dwEpCtx1, ep0_ctx->qwEpCtx2, ep0_ctx->dwEpCtx4)); done: return (cmderr); } static uint32_t pci_xhci_cmd_config_ep(struct pci_xhci_softc *sc, uint32_t slot, struct xhci_trb *trb) { struct xhci_input_dev_ctx *input_ctx; struct pci_xhci_dev_emu *dev; struct xhci_dev_ctx *dev_ctx; struct xhci_endp_ctx *ep_ctx, *iep_ctx; uint32_t cmderr; int i; cmderr = XHCI_TRB_ERROR_SUCCESS; DPRINTF(("pci_xhci config_ep slot %u", slot)); dev = XHCI_SLOTDEV_PTR(sc, slot); assert(dev != NULL); if ((trb->dwTrb3 & XHCI_TRB_3_DCEP_BIT) != 0) { DPRINTF(("pci_xhci config_ep - deconfigure ep slot %u", slot)); if (dev->dev_ue->ue_stop != NULL) dev->dev_ue->ue_stop(dev->dev_sc); dev->dev_slotstate = XHCI_ST_ADDRESSED; dev->hci.hci_address = 0; dev_ctx = pci_xhci_get_dev_ctx(sc, slot); /* number of contexts */ dev_ctx->ctx_slot.dwSctx0 = FIELD_REPLACE( dev_ctx->ctx_slot.dwSctx0, 1, 0x1F, 27); /* slot state */ dev_ctx->ctx_slot.dwSctx3 = FIELD_REPLACE( dev_ctx->ctx_slot.dwSctx3, XHCI_ST_SLCTX_ADDRESSED, 0x1F, 27); /* disable endpoints */ for (i = 2; i < 32; i++) pci_xhci_disable_ep(dev, i); cmderr = XHCI_TRB_ERROR_SUCCESS; goto done; } if (dev->dev_slotstate < XHCI_ST_ADDRESSED) { DPRINTF(("pci_xhci: config_ep slotstate x%x != addressed", dev->dev_slotstate)); cmderr = XHCI_TRB_ERROR_SLOT_NOT_ON; goto done; } /* In addressed/configured state; * for each drop endpoint ctx flag: * ep->state = DISABLED * for each add endpoint ctx flag: * cp(ep-in, ep-out) * ep->state = RUNNING * for each drop+add endpoint flag: * reset ep resources * cp(ep-in, ep-out) * ep->state = RUNNING * if input->DisabledCtx[2-31] < 30: (at least 1 ep not disabled) * slot->state = configured */ input_ctx = XHCI_GADDR(sc, trb->qwTrb0 & ~0xFUL); dev_ctx = dev->dev_ctx; DPRINTF(("pci_xhci: config_ep inputctx: D:x%08x A:x%08x 7:x%08x", input_ctx->ctx_input.dwInCtx0, input_ctx->ctx_input.dwInCtx1, input_ctx->ctx_input.dwInCtx7)); for (i = 2; i <= 31; i++) { ep_ctx = &dev_ctx->ctx_ep[i]; if (input_ctx->ctx_input.dwInCtx0 & XHCI_INCTX_0_DROP_MASK(i)) { DPRINTF((" config ep - dropping ep %d", i)); pci_xhci_disable_ep(dev, i); } if (input_ctx->ctx_input.dwInCtx1 & XHCI_INCTX_1_ADD_MASK(i)) { iep_ctx = &input_ctx->ctx_ep[i]; DPRINTF((" enable ep[%d] %08x %08x %016lx %08x", i, iep_ctx->dwEpCtx0, iep_ctx->dwEpCtx1, iep_ctx->qwEpCtx2, iep_ctx->dwEpCtx4)); memcpy(ep_ctx, iep_ctx, sizeof(struct xhci_endp_ctx)); pci_xhci_init_ep(dev, i); /* ep state */ ep_ctx->dwEpCtx0 = FIELD_REPLACE( ep_ctx->dwEpCtx0, 
XHCI_ST_EPCTX_RUNNING, 0x7, 0); } } /* slot state to configured */ dev_ctx->ctx_slot.dwSctx3 = FIELD_REPLACE( dev_ctx->ctx_slot.dwSctx3, XHCI_ST_SLCTX_CONFIGURED, 0x1F, 27); dev_ctx->ctx_slot.dwSctx0 = FIELD_COPY( dev_ctx->ctx_slot.dwSctx0, input_ctx->ctx_slot.dwSctx0, 0x1F, 27); dev->dev_slotstate = XHCI_ST_CONFIGURED; DPRINTF(("EP configured; slot %u [0]=0x%08x [1]=0x%08x [2]=0x%08x " "[3]=0x%08x", slot, dev_ctx->ctx_slot.dwSctx0, dev_ctx->ctx_slot.dwSctx1, dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3)); done: return (cmderr); } static uint32_t pci_xhci_cmd_reset_ep(struct pci_xhci_softc *sc, uint32_t slot, struct xhci_trb *trb) { struct pci_xhci_dev_emu *dev; struct pci_xhci_dev_ep *devep; struct xhci_dev_ctx *dev_ctx; struct xhci_endp_ctx *ep_ctx; uint32_t cmderr, epid; uint32_t type; epid = XHCI_TRB_3_EP_GET(trb->dwTrb3); DPRINTF(("pci_xhci: reset ep %u: slot %u", epid, slot)); cmderr = XHCI_TRB_ERROR_SUCCESS; type = XHCI_TRB_3_TYPE_GET(trb->dwTrb3); dev = XHCI_SLOTDEV_PTR(sc, slot); assert(dev != NULL); if (type == XHCI_TRB_TYPE_STOP_EP && (trb->dwTrb3 & XHCI_TRB_3_SUSP_EP_BIT) != 0) { /* XXX suspend endpoint for 10ms */ } if (epid < 1 || epid > 31) { DPRINTF(("pci_xhci: reset ep: invalid epid %u", epid)); cmderr = XHCI_TRB_ERROR_TRB; goto done; } devep = &dev->eps[epid]; if (devep->ep_xfer != NULL) USB_DATA_XFER_RESET(devep->ep_xfer); dev_ctx = dev->dev_ctx; assert(dev_ctx != NULL); ep_ctx = &dev_ctx->ctx_ep[epid]; ep_ctx->dwEpCtx0 = (ep_ctx->dwEpCtx0 & ~0x7) | XHCI_ST_EPCTX_STOPPED; if (XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0) == 0) ep_ctx->qwEpCtx2 = devep->ep_ringaddr | devep->ep_ccs; DPRINTF(("pci_xhci: reset ep[%u] %08x %08x %016lx %08x", epid, ep_ctx->dwEpCtx0, ep_ctx->dwEpCtx1, ep_ctx->qwEpCtx2, ep_ctx->dwEpCtx4)); if (type == XHCI_TRB_TYPE_RESET_EP && (dev->dev_ue->ue_reset == NULL || dev->dev_ue->ue_reset(dev->dev_sc) < 0)) { cmderr = XHCI_TRB_ERROR_ENDP_NOT_ON; goto done; } done: return (cmderr); } static uint32_t pci_xhci_find_stream(struct pci_xhci_softc *sc, struct xhci_endp_ctx *ep, uint32_t streamid, struct xhci_stream_ctx **osctx) { struct xhci_stream_ctx *sctx; uint32_t maxpstreams; maxpstreams = XHCI_EPCTX_0_MAXP_STREAMS_GET(ep->dwEpCtx0); if (maxpstreams == 0) return (XHCI_TRB_ERROR_TRB); if (maxpstreams > XHCI_STREAMS_MAX) return (XHCI_TRB_ERROR_INVALID_SID); if (XHCI_EPCTX_0_LSA_GET(ep->dwEpCtx0) == 0) { DPRINTF(("pci_xhci: find_stream; LSA bit not set")); return (XHCI_TRB_ERROR_INVALID_SID); } /* only support primary stream */ if (streamid > maxpstreams) return (XHCI_TRB_ERROR_STREAM_TYPE); sctx = XHCI_GADDR(sc, ep->qwEpCtx2 & ~0xFUL) + streamid; if (!XHCI_SCTX_0_SCT_GET(sctx->qwSctx0)) return (XHCI_TRB_ERROR_STREAM_TYPE); *osctx = sctx; return (XHCI_TRB_ERROR_SUCCESS); } static uint32_t pci_xhci_cmd_set_tr(struct pci_xhci_softc *sc, uint32_t slot, struct xhci_trb *trb) { struct pci_xhci_dev_emu *dev; struct pci_xhci_dev_ep *devep; struct xhci_dev_ctx *dev_ctx; struct xhci_endp_ctx *ep_ctx; uint32_t cmderr, epid; uint32_t streamid; cmderr = XHCI_TRB_ERROR_SUCCESS; dev = XHCI_SLOTDEV_PTR(sc, slot); assert(dev != NULL); DPRINTF(("pci_xhci set_tr: new-tr x%016lx, SCT %u DCS %u", (trb->qwTrb0 & ~0xF), (uint32_t)((trb->qwTrb0 >> 1) & 0x7), (uint32_t)(trb->qwTrb0 & 0x1))); DPRINTF((" stream-id %u, slot %u, epid %u, C %u", (trb->dwTrb2 >> 16) & 0xFFFF, XHCI_TRB_3_SLOT_GET(trb->dwTrb3), XHCI_TRB_3_EP_GET(trb->dwTrb3), trb->dwTrb3 & 0x1)); epid = XHCI_TRB_3_EP_GET(trb->dwTrb3); if (epid < 1 || epid > 31) { DPRINTF(("pci_xhci: set_tr_deq: invalid epid %u", 
epid)); cmderr = XHCI_TRB_ERROR_TRB; goto done; } dev_ctx = dev->dev_ctx; assert(dev_ctx != NULL); ep_ctx = &dev_ctx->ctx_ep[epid]; devep = &dev->eps[epid]; switch (XHCI_EPCTX_0_EPSTATE_GET(ep_ctx->dwEpCtx0)) { case XHCI_ST_EPCTX_STOPPED: case XHCI_ST_EPCTX_ERROR: break; default: DPRINTF(("pci_xhci cmd set_tr invalid state %x", XHCI_EPCTX_0_EPSTATE_GET(ep_ctx->dwEpCtx0))); cmderr = XHCI_TRB_ERROR_CONTEXT_STATE; goto done; } streamid = XHCI_TRB_2_STREAM_GET(trb->dwTrb2); if (XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0) > 0) { struct xhci_stream_ctx *sctx; sctx = NULL; cmderr = pci_xhci_find_stream(sc, ep_ctx, streamid, &sctx); if (sctx != NULL) { assert(devep->ep_sctx != NULL); - + devep->ep_sctx[streamid].qwSctx0 = trb->qwTrb0; devep->ep_sctx_trbs[streamid].ringaddr = trb->qwTrb0 & ~0xF; devep->ep_sctx_trbs[streamid].ccs = XHCI_EPCTX_2_DCS_GET(trb->qwTrb0); } } else { if (streamid != 0) { DPRINTF(("pci_xhci cmd set_tr streamid %x != 0", streamid)); } ep_ctx->qwEpCtx2 = trb->qwTrb0 & ~0xFUL; devep->ep_ringaddr = ep_ctx->qwEpCtx2 & ~0xFUL; devep->ep_ccs = trb->qwTrb0 & 0x1; devep->ep_tr = XHCI_GADDR(sc, devep->ep_ringaddr); DPRINTF(("pci_xhci set_tr first TRB:")); pci_xhci_dump_trb(devep->ep_tr); } ep_ctx->dwEpCtx0 = (ep_ctx->dwEpCtx0 & ~0x7) | XHCI_ST_EPCTX_STOPPED; done: return (cmderr); } static uint32_t pci_xhci_cmd_eval_ctx(struct pci_xhci_softc *sc, uint32_t slot, struct xhci_trb *trb) { struct xhci_input_dev_ctx *input_ctx; struct xhci_slot_ctx *islot_ctx; struct xhci_dev_ctx *dev_ctx; struct xhci_endp_ctx *ep0_ctx; uint32_t cmderr; input_ctx = XHCI_GADDR(sc, trb->qwTrb0 & ~0xFUL); islot_ctx = &input_ctx->ctx_slot; ep0_ctx = &input_ctx->ctx_ep[1]; cmderr = XHCI_TRB_ERROR_SUCCESS; DPRINTF(("pci_xhci: eval ctx, input ctl: D 0x%08x A 0x%08x,", input_ctx->ctx_input.dwInCtx0, input_ctx->ctx_input.dwInCtx1)); DPRINTF((" slot %08x %08x %08x %08x", islot_ctx->dwSctx0, islot_ctx->dwSctx1, islot_ctx->dwSctx2, islot_ctx->dwSctx3)); DPRINTF((" ep0 %08x %08x %016lx %08x", ep0_ctx->dwEpCtx0, ep0_ctx->dwEpCtx1, ep0_ctx->qwEpCtx2, ep0_ctx->dwEpCtx4)); /* this command expects drop-ctx=0 & add-ctx=slot+ep0 */ if ((input_ctx->ctx_input.dwInCtx0 != 0) || (input_ctx->ctx_input.dwInCtx1 & 0x03) == 0) { DPRINTF(("pci_xhci: eval ctx, input ctl invalid")); cmderr = XHCI_TRB_ERROR_TRB; goto done; } /* assign address to slot; in this emulation, slot_id = address */ dev_ctx = pci_xhci_get_dev_ctx(sc, slot); DPRINTF(("pci_xhci: eval ctx, dev ctx")); DPRINTF((" slot %08x %08x %08x %08x", dev_ctx->ctx_slot.dwSctx0, dev_ctx->ctx_slot.dwSctx1, dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3)); if (input_ctx->ctx_input.dwInCtx1 & 0x01) { /* slot ctx */ /* set max exit latency */ dev_ctx->ctx_slot.dwSctx1 = FIELD_COPY( dev_ctx->ctx_slot.dwSctx1, input_ctx->ctx_slot.dwSctx1, 0xFFFF, 0); /* set interrupter target */ dev_ctx->ctx_slot.dwSctx2 = FIELD_COPY( dev_ctx->ctx_slot.dwSctx2, input_ctx->ctx_slot.dwSctx2, 0x3FF, 22); } if (input_ctx->ctx_input.dwInCtx1 & 0x02) { /* control ctx */ /* set max packet size */ dev_ctx->ctx_ep[1].dwEpCtx1 = FIELD_COPY( dev_ctx->ctx_ep[1].dwEpCtx1, ep0_ctx->dwEpCtx1, 0xFFFF, 16); ep0_ctx = &dev_ctx->ctx_ep[1]; } DPRINTF(("pci_xhci: eval ctx, output ctx")); DPRINTF((" slot %08x %08x %08x %08x", dev_ctx->ctx_slot.dwSctx0, dev_ctx->ctx_slot.dwSctx1, dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3)); DPRINTF((" ep0 %08x %08x %016lx %08x", ep0_ctx->dwEpCtx0, ep0_ctx->dwEpCtx1, ep0_ctx->qwEpCtx2, ep0_ctx->dwEpCtx4)); done: return (cmderr); } static int 
pci_xhci_complete_commands(struct pci_xhci_softc *sc) { struct xhci_trb evtrb; struct xhci_trb *trb; uint64_t crcr; uint32_t ccs; /* cycle state (XHCI 4.9.2) */ uint32_t type; uint32_t slot; uint32_t cmderr; int error; error = 0; sc->opregs.crcr |= XHCI_CRCR_LO_CRR; trb = sc->opregs.cr_p; ccs = sc->opregs.crcr & XHCI_CRCR_LO_RCS; crcr = sc->opregs.crcr & ~0xF; while (1) { sc->opregs.cr_p = trb; - + type = XHCI_TRB_3_TYPE_GET(trb->dwTrb3); if ((trb->dwTrb3 & XHCI_TRB_3_CYCLE_BIT) != (ccs & XHCI_TRB_3_CYCLE_BIT)) break; DPRINTF(("pci_xhci: cmd type 0x%x, Trb0 x%016lx dwTrb2 x%08x" " dwTrb3 x%08x, TRB_CYCLE %u/ccs %u", type, trb->qwTrb0, trb->dwTrb2, trb->dwTrb3, trb->dwTrb3 & XHCI_TRB_3_CYCLE_BIT, ccs)); cmderr = XHCI_TRB_ERROR_SUCCESS; evtrb.dwTrb2 = 0; evtrb.dwTrb3 = (ccs & XHCI_TRB_3_CYCLE_BIT) | XHCI_TRB_3_TYPE_SET(XHCI_TRB_EVENT_CMD_COMPLETE); slot = 0; switch (type) { case XHCI_TRB_TYPE_LINK: /* 0x06 */ if (trb->dwTrb3 & XHCI_TRB_3_TC_BIT) ccs ^= XHCI_CRCR_LO_RCS; break; case XHCI_TRB_TYPE_ENABLE_SLOT: /* 0x09 */ cmderr = pci_xhci_cmd_enable_slot(sc, &slot); break; case XHCI_TRB_TYPE_DISABLE_SLOT: /* 0x0A */ slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_disable_slot(sc, slot); break; case XHCI_TRB_TYPE_ADDRESS_DEVICE: /* 0x0B */ slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_address_device(sc, slot, trb); break; case XHCI_TRB_TYPE_CONFIGURE_EP: /* 0x0C */ slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_config_ep(sc, slot, trb); break; case XHCI_TRB_TYPE_EVALUATE_CTX: /* 0x0D */ slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_eval_ctx(sc, slot, trb); break; case XHCI_TRB_TYPE_RESET_EP: /* 0x0E */ DPRINTF(("Reset Endpoint on slot %d", slot)); slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_reset_ep(sc, slot, trb); break; case XHCI_TRB_TYPE_STOP_EP: /* 0x0F */ DPRINTF(("Stop Endpoint on slot %d", slot)); slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_reset_ep(sc, slot, trb); break; case XHCI_TRB_TYPE_SET_TR_DEQUEUE: /* 0x10 */ slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_set_tr(sc, slot, trb); break; case XHCI_TRB_TYPE_RESET_DEVICE: /* 0x11 */ slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_reset_device(sc, slot); break; case XHCI_TRB_TYPE_FORCE_EVENT: /* 0x12 */ /* TODO: */ break; case XHCI_TRB_TYPE_NEGOTIATE_BW: /* 0x13 */ break; case XHCI_TRB_TYPE_SET_LATENCY_TOL: /* 0x14 */ break; case XHCI_TRB_TYPE_GET_PORT_BW: /* 0x15 */ break; case XHCI_TRB_TYPE_FORCE_HEADER: /* 0x16 */ break; case XHCI_TRB_TYPE_NOOP_CMD: /* 0x17 */ break; default: DPRINTF(("pci_xhci: unsupported cmd %x", type)); break; } if (type != XHCI_TRB_TYPE_LINK) { - /* + /* * insert command completion event and assert intr */ evtrb.qwTrb0 = crcr; evtrb.dwTrb2 |= XHCI_TRB_2_ERROR_SET(cmderr); evtrb.dwTrb3 |= XHCI_TRB_3_SLOT_SET(slot); DPRINTF(("pci_xhci: command 0x%x result: 0x%x", type, cmderr)); pci_xhci_insert_event(sc, &evtrb, 1); } trb = pci_xhci_trb_next(sc, trb, &crcr); } sc->opregs.crcr = crcr | (sc->opregs.crcr & XHCI_CRCR_LO_CA) | ccs; sc->opregs.crcr &= ~XHCI_CRCR_LO_CRR; return (error); } static void pci_xhci_dump_trb(struct xhci_trb *trb) { static const char *trbtypes[] = { "RESERVED", "NORMAL", "SETUP_STAGE", "DATA_STAGE", "STATUS_STAGE", "ISOCH", "LINK", "EVENT_DATA", "NOOP", "ENABLE_SLOT", "DISABLE_SLOT", "ADDRESS_DEVICE", "CONFIGURE_EP", "EVALUATE_CTX", "RESET_EP", "STOP_EP", "SET_TR_DEQUEUE", "RESET_DEVICE", "FORCE_EVENT", "NEGOTIATE_BW", "SET_LATENCY_TOL", "GET_PORT_BW", 
"FORCE_HEADER", "NOOP_CMD" }; uint32_t type; type = XHCI_TRB_3_TYPE_GET(trb->dwTrb3); DPRINTF(("pci_xhci: trb[@%p] type x%02x %s 0:x%016lx 2:x%08x 3:x%08x", trb, type, type <= XHCI_TRB_TYPE_NOOP_CMD ? trbtypes[type] : "INVALID", trb->qwTrb0, trb->dwTrb2, trb->dwTrb3)); } static int pci_xhci_xfer_complete(struct pci_xhci_softc *sc, struct usb_data_xfer *xfer, uint32_t slot, uint32_t epid, int *do_intr) { struct pci_xhci_dev_emu *dev; struct pci_xhci_dev_ep *devep; struct xhci_dev_ctx *dev_ctx; struct xhci_endp_ctx *ep_ctx; struct xhci_trb *trb; struct xhci_trb evtrb; uint32_t trbflags; uint32_t edtla; int i, err; dev = XHCI_SLOTDEV_PTR(sc, slot); devep = &dev->eps[epid]; dev_ctx = pci_xhci_get_dev_ctx(sc, slot); assert(dev_ctx != NULL); ep_ctx = &dev_ctx->ctx_ep[epid]; err = XHCI_TRB_ERROR_SUCCESS; *do_intr = 0; edtla = 0; /* go through list of TRBs and insert event(s) */ for (i = xfer->head; xfer->ndata > 0; ) { evtrb.qwTrb0 = (uint64_t)xfer->data[i].hci_data; trb = XHCI_GADDR(sc, evtrb.qwTrb0); trbflags = trb->dwTrb3; DPRINTF(("pci_xhci: xfer[%d] done?%u:%d trb %x %016lx %x " "(err %d) IOC?%d", i, xfer->data[i].processed, xfer->data[i].blen, XHCI_TRB_3_TYPE_GET(trbflags), evtrb.qwTrb0, trbflags, err, trb->dwTrb3 & XHCI_TRB_3_IOC_BIT ? 1 : 0)); if (!xfer->data[i].processed) { xfer->head = i; break; } xfer->ndata--; edtla += xfer->data[i].bdone; trb->dwTrb3 = (trb->dwTrb3 & ~0x1) | (xfer->data[i].ccs); pci_xhci_update_ep_ring(sc, dev, devep, ep_ctx, xfer->data[i].streamid, xfer->data[i].trbnext, xfer->data[i].ccs); /* Only interrupt if IOC or short packet */ if (!(trb->dwTrb3 & XHCI_TRB_3_IOC_BIT) && !((err == XHCI_TRB_ERROR_SHORT_PKT) && (trb->dwTrb3 & XHCI_TRB_3_ISP_BIT))) { i = (i + 1) % USB_MAX_XFER_BLOCKS; continue; } evtrb.dwTrb2 = XHCI_TRB_2_ERROR_SET(err) | XHCI_TRB_2_REM_SET(xfer->data[i].blen); evtrb.dwTrb3 = XHCI_TRB_3_TYPE_SET(XHCI_TRB_EVENT_TRANSFER) | XHCI_TRB_3_SLOT_SET(slot) | XHCI_TRB_3_EP_SET(epid); if (XHCI_TRB_3_TYPE_GET(trbflags) == XHCI_TRB_TYPE_EVENT_DATA) { DPRINTF(("pci_xhci EVENT_DATA edtla %u", edtla)); evtrb.qwTrb0 = trb->qwTrb0; - evtrb.dwTrb2 = (edtla & 0xFFFFF) | + evtrb.dwTrb2 = (edtla & 0xFFFFF) | XHCI_TRB_2_ERROR_SET(err); evtrb.dwTrb3 |= XHCI_TRB_3_ED_BIT; edtla = 0; } *do_intr = 1; err = pci_xhci_insert_event(sc, &evtrb, 0); if (err != XHCI_TRB_ERROR_SUCCESS) { break; } i = (i + 1) % USB_MAX_XFER_BLOCKS; } return (err); } static void pci_xhci_update_ep_ring(struct pci_xhci_softc *sc, struct pci_xhci_dev_emu *dev, struct pci_xhci_dev_ep *devep, struct xhci_endp_ctx *ep_ctx, uint32_t streamid, uint64_t ringaddr, int ccs) { if (XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0) != 0) { devep->ep_sctx[streamid].qwSctx0 = (ringaddr & ~0xFUL) | (ccs & 0x1); devep->ep_sctx_trbs[streamid].ringaddr = ringaddr & ~0xFUL; devep->ep_sctx_trbs[streamid].ccs = ccs & 0x1; ep_ctx->qwEpCtx2 = (ep_ctx->qwEpCtx2 & ~0x1) | (ccs & 0x1); DPRINTF(("xhci update ep-ring stream %d, addr %lx", streamid, devep->ep_sctx[streamid].qwSctx0)); } else { devep->ep_ringaddr = ringaddr & ~0xFUL; devep->ep_ccs = ccs & 0x1; devep->ep_tr = XHCI_GADDR(sc, ringaddr & ~0xFUL); ep_ctx->qwEpCtx2 = (ringaddr & ~0xFUL) | (ccs & 0x1); DPRINTF(("xhci update ep-ring, addr %lx", (devep->ep_ringaddr | devep->ep_ccs))); } } /* * Outstanding transfer still in progress (device NAK'd earlier) so retry * the transfer again to see if it succeeds. 
*/ static int pci_xhci_try_usb_xfer(struct pci_xhci_softc *sc, struct pci_xhci_dev_emu *dev, struct pci_xhci_dev_ep *devep, struct xhci_endp_ctx *ep_ctx, uint32_t slot, uint32_t epid) { struct usb_data_xfer *xfer; int err; int do_intr; ep_ctx->dwEpCtx0 = FIELD_REPLACE( ep_ctx->dwEpCtx0, XHCI_ST_EPCTX_RUNNING, 0x7, 0); err = 0; do_intr = 0; xfer = devep->ep_xfer; USB_DATA_XFER_LOCK(xfer); /* outstanding requests queued up */ if (dev->dev_ue->ue_data != NULL) { err = dev->dev_ue->ue_data(dev->dev_sc, xfer, epid & 0x1 ? USB_XFER_IN : USB_XFER_OUT, epid/2); if (err == USB_ERR_CANCELLED) { if (USB_DATA_GET_ERRCODE(&xfer->data[xfer->head]) == USB_NAK) err = XHCI_TRB_ERROR_SUCCESS; } else { err = pci_xhci_xfer_complete(sc, xfer, slot, epid, &do_intr); if (err == XHCI_TRB_ERROR_SUCCESS && do_intr) { pci_xhci_assert_interrupt(sc); } /* XXX should not do it if error? */ USB_DATA_XFER_RESET(xfer); } } USB_DATA_XFER_UNLOCK(xfer); return (err); } static int pci_xhci_handle_transfer(struct pci_xhci_softc *sc, struct pci_xhci_dev_emu *dev, struct pci_xhci_dev_ep *devep, struct xhci_endp_ctx *ep_ctx, struct xhci_trb *trb, uint32_t slot, uint32_t epid, uint64_t addr, uint32_t ccs, uint32_t streamid) { struct xhci_trb *setup_trb; struct usb_data_xfer *xfer; struct usb_data_xfer_block *xfer_block; uint64_t val; uint32_t trbflags; int do_intr, err; int do_retry; ep_ctx->dwEpCtx0 = FIELD_REPLACE(ep_ctx->dwEpCtx0, XHCI_ST_EPCTX_RUNNING, 0x7, 0); xfer = devep->ep_xfer; USB_DATA_XFER_LOCK(xfer); DPRINTF(("pci_xhci handle_transfer slot %u", slot)); retry: err = 0; do_retry = 0; do_intr = 0; setup_trb = NULL; while (1) { pci_xhci_dump_trb(trb); trbflags = trb->dwTrb3; if (XHCI_TRB_3_TYPE_GET(trbflags) != XHCI_TRB_TYPE_LINK && (trbflags & XHCI_TRB_3_CYCLE_BIT) != (ccs & XHCI_TRB_3_CYCLE_BIT)) { DPRINTF(("Cycle-bit changed trbflags %x, ccs %x", trbflags & XHCI_TRB_3_CYCLE_BIT, ccs)); break; } xfer_block = NULL; switch (XHCI_TRB_3_TYPE_GET(trbflags)) { case XHCI_TRB_TYPE_LINK: if (trb->dwTrb3 & XHCI_TRB_3_TC_BIT) ccs ^= 0x1; xfer_block = usb_data_xfer_append(xfer, NULL, 0, (void *)addr, ccs); xfer_block->processed = 1; break; case XHCI_TRB_TYPE_SETUP_STAGE: if ((trbflags & XHCI_TRB_3_IDT_BIT) == 0 || XHCI_TRB_2_BYTES_GET(trb->dwTrb2) != 8) { DPRINTF(("pci_xhci: invalid setup trb")); err = XHCI_TRB_ERROR_TRB; goto errout; } setup_trb = trb; val = trb->qwTrb0; if (!xfer->ureq) xfer->ureq = malloc( sizeof(struct usb_device_request)); memcpy(xfer->ureq, &val, sizeof(struct usb_device_request)); xfer_block = usb_data_xfer_append(xfer, NULL, 0, (void *)addr, ccs); xfer_block->processed = 1; break; case XHCI_TRB_TYPE_NORMAL: case XHCI_TRB_TYPE_ISOCH: if (setup_trb != NULL) { DPRINTF(("pci_xhci: trb not supposed to be in " "ctl scope")); err = XHCI_TRB_ERROR_TRB; goto errout; } /* fall through */ case XHCI_TRB_TYPE_DATA_STAGE: xfer_block = usb_data_xfer_append(xfer, (void *)(trbflags & XHCI_TRB_3_IDT_BIT ? 
&trb->qwTrb0 : XHCI_GADDR(sc, trb->qwTrb0)), trb->dwTrb2 & 0x1FFFF, (void *)addr, ccs); break; case XHCI_TRB_TYPE_STATUS_STAGE: xfer_block = usb_data_xfer_append(xfer, NULL, 0, (void *)addr, ccs); break; case XHCI_TRB_TYPE_NOOP: xfer_block = usb_data_xfer_append(xfer, NULL, 0, (void *)addr, ccs); xfer_block->processed = 1; break; case XHCI_TRB_TYPE_EVENT_DATA: xfer_block = usb_data_xfer_append(xfer, NULL, 0, (void *)addr, ccs); if ((epid > 1) && (trbflags & XHCI_TRB_3_IOC_BIT)) { xfer_block->processed = 1; } break; default: DPRINTF(("pci_xhci: handle xfer unexpected trb type " "0x%x", XHCI_TRB_3_TYPE_GET(trbflags))); err = XHCI_TRB_ERROR_TRB; goto errout; } trb = pci_xhci_trb_next(sc, trb, &addr); DPRINTF(("pci_xhci: next trb: 0x%lx", (uint64_t)trb)); if (xfer_block) { xfer_block->trbnext = addr; xfer_block->streamid = streamid; } if (!setup_trb && !(trbflags & XHCI_TRB_3_CHAIN_BIT) && XHCI_TRB_3_TYPE_GET(trbflags) != XHCI_TRB_TYPE_LINK) { break; } /* handle current batch that requires interrupt on complete */ if (trbflags & XHCI_TRB_3_IOC_BIT) { DPRINTF(("pci_xhci: trb IOC bit set")); if (epid == 1) do_retry = 1; break; } } DPRINTF(("pci_xhci[%d]: xfer->ndata %u", __LINE__, xfer->ndata)); if (xfer->ndata <= 0) goto errout; if (epid == 1) { err = USB_ERR_NOT_STARTED; if (dev->dev_ue->ue_request != NULL) err = dev->dev_ue->ue_request(dev->dev_sc, xfer); setup_trb = NULL; } else { /* handle data transfer */ pci_xhci_try_usb_xfer(sc, dev, devep, ep_ctx, slot, epid); err = XHCI_TRB_ERROR_SUCCESS; goto errout; } err = USB_TO_XHCI_ERR(err); if ((err == XHCI_TRB_ERROR_SUCCESS) || (err == XHCI_TRB_ERROR_STALL) || (err == XHCI_TRB_ERROR_SHORT_PKT)) { err = pci_xhci_xfer_complete(sc, xfer, slot, epid, &do_intr); if (err != XHCI_TRB_ERROR_SUCCESS) do_retry = 0; } errout: if (err == XHCI_TRB_ERROR_EV_RING_FULL) DPRINTF(("pci_xhci[%d]: event ring full", __LINE__)); if (!do_retry) USB_DATA_XFER_UNLOCK(xfer); if (do_intr) pci_xhci_assert_interrupt(sc); if (do_retry) { USB_DATA_XFER_RESET(xfer); DPRINTF(("pci_xhci[%d]: retry:continuing with next TRBs", __LINE__)); goto retry; } if (epid == 1) USB_DATA_XFER_RESET(xfer); return (err); } static void pci_xhci_device_doorbell(struct pci_xhci_softc *sc, uint32_t slot, uint32_t epid, uint32_t streamid) { struct pci_xhci_dev_emu *dev; struct pci_xhci_dev_ep *devep; struct xhci_dev_ctx *dev_ctx; struct xhci_endp_ctx *ep_ctx; struct pci_xhci_trb_ring *sctx_tr; struct xhci_trb *trb; uint64_t ringaddr; uint32_t ccs; DPRINTF(("pci_xhci doorbell slot %u epid %u stream %u", slot, epid, streamid)); if (slot == 0 || slot > XHCI_MAX_SLOTS) { DPRINTF(("pci_xhci: invalid doorbell slot %u", slot)); return; } if (epid == 0 || epid >= XHCI_MAX_ENDPOINTS) { DPRINTF(("pci_xhci: invalid endpoint %u", epid)); return; } dev = XHCI_SLOTDEV_PTR(sc, slot); devep = &dev->eps[epid]; dev_ctx = pci_xhci_get_dev_ctx(sc, slot); if (!dev_ctx) { return; } ep_ctx = &dev_ctx->ctx_ep[epid]; sctx_tr = NULL; DPRINTF(("pci_xhci: device doorbell ep[%u] %08x %08x %016lx %08x", epid, ep_ctx->dwEpCtx0, ep_ctx->dwEpCtx1, ep_ctx->qwEpCtx2, ep_ctx->dwEpCtx4)); if (ep_ctx->qwEpCtx2 == 0) return; /* handle pending transfers */ if (devep->ep_xfer->ndata > 0) { pci_xhci_try_usb_xfer(sc, dev, devep, ep_ctx, slot, epid); return; } /* get next trb work item */ if (XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0) != 0) { struct xhci_stream_ctx *sctx; /* * Stream IDs of 0, 65535 (any stream), and 65534 * (prime) are invalid. 
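 *
 * Illustrative note, not part of the patch: the stream ID checked below
 * comes from the guest's doorbell write, which pci_xhci_dbregs_write()
 * further down decodes with XHCI_DB_TARGET_GET() and XHCI_DB_SID_GET(),
 * roughly:
 *
 *	target   = value & 0xFF;		(DB Target: endpoint ID)
 *	streamid = (value >> 16) & 0xFFFF;	(DB Stream ID)
 *
 * so the reserved IDs 0, 65534 and 65535 rejected here are exactly the
 * values a guest could have written into that register.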
*/ if (streamid == 0 || streamid == 65534 || streamid == 65535) { DPRINTF(("pci_xhci: invalid stream %u", streamid)); return; } sctx = NULL; pci_xhci_find_stream(sc, ep_ctx, streamid, &sctx); if (sctx == NULL) { DPRINTF(("pci_xhci: invalid stream %u", streamid)); return; } sctx_tr = &devep->ep_sctx_trbs[streamid]; ringaddr = sctx_tr->ringaddr; ccs = sctx_tr->ccs; trb = XHCI_GADDR(sc, sctx_tr->ringaddr & ~0xFUL); DPRINTF(("doorbell, stream %u, ccs %lx, trb ccs %x", streamid, ep_ctx->qwEpCtx2 & XHCI_TRB_3_CYCLE_BIT, trb->dwTrb3 & XHCI_TRB_3_CYCLE_BIT)); } else { if (streamid != 0) { DPRINTF(("pci_xhci: invalid stream %u", streamid)); return; } ringaddr = devep->ep_ringaddr; ccs = devep->ep_ccs; trb = devep->ep_tr; DPRINTF(("doorbell, ccs %lx, trb ccs %x", ep_ctx->qwEpCtx2 & XHCI_TRB_3_CYCLE_BIT, trb->dwTrb3 & XHCI_TRB_3_CYCLE_BIT)); } if (XHCI_TRB_3_TYPE_GET(trb->dwTrb3) == 0) { DPRINTF(("pci_xhci: ring %lx trb[%lx] EP %u is RESERVED?", ep_ctx->qwEpCtx2, devep->ep_ringaddr, epid)); return; } pci_xhci_handle_transfer(sc, dev, devep, ep_ctx, trb, slot, epid, ringaddr, ccs, streamid); } static void pci_xhci_dbregs_write(struct pci_xhci_softc *sc, uint64_t offset, uint64_t value) { offset = (offset - sc->dboff) / sizeof(uint32_t); DPRINTF(("pci_xhci: doorbell write offset 0x%lx: 0x%lx", offset, value)); if (XHCI_HALTED(sc)) { DPRINTF(("pci_xhci: controller halted")); return; } if (offset == 0) pci_xhci_complete_commands(sc); else if (sc->portregs != NULL) pci_xhci_device_doorbell(sc, offset, XHCI_DB_TARGET_GET(value), XHCI_DB_SID_GET(value)); } static void pci_xhci_rtsregs_write(struct pci_xhci_softc *sc, uint64_t offset, uint64_t value) { struct pci_xhci_rtsregs *rts; offset -= sc->rtsoff; if (offset == 0) { DPRINTF(("pci_xhci attempted write to MFINDEX")); return; } DPRINTF(("pci_xhci: runtime regs write offset 0x%lx: 0x%lx", offset, value)); offset -= 0x20; /* start of intrreg */ rts = &sc->rtsregs; switch (offset) { case 0x00: if (value & XHCI_IMAN_INTR_PEND) rts->intrreg.iman &= ~XHCI_IMAN_INTR_PEND; rts->intrreg.iman = (value & XHCI_IMAN_INTR_ENA) | (rts->intrreg.iman & XHCI_IMAN_INTR_PEND); if (!(value & XHCI_IMAN_INTR_ENA)) pci_xhci_deassert_interrupt(sc); break; case 0x04: rts->intrreg.imod = value; break; case 0x08: rts->intrreg.erstsz = value & 0xFFFF; break; case 0x10: /* ERSTBA low bits */ rts->intrreg.erstba = MASK_64_HI(sc->rtsregs.intrreg.erstba) | (value & ~0x3F); break; case 0x14: /* ERSTBA high bits */ rts->intrreg.erstba = (value << 32) | MASK_64_LO(sc->rtsregs.intrreg.erstba); rts->erstba_p = XHCI_GADDR(sc, sc->rtsregs.intrreg.erstba & ~0x3FUL); rts->erst_p = XHCI_GADDR(sc, sc->rtsregs.erstba_p->qwEvrsTablePtr & ~0x3FUL); rts->er_enq_idx = 0; rts->er_events_cnt = 0; DPRINTF(("pci_xhci: wr erstba erst (%p) ptr 0x%lx, sz %u", rts->erstba_p, rts->erstba_p->qwEvrsTablePtr, rts->erstba_p->dwEvrsTableSize)); break; case 0x18: /* ERDP low bits */ rts->intrreg.erdp = MASK_64_HI(sc->rtsregs.intrreg.erdp) | (rts->intrreg.erdp & XHCI_ERDP_LO_BUSY) | (value & ~0xF); if (value & XHCI_ERDP_LO_BUSY) { rts->intrreg.erdp &= ~XHCI_ERDP_LO_BUSY; rts->intrreg.iman &= ~XHCI_IMAN_INTR_PEND; } rts->er_deq_seg = XHCI_ERDP_LO_SINDEX(value); break; case 0x1C: /* ERDP high bits */ rts->intrreg.erdp = (value << 32) | MASK_64_LO(sc->rtsregs.intrreg.erdp); if (rts->er_events_cnt > 0) { uint64_t erdp; uint32_t erdp_i; erdp = rts->intrreg.erdp & ~0xF; erdp_i = (erdp - rts->erstba_p->qwEvrsTablePtr) / sizeof(struct xhci_trb); if (erdp_i <= rts->er_enq_idx) rts->er_events_cnt = rts->er_enq_idx - erdp_i; else 
rts->er_events_cnt = rts->erstba_p->dwEvrsTableSize - (erdp_i - rts->er_enq_idx); DPRINTF(("pci_xhci: erdp 0x%lx, events cnt %u", erdp, rts->er_events_cnt)); } break; default: DPRINTF(("pci_xhci attempted write to RTS offset 0x%lx", offset)); break; } } static uint64_t pci_xhci_portregs_read(struct pci_xhci_softc *sc, uint64_t offset) { int port; uint32_t *p; if (sc->portregs == NULL) return (0); port = (offset - 0x3F0) / 0x10; if (port > XHCI_MAX_DEVS) { DPRINTF(("pci_xhci: portregs_read port %d >= XHCI_MAX_DEVS", port)); /* return default value for unused port */ return (XHCI_PS_SPEED_SET(3)); } offset = (offset - 0x3F0) % 0x10; p = &sc->portregs[port].portsc; p += offset / sizeof(uint32_t); DPRINTF(("pci_xhci: portregs read offset 0x%lx port %u -> 0x%x", offset, port, *p)); return (*p); } static void pci_xhci_hostop_write(struct pci_xhci_softc *sc, uint64_t offset, uint64_t value) { offset -= XHCI_CAPLEN; if (offset < 0x400) DPRINTF(("pci_xhci: hostop write offset 0x%lx: 0x%lx", offset, value)); switch (offset) { case XHCI_USBCMD: sc->opregs.usbcmd = pci_xhci_usbcmd_write(sc, value & 0x3F0F); break; case XHCI_USBSTS: /* clear bits on write */ sc->opregs.usbsts &= ~(value & (XHCI_STS_HSE|XHCI_STS_EINT|XHCI_STS_PCD|XHCI_STS_SSS| XHCI_STS_RSS|XHCI_STS_SRE|XHCI_STS_CNR)); break; case XHCI_PAGESIZE: /* read only */ break; case XHCI_DNCTRL: sc->opregs.dnctrl = value & 0xFFFF; break; case XHCI_CRCR_LO: if (sc->opregs.crcr & XHCI_CRCR_LO_CRR) { sc->opregs.crcr &= ~(XHCI_CRCR_LO_CS|XHCI_CRCR_LO_CA); sc->opregs.crcr |= value & (XHCI_CRCR_LO_CS|XHCI_CRCR_LO_CA); } else { sc->opregs.crcr = MASK_64_HI(sc->opregs.crcr) | (value & (0xFFFFFFC0 | XHCI_CRCR_LO_RCS)); } break; case XHCI_CRCR_HI: if (!(sc->opregs.crcr & XHCI_CRCR_LO_CRR)) { sc->opregs.crcr = MASK_64_LO(sc->opregs.crcr) | (value << 32); sc->opregs.cr_p = XHCI_GADDR(sc, sc->opregs.crcr & ~0xF); } if (sc->opregs.crcr & XHCI_CRCR_LO_CS) { /* Stop operation of Command Ring */ } if (sc->opregs.crcr & XHCI_CRCR_LO_CA) { /* Abort command */ } break; case XHCI_DCBAAP_LO: sc->opregs.dcbaap = MASK_64_HI(sc->opregs.dcbaap) | (value & 0xFFFFFFC0); break; case XHCI_DCBAAP_HI: sc->opregs.dcbaap = MASK_64_LO(sc->opregs.dcbaap) | (value << 32); sc->opregs.dcbaa_p = XHCI_GADDR(sc, sc->opregs.dcbaap & ~0x3FUL); DPRINTF(("pci_xhci: opregs dcbaap = 0x%lx (vaddr 0x%lx)", sc->opregs.dcbaap, (uint64_t)sc->opregs.dcbaa_p)); break; case XHCI_CONFIG: sc->opregs.config = value & 0x03FF; break; default: if (offset >= 0x400) pci_xhci_portregs_write(sc, offset, value); break; } } static void pci_xhci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { struct pci_xhci_softc *sc; sc = pi->pi_arg; assert(baridx == 0); pthread_mutex_lock(&sc->mtx); if (offset < XHCI_CAPLEN) /* read only registers */ WPRINTF(("pci_xhci: write RO-CAPs offset %ld", offset)); else if (offset < sc->dboff) pci_xhci_hostop_write(sc, offset, value); else if (offset < sc->rtsoff) pci_xhci_dbregs_write(sc, offset, value); else if (offset < sc->regsend) pci_xhci_rtsregs_write(sc, offset, value); else WPRINTF(("pci_xhci: write invalid offset %ld", offset)); pthread_mutex_unlock(&sc->mtx); } static uint64_t pci_xhci_hostcap_read(struct pci_xhci_softc *sc, uint64_t offset) { uint64_t value; switch (offset) { case XHCI_CAPLENGTH: /* 0x00 */ value = sc->caplength; break; case XHCI_HCSPARAMS1: /* 0x04 */ value = sc->hcsparams1; break; case XHCI_HCSPARAMS2: /* 0x08 */ value = sc->hcsparams2; break; case XHCI_HCSPARAMS3: /* 0x0C */ value = 
sc->hcsparams3; break; case XHCI_HCSPARAMS0: /* 0x10 */ value = sc->hccparams1; break; case XHCI_DBOFF: /* 0x14 */ value = sc->dboff; break; case XHCI_RTSOFF: /* 0x18 */ value = sc->rtsoff; break; case XHCI_HCCPRAMS2: /* 0x1C */ value = sc->hccparams2; break; default: value = 0; break; } DPRINTF(("pci_xhci: hostcap read offset 0x%lx -> 0x%lx", offset, value)); return (value); } static uint64_t pci_xhci_hostop_read(struct pci_xhci_softc *sc, uint64_t offset) { uint64_t value; offset = (offset - XHCI_CAPLEN); switch (offset) { case XHCI_USBCMD: /* 0x00 */ value = sc->opregs.usbcmd; break; case XHCI_USBSTS: /* 0x04 */ value = sc->opregs.usbsts; break; case XHCI_PAGESIZE: /* 0x08 */ value = sc->opregs.pgsz; break; case XHCI_DNCTRL: /* 0x14 */ value = sc->opregs.dnctrl; break; case XHCI_CRCR_LO: /* 0x18 */ value = sc->opregs.crcr & XHCI_CRCR_LO_CRR; break; case XHCI_CRCR_HI: /* 0x1C */ value = 0; break; case XHCI_DCBAAP_LO: /* 0x30 */ value = sc->opregs.dcbaap & 0xFFFFFFFF; break; case XHCI_DCBAAP_HI: /* 0x34 */ value = (sc->opregs.dcbaap >> 32) & 0xFFFFFFFF; break; case XHCI_CONFIG: /* 0x38 */ value = sc->opregs.config; break; default: if (offset >= 0x400) value = pci_xhci_portregs_read(sc, offset); else value = 0; break; } if (offset < 0x400) DPRINTF(("pci_xhci: hostop read offset 0x%lx -> 0x%lx", offset, value)); return (value); } static uint64_t pci_xhci_dbregs_read(struct pci_xhci_softc *sc, uint64_t offset) { /* read doorbell always returns 0 */ return (0); } static uint64_t pci_xhci_rtsregs_read(struct pci_xhci_softc *sc, uint64_t offset) { uint32_t value; offset -= sc->rtsoff; value = 0; if (offset == XHCI_MFINDEX) { value = sc->rtsregs.mfindex; } else if (offset >= 0x20) { int item; uint32_t *p; offset -= 0x20; item = offset % 32; assert(offset < sizeof(sc->rtsregs.intrreg)); p = &sc->rtsregs.intrreg.iman; p += item / sizeof(uint32_t); value = *p; } DPRINTF(("pci_xhci: rtsregs read offset 0x%lx -> 0x%x", offset, value)); return (value); } static uint64_t pci_xhci_xecp_read(struct pci_xhci_softc *sc, uint64_t offset) { uint32_t value; offset -= sc->regsend; value = 0; switch (offset) { case 0: /* rev major | rev minor | next-cap | cap-id */ value = (0x02 << 24) | (4 << 8) | XHCI_ID_PROTOCOLS; break; case 4: /* name string = "USB" */ value = 0x20425355; break; case 8: /* psic | proto-defined | compat # | compat offset */ value = ((XHCI_MAX_DEVS/2) << 8) | sc->usb2_port_start; break; case 12: break; case 16: /* rev major | rev minor | next-cap | cap-id */ value = (0x03 << 24) | XHCI_ID_PROTOCOLS; break; case 20: /* name string = "USB" */ value = 0x20425355; break; case 24: /* psic | proto-defined | compat # | compat offset */ value = ((XHCI_MAX_DEVS/2) << 8) | sc->usb3_port_start; break; case 28: break; default: DPRINTF(("pci_xhci: xecp invalid offset 0x%lx", offset)); break; } DPRINTF(("pci_xhci: xecp read offset 0x%lx -> 0x%x", offset, value)); return (value); } static uint64_t pci_xhci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { struct pci_xhci_softc *sc; uint32_t value; sc = pi->pi_arg; assert(baridx == 0); pthread_mutex_lock(&sc->mtx); if (offset < XHCI_CAPLEN) value = pci_xhci_hostcap_read(sc, offset); else if (offset < sc->dboff) value = pci_xhci_hostop_read(sc, offset); else if (offset < sc->rtsoff) value = pci_xhci_dbregs_read(sc, offset); else if (offset < sc->regsend) value = pci_xhci_rtsregs_read(sc, offset); else if (offset < (sc->regsend + 4*32)) value = pci_xhci_xecp_read(sc, offset); else { value = 0; 
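		/*
		 * Illustrative summary, not part of the patch: a read only
		 * falls through to this point when the offset lies beyond
		 * every region of the BAR0 layout that the dispatch above
		 * assumes, roughly:
		 *
		 *	[0, XHCI_CAPLEN)	   capability registers
		 *	[XHCI_CAPLEN, dboff)	   operational + port registers
		 *	[dboff, rtsoff)		   doorbell array
		 *	[rtsoff, regsend)	   runtime/interrupter registers
		 *	[regsend, regsend + 4*32)  extended capabilities (xecp)
		 */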
WPRINTF(("pci_xhci: read invalid offset %ld", offset)); } pthread_mutex_unlock(&sc->mtx); switch (size) { case 1: value &= 0xFF; break; case 2: value &= 0xFFFF; break; case 4: value &= 0xFFFFFFFF; break; } return (value); } static void pci_xhci_reset_port(struct pci_xhci_softc *sc, int portn, int warm) { struct pci_xhci_portregs *port; struct pci_xhci_dev_emu *dev; struct xhci_trb evtrb; int error; assert(portn <= XHCI_MAX_DEVS); DPRINTF(("xhci reset port %d", portn)); port = XHCI_PORTREG_PTR(sc, portn); dev = XHCI_DEVINST_PTR(sc, portn); if (dev) { port->portsc &= ~(XHCI_PS_PLS_MASK | XHCI_PS_PR | XHCI_PS_PRC); port->portsc |= XHCI_PS_PED | XHCI_PS_SPEED_SET(dev->dev_ue->ue_usbspeed); if (warm && dev->dev_ue->ue_usbver == 3) { port->portsc |= XHCI_PS_WRC; } if ((port->portsc & XHCI_PS_PRC) == 0) { port->portsc |= XHCI_PS_PRC; pci_xhci_set_evtrb(&evtrb, portn, XHCI_TRB_ERROR_SUCCESS, XHCI_TRB_EVENT_PORT_STS_CHANGE); error = pci_xhci_insert_event(sc, &evtrb, 1); if (error != XHCI_TRB_ERROR_SUCCESS) DPRINTF(("xhci reset port insert event " "failed")); } } } static void pci_xhci_init_port(struct pci_xhci_softc *sc, int portn) { struct pci_xhci_portregs *port; struct pci_xhci_dev_emu *dev; port = XHCI_PORTREG_PTR(sc, portn); dev = XHCI_DEVINST_PTR(sc, portn); if (dev) { port->portsc = XHCI_PS_CCS | /* connected */ XHCI_PS_PP; /* port power */ - + if (dev->dev_ue->ue_usbver == 2) { port->portsc |= XHCI_PS_PLS_SET(UPS_PORT_LS_POLL) | XHCI_PS_SPEED_SET(dev->dev_ue->ue_usbspeed); } else { port->portsc |= XHCI_PS_PLS_SET(UPS_PORT_LS_U0) | XHCI_PS_PED | /* enabled */ XHCI_PS_SPEED_SET(dev->dev_ue->ue_usbspeed); } - + DPRINTF(("Init port %d 0x%x", portn, port->portsc)); } else { port->portsc = XHCI_PS_PLS_SET(UPS_PORT_LS_RX_DET) | XHCI_PS_PP; DPRINTF(("Init empty port %d 0x%x", portn, port->portsc)); } } static int pci_xhci_dev_intr(struct usb_hci *hci, int epctx) { struct pci_xhci_dev_emu *dev; struct xhci_dev_ctx *dev_ctx; struct xhci_trb evtrb; struct pci_xhci_softc *sc; struct pci_xhci_portregs *p; struct xhci_endp_ctx *ep_ctx; int error = 0; int dir_in; int epid; dir_in = epctx & 0x80; epid = epctx & ~0x80; /* HW endpoint contexts are 0-15; convert to epid based on dir */ epid = (epid * 2) + (dir_in ? 1 : 0); assert(epid >= 1 && epid <= 31); dev = hci->hci_sc; sc = dev->xsc; /* check if device is ready; OS has to initialise it */ if (sc->rtsregs.erstba_p == NULL || (sc->opregs.usbcmd & XHCI_CMD_RS) == 0 || dev->dev_ctx == NULL) return (0); p = XHCI_PORTREG_PTR(sc, hci->hci_port); /* raise event if link U3 (suspended) state */ if (XHCI_PS_PLS_GET(p->portsc) == 3) { p->portsc &= ~XHCI_PS_PLS_MASK; p->portsc |= XHCI_PS_PLS_SET(UPS_PORT_LS_RESUME); if ((p->portsc & XHCI_PS_PLC) != 0) return (0); p->portsc |= XHCI_PS_PLC; pci_xhci_set_evtrb(&evtrb, hci->hci_port, XHCI_TRB_ERROR_SUCCESS, XHCI_TRB_EVENT_PORT_STS_CHANGE); error = pci_xhci_insert_event(sc, &evtrb, 0); if (error != XHCI_TRB_ERROR_SUCCESS) goto done; } dev_ctx = dev->dev_ctx; ep_ctx = &dev_ctx->ctx_ep[epid]; if ((ep_ctx->dwEpCtx0 & 0x7) == XHCI_ST_EPCTX_DISABLED) { DPRINTF(("xhci device interrupt on disabled endpoint %d", epid)); return (0); } DPRINTF(("xhci device interrupt on endpoint %d", epid)); pci_xhci_device_doorbell(sc, hci->hci_port, epid, 0); done: return (error); } static int pci_xhci_dev_event(struct usb_hci *hci, enum hci_usbev evid, void *param) { DPRINTF(("xhci device event port %d", hci->hci_port)); return (0); } /* * Each controller contains a "slot" node which contains a list of * child nodes each of which is a device. 
Each slot node's name * corresponds to a specific controller slot. These nodes * contain a "device" variable identifying the device model of the * USB device. For example: * * pci.0.1.0 * .device="xhci" * .slot * .1 * .device="tablet" */ static int pci_xhci_legacy_config(nvlist_t *nvl, const char *opts) { char node_name[16]; nvlist_t *slots_nvl, *slot_nvl; char *cp, *opt, *str, *tofree; int slot; if (opts == NULL) return (0); slots_nvl = create_relative_config_node(nvl, "slot"); slot = 1; tofree = str = strdup(opts); while ((opt = strsep(&str, ",")) != NULL) { /* device[=] */ cp = strchr(opt, '='); if (cp != NULL) { *cp = '\0'; cp++; } snprintf(node_name, sizeof(node_name), "%d", slot); slot++; slot_nvl = create_relative_config_node(slots_nvl, node_name); set_config_value_node(slot_nvl, "device", opt); /* * NB: Given that we split on commas above, the legacy * format only supports a single option. */ if (cp != NULL && *cp != '\0') pci_parse_legacy_config(slot_nvl, cp); } free(tofree); return (0); } static int pci_xhci_parse_devices(struct pci_xhci_softc *sc, nvlist_t *nvl) { struct pci_xhci_dev_emu *dev; struct usb_devemu *ue; const nvlist_t *slots_nvl, *slot_nvl; const char *name, *device; char *cp; void *devsc, *cookie; long slot; int type, usb3_port, usb2_port, i, ndevices; usb3_port = sc->usb3_port_start; usb2_port = sc->usb2_port_start; sc->devices = calloc(XHCI_MAX_DEVS, sizeof(struct pci_xhci_dev_emu *)); sc->slots = calloc(XHCI_MAX_SLOTS, sizeof(struct pci_xhci_dev_emu *)); /* port and slot numbering start from 1 */ sc->devices--; sc->slots--; ndevices = 0; slots_nvl = find_relative_config_node(nvl, "slot"); if (slots_nvl == NULL) goto portsfinal; cookie = NULL; while ((name = nvlist_next(slots_nvl, &type, &cookie)) != NULL) { if (usb2_port == ((sc->usb2_port_start) + XHCI_MAX_DEVS/2) || usb3_port == ((sc->usb3_port_start) + XHCI_MAX_DEVS/2)) { WPRINTF(("pci_xhci max number of USB 2 or 3 " "devices reached, max %d", XHCI_MAX_DEVS/2)); goto bad; } if (type != NV_TYPE_NVLIST) { EPRINTLN( "pci_xhci: config variable '%s' under slot node", name); goto bad; } slot = strtol(name, &cp, 0); if (*cp != '\0' || slot <= 0 || slot > XHCI_MAX_SLOTS) { EPRINTLN("pci_xhci: invalid slot '%s'", name); goto bad; } if (XHCI_SLOTDEV_PTR(sc, slot) != NULL) { EPRINTLN("pci_xhci: duplicate slot '%s'", name); goto bad; } slot_nvl = nvlist_get_nvlist(slots_nvl, name); device = get_config_value_node(slot_nvl, "device"); if (device == NULL) { EPRINTLN( "pci_xhci: missing \"device\" value for slot '%s'", name); goto bad; } ue = usb_emu_finddev(device); if (ue == NULL) { EPRINTLN("pci_xhci: unknown device model \"%s\"", device); goto bad; } DPRINTF(("pci_xhci adding device %s", device)); dev = calloc(1, sizeof(struct pci_xhci_dev_emu)); dev->xsc = sc; dev->hci.hci_sc = dev; dev->hci.hci_intr = pci_xhci_dev_intr; dev->hci.hci_event = pci_xhci_dev_event; if (ue->ue_usbver == 2) { if (usb2_port == sc->usb2_port_start + XHCI_MAX_DEVS / 2) { WPRINTF(("pci_xhci max number of USB 2 devices " "reached, max %d", XHCI_MAX_DEVS / 2)); goto bad; } dev->hci.hci_port = usb2_port; usb2_port++; } else { if (usb3_port == sc->usb3_port_start + XHCI_MAX_DEVS / 2) { WPRINTF(("pci_xhci max number of USB 3 devices " "reached, max %d", XHCI_MAX_DEVS / 2)); goto bad; } dev->hci.hci_port = usb3_port; usb3_port++; } XHCI_DEVINST_PTR(sc, dev->hci.hci_port) = dev; dev->hci.hci_address = 0; devsc = ue->ue_init(&dev->hci, nvl); if (devsc == NULL) { goto bad; } dev->dev_ue = ue; dev->dev_sc = devsc; XHCI_SLOTDEV_PTR(sc, slot) = dev; 
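		/*
		 * Illustrative example, not part of the patch: a legacy
		 * command line option such as "-s 4,xhci,tablet" ends up as
		 * config values equivalent to
		 *
		 *	pci.0.4.0.device = "xhci"
		 *	pci.0.4.0.slot.1.device = "tablet"
		 *
		 * where the "slot" subtree is what pci_xhci_legacy_config()
		 * above builds from the "tablet" option string, and this loop
		 * allocates one pci_xhci_dev_emu per numbered child of it.
		 */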
ndevices++; } portsfinal: sc->portregs = calloc(XHCI_MAX_DEVS, sizeof(struct pci_xhci_portregs)); sc->portregs--; if (ndevices > 0) { for (i = 1; i <= XHCI_MAX_DEVS; i++) { pci_xhci_init_port(sc, i); } } else { WPRINTF(("pci_xhci no USB devices configured")); } return (0); bad: for (i = 1; i <= XHCI_MAX_DEVS; i++) { free(XHCI_DEVINST_PTR(sc, i)); } free(sc->devices + 1); free(sc->slots + 1); return (-1); } static int pci_xhci_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) { struct pci_xhci_softc *sc; int error; if (xhci_in_use) { WPRINTF(("pci_xhci controller already defined")); return (-1); } xhci_in_use = 1; sc = calloc(1, sizeof(struct pci_xhci_softc)); pi->pi_arg = sc; sc->xsc_pi = pi; sc->usb2_port_start = (XHCI_MAX_DEVS/2) + 1; sc->usb3_port_start = 1; /* discover devices */ error = pci_xhci_parse_devices(sc, nvl); if (error < 0) goto done; else error = 0; sc->caplength = XHCI_SET_CAPLEN(XHCI_CAPLEN) | XHCI_SET_HCIVERSION(0x0100); sc->hcsparams1 = XHCI_SET_HCSP1_MAXPORTS(XHCI_MAX_DEVS) | XHCI_SET_HCSP1_MAXINTR(1) | /* interrupters */ XHCI_SET_HCSP1_MAXSLOTS(XHCI_MAX_SLOTS); sc->hcsparams2 = XHCI_SET_HCSP2_ERSTMAX(XHCI_ERST_MAX) | XHCI_SET_HCSP2_IST(0x04); sc->hcsparams3 = 0; /* no latency */ sc->hccparams1 = XHCI_SET_HCCP1_AC64(1) | /* 64-bit addrs */ XHCI_SET_HCCP1_NSS(1) | /* no 2nd-streams */ XHCI_SET_HCCP1_SPC(1) | /* short packet */ XHCI_SET_HCCP1_MAXPSA(XHCI_STREAMS_MAX); sc->hccparams2 = XHCI_SET_HCCP2_LEC(1) | XHCI_SET_HCCP2_U3C(1); sc->dboff = XHCI_SET_DOORBELL(XHCI_CAPLEN + XHCI_PORTREGS_START + XHCI_MAX_DEVS * sizeof(struct pci_xhci_portregs)); /* dboff must be 32-bit aligned */ if (sc->dboff & 0x3) sc->dboff = (sc->dboff + 0x3) & ~0x3; /* rtsoff must be 32-bytes aligned */ sc->rtsoff = XHCI_SET_RTSOFFSET(sc->dboff + (XHCI_MAX_SLOTS+1) * 32); if (sc->rtsoff & 0x1F) sc->rtsoff = (sc->rtsoff + 0x1F) & ~0x1F; DPRINTF(("pci_xhci dboff: 0x%x, rtsoff: 0x%x", sc->dboff, sc->rtsoff)); sc->opregs.usbsts = XHCI_STS_HCH; sc->opregs.pgsz = XHCI_PAGESIZE_4K; pci_xhci_reset(sc); sc->regsend = sc->rtsoff + 0x20 + 32; /* only 1 intrpter */ /* * Set extended capabilities pointer to be after regsend; * value of xecp field is 32-bit offset. 
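 *
 * Illustrative arithmetic, not part of the patch: the xECP field sits in
 * the upper bits of HCCPARAMS1 and is expressed in 32-bit words, so a
 * guest recovers the byte offset of the capability list roughly as
 *
 *	xecp_bytes = ((hccparams1 >> 16) & 0xFFFF) << 2;
 *
 * Storing regsend/4 below therefore makes xecp_bytes equal regsend,
 * which is the area pci_xhci_xecp_read() answers for.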
*/ sc->hccparams1 |= XHCI_SET_HCCP1_XECP(sc->regsend/4); pci_set_cfgdata16(pi, PCIR_DEVICE, 0x1E31); pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_SERIALBUS); pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_SERIALBUS_USB); pci_set_cfgdata8(pi, PCIR_PROGIF,PCIP_SERIALBUS_USB_XHCI); pci_set_cfgdata8(pi, PCI_USBREV, PCI_USB_REV_3_0); pci_emul_add_msicap(pi, 1); /* regsend + xecp registers */ pci_emul_alloc_bar(pi, 0, PCIBAR_MEM32, sc->regsend + 4*32); DPRINTF(("pci_xhci pci_emu_alloc: %d", sc->regsend + 4*32)); pci_lintr_request(pi); pthread_mutex_init(&sc->mtx, NULL); done: if (error) { free(sc); } return (error); } #ifdef BHYVE_SNAPSHOT static void pci_xhci_map_devs_slots(struct pci_xhci_softc *sc, int maps[]) { int i, j; struct pci_xhci_dev_emu *dev, *slot; memset(maps, 0, sizeof(maps[0]) * XHCI_MAX_SLOTS); for (i = 1; i <= XHCI_MAX_SLOTS; i++) { for (j = 1; j <= XHCI_MAX_DEVS; j++) { slot = XHCI_SLOTDEV_PTR(sc, i); dev = XHCI_DEVINST_PTR(sc, j); if (slot == dev) maps[i] = j; } } } static int pci_xhci_snapshot_ep(struct pci_xhci_softc *sc, struct pci_xhci_dev_emu *dev, int idx, struct vm_snapshot_meta *meta) { int k; int ret; struct usb_data_xfer *xfer; struct usb_data_xfer_block *xfer_block; /* some sanity checks */ if (meta->op == VM_SNAPSHOT_SAVE) xfer = dev->eps[idx].ep_xfer; SNAPSHOT_VAR_OR_LEAVE(xfer, meta, ret, done); if (xfer == NULL) { ret = 0; goto done; } if (meta->op == VM_SNAPSHOT_RESTORE) { pci_xhci_init_ep(dev, idx); xfer = dev->eps[idx].ep_xfer; } /* save / restore proper */ for (k = 0; k < USB_MAX_XFER_BLOCKS; k++) { xfer_block = &xfer->data[k]; SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(xfer_block->buf, XHCI_GADDR_SIZE(xfer_block->buf), true, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(xfer_block->blen, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(xfer_block->bdone, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(xfer_block->processed, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(xfer_block->hci_data, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(xfer_block->ccs, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(xfer_block->streamid, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(xfer_block->trbnext, meta, ret, done); } SNAPSHOT_VAR_OR_LEAVE(xfer->ureq, meta, ret, done); if (xfer->ureq) { /* xfer->ureq is not allocated at restore time */ if (meta->op == VM_SNAPSHOT_RESTORE) xfer->ureq = malloc(sizeof(struct usb_device_request)); SNAPSHOT_BUF_OR_LEAVE(xfer->ureq, sizeof(struct usb_device_request), meta, ret, done); } SNAPSHOT_VAR_OR_LEAVE(xfer->ndata, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(xfer->head, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(xfer->tail, meta, ret, done); done: return (ret); } static int pci_xhci_snapshot(struct vm_snapshot_meta *meta) { int i, j; int ret; int restore_idx; struct pci_devinst *pi; struct pci_xhci_softc *sc; struct pci_xhci_portregs *port; struct pci_xhci_dev_emu *dev; char dname[SNAP_DEV_NAME_LEN]; int maps[XHCI_MAX_SLOTS + 1]; pi = meta->dev_data; sc = pi->pi_arg; SNAPSHOT_VAR_OR_LEAVE(sc->caplength, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->hcsparams1, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->hcsparams2, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->hcsparams3, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->hccparams1, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->dboff, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rtsoff, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->hccparams2, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->regsend, meta, ret, done); /* opregs */ SNAPSHOT_VAR_OR_LEAVE(sc->opregs.usbcmd, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->opregs.usbsts, meta, 
ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->opregs.pgsz, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->opregs.dnctrl, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->opregs.crcr, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->opregs.dcbaap, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->opregs.config, meta, ret, done); /* opregs.cr_p */ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->opregs.cr_p, XHCI_GADDR_SIZE(sc->opregs.cr_p), true, meta, ret, done); /* opregs.dcbaa_p */ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->opregs.dcbaa_p, XHCI_GADDR_SIZE(sc->opregs.dcbaa_p), true, meta, ret, done); /* rtsregs */ SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.mfindex, meta, ret, done); /* rtsregs.intrreg */ SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.iman, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.imod, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.erstsz, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.rsvd, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.erstba, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.erdp, meta, ret, done); /* rtsregs.erstba_p */ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->rtsregs.erstba_p, XHCI_GADDR_SIZE(sc->rtsregs.erstba_p), true, meta, ret, done); /* rtsregs.erst_p */ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->rtsregs.erst_p, XHCI_GADDR_SIZE(sc->rtsregs.erst_p), true, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_deq_seg, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_enq_idx, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_enq_seg, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_events_cnt, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.event_pcs, meta, ret, done); /* sanity checking */ for (i = 1; i <= XHCI_MAX_DEVS; i++) { dev = XHCI_DEVINST_PTR(sc, i); if (dev == NULL) continue; if (meta->op == VM_SNAPSHOT_SAVE) restore_idx = i; SNAPSHOT_VAR_OR_LEAVE(restore_idx, meta, ret, done); /* check if the restored device (when restoring) is sane */ if (restore_idx != i) { fprintf(stderr, "%s: idx not matching: actual: %d, " "expected: %d\r\n", __func__, restore_idx, i); ret = EINVAL; goto done; } if (meta->op == VM_SNAPSHOT_SAVE) { memset(dname, 0, sizeof(dname)); strncpy(dname, dev->dev_ue->ue_emu, sizeof(dname) - 1); } SNAPSHOT_BUF_OR_LEAVE(dname, sizeof(dname), meta, ret, done); if (meta->op == VM_SNAPSHOT_RESTORE) { dname[sizeof(dname) - 1] = '\0'; if (strcmp(dev->dev_ue->ue_emu, dname)) { fprintf(stderr, "%s: device names mismatch: " "actual: %s, expected: %s\r\n", __func__, dname, dev->dev_ue->ue_emu); ret = EINVAL; goto done; } } } /* portregs */ for (i = 1; i <= XHCI_MAX_DEVS; i++) { port = XHCI_PORTREG_PTR(sc, i); dev = XHCI_DEVINST_PTR(sc, i); if (dev == NULL) continue; SNAPSHOT_VAR_OR_LEAVE(port->portsc, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->portpmsc, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->portli, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->porthlpmc, meta, ret, done); } /* slots */ if (meta->op == VM_SNAPSHOT_SAVE) pci_xhci_map_devs_slots(sc, maps); for (i = 1; i <= XHCI_MAX_SLOTS; i++) { SNAPSHOT_VAR_OR_LEAVE(maps[i], meta, ret, done); if (meta->op == VM_SNAPSHOT_SAVE) { dev = XHCI_SLOTDEV_PTR(sc, i); } else if (meta->op == VM_SNAPSHOT_RESTORE) { if (maps[i] != 0) dev = XHCI_DEVINST_PTR(sc, maps[i]); else dev = NULL; XHCI_SLOTDEV_PTR(sc, i) = dev; } else { /* error */ ret = EINVAL; goto done; } if (dev == NULL) continue; SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(dev->dev_ctx, XHCI_GADDR_SIZE(dev->dev_ctx), true, meta, ret, done); if (dev->dev_ctx != NULL) { for (j = 1; j < XHCI_MAX_ENDPOINTS; j++) { ret 
= pci_xhci_snapshot_ep(sc, dev, j, meta); if (ret != 0) goto done; } } SNAPSHOT_VAR_OR_LEAVE(dev->dev_slotstate, meta, ret, done); /* devices[i]->dev_sc */ dev->dev_ue->ue_snapshot(dev->dev_sc, meta); /* devices[i]->hci */ SNAPSHOT_VAR_OR_LEAVE(dev->hci.hci_address, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(dev->hci.hci_port, meta, ret, done); } SNAPSHOT_VAR_OR_LEAVE(sc->usb2_port_start, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->usb3_port_start, meta, ret, done); done: return (ret); } #endif struct pci_devemu pci_de_xhci = { .pe_emu = "xhci", .pe_init = pci_xhci_init, .pe_legacy_config = pci_xhci_legacy_config, .pe_barwrite = pci_xhci_write, .pe_barread = pci_xhci_read, #ifdef BHYVE_SNAPSHOT .pe_snapshot = pci_xhci_snapshot, #endif }; PCI_EMUL_SET(pci_de_xhci); diff --git a/usr.sbin/bhyve/ps2mouse.h b/usr.sbin/bhyve/ps2mouse.h index 4ae755ef4411..01f2bfa9ca0f 100644 --- a/usr.sbin/bhyve/ps2mouse.h +++ b/usr.sbin/bhyve/ps2mouse.h @@ -1,48 +1,48 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2015 Tycho Nightingale * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _PS2MOUSE_H_ #define _PS2MOUSE_H_ struct atkbdc_softc; -struct vm_snapshot_meta; +struct vm_snapshot_meta; struct ps2mouse_softc *ps2mouse_init(struct atkbdc_softc *sc); int ps2mouse_read(struct ps2mouse_softc *sc, uint8_t *val); void ps2mouse_write(struct ps2mouse_softc *sc, uint8_t val, int insert); void ps2mouse_toggle(struct ps2mouse_softc *sc, int enable); int ps2mouse_fifocnt(struct ps2mouse_softc *sc); #ifdef BHYVE_SNAPSHOT int ps2mouse_snapshot(struct ps2mouse_softc *sc, struct vm_snapshot_meta *meta); #endif #endif /* _PS2MOUSE_H_ */ diff --git a/usr.sbin/bhyve/rtc.c b/usr.sbin/bhyve/rtc.c index 0f63156adb9b..9125b4f86a10 100644 --- a/usr.sbin/bhyve/rtc.c +++ b/usr.sbin/bhyve/rtc.c @@ -1,132 +1,132 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "acpi.h" #include "config.h" #include "pci_lpc.h" #include "rtc.h" #define IO_RTC 0x70 #define RTC_LMEM_LSB 0x34 #define RTC_LMEM_MSB 0x35 #define RTC_HMEM_LSB 0x5b #define RTC_HMEM_SB 0x5c #define RTC_HMEM_MSB 0x5d #define m_64KB (64*1024) #define m_16MB (16*1024*1024) #define m_4GB (4ULL*1024*1024*1024) /* * Returns the current RTC time as number of seconds since 00:00:00 Jan 1, 1970 */ static time_t rtc_time(struct vmctx *ctx) { struct tm tm; time_t t; time(&t); if (get_config_bool_default("rtc.use_localtime", true)) { localtime_r(&t, &tm); t = timegm(&tm); } return (t); } void rtc_init(struct vmctx *ctx) -{ +{ size_t himem; size_t lomem; int err; /* XXX init diag/reset code/equipment/checksum ? */ /* * Report guest memory size in nvram cells as required by UEFI. * Little-endian encoding. * 0x34/0x35 - 64KB chunks above 16MB, below 4GB * 0x5b/0x5c/0x5d - 64KB chunks above 4GB */ lomem = (vm_get_lowmem_size(ctx) - m_16MB) / m_64KB; err = vm_rtc_write(ctx, RTC_LMEM_LSB, lomem); assert(err == 0); err = vm_rtc_write(ctx, RTC_LMEM_MSB, lomem >> 8); assert(err == 0); himem = vm_get_highmem_size(ctx) / m_64KB; err = vm_rtc_write(ctx, RTC_HMEM_LSB, himem); assert(err == 0); err = vm_rtc_write(ctx, RTC_HMEM_SB, himem >> 8); assert(err == 0); err = vm_rtc_write(ctx, RTC_HMEM_MSB, himem >> 16); assert(err == 0); err = vm_rtc_settime(ctx, rtc_time(ctx)); assert(err == 0); } static void rtc_dsdt(void) { dsdt_line(""); dsdt_line("Device (RTC)"); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0B00\"))"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); dsdt_fixed_ioport(IO_RTC, 2); dsdt_fixed_irq(8); dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); } LPC_DSDT(rtc_dsdt); /* * Reserve the extended RTC I/O ports although they are not emulated at this * time. */ SYSRES_IO(0x72, 6); diff --git a/usr.sbin/bhyve/uart_emul.c b/usr.sbin/bhyve/uart_emul.c index fd0ad2c846f4..725a789a5898 100644 --- a/usr.sbin/bhyve/uart_emul.c +++ b/usr.sbin/bhyve/uart_emul.c @@ -1,761 +1,761 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 NetApp, Inc. * Copyright (c) 2013 Neel Natu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #ifndef WITHOUT_CAPSICUM #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mevent.h" #include "uart_emul.h" #include "debug.h" #define COM1_BASE 0x3F8 #define COM1_IRQ 4 #define COM2_BASE 0x2F8 #define COM2_IRQ 3 #define COM3_BASE 0x3E8 #define COM3_IRQ 4 #define COM4_BASE 0x2E8 #define COM4_IRQ 3 #define DEFAULT_RCLK 1843200 #define DEFAULT_BAUD 9600 #define FCR_RX_MASK 0xC0 #define MCR_OUT1 0x04 #define MCR_OUT2 0x08 #define MSR_DELTA_MASK 0x0f #ifndef REG_SCR #define REG_SCR com_scr #endif #define FIFOSZ 16 static bool uart_stdio; /* stdio in use for i/o */ static struct termios tio_stdio_orig; static struct { int baseaddr; int irq; bool inuse; } uart_lres[] = { { COM1_BASE, COM1_IRQ, false}, { COM2_BASE, COM2_IRQ, false}, { COM3_BASE, COM3_IRQ, false}, { COM4_BASE, COM4_IRQ, false}, }; #define UART_NLDEVS (sizeof(uart_lres) / sizeof(uart_lres[0])) struct fifo { uint8_t buf[FIFOSZ]; int rindex; /* index to read from */ int windex; /* index to write to */ int num; /* number of characters in the fifo */ int size; /* size of the fifo */ }; struct ttyfd { bool opened; int rfd; /* fd for reading */ int wfd; /* fd for writing, may be == rfd */ }; struct uart_softc { pthread_mutex_t mtx; /* protects all softc elements */ uint8_t data; /* Data register (R/W) */ uint8_t ier; /* Interrupt enable register (R/W) */ uint8_t lcr; /* Line control register (R/W) */ uint8_t mcr; /* Modem control register (R/W) */ uint8_t lsr; /* Line status register (R/W) */ uint8_t msr; /* Modem status register (R/W) */ uint8_t fcr; /* FIFO control register (W) */ uint8_t scr; /* Scratch register (R/W) */ uint8_t dll; /* Baudrate divisor latch LSB */ uint8_t dlh; /* Baudrate divisor latch MSB */ struct fifo rxfifo; struct mevent *mev; struct ttyfd tty; bool thre_int_pending; /* THRE interrupt pending */ void *arg; uart_intr_func_t intr_assert; uart_intr_func_t intr_deassert; }; static void uart_drain(int fd, enum ev_type ev, void *arg); static void ttyclose(void) { tcsetattr(STDIN_FILENO, TCSANOW, &tio_stdio_orig); } static void ttyopen(struct ttyfd *tf) { struct termios orig, new; tcgetattr(tf->rfd, &orig); new = orig; cfmakeraw(&new); new.c_cflag |= CLOCAL; tcsetattr(tf->rfd, TCSANOW, &new); if (uart_stdio) { tio_stdio_orig = orig; atexit(ttyclose); } raw_stdio = 1; } static int ttyread(struct ttyfd *tf) { unsigned char rb; if (read(tf->rfd, &rb, 1) == 1) return (rb); else return (-1); } static void ttywrite(struct 
ttyfd *tf, unsigned char wb) { (void)write(tf->wfd, &wb, 1); } static void rxfifo_reset(struct uart_softc *sc, int size) { char flushbuf[32]; struct fifo *fifo; ssize_t nread; int error; fifo = &sc->rxfifo; bzero(fifo, sizeof(struct fifo)); fifo->size = size; if (sc->tty.opened) { /* * Flush any unread input from the tty buffer. */ while (1) { nread = read(sc->tty.rfd, flushbuf, sizeof(flushbuf)); if (nread != sizeof(flushbuf)) break; } /* * Enable mevent to trigger when new characters are available * on the tty fd. */ error = mevent_enable(sc->mev); assert(error == 0); } } static int rxfifo_available(struct uart_softc *sc) { struct fifo *fifo; fifo = &sc->rxfifo; return (fifo->num < fifo->size); } static int rxfifo_putchar(struct uart_softc *sc, uint8_t ch) { struct fifo *fifo; int error; fifo = &sc->rxfifo; if (fifo->num < fifo->size) { fifo->buf[fifo->windex] = ch; fifo->windex = (fifo->windex + 1) % fifo->size; fifo->num++; if (!rxfifo_available(sc)) { if (sc->tty.opened) { /* * Disable mevent callback if the FIFO is full. */ error = mevent_disable(sc->mev); assert(error == 0); } } return (0); } else return (-1); } static int rxfifo_getchar(struct uart_softc *sc) { struct fifo *fifo; int c, error, wasfull; wasfull = 0; fifo = &sc->rxfifo; if (fifo->num > 0) { if (!rxfifo_available(sc)) wasfull = 1; c = fifo->buf[fifo->rindex]; fifo->rindex = (fifo->rindex + 1) % fifo->size; fifo->num--; if (wasfull) { if (sc->tty.opened) { error = mevent_enable(sc->mev); assert(error == 0); } } return (c); } else return (-1); } static int rxfifo_numchars(struct uart_softc *sc) { struct fifo *fifo = &sc->rxfifo; return (fifo->num); } static void uart_opentty(struct uart_softc *sc) { ttyopen(&sc->tty); sc->mev = mevent_add(sc->tty.rfd, EVF_READ, uart_drain, sc); assert(sc->mev != NULL); } static uint8_t modem_status(uint8_t mcr) { uint8_t msr; if (mcr & MCR_LOOPBACK) { /* * In the loopback mode certain bits from the MCR are * reflected back into MSR. */ msr = 0; if (mcr & MCR_RTS) msr |= MSR_CTS; if (mcr & MCR_DTR) msr |= MSR_DSR; if (mcr & MCR_OUT1) msr |= MSR_RI; if (mcr & MCR_OUT2) msr |= MSR_DCD; } else { /* * Always assert DCD and DSR so tty open doesn't block * even if CLOCAL is turned off. */ msr = MSR_DCD | MSR_DSR; } assert((msr & MSR_DELTA_MASK) == 0); return (msr); } /* * The IIR returns a prioritized interrupt reason: * - receive data available * - transmit holding register empty * - modem status change * * Return an interrupt reason if one is available. */ static int uart_intr_reason(struct uart_softc *sc) { if ((sc->lsr & LSR_OE) != 0 && (sc->ier & IER_ERLS) != 0) return (IIR_RLS); else if (rxfifo_numchars(sc) > 0 && (sc->ier & IER_ERXRDY) != 0) return (IIR_RXTOUT); else if (sc->thre_int_pending && (sc->ier & IER_ETXRDY) != 0) return (IIR_TXRDY); else if ((sc->msr & MSR_DELTA_MASK) != 0 && (sc->ier & IER_EMSC) != 0) return (IIR_MLSC); else return (IIR_NOPEND); } static void uart_reset(struct uart_softc *sc) { uint16_t divisor; divisor = DEFAULT_RCLK / DEFAULT_BAUD / 16; sc->dll = divisor; sc->dlh = divisor >> 16; sc->msr = modem_status(sc->mcr); rxfifo_reset(sc, 1); /* no fifo until enabled by software */ } /* * Toggle the COM port's intr pin depending on whether or not we have an * interrupt condition to report to the processor. 
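 *
 * (Aside, illustrative only and not part of the patch: with the default
 * clock and rate used by uart_reset() above, the divisor latch works out
 * to 1843200 / 9600 / 16 = 12, so the guest reads back dll == 12 and
 * dlh == 0 until it programs a different rate.)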
*/ static void uart_toggle_intr(struct uart_softc *sc) { uint8_t intr_reason; intr_reason = uart_intr_reason(sc); if (intr_reason == IIR_NOPEND) (*sc->intr_deassert)(sc->arg); else (*sc->intr_assert)(sc->arg); } static void uart_drain(int fd, enum ev_type ev, void *arg) { struct uart_softc *sc; int ch; - sc = arg; + sc = arg; assert(fd == sc->tty.rfd); assert(ev == EVF_READ); - + /* * This routine is called in the context of the mevent thread * to take out the softc lock to protect against concurrent * access from a vCPU i/o exit */ pthread_mutex_lock(&sc->mtx); if ((sc->mcr & MCR_LOOPBACK) != 0) { (void) ttyread(&sc->tty); } else { while (rxfifo_available(sc) && ((ch = ttyread(&sc->tty)) != -1)) { rxfifo_putchar(sc, ch); } uart_toggle_intr(sc); } pthread_mutex_unlock(&sc->mtx); } void uart_write(struct uart_softc *sc, int offset, uint8_t value) { int fifosz; uint8_t msr; pthread_mutex_lock(&sc->mtx); /* * Take care of the special case DLAB accesses first */ if ((sc->lcr & LCR_DLAB) != 0) { if (offset == REG_DLL) { sc->dll = value; goto done; } - + if (offset == REG_DLH) { sc->dlh = value; goto done; } } switch (offset) { case REG_DATA: if (sc->mcr & MCR_LOOPBACK) { if (rxfifo_putchar(sc, value) != 0) sc->lsr |= LSR_OE; } else if (sc->tty.opened) { ttywrite(&sc->tty, value); } /* else drop on floor */ sc->thre_int_pending = true; break; case REG_IER: /* Set pending when IER_ETXRDY is raised (edge-triggered). */ if ((sc->ier & IER_ETXRDY) == 0 && (value & IER_ETXRDY) != 0) sc->thre_int_pending = true; /* * Apply mask so that bits 4-7 are 0 * Also enables bits 0-3 only if they're 1 */ sc->ier = value & 0x0F; break; case REG_FCR: /* * When moving from FIFO and 16450 mode and vice versa, * the FIFO contents are reset. */ if ((sc->fcr & FCR_ENABLE) ^ (value & FCR_ENABLE)) { fifosz = (value & FCR_ENABLE) ? FIFOSZ : 1; rxfifo_reset(sc, fifosz); } /* * The FCR_ENABLE bit must be '1' for the programming * of other FCR bits to be effective. */ if ((value & FCR_ENABLE) == 0) { sc->fcr = 0; } else { if ((value & FCR_RCV_RST) != 0) rxfifo_reset(sc, FIFOSZ); sc->fcr = value & (FCR_ENABLE | FCR_DMA | FCR_RX_MASK); } break; case REG_LCR: sc->lcr = value; break; case REG_MCR: /* Apply mask so that bits 5-7 are 0 */ sc->mcr = value & 0x1F; msr = modem_status(sc->mcr); /* * Detect if there has been any change between the * previous and the new value of MSR. If there is * then assert the appropriate MSR delta bit. */ if ((msr & MSR_CTS) ^ (sc->msr & MSR_CTS)) sc->msr |= MSR_DCTS; if ((msr & MSR_DSR) ^ (sc->msr & MSR_DSR)) sc->msr |= MSR_DDSR; if ((msr & MSR_DCD) ^ (sc->msr & MSR_DCD)) sc->msr |= MSR_DDCD; if ((sc->msr & MSR_RI) != 0 && (msr & MSR_RI) == 0) sc->msr |= MSR_TERI; /* * Update the value of MSR while retaining the delta * bits. */ sc->msr &= MSR_DELTA_MASK; sc->msr |= msr; break; case REG_LSR: /* * Line status register is not meant to be written to * during normal operation. */ break; case REG_MSR: /* * As far as I can tell MSR is a read-only register. 
*/ break; case REG_SCR: sc->scr = value; break; default: break; } done: uart_toggle_intr(sc); pthread_mutex_unlock(&sc->mtx); } uint8_t uart_read(struct uart_softc *sc, int offset) { uint8_t iir, intr_reason, reg; pthread_mutex_lock(&sc->mtx); /* * Take care of the special case DLAB accesses first */ if ((sc->lcr & LCR_DLAB) != 0) { if (offset == REG_DLL) { reg = sc->dll; goto done; } - + if (offset == REG_DLH) { reg = sc->dlh; goto done; } } switch (offset) { case REG_DATA: reg = rxfifo_getchar(sc); break; case REG_IER: reg = sc->ier; break; case REG_IIR: iir = (sc->fcr & FCR_ENABLE) ? IIR_FIFO_MASK : 0; intr_reason = uart_intr_reason(sc); - + /* * Deal with side effects of reading the IIR register */ if (intr_reason == IIR_TXRDY) sc->thre_int_pending = false; iir |= intr_reason; reg = iir; break; case REG_LCR: reg = sc->lcr; break; case REG_MCR: reg = sc->mcr; break; case REG_LSR: /* Transmitter is always ready for more data */ sc->lsr |= LSR_TEMT | LSR_THRE; /* Check for new receive data */ if (rxfifo_numchars(sc) > 0) sc->lsr |= LSR_RXRDY; else sc->lsr &= ~LSR_RXRDY; reg = sc->lsr; /* The LSR_OE bit is cleared on LSR read */ sc->lsr &= ~LSR_OE; break; case REG_MSR: /* * MSR delta bits are cleared on read */ reg = sc->msr; sc->msr &= ~MSR_DELTA_MASK; break; case REG_SCR: reg = sc->scr; break; default: reg = 0xFF; break; } done: uart_toggle_intr(sc); pthread_mutex_unlock(&sc->mtx); return (reg); } int uart_legacy_alloc(int which, int *baseaddr, int *irq) { if (which < 0 || which >= UART_NLDEVS || uart_lres[which].inuse) return (-1); uart_lres[which].inuse = true; *baseaddr = uart_lres[which].baseaddr; *irq = uart_lres[which].irq; return (0); } struct uart_softc * uart_init(uart_intr_func_t intr_assert, uart_intr_func_t intr_deassert, void *arg) { struct uart_softc *sc; sc = calloc(1, sizeof(struct uart_softc)); sc->arg = arg; sc->intr_assert = intr_assert; sc->intr_deassert = intr_deassert; pthread_mutex_init(&sc->mtx, NULL); uart_reset(sc); return (sc); } static int uart_stdio_backend(struct uart_softc *sc) { #ifndef WITHOUT_CAPSICUM cap_rights_t rights; cap_ioctl_t cmds[] = { TIOCGETA, TIOCSETA, TIOCGWINSZ }; #endif if (uart_stdio) return (-1); sc->tty.rfd = STDIN_FILENO; sc->tty.wfd = STDOUT_FILENO; sc->tty.opened = true; if (fcntl(sc->tty.rfd, F_SETFL, O_NONBLOCK) != 0) return (-1); if (fcntl(sc->tty.wfd, F_SETFL, O_NONBLOCK) != 0) return (-1); #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_IOCTL, CAP_READ); if (caph_rights_limit(sc->tty.rfd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); if (caph_ioctls_limit(sc->tty.rfd, cmds, nitems(cmds)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif uart_stdio = true; return (0); } static int uart_tty_backend(struct uart_softc *sc, const char *path) { #ifndef WITHOUT_CAPSICUM cap_rights_t rights; cap_ioctl_t cmds[] = { TIOCGETA, TIOCSETA, TIOCGWINSZ }; #endif int fd; fd = open(path, O_RDWR | O_NONBLOCK); if (fd < 0) return (-1); if (!isatty(fd)) { close(fd); return (-1); } sc->tty.rfd = sc->tty.wfd = fd; sc->tty.opened = true; #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_IOCTL, CAP_READ, CAP_WRITE); if (caph_rights_limit(fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif return (0); } int uart_set_backend(struct uart_softc *sc, const char *device) { int retval; if (device == NULL) return (0); if (strcmp("stdio", device) == 0) 
retval = uart_stdio_backend(sc); else retval = uart_tty_backend(sc, device); if (retval == 0) uart_opentty(sc); return (retval); } #ifdef BHYVE_SNAPSHOT int uart_snapshot(struct uart_softc *sc, struct vm_snapshot_meta *meta) { int ret; SNAPSHOT_VAR_OR_LEAVE(sc->data, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->ier, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->lcr, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->mcr, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->lsr, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->msr, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->fcr, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->scr, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->dll, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->dlh, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.rindex, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.windex, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.num, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.size, meta, ret, done); SNAPSHOT_BUF_OR_LEAVE(sc->rxfifo.buf, sizeof(sc->rxfifo.buf), meta, ret, done); sc->thre_int_pending = 1; done: return (ret); } #endif diff --git a/usr.sbin/bhyve/usb_mouse.c b/usr.sbin/bhyve/usb_mouse.c index eda3878ff5fb..fbda9ef2579b 100644 --- a/usr.sbin/bhyve/usb_mouse.c +++ b/usr.sbin/bhyve/usb_mouse.c @@ -1,828 +1,828 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2014 Leon Dang * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "usb_emul.h" #include "console.h" #include "bhyvegc.h" #include "debug.h" static int umouse_debug = 0; #define DPRINTF(params) if (umouse_debug) PRINTLN params #define WPRINTF(params) PRINTLN params /* USB endpoint context (1-15) for reporting mouse data events*/ #define UMOUSE_INTR_ENDPT 1 #define UMOUSE_REPORT_DESC_TYPE 0x22 #define UMOUSE_GET_REPORT 0x01 #define UMOUSE_GET_IDLE 0x02 #define UMOUSE_GET_PROTOCOL 0x03 #define UMOUSE_SET_REPORT 0x09 #define UMOUSE_SET_IDLE 0x0A #define UMOUSE_SET_PROTOCOL 0x0B #define HSETW(ptr, val) ptr = { (uint8_t)(val), (uint8_t)((val) >> 8) } enum { UMSTR_LANG, UMSTR_MANUFACTURER, UMSTR_PRODUCT, UMSTR_SERIAL, UMSTR_CONFIG, UMSTR_MAX }; static const char *umouse_desc_strings[] = { "\x09\x04", "BHYVE", "HID Tablet", "01", "HID Tablet Device", }; struct umouse_hid_descriptor { uint8_t bLength; uint8_t bDescriptorType; uint8_t bcdHID[2]; uint8_t bCountryCode; uint8_t bNumDescriptors; uint8_t bReportDescriptorType; uint8_t wItemLength[2]; } __packed; struct umouse_config_desc { struct usb_config_descriptor confd; struct usb_interface_descriptor ifcd; struct umouse_hid_descriptor hidd; struct usb_endpoint_descriptor endpd; struct usb_endpoint_ss_comp_descriptor sscompd; } __packed; #define MOUSE_MAX_X 0x8000 #define MOUSE_MAX_Y 0x8000 static const uint8_t umouse_report_desc[] = { 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ 0x09, 0x02, /* USAGE (Mouse) */ 0xa1, 0x01, /* COLLECTION (Application) */ 0x09, 0x01, /* USAGE (Pointer) */ 0xa1, 0x00, /* COLLECTION (Physical) */ 0x05, 0x09, /* USAGE_PAGE (Button) */ 0x19, 0x01, /* USAGE_MINIMUM (Button 1) */ 0x29, 0x03, /* USAGE_MAXIMUM (Button 3) */ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ 0x75, 0x01, /* REPORT_SIZE (1) */ 0x95, 0x03, /* REPORT_COUNT (3) */ 0x81, 0x02, /* INPUT (Data,Var,Abs); 3 buttons */ 0x75, 0x05, /* REPORT_SIZE (5) */ 0x95, 0x01, /* REPORT_COUNT (1) */ 0x81, 0x03, /* INPUT (Cnst,Var,Abs); padding */ 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ 0x09, 0x30, /* USAGE (X) */ 0x09, 0x31, /* USAGE (Y) */ 0x35, 0x00, /* PHYSICAL_MINIMUM (0) */ 0x46, 0xff, 0x7f, /* PHYSICAL_MAXIMUM (0x7fff) */ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ 0x26, 0xff, 0x7f, /* LOGICAL_MAXIMUM (0x7fff) */ 0x75, 0x10, /* REPORT_SIZE (16) */ 0x95, 0x02, /* REPORT_COUNT (2) */ 0x81, 0x02, /* INPUT (Data,Var,Abs) */ 0x05, 0x01, /* USAGE Page (Generic Desktop) */ 0x09, 0x38, /* USAGE (Wheel) */ 0x35, 0x00, /* PHYSICAL_MINIMUM (0) */ 0x45, 0x00, /* PHYSICAL_MAXIMUM (0) */ 0x15, 0x81, /* LOGICAL_MINIMUM (-127) */ 0x25, 0x7f, /* LOGICAL_MAXIMUM (127) */ 0x75, 0x08, /* REPORT_SIZE (8) */ 0x95, 0x01, /* REPORT_COUNT (1) */ 0x81, 0x06, /* INPUT (Data,Var,Rel) */ 0xc0, /* END_COLLECTION */ 0xc0 /* END_COLLECTION */ }; struct umouse_report { uint8_t buttons; /* bits: 0 left, 1 right, 2 middle */ int16_t x; /* x position */ int16_t y; /* y position */ int8_t z; /* z wheel position */ } __packed; #define MSETW(ptr, val) ptr = { (uint8_t)(val), (uint8_t)((val) >> 8) } static struct usb_device_descriptor umouse_dev_desc = { .bLength = sizeof(umouse_dev_desc), .bDescriptorType = UDESC_DEVICE, MSETW(.bcdUSB, UD_USB_3_0), .bMaxPacketSize = 8, /* max packet size */ MSETW(.idVendor, 0xFB5D), /* vendor */ MSETW(.idProduct, 0x0001), /* product */ MSETW(.bcdDevice, 0), /* device version */ .iManufacturer = UMSTR_MANUFACTURER, .iProduct = UMSTR_PRODUCT, .iSerialNumber = UMSTR_SERIAL, 
.bNumConfigurations = 1, }; static struct umouse_config_desc umouse_confd = { .confd = { .bLength = sizeof(umouse_confd.confd), .bDescriptorType = UDESC_CONFIG, .wTotalLength[0] = sizeof(umouse_confd), .bNumInterface = 1, .bConfigurationValue = 1, .iConfiguration = UMSTR_CONFIG, .bmAttributes = UC_BUS_POWERED | UC_REMOTE_WAKEUP, .bMaxPower = 0, }, .ifcd = { .bLength = sizeof(umouse_confd.ifcd), .bDescriptorType = UDESC_INTERFACE, .bNumEndpoints = 1, .bInterfaceClass = UICLASS_HID, .bInterfaceSubClass = UISUBCLASS_BOOT, .bInterfaceProtocol = UIPROTO_MOUSE, }, .hidd = { .bLength = sizeof(umouse_confd.hidd), .bDescriptorType = 0x21, .bcdHID = { 0x01, 0x10 }, .bCountryCode = 0, .bNumDescriptors = 1, .bReportDescriptorType = UMOUSE_REPORT_DESC_TYPE, .wItemLength = { sizeof(umouse_report_desc), 0 }, }, .endpd = { .bLength = sizeof(umouse_confd.endpd), .bDescriptorType = UDESC_ENDPOINT, .bEndpointAddress = UE_DIR_IN | UMOUSE_INTR_ENDPT, .bmAttributes = UE_INTERRUPT, .wMaxPacketSize[0] = 8, .bInterval = 0xA, }, .sscompd = { .bLength = sizeof(umouse_confd.sscompd), .bDescriptorType = UDESC_ENDPOINT_SS_COMP, .bMaxBurst = 0, .bmAttributes = 0, MSETW(.wBytesPerInterval, 0), }, }; struct umouse_bos_desc { struct usb_bos_descriptor bosd; struct usb_devcap_ss_descriptor usbssd; } __packed; struct umouse_bos_desc umouse_bosd = { .bosd = { .bLength = sizeof(umouse_bosd.bosd), .bDescriptorType = UDESC_BOS, HSETW(.wTotalLength, sizeof(umouse_bosd)), .bNumDeviceCaps = 1, }, .usbssd = { .bLength = sizeof(umouse_bosd.usbssd), .bDescriptorType = UDESC_DEVICE_CAPABILITY, .bDevCapabilityType = 3, .bmAttributes = 0, HSETW(.wSpeedsSupported, 0x08), .bFunctionalitySupport = 3, .bU1DevExitLat = 0xa, /* dummy - not used */ .wU2DevExitLat = { 0x20, 0x00 }, } }; struct umouse_softc { struct usb_hci *hci; struct umouse_report um_report; int newdata; struct { uint8_t idle; uint8_t protocol; uint8_t feature; } hid; pthread_mutex_t mtx; pthread_mutex_t ev_mtx; int polling; struct timeval prev_evt; }; static void umouse_event(uint8_t button, int x, int y, void *arg) { struct umouse_softc *sc; struct bhyvegc_image *gc; gc = console_get_image(); if (gc == NULL) { /* not ready */ return; } sc = arg; pthread_mutex_lock(&sc->mtx); sc->um_report.buttons = 0; sc->um_report.z = 0; if (button & 0x01) sc->um_report.buttons |= 0x01; /* left */ if (button & 0x02) sc->um_report.buttons |= 0x04; /* middle */ if (button & 0x04) sc->um_report.buttons |= 0x02; /* right */ if (button & 0x8) sc->um_report.z = 1; if (button & 0x10) sc->um_report.z = -1; /* scale coords to mouse resolution */ sc->um_report.x = MOUSE_MAX_X * x / gc->width; sc->um_report.y = MOUSE_MAX_Y * y / gc->height; sc->newdata = 1; pthread_mutex_unlock(&sc->mtx); pthread_mutex_lock(&sc->ev_mtx); sc->hci->hci_intr(sc->hci, UE_DIR_IN | UMOUSE_INTR_ENDPT); pthread_mutex_unlock(&sc->ev_mtx); } static void * umouse_init(struct usb_hci *hci, nvlist_t *nvl) { struct umouse_softc *sc; sc = calloc(1, sizeof(struct umouse_softc)); sc->hci = hci; sc->hid.protocol = 1; /* REPORT protocol */ pthread_mutex_init(&sc->mtx, NULL); pthread_mutex_init(&sc->ev_mtx, NULL); console_ptr_register(umouse_event, sc, 10); return (sc); } #define UREQ(x,y) ((x) | ((y) << 8)) static int umouse_request(void *scarg, struct usb_data_xfer *xfer) { struct umouse_softc *sc; struct usb_data_xfer_block *data; const char *str; uint16_t value; uint16_t index; uint16_t len; uint16_t slen; uint8_t *udata; int err; int i, idx; int eshort; sc = scarg; data = NULL; udata = NULL; idx = xfer->head; for (i = 0; i < 
xfer->ndata; i++) { xfer->data[idx].bdone = 0; if (data == NULL && USB_DATA_OK(xfer,i)) { data = &xfer->data[idx]; udata = data->buf; } xfer->data[idx].processed = 1; idx = (idx + 1) % USB_MAX_XFER_BLOCKS; } err = USB_ERR_NORMAL_COMPLETION; eshort = 0; if (!xfer->ureq) { DPRINTF(("umouse_request: port %d", sc->hci->hci_port)); goto done; } value = UGETW(xfer->ureq->wValue); index = UGETW(xfer->ureq->wIndex); len = UGETW(xfer->ureq->wLength); DPRINTF(("umouse_request: port %d, type 0x%x, req 0x%x, val 0x%x, " "idx 0x%x, len %u", sc->hci->hci_port, xfer->ureq->bmRequestType, xfer->ureq->bRequest, value, index, len)); switch (UREQ(xfer->ureq->bRequest, xfer->ureq->bmRequestType)) { case UREQ(UR_GET_CONFIG, UT_READ_DEVICE): DPRINTF(("umouse: (UR_GET_CONFIG, UT_READ_DEVICE)")); if (!data) break; *udata = umouse_confd.confd.bConfigurationValue; data->blen = len > 0 ? len - 1 : 0; eshort = data->blen > 0; data->bdone += 1; break; case UREQ(UR_GET_DESCRIPTOR, UT_READ_DEVICE): DPRINTF(("umouse: (UR_GET_DESCRIPTOR, UT_READ_DEVICE) val %x", value >> 8)); if (!data) break; switch (value >> 8) { case UDESC_DEVICE: DPRINTF(("umouse: (->UDESC_DEVICE) len %u ?= " "sizeof(umouse_dev_desc) %lu", len, sizeof(umouse_dev_desc))); if ((value & 0xFF) != 0) { err = USB_ERR_STALLED; goto done; } if (len > sizeof(umouse_dev_desc)) { data->blen = len - sizeof(umouse_dev_desc); len = sizeof(umouse_dev_desc); } else data->blen = 0; memcpy(data->buf, &umouse_dev_desc, len); data->bdone += len; break; case UDESC_CONFIG: DPRINTF(("umouse: (->UDESC_CONFIG)")); if ((value & 0xFF) != 0) { err = USB_ERR_STALLED; goto done; } if (len > sizeof(umouse_confd)) { data->blen = len - sizeof(umouse_confd); len = sizeof(umouse_confd); } else data->blen = 0; memcpy(data->buf, &umouse_confd, len); data->bdone += len; break; case UDESC_STRING: DPRINTF(("umouse: (->UDESC_STRING)")); str = NULL; if ((value & 0xFF) < UMSTR_MAX) str = umouse_desc_strings[value & 0xFF]; else goto done; if ((value & 0xFF) == UMSTR_LANG) { udata[0] = 4; udata[1] = UDESC_STRING; data->blen = len - 2; len -= 2; data->bdone += 2; if (len >= 2) { udata[2] = str[0]; udata[3] = str[1]; data->blen -= 2; data->bdone += 2; } else data->blen = 0; goto done; } slen = 2 + strlen(str) * 2; udata[0] = slen; udata[1] = UDESC_STRING; if (len > slen) { data->blen = len - slen; len = slen; } else data->blen = 0; for (i = 2; i < len; i += 2) { udata[i] = *str++; udata[i+1] = '\0'; } data->bdone += slen; break; case UDESC_BOS: DPRINTF(("umouse: USB3 BOS")); if (len > sizeof(umouse_bosd)) { data->blen = len - sizeof(umouse_bosd); len = sizeof(umouse_bosd); } else data->blen = 0; memcpy(udata, &umouse_bosd, len); data->bdone += len; break; default: DPRINTF(("umouse: unknown(%d)->ERROR", value >> 8)); err = USB_ERR_STALLED; goto done; } eshort = data->blen > 0; break; case UREQ(UR_GET_DESCRIPTOR, UT_READ_INTERFACE): DPRINTF(("umouse: (UR_GET_DESCRIPTOR, UT_READ_INTERFACE) " "0x%x", (value >> 8))); if (!data) break; switch (value >> 8) { case UMOUSE_REPORT_DESC_TYPE: if (len > sizeof(umouse_report_desc)) { data->blen = len - sizeof(umouse_report_desc); len = sizeof(umouse_report_desc); } else data->blen = 0; memcpy(data->buf, umouse_report_desc, len); data->bdone += len; break; default: DPRINTF(("umouse: IO ERROR")); err = USB_ERR_STALLED; goto done; } eshort = data->blen > 0; break; case UREQ(UR_GET_INTERFACE, UT_READ_INTERFACE): DPRINTF(("umouse: (UR_GET_INTERFACE, UT_READ_INTERFACE)")); if (index != 0) { DPRINTF(("umouse get_interface, invalid index %d", index)); err = 
USB_ERR_STALLED; goto done; } if (!data) break; if (len > 0) { *udata = 0; data->blen = len - 1; } eshort = data->blen > 0; data->bdone += 1; break; case UREQ(UR_GET_STATUS, UT_READ_DEVICE): DPRINTF(("umouse: (UR_GET_STATUS, UT_READ_DEVICE)")); if (data != NULL && len > 1) { if (sc->hid.feature == UF_DEVICE_REMOTE_WAKEUP) USETW(udata, UDS_REMOTE_WAKEUP); else USETW(udata, 0); data->blen = len - 2; data->bdone += 2; } eshort = data->blen > 0; break; - case UREQ(UR_GET_STATUS, UT_READ_INTERFACE): - case UREQ(UR_GET_STATUS, UT_READ_ENDPOINT): + case UREQ(UR_GET_STATUS, UT_READ_INTERFACE): + case UREQ(UR_GET_STATUS, UT_READ_ENDPOINT): DPRINTF(("umouse: (UR_GET_STATUS, UT_READ_INTERFACE)")); if (data != NULL && len > 1) { USETW(udata, 0); data->blen = len - 2; data->bdone += 2; } eshort = data->blen > 0; break; case UREQ(UR_SET_ADDRESS, UT_WRITE_DEVICE): /* XXX Controller should've handled this */ DPRINTF(("umouse set address %u", value)); break; case UREQ(UR_SET_CONFIG, UT_WRITE_DEVICE): DPRINTF(("umouse set config %u", value)); break; case UREQ(UR_SET_DESCRIPTOR, UT_WRITE_DEVICE): DPRINTF(("umouse set descriptor %u", value)); break; case UREQ(UR_CLEAR_FEATURE, UT_WRITE_DEVICE): DPRINTF(("umouse: (UR_SET_FEATURE, UT_WRITE_DEVICE) %x", value)); if (value == UF_DEVICE_REMOTE_WAKEUP) sc->hid.feature = 0; break; case UREQ(UR_SET_FEATURE, UT_WRITE_DEVICE): DPRINTF(("umouse: (UR_SET_FEATURE, UT_WRITE_DEVICE) %x", value)); if (value == UF_DEVICE_REMOTE_WAKEUP) sc->hid.feature = UF_DEVICE_REMOTE_WAKEUP; break; case UREQ(UR_CLEAR_FEATURE, UT_WRITE_INTERFACE): case UREQ(UR_CLEAR_FEATURE, UT_WRITE_ENDPOINT): case UREQ(UR_SET_FEATURE, UT_WRITE_INTERFACE): case UREQ(UR_SET_FEATURE, UT_WRITE_ENDPOINT): DPRINTF(("umouse: (UR_CLEAR_FEATURE, UT_WRITE_INTERFACE)")); err = USB_ERR_STALLED; goto done; case UREQ(UR_SET_INTERFACE, UT_WRITE_INTERFACE): DPRINTF(("umouse set interface %u", value)); break; case UREQ(UR_ISOCH_DELAY, UT_WRITE_DEVICE): DPRINTF(("umouse set isoch delay %u", value)); break; case UREQ(UR_SET_SEL, 0): DPRINTF(("umouse set sel")); break; case UREQ(UR_SYNCH_FRAME, UT_WRITE_ENDPOINT): DPRINTF(("umouse synch frame")); break; /* HID device requests */ case UREQ(UMOUSE_GET_REPORT, UT_READ_CLASS_INTERFACE): DPRINTF(("umouse: (UMOUSE_GET_REPORT, UT_READ_CLASS_INTERFACE) " "0x%x", (value >> 8))); if (!data) break; if ((value >> 8) == 0x01 && len >= sizeof(sc->um_report)) { /* TODO read from backend */ if (len > sizeof(sc->um_report)) { data->blen = len - sizeof(sc->um_report); len = sizeof(sc->um_report); } else data->blen = 0; memcpy(data->buf, &sc->um_report, len); data->bdone += len; } else { err = USB_ERR_STALLED; goto done; } eshort = data->blen > 0; break; case UREQ(UMOUSE_GET_IDLE, UT_READ_CLASS_INTERFACE): if (data != NULL && len > 0) { *udata = sc->hid.idle; data->blen = len - 1; data->bdone += 1; } eshort = data->blen > 0; break; case UREQ(UMOUSE_GET_PROTOCOL, UT_READ_CLASS_INTERFACE): if (data != NULL && len > 0) { *udata = sc->hid.protocol; data->blen = len - 1; data->bdone += 1; } eshort = data->blen > 0; break; case UREQ(UMOUSE_SET_REPORT, UT_WRITE_CLASS_INTERFACE): DPRINTF(("umouse: (UMOUSE_SET_REPORT, UT_WRITE_CLASS_INTERFACE) ignored")); break; case UREQ(UMOUSE_SET_IDLE, UT_WRITE_CLASS_INTERFACE): sc->hid.idle = UGETW(xfer->ureq->wValue) >> 8; DPRINTF(("umouse: (UMOUSE_SET_IDLE, UT_WRITE_CLASS_INTERFACE) %x", sc->hid.idle)); break; case UREQ(UMOUSE_SET_PROTOCOL, UT_WRITE_CLASS_INTERFACE): sc->hid.protocol = UGETW(xfer->ureq->wValue) >> 8; DPRINTF(("umouse: (UR_CLEAR_FEATURE, 
UT_WRITE_CLASS_INTERFACE) %x", sc->hid.protocol)); break; default: DPRINTF(("**** umouse request unhandled")); err = USB_ERR_STALLED; break; } done: if (xfer->ureq && (xfer->ureq->bmRequestType & UT_WRITE) && (err == USB_ERR_NORMAL_COMPLETION) && (data != NULL)) data->blen = 0; else if (eshort) err = USB_ERR_SHORT_XFER; DPRINTF(("umouse request error code %d (0=ok), blen %u txlen %u", err, (data ? data->blen : 0), (data ? data->bdone : 0))); return (err); } static int umouse_data_handler(void *scarg, struct usb_data_xfer *xfer, int dir, int epctx) { struct umouse_softc *sc; struct usb_data_xfer_block *data; uint8_t *udata; int len, i, idx; int err; DPRINTF(("umouse handle data - DIR=%s|EP=%d, blen %d", dir ? "IN" : "OUT", epctx, xfer->data[0].blen)); /* find buffer to add data */ udata = NULL; err = USB_ERR_NORMAL_COMPLETION; /* handle xfer at first unprocessed item with buffer */ data = NULL; idx = xfer->head; for (i = 0; i < xfer->ndata; i++) { data = &xfer->data[idx]; if (data->buf != NULL && data->blen != 0) { break; } else { data->processed = 1; data = NULL; } idx = (idx + 1) % USB_MAX_XFER_BLOCKS; } if (!data) goto done; udata = data->buf; len = data->blen; if (udata == NULL) { DPRINTF(("umouse no buffer provided for input")); err = USB_ERR_NOMEM; goto done; } sc = scarg; if (dir) { pthread_mutex_lock(&sc->mtx); if (!sc->newdata) { err = USB_ERR_CANCELLED; USB_DATA_SET_ERRCODE(&xfer->data[xfer->head], USB_NAK); pthread_mutex_unlock(&sc->mtx); goto done; } if (sc->polling) { err = USB_ERR_STALLED; USB_DATA_SET_ERRCODE(data, USB_STALL); pthread_mutex_unlock(&sc->mtx); goto done; } sc->polling = 1; if (len > 0) { sc->newdata = 0; data->processed = 1; data->bdone += 6; memcpy(udata, &sc->um_report, 6); data->blen = len - 6; if (data->blen > 0) err = USB_ERR_SHORT_XFER; } sc->polling = 0; pthread_mutex_unlock(&sc->mtx); - } else { + } else { USB_DATA_SET_ERRCODE(data, USB_STALL); err = USB_ERR_STALLED; } done: return (err); } static int umouse_reset(void *scarg) { struct umouse_softc *sc; sc = scarg; sc->newdata = 0; return (0); } static int umouse_remove(void *scarg) { return (0); } static int umouse_stop(void *scarg) { return (0); } #ifdef BHYVE_SNAPSHOT static int umouse_snapshot(void *scarg, struct vm_snapshot_meta *meta) { int ret; struct umouse_softc *sc; sc = scarg; SNAPSHOT_VAR_OR_LEAVE(sc->um_report, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->newdata, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->hid.idle, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->hid.protocol, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->hid.feature, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->polling, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->prev_evt.tv_sec, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->prev_evt.tv_usec, meta, ret, done); done: return (ret); } #endif struct usb_devemu ue_mouse = { .ue_emu = "tablet", .ue_usbver = 3, .ue_usbspeed = USB_SPEED_HIGH, .ue_init = umouse_init, .ue_request = umouse_request, .ue_data = umouse_data_handler, .ue_reset = umouse_reset, .ue_remove = umouse_remove, .ue_stop = umouse_stop, #ifdef BHYVE_SNAPSHOT .ue_snapshot = umouse_snapshot, #endif }; USB_EMUL_SET(ue_mouse);
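
Reviewer note on the UART receive path touched above: the rxfifo_* helpers implement a small ring buffer whose read and write indices wrap modulo the configured size (1 until the guest enables the FIFO via FCR_ENABLE, FIFOSZ afterwards), and mevent_disable()/mevent_enable() on the tty read event act as flow control when the buffer fills and later drains. The following standalone sketch (not part of the patch, and not bhyve code; all names such as demo_fifo are illustrative) shows just that index arithmetic in isolation.

/*
 * Minimal ring-buffer sketch mirroring rxfifo_putchar()/rxfifo_getchar().
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_FIFOSZ 16

struct demo_fifo {
	uint8_t	buf[DEMO_FIFOSZ];
	int	rindex;		/* index of next char to read */
	int	windex;		/* index of next free slot */
	int	num;		/* chars currently queued */
	int	size;		/* 1 in 16450 mode, DEMO_FIFOSZ in FIFO mode */
};

static int
demo_put(struct demo_fifo *f, uint8_t ch)
{
	if (f->num == f->size)
		return (-1);	/* full: caller would set LSR_OE / stop reading */
	f->buf[f->windex] = ch;
	f->windex = (f->windex + 1) % f->size;
	f->num++;
	return (0);
}

static int
demo_get(struct demo_fifo *f)
{
	int c;

	if (f->num == 0)
		return (-1);	/* empty */
	c = f->buf[f->rindex];
	f->rindex = (f->rindex + 1) % f->size;
	f->num--;
	return (c);
}

int
main(void)
{
	struct demo_fifo f = { .size = DEMO_FIFOSZ };

	assert(demo_put(&f, 'a') == 0);
	assert(demo_get(&f) == 'a');
	assert(demo_get(&f) == -1);	/* drained */
	printf("ring-buffer sketch ok\n");
	return (0);
}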
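
Reviewer note on the tablet emulation above: umouse_event() scales pixel coordinates into the absolute 0..0x7fff range declared in the report descriptor, and umouse_data_handler() copies the resulting 6-byte packed struct umouse_report (buttons, x, y, wheel) onto the interrupt IN endpoint. The sketch below (not part of the patch; demo_report and the 1024x768 framebuffer size are made-up examples) illustrates that packing and scaling with GCC/Clang attribute syntax standing in for __packed.

/*
 * Standalone sketch of the 6-byte HID report layout and coordinate scaling.
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>

#define DEMO_MAX_X 0x8000
#define DEMO_MAX_Y 0x8000

struct demo_report {
	uint8_t	buttons;	/* bit 0 left, bit 1 right, bit 2 middle */
	int16_t	x;		/* absolute, 0..0x7fff per the descriptor */
	int16_t	y;
	int8_t	z;		/* wheel, -127..127, relative */
} __attribute__((packed));

int
main(void)
{
	struct demo_report r;
	uint8_t wire[6];
	int gc_width = 1024, gc_height = 768;	/* example framebuffer size */
	int px = 512, py = 384;			/* example pointer position */

	/* Same scaling idea as umouse_event(): map pixels to 0..0x7fff. */
	memset(&r, 0, sizeof(r));
	r.buttons = 0x01;			/* left button down */
	r.x = DEMO_MAX_X * px / gc_width;
	r.y = DEMO_MAX_Y * py / gc_height;

	/* The packed struct is exactly the 6 bytes copied to the endpoint. */
	assert(sizeof(r) == sizeof(wire));
	memcpy(wire, &r, sizeof(wire));
	assert(r.x == 0x4000 && r.y == 0x4000);
	return (0);
}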