Index: head/sys/amd64/amd64/apic_vector.S
===================================================================
--- head/sys/amd64/amd64/apic_vector.S	(revision 282211)
+++ head/sys/amd64/amd64/apic_vector.S	(revision 282212)
@@ -1,335 +1,351 @@
 /*-
  * Copyright (c) 1989, 1990 William F. Jolitz.
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * from: vector.s, 386BSD 0.1 unknown origin
  * $FreeBSD$
  */
 
 /*
  * Interrupt entry points for external interrupts triggered by I/O APICs
  * as well as IPI handlers.
  */
 
 #include "opt_smp.h"
 
 #include <machine/asmacros.h>
 #include <machine/specialreg.h>
 #include <x86/apicreg.h>
 
 #include "assym.s"
 
 #ifdef SMP
 #define LK	lock ;
 #else
 #define LK
 #endif
 
 	.text
 	SUPERALIGN_TEXT
 	/* End Of Interrupt to APIC */
 as_lapic_eoi:
 	cmpl	$0,x2apic_mode
 	jne	1f
 	movq	lapic_map,%rax
 	movl	$0,LA_EOI(%rax)
 	ret
 1:
 	movl	$MSR_APIC_EOI,%ecx
 	xorl	%eax,%eax
 	xorl	%edx,%edx
 	wrmsr
 	ret
 
 /*
  * I/O Interrupt Entry Point.  Rather than having one entry point for
  * each interrupt source, we use one entry point for each 32-bit word
  * in the ISR.  The handler determines the highest bit set in the ISR,
  * translates that into a vector, and passes the vector to the
  * lapic_handle_intr() function.
  */
 #define	ISR_VEC(index, vec_name)					\
 	.text ;								\
 	SUPERALIGN_TEXT ;						\
 IDTVEC(vec_name) ;							\
 	PUSH_FRAME ;							\
 	FAKE_MCOUNT(TF_RIP(%rsp)) ;					\
 	cmpl	$0,x2apic_mode ;					\
 	je	1f ;							\
 	movl	$(MSR_APIC_ISR0 + index),%ecx ;				\
 	rdmsr ;								\
 	jmp	2f ;							\
 1: ;									\
 	movq	lapic_map, %rdx ;	/* pointer to local APIC */	\
 	movl	LA_ISR + 16 * (index)(%rdx), %eax ;	/* load ISR */	\
 2: ;									\
 	bsrl	%eax, %eax ;	/* index of highest set bit in ISR */	\
 	jz	3f ;							\
 	addl	$(32 * index),%eax ;					\
 	movq	%rsp, %rsi ;						\
 	movl	%eax, %edi ;	/* pass the IRQ */			\
 	call	lapic_handle_intr ;					\
 3: ;									\
 	MEXITCOUNT ;							\
 	jmp	doreti
 
 /*
  * Handle "spurious INTerrupts".
  * Notes:
  *  This is different than the "spurious INTerrupt" generated by an
  *  8259 PIC for missing INTs.  See the APIC documentation for details.
  *  This routine should NOT do an 'EOI' cycle.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(spuriousint)
 
 	/* No EOI cycle used here */
 
 	jmp	doreti_iret
 
 	ISR_VEC(1, apic_isr1)
 	ISR_VEC(2, apic_isr2)
 	ISR_VEC(3, apic_isr3)
 	ISR_VEC(4, apic_isr4)
 	ISR_VEC(5, apic_isr5)
 	ISR_VEC(6, apic_isr6)
 	ISR_VEC(7, apic_isr7)
 
 /*
  * Local APIC periodic timer handler.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(timerint)
 	PUSH_FRAME
 	FAKE_MCOUNT(TF_RIP(%rsp))
 	movq	%rsp, %rdi
 	call	lapic_handle_timer
 	MEXITCOUNT
 	jmp	doreti
 
 /*
  * Local APIC CMCI handler.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(cmcint)
 	PUSH_FRAME
 	FAKE_MCOUNT(TF_RIP(%rsp))
 	call	lapic_handle_cmc
 	MEXITCOUNT
 	jmp	doreti
 
 /*
  * Local APIC error interrupt handler.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(errorint)
 	PUSH_FRAME
 	FAKE_MCOUNT(TF_RIP(%rsp))
 	call	lapic_handle_error
 	MEXITCOUNT
 	jmp	doreti
 
 #ifdef XENHVM
 /*
  * Xen event channel upcall interrupt handler.
  * Only used when the hypervisor supports direct vector callbacks.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(xen_intr_upcall)
 	PUSH_FRAME
 	FAKE_MCOUNT(TF_RIP(%rsp))
 	movq	%rsp, %rdi
 	call	xen_intr_handle_upcall
 	MEXITCOUNT
 	jmp	doreti
 #endif
 
+#ifdef HYPERV
+/*
+ * This is the Hyper-V vmbus channel direct callback interrupt.
+ * Only used when it is running on Hyper-V.
+ */
+	.text
+	SUPERALIGN_TEXT
+IDTVEC(hv_vmbus_callback)
+	PUSH_FRAME
+	FAKE_MCOUNT(TF_RIP(%rsp))
+	movq	%rsp, %rdi
+	call	hv_vector_handler
+	MEXITCOUNT
+	jmp	doreti
+#endif
+
 #ifdef SMP
 /*
  * Global address space TLB shootdown.
  */
 	.text
 
 #define NAKE_INTR_CS	24
 
 	SUPERALIGN_TEXT
 invltlb_ret:
 	call	as_lapic_eoi
 	POP_FRAME
 	jmp	doreti_iret
 
 	SUPERALIGN_TEXT
 IDTVEC(invltlb_pcid)
 	PUSH_FRAME
 	call	invltlb_pcid_handler
 	jmp	invltlb_ret
 
 	SUPERALIGN_TEXT
 IDTVEC(invltlb)
 	PUSH_FRAME
 	call	invltlb_handler
 	jmp	invltlb_ret
 
 /*
  * Single page TLB shootdown
  */
 	.text
 
 	SUPERALIGN_TEXT
 IDTVEC(invlpg_pcid)
 	PUSH_FRAME
 	call	invlpg_pcid_handler
 	jmp	invltlb_ret
 
 	SUPERALIGN_TEXT
 IDTVEC(invlpg)
 	PUSH_FRAME
 	call	invlpg_handler
 	jmp	invltlb_ret
 
 /*
  * Page range TLB shootdown.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(invlrng)
 	PUSH_FRAME
 	call	invlrng_handler
 	jmp	invltlb_ret
 
 /*
  * Invalidate cache.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(invlcache)
 	PUSH_FRAME
 	call	invlcache_handler
 	jmp	invltlb_ret
 
 /*
  * Handler for IPIs sent via the per-cpu IPI bitmap.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(ipi_intr_bitmap_handler)
 	PUSH_FRAME
 	call	as_lapic_eoi
 	FAKE_MCOUNT(TF_RIP(%rsp))
 	call	ipi_bitmap_handler
 	MEXITCOUNT
 	jmp	doreti
 
 /*
  * Executed by a CPU when it receives an IPI_STOP from another CPU.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(cpustop)
 	PUSH_FRAME
 	call	as_lapic_eoi
 	call	cpustop_handler
 	jmp	doreti
 
 /*
  * Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(cpususpend)
 	PUSH_FRAME
 	call	cpususpend_handler
 	call	as_lapic_eoi
 	jmp	doreti
 
 /*
  * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU.
  *
  * - Calls the generic rendezvous action function.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(rendezvous)
 	PUSH_FRAME
 #ifdef COUNT_IPIS
 	movl	PCPU(CPUID), %eax
 	movq	ipi_rendezvous_counts(,%rax,8), %rax
 	incq	(%rax)
 #endif
 	call	smp_rendezvous_action
 	call	as_lapic_eoi
 	jmp	doreti
 
 /*
  * IPI handler whose purpose is to interrupt the CPU with minimum overhead.
  * This is used by bhyve to force a host cpu executing in guest context to
  * trap into the hypervisor.
  *
  * This handler is different from other IPI handlers in the following aspects:
  *
  * 1. It doesn't push a trapframe on the stack.
  *
  * This implies that a DDB backtrace involving 'justreturn' will skip the
  * function that was interrupted by this handler.
  *
  * 2. It doesn't 'swapgs' when userspace is interrupted.
  *
  * The 'justreturn' handler does not access any pcpu data so it is not an
  * issue.  Moreover the 'justreturn' handler can only be interrupted by an NMI
  * whose handler already doesn't trust GS.base when kernel code is interrupted.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(justreturn)
 	pushq	%rax
 	pushq	%rcx
 	pushq	%rdx
 	call	as_lapic_eoi
 	popq	%rdx
 	popq	%rcx
 	popq	%rax
 	jmp	doreti_iret
 
 #endif /* SMP */
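A quick aside on the ISR_VEC mechanism above: the vector arithmetic is easy to mirror in C. The sketch below is purely illustrative (it is not part of this commit); it models how one 32-bit ISR word plus its word index yields the vector that lapic_handle_intr() receives, with __builtin_clz standing in for bsrl.

	#include <stdint.h>

	/*
	 * Illustrative C model of ISR_VEC's vector math: "index" is the
	 * 32-bit ISR word number (1-7 for apic_isr1..apic_isr7) and
	 * "isr_word" is that word's contents.
	 */
	static int
	isr_word_to_vector(int index, uint32_t isr_word)
	{
		int bit;

		if (isr_word == 0)
			return (-1);	/* nothing pending; mirrors the "jz 3f" path */
		bit = 31 - __builtin_clz(isr_word);	/* highest set bit, like bsrl */
		return (32 * index + bit);	/* like "addl $(32 * index),%eax" */
	}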
Index: head/sys/amd64/conf/GENERIC
===================================================================
--- head/sys/amd64/conf/GENERIC	(revision 282211)
+++ head/sys/amd64/conf/GENERIC	(revision 282212)
@@ -1,356 +1,358 @@
 #
 # GENERIC -- Generic kernel configuration file for FreeBSD/amd64
 #
 # For more information on this file, please read the config(5) manual page,
 # and/or the handbook section on Kernel Configuration Files:
 #
 #    http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
 #
 # The handbook is also available locally in /usr/share/doc/handbook
 # if you've installed the doc distribution, otherwise always see the
 # FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the
 # latest information.
 #
 # An exhaustive list of options and more detailed explanations of the
 # device lines is also present in the ../../conf/NOTES and NOTES files.
 # If you are in doubt as to the purpose or necessity of a line, check first
 # in NOTES.
 #
 # $FreeBSD$
 
 cpu		HAMMER
 ident		GENERIC
 
 makeoptions	DEBUG=-g		# Build kernel with gdb(1) debug symbols
 makeoptions	WITH_CTF=1		# Run ctfconvert(1) for DTrace support
 
 options 	SCHED_ULE		# ULE scheduler
 options 	PREEMPTION		# Enable kernel thread preemption
 options 	INET			# InterNETworking
 options 	INET6			# IPv6 communications protocols
 options 	TCP_OFFLOAD		# TCP offload
 options 	SCTP			# Stream Control Transmission Protocol
 options 	FFS			# Berkeley Fast Filesystem
 options 	SOFTUPDATES		# Enable FFS soft updates support
 options 	UFS_ACL			# Support for access control lists
 options 	UFS_DIRHASH		# Improve performance on big directories
 options 	UFS_GJOURNAL		# Enable gjournal-based UFS journaling
 options 	QUOTA			# Enable disk quotas for UFS
 options 	MD_ROOT			# MD is a potential root device
 options 	NFSCL			# Network Filesystem Client
 options 	NFSD			# Network Filesystem Server
 options 	NFSLOCKD		# Network Lock Manager
 options 	NFS_ROOT		# NFS usable as /, requires NFSCL
 options 	MSDOSFS			# MSDOS Filesystem
 options 	CD9660			# ISO 9660 Filesystem
 options 	PROCFS			# Process filesystem (requires PSEUDOFS)
 options 	PSEUDOFS		# Pseudo-filesystem framework
 options 	GEOM_PART_GPT		# GUID Partition Tables.
 options 	GEOM_RAID		# Soft RAID functionality.
 options 	GEOM_LABEL		# Provides labelization
 options 	COMPAT_FREEBSD32	# Compatible with i386 binaries
 options 	COMPAT_FREEBSD4		# Compatible with FreeBSD4
 options 	COMPAT_FREEBSD5		# Compatible with FreeBSD5
 options 	COMPAT_FREEBSD6		# Compatible with FreeBSD6
 options 	COMPAT_FREEBSD7		# Compatible with FreeBSD7
 options 	COMPAT_FREEBSD9		# Compatible with FreeBSD9
 options 	COMPAT_FREEBSD10	# Compatible with FreeBSD10
 options 	SCSI_DELAY=5000		# Delay (in ms) before probing SCSI
 options 	KTRACE			# ktrace(1) support
 options 	STACK			# stack(9) support
 options 	SYSVSHM			# SYSV-style shared memory
 options 	SYSVMSG			# SYSV-style message queues
 options 	SYSVSEM			# SYSV-style semaphores
 options 	_KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
 options 	PRINTF_BUFR_SIZE=128	# Prevent printf output being interspersed.
options KBD_INSTALL_CDEV # install a CDEV entry in /dev options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options KDTRACE_FRAME # Ensure frames are compiled in options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data options INCLUDE_CONFIG_FILE # Include this file in kernel # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. # For full debugger support use (turn off in stable branch): options DDB # Support DDB. options GDB # Support remote GDB. options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS options WITNESS # Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones # Make an SMP-capable kernel by default options SMP # Symmetric MultiProcessor Kernel # CPU frequency control device cpufreq # Bus support. device acpi options ACPI_DMAR device pci options PCI_IOV # PCI SR-IOV support # Floppy drives device fdc # ATA controllers device ahci # AHCI-compatible SATA controllers device ata # Legacy ATA/SATA controllers options ATA_STATIC_ID # Static device numbering device mvs # Marvell 88SX50XX/88SX60XX/88SX70XX/SoC SATA device siis # SiliconImage SiI3124/SiI3132/SiI3531 SATA # SCSI Controllers device ahc # AHA2940 and onboard AIC7xxx devices options AHC_REG_PRETTY_PRINT # Print register bitfields in debug # output. Adds ~128k to driver. device ahd # AHA39320/29320 and onboard AIC79xx devices options AHD_REG_PRETTY_PRINT # Print register bitfields in debug # output. Adds ~215k to driver. device esp # AMD Am53C974 (Tekram DC-390(T)) device hptiop # Highpoint RocketRaid 3xxx series device isp # Qlogic family #device ispfw # Firmware for QLogic HBAs- normally a module device mpt # LSI-Logic MPT-Fusion device mps # LSI-Logic MPT-Fusion 2 device mpr # LSI-Logic MPT-Fusion 3 #device ncr # NCR/Symbios Logic device sym # NCR/Symbios Logic (newer chipsets + those of `ncr') device trm # Tekram DC395U/UW/F DC315U adapters device adv # Advansys SCSI adapters device adw # Advansys wide SCSI adapters device aic # Adaptec 15[012]x SCSI adapters, AIC-6[23]60. 
device bt # Buslogic/Mylex MultiMaster SCSI adapters device isci # Intel C600 SAS controller # ATA/SCSI peripherals device scbus # SCSI bus (required for ATA/SCSI) device ch # SCSI media changers device da # Direct Access (disks) device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) device ses # Enclosure Services (SES and SAF-TE) #device ctl # CAM Target Layer # RAID controllers interfaced to the SCSI subsystem device amr # AMI MegaRAID device arcmsr # Areca SATA II RAID device ciss # Compaq Smart RAID 5* device dpt # DPT Smartcache III, IV - See NOTES for options device hptmv # Highpoint RocketRAID 182x device hptnr # Highpoint DC7280, R750 device hptrr # Highpoint RocketRAID 17xx, 22xx, 23xx, 25xx device hpt27xx # Highpoint RocketRAID 27xx device iir # Intel Integrated RAID device ips # IBM (Adaptec) ServeRAID device mly # Mylex AcceleRAID/eXtremeRAID device twa # 3ware 9000 series PATA/SATA RAID device tws # LSI 3ware 9750 SATA+SAS 6Gb/s RAID controller # RAID controllers device aac # Adaptec FSA RAID device aacp # SCSI passthrough for aac (requires CAM) device aacraid # Adaptec by PMC RAID device ida # Compaq Smart RAID device mfi # LSI MegaRAID SAS device mlx # Mylex DAC960 family device mrsas # LSI/Avago MegaRAID SAS/SATA, 6Gb/s and 12Gb/s #XXX pointer/int warnings #device pst # Promise Supertrak SX6000 device twe # 3ware ATA RAID # atkbdc0 controls both the keyboard and the PS/2 mouse device atkbdc # AT keyboard controller device atkbd # AT keyboard device psm # PS/2 mouse device kbdmux # keyboard multiplexer device vga # VGA video card driver options VESA # Add support for VESA BIOS Extensions (VBE) device splash # Splash screen and screen saver support # syscons is the default console driver, resembling an SCO console device sc options SC_PIXEL_MODE # add support for the raster text mode # vt is the new video console driver device vt device vt_vga device vt_efifb device agp # support several AGP chipsets # PCCARD (PCMCIA) support # PCMCIA and cardbus bridge support device cbb # cardbus (yenta) bridge device pccard # PC Card (16-bit) bus device cardbus # CardBus (32-bit) bus # Serial (COM) ports device uart # Generic UART driver # Parallel port device ppc device ppbus # Parallel port bus (required) device lpt # Printer device ppi # Parallel port interface device #device vpo # Requires scbus and da device puc # Multi I/O cards and multi-channel UARTs # PCI Ethernet NICs. device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE device de # DEC/Intel DC21x4x (``Tulip'') device em # Intel PRO/1000 Gigabit Ethernet Family device igb # Intel PRO/1000 PCIE Server Gigabit Family device ix # Intel PRO/10GbE PCIE PF Ethernet device ixv # Intel PRO/10GbE PCIE VF Ethernet device ixl # Intel XL710 40Gbe PCIE Ethernet device ixlv # Intel XL710 40Gbe VF PCIE Ethernet device le # AMD Am7900 LANCE and Am79C9xx PCnet device ti # Alteon Networks Tigon I/II gigabit Ethernet device txp # 3Com 3cR990 (``Typhoon'') device vx # 3Com 3c590, 3c595 (``Vortex'') # PCI Ethernet NICs that use the common MII bus controller code. # NOTE: Be sure to keep the 'device miibus' line in order to use these NICs! 
device miibus # MII bus support device ae # Attansic/Atheros L2 FastEthernet device age # Attansic/Atheros L1 Gigabit Ethernet device alc # Atheros AR8131/AR8132 Ethernet device ale # Atheros AR8121/AR8113/AR8114 Ethernet device bce # Broadcom BCM5706/BCM5708 Gigabit Ethernet device bfe # Broadcom BCM440x 10/100 Ethernet device bge # Broadcom BCM570xx Gigabit Ethernet device cas # Sun Cassini/Cassini+ and NS DP83065 Saturn device dc # DEC/Intel 21143 and various workalikes device et # Agere ET1310 10/100/Gigabit Ethernet device fxp # Intel EtherExpress PRO/100B (82557, 82558) device gem # Sun GEM/Sun ERI/Apple GMAC device hme # Sun HME (Happy Meal Ethernet) device jme # JMicron JMC250 Gigabit/JMC260 Fast Ethernet device lge # Level 1 LXT1001 gigabit Ethernet device msk # Marvell/SysKonnect Yukon II Gigabit Ethernet device nfe # nVidia nForce MCP on-board Ethernet device nge # NatSemi DP83820 gigabit Ethernet device pcn # AMD Am79C97x PCI 10/100 (precedence over 'le') device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 device sf # Adaptec AIC-6915 (``Starfire'') device sge # Silicon Integrated Systems SiS190/191 device sis # Silicon Integrated Systems SiS 900/SiS 7016 device sk # SysKonnect SK-984x & SK-982x gigabit Ethernet device ste # Sundance ST201 (D-Link DFE-550TX) device stge # Sundance/Tamarack TC9021 gigabit Ethernet device tl # Texas Instruments ThunderLAN device tx # SMC EtherPower II (83c170 ``EPIC'') device vge # VIA VT612x gigabit Ethernet device vr # VIA Rhine, Rhine II device wb # Winbond W89C840F device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'') # Wireless NIC cards device wlan # 802.11 support options IEEE80211_DEBUG # enable debug msgs options IEEE80211_AMPDU_AGE # age frames in AMPDU reorder q's options IEEE80211_SUPPORT_MESH # enable 802.11s draft support device wlan_wep # 802.11 WEP support device wlan_ccmp # 802.11 CCMP support device wlan_tkip # 802.11 TKIP support device wlan_amrr # AMRR transmit rate control algorithm device an # Aironet 4500/4800 802.11 wireless NICs. device ath # Atheros NICs device ath_pci # Atheros pci/cardbus glue device ath_hal # pci/cardbus chip support options AH_SUPPORT_AR5416 # enable AR5416 tx/rx descriptors options AH_AR5416_INTERRUPT_MITIGATION # AR5416 interrupt mitigation options ATH_ENABLE_11N # Enable 802.11n support for AR5416 and later device ath_rate_sample # SampleRate tx rate control for ath #device bwi # Broadcom BCM430x/BCM431x wireless NICs. #device bwn # Broadcom BCM43xx wireless NICs. device ipw # Intel 2100 wireless NICs. device iwi # Intel 2200BG/2225BG/2915ABG wireless NICs. device iwn # Intel 4965/1000/5000/6000 wireless NICs. device malo # Marvell Libertas wireless NICs. device mwl # Marvell 88W8363 802.11n wireless NICs. device ral # Ralink Technology RT2500 wireless NICs. device wi # WaveLAN/Intersil/Symbol 802.11 wireless NICs. device wpi # Intel 3945ABG wireless NICs. # Pseudo devices. device loop # Network loopback device random # Entropy device device padlock_rng # VIA Padlock RNG device rdrand_rng # Intel Bull Mountain RNG device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. 
 device		bpf		# Berkeley packet filter
 
 # USB support
 options 	USB_DEBUG	# enable debug msgs
 device		uhci		# UHCI PCI->USB interface
 device		ohci		# OHCI PCI->USB interface
 device		ehci		# EHCI PCI->USB interface (USB 2.0)
 device		xhci		# XHCI PCI->USB interface (USB 3.0)
 device		usb		# USB Bus (required)
 device		ukbd		# Keyboard
 device		umass		# Disks/Mass storage - Requires scbus and da
 
 # Sound support
 device		sound		# Generic sound driver (required)
 device		snd_cmi		# CMedia CMI8338/CMI8738
 device		snd_csa		# Crystal Semiconductor CS461x/428x
 device		snd_emu10kx	# Creative SoundBlaster Live! and Audigy
 device		snd_es137x	# Ensoniq AudioPCI ES137x
 device		snd_hda		# Intel High Definition Audio
 device		snd_ich		# Intel, NVidia and other ICH AC'97 Audio
 device		snd_via8233	# VIA VT8233x Audio
 
 # MMC/SD
 device		mmc		# MMC/SD bus
 device		mmcsd		# MMC/SD memory card
 device		sdhci		# Generic PCI SD Host Controller
 
 # VirtIO support
 device		virtio		# Generic VirtIO bus (required)
 device		virtio_pci	# VirtIO PCI device
 device		vtnet		# VirtIO Ethernet device
 device		virtio_blk	# VirtIO Block device
 device		virtio_scsi	# VirtIO SCSI device
 device		virtio_balloon	# VirtIO Memory Balloon device
 
-# HyperV drivers
+# HyperV drivers and enhancement support
+# NOTE: HYPERV depends on hyperv.  They must be added or removed together.
+options 	HYPERV		# Hyper-V kernel infrastructure
 device		hyperv		# HyperV drivers
 
 # Xen HVM Guest Optimizations
 # NOTE: XENHVM depends on xenpci.  They must be added or removed together.
 options 	XENHVM		# Xen HVM kernel infrastructure
 device		xenpci		# Xen HVM Hypervisor services driver
 
 # VMware support
 device		vmx		# VMware VMXNET3 Ethernet
 
 # Netmap provides direct access to TX/RX rings on supported NICs
 device		netmap		# netmap(4) support
Index: head/sys/amd64/conf/NOTES
===================================================================
--- head/sys/amd64/conf/NOTES	(revision 282211)
+++ head/sys/amd64/conf/NOTES	(revision 282212)
@@ -1,670 +1,672 @@
 #
 # NOTES -- Lines that can be cut/pasted into kernel and hints configs.
 #
 # This file contains machine dependent kernel configuration notes.  For
 # machine independent notes, look in /sys/conf/NOTES.
 #
 # $FreeBSD$
 #
 
 #
 # We want LINT to cover profiling as well.
 profile		2
 
 #
 # Enable the kernel DTrace hooks which are required to load the DTrace
 # kernel modules.
 #
 options 	KDTRACE_HOOKS
 
 
 #####################################################################
 # SMP OPTIONS:
 #
 # Notes:
 #
 # IPI_PREEMPTION instructs the kernel to preempt threads running on other
 #	  CPUS if needed.  Relies on the PREEMPTION option
 
 # Optional:
 options 	IPI_PREEMPTION
 device		atpic			# Optional legacy pic support
 device		mptable			# Optional MPSPEC mptable support
 
 #
 # Watchdog routines.
 #
 options 	MP_WATCHDOG
 
 # Debugging options.
 #
 options 	COUNT_XINVLTLB_HITS	# Counters for TLB events
 options 	COUNT_IPIS		# Per-CPU IPI interrupt counters
 
 
 #####################################################################
 # CPU OPTIONS
 
 #
 # You must specify at least one CPU (the one you intend to run on);
 # deleting the specification for CPUs you don't need to use may make
 # parts of the system run faster.
 #
 cpu		HAMMER			# aka K8, aka Opteron & Athlon64
 
 #
 # Options for CPU features.
 #
 
 #
 # PERFMON causes the driver for Pentium/Pentium Pro performance counters
 # to be compiled.  See perfmon(4) for more information.
 #
 #XXX#options 	PERFMON
 
 
 #####################################################################
 # NETWORKING OPTIONS
 
 #
 # DEVICE_POLLING adds support for mixed interrupt-polling handling
 # of network device drivers, which has significant benefits in terms
 # of robustness to overloads and responsivity, as well as permitting
 # accurate scheduling of the CPU time between kernel network processing
 # and other activities.  The drawback is a moderate (up to 1/HZ seconds)
 # potential increase in response times.
 # It is strongly recommended to use HZ=1000 or 2000 with DEVICE_POLLING
 # to achieve smoother behaviour.
 # Additionally, you can enable/disable polling at runtime with help of
 # the ifconfig(8) utility, and select the CPU fraction reserved to
 # userland with the sysctl variable kern.polling.user_frac
 # (default 50, range 0..100).
 #
 # Not all device drivers support this mode of operation at the time of
 # this writing.  See polling(4) for more details.
 
 options 	DEVICE_POLLING
 
 # BPF_JITTER adds support for BPF just-in-time compiler.
 
 options 	BPF_JITTER
 
 # OpenFabrics Enterprise Distribution (Infiniband).
 options 	OFED
 options 	OFED_DEBUG_INIT
 
 # Sockets Direct Protocol
 options 	SDP
 options 	SDP_DEBUG
 
 # IP over Infiniband
 options 	IPOIB
 options 	IPOIB_DEBUG
 options 	IPOIB_CM
 
 
 #####################################################################
 # CLOCK OPTIONS
 
 # Provide read/write access to the memory in the clock chip.
 device		nvram		# Access to rtc cmos via /dev/nvram
 
 
 #####################################################################
 # MISCELLANEOUS DEVICES AND OPTIONS
 
 device		speaker		#Play IBM BASIC-style noises out your speaker
 hint.speaker.0.at="isa"
 hint.speaker.0.port="0x61"
 device		gzip		#Exec gzipped a.out's.  REQUIRES COMPAT_AOUT!
 
 
 #####################################################################
 # HARDWARE BUS CONFIGURATION
 
 #
 # ISA bus
 #
 device		isa
 
 #
 # Options for `isa':
 #
 # AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A
 # interrupt controller.  This saves about 0.7-1.25 usec for each interrupt.
 # This option breaks suspend/resume on some portables.
 #
 # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A
 # interrupt controller.  This saves about 0.7-1.25 usec for each interrupt.
 # Automatic EOI is documented not to work for the slave with the
 # original i8259A, but it works for some clones and some integrated
 # versions.
 #
 # MAXMEM specifies the amount of RAM on the machine; if this is not
 # specified, FreeBSD will first read the amount of memory from the CMOS
 # RAM, so the amount of memory will initially be limited to 64MB or 16MB
 # depending on the BIOS.  If the BIOS reports 64MB, a memory probe will
 # then attempt to detect the installed amount of RAM.  If this probe
 # fails to detect >64MB RAM you will have to use the MAXMEM option.
 # The amount is in kilobytes, so for a machine with 128MB of RAM, it would
 # be 131072 (128 * 1024).
 #
 # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to
 # reset the CPU for reboot.  This is needed on some systems with broken
 # keyboard controllers.
 
 options 	AUTO_EOI_1
 #options 	AUTO_EOI_2
 
 options 	MAXMEM=(128*1024)
 #options 	BROKEN_KEYBOARD_RESET
 
 #
 # PCI bus & PCI options:
 #
 device		pci
 
 #
 # AGP GART support
 device		agp
 
 #
 # AGP debugging.
 #
 options 	AGP_DEBUG
 
 
 #####################################################################
 # HARDWARE DEVICE CONFIGURATION
 
 # To include support for VGA VESA video modes
 options 	VESA
 
 # Turn on extra debugging checks and output for VESA support.
options VESA_DEBUG device dpms # DPMS suspend & resume via VESA BIOS # x86 real mode BIOS emulator, required by atkbdc/dpms/vesa options X86BIOS # # Optional devices: # # PS/2 mouse device psm hint.psm.0.at="atkbdc" hint.psm.0.irq="12" # Options for psm: options PSM_HOOKRESUME #hook the system resume event, useful #for some laptops options PSM_RESETAFTERSUSPEND #reset the device at the resume event # The keyboard controller; it controls the keyboard and the PS/2 mouse. device atkbdc hint.atkbdc.0.at="isa" hint.atkbdc.0.port="0x060" # The AT keyboard device atkbd hint.atkbd.0.at="atkbdc" hint.atkbd.0.irq="1" # Options for atkbd: options ATKBD_DFLT_KEYMAP # specify the built-in keymap makeoptions ATKBD_DFLT_KEYMAP=fr.dvorak # `flags' for atkbd: # 0x01 Force detection of keyboard, else we always assume a keyboard # 0x02 Don't reset keyboard, useful for some newer ThinkPads # 0x03 Force detection and avoid reset, might help with certain # dockingstations # 0x04 Old-style (XT) keyboard support, useful for older ThinkPads # Video card driver for VGA adapters. device vga hint.vga.0.at="isa" # Options for vga: # Try the following option if the mouse pointer is not drawn correctly # or font does not seem to be loaded properly. May cause flicker on # some systems. options VGA_ALT_SEQACCESS # If you can dispense with some vga driver features, you may want to # use the following options to save some memory. #options VGA_NO_FONT_LOADING # don't save/load font #options VGA_NO_MODE_CHANGE # don't change video modes # Older video cards may require this option for proper operation. options VGA_SLOW_IOACCESS # do byte-wide i/o's to TS and GDC regs # The following option probably won't work with the LCD displays. options VGA_WIDTH90 # support 90 column modes # Debugging. options VGA_DEBUG # vt(4) drivers. device vt_vga # VGA device vt_efifb # EFI framebuffer # Linear framebuffer driver for S3 VESA 1.2 cards. Works on top of VESA. device s3pci # 3Dfx Voodoo Graphics, Voodoo II /dev/3dfx CDEV support. This will create # the /dev/3dfx0 device to work with glide implementations. This should get # linked to /dev/3dfx and /dev/voodoo. Note that this is not the same as # the tdfx DRI module from XFree86 and is completely unrelated. # # To enable Linuxulator support, one must also include COMPAT_LINUX in the # config as well. The other option is to load both as modules. device tdfx # Enable 3Dfx Voodoo support #XXX#device tdfx_linux # Enable Linuxulator support # # ACPI support using the Intel ACPI Component Architecture reference # implementation. # # ACPI_DEBUG enables the use of the debug.acpi.level and debug.acpi.layer # kernel environment variables to select initial debugging levels for the # Intel ACPICA code. (Note that the Intel code must also have USE_DEBUGGER # defined when it is built). device acpi options ACPI_DEBUG # The cpufreq(4) driver provides support for non-ACPI CPU frequency control device cpufreq # Direct Rendering modules for 3D acceleration. 
device drm # DRM core module required by DRM drivers device i915drm # Intel i830 through i915 device mach64drm # ATI Rage Pro, Rage Mobility P/M, Rage XL device mgadrm # AGP Matrox G200, G400, G450, G550 device r128drm # ATI Rage 128 device radeondrm # ATI Radeon device savagedrm # S3 Savage3D, Savage4 device sisdrm # SiS 300/305, 540, 630 device tdfxdrm # 3dfx Voodoo 3/4/5 and Banshee device viadrm # VIA options DRM_DEBUG # Include debug printfs (slow) # # Network interfaces: # # bxe: Broadcom NetXtreme II (BCM5771X/BCM578XX) PCIe 10Gb Ethernet # adapters. # ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503 # HP PC Lan+, various PC Card devices # (requires miibus) # ipw: Intel PRO/Wireless 2100 IEEE 802.11 adapter # Requires the ipw firmware module # iwi: Intel PRO/Wireless 2200BG/2225BG/2915ABG IEEE 802.11 adapters # Requires the iwi firmware module # iwn: Intel Wireless WiFi Link 1000/105/135/2000/4965/5000/6000/6050 abgn # 802.11 network adapters # Requires the iwn firmware module # ixl: Intel XL710 40Gbe PCIE Ethernet # ixlv: Intel XL710 40Gbe VF PCIE Ethernet # mlx4ib: Mellanox ConnectX HCA InfiniBand # mlxen: Mellanox ConnectX HCA Ethernet # mthca: Mellanox HCA InfiniBand # nfe: nVidia nForce MCP on-board Ethernet Networking (BSD open source) # sfxge: Solarflare SFC9000 family 10Gb Ethernet adapters # vmx: VMware VMXNET3 Ethernet (BSD open source) # wpi: Intel 3945ABG Wireless LAN controller # Requires the wpi firmware module device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE device ed # NE[12]000, SMC Ultra, 3c503, DS8390 cards options ED_3C503 options ED_HPP options ED_SIC device ipw # Intel 2100 wireless NICs. device iwi # Intel 2200BG/2225BG/2915ABG wireless NICs. device iwn # Intel 4965/1000/5000/6000 wireless NICs. device ixl # Intel XL710 40Gbe PCIE Ethernet device ixlv # Intel XL710 40Gbe VF PCIE Ethernet device mlx4ib # Mellanox ConnectX HCA InfiniBand device mlxen # Mellanox ConnectX HCA Ethernet device mthca # Mellanox HCA InfiniBand device nfe # nVidia nForce MCP on-board Ethernet device sfxge # Solarflare SFC9000 10Gb Ethernet device vmx # VMware VMXNET3 Ethernet device wpi # Intel 3945ABG wireless NICs. 
# IEEE 802.11 adapter firmware modules # Intel PRO/Wireless 2100 firmware: # ipwfw: BSS/IBSS/monitor mode firmware # ipwbssfw: BSS mode firmware # ipwibssfw: IBSS mode firmware # ipwmonitorfw: Monitor mode firmware # Intel PRO/Wireless 2200BG/2225BG/2915ABG firmware: # iwifw: BSS/IBSS/monitor mode firmware # iwibssfw: BSS mode firmware # iwiibssfw: IBSS mode firmware # iwimonitorfw: Monitor mode firmware # Intel Wireless WiFi Link 4965/1000/5000/6000 series firmware: # iwnfw: Single module to support all devices # iwn1000fw: Specific module for the 1000 only # iwn105fw: Specific module for the 105 only # iwn135fw: Specific module for the 135 only # iwn2000fw: Specific module for the 2000 only # iwn2030fw: Specific module for the 2030 only # iwn4965fw: Specific module for the 4965 only # iwn5000fw: Specific module for the 5000 only # iwn5150fw: Specific module for the 5150 only # iwn6000fw: Specific module for the 6000 only # iwn6000g2afw: Specific module for the 6000g2a only # iwn6000g2bfw: Specific module for the 6000g2b only # iwn6050fw: Specific module for the 6050 only # wpifw: Intel 3945ABG Wireless LAN Controller firmware device iwifw device iwibssfw device iwiibssfw device iwimonitorfw device ipwfw device ipwbssfw device ipwibssfw device ipwmonitorfw device iwnfw device iwn1000fw device iwn105fw device iwn135fw device iwn2000fw device iwn2030fw device iwn4965fw device iwn5000fw device iwn5150fw device iwn6000fw device iwn6000g2afw device iwn6000g2bfw device iwn6050fw device wpifw # Intel Non-Transparent Bridge (NTB) hardware device ntb_hw # Hardware Abstraction Layer for the NTB device if_ntb # Simulated ethernet device using the NTB # #XXX this stores pointers in a 32bit field that is defined by the hardware #device pst # # Areca 11xx and 12xx series of SATA II RAID controllers. # CAM is required. # device arcmsr # Areca SATA II RAID # # 3ware 9000 series PATA/SATA RAID controller driver and options. # The driver is implemented as a SIM, and so, needs the CAM infrastructure. # options TWA_DEBUG # 0-10; 10 prints the most messages. options TWA_FLASH_FIRMWARE # firmware image bundled when defined. device twa # 3ware 9000 series PATA/SATA RAID # # SCSI host adapters: # # ncv: NCR 53C500 based SCSI host adapters. # nsp: Workbit Ninja SCSI-3 based PC Card SCSI host adapters. # stg: TMC 18C30, 18C50 based SCSI host adapters. device ncv device nsp device stg # # Adaptec FSA RAID controllers, including integrated DELL controllers, # the Dell PERC 2/QC and the HP NetRAID-4M device aac device aacp # SCSI Passthrough interface (optional, CAM required) # # Adaptec by PMC RAID controllers, Series 6/7/8 and upcoming families device aacraid # Container interface, CAM required # # Highpoint RocketRAID 27xx. device hpt27xx # # Highpoint RocketRAID 182x. device hptmv # # Highpoint DC7280 and R750. device hptnr # # Highpoint RocketRAID. Supports RR172x, RR222x, RR2240, RR232x, RR2340, # RR2210, RR174x, RR2522, RR231x, RR230x. 
 device		hptrr
 
 #
 # Highpoint RocketRaid 3xxx series SATA RAID
 device		hptiop
 
 #
 # IBM (now Adaptec) ServeRAID controllers
 device		ips
 
 #
 # Intel C600 (Patsburg) integrated SAS controller
 device		isci
 options 	ISCI_LOGGING	# enable debugging in isci HAL
 
 #
 # NVM Express (NVMe) support
 device		nvme		# base NVMe driver
 device		nvd		# expose NVMe namespaces as disks, depends on nvme
 
 #
 # SafeNet crypto driver: can be moved to the MI NOTES as soon as
 # it's tested on a big-endian machine
 #
 device		safe		# SafeNet 1141
 options 	SAFE_DEBUG	# enable debugging support: hw.safe.debug
 options 	SAFE_RNDTEST	# enable rndtest support
 
 #
 # VirtIO support
 #
 # The virtio entry provides a generic bus for use by the device drivers.
 # It must be combined with an interface that communicates with the host.
 # Multiple such interfaces are defined by the VirtIO specification.  FreeBSD
 # only has support for PCI.  Therefore, virtio_pci must be statically
 # compiled in or loaded as a module for the device drivers to function.
 #
 device		virtio		# Generic VirtIO bus (required)
 device		virtio_pci	# VirtIO PCI Interface
 device		vtnet		# VirtIO Ethernet device
 device		virtio_blk	# VirtIO Block device
 device		virtio_scsi	# VirtIO SCSI device
 device		virtio_balloon	# VirtIO Memory Balloon device
 device		virtio_random	# VirtIO Entropy device
 device		virtio_console	# VirtIO Console device
 
+# Microsoft Hyper-V enhancement support
+options 	HYPERV		# Hyper-V kernel infrastructure
 device		hyperv		# HyperV drivers
 
 # Xen HVM Guest Optimizations
 options 	XENHVM		# Xen HVM kernel infrastructure
 device		xenpci		# Xen HVM Hypervisor services driver
 
 #####################################################################
 #
 # Miscellaneous hardware:
 #
 # ipmi: Intelligent Platform Management Interface
 # pbio: Parallel (8255 PPI) basic I/O (mode 0) port (e.g. Advantech PCL-724)
 # smbios: DMI/SMBIOS entry point
 # vpd: Vital Product Data kernel interface
 # asmc: Apple System Management Controller
 # si: Specialix International SI/XIO or SX intelligent serial card
 # tpm: Trusted Platform Module
 
 # Notes on the Specialix SI/XIO driver:
 #  The host card is memory, not IO mapped.
 #  The Rev 1 host cards use a 64K chunk, on a 32K boundary.
 #  The Rev 2 host cards use a 32K chunk, on a 32K boundary.
 #  The cards can use an IRQ of 11, 12 or 15.
 
 device		ipmi
 device		pbio
 hint.pbio.0.at="isa"
 hint.pbio.0.port="0x360"
 device		smbios
 device		vpd
 device		asmc
 device		si
 device		tpm
 device		padlock_rng	# VIA Padlock RNG
 device		rdrand_rng	# Intel Bull Mountain RNG
 device		aesni		# AES-NI OpenCrypto module
 
 #
 # Laptop/Notebook options:
 #
 
 #
 # I2C Bus
 #
 
 #
 # Hardware watchdog timers:
 #
 # ichwd: Intel ICH watchdog timer
 # amdsbwd: AMD SB7xx watchdog timer
 # viawd: VIA south bridge watchdog timer
 # wbwd: Winbond watchdog timer
 #
 device		ichwd
 device		amdsbwd
 device		viawd
 device		wbwd
 
 #
 # Temperature sensors:
 #
 # coretemp: on-die sensor on Intel Core and newer CPUs
 # amdtemp: on-die sensor on AMD K8/K10/K11 CPUs
 #
 device		coretemp
 device		amdtemp
 
 #
 # CPU control pseudo-device.  Provides access to MSRs, CPUID info and
 # microcode update feature.
 #
 device		cpuctl
 
 #
 # System Management Bus (SMB)
 #
 options 	ENABLE_ALART		# Control alarm on Intel intpm driver
 
 #
 # Number of initial kernel page table pages used for early bootstrap.
 # This number should include enough pages to map the kernel and any
 # modules or other data loaded with the kernel by the loader.  Each
 # page table page maps 2MB.
# options NKPT=31 ##################################################################### # ABI Emulation #XXX keep these here for now and reactivate when support for emulating #XXX these 32 bit binaries is added. # Enable 32-bit runtime support for FreeBSD/i386 binaries. options COMPAT_FREEBSD32 # Enable iBCS2 runtime support for SCO and ISC binaries #XXX#options IBCS2 # Emulate spx device for client side of SVR3 local X interface #XXX#options SPX_HACK # Enable Linux ABI emulation #XXX#options COMPAT_LINUX # Enable 32-bit Linux ABI emulation (requires COMPAT_43 and COMPAT_FREEBSD32) options COMPAT_LINUX32 # Enable the linux-like proc filesystem support (requires COMPAT_LINUX32 # and PSEUDOFS) options LINPROCFS #Enable the linux-like sys filesystem support (requires COMPAT_LINUX32 # and PSEUDOFS) options LINSYSFS # # SysVR4 ABI emulation # # The svr4 ABI emulator can be statically compiled into the kernel or loaded as # a KLD module. # The STREAMS network emulation code can also be compiled statically or as a # module. If loaded as a module, it must be loaded before the svr4 module # (the /usr/sbin/svr4 script does this for you). If compiling statically, # the `streams' device must be configured into any kernel which also # specifies COMPAT_SVR4. It is possible to have a statically-configured # STREAMS device and a dynamically loadable svr4 emulator; the /usr/sbin/svr4 # script understands that it doesn't need to load the `streams' module under # those circumstances. # Caveat: At this time, `options KTRACE' is required for the svr4 emulator # (whether static or dynamic). # #XXX#options COMPAT_SVR4 # build emulator statically #XXX#options DEBUG_SVR4 # enable verbose debugging #XXX#device streams # STREAMS network driver (required for svr4). ##################################################################### # VM OPTIONS # KSTACK_PAGES is the number of memory pages to assign to the kernel # stack of each thread. options KSTACK_PAGES=5 # Enable detailed accounting by the PV entry allocator. options PV_STATS ##################################################################### # More undocumented options for linting. # Note that documenting these are not considered an affront. options FB_INSTALL_CDEV # install a CDEV entry in /dev options KBDIO_DEBUG=2 options KBD_MAXRETRY=4 options KBD_MAXWAIT=6 options KBD_RESETDELAY=201 options PSM_DEBUG=1 options TIMER_FREQ=((14318182+6)/12) options VM_KMEM_SIZE options VM_KMEM_SIZE_MAX options VM_KMEM_SIZE_SCALE # Enable NDIS binary driver support options NDISAPI device ndis Index: head/sys/conf/options.amd64 =================================================================== --- head/sys/conf/options.amd64 (revision 282211) +++ head/sys/conf/options.amd64 (revision 282212) @@ -1,67 +1,69 @@ # $FreeBSD$ # Options specific to AMD64 platform kernels AUTO_EOI_1 opt_auto_eoi.h AUTO_EOI_2 opt_auto_eoi.h COUNT_XINVLTLB_HITS opt_smp.h COUNT_IPIS opt_smp.h MAXMEM PERFMON MPTABLE_FORCE_HTT MP_WATCHDOG NKPT opt_pmap.h PV_STATS opt_pmap.h # Options for emulators. These should only be used at config time, so # they are handled like options for static filesystems # (see src/sys/conf/options), except for broken debugging options. 
COMPAT_FREEBSD32 opt_compat.h #IBCS2 opt_dontuse.h #COMPAT_LINUX opt_dontuse.h COMPAT_LINUX32 opt_compat.h #COMPAT_SVR4 opt_dontuse.h #DEBUG_SVR4 opt_svr4.h LINPROCFS opt_dontuse.h LINSYSFS opt_dontuse.h NDISAPI opt_dontuse.h TIMER_FREQ opt_clock.h # options for serial support COM_ESP opt_sio.h COM_MULTIPORT opt_sio.h CONSPEED opt_sio.h GDBSPEED opt_sio.h COM_NO_ACPI opt_sio.h VGA_ALT_SEQACCESS opt_vga.h VGA_DEBUG opt_vga.h VGA_NO_FONT_LOADING opt_vga.h VGA_NO_MODE_CHANGE opt_vga.h VGA_SLOW_IOACCESS opt_vga.h VGA_WIDTH90 opt_vga.h VESA VESA_DEBUG opt_vesa.h # AGP debugging support AGP_DEBUG opt_agp.h ATKBD_DFLT_KEYMAP opt_atkbd.h # ------------------------------- # EOF # ------------------------------- HAMMER opt_cpu.h PSM_HOOKRESUME opt_psm.h PSM_RESETAFTERSUSPEND opt_psm.h PSM_DEBUG opt_psm.h DEV_ATPIC opt_atpic.h # BPF just-in-time compiler BPF_JITTER opt_bpf.h XENHVM opt_global.h +HYPERV opt_global.h + # options for the Intel C600 SAS driver (isci) ISCI_LOGGING opt_isci.h Index: head/sys/conf/options.i386 =================================================================== --- head/sys/conf/options.i386 (revision 282211) +++ head/sys/conf/options.i386 (revision 282212) @@ -1,129 +1,131 @@ # $FreeBSD$ # Options specific to the i386 platform kernels AUTO_EOI_1 opt_auto_eoi.h AUTO_EOI_2 opt_auto_eoi.h BROKEN_KEYBOARD_RESET opt_reset.h COUNT_XINVLTLB_HITS opt_smp.h COUNT_IPIS opt_smp.h DISABLE_PG_G opt_pmap.h DISABLE_PSE opt_pmap.h I586_PMC_GUPROF opt_i586_guprof.h MAXMEM MPTABLE_FORCE_HTT MP_WATCHDOG NKPT opt_pmap.h PERFMON PMAP_SHPGPERPROC opt_pmap.h POWERFAIL_NMI opt_trap.h PV_STATS opt_pmap.h # Options for emulators. These should only be used at config time, so # they are handled like options for static filesystems # (see src/sys/conf/options), except for broken debugging options. COMPAT_AOUT opt_dontuse.h IBCS2 opt_dontuse.h COMPAT_LINUX opt_dontuse.h COMPAT_SVR4 opt_dontuse.h DEBUG_SVR4 opt_svr4.h LINPROCFS opt_dontuse.h LINSYSFS opt_dontuse.h NDISAPI opt_dontuse.h # Change KVM size. Changes things all over the kernel. KVA_PAGES opt_global.h # Physical address extensions and support for >4G ram. As above. PAE opt_global.h # Use PAE page tables, but limit memory support to 4GB. # This keeps the i386 non-PAE KBI, in particular, drivers see # 32bit vm_paddr_t. PAE_TABLES opt_global.h TIMER_FREQ opt_clock.h CPU_ATHLON_SSE_HACK opt_cpu.h CPU_BLUELIGHTNING_3X opt_cpu.h CPU_BLUELIGHTNING_FPU_OP_CACHE opt_cpu.h CPU_BTB_EN opt_cpu.h CPU_CYRIX_NO_LOCK opt_cpu.h CPU_DIRECT_MAPPED_CACHE opt_cpu.h CPU_DISABLE_5X86_LSSER opt_cpu.h CPU_DISABLE_CMPXCHG opt_global.h # XXX global, unlike other CPU_* CPU_DISABLE_SSE opt_cpu.h CPU_ELAN opt_cpu.h CPU_ELAN_PPS opt_cpu.h CPU_ELAN_XTAL opt_cpu.h CPU_ENABLE_LONGRUN opt_cpu.h CPU_FASTER_5X86_FPU opt_cpu.h CPU_GEODE opt_cpu.h CPU_I486_ON_386 opt_cpu.h CPU_IORT opt_cpu.h CPU_L2_LATENCY opt_cpu.h CPU_LOOP_EN opt_cpu.h CPU_PPRO2CELERON opt_cpu.h CPU_RSTK_EN opt_cpu.h CPU_SOEKRIS opt_cpu.h CPU_SUSP_HLT opt_cpu.h CPU_UPGRADE_HW_CACHE opt_cpu.h CPU_WT_ALLOC opt_cpu.h CYRIX_CACHE_REALLY_WORKS opt_cpu.h CYRIX_CACHE_WORKS opt_cpu.h NO_F00F_HACK opt_cpu.h NO_MEMORY_HOLE opt_cpu.h # The CPU type affects the endian conversion functions all over the kernel. 
 I486_CPU		opt_global.h
 I586_CPU		opt_global.h
 I686_CPU		opt_global.h
 
 # options for serial support
 COM_ESP			opt_sio.h
 COM_MULTIPORT		opt_sio.h
 CONSPEED		opt_sio.h
 GDBSPEED		opt_sio.h
 COM_NO_ACPI		opt_sio.h
 
 VGA_ALT_SEQACCESS	opt_vga.h
 VGA_DEBUG		opt_vga.h
 VGA_NO_FONT_LOADING	opt_vga.h
 VGA_NO_MODE_CHANGE	opt_vga.h
 VGA_SLOW_IOACCESS	opt_vga.h
 VGA_WIDTH90		opt_vga.h
 
 VESA
 VESA_DEBUG		opt_vesa.h
 
 # AGP debugging support
 AGP_DEBUG		opt_agp.h
 
 PSM_DEBUG		opt_psm.h
 PSM_HOOKRESUME		opt_psm.h
 PSM_RESETAFTERSUSPEND	opt_psm.h
 
 ATKBD_DFLT_KEYMAP	opt_atkbd.h
 
 # Video spigot
 SPIGOT_UNSECURE		opt_spigot.h
 
 # Enables NETGRAPH support for Cronyx adapters
 NETGRAPH_CRONYX		opt_ng_cronyx.h
 
 # Device options
 DEV_APIC		opt_apic.h
 DEV_ATPIC		opt_atpic.h
 DEV_NPX			opt_npx.h
 
 # Debugging
 NPX_DEBUG		opt_npx.h
 
 # BPF just-in-time compiler
 BPF_JITTER		opt_bpf.h
 
 NATIVE			opt_global.h
 XEN			opt_global.h
 XENHVM			opt_global.h
+HYPERV			opt_global.h
+
 # options for the Intel C600 SAS driver (isci)
 ISCI_LOGGING	opt_isci.h
Index: head/sys/dev/hyperv/include/hyperv.h
===================================================================
--- head/sys/dev/hyperv/include/hyperv.h	(revision 282211)
+++ head/sys/dev/hyperv/include/hyperv.h	(revision 282212)
@@ -1,828 +1,981 @@
 /*-
  * Copyright (c) 2009-2012 Microsoft Corp.
  * Copyright (c) 2012 NetApp Inc.
  * Copyright (c) 2012 Citrix Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /**
  * HyperV definitions for messages that are sent between instances of the
  * Channel Management Library in separate partitions, or in some cases,
  * back to itself.
  */
 
 #ifndef __HYPERV_H__
 #define __HYPERV_H__
 
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 
 typedef uint8_t	hv_bool_uint8_t;
 
 #define HV_S_OK			0x00000000
 #define HV_E_FAIL		0x80004005
 #define HV_ERROR_NOT_SUPPORTED	0x80070032
 #define HV_ERROR_MACHINE_LOCKED	0x800704F7
 
 /*
- * A revision number of vmbus that is used for ensuring both ends on a
- * partition are using compatible versions.
+ * VMBUS version is 32 bit, upper 16 bit for major_number and lower
+ * 16 bit for minor_number.
+ *
+ * 0.13  --  Windows Server 2008
+ * 1.1   --  Windows 7
+ * 2.4   --  Windows 8
+ * 3.0   --  Windows 8.1
  */
+#define HV_VMBUS_VERSION_WS2008		((0 << 16) | (13))
+#define HV_VMBUS_VERSION_WIN7		((1 << 16) | (1))
+#define HV_VMBUS_VERSION_WIN8		((2 << 16) | (4))
+#define HV_VMBUS_VERSION_WIN8_1		((3 << 16) | (0))
 
-#define HV_VMBUS_REVISION_NUMBER	13
+#define HV_VMBUS_VERSION_INVALID	-1
+#define HV_VMBUS_VERSION_CURRENT	HV_VMBUS_VERSION_WIN8_1
+
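A quick review aside on the encoding above: since the protocol version is a plain (major << 16) | minor packing, it can be sanity-checked in a standalone program. The snippet below only re-declares one macro for illustration; it is not driver code.

	#include <stdint.h>
	#include <stdio.h>

	#define HV_VMBUS_VERSION_WIN8_1	((3 << 16) | (0))	/* as defined above */

	int
	main(void)
	{
		uint32_t ver = HV_VMBUS_VERSION_WIN8_1;

		/* Upper 16 bits are the major number, lower 16 the minor. */
		printf("vmbus protocol %u.%u\n", ver >> 16, ver & 0xffff);	/* 3.0 */
		return (0);
	}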
 /*
  * Make maximum size of pipe payload of 16K
  */
 #define HV_MAX_PIPE_DATA_PAYLOAD	(sizeof(BYTE) * 16384)
 
 /*
  * Define pipe_mode values
  */
 #define HV_VMBUS_PIPE_TYPE_BYTE		0x00000000
 #define HV_VMBUS_PIPE_TYPE_MESSAGE	0x00000004
 
 /*
  * The size of the user defined data buffer for non-pipe offers
  */
 #define HV_MAX_USER_DEFINED_BYTES	120
 
 /*
  * The size of the user defined data buffer for pipe offers
  */
 #define HV_MAX_PIPE_USER_DEFINED_BYTES	116
 
 #define HV_MAX_PAGE_BUFFER_COUNT	16
 #define HV_MAX_MULTIPAGE_BUFFER_COUNT	32
 
 #define HV_ALIGN_UP(value, align)					\
 		(((value) & (align-1)) ?				\
 		    (((value) + (align-1)) & ~(align-1) ) : (value))
 
 #define HV_ALIGN_DOWN(value, align)	( (value) & ~(align-1) )
 
 #define HV_NUM_PAGES_SPANNED(addr, len)					\
 		((HV_ALIGN_UP(addr+len, PAGE_SIZE) -			\
 		    HV_ALIGN_DOWN(addr, PAGE_SIZE)) >> PAGE_SHIFT )
 
 typedef struct hv_guid {
 	unsigned char data[16];
 } __packed hv_guid;
 
+#define HV_NIC_GUID							\
+	.data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,	\
+		0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
+
+#define HV_IDE_GUID							\
+	.data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,	\
+		0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
+
+#define HV_SCSI_GUID							\
+	.data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,	\
+		0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
+
 /*
  * At the center of the Channel Management library is
  * the Channel Offer.  This struct contains the
  * fundamental information about an offer.
  */
 typedef struct hv_vmbus_channel_offer {
 	hv_guid		interface_type;
 	hv_guid		interface_instance;
 	uint64_t	interrupt_latency_in_100ns_units;
 	uint32_t	interface_revision;
 	uint32_t	server_context_area_size; /* in bytes */
 	uint16_t	channel_flags;
 	uint16_t	mmio_megabytes;		  /* in bytes * 1024 * 1024 */
 	union {
 		/*
 		 * Non-pipes: The user has HV_MAX_USER_DEFINED_BYTES bytes.
 		 */
 		struct {
 			uint8_t	user_defined[HV_MAX_USER_DEFINED_BYTES];
 		} __packed standard;
 
 		/*
 		 * Pipes: The following structure is an integrated pipe protocol, which
 		 *        is implemented on top of standard user-defined data.  pipe
 		 *        clients have HV_MAX_PIPE_USER_DEFINED_BYTES left for their
 		 *        own use.
 		 */
 		struct {
 			uint32_t	pipe_mode;
 			uint8_t		user_defined[HV_MAX_PIPE_USER_DEFINED_BYTES];
 		} __packed pipe;
 	} u;
-	uint32_t	padding;
+	/*
+	 * Sub_channel_index, newly added in Win8.
+	 */
+	uint16_t	sub_channel_index;
+	uint16_t	padding;
 } __packed hv_vmbus_channel_offer;
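Another aside, on HV_ALIGN_UP/HV_NUM_PAGES_SPANNED above: a worked example makes the page-span computation concrete. The program below re-declares the macros and assumes the usual amd64 PAGE_SIZE/PAGE_SHIFT of 4096/12 (an assumption for the sake of the example); it shows that a 10-byte range starting 6 bytes before a page boundary spans two pages.

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT	12		/* assumed, matching amd64 */
	#define PAGE_SIZE	(1UL << PAGE_SHIFT)

	#define HV_ALIGN_UP(value, align)				\
		(((value) & (align-1)) ?				\
		    (((value) + (align-1)) & ~(align-1)) : (value))
	#define HV_ALIGN_DOWN(value, align)	((value) & ~(align-1))
	#define HV_NUM_PAGES_SPANNED(addr, len)				\
		((HV_ALIGN_UP(addr+len, PAGE_SIZE) -			\
		    HV_ALIGN_DOWN(addr, PAGE_SIZE)) >> PAGE_SHIFT)

	int
	main(void)
	{
		uint64_t addr = 4090, len = 10;	/* crosses one page boundary */

		/* Prints 2: two PFNs are needed to describe this range. */
		printf("%lu\n", (unsigned long)HV_NUM_PAGES_SPANNED(addr, len));
		return (0);
	}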
 typedef uint32_t hv_gpadl_handle;
 
 typedef struct {
 	uint16_t type;
 	uint16_t data_offset8;
 	uint16_t length8;
 	uint16_t flags;
 	uint64_t transaction_id;
 } __packed hv_vm_packet_descriptor;
 
 typedef uint32_t hv_previous_packet_offset;
 
 typedef struct {
 	hv_previous_packet_offset	previous_packet_start_offset;
 	hv_vm_packet_descriptor		descriptor;
 } __packed hv_vm_packet_header;
 
 typedef struct {
 	uint32_t byte_count;
 	uint32_t byte_offset;
 } __packed hv_vm_transfer_page;
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint16_t		transfer_page_set_id;
 	hv_bool_uint8_t		sender_owns_set;
 	uint8_t			reserved;
 	uint32_t		range_count;
 	hv_vm_transfer_page	ranges[1];
 } __packed hv_vm_transfer_page_packet_header;
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		gpadl;
 	uint32_t		reserved;
 } __packed hv_vm_gpadl_packet_header;
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		gpadl;
 	uint16_t		transfer_page_set_id;
 	uint16_t		reserved;
 } __packed hv_vm_add_remove_transfer_page_set;
 
 /*
  * This structure defines a range in guest
  * physical space that can be made
  * to look virtually contiguous.
  */
 typedef struct {
 	uint32_t byte_count;
 	uint32_t byte_offset;
 	uint64_t pfn_array[0];
 } __packed hv_gpa_range;
 
 /*
  * This is the format for an Establish Gpadl packet, which contains a handle
  * by which this GPADL will be known and a set of GPA ranges associated with
  * it.  This can be converted to a MDL by the guest OS.  If there are multiple
  * GPA ranges, then the resulting MDL will be "chained," representing multiple
  * VA ranges.
  */
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		gpadl;
 	uint32_t		range_count;
 	hv_gpa_range		range[1];
 } __packed hv_vm_establish_gpadl;
 
 /*
  * This is the format for a Teardown Gpadl packet, which indicates that the
  * GPADL handle in the Establish Gpadl packet will never be referenced again.
  */
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		gpadl;
 	/* for alignment to a 8-byte boundary */
 	uint32_t		reserved;
 } __packed hv_vm_teardown_gpadl;
 
 /*
  * This is the format for a GPA-Direct packet, which contains a set of GPA
  * ranges, in addition to commands and/or data.
  */
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		reserved;
 	uint32_t		range_count;
 	hv_gpa_range		range[1];
 } __packed hv_vm_data_gpa_direct;
 
 /*
  * This is the format for an Additional data Packet.
  */
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint64_t		total_bytes;
 	uint32_t		byte_offset;
 	uint32_t		byte_count;
 	uint8_t			data[1];
 } __packed hv_vm_additional_data;
 
 typedef union {
 	hv_vm_packet_descriptor			simple_header;
 	hv_vm_transfer_page_packet_header	transfer_page_header;
 	hv_vm_gpadl_packet_header		gpadl_header;
 	hv_vm_add_remove_transfer_page_set	add_remove_transfer_page_header;
 	hv_vm_establish_gpadl			establish_gpadl_header;
 	hv_vm_teardown_gpadl			teardown_gpadl_header;
 	hv_vm_data_gpa_direct			data_gpa_direct_header;
 } __packed hv_vm_packet_largest_possible_header;
 
 typedef enum {
 	HV_VMBUS_PACKET_TYPE_INVALID				= 0x0,
 	HV_VMBUS_PACKET_TYPES_SYNCH				= 0x1,
 	HV_VMBUS_PACKET_TYPE_ADD_TRANSFER_PAGE_SET		= 0x2,
 	HV_VMBUS_PACKET_TYPE_REMOVE_TRANSFER_PAGE_SET		= 0x3,
 	HV_VMBUS_PACKET_TYPE_ESTABLISH_GPADL			= 0x4,
 	HV_VMBUS_PACKET_TYPE_TEAR_DOWN_GPADL			= 0x5,
 	HV_VMBUS_PACKET_TYPE_DATA_IN_BAND			= 0x6,
 	HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES		= 0x7,
 	HV_VMBUS_PACKET_TYPE_DATA_USING_GPADL			= 0x8,
 	HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT		= 0x9,
 	HV_VMBUS_PACKET_TYPE_CANCEL_REQUEST			= 0xa,
 	HV_VMBUS_PACKET_TYPE_COMPLETION				= 0xb,
 	HV_VMBUS_PACKET_TYPE_DATA_USING_ADDITIONAL_PACKETS	= 0xc,
 	HV_VMBUS_PACKET_TYPE_ADDITIONAL_DATA			= 0xd
 } hv_vmbus_packet_type;
 
 #define HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED	1
 
 /*
  * Version 1 messages
  */
 typedef enum {
 	HV_CHANNEL_MESSAGE_INVALID			= 0,
 	HV_CHANNEL_MESSAGE_OFFER_CHANNEL		= 1,
 	HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER	= 2,
 	HV_CHANNEL_MESSAGE_REQUEST_OFFERS		= 3,
 	HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED		= 4,
 	HV_CHANNEL_MESSAGE_OPEN_CHANNEL			= 5,
 	HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT		= 6,
 	HV_CHANNEL_MESSAGE_CLOSE_CHANNEL		= 7,
 	HV_CHANNEL_MESSAGEL_GPADL_HEADER		= 8,
 	HV_CHANNEL_MESSAGE_GPADL_BODY			= 9,
 	HV_CHANNEL_MESSAGE_GPADL_CREATED		= 10,
 	HV_CHANNEL_MESSAGE_GPADL_TEARDOWN		= 11,
 	HV_CHANNEL_MESSAGE_GPADL_TORNDOWN		= 12,
 	HV_CHANNEL_MESSAGE_REL_ID_RELEASED		= 13,
 	HV_CHANNEL_MESSAGE_INITIATED_CONTACT		= 14,
 	HV_CHANNEL_MESSAGE_VERSION_RESPONSE		= 15,
 	HV_CHANNEL_MESSAGE_UNLOAD			= 16,
 #ifdef HV_VMBUS_FEATURE_PARENT_OR_PEER_MEMORY_MAPPED_INTO_A_CHILD
 	HV_CHANNEL_MESSAGE_VIEW_RANGE_ADD		= 17,
 	HV_CHANNEL_MESSAGE_VIEW_RANGE_REMOVE		= 18,
 #endif
 	HV_CHANNEL_MESSAGE_COUNT
 } hv_vmbus_channel_msg_type;
 
 typedef struct {
 	hv_vmbus_channel_msg_type	message_type;
 	uint32_t			padding;
 } __packed hv_vmbus_channel_msg_header;
 
 /*
  * Query VMBus Version parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			version;
 } __packed hv_vmbus_channel_query_vmbus_version;
 
 /*
  * VMBus Version Supported parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	hv_bool_uint8_t			version_supported;
 } __packed hv_vmbus_channel_version_supported;
 
 /*
  * Channel Offer parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	hv_vmbus_channel_offer		offer;
 	uint32_t			child_rel_id;
 	uint8_t				monitor_id;
-	hv_bool_uint8_t			monitor_allocated;
+	/*
+	 * This field has been split into a bit field on Win7
+	 * and higher.
+	 */
+	uint8_t				monitor_allocated:1;
+	uint8_t				reserved:7;
+	/*
+	 * Following fields were added in win7 and higher.
+	 * Make sure to check the version before accessing these fields.
+	 *
+	 * If "is_dedicated_interrupt" is set, we must not set the
+	 * associated bit in the channel bitmap while sending the
+	 * interrupt to the host.
+	 *
+	 * connection_id is used in signaling the host.
+	 */
+	uint16_t			is_dedicated_interrupt:1;
+	uint16_t			reserved1:15;
+	uint32_t			connection_id;
 } __packed hv_vmbus_channel_offer_channel;
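Review note on the new bit fields: the comment says consumers must check the negotiated version before touching the Win7-and-later fields. A hedged sketch of that gating, with a trimmed stand-in struct and a hypothetical negotiated_version parameter (not the driver's real code), might look like:

	#include <stdint.h>

	#define HV_VMBUS_VERSION_WIN7	((1 << 16) | (1))	/* as defined above */

	struct offer_tail {		/* trimmed stand-in for the fields above */
		uint16_t	is_dedicated_interrupt:1;
		uint16_t	reserved1:15;
		uint32_t	connection_id;
	};

	/*
	 * Only read the Win7+ fields once the negotiated protocol version
	 * allows it; older hosts never populate them.
	 */
	static uint32_t
	offer_connection_id(uint32_t negotiated_version, const struct offer_tail *t)
	{
		if (negotiated_version >= HV_VMBUS_VERSION_WIN7)
			return (t->connection_id);
		return (0);
	}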
 /*
  * Rescind Offer parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			child_rel_id;
 } __packed hv_vmbus_channel_rescind_offer;
 
 /*
  * Request Offer -- no parameters, SynIC message contains the partition ID
  *
  * Set Snoop -- no parameters, SynIC message contains the partition ID
  *
  * Clear Snoop -- no parameters, SynIC message contains the partition ID
  *
  * All Offers Delivered -- no parameters, SynIC message contains the
  *			   partition ID
  *
  * Flush Client -- no parameters, SynIC message contains the partition ID
  */
 
 /*
  * Open Channel parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 
 	/*
 	 * Identifies the specific VMBus channel that is being opened.
 	 */
 	uint32_t		child_rel_id;
 
 	/*
 	 * ID making a particular open request at a channel offer unique.
 	 */
 	uint32_t		open_id;
 
 	/*
 	 * GPADL for the channel's ring buffer.
 	 */
 	hv_gpadl_handle		ring_buffer_gpadl_handle;
 
 	/*
-	 * GPADL for the channel's server context save area.
+	 * Before win8, all incoming channel interrupts are only
+	 * delivered on cpu 0.  Setting this value to 0 would
+	 * preserve the earlier behavior.
 	 */
-	hv_gpadl_handle		server_context_area_gpadl_handle;
+	uint32_t		target_vcpu;
 
 	/*
 	 * The upstream ring buffer begins at offset zero in the memory described
 	 * by ring_buffer_gpadl_handle.  The downstream ring buffer follows it at
 	 * this offset (in pages).
 	 */
 	uint32_t		downstream_ring_buffer_page_offset;
 
 	/*
 	 * User-specific data to be passed along to the server endpoint.
 	 */
 	uint8_t			user_data[HV_MAX_USER_DEFINED_BYTES];
 
 } __packed hv_vmbus_channel_open_channel;
 
 typedef uint32_t hv_nt_status;
 
 /*
  * Open Channel Result parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			child_rel_id;
 	uint32_t			open_id;
 	hv_nt_status			status;
 } __packed hv_vmbus_channel_open_result;
 
 /*
  * Close channel parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			child_rel_id;
 } __packed hv_vmbus_channel_close_channel;
 
 /*
  * Channel Message GPADL
  */
 #define HV_GPADL_TYPE_RING_BUFFER	1
 #define HV_GPADL_TYPE_SERVER_SAVE_AREA	2
 #define HV_GPADL_TYPE_TRANSACTION	8
 
 /*
  * The number of PFNs in a GPADL message is defined by the number of pages
  * that would be spanned by byte_count and byte_offset.
If the implied number * of PFNs won't fit in this packet, there will be a follow-up packet that * contains more. */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t gpadl; uint16_t range_buf_len; uint16_t range_count; hv_gpa_range range[0]; } __packed hv_vmbus_channel_gpadl_header; /* * This is the follow-up packet that contains more PFNs */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t message_number; uint32_t gpadl; uint64_t pfn[0]; } __packed hv_vmbus_channel_gpadl_body; typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t gpadl; uint32_t creation_status; } __packed hv_vmbus_channel_gpadl_created; typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t gpadl; } __packed hv_vmbus_channel_gpadl_teardown; typedef struct { hv_vmbus_channel_msg_header header; uint32_t gpadl; } __packed hv_vmbus_channel_gpadl_torndown; typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; } __packed hv_vmbus_channel_relid_released; typedef struct { hv_vmbus_channel_msg_header header; uint32_t vmbus_version_requested; uint32_t padding2; uint64_t interrupt_page; uint64_t monitor_page_1; uint64_t monitor_page_2; } __packed hv_vmbus_channel_initiate_contact; typedef struct { hv_vmbus_channel_msg_header header; hv_bool_uint8_t version_supported; } __packed hv_vmbus_channel_version_response; typedef hv_vmbus_channel_msg_header hv_vmbus_channel_unload; #define HW_MACADDR_LEN 6 /* * Fixme: Added to quiet "typeof" errors involving hv_vmbus.h when * the including C file was compiled with "-std=c99". */ #ifndef typeof #define typeof __typeof #endif #ifndef NULL #define NULL (void *)0 #endif typedef void *hv_vmbus_handle; #ifndef CONTAINING_RECORD #define CONTAINING_RECORD(address, type, field) ((type *)( \ (uint8_t *)(address) - \ (uint8_t *)(&((type *)0)->field))) #endif /* CONTAINING_RECORD */ #define container_of(ptr, type, member) ({ \ __typeof__( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );})
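Both CONTAINING_RECORD and container_of recover a pointer to an enclosing structure from a pointer to one of its members. A minimal usage sketch follows; the wrapper struct and callback here are hypothetical, included only to illustrate the pointer arithmetic, and are not part of this header.

/*
 * Example: recover a hypothetical wrapper from a pointer to its
 * embedded "work" member.  Both forms compute
 * (char *)tp - offsetof(struct example_item, work).
 */
struct example_item {
        int             id;
        struct task     work;           /* embedded member */
};

static void
example_task_fn(void *arg, int pending)
{
        struct task *tp = arg;
        struct example_item *item;

        item = CONTAINING_RECORD(tp, struct example_item, work);
        item = container_of(tp, struct example_item, work);
        (void)item; (void)pending;
}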
enum { HV_VMBUS_IVAR_TYPE, HV_VMBUS_IVAR_INSTANCE, HV_VMBUS_IVAR_NODE, HV_VMBUS_IVAR_DEVCTX }; #define HV_VMBUS_ACCESSOR(var, ivar, type) \ __BUS_ACCESSOR(vmbus, var, HV_VMBUS, ivar, type) HV_VMBUS_ACCESSOR(type, TYPE, const char *) HV_VMBUS_ACCESSOR(devctx, DEVCTX, struct hv_device *) /* * Common defines for Hyper-V ICs */ #define HV_ICMSGTYPE_NEGOTIATE 0 #define HV_ICMSGTYPE_HEARTBEAT 1 #define HV_ICMSGTYPE_KVPEXCHANGE 2 #define HV_ICMSGTYPE_SHUTDOWN 3 #define HV_ICMSGTYPE_TIMESYNC 4 #define HV_ICMSGTYPE_VSS 5 #define HV_ICMSGHDRFLAG_TRANSACTION 1 #define HV_ICMSGHDRFLAG_REQUEST 2 #define HV_ICMSGHDRFLAG_RESPONSE 4 typedef struct hv_vmbus_pipe_hdr { uint32_t flags; uint32_t msgsize; } __packed hv_vmbus_pipe_hdr; typedef struct hv_vmbus_ic_version { uint16_t major; uint16_t minor; } __packed hv_vmbus_ic_version; typedef struct hv_vmbus_icmsg_hdr { hv_vmbus_ic_version icverframe; uint16_t icmsgtype; hv_vmbus_ic_version icvermsg; uint16_t icmsgsize; uint32_t status; uint8_t ictransaction_id; uint8_t icflags; uint8_t reserved[2]; } __packed hv_vmbus_icmsg_hdr; typedef struct hv_vmbus_icmsg_negotiate { uint16_t icframe_vercnt; uint16_t icmsg_vercnt; uint32_t reserved; hv_vmbus_ic_version icversion_data[1]; /* any size array */ } __packed hv_vmbus_icmsg_negotiate; typedef struct hv_vmbus_shutdown_msg_data { uint32_t reason_code; uint32_t timeout_seconds; uint32_t flags; uint8_t display_message[2048]; } __packed hv_vmbus_shutdown_msg_data; typedef struct hv_vmbus_heartbeat_msg_data { uint64_t seq_num; uint32_t reserved[8]; } __packed hv_vmbus_heartbeat_msg_data; typedef struct { /* * offset in bytes from the start of ring data below */ volatile uint32_t write_index; /* * offset in bytes from the start of ring data below */ volatile uint32_t read_index; /* * NOTE: The interrupt_mask field is used only for channels, but * the vmbus connection also uses this data structure */ volatile uint32_t interrupt_mask; /* pad it to PAGE_SIZE so that data starts on a page */ uint8_t reserved[4084]; /* * WARNING: Ring data starts here + ring_data_start_offset * !!! DO NOT place any fields below this !!! */ uint8_t buffer[0]; /* doubles as interrupt mask */ } __packed hv_vmbus_ring_buffer;
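write_index and read_index are byte offsets into buffer[] that wrap at the size of the ring data. As a rough sketch of the bookkeeping this layout implies (illustrative only, not the driver's actual ring-buffer code, which also handles interrupt_mask and pending-send accounting):

/*
 * Sketch: bytes available for reading and writing in a ring
 * described by hv_vmbus_ring_buffer_info (declared below).
 */
static __inline uint32_t
example_ring_avail_to_read(const hv_vmbus_ring_buffer_info *info)
{
        uint32_t w = info->ring_buffer->write_index;
        uint32_t r = info->ring_buffer->read_index;

        return ((w >= r) ? (w - r) : (info->ring_data_size - r + w));
}

static __inline uint32_t
example_ring_avail_to_write(const hv_vmbus_ring_buffer_info *info)
{
        /*
         * Real implementations avoid filling the ring completely so
         * that "full" remains distinguishable from "empty".
         */
        return (info->ring_data_size - example_ring_avail_to_read(info));
}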
typedef struct { int length; int offset; uint64_t pfn; } __packed hv_vmbus_page_buffer; typedef struct { int length; int offset; uint64_t pfn_array[HV_MAX_MULTIPAGE_BUFFER_COUNT]; } __packed hv_vmbus_multipage_buffer; typedef struct { hv_vmbus_ring_buffer* ring_buffer; uint32_t ring_size; /* Include the shared header */ struct mtx ring_lock; uint32_t ring_data_size; /* ring_size */ uint32_t ring_data_start_offset; } hv_vmbus_ring_buffer_info; typedef void (*hv_vmbus_pfn_channel_callback)(void *context); +typedef void (*hv_vmbus_sc_creation_callback)(void *context); typedef enum { HV_CHANNEL_OFFER_STATE, HV_CHANNEL_OPENING_STATE, HV_CHANNEL_OPEN_STATE, + HV_CHANNEL_OPENED_STATE, HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE, } hv_vmbus_channel_state; +/* + * Connection identifier type + */ +typedef union { + uint32_t as_uint32_t; + struct { + uint32_t id:24; + uint32_t reserved:8; + } u; + +} __packed hv_vmbus_connection_id; + +/* + * Definition of the hv_vmbus_signal_event hypercall input structure + */ +typedef struct { + hv_vmbus_connection_id connection_id; + uint16_t flag_number; + uint16_t rsvd_z; +} __packed hv_vmbus_input_signal_event; + +typedef struct { + uint64_t align8; + hv_vmbus_input_signal_event event; +} __packed hv_vmbus_input_signal_event_buffer; + typedef struct hv_vmbus_channel { TAILQ_ENTRY(hv_vmbus_channel) list_entry; struct hv_device* device; hv_vmbus_channel_state state; hv_vmbus_channel_offer_channel offer_msg; /* * These are based on the offer_msg.monitor_id. * Save it here for easy access. */ uint8_t monitor_group; uint8_t monitor_bit; uint32_t ring_buffer_gpadl_handle; /* * Allocated memory for ring buffer */ void* ring_buffer_pages; unsigned long ring_buffer_size; uint32_t ring_buffer_page_count; /* * send to parent */ hv_vmbus_ring_buffer_info outbound; /* * receive from parent */ hv_vmbus_ring_buffer_info inbound; struct mtx inbound_lock; hv_vmbus_handle control_work_queue; hv_vmbus_pfn_channel_callback on_channel_callback; void* channel_callback_context; + /* + * If batched_reading is set to "true", mask the interrupt + * and read until the channel is empty. + * If batched_reading is set to "false", the channel is not + * going to perform batched reading. + * + * Batched reading is enabled by default; specific + * drivers that don't want this behavior can turn it off. + */ + boolean_t batched_reading; + + boolean_t is_dedicated_interrupt; + + /* + * Used as an input param for the HV_CALL_SIGNAL_EVENT hypercall. + */ + hv_vmbus_input_signal_event_buffer signal_event_buffer; + /* + * Pointer to the 8-byte aligned event inside the buffer above. + */ + hv_vmbus_input_signal_event *signal_event_param; + + /* + * From Win8, this field specifies the target virtual processor + * on which to deliver the interrupt from the host to the guest. + * Before Win8, all channel interrupts would only be + * delivered on cpu 0. Setting this value to 0 would preserve + * the earlier behavior. + */ + uint32_t target_vcpu; + /* The corresponding CPUID in the guest */ + uint32_t target_cpu; + + /* + * Support for multi-channels. + * The initial offer is considered the primary channel and this + * offer message will indicate if the host supports multi-channels. + * The guest is free to ask for multi-channels to be offered and can + * open these multi-channels as a normal "primary" channel. However, + * all multi-channels will have the same type and instance GUIDs as the + * primary channel. Requests sent on a given channel will result in a + * response on the same channel. + */ + + /* + * Multi-channel creation callback. This callback will be called in + * process context when a multi-channel offer is received from the host. + * The guest can open the multi-channel in the context of this callback. + */ + hv_vmbus_sc_creation_callback sc_creation_callback; + + struct mtx sc_lock; + + /* + * Linked list of all the multi-channels if this is a primary channel + */ + TAILQ_HEAD(, hv_vmbus_channel) sc_list_anchor; + TAILQ_ENTRY(hv_vmbus_channel) sc_list_entry; + + /* + * The primary channel this sub-channel belongs to. + * This will be NULL for the primary channel. + */ + struct hv_vmbus_channel *primary_channel; + /* + * Support per channel state for use by vmbus drivers. + */ + void *per_channel_state; } hv_vmbus_channel; +static inline void +hv_set_channel_read_state(hv_vmbus_channel* channel, boolean_t state) +{ + channel->batched_reading = state; +} + typedef struct hv_device { hv_guid class_id; hv_guid device_id; device_t device; hv_vmbus_channel* channel; } hv_device; int hv_vmbus_channel_recv_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id); int hv_vmbus_channel_recv_packet_raw( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id); int hv_vmbus_channel_open( hv_vmbus_channel* channel, uint32_t send_ring_buffer_size, uint32_t recv_ring_buffer_size, void* user_data, uint32_t user_data_len, hv_vmbus_pfn_channel_callback pfn_on_channel_callback, void* context); void hv_vmbus_channel_close(hv_vmbus_channel *channel); int hv_vmbus_channel_send_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint64_t request_id, hv_vmbus_packet_type type, uint32_t flags); int hv_vmbus_channel_send_packet_pagebuffer( hv_vmbus_channel* channel, hv_vmbus_page_buffer page_buffers[], uint32_t page_count, void* buffer, uint32_t buffer_len, uint64_t request_id); int hv_vmbus_channel_send_packet_multipagebuffer( hv_vmbus_channel* channel, hv_vmbus_multipage_buffer* multi_page_buffer, void* buffer, uint32_t buffer_len, uint64_t request_id); int hv_vmbus_channel_establish_gpadl( hv_vmbus_channel* channel, /* must be phys and virt contiguous */ void* contig_buffer, /* page-size multiple */ uint32_t size, uint32_t* gpadl_handle); int hv_vmbus_channel_teardown_gpdal( hv_vmbus_channel* channel, uint32_t gpadl_handle); +struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary); + /* * Work abstraction defines */ typedef struct hv_work_queue { struct taskqueue* queue; struct proc* proc; struct sema* work_sema; } hv_work_queue; typedef struct hv_work_item { struct task work; void (*callback)(void *); void* context; hv_work_queue* wq; } hv_work_item; struct hv_work_queue* hv_work_queue_create(char* name);
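The work-queue interface above wraps a taskqueue serviced in process context. A usage sketch (the callback and names here are illustrative, not part of the API):

static void
example_work_fn(void *context)
{
        /* Runs in the work queue's process context. */
        printf("work item ran, context %p\n", context);
}

static void
example_use_work_queue(void)
{
        struct hv_work_queue *wq;

        wq = hv_work_queue_create("example wq");
        if (wq == NULL)
                return;
        if (hv_queue_work_item(wq, example_work_fn, NULL) != 0)
                printf("failed to queue work item\n");
        /* ... once all queued items have run ... */
        hv_work_queue_close(wq);
}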
void hv_work_queue_close(struct hv_work_queue* wq); int hv_queue_work_item( hv_work_queue* wq, void (*callback)(void *), void* context); /** * @brief Get physical address from virtual */ static inline unsigned long hv_get_phys_addr(void *virt) { unsigned long ret; ret = (vtophys(virt) | ((vm_offset_t) virt & PAGE_MASK)); return (ret); } /** * KVP related structures * */ typedef struct hv_vmbus_service { hv_guid guid; /* Hyper-V GUID */ char *name; /* name of service */ boolean_t enabled; /* service enabled */ hv_work_queue *work_queue; /* background work queue */ /* * function to initialize service */ int (*init)(struct hv_vmbus_service *); /* * function to process Hyper-V messages */ void (*callback)(void *); } hv_vmbus_service; extern uint8_t* receive_buffer[]; extern hv_vmbus_service service_table[]; +extern uint32_t hv_vmbus_protocal_version; void hv_kvp_callback(void *context); int hv_kvp_init(hv_vmbus_service *serv); void hv_kvp_deinit(void); #endif /* __HYPERV_H__ */ Index: head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c =================================================================== --- head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 282211) +++ head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 282212) @@ -1,1393 +1,2004 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * StorVSC driver for Hyper-V. This driver presents a SCSI HBA interface * to the Common Access Method (CAM) layer. CAM control blocks (CCBs) are * converted into VSCSI protocol messages which are delivered to the parent * partition StorVSP driver over the Hyper-V VMBUS.
*/ #include __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include +#include +#include +#include #include #include #include #include #include #include #include #include #include - #include #include "hv_vstorage.h" #define STORVSC_RINGBUFFER_SIZE (20*PAGE_SIZE) #define STORVSC_MAX_LUNS_PER_TARGET (64) #define STORVSC_MAX_IO_REQUESTS (STORVSC_MAX_LUNS_PER_TARGET * 2) #define BLKVSC_MAX_IDE_DISKS_PER_TARGET (1) #define BLKVSC_MAX_IO_REQUESTS STORVSC_MAX_IO_REQUESTS #define STORVSC_MAX_TARGETS (2) +#define STORVSC_WIN7_MAJOR 4 +#define STORVSC_WIN7_MINOR 2 + +#define STORVSC_WIN8_MAJOR 5 +#define STORVSC_WIN8_MINOR 1 + +#define HV_ALIGN(x, a) roundup2(x, a) + struct storvsc_softc; +struct hv_sgl_node { + LIST_ENTRY(hv_sgl_node) link; + struct sglist *sgl_data; +}; + +struct hv_sgl_page_pool{ + LIST_HEAD(, hv_sgl_node) in_use_sgl_list; + LIST_HEAD(, hv_sgl_node) free_sgl_list; + boolean_t is_init; +} g_hv_sgl_page_pool; + +#define STORVSC_MAX_SG_PAGE_CNT STORVSC_MAX_IO_REQUESTS * HV_MAX_MULTIPAGE_BUFFER_COUNT + enum storvsc_request_type { WRITE_TYPE, READ_TYPE, UNKNOWN_TYPE }; struct hv_storvsc_request { LIST_ENTRY(hv_storvsc_request) link; struct vstor_packet vstor_packet; hv_vmbus_multipage_buffer data_buf; void *sense_data; uint8_t sense_info_len; uint8_t retries; union ccb *ccb; struct storvsc_softc *softc; struct callout callout; struct sema synch_sema; /*Synchronize the request/response if needed */ + struct sglist *bounce_sgl; + unsigned int bounce_sgl_count; + uint64_t not_aligned_seg_bits; }; struct storvsc_softc { struct hv_device *hs_dev; - LIST_HEAD(, hv_storvsc_request) hs_free_list; - struct mtx hs_lock; - struct storvsc_driver_props *hs_drv_props; - int hs_unit; - uint32_t hs_frozen; - struct cam_sim *hs_sim; - struct cam_path *hs_path; + LIST_HEAD(, hv_storvsc_request) hs_free_list; + struct mtx hs_lock; + struct storvsc_driver_props *hs_drv_props; + int hs_unit; + uint32_t hs_frozen; + struct cam_sim *hs_sim; + struct cam_path *hs_path; uint32_t hs_num_out_reqs; boolean_t hs_destroy; boolean_t hs_drain_notify; + boolean_t hs_open_multi_channel; struct sema hs_drain_sema; struct hv_storvsc_request hs_init_req; struct hv_storvsc_request hs_reset_req; }; /** * HyperV storvsc timeout testing cases: * a. IO returned after first timeout; * b. IO returned after second timeout and queue freeze; * c. IO returned while timer handler is running * The first can be tested by "sg_senddiag -vv /dev/daX", * and the second and third can be done by * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX". - */ + */ #define HVS_TIMEOUT_TEST 0 /* * Bus/adapter reset functionality on the Hyper-V host is * buggy and it will be disabled until * it can be further tested. 
*/ #define HVS_HOST_RESET 0 struct storvsc_driver_props { char *drv_name; char *drv_desc; uint8_t drv_max_luns_per_target; - uint8_t drv_max_ios_per_target; + uint8_t drv_max_ios_per_target; uint32_t drv_ringbuffer_size; }; enum hv_storage_type { DRIVER_BLKVSC, DRIVER_STORVSC, DRIVER_UNKNOWN }; #define HS_MAX_ADAPTERS 10 +#define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1 + /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */ static const hv_guid gStorVscDeviceType={ .data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f} }; /* {32412632-86cb-44a2-9b5c-50d1417354f5} */ static const hv_guid gBlkVscDeviceType={ .data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5} }; static struct storvsc_driver_props g_drv_props_table[] = { {"blkvsc", "Hyper-V IDE Storage Interface", BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS, STORVSC_RINGBUFFER_SIZE}, {"storvsc", "Hyper-V SCSI Storage Interface", STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS, STORVSC_RINGBUFFER_SIZE} }; +static int storvsc_current_major; +static int storvsc_current_minor; + /* static functions */ static int storvsc_probe(device_t dev); static int storvsc_attach(device_t dev); static int storvsc_detach(device_t dev); static void storvsc_poll(struct cam_sim * sim); static void storvsc_action(struct cam_sim * sim, union ccb * ccb); -static void create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp); +static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp); static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp); static enum hv_storage_type storvsc_get_storage_type(device_t dev); static void hv_storvsc_on_channel_callback(void *context); static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request); static int hv_storvsc_connect_vsp(struct hv_device *device); static void storvsc_io_done(struct hv_storvsc_request *reqp); +static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, + bus_dma_segment_t *orig_sgl, + unsigned int orig_sgl_count, + uint64_t seg_bits); +void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, + unsigned int dest_sgl_count, + struct sglist* src_sgl, + uint64_t seg_bits); static device_method_t storvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, storvsc_probe), DEVMETHOD(device_attach, storvsc_attach), DEVMETHOD(device_detach, storvsc_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD_END }; static driver_t storvsc_driver = { "storvsc", storvsc_methods, sizeof(struct storvsc_softc), }; static devclass_t storvsc_devclass; DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0); MODULE_VERSION(storvsc, 1); MODULE_DEPEND(storvsc, vmbus, 1, 1, 1); /** - * The host is capable of sending messages to us that are + * The host is capable of sending messages to us that are * completely unsolicited. So, we need to address the race * condition where we may be in the process of unloading the * driver when the host may send us an unsolicited message. * We address this issue by implementing a sequentially * consistent protocol: * * 1. Channel callback is invoked while holding the channel lock * and an unloading driver will reset the channel callback under * the protection of this channel lock. * * 2. To ensure bounded wait time for unloading a driver, we don't * permit outgoing traffic once the device is marked as being * destroyed. * * 3. Once the device is marked as being destroyed, we only - * permit incoming traffic to properly account for + * permit incoming traffic to properly account for * packets already sent out. */ static inline struct storvsc_softc * get_stor_device(struct hv_device *device, boolean_t outbound) { struct storvsc_softc *sc; sc = device_get_softc(device->device); if (sc == NULL) { return NULL; } if (outbound) { /* * Here we permit outgoing I/O only * if the device is not being destroyed. */ if (sc->hs_destroy) { sc = NULL; } } else { /* * inbound case; if being destroyed * only permit to account for * messages already sent out. */ if (sc->hs_destroy && (sc->hs_num_out_reqs == 0)) { sc = NULL; } } return sc; }
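The unload half of this protocol appears in storvsc_detach() later in this patch; condensed into one illustrative sequence (the fields are the real softc members, the function itself is only a sketch):

static void
example_drain_and_close(struct storvsc_softc *sc, struct hv_device *dev)
{
        mtx_lock(&dev->channel->inbound_lock);
        sc->hs_destroy = TRUE;          /* outbound get_stor_device() now fails */
        mtx_unlock(&dev->channel->inbound_lock);

        sc->hs_drain_notify = TRUE;
        sema_wait(&sc->hs_drain_sema);  /* posted when hs_num_out_reqs reaches 0 */
        sc->hs_drain_notify = FALSE;

        /* Resets the channel callback under the channel lock. */
        hv_vmbus_channel_close(dev->channel);
}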
/** + * @brief Callback handler, invoked when a multi-channel offer is received + * + * @param context the new multi-channel + */ +static void +storvsc_handle_sc_creation(void *context) +{ + hv_vmbus_channel *new_channel; + struct hv_device *device; + struct storvsc_softc *sc; + struct vmstor_chan_props props; + int ret = 0; + + new_channel = (hv_vmbus_channel *)context; + device = new_channel->primary_channel->device; + sc = get_stor_device(device, TRUE); + if (sc == NULL) + return; + + if (FALSE == sc->hs_open_multi_channel) + return; + + memset(&props, 0, sizeof(props)); + + ret = hv_vmbus_channel_open(new_channel, + sc->hs_drv_props->drv_ringbuffer_size, + sc->hs_drv_props->drv_ringbuffer_size, + (void *)&props, + sizeof(struct vmstor_chan_props), + hv_storvsc_on_channel_callback, + new_channel); + + return; +} + +/** + * @brief Send a multi-channel creation request to the host + * + * @param device a Hyper-V device pointer + * @param max_chans the max channels supported by vmbus + */ +static void +storvsc_send_multichannel_request(struct hv_device *dev, int max_chans) +{ + struct storvsc_softc *sc; + struct hv_storvsc_request *request; + struct vstor_packet *vstor_packet; + int request_channels_cnt = 0; + int ret; + + /* get the number of multi-channels to create */ + request_channels_cnt = MIN(max_chans, mp_ncpus); + + sc = get_stor_device(dev, TRUE); + if (sc == NULL) { + printf("Storvsc_error: get sc failed while sending " + "multi-channel request\n"); + return; + } + + request = &sc->hs_init_req; + + /* Establish a handler for multi-channel */ + dev->channel->sc_creation_callback = storvsc_handle_sc_creation; + + /* request the host to create multi-channel */ + memset(request, 0, sizeof(struct hv_storvsc_request)); + + sema_init(&request->synch_sema, 0, ("stor_synch_sema")); + + vstor_packet = &request->vstor_packet; + + vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS; + vstor_packet->flags = REQUEST_COMPLETION_FLAG; + vstor_packet->u.multi_channels_cnt = request_channels_cnt; + + ret = hv_vmbus_channel_send_packet( + dev->channel, + vstor_packet, + sizeof(struct vstor_packet), + (uint64_t)(uintptr_t)request, + HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, + HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + + /* wait for 5 seconds */ + ret = sema_timedwait(&request->synch_sema, 5 * hz); + if (ret != 0) { + printf("Storvsc_error: create multi-channel timeout, %d\n", + ret); + return; + } + + if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || + vstor_packet->status != 0) { + printf("Storvsc_error: create multi-channel invalid operation " + "(%d) or status (%u)\n", + vstor_packet->operation, vstor_packet->status); + return; + } + +
sc->hs_open_multi_channel = TRUE; + + if (bootverbose) + printf("Storvsc create multi-channel success!\n"); +} + +/** * @brief initialize channel connection to parent partition * * @param dev a Hyper-V device pointer * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_channel_init(struct hv_device *dev) { int ret = 0; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; struct storvsc_softc *sc; + uint16_t max_chans = 0; + boolean_t support_multichannel = FALSE; + max_chans = 0; + support_multichannel = FALSE; + sc = get_stor_device(dev, TRUE); - if (sc == NULL) { - return ENODEV; - } + if (sc == NULL) + return (ENODEV); request = &sc->hs_init_req; memset(request, 0, sizeof(struct hv_storvsc_request)); vstor_packet = &request->vstor_packet; request->softc = sc; /** * Initiate the vsc/vsp initialization protocol on the open channel */ sema_init(&request->synch_sema, 0, ("stor_synch_sema")); vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, sizeof(struct vstor_packet), (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); - if (ret != 0) { + if (ret != 0) goto cleanup; - } - ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */ - - if (ret != 0) { + /* wait 5 seconds */ + ret = sema_timedwait(&request->synch_sema, 5 * hz); + if (ret != 0) goto cleanup; - } if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { goto cleanup; } /* reuse the packet for version range supported */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; - vstor_packet->u.version.major_minor = VMSTOR_PROTOCOL_VERSION_CURRENT; + vstor_packet->u.version.major_minor = + VMSTOR_PROTOCOL_VERSION(storvsc_current_major, storvsc_current_minor); /* revision is only significant for Windows guests */ vstor_packet->u.version.revision = 0; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, sizeof(struct vstor_packet), (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); - if (ret != 0) { + if (ret != 0) goto cleanup; - } - ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */ + /* wait 5 seconds */ + ret = sema_timedwait(&request->synch_sema, 5 * hz); - if (ret) { + if (ret) goto cleanup; - } /* TODO: Check returned version */ if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || - vstor_packet->status != 0) { + vstor_packet->status != 0) goto cleanup; - } /** * Query channel properties */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, sizeof(struct vstor_packet), (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); - if ( ret != 0) { + if ( ret != 0) goto cleanup; - } - ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */ + /* wait 5 seconds */ + ret = sema_timedwait(&request->synch_sema, 5 * hz); - if (ret != 0) { + if (ret != 0) goto cleanup; - } /* TODO: Check returned version */ if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || - vstor_packet->status != 0) { + vstor_packet->status != 0) { 
goto cleanup; } + /* the multi-channel feature is supported by Win8 and later versions */ + max_chans = vstor_packet->u.chan_props.max_channel_cnt; + if ((hv_vmbus_protocal_version != HV_VMBUS_VERSION_WIN7) && + (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) && + (vstor_packet->u.chan_props.flags & + HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) { + support_multichannel = TRUE; + } + memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, sizeof(struct vstor_packet), (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { goto cleanup; } - ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */ + /* wait 5 seconds */ + ret = sema_timedwait(&request->synch_sema, 5 * hz); - if (ret != 0) { + if (ret != 0) goto cleanup; - } if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || - vstor_packet->status != 0) { + vstor_packet->status != 0) goto cleanup; - } + /* + * If multi-channel is supported, send a multi-channel create + * request to the host. + */ + if (support_multichannel) + storvsc_send_multichannel_request(dev, max_chans); + cleanup: sema_destroy(&request->synch_sema); return (ret); } /** * @brief Open channel connection to parent partition StorVSP driver * * Open and initialize channel connection to parent partition StorVSP driver. * * @param dev a pointer to a Hyper-V device * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_connect_vsp(struct hv_device *dev) { int ret = 0; struct vmstor_chan_props props; struct storvsc_softc *sc; sc = device_get_softc(dev->device); memset(&props, 0, sizeof(struct vmstor_chan_props)); /* * Open the channel */ ret = hv_vmbus_channel_open( dev->channel, sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_ringbuffer_size, (void *)&props, sizeof(struct vmstor_chan_props), hv_storvsc_on_channel_callback, - dev); + dev->channel); - if (ret != 0) { return ret; } ret = hv_storvsc_channel_init(dev); return (ret); } #if HVS_HOST_RESET static int hv_storvsc_host_reset(struct hv_device *dev) { int ret = 0; struct storvsc_softc *sc; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; sc = get_stor_device(dev, TRUE); if (sc == NULL) { return ENODEV; } request = &sc->hs_reset_req; request->softc = sc; vstor_packet = &request->vstor_packet; sema_init(&request->synch_sema, 0, "stor synch sema"); vstor_packet->operation = VSTOR_OPERATION_RESETBUS; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet(dev->channel, vstor_packet, sizeof(struct vstor_packet), (uint64_t)(uintptr_t)&sc->hs_reset_req, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { goto cleanup; } - ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */ + ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */ if (ret) { goto cleanup; } /* - * At this point, all outstanding requests in the adapter + * At this point, all outstanding requests in the adapter * should have been flushed out and returned to us */ cleanup: sema_destroy(&request->synch_sema); return (ret); } #endif /* HVS_HOST_RESET */ /** * @brief Function to initiate an I/O request * * @param device Hyper-V device pointer * @param request pointer to a request structure * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_io_request(struct hv_device *device, struct hv_storvsc_request *request) { struct storvsc_softc *sc; struct vstor_packet *vstor_packet = &request->vstor_packet; + struct hv_vmbus_channel* outgoing_channel = NULL; int ret = 0; sc = get_stor_device(device, TRUE); if (sc == NULL) { return ENODEV; } vstor_packet->flags |= REQUEST_COMPLETION_FLAG; vstor_packet->u.vm_srb.length = sizeof(struct vmscsi_req); vstor_packet->u.vm_srb.sense_info_len = SENSE_BUFFER_SIZE; vstor_packet->u.vm_srb.transfer_len = request->data_buf.length; vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB; + outgoing_channel = vmbus_select_outgoing_channel(device->channel); mtx_unlock(&request->softc->hs_lock); if (request->data_buf.length) { ret = hv_vmbus_channel_send_packet_multipagebuffer( - device->channel, + outgoing_channel, &request->data_buf, - vstor_packet, - sizeof(struct vstor_packet), + vstor_packet, + sizeof(struct vstor_packet), (uint64_t)(uintptr_t)request); } else { ret = hv_vmbus_channel_send_packet( - device->channel, + outgoing_channel, vstor_packet, sizeof(struct vstor_packet), (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); } mtx_lock(&request->softc->hs_lock); if (ret != 0) { printf("Unable to send packet %p ret %d\n", vstor_packet, ret); } else { atomic_add_int(&sc->hs_num_out_reqs, 1); } return (ret); }
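hv_storvsc_io_request() above fans requests out through vmbus_select_outgoing_channel(), whose implementation lives in the vmbus code rather than in this file. Purely as an illustration of the idea, such a selector could round-robin over the primary channel's sub-channel list (locking elided for clarity):

/* Illustrative only; not the actual vmbus_select_outgoing_channel(). */
static struct hv_vmbus_channel *
example_select_outgoing_channel(struct hv_vmbus_channel *primary)
{
        static unsigned int next;
        struct hv_vmbus_channel *ch;
        unsigned int i, n = 0;

        TAILQ_FOREACH(ch, &primary->sc_list_anchor, sc_list_entry)
                n++;
        if (n == 0)
                return (primary);       /* no sub-channels opened */

        i = next++ % (n + 1);
        if (i == n)
                return (primary);       /* the primary takes a share too */
        TAILQ_FOREACH(ch, &primary->sc_list_anchor, sc_list_entry) {
                if (i-- == 0)
                        return (ch);
        }
        return (primary);
}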
/** * Process IO_COMPLETION_OPERATION and ready * the result to be completed for upper layer * processing by the CAM layer. */ static void hv_storvsc_on_iocompletion(struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request) { struct vmscsi_req *vm_srb; vm_srb = &vstor_packet->u.vm_srb; if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) && (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) { /* Autosense data available */ KASSERT(vm_srb->sense_info_len <= request->sense_info_len, ("vm_srb->sense_info_len <= " "request->sense_info_len")); memcpy(request->sense_data, vm_srb->u.sense_data, vm_srb->sense_info_len); request->sense_info_len = vm_srb->sense_info_len; } /* Complete request by passing to the CAM layer */ storvsc_io_done(request); atomic_subtract_int(&sc->hs_num_out_reqs, 1); if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) { sema_post(&sc->hs_drain_sema); } } static void hv_storvsc_on_channel_callback(void *context) { int ret = 0; - struct hv_device *device = (struct hv_device *)context; + hv_vmbus_channel *channel = (hv_vmbus_channel *)context; + struct hv_device *device = NULL; struct storvsc_softc *sc; uint32_t bytes_recvd; uint64_t request_id; uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)]; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; + if (channel->primary_channel != NULL){ + device = channel->primary_channel->device; + } else { + device = channel->device; + } + + KASSERT(device, ("device is NULL")); + sc = get_stor_device(device, FALSE); if (sc == NULL) { + printf("Storvsc_error: get stor device failed.\n"); return; } - KASSERT(device, ("device")); - ret = hv_vmbus_channel_recv_packet( - device->channel, + channel, packet, roundup2(sizeof(struct vstor_packet), 8), &bytes_recvd, &request_id); while ((ret == 0) && (bytes_recvd > 0)) { request = (struct hv_storvsc_request *)(uintptr_t)request_id; - KASSERT(request, ("request")); if ((request == &sc->hs_init_req) || (request == &sc->hs_reset_req)) { memcpy(&request->vstor_packet, packet, sizeof(struct vstor_packet)); - sema_post(&request->synch_sema); +
sema_post(&request->synch_sema); } else { vstor_packet = (struct vstor_packet *)packet; switch(vstor_packet->operation) { case VSTOR_OPERATION_COMPLETEIO: + if (request == NULL) + panic("VMBUS: storvsc received a " + "packet with NULL request id in " + "COMPLETEIO operation."); + hv_storvsc_on_iocompletion(sc, vstor_packet, request); break; case VSTOR_OPERATION_REMOVEDEVICE: + case VSTOR_OPERATION_ENUMERATE_BUS: + printf("VMBUS: storvsc operation %d not " + "implemented.\n", vstor_packet->operation); /* TODO: implement */ break; default: break; } } ret = hv_vmbus_channel_recv_packet( - device->channel, + channel, packet, roundup2(sizeof(struct vstor_packet), 8), &bytes_recvd, &request_id); } } /** * @brief StorVSC probe function * * Device probe function. Returns 0 if the input device is a StorVSC * device. Otherwise, ENXIO is returned. If the input device is * a BlkVSC (paravirtual IDE) device and this support is disabled in * favor of the emulated ATA/IDE device, return ENXIO. * * @param a device * @returns 0 on success, ENXIO if not a matching StorVSC device */ static int storvsc_probe(device_t dev) { int ata_disk_enable = 0; int ret = ENXIO; - + + if ((HV_VMBUS_VERSION_WIN8 == hv_vmbus_protocal_version) || + (HV_VMBUS_VERSION_WIN8_1 == hv_vmbus_protocal_version)){ + storvsc_current_major = STORVSC_WIN8_MAJOR; + storvsc_current_minor = STORVSC_WIN8_MINOR; + } else { + storvsc_current_major = STORVSC_WIN7_MAJOR; + storvsc_current_minor = STORVSC_WIN7_MINOR; + } + switch (storvsc_get_storage_type(dev)) { case DRIVER_BLKVSC: if(bootverbose) device_printf(dev, "DRIVER_BLKVSC-Emulated ATA/IDE probe\n"); if (!getenv_int("hw.ata.disk_enable", &ata_disk_enable)) { if(bootverbose) device_printf(dev, "Enlightened ATA/IDE detected\n"); ret = BUS_PROBE_DEFAULT; } else if(bootverbose) device_printf(dev, "Emulated ATA/IDE set (hw.ata.disk_enable set)\n"); break; case DRIVER_STORVSC: if(bootverbose) device_printf(dev, "Enlightened SCSI device detected\n"); ret = BUS_PROBE_DEFAULT; break; default: ret = ENXIO; } return (ret); } /** * @brief StorVSC attach function * * Function responsible for allocating per-device structures, * setting up CAM interfaces and scanning for available LUNs to * be used for SCSI device peripherals. * * @param a device * @returns 0 on success or an error on failure */ static int storvsc_attach(device_t dev) { struct hv_device *hv_dev = vmbus_get_devctx(dev); enum hv_storage_type stor_type; struct storvsc_softc *sc; struct cam_devq *devq; - int ret, i; + int ret, i, j; struct hv_storvsc_request *reqp; struct root_hold_token *root_mount_token = NULL; + struct hv_sgl_node *sgl_node = NULL; + void *tmp_buff = NULL; /* * We need to serialize storvsc attach calls.
*/ root_mount_token = root_mount_hold("storvsc"); sc = device_get_softc(dev); if (sc == NULL) { ret = ENOMEM; goto cleanup; } stor_type = storvsc_get_storage_type(dev); if (stor_type == DRIVER_UNKNOWN) { ret = ENODEV; goto cleanup; } bzero(sc, sizeof(struct storvsc_softc)); /* fill in driver specific properties */ sc->hs_drv_props = &g_drv_props_table[stor_type]; /* fill in device specific properties */ sc->hs_unit = device_get_unit(dev); sc->hs_dev = hv_dev; device_set_desc(dev, g_drv_props_table[stor_type].drv_desc); LIST_INIT(&sc->hs_free_list); mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF); for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) { reqp = malloc(sizeof(struct hv_storvsc_request), M_DEVBUF, M_WAITOK|M_ZERO); reqp->softc = sc; LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } + /* create sg-list page pool */ + if (FALSE == g_hv_sgl_page_pool.is_init) { + g_hv_sgl_page_pool.is_init = TRUE; + LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list); + LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list); + + /* + * Pre-create SG list, each SG list with + * HV_MAX_MULTIPAGE_BUFFER_COUNT segments, each + * segment has one page buffer + */ + for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) { + sgl_node = malloc(sizeof(struct hv_sgl_node), + M_DEVBUF, M_WAITOK|M_ZERO); + + sgl_node->sgl_data = + sglist_alloc(HV_MAX_MULTIPAGE_BUFFER_COUNT, + M_WAITOK|M_ZERO); + + for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) { + tmp_buff = malloc(PAGE_SIZE, + M_DEVBUF, M_WAITOK|M_ZERO); + + sgl_node->sgl_data->sg_segs[j].ss_paddr = + (vm_paddr_t)tmp_buff; + } + + LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, + sgl_node, link); + } + } + sc->hs_destroy = FALSE; sc->hs_drain_notify = FALSE; + sc->hs_open_multi_channel = FALSE; sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema"); ret = hv_storvsc_connect_vsp(hv_dev); if (ret != 0) { goto cleanup; } /* * Create the device queue. * Hyper-V maps each target to one SCSI HBA */ devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target); if (devq == NULL) { device_printf(dev, "Failed to alloc device queue\n"); ret = ENOMEM; goto cleanup; } sc->hs_sim = cam_sim_alloc(storvsc_action, storvsc_poll, sc->hs_drv_props->drv_name, sc, sc->hs_unit, &sc->hs_lock, 1, sc->hs_drv_props->drv_max_ios_per_target, devq); if (sc->hs_sim == NULL) { device_printf(dev, "Failed to alloc sim\n"); cam_simq_free(devq); ret = ENOMEM; goto cleanup; } mtx_lock(&sc->hs_lock); /* bus_id is set to 0, need to get it from VMBUS channel query? 
*/ if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) { cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to register SCSI bus\n"); ret = ENXIO; goto cleanup; } if (xpt_create_path(&sc->hs_path, /*periph*/NULL, cam_sim_path(sc->hs_sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_bus_deregister(cam_sim_path(sc->hs_sim)); cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to create path\n"); ret = ENXIO; goto cleanup; } mtx_unlock(&sc->hs_lock); root_mount_rel(root_mount_token); return (0); cleanup: root_mount_rel(root_mount_token); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); free(reqp, M_DEVBUF); } + + while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { + sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); + LIST_REMOVE(sgl_node, link); + for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) { + if (NULL != + (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { + free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); + } + } + sglist_free(sgl_node->sgl_data); + free(sgl_node, M_DEVBUF); + } + return (ret); } /** * @brief StorVSC device detach function * * This function is responsible for safely detaching a * StorVSC device. This includes waiting for inbound responses * to complete and freeing associated per-device structures. * * @param dev a device * returns 0 on success */ static int storvsc_detach(device_t dev) { struct storvsc_softc *sc = device_get_softc(dev); struct hv_storvsc_request *reqp = NULL; struct hv_device *hv_device = vmbus_get_devctx(dev); + struct hv_sgl_node *sgl_node = NULL; + int j = 0; mtx_lock(&hv_device->channel->inbound_lock); sc->hs_destroy = TRUE; mtx_unlock(&hv_device->channel->inbound_lock); /* * At this point, all outbound traffic should be disabled. We * only allow inbound traffic (responses) to proceed so that * outstanding requests can be completed. */ sc->hs_drain_notify = TRUE; sema_wait(&sc->hs_drain_sema); sc->hs_drain_notify = FALSE; /* * Since we have already drained, we don't need to busy wait. * The call to close the channel will reset the callback * under the protection of the incoming channel lock. */ hv_vmbus_channel_close(hv_device->channel); mtx_lock(&sc->hs_lock); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); free(reqp, M_DEVBUF); } mtx_unlock(&sc->hs_lock); + + while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { + sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); + LIST_REMOVE(sgl_node, link); + for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++){ + if (NULL != + (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { + free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); + } + } + sglist_free(sgl_node->sgl_data); + free(sgl_node, M_DEVBUF); + } + return (0); } #if HVS_TIMEOUT_TEST /** * @brief unit test for timed out operations * * This function provides unit testing capability to simulate * timed out operations. Recompilation with HV_TIMEOUT_TEST=1 * is required. 
* * @param reqp pointer to a request structure * @param opcode SCSI operation being performed * @param wait if 1, wait for I/O to complete */ static void storvsc_timeout_test(struct hv_storvsc_request *reqp, uint8_t opcode, int wait) { int ret; union ccb *ccb = reqp->ccb; struct storvsc_softc *sc = reqp->softc; if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) { return; } if (wait) { mtx_lock(&reqp->event.mtx); } ret = hv_storvsc_io_request(sc->hs_dev, reqp); if (ret != 0) { if (wait) { mtx_unlock(&reqp->event.mtx); } printf("%s: io_request failed with %d.\n", __func__, ret); ccb->ccb_h.status = CAM_PROVIDE_FAIL; mtx_lock(&sc->hs_lock); storvsc_free_request(sc, reqp); xpt_done(ccb); mtx_unlock(&sc->hs_lock); return; } if (wait) { xpt_print(ccb->ccb_h.path, "%u: %s: waiting for IO return.\n", ticks, __func__); ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz); mtx_unlock(&reqp->event.mtx); xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n", ticks, __func__, (ret == 0)? "IO return detected" : "IO return not detected"); - /* + /* * Now both the timer handler and io done are running * simultaneously. We want to confirm the io done always * finishes after the timer handler exits. So reqp used by * timer handler is not freed or stale. Do busy loop for * another 1/10 second to make sure io done does * wait for the timer handler to complete. */ DELAY(100*1000); mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: %s: finishing, queue frozen %d, " "ccb status 0x%x scsi_status 0x%x.\n", ticks, __func__, sc->hs_frozen, ccb->ccb_h.status, ccb->csio.scsi_status); mtx_unlock(&sc->hs_lock); } } #endif /* HVS_TIMEOUT_TEST */ /** * @brief timeout handler for requests * * This function is called as a result of a callout expiring. * * @param arg pointer to a request */ static void storvsc_timeout(void *arg) { struct hv_storvsc_request *reqp = arg; struct storvsc_softc *sc = reqp->softc; union ccb *ccb = reqp->ccb; if (reqp->retries == 0) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO timed out (req=0x%p), wait for another %u secs.\n", ticks, reqp, ccb->ccb_h.timeout / 1000); cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL); mtx_unlock(&sc->hs_lock); reqp->retries++; callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0); #endif return; } mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n", ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000, (sc->hs_frozen == 0)? "freezing the queue" : "the queue is already frozen"); if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1); } mtx_unlock(&sc->hs_lock); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, MODE_SELECT_10, 1); #endif } /** * @brief StorVSC device poll function * * This function is responsible for servicing requests when * interrupts are disabled (i.e when we are dumping core.) * * @param sim a pointer to a CAM SCSI interface module */ static void storvsc_poll(struct cam_sim *sim) { struct storvsc_softc *sc = cam_sim_softc(sim); mtx_assert(&sc->hs_lock, MA_OWNED); mtx_unlock(&sc->hs_lock); - hv_storvsc_on_channel_callback(sc->hs_dev); + hv_storvsc_on_channel_callback(sc->hs_dev->channel); mtx_lock(&sc->hs_lock); } /** * @brief StorVSC device action function * * This function is responsible for handling SCSI operations which * are passed from the CAM layer. 
The requests are in the form of * CAM control blocks which indicate the action being performed. * Not all actions require converting the request to a VSCSI protocol * message - these actions can be responded to by this driver. * Requests which are destined for a backend storage device are converted * to a VSCSI protocol message and sent on the channel connection associated * with this device. * * @param sim pointer to a CAM SCSI interface module * @param ccb pointer to a CAM control block */ static void storvsc_action(struct cam_sim *sim, union ccb *ccb) { struct storvsc_softc *sc = cam_sim_softc(sim); int res; mtx_assert(&sc->hs_lock, MA_OWNED); switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE; cpi->target_sprt = 0; cpi->hba_misc = PIM_NOBUSRESET; cpi->hba_eng_cnt = 0; cpi->max_target = STORVSC_MAX_TARGETS; cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target; cpi->initiator_id = cpi->max_target; cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 300000; cpi->transport = XPORT_SAS; cpi->transport_version = 0; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_SPC2; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; cts->transport = XPORT_SAS; cts->transport_version = 0; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_SPC2; /* enable tag queuing and disconnected mode */ cts->proto_specific.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB; cts->xport_specific.valid = CTS_SPI_VALID_DISC; cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB; ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_SET_TRAN_SETTINGS: { ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_CALC_GEOMETRY:{ cam_calc_geometry(&ccb->ccg, 1); xpt_done(ccb); return; } case XPT_RESET_BUS: case XPT_RESET_DEV:{ #if HVS_HOST_RESET if ((res = hv_storvsc_host_reset(sc->hs_dev)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_host_reset failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; xpt_done(ccb); return; } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; #else xpt_print(ccb->ccb_h.path, "%s reset not supported.\n", (ccb->ccb_h.func_code == XPT_RESET_BUS)? 
"bus" : "dev"); ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; #endif /* HVS_HOST_RESET */ } case XPT_SCSI_IO: case XPT_IMMED_NOTIFY: { struct hv_storvsc_request *reqp = NULL; if (ccb->csio.cdb_len == 0) { panic("cdl_len is 0\n"); } if (LIST_EMPTY(&sc->hs_free_list)) { ccb->ccb_h.status = CAM_REQUEUE_REQ; if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(sim, /* count*/1); } xpt_done(ccb); return; } reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); bzero(reqp, sizeof(struct hv_storvsc_request)); reqp->softc = sc; + + ccb->ccb_h.status |= CAM_SIM_QUEUED; + if ((res = create_storvsc_request(ccb, reqp)) != 0) { + ccb->ccb_h.status = CAM_REQ_INVALID; + xpt_done(ccb); + return; + } - ccb->ccb_h.status |= CAM_SIM_QUEUED; - create_storvsc_request(ccb, reqp); - if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_init(&reqp->callout, CALLOUT_MPSAFE); callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST cv_init(&reqp->event.cv, "storvsc timeout cv"); mtx_init(&reqp->event.mtx, "storvsc timeout mutex", NULL, MTX_DEF); switch (reqp->vstor_packet.vm_srb.cdb[0]) { case MODE_SELECT_10: case SEND_DIAGNOSTIC: /* To have timer send the request. */ return; default: break; } #endif /* HVS_TIMEOUT_TEST */ } if ((res = hv_storvsc_io_request(sc->hs_dev, reqp)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_io_request failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; storvsc_free_request(sc, reqp); xpt_done(ccb); return; } return; } default: ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; } } /** + * @brief destroy bounce buffer + * + * This function is responsible for destroy a Scatter/Gather list + * that create by storvsc_create_bounce_buffer() + * + * @param sgl- the Scatter/Gather need be destroy + * @param sg_count- page count of the SG list. + * + */ +static void +storvsc_destroy_bounce_buffer(struct sglist *sgl) +{ + struct hv_sgl_node *sgl_node = NULL; + + sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list); + LIST_REMOVE(sgl_node, link); + if (NULL == sgl_node) { + printf("storvsc error: not enough in use sgl\n"); + return; + } + sgl_node->sgl_data = sgl; + LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); +} + +/** + * @brief create bounce buffer + * + * This function is responsible for create a Scatter/Gather list, + * which hold several pages that can be aligned with page size. + * + * @param seg_count- SG-list segments count + * @param write - if WRITE_TYPE, set SG list page used size to 0, + * otherwise set used size to page size. + * + * return NULL if create failed + */ +static struct sglist * +storvsc_create_bounce_buffer(uint16_t seg_count, int write) +{ + int i = 0; + struct sglist *bounce_sgl = NULL; + unsigned int buf_len = ((write == WRITE_TYPE) ? 
+/** + * @brief copy data from SG list to bounce buffer + * + * This function is responsible for copying data from one SG list's segments + * to another SG list that is used as a bounce buffer. + * + * @param bounce_sgl - the destination SG list + * @param orig_sgl - the segments of the source SG list + * @param orig_sgl_count - the count of segments + * @param seg_bits - indicates which segments need the bounce buffer; + * a set bit means the segment needs one. + * + */ +static void +storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, + bus_dma_segment_t *orig_sgl, + unsigned int orig_sgl_count, + uint64_t seg_bits) +{ + int src_sgl_idx = 0; + + for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) { + if (seg_bits & (1 << src_sgl_idx)) { + memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr, + (void*)orig_sgl[src_sgl_idx].ds_addr, + orig_sgl[src_sgl_idx].ds_len); + + bounce_sgl->sg_segs[src_sgl_idx].ss_len = + orig_sgl[src_sgl_idx].ds_len; + } + } +} + +/** + * @brief copy data from an SG list used as a bounce buffer to another SG list + * + * This function is responsible for copying data from an SG list with a bounce + * buffer back to another SG list's segments. + * + * @param dest_sgl - the destination SG list's segments + * @param dest_sgl_count - the count of the destination SG list's segments + * @param src_sgl - the source SG list + * @param seg_bits - indicates which segments of the source SG list used the + * bounce buffer + * + */ +void +storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, + unsigned int dest_sgl_count, + struct sglist* src_sgl, + uint64_t seg_bits) +{ + int sgl_idx = 0; + + for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) { + if (seg_bits & (1 << sgl_idx)) { + memcpy((void*)(dest_sgl[sgl_idx].ds_addr), + (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr), + src_sgl->sg_segs[sgl_idx].ss_len); + } + } +} +
+/** + * @brief check whether an SG list needs a bounce buffer + * + * This function checks whether a bounce buffer is needed for the SG list. + * + * @param sgl - the SG list's segments + * @param sg_count - the count of the SG list's segments + * @param bits - a bitmask of the segments that need the bounce buffer + * + * return -1 if the SG list does not need a bounce buffer + */ +static int +storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl, + unsigned int sg_count, + uint64_t *bits) +{ + int i = 0; + int offset = 0; + uint64_t phys_addr = 0; + uint64_t tmp_bits = 0; + boolean_t found_hole = FALSE; + boolean_t pre_aligned = TRUE; + + if (sg_count < 2){ + return -1; + } + + *bits = 0; + + phys_addr = vtophys(sgl[0].ds_addr); + offset = phys_addr - trunc_page(phys_addr); + + if (offset != 0) { + pre_aligned = FALSE; + tmp_bits |= 1; + } + + for (i = 1; i < sg_count; i++) { + phys_addr = vtophys(sgl[i].ds_addr); + offset = phys_addr - trunc_page(phys_addr); + + if (offset == 0) { + if (FALSE == pre_aligned){ + /* + * This segment is aligned; if the previous + * one was not, we have found a hole. + */ + found_hole = TRUE; + } + pre_aligned = TRUE; + } else { + tmp_bits |= 1 << i; + if (!pre_aligned) { + if (phys_addr != vtophys(sgl[i-1].ds_addr + + sgl[i-1].ds_len)) { + /* + * Check whether this segment is + * contiguous with the previous one; + * if not, we have found a hole. + */ + found_hole = TRUE; + } + } else { + found_hole = TRUE; + } + pre_aligned = FALSE; + } + } + + if (!found_hole) { + return (-1); + } else { + *bits = tmp_bits; + return 0; + } +} +
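To make the bitmask concrete, a worked example under hypothetical addresses: for a three-segment list where only the middle segment starts mid-page, the walk above finds a hole and flags only that segment.

/*
 * Worked example (hypothetical layout):
 *
 *   seg[0]: starts page-aligned            -> bit 0 clear
 *   seg[1]: starts at offset 0x200         -> bit 1 set (tmp_bits |= 1 << 1),
 *           and pre_aligned was TRUE       -> found_hole = TRUE
 *   seg[2]: starts page-aligned after the
 *           unaligned seg[1]               -> bit 2 clear
 *
 * Result: the function returns 0 with *bits == 0x2, so
 * create_storvsc_request() below bounces only seg[1].
 */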
(ccb->ccb_h.flags & CAM_DATA_MASK) { + case CAM_DATA_VADDR: + { bytes_to_copy = csio->dxfer_len; phys_addr = vtophys(csio->data_ptr); - reqp->data_buf.offset = phys_addr - trunc_page(phys_addr); + reqp->data_buf.offset = phys_addr & PAGE_MASK; + + while (bytes_to_copy != 0) { + int bytes, page_offset; + phys_addr = + vtophys(&csio->data_ptr[reqp->data_buf.length - + bytes_to_copy]); + pfn = phys_addr >> PAGE_SHIFT; + reqp->data_buf.pfn_array[pfn_num] = pfn; + page_offset = phys_addr & PAGE_MASK; + + bytes = min(PAGE_SIZE - page_offset, bytes_to_copy); + + bytes_to_copy -= bytes; + pfn_num++; + } + break; } - while (bytes_to_copy != 0) { - int bytes, page_offset; - phys_addr = vtophys(&csio->data_ptr[reqp->data_buf.length - - bytes_to_copy]); - pfn = phys_addr >> PAGE_SHIFT; - reqp->data_buf.pfn_array[pfn_num] = pfn; - page_offset = phys_addr - trunc_page(phys_addr); + case CAM_DATA_SG: + { + int i = 0; + int offset = 0; + int ret; - bytes = min(PAGE_SIZE - page_offset, bytes_to_copy); + bus_dma_segment_t *storvsc_sglist = + (bus_dma_segment_t *)ccb->csio.data_ptr; + u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt; - bytes_to_copy -= bytes; - pfn_num++; + printf("Storvsc: get SG I/O operation, %d\n", + reqp->vstor_packet.u.vm_srb.data_in); + + if (storvsc_sg_count > HV_MAX_MULTIPAGE_BUFFER_COUNT){ + printf("Storvsc: %d segments is too much, " + "only support %d segments\n", + storvsc_sg_count, HV_MAX_MULTIPAGE_BUFFER_COUNT); + return (EINVAL); + } + + /* + * We create our own bounce buffer function currently. Idealy + * we should use BUS_DMA(9) framework. But with current BUS_DMA + * code there is no callback API to check the page alignment of + * middle segments before busdma can decide if a bounce buffer + * is needed for particular segment. There is callback, + * "bus_dma_filter_t *filter", but the parrameters are not + * sufficient for storvsc driver. + * TODO: + * Add page alignment check in BUS_DMA(9) callback. Once + * this is complete, switch the following code to use + * BUS_DMA(9) for storvsc bounce buffer support. 
+ */ + /* check if we need to create bounce buffer */ + ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist, + storvsc_sg_count, ¬_aligned_seg_bits); + if (ret != -1) { + reqp->bounce_sgl = + storvsc_create_bounce_buffer(storvsc_sg_count, + reqp->vstor_packet.u.vm_srb.data_in); + if (NULL == reqp->bounce_sgl) { + printf("Storvsc_error: " + "create bounce buffer failed.\n"); + return (ENOMEM); + } + + reqp->bounce_sgl_count = storvsc_sg_count; + reqp->not_aligned_seg_bits = not_aligned_seg_bits; + + /* + * if it is write, we need copy the original data + *to bounce buffer + */ + if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { + storvsc_copy_sgl_to_bounce_buf( + reqp->bounce_sgl, + storvsc_sglist, + storvsc_sg_count, + reqp->not_aligned_seg_bits); + } + + /* transfer virtual address to physical frame number */ + if (reqp->not_aligned_seg_bits & 0x1){ + phys_addr = + vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr); + }else{ + phys_addr = + vtophys(storvsc_sglist[0].ds_addr); + } + reqp->data_buf.offset = phys_addr & PAGE_MASK; + + pfn = phys_addr >> PAGE_SHIFT; + reqp->data_buf.pfn_array[0] = pfn; + + for (i = 1; i < storvsc_sg_count; i++) { + if (reqp->not_aligned_seg_bits & (1 << i)) { + phys_addr = + vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr); + } else { + phys_addr = + vtophys(storvsc_sglist[i].ds_addr); + } + + pfn = phys_addr >> PAGE_SHIFT; + reqp->data_buf.pfn_array[i] = pfn; + } + } else { + phys_addr = vtophys(storvsc_sglist[0].ds_addr); + + reqp->data_buf.offset = phys_addr & PAGE_MASK; + + for (i = 0; i < storvsc_sg_count; i++) { + phys_addr = vtophys(storvsc_sglist[i].ds_addr); + pfn = phys_addr >> PAGE_SHIFT; + reqp->data_buf.pfn_array[i] = pfn; + } + + /* check the last segment cross boundary or not */ + offset = phys_addr & PAGE_MASK; + if (offset) { + phys_addr = + vtophys(storvsc_sglist[i-1].ds_addr + + PAGE_SIZE - offset); + pfn = phys_addr >> PAGE_SHIFT; + reqp->data_buf.pfn_array[i] = pfn; + } + + reqp->bounce_sgl_count = 0; + } + break; } + default: + printf("Unknow flags: %d\n", ccb->ccb_h.flags); + return(EINVAL); + } + + return(0); } /** * @brief completion function before returning to CAM * * I/O process has been completed and the result needs * to be passed to the CAM layer. * Free resources related to this request. * * @param reqp pointer to a request structure */ static void storvsc_io_done(struct hv_storvsc_request *reqp) { union ccb *ccb = reqp->ccb; struct ccb_scsiio *csio = &ccb->csio; struct storvsc_softc *sc = reqp->softc; struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb; - + bus_dma_segment_t *ori_sglist = NULL; + int ori_sg_count = 0; + + /* destroy bounce buffer if it is used */ + if (reqp->bounce_sgl_count) { + ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr; + ori_sg_count = ccb->csio.sglist_cnt; + + /* + * If it is READ operation, we should copy back the data + * to original SG list. 
+ */ + if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { + storvsc_copy_from_bounce_buf_to_sgl(ori_sglist, + ori_sg_count, + reqp->bounce_sgl, + reqp->not_aligned_seg_bits); + } + + storvsc_destroy_bounce_buffer(reqp->bounce_sgl); + reqp->bounce_sgl_count = 0; + } + if (reqp->retries > 0) { mtx_lock(&sc->hs_lock); #if HVS_TIMEOUT_TEST xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "waking up timer handler if any.\n", ticks); mtx_lock(&reqp->event.mtx); cv_signal(&reqp->event.cv); mtx_unlock(&reqp->event.mtx); #endif reqp->retries = 0; xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "stopping timer if any.\n", ticks); mtx_unlock(&sc->hs_lock); } - /* + /* * callout_drain() will wait for the timer handler to finish * if it is running. So we don't need any lock to synchronize * between this routine and the timer handler. * Note that we need to make sure reqp is not freed when timer * handler is using or will use it. */ if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_drain(&reqp->callout); } ccb->ccb_h.status &= ~CAM_SIM_QUEUED; ccb->ccb_h.status &= ~CAM_STATUS_MASK; if (vm_srb->scsi_status == SCSI_STATUS_OK) { ccb->ccb_h.status |= CAM_REQ_CMP; } else { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "srovsc scsi_status = %d\n", vm_srb->scsi_status); mtx_unlock(&sc->hs_lock); ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; } ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF); ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len; if (reqp->sense_info_len != 0) { csio->sense_resid = csio->sense_len - reqp->sense_info_len; ccb->ccb_h.status |= CAM_AUTOSNS_VALID; } mtx_lock(&sc->hs_lock); if (reqp->softc->hs_frozen == 1) { xpt_print(ccb->ccb_h.path, "%u: storvsc unfreezing softc 0x%p.\n", ticks, reqp->softc); ccb->ccb_h.status |= CAM_RELEASE_SIMQ; reqp->softc->hs_frozen = 0; } storvsc_free_request(sc, reqp); xpt_done(ccb); mtx_unlock(&sc->hs_lock); } /** * @brief Free a request structure * * Free a request structure by returning it to the free list * * @param sc pointer to a softc * @param reqp pointer to a request structure */ static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp) { LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } /** * @brief Determine type of storage device from GUID * * Using the type GUID, determine if this is a StorVSC (paravirtual * SCSI or BlkVSC (paravirtual IDE) device. * * @param dev a device * returns an enum */ static enum hv_storage_type storvsc_get_storage_type(device_t dev) { const char *p = vmbus_get_type(dev); if (!memcmp(p, &gBlkVscDeviceType, sizeof(hv_guid))) { return DRIVER_BLKVSC; } else if (!memcmp(p, &gStorVscDeviceType, sizeof(hv_guid))) { return DRIVER_STORVSC; } return (DRIVER_UNKNOWN); } Index: head/sys/dev/hyperv/storvsc/hv_vstorage.h =================================================================== --- head/sys/dev/hyperv/storvsc/hv_vstorage.h (revision 282211) +++ head/sys/dev/hyperv/storvsc/hv_vstorage.h (revision 282212) @@ -1,233 +1,243 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __HV_VSTORAGE_H__ #define __HV_VSTORAGE_H__ /* * Major/minor macros. Minor version is in LSB, meaning that earlier flat * version numbers will be interpreted as "0.x" (i.e., 1 becomes 0.1). */ #define VMSTOR_PROTOCOL_MAJOR(VERSION_) (((VERSION_) >> 8) & 0xff) #define VMSTOR_PROTOCOL_MINOR(VERSION_) (((VERSION_) ) & 0xff) #define VMSTOR_PROTOCOL_VERSION(MAJOR_, MINOR_) ((((MAJOR_) & 0xff) << 8) | \ (((MINOR_) & 0xff) )) /* * Invalid version. */ #define VMSTOR_INVALID_PROTOCOL_VERSION -1 /* * Version history: * V1 Beta 0.1 * V1 RC < 2008/1/31 1.0 * V1 RC > 2008/1/31 2.0 */ -#define VMSTOR_PROTOCOL_VERSION_CURRENT VMSTOR_PROTOCOL_VERSION(2, 0) +#define VMSTOR_PROTOCOL_VERSION_CURRENT VMSTOR_PROTOCOL_VERSION(5, 1) /** * Packet structure ops describing virtual storage requests. */ enum vstor_packet_ops { VSTOR_OPERATION_COMPLETEIO = 1, VSTOR_OPERATION_REMOVEDEVICE = 2, VSTOR_OPERATION_EXECUTESRB = 3, VSTOR_OPERATION_RESETLUN = 4, VSTOR_OPERATION_RESETADAPTER = 5, VSTOR_OPERATION_RESETBUS = 6, VSTOR_OPERATION_BEGININITIALIZATION = 7, VSTOR_OPERATION_ENDINITIALIZATION = 8, VSTOR_OPERATION_QUERYPROTOCOLVERSION = 9, VSTOR_OPERATION_QUERYPROPERTIES = 10, - VSTOR_OPERATION_MAXIMUM = 10 + VSTOR_OPERATION_ENUMERATE_BUS = 11, + VSTOR_OPERATION_FCHBA_DATA = 12, + VSTOR_OPERATION_CREATE_MULTI_CHANNELS = 13, + VSTOR_OPERATION_MAXIMUM = 13 }; /* * Platform neutral description of a scsi request - * this remains the same across the write regardless of 32/64 bit * note: it's patterned off the Windows DDK SCSI_PASS_THROUGH structure */ #define CDB16GENERIC_LENGTH 0x10 #define SENSE_BUFFER_SIZE 0x12 #define MAX_DATA_BUFFER_LENGTH_WITH_PADDING 0x14 struct vmscsi_req { uint16_t length; uint8_t srb_status; uint8_t scsi_status; /* HBA number, set to the order number detected by initiator. */ uint8_t port; /* SCSI bus number or bus_id, different from CAM's path_id. */ uint8_t path_id; uint8_t target_id; uint8_t lun; uint8_t cdb_len; uint8_t sense_info_len; uint8_t data_in; uint8_t reserved; uint32_t transfer_len; union { uint8_t cdb[CDB16GENERIC_LENGTH]; uint8_t sense_data[SENSE_BUFFER_SIZE]; uint8_t reserved_array[MAX_DATA_BUFFER_LENGTH_WITH_PADDING]; } u; } __packed; /** * This structure is sent during the initialization phase to get the different * properties of the channel. 
*/ struct vmstor_chan_props { uint16_t proto_ver; uint8_t path_id; uint8_t target_id; + uint16_t max_channel_cnt; + /** * Note: port number is only really known on the client side */ - uint32_t port; + uint16_t port; uint32_t flags; uint32_t max_transfer_bytes; /** * This id is unique for each channel and will correspond with * vendor specific data in the inquiry_ata */ uint64_t unique_id; } __packed; /** * This structure is sent during the storage protocol negotiations. */ struct vmstor_proto_ver { /** * Major (MSW) and minor (LSW) version numbers. */ uint16_t major_minor; uint16_t revision; /* always zero */ } __packed; /** * Channel Property Flags */ #define STORAGE_CHANNEL_REMOVABLE_FLAG 0x1 #define STORAGE_CHANNEL_EMULATED_IDE_FLAG 0x2 struct vstor_packet { /** * Requested operation type */ enum vstor_packet_ops operation; /* * Flags - see below for values */ uint32_t flags; /** * Status of the request returned from the server side. */ uint32_t status; union { /** * Structure used to forward SCSI commands from the client to * the server. */ struct vmscsi_req vm_srb; /** * Structure used to query channel properties. */ struct vmstor_chan_props chan_props; /** * Used during version negotiations. */ struct vmstor_proto_ver version; + + /** + * Number of multichannels to create + */ + uint16_t multi_channels_cnt; } u; } __packed; /** * SRB (SCSI Request Block) Status Codes */ #define SRB_STATUS_PENDING 0x00 #define SRB_STATUS_SUCCESS 0x01 #define SRB_STATUS_ABORTED 0x02 #define SRB_STATUS_ABORT_FAILED 0x03 #define SRB_STATUS_ERROR 0x04 #define SRB_STATUS_BUSY 0x05 /** * SRB Status Masks (can be combined with above status codes) */ #define SRB_STATUS_QUEUE_FROZEN 0x40 #define SRB_STATUS_AUTOSENSE_VALID 0x80 /** * Packet flags */ /** * This flag indicates that the server should send back a completion for this * packet. */ #define REQUEST_COMPLETION_FLAG 0x1 /** * This is the set of flags that the vsc can set in any packets it sends */ #define VSC_LEGAL_FLAGS (REQUEST_COMPLETION_FLAG) #endif /* __HV_VSTORAGE_H__ */ Index: head/sys/dev/hyperv/utilities/hv_kvp.c =================================================================== --- head/sys/dev/hyperv/utilities/hv_kvp.c (revision 282211) +++ head/sys/dev/hyperv/utilities/hv_kvp.c (revision 282212) @@ -1,1001 +1,1002 @@ /*- * Copyright (c) 2014 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Author: Sainath Varanasi. * Date: 4/2012 * Email: bsdic@microsoft.com */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include "unicode.h" #include "hv_kvp.h" /* hv_kvp defines */ #define BUFFERSIZE sizeof(struct hv_kvp_msg) #define KVP_SUCCESS 0 #define KVP_ERROR 1 #define kvp_hdr hdr.kvp_hdr /* hv_kvp debug control */ static int hv_kvp_log = 0; SYSCTL_INT(_dev, OID_AUTO, hv_kvp_log, CTLFLAG_RW, &hv_kvp_log, 0, "hv_kvp log"); #define hv_kvp_log_error(...) do { \ if (hv_kvp_log > 0) \ log(LOG_ERR, "hv_kvp: " __VA_ARGS__); \ } while (0) #define hv_kvp_log_info(...) do { \ if (hv_kvp_log > 1) \ log(LOG_INFO, "hv_kvp: " __VA_ARGS__); \ } while (0) /* character device prototypes */ static d_open_t hv_kvp_dev_open; static d_close_t hv_kvp_dev_close; static d_read_t hv_kvp_dev_daemon_read; static d_write_t hv_kvp_dev_daemon_write; static d_poll_t hv_kvp_dev_daemon_poll; /* hv_kvp prototypes */ static int hv_kvp_req_in_progress(void); static void hv_kvp_transaction_init(uint32_t, hv_vmbus_channel *, uint64_t, uint8_t *); static void hv_kvp_send_msg_to_daemon(void); static void hv_kvp_process_request(void *context); /* hv_kvp character device structure */ static struct cdevsw hv_kvp_cdevsw = { .d_version = D_VERSION, .d_open = hv_kvp_dev_open, .d_close = hv_kvp_dev_close, .d_read = hv_kvp_dev_daemon_read, .d_write = hv_kvp_dev_daemon_write, .d_poll = hv_kvp_dev_daemon_poll, .d_name = "hv_kvp_dev", }; static struct cdev *hv_kvp_dev; static struct hv_kvp_msg *hv_kvp_dev_buf; struct proc *daemon_task; /* * Global state to track and synchronize multiple * KVP transaction requests from the host. */ static struct { /* Pre-allocated work item for queue */ hv_work_item work_item; /* Unless specified the pending mutex should be * used to alter the values of the following paramters: * 1. req_in_progress * 2. req_timed_out * 3. pending_reqs. */ struct mtx pending_mutex; /* To track if transaction is active or not */ boolean_t req_in_progress; /* Tracks if daemon did not reply back in time */ boolean_t req_timed_out; /* Tracks if daemon is serving a request currently */ boolean_t daemon_busy; /* Count of KVP requests from Hyper-V. 
*/ uint64_t pending_reqs; /* Length of host message */ uint32_t host_msg_len; /* Pointer to channel */ hv_vmbus_channel *channelp; /* Host message id */ uint64_t host_msg_id; /* Current kvp message from the host */ struct hv_kvp_msg *host_kvp_msg; /* Current kvp message for daemon */ struct hv_kvp_msg daemon_kvp_msg; /* Rcv buffer for communicating with the host*/ uint8_t *rcv_buf; /* Device semaphore to control communication */ struct sema dev_sema; /* Indicates if daemon registered with driver */ boolean_t register_done; /* Character device status */ boolean_t dev_accessed; } kvp_globals; /* global vars */ MALLOC_DECLARE(M_HV_KVP_DEV_BUF); MALLOC_DEFINE(M_HV_KVP_DEV_BUF, "hv_kvp_dev buffer", "buffer for hv_kvp_dev module"); /* * hv_kvp low level functions */ /* * Check if kvp transaction is in progres */ static int hv_kvp_req_in_progress(void) { return (kvp_globals.req_in_progress); } /* * This routine is called whenever a message is received from the host */ static void hv_kvp_transaction_init(uint32_t rcv_len, hv_vmbus_channel *rcv_channel, uint64_t request_id, uint8_t *rcv_buf) { /* Store all the relevant message details in the global structure */ /* Do not need to use mutex for req_in_progress here */ kvp_globals.req_in_progress = true; kvp_globals.host_msg_len = rcv_len; kvp_globals.channelp = rcv_channel; kvp_globals.host_msg_id = request_id; kvp_globals.rcv_buf = rcv_buf; kvp_globals.host_kvp_msg = (struct hv_kvp_msg *)&rcv_buf[ sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; } /* * hv_kvp - version neogtiation function */ static void hv_kvp_negotiate_version(struct hv_vmbus_icmsg_hdr *icmsghdrp, struct hv_vmbus_icmsg_negotiate *negop, uint8_t *buf) { int icframe_vercnt; int icmsg_vercnt; icmsghdrp->icmsgsize = 0x10; negop = (struct hv_vmbus_icmsg_negotiate *)&buf[ sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; icframe_vercnt = negop->icframe_vercnt; icmsg_vercnt = negop->icmsg_vercnt; /* * Select the framework version number we will support */ if ((icframe_vercnt >= 2) && (negop->icversion_data[1].major == 3)) { icframe_vercnt = 3; - if (icmsg_vercnt >= 2) + if (icmsg_vercnt > 2) icmsg_vercnt = 4; else icmsg_vercnt = 3; } else { icframe_vercnt = 1; icmsg_vercnt = 1; } negop->icframe_vercnt = 1; negop->icmsg_vercnt = 1; negop->icversion_data[0].major = icframe_vercnt; negop->icversion_data[0].minor = 0; negop->icversion_data[1].major = icmsg_vercnt; negop->icversion_data[1].minor = 0; } /* * Convert ip related info in umsg from utf8 to utf16 and store in hmsg */ static int hv_kvp_convert_utf8_ipinfo_to_utf16(struct hv_kvp_msg *umsg, struct hv_kvp_ip_msg *host_ip_msg) { int err_ip, err_subnet, err_gway, err_dns, err_adap; int UNUSED_FLAG = 1; utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.ip_addr, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.ip_addr, strlen((char *)umsg->body.kvp_ip_val.ip_addr), UNUSED_FLAG, &err_ip); utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.sub_net, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.sub_net, strlen((char *)umsg->body.kvp_ip_val.sub_net), UNUSED_FLAG, &err_subnet); utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.gate_way, MAX_GATEWAY_SIZE, (char *)umsg->body.kvp_ip_val.gate_way, strlen((char *)umsg->body.kvp_ip_val.gate_way), UNUSED_FLAG, &err_gway); utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.dns_addr, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.dns_addr, strlen((char *)umsg->body.kvp_ip_val.dns_addr), UNUSED_FLAG, &err_dns); utf8_to_utf16((uint16_t 
*)host_ip_msg->kvp_ip_val.adapter_id, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.adapter_id, strlen((char *)umsg->body.kvp_ip_val.adapter_id), UNUSED_FLAG, &err_adap); host_ip_msg->kvp_ip_val.dhcp_enabled = umsg->body.kvp_ip_val.dhcp_enabled; host_ip_msg->kvp_ip_val.addr_family = umsg->body.kvp_ip_val.addr_family; return (err_ip | err_subnet | err_gway | err_dns | err_adap); } /* * Convert ip related info in hmsg from utf16 to utf8 and store in umsg */ static int hv_kvp_convert_utf16_ipinfo_to_utf8(struct hv_kvp_ip_msg *host_ip_msg, struct hv_kvp_msg *umsg) { int err_ip, err_subnet, err_gway, err_dns, err_adap; int UNUSED_FLAG = 1; int guid_index; struct hv_device *hv_dev; /* GUID Data Structure */ hn_softc_t *sc; /* hn softc structure */ char if_name[4]; unsigned char guid_instance[40]; char *guid_data = NULL; char buf[39]; struct guid_extract { char a1[2]; char a2[2]; char a3[2]; char a4[2]; char b1[2]; char b2[2]; char c1[2]; char c2[2]; char d[4]; char e[12]; }; struct guid_extract *id; device_t *devs; int devcnt; /* IP Address */ utf16_to_utf8((char *)umsg->body.kvp_ip_val.ip_addr, MAX_IP_ADDR_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.ip_addr, MAX_IP_ADDR_SIZE, UNUSED_FLAG, &err_ip); /* Adapter ID : GUID */ utf16_to_utf8((char *)umsg->body.kvp_ip_val.adapter_id, MAX_ADAPTER_ID_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.adapter_id, MAX_ADAPTER_ID_SIZE, UNUSED_FLAG, &err_adap); if (devclass_get_devices(devclass_find("hn"), &devs, &devcnt) == 0) { for (devcnt = devcnt - 1; devcnt >= 0; devcnt--) { sc = device_get_softc(devs[devcnt]); /* Trying to find GUID of Network Device */ hv_dev = sc->hn_dev_obj; for (guid_index = 0; guid_index < 16; guid_index++) { sprintf(&guid_instance[guid_index * 2], "%02x", hv_dev->device_id.data[guid_index]); } guid_data = (char *)guid_instance; id = (struct guid_extract *)guid_data; snprintf(buf, sizeof(buf), "{%.2s%.2s%.2s%.2s-%.2s%.2s-%.2s%.2s-%.4s-%s}", id->a4, id->a3, id->a2, id->a1, id->b2, id->b1, id->c2, id->c1, id->d, id->e); guid_data = NULL; sprintf(if_name, "%s%d", "hn", device_get_unit(devs[devcnt])); if (strncmp(buf, (char *)umsg->body.kvp_ip_val.adapter_id, 39) == 0) { strcpy((char *)umsg->body.kvp_ip_val.adapter_id, if_name); break; } } free(devs, M_TEMP); } /* Address Family , DHCP , SUBNET, Gateway, DNS */ umsg->kvp_hdr.operation = host_ip_msg->operation; umsg->body.kvp_ip_val.addr_family = host_ip_msg->kvp_ip_val.addr_family; umsg->body.kvp_ip_val.dhcp_enabled = host_ip_msg->kvp_ip_val.dhcp_enabled; utf16_to_utf8((char *)umsg->body.kvp_ip_val.sub_net, MAX_IP_ADDR_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.sub_net, MAX_IP_ADDR_SIZE, UNUSED_FLAG, &err_subnet); utf16_to_utf8((char *)umsg->body.kvp_ip_val.gate_way, MAX_GATEWAY_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.gate_way, MAX_GATEWAY_SIZE, UNUSED_FLAG, &err_gway); utf16_to_utf8((char *)umsg->body.kvp_ip_val.dns_addr, MAX_IP_ADDR_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.dns_addr, MAX_IP_ADDR_SIZE, UNUSED_FLAG, &err_dns); return (err_ip | err_subnet | err_gway | err_dns | err_adap); } /* * Prepare a user kvp msg based on host kvp msg (utf16 to utf8) * Ensure utf16_utf8 takes care of the additional string terminating char!! 
*/ static void hv_kvp_convert_hostmsg_to_usermsg(void) { int utf_err = 0; uint32_t value_type; struct hv_kvp_ip_msg *host_ip_msg = (struct hv_kvp_ip_msg *) kvp_globals.host_kvp_msg; struct hv_kvp_msg *hmsg = kvp_globals.host_kvp_msg; struct hv_kvp_msg *umsg = &kvp_globals.daemon_kvp_msg; memset(umsg, 0, sizeof(struct hv_kvp_msg)); umsg->kvp_hdr.operation = hmsg->kvp_hdr.operation; umsg->kvp_hdr.pool = hmsg->kvp_hdr.pool; switch (umsg->kvp_hdr.operation) { case HV_KVP_OP_SET_IP_INFO: hv_kvp_convert_utf16_ipinfo_to_utf8(host_ip_msg, umsg); break; case HV_KVP_OP_GET_IP_INFO: utf16_to_utf8((char *)umsg->body.kvp_ip_val.adapter_id, MAX_ADAPTER_ID_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.adapter_id, MAX_ADAPTER_ID_SIZE, 1, &utf_err); umsg->body.kvp_ip_val.addr_family = host_ip_msg->kvp_ip_val.addr_family; break; case HV_KVP_OP_SET: value_type = hmsg->body.kvp_set.data.value_type; switch (value_type) { case HV_REG_SZ: umsg->body.kvp_set.data.value_size = utf16_to_utf8( (char *)umsg->body.kvp_set.data.msg_value.value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE - 1, (uint16_t *)hmsg->body.kvp_set.data.msg_value.value, hmsg->body.kvp_set.data.value_size, 1, &utf_err); /* utf8 encoding */ umsg->body.kvp_set.data.value_size = umsg->body.kvp_set.data.value_size / 2; break; case HV_REG_U32: umsg->body.kvp_set.data.value_size = sprintf(umsg->body.kvp_set.data.msg_value.value, "%d", hmsg->body.kvp_set.data.msg_value.value_u32) + 1; break; case HV_REG_U64: umsg->body.kvp_set.data.value_size = sprintf(umsg->body.kvp_set.data.msg_value.value, "%llu", (unsigned long long) hmsg->body.kvp_set.data.msg_value.value_u64) + 1; break; } umsg->body.kvp_set.data.key_size = utf16_to_utf8( umsg->body.kvp_set.data.key, HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1, (uint16_t *)hmsg->body.kvp_set.data.key, hmsg->body.kvp_set.data.key_size, 1, &utf_err); /* utf8 encoding */ umsg->body.kvp_set.data.key_size = umsg->body.kvp_set.data.key_size / 2; break; case HV_KVP_OP_GET: umsg->body.kvp_get.data.key_size = utf16_to_utf8(umsg->body.kvp_get.data.key, HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1, (uint16_t *)hmsg->body.kvp_get.data.key, hmsg->body.kvp_get.data.key_size, 1, &utf_err); /* utf8 encoding */ umsg->body.kvp_get.data.key_size = umsg->body.kvp_get.data.key_size / 2; break; case HV_KVP_OP_DELETE: umsg->body.kvp_delete.key_size = utf16_to_utf8(umsg->body.kvp_delete.key, HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1, (uint16_t *)hmsg->body.kvp_delete.key, hmsg->body.kvp_delete.key_size, 1, &utf_err); /* utf8 encoding */ umsg->body.kvp_delete.key_size = umsg->body.kvp_delete.key_size / 2; break; case HV_KVP_OP_ENUMERATE: umsg->body.kvp_enum_data.index = hmsg->body.kvp_enum_data.index; break; default: hv_kvp_log_info("%s: daemon_kvp_msg: Invalid operation : %d\n", __func__, umsg->kvp_hdr.operation); } } /* * Prepare a host kvp msg based on user kvp msg (utf8 to utf16) */ static int hv_kvp_convert_usermsg_to_hostmsg(void) { int hkey_len = 0, hvalue_len = 0, utf_err = 0; struct hv_kvp_exchg_msg_value *host_exchg_data; char *key_name, *value; struct hv_kvp_msg *umsg = &kvp_globals.daemon_kvp_msg; struct hv_kvp_msg *hmsg = kvp_globals.host_kvp_msg; struct hv_kvp_ip_msg *host_ip_msg = (struct hv_kvp_ip_msg *)hmsg; switch (hmsg->kvp_hdr.operation) { case HV_KVP_OP_GET_IP_INFO: return (hv_kvp_convert_utf8_ipinfo_to_utf16(umsg, host_ip_msg)); case HV_KVP_OP_SET_IP_INFO: case HV_KVP_OP_SET: case HV_KVP_OP_DELETE: return (KVP_SUCCESS); case HV_KVP_OP_ENUMERATE: host_exchg_data = &hmsg->body.kvp_enum_data.data; key_name = umsg->body.kvp_enum_data.data.key; hkey_len = 
utf8_to_utf16((uint16_t *)host_exchg_data->key, ((HV_KVP_EXCHANGE_MAX_KEY_SIZE / 2) - 2), key_name, strlen(key_name), 1, &utf_err); /* utf16 encoding */ host_exchg_data->key_size = 2 * (hkey_len + 1); value = umsg->body.kvp_enum_data.data.msg_value.value; hvalue_len = utf8_to_utf16( (uint16_t *)host_exchg_data->msg_value.value, ((HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2) - 2), value, strlen(value), 1, &utf_err); host_exchg_data->value_size = 2 * (hvalue_len + 1); host_exchg_data->value_type = HV_REG_SZ; if ((hkey_len < 0) || (hvalue_len < 0)) return (HV_KVP_E_FAIL); return (KVP_SUCCESS); case HV_KVP_OP_GET: host_exchg_data = &hmsg->body.kvp_get.data; value = umsg->body.kvp_get.data.msg_value.value; hvalue_len = utf8_to_utf16( (uint16_t *)host_exchg_data->msg_value.value, ((HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2) - 2), value, strlen(value), 1, &utf_err); /* Convert value size to uft16 */ host_exchg_data->value_size = 2 * (hvalue_len + 1); /* Use values by string */ host_exchg_data->value_type = HV_REG_SZ; if ((hkey_len < 0) || (hvalue_len < 0)) return (HV_KVP_E_FAIL); return (KVP_SUCCESS); default: return (HV_KVP_E_FAIL); } } /* * Send the response back to the host. */ static void hv_kvp_respond_host(int error) { struct hv_vmbus_icmsg_hdr *hv_icmsg_hdrp; hv_icmsg_hdrp = (struct hv_vmbus_icmsg_hdr *) &kvp_globals.rcv_buf[sizeof(struct hv_vmbus_pipe_hdr)]; if (error) error = HV_KVP_E_FAIL; hv_icmsg_hdrp->status = error; hv_icmsg_hdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; error = hv_vmbus_channel_send_packet(kvp_globals.channelp, kvp_globals.rcv_buf, kvp_globals.host_msg_len, kvp_globals.host_msg_id, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); if (error) hv_kvp_log_info("%s: hv_kvp_respond_host: sendpacket error:%d\n", __func__, error); } /* * This is the main kvp kernel process that interacts with both user daemon * and the host */ static void hv_kvp_send_msg_to_daemon(void) { /* Prepare kvp_msg to be sent to user */ hv_kvp_convert_hostmsg_to_usermsg(); /* Send the msg to user via function deamon_read - setting sema */ sema_post(&kvp_globals.dev_sema); } /* * Function to read the kvp request buffer from host * and interact with daemon */ static void hv_kvp_process_request(void *context) { uint8_t *kvp_buf; hv_vmbus_channel *channel = context; uint32_t recvlen = 0; uint64_t requestid; struct hv_vmbus_icmsg_hdr *icmsghdrp; int ret = 0; uint64_t pending_cnt = 1; hv_kvp_log_info("%s: entering hv_kvp_process_request\n", __func__); kvp_buf = receive_buffer[HV_KVP]; ret = hv_vmbus_channel_recv_packet(channel, kvp_buf, 2 * PAGE_SIZE, &recvlen, &requestid); /* * We start counting only after the daemon registers * and therefore there could be requests pending in * the VMBus that are not reflected in pending_cnt. * Therefore we continue reading as long as either of * the below conditions is true. */ while ((pending_cnt>0) || ((ret == 0) && (recvlen > 0))) { if ((ret == 0) && (recvlen>0)) { icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &kvp_buf[sizeof(struct hv_vmbus_pipe_hdr)]; hv_kvp_transaction_init(recvlen, channel, requestid, kvp_buf); if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { hv_kvp_negotiate_version(icmsghdrp, NULL, kvp_buf); hv_kvp_respond_host(ret); /* * It is ok to not acquire the mutex before setting * req_in_progress here because negotiation is the * first thing that happens and hence there is no * chance of a race condition. 
*/ kvp_globals.req_in_progress = false; hv_kvp_log_info("%s :version negotiated\n", __func__); } else { if (!kvp_globals.daemon_busy) { hv_kvp_log_info("%s: issuing qury to daemon\n", __func__); mtx_lock(&kvp_globals.pending_mutex); kvp_globals.req_timed_out = false; kvp_globals.daemon_busy = true; mtx_unlock(&kvp_globals.pending_mutex); hv_kvp_send_msg_to_daemon(); hv_kvp_log_info("%s: waiting for daemon\n", __func__); } /* Wait 5 seconds for daemon to respond back */ tsleep(&kvp_globals, 0, "kvpworkitem", 5 * hz); hv_kvp_log_info("%s: came out of wait\n", __func__); } } mtx_lock(&kvp_globals.pending_mutex); /* Notice that once req_timed_out is set to true * it will remain true until the next request is * sent to the daemon. The response from daemon * is forwarded to host only when this flag is * false. */ kvp_globals.req_timed_out = true; /* * Cancel request if so need be. */ if (hv_kvp_req_in_progress()) { hv_kvp_log_info("%s: request was still active after wait so failing\n", __func__); hv_kvp_respond_host(HV_KVP_E_FAIL); kvp_globals.req_in_progress = false; } /* * Decrement pending request count and */ if (kvp_globals.pending_reqs>0) { kvp_globals.pending_reqs = kvp_globals.pending_reqs - 1; } pending_cnt = kvp_globals.pending_reqs; mtx_unlock(&kvp_globals.pending_mutex); /* * Try reading next buffer */ recvlen = 0; ret = hv_vmbus_channel_recv_packet(channel, kvp_buf, 2 * PAGE_SIZE, &recvlen, &requestid); - hv_kvp_log_info("%s: read: context %p, pending_cnt %ju ret =%d, recvlen=%d\n", - __func__, context, pending_cnt, ret, recvlen); + hv_kvp_log_info("%s: read: context %p, pending_cnt %llu ret =%d, recvlen=%d\n", + __func__, context, (unsigned long long)pending_cnt, ret, recvlen); } } /* * Callback routine that gets called whenever there is a message from host */ void hv_kvp_callback(void *context) { uint64_t pending_cnt = 0; if (kvp_globals.register_done == false) { kvp_globals.channelp = context; } else { mtx_lock(&kvp_globals.pending_mutex); kvp_globals.pending_reqs = kvp_globals.pending_reqs + 1; pending_cnt = kvp_globals.pending_reqs; mtx_unlock(&kvp_globals.pending_mutex); if (pending_cnt == 1) { hv_kvp_log_info("%s: Queuing work item\n", __func__); hv_queue_work_item( service_table[HV_KVP].work_queue, hv_kvp_process_request, context ); } } } /* * This function is called by the hv_kvp_init - * creates character device hv_kvp_dev * allocates memory to hv_kvp_dev_buf * */ static int hv_kvp_dev_init(void) { int error = 0; /* initialize semaphore */ sema_init(&kvp_globals.dev_sema, 0, "hv_kvp device semaphore"); /* create character device */ error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &hv_kvp_dev, &hv_kvp_cdevsw, 0, UID_ROOT, GID_WHEEL, 0640, "hv_kvp_dev"); if (error != 0) return (error); /* * Malloc with M_WAITOK flag will never fail. 
*/ hv_kvp_dev_buf = malloc(sizeof(*hv_kvp_dev_buf), M_HV_KVP_DEV_BUF, M_WAITOK | M_ZERO); return (0); } /* * This function is called by the hv_kvp_deinit - * destroy character device */ static void hv_kvp_dev_destroy(void) { - if (daemon_task != NULL) { + if (daemon_task != NULL) { PROC_LOCK(daemon_task); - kern_psignal(daemon_task, SIGKILL); + kern_psignal(daemon_task, SIGKILL); PROC_UNLOCK(daemon_task); } destroy_dev(hv_kvp_dev); free(hv_kvp_dev_buf, M_HV_KVP_DEV_BUF); return; } static int hv_kvp_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { hv_kvp_log_info("%s: Opened device \"hv_kvp_device\" successfully.\n", __func__); if (kvp_globals.dev_accessed) return (-EBUSY); daemon_task = curproc; kvp_globals.dev_accessed = true; kvp_globals.daemon_busy = false; return (0); } static int hv_kvp_dev_close(struct cdev *dev __unused, int fflag __unused, int devtype __unused, struct thread *td __unused) { hv_kvp_log_info("%s: Closing device \"hv_kvp_device\".\n", __func__); kvp_globals.dev_accessed = false; kvp_globals.register_done = false; return (0); } /* * hv_kvp_daemon read invokes this function * acts as a send to daemon */ static int hv_kvp_dev_daemon_read(struct cdev *dev __unused, struct uio *uio, int ioflag __unused) { size_t amt; int error = 0; /* Check hv_kvp daemon registration status*/ if (!kvp_globals.register_done) return (KVP_ERROR); sema_wait(&kvp_globals.dev_sema); memcpy(hv_kvp_dev_buf, &kvp_globals.daemon_kvp_msg, sizeof(struct hv_kvp_msg)); amt = MIN(uio->uio_resid, uio->uio_offset >= BUFFERSIZE + 1 ? 0 : BUFFERSIZE + 1 - uio->uio_offset); if ((error = uiomove(hv_kvp_dev_buf, amt, uio)) != 0) hv_kvp_log_info("%s: hv_kvp uiomove read failed!\n", __func__); return (error); } /* * hv_kvp_daemon write invokes this function * acts as a recieve from daemon */ static int hv_kvp_dev_daemon_write(struct cdev *dev __unused, struct uio *uio, int ioflag __unused) { size_t amt; int error = 0; uio->uio_offset = 0; amt = MIN(uio->uio_resid, BUFFERSIZE); error = uiomove(hv_kvp_dev_buf, amt, uio); if (error != 0) return (error); memcpy(&kvp_globals.daemon_kvp_msg, hv_kvp_dev_buf, sizeof(struct hv_kvp_msg)); if (kvp_globals.register_done == false) { if (kvp_globals.daemon_kvp_msg.kvp_hdr.operation == HV_KVP_OP_REGISTER) { kvp_globals.register_done = true; if (kvp_globals.channelp) { hv_kvp_callback(kvp_globals.channelp); } } else { hv_kvp_log_info("%s, KVP Registration Failed\n", __func__); return (KVP_ERROR); } } else { mtx_lock(&kvp_globals.pending_mutex); if(!kvp_globals.req_timed_out) { hv_kvp_convert_usermsg_to_hostmsg(); hv_kvp_respond_host(KVP_SUCCESS); wakeup(&kvp_globals); kvp_globals.req_in_progress = false; } kvp_globals.daemon_busy = false; mtx_unlock(&kvp_globals.pending_mutex); } return (error); } /* * hv_kvp_daemon poll invokes this function to check if data is available * for daemon to read. */ static int hv_kvp_dev_daemon_poll(struct cdev *dev __unused, int events, struct thread *td __unused) { int revents = 0; mtx_lock(&kvp_globals.pending_mutex); /* * We check global flag daemon_busy for the data availiability for * userland to read. Deamon_busy is set to true before driver has data * for daemon to read. It is set to false after daemon sends * then response back to driver. */ if (kvp_globals.daemon_busy == true) revents = POLLIN; mtx_unlock(&kvp_globals.pending_mutex); return (revents); } /* * hv_kvp initialization function * called from hv_util service. 
* */ int hv_kvp_init(hv_vmbus_service *srv) { int error = 0; hv_work_queue *work_queue = NULL; memset(&kvp_globals, 0, sizeof(kvp_globals)); work_queue = hv_work_queue_create("KVP Service"); if (work_queue == NULL) { hv_kvp_log_info("%s: Work queue alloc failed\n", __func__); error = ENOMEM; hv_kvp_log_error("%s: ENOMEM\n", __func__); goto Finish; } srv->work_queue = work_queue; error = hv_kvp_dev_init(); mtx_init(&kvp_globals.pending_mutex, "hv-kvp pending mutex", NULL, MTX_DEF); kvp_globals.pending_reqs = 0; Finish: return (error); } void hv_kvp_deinit(void) { hv_kvp_dev_destroy(); mtx_destroy(&kvp_globals.pending_mutex); return; } Index: head/sys/dev/hyperv/utilities/hv_util.c =================================================================== --- head/sys/dev/hyperv/utilities/hv_util.c (revision 282211) +++ head/sys/dev/hyperv/utilities/hv_util.c (revision 282212) @@ -1,491 +1,500 @@ /*- * Copyright (c) 2014 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * A common driver for all hyper-V util services. */ #include #include #include #include #include #include #include #include #include #include "hv_kvp.h" /* Time Sync data */ typedef struct { uint64_t data; } time_sync_data; static void hv_shutdown_cb(void *context); static void hv_heartbeat_cb(void *context); static void hv_timesync_cb(void *context); static int hv_timesync_init(hv_vmbus_service *serv); /* * Note: GUID codes below are predefined by the host hypervisor * (Hyper-V and Azure)interface and required for correct operation. 
*/ hv_vmbus_service service_table[] = { /* Shutdown Service */ { .guid.data = {0x31, 0x60, 0x0B, 0X0E, 0x13, 0x52, 0x34, 0x49, 0x81, 0x8B, 0x38, 0XD9, 0x0C, 0xED, 0x39, 0xDB}, .name = "Hyper-V Shutdown Service\n", .enabled = TRUE, .callback = hv_shutdown_cb, }, /* Time Synch Service */ { .guid.data = {0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49, 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf}, .name = "Hyper-V Time Synch Service\n", .enabled = TRUE, .init = hv_timesync_init, .callback = hv_timesync_cb, }, /* Heartbeat Service */ { .guid.data = {0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e, 0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d}, .name = "Hyper-V Heartbeat Service\n", .enabled = TRUE, .callback = hv_heartbeat_cb, }, /* KVP (Key Value Pair) Service */ { .guid.data = {0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d, 0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3, 0xe6}, .name = "Hyper-V KVP Service\n", .enabled = TRUE, .init = hv_kvp_init, .callback = hv_kvp_callback, }, }; /* * Receive buffer pointers. There is one buffer per utility service. The * buffer is allocated during attach(). */ uint8_t *receive_buffer[HV_MAX_UTIL_SERVICES]; static boolean_t destroyed_kvp = FALSE; struct hv_ictimesync_data { uint64_t parenttime; uint64_t childtime; uint64_t roundtriptime; uint8_t flags; } __packed; static int hv_timesync_init(hv_vmbus_service *serv) { serv->work_queue = hv_work_queue_create("Time Sync"); if (serv->work_queue == NULL) return (ENOMEM); return (0); } static void hv_negotiate_version( struct hv_vmbus_icmsg_hdr* icmsghdrp, struct hv_vmbus_icmsg_negotiate* negop, uint8_t* buf) { icmsghdrp->icmsgsize = 0x10; negop = (struct hv_vmbus_icmsg_negotiate *)&buf[ sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; if (negop->icframe_vercnt >= 2 && negop->icversion_data[1].major == 3) { negop->icversion_data[0].major = 3; negop->icversion_data[0].minor = 0; negop->icversion_data[1].major = 3; negop->icversion_data[1].minor = 0; } else { negop->icversion_data[0].major = 1; negop->icversion_data[0].minor = 0; negop->icversion_data[1].major = 1; negop->icversion_data[1].minor = 0; } negop->icframe_vercnt = 1; negop->icmsg_vercnt = 1; } /** * Set host time based on time sync message from host */ static void hv_set_host_time(void *context) { time_sync_data* time_msg = (time_sync_data*) context; uint64_t hosttime = time_msg->data; struct timespec guest_ts, host_ts; uint64_t host_tns; int64_t diff; int error; host_tns = (hosttime - HV_WLTIMEDELTA) * 100; host_ts.tv_sec = (time_t)(host_tns/HV_NANO_SEC_PER_SEC); host_ts.tv_nsec = (long)(host_tns%HV_NANO_SEC_PER_SEC); nanotime(&guest_ts); diff = (int64_t)host_ts.tv_sec - (int64_t)guest_ts.tv_sec; /* * If host differs by 5 seconds then make the guest catch up */ if (diff > 5 || diff < -5) { error = kern_clock_settime(curthread, CLOCK_REALTIME, &host_ts); } /* * Free the hosttime that was allocated in hv_adj_guesttime() */ free(time_msg, M_DEVBUF); } /** * @brief Synchronize time with host after reboot, restore, etc. * * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM. * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time * message after the timesync channel is opened. Since the hv_utils module is * loaded after hv_vmbus, the first message is usually missed. The other * thing is, systime is automatically set to emulated hardware clock which may * not be UTC time or in the same time zone. So, to override these effects, we * use the first 50 time samples for initial system time setting. 
*/ static inline void hv_adj_guesttime(uint64_t hosttime, uint8_t flags) { time_sync_data* time_msg; time_msg = malloc(sizeof(time_sync_data), M_DEVBUF, M_NOWAIT); if (time_msg == NULL) return; time_msg->data = hosttime; if ((flags & HV_ICTIMESYNCFLAG_SYNC) != 0) { hv_queue_work_item(service_table[HV_TIME_SYNCH].work_queue, hv_set_host_time, time_msg); } else if ((flags & HV_ICTIMESYNCFLAG_SAMPLE) != 0) { hv_queue_work_item(service_table[HV_TIME_SYNCH].work_queue, hv_set_host_time, time_msg); } else { free(time_msg, M_DEVBUF); } } /** * Time Sync Channel message handler */ static void hv_timesync_cb(void *context) { hv_vmbus_channel* channel = context; hv_vmbus_icmsg_hdr* icmsghdrp; uint32_t recvlen; uint64_t requestId; int ret; uint8_t* time_buf; struct hv_ictimesync_data* timedatap; time_buf = receive_buffer[HV_TIME_SYNCH]; ret = hv_vmbus_channel_recv_packet(channel, time_buf, PAGE_SIZE, &recvlen, &requestId); if ((ret == 0) && recvlen > 0) { icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &time_buf[ sizeof(struct hv_vmbus_pipe_hdr)]; if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { hv_negotiate_version(icmsghdrp, NULL, time_buf); } else { timedatap = (struct hv_ictimesync_data *) &time_buf[ sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; hv_adj_guesttime(timedatap->parenttime, timedatap->flags); } icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; hv_vmbus_channel_send_packet(channel, time_buf, recvlen, requestId, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); } } /** * Shutdown */ static void hv_shutdown_cb(void *context) { uint8_t* buf; hv_vmbus_channel* channel = context; uint8_t execute_shutdown = 0; hv_vmbus_icmsg_hdr* icmsghdrp; uint32_t recv_len; uint64_t request_id; int ret; hv_vmbus_shutdown_msg_data* shutdown_msg; buf = receive_buffer[HV_SHUT_DOWN]; ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, &recv_len, &request_id); if ((ret == 0) && recv_len > 0) { icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &buf[sizeof(struct hv_vmbus_pipe_hdr)]; if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { hv_negotiate_version(icmsghdrp, NULL, buf); } else { shutdown_msg = (struct hv_vmbus_shutdown_msg_data *) &buf[sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; switch (shutdown_msg->flags) { case 0: case 1: icmsghdrp->status = HV_S_OK; execute_shutdown = 1; if(bootverbose) printf("Shutdown request received -" " graceful shutdown initiated\n"); break; default: icmsghdrp->status = HV_E_FAIL; execute_shutdown = 0; printf("Shutdown request received -" " Invalid request\n"); break; } } icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; hv_vmbus_channel_send_packet(channel, buf, recv_len, request_id, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); } if (execute_shutdown) shutdown_nice(RB_POWEROFF); } /** * Process heartbeat message */ static void hv_heartbeat_cb(void *context) { uint8_t* buf; hv_vmbus_channel* channel = context; uint32_t recvlen; uint64_t requestid; int ret; struct hv_vmbus_heartbeat_msg_data* heartbeat_msg; struct hv_vmbus_icmsg_hdr* icmsghdrp; buf = receive_buffer[HV_HEART_BEAT]; ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, &recvlen, &requestid); if ((ret == 0) && recvlen > 0) { icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &buf[sizeof(struct hv_vmbus_pipe_hdr)]; if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { hv_negotiate_version(icmsghdrp, NULL, buf); } else { heartbeat_msg = (struct hv_vmbus_heartbeat_msg_data *) &buf[sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct 
hv_vmbus_icmsg_hdr)]; heartbeat_msg->seq_num += 1; } icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; hv_vmbus_channel_send_packet(channel, buf, recvlen, requestid, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); } } static int hv_util_probe(device_t dev) { int i; int rtn_value = ENXIO; for (i = 0; i < HV_MAX_UTIL_SERVICES; i++) { const char *p = vmbus_get_type(dev); if (service_table[i].enabled && !memcmp(p, &service_table[i].guid, sizeof(hv_guid))) { device_set_softc(dev, (void *) (&service_table[i])); rtn_value = BUS_PROBE_DEFAULT; } } return rtn_value; } static int hv_util_attach(device_t dev) { struct hv_device* hv_dev; struct hv_vmbus_service* service; int ret; size_t receive_buffer_offset; hv_dev = vmbus_get_devctx(dev); service = device_get_softc(dev); receive_buffer_offset = service - &service_table[0]; device_printf(dev, "Hyper-V Service attaching: %s\n", service->name); receive_buffer[receive_buffer_offset] = malloc(4 * PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); if (service->init != NULL) { ret = service->init(service); if (ret) { ret = ENODEV; goto error0; } } + /* + * These services are not performance critical and do not need + * batched reading. Furthermore, some services such as KVP can + * only handle one message from the host at a time. + * Turn off batched reading for all util drivers before we open the + * channel. + */ + hv_set_channel_read_state(hv_dev->channel, FALSE); + ret = hv_vmbus_channel_open(hv_dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0, service->callback, hv_dev->channel); if (ret) goto error0; return (0); error0: free(receive_buffer[receive_buffer_offset], M_DEVBUF); receive_buffer[receive_buffer_offset] = NULL; return (ret); } static int hv_util_detach(device_t dev) { struct hv_device* hv_dev; struct hv_vmbus_service* service; size_t receive_buffer_offset; if (!destroyed_kvp) { hv_kvp_deinit(); destroyed_kvp = TRUE; } hv_dev = vmbus_get_devctx(dev); hv_vmbus_channel_close(hv_dev->channel); service = device_get_softc(dev); receive_buffer_offset = service - &service_table[0]; if (service->work_queue != NULL) hv_work_queue_close(service->work_queue); free(receive_buffer[receive_buffer_offset], M_DEVBUF); receive_buffer[receive_buffer_offset] = NULL; return (0); } static void hv_util_init(void) { } static int hv_util_modevent(module_t mod, int event, void *arg) { switch (event) { case MOD_LOAD: break; case MOD_UNLOAD: break; default: break; } return (0); } static device_method_t util_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hv_util_probe), DEVMETHOD(device_attach, hv_util_attach), DEVMETHOD(device_detach, hv_util_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), { 0, 0 } } ; static driver_t util_driver = { "hyperv-utils", util_methods, 0 }; static devclass_t util_devclass; DRIVER_MODULE(hv_utils, vmbus, util_driver, util_devclass, hv_util_modevent, 0); MODULE_VERSION(hv_utils, 1); MODULE_DEPEND(hv_utils, vmbus, 1, 1, 1); SYSINIT(hv_util_initx, SI_SUB_KTHREAD_IDLE, SI_ORDER_MIDDLE + 1, hv_util_init, NULL); Index: head/sys/dev/hyperv/vmbus/hv_channel.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_channel.c (revision 282211) +++ head/sys/dev/hyperv/vmbus/hv_channel.c (revision 282212) @@ -1,845 +1,881 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "hv_vmbus_priv.h" static int vmbus_channel_create_gpadl_header( /* must be phys and virt contiguous*/ void* contig_buffer, /* page-size multiple */ uint32_t size, hv_vmbus_channel_msg_info** msg_info, uint32_t* message_count); static void vmbus_channel_set_event(hv_vmbus_channel* channel); /** * @brief Trigger an event notification on the specified channel */ static void vmbus_channel_set_event(hv_vmbus_channel *channel) { hv_vmbus_monitor_page *monitor_page; if (channel->offer_msg.monitor_allocated) { /* Each uint32_t represents 32 channels */ synch_set_bit((channel->offer_msg.child_rel_id & 31), ((uint32_t *)hv_vmbus_g_connection.send_interrupt_page + ((channel->offer_msg.child_rel_id >> 5)))); monitor_page = (hv_vmbus_monitor_page *) hv_vmbus_g_connection.monitor_pages; monitor_page++; /* Get the child to parent monitor page */ synch_set_bit(channel->monitor_bit, (uint32_t *)&monitor_page-> trigger_group[channel->monitor_group].u.pending); } else { - hv_vmbus_set_event(channel->offer_msg.child_rel_id); + hv_vmbus_set_event(channel); } } /** * @brief Open the specified channel */ int hv_vmbus_channel_open( hv_vmbus_channel* new_channel, uint32_t send_ring_buffer_size, uint32_t recv_ring_buffer_size, void* user_data, uint32_t user_data_len, hv_vmbus_pfn_channel_callback pfn_on_channel_callback, void* context) { int ret = 0; void *in, *out; hv_vmbus_channel_open_channel* open_msg; hv_vmbus_channel_msg_info* open_info; + mtx_lock(&new_channel->sc_lock); + if (new_channel->state == HV_CHANNEL_OPEN_STATE) { + new_channel->state = HV_CHANNEL_OPENING_STATE; + } else { + mtx_unlock(&new_channel->sc_lock); + if(bootverbose) + printf("VMBUS: Trying to open channel <%p> which in " + "%d state.\n", new_channel, new_channel->state); + return (EINVAL); + } + mtx_unlock(&new_channel->sc_lock); + new_channel->on_channel_callback = pfn_on_channel_callback; new_channel->channel_callback_context = context; /* Allocate the ring buffer */ out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size), M_DEVBUF, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); KASSERT(out != NULL, ("Error VMBUS: contigmalloc failed to allocate Ring Buffer!")); if (out == NULL) 
return (ENOMEM); in = ((uint8_t *) out + send_ring_buffer_size); new_channel->ring_buffer_pages = out; new_channel->ring_buffer_page_count = (send_ring_buffer_size + recv_ring_buffer_size) >> PAGE_SHIFT; new_channel->ring_buffer_size = send_ring_buffer_size + recv_ring_buffer_size; hv_vmbus_ring_buffer_init( &new_channel->outbound, out, send_ring_buffer_size); hv_vmbus_ring_buffer_init( &new_channel->inbound, in, recv_ring_buffer_size); /** * Establish the gpadl for the ring buffer */ new_channel->ring_buffer_gpadl_handle = 0; ret = hv_vmbus_channel_establish_gpadl(new_channel, new_channel->outbound.ring_buffer, send_ring_buffer_size + recv_ring_buffer_size, &new_channel->ring_buffer_gpadl_handle); /** * Create and init the channel open message */ open_info = (hv_vmbus_channel_msg_info*) malloc( sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_open_channel), M_DEVBUF, M_NOWAIT); KASSERT(open_info != NULL, ("Error VMBUS: malloc failed to allocate Open Channel message!")); if (open_info == NULL) return (ENOMEM); sema_init(&open_info->wait_sema, 0, "Open Info Sema"); open_msg = (hv_vmbus_channel_open_channel*) open_info->msg; open_msg->header.message_type = HV_CHANNEL_MESSAGE_OPEN_CHANNEL; open_msg->open_id = new_channel->offer_msg.child_rel_id; open_msg->child_rel_id = new_channel->offer_msg.child_rel_id; open_msg->ring_buffer_gpadl_handle = new_channel->ring_buffer_gpadl_handle; open_msg->downstream_ring_buffer_page_offset = send_ring_buffer_size >> PAGE_SHIFT; - open_msg->server_context_area_gpadl_handle = 0; + open_msg->target_vcpu = new_channel->target_vcpu; if (user_data_len) memcpy(open_msg->user_data, user_data, user_data_len); mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL( &hv_vmbus_g_connection.channel_msg_anchor, open_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message( open_msg, sizeof(hv_vmbus_channel_open_channel)); if (ret != 0) goto cleanup; ret = sema_timedwait(&open_info->wait_sema, 500); /* KYS 5 seconds */ - if (ret) + if (ret) { + if(bootverbose) + printf("VMBUS: channel <%p> open timeout.\n", new_channel); goto cleanup; + } if (open_info->response.open_result.status == 0) { + new_channel->state = HV_CHANNEL_OPENED_STATE; if(bootverbose) printf("VMBUS: channel <%p> open success.\n", new_channel); } else { if(bootverbose) printf("Error VMBUS: channel <%p> open failed - %d!\n", new_channel, open_info->response.open_result.status); } cleanup: mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE( &hv_vmbus_g_connection.channel_msg_anchor, open_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); sema_destroy(&open_info->wait_sema); free(open_info, M_DEVBUF); return (ret); } /** * @brief Create a gpadl for the specified buffer */ static int vmbus_channel_create_gpadl_header( void* contig_buffer, uint32_t size, /* page-size multiple */ hv_vmbus_channel_msg_info** msg_info, uint32_t* message_count) { int i; int page_count; unsigned long long pfn; uint32_t msg_size; hv_vmbus_channel_gpadl_header* gpa_header; hv_vmbus_channel_gpadl_body* gpadl_body; hv_vmbus_channel_msg_info* msg_header; hv_vmbus_channel_msg_info* msg_body; int pfnSum, pfnCount, pfnLeft, pfnCurr, pfnSize; page_count = size >> PAGE_SHIFT; pfn = hv_get_phys_addr(contig_buffer) >> PAGE_SHIFT; /*do we need a gpadl body msg */ pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE - sizeof(hv_vmbus_channel_gpadl_header) - sizeof(hv_gpa_range); pfnCount = pfnSize / sizeof(uint64_t); if (page_count > 
pfnCount) { /* if(we need a gpadl body) */ /* fill in the header */ msg_size = sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_header) + sizeof(hv_gpa_range) + pfnCount * sizeof(uint64_t); msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT( msg_header != NULL, ("Error VMBUS: malloc failed to allocate Gpadl Message!")); if (msg_header == NULL) return (ENOMEM); TAILQ_INIT(&msg_header->sub_msg_list_anchor); msg_header->message_size = msg_size; gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg; gpa_header->range_count = 1; gpa_header->range_buf_len = sizeof(hv_gpa_range) + page_count * sizeof(uint64_t); gpa_header->range[0].byte_offset = 0; gpa_header->range[0].byte_count = size; for (i = 0; i < pfnCount; i++) { gpa_header->range[0].pfn_array[i] = pfn + i; } *msg_info = msg_header; *message_count = 1; pfnSum = pfnCount; pfnLeft = page_count - pfnCount; /* * figure out how many pfns we can fit */ pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE - sizeof(hv_vmbus_channel_gpadl_body); pfnCount = pfnSize / sizeof(uint64_t); /* * fill in the body */ while (pfnLeft) { if (pfnLeft > pfnCount) { pfnCurr = pfnCount; } else { pfnCurr = pfnLeft; } msg_size = sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_body) + pfnCurr * sizeof(uint64_t); msg_body = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT( msg_body != NULL, ("Error VMBUS: malloc failed to allocate Gpadl msg_body!")); if (msg_body == NULL) return (ENOMEM); msg_body->message_size = msg_size; (*message_count)++; gpadl_body = (hv_vmbus_channel_gpadl_body*) msg_body->msg; /* * gpadl_body->gpadl = kbuffer; */ for (i = 0; i < pfnCurr; i++) { gpadl_body->pfn[i] = pfn + pfnSum + i; } TAILQ_INSERT_TAIL( &msg_header->sub_msg_list_anchor, msg_body, msg_list_entry); pfnSum += pfnCurr; pfnLeft -= pfnCurr; } } else { /* else everything fits in a header */ msg_size = sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_header) + sizeof(hv_gpa_range) + page_count * sizeof(uint64_t); msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT( msg_header != NULL, ("Error VMBUS: malloc failed to allocate Gpadl Message!")); if (msg_header == NULL) return (ENOMEM); msg_header->message_size = msg_size; gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg; gpa_header->range_count = 1; gpa_header->range_buf_len = sizeof(hv_gpa_range) + page_count * sizeof(uint64_t); gpa_header->range[0].byte_offset = 0; gpa_header->range[0].byte_count = size; for (i = 0; i < page_count; i++) { gpa_header->range[0].pfn_array[i] = pfn + i; } *msg_info = msg_header; *message_count = 1; } return (0); } /** * @brief Establish a GPADL for the specified buffer */ int hv_vmbus_channel_establish_gpadl( hv_vmbus_channel* channel, void* contig_buffer, uint32_t size, /* page-size multiple */ uint32_t* gpadl_handle) { int ret = 0; hv_vmbus_channel_gpadl_header* gpadl_msg; hv_vmbus_channel_gpadl_body* gpadl_body; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_info* sub_msg_info; uint32_t msg_count; hv_vmbus_channel_msg_info* curr; uint32_t next_gpadl_handle; next_gpadl_handle = hv_vmbus_g_connection.next_gpadl_handle; atomic_add_int((int*) &hv_vmbus_g_connection.next_gpadl_handle, 1); ret = vmbus_channel_create_gpadl_header( contig_buffer, size, &msg_info, &msg_count); if(ret != 0) { /* if(allocation failed) return immediately */ /* reverse atomic_add_int above */ atomic_subtract_int((int*) &hv_vmbus_g_connection.next_gpadl_handle, 1); return ret; } sema_init(&msg_info->wait_sema, 0, "Open Info 
Sema"); gpadl_msg = (hv_vmbus_channel_gpadl_header*) msg_info->msg; gpadl_msg->header.message_type = HV_CHANNEL_MESSAGEL_GPADL_HEADER; gpadl_msg->child_rel_id = channel->offer_msg.child_rel_id; gpadl_msg->gpadl = next_gpadl_handle; mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL( &hv_vmbus_g_connection.channel_msg_anchor, msg_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message( gpadl_msg, msg_info->message_size - (uint32_t) sizeof(hv_vmbus_channel_msg_info)); if (ret != 0) goto cleanup; if (msg_count > 1) { TAILQ_FOREACH(curr, &msg_info->sub_msg_list_anchor, msg_list_entry) { sub_msg_info = curr; gpadl_body = (hv_vmbus_channel_gpadl_body*) sub_msg_info->msg; gpadl_body->header.message_type = HV_CHANNEL_MESSAGE_GPADL_BODY; gpadl_body->gpadl = next_gpadl_handle; ret = hv_vmbus_post_message( gpadl_body, sub_msg_info->message_size - (uint32_t) sizeof(hv_vmbus_channel_msg_info)); /* if (the post message failed) give up and clean up */ if(ret != 0) goto cleanup; } } ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds*/ if (ret != 0) goto cleanup; *gpadl_handle = gpadl_msg->gpadl; cleanup: mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor, msg_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); sema_destroy(&msg_info->wait_sema); free(msg_info, M_DEVBUF); return (ret); } /** * @brief Teardown the specified GPADL handle */ int hv_vmbus_channel_teardown_gpdal( hv_vmbus_channel* channel, uint32_t gpadl_handle) { int ret = 0; hv_vmbus_channel_gpadl_teardown* msg; hv_vmbus_channel_msg_info* info; info = (hv_vmbus_channel_msg_info *) malloc( sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_teardown), M_DEVBUF, M_NOWAIT); KASSERT(info != NULL, ("Error VMBUS: malloc failed to allocate Gpadl Teardown Msg!")); if (info == NULL) { ret = ENOMEM; goto cleanup; } sema_init(&info->wait_sema, 0, "Open Info Sema"); msg = (hv_vmbus_channel_gpadl_teardown*) info->msg; msg->header.message_type = HV_CHANNEL_MESSAGE_GPADL_TEARDOWN; msg->child_rel_id = channel->offer_msg.child_rel_id; msg->gpadl = gpadl_handle; mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_msg_anchor, info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_gpadl_teardown)); if (ret != 0) goto cleanup; ret = sema_timedwait(&info->wait_sema, 500); /* KYS 5 seconds */ cleanup: /* * Received a torndown response */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor, info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); sema_destroy(&info->wait_sema); free(info, M_DEVBUF); return (ret); } -/** - * @brief Close the specified channel - */ -void -hv_vmbus_channel_close(hv_vmbus_channel *channel) +static void +hv_vmbus_channel_close_internal(hv_vmbus_channel *channel) { int ret = 0; hv_vmbus_channel_close_channel* msg; hv_vmbus_channel_msg_info* info; + channel->state = HV_CHANNEL_OPEN_STATE; + channel->sc_creation_callback = NULL; + + /* + * Grab the lock to prevent race condition when a packet received + * and unloading driver is in the process. 
+ */ mtx_lock(&channel->inbound_lock); channel->on_channel_callback = NULL; mtx_unlock(&channel->inbound_lock); /** * Send a closing message */ info = (hv_vmbus_channel_msg_info *) malloc( sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_close_channel), M_DEVBUF, M_NOWAIT); KASSERT(info != NULL, ("VMBUS: malloc failed hv_vmbus_channel_close!")); if(info == NULL) return; msg = (hv_vmbus_channel_close_channel*) info->msg; msg->header.message_type = HV_CHANNEL_MESSAGE_CLOSE_CHANNEL; msg->child_rel_id = channel->offer_msg.child_rel_id; ret = hv_vmbus_post_message( msg, sizeof(hv_vmbus_channel_close_channel)); /* Tear down the gpadl for the channel's ring buffer */ if (channel->ring_buffer_gpadl_handle) { hv_vmbus_channel_teardown_gpdal(channel, channel->ring_buffer_gpadl_handle); } /* TODO: Send a msg to release the childRelId */ /* cleanup the ring buffers for this channel */ hv_ring_buffer_cleanup(&channel->outbound); hv_ring_buffer_cleanup(&channel->inbound); contigfree(channel->ring_buffer_pages, channel->ring_buffer_size, M_DEVBUF); free(info, M_DEVBUF); +} - /* - * If we are closing the channel during an error path in - * opening the channel, don't free the channel - * since the caller will free the channel - */ - if (channel->state == HV_CHANNEL_OPEN_STATE) { - mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); - TAILQ_REMOVE( - &hv_vmbus_g_connection.channel_anchor, - channel, - list_entry); - mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); +/** + * @brief Close the specified channel + */ +void +hv_vmbus_channel_close(hv_vmbus_channel *channel) +{ + hv_vmbus_channel* sub_channel; - hv_vmbus_free_vmbus_channel(channel); + if (channel->primary_channel != NULL) { + /* + * We only close multi-channels when the primary is + * closed. + */ + return; } + /* + * Close all multi-channels first. + */ + TAILQ_FOREACH(sub_channel, &channel->sc_list_anchor, + sc_list_entry) { + if (sub_channel->state != HV_CHANNEL_OPENED_STATE) + continue; + hv_vmbus_channel_close_internal(sub_channel); + } + /* + * Then close the primary channel. 
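+ * (A direct hv_vmbus_channel_close() on a sub-channel is a no-op,
+ * per the early return above; the sub-channels were already closed
+ * in the loop just before this.)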
+ */ + hv_vmbus_channel_close_internal(channel); } /** * @brief Send the specified buffer on the given channel */ int hv_vmbus_channel_send_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint64_t request_id, hv_vmbus_packet_type type, uint32_t flags) { int ret = 0; hv_vm_packet_descriptor desc; uint32_t packet_len; uint64_t aligned_data; uint32_t packet_len_aligned; + boolean_t need_sig; hv_vmbus_sg_buffer_list buffer_list[3]; packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len; packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); aligned_data = 0; /* Setup the descriptor */ desc.type = type; /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND; */ desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */ /* in 8-bytes granularity */ desc.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3; desc.length8 = (uint16_t) (packet_len_aligned >> 3); desc.transaction_id = request_id; buffer_list[0].data = &desc; buffer_list[0].length = sizeof(hv_vm_packet_descriptor); buffer_list[1].data = buffer; buffer_list[1].length = buffer_len; buffer_list[2].data = &aligned_data; buffer_list[2].length = packet_len_aligned - packet_len; - ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3); + ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3, + &need_sig); /* TODO: We should determine if this is optional */ - if (ret == 0 - && !hv_vmbus_get_ring_buffer_interrupt_mask( - &channel->outbound)) { + if (ret == 0 && need_sig) { vmbus_channel_set_event(channel); } return (ret); } /** * @brief Send a range of single-page buffer packets using * a GPADL Direct packet type */ int hv_vmbus_channel_send_packet_pagebuffer( hv_vmbus_channel* channel, hv_vmbus_page_buffer page_buffers[], uint32_t page_count, void* buffer, uint32_t buffer_len, uint64_t request_id) { int ret = 0; int i = 0; + boolean_t need_sig; uint32_t packet_len; uint32_t packetLen_aligned; hv_vmbus_sg_buffer_list buffer_list[3]; hv_vmbus_channel_packet_page_buffer desc; uint32_t descSize; uint64_t alignedData = 0; if (page_count > HV_MAX_PAGE_BUFFER_COUNT) return (EINVAL); /* * Adjust the size down since hv_vmbus_channel_packet_page_buffer * is the largest size we support */ descSize = sizeof(hv_vmbus_channel_packet_page_buffer) - ((HV_MAX_PAGE_BUFFER_COUNT - page_count) * sizeof(hv_vmbus_page_buffer)); packet_len = descSize + buffer_len; packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); /* Setup the descriptor */ desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT; desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; desc.data_offset8 = descSize >> 3; /* in 8-bytes granularity */ desc.length8 = (uint16_t) (packetLen_aligned >> 3); desc.transaction_id = request_id; desc.range_count = page_count; for (i = 0; i < page_count; i++) { desc.range[i].length = page_buffers[i].length; desc.range[i].offset = page_buffers[i].offset; desc.range[i].pfn = page_buffers[i].pfn; } buffer_list[0].data = &desc; buffer_list[0].length = descSize; buffer_list[1].data = buffer; buffer_list[1].length = buffer_len; buffer_list[2].data = &alignedData; buffer_list[2].length = packetLen_aligned - packet_len; - ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3); + ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3, + &need_sig); /* TODO: We should determine if this is optional */ - if (ret == 0 && - !hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound)) { + if (ret == 0 && need_sig) { vmbus_channel_set_event(channel); } return (ret); } /** * @brief Send a multi-page 
buffer packet using a GPADL Direct packet type */ int hv_vmbus_channel_send_packet_multipagebuffer( hv_vmbus_channel* channel, hv_vmbus_multipage_buffer* multi_page_buffer, void* buffer, uint32_t buffer_len, uint64_t request_id) { int ret = 0; uint32_t desc_size; + boolean_t need_sig; uint32_t packet_len; uint32_t packet_len_aligned; uint32_t pfn_count; uint64_t aligned_data = 0; hv_vmbus_sg_buffer_list buffer_list[3]; hv_vmbus_channel_packet_multipage_buffer desc; pfn_count = HV_NUM_PAGES_SPANNED( multi_page_buffer->offset, multi_page_buffer->length); if ((pfn_count == 0) || (pfn_count > HV_MAX_MULTIPAGE_BUFFER_COUNT)) return (EINVAL); /* * Adjust the size down since hv_vmbus_channel_packet_multipage_buffer * is the largest size we support */ desc_size = sizeof(hv_vmbus_channel_packet_multipage_buffer) - ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pfn_count) * sizeof(uint64_t)); packet_len = desc_size + buffer_len; packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); /* * Setup the descriptor */ desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT; desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; desc.data_offset8 = desc_size >> 3; /* in 8-bytes granularity */ desc.length8 = (uint16_t) (packet_len_aligned >> 3); desc.transaction_id = request_id; desc.range_count = 1; desc.range.length = multi_page_buffer->length; desc.range.offset = multi_page_buffer->offset; memcpy(desc.range.pfn_array, multi_page_buffer->pfn_array, pfn_count * sizeof(uint64_t)); buffer_list[0].data = &desc; buffer_list[0].length = desc_size; buffer_list[1].data = buffer; buffer_list[1].length = buffer_len; buffer_list[2].data = &aligned_data; buffer_list[2].length = packet_len_aligned - packet_len; - ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3); + ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3, + &need_sig); /* TODO: We should determine if this is optional */ - if (ret == 0 && - !hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound)) { + if (ret == 0 && need_sig) { vmbus_channel_set_event(channel); } return (ret); } /** * @brief Retrieve the user packet on the specified channel */ int hv_vmbus_channel_recv_packet( hv_vmbus_channel* channel, void* Buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id) { int ret; uint32_t user_len; uint32_t packet_len; hv_vm_packet_descriptor desc; *buffer_actual_len = 0; *request_id = 0; ret = hv_ring_buffer_peek(&channel->inbound, &desc, sizeof(hv_vm_packet_descriptor)); if (ret != 0) return (0); packet_len = desc.length8 << 3; user_len = packet_len - (desc.data_offset8 << 3); *buffer_actual_len = user_len; if (user_len > buffer_len) return (EINVAL); *request_id = desc.transaction_id; /* Copy over the packet to the user buffer */ ret = hv_ring_buffer_read(&channel->inbound, Buffer, user_len, (desc.data_offset8 << 3)); return (0); } /** * @brief Retrieve the raw packet on the specified channel */ int hv_vmbus_channel_recv_packet_raw( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id) { int ret; uint32_t packetLen; uint32_t userLen; hv_vm_packet_descriptor desc; *buffer_actual_len = 0; *request_id = 0; ret = hv_ring_buffer_peek( &channel->inbound, &desc, sizeof(hv_vm_packet_descriptor)); if (ret != 0) return (0); packetLen = desc.length8 << 3; userLen = packetLen - (desc.data_offset8 << 3); *buffer_actual_len = packetLen; if (packetLen > buffer_len) return (ENOBUFS); *request_id = desc.transaction_id; /* Copy over the entire packet to the user buffer 
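 * (unlike hv_vmbus_channel_recv_packet(), the descriptor is kept:
 * the read starts at offset 0 and returns all packetLen bytes)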
*/ ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0); return (0); } Index: head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c (revision 282211) +++ head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c (revision 282212) @@ -1,680 +1,863 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "hv_vmbus_priv.h" typedef void (*hv_pfn_channel_msg_handler)(hv_vmbus_channel_msg_header* msg); typedef struct hv_vmbus_channel_msg_table_entry { hv_vmbus_channel_msg_type messageType; hv_pfn_channel_msg_handler messageHandler; } hv_vmbus_channel_msg_table_entry; /* * Internal functions */ static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_process_offer(void *context); +struct hv_vmbus_channel* + vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary); /** * Channel message dispatch table */ hv_vmbus_channel_msg_table_entry g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = { { HV_CHANNEL_MESSAGE_INVALID, NULL }, { HV_CHANNEL_MESSAGE_OFFER_CHANNEL, vmbus_channel_on_offer }, { HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER, vmbus_channel_on_offer_rescind }, { HV_CHANNEL_MESSAGE_REQUEST_OFFERS, NULL }, { HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED, vmbus_channel_on_offers_delivered }, { HV_CHANNEL_MESSAGE_OPEN_CHANNEL, NULL }, { HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT, vmbus_channel_on_open_result }, { HV_CHANNEL_MESSAGE_CLOSE_CHANNEL, NULL }, { HV_CHANNEL_MESSAGEL_GPADL_HEADER, NULL }, { HV_CHANNEL_MESSAGE_GPADL_BODY, NULL }, { HV_CHANNEL_MESSAGE_GPADL_CREATED, vmbus_channel_on_gpadl_created }, { HV_CHANNEL_MESSAGE_GPADL_TEARDOWN, NULL }, { 
HV_CHANNEL_MESSAGE_GPADL_TORNDOWN, vmbus_channel_on_gpadl_torndown }, { HV_CHANNEL_MESSAGE_REL_ID_RELEASED, NULL }, { HV_CHANNEL_MESSAGE_INITIATED_CONTACT, NULL }, { HV_CHANNEL_MESSAGE_VERSION_RESPONSE, vmbus_channel_on_version_response }, { HV_CHANNEL_MESSAGE_UNLOAD, NULL } }; /** * Implementation of the work abstraction. */ static void work_item_callback(void *work, int pending) { struct hv_work_item *w = (struct hv_work_item *)work; /* * Serialize work execution. */ if (w->wq->work_sema != NULL) { sema_wait(w->wq->work_sema); } w->callback(w->context); if (w->wq->work_sema != NULL) { sema_post(w->wq->work_sema); } free(w, M_DEVBUF); } struct hv_work_queue* hv_work_queue_create(char* name) { static unsigned int qid = 0; char qname[64]; int pri; struct hv_work_queue* wq; wq = malloc(sizeof(struct hv_work_queue), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(wq != NULL, ("Error VMBUS: Failed to allocate work_queue\n")); if (wq == NULL) return (NULL); /* * We use work abstraction to handle messages * coming from the host and these are typically offers. * Some FreeBsd drivers appear to have a concurrency issue * where probe/attach needs to be serialized. We ensure that * by having only one thread process work elements in a * specific queue by serializing work execution. * */ if (strcmp(name, "vmbusQ") == 0) { pri = PI_DISK; } else { /* control */ pri = PI_NET; /* * Initialize semaphore for this queue by pointing * to the globale semaphore used for synchronizing all * control messages. */ wq->work_sema = &hv_vmbus_g_connection.control_sema; } sprintf(qname, "hv_%s_%u", name, qid); /* * Fixme: FreeBSD 8.2 has a different prototype for * taskqueue_create(), and for certain other taskqueue functions. * We need to research the implications of these changes. * Fixme: Not sure when the changes were introduced. */ wq->queue = taskqueue_create(qname, M_NOWAIT, taskqueue_thread_enqueue, &wq->queue #if __FreeBSD_version < 800000 , &wq->proc #endif ); if (wq->queue == NULL) { free(wq, M_DEVBUF); return (NULL); } if (taskqueue_start_threads(&wq->queue, 1, pri, "%s taskq", qname)) { taskqueue_free(wq->queue); free(wq, M_DEVBUF); return (NULL); } qid++; return (wq); } void hv_work_queue_close(struct hv_work_queue *wq) { /* * KYS: Need to drain the taskqueue * before we close the hv_work_queue. 
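 * Note that wq->work_sema, when set, points at the global
 * control_sema and is destroyed by the connection teardown,
 * not here.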
*/ /*KYS: taskqueue_drain(wq->tq, ); */ taskqueue_free(wq->queue); free(wq, M_DEVBUF); } /** * @brief Create work item */ int hv_queue_work_item( struct hv_work_queue *wq, void (*callback)(void *), void *context) { struct hv_work_item *w = malloc(sizeof(struct hv_work_item), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(w != NULL, ("Error VMBUS: Failed to allocate WorkItem\n")); if (w == NULL) return (ENOMEM); w->callback = callback; w->context = context; w->wq = wq; TASK_INIT(&w->work, 0, work_item_callback, w); return (taskqueue_enqueue(wq->queue, &w->work)); } /** * @brief Rescind the offer by initiating a device removal */ static void vmbus_channel_process_rescind_offer(void *context) { hv_vmbus_channel* channel = (hv_vmbus_channel*) context; hv_vmbus_child_device_unregister(channel->device); } /** * @brief Allocate and initialize a vmbus channel object */ hv_vmbus_channel* hv_vmbus_allocate_channel(void) { hv_vmbus_channel* channel; channel = (hv_vmbus_channel*) malloc( sizeof(hv_vmbus_channel), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(channel != NULL, ("Error VMBUS: Failed to allocate channel!")); if (channel == NULL) return (NULL); mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF); + mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF); + TAILQ_INIT(&channel->sc_list_anchor); + channel->control_work_queue = hv_work_queue_create("control"); if (channel->control_work_queue == NULL) { mtx_destroy(&channel->inbound_lock); free(channel, M_DEVBUF); return (NULL); } return (channel); } /** * @brief Release the vmbus channel object itself */ static inline void ReleaseVmbusChannel(void *context) { hv_vmbus_channel* channel = (hv_vmbus_channel*) context; hv_work_queue_close(channel->control_work_queue); free(channel, M_DEVBUF); } /** * @brief Release the resources used by the vmbus channel object */ void hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel) { + mtx_destroy(&channel->sc_lock); mtx_destroy(&channel->inbound_lock); /* * We have to release the channel's workqueue/thread in * the vmbus's workqueue/thread context * ie we can't destroy ourselves */ hv_queue_work_item(hv_vmbus_g_connection.work_queue, ReleaseVmbusChannel, (void *) channel); } /** * @brief Process the offer by creating a channel/device * associated with this offer */ static void vmbus_channel_process_offer(void *context) { - int ret; hv_vmbus_channel* new_channel; boolean_t f_new; hv_vmbus_channel* channel; + int ret; new_channel = (hv_vmbus_channel*) context; f_new = TRUE; channel = NULL; /* * Make sure this is a new offer */ - mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_lock(&hv_vmbus_g_connection.channel_lock); TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor, list_entry) { - if (!memcmp( - &channel->offer_msg.offer.interface_type, - &new_channel->offer_msg.offer.interface_type, - sizeof(hv_guid)) - && !memcmp( - &channel->offer_msg.offer.interface_instance, + if (memcmp(&channel->offer_msg.offer.interface_type, + &new_channel->offer_msg.offer.interface_type, + sizeof(hv_guid)) == 0 && + memcmp(&channel->offer_msg.offer.interface_instance, &new_channel->offer_msg.offer.interface_instance, - sizeof(hv_guid))) { - f_new = FALSE; - break; - } + sizeof(hv_guid)) == 0) { + f_new = FALSE; + break; + } } if (f_new) { - /* Insert at tail */ - TAILQ_INSERT_TAIL( - &hv_vmbus_g_connection.channel_anchor, - new_channel, - list_entry); + /* Insert at tail */ + TAILQ_INSERT_TAIL( + &hv_vmbus_g_connection.channel_anchor, + new_channel, + list_entry); } - 
mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); + /*XXX add new channel to percpu_list */ + if (!f_new) { + /* + * Check if this is a sub channel. + */ + if (new_channel->offer_msg.offer.sub_channel_index != 0) { + /* + * It is a sub channel offer, process it. + */ + new_channel->primary_channel = channel; + mtx_lock(&channel->sc_lock); + TAILQ_INSERT_TAIL( + &channel->sc_list_anchor, + new_channel, + sc_list_entry); + mtx_unlock(&channel->sc_lock); + + /* Insert new channel into channel_anchor. */ + printf("Storvsc get multi-channel offer, rel=%u.\n", + new_channel->offer_msg.child_rel_id); + mtx_lock(&hv_vmbus_g_connection.channel_lock); + TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor, + new_channel, list_entry); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); + + if(bootverbose) + printf("VMBUS: new multi-channel offer <%p>.\n", + new_channel); + + /*XXX add it to percpu_list */ + + new_channel->state = HV_CHANNEL_OPEN_STATE; + if (channel->sc_creation_callback != NULL) { + channel->sc_creation_callback(new_channel); + } + return; + } + hv_vmbus_free_vmbus_channel(new_channel); return; } + new_channel->state = HV_CHANNEL_OPEN_STATE; + /* * Start the process of binding this offer to the driver * (We need to set the device field before calling * hv_vmbus_child_device_add()) */ new_channel->device = hv_vmbus_child_device_create( new_channel->offer_msg.offer.interface_type, new_channel->offer_msg.offer.interface_instance, new_channel); /* - * TODO - the HV_CHANNEL_OPEN_STATE flag should not be set below - * but in the "open" channel request. The ret != 0 logic below - * doesn't take into account that a channel - * may have been opened successfully - */ - - /* * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. */ ret = hv_vmbus_child_device_register(new_channel->device); if (ret != 0) { - mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); - TAILQ_REMOVE( - &hv_vmbus_g_connection.channel_anchor, - new_channel, - list_entry); - mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); - hv_vmbus_free_vmbus_channel(new_channel); - } else { - /* - * This state is used to indicate a successful open - * so that when we do close the channel normally, - * we can clean up properly - */ - new_channel->state = HV_CHANNEL_OPEN_STATE; + mtx_lock(&hv_vmbus_g_connection.channel_lock); + TAILQ_REMOVE( + &hv_vmbus_g_connection.channel_anchor, + new_channel, + list_entry); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); + hv_vmbus_free_vmbus_channel(new_channel); + } +} +/** + * Array of device guids that are performance critical. We try to distribute + * the interrupt load for these devices across all online cpus. + */ +static const hv_guid high_perf_devices[] = { + {HV_NIC_GUID, }, + {HV_IDE_GUID, }, + {HV_SCSI_GUID, }, +}; + +enum { + PERF_CHN_NIC = 0, + PERF_CHN_IDE, + PERF_CHN_SCSI, + MAX_PERF_CHN, +}; + +/* + * We use this static number to distribute the channel interrupt load. + */ +static uint32_t next_vcpu; + +/** + * Starting with Win8, we can statically distribute the incoming + * channel interrupt load by binding a channel to VCPU. We + * implement here a simple round robin scheme for distributing + * the interrupt load. + * We will bind channels that are not performance critical to cpu 0 and + * performance critical channels (IDE, SCSI and Network) will be uniformly + * distributed across all available CPUs. 
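+ *
+ * For example (illustrative numbers only): with 4 online CPUs, the
+ * pre-increment of next_vcpu below assigns successive performance
+ * critical channels to cpus 1, 2, 3, 0, 1, ... in round robin order.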
+ */ +static void +vmbus_channel_select_cpu(hv_vmbus_channel *channel, hv_guid *guid) +{ + uint32_t current_cpu; + int i; + boolean_t is_perf_channel = FALSE; + + for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) { + if (memcmp(guid->data, high_perf_devices[i].data, + sizeof(hv_guid)) == 0) { + is_perf_channel = TRUE; + break; + } } + + if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || + (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) || + (!is_perf_channel)) { + /* Host's view of guest cpu */ + channel->target_vcpu = 0; + /* Guest's own view of cpu */ + channel->target_cpu = 0; + return; + } + /* mp_ncpus should have the number cpus currently online */ + current_cpu = (++next_vcpu % mp_ncpus); + channel->target_cpu = current_cpu; + channel->target_vcpu = + hv_vmbus_g_context.hv_vcpu_index[current_cpu]; + if (bootverbose) + printf("VMBUS: Total online cpus %d, assign perf channel %d " + "to vcpu %d, cpu %d\n", mp_ncpus, i, channel->target_vcpu, + current_cpu); } /** * @brief Handler for channel offers from Hyper-V/Azure * * Handler for channel offers from vmbus in parent partition. We ignore * all offers except network and storage offers. For each network and storage * offers, we create a channel object and queue a work item to the channel * object to process the offer synchronously */ static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_offer_channel* offer; hv_vmbus_channel* new_channel; offer = (hv_vmbus_channel_offer_channel*) hdr; hv_guid *guidType; hv_guid *guidInstance; guidType = &offer->offer.interface_type; guidInstance = &offer->offer.interface_instance; /* Allocate the channel object and save this offer */ new_channel = hv_vmbus_allocate_channel(); if (new_channel == NULL) return; + /* + * By default we setup state to enable batched + * reading. A specific service can choose to + * disable this prior to opening the channel. + */ + new_channel->batched_reading = TRUE; + + new_channel->signal_event_param = + (hv_vmbus_input_signal_event *) + (HV_ALIGN_UP((unsigned long) + &new_channel->signal_event_buffer, + HV_HYPERCALL_PARAM_ALIGN)); + + new_channel->signal_event_param->connection_id.as_uint32_t = 0; + new_channel->signal_event_param->connection_id.u.id = + HV_VMBUS_EVENT_CONNECTION_ID; + new_channel->signal_event_param->flag_number = 0; + new_channel->signal_event_param->rsvd_z = 0; + + if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) { + new_channel->is_dedicated_interrupt = + (offer->is_dedicated_interrupt != 0); + new_channel->signal_event_param->connection_id.u.id = + offer->connection_id; + } + + /* + * Bind the channel to a chosen cpu. + */ + vmbus_channel_select_cpu(new_channel, + &offer->offer.interface_type); + memcpy(&new_channel->offer_msg, offer, sizeof(hv_vmbus_channel_offer_channel)); new_channel->monitor_group = (uint8_t) offer->monitor_id / 32; new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32; /* TODO: Make sure the offer comes from our parent partition */ hv_queue_work_item( new_channel->control_work_queue, vmbus_channel_process_offer, new_channel); } /** * @brief Rescind offer handler. 
* * We queue a work item to process this offer * synchronously */ static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_rescind_offer* rescind; hv_vmbus_channel* channel; rescind = (hv_vmbus_channel_rescind_offer*) hdr; channel = hv_vmbus_get_channel_from_rel_id(rescind->child_rel_id); if (channel == NULL) return; hv_queue_work_item(channel->control_work_queue, vmbus_channel_process_rescind_offer, channel); } /** * * @brief Invoked when all offers have been delivered. */ static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr) { } /** * @brief Open result handler. * * This is invoked when we received a response * to our channel open request. Find the matching request, copy the * response and signal the requesting thread. */ static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_open_result* result; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_open_channel* openMsg; result = (hv_vmbus_channel_open_result*) hdr; /* * Find the open msg, copy the result and signal/unblock the wait event */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_OPEN_CHANNEL) { openMsg = (hv_vmbus_channel_open_channel*) msg_info->msg; if (openMsg->child_rel_id == result->child_rel_id && openMsg->open_id == result->open_id) { memcpy(&msg_info->response.open_result, result, sizeof(hv_vmbus_channel_open_result)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief GPADL created handler. * * This is invoked when we received a response * to our gpadl create request. Find the matching request, copy the * response and signal the requesting thread. */ static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_gpadl_created* gpadl_created; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* request_header; hv_vmbus_channel_gpadl_header* gpadl_header; gpadl_created = (hv_vmbus_channel_gpadl_created*) hdr; /* Find the establish msg, copy the result and signal/unblock * the wait event */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { request_header = (hv_vmbus_channel_msg_header*) msg_info->msg; if (request_header->message_type == HV_CHANNEL_MESSAGEL_GPADL_HEADER) { gpadl_header = (hv_vmbus_channel_gpadl_header*) request_header; if ((gpadl_created->child_rel_id == gpadl_header->child_rel_id) && (gpadl_created->gpadl == gpadl_header->gpadl)) { memcpy(&msg_info->response.gpadl_created, gpadl_created, sizeof(hv_vmbus_channel_gpadl_created)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief GPADL torndown handler. * * This is invoked when we received a respons * to our gpadl teardown request. 
Find the matching request, copy the * response and signal the requesting thread */ static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_gpadl_torndown* gpadl_torndown; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_gpadl_teardown* gpadlTeardown; gpadl_torndown = (hv_vmbus_channel_gpadl_torndown*)hdr; /* * Find the open msg, copy the result and signal/unblock the * wait event. */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_GPADL_TEARDOWN) { gpadlTeardown = (hv_vmbus_channel_gpadl_teardown*) requestHeader; if (gpadl_torndown->gpadl == gpadlTeardown->gpadl) { memcpy(&msg_info->response.gpadl_torndown, gpadl_torndown, sizeof(hv_vmbus_channel_gpadl_torndown)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief Version response handler. * * This is invoked when we received a response * to our initiate contact request. Find the matching request, copy th * response and signal the requesting thread. */ static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_initiate_contact* initiate; hv_vmbus_channel_version_response* versionResponse; versionResponse = (hv_vmbus_channel_version_response*)hdr; mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_INITIATED_CONTACT) { initiate = (hv_vmbus_channel_initiate_contact*) requestHeader; memcpy(&msg_info->response.version_response, versionResponse, sizeof(hv_vmbus_channel_version_response)); sema_post(&msg_info->wait_sema); } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief Handler for channel protocol messages. * * This is invoked in the vmbus worker thread context. */ void hv_vmbus_on_channel_message(void *context) { hv_vmbus_message* msg; hv_vmbus_channel_msg_header* hdr; int size; msg = (hv_vmbus_message*) context; hdr = (hv_vmbus_channel_msg_header*) msg->u.payload; size = msg->header.payload_size; if (hdr->message_type >= HV_CHANNEL_MESSAGE_COUNT) { free(msg, M_DEVBUF); return; } if (g_channel_message_table[hdr->message_type].messageHandler) { g_channel_message_table[hdr->message_type].messageHandler(hdr); } /* Free the msg that was allocated in VmbusOnMsgDPC() */ free(msg, M_DEVBUF); } /** * @brief Send a request to get all our pending offers. 
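 *
 * This only posts a HV_CHANNEL_MESSAGE_REQUEST_OFFERS message; the
 * offers themselves arrive asynchronously and are dispatched through
 * the channel message table to vmbus_channel_on_offer().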
*/ int hv_vmbus_request_channel_offers(void) { int ret; hv_vmbus_channel_msg_header* msg; hv_vmbus_channel_msg_info* msg_info; msg_info = (hv_vmbus_channel_msg_info *) malloc(sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_msg_header), M_DEVBUF, M_NOWAIT); if (msg_info == NULL) { if(bootverbose) printf("Error VMBUS: malloc failed for Request Offers\n"); return (ENOMEM); } msg = (hv_vmbus_channel_msg_header*) msg_info->msg; msg->message_type = HV_CHANNEL_MESSAGE_REQUEST_OFFERS; ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_msg_header)); if (msg_info) free(msg_info, M_DEVBUF); return (ret); } /** * @brief Release channels that are unattached/unconnected (i.e., no drivers associated) */ void hv_vmbus_release_unattached_channels(void) { hv_vmbus_channel *channel; - mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_lock(&hv_vmbus_g_connection.channel_lock); while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) { channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor); TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor, channel, list_entry); hv_vmbus_child_device_unregister(channel->device); hv_vmbus_free_vmbus_channel(channel); } - mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); +} + +/** + * @brief Select the best outgoing channel + * + * The channel whose vcpu binding is closest to the current vcpu will + * be selected. + * If there are no sub-channels, always select the primary channel. + * + * @param primary - primary channel + */ +struct hv_vmbus_channel * +vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary) +{ + hv_vmbus_channel *new_channel = NULL; + hv_vmbus_channel *outgoing_channel = primary; + int old_cpu_distance = 0; + int new_cpu_distance = 0; + int cur_vcpu = 0; + int smp_pro_id = PCPU_GET(cpuid); + + if (TAILQ_EMPTY(&primary->sc_list_anchor)) { + return outgoing_channel; + } + + if (smp_pro_id >= MAXCPU) { + return outgoing_channel; + } + + cur_vcpu = hv_vmbus_g_context.hv_vcpu_index[smp_pro_id]; + + TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) { + if (new_channel->state != HV_CHANNEL_OPENED_STATE){ + continue; + } + + if (new_channel->target_vcpu == cur_vcpu){ + return new_channel; + } + + old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ? + (outgoing_channel->target_vcpu - cur_vcpu) : + (cur_vcpu - outgoing_channel->target_vcpu)); + + new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ? + (new_channel->target_vcpu - cur_vcpu) : + (cur_vcpu - new_channel->target_vcpu)); + + if (old_cpu_distance < new_cpu_distance) { + continue; + } + + outgoing_channel = new_channel; + } + + return(outgoing_channel); } Property changes on: head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/dev/hyperv/vmbus/hv_connection.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_connection.c (revision 282211) +++ head/sys/dev/hyperv/vmbus/hv_connection.c (revision 282212) @@ -1,431 +1,559 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include "hv_vmbus_priv.h" /* * Globals */ hv_vmbus_connection hv_vmbus_g_connection = { .connect_state = HV_DISCONNECTED, .next_gpadl_handle = 0xE1E10, }; +uint32_t hv_vmbus_protocal_version = HV_VMBUS_VERSION_WS2008; + +static uint32_t +hv_vmbus_get_next_version(uint32_t current_ver) +{ + switch (current_ver) { + case (HV_VMBUS_VERSION_WIN7): + return(HV_VMBUS_VERSION_WS2008); + + case (HV_VMBUS_VERSION_WIN8): + return(HV_VMBUS_VERSION_WIN7); + + case (HV_VMBUS_VERSION_WIN8_1): + return(HV_VMBUS_VERSION_WIN8); + + case (HV_VMBUS_VERSION_WS2008): + default: + return(HV_VMBUS_VERSION_INVALID); + } +} + /** + * Negotiate the highest supported hypervisor version. 
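+ *
+ * The caller (hv_vmbus_connect) starts at HV_VMBUS_VERSION_CURRENT
+ * and, on each refusal, steps down via hv_vmbus_get_next_version():
+ * WIN8_1 -> WIN8 -> WIN7 -> WS2008 -> INVALID (give up).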
+ */ +static int +hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info, + uint32_t version) +{ + int ret = 0; + hv_vmbus_channel_initiate_contact *msg; + + sema_init(&msg_info->wait_sema, 0, "Msg Info Sema"); + msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg; + + msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT; + msg->vmbus_version_requested = version; + + msg->interrupt_page = hv_get_phys_addr( + hv_vmbus_g_connection.interrupt_page); + + msg->monitor_page_1 = hv_get_phys_addr( + hv_vmbus_g_connection.monitor_pages); + + msg->monitor_page_2 = + hv_get_phys_addr( + ((uint8_t *) hv_vmbus_g_connection.monitor_pages + + PAGE_SIZE)); + + /** + * Add to list before we send the request since we may receive the + * response before returning from this routine + */ + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + + TAILQ_INSERT_TAIL( + &hv_vmbus_g_connection.channel_msg_anchor, + msg_info, + msg_list_entry); + + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + + ret = hv_vmbus_post_message( + msg, + sizeof(hv_vmbus_channel_initiate_contact)); + + if (ret != 0) { + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + TAILQ_REMOVE( + &hv_vmbus_g_connection.channel_msg_anchor, + msg_info, + msg_list_entry); + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + return (ret); + } + + /** + * Wait for the connection response + */ + ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds */ + + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + TAILQ_REMOVE( + &hv_vmbus_g_connection.channel_msg_anchor, + msg_info, + msg_list_entry); + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + + /** + * Check if successful + */ + if (msg_info->response.version_response.version_supported) { + hv_vmbus_g_connection.connect_state = HV_CONNECTED; + } else { + ret = ECONNREFUSED; + } + + return (ret); +} + +/** * Send a connect request on the partition service connection */ int hv_vmbus_connect(void) { int ret = 0; + uint32_t version; hv_vmbus_channel_msg_info* msg_info = NULL; - hv_vmbus_channel_initiate_contact* msg; /** * Make sure we are not connecting or connected */ if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) { return (-1); } /** * Initialize the vmbus connection */ hv_vmbus_g_connection.connect_state = HV_CONNECTING; hv_vmbus_g_connection.work_queue = hv_work_queue_create("vmbusQ"); sema_init(&hv_vmbus_g_connection.control_sema, 1, "control_sema"); TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor); mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg", NULL, MTX_SPIN); TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor); mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel", - NULL, MTX_SPIN); + NULL, MTX_DEF); /** * Setup the vmbus event connection for channel interrupt abstraction * stuff */ hv_vmbus_g_connection.interrupt_page = contigmalloc( PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); KASSERT(hv_vmbus_g_connection.interrupt_page != NULL, ("Error VMBUS: malloc failed to allocate Channel" " Request Event message!")); if (hv_vmbus_g_connection.interrupt_page == NULL) { ret = ENOMEM; goto cleanup; } hv_vmbus_g_connection.recv_interrupt_page = hv_vmbus_g_connection.interrupt_page; hv_vmbus_g_connection.send_interrupt_page = ((uint8_t *) hv_vmbus_g_connection.interrupt_page + (PAGE_SIZE >> 1)); /** * Set up the monitor notification facility. 
The 1st page for * parent->child and the 2nd page for child->parent */ hv_vmbus_g_connection.monitor_pages = contigmalloc( 2 * PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); KASSERT(hv_vmbus_g_connection.monitor_pages != NULL, ("Error VMBUS: malloc failed to allocate Monitor Pages!")); if (hv_vmbus_g_connection.monitor_pages == NULL) { ret = ENOMEM; goto cleanup; } msg_info = (hv_vmbus_channel_msg_info*) malloc(sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_initiate_contact), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(msg_info != NULL, ("Error VMBUS: malloc failed for Initiate Contact message!")); if (msg_info == NULL) { ret = ENOMEM; goto cleanup; } - sema_init(&msg_info->wait_sema, 0, "Msg Info Sema"); - msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg; - - msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT; - msg->vmbus_version_requested = HV_VMBUS_REVISION_NUMBER; - - msg->interrupt_page = hv_get_phys_addr( - hv_vmbus_g_connection.interrupt_page); - - msg->monitor_page_1 = hv_get_phys_addr( - hv_vmbus_g_connection.monitor_pages); - - msg->monitor_page_2 = - hv_get_phys_addr( - ((uint8_t *) hv_vmbus_g_connection.monitor_pages - + PAGE_SIZE)); - - /** - * Add to list before we send the request since we may receive the - * response before returning from this routine + /* + * Find the highest vmbus version number we can support. */ - mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + version = HV_VMBUS_VERSION_CURRENT; - TAILQ_INSERT_TAIL( - &hv_vmbus_g_connection.channel_msg_anchor, - msg_info, - msg_list_entry); + do { + ret = hv_vmbus_negotiate_version(msg_info, version); + if (ret == EWOULDBLOCK) { + /* + * We timed out. + */ + goto cleanup; + } - mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + if (hv_vmbus_g_connection.connect_state == HV_CONNECTED) + break; - ret = hv_vmbus_post_message( - msg, - sizeof(hv_vmbus_channel_initiate_contact)); + version = hv_vmbus_get_next_version(version); + } while (version != HV_VMBUS_VERSION_INVALID); - if (ret != 0) { - mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); - TAILQ_REMOVE( - &hv_vmbus_g_connection.channel_msg_anchor, - msg_info, - msg_list_entry); - mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); - goto cleanup; - } + hv_vmbus_protocal_version = version; + if (bootverbose) + printf("VMBUS: Protocol Version: %d.%d\n", + version >> 16, version & 0xFFFF); - /** - * Wait for the connection response - */ - ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds */ - - mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); - TAILQ_REMOVE( - &hv_vmbus_g_connection.channel_msg_anchor, - msg_info, - msg_list_entry); - mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); - - /** - * Check if successful - */ - if (msg_info->response.version_response.version_supported) { - hv_vmbus_g_connection.connect_state = HV_CONNECTED; - } else { - ret = ECONNREFUSED; - goto cleanup; - } - sema_destroy(&msg_info->wait_sema); free(msg_info, M_DEVBUF); return (0); /* * Cleanup after failure! 
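 * Everything set up above is unwound: the work queue, the control
 * semaphore, both channel locks, the interrupt page, the monitor
 * pages and the msg_info allocation.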
*/ cleanup: hv_vmbus_g_connection.connect_state = HV_DISCONNECTED; hv_work_queue_close(hv_vmbus_g_connection.work_queue); sema_destroy(&hv_vmbus_g_connection.control_sema); mtx_destroy(&hv_vmbus_g_connection.channel_lock); mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); if (hv_vmbus_g_connection.interrupt_page != NULL) { contigfree( hv_vmbus_g_connection.interrupt_page, PAGE_SIZE, M_DEVBUF); hv_vmbus_g_connection.interrupt_page = NULL; } if (hv_vmbus_g_connection.monitor_pages != NULL) { contigfree( hv_vmbus_g_connection.monitor_pages, 2 * PAGE_SIZE, M_DEVBUF); hv_vmbus_g_connection.monitor_pages = NULL; } if (msg_info) { sema_destroy(&msg_info->wait_sema); free(msg_info, M_DEVBUF); } return (ret); } /** * Send a disconnect request on the partition service connection */ int hv_vmbus_disconnect(void) { int ret = 0; hv_vmbus_channel_unload* msg; msg = malloc(sizeof(hv_vmbus_channel_unload), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(msg != NULL, ("Error VMBUS: malloc failed to allocate Channel Unload Msg!")); if (msg == NULL) return (ENOMEM); msg->message_type = HV_CHANNEL_MESSAGE_UNLOAD; ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_unload)); contigfree(hv_vmbus_g_connection.interrupt_page, PAGE_SIZE, M_DEVBUF); mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); hv_work_queue_close(hv_vmbus_g_connection.work_queue); sema_destroy(&hv_vmbus_g_connection.control_sema); hv_vmbus_g_connection.connect_state = HV_DISCONNECTED; free(msg, M_DEVBUF); return (ret); } /** * Get the channel object given its child relative id (ie channel id) */ hv_vmbus_channel* hv_vmbus_get_channel_from_rel_id(uint32_t rel_id) { hv_vmbus_channel* channel; hv_vmbus_channel* foundChannel = NULL; /* * TODO: * Consider optimization where relids are stored in a fixed size array * and channels are accessed without the need to take this lock or search * the list. */ - mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_lock(&hv_vmbus_g_connection.channel_lock); TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor, list_entry) { if (channel->offer_msg.child_rel_id == rel_id) { foundChannel = channel; break; } } - mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); return (foundChannel); } /** * Process a channel event notification */ static void VmbusProcessChannelEvent(uint32_t relid) { + void* arg; + uint32_t bytes_to_read; hv_vmbus_channel* channel; + boolean_t is_batched_reading; /** * Find the channel based on this relid and invokes * the channel callback to process the event */ channel = hv_vmbus_get_channel_from_rel_id(relid); if (channel == NULL) { return; } /** * To deal with the race condition where we might * receive a packet while the relevant driver is * being unloaded, dispatch the callback while * holding the channel lock. The unloading driver * will acquire the same channel lock to set the * callback to NULL. This closes the window. */ - mtx_lock(&channel->inbound_lock); + /* + * Disable the lock due to newly added WITNESS check in r277723. + * Will seek other way to avoid race condition. + * -- whu + */ + // mtx_lock(&channel->inbound_lock); if (channel->on_channel_callback != NULL) { - channel->on_channel_callback(channel->channel_callback_context); + arg = channel->channel_callback_context; + is_batched_reading = channel->batched_reading; + /* + * Optimize host to guest signaling by ensuring: + * 1. While reading the channel, we disable interrupts from + * host. + * 2. 
Ensure that we process all posted messages from the host + * before returning from this callback. + * 3. Once we return, enable signaling from the host. Once this + * state is set we check to see if additional packets are + * available to read. In this case we repeat the process. + */ + do { + if (is_batched_reading) + hv_ring_buffer_read_begin(&channel->inbound); + + channel->on_channel_callback(arg); + + if (is_batched_reading) + bytes_to_read = + hv_ring_buffer_read_end(&channel->inbound); + else + bytes_to_read = 0; + } while (is_batched_reading && (bytes_to_read != 0)); } - mtx_unlock(&channel->inbound_lock); + // mtx_unlock(&channel->inbound_lock); } +#ifdef HV_DEBUG_INTR +extern uint32_t hv_intr_count; +extern uint32_t hv_vmbus_swintr_event_cpu[MAXCPU]; +extern uint32_t hv_vmbus_intr_cpu[MAXCPU]; +#endif + /** * Handler for events */ void hv_vmbus_on_events(void *arg) { - int dword; int bit; + int cpu; + int dword; + void *page_addr; + uint32_t* recv_interrupt_page = NULL; int rel_id; - int maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5; + int maxdword; + hv_vmbus_synic_event_flags *event; /* int maxdword = PAGE_SIZE >> 3; */ - /* - * receive size is 1/2 page and divide that by 4 bytes - */ + cpu = (int)(long)arg; + KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: " + "cpu out of range!")); - uint32_t* recv_interrupt_page = - hv_vmbus_g_connection.recv_interrupt_page; +#ifdef HV_DEBUG_INTR + int i; + hv_vmbus_swintr_event_cpu[cpu]++; + if (hv_intr_count % 10000 == 0) { + printf("VMBUS: Total interrupt %d\n", hv_intr_count); + for (i = 0; i < mp_ncpus; i++) + printf("VMBUS: hw cpu[%d]: %d, event sw intr cpu[%d]: %d\n", + i, hv_vmbus_intr_cpu[i], i, hv_vmbus_swintr_event_cpu[i]); + } +#endif + if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || + (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) { + maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5; + /* + * receive size is 1/2 page and divide that by 4 bytes + */ + recv_interrupt_page = + hv_vmbus_g_connection.recv_interrupt_page; + } else { + /* + * On Host with Win8 or above, the event page can be + * checked directly to get the id of the channel + * that has the pending interrupt. + */ + maxdword = HV_EVENT_FLAGS_DWORD_COUNT; + page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu]; + event = (hv_vmbus_synic_event_flags *) + page_addr + HV_VMBUS_MESSAGE_SINT; + recv_interrupt_page = event->flags32; + } + /* * Check events */ if (recv_interrupt_page != NULL) { for (dword = 0; dword < maxdword; dword++) { if (recv_interrupt_page[dword]) { for (bit = 0; bit < 32; bit++) { if (synch_test_and_clear_bit(bit, (uint32_t *) &recv_interrupt_page[dword])) { rel_id = (dword << 5) + bit; if (rel_id == 0) { /* * Special case - * vmbus channel protocol msg. 
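 * rel_id 0 never maps to a real channel, so it is skipped.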
*/ continue; } else { VmbusProcessChannelEvent(rel_id); } } } } } } return; } /** * Send a msg on the vmbus's message connection */ int hv_vmbus_post_message(void *buffer, size_t bufferLen) { int ret = 0; hv_vmbus_connection_id connId; unsigned retries = 0; /* NetScaler delays from previous code were consolidated here */ static int delayAmount[] = {100, 100, 100, 500, 500, 5000, 5000, 5000}; /* for(each entry in delayAmount) try to post message, * delay a little bit before retrying */ for (retries = 0; retries < sizeof(delayAmount)/sizeof(delayAmount[0]); retries++) { connId.as_uint32_t = 0; connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID; ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer, bufferLen); if (ret != HV_STATUS_INSUFFICIENT_BUFFERS) break; /* TODO: KYS We should use a blocking wait call */ DELAY(delayAmount[retries]); } KASSERT(ret == 0, ("Error VMBUS: Message Post Failed\n")); return (ret); } /** * Send an event notification to the parent */ int -hv_vmbus_set_event(uint32_t child_rel_id) { +hv_vmbus_set_event(hv_vmbus_channel *channel) { int ret = 0; + uint32_t child_rel_id = channel->offer_msg.child_rel_id; /* Each uint32_t represents 32 channels */ synch_set_bit(child_rel_id & 31, (((uint32_t *)hv_vmbus_g_connection.send_interrupt_page + (child_rel_id >> 5)))); - ret = hv_vmbus_signal_event(); + ret = hv_vmbus_signal_event(channel->signal_event_param); return (ret); } - Property changes on: head/sys/dev/hyperv/vmbus/hv_connection.c ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/dev/hyperv/vmbus/hv_hv.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_hv.c (revision 282211) +++ head/sys/dev/hyperv/vmbus/hv_hv.c (revision 282212) @@ -1,504 +1,470 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ /** * Implements low-level interactions with Hyper-V/Azure */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "hv_vmbus_priv.h" #define HV_X64_MSR_GUEST_OS_ID 0x40000000 #define HV_X64_CPUID_MIN 0x40000005 #define HV_X64_CPUID_MAX 0x4000ffff #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 #define HV_NANOSECONDS_PER_SEC 1000000000L static u_int hv_get_timecount(struct timecounter *tc); static inline void do_cpuid_inline(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { __asm__ __volatile__("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "c" (*ecx)); } /** * Globals */ hv_vmbus_context hv_vmbus_g_context = { .syn_ic_initialized = FALSE, .hypercall_page = NULL, - .signal_event_param = NULL, - .signal_event_buffer = NULL, }; static struct timecounter hv_timecounter = { hv_get_timecount, 0, ~0u, HV_NANOSECONDS_PER_SEC/100, "Hyper-V", HV_NANOSECONDS_PER_SEC/100 }; static u_int hv_get_timecount(struct timecounter *tc) { u_int now = rdmsr(HV_X64_MSR_TIME_REF_COUNT); return (now); } /** * @brief Query the cpuid for presence of windows hypervisor */ int hv_vmbus_query_hypervisor_presence(void) { u_int regs[4]; int hyper_v_detected = 0; /* * When Xen is detected and native Xen PV support is enabled, * ignore Xen's HyperV emulation. */ if (vm_guest == VM_GUEST_XEN) return (0); do_cpuid(1, regs); if (regs[2] & 0x80000000) { /* if(a hypervisor is detected) */ /* make sure this really is Hyper-V */ /* we look at the CPUID info */ do_cpuid(HV_X64_MSR_GUEST_OS_ID, regs); hyper_v_detected = regs[0] >= HV_X64_CPUID_MIN && regs[0] <= HV_X64_CPUID_MAX && !memcmp("Microsoft Hv", &regs[1], 12); } return (hyper_v_detected); } /** * @brief Get version of the windows hypervisor */ static int hv_vmbus_get_hypervisor_version(void) { unsigned int eax; unsigned int ebx; unsigned int ecx; unsigned int edx; unsigned int maxLeaf; unsigned int op; /* * It's assumed that this is called after confirming that * Viridian is present. * Query id and revision. */ eax = 0; ebx = 0; ecx = 0; edx = 0; op = HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION; do_cpuid_inline(op, &eax, &ebx, &ecx, &edx); maxLeaf = eax; eax = 0; ebx = 0; ecx = 0; edx = 0; op = HV_CPU_ID_FUNCTION_HV_INTERFACE; do_cpuid_inline(op, &eax, &ebx, &ecx, &edx); if (maxLeaf >= HV_CPU_ID_FUNCTION_MS_HV_VERSION) { eax = 0; ebx = 0; ecx = 0; edx = 0; op = HV_CPU_ID_FUNCTION_MS_HV_VERSION; do_cpuid_inline(op, &eax, &ebx, &ecx, &edx); } return (maxLeaf); } /** * @brief Invoke the specified hypercall */ static uint64_t hv_vmbus_do_hypercall(uint64_t control, void* input, void* output) { #ifdef __x86_64__ uint64_t hv_status = 0; uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0; uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0; volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page; __asm__ __volatile__ ("mov %0, %%r8" : : "r" (output_address): "r8"); __asm__ __volatile__ ("call *%3" : "=a"(hv_status): "c" (control), "d" (input_address), "m" (hypercall_page)); return (hv_status); #else uint32_t control_high = control >> 32; uint32_t control_low = control & 0xFFFFFFFF; uint32_t hv_status_high = 1; uint32_t hv_status_low = 1; uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0; uint32_t input_address_high = input_address >> 32; uint32_t input_address_low = input_address & 0xFFFFFFFF; uint64_t output_address = (output) ?
hv_get_phys_addr(output) : 0; uint32_t output_address_high = output_address >> 32; uint32_t output_address_low = output_address & 0xFFFFFFFF; volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page; __asm__ __volatile__ ("call *%8" : "=d"(hv_status_high), "=a"(hv_status_low) : "d" (control_high), "a" (control_low), "b" (input_address_high), "c" (input_address_low), "D"(output_address_high), "S"(output_address_low), "m" (hypercall_page)); return (hv_status_low | ((uint64_t)hv_status_high << 32)); #endif /* __x86_64__ */ } /** * @brief Main initialization routine. * * This routine must be called * before any other routines in here are called */ int hv_vmbus_init(void) { int max_leaf; hv_vmbus_x64_msr_hypercall_contents hypercall_msr; void* virt_addr = 0; memset( hv_vmbus_g_context.syn_ic_event_page, 0, sizeof(hv_vmbus_handle) * MAXCPU); memset( hv_vmbus_g_context.syn_ic_msg_page, 0, sizeof(hv_vmbus_handle) * MAXCPU); if (vm_guest != VM_GUEST_HV) goto cleanup; max_leaf = hv_vmbus_get_hypervisor_version(); /* * Write our OS info */ uint64_t os_guest_info = HV_FREEBSD_GUEST_ID; wrmsr(HV_X64_MSR_GUEST_OS_ID, os_guest_info); hv_vmbus_g_context.guest_id = os_guest_info; /* * See if the hypercall page is already set */ hypercall_msr.as_uint64_t = rdmsr(HV_X64_MSR_HYPERCALL); virt_addr = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(virt_addr != NULL, ("Error VMBUS: malloc failed to allocate page during init!")); if (virt_addr == NULL) goto cleanup; hypercall_msr.u.enable = 1; hypercall_msr.u.guest_physical_address = (hv_get_phys_addr(virt_addr) >> PAGE_SHIFT); wrmsr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t); /* * Confirm that hypercall page did get set up */ hypercall_msr.as_uint64_t = 0; hypercall_msr.as_uint64_t = rdmsr(HV_X64_MSR_HYPERCALL); if (!hypercall_msr.u.enable) goto cleanup; hv_vmbus_g_context.hypercall_page = virt_addr; - /* - * Setup the global signal event param for the signal event hypercall - */ - hv_vmbus_g_context.signal_event_buffer = - malloc(sizeof(hv_vmbus_input_signal_event_buffer), M_DEVBUF, - M_ZERO | M_NOWAIT); - KASSERT(hv_vmbus_g_context.signal_event_buffer != NULL, - ("Error VMBUS: Failed to allocate signal_event_buffer\n")); - if (hv_vmbus_g_context.signal_event_buffer == NULL) - goto cleanup; - - hv_vmbus_g_context.signal_event_param = - (hv_vmbus_input_signal_event*) - (HV_ALIGN_UP((unsigned long) - hv_vmbus_g_context.signal_event_buffer, - HV_HYPERCALL_PARAM_ALIGN)); - hv_vmbus_g_context.signal_event_param->connection_id.as_uint32_t = 0; - hv_vmbus_g_context.signal_event_param->connection_id.u.id = - HV_VMBUS_EVENT_CONNECTION_ID; - hv_vmbus_g_context.signal_event_param->flag_number = 0; - hv_vmbus_g_context.signal_event_param->rsvd_z = 0; - tc_init(&hv_timecounter); /* register virtual timecount */ return (0); cleanup: if (virt_addr != NULL) { if (hypercall_msr.u.enable) { hypercall_msr.as_uint64_t = 0; wrmsr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t); } free(virt_addr, M_DEVBUF); } return (ENOTSUP); } /** * @brief Cleanup routine, called normally during driver unloading or exiting */ void hv_vmbus_cleanup(void) { hv_vmbus_x64_msr_hypercall_contents hypercall_msr; - if (hv_vmbus_g_context.signal_event_buffer != NULL) { - free(hv_vmbus_g_context.signal_event_buffer, M_DEVBUF); - hv_vmbus_g_context.signal_event_buffer = NULL; - hv_vmbus_g_context.signal_event_param = NULL; - } - if (hv_vmbus_g_context.guest_id == HV_FREEBSD_GUEST_ID) { if (hv_vmbus_g_context.hypercall_page != NULL) { hypercall_msr.as_uint64_t = 0; 
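                        /*
                         * Clearing the MSR image and writing it back drops
                         * the enable bit, so the hypervisor stops treating
                         * the page as the hypercall page before it is freed:
                         */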
wrmsr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t); free(hv_vmbus_g_context.hypercall_page, M_DEVBUF); hv_vmbus_g_context.hypercall_page = NULL; } } } /** * @brief Post a message using the hypervisor message IPC. * (This involves a hypercall.) */ hv_vmbus_status hv_vmbus_post_msg_via_msg_ipc( hv_vmbus_connection_id connection_id, hv_vmbus_msg_type message_type, void* payload, size_t payload_size) { struct alignedinput { uint64_t alignment8; hv_vmbus_input_post_message msg; }; hv_vmbus_input_post_message* aligned_msg; hv_vmbus_status status; size_t addr; if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) return (EMSGSIZE); addr = (size_t) malloc(sizeof(struct alignedinput), M_DEVBUF, M_ZERO | M_NOWAIT); KASSERT(addr != 0, ("Error VMBUS: malloc failed to allocate message buffer!")); if (addr == 0) return (ENOMEM); aligned_msg = (hv_vmbus_input_post_message*) (HV_ALIGN_UP(addr, HV_HYPERCALL_PARAM_ALIGN)); aligned_msg->connection_id = connection_id; aligned_msg->message_type = message_type; aligned_msg->payload_size = payload_size; memcpy((void*) aligned_msg->payload, payload, payload_size); status = hv_vmbus_do_hypercall( HV_CALL_POST_MESSAGE, aligned_msg, 0) & 0xFFFF; free((void *) addr, M_DEVBUF); return (status); } /** * @brief Signal an event on the specified connection using the hypervisor * event IPC. (This involves a hypercall.) */ hv_vmbus_status -hv_vmbus_signal_event() +hv_vmbus_signal_event(void *con_id) { hv_vmbus_status status; status = hv_vmbus_do_hypercall( HV_CALL_SIGNAL_EVENT, - hv_vmbus_g_context.signal_event_param, + con_id, 0) & 0xFFFF; return (status); } /** * @brief hv_vmbus_synic_init */ void hv_vmbus_synic_init(void *arg) { int cpu; + uint64_t hv_vcpu_index; hv_vmbus_synic_simp simp; hv_vmbus_synic_siefp siefp; hv_vmbus_synic_scontrol sctrl; hv_vmbus_synic_sint shared_sint; uint64_t version; hv_setup_args* setup_args = (hv_setup_args *)arg; cpu = PCPU_GET(cpuid); if (hv_vmbus_g_context.hypercall_page == NULL) return; /* - * KYS: Looks like we can only initialize on cpu0; don't we support - * SMP guests? 
- * - * TODO: Need to add SMP support for FreeBSD V9 - */ - - if (cpu != 0) - return; - - /* * TODO: Check the version */ version = rdmsr(HV_X64_MSR_SVERSION); - - hv_vmbus_g_context.syn_ic_msg_page[cpu] = setup_args->page_buffers[0]; - hv_vmbus_g_context.syn_ic_event_page[cpu] = setup_args->page_buffers[1]; + hv_vmbus_g_context.syn_ic_msg_page[cpu] = + setup_args->page_buffers[2 * cpu]; + hv_vmbus_g_context.syn_ic_event_page[cpu] = + setup_args->page_buffers[2 * cpu + 1]; /* * Setup the Synic's message page */ simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP); simp.u.simp_enabled = 1; simp.u.base_simp_gpa = ((hv_get_phys_addr( hv_vmbus_g_context.syn_ic_msg_page[cpu])) >> PAGE_SHIFT); wrmsr(HV_X64_MSR_SIMP, simp.as_uint64_t); /* * Setup the Synic's event page */ siefp.as_uint64_t = rdmsr(HV_X64_MSR_SIEFP); siefp.u.siefp_enabled = 1; siefp.u.base_siefp_gpa = ((hv_get_phys_addr( hv_vmbus_g_context.syn_ic_event_page[cpu])) >> PAGE_SHIFT); wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t); /*HV_SHARED_SINT_IDT_VECTOR + 0x20; */ + shared_sint.as_uint64_t = 0; shared_sint.u.vector = setup_args->vector; shared_sint.u.masked = FALSE; - shared_sint.u.auto_eoi = FALSE; + shared_sint.u.auto_eoi = TRUE; wrmsr(HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT, shared_sint.as_uint64_t); /* Enable the global synic bit */ sctrl.as_uint64_t = rdmsr(HV_X64_MSR_SCONTROL); sctrl.u.enable = 1; wrmsr(HV_X64_MSR_SCONTROL, sctrl.as_uint64_t); hv_vmbus_g_context.syn_ic_initialized = TRUE; + /* + * Set up the cpuid mapping from Hyper-V to FreeBSD. + * The array is indexed using FreeBSD cpuid. + */ + hv_vcpu_index = rdmsr(HV_X64_MSR_VP_INDEX); + hv_vmbus_g_context.hv_vcpu_index[cpu] = (uint32_t)hv_vcpu_index; + return; } /** * @brief Cleanup routine for hv_vmbus_synic_init() */ void hv_vmbus_synic_cleanup(void *arg) { hv_vmbus_synic_sint shared_sint; hv_vmbus_synic_simp simp; hv_vmbus_synic_siefp siefp; - int cpu = PCPU_GET(cpuid); if (!hv_vmbus_g_context.syn_ic_initialized) return; - - if (cpu != 0) - return; /* TODO: XXXKYS: SMP? */ shared_sint.as_uint64_t = rdmsr( HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT); shared_sint.u.masked = 1; /* * Disable the interrupt */ wrmsr( HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT, shared_sint.as_uint64_t); simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP); simp.u.simp_enabled = 0; simp.u.base_simp_gpa = 0; wrmsr(HV_X64_MSR_SIMP, simp.as_uint64_t); siefp.as_uint64_t = rdmsr(HV_X64_MSR_SIEFP); siefp.u.siefp_enabled = 0; siefp.u.base_siefp_gpa = 0; wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t); } Index: head/sys/dev/hyperv/vmbus/hv_ring_buffer.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_ring_buffer.c (revision 282211) +++ head/sys/dev/hyperv/vmbus/hv_ring_buffer.c (revision 282212) @@ -1,440 +1,510 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include "hv_vmbus_priv.h" /* Amount of space to write to */ #define HV_BYTES_AVAIL_TO_WRITE(r, w, z) ((w) >= (r))? \ ((z) - ((w) - (r))):((r) - (w)) /** * @brief Get number of bytes available to read and to write to * for the specified ring buffer */ static inline void get_ring_buffer_avail_bytes( hv_vmbus_ring_buffer_info* rbi, uint32_t* read, uint32_t* write) { uint32_t read_loc, write_loc; /* * Capture the read/write indices before they changed */ read_loc = rbi->ring_buffer->read_index; write_loc = rbi->ring_buffer->write_index; *write = HV_BYTES_AVAIL_TO_WRITE( read_loc, write_loc, rbi->ring_data_size); *read = rbi->ring_data_size - *write; } /** * @brief Get the next write location for the specified ring buffer */ static inline uint32_t get_next_write_location(hv_vmbus_ring_buffer_info* ring_info) { uint32_t next = ring_info->ring_buffer->write_index; return (next); } /** * @brief Set the next write location for the specified ring buffer */ static inline void set_next_write_location( hv_vmbus_ring_buffer_info* ring_info, uint32_t next_write_location) { ring_info->ring_buffer->write_index = next_write_location; } /** * @brief Get the next read location for the specified ring buffer */ static inline uint32_t get_next_read_location(hv_vmbus_ring_buffer_info* ring_info) { uint32_t next = ring_info->ring_buffer->read_index; return (next); } /** * @brief Get the next read location + offset for the specified ring buffer. * This allows the caller to skip. */ static inline uint32_t get_next_read_location_with_offset( hv_vmbus_ring_buffer_info* ring_info, uint32_t offset) { uint32_t next = ring_info->ring_buffer->read_index; next += offset; next %= ring_info->ring_data_size; return (next); } /** * @brief Set the next read location for the specified ring buffer */ static inline void set_next_read_location( hv_vmbus_ring_buffer_info* ring_info, uint32_t next_read_location) { ring_info->ring_buffer->read_index = next_read_location; } /** * @brief Get the start of the ring buffer */ static inline void * get_ring_buffer(hv_vmbus_ring_buffer_info* ring_info) { return (void *) ring_info->ring_buffer->buffer; } /** * @brief Get the size of the ring buffer. */ static inline uint32_t get_ring_buffer_size(hv_vmbus_ring_buffer_info* ring_info) { return ring_info->ring_data_size; } /** * Get the read and write indices as uint64_t of the specified ring buffer. 
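 * Only the write index is actually recorded: it is shifted into the
 * upper 32 bits of the returned quadword, which hv_ring_buffer_write()
 * appends after each packet as the prev_indices trailer.
 */

/*
 * A hedged illustration of that packing (editor sketch, not commit
 * code):
 */
static inline uint64_t
pack_indices_example(uint32_t write_index)
{
        /* write_index 0x1234 packs to 0x0000123400000000 */
        return ((uint64_t)write_index << 32);
}

/*
 * The helper itself: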
*/ static inline uint64_t get_ring_buffer_indices(hv_vmbus_ring_buffer_info* ring_info) { return (uint64_t) ring_info->ring_buffer->write_index << 32; } +void +hv_ring_buffer_read_begin( + hv_vmbus_ring_buffer_info* ring_info) +{ + ring_info->ring_buffer->interrupt_mask = 1; + mb(); +} + +uint32_t +hv_ring_buffer_read_end( + hv_vmbus_ring_buffer_info* ring_info) +{ + uint32_t read, write; + + ring_info->ring_buffer->interrupt_mask = 0; + mb(); + + /* + * Now check to see if the ring buffer is still empty. + * If it is not, we raced and we need to process new + * incoming messages. + */ + get_ring_buffer_avail_bytes(ring_info, &read, &write); + + return (read); +} + +/* + * When we write to the ring buffer, check if the host needs to + * be signaled. Here are the details of this protocol: + * + * 1. The host guarantees that while it is draining the + * ring buffer, it will set the interrupt_mask to + * indicate it does not need to be interrupted when + * new data is placed. + * + * 2. The host guarantees that it will completely drain + * the ring buffer before exiting the read loop. Further, + * once the ring buffer is empty, it will clear the + * interrupt_mask and re-check to see if new data has + * arrived. + */ +static boolean_t +hv_ring_buffer_needsig_on_write( + uint32_t old_write_location, + hv_vmbus_ring_buffer_info* rbi) +{ + mb(); + if (rbi->ring_buffer->interrupt_mask) + return (FALSE); + + /* Read memory barrier */ + rmb(); + /* + * We only need to signal in this one case: when the + * ring transitions from being empty to non-empty. + */ + if (old_write_location == rbi->ring_buffer->read_index) + return (TRUE); + + return (FALSE); +} + static uint32_t copy_to_ring_buffer( hv_vmbus_ring_buffer_info* ring_info, uint32_t start_write_offset, char* src, uint32_t src_len); static uint32_t copy_from_ring_buffer( hv_vmbus_ring_buffer_info* ring_info, char* dest, uint32_t dest_len, uint32_t start_read_offset); /** * @brief Get the interrupt mask for the specified ring buffer. */ uint32_t hv_vmbus_get_ring_buffer_interrupt_mask(hv_vmbus_ring_buffer_info *rbi) { return rbi->ring_buffer->interrupt_mask; } /** * @brief Initialize the ring buffer. */ int hv_vmbus_ring_buffer_init( hv_vmbus_ring_buffer_info* ring_info, void* buffer, uint32_t buffer_len) { memset(ring_info, 0, sizeof(hv_vmbus_ring_buffer_info)); ring_info->ring_buffer = (hv_vmbus_ring_buffer*) buffer; ring_info->ring_buffer->read_index = ring_info->ring_buffer->write_index = 0; ring_info->ring_size = buffer_len; ring_info->ring_data_size = buffer_len - sizeof(hv_vmbus_ring_buffer); mtx_init(&ring_info->ring_lock, "vmbus ring buffer", NULL, MTX_SPIN); return (0); } /** * @brief Cleanup the ring buffer. */ void hv_ring_buffer_cleanup(hv_vmbus_ring_buffer_info* ring_info) { mtx_destroy(&ring_info->ring_lock); } /** * @brief Write to the ring buffer.
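 *
 * On success, *need_sig reports whether the caller must signal the
 * host (see hv_ring_buffer_needsig_on_write() above).
 */

/*
 * A minimal caller sketch (editor example with hypothetical names,
 * not part of the commit): queue one buffer and honor the hint.
 */
static int
ring_write_example(hv_vmbus_ring_buffer_info *out, hv_vmbus_channel *chan)
{
        hv_vmbus_sg_buffer_list sg[1];
        boolean_t need_sig = FALSE;
        char payload[16] = "example";
        int error;

        sg[0].data = payload;
        sg[0].length = sizeof(payload);
        error = hv_ring_buffer_write(out, sg, 1, &need_sig);
        if (error == 0 && need_sig)
                error = hv_vmbus_set_event(chan);
        return (error);
}

/*
 * The implementation follows: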
*/ int hv_ring_buffer_write( hv_vmbus_ring_buffer_info* out_ring_info, hv_vmbus_sg_buffer_list sg_buffers[], - uint32_t sg_buffer_count) + uint32_t sg_buffer_count, + boolean_t *need_sig) { int i = 0; uint32_t byte_avail_to_write; uint32_t byte_avail_to_read; + uint32_t old_write_location; uint32_t total_bytes_to_write = 0; volatile uint32_t next_write_location; uint64_t prev_indices = 0; for (i = 0; i < sg_buffer_count; i++) { total_bytes_to_write += sg_buffers[i].length; } total_bytes_to_write += sizeof(uint64_t); mtx_lock_spin(&out_ring_info->ring_lock); get_ring_buffer_avail_bytes(out_ring_info, &byte_avail_to_read, &byte_avail_to_write); /* * If there is only room for the packet, assume it is full. * Otherwise, the next time around, we think the ring buffer * is empty since the read index == write index */ if (byte_avail_to_write <= total_bytes_to_write) { mtx_unlock_spin(&out_ring_info->ring_lock); return (EAGAIN); } /* * Write to the ring buffer */ next_write_location = get_next_write_location(out_ring_info); + old_write_location = next_write_location; + for (i = 0; i < sg_buffer_count; i++) { next_write_location = copy_to_ring_buffer(out_ring_info, next_write_location, (char *) sg_buffers[i].data, sg_buffers[i].length); } /* * Set previous packet start */ prev_indices = get_ring_buffer_indices(out_ring_info); next_write_location = copy_to_ring_buffer( out_ring_info, next_write_location, (char *) &prev_indices, sizeof(uint64_t)); /* - * Make sure we flush all writes before updating the writeIndex + * Full memory barrier before updating the write index. */ - wmb(); + mb(); /* * Now, update the write location */ set_next_write_location(out_ring_info, next_write_location); mtx_unlock_spin(&out_ring_info->ring_lock); + + *need_sig = hv_ring_buffer_needsig_on_write(old_write_location, + out_ring_info); return (0); } /** * @brief Read without advancing the read index. */ int hv_ring_buffer_peek( hv_vmbus_ring_buffer_info* in_ring_info, void* buffer, uint32_t buffer_len) { uint32_t bytesAvailToWrite; uint32_t bytesAvailToRead; uint32_t nextReadLocation = 0; mtx_lock_spin(&in_ring_info->ring_lock); get_ring_buffer_avail_bytes(in_ring_info, &bytesAvailToRead, &bytesAvailToWrite); /* * Make sure there is something to read */ if (bytesAvailToRead < buffer_len) { mtx_unlock_spin(&in_ring_info->ring_lock); return (EAGAIN); } /* * Convert to byte offset */ nextReadLocation = get_next_read_location(in_ring_info); nextReadLocation = copy_from_ring_buffer( in_ring_info, (char *)buffer, buffer_len, nextReadLocation); mtx_unlock_spin(&in_ring_info->ring_lock); return (0); } /** * @brief Read and advance the read index.
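 *
 * The offset argument skips bytes the caller has already examined
 * with hv_ring_buffer_peek() before the read index is advanced.
 */

/*
 * A consumer sketch (editor example with a hypothetical fixed-size
 * descriptor, not commit code): peek first, then consume past it.
 */
static int
ring_read_example(hv_vmbus_ring_buffer_info *in)
{
        uint64_t desc;
        char body[64];
        int error;

        /* Inspect the leading quadword without consuming it. */
        error = hv_ring_buffer_peek(in, &desc, sizeof(desc));
        if (error != 0)
                return (error);
        /* Consume the body, skipping the descriptor just peeked. */
        return (hv_ring_buffer_read(in, body, sizeof(body), sizeof(desc)));
}

/*
 * The implementation follows: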
*/ int hv_ring_buffer_read( hv_vmbus_ring_buffer_info* in_ring_info, void* buffer, uint32_t buffer_len, uint32_t offset) { uint32_t bytes_avail_to_write; uint32_t bytes_avail_to_read; uint32_t next_read_location = 0; uint64_t prev_indices = 0; if (buffer_len <= 0) return (EINVAL); mtx_lock_spin(&in_ring_info->ring_lock); get_ring_buffer_avail_bytes( in_ring_info, &bytes_avail_to_read, &bytes_avail_to_write); /* * Make sure there is something to read */ if (bytes_avail_to_read < buffer_len) { mtx_unlock_spin(&in_ring_info->ring_lock); return (EAGAIN); } next_read_location = get_next_read_location_with_offset( in_ring_info, offset); next_read_location = copy_from_ring_buffer( in_ring_info, (char *) buffer, buffer_len, next_read_location); next_read_location = copy_from_ring_buffer( in_ring_info, (char *) &prev_indices, sizeof(uint64_t), next_read_location); /* * Make sure all reads are done before we update the read index since * the writer may start writing to the read area once the read index * is updated. */ wmb(); /* * Update the read index */ set_next_read_location(in_ring_info, next_read_location); mtx_unlock_spin(&in_ring_info->ring_lock); return (0); } /** * @brief Helper routine to copy from source to ring buffer. * * Assume there is enough room. Handles wrap-around in dest case only! */ uint32_t copy_to_ring_buffer( hv_vmbus_ring_buffer_info* ring_info, uint32_t start_write_offset, char* src, uint32_t src_len) { char *ring_buffer = get_ring_buffer(ring_info); uint32_t ring_buffer_size = get_ring_buffer_size(ring_info); uint32_t fragLen; if (src_len > ring_buffer_size - start_write_offset) { /* wrap-around detected! */ fragLen = ring_buffer_size - start_write_offset; memcpy(ring_buffer + start_write_offset, src, fragLen); memcpy(ring_buffer, src + fragLen, src_len - fragLen); } else { memcpy(ring_buffer + start_write_offset, src, src_len); } start_write_offset += src_len; start_write_offset %= ring_buffer_size; return (start_write_offset); } /** * @brief Helper routine to copy to source from ring buffer. * * Assume there is enough room. Handles wrap-around in src case only! */ uint32_t copy_from_ring_buffer( hv_vmbus_ring_buffer_info* ring_info, char* dest, uint32_t dest_len, uint32_t start_read_offset) { uint32_t fragLen; char *ring_buffer = get_ring_buffer(ring_info); uint32_t ring_buffer_size = get_ring_buffer_size(ring_info); if (dest_len > ring_buffer_size - start_read_offset) { /* wrap-around detected at the src */ fragLen = ring_buffer_size - start_read_offset; memcpy(dest, ring_buffer + start_read_offset, fragLen); memcpy(dest + fragLen, ring_buffer, dest_len - fragLen); } else { memcpy(dest, ring_buffer + start_read_offset, dest_len); } start_read_offset += dest_len; start_read_offset %= ring_buffer_size; return (start_read_offset); } Property changes on: head/sys/dev/hyperv/vmbus/hv_ring_buffer.c ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c (revision 282211) +++ head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c (revision 282212) @@ -1,608 +1,732 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * VM Bus Driver Implementation */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include +#include #include +#include #include "hv_vmbus_priv.h" #define VMBUS_IRQ 0x5 -static struct intr_event *hv_msg_intr_event; -static struct intr_event *hv_event_intr_event; -static void *msg_swintr; -static void *event_swintr; static device_t vmbus_devp; -static void *vmbus_cookiep; -static int vmbus_rid; -struct resource *intr_res; -static int vmbus_irq = VMBUS_IRQ; static int vmbus_inited; static hv_setup_args setup_args; /* only CPU 0 supported at this time */ /** * @brief Software interrupt thread routine to handle channel messages from * the hypervisor. */ static void -vmbus_msg_swintr(void *dummy) +vmbus_msg_swintr(void *arg) { int cpu; void* page_addr; hv_vmbus_message* msg; hv_vmbus_message* copied; - cpu = PCPU_GET(cpuid); + cpu = (int)(long)arg; + KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: " + "cpu out of range!")); + page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; for (;;) { if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) { break; /* no message */ } else { copied = malloc(sizeof(hv_vmbus_message), M_DEVBUF, M_NOWAIT); KASSERT(copied != NULL, ("Error VMBUS: malloc failed to allocate" " hv_vmbus_message!")); if (copied == NULL) continue; memcpy(copied, msg, sizeof(hv_vmbus_message)); hv_queue_work_item(hv_vmbus_g_connection.work_queue, hv_vmbus_on_channel_message, copied); } msg->header.message_type = HV_MESSAGE_TYPE_NONE; /* * Make sure the write to message_type (ie set to * HV_MESSAGE_TYPE_NONE) happens before we read the * message_pending and EOMing. Otherwise, the EOMing will * not deliver any more messages * since there is no empty slot */ wmb(); if (msg->header.message_flags.u.message_pending) { /* * This will cause message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(HV_X64_MSR_EOM, 0); } } } /** * @brief Interrupt filter routine for VMBUS. * * The purpose of this routine is to determine the type of VMBUS protocol * message to process - an event or a channel message. 
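 * Events are checked first (matching what Windows does when it runs as
 * a guest); messages are then handed to the per-cpu SWI thread.
 */

/*
 * For reference, the end-of-message handshake driven by
 * vmbus_msg_swintr() above, condensed into an editor sketch (not
 * commit code): the slot must be released before message_pending is
 * checked, or the hypervisor may never redeliver.
 */
static void
eom_example(hv_vmbus_message *msg)
{
        msg->header.message_type = HV_MESSAGE_TYPE_NONE;
        wmb();          /* release the slot before the pending check */
        if (msg->header.message_flags.u.message_pending)
                wrmsr(HV_X64_MSR_EOM, 0);       /* request a rescan */
}

/*
 * Historical notes on the restricted filter environment, removed here
 * along with the filter registration itself: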
- * As this is an interrupt filter routine, the function runs in a very - * restricted environment. From the manpage for bus_setup_intr(9) - * - * In this restricted environment, care must be taken to account for all - * races. A careful analysis of races should be done as well. It is gener- - * ally cheaper to take an extra interrupt, for example, than to protect - * variables with spinlocks. Read, modify, write cycles of hardware regis- - * ters need to be carefully analyzed if other threads are accessing the - * same registers. */ -static int +static inline int hv_vmbus_isr(void *unused) { int cpu; hv_vmbus_message* msg; hv_vmbus_synic_event_flags* event; void* page_addr; cpu = PCPU_GET(cpuid); - /* (Temporary limit) */ - KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero")); /* * The Windows team has advised that we check for events * before checking for messages. This is the way they do it * in Windows when running as a guest in Hyper-V */ page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu]; event = (hv_vmbus_synic_event_flags*) page_addr + HV_VMBUS_MESSAGE_SINT; - /* Since we are a child, we only need to check bit 0 */ - if (synch_test_and_clear_bit(0, &event->flags32[0])) { - swi_sched(event_swintr, 0); + if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || + (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) { + /* Since we are a child, we only need to check bit 0 */ + if (synch_test_and_clear_bit(0, &event->flags32[0])) { + swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0); + } + } else { + /* + * On host with Win8 or above, we can directly look at + * the event page. If bit n is set, we have an interrupt + * on the channel with id n. + * Directly schedule the event software interrupt on + * current cpu. + */ + swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0); } /* Check if there are actual msgs to be processed */ page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) { - swi_sched(msg_swintr, 0); + swi_sched(hv_vmbus_g_context.msg_swintr[cpu], 0); } return FILTER_HANDLED; } +#ifdef HV_DEBUG_INTR +uint32_t hv_intr_count = 0; +#endif +uint32_t hv_vmbus_swintr_event_cpu[MAXCPU]; +uint32_t hv_vmbus_intr_cpu[MAXCPU]; + +void +hv_vector_handler(struct trapframe *trap_frame) +{ +#ifdef HV_DEBUG_INTR + int cpu; +#endif + + /* + * Disable preemption. + */ + critical_enter(); + +#ifdef HV_DEBUG_INTR + /* + * Do a little interrupt counting. + */ + cpu = PCPU_GET(cpuid); + hv_vmbus_intr_cpu[cpu]++; + hv_intr_count++; +#endif + + hv_vmbus_isr(NULL); + + /* + * Enable preemption.
+ */ + critical_exit(); +} + static int vmbus_read_ivar( device_t dev, device_t child, int index, uintptr_t* result) { struct hv_device *child_dev_ctx = device_get_ivars(child); switch (index) { case HV_VMBUS_IVAR_TYPE: *result = (uintptr_t) &child_dev_ctx->class_id; return (0); case HV_VMBUS_IVAR_INSTANCE: *result = (uintptr_t) &child_dev_ctx->device_id; return (0); case HV_VMBUS_IVAR_DEVCTX: *result = (uintptr_t) child_dev_ctx; return (0); case HV_VMBUS_IVAR_NODE: *result = (uintptr_t) child_dev_ctx->device; return (0); } return (ENOENT); } static int vmbus_write_ivar( device_t dev, device_t child, int index, uintptr_t value) { switch (index) { case HV_VMBUS_IVAR_TYPE: case HV_VMBUS_IVAR_INSTANCE: case HV_VMBUS_IVAR_DEVCTX: case HV_VMBUS_IVAR_NODE: /* read-only */ return (EINVAL); } return (ENOENT); } struct hv_device* hv_vmbus_child_device_create( hv_guid type, hv_guid instance, hv_vmbus_channel* channel) { hv_device* child_dev; /* * Allocate the new child device */ child_dev = malloc(sizeof(hv_device), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(child_dev != NULL, ("Error VMBUS: malloc failed to allocate hv_device!")); if (child_dev == NULL) return (NULL); child_dev->channel = channel; memcpy(&child_dev->class_id, &type, sizeof(hv_guid)); memcpy(&child_dev->device_id, &instance, sizeof(hv_guid)); return (child_dev); } static void print_dev_guid(struct hv_device *dev) { int i; unsigned char guid_name[100]; for (i = 0; i < 32; i += 2) sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]); if(bootverbose) printf("VMBUS: Class ID: %s\n", guid_name); } int hv_vmbus_child_device_register(struct hv_device *child_dev) { device_t child; int ret = 0; print_dev_guid(child_dev); child = device_add_child(vmbus_devp, NULL, -1); child_dev->device = child; device_set_ivars(child, child_dev); mtx_lock(&Giant); ret = device_probe_and_attach(child); mtx_unlock(&Giant); return (0); } int hv_vmbus_child_device_unregister(struct hv_device *child_dev) { int ret = 0; /* * XXXKYS: Ensure that this is the opposite of * device_add_child() */ mtx_lock(&Giant); ret = device_delete_child(vmbus_devp, child_dev->device); mtx_unlock(&Giant); return(ret); } static void vmbus_identify(driver_t *driver, device_t parent) { if (!hv_vmbus_query_hypervisor_presence()) return; vm_guest = VM_GUEST_HV; BUS_ADD_CHILD(parent, 0, "vmbus", 0); } static int vmbus_probe(device_t dev) { if(bootverbose) device_printf(dev, "VMBUS: probe\n"); device_set_desc(dev, "Vmbus Devices"); return (BUS_PROBE_NOWILDCARD); } +#ifdef HYPERV +extern inthand_t IDTVEC(rsvd), IDTVEC(hv_vmbus_callback); + /** + * @brief Find a free IDT slot and setup the interrupt handler. + */ +static int +vmbus_vector_alloc(void) +{ + int vector; + uintptr_t func; + struct gate_descriptor *ip; + + /* + * Search backwards form the highest IDT vector available for use + * as vmbus channel callback vector. We install 'hv_vmbus_callback' + * handler at that vector and use it to interrupt vcpus. + */ + vector = APIC_SPURIOUS_INT; + while (--vector >= APIC_IPI_INTS) { + ip = &idt[vector]; + func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); + if (func == (uintptr_t)&IDTVEC(rsvd)) { +#ifdef __i386__ + setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYS386IGT, + SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#else + setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYSIGT, + SEL_KPL, 0); +#endif + + return (vector); + } + } + return (0); +} + +/** + * @brief Restore the IDT slot to rsvd. 
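 *
 * Both helpers recover the handler address from the gate descriptor
 * the same way:
 *
 *      func = ((long)ip->gd_hioffset << 16) | ip->gd_looffset;
 *
 * gd_looffset holds bits 15:0 of the handler address and gd_hioffset
 * the remaining high bits, so comparing func against IDTVEC(rsvd) or
 * IDTVEC(hv_vmbus_callback) identifies the slot's current owner.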
+ */ +static void +vmbus_vector_free(int vector) +{ + uintptr_t func; + struct gate_descriptor *ip; + + if (vector == 0) + return; + + KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT, + ("invalid vector %d", vector)); + + ip = &idt[vector]; + func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); + KASSERT(func == (uintptr_t)&IDTVEC(hv_vmbus_callback), + ("invalid vector %d", vector)); + + setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); +} + +#else /* HYPERV */ + +static int +vmbus_vector_alloc(void) +{ + return(0); +} + +static void +vmbus_vector_free(int vector) +{ +} + +#endif /* HYPERV */ + +/** * @brief Main vmbus driver initialization routine. * * Here, we * - initialize the vmbus driver context * - setup various driver entry points * - invoke the vmbus hv main init routine * - get the irq resource * - invoke the vmbus to add the vmbus root device * - setup the vmbus root device * - retrieve the channel offers */ static int vmbus_bus_init(void) { - struct ioapic_intsrc { - struct intsrc io_intsrc; - u_int io_irq; - u_int io_intpin:8; - u_int io_vector:8; - u_int io_cpu:8; - u_int io_activehi:1; - u_int io_edgetrigger:1; - u_int io_masked:1; - int io_bus:4; - uint32_t io_lowreg; - }; - int i, ret; - unsigned int vector = 0; - struct intsrc *isrc; - struct ioapic_intsrc *intpin; + int i, j, n, ret; if (vmbus_inited) return (0); vmbus_inited = 1; ret = hv_vmbus_init(); if (ret) { if(bootverbose) printf("Error VMBUS: Hypervisor Initialization Failed!\n"); return (ret); } - ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr, - NULL, SWI_CLOCK, 0, &msg_swintr); - - if (ret) - goto cleanup; - /* - * Message SW interrupt handler checks a per-CPU page and - * thus the thread needs to be bound to CPU-0 - which is where - * all interrupts are processed. + * Find a free IDT slot for vmbus callback. */ - ret = intr_event_bind(hv_msg_intr_event, 0); + hv_vmbus_g_context.hv_cb_vector = vmbus_vector_alloc(); - if (ret) - goto cleanup1; + if (hv_vmbus_g_context.hv_cb_vector == 0) { + if(bootverbose) + printf("Error VMBUS: Cannot find free IDT slot for " + "vmbus callback!\n"); + goto cleanup; + } - ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events, - NULL, SWI_CLOCK, 0, &event_swintr); + if(bootverbose) + printf("VMBUS: vmbus callback vector %d\n", + hv_vmbus_g_context.hv_cb_vector); - if (ret) - goto cleanup1; + /* + * Notify the hypervisor of our vector. + */ + setup_args.vector = hv_vmbus_g_context.hv_cb_vector; - intr_res = bus_alloc_resource(vmbus_devp, - SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE); + CPU_FOREACH(j) { + hv_vmbus_intr_cpu[j] = 0; + hv_vmbus_swintr_event_cpu[j] = 0; + hv_vmbus_g_context.hv_event_intr_event[j] = NULL; + hv_vmbus_g_context.hv_msg_intr_event[j] = NULL; + hv_vmbus_g_context.event_swintr[j] = NULL; + hv_vmbus_g_context.msg_swintr[j] = NULL; - if (intr_res == NULL) { - ret = ENOMEM; /* XXXKYS: Need a better errno */ - goto cleanup2; + for (i = 0; i < 2; i++) + setup_args.page_buffers[2 * j + i] = NULL; } /* - * Setup interrupt filter handler + * Per cpu setup. */ - ret = bus_setup_intr(vmbus_devp, intr_res, - INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL, - NULL, &vmbus_cookiep); + CPU_FOREACH(j) { + /* + * Setup software interrupt thread and handler for msg handling. 
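 * The pattern repeated below for both the msg and event paths is:
 * swi_add() creates the software interrupt handler and its
 * intr_event, then intr_event_bind() pins the backing ithread to
 * cpu j, so each CPU drains only its own SynIC pages.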
+ */ + ret = swi_add(&hv_vmbus_g_context.hv_msg_intr_event[j], + "hv_msg", vmbus_msg_swintr, (void *)(long)j, SWI_CLOCK, 0, + &hv_vmbus_g_context.msg_swintr[j]); + if (ret) { + if(bootverbose) + printf("VMBUS: failed to setup msg swi for " + "cpu %d\n", j); + goto cleanup1; + } - if (ret != 0) - goto cleanup3; + /* + * Bind the swi thread to the cpu. + */ + ret = intr_event_bind(hv_vmbus_g_context.hv_msg_intr_event[j], + j); + if (ret) { + if(bootverbose) + printf("VMBUS: failed to bind msg swi thread " + "to cpu %d\n", j); + goto cleanup1; + } - ret = bus_bind_intr(vmbus_devp, intr_res, 0); - if (ret != 0) - goto cleanup4; + /* + * Setup software interrupt thread and handler for + * event handling. + */ + ret = swi_add(&hv_vmbus_g_context.hv_event_intr_event[j], + "hv_event", hv_vmbus_on_events, (void *)(long)j, + SWI_CLOCK, 0, &hv_vmbus_g_context.event_swintr[j]); + if (ret) { + if(bootverbose) + printf("VMBUS: failed to setup event swi for " + "cpu %d\n", j); + goto cleanup1; + } - isrc = intr_lookup_source(vmbus_irq); - if ((isrc == NULL) || (isrc->is_event == NULL)) { - ret = EINVAL; - goto cleanup4; - } - - /* vector = isrc->is_event->ie_vector; */ - intpin = (struct ioapic_intsrc *)isrc; - vector = intpin->io_vector; - - if(bootverbose) - printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector); - - /** - * Notify the hypervisor of our irq. - */ - setup_args.vector = vector; - for(i = 0; i < 2; i++) { - setup_args.page_buffers[i] = + /* + * Prepare the per cpu msg and event pages to be called on each cpu. + */ + for(i = 0; i < 2; i++) { + setup_args.page_buffers[2 * j + i] = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO); - if (setup_args.page_buffers[i] == NULL) { - KASSERT(setup_args.page_buffers[i] != NULL, + if (setup_args.page_buffers[2 * j + i] == NULL) { + KASSERT(setup_args.page_buffers[2 * j + i] != NULL, ("Error VMBUS: malloc failed!")); - if (i > 0) - free(setup_args.page_buffers[0], M_DEVBUF); - goto cleanup4; + goto cleanup1; + } } } - /* only CPU #0 supported at this time */ + if (bootverbose) + printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n", + smp_started); + smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args); /* * Connect to VMBus in the root partition */ ret = hv_vmbus_connect(); if (ret != 0) - goto cleanup4; + goto cleanup1; hv_vmbus_request_channel_offers(); return (ret); - cleanup4: + cleanup1: + /* + * Free pages alloc'ed + */ + for (n = 0; n < 2 * MAXCPU; n++) + if (setup_args.page_buffers[n] != NULL) + free(setup_args.page_buffers[n], M_DEVBUF); /* - * remove swi, bus and intr resource + * remove swi and vmbus callback vector; */ - bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep); + CPU_FOREACH(j) { + if (hv_vmbus_g_context.msg_swintr[j] != NULL) + swi_remove(hv_vmbus_g_context.msg_swintr[j]); + if (hv_vmbus_g_context.event_swintr[j] != NULL) + swi_remove(hv_vmbus_g_context.event_swintr[j]); + hv_vmbus_g_context.hv_msg_intr_event[j] = NULL; + hv_vmbus_g_context.hv_event_intr_event[j] = NULL; + } - cleanup3: - bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res); + vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector); - cleanup2: - swi_remove(event_swintr); - - cleanup1: - swi_remove(msg_swintr); - cleanup: hv_vmbus_cleanup(); return (ret); } static int vmbus_attach(device_t dev) { if(bootverbose) device_printf(dev, "VMBUS: attach dev: %p\n", dev); vmbus_devp = dev; /* * If the system has already booted and thread * scheduling is possible indicated by the global * cold set to zero, we just call the driver * 
initialization directly. */ if (!cold) vmbus_bus_init(); return (0); } static void vmbus_init(void) { if (vm_guest != VM_GUEST_HV) return; /* * If the system has already booted and thread * scheduling is possible, as indicated by the * global cold set to zero, we just call the driver * initialization directly. */ if (!cold) vmbus_bus_init(); } static void vmbus_bus_exit(void) { int i; hv_vmbus_release_unattached_channels(); hv_vmbus_disconnect(); smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL); - for(i = 0; i < 2; i++) { + for(i = 0; i < 2 * MAXCPU; i++) { if (setup_args.page_buffers[i] != 0) free(setup_args.page_buffers[i], M_DEVBUF); } hv_vmbus_cleanup(); - /* remove swi, bus and intr resource */ - bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep); + /* remove swi */ + CPU_FOREACH(i) { + if (hv_vmbus_g_context.msg_swintr[i] != NULL) + swi_remove(hv_vmbus_g_context.msg_swintr[i]); + if (hv_vmbus_g_context.event_swintr[i] != NULL) + swi_remove(hv_vmbus_g_context.event_swintr[i]); + hv_vmbus_g_context.hv_msg_intr_event[i] = NULL; + hv_vmbus_g_context.hv_event_intr_event[i] = NULL; + } - bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res); + vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector); - swi_remove(msg_swintr); - swi_remove(event_swintr); - return; } static void vmbus_exit(void) { vmbus_bus_exit(); } static int vmbus_detach(device_t dev) { vmbus_exit(); return (0); } static void vmbus_mod_load(void) { if(bootverbose) printf("VMBUS: load\n"); } static void vmbus_mod_unload(void) { if(bootverbose) printf("VMBUS: unload\n"); } static int vmbus_modevent(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: vmbus_mod_load(); break; case MOD_UNLOAD: vmbus_mod_unload(); break; } return (0); } static device_method_t vmbus_methods[] = { /** Device interface */ DEVMETHOD(device_identify, vmbus_identify), DEVMETHOD(device_probe, vmbus_probe), DEVMETHOD(device_attach, vmbus_attach), DEVMETHOD(device_detach, vmbus_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /** Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_read_ivar, vmbus_read_ivar), DEVMETHOD(bus_write_ivar, vmbus_write_ivar), { 0, 0 } }; static char driver_name[] = "vmbus"; static driver_t vmbus_driver = { driver_name, vmbus_methods,0, }; devclass_t vmbus_devclass; DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0); MODULE_VERSION(vmbus,1); -/* TODO: We want to be earlier than SI_SUB_VFS */ -SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL); +/* We want to be started after SMP is initialized */ +SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL); Index: head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h =================================================================== --- head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h (revision 282211) +++ head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h (revision 282212) @@ -1,724 +1,715 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __HYPERV_PRIV_H__ #define __HYPERV_PRIV_H__ #include #include #include #include #include /* * Status codes for hypervisor operations. */ typedef uint16_t hv_vmbus_status; #define HV_MESSAGE_SIZE (256) #define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) #define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) #define HV_ANY_VP (0xFFFFFFFF) /* * Synthetic interrupt controller flag constants. */ #define HV_EVENT_FLAGS_COUNT (256 * 8) #define HV_EVENT_FLAGS_BYTE_COUNT (256) #define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(uint32_t)) /* * MessageId: HV_STATUS_INSUFFICIENT_BUFFERS * MessageText: * You did not supply enough message buffers to send a message. */ #define HV_STATUS_INSUFFICIENT_BUFFERS ((uint16_t)0x0013) typedef void (*hv_vmbus_channel_callback)(void *context); typedef struct { void* data; uint32_t length; } hv_vmbus_sg_buffer_list; typedef struct { uint32_t current_interrupt_mask; uint32_t current_read_index; uint32_t current_write_index; uint32_t bytes_avail_to_read; uint32_t bytes_avail_to_write; } hv_vmbus_ring_buffer_debug_info; typedef struct { uint32_t rel_id; hv_vmbus_channel_state state; hv_guid interface_type; hv_guid interface_instance; uint32_t monitor_id; uint32_t server_monitor_pending; uint32_t server_monitor_latency; uint32_t server_monitor_connection_id; uint32_t client_monitor_pending; uint32_t client_monitor_latency; uint32_t client_monitor_connection_id; hv_vmbus_ring_buffer_debug_info inbound; hv_vmbus_ring_buffer_debug_info outbound; } hv_vmbus_channel_debug_info; typedef union { hv_vmbus_channel_version_supported version_supported; hv_vmbus_channel_open_result open_result; hv_vmbus_channel_gpadl_torndown gpadl_torndown; hv_vmbus_channel_gpadl_created gpadl_created; hv_vmbus_channel_version_response version_response; } hv_vmbus_channel_msg_response; /* * Represents each channel msg on the vmbus connection * This is a variable-size data structure depending on * the msg type itself */ typedef struct hv_vmbus_channel_msg_info { /* * Bookkeeping stuff */ TAILQ_ENTRY(hv_vmbus_channel_msg_info) msg_list_entry; /* * So far, this is only used to handle * gpadl body message */ TAILQ_HEAD(, hv_vmbus_channel_msg_info) sub_msg_list_anchor; /* * Synchronize the request/response if * needed. * KYS: Use a semaphore for now. * Not perf critical. */ struct sema wait_sema; hv_vmbus_channel_msg_response response; uint32_t message_size; /** * The channel message that goes out on * the "wire". It will contain at * minimum the * hv_vmbus_channel_msg_header * header. 
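 *
 * (msg[0] below is the pre-C99 flexible-array idiom: the structure
 * is over-allocated so the wire message can sit directly after the
 * bookkeeping fields.)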
*/ unsigned char msg[0]; } hv_vmbus_channel_msg_info; /* * The format must be the same as hv_vm_data_gpa_direct */ typedef struct hv_vmbus_channel_packet_page_buffer { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; uint32_t reserved; uint32_t range_count; hv_vmbus_page_buffer range[HV_MAX_PAGE_BUFFER_COUNT]; } __packed hv_vmbus_channel_packet_page_buffer; /* * The format must be the same as hv_vm_data_gpa_direct */ typedef struct hv_vmbus_channel_packet_multipage_buffer { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; uint32_t reserved; uint32_t range_count; /* Always 1 in this case */ hv_vmbus_multipage_buffer range; } __packed hv_vmbus_channel_packet_multipage_buffer; enum { HV_VMBUS_MESSAGE_CONNECTION_ID = 1, HV_VMBUS_MESSAGE_PORT_ID = 1, HV_VMBUS_EVENT_CONNECTION_ID = 2, HV_VMBUS_EVENT_PORT_ID = 2, HV_VMBUS_MONITOR_CONNECTION_ID = 3, HV_VMBUS_MONITOR_PORT_ID = 3, HV_VMBUS_MESSAGE_SINT = 2 }; #define HV_PRESENT_BIT 0x80000000 #define HV_HYPERCALL_PARAM_ALIGN sizeof(uint64_t) -/* - * Connection identifier type - */ -typedef union { - uint32_t as_uint32_t; - struct { - uint32_t id:24; - uint32_t reserved:8; - } u; - -} __packed hv_vmbus_connection_id; - -/* - * Definition of the hv_vmbus_signal_event hypercall input structure - */ typedef struct { - hv_vmbus_connection_id connection_id; - uint16_t flag_number; - uint16_t rsvd_z; -} __packed hv_vmbus_input_signal_event; - -typedef struct { - uint64_t align8; - hv_vmbus_input_signal_event event; -} __packed hv_vmbus_input_signal_event_buffer; - -typedef struct { uint64_t guest_id; void* hypercall_page; hv_bool_uint8_t syn_ic_initialized; + + hv_vmbus_handle syn_ic_msg_page[MAXCPU]; + hv_vmbus_handle syn_ic_event_page[MAXCPU]; /* - * This is used as an input param to HV_CALL_SIGNAL_EVENT hypercall. - * The input param is immutable in our usage and - * must be dynamic mem (vs stack or global). + * For FreeBSD cpuid to Hyper-V vcpuid mapping. */ - hv_vmbus_input_signal_event_buffer *signal_event_buffer; + uint32_t hv_vcpu_index[MAXCPU]; /* - * 8-bytes aligned of the buffer above + * Each cpu has its own software interrupt handler for channel + * event and msg handling. */ - hv_vmbus_input_signal_event *signal_event_param; - - hv_vmbus_handle syn_ic_msg_page[MAXCPU]; - hv_vmbus_handle syn_ic_event_page[MAXCPU]; + struct intr_event *hv_event_intr_event[MAXCPU]; + struct intr_event *hv_msg_intr_event[MAXCPU]; + void *event_swintr[MAXCPU]; + void *msg_swintr[MAXCPU]; + /* + * Host uses this vector to interrupt the guest for vmbus channel + * events and msgs.
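 * This is the free IDT vector claimed by vmbus_vector_alloc() and
 * programmed into the shared SINT by hv_vmbus_synic_init(), letting
 * the host inject channel interrupts without an emulated IOAPIC irq.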
+ */ + unsigned int hv_cb_vector; } hv_vmbus_context; /* * Define hypervisor message types */ typedef enum { HV_MESSAGE_TYPE_NONE = 0x00000000, /* * Memory access messages */ HV_MESSAGE_TYPE_UNMAPPED_GPA = 0x80000000, HV_MESSAGE_TYPE_GPA_INTERCEPT = 0x80000001, /* * Timer notification messages */ HV_MESSAGE_TIMER_EXPIRED = 0x80000010, /* * Error messages */ HV_MESSAGE_TYPE_INVALID_VP_REGISTER_VALUE = 0x80000020, HV_MESSAGE_TYPE_UNRECOVERABLE_EXCEPTION = 0x80000021, HV_MESSAGE_TYPE_UNSUPPORTED_FEATURE = 0x80000022, /* * Trace buffer complete messages */ HV_MESSAGE_TYPE_EVENT_LOG_BUFFER_COMPLETE = 0x80000040, /* * Platform-specific processor intercept messages */ HV_MESSAGE_TYPE_X64_IO_PORT_INTERCEPT = 0x80010000, HV_MESSAGE_TYPE_X64_MSR_INTERCEPT = 0x80010001, HV_MESSAGE_TYPE_X64_CPU_INTERCEPT = 0x80010002, HV_MESSAGE_TYPE_X64_EXCEPTION_INTERCEPT = 0x80010003, HV_MESSAGE_TYPE_X64_APIC_EOI = 0x80010004, HV_MESSAGE_TYPE_X64_LEGACY_FP_ERROR = 0x80010005 } hv_vmbus_msg_type; /* * Define port identifier type */ typedef union _hv_vmbus_port_id { uint32_t as_uint32_t; struct { uint32_t id:24; uint32_t reserved:8; } u ; } hv_vmbus_port_id; /* * Define synthetic interrupt controller message flag */ typedef union { uint8_t as_uint8_t; struct { uint8_t message_pending:1; uint8_t reserved:7; } u; } hv_vmbus_msg_flags; typedef uint64_t hv_vmbus_partition_id; /* * Define synthetic interrupt controller message header */ typedef struct { hv_vmbus_msg_type message_type; uint8_t payload_size; hv_vmbus_msg_flags message_flags; uint8_t reserved[2]; union { hv_vmbus_partition_id sender; hv_vmbus_port_id port; } u; } hv_vmbus_msg_header; /* * Define synthetic interrupt controller message format */ typedef struct { hv_vmbus_msg_header header; union { uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; } u ; } hv_vmbus_message; /* * Maximum channels is determined by the size of the interrupt * page which is PAGE_SIZE. 1/2 of PAGE_SIZE is for * send endpoint interrupt and the other is receive * endpoint interrupt. * * Note: (PAGE_SIZE >> 1) << 3 allocates 16384 channels */ #define HV_MAX_NUM_CHANNELS (PAGE_SIZE >> 1) << 3 /* * (The value here must be a multiple of 32) */ #define HV_MAX_NUM_CHANNELS_SUPPORTED 256 /* * VM Bus connection states */ typedef enum { HV_DISCONNECTED, HV_CONNECTING, HV_CONNECTED, HV_DISCONNECTING } hv_vmbus_connect_state; #define HV_MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT typedef struct { hv_vmbus_connect_state connect_state; uint32_t next_gpadl_handle; /** * Represents channel interrupts. Each bit position * represents a channel. * When a channel sends an interrupt via VMBUS, it * finds its bit in the send_interrupt_page, sets it and * calls Hv to generate a port event. The other end * receives the port event and parses the * recv_interrupt_page to see which bit is set */ void *interrupt_page; void *send_interrupt_page; void *recv_interrupt_page; /* * 2 pages - 1st page for parent->child * notification and 2nd is child->parent * notification */ void *monitor_pages; TAILQ_HEAD(, hv_vmbus_channel_msg_info) channel_msg_anchor; struct mtx channel_msg_lock; /** - * List of channels + * List of primary channels. Sub channels will be linked + * under their primary channel.
*/ TAILQ_HEAD(, hv_vmbus_channel) channel_anchor; struct mtx channel_lock; hv_vmbus_handle work_queue; struct sema control_sema; } hv_vmbus_connection; /* * Declare the MSR used to identify the guest OS */ #define HV_X64_MSR_GUEST_OS_ID 0x40000000 typedef union { uint64_t as_uint64_t; struct { uint64_t build_number : 16; uint64_t service_version : 8; /* Service Pack, etc. */ uint64_t minor_version : 8; uint64_t major_version : 8; /* * HV_GUEST_OS_MICROSOFT_IDS (If Vendor=MS) * HV_GUEST_OS_VENDOR */ uint64_t os_id : 8; uint64_t vendor_id : 16; } u; } hv_vmbus_x64_msr_guest_os_id_contents; /* * Declare the MSR used to setup pages used to communicate with the hypervisor */ #define HV_X64_MSR_HYPERCALL 0x40000001 typedef union { uint64_t as_uint64_t; struct { uint64_t enable :1; uint64_t reserved :11; uint64_t guest_physical_address :52; } u; } hv_vmbus_x64_msr_hypercall_contents; typedef union { uint32_t as_uint32_t; struct { uint32_t group_enable :4; uint32_t rsvd_z :28; } u; } hv_vmbus_monitor_trigger_state; typedef union { uint64_t as_uint64_t; struct { uint32_t pending; uint32_t armed; } u; } hv_vmbus_monitor_trigger_group; typedef struct { hv_vmbus_connection_id connection_id; uint16_t flag_number; uint16_t rsvd_z; } hv_vmbus_monitor_parameter; /* * hv_vmbus_monitor_page Layout * ------------------------------------------------------ * | 0 | trigger_state (4 bytes) | Rsvd1 (4 bytes) | * | 8 | trigger_group[0] | * | 10 | trigger_group[1] | * | 18 | trigger_group[2] | * | 20 | trigger_group[3] | * | 28 | Rsvd2[0] | * | 30 | Rsvd2[1] | * | 38 | Rsvd2[2] | * | 40 | next_check_time[0][0] | next_check_time[0][1] | * | ... | * | 240 | latency[0][0..3] | * | 340 | Rsvz3[0] | * | 440 | parameter[0][0] | * | 448 | parameter[0][1] | * | ... | * | 840 | Rsvd4[0] | * ------------------------------------------------------ */ typedef struct { hv_vmbus_monitor_trigger_state trigger_state; uint32_t rsvd_z1; hv_vmbus_monitor_trigger_group trigger_group[4]; uint64_t rsvd_z2[3]; int32_t next_check_time[4][32]; uint16_t latency[4][32]; uint64_t rsvd_z3[32]; hv_vmbus_monitor_parameter parameter[4][32]; uint8_t rsvd_z4[1984]; } hv_vmbus_monitor_page; /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES). 
*/ typedef enum { HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES = 0x00000001, HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION = 0x40000000, HV_CPU_ID_FUNCTION_HV_INTERFACE = 0x40000001, /* * The remaining functions depend on the value * of hv_cpu_id_function_interface */ HV_CPU_ID_FUNCTION_MS_HV_VERSION = 0x40000002, HV_CPU_ID_FUNCTION_MS_HV_FEATURES = 0x40000003, HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION = 0x40000004, HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS = 0x40000005 } hv_vmbus_cpuid_function; /* * Define the format of the SIMP register */ typedef union { uint64_t as_uint64_t; struct { uint64_t simp_enabled : 1; uint64_t preserved : 11; uint64_t base_simp_gpa : 52; } u; } hv_vmbus_synic_simp; /* * Define the format of the SIEFP register */ typedef union { uint64_t as_uint64_t; struct { uint64_t siefp_enabled : 1; uint64_t preserved : 11; uint64_t base_siefp_gpa : 52; } u; } hv_vmbus_synic_siefp; /* * Define synthetic interrupt source */ typedef union { uint64_t as_uint64_t; struct { uint64_t vector : 8; uint64_t reserved1 : 8; uint64_t masked : 1; uint64_t auto_eoi : 1; uint64_t reserved2 : 46; } u; } hv_vmbus_synic_sint; /* * Define syn_ic control register */ typedef union _hv_vmbus_synic_scontrol { uint64_t as_uint64_t; struct { uint64_t enable : 1; uint64_t reserved : 63; } u; } hv_vmbus_synic_scontrol; /* * Define the hv_vmbus_post_message hypercall input structure */ typedef struct { hv_vmbus_connection_id connection_id; uint32_t reserved; hv_vmbus_msg_type message_type; uint32_t payload_size; uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; } hv_vmbus_input_post_message; /* * Define the synthetic interrupt controller event flags format */ typedef union { uint8_t flags8[HV_EVENT_FLAGS_BYTE_COUNT]; uint32_t flags32[HV_EVENT_FLAGS_DWORD_COUNT]; } hv_vmbus_synic_event_flags; +/* MSR used to provide vcpu index */ +#define HV_X64_MSR_VP_INDEX (0x40000002) /* * Define synthetic interrupt controller model specific registers */ #define HV_X64_MSR_SCONTROL (0x40000080) #define HV_X64_MSR_SVERSION (0x40000081) #define HV_X64_MSR_SIEFP (0x40000082) #define HV_X64_MSR_SIMP (0x40000083) #define HV_X64_MSR_EOM (0x40000084) #define HV_X64_MSR_SINT0 (0x40000090) #define HV_X64_MSR_SINT1 (0x40000091) #define HV_X64_MSR_SINT2 (0x40000092) #define HV_X64_MSR_SINT3 (0x40000093) #define HV_X64_MSR_SINT4 (0x40000094) #define HV_X64_MSR_SINT5 (0x40000095) #define HV_X64_MSR_SINT6 (0x40000096) #define HV_X64_MSR_SINT7 (0x40000097) #define HV_X64_MSR_SINT8 (0x40000098) #define HV_X64_MSR_SINT9 (0x40000099) #define HV_X64_MSR_SINT10 (0x4000009A) #define HV_X64_MSR_SINT11 (0x4000009B) #define HV_X64_MSR_SINT12 (0x4000009C) #define HV_X64_MSR_SINT13 (0x4000009D) #define HV_X64_MSR_SINT14 (0x4000009E) #define HV_X64_MSR_SINT15 (0x4000009F) /* * Declare the various hypercall operations */ typedef enum { HV_CALL_POST_MESSAGE = 0x005c, HV_CALL_SIGNAL_EVENT = 0x005d, } hv_vmbus_call_code; /** * Global variables */ extern hv_vmbus_context hv_vmbus_g_context; extern hv_vmbus_connection hv_vmbus_g_connection; /* * Private, VM Bus functions */ int hv_vmbus_ring_buffer_init( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len); void hv_ring_buffer_cleanup( hv_vmbus_ring_buffer_info *ring_info); int hv_ring_buffer_write( hv_vmbus_ring_buffer_info *ring_info, hv_vmbus_sg_buffer_list sg_buffers[], - uint32_t sg_buff_count); + uint32_t sg_buff_count, + boolean_t *need_sig); int hv_ring_buffer_peek( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len); int 
hv_ring_buffer_read( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len, uint32_t offset); uint32_t hv_vmbus_get_ring_buffer_interrupt_mask( hv_vmbus_ring_buffer_info *ring_info); void hv_vmbus_dump_ring_info( hv_vmbus_ring_buffer_info *ring_info, char *prefix); +void hv_ring_buffer_read_begin( + hv_vmbus_ring_buffer_info *ring_info); + +uint32_t hv_ring_buffer_read_end( + hv_vmbus_ring_buffer_info *ring_info); + hv_vmbus_channel* hv_vmbus_allocate_channel(void); void hv_vmbus_free_vmbus_channel(hv_vmbus_channel *channel); void hv_vmbus_on_channel_message(void *context); int hv_vmbus_request_channel_offers(void); void hv_vmbus_release_unattached_channels(void); int hv_vmbus_init(void); void hv_vmbus_cleanup(void); uint16_t hv_vmbus_post_msg_via_msg_ipc( hv_vmbus_connection_id connection_id, hv_vmbus_msg_type message_type, void *payload, size_t payload_size); -uint16_t hv_vmbus_signal_event(void); +uint16_t hv_vmbus_signal_event(void *con_id); void hv_vmbus_synic_init(void *irq_arg); void hv_vmbus_synic_cleanup(void *arg); int hv_vmbus_query_hypervisor_presence(void); struct hv_device* hv_vmbus_child_device_create( hv_guid device_type, hv_guid device_instance, hv_vmbus_channel *channel); int hv_vmbus_child_device_register( struct hv_device *child_dev); int hv_vmbus_child_device_unregister( struct hv_device *child_dev); hv_vmbus_channel* hv_vmbus_get_channel_from_rel_id(uint32_t rel_id); /** * Connection interfaces */ int hv_vmbus_connect(void); int hv_vmbus_disconnect(void); int hv_vmbus_post_message(void *buffer, size_t buf_size); -int hv_vmbus_set_event(uint32_t child_rel_id); +int hv_vmbus_set_event(hv_vmbus_channel *channel); void hv_vmbus_on_events(void *); /* * The guest OS needs to register the guest ID with the hypervisor. * The guest ID is a 64-bit entity whose structure is * specified in the Hyper-V specification: * * http://msdn.microsoft.com/en-us/library/windows/ * hardware/ff542653%28v=vs.85%29.aspx * * While the current guidelines do not specify how FreeBSD guest IDs * are to be generated, our plan is to publish guidelines for * FreeBSD and other guest operating systems that are currently hosted * on Hyper-V. The implementation here conforms to these as-yet * unpublished guidelines.
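 *
 * As a worked example (illustrative only): with both distro ID parts
 * zero and a hypothetical __FreeBSD_version of 1100000 (0x10C8E0),
 * hv_generate_guest_id() below yields
 *
 *	(0x8200ULL << 48) | (0x10C8E0ULL << 16) == 0x82000010C8E00000
 *
 * i.e. bit 63 (open source) set, the FreeBSD OS type, and the kernel
 * version in their assigned fields.  The bit layout is: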
* * Bit(s) * 63 - Indicates if the OS is Open Source or not; 1 is Open Source * 62:56 - OS Type; Linux is 0x100, FreeBSD is 0x200 * 55:48 - Distro specific identification * 47:16 - FreeBSD kernel version number * 15:0 - Distro specific identification * * */ #define HV_FREEBSD_VENDOR_ID 0x8200 #define HV_FREEBSD_GUEST_ID hv_generate_guest_id(0,0) static inline uint64_t hv_generate_guest_id( uint8_t distro_id_part1, uint16_t distro_id_part2) { uint64_t guest_id; guest_id = (((uint64_t)HV_FREEBSD_VENDOR_ID) << 48); guest_id |= (((uint64_t)(distro_id_part1)) << 48); guest_id |= (((uint64_t)(__FreeBSD_version)) << 16); /* in param.h */ guest_id |= ((uint64_t)(distro_id_part2)); return guest_id; } typedef struct { unsigned int vector; - void *page_buffers[2]; + void *page_buffers[2 * MAXCPU]; } hv_setup_args; #endif /* __HYPERV_PRIV_H__ */ Index: head/sys/i386/conf/GENERIC =================================================================== --- head/sys/i386/conf/GENERIC (revision 282211) +++ head/sys/i386/conf/GENERIC (revision 282212) @@ -1,370 +1,372 @@ # # GENERIC -- Generic kernel configuration file for FreeBSD/i386 # # For more information on this file, please read the config(5) manual page, # and/or the handbook section on Kernel Configuration Files: # # http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the # latest information. # # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ cpu I486_CPU cpu I586_CPU cpu I686_CPU ident GENERIC makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support options SCHED_ULE # ULE scheduler options PREEMPTION # Enable kernel thread preemption options INET # InterNETworking options INET6 # IPv6 communications protocols options TCP_OFFLOAD # TCP offload options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem options SOFTUPDATES # Enable FFS soft updates support options UFS_ACL # Support for access control lists options UFS_DIRHASH # Improve performance on big directories options UFS_GJOURNAL # Enable gjournal-based UFS journaling options QUOTA # Enable disk quotas for UFS options MD_ROOT # MD is a potential root device options NFSCL # Network Filesystem Client options NFSD # Network Filesystem Server options NFSLOCKD # Network Lock Manager options NFS_ROOT # NFS usable as /, requires NFSCL options MSDOSFS # MSDOS Filesystem options CD9660 # ISO 9660 Filesystem options PROCFS # Process filesystem (requires PSEUDOFS) options PSEUDOFS # Pseudo-filesystem framework options GEOM_PART_GPT # GUID Partition Tables. options GEOM_RAID # Soft RAID functionality.
options GEOM_LABEL # Provides labelization options COMPAT_FREEBSD4 # Compatible with FreeBSD4 options COMPAT_FREEBSD5 # Compatible with FreeBSD5 options COMPAT_FREEBSD6 # Compatible with FreeBSD6 options COMPAT_FREEBSD7 # Compatible with FreeBSD7 options COMPAT_FREEBSD9 # Compatible with FreeBSD9 options COMPAT_FREEBSD10 # Compatible with FreeBSD10 options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI options KTRACE # ktrace(1) support options STACK # stack(9) support options SYSVSHM # SYSV-style shared memory options SYSVMSG # SYSV-style message queues options SYSVSEM # SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options KBD_INSTALL_CDEV # install a CDEV entry in /dev options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data options INCLUDE_CONFIG_FILE # Include this file in kernel # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. # For full debugger support use (turn off in stable branch): options DDB # Support DDB. options GDB # Support remote GDB. options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS options WITNESS # Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones # To make an SMP kernel, the next two lines are needed options SMP # Symmetric MultiProcessor Kernel device apic # I/O APIC # CPU frequency control device cpufreq # Bus support. device acpi device pci options PCI_IOV # PCI SR-IOV support # Floppy drives device fdc # ATA controllers device ahci # AHCI-compatible SATA controllers device ata # Legacy ATA/SATA controllers options ATA_STATIC_ID # Static device numbering device mvs # Marvell 88SX50XX/88SX60XX/88SX70XX/SoC SATA device siis # SiliconImage SiI3124/SiI3132/SiI3531 SATA # SCSI Controllers device ahc # AHA2940 and onboard AIC7xxx devices options AHC_REG_PRETTY_PRINT # Print register bitfields in debug # output. Adds ~128k to driver. device ahd # AHA39320/29320 and onboard AIC79xx devices options AHD_REG_PRETTY_PRINT # Print register bitfields in debug # output. Adds ~215k to driver. device esp # AMD Am53C974 (Tekram DC-390(T)) device hptiop # Highpoint RocketRaid 3xxx series device isp # Qlogic family #device ispfw # Firmware for QLogic HBAs- normally a module device mpt # LSI-Logic MPT-Fusion device mps # LSI-Logic MPT-Fusion 2 device mpr # LSI-Logic MPT-Fusion 3 #device ncr # NCR/Symbios Logic device sym # NCR/Symbios Logic (newer chipsets + those of `ncr') device trm # Tekram DC395U/UW/F DC315U adapters device adv # Advansys SCSI adapters device adw # Advansys wide SCSI adapters device aha # Adaptec 154x SCSI adapters device aic # Adaptec 15[012]x SCSI adapters, AIC-6[23]60. 
device bt # Buslogic/Mylex MultiMaster SCSI adapters device ncv # NCR 53C500 device nsp # Workbit Ninja SCSI-3 device stg # TMC 18C30/18C50 device isci # Intel C600 SAS controller # ATA/SCSI peripherals device scbus # SCSI bus (required for ATA/SCSI) device ch # SCSI media changers device da # Direct Access (disks) device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) device ses # Enclosure Services (SES and SAF-TE) #device ctl # CAM Target Layer # RAID controllers interfaced to the SCSI subsystem device amr # AMI MegaRAID device arcmsr # Areca SATA II RAID device ciss # Compaq Smart RAID 5* device dpt # DPT Smartcache III, IV - See NOTES for options device hptmv # Highpoint RocketRAID 182x device hptnr # Highpoint DC7280, R750 device hptrr # Highpoint RocketRAID 17xx, 22xx, 23xx, 25xx device hpt27xx # Highpoint RocketRAID 27xx device iir # Intel Integrated RAID device ips # IBM (Adaptec) ServeRAID device mly # Mylex AcceleRAID/eXtremeRAID device twa # 3ware 9000 series PATA/SATA RAID device tws # LSI 3ware 9750 SATA+SAS 6Gb/s RAID controller # RAID controllers device aac # Adaptec FSA RAID device aacp # SCSI passthrough for aac (requires CAM) device aacraid # Adaptec by PMC RAID device ida # Compaq Smart RAID device mfi # LSI MegaRAID SAS device mlx # Mylex DAC960 family device mrsas # LSI/Avago MegaRAID SAS/SATA, 6Gb/s and 12Gb/s device pst # Promise Supertrak SX6000 device twe # 3ware ATA RAID # atkbdc0 controls both the keyboard and the PS/2 mouse device atkbdc # AT keyboard controller device atkbd # AT keyboard device psm # PS/2 mouse device kbdmux # keyboard multiplexer device vga # VGA video card driver options VESA # Add support for VESA BIOS Extensions (VBE) device splash # Splash screen and screen saver support # syscons is the default console driver, resembling an SCO console device sc options SC_PIXEL_MODE # add support for the raster text mode # vt is the new video console driver device vt device vt_vga device agp # support several AGP chipsets # Power management support (see NOTES for more options) #device apm # Add suspend/resume support for the i8254. device pmtimer # PCCARD (PCMCIA) support # PCMCIA and cardbus bridge support device cbb # cardbus (yenta) bridge device pccard # PC Card (16-bit) bus device cardbus # CardBus (32-bit) bus # Serial (COM) ports device uart # Generic UART driver # Parallel port device ppc device ppbus # Parallel port bus (required) device lpt # Printer device ppi # Parallel port interface device #device vpo # Requires scbus and da device puc # Multi I/O cards and multi-channel UARTs # PCI Ethernet NICs. device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE device de # DEC/Intel DC21x4x (``Tulip'') device em # Intel PRO/1000 Gigabit Ethernet Family device igb # Intel PRO/1000 PCIE Server Gigabit Family device ixgb # Intel PRO/10GbE Ethernet Card device le # AMD Am7900 LANCE and Am79C9xx PCnet device ti # Alteon Networks Tigon I/II gigabit Ethernet device txp # 3Com 3cR990 (``Typhoon'') device vx # 3Com 3c590, 3c595 (``Vortex'') # PCI Ethernet NICs that use the common MII bus controller code. # NOTE: Be sure to keep the 'device miibus' line in order to use these NICs! 
device miibus # MII bus support device ae # Attansic/Atheros L2 FastEthernet device age # Attansic/Atheros L1 Gigabit Ethernet device alc # Atheros AR8131/AR8132 Ethernet device ale # Atheros AR8121/AR8113/AR8114 Ethernet device bce # Broadcom BCM5706/BCM5708 Gigabit Ethernet device bfe # Broadcom BCM440x 10/100 Ethernet device bge # Broadcom BCM570xx Gigabit Ethernet device cas # Sun Cassini/Cassini+ and NS DP83065 Saturn device dc # DEC/Intel 21143 and various workalikes device et # Agere ET1310 10/100/Gigabit Ethernet device fxp # Intel EtherExpress PRO/100B (82557, 82558) device gem # Sun GEM/Sun ERI/Apple GMAC device hme # Sun HME (Happy Meal Ethernet) device jme # JMicron JMC250 Gigabit/JMC260 Fast Ethernet device lge # Level 1 LXT1001 gigabit Ethernet device msk # Marvell/SysKonnect Yukon II Gigabit Ethernet device nfe # nVidia nForce MCP on-board Ethernet device nge # NatSemi DP83820 gigabit Ethernet device pcn # AMD Am79C97x PCI 10/100 (precedence over 'le') device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 device sf # Adaptec AIC-6915 (``Starfire'') device sge # Silicon Integrated Systems SiS190/191 device sis # Silicon Integrated Systems SiS 900/SiS 7016 device sk # SysKonnect SK-984x & SK-982x gigabit Ethernet device ste # Sundance ST201 (D-Link DFE-550TX) device stge # Sundance/Tamarack TC9021 gigabit Ethernet device tl # Texas Instruments ThunderLAN device tx # SMC EtherPower II (83c170 ``EPIC'') device vge # VIA VT612x gigabit Ethernet device vr # VIA Rhine, Rhine II device vte # DM&P Vortex86 RDC R6040 Fast Ethernet device wb # Winbond W89C840F device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'') # ISA Ethernet NICs. pccard NICs included. device cs # Crystal Semiconductor CS89x0 NIC # 'device ed' requires 'device miibus' device ed # NE[12]000, SMC Ultra, 3c503, DS8390 cards device ex # Intel EtherExpress Pro/10 and Pro/10+ device ep # Etherlink III based cards device fe # Fujitsu MB8696x based cards device ie # EtherExpress 8/16, 3C507, StarLAN 10 etc. device sn # SMC's 9000 series of Ethernet chips device xe # Xircom pccard Ethernet # Wireless NIC cards device wlan # 802.11 support options IEEE80211_DEBUG # enable debug msgs options IEEE80211_AMPDU_AGE # age frames in AMPDU reorder q's options IEEE80211_SUPPORT_MESH # enable 802.11s draft support device wlan_wep # 802.11 WEP support device wlan_ccmp # 802.11 CCMP support device wlan_tkip # 802.11 TKIP support device wlan_amrr # AMRR transmit rate control algorithm device an # Aironet 4500/4800 802.11 wireless NICs. device ath # Atheros NICs device ath_pci # Atheros pci/cardbus glue device ath_hal # pci/cardbus chip support options AH_SUPPORT_AR5416 # enable AR5416 tx/rx descriptors options AH_AR5416_INTERRUPT_MITIGATION # AR5416 interrupt mitigation options ATH_ENABLE_11N # Enable 802.11n support for AR5416 and later device ath_rate_sample # SampleRate tx rate control for ath #device bwi # Broadcom BCM430x/BCM431x wireless NICs. #device bwn # Broadcom BCM43xx wireless NICs. device ipw # Intel 2100 wireless NICs. device iwi # Intel 2200BG/2225BG/2915ABG wireless NICs. device iwn # Intel 4965/1000/5000/6000 wireless NICs. device malo # Marvell Libertas wireless NICs. device mwl # Marvell 88W8363 802.11n wireless NICs. device ral # Ralink Technology RT2500 wireless NICs. device wi # WaveLAN/Intersil/Symbol 802.11 wireless NICs. #device wl # Older non 802.11 Wavelan wireless NIC. device wpi # Intel 3945ABG wireless NICs. # Pseudo devices. 
device loop # Network loopback device random # Entropy device device padlock_rng # VIA Padlock RNG device rdrand_rng # Intel Bull Mountain RNG device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. device bpf # Berkeley packet filter # USB support options USB_DEBUG # enable debug msgs device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface (USB 2.0) device xhci # XHCI PCI->USB interface (USB 3.0) device usb # USB Bus (required) device ukbd # Keyboard device umass # Disks/Mass storage - Requires scbus and da # Sound support device sound # Generic sound driver (required) device snd_cmi # CMedia CMI8338/CMI8738 device snd_csa # Crystal Semiconductor CS461x/428x device snd_emu10kx # Creative SoundBlaster Live! and Audigy device snd_es137x # Ensoniq AudioPCI ES137x device snd_hda # Intel High Definition Audio device snd_ich # Intel, NVidia and other ICH AC'97 Audio device snd_via8233 # VIA VT8233x Audio # MMC/SD device mmc # MMC/SD bus device mmcsd # MMC/SD memory card device sdhci # Generic PCI SD Host Controller # VirtIO support device virtio # Generic VirtIO bus (required) device virtio_pci # VirtIO PCI device device vtnet # VirtIO Ethernet device device virtio_blk # VirtIO Block device device virtio_scsi # VirtIO SCSI device device virtio_balloon # VirtIO Memory Balloon device -# HyperV drivers +# HyperV drivers and enhancement support +# NOTE: HYPERV depends on hyperv. They must be added or removed together. +options HYPERV # Hyper-V kernel infrastructure device hyperv # HyperV drivers # Xen HVM Guest Optimizations # NOTE: XENHVM depends on xenpci. They must be added or removed together. options XENHVM # Xen HVM kernel infrastructure device xenpci # Xen HVM Hypervisor services driver # VMware support device vmx # VMware VMXNET3 Ethernet Index: head/sys/i386/i386/apic_vector.s =================================================================== --- head/sys/i386/i386/apic_vector.s (revision 282211) +++ head/sys/i386/i386/apic_vector.s (revision 282212) @@ -1,323 +1,342 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz. * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD$ */ /* * Interrupt entry points for external interrupts triggered by I/O APICs * as well as IPI handlers. */ #include "opt_smp.h" #include #include #include #include "assym.s" .text SUPERALIGN_TEXT /* End Of Interrupt to APIC */ as_lapic_eoi: cmpl $0,x2apic_mode jne 1f movl lapic_map,%eax movl $0,LA_EOI(%eax) ret 1: movl $MSR_APIC_EOI,%ecx xorl %eax,%eax xorl %edx,%edx wrmsr ret /* * I/O Interrupt Entry Point. Rather than having one entry point for * each interrupt source, we use one entry point for each 32-bit word * in the ISR. The handler determines the highest bit set in the ISR, * translates that into a vector, and passes the vector to the * lapic_handle_intr() function. */ #define ISR_VEC(index, vec_name) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ SET_KERNEL_SREGS ; \ cld ; \ FAKE_MCOUNT(TF_EIP(%esp)) ; \ cmpl $0,x2apic_mode ; \ je 1f ; \ movl $(MSR_APIC_ISR0 + index),%ecx ; \ rdmsr ; \ jmp 2f ; \ 1: ; \ movl lapic_map, %edx ;/* pointer to local APIC */ \ movl LA_ISR + 16 * (index)(%edx), %eax ; /* load ISR */ \ 2: ; \ bsrl %eax, %eax ; /* index of highest set bit in ISR */ \ jz 3f ; \ addl $(32 * index),%eax ; \ pushl %esp ; \ pushl %eax ; /* pass the IRQ */ \ call lapic_handle_intr ; \ addl $8, %esp ; /* discard parameter */ \ 3: ; \ MEXITCOUNT ; \ jmp doreti /* * Handle "spurious INTerrupts". * Notes: * This is different than the "spurious INTerrupt" generated by an * 8259 PIC for missing INTs. See the APIC documentation for details. * This routine should NOT do an 'EOI' cycle. */ .text SUPERALIGN_TEXT IDTVEC(spuriousint) /* No EOI cycle used here */ iret ISR_VEC(1, apic_isr1) ISR_VEC(2, apic_isr2) ISR_VEC(3, apic_isr3) ISR_VEC(4, apic_isr4) ISR_VEC(5, apic_isr5) ISR_VEC(6, apic_isr6) ISR_VEC(7, apic_isr7) /* * Local APIC periodic timer handler. */ .text SUPERALIGN_TEXT IDTVEC(timerint) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp call lapic_handle_timer add $4, %esp MEXITCOUNT jmp doreti /* * Local APIC CMCI handler. */ .text SUPERALIGN_TEXT IDTVEC(cmcint) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) call lapic_handle_cmc MEXITCOUNT jmp doreti /* * Local APIC error interrupt handler. */ .text SUPERALIGN_TEXT IDTVEC(errorint) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) call lapic_handle_error MEXITCOUNT jmp doreti #ifdef XENHVM /* * Xen event channel upcall interrupt handler. * Only used when the hypervisor supports direct vector callbacks. */ .text SUPERALIGN_TEXT IDTVEC(xen_intr_upcall) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp call xen_intr_handle_upcall add $4, %esp MEXITCOUNT jmp doreti #endif +#ifdef HYPERV +/* + * This is the Hyper-V vmbus channel direct callback interrupt. + * Only used when it is running on Hyper-V. 
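+ * Like the Xen upcall entry above, the entry point below pushes a
+ * trapframe, loads the kernel segment registers, and passes the frame
+ * to the C-level hv_vector_handler() before returning through doreti.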
+ */ + .text + SUPERALIGN_TEXT +IDTVEC(hv_vmbus_callback) + PUSH_FRAME + SET_KERNEL_SREGS + cld + FAKE_MCOUNT(TF_EIP(%esp)) + pushl %esp + call hv_vector_handler + add $4, %esp + MEXITCOUNT + jmp doreti +#endif + #ifdef SMP /* * Global address space TLB shootdown. */ .text SUPERALIGN_TEXT invltlb_ret: call as_lapic_eoi POP_FRAME iret SUPERALIGN_TEXT IDTVEC(invltlb) PUSH_FRAME SET_KERNEL_SREGS cld call invltlb_handler jmp invltlb_ret /* * Single page TLB shootdown */ .text SUPERALIGN_TEXT IDTVEC(invlpg) PUSH_FRAME SET_KERNEL_SREGS cld call invlpg_handler jmp invltlb_ret /* * Page range TLB shootdown. */ .text SUPERALIGN_TEXT IDTVEC(invlrng) PUSH_FRAME SET_KERNEL_SREGS cld call invlrng_handler jmp invltlb_ret /* * Invalidate cache. */ .text SUPERALIGN_TEXT IDTVEC(invlcache) PUSH_FRAME SET_KERNEL_SREGS cld call invlcache_handler jmp invltlb_ret /* * Handler for IPIs sent via the per-cpu IPI bitmap. */ #ifndef XEN .text SUPERALIGN_TEXT IDTVEC(ipi_intr_bitmap_handler) PUSH_FRAME SET_KERNEL_SREGS cld call as_lapic_eoi FAKE_MCOUNT(TF_EIP(%esp)) call ipi_bitmap_handler MEXITCOUNT jmp doreti #endif /* * Executed by a CPU when it receives an IPI_STOP from another CPU. */ .text SUPERALIGN_TEXT IDTVEC(cpustop) PUSH_FRAME SET_KERNEL_SREGS cld call as_lapic_eoi call cpustop_handler POP_FRAME iret /* * Executed by a CPU when it receives an IPI_SUSPEND from another CPU. */ #ifndef XEN .text SUPERALIGN_TEXT IDTVEC(cpususpend) PUSH_FRAME SET_KERNEL_SREGS cld call as_lapic_eoi call cpususpend_handler POP_FRAME jmp doreti_iret #endif /* * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU. * * - Calls the generic rendezvous action function. */ .text SUPERALIGN_TEXT IDTVEC(rendezvous) PUSH_FRAME SET_KERNEL_SREGS cld #ifdef COUNT_IPIS movl PCPU(CPUID), %eax movl ipi_rendezvous_counts(,%eax,4), %eax incl (%eax) #endif call smp_rendezvous_action call as_lapic_eoi POP_FRAME iret #endif /* SMP */ Index: head/sys/x86/include/apicvar.h =================================================================== --- head/sys/x86/include/apicvar.h (revision 282211) +++ head/sys/x86/include/apicvar.h (revision 282212) @@ -1,466 +1,467 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _X86_APICVAR_H_ #define _X86_APICVAR_H_ /* * Local && I/O APIC variable definitions. */ /* * Layout of local APIC interrupt vectors: * * 0xff (255) +-------------+ * | | 15 (Spurious / IPIs / Local Interrupts) * 0xf0 (240) +-------------+ * | | 14 (I/O Interrupts / Timer) * 0xe0 (224) +-------------+ * | | 13 (I/O Interrupts) * 0xd0 (208) +-------------+ * | | 12 (I/O Interrupts) * 0xc0 (192) +-------------+ * | | 11 (I/O Interrupts) * 0xb0 (176) +-------------+ * | | 10 (I/O Interrupts) * 0xa0 (160) +-------------+ * | | 9 (I/O Interrupts) * 0x90 (144) +-------------+ * | | 8 (I/O Interrupts / System Calls) * 0x80 (128) +-------------+ * | | 7 (I/O Interrupts) * 0x70 (112) +-------------+ * | | 6 (I/O Interrupts) * 0x60 (96) +-------------+ * | | 5 (I/O Interrupts) * 0x50 (80) +-------------+ * | | 4 (I/O Interrupts) * 0x40 (64) +-------------+ * | | 3 (I/O Interrupts) * 0x30 (48) +-------------+ * | | 2 (ATPIC Interrupts) * 0x20 (32) +-------------+ * | | 1 (Exceptions, traps, faults, etc.) * 0x10 (16) +-------------+ * | | 0 (Exceptions, traps, faults, etc.) * 0x00 (0) +-------------+ * * Note: 0x80 needs to be handled specially and not allocated to an * I/O device! */ #define MAX_APIC_ID 0xfe #define APIC_ID_ALL 0xff /* I/O Interrupts are used for external devices such as ISA, PCI, etc. */ #define APIC_IO_INTS (IDT_IO_INTS + 16) #define APIC_NUM_IOINTS 191 /* The timer interrupt is used for clock handling and drives hardclock, etc. */ #define APIC_TIMER_INT (APIC_IO_INTS + APIC_NUM_IOINTS) /* ********************* !!! WARNING !!! ****************************** * Each local apic has an interrupt receive fifo that is two entries deep * for each interrupt priority class (higher 4 bits of interrupt vector). * Once the fifo is full the APIC can no longer receive interrupts for this * class and sending IPIs from other CPUs will be blocked. * To avoid deadlocks there should be no more than two IPI interrupts * pending at the same time. * Currently this is guaranteed by dividing the IPIs in two groups that have * each at most one IPI interrupt pending. The first group is protected by the * smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user * at a time) The second group uses a single interrupt and a bitmap to avoid * redundant IPI interrupts. */ /* Interrupts for local APIC LVT entries other than the timer. */ #define APIC_LOCAL_INTS 240 #define APIC_ERROR_INT APIC_LOCAL_INTS #define APIC_THERMAL_INT (APIC_LOCAL_INTS + 1) #define APIC_CMC_INT (APIC_LOCAL_INTS + 2) #define APIC_IPI_INTS (APIC_LOCAL_INTS + 3) #define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */ #define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */ #define IPI_INVLPG (APIC_IPI_INTS + 2) #define IPI_INVLRNG (APIC_IPI_INTS + 3) #define IPI_INVLCACHE (APIC_IPI_INTS + 4) /* Vector to handle bitmap based IPIs */ #define IPI_BITMAP_VECTOR (APIC_IPI_INTS + 5) /* IPIs handled by IPI_BITMAP_VECTOR */ #define IPI_AST 0 /* Generate software trap. */ #define IPI_PREEMPT 1 #define IPI_HARDCLOCK 2 #define IPI_BITMAP_LAST IPI_HARDCLOCK #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 6) /* Stop CPU until restarted. */ #define IPI_SUSPEND (APIC_IPI_INTS + 7) /* Suspend CPU until restarted. */ #ifdef __i386__ #define IPI_LAZYPMAP (APIC_IPI_INTS + 8) /* Lazy pmap release. 
*/ #define IPI_DYN_FIRST (APIC_IPI_INTS + 9) #else #define IPI_DYN_FIRST (APIC_IPI_INTS + 8) #endif #define IPI_DYN_LAST (254) /* IPIs allocated at runtime */ /* * IPI_STOP_HARD does not need to occupy a slot in the IPI vector space since * it is delivered using an NMI anyway. */ #define IPI_STOP_HARD 255 /* Stop CPU with an NMI. */ /* * The spurious interrupt can share the priority class with the IPIs since * it is not a normal interrupt. (Does not use the APIC's interrupt FIFO) */ #define APIC_SPURIOUS_INT 255 #ifndef LOCORE #define APIC_IPI_DEST_SELF -1 #define APIC_IPI_DEST_ALL -2 #define APIC_IPI_DEST_OTHERS -3 #define APIC_BUS_UNKNOWN -1 #define APIC_BUS_ISA 0 #define APIC_BUS_EISA 1 #define APIC_BUS_PCI 2 #define APIC_BUS_MAX APIC_BUS_PCI #define IRQ_EXTINT (NUM_IO_INTS + 1) #define IRQ_NMI (NUM_IO_INTS + 2) #define IRQ_SMI (NUM_IO_INTS + 3) #define IRQ_DISABLED (NUM_IO_INTS + 4) /* * An APIC enumerator is a pseudo bus driver that enumerates APICs, * including CPUs and I/O APICs. */ struct apic_enumerator { const char *apic_name; int (*apic_probe)(void); int (*apic_probe_cpus)(void); int (*apic_setup_local)(void); int (*apic_setup_io)(void); SLIST_ENTRY(apic_enumerator) apic_next; }; inthand_t IDTVEC(apic_isr1), IDTVEC(apic_isr2), IDTVEC(apic_isr3), IDTVEC(apic_isr4), IDTVEC(apic_isr5), IDTVEC(apic_isr6), IDTVEC(apic_isr7), IDTVEC(cmcint), IDTVEC(errorint), IDTVEC(spuriousint), IDTVEC(timerint); extern vm_paddr_t lapic_paddr; extern int apic_cpuids[]; void apic_register_enumerator(struct apic_enumerator *enumerator); void *ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase); int ioapic_disable_pin(void *cookie, u_int pin); int ioapic_get_vector(void *cookie, u_int pin); void ioapic_register(void *cookie); int ioapic_remap_vector(void *cookie, u_int pin, int vector); int ioapic_set_bus(void *cookie, u_int pin, int bus_type); int ioapic_set_extint(void *cookie, u_int pin); int ioapic_set_nmi(void *cookie, u_int pin); int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol); int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger); int ioapic_set_smi(void *cookie, u_int pin); /* * Struct containing pointers to APIC functions whose * implementation is run-time selectable.
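 * For example (a sketch with hypothetical names, not code from this
 * file), an xAPIC backend might wire up its implementation as
 *
 *	struct apic_ops apic_ops = {
 *		.eoi = native_lapic_eoi,
 *		...
 *	};
 *
 * so that the lapic_eoi() inline below dispatches to whichever backend
 * was selected at boot.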
*/ struct apic_ops { void (*create)(u_int, int); void (*init)(vm_paddr_t); void (*xapic_mode)(void); void (*setup)(int); void (*dump)(const char *); void (*disable)(void); void (*eoi)(void); int (*id)(void); int (*intr_pending)(u_int); void (*set_logical_id)(u_int, u_int, u_int); u_int (*cpuid)(u_int); /* Vectors */ u_int (*alloc_vector)(u_int, u_int); u_int (*alloc_vectors)(u_int, u_int *, u_int, u_int); void (*enable_vector)(u_int, u_int); void (*disable_vector)(u_int, u_int); void (*free_vector)(u_int, u_int, u_int); /* PMC */ int (*enable_pmc)(void); void (*disable_pmc)(void); void (*reenable_pmc)(void); /* CMC */ void (*enable_cmc)(void); /* IPI */ void (*ipi_raw)(register_t, u_int); void (*ipi_vectored)(u_int, int); int (*ipi_wait)(int); int (*ipi_alloc)(inthand_t *ipifunc); void (*ipi_free)(int vector); /* LVT */ int (*set_lvt_mask)(u_int, u_int, u_char); int (*set_lvt_mode)(u_int, u_int, u_int32_t); int (*set_lvt_polarity)(u_int, u_int, enum intr_polarity); int (*set_lvt_triggermode)(u_int, u_int, enum intr_trigger); }; extern struct apic_ops apic_ops; static inline void lapic_create(u_int apic_id, int boot_cpu) { apic_ops.create(apic_id, boot_cpu); } static inline void lapic_init(vm_paddr_t addr) { apic_ops.init(addr); } static inline void lapic_xapic_mode(void) { apic_ops.xapic_mode(); } static inline void lapic_setup(int boot) { apic_ops.setup(boot); } static inline void lapic_dump(const char *str) { apic_ops.dump(str); } static inline void lapic_disable(void) { apic_ops.disable(); } static inline void lapic_eoi(void) { apic_ops.eoi(); } static inline int lapic_id(void) { return (apic_ops.id()); } static inline int lapic_intr_pending(u_int vector) { return (apic_ops.intr_pending(vector)); } /* XXX: UNUSED */ static inline void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) { apic_ops.set_logical_id(apic_id, cluster, cluster_id); } static inline u_int apic_cpuid(u_int apic_id) { return (apic_ops.cpuid(apic_id)); } static inline u_int apic_alloc_vector(u_int apic_id, u_int irq) { return (apic_ops.alloc_vector(apic_id, irq)); } static inline u_int apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) { return (apic_ops.alloc_vectors(apic_id, irqs, count, align)); } static inline void apic_enable_vector(u_int apic_id, u_int vector) { apic_ops.enable_vector(apic_id, vector); } static inline void apic_disable_vector(u_int apic_id, u_int vector) { apic_ops.disable_vector(apic_id, vector); } static inline void apic_free_vector(u_int apic_id, u_int vector, u_int irq) { apic_ops.free_vector(apic_id, vector, irq); } static inline int lapic_enable_pmc(void) { return (apic_ops.enable_pmc()); } static inline void lapic_disable_pmc(void) { apic_ops.disable_pmc(); } static inline void lapic_reenable_pmc(void) { apic_ops.reenable_pmc(); } static inline void lapic_enable_cmc(void) { apic_ops.enable_cmc(); } static inline void lapic_ipi_raw(register_t icrlo, u_int dest) { apic_ops.ipi_raw(icrlo, dest); } static inline void lapic_ipi_vectored(u_int vector, int dest) { apic_ops.ipi_vectored(vector, dest); } static inline int lapic_ipi_wait(int delay) { return (apic_ops.ipi_wait(delay)); } static inline int lapic_ipi_alloc(inthand_t *ipifunc) { return (apic_ops.ipi_alloc(ipifunc)); } static inline void lapic_ipi_free(int vector) { return (apic_ops.ipi_free(vector)); } static inline int lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked) { return (apic_ops.set_lvt_mask(apic_id, lvt, masked)); } static inline int lapic_set_lvt_mode(u_int apic_id, u_int 
lvt, u_int32_t mode) { return (apic_ops.set_lvt_mode(apic_id, lvt, mode)); } static inline int lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol) { return (apic_ops.set_lvt_polarity(apic_id, lvt, pol)); } static inline int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger) { return (apic_ops.set_lvt_triggermode(apic_id, lvt, trigger)); } void lapic_handle_cmc(void); void lapic_handle_error(void); void lapic_handle_intr(int vector, struct trapframe *frame); void lapic_handle_timer(struct trapframe *frame); void xen_intr_handle_upcall(struct trapframe *frame); +void hv_vector_handler(struct trapframe *frame); extern int x2apic_mode; extern int lapic_eoi_suppression; #ifdef _SYS_SYSCTL_H_ SYSCTL_DECL(_hw_apic); #endif #endif /* !LOCORE */ #endif /* _X86_APICVAR_H_ */
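For context, the hv_vector_handler() declared above is supplied by the Hyper-V vmbus driver rather than by this header. A minimal sketch of the shape such a handler can take (assuming a driver dispatch routine named hv_vmbus_isr() and a SynIC SINT programmed with auto_eoi, so no explicit lapic_eoi() is issued; both are assumptions, not a description of the committed driver):

void
hv_vector_handler(struct trapframe *trap_frame)
{
	struct thread *td = curthread;

	/* Track nesting so the rest of the kernel sees interrupt context. */
	td->td_intr_nesting_level++;
	/* Dispatch to the vmbus driver's interrupt routine (name assumed). */
	hv_vmbus_isr(trap_frame);
	td->td_intr_nesting_level--;
}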