Index: head/sys/amd64/amd64/apic_vector.S
===================================================================
--- head/sys/amd64/amd64/apic_vector.S	(revision 282211)
+++ head/sys/amd64/amd64/apic_vector.S	(revision 282212)
@@ -1,335 +1,351 @@
 /*-
  * Copyright (c) 1989, 1990 William F. Jolitz.
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * from: vector.s, 386BSD 0.1 unknown origin
  * $FreeBSD$
  */
 
 /*
  * Interrupt entry points for external interrupts triggered by I/O APICs
  * as well as IPI handlers.
  */
 
 #include "opt_smp.h"
 
 #include <machine/asmacros.h>
 #include <machine/specialreg.h>
 #include <x86/apicreg.h>
 
 #include "assym.s"
 
 #ifdef SMP
 #define LK	lock ;
 #else
 #define LK
 #endif
 
 	.text
 	SUPERALIGN_TEXT
 	/* End Of Interrupt to APIC */
 as_lapic_eoi:
 	cmpl	$0,x2apic_mode
 	jne	1f
 	movq	lapic_map,%rax
 	movl	$0,LA_EOI(%rax)
 	ret
 1:
 	movl	$MSR_APIC_EOI,%ecx
 	xorl	%eax,%eax
 	xorl	%edx,%edx
 	wrmsr
 	ret
 
 /*
  * I/O Interrupt Entry Point.  Rather than having one entry point for
  * each interrupt source, we use one entry point for each 32-bit word
  * in the ISR.  The handler determines the highest bit set in the ISR,
  * translates that into a vector, and passes the vector to the
  * lapic_handle_intr() function.
  */
 #define	ISR_VEC(index, vec_name)					\
 	.text ;								\
 	SUPERALIGN_TEXT ;						\
 IDTVEC(vec_name) ;							\
 	PUSH_FRAME ;							\
 	FAKE_MCOUNT(TF_RIP(%rsp)) ;					\
 	cmpl	$0,x2apic_mode ;					\
 	je	1f ;							\
 	movl	$(MSR_APIC_ISR0 + index),%ecx ;				\
 	rdmsr ;								\
 	jmp	2f ;							\
 1: ;									\
 	movq	lapic_map, %rdx ;	/* pointer to local APIC */	\
 	movl	LA_ISR + 16 * (index)(%rdx), %eax ;	/* load ISR */	\
 2: ;									\
 	bsrl	%eax, %eax ;	/* index of highest set bit in ISR */	\
 	jz	3f ;							\
 	addl	$(32 * index),%eax ;					\
 	movq	%rsp, %rsi ;						\
 	movl	%eax, %edi ;	/* pass the IRQ */			\
 	call	lapic_handle_intr ;					\
 3: ;									\
 	MEXITCOUNT ;							\
 	jmp	doreti
 
 /*
  * Handle "spurious INTerrupts".
  * Notes:
  *  This is different than the "spurious INTerrupt" generated by an
  *  8259 PIC for missing INTs.  See the APIC documentation for details.
  *  This routine should NOT do an 'EOI' cycle.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(spuriousint)
 
 	/* No EOI cycle used here */
 
 	jmp	doreti_iret
 
 	ISR_VEC(1, apic_isr1)
 	ISR_VEC(2, apic_isr2)
 	ISR_VEC(3, apic_isr3)
 	ISR_VEC(4, apic_isr4)
 	ISR_VEC(5, apic_isr5)
 	ISR_VEC(6, apic_isr6)
 	ISR_VEC(7, apic_isr7)
 
 /*
  * Local APIC periodic timer handler.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(timerint)
 	PUSH_FRAME
 	FAKE_MCOUNT(TF_RIP(%rsp))
 	movq	%rsp, %rdi
 	call	lapic_handle_timer
 	MEXITCOUNT
 	jmp	doreti
 
 /*
  * Local APIC CMCI handler.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(cmcint)
 	PUSH_FRAME
 	FAKE_MCOUNT(TF_RIP(%rsp))
 	call	lapic_handle_cmc
 	MEXITCOUNT
 	jmp	doreti
 
 /*
  * Local APIC error interrupt handler.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(errorint)
 	PUSH_FRAME
 	FAKE_MCOUNT(TF_RIP(%rsp))
 	call	lapic_handle_error
 	MEXITCOUNT
 	jmp	doreti
 
 #ifdef XENHVM
 /*
  * Xen event channel upcall interrupt handler.
  * Only used when the hypervisor supports direct vector callbacks.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(xen_intr_upcall)
 	PUSH_FRAME
 	FAKE_MCOUNT(TF_RIP(%rsp))
 	movq	%rsp, %rdi
 	call	xen_intr_handle_upcall
 	MEXITCOUNT
 	jmp	doreti
 #endif
 
+#ifdef HYPERV
+/*
+ * This is the Hyper-V vmbus channel direct callback interrupt.
+ * Only used when it is running on Hyper-V.
+ */
+	.text
+	SUPERALIGN_TEXT
+IDTVEC(hv_vmbus_callback)
+	PUSH_FRAME
+	FAKE_MCOUNT(TF_RIP(%rsp))
+	movq	%rsp, %rdi
+	call	hv_vector_handler
+	MEXITCOUNT
+	jmp	doreti
+#endif
+
 #ifdef SMP
 /*
  * Global address space TLB shootdown.
  */
 	.text
 
 #define NAKE_INTR_CS	24
 
 	SUPERALIGN_TEXT
 invltlb_ret:
 	call	as_lapic_eoi
 	POP_FRAME
 	jmp	doreti_iret
 
 	SUPERALIGN_TEXT
 IDTVEC(invltlb_pcid)
 	PUSH_FRAME
 	call	invltlb_pcid_handler
 	jmp	invltlb_ret
 
 	SUPERALIGN_TEXT
 IDTVEC(invltlb)
 	PUSH_FRAME
 	call	invltlb_handler
 	jmp	invltlb_ret
 
 /*
  * Single page TLB shootdown
  */
 	.text
 
 	SUPERALIGN_TEXT
 IDTVEC(invlpg_pcid)
 	PUSH_FRAME
 	call	invlpg_pcid_handler
 	jmp	invltlb_ret
 
 	SUPERALIGN_TEXT
 IDTVEC(invlpg)
 	PUSH_FRAME
 	call	invlpg_handler
 	jmp	invltlb_ret
 
 /*
  * Page range TLB shootdown.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(invlrng)
 	PUSH_FRAME
 	call	invlrng_handler
 	jmp	invltlb_ret
 
 /*
  * Invalidate cache.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(invlcache)
 	PUSH_FRAME
 	call	invlcache_handler
 	jmp	invltlb_ret
 
 /*
  * Handler for IPIs sent via the per-cpu IPI bitmap.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(ipi_intr_bitmap_handler)
 	PUSH_FRAME
 	call	as_lapic_eoi
 	FAKE_MCOUNT(TF_RIP(%rsp))
 	call	ipi_bitmap_handler
 	MEXITCOUNT
 	jmp	doreti
 
 /*
  * Executed by a CPU when it receives an IPI_STOP from another CPU.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(cpustop)
 	PUSH_FRAME
 	call	as_lapic_eoi
 	call	cpustop_handler
 	jmp	doreti
 
 /*
  * Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(cpususpend)
 	PUSH_FRAME
 	call	cpususpend_handler
 	call	as_lapic_eoi
 	jmp	doreti
 
 /*
  * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU.
  *
  * - Calls the generic rendezvous action function.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(rendezvous)
 	PUSH_FRAME
 #ifdef COUNT_IPIS
 	movl	PCPU(CPUID), %eax
 	movq	ipi_rendezvous_counts(,%rax,8), %rax
 	incq	(%rax)
 #endif
 	call	smp_rendezvous_action
 	call	as_lapic_eoi
 	jmp	doreti
 
 /*
  * IPI handler whose purpose is to interrupt the CPU with minimum overhead.
  * This is used by bhyve to force a host cpu executing in guest context to
  * trap into the hypervisor.
  *
  * This handler is different from other IPI handlers in the following aspects:
  *
  * 1. It doesn't push a trapframe on the stack.
  *
  * This implies that a DDB backtrace involving 'justreturn' will skip the
  * function that was interrupted by this handler.
  *
  * 2. It doesn't 'swapgs' when userspace is interrupted.
  *
  * The 'justreturn' handler does not access any pcpu data so it is not an
  * issue.  Moreover the 'justreturn' handler can only be interrupted by an NMI
  * whose handler already doesn't trust GS.base when kernel code is interrupted.
  */
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(justreturn)
 	pushq	%rax
 	pushq	%rcx
 	pushq	%rdx
 	call	as_lapic_eoi
 	popq	%rdx
 	popq	%rcx
 	popq	%rax
 	jmp	doreti_iret
 
 #endif /* SMP */
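A quick aside on the ISR_VEC mechanism above: the vector arithmetic is easy to mirror in C. The sketch below is purely illustrative (it is not part of this commit); it models how one 32-bit ISR word plus its word index yields the vector that lapic_handle_intr() receives, with __builtin_clz standing in for bsrl.

	#include <stdint.h>

	/*
	 * Illustrative C model of ISR_VEC's vector math: "index" is the
	 * 32-bit ISR word number (1-7 for apic_isr1..apic_isr7) and
	 * "isr_word" is that word's contents.
	 */
	static int
	isr_word_to_vector(int index, uint32_t isr_word)
	{
		int bit;

		if (isr_word == 0)
			return (-1);	/* nothing pending; mirrors the "jz 3f" path */
		bit = 31 - __builtin_clz(isr_word);	/* highest set bit, like bsrl */
		return (32 * index + bit);	/* like "addl $(32 * index),%eax" */
	}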
Index: head/sys/amd64/conf/GENERIC
===================================================================
--- head/sys/amd64/conf/GENERIC	(revision 282211)
+++ head/sys/amd64/conf/GENERIC	(revision 282212)
@@ -1,356 +1,358 @@
 #
 # GENERIC -- Generic kernel configuration file for FreeBSD/amd64
 #
 # For more information on this file, please read the config(5) manual page,
 # and/or the handbook section on Kernel Configuration Files:
 #
 #    http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
 #
 # The handbook is also available locally in /usr/share/doc/handbook
 # if you've installed the doc distribution, otherwise always see the
 # FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the
 # latest information.
 #
 # An exhaustive list of options and more detailed explanations of the
 # device lines is also present in the ../../conf/NOTES and NOTES files.
 # If you are in doubt as to the purpose or necessity of a line, check first
 # in NOTES.
 #
 # $FreeBSD$
 
 cpu		HAMMER
 ident		GENERIC
 
 makeoptions	DEBUG=-g		# Build kernel with gdb(1) debug symbols
 makeoptions	WITH_CTF=1		# Run ctfconvert(1) for DTrace support
 
 options 	SCHED_ULE		# ULE scheduler
 options 	PREEMPTION		# Enable kernel thread preemption
 options 	INET			# InterNETworking
 options 	INET6			# IPv6 communications protocols
 options 	TCP_OFFLOAD		# TCP offload
 options 	SCTP			# Stream Control Transmission Protocol
 options 	FFS			# Berkeley Fast Filesystem
 options 	SOFTUPDATES		# Enable FFS soft updates support
 options 	UFS_ACL			# Support for access control lists
 options 	UFS_DIRHASH		# Improve performance on big directories
 options 	UFS_GJOURNAL		# Enable gjournal-based UFS journaling
 options 	QUOTA			# Enable disk quotas for UFS
 options 	MD_ROOT			# MD is a potential root device
 options 	NFSCL			# Network Filesystem Client
 options 	NFSD			# Network Filesystem Server
 options 	NFSLOCKD		# Network Lock Manager
 options 	NFS_ROOT		# NFS usable as /, requires NFSCL
 options 	MSDOSFS			# MSDOS Filesystem
 options 	CD9660			# ISO 9660 Filesystem
 options 	PROCFS			# Process filesystem (requires PSEUDOFS)
 options 	PSEUDOFS		# Pseudo-filesystem framework
 options 	GEOM_PART_GPT		# GUID Partition Tables.
 options 	GEOM_RAID		# Soft RAID functionality.
 options 	GEOM_LABEL		# Provides labelization
 options 	COMPAT_FREEBSD32	# Compatible with i386 binaries
 options 	COMPAT_FREEBSD4		# Compatible with FreeBSD4
 options 	COMPAT_FREEBSD5		# Compatible with FreeBSD5
 options 	COMPAT_FREEBSD6		# Compatible with FreeBSD6
 options 	COMPAT_FREEBSD7		# Compatible with FreeBSD7
 options 	COMPAT_FREEBSD9		# Compatible with FreeBSD9
 options 	COMPAT_FREEBSD10	# Compatible with FreeBSD10
 options 	SCSI_DELAY=5000		# Delay (in ms) before probing SCSI
 options 	KTRACE			# ktrace(1) support
 options 	STACK			# stack(9) support
 options 	SYSVSHM			# SYSV-style shared memory
 options 	SYSVMSG			# SYSV-style message queues
 options 	SYSVSEM			# SYSV-style semaphores
 options 	_KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
 options 	PRINTF_BUFR_SIZE=128	# Prevent printf output being interspersed.
options KBD_INSTALL_CDEV # install a CDEV entry in /dev options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options KDTRACE_FRAME # Ensure frames are compiled in options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data options INCLUDE_CONFIG_FILE # Include this file in kernel # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. # For full debugger support use (turn off in stable branch): options DDB # Support DDB. options GDB # Support remote GDB. options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS options WITNESS # Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones # Make an SMP-capable kernel by default options SMP # Symmetric MultiProcessor Kernel # CPU frequency control device cpufreq # Bus support. device acpi options ACPI_DMAR device pci options PCI_IOV # PCI SR-IOV support # Floppy drives device fdc # ATA controllers device ahci # AHCI-compatible SATA controllers device ata # Legacy ATA/SATA controllers options ATA_STATIC_ID # Static device numbering device mvs # Marvell 88SX50XX/88SX60XX/88SX70XX/SoC SATA device siis # SiliconImage SiI3124/SiI3132/SiI3531 SATA # SCSI Controllers device ahc # AHA2940 and onboard AIC7xxx devices options AHC_REG_PRETTY_PRINT # Print register bitfields in debug # output. Adds ~128k to driver. device ahd # AHA39320/29320 and onboard AIC79xx devices options AHD_REG_PRETTY_PRINT # Print register bitfields in debug # output. Adds ~215k to driver. device esp # AMD Am53C974 (Tekram DC-390(T)) device hptiop # Highpoint RocketRaid 3xxx series device isp # Qlogic family #device ispfw # Firmware for QLogic HBAs- normally a module device mpt # LSI-Logic MPT-Fusion device mps # LSI-Logic MPT-Fusion 2 device mpr # LSI-Logic MPT-Fusion 3 #device ncr # NCR/Symbios Logic device sym # NCR/Symbios Logic (newer chipsets + those of `ncr') device trm # Tekram DC395U/UW/F DC315U adapters device adv # Advansys SCSI adapters device adw # Advansys wide SCSI adapters device aic # Adaptec 15[012]x SCSI adapters, AIC-6[23]60. 
device bt # Buslogic/Mylex MultiMaster SCSI adapters device isci # Intel C600 SAS controller # ATA/SCSI peripherals device scbus # SCSI bus (required for ATA/SCSI) device ch # SCSI media changers device da # Direct Access (disks) device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) device ses # Enclosure Services (SES and SAF-TE) #device ctl # CAM Target Layer # RAID controllers interfaced to the SCSI subsystem device amr # AMI MegaRAID device arcmsr # Areca SATA II RAID device ciss # Compaq Smart RAID 5* device dpt # DPT Smartcache III, IV - See NOTES for options device hptmv # Highpoint RocketRAID 182x device hptnr # Highpoint DC7280, R750 device hptrr # Highpoint RocketRAID 17xx, 22xx, 23xx, 25xx device hpt27xx # Highpoint RocketRAID 27xx device iir # Intel Integrated RAID device ips # IBM (Adaptec) ServeRAID device mly # Mylex AcceleRAID/eXtremeRAID device twa # 3ware 9000 series PATA/SATA RAID device tws # LSI 3ware 9750 SATA+SAS 6Gb/s RAID controller # RAID controllers device aac # Adaptec FSA RAID device aacp # SCSI passthrough for aac (requires CAM) device aacraid # Adaptec by PMC RAID device ida # Compaq Smart RAID device mfi # LSI MegaRAID SAS device mlx # Mylex DAC960 family device mrsas # LSI/Avago MegaRAID SAS/SATA, 6Gb/s and 12Gb/s #XXX pointer/int warnings #device pst # Promise Supertrak SX6000 device twe # 3ware ATA RAID # atkbdc0 controls both the keyboard and the PS/2 mouse device atkbdc # AT keyboard controller device atkbd # AT keyboard device psm # PS/2 mouse device kbdmux # keyboard multiplexer device vga # VGA video card driver options VESA # Add support for VESA BIOS Extensions (VBE) device splash # Splash screen and screen saver support # syscons is the default console driver, resembling an SCO console device sc options SC_PIXEL_MODE # add support for the raster text mode # vt is the new video console driver device vt device vt_vga device vt_efifb device agp # support several AGP chipsets # PCCARD (PCMCIA) support # PCMCIA and cardbus bridge support device cbb # cardbus (yenta) bridge device pccard # PC Card (16-bit) bus device cardbus # CardBus (32-bit) bus # Serial (COM) ports device uart # Generic UART driver # Parallel port device ppc device ppbus # Parallel port bus (required) device lpt # Printer device ppi # Parallel port interface device #device vpo # Requires scbus and da device puc # Multi I/O cards and multi-channel UARTs # PCI Ethernet NICs. device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE device de # DEC/Intel DC21x4x (``Tulip'') device em # Intel PRO/1000 Gigabit Ethernet Family device igb # Intel PRO/1000 PCIE Server Gigabit Family device ix # Intel PRO/10GbE PCIE PF Ethernet device ixv # Intel PRO/10GbE PCIE VF Ethernet device ixl # Intel XL710 40Gbe PCIE Ethernet device ixlv # Intel XL710 40Gbe VF PCIE Ethernet device le # AMD Am7900 LANCE and Am79C9xx PCnet device ti # Alteon Networks Tigon I/II gigabit Ethernet device txp # 3Com 3cR990 (``Typhoon'') device vx # 3Com 3c590, 3c595 (``Vortex'') # PCI Ethernet NICs that use the common MII bus controller code. # NOTE: Be sure to keep the 'device miibus' line in order to use these NICs! 
device miibus # MII bus support device ae # Attansic/Atheros L2 FastEthernet device age # Attansic/Atheros L1 Gigabit Ethernet device alc # Atheros AR8131/AR8132 Ethernet device ale # Atheros AR8121/AR8113/AR8114 Ethernet device bce # Broadcom BCM5706/BCM5708 Gigabit Ethernet device bfe # Broadcom BCM440x 10/100 Ethernet device bge # Broadcom BCM570xx Gigabit Ethernet device cas # Sun Cassini/Cassini+ and NS DP83065 Saturn device dc # DEC/Intel 21143 and various workalikes device et # Agere ET1310 10/100/Gigabit Ethernet device fxp # Intel EtherExpress PRO/100B (82557, 82558) device gem # Sun GEM/Sun ERI/Apple GMAC device hme # Sun HME (Happy Meal Ethernet) device jme # JMicron JMC250 Gigabit/JMC260 Fast Ethernet device lge # Level 1 LXT1001 gigabit Ethernet device msk # Marvell/SysKonnect Yukon II Gigabit Ethernet device nfe # nVidia nForce MCP on-board Ethernet device nge # NatSemi DP83820 gigabit Ethernet device pcn # AMD Am79C97x PCI 10/100 (precedence over 'le') device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 device sf # Adaptec AIC-6915 (``Starfire'') device sge # Silicon Integrated Systems SiS190/191 device sis # Silicon Integrated Systems SiS 900/SiS 7016 device sk # SysKonnect SK-984x & SK-982x gigabit Ethernet device ste # Sundance ST201 (D-Link DFE-550TX) device stge # Sundance/Tamarack TC9021 gigabit Ethernet device tl # Texas Instruments ThunderLAN device tx # SMC EtherPower II (83c170 ``EPIC'') device vge # VIA VT612x gigabit Ethernet device vr # VIA Rhine, Rhine II device wb # Winbond W89C840F device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'') # Wireless NIC cards device wlan # 802.11 support options IEEE80211_DEBUG # enable debug msgs options IEEE80211_AMPDU_AGE # age frames in AMPDU reorder q's options IEEE80211_SUPPORT_MESH # enable 802.11s draft support device wlan_wep # 802.11 WEP support device wlan_ccmp # 802.11 CCMP support device wlan_tkip # 802.11 TKIP support device wlan_amrr # AMRR transmit rate control algorithm device an # Aironet 4500/4800 802.11 wireless NICs. device ath # Atheros NICs device ath_pci # Atheros pci/cardbus glue device ath_hal # pci/cardbus chip support options AH_SUPPORT_AR5416 # enable AR5416 tx/rx descriptors options AH_AR5416_INTERRUPT_MITIGATION # AR5416 interrupt mitigation options ATH_ENABLE_11N # Enable 802.11n support for AR5416 and later device ath_rate_sample # SampleRate tx rate control for ath #device bwi # Broadcom BCM430x/BCM431x wireless NICs. #device bwn # Broadcom BCM43xx wireless NICs. device ipw # Intel 2100 wireless NICs. device iwi # Intel 2200BG/2225BG/2915ABG wireless NICs. device iwn # Intel 4965/1000/5000/6000 wireless NICs. device malo # Marvell Libertas wireless NICs. device mwl # Marvell 88W8363 802.11n wireless NICs. device ral # Ralink Technology RT2500 wireless NICs. device wi # WaveLAN/Intersil/Symbol 802.11 wireless NICs. device wpi # Intel 3945ABG wireless NICs. # Pseudo devices. device loop # Network loopback device random # Entropy device device padlock_rng # VIA Padlock RNG device rdrand_rng # Intel Bull Mountain RNG device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. 
 device		bpf		# Berkeley packet filter
 
 # USB support
 options 	USB_DEBUG	# enable debug msgs
 device		uhci		# UHCI PCI->USB interface
 device		ohci		# OHCI PCI->USB interface
 device		ehci		# EHCI PCI->USB interface (USB 2.0)
 device		xhci		# XHCI PCI->USB interface (USB 3.0)
 device		usb		# USB Bus (required)
 device		ukbd		# Keyboard
 device		umass		# Disks/Mass storage - Requires scbus and da
 
 # Sound support
 device		sound		# Generic sound driver (required)
 device		snd_cmi		# CMedia CMI8338/CMI8738
 device		snd_csa		# Crystal Semiconductor CS461x/428x
 device		snd_emu10kx	# Creative SoundBlaster Live! and Audigy
 device		snd_es137x	# Ensoniq AudioPCI ES137x
 device		snd_hda		# Intel High Definition Audio
 device		snd_ich		# Intel, NVidia and other ICH AC'97 Audio
 device		snd_via8233	# VIA VT8233x Audio
 
 # MMC/SD
 device		mmc		# MMC/SD bus
 device		mmcsd		# MMC/SD memory card
 device		sdhci		# Generic PCI SD Host Controller
 
 # VirtIO support
 device		virtio		# Generic VirtIO bus (required)
 device		virtio_pci	# VirtIO PCI device
 device		vtnet		# VirtIO Ethernet device
 device		virtio_blk	# VirtIO Block device
 device		virtio_scsi	# VirtIO SCSI device
 device		virtio_balloon	# VirtIO Memory Balloon device
 
-# HyperV drivers
+# HyperV drivers and enhancement support
+# NOTE: HYPERV depends on hyperv.  They must be added or removed together.
+options 	HYPERV		# Hyper-V kernel infrastructure
 device		hyperv		# HyperV drivers
 
 # Xen HVM Guest Optimizations
 # NOTE: XENHVM depends on xenpci.  They must be added or removed together.
 options 	XENHVM		# Xen HVM kernel infrastructure
 device		xenpci		# Xen HVM Hypervisor services driver
 
 # VMware support
 device		vmx		# VMware VMXNET3 Ethernet
 
 # Netmap provides direct access to TX/RX rings on supported NICs
 device		netmap		# netmap(4) support
Index: head/sys/amd64/conf/NOTES
===================================================================
--- head/sys/amd64/conf/NOTES	(revision 282211)
+++ head/sys/amd64/conf/NOTES	(revision 282212)
@@ -1,670 +1,672 @@
 #
 # NOTES -- Lines that can be cut/pasted into kernel and hints configs.
 #
 # This file contains machine dependent kernel configuration notes.  For
 # machine independent notes, look in /sys/conf/NOTES.
 #
 # $FreeBSD$
 #
 
 #
 # We want LINT to cover profiling as well.
 profile		2
 
 #
 # Enable the kernel DTrace hooks which are required to load the DTrace
 # kernel modules.
 #
 options 	KDTRACE_HOOKS
 
 
 #####################################################################
 # SMP OPTIONS:
 #
 # Notes:
 #
 # IPI_PREEMPTION instructs the kernel to preempt threads running on other
 #	  CPUS if needed.  Relies on the PREEMPTION option
 
 # Optional:
 options 	IPI_PREEMPTION
 device		atpic			# Optional legacy pic support
 device		mptable			# Optional MPSPEC mptable support
 
 #
 # Watchdog routines.
 #
 options 	MP_WATCHDOG
 
 # Debugging options.
 #
 options 	COUNT_XINVLTLB_HITS	# Counters for TLB events
 options 	COUNT_IPIS		# Per-CPU IPI interrupt counters
 
 
 #####################################################################
 # CPU OPTIONS
 
 #
 # You must specify at least one CPU (the one you intend to run on);
 # deleting the specification for CPUs you don't need to use may make
 # parts of the system run faster.
 #
 cpu		HAMMER			# aka K8, aka Opteron & Athlon64
 
 #
 # Options for CPU features.
 #
 
 #
 # PERFMON causes the driver for Pentium/Pentium Pro performance counters
 # to be compiled.  See perfmon(4) for more information.
 #
 #XXX#options 	PERFMON
 
 
 #####################################################################
 # NETWORKING OPTIONS
 
 #
 # DEVICE_POLLING adds support for mixed interrupt-polling handling
 # of network device drivers, which has significant benefits in terms
 # of robustness to overloads and responsivity, as well as permitting
 # accurate scheduling of the CPU time between kernel network processing
 # and other activities.  The drawback is a moderate (up to 1/HZ seconds)
 # potential increase in response times.
 # It is strongly recommended to use HZ=1000 or 2000 with DEVICE_POLLING
 # to achieve smoother behaviour.
 # Additionally, you can enable/disable polling at runtime with help of
 # the ifconfig(8) utility, and select the CPU fraction reserved to
 # userland with the sysctl variable kern.polling.user_frac
 # (default 50, range 0..100).
 #
 # Not all device drivers support this mode of operation at the time of
 # this writing.  See polling(4) for more details.
 
 options 	DEVICE_POLLING
 
 # BPF_JITTER adds support for BPF just-in-time compiler.
 
 options 	BPF_JITTER
 
 # OpenFabrics Enterprise Distribution (Infiniband).
 options 	OFED
 options 	OFED_DEBUG_INIT
 
 # Sockets Direct Protocol
 options 	SDP
 options 	SDP_DEBUG
 
 # IP over Infiniband
 options 	IPOIB
 options 	IPOIB_DEBUG
 options 	IPOIB_CM
 
 
 #####################################################################
 # CLOCK OPTIONS
 
 # Provide read/write access to the memory in the clock chip.
 device		nvram		# Access to rtc cmos via /dev/nvram
 
 
 #####################################################################
 # MISCELLANEOUS DEVICES AND OPTIONS
 
 device		speaker		#Play IBM BASIC-style noises out your speaker
 hint.speaker.0.at="isa"
 hint.speaker.0.port="0x61"
 device		gzip		#Exec gzipped a.out's.  REQUIRES COMPAT_AOUT!
 
 
 #####################################################################
 # HARDWARE BUS CONFIGURATION
 
 #
 # ISA bus
 #
 device		isa
 
 #
 # Options for `isa':
 #
 # AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A
 # interrupt controller.  This saves about 0.7-1.25 usec for each interrupt.
 # This option breaks suspend/resume on some portables.
 #
 # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A
 # interrupt controller.  This saves about 0.7-1.25 usec for each interrupt.
 # Automatic EOI is documented not to work for the slave with the
 # original i8259A, but it works for some clones and some integrated
 # versions.
 #
 # MAXMEM specifies the amount of RAM on the machine; if this is not
 # specified, FreeBSD will first read the amount of memory from the CMOS
 # RAM, so the amount of memory will initially be limited to 64MB or 16MB
 # depending on the BIOS.  If the BIOS reports 64MB, a memory probe will
 # then attempt to detect the installed amount of RAM.  If this probe
 # fails to detect >64MB RAM you will have to use the MAXMEM option.
 # The amount is in kilobytes, so for a machine with 128MB of RAM, it would
 # be 131072 (128 * 1024).
 #
 # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to
 # reset the CPU for reboot.  This is needed on some systems with broken
 # keyboard controllers.
 
 options 	AUTO_EOI_1
 #options 	AUTO_EOI_2
 
 options 	MAXMEM=(128*1024)
 #options 	BROKEN_KEYBOARD_RESET
 
 #
 # PCI bus & PCI options:
 #
 device		pci
 
 #
 # AGP GART support
 device		agp
 
 #
 # AGP debugging.
 #
 options 	AGP_DEBUG
 
 
 #####################################################################
 # HARDWARE DEVICE CONFIGURATION
 
 # To include support for VGA VESA video modes
 options 	VESA
 
 # Turn on extra debugging checks and output for VESA support.
options VESA_DEBUG device dpms # DPMS suspend & resume via VESA BIOS # x86 real mode BIOS emulator, required by atkbdc/dpms/vesa options X86BIOS # # Optional devices: # # PS/2 mouse device psm hint.psm.0.at="atkbdc" hint.psm.0.irq="12" # Options for psm: options PSM_HOOKRESUME #hook the system resume event, useful #for some laptops options PSM_RESETAFTERSUSPEND #reset the device at the resume event # The keyboard controller; it controls the keyboard and the PS/2 mouse. device atkbdc hint.atkbdc.0.at="isa" hint.atkbdc.0.port="0x060" # The AT keyboard device atkbd hint.atkbd.0.at="atkbdc" hint.atkbd.0.irq="1" # Options for atkbd: options ATKBD_DFLT_KEYMAP # specify the built-in keymap makeoptions ATKBD_DFLT_KEYMAP=fr.dvorak # `flags' for atkbd: # 0x01 Force detection of keyboard, else we always assume a keyboard # 0x02 Don't reset keyboard, useful for some newer ThinkPads # 0x03 Force detection and avoid reset, might help with certain # dockingstations # 0x04 Old-style (XT) keyboard support, useful for older ThinkPads # Video card driver for VGA adapters. device vga hint.vga.0.at="isa" # Options for vga: # Try the following option if the mouse pointer is not drawn correctly # or font does not seem to be loaded properly. May cause flicker on # some systems. options VGA_ALT_SEQACCESS # If you can dispense with some vga driver features, you may want to # use the following options to save some memory. #options VGA_NO_FONT_LOADING # don't save/load font #options VGA_NO_MODE_CHANGE # don't change video modes # Older video cards may require this option for proper operation. options VGA_SLOW_IOACCESS # do byte-wide i/o's to TS and GDC regs # The following option probably won't work with the LCD displays. options VGA_WIDTH90 # support 90 column modes # Debugging. options VGA_DEBUG # vt(4) drivers. device vt_vga # VGA device vt_efifb # EFI framebuffer # Linear framebuffer driver for S3 VESA 1.2 cards. Works on top of VESA. device s3pci # 3Dfx Voodoo Graphics, Voodoo II /dev/3dfx CDEV support. This will create # the /dev/3dfx0 device to work with glide implementations. This should get # linked to /dev/3dfx and /dev/voodoo. Note that this is not the same as # the tdfx DRI module from XFree86 and is completely unrelated. # # To enable Linuxulator support, one must also include COMPAT_LINUX in the # config as well. The other option is to load both as modules. device tdfx # Enable 3Dfx Voodoo support #XXX#device tdfx_linux # Enable Linuxulator support # # ACPI support using the Intel ACPI Component Architecture reference # implementation. # # ACPI_DEBUG enables the use of the debug.acpi.level and debug.acpi.layer # kernel environment variables to select initial debugging levels for the # Intel ACPICA code. (Note that the Intel code must also have USE_DEBUGGER # defined when it is built). device acpi options ACPI_DEBUG # The cpufreq(4) driver provides support for non-ACPI CPU frequency control device cpufreq # Direct Rendering modules for 3D acceleration. 
device drm # DRM core module required by DRM drivers device i915drm # Intel i830 through i915 device mach64drm # ATI Rage Pro, Rage Mobility P/M, Rage XL device mgadrm # AGP Matrox G200, G400, G450, G550 device r128drm # ATI Rage 128 device radeondrm # ATI Radeon device savagedrm # S3 Savage3D, Savage4 device sisdrm # SiS 300/305, 540, 630 device tdfxdrm # 3dfx Voodoo 3/4/5 and Banshee device viadrm # VIA options DRM_DEBUG # Include debug printfs (slow) # # Network interfaces: # # bxe: Broadcom NetXtreme II (BCM5771X/BCM578XX) PCIe 10Gb Ethernet # adapters. # ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503 # HP PC Lan+, various PC Card devices # (requires miibus) # ipw: Intel PRO/Wireless 2100 IEEE 802.11 adapter # Requires the ipw firmware module # iwi: Intel PRO/Wireless 2200BG/2225BG/2915ABG IEEE 802.11 adapters # Requires the iwi firmware module # iwn: Intel Wireless WiFi Link 1000/105/135/2000/4965/5000/6000/6050 abgn # 802.11 network adapters # Requires the iwn firmware module # ixl: Intel XL710 40Gbe PCIE Ethernet # ixlv: Intel XL710 40Gbe VF PCIE Ethernet # mlx4ib: Mellanox ConnectX HCA InfiniBand # mlxen: Mellanox ConnectX HCA Ethernet # mthca: Mellanox HCA InfiniBand # nfe: nVidia nForce MCP on-board Ethernet Networking (BSD open source) # sfxge: Solarflare SFC9000 family 10Gb Ethernet adapters # vmx: VMware VMXNET3 Ethernet (BSD open source) # wpi: Intel 3945ABG Wireless LAN controller # Requires the wpi firmware module device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE device ed # NE[12]000, SMC Ultra, 3c503, DS8390 cards options ED_3C503 options ED_HPP options ED_SIC device ipw # Intel 2100 wireless NICs. device iwi # Intel 2200BG/2225BG/2915ABG wireless NICs. device iwn # Intel 4965/1000/5000/6000 wireless NICs. device ixl # Intel XL710 40Gbe PCIE Ethernet device ixlv # Intel XL710 40Gbe VF PCIE Ethernet device mlx4ib # Mellanox ConnectX HCA InfiniBand device mlxen # Mellanox ConnectX HCA Ethernet device mthca # Mellanox HCA InfiniBand device nfe # nVidia nForce MCP on-board Ethernet device sfxge # Solarflare SFC9000 10Gb Ethernet device vmx # VMware VMXNET3 Ethernet device wpi # Intel 3945ABG wireless NICs. 
# IEEE 802.11 adapter firmware modules # Intel PRO/Wireless 2100 firmware: # ipwfw: BSS/IBSS/monitor mode firmware # ipwbssfw: BSS mode firmware # ipwibssfw: IBSS mode firmware # ipwmonitorfw: Monitor mode firmware # Intel PRO/Wireless 2200BG/2225BG/2915ABG firmware: # iwifw: BSS/IBSS/monitor mode firmware # iwibssfw: BSS mode firmware # iwiibssfw: IBSS mode firmware # iwimonitorfw: Monitor mode firmware # Intel Wireless WiFi Link 4965/1000/5000/6000 series firmware: # iwnfw: Single module to support all devices # iwn1000fw: Specific module for the 1000 only # iwn105fw: Specific module for the 105 only # iwn135fw: Specific module for the 135 only # iwn2000fw: Specific module for the 2000 only # iwn2030fw: Specific module for the 2030 only # iwn4965fw: Specific module for the 4965 only # iwn5000fw: Specific module for the 5000 only # iwn5150fw: Specific module for the 5150 only # iwn6000fw: Specific module for the 6000 only # iwn6000g2afw: Specific module for the 6000g2a only # iwn6000g2bfw: Specific module for the 6000g2b only # iwn6050fw: Specific module for the 6050 only # wpifw: Intel 3945ABG Wireless LAN Controller firmware device iwifw device iwibssfw device iwiibssfw device iwimonitorfw device ipwfw device ipwbssfw device ipwibssfw device ipwmonitorfw device iwnfw device iwn1000fw device iwn105fw device iwn135fw device iwn2000fw device iwn2030fw device iwn4965fw device iwn5000fw device iwn5150fw device iwn6000fw device iwn6000g2afw device iwn6000g2bfw device iwn6050fw device wpifw # Intel Non-Transparent Bridge (NTB) hardware device ntb_hw # Hardware Abstraction Layer for the NTB device if_ntb # Simulated ethernet device using the NTB # #XXX this stores pointers in a 32bit field that is defined by the hardware #device pst # # Areca 11xx and 12xx series of SATA II RAID controllers. # CAM is required. # device arcmsr # Areca SATA II RAID # # 3ware 9000 series PATA/SATA RAID controller driver and options. # The driver is implemented as a SIM, and so, needs the CAM infrastructure. # options TWA_DEBUG # 0-10; 10 prints the most messages. options TWA_FLASH_FIRMWARE # firmware image bundled when defined. device twa # 3ware 9000 series PATA/SATA RAID # # SCSI host adapters: # # ncv: NCR 53C500 based SCSI host adapters. # nsp: Workbit Ninja SCSI-3 based PC Card SCSI host adapters. # stg: TMC 18C30, 18C50 based SCSI host adapters. device ncv device nsp device stg # # Adaptec FSA RAID controllers, including integrated DELL controllers, # the Dell PERC 2/QC and the HP NetRAID-4M device aac device aacp # SCSI Passthrough interface (optional, CAM required) # # Adaptec by PMC RAID controllers, Series 6/7/8 and upcoming families device aacraid # Container interface, CAM required # # Highpoint RocketRAID 27xx. device hpt27xx # # Highpoint RocketRAID 182x. device hptmv # # Highpoint DC7280 and R750. device hptnr # # Highpoint RocketRAID. Supports RR172x, RR222x, RR2240, RR232x, RR2340, # RR2210, RR174x, RR2522, RR231x, RR230x. 
 device		hptrr
 
 #
 # Highpoint RocketRaid 3xxx series SATA RAID
 device		hptiop
 
 #
 # IBM (now Adaptec) ServeRAID controllers
 device		ips
 
 #
 # Intel C600 (Patsburg) integrated SAS controller
 device		isci
 options 	ISCI_LOGGING	# enable debugging in isci HAL
 
 #
 # NVM Express (NVMe) support
 device		nvme		# base NVMe driver
 device		nvd		# expose NVMe namespaces as disks, depends on nvme
 
 #
 # SafeNet crypto driver: can be moved to the MI NOTES as soon as
 # it's tested on a big-endian machine
 #
 device		safe		# SafeNet 1141
 options 	SAFE_DEBUG	# enable debugging support: hw.safe.debug
 options 	SAFE_RNDTEST	# enable rndtest support
 
 #
 # VirtIO support
 #
 # The virtio entry provides a generic bus for use by the device drivers.
 # It must be combined with an interface that communicates with the host.
 # Multiple such interfaces are defined by the VirtIO specification.  FreeBSD
 # only has support for PCI.  Therefore, virtio_pci must be statically
 # compiled in or loaded as a module for the device drivers to function.
 #
 device		virtio		# Generic VirtIO bus (required)
 device		virtio_pci	# VirtIO PCI Interface
 device		vtnet		# VirtIO Ethernet device
 device		virtio_blk	# VirtIO Block device
 device		virtio_scsi	# VirtIO SCSI device
 device		virtio_balloon	# VirtIO Memory Balloon device
 device		virtio_random	# VirtIO Entropy device
 device		virtio_console	# VirtIO Console device
 
+# Microsoft Hyper-V enhancement support
+options 	HYPERV		# Hyper-V kernel infrastructure
 device		hyperv		# HyperV drivers
 
 # Xen HVM Guest Optimizations
 options 	XENHVM		# Xen HVM kernel infrastructure
 device		xenpci		# Xen HVM Hypervisor services driver
 
 #####################################################################
 #
 # Miscellaneous hardware:
 #
 # ipmi: Intelligent Platform Management Interface
 # pbio: Parallel (8255 PPI) basic I/O (mode 0) port (e.g. Advantech PCL-724)
 # smbios: DMI/SMBIOS entry point
 # vpd: Vital Product Data kernel interface
 # asmc: Apple System Management Controller
 # si: Specialix International SI/XIO or SX intelligent serial card
 # tpm: Trusted Platform Module
 
 # Notes on the Specialix SI/XIO driver:
 #  The host card is memory, not IO mapped.
 #  The Rev 1 host cards use a 64K chunk, on a 32K boundary.
 #  The Rev 2 host cards use a 32K chunk, on a 32K boundary.
 #  The cards can use an IRQ of 11, 12 or 15.
 
 device		ipmi
 device		pbio
 hint.pbio.0.at="isa"
 hint.pbio.0.port="0x360"
 device		smbios
 device		vpd
 device		asmc
 device		si
 device		tpm
 device		padlock_rng	# VIA Padlock RNG
 device		rdrand_rng	# Intel Bull Mountain RNG
 device		aesni		# AES-NI OpenCrypto module
 
 #
 # Laptop/Notebook options:
 #
 
 #
 # I2C Bus
 #
 
 #
 # Hardware watchdog timers:
 #
 # ichwd: Intel ICH watchdog timer
 # amdsbwd: AMD SB7xx watchdog timer
 # viawd: VIA south bridge watchdog timer
 # wbwd: Winbond watchdog timer
 #
 device		ichwd
 device		amdsbwd
 device		viawd
 device		wbwd
 
 #
 # Temperature sensors:
 #
 # coretemp: on-die sensor on Intel Core and newer CPUs
 # amdtemp: on-die sensor on AMD K8/K10/K11 CPUs
 #
 device		coretemp
 device		amdtemp
 
 #
 # CPU control pseudo-device.  Provides access to MSRs, CPUID info and
 # microcode update feature.
 #
 device		cpuctl
 
 #
 # System Management Bus (SMB)
 #
 options 	ENABLE_ALART		# Control alarm on Intel intpm driver
 
 #
 # Number of initial kernel page table pages used for early bootstrap.
 # This number should include enough pages to map the kernel and any
 # modules or other data loaded with the kernel by the loader.  Each
 # page table page maps 2MB.
# options NKPT=31 ##################################################################### # ABI Emulation #XXX keep these here for now and reactivate when support for emulating #XXX these 32 bit binaries is added. # Enable 32-bit runtime support for FreeBSD/i386 binaries. options COMPAT_FREEBSD32 # Enable iBCS2 runtime support for SCO and ISC binaries #XXX#options IBCS2 # Emulate spx device for client side of SVR3 local X interface #XXX#options SPX_HACK # Enable Linux ABI emulation #XXX#options COMPAT_LINUX # Enable 32-bit Linux ABI emulation (requires COMPAT_43 and COMPAT_FREEBSD32) options COMPAT_LINUX32 # Enable the linux-like proc filesystem support (requires COMPAT_LINUX32 # and PSEUDOFS) options LINPROCFS #Enable the linux-like sys filesystem support (requires COMPAT_LINUX32 # and PSEUDOFS) options LINSYSFS # # SysVR4 ABI emulation # # The svr4 ABI emulator can be statically compiled into the kernel or loaded as # a KLD module. # The STREAMS network emulation code can also be compiled statically or as a # module. If loaded as a module, it must be loaded before the svr4 module # (the /usr/sbin/svr4 script does this for you). If compiling statically, # the `streams' device must be configured into any kernel which also # specifies COMPAT_SVR4. It is possible to have a statically-configured # STREAMS device and a dynamically loadable svr4 emulator; the /usr/sbin/svr4 # script understands that it doesn't need to load the `streams' module under # those circumstances. # Caveat: At this time, `options KTRACE' is required for the svr4 emulator # (whether static or dynamic). # #XXX#options COMPAT_SVR4 # build emulator statically #XXX#options DEBUG_SVR4 # enable verbose debugging #XXX#device streams # STREAMS network driver (required for svr4). ##################################################################### # VM OPTIONS # KSTACK_PAGES is the number of memory pages to assign to the kernel # stack of each thread. options KSTACK_PAGES=5 # Enable detailed accounting by the PV entry allocator. options PV_STATS ##################################################################### # More undocumented options for linting. # Note that documenting these are not considered an affront. options FB_INSTALL_CDEV # install a CDEV entry in /dev options KBDIO_DEBUG=2 options KBD_MAXRETRY=4 options KBD_MAXWAIT=6 options KBD_RESETDELAY=201 options PSM_DEBUG=1 options TIMER_FREQ=((14318182+6)/12) options VM_KMEM_SIZE options VM_KMEM_SIZE_MAX options VM_KMEM_SIZE_SCALE # Enable NDIS binary driver support options NDISAPI device ndis Index: head/sys/conf/options.amd64 =================================================================== --- head/sys/conf/options.amd64 (revision 282211) +++ head/sys/conf/options.amd64 (revision 282212) @@ -1,67 +1,69 @@ # $FreeBSD$ # Options specific to AMD64 platform kernels AUTO_EOI_1 opt_auto_eoi.h AUTO_EOI_2 opt_auto_eoi.h COUNT_XINVLTLB_HITS opt_smp.h COUNT_IPIS opt_smp.h MAXMEM PERFMON MPTABLE_FORCE_HTT MP_WATCHDOG NKPT opt_pmap.h PV_STATS opt_pmap.h # Options for emulators. These should only be used at config time, so # they are handled like options for static filesystems # (see src/sys/conf/options), except for broken debugging options. 
COMPAT_FREEBSD32 opt_compat.h #IBCS2 opt_dontuse.h #COMPAT_LINUX opt_dontuse.h COMPAT_LINUX32 opt_compat.h #COMPAT_SVR4 opt_dontuse.h #DEBUG_SVR4 opt_svr4.h LINPROCFS opt_dontuse.h LINSYSFS opt_dontuse.h NDISAPI opt_dontuse.h TIMER_FREQ opt_clock.h # options for serial support COM_ESP opt_sio.h COM_MULTIPORT opt_sio.h CONSPEED opt_sio.h GDBSPEED opt_sio.h COM_NO_ACPI opt_sio.h VGA_ALT_SEQACCESS opt_vga.h VGA_DEBUG opt_vga.h VGA_NO_FONT_LOADING opt_vga.h VGA_NO_MODE_CHANGE opt_vga.h VGA_SLOW_IOACCESS opt_vga.h VGA_WIDTH90 opt_vga.h VESA VESA_DEBUG opt_vesa.h # AGP debugging support AGP_DEBUG opt_agp.h ATKBD_DFLT_KEYMAP opt_atkbd.h # ------------------------------- # EOF # ------------------------------- HAMMER opt_cpu.h PSM_HOOKRESUME opt_psm.h PSM_RESETAFTERSUSPEND opt_psm.h PSM_DEBUG opt_psm.h DEV_ATPIC opt_atpic.h # BPF just-in-time compiler BPF_JITTER opt_bpf.h XENHVM opt_global.h +HYPERV opt_global.h + # options for the Intel C600 SAS driver (isci) ISCI_LOGGING opt_isci.h Index: head/sys/conf/options.i386 =================================================================== --- head/sys/conf/options.i386 (revision 282211) +++ head/sys/conf/options.i386 (revision 282212) @@ -1,129 +1,131 @@ # $FreeBSD$ # Options specific to the i386 platform kernels AUTO_EOI_1 opt_auto_eoi.h AUTO_EOI_2 opt_auto_eoi.h BROKEN_KEYBOARD_RESET opt_reset.h COUNT_XINVLTLB_HITS opt_smp.h COUNT_IPIS opt_smp.h DISABLE_PG_G opt_pmap.h DISABLE_PSE opt_pmap.h I586_PMC_GUPROF opt_i586_guprof.h MAXMEM MPTABLE_FORCE_HTT MP_WATCHDOG NKPT opt_pmap.h PERFMON PMAP_SHPGPERPROC opt_pmap.h POWERFAIL_NMI opt_trap.h PV_STATS opt_pmap.h # Options for emulators. These should only be used at config time, so # they are handled like options for static filesystems # (see src/sys/conf/options), except for broken debugging options. COMPAT_AOUT opt_dontuse.h IBCS2 opt_dontuse.h COMPAT_LINUX opt_dontuse.h COMPAT_SVR4 opt_dontuse.h DEBUG_SVR4 opt_svr4.h LINPROCFS opt_dontuse.h LINSYSFS opt_dontuse.h NDISAPI opt_dontuse.h # Change KVM size. Changes things all over the kernel. KVA_PAGES opt_global.h # Physical address extensions and support for >4G ram. As above. PAE opt_global.h # Use PAE page tables, but limit memory support to 4GB. # This keeps the i386 non-PAE KBI, in particular, drivers see # 32bit vm_paddr_t. PAE_TABLES opt_global.h TIMER_FREQ opt_clock.h CPU_ATHLON_SSE_HACK opt_cpu.h CPU_BLUELIGHTNING_3X opt_cpu.h CPU_BLUELIGHTNING_FPU_OP_CACHE opt_cpu.h CPU_BTB_EN opt_cpu.h CPU_CYRIX_NO_LOCK opt_cpu.h CPU_DIRECT_MAPPED_CACHE opt_cpu.h CPU_DISABLE_5X86_LSSER opt_cpu.h CPU_DISABLE_CMPXCHG opt_global.h # XXX global, unlike other CPU_* CPU_DISABLE_SSE opt_cpu.h CPU_ELAN opt_cpu.h CPU_ELAN_PPS opt_cpu.h CPU_ELAN_XTAL opt_cpu.h CPU_ENABLE_LONGRUN opt_cpu.h CPU_FASTER_5X86_FPU opt_cpu.h CPU_GEODE opt_cpu.h CPU_I486_ON_386 opt_cpu.h CPU_IORT opt_cpu.h CPU_L2_LATENCY opt_cpu.h CPU_LOOP_EN opt_cpu.h CPU_PPRO2CELERON opt_cpu.h CPU_RSTK_EN opt_cpu.h CPU_SOEKRIS opt_cpu.h CPU_SUSP_HLT opt_cpu.h CPU_UPGRADE_HW_CACHE opt_cpu.h CPU_WT_ALLOC opt_cpu.h CYRIX_CACHE_REALLY_WORKS opt_cpu.h CYRIX_CACHE_WORKS opt_cpu.h NO_F00F_HACK opt_cpu.h NO_MEMORY_HOLE opt_cpu.h # The CPU type affects the endian conversion functions all over the kernel. 
 I486_CPU		opt_global.h
 I586_CPU		opt_global.h
 I686_CPU		opt_global.h
 
 # options for serial support
 COM_ESP			opt_sio.h
 COM_MULTIPORT		opt_sio.h
 CONSPEED		opt_sio.h
 GDBSPEED		opt_sio.h
 COM_NO_ACPI		opt_sio.h
 
 VGA_ALT_SEQACCESS	opt_vga.h
 VGA_DEBUG		opt_vga.h
 VGA_NO_FONT_LOADING	opt_vga.h
 VGA_NO_MODE_CHANGE	opt_vga.h
 VGA_SLOW_IOACCESS	opt_vga.h
 VGA_WIDTH90		opt_vga.h
 
 VESA
 VESA_DEBUG		opt_vesa.h
 
 # AGP debugging support
 AGP_DEBUG		opt_agp.h
 
 PSM_DEBUG		opt_psm.h
 PSM_HOOKRESUME		opt_psm.h
 PSM_RESETAFTERSUSPEND	opt_psm.h
 
 ATKBD_DFLT_KEYMAP	opt_atkbd.h
 
 # Video spigot
 SPIGOT_UNSECURE		opt_spigot.h
 
 # Enables NETGRAPH support for Cronyx adapters
 NETGRAPH_CRONYX		opt_ng_cronyx.h
 
 # Device options
 DEV_APIC		opt_apic.h
 DEV_ATPIC		opt_atpic.h
 DEV_NPX			opt_npx.h
 
 # Debugging
 NPX_DEBUG		opt_npx.h
 
 # BPF just-in-time compiler
 BPF_JITTER		opt_bpf.h
 
 NATIVE			opt_global.h
 XEN			opt_global.h
 XENHVM			opt_global.h
+HYPERV			opt_global.h
+
 # options for the Intel C600 SAS driver (isci)
 ISCI_LOGGING	opt_isci.h
Index: head/sys/dev/hyperv/include/hyperv.h
===================================================================
--- head/sys/dev/hyperv/include/hyperv.h	(revision 282211)
+++ head/sys/dev/hyperv/include/hyperv.h	(revision 282212)
@@ -1,828 +1,981 @@
 /*-
  * Copyright (c) 2009-2012 Microsoft Corp.
  * Copyright (c) 2012 NetApp Inc.
  * Copyright (c) 2012 Citrix Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /**
  * HyperV definitions for messages that are sent between instances of the
  * Channel Management Library in separate partitions, or in some cases,
  * back to itself.
  */
 
 #ifndef __HYPERV_H__
 #define __HYPERV_H__
 
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 
 typedef uint8_t	hv_bool_uint8_t;
 
 #define HV_S_OK			0x00000000
 #define HV_E_FAIL		0x80004005
 #define HV_ERROR_NOT_SUPPORTED	0x80070032
 #define HV_ERROR_MACHINE_LOCKED	0x800704F7
 
 /*
- * A revision number of vmbus that is used for ensuring both ends on a
- * partition are using compatible versions.
+ * VMBUS version is 32 bit, upper 16 bit for major_number and lower
+ * 16 bit for minor_number.
+ *
+ * 0.13  --  Windows Server 2008
+ * 1.1   --  Windows 7
+ * 2.4   --  Windows 8
+ * 3.0   --  Windows 8.1
  */
+#define HV_VMBUS_VERSION_WS2008		((0 << 16) | (13))
+#define HV_VMBUS_VERSION_WIN7		((1 << 16) | (1))
+#define HV_VMBUS_VERSION_WIN8		((2 << 16) | (4))
+#define HV_VMBUS_VERSION_WIN8_1		((3 << 16) | (0))
 
-#define HV_VMBUS_REVISION_NUMBER	13
+#define HV_VMBUS_VERSION_INVALID	-1
+#define HV_VMBUS_VERSION_CURRENT	HV_VMBUS_VERSION_WIN8_1
+
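A quick review aside on the encoding above: since the protocol version is a plain (major << 16) | minor packing, it can be sanity-checked in a standalone program. The snippet below only re-declares one macro for illustration; it is not driver code.

	#include <stdint.h>
	#include <stdio.h>

	#define HV_VMBUS_VERSION_WIN8_1	((3 << 16) | (0))	/* as defined above */

	int
	main(void)
	{
		uint32_t ver = HV_VMBUS_VERSION_WIN8_1;

		/* Upper 16 bits are the major number, lower 16 the minor. */
		printf("vmbus protocol %u.%u\n", ver >> 16, ver & 0xffff);	/* 3.0 */
		return (0);
	}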
 /*
  * Make maximum size of pipe payload of 16K
  */
 #define HV_MAX_PIPE_DATA_PAYLOAD	(sizeof(BYTE) * 16384)
 
 /*
  * Define pipe_mode values
  */
 #define HV_VMBUS_PIPE_TYPE_BYTE		0x00000000
 #define HV_VMBUS_PIPE_TYPE_MESSAGE	0x00000004
 
 /*
  * The size of the user defined data buffer for non-pipe offers
  */
 #define HV_MAX_USER_DEFINED_BYTES	120
 
 /*
  * The size of the user defined data buffer for pipe offers
  */
 #define HV_MAX_PIPE_USER_DEFINED_BYTES	116
 
 #define HV_MAX_PAGE_BUFFER_COUNT	16
 #define HV_MAX_MULTIPAGE_BUFFER_COUNT	32
 
 #define HV_ALIGN_UP(value, align)					\
 		(((value) & (align-1)) ?				\
 		    (((value) + (align-1)) & ~(align-1) ) : (value))
 
 #define HV_ALIGN_DOWN(value, align)	( (value) & ~(align-1) )
 
 #define HV_NUM_PAGES_SPANNED(addr, len)					\
 		((HV_ALIGN_UP(addr+len, PAGE_SIZE) -			\
 		    HV_ALIGN_DOWN(addr, PAGE_SIZE)) >> PAGE_SHIFT )
 
 typedef struct hv_guid {
 	unsigned char data[16];
 } __packed hv_guid;
 
+#define HV_NIC_GUID							\
+	.data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,	\
+		0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
+
+#define HV_IDE_GUID							\
+	.data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,	\
+		0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
+
+#define HV_SCSI_GUID							\
+	.data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,	\
+		0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
+
 /*
  * At the center of the Channel Management library is
  * the Channel Offer.  This struct contains the
  * fundamental information about an offer.
  */
 typedef struct hv_vmbus_channel_offer {
 	hv_guid		interface_type;
 	hv_guid		interface_instance;
 	uint64_t	interrupt_latency_in_100ns_units;
 	uint32_t	interface_revision;
 	uint32_t	server_context_area_size; /* in bytes */
 	uint16_t	channel_flags;
 	uint16_t	mmio_megabytes;		  /* in bytes * 1024 * 1024 */
 	union {
 		/*
 		 * Non-pipes: The user has HV_MAX_USER_DEFINED_BYTES bytes.
 		 */
 		struct {
 			uint8_t	user_defined[HV_MAX_USER_DEFINED_BYTES];
 		} __packed standard;
 
 		/*
 		 * Pipes: The following structure is an integrated pipe protocol, which
 		 *        is implemented on top of standard user-defined data.  pipe
 		 *        clients have HV_MAX_PIPE_USER_DEFINED_BYTES left for their
 		 *        own use.
 		 */
 		struct {
 			uint32_t	pipe_mode;
 			uint8_t		user_defined[HV_MAX_PIPE_USER_DEFINED_BYTES];
 		} __packed pipe;
 	} u;
-	uint32_t	padding;
+	/*
+	 * Sub_channel_index, newly added in Win8.
+	 */
+	uint16_t	sub_channel_index;
+	uint16_t	padding;
 } __packed hv_vmbus_channel_offer;
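Another aside, on HV_ALIGN_UP/HV_NUM_PAGES_SPANNED above: a worked example makes the page-span computation concrete. The program below re-declares the macros and assumes the usual amd64 PAGE_SIZE/PAGE_SHIFT of 4096/12 (an assumption for the sake of the example); it shows that a 10-byte range starting 6 bytes before a page boundary spans two pages.

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT	12		/* assumed, matching amd64 */
	#define PAGE_SIZE	(1UL << PAGE_SHIFT)

	#define HV_ALIGN_UP(value, align)				\
		(((value) & (align-1)) ?				\
		    (((value) + (align-1)) & ~(align-1)) : (value))
	#define HV_ALIGN_DOWN(value, align)	((value) & ~(align-1))
	#define HV_NUM_PAGES_SPANNED(addr, len)				\
		((HV_ALIGN_UP(addr+len, PAGE_SIZE) -			\
		    HV_ALIGN_DOWN(addr, PAGE_SIZE)) >> PAGE_SHIFT)

	int
	main(void)
	{
		uint64_t addr = 4090, len = 10;	/* crosses one page boundary */

		/* Prints 2: two PFNs are needed to describe this range. */
		printf("%lu\n", (unsigned long)HV_NUM_PAGES_SPANNED(addr, len));
		return (0);
	}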
 typedef uint32_t hv_gpadl_handle;
 
 typedef struct {
 	uint16_t type;
 	uint16_t data_offset8;
 	uint16_t length8;
 	uint16_t flags;
 	uint64_t transaction_id;
 } __packed hv_vm_packet_descriptor;
 
 typedef uint32_t hv_previous_packet_offset;
 
 typedef struct {
 	hv_previous_packet_offset	previous_packet_start_offset;
 	hv_vm_packet_descriptor		descriptor;
 } __packed hv_vm_packet_header;
 
 typedef struct {
 	uint32_t byte_count;
 	uint32_t byte_offset;
 } __packed hv_vm_transfer_page;
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint16_t		transfer_page_set_id;
 	hv_bool_uint8_t		sender_owns_set;
 	uint8_t			reserved;
 	uint32_t		range_count;
 	hv_vm_transfer_page	ranges[1];
 } __packed hv_vm_transfer_page_packet_header;
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		gpadl;
 	uint32_t		reserved;
 } __packed hv_vm_gpadl_packet_header;
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		gpadl;
 	uint16_t		transfer_page_set_id;
 	uint16_t		reserved;
 } __packed hv_vm_add_remove_transfer_page_set;
 
 /*
  * This structure defines a range in guest
  * physical space that can be made
  * to look virtually contiguous.
  */
 typedef struct {
 	uint32_t byte_count;
 	uint32_t byte_offset;
 	uint64_t pfn_array[0];
 } __packed hv_gpa_range;
 
 /*
  * This is the format for an Establish Gpadl packet, which contains a handle
  * by which this GPADL will be known and a set of GPA ranges associated with
  * it.  This can be converted to a MDL by the guest OS.  If there are multiple
  * GPA ranges, then the resulting MDL will be "chained," representing multiple
  * VA ranges.
  */
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		gpadl;
 	uint32_t		range_count;
 	hv_gpa_range		range[1];
 } __packed hv_vm_establish_gpadl;
 
 /*
  * This is the format for a Teardown Gpadl packet, which indicates that the
  * GPADL handle in the Establish Gpadl packet will never be referenced again.
  */
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		gpadl;
 	/* for alignment to a 8-byte boundary */
 	uint32_t		reserved;
 } __packed hv_vm_teardown_gpadl;
 
 /*
  * This is the format for a GPA-Direct packet, which contains a set of GPA
  * ranges, in addition to commands and/or data.
  */
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		reserved;
 	uint32_t		range_count;
 	hv_gpa_range		range[1];
 } __packed hv_vm_data_gpa_direct;
 
 /*
  * This is the format for an Additional data Packet.
  */
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint64_t		total_bytes;
 	uint32_t		byte_offset;
 	uint32_t		byte_count;
 	uint8_t			data[1];
 } __packed hv_vm_additional_data;
 
 typedef union {
 	hv_vm_packet_descriptor			simple_header;
 	hv_vm_transfer_page_packet_header	transfer_page_header;
 	hv_vm_gpadl_packet_header		gpadl_header;
 	hv_vm_add_remove_transfer_page_set	add_remove_transfer_page_header;
 	hv_vm_establish_gpadl			establish_gpadl_header;
 	hv_vm_teardown_gpadl			teardown_gpadl_header;
 	hv_vm_data_gpa_direct			data_gpa_direct_header;
 } __packed hv_vm_packet_largest_possible_header;
 
 typedef enum {
 	HV_VMBUS_PACKET_TYPE_INVALID				= 0x0,
 	HV_VMBUS_PACKET_TYPES_SYNCH				= 0x1,
 	HV_VMBUS_PACKET_TYPE_ADD_TRANSFER_PAGE_SET		= 0x2,
 	HV_VMBUS_PACKET_TYPE_REMOVE_TRANSFER_PAGE_SET		= 0x3,
 	HV_VMBUS_PACKET_TYPE_ESTABLISH_GPADL			= 0x4,
 	HV_VMBUS_PACKET_TYPE_TEAR_DOWN_GPADL			= 0x5,
 	HV_VMBUS_PACKET_TYPE_DATA_IN_BAND			= 0x6,
 	HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES		= 0x7,
 	HV_VMBUS_PACKET_TYPE_DATA_USING_GPADL			= 0x8,
 	HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT		= 0x9,
 	HV_VMBUS_PACKET_TYPE_CANCEL_REQUEST			= 0xa,
 	HV_VMBUS_PACKET_TYPE_COMPLETION				= 0xb,
 	HV_VMBUS_PACKET_TYPE_DATA_USING_ADDITIONAL_PACKETS	= 0xc,
 	HV_VMBUS_PACKET_TYPE_ADDITIONAL_DATA			= 0xd
 } hv_vmbus_packet_type;
 
 #define HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED	1
 
 /*
  * Version 1 messages
  */
 typedef enum {
 	HV_CHANNEL_MESSAGE_INVALID			= 0,
 	HV_CHANNEL_MESSAGE_OFFER_CHANNEL		= 1,
 	HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER	= 2,
 	HV_CHANNEL_MESSAGE_REQUEST_OFFERS		= 3,
 	HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED		= 4,
 	HV_CHANNEL_MESSAGE_OPEN_CHANNEL			= 5,
 	HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT		= 6,
 	HV_CHANNEL_MESSAGE_CLOSE_CHANNEL		= 7,
 	HV_CHANNEL_MESSAGEL_GPADL_HEADER		= 8,
 	HV_CHANNEL_MESSAGE_GPADL_BODY			= 9,
 	HV_CHANNEL_MESSAGE_GPADL_CREATED		= 10,
 	HV_CHANNEL_MESSAGE_GPADL_TEARDOWN		= 11,
 	HV_CHANNEL_MESSAGE_GPADL_TORNDOWN		= 12,
 	HV_CHANNEL_MESSAGE_REL_ID_RELEASED		= 13,
 	HV_CHANNEL_MESSAGE_INITIATED_CONTACT		= 14,
 	HV_CHANNEL_MESSAGE_VERSION_RESPONSE		= 15,
 	HV_CHANNEL_MESSAGE_UNLOAD			= 16,
 #ifdef HV_VMBUS_FEATURE_PARENT_OR_PEER_MEMORY_MAPPED_INTO_A_CHILD
 	HV_CHANNEL_MESSAGE_VIEW_RANGE_ADD		= 17,
 	HV_CHANNEL_MESSAGE_VIEW_RANGE_REMOVE		= 18,
 #endif
 	HV_CHANNEL_MESSAGE_COUNT
 } hv_vmbus_channel_msg_type;
 
 typedef struct {
 	hv_vmbus_channel_msg_type	message_type;
 	uint32_t			padding;
 } __packed hv_vmbus_channel_msg_header;
 
 /*
  * Query VMBus Version parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			version;
 } __packed hv_vmbus_channel_query_vmbus_version;
 
 /*
  * VMBus Version Supported parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	hv_bool_uint8_t			version_supported;
 } __packed hv_vmbus_channel_version_supported;
 
 /*
  * Channel Offer parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	hv_vmbus_channel_offer		offer;
 	uint32_t			child_rel_id;
 	uint8_t				monitor_id;
-	hv_bool_uint8_t			monitor_allocated;
+	/*
+	 * This field has been split into a bit field on Win7
+	 * and higher.
+	 */
+	uint8_t				monitor_allocated:1;
+	uint8_t				reserved:7;
+	/*
+	 * Following fields were added in win7 and higher.
+	 * Make sure to check the version before accessing these fields.
+	 *
+	 * If "is_dedicated_interrupt" is set, we must not set the
+	 * associated bit in the channel bitmap while sending the
+	 * interrupt to the host.
+	 *
+	 * connection_id is used in signaling the host.
+	 */
+	uint16_t			is_dedicated_interrupt:1;
+	uint16_t			reserved1:15;
+	uint32_t			connection_id;
 } __packed hv_vmbus_channel_offer_channel;
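Review note on the new bit fields: the comment says consumers must check the negotiated version before touching the Win7-and-later fields. A hedged sketch of that gating, with a trimmed stand-in struct and a hypothetical negotiated_version parameter (not the driver's real code), might look like:

	#include <stdint.h>

	#define HV_VMBUS_VERSION_WIN7	((1 << 16) | (1))	/* as defined above */

	struct offer_tail {		/* trimmed stand-in for the fields above */
		uint16_t	is_dedicated_interrupt:1;
		uint16_t	reserved1:15;
		uint32_t	connection_id;
	};

	/*
	 * Only read the Win7+ fields once the negotiated protocol version
	 * allows it; older hosts never populate them.
	 */
	static uint32_t
	offer_connection_id(uint32_t negotiated_version, const struct offer_tail *t)
	{
		if (negotiated_version >= HV_VMBUS_VERSION_WIN7)
			return (t->connection_id);
		return (0);
	}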
 /*
  * Rescind Offer parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			child_rel_id;
 } __packed hv_vmbus_channel_rescind_offer;
 
 /*
  * Request Offer -- no parameters, SynIC message contains the partition ID
  *
  * Set Snoop -- no parameters, SynIC message contains the partition ID
  *
  * Clear Snoop -- no parameters, SynIC message contains the partition ID
  *
  * All Offers Delivered -- no parameters, SynIC message contains the
  *			   partition ID
  *
  * Flush Client -- no parameters, SynIC message contains the partition ID
  */
 
 /*
  * Open Channel parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 
 	/*
 	 * Identifies the specific VMBus channel that is being opened.
 	 */
 	uint32_t		child_rel_id;
 
 	/*
 	 * ID making a particular open request at a channel offer unique.
 	 */
 	uint32_t		open_id;
 
 	/*
 	 * GPADL for the channel's ring buffer.
 	 */
 	hv_gpadl_handle		ring_buffer_gpadl_handle;
 
 	/*
-	 * GPADL for the channel's server context save area.
+	 * Before win8, all incoming channel interrupts are only
+	 * delivered on cpu 0.  Setting this value to 0 would
+	 * preserve the earlier behavior.
 	 */
-	hv_gpadl_handle		server_context_area_gpadl_handle;
+	uint32_t		target_vcpu;
 
 	/*
 	 * The upstream ring buffer begins at offset zero in the memory described
 	 * by ring_buffer_gpadl_handle.  The downstream ring buffer follows it at
 	 * this offset (in pages).
 	 */
 	uint32_t		downstream_ring_buffer_page_offset;
 
 	/*
 	 * User-specific data to be passed along to the server endpoint.
 	 */
 	uint8_t			user_data[HV_MAX_USER_DEFINED_BYTES];
 
 } __packed hv_vmbus_channel_open_channel;
 
 typedef uint32_t hv_nt_status;
 
 /*
  * Open Channel Result parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			child_rel_id;
 	uint32_t			open_id;
 	hv_nt_status			status;
 } __packed hv_vmbus_channel_open_result;
 
 /*
  * Close channel parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			child_rel_id;
 } __packed hv_vmbus_channel_close_channel;
 
 /*
  * Channel Message GPADL
  */
 #define HV_GPADL_TYPE_RING_BUFFER	1
 #define HV_GPADL_TYPE_SERVER_SAVE_AREA	2
 #define HV_GPADL_TYPE_TRANSACTION	8
 
 /*
  * The number of PFNs in a GPADL message is defined by the number of pages
  * that would be spanned by byte_count and byte_offset.
If the implied number * of PFNs won't fit in this packet, there will be a follow-up packet that * contains more. */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t gpadl; uint16_t range_buf_len; uint16_t range_count; hv_gpa_range range[0]; } __packed hv_vmbus_channel_gpadl_header; /* * This is the follow-up packet that contains more PFNs */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t message_number; uint32_t gpadl; uint64_t pfn[0]; } __packed hv_vmbus_channel_gpadl_body; typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t gpadl; uint32_t creation_status; } __packed hv_vmbus_channel_gpadl_created; typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t gpadl; } __packed hv_vmbus_channel_gpadl_teardown; typedef struct { hv_vmbus_channel_msg_header header; uint32_t gpadl; } __packed hv_vmbus_channel_gpadl_torndown; typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; } __packed hv_vmbus_channel_relid_released; typedef struct { hv_vmbus_channel_msg_header header; uint32_t vmbus_version_requested; uint32_t padding2; uint64_t interrupt_page; uint64_t monitor_page_1; uint64_t monitor_page_2; } __packed hv_vmbus_channel_initiate_contact; typedef struct { hv_vmbus_channel_msg_header header; hv_bool_uint8_t version_supported; } __packed hv_vmbus_channel_version_response; typedef hv_vmbus_channel_msg_header hv_vmbus_channel_unload; #define HW_MACADDR_LEN 6 /* * Fixme: Added to quiet "typeof" errors involving hv_vmbus.h when * the including C file was compiled with "-std=c99". */ #ifndef typeof #define typeof __typeof #endif #ifndef NULL #define NULL (void *)0 #endif typedef void *hv_vmbus_handle; #ifndef CONTAINING_RECORD #define CONTAINING_RECORD(address, type, field) ((type *)( \ (uint8_t *)(address) - \ (uint8_t *)(&((type *)0)->field))) #endif /* CONTAINING_RECORD */ #define container_of(ptr, type, member) ({ \ __typeof__( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );})
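Both CONTAINING_RECORD and container_of recover a pointer to an enclosing structure from a pointer to one of its members. A minimal usage sketch follows; the wrapper struct and callback here are hypothetical, included only to illustrate the pointer arithmetic, and are not part of this header.

/*
 * Example: recover a hypothetical wrapper from a pointer to its
 * embedded "work" member.  Both forms compute
 * (char *)tp - offsetof(struct example_item, work).
 */
struct example_item {
        int             id;
        struct task     work;           /* embedded member */
};

static void
example_task_fn(void *arg, int pending)
{
        struct task *tp = arg;
        struct example_item *item;

        item = CONTAINING_RECORD(tp, struct example_item, work);
        item = container_of(tp, struct example_item, work);
        (void)item; (void)pending;
}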
enum { HV_VMBUS_IVAR_TYPE, HV_VMBUS_IVAR_INSTANCE, HV_VMBUS_IVAR_NODE, HV_VMBUS_IVAR_DEVCTX }; #define HV_VMBUS_ACCESSOR(var, ivar, type) \ __BUS_ACCESSOR(vmbus, var, HV_VMBUS, ivar, type) HV_VMBUS_ACCESSOR(type, TYPE, const char *) HV_VMBUS_ACCESSOR(devctx, DEVCTX, struct hv_device *) /* * Common defines for Hyper-V ICs */ #define HV_ICMSGTYPE_NEGOTIATE 0 #define HV_ICMSGTYPE_HEARTBEAT 1 #define HV_ICMSGTYPE_KVPEXCHANGE 2 #define HV_ICMSGTYPE_SHUTDOWN 3 #define HV_ICMSGTYPE_TIMESYNC 4 #define HV_ICMSGTYPE_VSS 5 #define HV_ICMSGHDRFLAG_TRANSACTION 1 #define HV_ICMSGHDRFLAG_REQUEST 2 #define HV_ICMSGHDRFLAG_RESPONSE 4 typedef struct hv_vmbus_pipe_hdr { uint32_t flags; uint32_t msgsize; } __packed hv_vmbus_pipe_hdr; typedef struct hv_vmbus_ic_version { uint16_t major; uint16_t minor; } __packed hv_vmbus_ic_version; typedef struct hv_vmbus_icmsg_hdr { hv_vmbus_ic_version icverframe; uint16_t icmsgtype; hv_vmbus_ic_version icvermsg; uint16_t icmsgsize; uint32_t status; uint8_t ictransaction_id; uint8_t icflags; uint8_t reserved[2]; } __packed hv_vmbus_icmsg_hdr; typedef struct hv_vmbus_icmsg_negotiate { uint16_t icframe_vercnt; uint16_t icmsg_vercnt; uint32_t reserved; hv_vmbus_ic_version icversion_data[1]; /* any size array */ } __packed hv_vmbus_icmsg_negotiate; typedef struct hv_vmbus_shutdown_msg_data { uint32_t reason_code; uint32_t timeout_seconds; uint32_t flags; uint8_t display_message[2048]; } __packed hv_vmbus_shutdown_msg_data; typedef struct hv_vmbus_heartbeat_msg_data { uint64_t seq_num; uint32_t reserved[8]; } __packed hv_vmbus_heartbeat_msg_data; typedef struct { /* * offset in bytes from the start of ring data below */ volatile uint32_t write_index; /* * offset in bytes from the start of ring data below */ volatile uint32_t read_index; /* * NOTE: The interrupt_mask field is used only for channels, but * the vmbus connection also uses this data structure */ volatile uint32_t interrupt_mask; /* pad it to PAGE_SIZE so that data starts on a page */ uint8_t reserved[4084]; /* * WARNING: Ring data starts here + ring_data_start_offset * !!! DO NOT place any fields below this !!! */ uint8_t buffer[0]; /* doubles as interrupt mask */ } __packed hv_vmbus_ring_buffer;
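write_index and read_index are byte offsets into buffer[] that wrap at the size of the ring data. As a rough sketch of the bookkeeping this layout implies (illustrative only, not the driver's actual ring-buffer code, which also handles interrupt_mask and pending-send accounting):

/*
 * Sketch: bytes available for reading and writing in a ring
 * described by hv_vmbus_ring_buffer_info (declared below).
 */
static __inline uint32_t
example_ring_avail_to_read(const hv_vmbus_ring_buffer_info *info)
{
        uint32_t w = info->ring_buffer->write_index;
        uint32_t r = info->ring_buffer->read_index;

        return ((w >= r) ? (w - r) : (info->ring_data_size - r + w));
}

static __inline uint32_t
example_ring_avail_to_write(const hv_vmbus_ring_buffer_info *info)
{
        /*
         * Real implementations avoid filling the ring completely so
         * that "full" remains distinguishable from "empty".
         */
        return (info->ring_data_size - example_ring_avail_to_read(info));
}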
typedef struct { int length; int offset; uint64_t pfn; } __packed hv_vmbus_page_buffer; typedef struct { int length; int offset; uint64_t pfn_array[HV_MAX_MULTIPAGE_BUFFER_COUNT]; } __packed hv_vmbus_multipage_buffer; typedef struct { hv_vmbus_ring_buffer* ring_buffer; uint32_t ring_size; /* Include the shared header */ struct mtx ring_lock; uint32_t ring_data_size; /* ring_size */ uint32_t ring_data_start_offset; } hv_vmbus_ring_buffer_info; typedef void (*hv_vmbus_pfn_channel_callback)(void *context); +typedef void (*hv_vmbus_sc_creation_callback)(void *context); typedef enum { HV_CHANNEL_OFFER_STATE, HV_CHANNEL_OPENING_STATE, HV_CHANNEL_OPEN_STATE, + HV_CHANNEL_OPENED_STATE, HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE, } hv_vmbus_channel_state; +/* + * Connection identifier type + */ +typedef union { + uint32_t as_uint32_t; + struct { + uint32_t id:24; + uint32_t reserved:8; + } u; + +} __packed hv_vmbus_connection_id; + +/* + * Definition of the hv_vmbus_signal_event hypercall input structure + */ +typedef struct { + hv_vmbus_connection_id connection_id; + uint16_t flag_number; + uint16_t rsvd_z; +} __packed hv_vmbus_input_signal_event; + +typedef struct { + uint64_t align8; + hv_vmbus_input_signal_event event; +} __packed hv_vmbus_input_signal_event_buffer; + typedef struct hv_vmbus_channel { TAILQ_ENTRY(hv_vmbus_channel) list_entry; struct hv_device* device; hv_vmbus_channel_state state; hv_vmbus_channel_offer_channel offer_msg; /* * These are based on the offer_msg.monitor_id. * Save it here for easy access. */ uint8_t monitor_group; uint8_t monitor_bit; uint32_t ring_buffer_gpadl_handle; /* * Allocated memory for ring buffer */ void* ring_buffer_pages; unsigned long ring_buffer_size; uint32_t ring_buffer_page_count; /* * send to parent */ hv_vmbus_ring_buffer_info outbound; /* * receive from parent */ hv_vmbus_ring_buffer_info inbound; struct mtx inbound_lock; hv_vmbus_handle control_work_queue; hv_vmbus_pfn_channel_callback on_channel_callback; void* channel_callback_context; + /* + * If batched_reading is set to "true", mask the interrupt + * and read until the channel is empty. + * If batched_reading is set to "false", the channel is not + * going to perform batched reading. + * + * Batched reading is enabled by default; specific + * drivers that don't want this behavior can turn it off. + */ + boolean_t batched_reading; + + boolean_t is_dedicated_interrupt; + + /* + * Used as an input param for the HV_CALL_SIGNAL_EVENT hypercall. + */ + hv_vmbus_input_signal_event_buffer signal_event_buffer; + /* + * Pointer to the 8-byte aligned event inside the buffer above. + */ + hv_vmbus_input_signal_event *signal_event_param; + + /* + * From Win8, this field specifies the target virtual processor + * on which to deliver the interrupt from the host to the guest. + * Before Win8, all channel interrupts would only be + * delivered on cpu 0. Setting this value to 0 would preserve + * the earlier behavior. + */ + uint32_t target_vcpu; + /* The corresponding CPUID in the guest */ + uint32_t target_cpu; + + /* + * Support for multi-channels. + * The initial offer is considered the primary channel and this + * offer message will indicate if the host supports multi-channels. + * The guest is free to ask for multi-channels to be offered and can + * open these multi-channels as a normal "primary" channel. However, + * all multi-channels will have the same type and instance GUIDs as the + * primary channel. Requests sent on a given channel will result in a + * response on the same channel. + */ + + /* + * Multi-channel creation callback. This callback will be called in + * process context when a multi-channel offer is received from the host. + * The guest can open the multi-channel in the context of this callback. + */ + hv_vmbus_sc_creation_callback sc_creation_callback; + + struct mtx sc_lock; + + /* + * Linked list of all the multi-channels if this is a primary channel + */ + TAILQ_HEAD(, hv_vmbus_channel) sc_list_anchor; + TAILQ_ENTRY(hv_vmbus_channel) sc_list_entry; + + /* + * The primary channel this sub-channel belongs to. + * This will be NULL for the primary channel. + */ + struct hv_vmbus_channel *primary_channel; + /* + * Support per channel state for use by vmbus drivers. + */ + void *per_channel_state; } hv_vmbus_channel; +static inline void +hv_set_channel_read_state(hv_vmbus_channel* channel, boolean_t state) +{ + channel->batched_reading = state; +} + typedef struct hv_device { hv_guid class_id; hv_guid device_id; device_t device; hv_vmbus_channel* channel; } hv_device; int hv_vmbus_channel_recv_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id); int hv_vmbus_channel_recv_packet_raw( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id); int hv_vmbus_channel_open( hv_vmbus_channel* channel, uint32_t send_ring_buffer_size, uint32_t recv_ring_buffer_size, void* user_data, uint32_t user_data_len, hv_vmbus_pfn_channel_callback pfn_on_channel_callback, void* context); void hv_vmbus_channel_close(hv_vmbus_channel *channel); int hv_vmbus_channel_send_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint64_t request_id, hv_vmbus_packet_type type, uint32_t flags); int hv_vmbus_channel_send_packet_pagebuffer( hv_vmbus_channel* channel, hv_vmbus_page_buffer page_buffers[], uint32_t page_count, void* buffer, uint32_t buffer_len, uint64_t request_id); int hv_vmbus_channel_send_packet_multipagebuffer( hv_vmbus_channel* channel, hv_vmbus_multipage_buffer* multi_page_buffer, void* buffer, uint32_t buffer_len, uint64_t request_id); int hv_vmbus_channel_establish_gpadl( hv_vmbus_channel* channel, /* must be phys and virt contiguous */ void* contig_buffer, /* page-size multiple */ uint32_t size, uint32_t* gpadl_handle); int hv_vmbus_channel_teardown_gpdal( hv_vmbus_channel* channel, uint32_t gpadl_handle); +struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary); + /* * Work abstraction defines */ typedef struct hv_work_queue { struct taskqueue* queue; struct proc* proc; struct sema* work_sema; } hv_work_queue; typedef struct hv_work_item { struct task work; void (*callback)(void *); void* context; hv_work_queue* wq; } hv_work_item; struct hv_work_queue* hv_work_queue_create(char* name);
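The work-queue interface above wraps a taskqueue serviced in process context. A usage sketch (the callback and names here are illustrative, not part of the API):

static void
example_work_fn(void *context)
{
        /* Runs in the work queue's process context. */
        printf("work item ran, context %p\n", context);
}

static void
example_use_work_queue(void)
{
        struct hv_work_queue *wq;

        wq = hv_work_queue_create("example wq");
        if (wq == NULL)
                return;
        if (hv_queue_work_item(wq, example_work_fn, NULL) != 0)
                printf("failed to queue work item\n");
        /* ... once all queued items have run ... */
        hv_work_queue_close(wq);
}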
void hv_work_queue_close(struct hv_work_queue* wq); int hv_queue_work_item( hv_work_queue* wq, void (*callback)(void *), void* context); /** * @brief Get physical address from virtual */ static inline unsigned long hv_get_phys_addr(void *virt) { unsigned long ret; ret = (vtophys(virt) | ((vm_offset_t) virt & PAGE_MASK)); return (ret); } /** * KVP related structures * */ typedef struct hv_vmbus_service { hv_guid guid; /* Hyper-V GUID */ char *name; /* name of service */ boolean_t enabled; /* service enabled */ hv_work_queue *work_queue; /* background work queue */ /* * function to initialize service */ int (*init)(struct hv_vmbus_service *); /* * function to process Hyper-V messages */ void (*callback)(void *); } hv_vmbus_service; extern uint8_t* receive_buffer[]; extern hv_vmbus_service service_table[]; +extern uint32_t hv_vmbus_protocal_version; void hv_kvp_callback(void *context); int hv_kvp_init(hv_vmbus_service *serv); void hv_kvp_deinit(void); #endif /* __HYPERV_H__ */ Index: head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c =================================================================== --- head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 282211) +++ head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 282212) @@ -1,1393 +1,2004 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * StorVSC driver for Hyper-V. This driver presents a SCSI HBA interface * to the Common Access Method (CAM) layer. CAM control blocks (CCBs) are * converted into VSCSI protocol messages which are delivered to the parent * partition StorVSP driver over the Hyper-V VMBUS.
*/ #include __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include +#include +#include +#include #include #include #include #include #include #include #include #include #include - #include #include "hv_vstorage.h" #define STORVSC_RINGBUFFER_SIZE (20*PAGE_SIZE) #define STORVSC_MAX_LUNS_PER_TARGET (64) #define STORVSC_MAX_IO_REQUESTS (STORVSC_MAX_LUNS_PER_TARGET * 2) #define BLKVSC_MAX_IDE_DISKS_PER_TARGET (1) #define BLKVSC_MAX_IO_REQUESTS STORVSC_MAX_IO_REQUESTS #define STORVSC_MAX_TARGETS (2) +#define STORVSC_WIN7_MAJOR 4 +#define STORVSC_WIN7_MINOR 2 + +#define STORVSC_WIN8_MAJOR 5 +#define STORVSC_WIN8_MINOR 1 + +#define HV_ALIGN(x, a) roundup2(x, a) + struct storvsc_softc; +struct hv_sgl_node { + LIST_ENTRY(hv_sgl_node) link; + struct sglist *sgl_data; +}; + +struct hv_sgl_page_pool{ + LIST_HEAD(, hv_sgl_node) in_use_sgl_list; + LIST_HEAD(, hv_sgl_node) free_sgl_list; + boolean_t is_init; +} g_hv_sgl_page_pool; + +#define STORVSC_MAX_SG_PAGE_CNT STORVSC_MAX_IO_REQUESTS * HV_MAX_MULTIPAGE_BUFFER_COUNT + enum storvsc_request_type { WRITE_TYPE, READ_TYPE, UNKNOWN_TYPE }; struct hv_storvsc_request { LIST_ENTRY(hv_storvsc_request) link; struct vstor_packet vstor_packet; hv_vmbus_multipage_buffer data_buf; void *sense_data; uint8_t sense_info_len; uint8_t retries; union ccb *ccb; struct storvsc_softc *softc; struct callout callout; struct sema synch_sema; /*Synchronize the request/response if needed */ + struct sglist *bounce_sgl; + unsigned int bounce_sgl_count; + uint64_t not_aligned_seg_bits; }; struct storvsc_softc { struct hv_device *hs_dev; - LIST_HEAD(, hv_storvsc_request) hs_free_list; - struct mtx hs_lock; - struct storvsc_driver_props *hs_drv_props; - int hs_unit; - uint32_t hs_frozen; - struct cam_sim *hs_sim; - struct cam_path *hs_path; + LIST_HEAD(, hv_storvsc_request) hs_free_list; + struct mtx hs_lock; + struct storvsc_driver_props *hs_drv_props; + int hs_unit; + uint32_t hs_frozen; + struct cam_sim *hs_sim; + struct cam_path *hs_path; uint32_t hs_num_out_reqs; boolean_t hs_destroy; boolean_t hs_drain_notify; + boolean_t hs_open_multi_channel; struct sema hs_drain_sema; struct hv_storvsc_request hs_init_req; struct hv_storvsc_request hs_reset_req; }; /** * HyperV storvsc timeout testing cases: * a. IO returned after first timeout; * b. IO returned after second timeout and queue freeze; * c. IO returned while timer handler is running * The first can be tested by "sg_senddiag -vv /dev/daX", * and the second and third can be done by * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX". - */ + */ #define HVS_TIMEOUT_TEST 0 /* * Bus/adapter reset functionality on the Hyper-V host is * buggy and it will be disabled until * it can be further tested. 
*/ #define HVS_HOST_RESET 0 struct storvsc_driver_props { char *drv_name; char *drv_desc; uint8_t drv_max_luns_per_target; - uint8_t drv_max_ios_per_target; + uint8_t drv_max_ios_per_target; uint32_t drv_ringbuffer_size; }; enum hv_storage_type { DRIVER_BLKVSC, DRIVER_STORVSC, DRIVER_UNKNOWN }; #define HS_MAX_ADAPTERS 10 +#define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1 + /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */ static const hv_guid gStorVscDeviceType={ .data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f} }; /* {32412632-86cb-44a2-9b5c-50d1417354f5} */ static const hv_guid gBlkVscDeviceType={ .data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5} }; static struct storvsc_driver_props g_drv_props_table[] = { {"blkvsc", "Hyper-V IDE Storage Interface", BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS, STORVSC_RINGBUFFER_SIZE}, {"storvsc", "Hyper-V SCSI Storage Interface", STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS, STORVSC_RINGBUFFER_SIZE} }; +static int storvsc_current_major; +static int storvsc_current_minor; + /* static functions */ static int storvsc_probe(device_t dev); static int storvsc_attach(device_t dev); static int storvsc_detach(device_t dev); static void storvsc_poll(struct cam_sim * sim); static void storvsc_action(struct cam_sim * sim, union ccb * ccb); -static void create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp); +static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp); static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp); static enum hv_storage_type storvsc_get_storage_type(device_t dev); static void hv_storvsc_on_channel_callback(void *context); static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request); static int hv_storvsc_connect_vsp(struct hv_device *device); static void storvsc_io_done(struct hv_storvsc_request *reqp); +static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, + bus_dma_segment_t *orig_sgl, + unsigned int orig_sgl_count, + uint64_t seg_bits); +void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, + unsigned int dest_sgl_count, + struct sglist* src_sgl, + uint64_t seg_bits); static device_method_t storvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, storvsc_probe), DEVMETHOD(device_attach, storvsc_attach), DEVMETHOD(device_detach, storvsc_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD_END }; static driver_t storvsc_driver = { "storvsc", storvsc_methods, sizeof(struct storvsc_softc), }; static devclass_t storvsc_devclass; DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0); MODULE_VERSION(storvsc, 1); MODULE_DEPEND(storvsc, vmbus, 1, 1, 1); /** - * The host is capable of sending messages to us that are + * The host is capable of sending messages to us that are * completely unsolicited. So, we need to address the race * condition where we may be in the process of unloading the * driver when the host may send us an unsolicited message. * We address this issue by implementing a sequentially * consistent protocol: * * 1. Channel callback is invoked while holding the channel lock * and an unloading driver will reset the channel callback under * the protection of this channel lock. * * 2. To ensure bounded wait time for unloading a driver, we don't * permit outgoing traffic once the device is marked as being * destroyed. * * 3. Once the device is marked as being destroyed, we only - * permit incoming traffic to properly account for + * permit incoming traffic to properly account for * packets already sent out. */ static inline struct storvsc_softc * get_stor_device(struct hv_device *device, boolean_t outbound) { struct storvsc_softc *sc; sc = device_get_softc(device->device); if (sc == NULL) { return NULL; } if (outbound) { /* * Here we permit outgoing I/O only * if the device is not being destroyed. */ if (sc->hs_destroy) { sc = NULL; } } else { /* * inbound case; if being destroyed * only permit to account for * messages already sent out. */ if (sc->hs_destroy && (sc->hs_num_out_reqs == 0)) { sc = NULL; } } return sc; }
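The unload half of this protocol appears in storvsc_detach() later in this patch; condensed into one illustrative sequence (the fields are the real softc members, the function itself is only a sketch):

static void
example_drain_and_close(struct storvsc_softc *sc, struct hv_device *dev)
{
        mtx_lock(&dev->channel->inbound_lock);
        sc->hs_destroy = TRUE;          /* outbound get_stor_device() now fails */
        mtx_unlock(&dev->channel->inbound_lock);

        sc->hs_drain_notify = TRUE;
        sema_wait(&sc->hs_drain_sema);  /* posted when hs_num_out_reqs reaches 0 */
        sc->hs_drain_notify = FALSE;

        /* Resets the channel callback under the channel lock. */
        hv_vmbus_channel_close(dev->channel);
}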
/** + * @brief Callback handler, invoked when a multi-channel offer is received + * + * @param context the new multi-channel + */ +static void +storvsc_handle_sc_creation(void *context) +{ + hv_vmbus_channel *new_channel; + struct hv_device *device; + struct storvsc_softc *sc; + struct vmstor_chan_props props; + int ret = 0; + + new_channel = (hv_vmbus_channel *)context; + device = new_channel->primary_channel->device; + sc = get_stor_device(device, TRUE); + if (sc == NULL) + return; + + if (FALSE == sc->hs_open_multi_channel) + return; + + memset(&props, 0, sizeof(props)); + + ret = hv_vmbus_channel_open(new_channel, + sc->hs_drv_props->drv_ringbuffer_size, + sc->hs_drv_props->drv_ringbuffer_size, + (void *)&props, + sizeof(struct vmstor_chan_props), + hv_storvsc_on_channel_callback, + new_channel); + + return; +} + +/** + * @brief Send a multi-channel creation request to the host + * + * @param device a Hyper-V device pointer + * @param max_chans the max channels supported by vmbus + */ +static void +storvsc_send_multichannel_request(struct hv_device *dev, int max_chans) +{ + struct storvsc_softc *sc; + struct hv_storvsc_request *request; + struct vstor_packet *vstor_packet; + int request_channels_cnt = 0; + int ret; + + /* get the number of multi-channels to create */ + request_channels_cnt = MIN(max_chans, mp_ncpus); + + sc = get_stor_device(dev, TRUE); + if (sc == NULL) { + printf("Storvsc_error: get sc failed while sending " + "multi-channel request\n"); + return; + } + + request = &sc->hs_init_req; + + /* Establish a handler for multi-channel */ + dev->channel->sc_creation_callback = storvsc_handle_sc_creation; + + /* request the host to create multi-channel */ + memset(request, 0, sizeof(struct hv_storvsc_request)); + + sema_init(&request->synch_sema, 0, ("stor_synch_sema")); + + vstor_packet = &request->vstor_packet; + + vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS; + vstor_packet->flags = REQUEST_COMPLETION_FLAG; + vstor_packet->u.multi_channels_cnt = request_channels_cnt; + + ret = hv_vmbus_channel_send_packet( + dev->channel, + vstor_packet, + sizeof(struct vstor_packet), + (uint64_t)(uintptr_t)request, + HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, + HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + + /* wait for 5 seconds */ + ret = sema_timedwait(&request->synch_sema, 5 * hz); + if (ret != 0) { + printf("Storvsc_error: create multi-channel timeout, %d\n", + ret); + return; + } + + if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || + vstor_packet->status != 0) { + printf("Storvsc_error: create multi-channel invalid operation " + "(%d) or status (%u)\n", + vstor_packet->operation, vstor_packet->status); + return; + } + +
sc->hs_open_multi_channel = TRUE; + + if (bootverbose) + printf("Storvsc create multi-channel success!\n"); +} + +/** * @brief initialize channel connection to parent partition * * @param dev a Hyper-V device pointer * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_channel_init(struct hv_device *dev) { int ret = 0; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; struct storvsc_softc *sc; + uint16_t max_chans = 0; + boolean_t support_multichannel = FALSE; + max_chans = 0; + support_multichannel = FALSE; + sc = get_stor_device(dev, TRUE); - if (sc == NULL) { - return ENODEV; - } + if (sc == NULL) + return (ENODEV); request = &sc->hs_init_req; memset(request, 0, sizeof(struct hv_storvsc_request)); vstor_packet = &request->vstor_packet; request->softc = sc; /** * Initiate the vsc/vsp initialization protocol on the open channel */ sema_init(&request->synch_sema, 0, ("stor_synch_sema")); vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, sizeof(struct vstor_packet), (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); - if (ret != 0) { + if (ret != 0) goto cleanup; - } - ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */ - - if (ret != 0) { + /* wait 5 seconds */ + ret = sema_timedwait(&request->synch_sema, 5 * hz); + if (ret != 0) goto cleanup; - } if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { goto cleanup; } /* reuse the packet for version range supported */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; - vstor_packet->u.version.major_minor = VMSTOR_PROTOCOL_VERSION_CURRENT; + vstor_packet->u.version.major_minor = + VMSTOR_PROTOCOL_VERSION(storvsc_current_major, storvsc_current_minor); /* revision is only significant for Windows guests */ vstor_packet->u.version.revision = 0; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, sizeof(struct vstor_packet), (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); - if (ret != 0) { + if (ret != 0) goto cleanup; - } - ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */ + /* wait 5 seconds */ + ret = sema_timedwait(&request->synch_sema, 5 * hz); - if (ret) { + if (ret) goto cleanup; - } /* TODO: Check returned version */ if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || - vstor_packet->status != 0) { + vstor_packet->status != 0) goto cleanup; - } /** * Query channel properties */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, sizeof(struct vstor_packet), (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); - if ( ret != 0) { + if ( ret != 0) goto cleanup; - } - ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */ + /* wait 5 seconds */ + ret = sema_timedwait(&request->synch_sema, 5 * hz); - if (ret != 0) { + if (ret != 0) goto cleanup; - } /* TODO: Check returned version */ if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || - vstor_packet->status != 0) { + vstor_packet->status != 0) { 
goto cleanup; } + /* the multi-channel feature is supported by Win8 and later versions */ + max_chans = vstor_packet->u.chan_props.max_channel_cnt; + if ((hv_vmbus_protocal_version != HV_VMBUS_VERSION_WIN7) && + (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) && + (vstor_packet->u.chan_props.flags & + HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) { + support_multichannel = TRUE; + } + memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, sizeof(struct vstor_packet), (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { goto cleanup; } - ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */ + /* wait 5 seconds */ + ret = sema_timedwait(&request->synch_sema, 5 * hz); - if (ret != 0) { + if (ret != 0) goto cleanup; - } if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || - vstor_packet->status != 0) { + vstor_packet->status != 0) goto cleanup; - } + /* + * If multi-channel is supported, send a multi-channel create + * request to the host. + */ + if (support_multichannel) + storvsc_send_multichannel_request(dev, max_chans); + cleanup: sema_destroy(&request->synch_sema); return (ret); } /** * @brief Open channel connection to parent partition StorVSP driver * * Open and initialize channel connection to parent partition StorVSP driver. * * @param dev a pointer to a Hyper-V device * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_connect_vsp(struct hv_device *dev) { int ret = 0; struct vmstor_chan_props props; struct storvsc_softc *sc; sc = device_get_softc(dev->device); memset(&props, 0, sizeof(struct vmstor_chan_props)); /* * Open the channel */ ret = hv_vmbus_channel_open( dev->channel, sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_ringbuffer_size, (void *)&props, sizeof(struct vmstor_chan_props), hv_storvsc_on_channel_callback, - dev); + dev->channel); - if (ret != 0) { return ret; } ret = hv_storvsc_channel_init(dev); return (ret); } #if HVS_HOST_RESET static int hv_storvsc_host_reset(struct hv_device *dev) { int ret = 0; struct storvsc_softc *sc; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; sc = get_stor_device(dev, TRUE); if (sc == NULL) { return ENODEV; } request = &sc->hs_reset_req; request->softc = sc; vstor_packet = &request->vstor_packet; sema_init(&request->synch_sema, 0, "stor synch sema"); vstor_packet->operation = VSTOR_OPERATION_RESETBUS; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet(dev->channel, vstor_packet, sizeof(struct vstor_packet), (uint64_t)(uintptr_t)&sc->hs_reset_req, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { goto cleanup; } - ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */ + ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */ if (ret) { goto cleanup; } /* - * At this point, all outstanding requests in the adapter + * At this point, all outstanding requests in the adapter * should have been flushed out and returned to us */ cleanup: sema_destroy(&request->synch_sema); return (ret); } #endif /* HVS_HOST_RESET */ /** * @brief Function to initiate an I/O request * * @param device Hyper-V device pointer * @param request pointer to a request structure * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_io_request(struct hv_device *device, struct hv_storvsc_request *request) { struct storvsc_softc *sc; struct vstor_packet *vstor_packet = &request->vstor_packet; + struct hv_vmbus_channel* outgoing_channel = NULL; int ret = 0; sc = get_stor_device(device, TRUE); if (sc == NULL) { return ENODEV; } vstor_packet->flags |= REQUEST_COMPLETION_FLAG; vstor_packet->u.vm_srb.length = sizeof(struct vmscsi_req); vstor_packet->u.vm_srb.sense_info_len = SENSE_BUFFER_SIZE; vstor_packet->u.vm_srb.transfer_len = request->data_buf.length; vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB; + outgoing_channel = vmbus_select_outgoing_channel(device->channel); mtx_unlock(&request->softc->hs_lock); if (request->data_buf.length) { ret = hv_vmbus_channel_send_packet_multipagebuffer( - device->channel, + outgoing_channel, &request->data_buf, - vstor_packet, - sizeof(struct vstor_packet), + vstor_packet, + sizeof(struct vstor_packet), (uint64_t)(uintptr_t)request); } else { ret = hv_vmbus_channel_send_packet( - device->channel, + outgoing_channel, vstor_packet, sizeof(struct vstor_packet), (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); } mtx_lock(&request->softc->hs_lock); if (ret != 0) { printf("Unable to send packet %p ret %d\n", vstor_packet, ret); } else { atomic_add_int(&sc->hs_num_out_reqs, 1); } return (ret); }
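hv_storvsc_io_request() above fans requests out through vmbus_select_outgoing_channel(), whose implementation lives in the vmbus code rather than in this file. Purely as an illustration of the idea, such a selector could round-robin over the primary channel's sub-channel list (locking elided for clarity):

/* Illustrative only; not the actual vmbus_select_outgoing_channel(). */
static struct hv_vmbus_channel *
example_select_outgoing_channel(struct hv_vmbus_channel *primary)
{
        static unsigned int next;
        struct hv_vmbus_channel *ch;
        unsigned int i, n = 0;

        TAILQ_FOREACH(ch, &primary->sc_list_anchor, sc_list_entry)
                n++;
        if (n == 0)
                return (primary);       /* no sub-channels opened */

        i = next++ % (n + 1);
        if (i == n)
                return (primary);       /* the primary takes a share too */
        TAILQ_FOREACH(ch, &primary->sc_list_anchor, sc_list_entry) {
                if (i-- == 0)
                        return (ch);
        }
        return (primary);
}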
/** * Process IO_COMPLETION_OPERATION and ready * the result to be completed for upper layer * processing by the CAM layer. */ static void hv_storvsc_on_iocompletion(struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request) { struct vmscsi_req *vm_srb; vm_srb = &vstor_packet->u.vm_srb; if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) && (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) { /* Autosense data available */ KASSERT(vm_srb->sense_info_len <= request->sense_info_len, ("vm_srb->sense_info_len <= " "request->sense_info_len")); memcpy(request->sense_data, vm_srb->u.sense_data, vm_srb->sense_info_len); request->sense_info_len = vm_srb->sense_info_len; } /* Complete request by passing to the CAM layer */ storvsc_io_done(request); atomic_subtract_int(&sc->hs_num_out_reqs, 1); if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) { sema_post(&sc->hs_drain_sema); } } static void hv_storvsc_on_channel_callback(void *context) { int ret = 0; - struct hv_device *device = (struct hv_device *)context; + hv_vmbus_channel *channel = (hv_vmbus_channel *)context; + struct hv_device *device = NULL; struct storvsc_softc *sc; uint32_t bytes_recvd; uint64_t request_id; uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)]; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; + if (channel->primary_channel != NULL){ + device = channel->primary_channel->device; + } else { + device = channel->device; + } + + KASSERT(device, ("device is NULL")); + sc = get_stor_device(device, FALSE); if (sc == NULL) { + printf("Storvsc_error: get stor device failed.\n"); return; } - KASSERT(device, ("device")); - ret = hv_vmbus_channel_recv_packet( - device->channel, + channel, packet, roundup2(sizeof(struct vstor_packet), 8), &bytes_recvd, &request_id); while ((ret == 0) && (bytes_recvd > 0)) { request = (struct hv_storvsc_request *)(uintptr_t)request_id; - KASSERT(request, ("request")); if ((request == &sc->hs_init_req) || (request == &sc->hs_reset_req)) { memcpy(&request->vstor_packet, packet, sizeof(struct vstor_packet)); - sema_post(&request->synch_sema); +
sema_post(&request->synch_sema); } else { vstor_packet = (struct vstor_packet *)packet; switch(vstor_packet->operation) { case VSTOR_OPERATION_COMPLETEIO: + if (request == NULL) + panic("VMBUS: storvsc received a " + "packet with NULL request id in " + "COMPLETEIO operation."); + hv_storvsc_on_iocompletion(sc, vstor_packet, request); break; case VSTOR_OPERATION_REMOVEDEVICE: + case VSTOR_OPERATION_ENUMERATE_BUS: + printf("VMBUS: storvsc operation %d not " + "implemented.\n", vstor_packet->operation); /* TODO: implement */ break; default: break; } } ret = hv_vmbus_channel_recv_packet( - device->channel, + channel, packet, roundup2(sizeof(struct vstor_packet), 8), &bytes_recvd, &request_id); } } /** * @brief StorVSC probe function * * Device probe function. Returns 0 if the input device is a StorVSC * device. Otherwise, ENXIO is returned. If the input device is * a BlkVSC (paravirtual IDE) device and this support is disabled in * favor of the emulated ATA/IDE device, return ENXIO. * * @param a device * @returns 0 on success, ENXIO if not a matching StorVSC device */ static int storvsc_probe(device_t dev) { int ata_disk_enable = 0; int ret = ENXIO; - + + if ((HV_VMBUS_VERSION_WIN8 == hv_vmbus_protocal_version) || + (HV_VMBUS_VERSION_WIN8_1 == hv_vmbus_protocal_version)){ + storvsc_current_major = STORVSC_WIN8_MAJOR; + storvsc_current_minor = STORVSC_WIN8_MINOR; + } else { + storvsc_current_major = STORVSC_WIN7_MAJOR; + storvsc_current_minor = STORVSC_WIN7_MINOR; + } + switch (storvsc_get_storage_type(dev)) { case DRIVER_BLKVSC: if(bootverbose) device_printf(dev, "DRIVER_BLKVSC-Emulated ATA/IDE probe\n"); if (!getenv_int("hw.ata.disk_enable", &ata_disk_enable)) { if(bootverbose) device_printf(dev, "Enlightened ATA/IDE detected\n"); ret = BUS_PROBE_DEFAULT; } else if(bootverbose) device_printf(dev, "Emulated ATA/IDE set (hw.ata.disk_enable set)\n"); break; case DRIVER_STORVSC: if(bootverbose) device_printf(dev, "Enlightened SCSI device detected\n"); ret = BUS_PROBE_DEFAULT; break; default: ret = ENXIO; } return (ret); } /** * @brief StorVSC attach function * * Function responsible for allocating per-device structures, * setting up CAM interfaces and scanning for available LUNs to * be used for SCSI device peripherals. * * @param a device * @returns 0 on success or an error on failure */ static int storvsc_attach(device_t dev) { struct hv_device *hv_dev = vmbus_get_devctx(dev); enum hv_storage_type stor_type; struct storvsc_softc *sc; struct cam_devq *devq; - int ret, i; + int ret, i, j; struct hv_storvsc_request *reqp; struct root_hold_token *root_mount_token = NULL; + struct hv_sgl_node *sgl_node = NULL; + void *tmp_buff = NULL; /* * We need to serialize storvsc attach calls.
*/ root_mount_token = root_mount_hold("storvsc"); sc = device_get_softc(dev); if (sc == NULL) { ret = ENOMEM; goto cleanup; } stor_type = storvsc_get_storage_type(dev); if (stor_type == DRIVER_UNKNOWN) { ret = ENODEV; goto cleanup; } bzero(sc, sizeof(struct storvsc_softc)); /* fill in driver specific properties */ sc->hs_drv_props = &g_drv_props_table[stor_type]; /* fill in device specific properties */ sc->hs_unit = device_get_unit(dev); sc->hs_dev = hv_dev; device_set_desc(dev, g_drv_props_table[stor_type].drv_desc); LIST_INIT(&sc->hs_free_list); mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF); for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) { reqp = malloc(sizeof(struct hv_storvsc_request), M_DEVBUF, M_WAITOK|M_ZERO); reqp->softc = sc; LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } + /* create sg-list page pool */ + if (FALSE == g_hv_sgl_page_pool.is_init) { + g_hv_sgl_page_pool.is_init = TRUE; + LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list); + LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list); + + /* + * Pre-create SG list, each SG list with + * HV_MAX_MULTIPAGE_BUFFER_COUNT segments, each + * segment has one page buffer + */ + for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) { + sgl_node = malloc(sizeof(struct hv_sgl_node), + M_DEVBUF, M_WAITOK|M_ZERO); + + sgl_node->sgl_data = + sglist_alloc(HV_MAX_MULTIPAGE_BUFFER_COUNT, + M_WAITOK|M_ZERO); + + for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) { + tmp_buff = malloc(PAGE_SIZE, + M_DEVBUF, M_WAITOK|M_ZERO); + + sgl_node->sgl_data->sg_segs[j].ss_paddr = + (vm_paddr_t)tmp_buff; + } + + LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, + sgl_node, link); + } + } + sc->hs_destroy = FALSE; sc->hs_drain_notify = FALSE; + sc->hs_open_multi_channel = FALSE; sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema"); ret = hv_storvsc_connect_vsp(hv_dev); if (ret != 0) { goto cleanup; } /* * Create the device queue. * Hyper-V maps each target to one SCSI HBA */ devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target); if (devq == NULL) { device_printf(dev, "Failed to alloc device queue\n"); ret = ENOMEM; goto cleanup; } sc->hs_sim = cam_sim_alloc(storvsc_action, storvsc_poll, sc->hs_drv_props->drv_name, sc, sc->hs_unit, &sc->hs_lock, 1, sc->hs_drv_props->drv_max_ios_per_target, devq); if (sc->hs_sim == NULL) { device_printf(dev, "Failed to alloc sim\n"); cam_simq_free(devq); ret = ENOMEM; goto cleanup; } mtx_lock(&sc->hs_lock); /* bus_id is set to 0, need to get it from VMBUS channel query? 
*/ if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) { cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to register SCSI bus\n"); ret = ENXIO; goto cleanup; } if (xpt_create_path(&sc->hs_path, /*periph*/NULL, cam_sim_path(sc->hs_sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_bus_deregister(cam_sim_path(sc->hs_sim)); cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to create path\n"); ret = ENXIO; goto cleanup; } mtx_unlock(&sc->hs_lock); root_mount_rel(root_mount_token); return (0); cleanup: root_mount_rel(root_mount_token); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); free(reqp, M_DEVBUF); } + + while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { + sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); + LIST_REMOVE(sgl_node, link); + for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) { + if (NULL != + (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { + free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); + } + } + sglist_free(sgl_node->sgl_data); + free(sgl_node, M_DEVBUF); + } + return (ret); } /** * @brief StorVSC device detach function * * This function is responsible for safely detaching a * StorVSC device. This includes waiting for inbound responses * to complete and freeing associated per-device structures. * * @param dev a device * returns 0 on success */ static int storvsc_detach(device_t dev) { struct storvsc_softc *sc = device_get_softc(dev); struct hv_storvsc_request *reqp = NULL; struct hv_device *hv_device = vmbus_get_devctx(dev); + struct hv_sgl_node *sgl_node = NULL; + int j = 0; mtx_lock(&hv_device->channel->inbound_lock); sc->hs_destroy = TRUE; mtx_unlock(&hv_device->channel->inbound_lock); /* * At this point, all outbound traffic should be disabled. We * only allow inbound traffic (responses) to proceed so that * outstanding requests can be completed. */ sc->hs_drain_notify = TRUE; sema_wait(&sc->hs_drain_sema); sc->hs_drain_notify = FALSE; /* * Since we have already drained, we don't need to busy wait. * The call to close the channel will reset the callback * under the protection of the incoming channel lock. */ hv_vmbus_channel_close(hv_device->channel); mtx_lock(&sc->hs_lock); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); free(reqp, M_DEVBUF); } mtx_unlock(&sc->hs_lock); + + while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { + sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); + LIST_REMOVE(sgl_node, link); + for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++){ + if (NULL != + (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { + free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); + } + } + sglist_free(sgl_node->sgl_data); + free(sgl_node, M_DEVBUF); + } + return (0); } #if HVS_TIMEOUT_TEST /** * @brief unit test for timed out operations * * This function provides unit testing capability to simulate * timed out operations. Recompilation with HV_TIMEOUT_TEST=1 * is required. 
* * @param reqp pointer to a request structure * @param opcode SCSI operation being performed * @param wait if 1, wait for I/O to complete */ static void storvsc_timeout_test(struct hv_storvsc_request *reqp, uint8_t opcode, int wait) { int ret; union ccb *ccb = reqp->ccb; struct storvsc_softc *sc = reqp->softc; if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) { return; } if (wait) { mtx_lock(&reqp->event.mtx); } ret = hv_storvsc_io_request(sc->hs_dev, reqp); if (ret != 0) { if (wait) { mtx_unlock(&reqp->event.mtx); } printf("%s: io_request failed with %d.\n", __func__, ret); ccb->ccb_h.status = CAM_PROVIDE_FAIL; mtx_lock(&sc->hs_lock); storvsc_free_request(sc, reqp); xpt_done(ccb); mtx_unlock(&sc->hs_lock); return; } if (wait) { xpt_print(ccb->ccb_h.path, "%u: %s: waiting for IO return.\n", ticks, __func__); ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz); mtx_unlock(&reqp->event.mtx); xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n", ticks, __func__, (ret == 0)? "IO return detected" : "IO return not detected"); - /* + /* * Now both the timer handler and io done are running * simultaneously. We want to confirm the io done always * finishes after the timer handler exits. So reqp used by * timer handler is not freed or stale. Do busy loop for * another 1/10 second to make sure io done does * wait for the timer handler to complete. */ DELAY(100*1000); mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: %s: finishing, queue frozen %d, " "ccb status 0x%x scsi_status 0x%x.\n", ticks, __func__, sc->hs_frozen, ccb->ccb_h.status, ccb->csio.scsi_status); mtx_unlock(&sc->hs_lock); } } #endif /* HVS_TIMEOUT_TEST */ /** * @brief timeout handler for requests * * This function is called as a result of a callout expiring. * * @param arg pointer to a request */ static void storvsc_timeout(void *arg) { struct hv_storvsc_request *reqp = arg; struct storvsc_softc *sc = reqp->softc; union ccb *ccb = reqp->ccb; if (reqp->retries == 0) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO timed out (req=0x%p), wait for another %u secs.\n", ticks, reqp, ccb->ccb_h.timeout / 1000); cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL); mtx_unlock(&sc->hs_lock); reqp->retries++; callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0); #endif return; } mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n", ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000, (sc->hs_frozen == 0)? "freezing the queue" : "the queue is already frozen"); if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1); } mtx_unlock(&sc->hs_lock); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, MODE_SELECT_10, 1); #endif } /** * @brief StorVSC device poll function * * This function is responsible for servicing requests when * interrupts are disabled (i.e when we are dumping core.) * * @param sim a pointer to a CAM SCSI interface module */ static void storvsc_poll(struct cam_sim *sim) { struct storvsc_softc *sc = cam_sim_softc(sim); mtx_assert(&sc->hs_lock, MA_OWNED); mtx_unlock(&sc->hs_lock); - hv_storvsc_on_channel_callback(sc->hs_dev); + hv_storvsc_on_channel_callback(sc->hs_dev->channel); mtx_lock(&sc->hs_lock); } /** * @brief StorVSC device action function * * This function is responsible for handling SCSI operations which * are passed from the CAM layer. 
The requests are in the form of * CAM control blocks which indicate the action being performed. * Not all actions require converting the request to a VSCSI protocol * message - these actions can be responded to by this driver. * Requests which are destined for a backend storage device are converted * to a VSCSI protocol message and sent on the channel connection associated * with this device. * * @param sim pointer to a CAM SCSI interface module * @param ccb pointer to a CAM control block */ static void storvsc_action(struct cam_sim *sim, union ccb *ccb) { struct storvsc_softc *sc = cam_sim_softc(sim); int res; mtx_assert(&sc->hs_lock, MA_OWNED); switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE; cpi->target_sprt = 0; cpi->hba_misc = PIM_NOBUSRESET; cpi->hba_eng_cnt = 0; cpi->max_target = STORVSC_MAX_TARGETS; cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target; cpi->initiator_id = cpi->max_target; cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 300000; cpi->transport = XPORT_SAS; cpi->transport_version = 0; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_SPC2; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; cts->transport = XPORT_SAS; cts->transport_version = 0; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_SPC2; /* enable tag queuing and disconnected mode */ cts->proto_specific.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB; cts->xport_specific.valid = CTS_SPI_VALID_DISC; cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB; ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_SET_TRAN_SETTINGS: { ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_CALC_GEOMETRY:{ cam_calc_geometry(&ccb->ccg, 1); xpt_done(ccb); return; } case XPT_RESET_BUS: case XPT_RESET_DEV:{ #if HVS_HOST_RESET if ((res = hv_storvsc_host_reset(sc->hs_dev)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_host_reset failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; xpt_done(ccb); return; } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; #else xpt_print(ccb->ccb_h.path, "%s reset not supported.\n", (ccb->ccb_h.func_code == XPT_RESET_BUS)? 
"bus" : "dev"); ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; #endif /* HVS_HOST_RESET */ } case XPT_SCSI_IO: case XPT_IMMED_NOTIFY: { struct hv_storvsc_request *reqp = NULL; if (ccb->csio.cdb_len == 0) { panic("cdl_len is 0\n"); } if (LIST_EMPTY(&sc->hs_free_list)) { ccb->ccb_h.status = CAM_REQUEUE_REQ; if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(sim, /* count*/1); } xpt_done(ccb); return; } reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); bzero(reqp, sizeof(struct hv_storvsc_request)); reqp->softc = sc; + + ccb->ccb_h.status |= CAM_SIM_QUEUED; + if ((res = create_storvsc_request(ccb, reqp)) != 0) { + ccb->ccb_h.status = CAM_REQ_INVALID; + xpt_done(ccb); + return; + } - ccb->ccb_h.status |= CAM_SIM_QUEUED; - create_storvsc_request(ccb, reqp); - if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_init(&reqp->callout, CALLOUT_MPSAFE); callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST cv_init(&reqp->event.cv, "storvsc timeout cv"); mtx_init(&reqp->event.mtx, "storvsc timeout mutex", NULL, MTX_DEF); switch (reqp->vstor_packet.vm_srb.cdb[0]) { case MODE_SELECT_10: case SEND_DIAGNOSTIC: /* To have timer send the request. */ return; default: break; } #endif /* HVS_TIMEOUT_TEST */ } if ((res = hv_storvsc_io_request(sc->hs_dev, reqp)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_io_request failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; storvsc_free_request(sc, reqp); xpt_done(ccb); return; } return; } default: ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; } } /** + * @brief destroy bounce buffer + * + * This function is responsible for destroy a Scatter/Gather list + * that create by storvsc_create_bounce_buffer() + * + * @param sgl- the Scatter/Gather need be destroy + * @param sg_count- page count of the SG list. + * + */ +static void +storvsc_destroy_bounce_buffer(struct sglist *sgl) +{ + struct hv_sgl_node *sgl_node = NULL; + + sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list); + LIST_REMOVE(sgl_node, link); + if (NULL == sgl_node) { + printf("storvsc error: not enough in use sgl\n"); + return; + } + sgl_node->sgl_data = sgl; + LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); +} + +/** + * @brief create bounce buffer + * + * This function is responsible for create a Scatter/Gather list, + * which hold several pages that can be aligned with page size. + * + * @param seg_count- SG-list segments count + * @param write - if WRITE_TYPE, set SG list page used size to 0, + * otherwise set used size to page size. + * + * return NULL if create failed + */ +static struct sglist * +storvsc_create_bounce_buffer(uint16_t seg_count, int write) +{ + int i = 0; + struct sglist *bounce_sgl = NULL; + unsigned int buf_len = ((write == WRITE_TYPE) ? 
+/** + * @brief copy data from SG list to bounce buffer + * + * This function is responsible for copying data from one SG list's segments + * to another SG list that is used as a bounce buffer. + * + * @param bounce_sgl - the destination SG list + * @param orig_sgl - the segments of the source SG list + * @param orig_sgl_count - the count of segments + * @param seg_bits - indicates which segments need the bounce buffer; + * a set bit means the segment needs one. + * + */ +static void +storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, + bus_dma_segment_t *orig_sgl, + unsigned int orig_sgl_count, + uint64_t seg_bits) +{ + int src_sgl_idx = 0; + + for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) { + if (seg_bits & (1 << src_sgl_idx)) { + memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr, + (void*)orig_sgl[src_sgl_idx].ds_addr, + orig_sgl[src_sgl_idx].ds_len); + + bounce_sgl->sg_segs[src_sgl_idx].ss_len = + orig_sgl[src_sgl_idx].ds_len; + } + } +} + +/** + * @brief copy data from an SG list used as a bounce buffer to another SG list + * + * This function is responsible for copying data from an SG list with a bounce + * buffer back to another SG list's segments. + * + * @param dest_sgl - the destination SG list's segments + * @param dest_sgl_count - the count of the destination SG list's segments + * @param src_sgl - the source SG list + * @param seg_bits - indicates which segments of the source SG list used the + * bounce buffer + * + */ +void +storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, + unsigned int dest_sgl_count, + struct sglist* src_sgl, + uint64_t seg_bits) +{ + int sgl_idx = 0; + + for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) { + if (seg_bits & (1 << sgl_idx)) { + memcpy((void*)(dest_sgl[sgl_idx].ds_addr), + (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr), + src_sgl->sg_segs[sgl_idx].ss_len); + } + } +} +
+/** + * @brief check whether an SG list needs a bounce buffer + * + * This function checks whether a bounce buffer is needed for the SG list. + * + * @param sgl - the SG list's segments + * @param sg_count - the count of the SG list's segments + * @param bits - a bitmask of the segments that need the bounce buffer + * + * return -1 if the SG list does not need a bounce buffer + */ +static int +storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl, + unsigned int sg_count, + uint64_t *bits) +{ + int i = 0; + int offset = 0; + uint64_t phys_addr = 0; + uint64_t tmp_bits = 0; + boolean_t found_hole = FALSE; + boolean_t pre_aligned = TRUE; + + if (sg_count < 2){ + return -1; + } + + *bits = 0; + + phys_addr = vtophys(sgl[0].ds_addr); + offset = phys_addr - trunc_page(phys_addr); + + if (offset != 0) { + pre_aligned = FALSE; + tmp_bits |= 1; + } + + for (i = 1; i < sg_count; i++) { + phys_addr = vtophys(sgl[i].ds_addr); + offset = phys_addr - trunc_page(phys_addr); + + if (offset == 0) { + if (FALSE == pre_aligned){ + /* + * This segment is aligned; if the previous + * one was not, we have found a hole. + */ + found_hole = TRUE; + } + pre_aligned = TRUE; + } else { + tmp_bits |= 1 << i; + if (!pre_aligned) { + if (phys_addr != vtophys(sgl[i-1].ds_addr + + sgl[i-1].ds_len)) { + /* + * Check whether this segment is + * contiguous with the previous one; + * if not, we have found a hole. + */ + found_hole = TRUE; + } + } else { + found_hole = TRUE; + } + pre_aligned = FALSE; + } + } + + if (!found_hole) { + return (-1); + } else { + *bits = tmp_bits; + return 0; + } +} +
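To make the bitmask concrete, a worked example under hypothetical addresses: for a three-segment list where only the middle segment starts mid-page, the walk above finds a hole and flags only that segment.

/*
 * Worked example (hypothetical layout):
 *
 *   seg[0]: starts page-aligned            -> bit 0 clear
 *   seg[1]: starts at offset 0x200         -> bit 1 set (tmp_bits |= 1 << 1),
 *           and pre_aligned was TRUE       -> found_hole = TRUE
 *   seg[2]: starts page-aligned after the
 *           unaligned seg[1]               -> bit 2 clear
 *
 * Result: the function returns 0 with *bits == 0x2, so
 * create_storvsc_request() below bounces only seg[1].
 */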
(ccb->ccb_h.flags & CAM_DATA_MASK) { + case CAM_DATA_VADDR: + { bytes_to_copy = csio->dxfer_len; phys_addr = vtophys(csio->data_ptr); - reqp->data_buf.offset = phys_addr - trunc_page(phys_addr); + reqp->data_buf.offset = phys_addr & PAGE_MASK; + + while (bytes_to_copy != 0) { + int bytes, page_offset; + phys_addr = + vtophys(&csio->data_ptr[reqp->data_buf.length - + bytes_to_copy]); + pfn = phys_addr >> PAGE_SHIFT; + reqp->data_buf.pfn_array[pfn_num] = pfn; + page_offset = phys_addr & PAGE_MASK; + + bytes = min(PAGE_SIZE - page_offset, bytes_to_copy); + + bytes_to_copy -= bytes; + pfn_num++; + } + break; } - while (bytes_to_copy != 0) { - int bytes, page_offset; - phys_addr = vtophys(&csio->data_ptr[reqp->data_buf.length - - bytes_to_copy]); - pfn = phys_addr >> PAGE_SHIFT; - reqp->data_buf.pfn_array[pfn_num] = pfn; - page_offset = phys_addr - trunc_page(phys_addr); + case CAM_DATA_SG: + { + int i = 0; + int offset = 0; + int ret; - bytes = min(PAGE_SIZE - page_offset, bytes_to_copy); + bus_dma_segment_t *storvsc_sglist = + (bus_dma_segment_t *)ccb->csio.data_ptr; + u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt; - bytes_to_copy -= bytes; - pfn_num++; + printf("Storvsc: get SG I/O operation, %d\n", + reqp->vstor_packet.u.vm_srb.data_in); + + if (storvsc_sg_count > HV_MAX_MULTIPAGE_BUFFER_COUNT){ + printf("Storvsc: %d segments is too much, " + "only support %d segments\n", + storvsc_sg_count, HV_MAX_MULTIPAGE_BUFFER_COUNT); + return (EINVAL); + } + + /* + * We create our own bounce buffer function currently. Idealy + * we should use BUS_DMA(9) framework. But with current BUS_DMA + * code there is no callback API to check the page alignment of + * middle segments before busdma can decide if a bounce buffer + * is needed for particular segment. There is callback, + * "bus_dma_filter_t *filter", but the parrameters are not + * sufficient for storvsc driver. + * TODO: + * Add page alignment check in BUS_DMA(9) callback. Once + * this is complete, switch the following code to use + * BUS_DMA(9) for storvsc bounce buffer support. 
+ */ + /* check if we need to create bounce buffer */ + ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist, + storvsc_sg_count, ¬_aligned_seg_bits); + if (ret != -1) { + reqp->bounce_sgl = + storvsc_create_bounce_buffer(storvsc_sg_count, + reqp->vstor_packet.u.vm_srb.data_in); + if (NULL == reqp->bounce_sgl) { + printf("Storvsc_error: " + "create bounce buffer failed.\n"); + return (ENOMEM); + } + + reqp->bounce_sgl_count = storvsc_sg_count; + reqp->not_aligned_seg_bits = not_aligned_seg_bits; + + /* + * if it is write, we need copy the original data + *to bounce buffer + */ + if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { + storvsc_copy_sgl_to_bounce_buf( + reqp->bounce_sgl, + storvsc_sglist, + storvsc_sg_count, + reqp->not_aligned_seg_bits); + } + + /* transfer virtual address to physical frame number */ + if (reqp->not_aligned_seg_bits & 0x1){ + phys_addr = + vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr); + }else{ + phys_addr = + vtophys(storvsc_sglist[0].ds_addr); + } + reqp->data_buf.offset = phys_addr & PAGE_MASK; + + pfn = phys_addr >> PAGE_SHIFT; + reqp->data_buf.pfn_array[0] = pfn; + + for (i = 1; i < storvsc_sg_count; i++) { + if (reqp->not_aligned_seg_bits & (1 << i)) { + phys_addr = + vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr); + } else { + phys_addr = + vtophys(storvsc_sglist[i].ds_addr); + } + + pfn = phys_addr >> PAGE_SHIFT; + reqp->data_buf.pfn_array[i] = pfn; + } + } else { + phys_addr = vtophys(storvsc_sglist[0].ds_addr); + + reqp->data_buf.offset = phys_addr & PAGE_MASK; + + for (i = 0; i < storvsc_sg_count; i++) { + phys_addr = vtophys(storvsc_sglist[i].ds_addr); + pfn = phys_addr >> PAGE_SHIFT; + reqp->data_buf.pfn_array[i] = pfn; + } + + /* check the last segment cross boundary or not */ + offset = phys_addr & PAGE_MASK; + if (offset) { + phys_addr = + vtophys(storvsc_sglist[i-1].ds_addr + + PAGE_SIZE - offset); + pfn = phys_addr >> PAGE_SHIFT; + reqp->data_buf.pfn_array[i] = pfn; + } + + reqp->bounce_sgl_count = 0; + } + break; } + default: + printf("Unknow flags: %d\n", ccb->ccb_h.flags); + return(EINVAL); + } + + return(0); } /** * @brief completion function before returning to CAM * * I/O process has been completed and the result needs * to be passed to the CAM layer. * Free resources related to this request. * * @param reqp pointer to a request structure */ static void storvsc_io_done(struct hv_storvsc_request *reqp) { union ccb *ccb = reqp->ccb; struct ccb_scsiio *csio = &ccb->csio; struct storvsc_softc *sc = reqp->softc; struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb; - + bus_dma_segment_t *ori_sglist = NULL; + int ori_sg_count = 0; + + /* destroy bounce buffer if it is used */ + if (reqp->bounce_sgl_count) { + ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr; + ori_sg_count = ccb->csio.sglist_cnt; + + /* + * If it is READ operation, we should copy back the data + * to original SG list. 
+ */ + if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { + storvsc_copy_from_bounce_buf_to_sgl(ori_sglist, + ori_sg_count, + reqp->bounce_sgl, + reqp->not_aligned_seg_bits); + } + + storvsc_destroy_bounce_buffer(reqp->bounce_sgl); + reqp->bounce_sgl_count = 0; + } + if (reqp->retries > 0) { mtx_lock(&sc->hs_lock); #if HVS_TIMEOUT_TEST xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "waking up timer handler if any.\n", ticks); mtx_lock(&reqp->event.mtx); cv_signal(&reqp->event.cv); mtx_unlock(&reqp->event.mtx); #endif reqp->retries = 0; xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "stopping timer if any.\n", ticks); mtx_unlock(&sc->hs_lock); } - /* + /* * callout_drain() will wait for the timer handler to finish * if it is running. So we don't need any lock to synchronize * between this routine and the timer handler. * Note that we need to make sure reqp is not freed when timer * handler is using or will use it. */ if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_drain(&reqp->callout); } ccb->ccb_h.status &= ~CAM_SIM_QUEUED; ccb->ccb_h.status &= ~CAM_STATUS_MASK; if (vm_srb->scsi_status == SCSI_STATUS_OK) { ccb->ccb_h.status |= CAM_REQ_CMP; } else { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "srovsc scsi_status = %d\n", vm_srb->scsi_status); mtx_unlock(&sc->hs_lock); ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; } ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF); ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len; if (reqp->sense_info_len != 0) { csio->sense_resid = csio->sense_len - reqp->sense_info_len; ccb->ccb_h.status |= CAM_AUTOSNS_VALID; } mtx_lock(&sc->hs_lock); if (reqp->softc->hs_frozen == 1) { xpt_print(ccb->ccb_h.path, "%u: storvsc unfreezing softc 0x%p.\n", ticks, reqp->softc); ccb->ccb_h.status |= CAM_RELEASE_SIMQ; reqp->softc->hs_frozen = 0; } storvsc_free_request(sc, reqp); xpt_done(ccb); mtx_unlock(&sc->hs_lock); } /** * @brief Free a request structure * * Free a request structure by returning it to the free list * * @param sc pointer to a softc * @param reqp pointer to a request structure */ static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp) { LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } /** * @brief Determine type of storage device from GUID * * Using the type GUID, determine if this is a StorVSC (paravirtual * SCSI or BlkVSC (paravirtual IDE) device. * * @param dev a device * returns an enum */ static enum hv_storage_type storvsc_get_storage_type(device_t dev) { const char *p = vmbus_get_type(dev); if (!memcmp(p, &gBlkVscDeviceType, sizeof(hv_guid))) { return DRIVER_BLKVSC; } else if (!memcmp(p, &gStorVscDeviceType, sizeof(hv_guid))) { return DRIVER_STORVSC; } return (DRIVER_UNKNOWN); } Index: head/sys/dev/hyperv/storvsc/hv_vstorage.h =================================================================== --- head/sys/dev/hyperv/storvsc/hv_vstorage.h (revision 282211) +++ head/sys/dev/hyperv/storvsc/hv_vstorage.h (revision 282212) @@ -1,233 +1,243 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __HV_VSTORAGE_H__ #define __HV_VSTORAGE_H__ /* * Major/minor macros. Minor version is in LSB, meaning that earlier flat * version numbers will be interpreted as "0.x" (i.e., 1 becomes 0.1). */ #define VMSTOR_PROTOCOL_MAJOR(VERSION_) (((VERSION_) >> 8) & 0xff) #define VMSTOR_PROTOCOL_MINOR(VERSION_) (((VERSION_) ) & 0xff) #define VMSTOR_PROTOCOL_VERSION(MAJOR_, MINOR_) ((((MAJOR_) & 0xff) << 8) | \ (((MINOR_) & 0xff) )) /* * Invalid version. */ #define VMSTOR_INVALID_PROTOCOL_VERSION -1 /* * Version history: * V1 Beta 0.1 * V1 RC < 2008/1/31 1.0 * V1 RC > 2008/1/31 2.0 */ -#define VMSTOR_PROTOCOL_VERSION_CURRENT VMSTOR_PROTOCOL_VERSION(2, 0) +#define VMSTOR_PROTOCOL_VERSION_CURRENT VMSTOR_PROTOCOL_VERSION(5, 1) /** * Packet structure ops describing virtual storage requests. */ enum vstor_packet_ops { VSTOR_OPERATION_COMPLETEIO = 1, VSTOR_OPERATION_REMOVEDEVICE = 2, VSTOR_OPERATION_EXECUTESRB = 3, VSTOR_OPERATION_RESETLUN = 4, VSTOR_OPERATION_RESETADAPTER = 5, VSTOR_OPERATION_RESETBUS = 6, VSTOR_OPERATION_BEGININITIALIZATION = 7, VSTOR_OPERATION_ENDINITIALIZATION = 8, VSTOR_OPERATION_QUERYPROTOCOLVERSION = 9, VSTOR_OPERATION_QUERYPROPERTIES = 10, - VSTOR_OPERATION_MAXIMUM = 10 + VSTOR_OPERATION_ENUMERATE_BUS = 11, + VSTOR_OPERATION_FCHBA_DATA = 12, + VSTOR_OPERATION_CREATE_MULTI_CHANNELS = 13, + VSTOR_OPERATION_MAXIMUM = 13 }; /* * Platform neutral description of a scsi request - * this remains the same across the write regardless of 32/64 bit * note: it's patterned off the Windows DDK SCSI_PASS_THROUGH structure */ #define CDB16GENERIC_LENGTH 0x10 #define SENSE_BUFFER_SIZE 0x12 #define MAX_DATA_BUFFER_LENGTH_WITH_PADDING 0x14 struct vmscsi_req { uint16_t length; uint8_t srb_status; uint8_t scsi_status; /* HBA number, set to the order number detected by initiator. */ uint8_t port; /* SCSI bus number or bus_id, different from CAM's path_id. */ uint8_t path_id; uint8_t target_id; uint8_t lun; uint8_t cdb_len; uint8_t sense_info_len; uint8_t data_in; uint8_t reserved; uint32_t transfer_len; union { uint8_t cdb[CDB16GENERIC_LENGTH]; uint8_t sense_data[SENSE_BUFFER_SIZE]; uint8_t reserved_array[MAX_DATA_BUFFER_LENGTH_WITH_PADDING]; } u; } __packed; /** * This structure is sent during the initialization phase to get the different * properties of the channel. 
*/ struct vmstor_chan_props { uint16_t proto_ver; uint8_t path_id; uint8_t target_id; + uint16_t max_channel_cnt; + /** * Note: port number is only really known on the client side */ - uint32_t port; + uint16_t port; uint32_t flags; uint32_t max_transfer_bytes; /** * This id is unique for each channel and will correspond with * vendor specific data in the inquiry_ata */ uint64_t unique_id; } __packed; /** * This structure is sent during the storage protocol negotiations. */ struct vmstor_proto_ver { /** * Major (MSW) and minor (LSW) version numbers. */ uint16_t major_minor; uint16_t revision; /* always zero */ } __packed; /** * Channel Property Flags */ #define STORAGE_CHANNEL_REMOVABLE_FLAG 0x1 #define STORAGE_CHANNEL_EMULATED_IDE_FLAG 0x2 struct vstor_packet { /** * Requested operation type */ enum vstor_packet_ops operation; /* * Flags - see below for values */ uint32_t flags; /** * Status of the request returned from the server side. */ uint32_t status; union { /** * Structure used to forward SCSI commands from the client to * the server. */ struct vmscsi_req vm_srb; /** * Structure used to query channel properties. */ struct vmstor_chan_props chan_props; /** * Used during version negotiations. */ struct vmstor_proto_ver version; + + /** + * Number of multichannels to create + */ + uint16_t multi_channels_cnt; } u; } __packed; /** * SRB (SCSI Request Block) Status Codes */ #define SRB_STATUS_PENDING 0x00 #define SRB_STATUS_SUCCESS 0x01 #define SRB_STATUS_ABORTED 0x02 #define SRB_STATUS_ABORT_FAILED 0x03 #define SRB_STATUS_ERROR 0x04 #define SRB_STATUS_BUSY 0x05 /** * SRB Status Masks (can be combined with above status codes) */ #define SRB_STATUS_QUEUE_FROZEN 0x40 #define SRB_STATUS_AUTOSENSE_VALID 0x80 /** * Packet flags */ /** * This flag indicates that the server should send back a completion for this * packet. */ #define REQUEST_COMPLETION_FLAG 0x1 /** * This is the set of flags that the vsc can set in any packets it sends */ #define VSC_LEGAL_FLAGS (REQUEST_COMPLETION_FLAG) #endif /* __HV_VSTORAGE_H__ */ Index: head/sys/dev/hyperv/utilities/hv_kvp.c =================================================================== --- head/sys/dev/hyperv/utilities/hv_kvp.c (revision 282211) +++ head/sys/dev/hyperv/utilities/hv_kvp.c (revision 282212) @@ -1,1001 +1,1002 @@ /*- * Copyright (c) 2014 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Author: Sainath Varanasi. * Date: 4/2012 * Email: bsdic@microsoft.com */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include "unicode.h" #include "hv_kvp.h" /* hv_kvp defines */ #define BUFFERSIZE sizeof(struct hv_kvp_msg) #define KVP_SUCCESS 0 #define KVP_ERROR 1 #define kvp_hdr hdr.kvp_hdr /* hv_kvp debug control */ static int hv_kvp_log = 0; SYSCTL_INT(_dev, OID_AUTO, hv_kvp_log, CTLFLAG_RW, &hv_kvp_log, 0, "hv_kvp log"); #define hv_kvp_log_error(...) do { \ if (hv_kvp_log > 0) \ log(LOG_ERR, "hv_kvp: " __VA_ARGS__); \ } while (0) #define hv_kvp_log_info(...) do { \ if (hv_kvp_log > 1) \ log(LOG_INFO, "hv_kvp: " __VA_ARGS__); \ } while (0) /* character device prototypes */ static d_open_t hv_kvp_dev_open; static d_close_t hv_kvp_dev_close; static d_read_t hv_kvp_dev_daemon_read; static d_write_t hv_kvp_dev_daemon_write; static d_poll_t hv_kvp_dev_daemon_poll; /* hv_kvp prototypes */ static int hv_kvp_req_in_progress(void); static void hv_kvp_transaction_init(uint32_t, hv_vmbus_channel *, uint64_t, uint8_t *); static void hv_kvp_send_msg_to_daemon(void); static void hv_kvp_process_request(void *context); /* hv_kvp character device structure */ static struct cdevsw hv_kvp_cdevsw = { .d_version = D_VERSION, .d_open = hv_kvp_dev_open, .d_close = hv_kvp_dev_close, .d_read = hv_kvp_dev_daemon_read, .d_write = hv_kvp_dev_daemon_write, .d_poll = hv_kvp_dev_daemon_poll, .d_name = "hv_kvp_dev", }; static struct cdev *hv_kvp_dev; static struct hv_kvp_msg *hv_kvp_dev_buf; struct proc *daemon_task; /* * Global state to track and synchronize multiple * KVP transaction requests from the host. */ static struct { /* Pre-allocated work item for queue */ hv_work_item work_item; /* Unless specified the pending mutex should be * used to alter the values of the following paramters: * 1. req_in_progress * 2. req_timed_out * 3. pending_reqs. */ struct mtx pending_mutex; /* To track if transaction is active or not */ boolean_t req_in_progress; /* Tracks if daemon did not reply back in time */ boolean_t req_timed_out; /* Tracks if daemon is serving a request currently */ boolean_t daemon_busy; /* Count of KVP requests from Hyper-V. 
*/ uint64_t pending_reqs; /* Length of host message */ uint32_t host_msg_len; /* Pointer to channel */ hv_vmbus_channel *channelp; /* Host message id */ uint64_t host_msg_id; /* Current kvp message from the host */ struct hv_kvp_msg *host_kvp_msg; /* Current kvp message for daemon */ struct hv_kvp_msg daemon_kvp_msg; /* Rcv buffer for communicating with the host*/ uint8_t *rcv_buf; /* Device semaphore to control communication */ struct sema dev_sema; /* Indicates if daemon registered with driver */ boolean_t register_done; /* Character device status */ boolean_t dev_accessed; } kvp_globals; /* global vars */ MALLOC_DECLARE(M_HV_KVP_DEV_BUF); MALLOC_DEFINE(M_HV_KVP_DEV_BUF, "hv_kvp_dev buffer", "buffer for hv_kvp_dev module"); /* * hv_kvp low level functions */ /* * Check if kvp transaction is in progres */ static int hv_kvp_req_in_progress(void) { return (kvp_globals.req_in_progress); } /* * This routine is called whenever a message is received from the host */ static void hv_kvp_transaction_init(uint32_t rcv_len, hv_vmbus_channel *rcv_channel, uint64_t request_id, uint8_t *rcv_buf) { /* Store all the relevant message details in the global structure */ /* Do not need to use mutex for req_in_progress here */ kvp_globals.req_in_progress = true; kvp_globals.host_msg_len = rcv_len; kvp_globals.channelp = rcv_channel; kvp_globals.host_msg_id = request_id; kvp_globals.rcv_buf = rcv_buf; kvp_globals.host_kvp_msg = (struct hv_kvp_msg *)&rcv_buf[ sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; } /* * hv_kvp - version neogtiation function */ static void hv_kvp_negotiate_version(struct hv_vmbus_icmsg_hdr *icmsghdrp, struct hv_vmbus_icmsg_negotiate *negop, uint8_t *buf) { int icframe_vercnt; int icmsg_vercnt; icmsghdrp->icmsgsize = 0x10; negop = (struct hv_vmbus_icmsg_negotiate *)&buf[ sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; icframe_vercnt = negop->icframe_vercnt; icmsg_vercnt = negop->icmsg_vercnt; /* * Select the framework version number we will support */ if ((icframe_vercnt >= 2) && (negop->icversion_data[1].major == 3)) { icframe_vercnt = 3; - if (icmsg_vercnt >= 2) + if (icmsg_vercnt > 2) icmsg_vercnt = 4; else icmsg_vercnt = 3; } else { icframe_vercnt = 1; icmsg_vercnt = 1; } negop->icframe_vercnt = 1; negop->icmsg_vercnt = 1; negop->icversion_data[0].major = icframe_vercnt; negop->icversion_data[0].minor = 0; negop->icversion_data[1].major = icmsg_vercnt; negop->icversion_data[1].minor = 0; } /* * Convert ip related info in umsg from utf8 to utf16 and store in hmsg */ static int hv_kvp_convert_utf8_ipinfo_to_utf16(struct hv_kvp_msg *umsg, struct hv_kvp_ip_msg *host_ip_msg) { int err_ip, err_subnet, err_gway, err_dns, err_adap; int UNUSED_FLAG = 1; utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.ip_addr, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.ip_addr, strlen((char *)umsg->body.kvp_ip_val.ip_addr), UNUSED_FLAG, &err_ip); utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.sub_net, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.sub_net, strlen((char *)umsg->body.kvp_ip_val.sub_net), UNUSED_FLAG, &err_subnet); utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.gate_way, MAX_GATEWAY_SIZE, (char *)umsg->body.kvp_ip_val.gate_way, strlen((char *)umsg->body.kvp_ip_val.gate_way), UNUSED_FLAG, &err_gway); utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.dns_addr, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.dns_addr, strlen((char *)umsg->body.kvp_ip_val.dns_addr), UNUSED_FLAG, &err_dns); utf8_to_utf16((uint16_t 
*)host_ip_msg->kvp_ip_val.adapter_id, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.adapter_id, strlen((char *)umsg->body.kvp_ip_val.adapter_id), UNUSED_FLAG, &err_adap); host_ip_msg->kvp_ip_val.dhcp_enabled = umsg->body.kvp_ip_val.dhcp_enabled; host_ip_msg->kvp_ip_val.addr_family = umsg->body.kvp_ip_val.addr_family; return (err_ip | err_subnet | err_gway | err_dns | err_adap); } /* * Convert ip related info in hmsg from utf16 to utf8 and store in umsg */ static int hv_kvp_convert_utf16_ipinfo_to_utf8(struct hv_kvp_ip_msg *host_ip_msg, struct hv_kvp_msg *umsg) { int err_ip, err_subnet, err_gway, err_dns, err_adap; int UNUSED_FLAG = 1; int guid_index; struct hv_device *hv_dev; /* GUID Data Structure */ hn_softc_t *sc; /* hn softc structure */ char if_name[4]; unsigned char guid_instance[40]; char *guid_data = NULL; char buf[39]; struct guid_extract { char a1[2]; char a2[2]; char a3[2]; char a4[2]; char b1[2]; char b2[2]; char c1[2]; char c2[2]; char d[4]; char e[12]; }; struct guid_extract *id; device_t *devs; int devcnt; /* IP Address */ utf16_to_utf8((char *)umsg->body.kvp_ip_val.ip_addr, MAX_IP_ADDR_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.ip_addr, MAX_IP_ADDR_SIZE, UNUSED_FLAG, &err_ip); /* Adapter ID : GUID */ utf16_to_utf8((char *)umsg->body.kvp_ip_val.adapter_id, MAX_ADAPTER_ID_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.adapter_id, MAX_ADAPTER_ID_SIZE, UNUSED_FLAG, &err_adap); if (devclass_get_devices(devclass_find("hn"), &devs, &devcnt) == 0) { for (devcnt = devcnt - 1; devcnt >= 0; devcnt--) { sc = device_get_softc(devs[devcnt]); /* Trying to find GUID of Network Device */ hv_dev = sc->hn_dev_obj; for (guid_index = 0; guid_index < 16; guid_index++) { sprintf(&guid_instance[guid_index * 2], "%02x", hv_dev->device_id.data[guid_index]); } guid_data = (char *)guid_instance; id = (struct guid_extract *)guid_data; snprintf(buf, sizeof(buf), "{%.2s%.2s%.2s%.2s-%.2s%.2s-%.2s%.2s-%.4s-%s}", id->a4, id->a3, id->a2, id->a1, id->b2, id->b1, id->c2, id->c1, id->d, id->e); guid_data = NULL; sprintf(if_name, "%s%d", "hn", device_get_unit(devs[devcnt])); if (strncmp(buf, (char *)umsg->body.kvp_ip_val.adapter_id, 39) == 0) { strcpy((char *)umsg->body.kvp_ip_val.adapter_id, if_name); break; } } free(devs, M_TEMP); } /* Address Family , DHCP , SUBNET, Gateway, DNS */ umsg->kvp_hdr.operation = host_ip_msg->operation; umsg->body.kvp_ip_val.addr_family = host_ip_msg->kvp_ip_val.addr_family; umsg->body.kvp_ip_val.dhcp_enabled = host_ip_msg->kvp_ip_val.dhcp_enabled; utf16_to_utf8((char *)umsg->body.kvp_ip_val.sub_net, MAX_IP_ADDR_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.sub_net, MAX_IP_ADDR_SIZE, UNUSED_FLAG, &err_subnet); utf16_to_utf8((char *)umsg->body.kvp_ip_val.gate_way, MAX_GATEWAY_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.gate_way, MAX_GATEWAY_SIZE, UNUSED_FLAG, &err_gway); utf16_to_utf8((char *)umsg->body.kvp_ip_val.dns_addr, MAX_IP_ADDR_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.dns_addr, MAX_IP_ADDR_SIZE, UNUSED_FLAG, &err_dns); return (err_ip | err_subnet | err_gway | err_dns | err_adap); } /* * Prepare a user kvp msg based on host kvp msg (utf16 to utf8) * Ensure utf16_utf8 takes care of the additional string terminating char!! 
*/ static void hv_kvp_convert_hostmsg_to_usermsg(void) { int utf_err = 0; uint32_t value_type; struct hv_kvp_ip_msg *host_ip_msg = (struct hv_kvp_ip_msg *) kvp_globals.host_kvp_msg; struct hv_kvp_msg *hmsg = kvp_globals.host_kvp_msg; struct hv_kvp_msg *umsg = &kvp_globals.daemon_kvp_msg; memset(umsg, 0, sizeof(struct hv_kvp_msg)); umsg->kvp_hdr.operation = hmsg->kvp_hdr.operation; umsg->kvp_hdr.pool = hmsg->kvp_hdr.pool; switch (umsg->kvp_hdr.operation) { case HV_KVP_OP_SET_IP_INFO: hv_kvp_convert_utf16_ipinfo_to_utf8(host_ip_msg, umsg); break; case HV_KVP_OP_GET_IP_INFO: utf16_to_utf8((char *)umsg->body.kvp_ip_val.adapter_id, MAX_ADAPTER_ID_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.adapter_id, MAX_ADAPTER_ID_SIZE, 1, &utf_err); umsg->body.kvp_ip_val.addr_family = host_ip_msg->kvp_ip_val.addr_family; break; case HV_KVP_OP_SET: value_type = hmsg->body.kvp_set.data.value_type; switch (value_type) { case HV_REG_SZ: umsg->body.kvp_set.data.value_size = utf16_to_utf8( (char *)umsg->body.kvp_set.data.msg_value.value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE - 1, (uint16_t *)hmsg->body.kvp_set.data.msg_value.value, hmsg->body.kvp_set.data.value_size, 1, &utf_err); /* utf8 encoding */ umsg->body.kvp_set.data.value_size = umsg->body.kvp_set.data.value_size / 2; break; case HV_REG_U32: umsg->body.kvp_set.data.value_size = sprintf(umsg->body.kvp_set.data.msg_value.value, "%d", hmsg->body.kvp_set.data.msg_value.value_u32) + 1; break; case HV_REG_U64: umsg->body.kvp_set.data.value_size = sprintf(umsg->body.kvp_set.data.msg_value.value, "%llu", (unsigned long long) hmsg->body.kvp_set.data.msg_value.value_u64) + 1; break; } umsg->body.kvp_set.data.key_size = utf16_to_utf8( umsg->body.kvp_set.data.key, HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1, (uint16_t *)hmsg->body.kvp_set.data.key, hmsg->body.kvp_set.data.key_size, 1, &utf_err); /* utf8 encoding */ umsg->body.kvp_set.data.key_size = umsg->body.kvp_set.data.key_size / 2; break; case HV_KVP_OP_GET: umsg->body.kvp_get.data.key_size = utf16_to_utf8(umsg->body.kvp_get.data.key, HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1, (uint16_t *)hmsg->body.kvp_get.data.key, hmsg->body.kvp_get.data.key_size, 1, &utf_err); /* utf8 encoding */ umsg->body.kvp_get.data.key_size = umsg->body.kvp_get.data.key_size / 2; break; case HV_KVP_OP_DELETE: umsg->body.kvp_delete.key_size = utf16_to_utf8(umsg->body.kvp_delete.key, HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1, (uint16_t *)hmsg->body.kvp_delete.key, hmsg->body.kvp_delete.key_size, 1, &utf_err); /* utf8 encoding */ umsg->body.kvp_delete.key_size = umsg->body.kvp_delete.key_size / 2; break; case HV_KVP_OP_ENUMERATE: umsg->body.kvp_enum_data.index = hmsg->body.kvp_enum_data.index; break; default: hv_kvp_log_info("%s: daemon_kvp_msg: Invalid operation : %d\n", __func__, umsg->kvp_hdr.operation); } } /* * Prepare a host kvp msg based on user kvp msg (utf8 to utf16) */ static int hv_kvp_convert_usermsg_to_hostmsg(void) { int hkey_len = 0, hvalue_len = 0, utf_err = 0; struct hv_kvp_exchg_msg_value *host_exchg_data; char *key_name, *value; struct hv_kvp_msg *umsg = &kvp_globals.daemon_kvp_msg; struct hv_kvp_msg *hmsg = kvp_globals.host_kvp_msg; struct hv_kvp_ip_msg *host_ip_msg = (struct hv_kvp_ip_msg *)hmsg; switch (hmsg->kvp_hdr.operation) { case HV_KVP_OP_GET_IP_INFO: return (hv_kvp_convert_utf8_ipinfo_to_utf16(umsg, host_ip_msg)); case HV_KVP_OP_SET_IP_INFO: case HV_KVP_OP_SET: case HV_KVP_OP_DELETE: return (KVP_SUCCESS); case HV_KVP_OP_ENUMERATE: host_exchg_data = &hmsg->body.kvp_enum_data.data; key_name = umsg->body.kvp_enum_data.data.key; hkey_len = 
utf8_to_utf16((uint16_t *)host_exchg_data->key, ((HV_KVP_EXCHANGE_MAX_KEY_SIZE / 2) - 2), key_name, strlen(key_name), 1, &utf_err); /* utf16 encoding */ host_exchg_data->key_size = 2 * (hkey_len + 1); value = umsg->body.kvp_enum_data.data.msg_value.value; hvalue_len = utf8_to_utf16( (uint16_t *)host_exchg_data->msg_value.value, ((HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2) - 2), value, strlen(value), 1, &utf_err); host_exchg_data->value_size = 2 * (hvalue_len + 1); host_exchg_data->value_type = HV_REG_SZ; if ((hkey_len < 0) || (hvalue_len < 0)) return (HV_KVP_E_FAIL); return (KVP_SUCCESS); case HV_KVP_OP_GET: host_exchg_data = &hmsg->body.kvp_get.data; value = umsg->body.kvp_get.data.msg_value.value; hvalue_len = utf8_to_utf16( (uint16_t *)host_exchg_data->msg_value.value, ((HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2) - 2), value, strlen(value), 1, &utf_err); /* Convert value size to uft16 */ host_exchg_data->value_size = 2 * (hvalue_len + 1); /* Use values by string */ host_exchg_data->value_type = HV_REG_SZ; if ((hkey_len < 0) || (hvalue_len < 0)) return (HV_KVP_E_FAIL); return (KVP_SUCCESS); default: return (HV_KVP_E_FAIL); } } /* * Send the response back to the host. */ static void hv_kvp_respond_host(int error) { struct hv_vmbus_icmsg_hdr *hv_icmsg_hdrp; hv_icmsg_hdrp = (struct hv_vmbus_icmsg_hdr *) &kvp_globals.rcv_buf[sizeof(struct hv_vmbus_pipe_hdr)]; if (error) error = HV_KVP_E_FAIL; hv_icmsg_hdrp->status = error; hv_icmsg_hdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; error = hv_vmbus_channel_send_packet(kvp_globals.channelp, kvp_globals.rcv_buf, kvp_globals.host_msg_len, kvp_globals.host_msg_id, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); if (error) hv_kvp_log_info("%s: hv_kvp_respond_host: sendpacket error:%d\n", __func__, error); } /* * This is the main kvp kernel process that interacts with both user daemon * and the host */ static void hv_kvp_send_msg_to_daemon(void) { /* Prepare kvp_msg to be sent to user */ hv_kvp_convert_hostmsg_to_usermsg(); /* Send the msg to user via function deamon_read - setting sema */ sema_post(&kvp_globals.dev_sema); } /* * Function to read the kvp request buffer from host * and interact with daemon */ static void hv_kvp_process_request(void *context) { uint8_t *kvp_buf; hv_vmbus_channel *channel = context; uint32_t recvlen = 0; uint64_t requestid; struct hv_vmbus_icmsg_hdr *icmsghdrp; int ret = 0; uint64_t pending_cnt = 1; hv_kvp_log_info("%s: entering hv_kvp_process_request\n", __func__); kvp_buf = receive_buffer[HV_KVP]; ret = hv_vmbus_channel_recv_packet(channel, kvp_buf, 2 * PAGE_SIZE, &recvlen, &requestid); /* * We start counting only after the daemon registers * and therefore there could be requests pending in * the VMBus that are not reflected in pending_cnt. * Therefore we continue reading as long as either of * the below conditions is true. */ while ((pending_cnt>0) || ((ret == 0) && (recvlen > 0))) { if ((ret == 0) && (recvlen>0)) { icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &kvp_buf[sizeof(struct hv_vmbus_pipe_hdr)]; hv_kvp_transaction_init(recvlen, channel, requestid, kvp_buf); if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { hv_kvp_negotiate_version(icmsghdrp, NULL, kvp_buf); hv_kvp_respond_host(ret); /* * It is ok to not acquire the mutex before setting * req_in_progress here because negotiation is the * first thing that happens and hence there is no * chance of a race condition. 
*/ kvp_globals.req_in_progress = false; hv_kvp_log_info("%s :version negotiated\n", __func__); } else { if (!kvp_globals.daemon_busy) { hv_kvp_log_info("%s: issuing qury to daemon\n", __func__); mtx_lock(&kvp_globals.pending_mutex); kvp_globals.req_timed_out = false; kvp_globals.daemon_busy = true; mtx_unlock(&kvp_globals.pending_mutex); hv_kvp_send_msg_to_daemon(); hv_kvp_log_info("%s: waiting for daemon\n", __func__); } /* Wait 5 seconds for daemon to respond back */ tsleep(&kvp_globals, 0, "kvpworkitem", 5 * hz); hv_kvp_log_info("%s: came out of wait\n", __func__); } } mtx_lock(&kvp_globals.pending_mutex); /* Notice that once req_timed_out is set to true * it will remain true until the next request is * sent to the daemon. The response from daemon * is forwarded to host only when this flag is * false. */ kvp_globals.req_timed_out = true; /* * Cancel request if so need be. */ if (hv_kvp_req_in_progress()) { hv_kvp_log_info("%s: request was still active after wait so failing\n", __func__); hv_kvp_respond_host(HV_KVP_E_FAIL); kvp_globals.req_in_progress = false; } /* * Decrement pending request count and */ if (kvp_globals.pending_reqs>0) { kvp_globals.pending_reqs = kvp_globals.pending_reqs - 1; } pending_cnt = kvp_globals.pending_reqs; mtx_unlock(&kvp_globals.pending_mutex); /* * Try reading next buffer */ recvlen = 0; ret = hv_vmbus_channel_recv_packet(channel, kvp_buf, 2 * PAGE_SIZE, &recvlen, &requestid); - hv_kvp_log_info("%s: read: context %p, pending_cnt %ju ret =%d, recvlen=%d\n", - __func__, context, pending_cnt, ret, recvlen); + hv_kvp_log_info("%s: read: context %p, pending_cnt %llu ret =%d, recvlen=%d\n", + __func__, context, (unsigned long long)pending_cnt, ret, recvlen); } } /* * Callback routine that gets called whenever there is a message from host */ void hv_kvp_callback(void *context) { uint64_t pending_cnt = 0; if (kvp_globals.register_done == false) { kvp_globals.channelp = context; } else { mtx_lock(&kvp_globals.pending_mutex); kvp_globals.pending_reqs = kvp_globals.pending_reqs + 1; pending_cnt = kvp_globals.pending_reqs; mtx_unlock(&kvp_globals.pending_mutex); if (pending_cnt == 1) { hv_kvp_log_info("%s: Queuing work item\n", __func__); hv_queue_work_item( service_table[HV_KVP].work_queue, hv_kvp_process_request, context ); } } } /* * This function is called by the hv_kvp_init - * creates character device hv_kvp_dev * allocates memory to hv_kvp_dev_buf * */ static int hv_kvp_dev_init(void) { int error = 0; /* initialize semaphore */ sema_init(&kvp_globals.dev_sema, 0, "hv_kvp device semaphore"); /* create character device */ error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &hv_kvp_dev, &hv_kvp_cdevsw, 0, UID_ROOT, GID_WHEEL, 0640, "hv_kvp_dev"); if (error != 0) return (error); /* * Malloc with M_WAITOK flag will never fail. 
*/ hv_kvp_dev_buf = malloc(sizeof(*hv_kvp_dev_buf), M_HV_KVP_DEV_BUF, M_WAITOK | M_ZERO); return (0); } /* * This function is called by the hv_kvp_deinit - * destroy character device */ static void hv_kvp_dev_destroy(void) { - if (daemon_task != NULL) { + if (daemon_task != NULL) { PROC_LOCK(daemon_task); - kern_psignal(daemon_task, SIGKILL); + kern_psignal(daemon_task, SIGKILL); PROC_UNLOCK(daemon_task); } destroy_dev(hv_kvp_dev); free(hv_kvp_dev_buf, M_HV_KVP_DEV_BUF); return; } static int hv_kvp_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { hv_kvp_log_info("%s: Opened device \"hv_kvp_device\" successfully.\n", __func__); if (kvp_globals.dev_accessed) return (-EBUSY); daemon_task = curproc; kvp_globals.dev_accessed = true; kvp_globals.daemon_busy = false; return (0); } static int hv_kvp_dev_close(struct cdev *dev __unused, int fflag __unused, int devtype __unused, struct thread *td __unused) { hv_kvp_log_info("%s: Closing device \"hv_kvp_device\".\n", __func__); kvp_globals.dev_accessed = false; kvp_globals.register_done = false; return (0); } /* * hv_kvp_daemon read invokes this function * acts as a send to daemon */ static int hv_kvp_dev_daemon_read(struct cdev *dev __unused, struct uio *uio, int ioflag __unused) { size_t amt; int error = 0; /* Check hv_kvp daemon registration status*/ if (!kvp_globals.register_done) return (KVP_ERROR); sema_wait(&kvp_globals.dev_sema); memcpy(hv_kvp_dev_buf, &kvp_globals.daemon_kvp_msg, sizeof(struct hv_kvp_msg)); amt = MIN(uio->uio_resid, uio->uio_offset >= BUFFERSIZE + 1 ? 0 : BUFFERSIZE + 1 - uio->uio_offset); if ((error = uiomove(hv_kvp_dev_buf, amt, uio)) != 0) hv_kvp_log_info("%s: hv_kvp uiomove read failed!\n", __func__); return (error); } /* * hv_kvp_daemon write invokes this function * acts as a recieve from daemon */ static int hv_kvp_dev_daemon_write(struct cdev *dev __unused, struct uio *uio, int ioflag __unused) { size_t amt; int error = 0; uio->uio_offset = 0; amt = MIN(uio->uio_resid, BUFFERSIZE); error = uiomove(hv_kvp_dev_buf, amt, uio); if (error != 0) return (error); memcpy(&kvp_globals.daemon_kvp_msg, hv_kvp_dev_buf, sizeof(struct hv_kvp_msg)); if (kvp_globals.register_done == false) { if (kvp_globals.daemon_kvp_msg.kvp_hdr.operation == HV_KVP_OP_REGISTER) { kvp_globals.register_done = true; if (kvp_globals.channelp) { hv_kvp_callback(kvp_globals.channelp); } } else { hv_kvp_log_info("%s, KVP Registration Failed\n", __func__); return (KVP_ERROR); } } else { mtx_lock(&kvp_globals.pending_mutex); if(!kvp_globals.req_timed_out) { hv_kvp_convert_usermsg_to_hostmsg(); hv_kvp_respond_host(KVP_SUCCESS); wakeup(&kvp_globals); kvp_globals.req_in_progress = false; } kvp_globals.daemon_busy = false; mtx_unlock(&kvp_globals.pending_mutex); } return (error); } /* * hv_kvp_daemon poll invokes this function to check if data is available * for daemon to read. */ static int hv_kvp_dev_daemon_poll(struct cdev *dev __unused, int events, struct thread *td __unused) { int revents = 0; mtx_lock(&kvp_globals.pending_mutex); /* * We check global flag daemon_busy for the data availiability for * userland to read. Deamon_busy is set to true before driver has data * for daemon to read. It is set to false after daemon sends * then response back to driver. */ if (kvp_globals.daemon_busy == true) revents = POLLIN; mtx_unlock(&kvp_globals.pending_mutex); return (revents); } /* * hv_kvp initialization function * called from hv_util service. 
* */ int hv_kvp_init(hv_vmbus_service *srv) { int error = 0; hv_work_queue *work_queue = NULL; memset(&kvp_globals, 0, sizeof(kvp_globals)); work_queue = hv_work_queue_create("KVP Service"); if (work_queue == NULL) { hv_kvp_log_info("%s: Work queue alloc failed\n", __func__); error = ENOMEM; hv_kvp_log_error("%s: ENOMEM\n", __func__); goto Finish; } srv->work_queue = work_queue; error = hv_kvp_dev_init(); mtx_init(&kvp_globals.pending_mutex, "hv-kvp pending mutex", NULL, MTX_DEF); kvp_globals.pending_reqs = 0; Finish: return (error); } void hv_kvp_deinit(void) { hv_kvp_dev_destroy(); mtx_destroy(&kvp_globals.pending_mutex); return; } Index: head/sys/dev/hyperv/utilities/hv_util.c =================================================================== --- head/sys/dev/hyperv/utilities/hv_util.c (revision 282211) +++ head/sys/dev/hyperv/utilities/hv_util.c (revision 282212) @@ -1,491 +1,500 @@ /*- * Copyright (c) 2014 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * A common driver for all hyper-V util services. */ #include #include #include #include #include #include #include #include #include #include "hv_kvp.h" /* Time Sync data */ typedef struct { uint64_t data; } time_sync_data; static void hv_shutdown_cb(void *context); static void hv_heartbeat_cb(void *context); static void hv_timesync_cb(void *context); static int hv_timesync_init(hv_vmbus_service *serv); /* * Note: GUID codes below are predefined by the host hypervisor * (Hyper-V and Azure)interface and required for correct operation. 
*/ hv_vmbus_service service_table[] = { /* Shutdown Service */ { .guid.data = {0x31, 0x60, 0x0B, 0X0E, 0x13, 0x52, 0x34, 0x49, 0x81, 0x8B, 0x38, 0XD9, 0x0C, 0xED, 0x39, 0xDB}, .name = "Hyper-V Shutdown Service\n", .enabled = TRUE, .callback = hv_shutdown_cb, }, /* Time Synch Service */ { .guid.data = {0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49, 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf}, .name = "Hyper-V Time Synch Service\n", .enabled = TRUE, .init = hv_timesync_init, .callback = hv_timesync_cb, }, /* Heartbeat Service */ { .guid.data = {0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e, 0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d}, .name = "Hyper-V Heartbeat Service\n", .enabled = TRUE, .callback = hv_heartbeat_cb, }, /* KVP (Key Value Pair) Service */ { .guid.data = {0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d, 0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3, 0xe6}, .name = "Hyper-V KVP Service\n", .enabled = TRUE, .init = hv_kvp_init, .callback = hv_kvp_callback, }, }; /* * Receive buffer pointers. There is one buffer per utility service. The * buffer is allocated during attach(). */ uint8_t *receive_buffer[HV_MAX_UTIL_SERVICES]; static boolean_t destroyed_kvp = FALSE; struct hv_ictimesync_data { uint64_t parenttime; uint64_t childtime; uint64_t roundtriptime; uint8_t flags; } __packed; static int hv_timesync_init(hv_vmbus_service *serv) { serv->work_queue = hv_work_queue_create("Time Sync"); if (serv->work_queue == NULL) return (ENOMEM); return (0); } static void hv_negotiate_version( struct hv_vmbus_icmsg_hdr* icmsghdrp, struct hv_vmbus_icmsg_negotiate* negop, uint8_t* buf) { icmsghdrp->icmsgsize = 0x10; negop = (struct hv_vmbus_icmsg_negotiate *)&buf[ sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; if (negop->icframe_vercnt >= 2 && negop->icversion_data[1].major == 3) { negop->icversion_data[0].major = 3; negop->icversion_data[0].minor = 0; negop->icversion_data[1].major = 3; negop->icversion_data[1].minor = 0; } else { negop->icversion_data[0].major = 1; negop->icversion_data[0].minor = 0; negop->icversion_data[1].major = 1; negop->icversion_data[1].minor = 0; } negop->icframe_vercnt = 1; negop->icmsg_vercnt = 1; } /** * Set host time based on time sync message from host */ static void hv_set_host_time(void *context) { time_sync_data* time_msg = (time_sync_data*) context; uint64_t hosttime = time_msg->data; struct timespec guest_ts, host_ts; uint64_t host_tns; int64_t diff; int error; host_tns = (hosttime - HV_WLTIMEDELTA) * 100; host_ts.tv_sec = (time_t)(host_tns/HV_NANO_SEC_PER_SEC); host_ts.tv_nsec = (long)(host_tns%HV_NANO_SEC_PER_SEC); nanotime(&guest_ts); diff = (int64_t)host_ts.tv_sec - (int64_t)guest_ts.tv_sec; /* * If host differs by 5 seconds then make the guest catch up */ if (diff > 5 || diff < -5) { error = kern_clock_settime(curthread, CLOCK_REALTIME, &host_ts); } /* * Free the hosttime that was allocated in hv_adj_guesttime() */ free(time_msg, M_DEVBUF); } /** * @brief Synchronize time with host after reboot, restore, etc. * * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM. * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time * message after the timesync channel is opened. Since the hv_utils module is * loaded after hv_vmbus, the first message is usually missed. The other * thing is, systime is automatically set to emulated hardware clock which may * not be UTC time or in the same time zone. So, to override these effects, we * use the first 50 time samples for initial system time setting. 
*/ static inline void hv_adj_guesttime(uint64_t hosttime, uint8_t flags) { time_sync_data* time_msg; time_msg = malloc(sizeof(time_sync_data), M_DEVBUF, M_NOWAIT); if (time_msg == NULL) return; time_msg->data = hosttime; if ((flags & HV_ICTIMESYNCFLAG_SYNC) != 0) { hv_queue_work_item(service_table[HV_TIME_SYNCH].work_queue, hv_set_host_time, time_msg); } else if ((flags & HV_ICTIMESYNCFLAG_SAMPLE) != 0) { hv_queue_work_item(service_table[HV_TIME_SYNCH].work_queue, hv_set_host_time, time_msg); } else { free(time_msg, M_DEVBUF); } } /** * Time Sync Channel message handler */ static void hv_timesync_cb(void *context) { hv_vmbus_channel* channel = context; hv_vmbus_icmsg_hdr* icmsghdrp; uint32_t recvlen; uint64_t requestId; int ret; uint8_t* time_buf; struct hv_ictimesync_data* timedatap; time_buf = receive_buffer[HV_TIME_SYNCH]; ret = hv_vmbus_channel_recv_packet(channel, time_buf, PAGE_SIZE, &recvlen, &requestId); if ((ret == 0) && recvlen > 0) { icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &time_buf[ sizeof(struct hv_vmbus_pipe_hdr)]; if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { hv_negotiate_version(icmsghdrp, NULL, time_buf); } else { timedatap = (struct hv_ictimesync_data *) &time_buf[ sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; hv_adj_guesttime(timedatap->parenttime, timedatap->flags); } icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; hv_vmbus_channel_send_packet(channel, time_buf, recvlen, requestId, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); } } /** * Shutdown */ static void hv_shutdown_cb(void *context) { uint8_t* buf; hv_vmbus_channel* channel = context; uint8_t execute_shutdown = 0; hv_vmbus_icmsg_hdr* icmsghdrp; uint32_t recv_len; uint64_t request_id; int ret; hv_vmbus_shutdown_msg_data* shutdown_msg; buf = receive_buffer[HV_SHUT_DOWN]; ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, &recv_len, &request_id); if ((ret == 0) && recv_len > 0) { icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &buf[sizeof(struct hv_vmbus_pipe_hdr)]; if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { hv_negotiate_version(icmsghdrp, NULL, buf); } else { shutdown_msg = (struct hv_vmbus_shutdown_msg_data *) &buf[sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; switch (shutdown_msg->flags) { case 0: case 1: icmsghdrp->status = HV_S_OK; execute_shutdown = 1; if(bootverbose) printf("Shutdown request received -" " graceful shutdown initiated\n"); break; default: icmsghdrp->status = HV_E_FAIL; execute_shutdown = 0; printf("Shutdown request received -" " Invalid request\n"); break; } } icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; hv_vmbus_channel_send_packet(channel, buf, recv_len, request_id, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); } if (execute_shutdown) shutdown_nice(RB_POWEROFF); } /** * Process heartbeat message */ static void hv_heartbeat_cb(void *context) { uint8_t* buf; hv_vmbus_channel* channel = context; uint32_t recvlen; uint64_t requestid; int ret; struct hv_vmbus_heartbeat_msg_data* heartbeat_msg; struct hv_vmbus_icmsg_hdr* icmsghdrp; buf = receive_buffer[HV_HEART_BEAT]; ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, &recvlen, &requestid); if ((ret == 0) && recvlen > 0) { icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &buf[sizeof(struct hv_vmbus_pipe_hdr)]; if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { hv_negotiate_version(icmsghdrp, NULL, buf); } else { heartbeat_msg = (struct hv_vmbus_heartbeat_msg_data *) &buf[sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct 
hv_vmbus_icmsg_hdr)]; heartbeat_msg->seq_num += 1; } icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; hv_vmbus_channel_send_packet(channel, buf, recvlen, requestid, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); } } static int hv_util_probe(device_t dev) { int i; int rtn_value = ENXIO; for (i = 0; i < HV_MAX_UTIL_SERVICES; i++) { const char *p = vmbus_get_type(dev); if (service_table[i].enabled && !memcmp(p, &service_table[i].guid, sizeof(hv_guid))) { device_set_softc(dev, (void *) (&service_table[i])); rtn_value = BUS_PROBE_DEFAULT; } } return rtn_value; } static int hv_util_attach(device_t dev) { struct hv_device* hv_dev; struct hv_vmbus_service* service; int ret; size_t receive_buffer_offset; hv_dev = vmbus_get_devctx(dev); service = device_get_softc(dev); receive_buffer_offset = service - &service_table[0]; device_printf(dev, "Hyper-V Service attaching: %s\n", service->name); receive_buffer[receive_buffer_offset] = malloc(4 * PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); if (service->init != NULL) { ret = service->init(service); if (ret) { ret = ENODEV; goto error0; } } + /* + * These services are not performance critical and do not need + * batched reading. Furthermore, some services such as KVP can + * only handle one message from the host at a time. + * Turn off batched reading for all util drivers before we open the + * channel. + */ + hv_set_channel_read_state(hv_dev->channel, FALSE); + ret = hv_vmbus_channel_open(hv_dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0, service->callback, hv_dev->channel); if (ret) goto error0; return (0); error0: free(receive_buffer[receive_buffer_offset], M_DEVBUF); receive_buffer[receive_buffer_offset] = NULL; return (ret); } static int hv_util_detach(device_t dev) { struct hv_device* hv_dev; struct hv_vmbus_service* service; size_t receive_buffer_offset; if (!destroyed_kvp) { hv_kvp_deinit(); destroyed_kvp = TRUE; } hv_dev = vmbus_get_devctx(dev); hv_vmbus_channel_close(hv_dev->channel); service = device_get_softc(dev); receive_buffer_offset = service - &service_table[0]; if (service->work_queue != NULL) hv_work_queue_close(service->work_queue); free(receive_buffer[receive_buffer_offset], M_DEVBUF); receive_buffer[receive_buffer_offset] = NULL; return (0); } static void hv_util_init(void) { } static int hv_util_modevent(module_t mod, int event, void *arg) { switch (event) { case MOD_LOAD: break; case MOD_UNLOAD: break; default: break; } return (0); } static device_method_t util_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hv_util_probe), DEVMETHOD(device_attach, hv_util_attach), DEVMETHOD(device_detach, hv_util_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), { 0, 0 } } ; static driver_t util_driver = { "hyperv-utils", util_methods, 0 }; static devclass_t util_devclass; DRIVER_MODULE(hv_utils, vmbus, util_driver, util_devclass, hv_util_modevent, 0); MODULE_VERSION(hv_utils, 1); MODULE_DEPEND(hv_utils, vmbus, 1, 1, 1); SYSINIT(hv_util_initx, SI_SUB_KTHREAD_IDLE, SI_ORDER_MIDDLE + 1, hv_util_init, NULL); Index: head/sys/dev/hyperv/vmbus/hv_channel.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_channel.c (revision 282211) +++ head/sys/dev/hyperv/vmbus/hv_channel.c (revision 282212) @@ -1,845 +1,881 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "hv_vmbus_priv.h" static int vmbus_channel_create_gpadl_header( /* must be phys and virt contiguous*/ void* contig_buffer, /* page-size multiple */ uint32_t size, hv_vmbus_channel_msg_info** msg_info, uint32_t* message_count); static void vmbus_channel_set_event(hv_vmbus_channel* channel); /** * @brief Trigger an event notification on the specified channel */ static void vmbus_channel_set_event(hv_vmbus_channel *channel) { hv_vmbus_monitor_page *monitor_page; if (channel->offer_msg.monitor_allocated) { /* Each uint32_t represents 32 channels */ synch_set_bit((channel->offer_msg.child_rel_id & 31), ((uint32_t *)hv_vmbus_g_connection.send_interrupt_page + ((channel->offer_msg.child_rel_id >> 5)))); monitor_page = (hv_vmbus_monitor_page *) hv_vmbus_g_connection.monitor_pages; monitor_page++; /* Get the child to parent monitor page */ synch_set_bit(channel->monitor_bit, (uint32_t *)&monitor_page-> trigger_group[channel->monitor_group].u.pending); } else { - hv_vmbus_set_event(channel->offer_msg.child_rel_id); + hv_vmbus_set_event(channel); } } /** * @brief Open the specified channel */ int hv_vmbus_channel_open( hv_vmbus_channel* new_channel, uint32_t send_ring_buffer_size, uint32_t recv_ring_buffer_size, void* user_data, uint32_t user_data_len, hv_vmbus_pfn_channel_callback pfn_on_channel_callback, void* context) { int ret = 0; void *in, *out; hv_vmbus_channel_open_channel* open_msg; hv_vmbus_channel_msg_info* open_info; + mtx_lock(&new_channel->sc_lock); + if (new_channel->state == HV_CHANNEL_OPEN_STATE) { + new_channel->state = HV_CHANNEL_OPENING_STATE; + } else { + mtx_unlock(&new_channel->sc_lock); + if(bootverbose) + printf("VMBUS: Trying to open channel <%p> which in " + "%d state.\n", new_channel, new_channel->state); + return (EINVAL); + } + mtx_unlock(&new_channel->sc_lock); + new_channel->on_channel_callback = pfn_on_channel_callback; new_channel->channel_callback_context = context; /* Allocate the ring buffer */ out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size), M_DEVBUF, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); KASSERT(out != NULL, ("Error VMBUS: contigmalloc failed to allocate Ring Buffer!")); if (out == NULL) 
return (ENOMEM); in = ((uint8_t *) out + send_ring_buffer_size); new_channel->ring_buffer_pages = out; new_channel->ring_buffer_page_count = (send_ring_buffer_size + recv_ring_buffer_size) >> PAGE_SHIFT; new_channel->ring_buffer_size = send_ring_buffer_size + recv_ring_buffer_size; hv_vmbus_ring_buffer_init( &new_channel->outbound, out, send_ring_buffer_size); hv_vmbus_ring_buffer_init( &new_channel->inbound, in, recv_ring_buffer_size); /** * Establish the gpadl for the ring buffer */ new_channel->ring_buffer_gpadl_handle = 0; ret = hv_vmbus_channel_establish_gpadl(new_channel, new_channel->outbound.ring_buffer, send_ring_buffer_size + recv_ring_buffer_size, &new_channel->ring_buffer_gpadl_handle); /** * Create and init the channel open message */ open_info = (hv_vmbus_channel_msg_info*) malloc( sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_open_channel), M_DEVBUF, M_NOWAIT); KASSERT(open_info != NULL, ("Error VMBUS: malloc failed to allocate Open Channel message!")); if (open_info == NULL) return (ENOMEM); sema_init(&open_info->wait_sema, 0, "Open Info Sema"); open_msg = (hv_vmbus_channel_open_channel*) open_info->msg; open_msg->header.message_type = HV_CHANNEL_MESSAGE_OPEN_CHANNEL; open_msg->open_id = new_channel->offer_msg.child_rel_id; open_msg->child_rel_id = new_channel->offer_msg.child_rel_id; open_msg->ring_buffer_gpadl_handle = new_channel->ring_buffer_gpadl_handle; open_msg->downstream_ring_buffer_page_offset = send_ring_buffer_size >> PAGE_SHIFT; - open_msg->server_context_area_gpadl_handle = 0; + open_msg->target_vcpu = new_channel->target_vcpu; if (user_data_len) memcpy(open_msg->user_data, user_data, user_data_len); mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL( &hv_vmbus_g_connection.channel_msg_anchor, open_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message( open_msg, sizeof(hv_vmbus_channel_open_channel)); if (ret != 0) goto cleanup; ret = sema_timedwait(&open_info->wait_sema, 500); /* KYS 5 seconds */ - if (ret) + if (ret) { + if(bootverbose) + printf("VMBUS: channel <%p> open timeout.\n", new_channel); goto cleanup; + } if (open_info->response.open_result.status == 0) { + new_channel->state = HV_CHANNEL_OPENED_STATE; if(bootverbose) printf("VMBUS: channel <%p> open success.\n", new_channel); } else { if(bootverbose) printf("Error VMBUS: channel <%p> open failed - %d!\n", new_channel, open_info->response.open_result.status); } cleanup: mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE( &hv_vmbus_g_connection.channel_msg_anchor, open_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); sema_destroy(&open_info->wait_sema); free(open_info, M_DEVBUF); return (ret); } /** * @brief Create a gpadl for the specified buffer */ static int vmbus_channel_create_gpadl_header( void* contig_buffer, uint32_t size, /* page-size multiple */ hv_vmbus_channel_msg_info** msg_info, uint32_t* message_count) { int i; int page_count; unsigned long long pfn; uint32_t msg_size; hv_vmbus_channel_gpadl_header* gpa_header; hv_vmbus_channel_gpadl_body* gpadl_body; hv_vmbus_channel_msg_info* msg_header; hv_vmbus_channel_msg_info* msg_body; int pfnSum, pfnCount, pfnLeft, pfnCurr, pfnSize; page_count = size >> PAGE_SHIFT; pfn = hv_get_phys_addr(contig_buffer) >> PAGE_SHIFT; /*do we need a gpadl body msg */ pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE - sizeof(hv_vmbus_channel_gpadl_header) - sizeof(hv_gpa_range); pfnCount = pfnSize / sizeof(uint64_t); if (page_count > 
pfnCount) { /* if(we need a gpadl body) */ /* fill in the header */ msg_size = sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_header) + sizeof(hv_gpa_range) + pfnCount * sizeof(uint64_t); msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT( msg_header != NULL, ("Error VMBUS: malloc failed to allocate Gpadl Message!")); if (msg_header == NULL) return (ENOMEM); TAILQ_INIT(&msg_header->sub_msg_list_anchor); msg_header->message_size = msg_size; gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg; gpa_header->range_count = 1; gpa_header->range_buf_len = sizeof(hv_gpa_range) + page_count * sizeof(uint64_t); gpa_header->range[0].byte_offset = 0; gpa_header->range[0].byte_count = size; for (i = 0; i < pfnCount; i++) { gpa_header->range[0].pfn_array[i] = pfn + i; } *msg_info = msg_header; *message_count = 1; pfnSum = pfnCount; pfnLeft = page_count - pfnCount; /* * figure out how many pfns we can fit */ pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE - sizeof(hv_vmbus_channel_gpadl_body); pfnCount = pfnSize / sizeof(uint64_t); /* * fill in the body */ while (pfnLeft) { if (pfnLeft > pfnCount) { pfnCurr = pfnCount; } else { pfnCurr = pfnLeft; } msg_size = sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_body) + pfnCurr * sizeof(uint64_t); msg_body = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT( msg_body != NULL, ("Error VMBUS: malloc failed to allocate Gpadl msg_body!")); if (msg_body == NULL) return (ENOMEM); msg_body->message_size = msg_size; (*message_count)++; gpadl_body = (hv_vmbus_channel_gpadl_body*) msg_body->msg; /* * gpadl_body->gpadl = kbuffer; */ for (i = 0; i < pfnCurr; i++) { gpadl_body->pfn[i] = pfn + pfnSum + i; } TAILQ_INSERT_TAIL( &msg_header->sub_msg_list_anchor, msg_body, msg_list_entry); pfnSum += pfnCurr; pfnLeft -= pfnCurr; } } else { /* else everything fits in a header */ msg_size = sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_header) + sizeof(hv_gpa_range) + page_count * sizeof(uint64_t); msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT( msg_header != NULL, ("Error VMBUS: malloc failed to allocate Gpadl Message!")); if (msg_header == NULL) return (ENOMEM); msg_header->message_size = msg_size; gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg; gpa_header->range_count = 1; gpa_header->range_buf_len = sizeof(hv_gpa_range) + page_count * sizeof(uint64_t); gpa_header->range[0].byte_offset = 0; gpa_header->range[0].byte_count = size; for (i = 0; i < page_count; i++) { gpa_header->range[0].pfn_array[i] = pfn + i; } *msg_info = msg_header; *message_count = 1; } return (0); } /** * @brief Establish a GPADL for the specified buffer */ int hv_vmbus_channel_establish_gpadl( hv_vmbus_channel* channel, void* contig_buffer, uint32_t size, /* page-size multiple */ uint32_t* gpadl_handle) { int ret = 0; hv_vmbus_channel_gpadl_header* gpadl_msg; hv_vmbus_channel_gpadl_body* gpadl_body; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_info* sub_msg_info; uint32_t msg_count; hv_vmbus_channel_msg_info* curr; uint32_t next_gpadl_handle; next_gpadl_handle = hv_vmbus_g_connection.next_gpadl_handle; atomic_add_int((int*) &hv_vmbus_g_connection.next_gpadl_handle, 1); ret = vmbus_channel_create_gpadl_header( contig_buffer, size, &msg_info, &msg_count); if(ret != 0) { /* if(allocation failed) return immediately */ /* reverse atomic_add_int above */ atomic_subtract_int((int*) &hv_vmbus_g_connection.next_gpadl_handle, 1); return ret; } sema_init(&msg_info->wait_sema, 0, "Open Info 
Sema"); gpadl_msg = (hv_vmbus_channel_gpadl_header*) msg_info->msg; gpadl_msg->header.message_type = HV_CHANNEL_MESSAGEL_GPADL_HEADER; gpadl_msg->child_rel_id = channel->offer_msg.child_rel_id; gpadl_msg->gpadl = next_gpadl_handle; mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL( &hv_vmbus_g_connection.channel_msg_anchor, msg_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message( gpadl_msg, msg_info->message_size - (uint32_t) sizeof(hv_vmbus_channel_msg_info)); if (ret != 0) goto cleanup; if (msg_count > 1) { TAILQ_FOREACH(curr, &msg_info->sub_msg_list_anchor, msg_list_entry) { sub_msg_info = curr; gpadl_body = (hv_vmbus_channel_gpadl_body*) sub_msg_info->msg; gpadl_body->header.message_type = HV_CHANNEL_MESSAGE_GPADL_BODY; gpadl_body->gpadl = next_gpadl_handle; ret = hv_vmbus_post_message( gpadl_body, sub_msg_info->message_size - (uint32_t) sizeof(hv_vmbus_channel_msg_info)); /* if (the post message failed) give up and clean up */ if(ret != 0) goto cleanup; } } ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds*/ if (ret != 0) goto cleanup; *gpadl_handle = gpadl_msg->gpadl; cleanup: mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor, msg_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); sema_destroy(&msg_info->wait_sema); free(msg_info, M_DEVBUF); return (ret); } /** * @brief Teardown the specified GPADL handle */ int hv_vmbus_channel_teardown_gpdal( hv_vmbus_channel* channel, uint32_t gpadl_handle) { int ret = 0; hv_vmbus_channel_gpadl_teardown* msg; hv_vmbus_channel_msg_info* info; info = (hv_vmbus_channel_msg_info *) malloc( sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_teardown), M_DEVBUF, M_NOWAIT); KASSERT(info != NULL, ("Error VMBUS: malloc failed to allocate Gpadl Teardown Msg!")); if (info == NULL) { ret = ENOMEM; goto cleanup; } sema_init(&info->wait_sema, 0, "Open Info Sema"); msg = (hv_vmbus_channel_gpadl_teardown*) info->msg; msg->header.message_type = HV_CHANNEL_MESSAGE_GPADL_TEARDOWN; msg->child_rel_id = channel->offer_msg.child_rel_id; msg->gpadl = gpadl_handle; mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_msg_anchor, info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_gpadl_teardown)); if (ret != 0) goto cleanup; ret = sema_timedwait(&info->wait_sema, 500); /* KYS 5 seconds */ cleanup: /* * Received a torndown response */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor, info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); sema_destroy(&info->wait_sema); free(info, M_DEVBUF); return (ret); } -/** - * @brief Close the specified channel - */ -void -hv_vmbus_channel_close(hv_vmbus_channel *channel) +static void +hv_vmbus_channel_close_internal(hv_vmbus_channel *channel) { int ret = 0; hv_vmbus_channel_close_channel* msg; hv_vmbus_channel_msg_info* info; + channel->state = HV_CHANNEL_OPEN_STATE; + channel->sc_creation_callback = NULL; + + /* + * Grab the lock to prevent race condition when a packet received + * and unloading driver is in the process. 
+ */ mtx_lock(&channel->inbound_lock); channel->on_channel_callback = NULL; mtx_unlock(&channel->inbound_lock); /** * Send a closing message */ info = (hv_vmbus_channel_msg_info *) malloc( sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_close_channel), M_DEVBUF, M_NOWAIT); KASSERT(info != NULL, ("VMBUS: malloc failed hv_vmbus_channel_close!")); if(info == NULL) return; msg = (hv_vmbus_channel_close_channel*) info->msg; msg->header.message_type = HV_CHANNEL_MESSAGE_CLOSE_CHANNEL; msg->child_rel_id = channel->offer_msg.child_rel_id; ret = hv_vmbus_post_message( msg, sizeof(hv_vmbus_channel_close_channel)); /* Tear down the gpadl for the channel's ring buffer */ if (channel->ring_buffer_gpadl_handle) { hv_vmbus_channel_teardown_gpdal(channel, channel->ring_buffer_gpadl_handle); } /* TODO: Send a msg to release the childRelId */ /* cleanup the ring buffers for this channel */ hv_ring_buffer_cleanup(&channel->outbound); hv_ring_buffer_cleanup(&channel->inbound); contigfree(channel->ring_buffer_pages, channel->ring_buffer_size, M_DEVBUF); free(info, M_DEVBUF); +} - /* - * If we are closing the channel during an error path in - * opening the channel, don't free the channel - * since the caller will free the channel - */ - if (channel->state == HV_CHANNEL_OPEN_STATE) { - mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); - TAILQ_REMOVE( - &hv_vmbus_g_connection.channel_anchor, - channel, - list_entry); - mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); +/** + * @brief Close the specified channel + */ +void +hv_vmbus_channel_close(hv_vmbus_channel *channel) +{ + hv_vmbus_channel* sub_channel; - hv_vmbus_free_vmbus_channel(channel); + if (channel->primary_channel != NULL) { + /* + * We only close multi-channels when the primary is + * closed. + */ + return; } + /* + * Close all multi-channels first. + */ + TAILQ_FOREACH(sub_channel, &channel->sc_list_anchor, + sc_list_entry) { + if (sub_channel->state != HV_CHANNEL_OPENED_STATE) + continue; + hv_vmbus_channel_close_internal(sub_channel); + } + /* + * Then close the primary channel. 
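+ * (A direct hv_vmbus_channel_close() on a sub-channel is a no-op,
+ * per the early return above; the sub-channels were already closed
+ * in the loop just before this.)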
+ */ + hv_vmbus_channel_close_internal(channel); } /** * @brief Send the specified buffer on the given channel */ int hv_vmbus_channel_send_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint64_t request_id, hv_vmbus_packet_type type, uint32_t flags) { int ret = 0; hv_vm_packet_descriptor desc; uint32_t packet_len; uint64_t aligned_data; uint32_t packet_len_aligned; + boolean_t need_sig; hv_vmbus_sg_buffer_list buffer_list[3]; packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len; packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); aligned_data = 0; /* Setup the descriptor */ desc.type = type; /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND; */ desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */ /* in 8-bytes granularity */ desc.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3; desc.length8 = (uint16_t) (packet_len_aligned >> 3); desc.transaction_id = request_id; buffer_list[0].data = &desc; buffer_list[0].length = sizeof(hv_vm_packet_descriptor); buffer_list[1].data = buffer; buffer_list[1].length = buffer_len; buffer_list[2].data = &aligned_data; buffer_list[2].length = packet_len_aligned - packet_len; - ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3); + ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3, + &need_sig); /* TODO: We should determine if this is optional */ - if (ret == 0 - && !hv_vmbus_get_ring_buffer_interrupt_mask( - &channel->outbound)) { + if (ret == 0 && need_sig) { vmbus_channel_set_event(channel); } return (ret); } /** * @brief Send a range of single-page buffer packets using * a GPADL Direct packet type */ int hv_vmbus_channel_send_packet_pagebuffer( hv_vmbus_channel* channel, hv_vmbus_page_buffer page_buffers[], uint32_t page_count, void* buffer, uint32_t buffer_len, uint64_t request_id) { int ret = 0; int i = 0; + boolean_t need_sig; uint32_t packet_len; uint32_t packetLen_aligned; hv_vmbus_sg_buffer_list buffer_list[3]; hv_vmbus_channel_packet_page_buffer desc; uint32_t descSize; uint64_t alignedData = 0; if (page_count > HV_MAX_PAGE_BUFFER_COUNT) return (EINVAL); /* * Adjust the size down since hv_vmbus_channel_packet_page_buffer * is the largest size we support */ descSize = sizeof(hv_vmbus_channel_packet_page_buffer) - ((HV_MAX_PAGE_BUFFER_COUNT - page_count) * sizeof(hv_vmbus_page_buffer)); packet_len = descSize + buffer_len; packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); /* Setup the descriptor */ desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT; desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; desc.data_offset8 = descSize >> 3; /* in 8-bytes granularity */ desc.length8 = (uint16_t) (packetLen_aligned >> 3); desc.transaction_id = request_id; desc.range_count = page_count; for (i = 0; i < page_count; i++) { desc.range[i].length = page_buffers[i].length; desc.range[i].offset = page_buffers[i].offset; desc.range[i].pfn = page_buffers[i].pfn; } buffer_list[0].data = &desc; buffer_list[0].length = descSize; buffer_list[1].data = buffer; buffer_list[1].length = buffer_len; buffer_list[2].data = &alignedData; buffer_list[2].length = packetLen_aligned - packet_len; - ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3); + ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3, + &need_sig); /* TODO: We should determine if this is optional */ - if (ret == 0 && - !hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound)) { + if (ret == 0 && need_sig) { vmbus_channel_set_event(channel); } return (ret); } /** * @brief Send a multi-page 
buffer packet using a GPADL Direct packet type */ int hv_vmbus_channel_send_packet_multipagebuffer( hv_vmbus_channel* channel, hv_vmbus_multipage_buffer* multi_page_buffer, void* buffer, uint32_t buffer_len, uint64_t request_id) { int ret = 0; uint32_t desc_size; + boolean_t need_sig; uint32_t packet_len; uint32_t packet_len_aligned; uint32_t pfn_count; uint64_t aligned_data = 0; hv_vmbus_sg_buffer_list buffer_list[3]; hv_vmbus_channel_packet_multipage_buffer desc; pfn_count = HV_NUM_PAGES_SPANNED( multi_page_buffer->offset, multi_page_buffer->length); if ((pfn_count == 0) || (pfn_count > HV_MAX_MULTIPAGE_BUFFER_COUNT)) return (EINVAL); /* * Adjust the size down since hv_vmbus_channel_packet_multipage_buffer * is the largest size we support */ desc_size = sizeof(hv_vmbus_channel_packet_multipage_buffer) - ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pfn_count) * sizeof(uint64_t)); packet_len = desc_size + buffer_len; packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); /* * Setup the descriptor */ desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT; desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; desc.data_offset8 = desc_size >> 3; /* in 8-bytes granularity */ desc.length8 = (uint16_t) (packet_len_aligned >> 3); desc.transaction_id = request_id; desc.range_count = 1; desc.range.length = multi_page_buffer->length; desc.range.offset = multi_page_buffer->offset; memcpy(desc.range.pfn_array, multi_page_buffer->pfn_array, pfn_count * sizeof(uint64_t)); buffer_list[0].data = &desc; buffer_list[0].length = desc_size; buffer_list[1].data = buffer; buffer_list[1].length = buffer_len; buffer_list[2].data = &aligned_data; buffer_list[2].length = packet_len_aligned - packet_len; - ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3); + ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3, + &need_sig); /* TODO: We should determine if this is optional */ - if (ret == 0 && - !hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound)) { + if (ret == 0 && need_sig) { vmbus_channel_set_event(channel); } return (ret); } /** * @brief Retrieve the user packet on the specified channel */ int hv_vmbus_channel_recv_packet( hv_vmbus_channel* channel, void* Buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id) { int ret; uint32_t user_len; uint32_t packet_len; hv_vm_packet_descriptor desc; *buffer_actual_len = 0; *request_id = 0; ret = hv_ring_buffer_peek(&channel->inbound, &desc, sizeof(hv_vm_packet_descriptor)); if (ret != 0) return (0); packet_len = desc.length8 << 3; user_len = packet_len - (desc.data_offset8 << 3); *buffer_actual_len = user_len; if (user_len > buffer_len) return (EINVAL); *request_id = desc.transaction_id; /* Copy over the packet to the user buffer */ ret = hv_ring_buffer_read(&channel->inbound, Buffer, user_len, (desc.data_offset8 << 3)); return (0); } /** * @brief Retrieve the raw packet on the specified channel */ int hv_vmbus_channel_recv_packet_raw( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id) { int ret; uint32_t packetLen; uint32_t userLen; hv_vm_packet_descriptor desc; *buffer_actual_len = 0; *request_id = 0; ret = hv_ring_buffer_peek( &channel->inbound, &desc, sizeof(hv_vm_packet_descriptor)); if (ret != 0) return (0); packetLen = desc.length8 << 3; userLen = packetLen - (desc.data_offset8 << 3); *buffer_actual_len = packetLen; if (packetLen > buffer_len) return (ENOBUFS); *request_id = desc.transaction_id; /* Copy over the entire packet to the user buffer 
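 * (unlike hv_vmbus_channel_recv_packet(), the descriptor is kept:
 * the read starts at offset 0 and returns all packetLen bytes)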
*/ ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0); return (0); } Index: head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c (revision 282211) +++ head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c (revision 282212) @@ -1,680 +1,863 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "hv_vmbus_priv.h" typedef void (*hv_pfn_channel_msg_handler)(hv_vmbus_channel_msg_header* msg); typedef struct hv_vmbus_channel_msg_table_entry { hv_vmbus_channel_msg_type messageType; hv_pfn_channel_msg_handler messageHandler; } hv_vmbus_channel_msg_table_entry; /* * Internal functions */ static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_process_offer(void *context); +struct hv_vmbus_channel* + vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary); /** * Channel message dispatch table */ hv_vmbus_channel_msg_table_entry g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = { { HV_CHANNEL_MESSAGE_INVALID, NULL }, { HV_CHANNEL_MESSAGE_OFFER_CHANNEL, vmbus_channel_on_offer }, { HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER, vmbus_channel_on_offer_rescind }, { HV_CHANNEL_MESSAGE_REQUEST_OFFERS, NULL }, { HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED, vmbus_channel_on_offers_delivered }, { HV_CHANNEL_MESSAGE_OPEN_CHANNEL, NULL }, { HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT, vmbus_channel_on_open_result }, { HV_CHANNEL_MESSAGE_CLOSE_CHANNEL, NULL }, { HV_CHANNEL_MESSAGEL_GPADL_HEADER, NULL }, { HV_CHANNEL_MESSAGE_GPADL_BODY, NULL }, { HV_CHANNEL_MESSAGE_GPADL_CREATED, vmbus_channel_on_gpadl_created }, { HV_CHANNEL_MESSAGE_GPADL_TEARDOWN, NULL }, { 
HV_CHANNEL_MESSAGE_GPADL_TORNDOWN, vmbus_channel_on_gpadl_torndown }, { HV_CHANNEL_MESSAGE_REL_ID_RELEASED, NULL }, { HV_CHANNEL_MESSAGE_INITIATED_CONTACT, NULL }, { HV_CHANNEL_MESSAGE_VERSION_RESPONSE, vmbus_channel_on_version_response }, { HV_CHANNEL_MESSAGE_UNLOAD, NULL } }; /** * Implementation of the work abstraction. */ static void work_item_callback(void *work, int pending) { struct hv_work_item *w = (struct hv_work_item *)work; /* * Serialize work execution. */ if (w->wq->work_sema != NULL) { sema_wait(w->wq->work_sema); } w->callback(w->context); if (w->wq->work_sema != NULL) { sema_post(w->wq->work_sema); } free(w, M_DEVBUF); } struct hv_work_queue* hv_work_queue_create(char* name) { static unsigned int qid = 0; char qname[64]; int pri; struct hv_work_queue* wq; wq = malloc(sizeof(struct hv_work_queue), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(wq != NULL, ("Error VMBUS: Failed to allocate work_queue\n")); if (wq == NULL) return (NULL); /* * We use work abstraction to handle messages * coming from the host and these are typically offers. * Some FreeBsd drivers appear to have a concurrency issue * where probe/attach needs to be serialized. We ensure that * by having only one thread process work elements in a * specific queue by serializing work execution. * */ if (strcmp(name, "vmbusQ") == 0) { pri = PI_DISK; } else { /* control */ pri = PI_NET; /* * Initialize semaphore for this queue by pointing * to the globale semaphore used for synchronizing all * control messages. */ wq->work_sema = &hv_vmbus_g_connection.control_sema; } sprintf(qname, "hv_%s_%u", name, qid); /* * Fixme: FreeBSD 8.2 has a different prototype for * taskqueue_create(), and for certain other taskqueue functions. * We need to research the implications of these changes. * Fixme: Not sure when the changes were introduced. */ wq->queue = taskqueue_create(qname, M_NOWAIT, taskqueue_thread_enqueue, &wq->queue #if __FreeBSD_version < 800000 , &wq->proc #endif ); if (wq->queue == NULL) { free(wq, M_DEVBUF); return (NULL); } if (taskqueue_start_threads(&wq->queue, 1, pri, "%s taskq", qname)) { taskqueue_free(wq->queue); free(wq, M_DEVBUF); return (NULL); } qid++; return (wq); } void hv_work_queue_close(struct hv_work_queue *wq) { /* * KYS: Need to drain the taskqueue * before we close the hv_work_queue. 
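 * Note that wq->work_sema, when set, points at the global
 * control_sema and is destroyed by the connection teardown,
 * not here.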
*/ /*KYS: taskqueue_drain(wq->tq, ); */ taskqueue_free(wq->queue); free(wq, M_DEVBUF); } /** * @brief Create work item */ int hv_queue_work_item( struct hv_work_queue *wq, void (*callback)(void *), void *context) { struct hv_work_item *w = malloc(sizeof(struct hv_work_item), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(w != NULL, ("Error VMBUS: Failed to allocate WorkItem\n")); if (w == NULL) return (ENOMEM); w->callback = callback; w->context = context; w->wq = wq; TASK_INIT(&w->work, 0, work_item_callback, w); return (taskqueue_enqueue(wq->queue, &w->work)); } /** * @brief Rescind the offer by initiating a device removal */ static void vmbus_channel_process_rescind_offer(void *context) { hv_vmbus_channel* channel = (hv_vmbus_channel*) context; hv_vmbus_child_device_unregister(channel->device); } /** * @brief Allocate and initialize a vmbus channel object */ hv_vmbus_channel* hv_vmbus_allocate_channel(void) { hv_vmbus_channel* channel; channel = (hv_vmbus_channel*) malloc( sizeof(hv_vmbus_channel), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(channel != NULL, ("Error VMBUS: Failed to allocate channel!")); if (channel == NULL) return (NULL); mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF); + mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF); + TAILQ_INIT(&channel->sc_list_anchor); + channel->control_work_queue = hv_work_queue_create("control"); if (channel->control_work_queue == NULL) { mtx_destroy(&channel->inbound_lock); free(channel, M_DEVBUF); return (NULL); } return (channel); } /** * @brief Release the vmbus channel object itself */ static inline void ReleaseVmbusChannel(void *context) { hv_vmbus_channel* channel = (hv_vmbus_channel*) context; hv_work_queue_close(channel->control_work_queue); free(channel, M_DEVBUF); } /** * @brief Release the resources used by the vmbus channel object */ void hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel) { + mtx_destroy(&channel->sc_lock); mtx_destroy(&channel->inbound_lock); /* * We have to release the channel's workqueue/thread in * the vmbus's workqueue/thread context * ie we can't destroy ourselves */ hv_queue_work_item(hv_vmbus_g_connection.work_queue, ReleaseVmbusChannel, (void *) channel); } /** * @brief Process the offer by creating a channel/device * associated with this offer */ static void vmbus_channel_process_offer(void *context) { - int ret; hv_vmbus_channel* new_channel; boolean_t f_new; hv_vmbus_channel* channel; + int ret; new_channel = (hv_vmbus_channel*) context; f_new = TRUE; channel = NULL; /* * Make sure this is a new offer */ - mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_lock(&hv_vmbus_g_connection.channel_lock); TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor, list_entry) { - if (!memcmp( - &channel->offer_msg.offer.interface_type, - &new_channel->offer_msg.offer.interface_type, - sizeof(hv_guid)) - && !memcmp( - &channel->offer_msg.offer.interface_instance, + if (memcmp(&channel->offer_msg.offer.interface_type, + &new_channel->offer_msg.offer.interface_type, + sizeof(hv_guid)) == 0 && + memcmp(&channel->offer_msg.offer.interface_instance, &new_channel->offer_msg.offer.interface_instance, - sizeof(hv_guid))) { - f_new = FALSE; - break; - } + sizeof(hv_guid)) == 0) { + f_new = FALSE; + break; + } } if (f_new) { - /* Insert at tail */ - TAILQ_INSERT_TAIL( - &hv_vmbus_g_connection.channel_anchor, - new_channel, - list_entry); + /* Insert at tail */ + TAILQ_INSERT_TAIL( + &hv_vmbus_g_connection.channel_anchor, + new_channel, + list_entry); } - 
mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); + /*XXX add new channel to percpu_list */ + if (!f_new) { + /* + * Check if this is a sub channel. + */ + if (new_channel->offer_msg.offer.sub_channel_index != 0) { + /* + * It is a sub channel offer, process it. + */ + new_channel->primary_channel = channel; + mtx_lock(&channel->sc_lock); + TAILQ_INSERT_TAIL( + &channel->sc_list_anchor, + new_channel, + sc_list_entry); + mtx_unlock(&channel->sc_lock); + + /* Insert new channel into channel_anchor. */ + printf("Storvsc get multi-channel offer, rel=%u.\n", + new_channel->offer_msg.child_rel_id); + mtx_lock(&hv_vmbus_g_connection.channel_lock); + TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor, + new_channel, list_entry); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); + + if(bootverbose) + printf("VMBUS: new multi-channel offer <%p>.\n", + new_channel); + + /*XXX add it to percpu_list */ + + new_channel->state = HV_CHANNEL_OPEN_STATE; + if (channel->sc_creation_callback != NULL) { + channel->sc_creation_callback(new_channel); + } + return; + } + hv_vmbus_free_vmbus_channel(new_channel); return; } + new_channel->state = HV_CHANNEL_OPEN_STATE; + /* * Start the process of binding this offer to the driver * (We need to set the device field before calling * hv_vmbus_child_device_add()) */ new_channel->device = hv_vmbus_child_device_create( new_channel->offer_msg.offer.interface_type, new_channel->offer_msg.offer.interface_instance, new_channel); /* - * TODO - the HV_CHANNEL_OPEN_STATE flag should not be set below - * but in the "open" channel request. The ret != 0 logic below - * doesn't take into account that a channel - * may have been opened successfully - */ - - /* * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. */ ret = hv_vmbus_child_device_register(new_channel->device); if (ret != 0) { - mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); - TAILQ_REMOVE( - &hv_vmbus_g_connection.channel_anchor, - new_channel, - list_entry); - mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); - hv_vmbus_free_vmbus_channel(new_channel); - } else { - /* - * This state is used to indicate a successful open - * so that when we do close the channel normally, - * we can clean up properly - */ - new_channel->state = HV_CHANNEL_OPEN_STATE; + mtx_lock(&hv_vmbus_g_connection.channel_lock); + TAILQ_REMOVE( + &hv_vmbus_g_connection.channel_anchor, + new_channel, + list_entry); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); + hv_vmbus_free_vmbus_channel(new_channel); + } +} +/** + * Array of device guids that are performance critical. We try to distribute + * the interrupt load for these devices across all online cpus. + */ +static const hv_guid high_perf_devices[] = { + {HV_NIC_GUID, }, + {HV_IDE_GUID, }, + {HV_SCSI_GUID, }, +}; + +enum { + PERF_CHN_NIC = 0, + PERF_CHN_IDE, + PERF_CHN_SCSI, + MAX_PERF_CHN, +}; + +/* + * We use this static number to distribute the channel interrupt load. + */ +static uint32_t next_vcpu; + +/** + * Starting with Win8, we can statically distribute the incoming + * channel interrupt load by binding a channel to VCPU. We + * implement here a simple round robin scheme for distributing + * the interrupt load. + * We will bind channels that are not performance critical to cpu 0 and + * performance critical channels (IDE, SCSI and Network) will be uniformly + * distributed across all available CPUs. 
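+ *
+ * For example (illustrative numbers only): with 4 online CPUs, the
+ * pre-increment of next_vcpu below assigns successive performance
+ * critical channels to cpus 1, 2, 3, 0, 1, ... in round robin order.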
+ */ +static void +vmbus_channel_select_cpu(hv_vmbus_channel *channel, hv_guid *guid) +{ + uint32_t current_cpu; + int i; + boolean_t is_perf_channel = FALSE; + + for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) { + if (memcmp(guid->data, high_perf_devices[i].data, + sizeof(hv_guid)) == 0) { + is_perf_channel = TRUE; + break; + } } + + if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || + (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) || + (!is_perf_channel)) { + /* Host's view of guest cpu */ + channel->target_vcpu = 0; + /* Guest's own view of cpu */ + channel->target_cpu = 0; + return; + } + /* mp_ncpus should have the number cpus currently online */ + current_cpu = (++next_vcpu % mp_ncpus); + channel->target_cpu = current_cpu; + channel->target_vcpu = + hv_vmbus_g_context.hv_vcpu_index[current_cpu]; + if (bootverbose) + printf("VMBUS: Total online cpus %d, assign perf channel %d " + "to vcpu %d, cpu %d\n", mp_ncpus, i, channel->target_vcpu, + current_cpu); } /** * @brief Handler for channel offers from Hyper-V/Azure * * Handler for channel offers from vmbus in parent partition. We ignore * all offers except network and storage offers. For each network and storage * offers, we create a channel object and queue a work item to the channel * object to process the offer synchronously */ static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_offer_channel* offer; hv_vmbus_channel* new_channel; offer = (hv_vmbus_channel_offer_channel*) hdr; hv_guid *guidType; hv_guid *guidInstance; guidType = &offer->offer.interface_type; guidInstance = &offer->offer.interface_instance; /* Allocate the channel object and save this offer */ new_channel = hv_vmbus_allocate_channel(); if (new_channel == NULL) return; + /* + * By default we setup state to enable batched + * reading. A specific service can choose to + * disable this prior to opening the channel. + */ + new_channel->batched_reading = TRUE; + + new_channel->signal_event_param = + (hv_vmbus_input_signal_event *) + (HV_ALIGN_UP((unsigned long) + &new_channel->signal_event_buffer, + HV_HYPERCALL_PARAM_ALIGN)); + + new_channel->signal_event_param->connection_id.as_uint32_t = 0; + new_channel->signal_event_param->connection_id.u.id = + HV_VMBUS_EVENT_CONNECTION_ID; + new_channel->signal_event_param->flag_number = 0; + new_channel->signal_event_param->rsvd_z = 0; + + if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) { + new_channel->is_dedicated_interrupt = + (offer->is_dedicated_interrupt != 0); + new_channel->signal_event_param->connection_id.u.id = + offer->connection_id; + } + + /* + * Bind the channel to a chosen cpu. + */ + vmbus_channel_select_cpu(new_channel, + &offer->offer.interface_type); + memcpy(&new_channel->offer_msg, offer, sizeof(hv_vmbus_channel_offer_channel)); new_channel->monitor_group = (uint8_t) offer->monitor_id / 32; new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32; /* TODO: Make sure the offer comes from our parent partition */ hv_queue_work_item( new_channel->control_work_queue, vmbus_channel_process_offer, new_channel); } /** * @brief Rescind offer handler. 
* * We queue a work item to process this offer * synchronously */ static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_rescind_offer* rescind; hv_vmbus_channel* channel; rescind = (hv_vmbus_channel_rescind_offer*) hdr; channel = hv_vmbus_get_channel_from_rel_id(rescind->child_rel_id); if (channel == NULL) return; hv_queue_work_item(channel->control_work_queue, vmbus_channel_process_rescind_offer, channel); } /** * * @brief Invoked when all offers have been delivered. */ static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr) { } /** * @brief Open result handler. * * This is invoked when we received a response * to our channel open request. Find the matching request, copy the * response and signal the requesting thread. */ static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_open_result* result; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_open_channel* openMsg; result = (hv_vmbus_channel_open_result*) hdr; /* * Find the open msg, copy the result and signal/unblock the wait event */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_OPEN_CHANNEL) { openMsg = (hv_vmbus_channel_open_channel*) msg_info->msg; if (openMsg->child_rel_id == result->child_rel_id && openMsg->open_id == result->open_id) { memcpy(&msg_info->response.open_result, result, sizeof(hv_vmbus_channel_open_result)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief GPADL created handler. * * This is invoked when we received a response * to our gpadl create request. Find the matching request, copy the * response and signal the requesting thread. */ static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_gpadl_created* gpadl_created; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* request_header; hv_vmbus_channel_gpadl_header* gpadl_header; gpadl_created = (hv_vmbus_channel_gpadl_created*) hdr; /* Find the establish msg, copy the result and signal/unblock * the wait event */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { request_header = (hv_vmbus_channel_msg_header*) msg_info->msg; if (request_header->message_type == HV_CHANNEL_MESSAGEL_GPADL_HEADER) { gpadl_header = (hv_vmbus_channel_gpadl_header*) request_header; if ((gpadl_created->child_rel_id == gpadl_header->child_rel_id) && (gpadl_created->gpadl == gpadl_header->gpadl)) { memcpy(&msg_info->response.gpadl_created, gpadl_created, sizeof(hv_vmbus_channel_gpadl_created)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief GPADL torndown handler. * * This is invoked when we received a respons * to our gpadl teardown request. 
Find the matching request, copy the * response and signal the requesting thread */ static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_gpadl_torndown* gpadl_torndown; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_gpadl_teardown* gpadlTeardown; gpadl_torndown = (hv_vmbus_channel_gpadl_torndown*)hdr; /* * Find the open msg, copy the result and signal/unblock the * wait event. */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_GPADL_TEARDOWN) { gpadlTeardown = (hv_vmbus_channel_gpadl_teardown*) requestHeader; if (gpadl_torndown->gpadl == gpadlTeardown->gpadl) { memcpy(&msg_info->response.gpadl_torndown, gpadl_torndown, sizeof(hv_vmbus_channel_gpadl_torndown)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief Version response handler. * * This is invoked when we received a response * to our initiate contact request. Find the matching request, copy th * response and signal the requesting thread. */ static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_initiate_contact* initiate; hv_vmbus_channel_version_response* versionResponse; versionResponse = (hv_vmbus_channel_version_response*)hdr; mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_INITIATED_CONTACT) { initiate = (hv_vmbus_channel_initiate_contact*) requestHeader; memcpy(&msg_info->response.version_response, versionResponse, sizeof(hv_vmbus_channel_version_response)); sema_post(&msg_info->wait_sema); } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief Handler for channel protocol messages. * * This is invoked in the vmbus worker thread context. */ void hv_vmbus_on_channel_message(void *context) { hv_vmbus_message* msg; hv_vmbus_channel_msg_header* hdr; int size; msg = (hv_vmbus_message*) context; hdr = (hv_vmbus_channel_msg_header*) msg->u.payload; size = msg->header.payload_size; if (hdr->message_type >= HV_CHANNEL_MESSAGE_COUNT) { free(msg, M_DEVBUF); return; } if (g_channel_message_table[hdr->message_type].messageHandler) { g_channel_message_table[hdr->message_type].messageHandler(hdr); } /* Free the msg that was allocated in VmbusOnMsgDPC() */ free(msg, M_DEVBUF); } /** * @brief Send a request to get all our pending offers. 
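 *
 * This only posts a HV_CHANNEL_MESSAGE_REQUEST_OFFERS message; the
 * offers themselves arrive asynchronously and are dispatched through
 * the channel message table to vmbus_channel_on_offer().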
*/ int hv_vmbus_request_channel_offers(void) { int ret; hv_vmbus_channel_msg_header* msg; hv_vmbus_channel_msg_info* msg_info; msg_info = (hv_vmbus_channel_msg_info *) malloc(sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_msg_header), M_DEVBUF, M_NOWAIT); if (msg_info == NULL) { if(bootverbose) printf("Error VMBUS: malloc failed for Request Offers\n"); return (ENOMEM); } msg = (hv_vmbus_channel_msg_header*) msg_info->msg; msg->message_type = HV_CHANNEL_MESSAGE_REQUEST_OFFERS; ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_msg_header)); if (msg_info) free(msg_info, M_DEVBUF); return (ret); } /** * @brief Release channels that are unattached/unconnected (i.e., no drivers associated) */ void hv_vmbus_release_unattached_channels(void) { hv_vmbus_channel *channel; - mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_lock(&hv_vmbus_g_connection.channel_lock); while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) { channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor); TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor, channel, list_entry); hv_vmbus_child_device_unregister(channel->device); hv_vmbus_free_vmbus_channel(channel); } - mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); +} + +/** + * @brief Select the best outgoing channel + * + * The channel whose vcpu binding is closest to the current vcpu will + * be selected. + * If there are no sub-channels, always select the primary channel. + * + * @param primary - primary channel + */ +struct hv_vmbus_channel * +vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary) +{ + hv_vmbus_channel *new_channel = NULL; + hv_vmbus_channel *outgoing_channel = primary; + int old_cpu_distance = 0; + int new_cpu_distance = 0; + int cur_vcpu = 0; + int smp_pro_id = PCPU_GET(cpuid); + + if (TAILQ_EMPTY(&primary->sc_list_anchor)) { + return outgoing_channel; + } + + if (smp_pro_id >= MAXCPU) { + return outgoing_channel; + } + + cur_vcpu = hv_vmbus_g_context.hv_vcpu_index[smp_pro_id]; + + TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) { + if (new_channel->state != HV_CHANNEL_OPENED_STATE){ + continue; + } + + if (new_channel->target_vcpu == cur_vcpu){ + return new_channel; + } + + old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ? + (outgoing_channel->target_vcpu - cur_vcpu) : + (cur_vcpu - outgoing_channel->target_vcpu)); + + new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ? + (new_channel->target_vcpu - cur_vcpu) : + (cur_vcpu - new_channel->target_vcpu)); + + if (old_cpu_distance < new_cpu_distance) { + continue; + } + + outgoing_channel = new_channel; + } + + return(outgoing_channel); } Property changes on: head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/dev/hyperv/vmbus/hv_connection.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_connection.c (revision 282211) +++ head/sys/dev/hyperv/vmbus/hv_connection.c (revision 282212) @@ -1,431 +1,559 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include "hv_vmbus_priv.h" /* * Globals */ hv_vmbus_connection hv_vmbus_g_connection = { .connect_state = HV_DISCONNECTED, .next_gpadl_handle = 0xE1E10, }; +uint32_t hv_vmbus_protocal_version = HV_VMBUS_VERSION_WS2008; + +static uint32_t +hv_vmbus_get_next_version(uint32_t current_ver) +{ + switch (current_ver) { + case (HV_VMBUS_VERSION_WIN7): + return(HV_VMBUS_VERSION_WS2008); + + case (HV_VMBUS_VERSION_WIN8): + return(HV_VMBUS_VERSION_WIN7); + + case (HV_VMBUS_VERSION_WIN8_1): + return(HV_VMBUS_VERSION_WIN8); + + case (HV_VMBUS_VERSION_WS2008): + default: + return(HV_VMBUS_VERSION_INVALID); + } +} + /** + * Negotiate the highest supported hypervisor version. 
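+ *
+ * The caller (hv_vmbus_connect) starts at HV_VMBUS_VERSION_CURRENT
+ * and, on each refusal, steps down via hv_vmbus_get_next_version():
+ * WIN8_1 -> WIN8 -> WIN7 -> WS2008 -> INVALID (give up).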
+ */ +static int +hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info, + uint32_t version) +{ + int ret = 0; + hv_vmbus_channel_initiate_contact *msg; + + sema_init(&msg_info->wait_sema, 0, "Msg Info Sema"); + msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg; + + msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT; + msg->vmbus_version_requested = version; + + msg->interrupt_page = hv_get_phys_addr( + hv_vmbus_g_connection.interrupt_page); + + msg->monitor_page_1 = hv_get_phys_addr( + hv_vmbus_g_connection.monitor_pages); + + msg->monitor_page_2 = + hv_get_phys_addr( + ((uint8_t *) hv_vmbus_g_connection.monitor_pages + + PAGE_SIZE)); + + /** + * Add to list before we send the request since we may receive the + * response before returning from this routine + */ + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + + TAILQ_INSERT_TAIL( + &hv_vmbus_g_connection.channel_msg_anchor, + msg_info, + msg_list_entry); + + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + + ret = hv_vmbus_post_message( + msg, + sizeof(hv_vmbus_channel_initiate_contact)); + + if (ret != 0) { + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + TAILQ_REMOVE( + &hv_vmbus_g_connection.channel_msg_anchor, + msg_info, + msg_list_entry); + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + return (ret); + } + + /** + * Wait for the connection response + */ + ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds */ + + mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + TAILQ_REMOVE( + &hv_vmbus_g_connection.channel_msg_anchor, + msg_info, + msg_list_entry); + mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + + /** + * Check if successful + */ + if (msg_info->response.version_response.version_supported) { + hv_vmbus_g_connection.connect_state = HV_CONNECTED; + } else { + ret = ECONNREFUSED; + } + + return (ret); +} + +/** * Send a connect request on the partition service connection */ int hv_vmbus_connect(void) { int ret = 0; + uint32_t version; hv_vmbus_channel_msg_info* msg_info = NULL; - hv_vmbus_channel_initiate_contact* msg; /** * Make sure we are not connecting or connected */ if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) { return (-1); } /** * Initialize the vmbus connection */ hv_vmbus_g_connection.connect_state = HV_CONNECTING; hv_vmbus_g_connection.work_queue = hv_work_queue_create("vmbusQ"); sema_init(&hv_vmbus_g_connection.control_sema, 1, "control_sema"); TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor); mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg", NULL, MTX_SPIN); TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor); mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel", - NULL, MTX_SPIN); + NULL, MTX_DEF); /** * Setup the vmbus event connection for channel interrupt abstraction * stuff */ hv_vmbus_g_connection.interrupt_page = contigmalloc( PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); KASSERT(hv_vmbus_g_connection.interrupt_page != NULL, ("Error VMBUS: malloc failed to allocate Channel" " Request Event message!")); if (hv_vmbus_g_connection.interrupt_page == NULL) { ret = ENOMEM; goto cleanup; } hv_vmbus_g_connection.recv_interrupt_page = hv_vmbus_g_connection.interrupt_page; hv_vmbus_g_connection.send_interrupt_page = ((uint8_t *) hv_vmbus_g_connection.interrupt_page + (PAGE_SIZE >> 1)); /** * Set up the monitor notification facility. 
The 1st page for * parent->child and the 2nd page for child->parent */ hv_vmbus_g_connection.monitor_pages = contigmalloc( 2 * PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); KASSERT(hv_vmbus_g_connection.monitor_pages != NULL, ("Error VMBUS: malloc failed to allocate Monitor Pages!")); if (hv_vmbus_g_connection.monitor_pages == NULL) { ret = ENOMEM; goto cleanup; } msg_info = (hv_vmbus_channel_msg_info*) malloc(sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_initiate_contact), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(msg_info != NULL, ("Error VMBUS: malloc failed for Initiate Contact message!")); if (msg_info == NULL) { ret = ENOMEM; goto cleanup; } - sema_init(&msg_info->wait_sema, 0, "Msg Info Sema"); - msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg; - - msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT; - msg->vmbus_version_requested = HV_VMBUS_REVISION_NUMBER; - - msg->interrupt_page = hv_get_phys_addr( - hv_vmbus_g_connection.interrupt_page); - - msg->monitor_page_1 = hv_get_phys_addr( - hv_vmbus_g_connection.monitor_pages); - - msg->monitor_page_2 = - hv_get_phys_addr( - ((uint8_t *) hv_vmbus_g_connection.monitor_pages - + PAGE_SIZE)); - - /** - * Add to list before we send the request since we may receive the - * response before returning from this routine + /* + * Find the highest vmbus version number we can support. */ - mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); + version = HV_VMBUS_VERSION_CURRENT; - TAILQ_INSERT_TAIL( - &hv_vmbus_g_connection.channel_msg_anchor, - msg_info, - msg_list_entry); + do { + ret = hv_vmbus_negotiate_version(msg_info, version); + if (ret == EWOULDBLOCK) { + /* + * We timed out. + */ + goto cleanup; + } - mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); + if (hv_vmbus_g_connection.connect_state == HV_CONNECTED) + break; - ret = hv_vmbus_post_message( - msg, - sizeof(hv_vmbus_channel_initiate_contact)); + version = hv_vmbus_get_next_version(version); + } while (version != HV_VMBUS_VERSION_INVALID); - if (ret != 0) { - mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); - TAILQ_REMOVE( - &hv_vmbus_g_connection.channel_msg_anchor, - msg_info, - msg_list_entry); - mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); - goto cleanup; - } + hv_vmbus_protocal_version = version; + if (bootverbose) + printf("VMBUS: Protocol Version: %d.%d\n", + version >> 16, version & 0xFFFF); - /** - * Wait for the connection response - */ - ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds */ - - mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); - TAILQ_REMOVE( - &hv_vmbus_g_connection.channel_msg_anchor, - msg_info, - msg_list_entry); - mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); - - /** - * Check if successful - */ - if (msg_info->response.version_response.version_supported) { - hv_vmbus_g_connection.connect_state = HV_CONNECTED; - } else { - ret = ECONNREFUSED; - goto cleanup; - } - sema_destroy(&msg_info->wait_sema); free(msg_info, M_DEVBUF); return (0); /* * Cleanup after failure! 
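 * Everything set up above is unwound: the work queue, the control
 * semaphore, both channel locks, the interrupt page, the monitor
 * pages and the msg_info allocation.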
*/ cleanup: hv_vmbus_g_connection.connect_state = HV_DISCONNECTED; hv_work_queue_close(hv_vmbus_g_connection.work_queue); sema_destroy(&hv_vmbus_g_connection.control_sema); mtx_destroy(&hv_vmbus_g_connection.channel_lock); mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); if (hv_vmbus_g_connection.interrupt_page != NULL) { contigfree( hv_vmbus_g_connection.interrupt_page, PAGE_SIZE, M_DEVBUF); hv_vmbus_g_connection.interrupt_page = NULL; } if (hv_vmbus_g_connection.monitor_pages != NULL) { contigfree( hv_vmbus_g_connection.monitor_pages, 2 * PAGE_SIZE, M_DEVBUF); hv_vmbus_g_connection.monitor_pages = NULL; } if (msg_info) { sema_destroy(&msg_info->wait_sema); free(msg_info, M_DEVBUF); } return (ret); } /** * Send a disconnect request on the partition service connection */ int hv_vmbus_disconnect(void) { int ret = 0; hv_vmbus_channel_unload* msg; msg = malloc(sizeof(hv_vmbus_channel_unload), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(msg != NULL, ("Error VMBUS: malloc failed to allocate Channel Unload Msg!")); if (msg == NULL) return (ENOMEM); msg->message_type = HV_CHANNEL_MESSAGE_UNLOAD; ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_unload)); contigfree(hv_vmbus_g_connection.interrupt_page, PAGE_SIZE, M_DEVBUF); mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); hv_work_queue_close(hv_vmbus_g_connection.work_queue); sema_destroy(&hv_vmbus_g_connection.control_sema); hv_vmbus_g_connection.connect_state = HV_DISCONNECTED; free(msg, M_DEVBUF); return (ret); } /** * Get the channel object given its child relative id (ie channel id) */ hv_vmbus_channel* hv_vmbus_get_channel_from_rel_id(uint32_t rel_id) { hv_vmbus_channel* channel; hv_vmbus_channel* foundChannel = NULL; /* * TODO: * Consider optimization where relids are stored in a fixed size array * and channels are accessed without the need to take this lock or search * the list. */ - mtx_lock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_lock(&hv_vmbus_g_connection.channel_lock); TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor, list_entry) { if (channel->offer_msg.child_rel_id == rel_id) { foundChannel = channel; break; } } - mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock); + mtx_unlock(&hv_vmbus_g_connection.channel_lock); return (foundChannel); } /** * Process a channel event notification */ static void VmbusProcessChannelEvent(uint32_t relid) { + void* arg; + uint32_t bytes_to_read; hv_vmbus_channel* channel; + boolean_t is_batched_reading; /** * Find the channel based on this relid and invokes * the channel callback to process the event */ channel = hv_vmbus_get_channel_from_rel_id(relid); if (channel == NULL) { return; } /** * To deal with the race condition where we might * receive a packet while the relevant driver is * being unloaded, dispatch the callback while * holding the channel lock. The unloading driver * will acquire the same channel lock to set the * callback to NULL. This closes the window. */ - mtx_lock(&channel->inbound_lock); + /* + * Disable the lock due to newly added WITNESS check in r277723. + * Will seek other way to avoid race condition. + * -- whu + */ + // mtx_lock(&channel->inbound_lock); if (channel->on_channel_callback != NULL) { - channel->on_channel_callback(channel->channel_callback_context); + arg = channel->channel_callback_context; + is_batched_reading = channel->batched_reading; + /* + * Optimize host to guest signaling by ensuring: + * 1. While reading the channel, we disable interrupts from + * host. + * 2. 
Ensure that we process all posted messages from the host + * before returning from this callback. + * 3. Once we return, enable signaling from the host. Once this + * state is set we check to see if additional packets are + * available to read. In this case we repeat the process. + */ + do { + if (is_batched_reading) + hv_ring_buffer_read_begin(&channel->inbound); + + channel->on_channel_callback(arg); + + if (is_batched_reading) + bytes_to_read = + hv_ring_buffer_read_end(&channel->inbound); + else + bytes_to_read = 0; + } while (is_batched_reading && (bytes_to_read != 0)); } - mtx_unlock(&channel->inbound_lock); + // mtx_unlock(&channel->inbound_lock); } +#ifdef HV_DEBUG_INTR +extern uint32_t hv_intr_count; +extern uint32_t hv_vmbus_swintr_event_cpu[MAXCPU]; +extern uint32_t hv_vmbus_intr_cpu[MAXCPU]; +#endif + /** * Handler for events */ void hv_vmbus_on_events(void *arg) { - int dword; int bit; + int cpu; + int dword; + void *page_addr; + uint32_t* recv_interrupt_page = NULL; int rel_id; - int maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5; + int maxdword; + hv_vmbus_synic_event_flags *event; /* int maxdword = PAGE_SIZE >> 3; */ - /* - * receive size is 1/2 page and divide that by 4 bytes - */ + cpu = (int)(long)arg; + KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: " + "cpu out of range!")); - uint32_t* recv_interrupt_page = - hv_vmbus_g_connection.recv_interrupt_page; +#ifdef HV_DEBUG_INTR + int i; + hv_vmbus_swintr_event_cpu[cpu]++; + if (hv_intr_count % 10000 == 0) { + printf("VMBUS: Total interrupt %d\n", hv_intr_count); + for (i = 0; i < mp_ncpus; i++) + printf("VMBUS: hw cpu[%d]: %d, event sw intr cpu[%d]: %d\n", + i, hv_vmbus_intr_cpu[i], i, hv_vmbus_swintr_event_cpu[i]); + } +#endif + if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || + (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) { + maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5; + /* + * receive size is 1/2 page and divide that by 4 bytes + */ + recv_interrupt_page = + hv_vmbus_g_connection.recv_interrupt_page; + } else { + /* + * On Host with Win8 or above, the event page can be + * checked directly to get the id of the channel + * that has the pending interrupt. + */ + maxdword = HV_EVENT_FLAGS_DWORD_COUNT; + page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu]; + event = (hv_vmbus_synic_event_flags *) + page_addr + HV_VMBUS_MESSAGE_SINT; + recv_interrupt_page = event->flags32; + } + /* * Check events */ if (recv_interrupt_page != NULL) { for (dword = 0; dword < maxdword; dword++) { if (recv_interrupt_page[dword]) { for (bit = 0; bit < 32; bit++) { if (synch_test_and_clear_bit(bit, (uint32_t *) &recv_interrupt_page[dword])) { rel_id = (dword << 5) + bit; if (rel_id == 0) { /* * Special case - * vmbus channel protocol msg. 
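 * rel_id 0 never maps to a real channel, so it is skipped.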
*/ continue; } else { VmbusProcessChannelEvent(rel_id); } } } } } } return; } /** * Send a msg on the vmbus's message connection */ int hv_vmbus_post_message(void *buffer, size_t bufferLen) { int ret = 0; hv_vmbus_connection_id connId; unsigned retries = 0; /* NetScaler delays from previous code were consolidated here */ static int delayAmount[] = {100, 100, 100, 500, 500, 5000, 5000, 5000}; /* for(each entry in delayAmount) try to post message, * delay a little bit before retrying */ for (retries = 0; retries < sizeof(delayAmount)/sizeof(delayAmount[0]); retries++) { connId.as_uint32_t = 0; connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID; ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer, bufferLen); if (ret != HV_STATUS_INSUFFICIENT_BUFFERS) break; /* TODO: KYS We should use a blocking wait call */ DELAY(delayAmount[retries]); } KASSERT(ret == 0, ("Error VMBUS: Message Post Failed\n")); return (ret); } /** * Send an event notification to the parent */ int -hv_vmbus_set_event(uint32_t child_rel_id) { +hv_vmbus_set_event(hv_vmbus_channel *channel) { int ret = 0; + uint32_t child_rel_id = channel->offer_msg.child_rel_id; /* Each uint32_t represents 32 channels */ synch_set_bit(child_rel_id & 31, (((uint32_t *)hv_vmbus_g_connection.send_interrupt_page + (child_rel_id >> 5)))); - ret = hv_vmbus_signal_event(); + ret = hv_vmbus_signal_event(channel->signal_event_param); return (ret); } - Property changes on: head/sys/dev/hyperv/vmbus/hv_connection.c ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/dev/hyperv/vmbus/hv_hv.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_hv.c (revision 282211) +++ head/sys/dev/hyperv/vmbus/hv_hv.c (revision 282212) @@ -1,504 +1,470 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ /** * Implements low-level interactions with Hyper-V/Azure */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "hv_vmbus_priv.h" #define HV_X64_MSR_GUEST_OS_ID 0x40000000 #define HV_X64_CPUID_MIN 0x40000005 #define HV_X64_CPUID_MAX 0x4000ffff #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 #define HV_NANOSECONDS_PER_SEC 1000000000L static u_int hv_get_timecount(struct timecounter *tc); static inline void do_cpuid_inline(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { __asm__ __volatile__("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "c" (*ecx)); } /** * Globals */ hv_vmbus_context hv_vmbus_g_context = { .syn_ic_initialized = FALSE, .hypercall_page = NULL, - .signal_event_param = NULL, - .signal_event_buffer = NULL, }; static struct timecounter hv_timecounter = { hv_get_timecount, 0, ~0u, HV_NANOSECONDS_PER_SEC/100, "Hyper-V", HV_NANOSECONDS_PER_SEC/100 }; static u_int hv_get_timecount(struct timecounter *tc) { u_int now = rdmsr(HV_X64_MSR_TIME_REF_COUNT); return (now); } /** * @brief Query the cpuid for presence of windows hypervisor */ int hv_vmbus_query_hypervisor_presence(void) { u_int regs[4]; int hyper_v_detected = 0; /* * When Xen is detected and native Xen PV support is enabled, * ignore Xen's HyperV emulation. */ if (vm_guest == VM_GUEST_XEN) return (0); do_cpuid(1, regs); if (regs[2] & 0x80000000) { /* if(a hypervisor is detected) */ /* make sure this really is Hyper-V */ /* we look at the CPUID info */ do_cpuid(HV_X64_MSR_GUEST_OS_ID, regs); hyper_v_detected = regs[0] >= HV_X64_CPUID_MIN && regs[0] <= HV_X64_CPUID_MAX && !memcmp("Microsoft Hv", &regs[1], 12); } return (hyper_v_detected); } /** * @brief Get version of the windows hypervisor */ static int hv_vmbus_get_hypervisor_version(void) { unsigned int eax; unsigned int ebx; unsigned int ecx; unsigned int edx; unsigned int maxLeaf; unsigned int op; /* * It's assumed that this is called after confirming that * Viridian is present. * Query id and revision. */ eax = 0; ebx = 0; ecx = 0; edx = 0; op = HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION; do_cpuid_inline(op, &eax, &ebx, &ecx, &edx); maxLeaf = eax; eax = 0; ebx = 0; ecx = 0; edx = 0; op = HV_CPU_ID_FUNCTION_HV_INTERFACE; do_cpuid_inline(op, &eax, &ebx, &ecx, &edx); if (maxLeaf >= HV_CPU_ID_FUNCTION_MS_HV_VERSION) { eax = 0; ebx = 0; ecx = 0; edx = 0; op = HV_CPU_ID_FUNCTION_MS_HV_VERSION; do_cpuid_inline(op, &eax, &ebx, &ecx, &edx); } return (maxLeaf); } /** * @brief Invoke the specified hypercall */ static uint64_t hv_vmbus_do_hypercall(uint64_t control, void* input, void* output) { #ifdef __x86_64__ uint64_t hv_status = 0; uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0; uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0; volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page; __asm__ __volatile__ ("mov %0, %%r8" : : "r" (output_address): "r8"); __asm__ __volatile__ ("call *%3" : "=a"(hv_status): "c" (control), "d" (input_address), "m" (hypercall_page)); return (hv_status); #else uint32_t control_high = control >> 32; uint32_t control_low = control & 0xFFFFFFFF; uint32_t hv_status_high = 1; uint32_t hv_status_low = 1; uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0; uint32_t input_address_high = input_address >> 32; uint32_t input_address_low = input_address & 0xFFFFFFFF; uint64_t output_address = (output) ?
hv_get_phys_addr(output) : 0; uint32_t output_address_high = output_address >> 32; uint32_t output_address_low = output_address & 0xFFFFFFFF; volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page; __asm__ __volatile__ ("call *%8" : "=d"(hv_status_high), "=a"(hv_status_low) : "d" (control_high), "a" (control_low), "b" (input_address_high), "c" (input_address_low), "D"(output_address_high), "S"(output_address_low), "m" (hypercall_page)); return (hv_status_low | ((uint64_t)hv_status_high << 32)); #endif /* __x86_64__ */ } /** * @brief Main initialization routine. * * This routine must be called * before any other routines in here are called */ int hv_vmbus_init(void) { int max_leaf; hv_vmbus_x64_msr_hypercall_contents hypercall_msr; void* virt_addr = 0; memset( hv_vmbus_g_context.syn_ic_event_page, 0, sizeof(hv_vmbus_handle) * MAXCPU); memset( hv_vmbus_g_context.syn_ic_msg_page, 0, sizeof(hv_vmbus_handle) * MAXCPU); if (vm_guest != VM_GUEST_HV) goto cleanup; max_leaf = hv_vmbus_get_hypervisor_version(); /* * Write our OS info */ uint64_t os_guest_info = HV_FREEBSD_GUEST_ID; wrmsr(HV_X64_MSR_GUEST_OS_ID, os_guest_info); hv_vmbus_g_context.guest_id = os_guest_info; /* * See if the hypercall page is already set */ hypercall_msr.as_uint64_t = rdmsr(HV_X64_MSR_HYPERCALL); virt_addr = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(virt_addr != NULL, ("Error VMBUS: malloc failed to allocate page during init!")); if (virt_addr == NULL) goto cleanup; hypercall_msr.u.enable = 1; hypercall_msr.u.guest_physical_address = (hv_get_phys_addr(virt_addr) >> PAGE_SHIFT); wrmsr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t); /* * Confirm that hypercall page did get set up */ hypercall_msr.as_uint64_t = 0; hypercall_msr.as_uint64_t = rdmsr(HV_X64_MSR_HYPERCALL); if (!hypercall_msr.u.enable) goto cleanup; hv_vmbus_g_context.hypercall_page = virt_addr; - /* - * Setup the global signal event param for the signal event hypercall - */ - hv_vmbus_g_context.signal_event_buffer = - malloc(sizeof(hv_vmbus_input_signal_event_buffer), M_DEVBUF, - M_ZERO | M_NOWAIT); - KASSERT(hv_vmbus_g_context.signal_event_buffer != NULL, - ("Error VMBUS: Failed to allocate signal_event_buffer\n")); - if (hv_vmbus_g_context.signal_event_buffer == NULL) - goto cleanup; - - hv_vmbus_g_context.signal_event_param = - (hv_vmbus_input_signal_event*) - (HV_ALIGN_UP((unsigned long) - hv_vmbus_g_context.signal_event_buffer, - HV_HYPERCALL_PARAM_ALIGN)); - hv_vmbus_g_context.signal_event_param->connection_id.as_uint32_t = 0; - hv_vmbus_g_context.signal_event_param->connection_id.u.id = - HV_VMBUS_EVENT_CONNECTION_ID; - hv_vmbus_g_context.signal_event_param->flag_number = 0; - hv_vmbus_g_context.signal_event_param->rsvd_z = 0; - tc_init(&hv_timecounter); /* register virtual timecount */ return (0); cleanup: if (virt_addr != NULL) { if (hypercall_msr.u.enable) { hypercall_msr.as_uint64_t = 0; wrmsr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t); } free(virt_addr, M_DEVBUF); } return (ENOTSUP); } /** * @brief Cleanup routine, called normally during driver unloading or exiting */ void hv_vmbus_cleanup(void) { hv_vmbus_x64_msr_hypercall_contents hypercall_msr; - if (hv_vmbus_g_context.signal_event_buffer != NULL) { - free(hv_vmbus_g_context.signal_event_buffer, M_DEVBUF); - hv_vmbus_g_context.signal_event_buffer = NULL; - hv_vmbus_g_context.signal_event_param = NULL; - } - if (hv_vmbus_g_context.guest_id == HV_FREEBSD_GUEST_ID) { if (hv_vmbus_g_context.hypercall_page != NULL) { hypercall_msr.as_uint64_t = 0; 
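                        /*
                         * Clearing the MSR image and writing it back drops
                         * the enable bit, so the hypervisor stops treating
                         * the page as the hypercall page before it is freed:
                         */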
wrmsr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t); free(hv_vmbus_g_context.hypercall_page, M_DEVBUF); hv_vmbus_g_context.hypercall_page = NULL; } } } /** * @brief Post a message using the hypervisor message IPC. * (This involves a hypercall.) */ hv_vmbus_status hv_vmbus_post_msg_via_msg_ipc( hv_vmbus_connection_id connection_id, hv_vmbus_msg_type message_type, void* payload, size_t payload_size) { struct alignedinput { uint64_t alignment8; hv_vmbus_input_post_message msg; }; hv_vmbus_input_post_message* aligned_msg; hv_vmbus_status status; size_t addr; if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) return (EMSGSIZE); addr = (size_t) malloc(sizeof(struct alignedinput), M_DEVBUF, M_ZERO | M_NOWAIT); KASSERT(addr != 0, ("Error VMBUS: malloc failed to allocate message buffer!")); if (addr == 0) return (ENOMEM); aligned_msg = (hv_vmbus_input_post_message*) (HV_ALIGN_UP(addr, HV_HYPERCALL_PARAM_ALIGN)); aligned_msg->connection_id = connection_id; aligned_msg->message_type = message_type; aligned_msg->payload_size = payload_size; memcpy((void*) aligned_msg->payload, payload, payload_size); status = hv_vmbus_do_hypercall( HV_CALL_POST_MESSAGE, aligned_msg, 0) & 0xFFFF; free((void *) addr, M_DEVBUF); return (status); } /** * @brief Signal an event on the specified connection using the hypervisor * event IPC. (This involves a hypercall.) */ hv_vmbus_status -hv_vmbus_signal_event() +hv_vmbus_signal_event(void *con_id) { hv_vmbus_status status; status = hv_vmbus_do_hypercall( HV_CALL_SIGNAL_EVENT, - hv_vmbus_g_context.signal_event_param, + con_id, 0) & 0xFFFF; return (status); } /** * @brief hv_vmbus_synic_init */ void hv_vmbus_synic_init(void *arg) { int cpu; + uint64_t hv_vcpu_index; hv_vmbus_synic_simp simp; hv_vmbus_synic_siefp siefp; hv_vmbus_synic_scontrol sctrl; hv_vmbus_synic_sint shared_sint; uint64_t version; hv_setup_args* setup_args = (hv_setup_args *)arg; cpu = PCPU_GET(cpuid); if (hv_vmbus_g_context.hypercall_page == NULL) return; /* - * KYS: Looks like we can only initialize on cpu0; don't we support - * SMP guests? 
- * - * TODO: Need to add SMP support for FreeBSD V9 - */ - - if (cpu != 0) - return; - - /* * TODO: Check the version */ version = rdmsr(HV_X64_MSR_SVERSION); - - hv_vmbus_g_context.syn_ic_msg_page[cpu] = setup_args->page_buffers[0]; - hv_vmbus_g_context.syn_ic_event_page[cpu] = setup_args->page_buffers[1]; + hv_vmbus_g_context.syn_ic_msg_page[cpu] = + setup_args->page_buffers[2 * cpu]; + hv_vmbus_g_context.syn_ic_event_page[cpu] = + setup_args->page_buffers[2 * cpu + 1]; /* * Setup the Synic's message page */ simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP); simp.u.simp_enabled = 1; simp.u.base_simp_gpa = ((hv_get_phys_addr( hv_vmbus_g_context.syn_ic_msg_page[cpu])) >> PAGE_SHIFT); wrmsr(HV_X64_MSR_SIMP, simp.as_uint64_t); /* * Setup the Synic's event page */ siefp.as_uint64_t = rdmsr(HV_X64_MSR_SIEFP); siefp.u.siefp_enabled = 1; siefp.u.base_siefp_gpa = ((hv_get_phys_addr( hv_vmbus_g_context.syn_ic_event_page[cpu])) >> PAGE_SHIFT); wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t); /*HV_SHARED_SINT_IDT_VECTOR + 0x20; */ + shared_sint.as_uint64_t = 0; shared_sint.u.vector = setup_args->vector; shared_sint.u.masked = FALSE; - shared_sint.u.auto_eoi = FALSE; + shared_sint.u.auto_eoi = TRUE; wrmsr(HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT, shared_sint.as_uint64_t); /* Enable the global synic bit */ sctrl.as_uint64_t = rdmsr(HV_X64_MSR_SCONTROL); sctrl.u.enable = 1; wrmsr(HV_X64_MSR_SCONTROL, sctrl.as_uint64_t); hv_vmbus_g_context.syn_ic_initialized = TRUE; + /* + * Set up the cpuid mapping from Hyper-V to FreeBSD. + * The array is indexed using FreeBSD cpuid. + */ + hv_vcpu_index = rdmsr(HV_X64_MSR_VP_INDEX); + hv_vmbus_g_context.hv_vcpu_index[cpu] = (uint32_t)hv_vcpu_index; + return; } /** * @brief Cleanup routine for hv_vmbus_synic_init() */ void hv_vmbus_synic_cleanup(void *arg) { hv_vmbus_synic_sint shared_sint; hv_vmbus_synic_simp simp; hv_vmbus_synic_siefp siefp; - int cpu = PCPU_GET(cpuid); if (!hv_vmbus_g_context.syn_ic_initialized) return; - - if (cpu != 0) - return; /* TODO: XXXKYS: SMP? */ shared_sint.as_uint64_t = rdmsr( HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT); shared_sint.u.masked = 1; /* * Disable the interrupt */ wrmsr( HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT, shared_sint.as_uint64_t); simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP); simp.u.simp_enabled = 0; simp.u.base_simp_gpa = 0; wrmsr(HV_X64_MSR_SIMP, simp.as_uint64_t); siefp.as_uint64_t = rdmsr(HV_X64_MSR_SIEFP); siefp.u.siefp_enabled = 0; siefp.u.base_siefp_gpa = 0; wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t); } Index: head/sys/dev/hyperv/vmbus/hv_ring_buffer.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_ring_buffer.c (revision 282211) +++ head/sys/dev/hyperv/vmbus/hv_ring_buffer.c (revision 282212) @@ -1,440 +1,510 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include "hv_vmbus_priv.h" /* Amount of space to write to */ #define HV_BYTES_AVAIL_TO_WRITE(r, w, z) ((w) >= (r))? \ ((z) - ((w) - (r))):((r) - (w)) /** * @brief Get number of bytes available to read and to write to * for the specified ring buffer */ static inline void get_ring_buffer_avail_bytes( hv_vmbus_ring_buffer_info* rbi, uint32_t* read, uint32_t* write) { uint32_t read_loc, write_loc; /* * Capture the read/write indices before they changed */ read_loc = rbi->ring_buffer->read_index; write_loc = rbi->ring_buffer->write_index; *write = HV_BYTES_AVAIL_TO_WRITE( read_loc, write_loc, rbi->ring_data_size); *read = rbi->ring_data_size - *write; } /** * @brief Get the next write location for the specified ring buffer */ static inline uint32_t get_next_write_location(hv_vmbus_ring_buffer_info* ring_info) { uint32_t next = ring_info->ring_buffer->write_index; return (next); } /** * @brief Set the next write location for the specified ring buffer */ static inline void set_next_write_location( hv_vmbus_ring_buffer_info* ring_info, uint32_t next_write_location) { ring_info->ring_buffer->write_index = next_write_location; } /** * @brief Get the next read location for the specified ring buffer */ static inline uint32_t get_next_read_location(hv_vmbus_ring_buffer_info* ring_info) { uint32_t next = ring_info->ring_buffer->read_index; return (next); } /** * @brief Get the next read location + offset for the specified ring buffer. * This allows the caller to skip. */ static inline uint32_t get_next_read_location_with_offset( hv_vmbus_ring_buffer_info* ring_info, uint32_t offset) { uint32_t next = ring_info->ring_buffer->read_index; next += offset; next %= ring_info->ring_data_size; return (next); } /** * @brief Set the next read location for the specified ring buffer */ static inline void set_next_read_location( hv_vmbus_ring_buffer_info* ring_info, uint32_t next_read_location) { ring_info->ring_buffer->read_index = next_read_location; } /** * @brief Get the start of the ring buffer */ static inline void * get_ring_buffer(hv_vmbus_ring_buffer_info* ring_info) { return (void *) ring_info->ring_buffer->buffer; } /** * @brief Get the size of the ring buffer. */ static inline uint32_t get_ring_buffer_size(hv_vmbus_ring_buffer_info* ring_info) { return ring_info->ring_data_size; } /** * Get the read and write indices as uint64_t of the specified ring buffer. 
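 * Only the write index is actually recorded: it is shifted into the
 * upper 32 bits of the returned quadword, which hv_ring_buffer_write()
 * appends after each packet as the prev_indices trailer.
 */

/*
 * A hedged illustration of that packing (editor sketch, not commit
 * code):
 */
static inline uint64_t
pack_indices_example(uint32_t write_index)
{
        /* write_index 0x1234 packs to 0x0000123400000000 */
        return ((uint64_t)write_index << 32);
}

/*
 * The helper itself: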
*/ static inline uint64_t get_ring_buffer_indices(hv_vmbus_ring_buffer_info* ring_info) { return (uint64_t) ring_info->ring_buffer->write_index << 32; } +void +hv_ring_buffer_read_begin( + hv_vmbus_ring_buffer_info* ring_info) +{ + ring_info->ring_buffer->interrupt_mask = 1; + mb(); +} + +uint32_t +hv_ring_buffer_read_end( + hv_vmbus_ring_buffer_info* ring_info) +{ + uint32_t read, write; + + ring_info->ring_buffer->interrupt_mask = 0; + mb(); + + /* + * Now check to see if the ring buffer is still empty. + * If it is not, we raced and we need to process new + * incoming messages. + */ + get_ring_buffer_avail_bytes(ring_info, &read, &write); + + return (read); +} + +/* + * When we write to the ring buffer, check if the host needs to + * be signaled. Here are the details of this protocol: + * + * 1. The host guarantees that while it is draining the + * ring buffer, it will set the interrupt_mask to + * indicate it does not need to be interrupted when + * new data is placed. + * + * 2. The host guarantees that it will completely drain + * the ring buffer before exiting the read loop. Further, + * once the ring buffer is empty, it will clear the + * interrupt_mask and re-check to see if new data has + * arrived. + */ +static boolean_t +hv_ring_buffer_needsig_on_write( + uint32_t old_write_location, + hv_vmbus_ring_buffer_info* rbi) +{ + mb(); + if (rbi->ring_buffer->interrupt_mask) + return (FALSE); + + /* Read memory barrier */ + rmb(); + /* + * We only need to signal in this one case: when the + * ring transitions from being empty to non-empty. + */ + if (old_write_location == rbi->ring_buffer->read_index) + return (TRUE); + + return (FALSE); +} + static uint32_t copy_to_ring_buffer( hv_vmbus_ring_buffer_info* ring_info, uint32_t start_write_offset, char* src, uint32_t src_len); static uint32_t copy_from_ring_buffer( hv_vmbus_ring_buffer_info* ring_info, char* dest, uint32_t dest_len, uint32_t start_read_offset); /** * @brief Get the interrupt mask for the specified ring buffer. */ uint32_t hv_vmbus_get_ring_buffer_interrupt_mask(hv_vmbus_ring_buffer_info *rbi) { return rbi->ring_buffer->interrupt_mask; } /** * @brief Initialize the ring buffer. */ int hv_vmbus_ring_buffer_init( hv_vmbus_ring_buffer_info* ring_info, void* buffer, uint32_t buffer_len) { memset(ring_info, 0, sizeof(hv_vmbus_ring_buffer_info)); ring_info->ring_buffer = (hv_vmbus_ring_buffer*) buffer; ring_info->ring_buffer->read_index = ring_info->ring_buffer->write_index = 0; ring_info->ring_size = buffer_len; ring_info->ring_data_size = buffer_len - sizeof(hv_vmbus_ring_buffer); mtx_init(&ring_info->ring_lock, "vmbus ring buffer", NULL, MTX_SPIN); return (0); } /** * @brief Cleanup the ring buffer. */ void hv_ring_buffer_cleanup(hv_vmbus_ring_buffer_info* ring_info) { mtx_destroy(&ring_info->ring_lock); } /** * @brief Write to the ring buffer.
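 *
 * On success, *need_sig reports whether the caller must signal the
 * host (see hv_ring_buffer_needsig_on_write() above).
 */

/*
 * A minimal caller sketch (editor example with hypothetical names,
 * not part of the commit): queue one buffer and honor the hint.
 */
static int
ring_write_example(hv_vmbus_ring_buffer_info *out, hv_vmbus_channel *chan)
{
        hv_vmbus_sg_buffer_list sg[1];
        boolean_t need_sig = FALSE;
        char payload[16] = "example";
        int error;

        sg[0].data = payload;
        sg[0].length = sizeof(payload);
        error = hv_ring_buffer_write(out, sg, 1, &need_sig);
        if (error == 0 && need_sig)
                error = hv_vmbus_set_event(chan);
        return (error);
}

/*
 * The implementation follows: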
*/ int hv_ring_buffer_write( hv_vmbus_ring_buffer_info* out_ring_info, hv_vmbus_sg_buffer_list sg_buffers[], - uint32_t sg_buffer_count) + uint32_t sg_buffer_count, + boolean_t *need_sig) { int i = 0; uint32_t byte_avail_to_write; uint32_t byte_avail_to_read; + uint32_t old_write_location; uint32_t total_bytes_to_write = 0; volatile uint32_t next_write_location; uint64_t prev_indices = 0; for (i = 0; i < sg_buffer_count; i++) { total_bytes_to_write += sg_buffers[i].length; } total_bytes_to_write += sizeof(uint64_t); mtx_lock_spin(&out_ring_info->ring_lock); get_ring_buffer_avail_bytes(out_ring_info, &byte_avail_to_read, &byte_avail_to_write); /* * If there is only room for the packet, assume it is full. * Otherwise, the next time around, we think the ring buffer * is empty since the read index == write index */ if (byte_avail_to_write <= total_bytes_to_write) { mtx_unlock_spin(&out_ring_info->ring_lock); return (EAGAIN); } /* * Write to the ring buffer */ next_write_location = get_next_write_location(out_ring_info); + old_write_location = next_write_location; + for (i = 0; i < sg_buffer_count; i++) { next_write_location = copy_to_ring_buffer(out_ring_info, next_write_location, (char *) sg_buffers[i].data, sg_buffers[i].length); } /* * Set previous packet start */ prev_indices = get_ring_buffer_indices(out_ring_info); next_write_location = copy_to_ring_buffer( out_ring_info, next_write_location, (char *) &prev_indices, sizeof(uint64_t)); /* - * Make sure we flush all writes before updating the writeIndex + * Full memory barrier before updating the write index. */ - wmb(); + mb(); /* * Now, update the write location */ set_next_write_location(out_ring_info, next_write_location); mtx_unlock_spin(&out_ring_info->ring_lock); + + *need_sig = hv_ring_buffer_needsig_on_write(old_write_location, + out_ring_info); return (0); } /** * @brief Read without advancing the read index. */ int hv_ring_buffer_peek( hv_vmbus_ring_buffer_info* in_ring_info, void* buffer, uint32_t buffer_len) { uint32_t bytesAvailToWrite; uint32_t bytesAvailToRead; uint32_t nextReadLocation = 0; mtx_lock_spin(&in_ring_info->ring_lock); get_ring_buffer_avail_bytes(in_ring_info, &bytesAvailToRead, &bytesAvailToWrite); /* * Make sure there is something to read */ if (bytesAvailToRead < buffer_len) { mtx_unlock_spin(&in_ring_info->ring_lock); return (EAGAIN); } /* * Convert to byte offset */ nextReadLocation = get_next_read_location(in_ring_info); nextReadLocation = copy_from_ring_buffer( in_ring_info, (char *)buffer, buffer_len, nextReadLocation); mtx_unlock_spin(&in_ring_info->ring_lock); return (0); } /** * @brief Read and advance the read index.
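 *
 * The offset argument skips bytes the caller has already examined
 * with hv_ring_buffer_peek() before the read index is advanced.
 */

/*
 * A consumer sketch (editor example with a hypothetical fixed-size
 * descriptor, not commit code): peek first, then consume past it.
 */
static int
ring_read_example(hv_vmbus_ring_buffer_info *in)
{
        uint64_t desc;
        char body[64];
        int error;

        /* Inspect the leading quadword without consuming it. */
        error = hv_ring_buffer_peek(in, &desc, sizeof(desc));
        if (error != 0)
                return (error);
        /* Consume the body, skipping the descriptor just peeked. */
        return (hv_ring_buffer_read(in, body, sizeof(body), sizeof(desc)));
}

/*
 * The implementation follows: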
*/ int hv_ring_buffer_read( hv_vmbus_ring_buffer_info* in_ring_info, void* buffer, uint32_t buffer_len, uint32_t offset) { uint32_t bytes_avail_to_write; uint32_t bytes_avail_to_read; uint32_t next_read_location = 0; uint64_t prev_indices = 0; if (buffer_len <= 0) return (EINVAL); mtx_lock_spin(&in_ring_info->ring_lock); get_ring_buffer_avail_bytes( in_ring_info, &bytes_avail_to_read, &bytes_avail_to_write); /* * Make sure there is something to read */ if (bytes_avail_to_read < buffer_len) { mtx_unlock_spin(&in_ring_info->ring_lock); return (EAGAIN); } next_read_location = get_next_read_location_with_offset( in_ring_info, offset); next_read_location = copy_from_ring_buffer( in_ring_info, (char *) buffer, buffer_len, next_read_location); next_read_location = copy_from_ring_buffer( in_ring_info, (char *) &prev_indices, sizeof(uint64_t), next_read_location); /* * Make sure all reads are done before we update the read index since * the writer may start writing to the read area once the read index * is updated. */ wmb(); /* * Update the read index */ set_next_read_location(in_ring_info, next_read_location); mtx_unlock_spin(&in_ring_info->ring_lock); return (0); } /** * @brief Helper routine to copy from source to ring buffer. * * Assume there is enough room. Handles wrap-around in dest case only! */ uint32_t copy_to_ring_buffer( hv_vmbus_ring_buffer_info* ring_info, uint32_t start_write_offset, char* src, uint32_t src_len) { char *ring_buffer = get_ring_buffer(ring_info); uint32_t ring_buffer_size = get_ring_buffer_size(ring_info); uint32_t fragLen; if (src_len > ring_buffer_size - start_write_offset) { /* wrap-around detected! */ fragLen = ring_buffer_size - start_write_offset; memcpy(ring_buffer + start_write_offset, src, fragLen); memcpy(ring_buffer, src + fragLen, src_len - fragLen); } else { memcpy(ring_buffer + start_write_offset, src, src_len); } start_write_offset += src_len; start_write_offset %= ring_buffer_size; return (start_write_offset); } /** * @brief Helper routine to copy to source from ring buffer. * * Assume there is enough room. Handles wrap-around in src case only! */ uint32_t copy_from_ring_buffer( hv_vmbus_ring_buffer_info* ring_info, char* dest, uint32_t dest_len, uint32_t start_read_offset) { uint32_t fragLen; char *ring_buffer = get_ring_buffer(ring_info); uint32_t ring_buffer_size = get_ring_buffer_size(ring_info); if (dest_len > ring_buffer_size - start_read_offset) { /* wrap-around detected at the src */ fragLen = ring_buffer_size - start_read_offset; memcpy(dest, ring_buffer + start_read_offset, fragLen); memcpy(dest + fragLen, ring_buffer, dest_len - fragLen); } else { memcpy(dest, ring_buffer + start_read_offset, dest_len); } start_read_offset += dest_len; start_read_offset %= ring_buffer_size; return (start_read_offset); } Property changes on: head/sys/dev/hyperv/vmbus/hv_ring_buffer.c ___________________________________________________________________ Added: fbsd:nokeywords ## -0,0 +1 ## +yes \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c (revision 282211) +++ head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c (revision 282212) @@ -1,608 +1,732 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * VM Bus Driver Implementation */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include +#include #include +#include #include "hv_vmbus_priv.h" #define VMBUS_IRQ 0x5 -static struct intr_event *hv_msg_intr_event; -static struct intr_event *hv_event_intr_event; -static void *msg_swintr; -static void *event_swintr; static device_t vmbus_devp; -static void *vmbus_cookiep; -static int vmbus_rid; -struct resource *intr_res; -static int vmbus_irq = VMBUS_IRQ; static int vmbus_inited; static hv_setup_args setup_args; /* only CPU 0 supported at this time */ /** * @brief Software interrupt thread routine to handle channel messages from * the hypervisor. */ static void -vmbus_msg_swintr(void *dummy) +vmbus_msg_swintr(void *arg) { int cpu; void* page_addr; hv_vmbus_message* msg; hv_vmbus_message* copied; - cpu = PCPU_GET(cpuid); + cpu = (int)(long)arg; + KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: " + "cpu out of range!")); + page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; for (;;) { if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) { break; /* no message */ } else { copied = malloc(sizeof(hv_vmbus_message), M_DEVBUF, M_NOWAIT); KASSERT(copied != NULL, ("Error VMBUS: malloc failed to allocate" " hv_vmbus_message!")); if (copied == NULL) continue; memcpy(copied, msg, sizeof(hv_vmbus_message)); hv_queue_work_item(hv_vmbus_g_connection.work_queue, hv_vmbus_on_channel_message, copied); } msg->header.message_type = HV_MESSAGE_TYPE_NONE; /* * Make sure the write to message_type (ie set to * HV_MESSAGE_TYPE_NONE) happens before we read the * message_pending and EOMing. Otherwise, the EOMing will * not deliver any more messages * since there is no empty slot */ wmb(); if (msg->header.message_flags.u.message_pending) { /* * This will cause message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(HV_X64_MSR_EOM, 0); } } } /** * @brief Interrupt filter routine for VMBUS. * * The purpose of this routine is to determine the type of VMBUS protocol * message to process - an event or a channel message. 
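 * Events are checked first (matching what Windows does when it runs as
 * a guest); messages are then handed to the per-cpu SWI thread.
 */

/*
 * For reference, the end-of-message handshake driven by
 * vmbus_msg_swintr() above, condensed into an editor sketch (not
 * commit code): the slot must be released before message_pending is
 * checked, or the hypervisor may never redeliver.
 */
static void
eom_example(hv_vmbus_message *msg)
{
        msg->header.message_type = HV_MESSAGE_TYPE_NONE;
        wmb();          /* release the slot before the pending check */
        if (msg->header.message_flags.u.message_pending)
                wrmsr(HV_X64_MSR_EOM, 0);       /* request a rescan */
}

/*
 * Historical notes on the restricted filter environment, removed here
 * along with the filter registration itself: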
- * As this is an interrupt filter routine, the function runs in a very - * restricted environment. From the manpage for bus_setup_intr(9) - * - * In this restricted environment, care must be taken to account for all - * races. A careful analysis of races should be done as well. It is gener- - * ally cheaper to take an extra interrupt, for example, than to protect - * variables with spinlocks. Read, modify, write cycles of hardware regis- - * ters need to be carefully analyzed if other threads are accessing the - * same registers. */ -static int +static inline int hv_vmbus_isr(void *unused) { int cpu; hv_vmbus_message* msg; hv_vmbus_synic_event_flags* event; void* page_addr; cpu = PCPU_GET(cpuid); - /* (Temporary limit) */ - KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero")); /* * The Windows team has advised that we check for events * before checking for messages. This is the way they do it * in Windows when running as a guest in Hyper-V */ page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu]; event = (hv_vmbus_synic_event_flags*) page_addr + HV_VMBUS_MESSAGE_SINT; - /* Since we are a child, we only need to check bit 0 */ - if (synch_test_and_clear_bit(0, &event->flags32[0])) { - swi_sched(event_swintr, 0); + if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || + (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) { + /* Since we are a child, we only need to check bit 0 */ + if (synch_test_and_clear_bit(0, &event->flags32[0])) { + swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0); + } + } else { + /* + * On host with Win8 or above, we can directly look at + * the event page. If bit n is set, we have an interrupt + * on the channel with id n. + * Directly schedule the event software interrupt on + * current cpu. + */ + swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0); } /* Check if there are actual msgs to be processed */ page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) { - swi_sched(msg_swintr, 0); + swi_sched(hv_vmbus_g_context.msg_swintr[cpu], 0); } return FILTER_HANDLED; } +#ifdef HV_DEBUG_INTR +uint32_t hv_intr_count = 0; +#endif +uint32_t hv_vmbus_swintr_event_cpu[MAXCPU]; +uint32_t hv_vmbus_intr_cpu[MAXCPU]; + +void +hv_vector_handler(struct trapframe *trap_frame) +{ +#ifdef HV_DEBUG_INTR + int cpu; +#endif + + /* + * Disable preemption. + */ + critical_enter(); + +#ifdef HV_DEBUG_INTR + /* + * Do a little interrupt counting. + */ + cpu = PCPU_GET(cpuid); + hv_vmbus_intr_cpu[cpu]++; + hv_intr_count++; +#endif + + hv_vmbus_isr(NULL); + + /* + * Enable preemption.
+ */ + critical_exit(); +} + static int vmbus_read_ivar( device_t dev, device_t child, int index, uintptr_t* result) { struct hv_device *child_dev_ctx = device_get_ivars(child); switch (index) { case HV_VMBUS_IVAR_TYPE: *result = (uintptr_t) &child_dev_ctx->class_id; return (0); case HV_VMBUS_IVAR_INSTANCE: *result = (uintptr_t) &child_dev_ctx->device_id; return (0); case HV_VMBUS_IVAR_DEVCTX: *result = (uintptr_t) child_dev_ctx; return (0); case HV_VMBUS_IVAR_NODE: *result = (uintptr_t) child_dev_ctx->device; return (0); } return (ENOENT); } static int vmbus_write_ivar( device_t dev, device_t child, int index, uintptr_t value) { switch (index) { case HV_VMBUS_IVAR_TYPE: case HV_VMBUS_IVAR_INSTANCE: case HV_VMBUS_IVAR_DEVCTX: case HV_VMBUS_IVAR_NODE: /* read-only */ return (EINVAL); } return (ENOENT); } struct hv_device* hv_vmbus_child_device_create( hv_guid type, hv_guid instance, hv_vmbus_channel* channel) { hv_device* child_dev; /* * Allocate the new child device */ child_dev = malloc(sizeof(hv_device), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(child_dev != NULL, ("Error VMBUS: malloc failed to allocate hv_device!")); if (child_dev == NULL) return (NULL); child_dev->channel = channel; memcpy(&child_dev->class_id, &type, sizeof(hv_guid)); memcpy(&child_dev->device_id, &instance, sizeof(hv_guid)); return (child_dev); } static void print_dev_guid(struct hv_device *dev) { int i; unsigned char guid_name[100]; for (i = 0; i < 32; i += 2) sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]); if(bootverbose) printf("VMBUS: Class ID: %s\n", guid_name); } int hv_vmbus_child_device_register(struct hv_device *child_dev) { device_t child; int ret = 0; print_dev_guid(child_dev); child = device_add_child(vmbus_devp, NULL, -1); child_dev->device = child; device_set_ivars(child, child_dev); mtx_lock(&Giant); ret = device_probe_and_attach(child); mtx_unlock(&Giant); return (0); } int hv_vmbus_child_device_unregister(struct hv_device *child_dev) { int ret = 0; /* * XXXKYS: Ensure that this is the opposite of * device_add_child() */ mtx_lock(&Giant); ret = device_delete_child(vmbus_devp, child_dev->device); mtx_unlock(&Giant); return(ret); } static void vmbus_identify(driver_t *driver, device_t parent) { if (!hv_vmbus_query_hypervisor_presence()) return; vm_guest = VM_GUEST_HV; BUS_ADD_CHILD(parent, 0, "vmbus", 0); } static int vmbus_probe(device_t dev) { if(bootverbose) device_printf(dev, "VMBUS: probe\n"); device_set_desc(dev, "Vmbus Devices"); return (BUS_PROBE_NOWILDCARD); } +#ifdef HYPERV +extern inthand_t IDTVEC(rsvd), IDTVEC(hv_vmbus_callback); + /** + * @brief Find a free IDT slot and setup the interrupt handler. + */ +static int +vmbus_vector_alloc(void) +{ + int vector; + uintptr_t func; + struct gate_descriptor *ip; + + /* + * Search backwards form the highest IDT vector available for use + * as vmbus channel callback vector. We install 'hv_vmbus_callback' + * handler at that vector and use it to interrupt vcpus. + */ + vector = APIC_SPURIOUS_INT; + while (--vector >= APIC_IPI_INTS) { + ip = &idt[vector]; + func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); + if (func == (uintptr_t)&IDTVEC(rsvd)) { +#ifdef __i386__ + setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYS386IGT, + SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#else + setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYSIGT, + SEL_KPL, 0); +#endif + + return (vector); + } + } + return (0); +} + +/** + * @brief Restore the IDT slot to rsvd. 
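 *
 * Both helpers recover the handler address from the gate descriptor
 * the same way:
 *
 *      func = ((long)ip->gd_hioffset << 16) | ip->gd_looffset;
 *
 * gd_looffset holds bits 15:0 of the handler address and gd_hioffset
 * the remaining high bits, so comparing func against IDTVEC(rsvd) or
 * IDTVEC(hv_vmbus_callback) identifies the slot's current owner.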
+ */ +static void +vmbus_vector_free(int vector) +{ + uintptr_t func; + struct gate_descriptor *ip; + + if (vector == 0) + return; + + KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT, + ("invalid vector %d", vector)); + + ip = &idt[vector]; + func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); + KASSERT(func == (uintptr_t)&IDTVEC(hv_vmbus_callback), + ("invalid vector %d", vector)); + + setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); +} + +#else /* HYPERV */ + +static int +vmbus_vector_alloc(void) +{ + return(0); +} + +static void +vmbus_vector_free(int vector) +{ +} + +#endif /* HYPERV */ + +/** * @brief Main vmbus driver initialization routine. * * Here, we * - initialize the vmbus driver context * - setup various driver entry points * - invoke the vmbus hv main init routine * - get the irq resource * - invoke the vmbus to add the vmbus root device * - setup the vmbus root device * - retrieve the channel offers */ static int vmbus_bus_init(void) { - struct ioapic_intsrc { - struct intsrc io_intsrc; - u_int io_irq; - u_int io_intpin:8; - u_int io_vector:8; - u_int io_cpu:8; - u_int io_activehi:1; - u_int io_edgetrigger:1; - u_int io_masked:1; - int io_bus:4; - uint32_t io_lowreg; - }; - int i, ret; - unsigned int vector = 0; - struct intsrc *isrc; - struct ioapic_intsrc *intpin; + int i, j, n, ret; if (vmbus_inited) return (0); vmbus_inited = 1; ret = hv_vmbus_init(); if (ret) { if(bootverbose) printf("Error VMBUS: Hypervisor Initialization Failed!\n"); return (ret); } - ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr, - NULL, SWI_CLOCK, 0, &msg_swintr); - - if (ret) - goto cleanup; - /* - * Message SW interrupt handler checks a per-CPU page and - * thus the thread needs to be bound to CPU-0 - which is where - * all interrupts are processed. + * Find a free IDT slot for vmbus callback. */ - ret = intr_event_bind(hv_msg_intr_event, 0); + hv_vmbus_g_context.hv_cb_vector = vmbus_vector_alloc(); - if (ret) - goto cleanup1; + if (hv_vmbus_g_context.hv_cb_vector == 0) { + if(bootverbose) + printf("Error VMBUS: Cannot find free IDT slot for " + "vmbus callback!\n"); + goto cleanup; + } - ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events, - NULL, SWI_CLOCK, 0, &event_swintr); + if(bootverbose) + printf("VMBUS: vmbus callback vector %d\n", + hv_vmbus_g_context.hv_cb_vector); - if (ret) - goto cleanup1; + /* + * Notify the hypervisor of our vector. + */ + setup_args.vector = hv_vmbus_g_context.hv_cb_vector; - intr_res = bus_alloc_resource(vmbus_devp, - SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE); + CPU_FOREACH(j) { + hv_vmbus_intr_cpu[j] = 0; + hv_vmbus_swintr_event_cpu[j] = 0; + hv_vmbus_g_context.hv_event_intr_event[j] = NULL; + hv_vmbus_g_context.hv_msg_intr_event[j] = NULL; + hv_vmbus_g_context.event_swintr[j] = NULL; + hv_vmbus_g_context.msg_swintr[j] = NULL; - if (intr_res == NULL) { - ret = ENOMEM; /* XXXKYS: Need a better errno */ - goto cleanup2; + for (i = 0; i < 2; i++) + setup_args.page_buffers[2 * j + i] = NULL; } /* - * Setup interrupt filter handler + * Per cpu setup. */ - ret = bus_setup_intr(vmbus_devp, intr_res, - INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL, - NULL, &vmbus_cookiep); + CPU_FOREACH(j) { + /* + * Setup software interrupt thread and handler for msg handling. 
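 * The pattern repeated below for both the msg and event paths is:
 * swi_add() creates the software interrupt handler and its
 * intr_event, then intr_event_bind() pins the backing ithread to
 * cpu j, so each CPU drains only its own SynIC pages.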
+ */ + ret = swi_add(&hv_vmbus_g_context.hv_msg_intr_event[j], + "hv_msg", vmbus_msg_swintr, (void *)(long)j, SWI_CLOCK, 0, + &hv_vmbus_g_context.msg_swintr[j]); + if (ret) { + if(bootverbose) + printf("VMBUS: failed to setup msg swi for " + "cpu %d\n", j); + goto cleanup1; + } - if (ret != 0) - goto cleanup3; + /* + * Bind the swi thread to the cpu. + */ + ret = intr_event_bind(hv_vmbus_g_context.hv_msg_intr_event[j], + j); + if (ret) { + if(bootverbose) + printf("VMBUS: failed to bind msg swi thread " + "to cpu %d\n", j); + goto cleanup1; + } - ret = bus_bind_intr(vmbus_devp, intr_res, 0); - if (ret != 0) - goto cleanup4; + /* + * Setup software interrupt thread and handler for + * event handling. + */ + ret = swi_add(&hv_vmbus_g_context.hv_event_intr_event[j], + "hv_event", hv_vmbus_on_events, (void *)(long)j, + SWI_CLOCK, 0, &hv_vmbus_g_context.event_swintr[j]); + if (ret) { + if(bootverbose) + printf("VMBUS: failed to setup event swi for " + "cpu %d\n", j); + goto cleanup1; + } - isrc = intr_lookup_source(vmbus_irq); - if ((isrc == NULL) || (isrc->is_event == NULL)) { - ret = EINVAL; - goto cleanup4; - } - - /* vector = isrc->is_event->ie_vector; */ - intpin = (struct ioapic_intsrc *)isrc; - vector = intpin->io_vector; - - if(bootverbose) - printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector); - - /** - * Notify the hypervisor of our irq. - */ - setup_args.vector = vector; - for(i = 0; i < 2; i++) { - setup_args.page_buffers[i] = + /* + * Prepare the per cpu msg and event pages to be called on each cpu. + */ + for(i = 0; i < 2; i++) { + setup_args.page_buffers[2 * j + i] = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO); - if (setup_args.page_buffers[i] == NULL) { - KASSERT(setup_args.page_buffers[i] != NULL, + if (setup_args.page_buffers[2 * j + i] == NULL) { + KASSERT(setup_args.page_buffers[2 * j + i] != NULL, ("Error VMBUS: malloc failed!")); - if (i > 0) - free(setup_args.page_buffers[0], M_DEVBUF); - goto cleanup4; + goto cleanup1; + } } } - /* only CPU #0 supported at this time */ + if (bootverbose) + printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n", + smp_started); + smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args); /* * Connect to VMBus in the root partition */ ret = hv_vmbus_connect(); if (ret != 0) - goto cleanup4; + goto cleanup1; hv_vmbus_request_channel_offers(); return (ret); - cleanup4: + cleanup1: + /* + * Free pages alloc'ed + */ + for (n = 0; n < 2 * MAXCPU; n++) + if (setup_args.page_buffers[n] != NULL) + free(setup_args.page_buffers[n], M_DEVBUF); /* - * remove swi, bus and intr resource + * remove swi and vmbus callback vector; */ - bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep); + CPU_FOREACH(j) { + if (hv_vmbus_g_context.msg_swintr[j] != NULL) + swi_remove(hv_vmbus_g_context.msg_swintr[j]); + if (hv_vmbus_g_context.event_swintr[j] != NULL) + swi_remove(hv_vmbus_g_context.event_swintr[j]); + hv_vmbus_g_context.hv_msg_intr_event[j] = NULL; + hv_vmbus_g_context.hv_event_intr_event[j] = NULL; + } - cleanup3: - bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res); + vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector); - cleanup2: - swi_remove(event_swintr); - - cleanup1: - swi_remove(msg_swintr); - cleanup: hv_vmbus_cleanup(); return (ret); } static int vmbus_attach(device_t dev) { if(bootverbose) device_printf(dev, "VMBUS: attach dev: %p\n", dev); vmbus_devp = dev; /* * If the system has already booted and thread * scheduling is possible indicated by the global * cold set to zero, we just call the driver * 
initialization directly. */ if (!cold) vmbus_bus_init(); return (0); } static void vmbus_init(void) { if (vm_guest != VM_GUEST_HV) return; /* * If the system has already booted and thread * scheduling is possible, as indicated by the * global cold set to zero, we just call the driver * initialization directly. */ if (!cold) vmbus_bus_init(); } static void vmbus_bus_exit(void) { int i; hv_vmbus_release_unattached_channels(); hv_vmbus_disconnect(); smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL); - for(i = 0; i < 2; i++) { + for(i = 0; i < 2 * MAXCPU; i++) { if (setup_args.page_buffers[i] != 0) free(setup_args.page_buffers[i], M_DEVBUF); } hv_vmbus_cleanup(); - /* remove swi, bus and intr resource */ - bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep); + /* remove swi */ + CPU_FOREACH(i) { + if (hv_vmbus_g_context.msg_swintr[i] != NULL) + swi_remove(hv_vmbus_g_context.msg_swintr[i]); + if (hv_vmbus_g_context.event_swintr[i] != NULL) + swi_remove(hv_vmbus_g_context.event_swintr[i]); + hv_vmbus_g_context.hv_msg_intr_event[i] = NULL; + hv_vmbus_g_context.hv_event_intr_event[i] = NULL; + } - bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res); + vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector); - swi_remove(msg_swintr); - swi_remove(event_swintr); - return; } static void vmbus_exit(void) { vmbus_bus_exit(); } static int vmbus_detach(device_t dev) { vmbus_exit(); return (0); } static void vmbus_mod_load(void) { if(bootverbose) printf("VMBUS: load\n"); } static void vmbus_mod_unload(void) { if(bootverbose) printf("VMBUS: unload\n"); } static int vmbus_modevent(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: vmbus_mod_load(); break; case MOD_UNLOAD: vmbus_mod_unload(); break; } return (0); } static device_method_t vmbus_methods[] = { /** Device interface */ DEVMETHOD(device_identify, vmbus_identify), DEVMETHOD(device_probe, vmbus_probe), DEVMETHOD(device_attach, vmbus_attach), DEVMETHOD(device_detach, vmbus_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /** Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_read_ivar, vmbus_read_ivar), DEVMETHOD(bus_write_ivar, vmbus_write_ivar), { 0, 0 } }; static char driver_name[] = "vmbus"; static driver_t vmbus_driver = { driver_name, vmbus_methods,0, }; devclass_t vmbus_devclass; DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0); MODULE_VERSION(vmbus,1); -/* TODO: We want to be earlier than SI_SUB_VFS */ -SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL); +/* We want to be started after SMP is initialized */ +SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL); Index: head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h =================================================================== --- head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h (revision 282211) +++ head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h (revision 282212) @@ -1,724 +1,715 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __HYPERV_PRIV_H__ #define __HYPERV_PRIV_H__ #include #include #include #include #include /* * Status codes for hypervisor operations. */ typedef uint16_t hv_vmbus_status; #define HV_MESSAGE_SIZE (256) #define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) #define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) #define HV_ANY_VP (0xFFFFFFFF) /* * Synthetic interrupt controller flag constants. */ #define HV_EVENT_FLAGS_COUNT (256 * 8) #define HV_EVENT_FLAGS_BYTE_COUNT (256) #define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(uint32_t)) /* * MessageId: HV_STATUS_INSUFFICIENT_BUFFERS * MessageText: * You did not supply enough message buffers to send a message. */ #define HV_STATUS_INSUFFICIENT_BUFFERS ((uint16_t)0x0013) typedef void (*hv_vmbus_channel_callback)(void *context); typedef struct { void* data; uint32_t length; } hv_vmbus_sg_buffer_list; typedef struct { uint32_t current_interrupt_mask; uint32_t current_read_index; uint32_t current_write_index; uint32_t bytes_avail_to_read; uint32_t bytes_avail_to_write; } hv_vmbus_ring_buffer_debug_info; typedef struct { uint32_t rel_id; hv_vmbus_channel_state state; hv_guid interface_type; hv_guid interface_instance; uint32_t monitor_id; uint32_t server_monitor_pending; uint32_t server_monitor_latency; uint32_t server_monitor_connection_id; uint32_t client_monitor_pending; uint32_t client_monitor_latency; uint32_t client_monitor_connection_id; hv_vmbus_ring_buffer_debug_info inbound; hv_vmbus_ring_buffer_debug_info outbound; } hv_vmbus_channel_debug_info; typedef union { hv_vmbus_channel_version_supported version_supported; hv_vmbus_channel_open_result open_result; hv_vmbus_channel_gpadl_torndown gpadl_torndown; hv_vmbus_channel_gpadl_created gpadl_created; hv_vmbus_channel_version_response version_response; } hv_vmbus_channel_msg_response; /* * Represents each channel msg on the vmbus connection * This is a variable-size data structure depending on * the msg type itself */ typedef struct hv_vmbus_channel_msg_info { /* * Bookkeeping stuff */ TAILQ_ENTRY(hv_vmbus_channel_msg_info) msg_list_entry; /* * So far, this is only used to handle * gpadl body message */ TAILQ_HEAD(, hv_vmbus_channel_msg_info) sub_msg_list_anchor; /* * Synchronize the request/response if * needed. * KYS: Use a semaphore for now. * Not perf critical. */ struct sema wait_sema; hv_vmbus_channel_msg_response response; uint32_t message_size; /** * The channel message that goes out on * the "wire". It will contain at * minimum the * hv_vmbus_channel_msg_header * header. 
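 *
 * (msg[0] below is the pre-C99 flexible-array idiom: the structure
 * is over-allocated so the wire message can sit directly after the
 * bookkeeping fields.)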
*/ unsigned char msg[0]; } hv_vmbus_channel_msg_info; /* * The format must be the same as hv_vm_data_gpa_direct */ typedef struct hv_vmbus_channel_packet_page_buffer { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; uint32_t reserved; uint32_t range_count; hv_vmbus_page_buffer range[HV_MAX_PAGE_BUFFER_COUNT]; } __packed hv_vmbus_channel_packet_page_buffer; /* * The format must be the same as hv_vm_data_gpa_direct */ typedef struct hv_vmbus_channel_packet_multipage_buffer { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; uint32_t reserved; uint32_t range_count; /* Always 1 in this case */ hv_vmbus_multipage_buffer range; } __packed hv_vmbus_channel_packet_multipage_buffer; enum { HV_VMBUS_MESSAGE_CONNECTION_ID = 1, HV_VMBUS_MESSAGE_PORT_ID = 1, HV_VMBUS_EVENT_CONNECTION_ID = 2, HV_VMBUS_EVENT_PORT_ID = 2, HV_VMBUS_MONITOR_CONNECTION_ID = 3, HV_VMBUS_MONITOR_PORT_ID = 3, HV_VMBUS_MESSAGE_SINT = 2 }; #define HV_PRESENT_BIT 0x80000000 #define HV_HYPERCALL_PARAM_ALIGN sizeof(uint64_t) -/* - * Connection identifier type - */ -typedef union { - uint32_t as_uint32_t; - struct { - uint32_t id:24; - uint32_t reserved:8; - } u; - -} __packed hv_vmbus_connection_id; - -/* - * Definition of the hv_vmbus_signal_event hypercall input structure - */ typedef struct { - hv_vmbus_connection_id connection_id; - uint16_t flag_number; - uint16_t rsvd_z; -} __packed hv_vmbus_input_signal_event; - -typedef struct { - uint64_t align8; - hv_vmbus_input_signal_event event; -} __packed hv_vmbus_input_signal_event_buffer; - -typedef struct { uint64_t guest_id; void* hypercall_page; hv_bool_uint8_t syn_ic_initialized; + + hv_vmbus_handle syn_ic_msg_page[MAXCPU]; + hv_vmbus_handle syn_ic_event_page[MAXCPU]; /* - * This is used as an input param to HV_CALL_SIGNAL_EVENT hypercall. - * The input param is immutable in our usage and - * must be dynamic mem (vs stack or global). + * For FreeBSD cpuid to Hyper-V vcpuid mapping. */ - hv_vmbus_input_signal_event_buffer *signal_event_buffer; + uint32_t hv_vcpu_index[MAXCPU]; /* - * 8-bytes aligned of the buffer above + * Each cpu has its own software interrupt handler for channel + * event and msg handling. */ - hv_vmbus_input_signal_event *signal_event_param; - - hv_vmbus_handle syn_ic_msg_page[MAXCPU]; - hv_vmbus_handle syn_ic_event_page[MAXCPU]; + struct intr_event *hv_event_intr_event[MAXCPU]; + struct intr_event *hv_msg_intr_event[MAXCPU]; + void *event_swintr[MAXCPU]; + void *msg_swintr[MAXCPU]; + /* + * Host uses this vector to interrupt the guest for vmbus channel + * events and msgs.
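 * This is the free IDT vector claimed by vmbus_vector_alloc() and
 * programmed into the shared SINT by hv_vmbus_synic_init(), letting
 * the host inject channel interrupts without an emulated IOAPIC irq.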
+ */ + unsigned int hv_cb_vector; } hv_vmbus_context; /* * Define hypervisor message types */ typedef enum { HV_MESSAGE_TYPE_NONE = 0x00000000, /* * Memory access messages */ HV_MESSAGE_TYPE_UNMAPPED_GPA = 0x80000000, HV_MESSAGE_TYPE_GPA_INTERCEPT = 0x80000001, /* * Timer notification messages */ HV_MESSAGE_TIMER_EXPIRED = 0x80000010, /* * Error messages */ HV_MESSAGE_TYPE_INVALID_VP_REGISTER_VALUE = 0x80000020, HV_MESSAGE_TYPE_UNRECOVERABLE_EXCEPTION = 0x80000021, HV_MESSAGE_TYPE_UNSUPPORTED_FEATURE = 0x80000022, /* * Trace buffer complete messages */ HV_MESSAGE_TYPE_EVENT_LOG_BUFFER_COMPLETE = 0x80000040, /* * Platform-specific processor intercept messages */ HV_MESSAGE_TYPE_X64_IO_PORT_INTERCEPT = 0x80010000, HV_MESSAGE_TYPE_X64_MSR_INTERCEPT = 0x80010001, HV_MESSAGE_TYPE_X64_CPU_INTERCEPT = 0x80010002, HV_MESSAGE_TYPE_X64_EXCEPTION_INTERCEPT = 0x80010003, HV_MESSAGE_TYPE_X64_APIC_EOI = 0x80010004, HV_MESSAGE_TYPE_X64_LEGACY_FP_ERROR = 0x80010005 } hv_vmbus_msg_type; /* * Define port identifier type */ typedef union _hv_vmbus_port_id { uint32_t as_uint32_t; struct { uint32_t id:24; uint32_t reserved:8; } u ; } hv_vmbus_port_id; /* * Define synthetic interrupt controller message flag */ typedef union { uint8_t as_uint8_t; struct { uint8_t message_pending:1; uint8_t reserved:7; } u; } hv_vmbus_msg_flags; typedef uint64_t hv_vmbus_partition_id; /* * Define synthetic interrupt controller message header */ typedef struct { hv_vmbus_msg_type message_type; uint8_t payload_size; hv_vmbus_msg_flags message_flags; uint8_t reserved[2]; union { hv_vmbus_partition_id sender; hv_vmbus_port_id port; } u; } hv_vmbus_msg_header; /* * Define synthetic interrupt controller message format */ typedef struct { hv_vmbus_msg_header header; union { uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; } u ; } hv_vmbus_message; /* * Maximum channels is determined by the size of the interrupt * page which is PAGE_SIZE. 1/2 of PAGE_SIZE is for * send endpoint interrupt and the other is receive * endpoint interrupt. * * Note: (PAGE_SIZE >> 1) << 3 allocates 16384 channels */ #define HV_MAX_NUM_CHANNELS (PAGE_SIZE >> 1) << 3 /* * (The value here must be a multiple of 32) */ #define HV_MAX_NUM_CHANNELS_SUPPORTED 256 /* * VM Bus connection states */ typedef enum { HV_DISCONNECTED, HV_CONNECTING, HV_CONNECTED, HV_DISCONNECTING } hv_vmbus_connect_state; #define HV_MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT typedef struct { hv_vmbus_connect_state connect_state; uint32_t next_gpadl_handle; /** * Represents channel interrupts. Each bit position * represents a channel. * When a channel sends an interrupt via VMBUS, it * finds its bit in the send_interrupt_page, sets it and * calls Hv to generate a port event. The other end * receives the port event and parses the * recv_interrupt_page to see which bit is set */ void *interrupt_page; void *send_interrupt_page; void *recv_interrupt_page; /* * 2 pages - 1st page for parent->child * notification and 2nd is child->parent * notification */ void *monitor_pages; TAILQ_HEAD(, hv_vmbus_channel_msg_info) channel_msg_anchor; struct mtx channel_msg_lock; /** - * List of channels + * List of primary channels. Sub channels will be linked + * under their primary channel.
*/ TAILQ_HEAD(, hv_vmbus_channel) channel_anchor; struct mtx channel_lock; hv_vmbus_handle work_queue; struct sema control_sema; } hv_vmbus_connection; /* * Declare the MSR used to identify the guest OS */ #define HV_X64_MSR_GUEST_OS_ID 0x40000000 typedef union { uint64_t as_uint64_t; struct { uint64_t build_number : 16; uint64_t service_version : 8; /* Service Pack, etc. */ uint64_t minor_version : 8; uint64_t major_version : 8; /* * HV_GUEST_OS_MICROSOFT_IDS (If Vendor=MS) * HV_GUEST_OS_VENDOR */ uint64_t os_id : 8; uint64_t vendor_id : 16; } u; } hv_vmbus_x64_msr_guest_os_id_contents; /* * Declare the MSR used to setup pages used to communicate with the hypervisor */ #define HV_X64_MSR_HYPERCALL 0x40000001 typedef union { uint64_t as_uint64_t; struct { uint64_t enable :1; uint64_t reserved :11; uint64_t guest_physical_address :52; } u; } hv_vmbus_x64_msr_hypercall_contents; typedef union { uint32_t as_uint32_t; struct { uint32_t group_enable :4; uint32_t rsvd_z :28; } u; } hv_vmbus_monitor_trigger_state; typedef union { uint64_t as_uint64_t; struct { uint32_t pending; uint32_t armed; } u; } hv_vmbus_monitor_trigger_group; typedef struct { hv_vmbus_connection_id connection_id; uint16_t flag_number; uint16_t rsvd_z; } hv_vmbus_monitor_parameter; /* * hv_vmbus_monitor_page Layout * ------------------------------------------------------ * | 0 | trigger_state (4 bytes) | Rsvd1 (4 bytes) | * | 8 | trigger_group[0] | * | 10 | trigger_group[1] | * | 18 | trigger_group[2] | * | 20 | trigger_group[3] | * | 28 | Rsvd2[0] | * | 30 | Rsvd2[1] | * | 38 | Rsvd2[2] | * | 40 | next_check_time[0][0] | next_check_time[0][1] | * | ... | * | 240 | latency[0][0..3] | * | 340 | Rsvz3[0] | * | 440 | parameter[0][0] | * | 448 | parameter[0][1] | * | ... | * | 840 | Rsvd4[0] | * ------------------------------------------------------ */ typedef struct { hv_vmbus_monitor_trigger_state trigger_state; uint32_t rsvd_z1; hv_vmbus_monitor_trigger_group trigger_group[4]; uint64_t rsvd_z2[3]; int32_t next_check_time[4][32]; uint16_t latency[4][32]; uint64_t rsvd_z3[32]; hv_vmbus_monitor_parameter parameter[4][32]; uint8_t rsvd_z4[1984]; } hv_vmbus_monitor_page; /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES). 
*/ typedef enum { HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES = 0x00000001, HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION = 0x40000000, HV_CPU_ID_FUNCTION_HV_INTERFACE = 0x40000001, /* * The remaining functions depend on the value * of hv_cpu_id_function_interface */ HV_CPU_ID_FUNCTION_MS_HV_VERSION = 0x40000002, HV_CPU_ID_FUNCTION_MS_HV_FEATURES = 0x40000003, HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION = 0x40000004, HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS = 0x40000005 } hv_vmbus_cpuid_function; /* * Define the format of the SIMP register */ typedef union { uint64_t as_uint64_t; struct { uint64_t simp_enabled : 1; uint64_t preserved : 11; uint64_t base_simp_gpa : 52; } u; } hv_vmbus_synic_simp; /* * Define the format of the SIEFP register */ typedef union { uint64_t as_uint64_t; struct { uint64_t siefp_enabled : 1; uint64_t preserved : 11; uint64_t base_siefp_gpa : 52; } u; } hv_vmbus_synic_siefp; /* * Define synthetic interrupt source */ typedef union { uint64_t as_uint64_t; struct { uint64_t vector : 8; uint64_t reserved1 : 8; uint64_t masked : 1; uint64_t auto_eoi : 1; uint64_t reserved2 : 46; } u; } hv_vmbus_synic_sint; /* * Define syn_ic control register */ typedef union _hv_vmbus_synic_scontrol { uint64_t as_uint64_t; struct { uint64_t enable : 1; uint64_t reserved : 63; } u; } hv_vmbus_synic_scontrol; /* * Define the hv_vmbus_post_message hypercall input structure */ typedef struct { hv_vmbus_connection_id connection_id; uint32_t reserved; hv_vmbus_msg_type message_type; uint32_t payload_size; uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; } hv_vmbus_input_post_message; /* * Define the synthetic interrupt controller event flags format */ typedef union { uint8_t flags8[HV_EVENT_FLAGS_BYTE_COUNT]; uint32_t flags32[HV_EVENT_FLAGS_DWORD_COUNT]; } hv_vmbus_synic_event_flags; +/* MSR used to provide vcpu index */ +#define HV_X64_MSR_VP_INDEX (0x40000002) /* * Define synthetic interrupt controller model specific registers */ #define HV_X64_MSR_SCONTROL (0x40000080) #define HV_X64_MSR_SVERSION (0x40000081) #define HV_X64_MSR_SIEFP (0x40000082) #define HV_X64_MSR_SIMP (0x40000083) #define HV_X64_MSR_EOM (0x40000084) #define HV_X64_MSR_SINT0 (0x40000090) #define HV_X64_MSR_SINT1 (0x40000091) #define HV_X64_MSR_SINT2 (0x40000092) #define HV_X64_MSR_SINT3 (0x40000093) #define HV_X64_MSR_SINT4 (0x40000094) #define HV_X64_MSR_SINT5 (0x40000095) #define HV_X64_MSR_SINT6 (0x40000096) #define HV_X64_MSR_SINT7 (0x40000097) #define HV_X64_MSR_SINT8 (0x40000098) #define HV_X64_MSR_SINT9 (0x40000099) #define HV_X64_MSR_SINT10 (0x4000009A) #define HV_X64_MSR_SINT11 (0x4000009B) #define HV_X64_MSR_SINT12 (0x4000009C) #define HV_X64_MSR_SINT13 (0x4000009D) #define HV_X64_MSR_SINT14 (0x4000009E) #define HV_X64_MSR_SINT15 (0x4000009F) /* * Declare the various hypercall operations */ typedef enum { HV_CALL_POST_MESSAGE = 0x005c, HV_CALL_SIGNAL_EVENT = 0x005d, } hv_vmbus_call_code; /** * Global variables */ extern hv_vmbus_context hv_vmbus_g_context; extern hv_vmbus_connection hv_vmbus_g_connection; /* * Private, VM Bus functions */ int hv_vmbus_ring_buffer_init( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len); void hv_ring_buffer_cleanup( hv_vmbus_ring_buffer_info *ring_info); int hv_ring_buffer_write( hv_vmbus_ring_buffer_info *ring_info, hv_vmbus_sg_buffer_list sg_buffers[], - uint32_t sg_buff_count); + uint32_t sg_buff_count, + boolean_t *need_sig); int hv_ring_buffer_peek( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len); int 
hv_ring_buffer_read( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len, uint32_t offset); uint32_t hv_vmbus_get_ring_buffer_interrupt_mask( hv_vmbus_ring_buffer_info *ring_info); void hv_vmbus_dump_ring_info( hv_vmbus_ring_buffer_info *ring_info, char *prefix); +void hv_ring_buffer_read_begin( + hv_vmbus_ring_buffer_info *ring_info); + +uint32_t hv_ring_buffer_read_end( + hv_vmbus_ring_buffer_info *ring_info); + hv_vmbus_channel* hv_vmbus_allocate_channel(void); void hv_vmbus_free_vmbus_channel(hv_vmbus_channel *channel); void hv_vmbus_on_channel_message(void *context); int hv_vmbus_request_channel_offers(void); void hv_vmbus_release_unattached_channels(void); int hv_vmbus_init(void); void hv_vmbus_cleanup(void); uint16_t hv_vmbus_post_msg_via_msg_ipc( hv_vmbus_connection_id connection_id, hv_vmbus_msg_type message_type, void *payload, size_t payload_size); -uint16_t hv_vmbus_signal_event(void); +uint16_t hv_vmbus_signal_event(void *con_id); void hv_vmbus_synic_init(void *irq_arg); void hv_vmbus_synic_cleanup(void *arg); int hv_vmbus_query_hypervisor_presence(void); struct hv_device* hv_vmbus_child_device_create( hv_guid device_type, hv_guid device_instance, hv_vmbus_channel *channel); int hv_vmbus_child_device_register( struct hv_device *child_dev); int hv_vmbus_child_device_unregister( struct hv_device *child_dev); hv_vmbus_channel* hv_vmbus_get_channel_from_rel_id(uint32_t rel_id); /** * Connection interfaces */ int hv_vmbus_connect(void); int hv_vmbus_disconnect(void); int hv_vmbus_post_message(void *buffer, size_t buf_size); -int hv_vmbus_set_event(uint32_t child_rel_id); +int hv_vmbus_set_event(hv_vmbus_channel *channel); void hv_vmbus_on_events(void *); /* * The guest OS needs to register the guest ID with the hypervisor. * The guest ID is a 64-bit entity whose structure is * specified in the Hyper-V specification: * * http://msdn.microsoft.com/en-us/library/windows/ * hardware/ff542653%28v=vs.85%29.aspx * * While the current guidelines do not specify how FreeBSD guest IDs * are to be generated, our plan is to publish guidelines for * FreeBSD and other guest operating systems that are currently hosted * on Hyper-V. The implementation here conforms to these as-yet * unpublished guidelines.
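 *
 * As a worked example (illustrative only): with both distro ID parts
 * zero and a hypothetical __FreeBSD_version of 1100000 (0x10C8E0),
 * hv_generate_guest_id() below yields
 *
 *	(0x8200ULL << 48) | (0x10C8E0ULL << 16) == 0x82000010C8E00000
 *
 * i.e. bit 63 (open source) set, the FreeBSD OS type, and the kernel
 * version in their assigned fields.  The bit layout is: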
* * Bit(s) * 63 - Indicates if the OS is Open Source or not; 1 is Open Source * 62:56 - OS Type; Linux is 0x100, FreeBSD is 0x200 * 55:48 - Distro specific identification * 47:16 - FreeBSD kernel version number * 15:0 - Distro specific identification * * */ #define HV_FREEBSD_VENDOR_ID 0x8200 #define HV_FREEBSD_GUEST_ID hv_generate_guest_id(0,0) static inline uint64_t hv_generate_guest_id( uint8_t distro_id_part1, uint16_t distro_id_part2) { uint64_t guest_id; guest_id = (((uint64_t)HV_FREEBSD_VENDOR_ID) << 48); guest_id |= (((uint64_t)(distro_id_part1)) << 48); guest_id |= (((uint64_t)(__FreeBSD_version)) << 16); /* in param.h */ guest_id |= ((uint64_t)(distro_id_part2)); return guest_id; } typedef struct { unsigned int vector; - void *page_buffers[2]; + void *page_buffers[2 * MAXCPU]; } hv_setup_args; #endif /* __HYPERV_PRIV_H__ */ Index: head/sys/i386/conf/GENERIC =================================================================== --- head/sys/i386/conf/GENERIC (revision 282211) +++ head/sys/i386/conf/GENERIC (revision 282212) @@ -1,370 +1,372 @@ # # GENERIC -- Generic kernel configuration file for FreeBSD/i386 # # For more information on this file, please read the config(5) manual page, # and/or the handbook section on Kernel Configuration Files: # # http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the # latest information. # # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ cpu I486_CPU cpu I586_CPU cpu I686_CPU ident GENERIC makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support options SCHED_ULE # ULE scheduler options PREEMPTION # Enable kernel thread preemption options INET # InterNETworking options INET6 # IPv6 communications protocols options TCP_OFFLOAD # TCP offload options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem options SOFTUPDATES # Enable FFS soft updates support options UFS_ACL # Support for access control lists options UFS_DIRHASH # Improve performance on big directories options UFS_GJOURNAL # Enable gjournal-based UFS journaling options QUOTA # Enable disk quotas for UFS options MD_ROOT # MD is a potential root device options NFSCL # Network Filesystem Client options NFSD # Network Filesystem Server options NFSLOCKD # Network Lock Manager options NFS_ROOT # NFS usable as /, requires NFSCL options MSDOSFS # MSDOS Filesystem options CD9660 # ISO 9660 Filesystem options PROCFS # Process filesystem (requires PSEUDOFS) options PSEUDOFS # Pseudo-filesystem framework options GEOM_PART_GPT # GUID Partition Tables. options GEOM_RAID # Soft RAID functionality.
options GEOM_LABEL # Provides labelization options COMPAT_FREEBSD4 # Compatible with FreeBSD4 options COMPAT_FREEBSD5 # Compatible with FreeBSD5 options COMPAT_FREEBSD6 # Compatible with FreeBSD6 options COMPAT_FREEBSD7 # Compatible with FreeBSD7 options COMPAT_FREEBSD9 # Compatible with FreeBSD9 options COMPAT_FREEBSD10 # Compatible with FreeBSD10 options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI options KTRACE # ktrace(1) support options STACK # stack(9) support options SYSVSHM # SYSV-style shared memory options SYSVMSG # SYSV-style message queues options SYSVSEM # SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options KBD_INSTALL_CDEV # install a CDEV entry in /dev options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data options INCLUDE_CONFIG_FILE # Include this file in kernel # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. # For full debugger support use (turn off in stable branch): options DDB # Support DDB. options GDB # Support remote GDB. options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS options WITNESS # Enable checks to detect deadlocks and cycles options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones # To make an SMP kernel, the next two lines are needed options SMP # Symmetric MultiProcessor Kernel device apic # I/O APIC # CPU frequency control device cpufreq # Bus support. device acpi device pci options PCI_IOV # PCI SR-IOV support # Floppy drives device fdc # ATA controllers device ahci # AHCI-compatible SATA controllers device ata # Legacy ATA/SATA controllers options ATA_STATIC_ID # Static device numbering device mvs # Marvell 88SX50XX/88SX60XX/88SX70XX/SoC SATA device siis # SiliconImage SiI3124/SiI3132/SiI3531 SATA # SCSI Controllers device ahc # AHA2940 and onboard AIC7xxx devices options AHC_REG_PRETTY_PRINT # Print register bitfields in debug # output. Adds ~128k to driver. device ahd # AHA39320/29320 and onboard AIC79xx devices options AHD_REG_PRETTY_PRINT # Print register bitfields in debug # output. Adds ~215k to driver. device esp # AMD Am53C974 (Tekram DC-390(T)) device hptiop # Highpoint RocketRaid 3xxx series device isp # Qlogic family #device ispfw # Firmware for QLogic HBAs- normally a module device mpt # LSI-Logic MPT-Fusion device mps # LSI-Logic MPT-Fusion 2 device mpr # LSI-Logic MPT-Fusion 3 #device ncr # NCR/Symbios Logic device sym # NCR/Symbios Logic (newer chipsets + those of `ncr') device trm # Tekram DC395U/UW/F DC315U adapters device adv # Advansys SCSI adapters device adw # Advansys wide SCSI adapters device aha # Adaptec 154x SCSI adapters device aic # Adaptec 15[012]x SCSI adapters, AIC-6[23]60. 
device bt # Buslogic/Mylex MultiMaster SCSI adapters device ncv # NCR 53C500 device nsp # Workbit Ninja SCSI-3 device stg # TMC 18C30/18C50 device isci # Intel C600 SAS controller # ATA/SCSI peripherals device scbus # SCSI bus (required for ATA/SCSI) device ch # SCSI media changers device da # Direct Access (disks) device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) device ses # Enclosure Services (SES and SAF-TE) #device ctl # CAM Target Layer # RAID controllers interfaced to the SCSI subsystem device amr # AMI MegaRAID device arcmsr # Areca SATA II RAID device ciss # Compaq Smart RAID 5* device dpt # DPT Smartcache III, IV - See NOTES for options device hptmv # Highpoint RocketRAID 182x device hptnr # Highpoint DC7280, R750 device hptrr # Highpoint RocketRAID 17xx, 22xx, 23xx, 25xx device hpt27xx # Highpoint RocketRAID 27xx device iir # Intel Integrated RAID device ips # IBM (Adaptec) ServeRAID device mly # Mylex AcceleRAID/eXtremeRAID device twa # 3ware 9000 series PATA/SATA RAID device tws # LSI 3ware 9750 SATA+SAS 6Gb/s RAID controller # RAID controllers device aac # Adaptec FSA RAID device aacp # SCSI passthrough for aac (requires CAM) device aacraid # Adaptec by PMC RAID device ida # Compaq Smart RAID device mfi # LSI MegaRAID SAS device mlx # Mylex DAC960 family device mrsas # LSI/Avago MegaRAID SAS/SATA, 6Gb/s and 12Gb/s device pst # Promise Supertrak SX6000 device twe # 3ware ATA RAID # atkbdc0 controls both the keyboard and the PS/2 mouse device atkbdc # AT keyboard controller device atkbd # AT keyboard device psm # PS/2 mouse device kbdmux # keyboard multiplexer device vga # VGA video card driver options VESA # Add support for VESA BIOS Extensions (VBE) device splash # Splash screen and screen saver support # syscons is the default console driver, resembling an SCO console device sc options SC_PIXEL_MODE # add support for the raster text mode # vt is the new video console driver device vt device vt_vga device agp # support several AGP chipsets # Power management support (see NOTES for more options) #device apm # Add suspend/resume support for the i8254. device pmtimer # PCCARD (PCMCIA) support # PCMCIA and cardbus bridge support device cbb # cardbus (yenta) bridge device pccard # PC Card (16-bit) bus device cardbus # CardBus (32-bit) bus # Serial (COM) ports device uart # Generic UART driver # Parallel port device ppc device ppbus # Parallel port bus (required) device lpt # Printer device ppi # Parallel port interface device #device vpo # Requires scbus and da device puc # Multi I/O cards and multi-channel UARTs # PCI Ethernet NICs. device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE device de # DEC/Intel DC21x4x (``Tulip'') device em # Intel PRO/1000 Gigabit Ethernet Family device igb # Intel PRO/1000 PCIE Server Gigabit Family device ixgb # Intel PRO/10GbE Ethernet Card device le # AMD Am7900 LANCE and Am79C9xx PCnet device ti # Alteon Networks Tigon I/II gigabit Ethernet device txp # 3Com 3cR990 (``Typhoon'') device vx # 3Com 3c590, 3c595 (``Vortex'') # PCI Ethernet NICs that use the common MII bus controller code. # NOTE: Be sure to keep the 'device miibus' line in order to use these NICs! 
device miibus # MII bus support device ae # Attansic/Atheros L2 FastEthernet device age # Attansic/Atheros L1 Gigabit Ethernet device alc # Atheros AR8131/AR8132 Ethernet device ale # Atheros AR8121/AR8113/AR8114 Ethernet device bce # Broadcom BCM5706/BCM5708 Gigabit Ethernet device bfe # Broadcom BCM440x 10/100 Ethernet device bge # Broadcom BCM570xx Gigabit Ethernet device cas # Sun Cassini/Cassini+ and NS DP83065 Saturn device dc # DEC/Intel 21143 and various workalikes device et # Agere ET1310 10/100/Gigabit Ethernet device fxp # Intel EtherExpress PRO/100B (82557, 82558) device gem # Sun GEM/Sun ERI/Apple GMAC device hme # Sun HME (Happy Meal Ethernet) device jme # JMicron JMC250 Gigabit/JMC260 Fast Ethernet device lge # Level 1 LXT1001 gigabit Ethernet device msk # Marvell/SysKonnect Yukon II Gigabit Ethernet device nfe # nVidia nForce MCP on-board Ethernet device nge # NatSemi DP83820 gigabit Ethernet device pcn # AMD Am79C97x PCI 10/100 (precedence over 'le') device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 device sf # Adaptec AIC-6915 (``Starfire'') device sge # Silicon Integrated Systems SiS190/191 device sis # Silicon Integrated Systems SiS 900/SiS 7016 device sk # SysKonnect SK-984x & SK-982x gigabit Ethernet device ste # Sundance ST201 (D-Link DFE-550TX) device stge # Sundance/Tamarack TC9021 gigabit Ethernet device tl # Texas Instruments ThunderLAN device tx # SMC EtherPower II (83c170 ``EPIC'') device vge # VIA VT612x gigabit Ethernet device vr # VIA Rhine, Rhine II device vte # DM&P Vortex86 RDC R6040 Fast Ethernet device wb # Winbond W89C840F device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'') # ISA Ethernet NICs. pccard NICs included. device cs # Crystal Semiconductor CS89x0 NIC # 'device ed' requires 'device miibus' device ed # NE[12]000, SMC Ultra, 3c503, DS8390 cards device ex # Intel EtherExpress Pro/10 and Pro/10+ device ep # Etherlink III based cards device fe # Fujitsu MB8696x based cards device ie # EtherExpress 8/16, 3C507, StarLAN 10 etc. device sn # SMC's 9000 series of Ethernet chips device xe # Xircom pccard Ethernet # Wireless NIC cards device wlan # 802.11 support options IEEE80211_DEBUG # enable debug msgs options IEEE80211_AMPDU_AGE # age frames in AMPDU reorder q's options IEEE80211_SUPPORT_MESH # enable 802.11s draft support device wlan_wep # 802.11 WEP support device wlan_ccmp # 802.11 CCMP support device wlan_tkip # 802.11 TKIP support device wlan_amrr # AMRR transmit rate control algorithm device an # Aironet 4500/4800 802.11 wireless NICs. device ath # Atheros NICs device ath_pci # Atheros pci/cardbus glue device ath_hal # pci/cardbus chip support options AH_SUPPORT_AR5416 # enable AR5416 tx/rx descriptors options AH_AR5416_INTERRUPT_MITIGATION # AR5416 interrupt mitigation options ATH_ENABLE_11N # Enable 802.11n support for AR5416 and later device ath_rate_sample # SampleRate tx rate control for ath #device bwi # Broadcom BCM430x/BCM431x wireless NICs. #device bwn # Broadcom BCM43xx wireless NICs. device ipw # Intel 2100 wireless NICs. device iwi # Intel 2200BG/2225BG/2915ABG wireless NICs. device iwn # Intel 4965/1000/5000/6000 wireless NICs. device malo # Marvell Libertas wireless NICs. device mwl # Marvell 88W8363 802.11n wireless NICs. device ral # Ralink Technology RT2500 wireless NICs. device wi # WaveLAN/Intersil/Symbol 802.11 wireless NICs. #device wl # Older non 802.11 Wavelan wireless NIC. device wpi # Intel 3945ABG wireless NICs. # Pseudo devices. 
device loop # Network loopback device random # Entropy device device padlock_rng # VIA Padlock RNG device rdrand_rng # Intel Bull Mountain RNG device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. device bpf # Berkeley packet filter # USB support options USB_DEBUG # enable debug msgs device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device ehci # EHCI PCI->USB interface (USB 2.0) device xhci # XHCI PCI->USB interface (USB 3.0) device usb # USB Bus (required) device ukbd # Keyboard device umass # Disks/Mass storage - Requires scbus and da # Sound support device sound # Generic sound driver (required) device snd_cmi # CMedia CMI8338/CMI8738 device snd_csa # Crystal Semiconductor CS461x/428x device snd_emu10kx # Creative SoundBlaster Live! and Audigy device snd_es137x # Ensoniq AudioPCI ES137x device snd_hda # Intel High Definition Audio device snd_ich # Intel, NVidia and other ICH AC'97 Audio device snd_via8233 # VIA VT8233x Audio # MMC/SD device mmc # MMC/SD bus device mmcsd # MMC/SD memory card device sdhci # Generic PCI SD Host Controller # VirtIO support device virtio # Generic VirtIO bus (required) device virtio_pci # VirtIO PCI device device vtnet # VirtIO Ethernet device device virtio_blk # VirtIO Block device device virtio_scsi # VirtIO SCSI device device virtio_balloon # VirtIO Memory Balloon device -# HyperV drivers +# HyperV drivers and enhancement support +# NOTE: HYPERV depends on hyperv. They must be added or removed together. +options HYPERV # Hyper-V kernel infrastructure device hyperv # HyperV drivers # Xen HVM Guest Optimizations # NOTE: XENHVM depends on xenpci. They must be added or removed together. options XENHVM # Xen HVM kernel infrastructure device xenpci # Xen HVM Hypervisor services driver # VMware support device vmx # VMware VMXNET3 Ethernet Index: head/sys/i386/i386/apic_vector.s =================================================================== --- head/sys/i386/i386/apic_vector.s (revision 282211) +++ head/sys/i386/i386/apic_vector.s (revision 282212) @@ -1,323 +1,342 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz. * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD$ */ /* * Interrupt entry points for external interrupts triggered by I/O APICs * as well as IPI handlers. */ #include "opt_smp.h" #include #include #include #include "assym.s" .text SUPERALIGN_TEXT /* End Of Interrupt to APIC */ as_lapic_eoi: cmpl $0,x2apic_mode jne 1f movl lapic_map,%eax movl $0,LA_EOI(%eax) ret 1: movl $MSR_APIC_EOI,%ecx xorl %eax,%eax xorl %edx,%edx wrmsr ret /* * I/O Interrupt Entry Point. Rather than having one entry point for * each interrupt source, we use one entry point for each 32-bit word * in the ISR. The handler determines the highest bit set in the ISR, * translates that into a vector, and passes the vector to the * lapic_handle_intr() function. */ #define ISR_VEC(index, vec_name) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ SET_KERNEL_SREGS ; \ cld ; \ FAKE_MCOUNT(TF_EIP(%esp)) ; \ cmpl $0,x2apic_mode ; \ je 1f ; \ movl $(MSR_APIC_ISR0 + index),%ecx ; \ rdmsr ; \ jmp 2f ; \ 1: ; \ movl lapic_map, %edx ;/* pointer to local APIC */ \ movl LA_ISR + 16 * (index)(%edx), %eax ; /* load ISR */ \ 2: ; \ bsrl %eax, %eax ; /* index of highest set bit in ISR */ \ jz 3f ; \ addl $(32 * index),%eax ; \ pushl %esp ; \ pushl %eax ; /* pass the IRQ */ \ call lapic_handle_intr ; \ addl $8, %esp ; /* discard parameter */ \ 3: ; \ MEXITCOUNT ; \ jmp doreti /* * Handle "spurious INTerrupts". * Notes: * This is different than the "spurious INTerrupt" generated by an * 8259 PIC for missing INTs. See the APIC documentation for details. * This routine should NOT do an 'EOI' cycle. */ .text SUPERALIGN_TEXT IDTVEC(spuriousint) /* No EOI cycle used here */ iret ISR_VEC(1, apic_isr1) ISR_VEC(2, apic_isr2) ISR_VEC(3, apic_isr3) ISR_VEC(4, apic_isr4) ISR_VEC(5, apic_isr5) ISR_VEC(6, apic_isr6) ISR_VEC(7, apic_isr7) /* * Local APIC periodic timer handler. */ .text SUPERALIGN_TEXT IDTVEC(timerint) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp call lapic_handle_timer add $4, %esp MEXITCOUNT jmp doreti /* * Local APIC CMCI handler. */ .text SUPERALIGN_TEXT IDTVEC(cmcint) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) call lapic_handle_cmc MEXITCOUNT jmp doreti /* * Local APIC error interrupt handler. */ .text SUPERALIGN_TEXT IDTVEC(errorint) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) call lapic_handle_error MEXITCOUNT jmp doreti #ifdef XENHVM /* * Xen event channel upcall interrupt handler. * Only used when the hypervisor supports direct vector callbacks. */ .text SUPERALIGN_TEXT IDTVEC(xen_intr_upcall) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp call xen_intr_handle_upcall add $4, %esp MEXITCOUNT jmp doreti #endif +#ifdef HYPERV +/* + * This is the Hyper-V vmbus channel direct callback interrupt. + * Only used when it is running on Hyper-V. 
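+ * Like the Xen upcall entry above, the entry point below pushes a
+ * trapframe, loads the kernel segment registers, and passes the frame
+ * to the C-level hv_vector_handler() before returning through doreti.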
+ */ + .text + SUPERALIGN_TEXT +IDTVEC(hv_vmbus_callback) + PUSH_FRAME + SET_KERNEL_SREGS + cld + FAKE_MCOUNT(TF_EIP(%esp)) + pushl %esp + call hv_vector_handler + add $4, %esp + MEXITCOUNT + jmp doreti +#endif + #ifdef SMP /* * Global address space TLB shootdown. */ .text SUPERALIGN_TEXT invltlb_ret: call as_lapic_eoi POP_FRAME iret SUPERALIGN_TEXT IDTVEC(invltlb) PUSH_FRAME SET_KERNEL_SREGS cld call invltlb_handler jmp invltlb_ret /* * Single page TLB shootdown */ .text SUPERALIGN_TEXT IDTVEC(invlpg) PUSH_FRAME SET_KERNEL_SREGS cld call invlpg_handler jmp invltlb_ret /* * Page range TLB shootdown. */ .text SUPERALIGN_TEXT IDTVEC(invlrng) PUSH_FRAME SET_KERNEL_SREGS cld call invlrng_handler jmp invltlb_ret /* * Invalidate cache. */ .text SUPERALIGN_TEXT IDTVEC(invlcache) PUSH_FRAME SET_KERNEL_SREGS cld call invlcache_handler jmp invltlb_ret /* * Handler for IPIs sent via the per-cpu IPI bitmap. */ #ifndef XEN .text SUPERALIGN_TEXT IDTVEC(ipi_intr_bitmap_handler) PUSH_FRAME SET_KERNEL_SREGS cld call as_lapic_eoi FAKE_MCOUNT(TF_EIP(%esp)) call ipi_bitmap_handler MEXITCOUNT jmp doreti #endif /* * Executed by a CPU when it receives an IPI_STOP from another CPU. */ .text SUPERALIGN_TEXT IDTVEC(cpustop) PUSH_FRAME SET_KERNEL_SREGS cld call as_lapic_eoi call cpustop_handler POP_FRAME iret /* * Executed by a CPU when it receives an IPI_SUSPEND from another CPU. */ #ifndef XEN .text SUPERALIGN_TEXT IDTVEC(cpususpend) PUSH_FRAME SET_KERNEL_SREGS cld call as_lapic_eoi call cpususpend_handler POP_FRAME jmp doreti_iret #endif /* * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU. * * - Calls the generic rendezvous action function. */ .text SUPERALIGN_TEXT IDTVEC(rendezvous) PUSH_FRAME SET_KERNEL_SREGS cld #ifdef COUNT_IPIS movl PCPU(CPUID), %eax movl ipi_rendezvous_counts(,%eax,4), %eax incl (%eax) #endif call smp_rendezvous_action call as_lapic_eoi POP_FRAME iret #endif /* SMP */ Index: head/sys/x86/include/apicvar.h =================================================================== --- head/sys/x86/include/apicvar.h (revision 282211) +++ head/sys/x86/include/apicvar.h (revision 282212) @@ -1,466 +1,467 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _X86_APICVAR_H_ #define _X86_APICVAR_H_ /* * Local && I/O APIC variable definitions. */ /* * Layout of local APIC interrupt vectors: * * 0xff (255) +-------------+ * | | 15 (Spurious / IPIs / Local Interrupts) * 0xf0 (240) +-------------+ * | | 14 (I/O Interrupts / Timer) * 0xe0 (224) +-------------+ * | | 13 (I/O Interrupts) * 0xd0 (208) +-------------+ * | | 12 (I/O Interrupts) * 0xc0 (192) +-------------+ * | | 11 (I/O Interrupts) * 0xb0 (176) +-------------+ * | | 10 (I/O Interrupts) * 0xa0 (160) +-------------+ * | | 9 (I/O Interrupts) * 0x90 (144) +-------------+ * | | 8 (I/O Interrupts / System Calls) * 0x80 (128) +-------------+ * | | 7 (I/O Interrupts) * 0x70 (112) +-------------+ * | | 6 (I/O Interrupts) * 0x60 (96) +-------------+ * | | 5 (I/O Interrupts) * 0x50 (80) +-------------+ * | | 4 (I/O Interrupts) * 0x40 (64) +-------------+ * | | 3 (I/O Interrupts) * 0x30 (48) +-------------+ * | | 2 (ATPIC Interrupts) * 0x20 (32) +-------------+ * | | 1 (Exceptions, traps, faults, etc.) * 0x10 (16) +-------------+ * | | 0 (Exceptions, traps, faults, etc.) * 0x00 (0) +-------------+ * * Note: 0x80 needs to be handled specially and not allocated to an * I/O device! */ #define MAX_APIC_ID 0xfe #define APIC_ID_ALL 0xff /* I/O Interrupts are used for external devices such as ISA, PCI, etc. */ #define APIC_IO_INTS (IDT_IO_INTS + 16) #define APIC_NUM_IOINTS 191 /* The timer interrupt is used for clock handling and drives hardclock, etc. */ #define APIC_TIMER_INT (APIC_IO_INTS + APIC_NUM_IOINTS) /* ********************* !!! WARNING !!! ****************************** * Each local apic has an interrupt receive fifo that is two entries deep * for each interrupt priority class (higher 4 bits of interrupt vector). * Once the fifo is full the APIC can no longer receive interrupts for this * class and sending IPIs from other CPUs will be blocked. * To avoid deadlocks there should be no more than two IPI interrupts * pending at the same time. * Currently this is guaranteed by dividing the IPIs in two groups that have * each at most one IPI interrupt pending. The first group is protected by the * smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user * at a time) The second group uses a single interrupt and a bitmap to avoid * redundant IPI interrupts. */ /* Interrupts for local APIC LVT entries other than the timer. */ #define APIC_LOCAL_INTS 240 #define APIC_ERROR_INT APIC_LOCAL_INTS #define APIC_THERMAL_INT (APIC_LOCAL_INTS + 1) #define APIC_CMC_INT (APIC_LOCAL_INTS + 2) #define APIC_IPI_INTS (APIC_LOCAL_INTS + 3) #define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */ #define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */ #define IPI_INVLPG (APIC_IPI_INTS + 2) #define IPI_INVLRNG (APIC_IPI_INTS + 3) #define IPI_INVLCACHE (APIC_IPI_INTS + 4) /* Vector to handle bitmap based IPIs */ #define IPI_BITMAP_VECTOR (APIC_IPI_INTS + 5) /* IPIs handled by IPI_BITMAP_VECTOR */ #define IPI_AST 0 /* Generate software trap. */ #define IPI_PREEMPT 1 #define IPI_HARDCLOCK 2 #define IPI_BITMAP_LAST IPI_HARDCLOCK #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 6) /* Stop CPU until restarted. */ #define IPI_SUSPEND (APIC_IPI_INTS + 7) /* Suspend CPU until restarted. */ #ifdef __i386__ #define IPI_LAZYPMAP (APIC_IPI_INTS + 8) /* Lazy pmap release. 
*/ #define IPI_DYN_FIRST (APIC_IPI_INTS + 9) #else #define IPI_DYN_FIRST (APIC_IPI_INTS + 8) #endif #define IPI_DYN_LAST (254) /* IPIs allocated at runtime */ /* * IPI_STOP_HARD does not need to occupy a slot in the IPI vector space since * it is delivered using an NMI anyway. */ #define IPI_STOP_HARD 255 /* Stop CPU with an NMI. */ /* * The spurious interrupt can share the priority class with the IPIs since * it is not a normal interrupt. (Does not use the APIC's interrupt FIFO) */ #define APIC_SPURIOUS_INT 255 #ifndef LOCORE #define APIC_IPI_DEST_SELF -1 #define APIC_IPI_DEST_ALL -2 #define APIC_IPI_DEST_OTHERS -3 #define APIC_BUS_UNKNOWN -1 #define APIC_BUS_ISA 0 #define APIC_BUS_EISA 1 #define APIC_BUS_PCI 2 #define APIC_BUS_MAX APIC_BUS_PCI #define IRQ_EXTINT (NUM_IO_INTS + 1) #define IRQ_NMI (NUM_IO_INTS + 2) #define IRQ_SMI (NUM_IO_INTS + 3) #define IRQ_DISABLED (NUM_IO_INTS + 4) /* * An APIC enumerator is a pseudo bus driver that enumerates APICs, * including CPUs and I/O APICs. */ struct apic_enumerator { const char *apic_name; int (*apic_probe)(void); int (*apic_probe_cpus)(void); int (*apic_setup_local)(void); int (*apic_setup_io)(void); SLIST_ENTRY(apic_enumerator) apic_next; }; inthand_t IDTVEC(apic_isr1), IDTVEC(apic_isr2), IDTVEC(apic_isr3), IDTVEC(apic_isr4), IDTVEC(apic_isr5), IDTVEC(apic_isr6), IDTVEC(apic_isr7), IDTVEC(cmcint), IDTVEC(errorint), IDTVEC(spuriousint), IDTVEC(timerint); extern vm_paddr_t lapic_paddr; extern int apic_cpuids[]; void apic_register_enumerator(struct apic_enumerator *enumerator); void *ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase); int ioapic_disable_pin(void *cookie, u_int pin); int ioapic_get_vector(void *cookie, u_int pin); void ioapic_register(void *cookie); int ioapic_remap_vector(void *cookie, u_int pin, int vector); int ioapic_set_bus(void *cookie, u_int pin, int bus_type); int ioapic_set_extint(void *cookie, u_int pin); int ioapic_set_nmi(void *cookie, u_int pin); int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol); int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger); int ioapic_set_smi(void *cookie, u_int pin); /* * Struct containing pointers to APIC functions whose * implementation is run-time selectable.
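 * For example (a sketch with hypothetical names, not code from this
 * file), an xAPIC backend might wire up its implementation as
 *
 *	struct apic_ops apic_ops = {
 *		.eoi = native_lapic_eoi,
 *		...
 *	};
 *
 * so that the lapic_eoi() inline below dispatches to whichever backend
 * was selected at boot.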
*/ struct apic_ops { void (*create)(u_int, int); void (*init)(vm_paddr_t); void (*xapic_mode)(void); void (*setup)(int); void (*dump)(const char *); void (*disable)(void); void (*eoi)(void); int (*id)(void); int (*intr_pending)(u_int); void (*set_logical_id)(u_int, u_int, u_int); u_int (*cpuid)(u_int); /* Vectors */ u_int (*alloc_vector)(u_int, u_int); u_int (*alloc_vectors)(u_int, u_int *, u_int, u_int); void (*enable_vector)(u_int, u_int); void (*disable_vector)(u_int, u_int); void (*free_vector)(u_int, u_int, u_int); /* PMC */ int (*enable_pmc)(void); void (*disable_pmc)(void); void (*reenable_pmc)(void); /* CMC */ void (*enable_cmc)(void); /* IPI */ void (*ipi_raw)(register_t, u_int); void (*ipi_vectored)(u_int, int); int (*ipi_wait)(int); int (*ipi_alloc)(inthand_t *ipifunc); void (*ipi_free)(int vector); /* LVT */ int (*set_lvt_mask)(u_int, u_int, u_char); int (*set_lvt_mode)(u_int, u_int, u_int32_t); int (*set_lvt_polarity)(u_int, u_int, enum intr_polarity); int (*set_lvt_triggermode)(u_int, u_int, enum intr_trigger); }; extern struct apic_ops apic_ops; static inline void lapic_create(u_int apic_id, int boot_cpu) { apic_ops.create(apic_id, boot_cpu); } static inline void lapic_init(vm_paddr_t addr) { apic_ops.init(addr); } static inline void lapic_xapic_mode(void) { apic_ops.xapic_mode(); } static inline void lapic_setup(int boot) { apic_ops.setup(boot); } static inline void lapic_dump(const char *str) { apic_ops.dump(str); } static inline void lapic_disable(void) { apic_ops.disable(); } static inline void lapic_eoi(void) { apic_ops.eoi(); } static inline int lapic_id(void) { return (apic_ops.id()); } static inline int lapic_intr_pending(u_int vector) { return (apic_ops.intr_pending(vector)); } /* XXX: UNUSED */ static inline void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) { apic_ops.set_logical_id(apic_id, cluster, cluster_id); } static inline u_int apic_cpuid(u_int apic_id) { return (apic_ops.cpuid(apic_id)); } static inline u_int apic_alloc_vector(u_int apic_id, u_int irq) { return (apic_ops.alloc_vector(apic_id, irq)); } static inline u_int apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) { return (apic_ops.alloc_vectors(apic_id, irqs, count, align)); } static inline void apic_enable_vector(u_int apic_id, u_int vector) { apic_ops.enable_vector(apic_id, vector); } static inline void apic_disable_vector(u_int apic_id, u_int vector) { apic_ops.disable_vector(apic_id, vector); } static inline void apic_free_vector(u_int apic_id, u_int vector, u_int irq) { apic_ops.free_vector(apic_id, vector, irq); } static inline int lapic_enable_pmc(void) { return (apic_ops.enable_pmc()); } static inline void lapic_disable_pmc(void) { apic_ops.disable_pmc(); } static inline void lapic_reenable_pmc(void) { apic_ops.reenable_pmc(); } static inline void lapic_enable_cmc(void) { apic_ops.enable_cmc(); } static inline void lapic_ipi_raw(register_t icrlo, u_int dest) { apic_ops.ipi_raw(icrlo, dest); } static inline void lapic_ipi_vectored(u_int vector, int dest) { apic_ops.ipi_vectored(vector, dest); } static inline int lapic_ipi_wait(int delay) { return (apic_ops.ipi_wait(delay)); } static inline int lapic_ipi_alloc(inthand_t *ipifunc) { return (apic_ops.ipi_alloc(ipifunc)); } static inline void lapic_ipi_free(int vector) { return (apic_ops.ipi_free(vector)); } static inline int lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked) { return (apic_ops.set_lvt_mask(apic_id, lvt, masked)); } static inline int lapic_set_lvt_mode(u_int apic_id, u_int 
lvt, u_int32_t mode) { return (apic_ops.set_lvt_mode(apic_id, lvt, mode)); } static inline int lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol) { return (apic_ops.set_lvt_polarity(apic_id, lvt, pol)); } static inline int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger) { return (apic_ops.set_lvt_triggermode(apic_id, lvt, trigger)); } void lapic_handle_cmc(void); void lapic_handle_error(void); void lapic_handle_intr(int vector, struct trapframe *frame); void lapic_handle_timer(struct trapframe *frame); void xen_intr_handle_upcall(struct trapframe *frame); +void hv_vector_handler(struct trapframe *frame); extern int x2apic_mode; extern int lapic_eoi_suppression; #ifdef _SYS_SYSCTL_H_ SYSCTL_DECL(_hw_apic); #endif #endif /* !LOCORE */ #endif /* _X86_APICVAR_H_ */
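For context, the hv_vector_handler() declared above is supplied by the Hyper-V vmbus driver rather than by this header. A minimal sketch of the shape such a handler can take (assuming a driver dispatch routine named hv_vmbus_isr() and a SynIC SINT programmed with auto_eoi, so no explicit lapic_eoi() is issued; both are assumptions, not a description of the committed driver):

void
hv_vector_handler(struct trapframe *trap_frame)
{
	struct thread *td = curthread;

	/* Track nesting so the rest of the kernel sees interrupt context. */
	td->td_intr_nesting_level++;
	/* Dispatch to the vmbus driver's interrupt routine (name assumed). */
	hv_vmbus_isr(trap_frame);
	td->td_intr_nesting_level--;
}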