diff --git a/sys/conf/files.riscv b/sys/conf/files.riscv index 6186ae9b3371..534fe5013c56 100644 --- a/sys/conf/files.riscv +++ b/sys/conf/files.riscv @@ -1,90 +1,91 @@ cddl/dev/dtrace/riscv/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/riscv/dtrace_isa.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/dtrace/riscv/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/dtrace/riscv/instr_size.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/riscv/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" crypto/des/des_enc.c optional netsmb dev/ofw/ofw_cpu.c optional fdt dev/ofw/ofw_pcib.c optional pci fdt dev/pci/pci_dw.c optional pci fdt dev/pci/pci_dw_if.m optional pci fdt dev/pci/pci_host_generic.c optional pci dev/pci/pci_host_generic_fdt.c optional pci fdt dev/uart/uart_cpu_fdt.c optional uart fdt dev/uart/uart_dev_lowrisc.c optional uart_lowrisc dev/vmm/vmm_dev.c optional vmm dev/vmm/vmm_stat.c optional vmm dev/xilinx/axi_quad_spi.c optional xilinx_spi dev/xilinx/axidma.c optional axidma xdma dev/xilinx/if_xae.c optional xae dev/xilinx/xlnx_pcib.c optional pci fdt xlnx_pcib kern/msi_if.m standard kern/pic_if.m standard kern/subr_devmap.c standard kern/subr_dummy_vdso_tc.c standard kern/subr_intr.c standard kern/subr_physmem.c standard libkern/bcopy.c standard libkern/memcmp.c standard libkern/memset.c standard libkern/strcmp.c standard libkern/strlen.c standard libkern/strncmp.c standard riscv/riscv/aplic.c standard riscv/riscv/autoconf.c standard riscv/riscv/bus_machdep.c standard riscv/riscv/bus_space_asm.S standard riscv/riscv/busdma_bounce.c standard riscv/riscv/busdma_machdep.c standard riscv/riscv/cache.c standard riscv/riscv/clock.c standard riscv/riscv/copyinout.S standard riscv/riscv/cpufunc_asm.S standard riscv/riscv/db_disasm.c optional ddb riscv/riscv/db_interface.c optional ddb riscv/riscv/db_trace.c optional ddb riscv/riscv/dump_machdep.c standard riscv/riscv/elf_machdep.c standard riscv/riscv/exception.S standard riscv/riscv/exec_machdep.c standard riscv/riscv/fpe.c standard riscv/riscv/gdb_machdep.c optional gdb riscv/riscv/intc.c standard riscv/riscv/identcpu.c standard riscv/riscv/locore.S standard no-obj riscv/riscv/machdep.c standard riscv/riscv/minidump_machdep.c standard riscv/riscv/mp_machdep.c optional smp riscv/riscv/mem.c standard riscv/riscv/nexus.c standard riscv/riscv/ofw_machdep.c optional fdt riscv/riscv/plic.c standard riscv/riscv/pmap.c standard riscv/riscv/ptrace_machdep.c standard riscv/riscv/riscv_console.c optional rcons riscv/riscv/riscv_syscon.c optional syscon riscv_syscon fdt riscv/riscv/sigtramp.S standard riscv/riscv/sbi.c standard riscv/riscv/sbi_ipi.c optional smp riscv/riscv/sdt_machdep.c optional kdtrace_hooks riscv/riscv/stack_machdep.c optional ddb | stack riscv/riscv/support.S standard riscv/riscv/swtch.S standard riscv/riscv/sys_machdep.c standard riscv/riscv/trap.c standard riscv/riscv/timer.c standard riscv/riscv/uio_machdep.c standard riscv/riscv/unwind.c optional ddb | kdtrace_hooks | stack riscv/riscv/vm_machdep.c standard riscv/vmm/vmm.c optional vmm riscv/vmm/vmm_aplic.c optional vmm riscv/vmm/vmm_dev_machdep.c optional vmm riscv/vmm/vmm_instruction_emul.c optional vmm riscv/vmm/vmm_riscv.c optional vmm riscv/vmm/vmm_sbi.c optional vmm riscv/vmm/vmm_switch.S optional vmm +riscv/vmm/vmm_vtimer.c optional vmm riscv/thead/thead.c standard # Zstd contrib/zstd/lib/freebsd/zstd_kfreebsd.c optional zstdio compile-with ${ZSTD_C} diff --git a/sys/modules/vmm/Makefile 
b/sys/modules/vmm/Makefile index 0ec1147d0d4b..7c34dd92939b 100644 --- a/sys/modules/vmm/Makefile +++ b/sys/modules/vmm/Makefile @@ -1,161 +1,162 @@ .include KMOD= vmm .if ${MACHINE_CPUARCH} == "amd64" .endif SRCS+= acpi_if.h bus_if.h device_if.h pci_if.h pcib_if.h vnode_if.h CFLAGS+= -DVMM_KEEP_STATS CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm # generic vmm support .PATH: ${SRCTOP}/sys/dev/vmm ${SRCTOP}/sys/${MACHINE}/vmm SRCS+= vmm.c \ vmm_dev.c \ vmm_dev_machdep.c \ vmm_instruction_emul.c \ vmm_stat.c .if ${MACHINE_CPUARCH} == "aarch64" CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm/io DPSRCS+= assym.inc # TODO: Add the new EL2 code SRCS+= vmm_arm64.c \ vmm_reset.c \ vmm_call.S \ vmm_handlers.c \ vmm_mmu.c \ vmm_vhe_exception.S \ vmm_vhe.c \ vmm_hyp_el2.S .PATH: ${SRCTOP}/sys/${MACHINE}/vmm/io SRCS+= vgic.c \ vgic_if.h \ vgic_if.c \ vgic_v3.c \ vtimer.c CLEANFILES+= vmm_nvhe_exception.o vmm_nvhe.o CLEANFILES+= vmm_hyp_blob.elf.full CLEANFILES+= vmm_hyp_blob.elf vmm_hyp_blob.bin vmm_nvhe_exception.o: vmm_nvhe_exception.S ${CC} -c -x assembler-with-cpp -DLOCORE \ ${NOSAN_CFLAGS:N-mbranch-protection*} ${.IMPSRC} -o ${.TARGET} -fpie vmm_nvhe.o: vmm_nvhe.c ${CC} -c ${NOSAN_CFLAGS:N-mbranch-protection*} ${.IMPSRC} \ -o ${.TARGET} -fpie vmm_hyp_blob.elf.full: vmm_nvhe_exception.o vmm_nvhe.o ${LD} -m ${LD_EMULATION} -Bdynamic -L ${SYSDIR}/conf -T ${SYSDIR}/conf/ldscript.arm64 \ ${_LDFLAGS:N-zbti-report*} --no-warn-mismatch --warn-common --export-dynamic \ --dynamic-linker /red/herring -X -o ${.TARGET} ${.ALLSRC} \ --defsym=_start='0x0' --defsym=text_start='0x0' vmm_hyp_blob.elf: vmm_hyp_blob.elf.full ${OBJCOPY} --strip-debug ${.ALLSRC} ${.TARGET} vmm_hyp_blob.bin: vmm_hyp_blob.elf ${OBJCOPY} --output-target=binary ${.ALLSRC} ${.TARGET} vmm_hyp_el2.o: vmm_hyp_blob.bin .elif ${MACHINE_CPUARCH} == "amd64" CFLAGS+= -I${SRCTOP}/sys/${MACHINE}/vmm/io DPSRCS+= vmx_assym.h svm_assym.h DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd SRCS+= opt_acpi.h \ opt_bhyve_snapshot.h \ opt_ddb.h SRCS+= vmm_host.c \ vmm_ioport.c \ vmm_lapic.c \ vmm_mem.c \ vmm_util.c \ x86.c .PATH: ${SRCTOP}/sys/${MACHINE}/vmm/io SRCS+= iommu.c \ ppt.c \ vatpic.c \ vatpit.c \ vhpet.c \ vioapic.c \ vlapic.c \ vpmtmr.c \ vrtc.c # intel-specific files .PATH: ${SRCTOP}/sys/amd64/vmm/intel SRCS+= ept.c \ vmcs.c \ vmx_msr.c \ vmx_support.S \ vmx.c \ vtd.c # amd-specific files .PATH: ${SRCTOP}/sys/amd64/vmm/amd SRCS+= vmcb.c \ amdviiommu.c \ ivhd_if.c \ ivhd_if.h \ svm.c \ svm_support.S \ npt.c \ ivrs_drv.c \ amdvi_hw.c \ svm_msr.c SRCS.BHYVE_SNAPSHOT= vmm_snapshot.c CLEANFILES+= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o OBJS_DEPEND_GUESS.vmx_support.o+= vmx_assym.h OBJS_DEPEND_GUESS.svm_support.o+= svm_assym.h vmx_assym.h: vmx_genassym.o sh ${SYSDIR}/kern/genassym.sh vmx_genassym.o > ${.TARGET} svm_assym.h: svm_genassym.o sh ${SYSDIR}/kern/genassym.sh svm_genassym.o > ${.TARGET} vmx_support.o: ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \ ${.IMPSRC} -o ${.TARGET} svm_support.o: ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \ ${.IMPSRC} -o ${.TARGET} hyp_genassym.o: offset.inc ${CC} -c ${NOSAN_CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC} vmx_genassym.o: offset.inc ${CC} -c ${NOSAN_CFLAGS:N-flto*:N-fno-common} -fcommon ${.IMPSRC} svm_genassym.o: offset.inc ${CC} -c ${NOSAN_CFLAGS:N-flto*:N-fno-common} -fcommon ${.IMPSRC} .elif ${MACHINE_CPUARCH} == "riscv" SRCS+= vmm_aplic.c \ vmm_riscv.c \ vmm_sbi.c \ - vmm_switch.S + vmm_switch.S \ + 
vmm_vtimer.c .endif .include diff --git a/sys/riscv/vmm/riscv.h b/sys/riscv/vmm/riscv.h index ed4b65003f94..f3665d33a386 100644 --- a/sys/riscv/vmm/riscv.h +++ b/sys/riscv/vmm/riscv.h @@ -1,132 +1,137 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2015 Mihai Carabas * Copyright (c) 2024 Ruslan Bukin * * This software was developed by the University of Cambridge Computer * Laboratory (Department of Computer Science and Technology) under Innovate * UK project 105694, "Digital Security by Design (DSbD) Technology Platform * Prototype". * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef _VMM_RISCV_H_ #define _VMM_RISCV_H_ #include #include #include +#include + struct hypregs { uint64_t hyp_ra; uint64_t hyp_sp; uint64_t hyp_gp; uint64_t hyp_tp; uint64_t hyp_t[7]; uint64_t hyp_s[12]; uint64_t hyp_a[8]; uint64_t hyp_sepc; uint64_t hyp_sstatus; uint64_t hyp_hstatus; }; struct hypcsr { uint64_t hvip; uint64_t vsstatus; uint64_t vsie; uint64_t vstvec; uint64_t vsscratch; uint64_t vsepc; uint64_t vscause; uint64_t vstval; uint64_t vsatp; uint64_t scounteren; uint64_t senvcfg; }; struct hypctx { struct hypregs host_regs; struct hypregs guest_regs; struct hypcsr guest_csrs; uint64_t host_sscratch; uint64_t host_stvec; uint64_t host_scounteren; uint64_t guest_scounteren; struct hyp *hyp; struct vcpu *vcpu; bool has_exception; int cpu_id; int ipi_pending; + int interrupts_pending; + struct vtimer vtimer; }; struct hyp { struct vm *vm; uint64_t vmid_generation; bool aplic_attached; struct aplic *aplic; struct hypctx *ctx[]; }; struct hyptrap { uint64_t sepc; uint64_t scause; uint64_t stval; uint64_t htval; uint64_t htinst; }; #define DEFINE_VMMOPS_IFUNC(ret_type, opname, args) \ ret_type vmmops_##opname args; DEFINE_VMMOPS_IFUNC(int, modinit, (void)) DEFINE_VMMOPS_IFUNC(int, modcleanup, (void)) DEFINE_VMMOPS_IFUNC(void *, init, (struct vm *vm, struct pmap *pmap)) DEFINE_VMMOPS_IFUNC(int, gla2gpa, (void *vcpui, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *is_fault)) DEFINE_VMMOPS_IFUNC(int, run, (void *vcpui, register_t pc, struct pmap *pmap, struct vm_eventinfo *info)) DEFINE_VMMOPS_IFUNC(void, cleanup, (void *vmi)) DEFINE_VMMOPS_IFUNC(void *, vcpu_init, (void *vmi, struct vcpu *vcpu, int vcpu_id)) DEFINE_VMMOPS_IFUNC(void, vcpu_cleanup, (void *vcpui)) DEFINE_VMMOPS_IFUNC(int, exception, (void *vcpui, uint64_t scause)) DEFINE_VMMOPS_IFUNC(int, getreg, (void *vcpui, int num, uint64_t *retval)) DEFINE_VMMOPS_IFUNC(int, setreg, (void *vcpui, int num, uint64_t val)) DEFINE_VMMOPS_IFUNC(int, getcap, (void *vcpui, int num, int *retval)) DEFINE_VMMOPS_IFUNC(int, setcap, (void *vcpui, int num, int val)) DEFINE_VMMOPS_IFUNC(struct vmspace *, vmspace_alloc, (vm_offset_t min, vm_offset_t max)) DEFINE_VMMOPS_IFUNC(void, vmspace_free, (struct vmspace *vmspace)) #define dprintf(fmt, ...) struct hypctx *riscv_get_active_vcpu(void); void vmm_switch(struct hypctx *); void vmm_unpriv_trap(struct hyptrap *, uint64_t tmp); int vmm_sbi_ecall(struct vcpu *, bool *); void riscv_send_ipi(struct hypctx *hypctx, int hart_id); int riscv_check_ipi(struct hypctx *hypctx, bool clear); +bool riscv_check_interrupts_pending(struct hypctx *hypctx); #endif /* !_VMM_RISCV_H_ */ diff --git a/sys/riscv/vmm/vmm.c b/sys/riscv/vmm/vmm.c index 33a0cb5fe420..0596e0de2e43 100644 --- a/sys/riscv/vmm/vmm.c +++ b/sys/riscv/vmm/vmm.c @@ -1,1612 +1,1615 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2015 Mihai Carabas * Copyright (c) 2024 Ruslan Bukin * * This software was developed by the University of Cambridge Computer * Laboratory (Department of Computer Science and Technology) under Innovate * UK project 105694, "Digital Security by Design (DSbD) Technology Platform * Prototype". * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmm_stat.h" #include "riscv.h" #include "vmm_aplic.h" struct vcpu { int flags; enum vcpu_state state; struct mtx mtx; int hostcpu; /* host cpuid this vcpu last ran on */ int vcpuid; void *stats; struct vm_exit exitinfo; uint64_t nextpc; /* (x) next instruction to execute */ struct vm *vm; /* (o) */ void *cookie; /* (i) cpu-specific data */ struct fpreg *guestfpu; /* (a,i) guest fpu state */ }; #define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) #define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx)) #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) struct mem_seg { uint64_t gpa; size_t len; bool wired; bool sysmem; vm_object_t object; }; #define VM_MAX_MEMSEGS 3 struct mem_map { vm_paddr_t gpa; size_t len; vm_ooffset_t segoff; int segid; int prot; int flags; }; #define VM_MAX_MEMMAPS 4 struct vmm_mmio_region { uint64_t start; uint64_t end; mem_region_read_t read; mem_region_write_t write; }; #define VM_MAX_MMIO_REGIONS 4 /* * Initialization: * (o) initialized the first time the VM is created * (i) initialized when VM is created and when it is reinitialized * (x) initialized before use */ struct vm { void *cookie; /* (i) cpu-specific data */ volatile cpuset_t active_cpus; /* (i) active vcpus */ volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug*/ int suspend; /* (i) stop VM execution */ bool dying; /* (o) is dying */ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */ struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ struct vmspace *vmspace; /* (o) guest's address space */ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ struct vcpu **vcpu; /* (i) guest vcpus */ struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS]; /* (o) guest MMIO regions */ /* The following describe the vm cpu topology */ uint16_t sockets; /* (o) num of sockets */ uint16_t cores; /* (o) num of cores/socket */ uint16_t threads; /* (o) num of threads/core */ uint16_t maxcpus; /* 
(o) max pluggable cpus */ struct sx mem_segs_lock; /* (o) */ struct sx vcpus_init_lock; /* (o) */ }; static bool vmm_initialized = false; static MALLOC_DEFINE(M_VMM, "vmm", "vmm"); /* statistics */ static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); static int vmm_ipinum; SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, "IPI vector used for vcpu notifications"); u_int vm_maxcpu; SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &vm_maxcpu, 0, "Maximum number of vCPUs"); static void vm_free_memmap(struct vm *vm, int ident); static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); static void vcpu_notify_event_locked(struct vcpu *vcpu); /* global statistics */ VMM_STAT(VMEXIT_COUNT, "total number of vm exits"); VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq"); VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception"); /* * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this * is a safe value for now. */ #define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE) static void vcpu_cleanup(struct vcpu *vcpu, bool destroy) { vmmops_vcpu_cleanup(vcpu->cookie); vcpu->cookie = NULL; if (destroy) { vmm_stat_free(vcpu->stats); fpu_save_area_free(vcpu->guestfpu); vcpu_lock_destroy(vcpu); } } static struct vcpu * vcpu_alloc(struct vm *vm, int vcpu_id) { struct vcpu *vcpu; KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus, ("vcpu_alloc: invalid vcpu %d", vcpu_id)); vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO); vcpu_lock_init(vcpu); vcpu->state = VCPU_IDLE; vcpu->hostcpu = NOCPU; vcpu->vcpuid = vcpu_id; vcpu->vm = vm; vcpu->guestfpu = fpu_save_area_alloc(); vcpu->stats = vmm_stat_alloc(); return (vcpu); } static void vcpu_init(struct vcpu *vcpu) { vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid); MPASS(vcpu->cookie != NULL); fpu_save_area_reset(vcpu->guestfpu); vmm_stat_init(vcpu->stats); } struct vm_exit * vm_exitinfo(struct vcpu *vcpu) { return (&vcpu->exitinfo); } static int vmm_init(void) { vm_maxcpu = mp_ncpus; TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu); if (vm_maxcpu > VM_MAXCPU) { printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU); vm_maxcpu = VM_MAXCPU; } if (vm_maxcpu == 0) vm_maxcpu = 1; return (vmmops_modinit()); } static int vmm_handler(module_t mod, int what, void *arg) { int error; switch (what) { case MOD_LOAD: /* TODO: check if has_hyp here? */ error = vmmdev_init(); if (error != 0) break; error = vmm_init(); if (error == 0) vmm_initialized = true; break; case MOD_UNLOAD: /* TODO: check if has_hyp here? */ error = vmmdev_cleanup(); if (error == 0 && vmm_initialized) { error = vmmops_modcleanup(); if (error) vmm_initialized = false; } break; default: error = 0; break; } return (error); } static moduledata_t vmm_kmod = { "vmm", vmm_handler, NULL }; /* * vmm initialization has the following dependencies: * * - vmm device initialization requires an initialized devfs. 
*/ DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_DEVFS + 1, SI_ORDER_ANY); MODULE_VERSION(vmm, 1); static void vm_init(struct vm *vm, bool create) { int i; vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace)); MPASS(vm->cookie != NULL); CPU_ZERO(&vm->active_cpus); CPU_ZERO(&vm->debug_cpus); vm->suspend = 0; CPU_ZERO(&vm->suspended_cpus); memset(vm->mmio_region, 0, sizeof(vm->mmio_region)); if (!create) { for (i = 0; i < vm->maxcpus; i++) { if (vm->vcpu[i] != NULL) vcpu_init(vm->vcpu[i]); } } } void vm_disable_vcpu_creation(struct vm *vm) { sx_xlock(&vm->vcpus_init_lock); vm->dying = true; sx_xunlock(&vm->vcpus_init_lock); } struct vcpu * vm_alloc_vcpu(struct vm *vm, int vcpuid) { struct vcpu *vcpu; if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm)) return (NULL); /* Some interrupt controllers may have a CPU limit */ if (vcpuid >= aplic_max_cpu_count(vm->cookie)) return (NULL); vcpu = (struct vcpu *) atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]); if (__predict_true(vcpu != NULL)) return (vcpu); sx_xlock(&vm->vcpus_init_lock); vcpu = vm->vcpu[vcpuid]; if (vcpu == NULL && !vm->dying) { vcpu = vcpu_alloc(vm, vcpuid); vcpu_init(vcpu); /* * Ensure vCPU is fully created before updating pointer * to permit unlocked reads above. */ atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid], (uintptr_t)vcpu); } sx_xunlock(&vm->vcpus_init_lock); return (vcpu); } void vm_slock_vcpus(struct vm *vm) { sx_slock(&vm->vcpus_init_lock); } void vm_unlock_vcpus(struct vm *vm) { sx_unlock(&vm->vcpus_init_lock); } int vm_create(const char *name, struct vm **retvm) { struct vm *vm; struct vmspace *vmspace; /* * If vmm.ko could not be successfully initialized then don't attempt * to create the virtual machine. */ if (!vmm_initialized) return (ENXIO); if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) return (EINVAL); vmspace = vmmops_vmspace_alloc(0, 1ul << 39); if (vmspace == NULL) return (ENOMEM); vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO); strcpy(vm->name, name); vm->vmspace = vmspace; sx_init(&vm->mem_segs_lock, "vm mem_segs"); sx_init(&vm->vcpus_init_lock, "vm vcpus"); vm->sockets = 1; vm->cores = 1; /* XXX backwards compatibility */ vm->threads = 1; /* XXX backwards compatibility */ vm->maxcpus = vm_maxcpu; vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM, M_WAITOK | M_ZERO); vm_init(vm, true); *retvm = vm; return (0); } void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus) { *sockets = vm->sockets; *cores = vm->cores; *threads = vm->threads; *maxcpus = vm->maxcpus; } uint16_t vm_get_maxcpus(struct vm *vm) { return (vm->maxcpus); } int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus) { /* Ignore maxcpus. */ if ((sockets * cores * threads) > vm->maxcpus) return (EINVAL); vm->sockets = sockets; vm->cores = cores; vm->threads = threads; return(0); } static void vm_cleanup(struct vm *vm, bool destroy) { struct mem_map *mm; int i; aplic_detach_from_vm(vm->cookie); for (i = 0; i < vm->maxcpus; i++) { if (vm->vcpu[i] != NULL) vcpu_cleanup(vm->vcpu[i], destroy); } vmmops_cleanup(vm->cookie); /* * System memory is removed from the guest address space only when * the VM is destroyed. This is because the mapping remains the same * across VM reset. * * Device memory can be relocated by the guest (e.g. using PCI BARs) * so those mappings are removed on a VM reset. 
*/ if (!destroy) { for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (destroy || !sysmem_mapping(vm, mm)) vm_free_memmap(vm, i); } } if (destroy) { for (i = 0; i < VM_MAX_MEMSEGS; i++) vm_free_memseg(vm, i); vmmops_vmspace_free(vm->vmspace); vm->vmspace = NULL; for (i = 0; i < vm->maxcpus; i++) free(vm->vcpu[i], M_VMM); free(vm->vcpu, M_VMM); sx_destroy(&vm->vcpus_init_lock); sx_destroy(&vm->mem_segs_lock); } } void vm_destroy(struct vm *vm) { vm_cleanup(vm, true); free(vm, M_VMM); } int vm_reinit(struct vm *vm) { int error; /* * A virtual machine can be reset only if all vcpus are suspended. */ if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { vm_cleanup(vm, false); vm_init(vm, false); error = 0; } else { error = EBUSY; } return (error); } const char * vm_name(struct vm *vm) { return (vm->name); } void vm_slock_memsegs(struct vm *vm) { sx_slock(&vm->mem_segs_lock); } void vm_xlock_memsegs(struct vm *vm) { sx_xlock(&vm->mem_segs_lock); } void vm_unlock_memsegs(struct vm *vm) { sx_unlock(&vm->mem_segs_lock); } /* * Return 'true' if 'gpa' is allocated in the guest address space. * * This function is called in the context of a running vcpu which acts as * an implicit lock on 'vm->mem_maps[]'. */ bool vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa) { struct vm *vm = vcpu->vm; struct mem_map *mm; int i; #ifdef INVARIANTS int hostcpu, state; state = vcpu_get_state(vcpu, &hostcpu); KASSERT(state == VCPU_RUNNING && hostcpu == curcpu, ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu)); #endif for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len) return (true); /* 'gpa' is sysmem or devmem */ } return (false); } int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) { struct mem_seg *seg; vm_object_t obj; sx_assert(&vm->mem_segs_lock, SX_XLOCKED); if (ident < 0 || ident >= VM_MAX_MEMSEGS) return (EINVAL); if (len == 0 || (len & PAGE_MASK)) return (EINVAL); seg = &vm->mem_segs[ident]; if (seg->object != NULL) { if (seg->len == len && seg->sysmem == sysmem) return (EEXIST); else return (EINVAL); } obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT); if (obj == NULL) return (ENOMEM); seg->len = len; seg->object = obj; seg->sysmem = sysmem; return (0); } int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, vm_object_t *objptr) { struct mem_seg *seg; sx_assert(&vm->mem_segs_lock, SX_LOCKED); if (ident < 0 || ident >= VM_MAX_MEMSEGS) return (EINVAL); seg = &vm->mem_segs[ident]; if (len) *len = seg->len; if (sysmem) *sysmem = seg->sysmem; if (objptr) *objptr = seg->object; return (0); } void vm_free_memseg(struct vm *vm, int ident) { struct mem_seg *seg; KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS, ("%s: invalid memseg ident %d", __func__, ident)); seg = &vm->mem_segs[ident]; if (seg->object != NULL) { vm_object_deallocate(seg->object); bzero(seg, sizeof(struct mem_seg)); } } int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, size_t len, int prot, int flags) { struct mem_seg *seg; struct mem_map *m, *map; vm_ooffset_t last; int i, error; dprintf("%s: gpa %lx first %lx len %lx\n", __func__, gpa, first, len); if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0) return (EINVAL); if (flags & ~VM_MEMMAP_F_WIRED) return (EINVAL); if (segid < 0 || segid >= VM_MAX_MEMSEGS) return (EINVAL); seg = &vm->mem_segs[segid]; if (seg->object == NULL) return (EINVAL); last = first + len; if (first < 0 || first >= last || last > seg->len) return (EINVAL); 
if ((gpa | first | last) & PAGE_MASK) return (EINVAL); map = NULL; for (i = 0; i < VM_MAX_MEMMAPS; i++) { m = &vm->mem_maps[i]; if (m->len == 0) { map = m; break; } } if (map == NULL) return (ENOSPC); error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa, len, 0, VMFS_NO_SPACE, prot, prot, 0); if (error != KERN_SUCCESS) return (EFAULT); vm_object_reference(seg->object); if (flags & VM_MEMMAP_F_WIRED) { error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); if (error != KERN_SUCCESS) { vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM : EFAULT); } } map->gpa = gpa; map->len = len; map->segoff = first; map->segid = segid; map->prot = prot; map->flags = flags; return (0); } int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len) { struct mem_map *m; int i; dprintf("%s: gpa %lx len %lx\n", __func__, gpa, len); for (i = 0; i < VM_MAX_MEMMAPS; i++) { m = &vm->mem_maps[i]; if (m->gpa == gpa && m->len == len) { vm_free_memmap(vm, i); return (0); } } return (EINVAL); } int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) { struct mem_map *mm, *mmnext; int i; mmnext = NULL; for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (mm->len == 0 || mm->gpa < *gpa) continue; if (mmnext == NULL || mm->gpa < mmnext->gpa) mmnext = mm; } if (mmnext != NULL) { *gpa = mmnext->gpa; if (segid) *segid = mmnext->segid; if (segoff) *segoff = mmnext->segoff; if (len) *len = mmnext->len; if (prot) *prot = mmnext->prot; if (flags) *flags = mmnext->flags; return (0); } else { return (ENOENT); } } static void vm_free_memmap(struct vm *vm, int ident) { struct mem_map *mm; int error __diagused; mm = &vm->mem_maps[ident]; if (mm->len) { error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, mm->gpa + mm->len); KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", __func__, error)); bzero(mm, sizeof(struct mem_map)); } } static __inline bool sysmem_mapping(struct vm *vm, struct mem_map *mm) { if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) return (true); else return (false); } vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm) { struct mem_map *mm; vm_paddr_t maxaddr; int i; maxaddr = 0; for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (sysmem_mapping(vm, mm)) { if (maxaddr < mm->gpa + mm->len) maxaddr = mm->gpa + mm->len; } } return (maxaddr); } int vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *is_fault) { int error; error = vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault); return (error); } void vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size, mem_region_read_t mmio_read, mem_region_write_t mmio_write) { int i; for (i = 0; i < nitems(vm->mmio_region); i++) { if (vm->mmio_region[i].start == 0 && vm->mmio_region[i].end == 0) { vm->mmio_region[i].start = start; vm->mmio_region[i].end = start + size; vm->mmio_region[i].read = mmio_read; vm->mmio_region[i].write = mmio_write; return; } } panic("%s: No free MMIO region", __func__); } void vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size) { int i; for (i = 0; i < nitems(vm->mmio_region); i++) { if (vm->mmio_region[i].start == start && vm->mmio_region[i].end == start + size) { memset(&vm->mmio_region[i], 0, sizeof(vm->mmio_region[i])); return; } } panic("%s: Invalid MMIO region: %lx - %lx", __func__, start, start + size); } static int 
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu) { struct vm *vm; struct vm_exit *vme; struct vie *vie; struct hyp *hyp; uint64_t fault_ipa; struct vm_guest_paging *paging; struct vmm_mmio_region *vmr; int error, i; vm = vcpu->vm; hyp = vm->cookie; if (!hyp->aplic_attached) goto out_user; vme = &vcpu->exitinfo; vie = &vme->u.inst_emul.vie; paging = &vme->u.inst_emul.paging; fault_ipa = vme->u.inst_emul.gpa; vmr = NULL; for (i = 0; i < nitems(vm->mmio_region); i++) { if (vm->mmio_region[i].start <= fault_ipa && vm->mmio_region[i].end > fault_ipa) { vmr = &vm->mmio_region[i]; break; } } if (vmr == NULL) goto out_user; error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging, vmr->read, vmr->write, retu); return (error); out_user: *retu = true; return (0); } int vm_suspend(struct vm *vm, enum vm_suspend_how how) { int i; if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) return (EINVAL); if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { VM_CTR2(vm, "virtual machine already suspended %d/%d", vm->suspend, how); return (EALREADY); } VM_CTR1(vm, "virtual machine successfully suspended %d", how); /* * Notify all active vcpus that they are now suspended. */ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->active_cpus)) vcpu_notify_event(vm_vcpu(vm, i)); } return (0); } void vm_exit_suspended(struct vcpu *vcpu, uint64_t pc) { struct vm *vm = vcpu->vm; struct vm_exit *vmexit; KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); vmexit = vm_exitinfo(vcpu); vmexit->pc = pc; vmexit->inst_length = 4; vmexit->exitcode = VM_EXITCODE_SUSPENDED; vmexit->u.suspended.how = vm->suspend; } void vm_exit_debug(struct vcpu *vcpu, uint64_t pc) { struct vm_exit *vmexit; vmexit = vm_exitinfo(vcpu); vmexit->pc = pc; vmexit->inst_length = 4; vmexit->exitcode = VM_EXITCODE_DEBUG; } int vm_activate_cpu(struct vcpu *vcpu) { struct vm *vm = vcpu->vm; if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) return (EBUSY); CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus); return (0); } int vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu) { if (vcpu == NULL) { vm->debug_cpus = vm->active_cpus; for (int i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->active_cpus)) vcpu_notify_event(vm_vcpu(vm, i)); } } else { if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus)) return (EINVAL); CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); vcpu_notify_event(vcpu); } return (0); } int vm_resume_cpu(struct vm *vm, struct vcpu *vcpu) { if (vcpu == NULL) { CPU_ZERO(&vm->debug_cpus); } else { if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus)) return (EINVAL); CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus); } return (0); } int vcpu_debugged(struct vcpu *vcpu) { return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus)); } cpuset_t vm_active_cpus(struct vm *vm) { return (vm->active_cpus); } cpuset_t vm_debug_cpus(struct vm *vm) { return (vm->debug_cpus); } cpuset_t vm_suspended_cpus(struct vm *vm) { return (vm->suspended_cpus); } void * vcpu_stats(struct vcpu *vcpu) { return (vcpu->stats); } /* * This function is called to ensure that a vcpu "sees" a pending event * as soon as possible: * - If the vcpu thread is sleeping then it is woken up. * - If the vcpu is running on a different host_cpu then an IPI will be directed * to the host_cpu to cause the vcpu to trap into the hypervisor. 
*/ static void vcpu_notify_event_locked(struct vcpu *vcpu) { int hostcpu; hostcpu = vcpu->hostcpu; if (vcpu->state == VCPU_RUNNING) { KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); if (hostcpu != curcpu) { ipi_cpu(hostcpu, vmm_ipinum); } else { /* * If the 'vcpu' is running on 'curcpu' then it must * be sending a notification to itself (e.g. SELF_IPI). * The pending event will be picked up when the vcpu * transitions back to guest context. */ } } else { KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " "with hostcpu %d", vcpu->state, hostcpu)); if (vcpu->state == VCPU_SLEEPING) wakeup_one(vcpu); } } void vcpu_notify_event(struct vcpu *vcpu) { vcpu_lock(vcpu); vcpu_notify_event_locked(vcpu); vcpu_unlock(vcpu); } static void restore_guest_fpustate(struct vcpu *vcpu) { /* Flush host state to the pcb. */ fpe_state_save(curthread); /* Ensure the VFP state will be re-loaded when exiting the guest. */ PCPU_SET(fpcurthread, NULL); /* restore guest FPU state */ fpe_enable(); fpe_restore(vcpu->guestfpu); /* * The FPU is now "dirty" with the guest's state so turn on emulation * to trap any access to the FPU by the host. */ fpe_disable(); } static void save_guest_fpustate(struct vcpu *vcpu) { /* Save guest FPE state. */ fpe_enable(); fpe_store(vcpu->guestfpu); fpe_disable(); KASSERT(PCPU_GET(fpcurthread) == NULL, ("%s: fpcurthread set with guest registers", __func__)); } static int vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) { int error; vcpu_assert_locked(vcpu); /* * State transitions from the vmmdev_ioctl() must always begin from * the VCPU_IDLE state. This guarantees that there is only a single * ioctl() operating on a vcpu at any point. */ if (from_idle) { while (vcpu->state != VCPU_IDLE) { vcpu_notify_event_locked(vcpu); msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz / 1000); } } else { KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " "vcpu idle state")); } if (vcpu->state == VCPU_RUNNING) { KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " "mismatch for running vcpu", curcpu, vcpu->hostcpu)); } else { KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " "vcpu that is not running", vcpu->hostcpu)); } /* * The following state transitions are allowed: * IDLE -> FROZEN -> IDLE * FROZEN -> RUNNING -> FROZEN * FROZEN -> SLEEPING -> FROZEN */ switch (vcpu->state) { case VCPU_IDLE: case VCPU_RUNNING: case VCPU_SLEEPING: error = (newstate != VCPU_FROZEN); break; case VCPU_FROZEN: error = (newstate == VCPU_FROZEN); break; default: error = 1; break; } if (error) return (EBUSY); vcpu->state = newstate; if (newstate == VCPU_RUNNING) vcpu->hostcpu = curcpu; else vcpu->hostcpu = NOCPU; if (newstate == VCPU_IDLE) wakeup(&vcpu->state); return (0); } static void vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate) { int error; if ((error = vcpu_set_state(vcpu, newstate, false)) != 0) panic("Error %d setting state to %d\n", error, newstate); } static void vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) { int error; if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) panic("Error %d setting state to %d", error, newstate); } int vm_get_capability(struct vcpu *vcpu, int type, int *retval) { if (type < 0 || type >= VM_CAP_MAX) return (EINVAL); return (vmmops_getcap(vcpu->cookie, type, retval)); } int vm_set_capability(struct vcpu *vcpu, int type, int val) { if (type < 0 || type >= VM_CAP_MAX) return (EINVAL); return (vmmops_setcap(vcpu->cookie, type, val)); } struct vm * 
vcpu_vm(struct vcpu *vcpu) { return (vcpu->vm); } int vcpu_vcpuid(struct vcpu *vcpu) { return (vcpu->vcpuid); } void * vcpu_get_cookie(struct vcpu *vcpu) { return (vcpu->cookie); } struct vcpu * vm_vcpu(struct vm *vm, int vcpuid) { return (vm->vcpu[vcpuid]); } int vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) { int error; vcpu_lock(vcpu); error = vcpu_set_state_locked(vcpu, newstate, from_idle); vcpu_unlock(vcpu); return (error); } enum vcpu_state vcpu_get_state(struct vcpu *vcpu, int *hostcpu) { enum vcpu_state state; vcpu_lock(vcpu); state = vcpu->state; if (hostcpu != NULL) *hostcpu = vcpu->hostcpu; vcpu_unlock(vcpu); return (state); } static void * _vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, void **cookie) { int i, count, pageoff; struct mem_map *mm; vm_page_t m; pageoff = gpa & PAGE_MASK; if (len > PAGE_SIZE - pageoff) panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); count = 0; for (i = 0; i < VM_MAX_MEMMAPS; i++) { mm = &vm->mem_maps[i]; if (sysmem_mapping(vm, mm) && gpa >= mm->gpa && gpa < mm->gpa + mm->len) { count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); break; } } if (count == 1) { *cookie = m; return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); } else { *cookie = NULL; return (NULL); } } void * vm_gpa_hold(struct vcpu *vcpu, vm_paddr_t gpa, size_t len, int reqprot, void **cookie) { #ifdef INVARIANTS /* * The current vcpu should be frozen to ensure 'vm_memmap[]' * stability. */ int state = vcpu_get_state(vcpu, NULL); KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", __func__, state)); #endif return (_vm_gpa_hold(vcpu->vm, gpa, len, reqprot, cookie)); } void * vm_gpa_hold_global(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, void **cookie) { sx_assert(&vm->mem_segs_lock, SX_LOCKED); return (_vm_gpa_hold(vm, gpa, len, reqprot, cookie)); } void vm_gpa_release(void *cookie) { vm_page_t m = cookie; vm_page_unwire(m, PQ_ACTIVE); } int vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval) { if (reg >= VM_REG_LAST) return (EINVAL); return (vmmops_getreg(vcpu->cookie, reg, retval)); } int vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) { int error; if (reg >= VM_REG_LAST) return (EINVAL); error = vmmops_setreg(vcpu->cookie, reg, val); if (error || reg != VM_REG_GUEST_SEPC) return (error); vcpu->nextpc = val; return (0); } void * vm_get_cookie(struct vm *vm) { return (vm->cookie); } int vm_inject_exception(struct vcpu *vcpu, uint64_t scause) { return (vmmops_exception(vcpu->cookie, scause)); } int vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr) { return (aplic_attach_to_vm(vm->cookie, descr)); } int vm_assert_irq(struct vm *vm, uint32_t irq) { return (aplic_inject_irq(vm->cookie, -1, irq, true)); } int vm_deassert_irq(struct vm *vm, uint32_t irq) { return (aplic_inject_irq(vm->cookie, -1, irq, false)); } int vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot, int func) { return (aplic_inject_msi(vm->cookie, msg, addr)); } static int vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) { vcpu_lock(vcpu); while (1) { if (aplic_check_pending(vcpu->cookie)) break; if (riscv_check_ipi(vcpu->cookie, false)) break; + if (riscv_check_interrupts_pending(vcpu->cookie)) + break; + if (vcpu_should_yield(vcpu)) break; vcpu_require_state_locked(vcpu, VCPU_SLEEPING); /* * XXX msleep_spin() cannot be interrupted by signals so * wake up periodically to check pending signals. 
*/ msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz / 1000); vcpu_require_state_locked(vcpu, VCPU_FROZEN); } vcpu_unlock(vcpu); *retu = false; return (0); } static int vm_handle_paging(struct vcpu *vcpu, bool *retu) { struct vm *vm; struct vm_exit *vme; struct vm_map *map; uint64_t addr; pmap_t pmap; int ftype, rv; vm = vcpu->vm; vme = &vcpu->exitinfo; pmap = vmspace_pmap(vm->vmspace); addr = (vme->htval << 2) & ~(PAGE_SIZE - 1); dprintf("%s: %lx\n", __func__, addr); switch (vme->scause) { case SCAUSE_STORE_GUEST_PAGE_FAULT: ftype = VM_PROT_WRITE; break; case SCAUSE_FETCH_GUEST_PAGE_FAULT: ftype = VM_PROT_EXECUTE; break; case SCAUSE_LOAD_GUEST_PAGE_FAULT: ftype = VM_PROT_READ; break; default: panic("unknown page trap: %lu", vme->scause); } /* The page exists, but the page table needs to be updated. */ if (pmap_fault(pmap, addr, ftype)) return (0); map = &vm->vmspace->vm_map; rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL); if (rv != KERN_SUCCESS) { printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n", __func__, addr, ftype, rv); return (EFAULT); } return (0); } static int vm_handle_suspend(struct vcpu *vcpu, bool *retu) { struct vm *vm = vcpu->vm; int error, i; struct thread *td; error = 0; td = curthread; CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus); /* * Wait until all 'active_cpus' have suspended themselves. * * Since a VM may be suspended at any time including when one or * more vcpus are doing a rendezvous we need to call the rendezvous * handler while we are waiting to prevent a deadlock. */ vcpu_lock(vcpu); while (error == 0) { if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) break; vcpu_require_state_locked(vcpu, VCPU_SLEEPING); msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); vcpu_require_state_locked(vcpu, VCPU_FROZEN); if (td_ast_pending(td, TDA_SUSPEND)) { vcpu_unlock(vcpu); error = thread_check_susp(td, false); vcpu_lock(vcpu); } } vcpu_unlock(vcpu); /* * Wakeup the other sleeping vcpus and return to userspace. */ for (i = 0; i < vm->maxcpus; i++) { if (CPU_ISSET(i, &vm->suspended_cpus)) { vcpu_notify_event(vm_vcpu(vm, i)); } } *retu = true; return (error); } int vm_run(struct vcpu *vcpu) { struct vm_eventinfo evinfo; struct vm_exit *vme; struct vm *vm; pmap_t pmap; int error; int vcpuid; bool retu; vm = vcpu->vm; dprintf("%s\n", __func__); vcpuid = vcpu->vcpuid; if (!CPU_ISSET(vcpuid, &vm->active_cpus)) return (EINVAL); if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) return (EINVAL); pmap = vmspace_pmap(vm->vmspace); vme = &vcpu->exitinfo; evinfo.rptr = NULL; evinfo.sptr = &vm->suspend; evinfo.iptr = NULL; restart: critical_enter(); restore_guest_fpustate(vcpu); vcpu_require_state(vcpu, VCPU_RUNNING); error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo); vcpu_require_state(vcpu, VCPU_FROZEN); save_guest_fpustate(vcpu); critical_exit(); if (error == 0) { retu = false; switch (vme->exitcode) { case VM_EXITCODE_INST_EMUL: vcpu->nextpc = vme->pc + vme->inst_length; error = vm_handle_inst_emul(vcpu, &retu); break; case VM_EXITCODE_WFI: vcpu->nextpc = vme->pc + vme->inst_length; error = vm_handle_wfi(vcpu, vme, &retu); break; case VM_EXITCODE_ECALL: /* Handle in userland. */ vcpu->nextpc = vme->pc + vme->inst_length; retu = true; break; case VM_EXITCODE_PAGING: vcpu->nextpc = vme->pc; error = vm_handle_paging(vcpu, &retu); break; case VM_EXITCODE_BOGUS: vcpu->nextpc = vme->pc; retu = false; error = 0; break; case VM_EXITCODE_SUSPENDED: vcpu->nextpc = vme->pc; error = vm_handle_suspend(vcpu, &retu); break; default: /* Handle in userland. 
*/ vcpu->nextpc = vme->pc; retu = true; break; } } if (error == 0 && retu == false) goto restart; return (error); } diff --git a/sys/riscv/vmm/vmm_riscv.c b/sys/riscv/vmm/vmm_riscv.c index e276f8583e37..6ac945dfa1d0 100644 --- a/sys/riscv/vmm/vmm_riscv.c +++ b/sys/riscv/vmm/vmm_riscv.c @@ -1,926 +1,939 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2024 Ruslan Bukin * * This software was developed by the University of Cambridge Computer * Laboratory (Department of Computer Science and Technology) under Innovate * UK project 105694, "Digital Security by Design (DSbD) Technology Platform * Prototype". * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "riscv.h" #include "vmm_aplic.h" #include "vmm_stat.h" MALLOC_DEFINE(M_HYP, "RISC-V VMM HYP", "RISC-V VMM HYP"); DPCPU_DEFINE_STATIC(struct hypctx *, vcpu); static int m_op(uint32_t insn, int match, int mask) { if (((insn ^ match) & mask) == 0) return (1); return (0); } static inline void riscv_set_active_vcpu(struct hypctx *hypctx) { DPCPU_SET(vcpu, hypctx); } struct hypctx * riscv_get_active_vcpu(void) { return (DPCPU_GET(vcpu)); } int vmmops_modinit(void) { if (!has_hyp) { printf("vmm: riscv hart doesn't support H-extension.\n"); return (ENXIO); } - if (!has_sstc) { - printf("vmm: riscv hart doesn't support SSTC extension.\n"); - return (ENXIO); - } - return (0); } int vmmops_modcleanup(void) { return (0); } void * vmmops_init(struct vm *vm, pmap_t pmap) { struct hyp *hyp; vm_size_t size; size = round_page(sizeof(struct hyp) + sizeof(struct hypctx *) * vm_get_maxcpus(vm)); hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); hyp->vm = vm; hyp->aplic_attached = false; aplic_vminit(hyp); return (hyp); } static void vmmops_delegate(void) { uint64_t hedeleg; uint64_t hideleg; hedeleg = (1UL << SCAUSE_INST_MISALIGNED); hedeleg |= (1UL << SCAUSE_ILLEGAL_INSTRUCTION); hedeleg |= (1UL << SCAUSE_BREAKPOINT); hedeleg |= (1UL << SCAUSE_ECALL_USER); hedeleg |= (1UL << SCAUSE_INST_PAGE_FAULT); hedeleg |= (1UL << SCAUSE_LOAD_PAGE_FAULT); hedeleg |= (1UL << SCAUSE_STORE_PAGE_FAULT); csr_write(hedeleg, hedeleg); hideleg = (1UL << IRQ_SOFTWARE_HYPERVISOR); hideleg |= (1UL << IRQ_TIMER_HYPERVISOR); hideleg |= (1UL << IRQ_EXTERNAL_HYPERVISOR); csr_write(hideleg, hideleg); } static void vmmops_vcpu_restore_csrs(struct hypctx *hypctx) { struct hypcsr *csrs; csrs = &hypctx->guest_csrs; csr_write(vsstatus, csrs->vsstatus); csr_write(vsie, csrs->vsie); csr_write(vstvec, csrs->vstvec); csr_write(vsscratch, csrs->vsscratch); csr_write(vsepc, csrs->vsepc); csr_write(vscause, csrs->vscause); csr_write(vstval, csrs->vstval); csr_write(hvip, csrs->hvip); csr_write(vsatp, csrs->vsatp); } static void vmmops_vcpu_save_csrs(struct hypctx *hypctx) { struct hypcsr *csrs; csrs = &hypctx->guest_csrs; csrs->vsstatus = csr_read(vsstatus); csrs->vsie = csr_read(vsie); csrs->vstvec = csr_read(vstvec); csrs->vsscratch = csr_read(vsscratch); csrs->vsepc = csr_read(vsepc); csrs->vscause = csr_read(vscause); csrs->vstval = csr_read(vstval); csrs->hvip = csr_read(hvip); csrs->vsatp = csr_read(vsatp); } void * vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid) { struct hypctx *hypctx; struct hyp *hyp; vm_size_t size; hyp = vmi; dprintf("%s: hyp %p\n", __func__, hyp); KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm), ("%s: Invalid vcpuid %d", __func__, vcpuid)); size = round_page(sizeof(struct hypctx)); hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO); hypctx->hyp = hyp; hypctx->vcpu = vcpu1; hypctx->guest_scounteren = HCOUNTEREN_CY | HCOUNTEREN_TM; /* sstatus */ hypctx->guest_regs.hyp_sstatus = SSTATUS_SPP | SSTATUS_SPIE; hypctx->guest_regs.hyp_sstatus |= SSTATUS_FS_INITIAL; /* hstatus */ hypctx->guest_regs.hyp_hstatus = HSTATUS_SPV | HSTATUS_VTW; hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPVP; hypctx->cpu_id = vcpuid; hyp->ctx[vcpuid] = hypctx; aplic_cpuinit(hypctx); + 
vtimer_cpuinit(hypctx); return (hypctx); } static int riscv_vmm_pinit(pmap_t pmap) { dprintf("%s: pmap %p\n", __func__, pmap); pmap_pinit_stage(pmap, PM_STAGE2); return (1); } struct vmspace * vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max) { return (vmspace_alloc(min, max, riscv_vmm_pinit)); } void vmmops_vmspace_free(struct vmspace *vmspace) { pmap_remove_pages(vmspace_pmap(vmspace)); vmspace_free(vmspace); } static void riscv_unpriv_read(struct hypctx *hypctx, uintptr_t guest_addr, uint64_t *data, struct hyptrap *trap) { register struct hyptrap * htrap asm("a0"); uintptr_t old_hstatus; uintptr_t old_stvec; uintptr_t entry; uint64_t val; uint64_t tmp; int intr; entry = (uintptr_t)&vmm_unpriv_trap; htrap = trap; intr = intr_disable(); old_hstatus = csr_swap(hstatus, hypctx->guest_regs.hyp_hstatus); /* * Setup a temporary exception vector, so that if hlvx.hu raises * an exception we catch it in the vmm_unpriv_trap(). */ old_stvec = csr_swap(stvec, entry); /* * Read first two bytes of instruction assuming it could be a * compressed one. */ __asm __volatile(".option push\n" ".option norvc\n" "hlvx.hu %[val], (%[addr])\n" ".option pop\n" : [val] "=r" (val) : [addr] "r" (guest_addr), "r" (htrap) : "a1", "memory"); /* * Check if previous hlvx.hu did not raise an exception, and then * read the rest of instruction if it is a full-length one. */ if (trap->scause == -1 && (val & 0x3) == 0x3) { guest_addr += 2; __asm __volatile(".option push\n" ".option norvc\n" "hlvx.hu %[tmp], (%[addr])\n" ".option pop\n" : [tmp] "=r" (tmp) : [addr] "r" (guest_addr), "r" (htrap) : "a1", "memory"); val |= (tmp << 16); } csr_write(hstatus, old_hstatus); csr_write(stvec, old_stvec); intr_restore(intr); *data = val; } static int riscv_gen_inst_emul_data(struct hypctx *hypctx, struct vm_exit *vme_ret, struct hyptrap *trap) { uintptr_t guest_addr; struct vie *vie; uint64_t insn; int reg_num; int rs2, rd; int direction; int sign_extend; int access_size; guest_addr = vme_ret->sepc; KASSERT(vme_ret->scause == SCAUSE_FETCH_GUEST_PAGE_FAULT || vme_ret->scause == SCAUSE_LOAD_GUEST_PAGE_FAULT || vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT, ("Invalid scause")); direction = vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT ? 
VM_DIR_WRITE : VM_DIR_READ; sign_extend = 1; bzero(trap, sizeof(struct hyptrap)); trap->scause = -1; riscv_unpriv_read(hypctx, guest_addr, &insn, trap); if (trap->scause != -1) return (-1); if ((insn & 0x3) == 0x3) { rs2 = (insn & RS2_MASK) >> RS2_SHIFT; rd = (insn & RD_MASK) >> RD_SHIFT; if (direction == VM_DIR_WRITE) { if (m_op(insn, MATCH_SB, MASK_SB)) access_size = 1; else if (m_op(insn, MATCH_SH, MASK_SH)) access_size = 2; else if (m_op(insn, MATCH_SW, MASK_SW)) access_size = 4; else if (m_op(insn, MATCH_SD, MASK_SD)) access_size = 8; else { printf("unknown store instr at %lx", guest_addr); return (-2); } reg_num = rs2; } else { if (m_op(insn, MATCH_LB, MASK_LB)) access_size = 1; else if (m_op(insn, MATCH_LH, MASK_LH)) access_size = 2; else if (m_op(insn, MATCH_LW, MASK_LW)) access_size = 4; else if (m_op(insn, MATCH_LD, MASK_LD)) access_size = 8; else if (m_op(insn, MATCH_LBU, MASK_LBU)) { access_size = 1; sign_extend = 0; } else if (m_op(insn, MATCH_LHU, MASK_LHU)) { access_size = 2; sign_extend = 0; } else if (m_op(insn, MATCH_LWU, MASK_LWU)) { access_size = 4; sign_extend = 0; } else { printf("unknown load instr at %lx", guest_addr); return (-3); } reg_num = rd; } vme_ret->inst_length = 4; } else { rs2 = (insn >> 7) & 0x7; rs2 += 0x8; rd = (insn >> 2) & 0x7; rd += 0x8; if (direction == VM_DIR_WRITE) { if (m_op(insn, MATCH_C_SW, MASK_C_SW)) access_size = 4; else if (m_op(insn, MATCH_C_SD, MASK_C_SD)) access_size = 8; else { printf("unknown compressed store instr at %lx", guest_addr); return (-4); } } else { if (m_op(insn, MATCH_C_LW, MASK_C_LW)) access_size = 4; else if (m_op(insn, MATCH_C_LD, MASK_C_LD)) access_size = 8; else { printf("unknown load instr at %lx", guest_addr); return (-5); } } reg_num = rd; vme_ret->inst_length = 2; } vme_ret->u.inst_emul.gpa = (vme_ret->htval << 2) | (vme_ret->stval & 0x3); dprintf("guest_addr %lx insn %lx, reg %d, gpa %lx\n", guest_addr, insn, reg_num, vme_ret->u.inst_emul.gpa); vie = &vme_ret->u.inst_emul.vie; vie->dir = direction; vie->reg = reg_num; vie->sign_extend = sign_extend; vie->access_size = access_size; return (0); } static bool riscv_handle_world_switch(struct hypctx *hypctx, struct vm_exit *vme, pmap_t pmap) { struct hyptrap trap; uint64_t insn; uint64_t gpa; bool handled; bool retu; int ret; int i; handled = false; if (vme->scause & SCAUSE_INTR) { /* * Host interrupt? Leave critical section to handle. */ vmm_stat_incr(hypctx->vcpu, VMEXIT_IRQ, 1); vme->exitcode = VM_EXITCODE_BOGUS; vme->inst_length = 0; return (handled); } switch (vme->scause) { case SCAUSE_FETCH_GUEST_PAGE_FAULT: case SCAUSE_LOAD_GUEST_PAGE_FAULT: case SCAUSE_STORE_GUEST_PAGE_FAULT: gpa = (vme->htval << 2) | (vme->stval & 0x3); if (vm_mem_allocated(hypctx->vcpu, gpa)) { vme->exitcode = VM_EXITCODE_PAGING; vme->inst_length = 0; vme->u.paging.gpa = gpa; } else { ret = riscv_gen_inst_emul_data(hypctx, vme, &trap); if (ret != 0) { vme->exitcode = VM_EXITCODE_HYP; vme->u.hyp.scause = trap.scause; break; } vme->exitcode = VM_EXITCODE_INST_EMUL; } break; case SCAUSE_ILLEGAL_INSTRUCTION: /* * TODO: handle illegal instruction properly. 
*/ printf("%s: Illegal instruction at %lx stval 0x%lx htval " "0x%lx\n", __func__, vme->sepc, vme->stval, vme->htval); vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1); vme->exitcode = VM_EXITCODE_BOGUS; handled = false; break; case SCAUSE_VIRTUAL_SUPERVISOR_ECALL: retu = false; vmm_sbi_ecall(hypctx->vcpu, &retu); if (retu == false) { handled = true; break; } for (i = 0; i < nitems(vme->u.ecall.args); i++) vme->u.ecall.args[i] = hypctx->guest_regs.hyp_a[i]; vme->exitcode = VM_EXITCODE_ECALL; handled = false; break; case SCAUSE_VIRTUAL_INSTRUCTION: insn = vme->stval; if (m_op(insn, MATCH_WFI, MASK_WFI)) vme->exitcode = VM_EXITCODE_WFI; else vme->exitcode = VM_EXITCODE_BOGUS; handled = false; break; default: printf("unknown scause %lx\n", vme->scause); vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1); vme->exitcode = VM_EXITCODE_BOGUS; handled = false; break; } return (handled); } int vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *is_fault) { /* Implement me. */ return (ENOSYS); } void riscv_send_ipi(struct hypctx *hypctx, int hart_id) { struct hyp *hyp; struct vm *vm; hyp = hypctx->hyp; vm = hyp->vm; atomic_set_32(&hypctx->ipi_pending, 1); vcpu_notify_event(vm_vcpu(vm, hart_id)); } int riscv_check_ipi(struct hypctx *hypctx, bool clear) { int val; if (clear) val = atomic_swap_32(&hypctx->ipi_pending, 0); else val = hypctx->ipi_pending; return (val); } +bool +riscv_check_interrupts_pending(struct hypctx *hypctx) +{ + + if (hypctx->interrupts_pending) + return (true); + + return (false); +} + static void riscv_sync_interrupts(struct hypctx *hypctx) { int pending; pending = aplic_check_pending(hypctx); - if (pending) hypctx->guest_csrs.hvip |= HVIP_VSEIP; else hypctx->guest_csrs.hvip &= ~HVIP_VSEIP; - csr_write(hvip, hypctx->guest_csrs.hvip); -} - -static void -riscv_sync_ipi(struct hypctx *hypctx) -{ - /* Guest clears VSSIP bit manually. */ if (riscv_check_ipi(hypctx, true)) hypctx->guest_csrs.hvip |= HVIP_VSSIP; + if (riscv_check_interrupts_pending(hypctx)) + hypctx->guest_csrs.hvip |= HVIP_VSTIP; + else + hypctx->guest_csrs.hvip &= ~HVIP_VSTIP; + csr_write(hvip, hypctx->guest_csrs.hvip); } int vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo) { struct hypctx *hypctx; struct vm_exit *vme; struct vcpu *vcpu; register_t val; + uint64_t hvip; bool handled; hypctx = (struct hypctx *)vcpui; vcpu = hypctx->vcpu; vme = vm_exitinfo(vcpu); hypctx->guest_regs.hyp_sepc = (uint64_t)pc; vmmops_delegate(); /* * From The RISC-V Instruction Set Manual * Volume II: RISC-V Privileged Architectures * * If the new virtual machine's guest physical page tables * have been modified, it may be necessary to execute an HFENCE.GVMA * instruction (see Section 5.3.2) before or after writing hgatp. */ __asm __volatile("hfence.gvma" ::: "memory"); csr_write(hgatp, pmap->pm_satp); - csr_write(henvcfg, HENVCFG_STCE); + if (has_sstc) + csr_write(henvcfg, HENVCFG_STCE); csr_write(hie, HIE_VSEIE | HIE_VSSIE | HIE_SGEIE); /* TODO: should we trap rdcycle / rdtime? */ csr_write(hcounteren, HCOUNTEREN_CY | HCOUNTEREN_TM); vmmops_vcpu_restore_csrs(hypctx); for (;;) { dprintf("%s: pc %lx\n", __func__, pc); if (hypctx->has_exception) { hypctx->has_exception = false; /* * TODO: implement exception injection. 
*/ } val = intr_disable(); /* Check if the vcpu is suspended */ if (vcpu_suspended(evinfo)) { intr_restore(val); vm_exit_suspended(vcpu, pc); break; } if (vcpu_debugged(vcpu)) { intr_restore(val); vm_exit_debug(vcpu, pc); break; } /* * TODO: What happens if a timer interrupt is asserted exactly * here, but for the previous VM? */ riscv_set_active_vcpu(hypctx); aplic_flush_hwstate(hypctx); - riscv_sync_interrupts(hypctx); - riscv_sync_ipi(hypctx); dprintf("%s: Entering guest VM, vsatp %lx, ss %lx hs %lx\n", __func__, csr_read(vsatp), hypctx->guest_regs.hyp_sstatus, hypctx->guest_regs.hyp_hstatus); vmm_switch(hypctx); dprintf("%s: Leaving guest VM, hstatus %lx\n", __func__, hypctx->guest_regs.hyp_hstatus); + /* Guest can clear VSSIP. It can't clear VSTIP or VSEIP. */ + hvip = csr_read(hvip); + if ((hypctx->guest_csrs.hvip ^ hvip) & HVIP_VSSIP) { + if (hvip & HVIP_VSSIP) { + /* TODO: VSSIP was set by guest. */ + } else { + /* VSSIP was cleared by guest. */ + hypctx->guest_csrs.hvip &= ~HVIP_VSSIP; + } + } + aplic_sync_hwstate(hypctx); - riscv_sync_interrupts(hypctx); /* * TODO: deactivate stage 2 pmap here if needed. */ vme->scause = csr_read(scause); vme->sepc = csr_read(sepc); vme->stval = csr_read(stval); vme->htval = csr_read(htval); vme->htinst = csr_read(htinst); intr_restore(val); vmm_stat_incr(vcpu, VMEXIT_COUNT, 1); vme->pc = hypctx->guest_regs.hyp_sepc; vme->inst_length = INSN_SIZE; handled = riscv_handle_world_switch(hypctx, vme, pmap); if (handled == false) /* Exit loop to emulate instruction. */ break; else { /* Resume guest execution from the next instruction. */ hypctx->guest_regs.hyp_sepc += vme->inst_length; } } vmmops_vcpu_save_csrs(hypctx); return (0); } static void riscv_pcpu_vmcleanup(void *arg) { struct hyp *hyp; int i, maxcpus; hyp = arg; maxcpus = vm_get_maxcpus(hyp->vm); for (i = 0; i < maxcpus; i++) { if (riscv_get_active_vcpu() == hyp->ctx[i]) { riscv_set_active_vcpu(NULL); break; } } } void vmmops_vcpu_cleanup(void *vcpui) { struct hypctx *hypctx; hypctx = vcpui; dprintf("%s\n", __func__); aplic_cpucleanup(hypctx); free(hypctx, M_HYP); } void vmmops_cleanup(void *vmi) { struct hyp *hyp; hyp = vmi; dprintf("%s\n", __func__); aplic_vmcleanup(hyp); smp_rendezvous(NULL, riscv_pcpu_vmcleanup, NULL, hyp); free(hyp, M_HYP); } /* * Return register value. Registers have different sizes and an explicit cast * must be made to ensure proper conversion. 
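 * hypctx_regptr() returns a pointer into the saved guest register file, or
 * NULL for registers it does not track; VM_REG_GUEST_ZERO is special-cased
 * in vmmops_getreg().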
*/ static uint64_t * hypctx_regptr(struct hypctx *hypctx, int reg) { switch (reg) { case VM_REG_GUEST_RA: return (&hypctx->guest_regs.hyp_ra); case VM_REG_GUEST_SP: return (&hypctx->guest_regs.hyp_sp); case VM_REG_GUEST_GP: return (&hypctx->guest_regs.hyp_gp); case VM_REG_GUEST_TP: return (&hypctx->guest_regs.hyp_tp); case VM_REG_GUEST_T0: return (&hypctx->guest_regs.hyp_t[0]); case VM_REG_GUEST_T1: return (&hypctx->guest_regs.hyp_t[1]); case VM_REG_GUEST_T2: return (&hypctx->guest_regs.hyp_t[2]); case VM_REG_GUEST_S0: return (&hypctx->guest_regs.hyp_s[0]); case VM_REG_GUEST_S1: return (&hypctx->guest_regs.hyp_s[1]); case VM_REG_GUEST_A0: return (&hypctx->guest_regs.hyp_a[0]); case VM_REG_GUEST_A1: return (&hypctx->guest_regs.hyp_a[1]); case VM_REG_GUEST_A2: return (&hypctx->guest_regs.hyp_a[2]); case VM_REG_GUEST_A3: return (&hypctx->guest_regs.hyp_a[3]); case VM_REG_GUEST_A4: return (&hypctx->guest_regs.hyp_a[4]); case VM_REG_GUEST_A5: return (&hypctx->guest_regs.hyp_a[5]); case VM_REG_GUEST_A6: return (&hypctx->guest_regs.hyp_a[6]); case VM_REG_GUEST_A7: return (&hypctx->guest_regs.hyp_a[7]); case VM_REG_GUEST_S2: return (&hypctx->guest_regs.hyp_s[2]); case VM_REG_GUEST_S3: return (&hypctx->guest_regs.hyp_s[3]); case VM_REG_GUEST_S4: return (&hypctx->guest_regs.hyp_s[4]); case VM_REG_GUEST_S5: return (&hypctx->guest_regs.hyp_s[5]); case VM_REG_GUEST_S6: return (&hypctx->guest_regs.hyp_s[6]); case VM_REG_GUEST_S7: return (&hypctx->guest_regs.hyp_s[7]); case VM_REG_GUEST_S8: return (&hypctx->guest_regs.hyp_s[8]); case VM_REG_GUEST_S9: return (&hypctx->guest_regs.hyp_s[9]); case VM_REG_GUEST_S10: return (&hypctx->guest_regs.hyp_s[10]); case VM_REG_GUEST_S11: return (&hypctx->guest_regs.hyp_s[11]); case VM_REG_GUEST_T3: return (&hypctx->guest_regs.hyp_t[3]); case VM_REG_GUEST_T4: return (&hypctx->guest_regs.hyp_t[4]); case VM_REG_GUEST_T5: return (&hypctx->guest_regs.hyp_t[5]); case VM_REG_GUEST_T6: return (&hypctx->guest_regs.hyp_t[6]); case VM_REG_GUEST_SEPC: return (&hypctx->guest_regs.hyp_sepc); default: break; } return (NULL); } int vmmops_getreg(void *vcpui, int reg, uint64_t *retval) { uint64_t *regp; int running, hostcpu; struct hypctx *hypctx; hypctx = vcpui; running = vcpu_is_running(hypctx->vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), vcpu_vcpuid(hypctx->vcpu)); if (reg == VM_REG_GUEST_ZERO) { *retval = 0; return (0); } regp = hypctx_regptr(hypctx, reg); if (regp == NULL) return (EINVAL); *retval = *regp; return (0); } int vmmops_setreg(void *vcpui, int reg, uint64_t val) { struct hypctx *hypctx; int running, hostcpu; uint64_t *regp; hypctx = vcpui; running = vcpu_is_running(hypctx->vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), vcpu_vcpuid(hypctx->vcpu)); regp = hypctx_regptr(hypctx, reg); if (regp == NULL) return (EINVAL); *regp = val; return (0); } int vmmops_exception(void *vcpui, uint64_t scause) { struct hypctx *hypctx; int running, hostcpu; hypctx = vcpui; running = vcpu_is_running(hypctx->vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm), vcpu_vcpuid(hypctx->vcpu)); /* TODO: implement me. 
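	 * Exception injection is not wired up yet (see the matching TODO in
	 * vmmops_run()), so the request is rejected with ENOSYS.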
*/ return (ENOSYS); } int vmmops_getcap(void *vcpui, int num, int *retval) { int ret; ret = ENOENT; switch (num) { case VM_CAP_SSTC: *retval = has_sstc; ret = 0; break; case VM_CAP_UNRESTRICTED_GUEST: *retval = 1; ret = 0; break; default: break; } return (ret); } int vmmops_setcap(void *vcpui, int num, int val) { return (ENOENT); } diff --git a/sys/riscv/vmm/vmm_sbi.c b/sys/riscv/vmm/vmm_sbi.c index 6444b8c9e396..63dcf9b4a7ae 100644 --- a/sys/riscv/vmm/vmm_sbi.c +++ b/sys/riscv/vmm/vmm_sbi.c @@ -1,179 +1,205 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2024 Ruslan Bukin * * This software was developed by the University of Cambridge Computer * Laboratory (Department of Computer Science and Technology) under Innovate * UK project 105694, "Digital Security by Design (DSbD) Technology Platform * Prototype". * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "riscv.h" static int vmm_sbi_handle_rfnc(struct vcpu *vcpu, struct hypctx *hypctx) { uint64_t hart_mask __unused; uint64_t start __unused; uint64_t size __unused; uint64_t asid __unused; uint64_t func_id; func_id = hypctx->guest_regs.hyp_a[6]; hart_mask = hypctx->guest_regs.hyp_a[0]; start = hypctx->guest_regs.hyp_a[2]; size = hypctx->guest_regs.hyp_a[3]; asid = hypctx->guest_regs.hyp_a[4]; dprintf("%s: %ld hart_mask %lx start %lx size %lx\n", __func__, func_id, hart_mask, start, size); /* TODO: implement remote sfence. 
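	 * The calls below are acknowledged with SBI_SUCCESS without doing
	 * any flushing; a conservative implementation could, for example,
	 * execute fence.i / hfence.vvma on every hart that runs a vcpu of
	 * this VM.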
*/ switch (func_id) { case SBI_RFNC_REMOTE_FENCE_I: break; case SBI_RFNC_REMOTE_SFENCE_VMA: break; case SBI_RFNC_REMOTE_SFENCE_VMA_ASID: break; default: break; } hypctx->guest_regs.hyp_a[0] = 0; return (0); } +static int +vmm_sbi_handle_time(struct vcpu *vcpu, struct hypctx *hypctx) +{ + uint64_t func_id; + uint64_t next_val; + int ret; + + func_id = hypctx->guest_regs.hyp_a[6]; + next_val = hypctx->guest_regs.hyp_a[0]; + + switch (func_id) { + case SBI_TIME_SET_TIMER: + vtimer_set_timer(hypctx, next_val); + ret = 0; + break; + default: + ret = -1; + break; + } + + hypctx->guest_regs.hyp_a[0] = ret; + + return (0); +} + static int vmm_sbi_handle_ipi(struct vcpu *vcpu, struct hypctx *hypctx) { struct hypctx *target_hypctx; struct vcpu *target_vcpu __unused; cpuset_t active_cpus; struct hyp *hyp; uint64_t hart_mask; uint64_t func_id; int hart_id; int bit; int ret; func_id = hypctx->guest_regs.hyp_a[6]; hart_mask = hypctx->guest_regs.hyp_a[0]; dprintf("%s: hart_mask %lx\n", __func__, hart_mask); hyp = hypctx->hyp; active_cpus = vm_active_cpus(hyp->vm); switch (func_id) { case SBI_IPI_SEND_IPI: while ((bit = ffs(hart_mask))) { hart_id = (bit - 1); hart_mask &= ~(1u << hart_id); if (CPU_ISSET(hart_id, &active_cpus)) { /* TODO. */ target_vcpu = vm_vcpu(hyp->vm, hart_id); target_hypctx = hypctx->hyp->ctx[hart_id]; riscv_send_ipi(target_hypctx, hart_id); } } ret = 0; break; default: printf("%s: unknown func %ld\n", __func__, func_id); ret = -1; break; } hypctx->guest_regs.hyp_a[0] = ret; return (0); } int vmm_sbi_ecall(struct vcpu *vcpu, bool *retu) { int sbi_extension_id __unused; struct hypctx *hypctx; hypctx = riscv_get_active_vcpu(); sbi_extension_id = hypctx->guest_regs.hyp_a[7]; dprintf("%s: args %lx %lx %lx %lx %lx %lx %lx %lx\n", __func__, hypctx->guest_regs.hyp_a[0], hypctx->guest_regs.hyp_a[1], hypctx->guest_regs.hyp_a[2], hypctx->guest_regs.hyp_a[3], hypctx->guest_regs.hyp_a[4], hypctx->guest_regs.hyp_a[5], hypctx->guest_regs.hyp_a[6], hypctx->guest_regs.hyp_a[7]); switch (sbi_extension_id) { case SBI_EXT_ID_RFNC: vmm_sbi_handle_rfnc(vcpu, hypctx); break; case SBI_EXT_ID_TIME: + vmm_sbi_handle_time(vcpu, hypctx); break; case SBI_EXT_ID_IPI: vmm_sbi_handle_ipi(vcpu, hypctx); break; default: *retu = true; break; } return (0); } diff --git a/sys/riscv/vmm/vmm_vtimer.c b/sys/riscv/vmm/vmm_vtimer.c new file mode 100644 index 000000000000..0dadc962114f --- /dev/null +++ b/sys/riscv/vmm/vmm_vtimer.c @@ -0,0 +1,117 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "riscv.h" + +#define VTIMER_DEFAULT_FREQ 1000000 + +static int +vtimer_get_timebase(uint32_t *freq) +{ + phandle_t node; + int len; + + node = OF_finddevice("/cpus"); + if (node == -1) + return (ENXIO); + + len = OF_getproplen(node, "timebase-frequency"); + if (len != 4) + return (ENXIO); + + OF_getencprop(node, "timebase-frequency", freq, len); + + return (0); +} + +void +vtimer_cpuinit(struct hypctx *hypctx) +{ + struct vtimer *vtimer; + uint32_t freq; + int error; + + vtimer = &hypctx->vtimer; + mtx_init(&vtimer->mtx, "vtimer callout mutex", NULL, MTX_DEF); + callout_init_mtx(&vtimer->callout, &vtimer->mtx, 0); + + error = vtimer_get_timebase(&freq); + if (error) + freq = VTIMER_DEFAULT_FREQ; + + vtimer->freq = freq; +} + +static void +vtimer_inject_irq_callout(void *arg) +{ + struct hypctx *hypctx; + struct hyp *hyp; + + hypctx = arg; + hyp = hypctx->hyp; + + atomic_set_32(&hypctx->interrupts_pending, HVIP_VSTIP); + vcpu_notify_event(vm_vcpu(hyp->vm, hypctx->cpu_id)); +} + +int +vtimer_set_timer(struct hypctx *hypctx, uint64_t next_val) +{ + struct vtimer *vtimer; + sbintime_t time; + uint64_t curtime; + uint64_t delta; + + vtimer = &hypctx->vtimer; + + curtime = rdtime(); + if (curtime < next_val) { + delta = next_val - curtime; + time = delta * SBT_1S / vtimer->freq; + atomic_clear_32(&hypctx->interrupts_pending, HVIP_VSTIP); + callout_reset_sbt(&vtimer->callout, time, 0, + vtimer_inject_irq_callout, hypctx, 0); + } else + atomic_set_32(&hypctx->interrupts_pending, HVIP_VSTIP); + + return (0); +} diff --git a/sys/riscv/vmm/vmm_vtimer.h b/sys/riscv/vmm/vmm_vtimer.h new file mode 100644 index 000000000000..6deca322ce99 --- /dev/null +++ b/sys/riscv/vmm/vmm_vtimer.h @@ -0,0 +1,47 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Ruslan Bukin + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _VMM_VTIMER_H_ +#define _VMM_VTIMER_H_ + +struct hypctx; + +struct vtimer { + struct callout callout; + struct mtx mtx; + uint32_t freq; +}; + +void vtimer_cpuinit(struct hypctx *hypctx); +int vtimer_set_timer(struct hypctx *hypctx, uint64_t next_val); + +#endif /* !_VMM_VTIMER_H_ */ diff --git a/usr.sbin/bhyve/riscv/fdt.c b/usr.sbin/bhyve/riscv/fdt.c index bef3f64b0c64..81296714e013 100644 --- a/usr.sbin/bhyve/riscv/fdt.c +++ b/usr.sbin/bhyve/riscv/fdt.c @@ -1,327 +1,328 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 The FreeBSD Foundation * Copyright (c) 2024 Ruslan Bukin * * This software was developed by Andrew Turner under sponsorship from * the FreeBSD Foundation. * * This software was developed by the University of Cambridge Computer * Laboratory (Department of Computer Science and Technology) under Innovate * UK project 105694, "Digital Security by Design (DSbD) Technology Platform * Prototype". * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include "config.h" #include "bhyverun.h" #include "fdt.h" #define SET_PROP_U32(prop, idx, val) \ ((uint32_t *)(prop))[(idx)] = cpu_to_fdt32(val) #define SET_PROP_U64(prop, idx, val) \ ((uint64_t *)(prop))[(idx)] = cpu_to_fdt64(val) #define IRQ_TYPE_LEVEL_HIGH 4 #define IRQ_TYPE_LEVEL_LOW 8 static void *fdtroot; static uint32_t aplic_phandle = 0; static uint32_t intc0_phandle = 0; static uint32_t assign_phandle(void *fdt) { static uint32_t next_phandle = 1; uint32_t phandle; phandle = next_phandle; next_phandle++; fdt_property_u32(fdt, "phandle", phandle); return (phandle); } static void set_single_reg(void *fdt, uint64_t start, uint64_t len) { void *reg; fdt_property_placeholder(fdt, "reg", 2 * sizeof(uint64_t), ®); SET_PROP_U64(reg, 0, start); SET_PROP_U64(reg, 1, len); } static void add_cpu(void *fdt, int cpuid, const char *isa) { char node_name[16]; snprintf(node_name, sizeof(node_name), "cpu@%d", cpuid); fdt_begin_node(fdt, node_name); fdt_property_string(fdt, "device_type", "cpu"); fdt_property_string(fdt, "compatible", "riscv"); fdt_property_u32(fdt, "reg", cpuid); fdt_property_string(fdt, "riscv,isa", isa); fdt_property_string(fdt, "mmu-type", "riscv,sv39"); fdt_property_string(fdt, "clock-frequency", "1000000000"); fdt_begin_node(fdt, "interrupt-controller"); intc0_phandle = assign_phandle(fdt); fdt_property_u32(fdt, "#address-cells", 2); fdt_property_u32(fdt, "#interrupt-cells", 1); fdt_property(fdt, "interrupt-controller", NULL, 0); fdt_property_string(fdt, "compatible", "riscv,cpu-intc"); fdt_end_node(fdt); fdt_end_node(fdt); } static void add_cpus(void *fdt, int ncpu, const char *isa) { int cpuid; fdt_begin_node(fdt, "cpus"); /* XXX: Needed given the root #address-cells? */ fdt_property_u32(fdt, "#address-cells", 1); fdt_property_u32(fdt, "#size-cells", 0); - fdt_property_u32(fdt, "timebase-frequency", 10000000); + /* TODO: take timebase from kernel? */ + fdt_property_u32(fdt, "timebase-frequency", 1000000); for (cpuid = 0; cpuid < ncpu; cpuid++) add_cpu(fdt, cpuid, isa); fdt_end_node(fdt); } int fdt_init(struct vmctx *ctx, int ncpu, vm_paddr_t fdtaddr, vm_size_t fdtsize, const char *isa) { void *fdt; const char *bootargs; fdt = paddr_guest2host(ctx, fdtaddr, fdtsize); if (fdt == NULL) return (EFAULT); fdt_create(fdt, (int)fdtsize); /* Add the memory reserve map (needed even if none is reserved) */ fdt_finish_reservemap(fdt); /* Create the root node */ fdt_begin_node(fdt, ""); fdt_property_string(fdt, "compatible", "freebsd,bhyve"); fdt_property_u32(fdt, "#address-cells", 2); fdt_property_u32(fdt, "#size-cells", 2); fdt_begin_node(fdt, "chosen"); fdt_property_string(fdt, "stdout-path", "serial0:115200n8"); bootargs = get_config_value("fdt.bootargs"); if (bootargs != NULL) fdt_property_string(fdt, "bootargs", bootargs); fdt_end_node(fdt); fdt_begin_node(fdt, "memory"); fdt_property_string(fdt, "device_type", "memory"); /* There is no lowmem on riscv. */ assert(vm_get_lowmem_size(ctx) == 0); set_single_reg(fdt, vm_get_highmem_base(ctx), vm_get_highmem_size(ctx)); fdt_end_node(fdt); add_cpus(fdt, ncpu, isa); /* Finalized by fdt_finalized(). 
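	 * (i.e. by fdt_finalize(), which closes the root node once the
	 * aplic, uart and pcie nodes below have been added).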
*/ fdtroot = fdt; return (0); } void fdt_add_aplic(uint64_t mem_base, uint64_t mem_size) { char node_name[32]; void *fdt, *prop; fdt = fdtroot; snprintf(node_name, sizeof(node_name), "interrupt-controller@%lx", (unsigned long)mem_base); fdt_begin_node(fdt, node_name); aplic_phandle = assign_phandle(fdt); fdt_property_string(fdt, "compatible", "riscv,aplic"); fdt_property(fdt, "interrupt-controller", NULL, 0); #if notyet fdt_property(fdt, "msi-controller", NULL, 0); #endif /* XXX: Needed given the root #address-cells? */ fdt_property_u32(fdt, "#address-cells", 2); fdt_property_u32(fdt, "#interrupt-cells", 2); fdt_property_placeholder(fdt, "reg", 2 * sizeof(uint64_t), &prop); SET_PROP_U64(prop, 0, mem_base); SET_PROP_U64(prop, 1, mem_size); fdt_property_placeholder(fdt, "interrupts-extended", 2 * sizeof(uint32_t), &prop); SET_PROP_U32(prop, 0, intc0_phandle); SET_PROP_U32(prop, 1, 9); fdt_property_u32(fdt, "riscv,num-sources", 63); fdt_end_node(fdt); fdt_property_u32(fdt, "interrupt-parent", aplic_phandle); } void fdt_add_uart(uint64_t uart_base, uint64_t uart_size, int intr) { void *fdt, *interrupts; char node_name[32]; assert(aplic_phandle != 0); fdt = fdtroot; snprintf(node_name, sizeof(node_name), "serial@%lx", uart_base); fdt_begin_node(fdt, node_name); fdt_property_string(fdt, "compatible", "ns16550"); set_single_reg(fdt, uart_base, uart_size); fdt_property_u32(fdt, "interrupt-parent", aplic_phandle); fdt_property_placeholder(fdt, "interrupts", 2 * sizeof(uint32_t), &interrupts); SET_PROP_U32(interrupts, 0, intr); SET_PROP_U32(interrupts, 1, IRQ_TYPE_LEVEL_HIGH); fdt_end_node(fdt); snprintf(node_name, sizeof(node_name), "/serial@%lx", uart_base); fdt_begin_node(fdt, "aliases"); fdt_property_string(fdt, "serial0", node_name); fdt_end_node(fdt); } void fdt_add_pcie(int intrs[static 4]) { void *fdt, *prop; int slot, pin, intr, i; assert(aplic_phandle != 0); fdt = fdtroot; fdt_begin_node(fdt, "pcie@1f0000000"); fdt_property_string(fdt, "compatible", "pci-host-ecam-generic"); fdt_property_u32(fdt, "#address-cells", 3); fdt_property_u32(fdt, "#size-cells", 2); fdt_property_string(fdt, "device_type", "pci"); fdt_property_u64(fdt, "bus-range", (0ul << 32) | 1); set_single_reg(fdt, 0xe0000000, 0x10000000); fdt_property_placeholder(fdt, "ranges", 2 * 7 * sizeof(uint32_t), &prop); SET_PROP_U32(prop, 0, 0x01000000); SET_PROP_U32(prop, 1, 0); SET_PROP_U32(prop, 2, 0xdf000000); SET_PROP_U32(prop, 3, 0); SET_PROP_U32(prop, 4, 0xdf000000); SET_PROP_U32(prop, 5, 0); SET_PROP_U32(prop, 6, 0x01000000); SET_PROP_U32(prop, 7, 0x02000000); SET_PROP_U32(prop, 8, 0); SET_PROP_U32(prop, 9, 0xa0000000); SET_PROP_U32(prop, 10, 0); SET_PROP_U32(prop, 11, 0xa0000000); SET_PROP_U32(prop, 12, 0); SET_PROP_U32(prop, 13, 0x3f000000); #if notyet fdt_property_placeholder(fdt, "msi-map", 4 * sizeof(uint32_t), &prop); SET_PROP_U32(prop, 0, 0); /* RID base */ SET_PROP_U32(prop, 1, aplic_phandle); /* MSI parent */ SET_PROP_U32(prop, 2, 0); /* MSI base */ SET_PROP_U32(prop, 3, 0x10000); /* RID length */ fdt_property_u32(fdt, "msi-parent", aplic_phandle); #endif fdt_property_u32(fdt, "#interrupt-cells", 1); fdt_property_u32(fdt, "interrupt-parent", aplic_phandle); /* * Describe standard swizzled interrupts routing (pins rotated by one * for each consecutive slot). Must match pci_irq_route(). 
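	 * For example, pin INTA# of slot 0 maps to intrs[0] while INTA# of
	 * slot 1 maps to intrs[1], per intr = intrs[(pin + slot) % 4] below.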
*/ fdt_property_placeholder(fdt, "interrupt-map-mask", 4 * sizeof(uint32_t), &prop); SET_PROP_U32(prop, 0, 3 << 11); SET_PROP_U32(prop, 1, 0); SET_PROP_U32(prop, 2, 0); SET_PROP_U32(prop, 3, 7); fdt_property_placeholder(fdt, "interrupt-map", 16 * 9 * sizeof(uint32_t), &prop); for (i = 0; i < 16; ++i) { pin = i % 4; slot = i / 4; intr = intrs[(pin + slot) % 4]; SET_PROP_U32(prop, 10 * i + 0, slot << 11); SET_PROP_U32(prop, 10 * i + 1, 0); SET_PROP_U32(prop, 10 * i + 2, 0); SET_PROP_U32(prop, 10 * i + 3, pin + 1); SET_PROP_U32(prop, 10 * i + 4, aplic_phandle); SET_PROP_U32(prop, 10 * i + 5, 0); SET_PROP_U32(prop, 10 * i + 6, 0); SET_PROP_U32(prop, 10 * i + 7, intr); SET_PROP_U32(prop, 10 * i + 8, IRQ_TYPE_LEVEL_HIGH); } fdt_end_node(fdt); } void fdt_finalize(void) { fdt_end_node(fdtroot); fdt_finish(fdtroot); }