D13995.id38245.diff

Index: sys/dev/nvme/nvme.h
===================================================================
--- sys/dev/nvme/nvme.h
+++ sys/dev/nvme/nvme.h
@@ -124,6 +124,7 @@
} __packed;
enum shn_value {
+ NVME_SHN_NOEFCT = 0x0,
NVME_SHN_NORMAL = 0x1,
NVME_SHN_ABRUPT = 0x2,
};
Index: usr.sbin/bhyve/Makefile
===================================================================
--- usr.sbin/bhyve/Makefile
+++ usr.sbin/bhyve/Makefile
@@ -39,6 +39,7 @@
pci_virtio_rnd.c \
pci_uart.c \
pci_xhci.c \
+ pci_nvme.c \
pm.c \
post.c \
ps2kbd.c \
@@ -61,7 +62,7 @@
LIBADD= vmmapi md pthread z
-CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller
+CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller -I${BHYVE_SYSDIR}/sys/dev/nvme/
WARNS?= 2
Index: usr.sbin/bhyve/bhyve.8
===================================================================
--- usr.sbin/bhyve/bhyve.8
+++ usr.sbin/bhyve/bhyve.8
@@ -180,6 +180,8 @@
.It Li lpc
LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports and a boot ROM.
The LPC bridge emulation can only be configured on bus 0.
+.It Li nvme
+NVM Express (NVMe) controller.
.El
.It Op Ar conf
This optional parameter describes the backend for device emulations.
Index: usr.sbin/bhyve/pci_nvme.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/pci_nvme.c
@@ -0,0 +1,1257 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+#include <pthread.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include <nvme.h>
+
+#include "pci_emul.h"
+#include "block_if.h"
+#include "bhyverun.h"
+
+#ifdef NVME_DEBUG
+static FILE* dbg;
+#define DPRINTF(format, arg...) \
+ do { \
+ fprintf(dbg, format, ##arg); \
+ fflush(dbg); \
+ } while (0)
+#else
+#define DPRINTF(format, arg...)
+#endif
+
+enum nvme_controller_register_offsets {
+ NVME_CR_CAP_LOW = 0x00,
+ NVME_CR_CAP_HI = 0x04,
+ NVME_CR_VS = 0x08,
+ NVME_CR_INTMS = 0x0c,
+ NVME_CR_INTMC = 0x10,
+ NVME_CR_CC = 0x14,
+ NVME_CR_CSTS = 0x1c,
+ NVME_CR_NSSR = 0x20,
+ NVME_CR_AQA = 0x24,
+ NVME_CR_ASQ_LOW = 0x28,
+ NVME_CR_ASQ_HI = 0x2c,
+ NVME_CR_ACQ_LOW = 0x30,
+ NVME_CR_ACQ_HI = 0x34,
+ // submission queue 0 tail doorbell (admin)
+ NVME_CR_IO_QUEUE_BASE = 0x1000,
+ /*
+ * 0x1000 ~ 0x1003 : submission queue 0 tail doorbell (admin)
+ * 0x1004 ~ 0x1007 : completion queue 0 head doorbell (admin)
+ * 0x1008 ~ 0x100b : submission queue 1 tail doorbell
+ * 0x100c ~ 0x100f : completion queue 1 head doorbell
+ */
+ NVME_CR_ADMIN_SQ_TAIL = 0x1000,
+ NVME_CR_ADMIN_CQ_HEAD = 0x1004,
+ NVME_CR_SQ_1_TAIL = 0x1008,
+ NVME_CR_CQ_1_HEAD = 0x100c,
+ NVME_CR_SIZE = 0x1010,
+};
+
+#define NVME_IO_SQ_NUM 1
+#define NVME_IO_CQ_NUM 1
+
+#define NVME_IO_SQS_SIZE (NVME_IO_SQ_NUM + 1)
+#define NVME_IO_CQS_SIZE (NVME_IO_CQ_NUM + 1)
+
+enum nvme_pci_bar {
+ NVME_BAR_CR = 0, // 0 and 1
+ NVME_BAR_RSV = 3, // reserved
+ NVME_BAR_MSIX = 4, // 4 and 5
+};
+
+/*
+ * number of completion queues: the admin completion queue (NVME_CR_ADMIN_CQ_HEAD) plus the NVMe I/O completion queues
+ */
+#define NVME_COMPLETION_QUEUE_NUM (NVME_IO_CQ_NUM + 1)
+
+enum nvme_cmd_identify_cdw10 {
+ NVME_CMD_IDENTIFY_CDW10_CNTID = 0xffff0000,
+ NVME_CMD_IDENTIFY_CDW10_RSV = 0x0000ff00,
+ NVME_CMD_IDENTIFY_CDW10_CNS = 0x000000ff,
+};
+
+enum nvme_cmd_identify_data {
+ NVME_CMD_IDENTIFY_CNS_NAMESPACE = 0x0,
+ NVME_CMD_IDENTIFY_CNS_CONTROLLER = 0x1,
+};
+
+enum nvme_cc_bits {
+ NVME_CC_EN = 0x00000001,
+ NVME_CC_RSV0 = 0x0000000e,
+ NVME_CC_CCS = 0x00000070,
+ NVME_CC_MPS = 0x00000780,
+ NVME_CC_AMS = 0x00003800,
+ NVME_CC_SHN = 0x0000c000,
+ NVME_CC_IOSQES = 0x000f0000,
+ NVME_CC_IOCQES = 0x00f00000,
+ NVME_CC_RSV1 = 0xff000000,
+};
+
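+// Saved feature values; each union mirrors the Command Dword 11 layout of the corresponding Set Features command.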
+struct nvme_features {
+ union {
+ uint32_t raw;
+ struct {
+ uint8_t ab : 2;
+ uint8_t reserved : 5;
+ uint8_t lpw : 8;
+ uint8_t mpw : 8;
+ uint8_t hpw : 8;
+ } __packed bits;
+ } __packed arbitration;
+
+ union {
+ uint32_t raw;
+ struct {
+ uint8_t ps : 5;
+ uint32_t reserved : 27;
+ } __packed bits;
+ } __packed power_management;
+
+ union {
+ uint32_t raw;
+ struct {
+ uint8_t num : 6;
+ uint32_t reserved : 26;
+ } __packed bits;
+ } __packed lba_range_type;
+
+ union {
+ uint32_t raw;
+ struct {
+ uint16_t over;
+ uint16_t under;
+ } __packed bits;
+ } __packed temparture_threshold;
+
+ union {
+ uint32_t raw;
+ struct {
+ uint16_t tler;
+ uint16_t reserved;
+ } __packed bits;
+ } __packed error_recovery;
+
+ union {
+ uint32_t raw;
+ struct {
+ uint8_t wce : 1;
+ uint32_t reserved : 31;
+ } __packed bits;
+ } __packed volatile_write_cache;
+
+ union {
+ uint32_t raw;
+ struct {
+ uint16_t ncqr : 16;
+ uint16_t nsqr : 16;
+ } __packed bits;
+ } __packed num_of_queues;
+
+ union {
+ uint32_t raw;
+ struct {
+ uint8_t thr : 8;
+ uint8_t time : 8;
+ uint16_t reserved : 16;
+ } __packed bits;
+ } __packed interrupt_coalscing;
+
+ union {
+ uint32_t raw;
+ struct {
+ uint16_t iv;
+ uint8_t cd : 1;
+ uint16_t reserved : 15;
+ } __packed bits;
+ } __packed interrupt_vector_config;
+
+ union {
+ uint32_t raw;
+ struct {
+ uint8_t dn : 1;
+ uint32_t reserved : 31;
+ } __packed bits;
+ } __packed write_atomicity;
+
+ union {
+ uint32_t raw;
+ struct {
+ uint16_t smart : 8;
+ uint16_t ns_attr_noti : 1;
+ uint16_t fw_act_noti : 1;
+ uint16_t tele_log_noti : 1;
+ uint32_t reserved : 21;
+ } __packed bits;
+ } __packed async_event_config;
+
+ union {
+ uint32_t raw;
+ struct {
+ uint8_t pbslc;
+ uint32_t reserved : 24;
+ } __packed bits;
+ } __packed software_progress_marker;
+
+};
+
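+// Emulator-side state for one completion queue; 'tail' is the index at which the next completion entry will be posted.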
+struct nvme_completion_queue_info {
+ uintptr_t base_addr;
+ uint16_t size;
+ uint16_t tail;
+ uint16_t qid;
+ uint16_t interrupt_vector;
+ pthread_mutex_t mtx;
+};
+
+struct pci_nvme_softc;
+// io request to block_if.
+struct nvme_ioreq {
+ struct blockif_req io_req;
+ struct nvme_completion completion_entry;
+ struct nvme_completion_queue_info* cq_info;
+ struct nvme_submission_queue_info* sq_info;
+ struct pci_nvme_softc* sc;
+ STAILQ_ENTRY(nvme_ioreq) io_flist;
+ TAILQ_ENTRY(nvme_ioreq) io_blist;
+};
+
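+// Emulator-side state for one submission queue; iofhd holds free I/O requests and iobhd holds requests in flight to blockif.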
+struct nvme_submission_queue_info {
+ uintptr_t base_addr;
+ uint16_t size;
+ uint16_t qid;
+ uint16_t completion_qid;
+ struct nvme_ioreq* ioreq;
+ STAILQ_HEAD(nvme_fhead, nvme_ioreq) iofhd;
+ TAILQ_HEAD(nvme_bhead, nvme_ioreq) iobhd;
+};
+
+struct pci_nvme_softc {
+ struct nvme_registers regs;
+ struct nvme_features features;
+ struct pci_devinst* pi;
+ uint16_t completion_queue_head;
+ uint16_t submission_queue_tail;
+ uintptr_t asq_base;
+ uintptr_t acq_base;
+ struct nvme_controller_data controller_data;
+ struct nvme_namespace_data namespace_data;
+ struct nvme_completion_queue_info* cqs_info;
+ struct nvme_submission_queue_info* sqs_info;
+ struct blockif_ctxt* bctx;
+};
+
+static void pci_nvme_reset(struct pci_nvme_softc* sc)
+{
+ /*
+ * Controller Register values are according to NVMe specification 1.0e.
+ */
+
+ /*
+ * Controller Capabilities
+ */
+ // Maximum queue (I/O submission and completion queue) entries supported by the NVMe controller
+ sc->regs.cap_lo.bits.mqes = 0x10;
+
+ // Contiguous Queues Required
+ sc->regs.cap_lo.bits.cqr = 1;
+ sc->regs.cap_lo.bits.ams = 0;
+ sc->regs.cap_lo.bits.reserved1 = 0;
+ sc->regs.cap_lo.bits.to = 10;
+
+ sc->regs.cap_hi.bits.dstrd = 0;
+ sc->regs.cap_hi.bits.reserved3 = 0;
+ sc->regs.cap_hi.bits.css_nvm = 0;
+ sc->regs.cap_hi.bits.css_reserved = 0;
+ sc->regs.cap_hi.bits.reserved2 = 0;
+ /*
+ * MPSMIN and MPSMAX
+ * indicate the minimum and maximum host memory page size
+ * (2 ^ (12 + MPSMAX or MPSMIN))
+ */
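+ // e.g. with MPSMIN = MPSMAX = 0, the host memory page size is fixed at 2^12 = 4096 bytes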
+ sc->regs.cap_hi.bits.mpsmin = 0;
+ sc->regs.cap_hi.bits.mpsmax = 0;
+ sc->regs.cap_hi.bits.reserved1 = 0;
+
+ /*
+ * Version of NVM express specification.
+ */
+ // in this case, the version is 1.0
+ uint32_t version = (0x0001 << 16) | 0x0000;
+ sc->regs.vs = version;
+
+ // Interrupt Mask Set
+ sc->regs.intms = 0;
+
+ // Interrupt Mask clear
+ sc->regs.intmc = 0;
+
+ sc->regs.cc.raw = 0;
+
+ sc->regs.csts.raw = 0;
+
+ sc->completion_queue_head = 0;
+ sc->asq_base = 0;
+ sc->acq_base = 0;
+}
+
+static void nvme_initialize_feature(struct pci_nvme_softc* sc)
+{
+ sc->features.arbitration.raw = 0;
+
+ sc->features.temparture_threshold.bits.over = 0xffff;
+ sc->features.temparture_threshold.bits.under = 0x0000;
+
+ sc->features.power_management.raw = 0;
+
+ sc->features.error_recovery.raw = 0;
+
+ sc->features.num_of_queues.raw = 0;
+
+ sc->features.interrupt_coalscing.raw = 0;
+
+ sc->features.interrupt_vector_config.raw = 0;
+ sc->features.interrupt_vector_config.bits.cd = 1;
+
+ sc->features.async_event_config.raw = 0;
+}
+
+static void nvme_initialize_identify(struct pci_nvme_softc* sc)
+{
+ // only one namespace
+ sc->controller_data.nn = 0x1;
+
+ // LBA format
+ sc->namespace_data.lbaf[0].ms = 0x00;
+
+ /*
+ * LBA data size (LBADS) is reported as a power of two (2^n);
+ * it must be at least 9, i.e. 512 bytes.
+ */
+ uint64_t lba_data_size = 9;
+ sc->namespace_data.lbaf[0].lbads = lba_data_size;
+ sc->namespace_data.lbaf[0].rp = 0x0;
+
+ sc->namespace_data.nlbaf = 0x00;
+ sc->namespace_data.flbas.format = 0x00;
+ sc->namespace_data.nlbaf = 0x1;
+
+ uint64_t block_size = blockif_size(sc->bctx);
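+ // NSZE and NCAP are expressed in logical blocks: total bytes divided by 2^LBADS (512-byte blocks here)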
+ sc->namespace_data.nsze = block_size / (2 << (lba_data_size - 1));
+ sc->namespace_data.ncap = block_size / (2 << (lba_data_size - 1));
+}
+
+static int pci_nvme_submission_queue_init(
+ struct nvme_submission_queue_info* qinfo,
+ struct blockif_ctxt* ctxt)
+{
+ struct nvme_ioreq* req;
+ int ioq_size = blockif_queuesz(ctxt);
+ qinfo->ioreq = calloc(ioq_size, sizeof(struct nvme_ioreq));
+ STAILQ_INIT(&qinfo->iofhd);
+
+ // setup and insert requests to the free queue
+ for (int i = 0; i < ioq_size; ++i) {
+ req = &qinfo->ioreq[i];
+ req->sq_info = qinfo;
+ // setup callback function
+ STAILQ_INSERT_TAIL(&qinfo->iofhd, req, io_flist);
+ }
+
+ TAILQ_INIT(&qinfo->iobhd);
+ return 0;
+}
+
+static void
+pci_nvme_cq_init(struct nvme_completion_queue_info* cqinfo)
+{
+ pthread_mutex_init(&cqinfo->mtx, NULL);
+ cqinfo->base_addr = (uintptr_t)NULL;
+ cqinfo->size = -1;
+ cqinfo->tail = 0;
+ cqinfo->qid = -1;
+ cqinfo->interrupt_vector = -1;
+}
+
+static int pci_nvme_init(struct vmctx* ctx, struct pci_devinst* pi, char* opts)
+{
+ struct pci_nvme_softc* sc = NULL;
+ struct blockif_ctxt* bctxt = NULL;
+ char bident[sizeof("XX:X:X")];
+
+#ifdef NVME_DEBUG
+ dbg = fopen("/tmp/nvme_emu_log", "w+");
+#endif
+ DPRINTF("--- start nvme controller ---\n");
+
+ if (opts == NULL) {
+ fprintf(stderr, "pci_nvme: backing device required\n");
+ return 1;
+ }
+ DPRINTF("%s\n", opts);
+
+ /*
+ * Attempt to open the backing image. Use the PCI
+ * slot/func for the identifier string.
+ */
+ snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
+ bctxt = blockif_open(opts, bident);
+ if (bctxt == NULL) {
+ goto fail;
+ }
+
+/* pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0953); */
+ pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0111);
+ pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
+ pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
+
+ // for NVMe Controller Registers
+ if (pci_emul_alloc_bar(pi, NVME_BAR_CR, PCIBAR_MEM64, NVME_CR_SIZE)) {
+ DPRINTF("error occurred in pci_emul_alloc_bar\n");
+ goto fail;
+ }
+
+ // It is recommended that the controller allocate a unique MSI-X vector for each completion queue.
+ if (pci_emul_add_msixcap(pi, NVME_COMPLETION_QUEUE_NUM, NVME_BAR_MSIX))
+ {
+ DPRINTF("error occurred in pci_emul_add_msixcap\n");
+ goto fail;
+ }
+ DPRINTF("table %d, pba %d\n", pci_msix_table_bar(pi), pci_msix_pba_bar(pi));
+
+ sc = calloc(1, sizeof(struct pci_nvme_softc));
+ pi->pi_arg = sc;
+ pi->pi_vmctx = ctx;
+ sc->pi = pi;
+
+ sc->bctx = bctxt;
+
+ pci_nvme_reset(sc);
+
+ sc->cqs_info = calloc(NVME_IO_CQS_SIZE,
+ sizeof(struct nvme_completion_queue_info));
+ for(int i=0; i< NVME_IO_CQS_SIZE; ++i) {
+ pci_nvme_cq_init(&sc->cqs_info[i]);
+ }
+
+ sc->sqs_info = calloc(NVME_IO_SQS_SIZE,
+ sizeof(struct nvme_submission_queue_info));
+
+ for (int i = 0; i < NVME_IO_SQS_SIZE; ++i) {
+ pci_nvme_submission_queue_init(&sc->sqs_info[i], sc->bctx);
+ }
+
+ nvme_initialize_identify(sc);
+ nvme_initialize_feature(sc);
+
+ return 0;
+
+fail:
+ if (bctxt != NULL)
+ blockif_close(bctxt);
+ if (sc != NULL) {
+ free(sc->cqs_info);
+ free(sc->sqs_info);
+ free(sc);
+ }
+ return 1;
+}
+
+static void pci_nvme_setup_controller(struct vmctx* ctx,
+ struct pci_nvme_softc* sc)
+{
+ DPRINTF("asqs 0x%x, acqs 0x%x\n", sc->regs.aqa.bits.asqs,
+ sc->regs.aqa.bits.acqs);
+ sc->asq_base = (uintptr_t)vm_map_gpa(
+ ctx, sc->regs.asq,
+ sizeof(struct nvme_command) * sc->regs.aqa.bits.asqs);
+ sc->acq_base = (uintptr_t)vm_map_gpa(
+ ctx, sc->regs.acq,
+ sizeof(struct nvme_completion) * sc->regs.aqa.bits.acqs);
+
+ sc->regs.csts.bits.rdy = 1;
+}
+
+static void execute_set_feature_command(struct pci_nvme_softc* sc,
+ struct nvme_command* command,
+ struct nvme_completion* cmp_entry)
+{
+ DPRINTF("0x%x\n", command->cdw11);
+ DPRINTF("0x%x\n", command->cdw10);
+ cmp_entry->cdw0 = 0x00000000;
+ enum nvme_feature feature = command->cdw10 & 0xf;
+ switch (feature) {
+ case NVME_FEAT_NUMBER_OF_QUEUES:
+ sc->features.num_of_queues.raw = command->cdw11 & 0xffff;
+ DPRINTF("SET_FEATURE cmd: ncqr 0x%x, nsqr 0x%x\n",
+ (command->cdw11 >> 16), (command->cdw11 & 0xffff));
+ cmp_entry->status.sc = 0x00;
+ cmp_entry->status.sct = 0x0;
+ if (pci_msix_enabled(sc->pi)) {
+ DPRINTF("generate msix, table_count %d, \n",
+ sc->pi->pi_msix.table_count);
+ pci_generate_msix(sc->pi, 0);
+ }
+ else {
+ assert(0 && "pci_msix is disabled?");
+ }
+ break;
+
+ case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
+ //TODO
+ sc->features.async_event_config.raw = command->cdw11;
+ cmp_entry->status.sc = 0x00;
+ cmp_entry->status.sct = 0x0;
+ pci_generate_msix(sc->pi, 0);
+ break;
+
+ case NVME_FEAT_INTERRUPT_COALESCING:
+ DPRINTF("interrupt coalescing cdw11 0x%x\n", command->cdw11);
+ cmp_entry->status.sc = 0x00;
+ cmp_entry->status.sct = 0x0;
+ sc->features.interrupt_coalscing.bits.thr = command->cdw11 & 0xff;
+ sc->features.interrupt_coalscing.bits.time =
+ (command->cdw11 >> 8) & 0xff;
+ pci_generate_msix(sc->pi, 0);
+ break;
+
+ default:
+ assert(0 && "this feature is not implemented");
+ }
+}
+
+enum temp_threshold_cdw11 {
+ NVME_TEMP_THRESHOLD_TMPTH = 0x0000ffff,
+ NVME_TEMP_THRESHOLD_TMPSEL = 0x000f0000,
+ NVME_TEMP_THRESHOLD_THSEL = 0x00300000,
+ NVME_TEMP_THRESHOLD_RESERVED = 0xffc00000,
+};
+
+static void execute_get_feature_command(struct pci_nvme_softc* sc,
+ struct nvme_command* command,
+ struct nvme_completion* cmp_entry)
+{
+ enum nvme_feature feature = command->cdw10 & 0xf;
+ switch (feature) {
+ case NVME_FEAT_TEMPERATURE_THRESHOLD: {
+ uint8_t thsel = (command->cdw11 & NVME_TEMP_THRESHOLD_THSEL) >> 20;
+ // over temperature threshold
+ if (thsel == 0x00) {
+ cmp_entry->cdw0 = sc->features.temparture_threshold.bits.over;
+ }
+ // under temperature threshold
+ else if (thsel == 0x1) {
+ cmp_entry->cdw0 = sc->features.temparture_threshold.bits.under;
+ }
+ else {
+ assert(0 && "the thsel value is invalid");
+ }
+ cmp_entry->status.sc = 0x00;
+ cmp_entry->status.sct = 0x0;
+ pci_generate_msix(sc->pi, 0);
+ break;
+ }
+
+ default:
+ DPRINTF("feature number: 0x%x\n", feature);
+ assert(0 && "not implemented");
+ }
+
+ return;
+}
+
+static void nvme_execute_identify_command(struct pci_nvme_softc* sc,
+ struct nvme_command* command,
+ struct nvme_completion* cmp_entry)
+{
+ DPRINTF("Identify command (0x%x)\n",
+ command->cdw10 & NVME_CMD_IDENTIFY_CDW10_CNS);
+ DPRINTF("cdw10 0x%x, dptr 0x%lx, 0x%lx", command->cdw10, command->prp1,
+ command->prp2);
+ uintptr_t dest_addr = (uintptr_t)vm_map_gpa(
+ sc->pi->pi_vmctx, command->prp1, sizeof(struct nvme_controller_data));
+
+ // TODO: revisit the contents of the completion queue entry
+ switch (command->cdw10 & NVME_CMD_IDENTIFY_CDW10_CNS) {
+ case NVME_CMD_IDENTIFY_CNS_NAMESPACE:
+ memcpy((struct nvme_namespace_data*)dest_addr, &sc->namespace_data,
+ sizeof(struct nvme_namespace_data));
+ cmp_entry->status.sc = 0x00;
+ cmp_entry->status.sct = 0x0;
+ pci_generate_msix(sc->pi, 0);
+ return;
+ case NVME_CMD_IDENTIFY_CNS_CONTROLLER:
+ memcpy((struct nvme_controller_data*)dest_addr,
+ &sc->controller_data, sizeof(struct nvme_controller_data));
+ cmp_entry->status.sc = 0x00;
+ cmp_entry->status.sct = 0x0;
+ pci_generate_msix(sc->pi, 0);
+ return;
+ default:
+ assert(0 && "[CNS] not implemented");
+ }
+
+ assert(0);
+}
+
+enum create_io_cq_cdw11 {
+ NVME_CREATE_IO_CQ_CDW11_PC = 0x00000001,
+ NVME_CREATE_IO_CQ_CDW11_IEN = 0x00000002,
+ NVME_CREATE_IO_CQ_CDW11_RSV = 0x0000fffc,
+ NVME_CREATE_IO_CQ_CDW11_IV = 0xffff0000,
+};
+
+static void
+nvme_execute_create_io_cq_command(struct pci_nvme_softc* sc,
+ struct nvme_command* command,
+ struct nvme_completion* cmp_entry)
+{
+ // TODO
+ // IEN
+ // IV
+ DPRINTF("interrupt vector 0x%x\n", command->cdw11 >> 16);
+ if (command->cdw11 & NVME_CREATE_IO_CQ_CDW11_PC) {
+ uint16_t qid = command->cdw10 & 0xffff;
+ assert(qid < NVME_IO_CQS_SIZE &&
+ "increase the number of completion queues supported by this controller emulator");
+
+ if (sc->cqs_info[qid].base_addr != (uintptr_t)NULL) {
+ assert(0 && "the completion queue is already used");
+ }
+
+ uint16_t interrupt_vector = command->cdw11 >> 16;
+ uint16_t queue_size = command->cdw10 >> 16;
+ sc->cqs_info[qid].base_addr =
+ (uintptr_t)vm_map_gpa(sc->pi->pi_vmctx, command->prp1,
+ sizeof(struct nvme_completion) * queue_size);
+ sc->cqs_info[qid].size = queue_size;
+ sc->cqs_info[qid].qid = qid;
+ sc->cqs_info[qid].interrupt_vector = interrupt_vector;
+
+ cmp_entry->status.sc = 0x00;
+ cmp_entry->status.sct = 0x0;
+ pci_generate_msix(sc->pi, 0);
+ DPRINTF("qid %d, qsize 0x%x, addr 0x%lx, IV %d\n",
+ qid, queue_size, command->prp1, interrupt_vector);
+ }
+ else {
+ assert(0 && "not implemented");
+ }
+
+ return;
+}
+
+enum create_io_sq_cdw11 {
+ NVME_CREATE_IO_SQ_CDW11_PC = 0x00000001,
+ NVME_CREATE_IO_SQ_CDW11_QPRIO = 0x00000060,
+ NVME_CREATE_IO_SQ_CDW11_RSV = 0x0000ff80,
+ NVME_CREATE_IO_SQ_CDW11_CQID = 0xffff0000,
+};
+
+static void nvme_execute_create_io_sq_command(struct pci_nvme_softc* sc,
+ struct nvme_command* command,
+ struct nvme_completion* cmp_entry)
+{
+ if (command->cdw11 & NVME_CREATE_IO_SQ_CDW11_PC) {
+ uint16_t qid = command->cdw10 & 0xffff;
+ assert(qid < NVME_IO_SQS_SIZE &&
+ "increase the number of submission queues supported by this controller emulator");
+ // TODO
+ /* uint8_t queue_priority = (command->cdw11 &
+ * NVME_CREATE_IO_SQ_CDW11_QPRIO) >> 1; */
+ struct nvme_submission_queue_info* sq_info = &sc->sqs_info[qid];
+ if (sq_info->base_addr != (uintptr_t)NULL) {
+ assert(0);
+ }
+ uint16_t cqid = command->cdw11 >> 16;
+ uint16_t queue_size = command->cdw10 >> 16;
+ sq_info->base_addr =
+ (uintptr_t)vm_map_gpa(sc->pi->pi_vmctx, command->prp1,
+ sizeof(struct nvme_command) * queue_size);
+ sq_info->size = queue_size;
+ sq_info->completion_qid = cqid;
+ sq_info->qid = qid;
+
+ cmp_entry->status.sc = 0x00;
+ cmp_entry->status.sct = 0x0;
+ pci_generate_msix(sc->pi, 0);
+ DPRINTF("qid %d, qsize 0x%x, addr 0x%lx, cqid %d\n",
+ qid, queue_size, command->prp1, cqid);
+
+ }
+ else {
+ assert(0 && "not implemented");
+ }
+}
+
+static void nvme_execute_delete_io_sq_command(
+ struct pci_nvme_softc* sc,
+ struct nvme_command* command,
+ struct nvme_completion* cmp_entry)
+{
+ uint16_t qid = command->cdw10 & 0xffff;
+ sc->sqs_info[qid].base_addr = (uintptr_t)NULL;
+ pci_generate_msix(sc->pi, 0);
+}
+
+static void nvme_execute_delete_io_cq_command(
+ struct pci_nvme_softc* sc,
+ struct nvme_command* command,
+ struct nvme_completion* cmp_entry)
+{
+ uint16_t qid = command->cdw10 & 0xffff;
+ sc->cqs_info[qid].base_addr = (uintptr_t)NULL;
+ pci_generate_msix(sc->pi, 0);
+}
+
+static void execute_async_event_request_command(
+ struct pci_nvme_softc* sc,
+ struct nvme_command* command,
+ struct nvme_completion* cmp_entry)
+{
+ // TODO
+ // when an event occurs, the controller should post a completion entry to notify the host
+}
+
+static void pci_nvme_execute_admin_command(struct pci_nvme_softc* sc,
+ uint64_t value)
+{
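+ /* 'value' is the new admin SQ tail written to the doorbell; the command just submitted is assumed to sit at index value - 1. */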
+ struct nvme_command* command =
+ (struct nvme_command*)(sc->asq_base +
+ sizeof(struct nvme_command) * (value - 1));
+
+ struct nvme_completion* cmp_entry =
+ (struct nvme_completion*)(sc->acq_base +
+ sizeof(struct nvme_completion) *
+ sc->completion_queue_head);
+
+ cmp_entry->sqid = 0;
+ cmp_entry->sqhd = value - 1;
+ cmp_entry->cid = command->cid;
+ cmp_entry->status.p = !cmp_entry->status.p;
+
+ DPRINTF("[admin command] 0x%x\n", command->opc);
+ switch (command->opc) {
+ case NVME_OPC_DELETE_IO_SQ:
+ nvme_execute_delete_io_sq_command(sc, command, cmp_entry);
+ break;
+ case NVME_OPC_CREATE_IO_SQ:
+ nvme_execute_create_io_sq_command(sc, command, cmp_entry);
+ break;
+ case NVME_OPC_DELETE_IO_CQ:
+ nvme_execute_delete_io_cq_command(sc, command, cmp_entry);
+ break;
+ case NVME_OPC_CREATE_IO_CQ:
+ nvme_execute_create_io_cq_command(sc, command, cmp_entry);
+ break;
+ case NVME_OPC_IDENTIFY:
+ nvme_execute_identify_command(sc, command, cmp_entry);
+ break;
+ case NVME_OPC_SET_FEATURES:
+ execute_set_feature_command(sc, command, cmp_entry);
+ break;
+ case NVME_OPC_GET_FEATURES:
+ execute_get_feature_command(sc, command, cmp_entry);
+ break;
+ case NVME_OPC_ASYNC_EVENT_REQUEST:
+ // XXX this is a dirty hack.... should be fixed...
+ cmp_entry->status.p = !cmp_entry->status.p;
+ execute_async_event_request_command(sc, command, cmp_entry);
+ sc->completion_queue_head--;
+ break;
+ default:
+ assert(0 && "the admin command is not implemented");
+ }
+
+ sc->completion_queue_head++;
+ if(sc->regs.aqa.bits.acqs == sc->completion_queue_head)
+ {
+ sc->completion_queue_head = 0;
+ }
+}
+
+static void pci_nvme_blockif_ioreq_cb(struct blockif_req* br, int err)
+{
+ DPRINTF("%s %d\n", __func__, err);
+
+ struct nvme_ioreq* nreq = br->br_param;
+ struct nvme_completion_queue_info* cq_info = nreq->cq_info;
+ struct nvme_submission_queue_info* sq_info = nreq->sq_info;
+
+ pthread_mutex_lock(&cq_info->mtx);
+ struct nvme_completion* completion_entry =
+ (struct nvme_completion*)(cq_info->base_addr +
+ sizeof(struct nvme_completion) *
+ cq_info->tail);
+
+ nreq->completion_entry.status.sct = 0x0;
+ nreq->completion_entry.status.sc = 0x00;
+
+ // save the current phase bit before writing the new values to the completion queue entry
+ uint8_t status_phase = completion_entry->status.p;
+
+ memcpy(completion_entry, &nreq->completion_entry,
+ sizeof(struct nvme_completion));
+ completion_entry->status.p = !status_phase;
+
+ DPRINTF("br_resid 0x%lx\n", br->br_resid);
+ DPRINTF("cq_head 0x%x(max 0x%x), qid %d, status phase %x IV %d\n",
+ cq_info->tail, cq_info->size, cq_info->qid,
+ !status_phase, cq_info->interrupt_vector);
+
+ cq_info->tail++;
+/* if (cq_info->tail == cq_info->size) { */
+ if (cq_info->tail > cq_info->size) {
+ cq_info->tail = 0;
+ }
+
+ TAILQ_REMOVE(&sq_info->iobhd, nreq, io_blist);
+
+ STAILQ_INSERT_TAIL(&sq_info->iofhd, nreq, io_flist);
+
+ pci_generate_msix(nreq->sc->pi, cq_info->interrupt_vector);
+ pthread_mutex_unlock(&cq_info->mtx);
+}
+
+static void nvme_nvm_command_read_write(
+ struct pci_nvme_softc* sc,
+ struct nvme_command* command,
+ struct nvme_submission_queue_info* sq_info,
+ uint16_t sqhd)
+{
+ int err = 0;
+ uintptr_t starting_lba = ((uint64_t)command->cdw11 << 32) | command->cdw10;
+ // NLB (number of logical blocks) is a 0's based value
+ uint16_t number_of_lb = (command->cdw12 & 0xffff) + 1;
+ int logic_block_size = 1 << sc->namespace_data.lbaf[0].lbads;
+
+ DPRINTF("slba 0x%lx, nlba 0x%x, lb size 0x%x\n",
+ starting_lba, number_of_lb, logic_block_size);
+ DPRINTF("sqhd: 0x%x, destination addr : 0x%08lx\n", sqhd, command->prp1);
+
+ /* struct nvme_ioreq *nreq = sq_info->ioreq; */
+ struct nvme_ioreq* nreq = STAILQ_FIRST(&sq_info->iofhd);
+ assert(nreq != NULL);
+ STAILQ_REMOVE_HEAD(&sq_info->iofhd, io_flist);
+
+ nreq->completion_entry.sqhd = sqhd;
+ nreq->completion_entry.sqid = sq_info->qid;
+ nreq->completion_entry.cid = command->cid;
+ nreq->cq_info = &sc->cqs_info[sq_info->completion_qid];
+ nreq->sc = sc;
+ nreq->sq_info = sq_info;
+ DPRINTF("sqid %d, cqid %d\n", sq_info->qid, sq_info->completion_qid);
+
+
+ int data_size = number_of_lb * logic_block_size;
+ int page_num = data_size / (1 << 12);
+ if(data_size % (1 << 12))
+ {
+ page_num++;
+ }
+ // TODO: the memory page size depends on CC.MPS; 4 KiB pages (MPS = 0) are assumed here
+ DPRINTF("all size 0x%x, page num 0x%x\n", data_size, page_num);
+ DPRINTF("mod 0x%x\n", data_size % (1 << 12));
+ DPRINTF("PRP 0x%08lx 0x%08lx\n", command->prp1, command->prp2);
+ DPRINTF("CDW0 0x%x\n", command->rsvd1);
+
+ struct blockif_req* breq = &nreq->io_req;
+ breq->br_offset = 0;
+
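+ /*
+ * PRP handling: if PRP2 is zero the transfer fits in the single page
+ * described by PRP1; for exactly two pages PRP2 is the second page;
+ * for more than two pages PRP2 points to a PRP list.
+ */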
+ if(command->prp2 == 0) {
+
+ breq->br_iov[0].iov_base =
+ paddr_guest2host(sc->pi->pi_vmctx, command->prp1, data_size);
+ breq->br_iov[0].iov_len = data_size;
+ breq->br_iovcnt = 1;
+ }
+ else {
+
+ if(page_num == 2) {
+ // page size (4k)
+ int size = 1 << 12;
+ breq->br_iov[0].iov_base =
+ paddr_guest2host(sc->pi->pi_vmctx, command->prp1, size);
+ breq->br_iov[0].iov_len = size;
+
+ breq->br_iov[1].iov_base =
+ paddr_guest2host(sc->pi->pi_vmctx, command->prp2, size);
+ breq->br_iov[1].iov_len = size;
+ }
+ else if(page_num > 2) {
+
+ int prp1_offset = (command->prp1 & 0xfff);
+ int prp1_size = (1 << 12) - prp1_offset;
+
+ breq->br_iov[0].iov_base =
+ paddr_guest2host(sc->pi->pi_vmctx, command->prp1, prp1_size);
+ breq->br_iov[0].iov_len = prp1_size;
+
+ data_size -= 1 << 12;
+
+ uint64_t* prp_list = paddr_guest2host(
+ sc->pi->pi_vmctx,
+ command->prp2,
+ sizeof(uint64_t) * (page_num - 1));
+
+ DPRINTF("prp1 : 0x%lx, 0x%x\n", command->prp1, prp1_size);
+
+ // TODO: be careful not to exceed BLOCKIF_IOV_MAX entries in br_iov[]
+ for(int i=1; i< page_num; ++i)
+ {
+ int size = (1<<12);
+ if(data_size < (1 << 12))
+ {
+ assert(i == (page_num - 1));
+ size = data_size;
+ }
+ breq->br_iov[i].iov_base =
+ paddr_guest2host(sc->pi->pi_vmctx, prp_list[i - 1], size);
+ breq->br_iov[i].iov_len = size;
+
+ data_size -= size;
+ DPRINTF("prp2[%d] : 0x%lx, 0x%x\n", i, prp_list[i - 1], size);
+ }
+ DPRINTF("last data size: 0x%x\n", data_size);
+ DPRINTF("page num 0x%x", page_num);
+ }
+ else {
+ breq->br_iov[0].iov_base =
+ paddr_guest2host(sc->pi->pi_vmctx, command->prp1,
+ number_of_lb * logic_block_size);
+
+ breq->br_iov[0].iov_len = number_of_lb * logic_block_size;
+ }
+ }
+ breq->br_iovcnt = page_num;
+ breq->br_offset += starting_lba * logic_block_size;
+ breq->br_resid = number_of_lb * logic_block_size;
+ breq->br_callback = pci_nvme_blockif_ioreq_cb;
+ breq->br_param = nreq;
+
+ TAILQ_INSERT_HEAD(&sq_info->iobhd, nreq, io_blist);
+
+ switch (command->opc) {
+ case NVME_OPC_READ:
+ err = blockif_read(sc->bctx, breq);
+ break;
+ case NVME_OPC_WRITE:
+ err = blockif_write(sc->bctx, breq);
+ break;
+ default:
+ //TODO
+ assert(0 && "??");
+ }
+
+ assert(err == 0 && "blockif_read or blockif_write failed");
+}
+
+/* static void nvme_nvm_command_flush(struct pci_nvme_softc* sc, */
+/* struct nvme_submission_queue_info* sq_info, */
+/* uint16_t cid, */
+/* uint16_t sqhd) */
+/* { */
+/* assert("not yet implemented"); */
+/* struct nvme_completion_queue_info* cq_info = */
+/* &sc->cqs_info[sq_info->completion_qid]; */
+
+/* pthread_mutex_lock(&cq_info->mtx); */
+/* struct nvme_completion* completion_entry = */
+/* (struct nvme_completion*)(cq_info->base_addr + */
+/* sizeof(struct nvme_completion) * */
+/* cq_info->head); */
+
+/* completion_entry->sqhd = sqhd; */
+/* completion_entry->sqid = sq_info->qid; */
+/* completion_entry->cid = cid; */
+/* completion_entry->status.sct = 0x0; */
+/* completion_entry->status.sc = 0x00; */
+/* completion_entry->status.p = !completion_entry->status.p; */
+
+/* DPRINTF("cid: 0x%x, cqid: 0x%x, cq_info->head 0x%x, cq_info->size 0x%x\n", */
+/* cid, sq_info->completion_qid, cq_info->head, cq_info->size); */
+/* cq_info->head++; */
+/* if (cq_info->head == cq_info->size) { */
+/* cq_info->head = 0; */
+/* } */
+/* pthread_mutex_unlock(&cq_info->mtx); */
+
+/* pci_generate_msix(sc->pi, sq_info->completion_qid); */
+/* } */
+
+static void pci_nvme_execute_nvme_command(struct pci_nvme_softc* sc,
+ uint16_t qid,
+ uint64_t sq_tail)
+{
+ struct nvme_submission_queue_info* sq_info = &sc->sqs_info[qid];
+
+ uint16_t command_index = sq_tail - 1;
+ uint16_t sqhd = sq_tail - 1;
+ if (sq_tail == 0x0) {
+ command_index = sq_info->size;
+ sqhd = sq_info->size;
+ }
+
+ struct nvme_command* command =
+ (struct nvme_command*)(sq_info->base_addr +
+ sizeof(struct nvme_command) * (command_index));
+
+ /* uint16_t completion_qid = sq_info->completion_qid; */
+ /* struct nvme_completion_queue_info *cq_info =
+ * &sc->cqs_info[completion_qid]; */
+
+ DPRINTF("***** nvm command 0x%x *****\n", command->opc);
+ DPRINTF("opc: 0x%x, cid: 0x%x, nsid: 0x%x, qid: 0x%x, sq_tail 0x%lx\n",
+ command->opc, command->cid, command->nsid, qid, sq_tail);
+
+ switch (command->opc) {
+ case NVME_OPC_READ:
+ case NVME_OPC_WRITE:
+ nvme_nvm_command_read_write(sc, command, sq_info, sqhd);
+ return;
+ /* case NVME_OPC_FLUSH: */
+ /* nvme_nvm_command_flush(sc, sq_info, command->cid, sqhd);
+ */
+ /* return; */
+ /* case NVME_OPC_DATASET_MANAGEMENT: */
+ /* { */
+ /* pthread_mutex_lock(&cq_info->mtx); */
+ /* struct nvme_completion* completion_entry = */
+ /* pci_nvme_acquire_completion_entry(sc, cq_info);
+ */
+ /* completion_entry->status.sc = 0x00; */
+ /* completion_entry->status.sct = 0x0; */
+ /* pthread_mutex_unlock(&cq_info->mtx); */
+
+ /* pci_generate_msix(sc->pi, sq_info->completion_qid);
+ */
+ /* return; */
+ /* } */
+
+ default:
+ assert(0 && "the nvme command is not implemented yet");
+ }
+}
+
+static void pci_nvme_write_bar_0(struct vmctx* ctx,
+ struct pci_nvme_softc* sc,
+ uint64_t regoff,
+ uint64_t value,
+ int size)
+{
+ // write value to a doorbell register
+ // TODO: the valid doorbell address range depends on the number of queue
+ // pairs.
+ if (regoff >= NVME_CR_IO_QUEUE_BASE && regoff < NVME_CR_SIZE) {
+ int queue_offset = regoff - NVME_CR_IO_QUEUE_BASE;
+ DPRINTF("regoff 0x%lx\n", regoff);
+ // each queue pair has two 4-byte doorbells: SQ tail, then CQ head
+ int qid = queue_offset / 8;
+ int is_completion = (queue_offset / 4) % 2;
+
+ // completion doorbell
+ if (is_completion) {
+ DPRINTF("completion doorbell (%d) is knocked, cqhd 0x%x\n",
+ qid, (uint16_t)value);
+ if (qid == 0) {
+ // TODO
+ }
+ else {
+ // TODO
+ }
+ return;
+ }
+ // submission doorbell
+ else {
+ // admin command
+ if (qid == 0) {
+ pci_nvme_execute_admin_command(sc, value);
+ return;
+ }
+ // nvme command
+ // TODO validate qid
+ else {
+ pci_nvme_execute_nvme_command(sc, qid, value);
+ return;
+ }
+ }
+ assert(0);
+ }
+
+ DPRINTF("write 0x%lx, value %lx \n", regoff, value);
+ assert(size == 4 && "word size should be 4 (bytes)");
+ switch (regoff) {
+ case NVME_CR_CC:
+
+ // enable (EN) bit in the controller configuration register
+ /* - 0 -> 1
+ * - When the controller is ready to process commands, it
+ * sets CSTS.RDY to '1'.
+ */
+ if ((sc->regs.cc.bits.en == 0) && (value & NVME_CC_EN)) {
+ pci_nvme_reset(sc);
+ pci_nvme_setup_controller(ctx, sc);
+ DPRINTF("nvme up\n");
+ }
+
+ // TODO
+ /* - 1 -> 0
+ * Controller Reset
+ * - delete all I/O Submission queues and Completion queues
+ * - reset the admin submission queue and completion queue
+ * - move to idle state
+ * - all controller registers are reset to their default values
+ * (defined in section 5.21.1)
+ * - CSTS.RDY bit is cleared to '0'.
+ */
+ if ((sc->regs.cc.bits.en == 1) && !(value & NVME_CC_EN)) {
+ DPRINTF("nvme down\n");
+ sc->regs.csts.bits.rdy = 0;
+ // TODO
+ /* assert(0); */
+ }
+
+ switch((value & NVME_CC_SHN) >> 14)
+ {
+ case NVME_SHN_NOEFCT:
+ break;
+ case NVME_SHN_NORMAL:
+ sc->regs.csts.bits.shst = NVME_SHST_COMPLETE;
+ break;
+ case NVME_SHN_ABRUPT:
+ default:
+ assert(0 && "not yet implemented");
+ }
+
+ sc->regs.cc.raw = (uint32_t)value;
+ return;
+
+ case NVME_CR_AQA:
+ sc->regs.aqa.raw = (uint32_t)value;
+ return;
+
+ case NVME_CR_ASQ_LOW:
+ sc->regs.asq =
+ (sc->regs.asq & 0xffffffff00000000) | (0xfffff000 & value);
+ return;
+
+ case NVME_CR_ASQ_HI:
+ sc->regs.asq = (sc->regs.asq & 0x00000000ffffffff) | (value << 32);
+ return;
+
+ case NVME_CR_ACQ_LOW:
+ sc->regs.acq =
+ (sc->regs.acq & 0xffffffff00000000) | (0xfffff000 & value);
+ return;
+
+ case NVME_CR_ACQ_HI:
+ sc->regs.acq = (sc->regs.acq & 0x00000000ffffffff) | (value << 32);
+ return;
+
+ default:
+ DPRINTF("unknown regoff 0x%lx with value 0x%lx, size %d in %s\n",
+ regoff, value, size, __func__);
+ assert(0);
+ }
+}
+
+static void pci_nvme_write(struct vmctx* ctx,
+ int vcpu,
+ struct pci_devinst* pi,
+ int baridx,
+ uint64_t regoff,
+ int size,
+ uint64_t value)
+{
+ struct pci_nvme_softc* sc = pi->pi_arg;
+
+ if (baridx == pci_msix_table_bar(pi) || baridx == pci_msix_pba_bar(pi)) {
+ DPRINTF("baridx, %d, msix: regoff 0x%lx, size %d, value %lx\n", baridx,
+ regoff, size, value);
+ pci_emul_msix_twrite(pi, regoff, size, value);
+ return;
+ }
+
+ switch (baridx) {
+ case 0:
+ pci_nvme_write_bar_0(ctx, sc, regoff, value, size);
+ break;
+
+ default:
+ DPRINTF("unknown baridx %d with 0x%lx in %s\n", baridx, value,
+ __func__);
+ assert(0);
+ }
+}
+
+static uint64_t pci_nvme_read_bar_0(
+ struct pci_nvme_softc* sc,
+ enum nvme_controller_register_offsets offset,
+ int size)
+{
+ DPRINTF("read 0x%x\n", offset);
+ assert(size == 4 && "word size should be 4.");
+ switch (offset) {
+ case NVME_CR_CAP_LOW:
+ return (uint64_t)sc->regs.cap_lo.raw;
+
+ case NVME_CR_CAP_HI:
+ return (uint64_t)sc->regs.cap_hi.raw;
+
+ case NVME_CR_VS:
+ return (uint64_t)sc->regs.vs;
+
+ case NVME_CR_CC:
+ return (uint64_t)sc->regs.cc.raw;
+
+ case NVME_CR_CSTS:
+ DPRINTF("CSTS raw 0x%x\n", sc->regs.csts.raw);
+ return (uint64_t)sc->regs.csts.raw;
+
+ default:
+ DPRINTF("unknown regoff value: 0x%x, size %d in %s\n", offset, size,
+ __func__);
+ assert(0);
+ }
+
+ return 0;
+}
+
+static uint64_t pci_nvme_read(struct vmctx* ctx,
+ int vcpu,
+ struct pci_devinst* pi,
+ int baridx,
+ uint64_t regoff,
+ int size)
+{
+ struct pci_nvme_softc* sc = pi->pi_arg;
+
+ if (baridx == pci_msix_table_bar(pi) || baridx == pci_msix_pba_bar(pi)) {
+ DPRINTF("baridx: %d, msix: regoff 0x%lx, size %d\n", baridx, regoff,
+ size);
+ return pci_emul_msix_tread(pi, regoff, size);
+ }
+
+ switch (baridx) {
+ case 0:
+ return pci_nvme_read_bar_0(sc, regoff, size);
+
+ default:
+ DPRINTF("unknown bar %d, 0x%lx\n", baridx, regoff);
+ assert(0);
+ }
+
+ return 0;
+}
+
+struct pci_devemu pci_de_nvme = {
+ .pe_emu = "nvme",
+ .pe_init = pci_nvme_init,
+ .pe_barwrite = pci_nvme_write,
+ .pe_barread = pci_nvme_read
+};
+PCI_EMUL_SET(pci_de_nvme);
