Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F107435728
D44720.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
28 KB
Referenced Files
None
Subscribers
None
D44720.diff
View Options
diff --git a/sys/cam/ctl/ctl.c b/sys/cam/ctl/ctl.c
--- a/sys/cam/ctl/ctl.c
+++ b/sys/cam/ctl/ctl.c
@@ -83,6 +83,7 @@
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_private.h>
#include <cam/ctl/ctl_debug.h>
+#include <cam/ctl/ctl_nvme_all.h>
#include <cam/ctl/ctl_scsi_all.h>
#include <cam/ctl/ctl_error.h>
@@ -447,6 +448,8 @@
static void ctl_failover_lun(union ctl_io *io);
static void ctl_scsiio_precheck(struct ctl_scsiio *ctsio);
static int ctl_scsiio(struct ctl_scsiio *ctsio);
+static void ctl_nvmeio_precheck(struct ctl_nvmeio *ctnio);
+static int ctl_nvmeio(struct ctl_nvmeio *ctnio);
static int ctl_target_reset(union ctl_io *io);
static void ctl_do_lun_reset(struct ctl_lun *lun, uint32_t initidx,
@@ -4963,7 +4966,6 @@
int retval;
CTL_DEBUG_PRINT(("ctl_config_move_done\n"));
- CTL_IO_ASSERT(io, SCSI);
if (ctl_debug & CTL_DEBUG_CDB_DATA)
ctl_data_print(io);
@@ -4998,7 +5000,17 @@
* XXX KDM call ctl_scsiio() again for now, and check flag
* bits to see whether we're allocated or not.
*/
- retval = ctl_scsiio(&io->scsiio);
+ switch (io->io_hdr.io_type) {
+ case CTL_IO_SCSI:
+ retval = ctl_scsiio(&io->scsiio);
+ break;
+ case CTL_IO_NVME:
+ case CTL_IO_NVME_ADMIN:
+ retval = ctl_nvmeio(&io->nvmeio);
+ break;
+ default:
+ __assert_unreachable();
+ }
}
return (retval);
}
@@ -10598,6 +10610,725 @@
return (CTL_RETVAL_COMPLETE);
}
+/*
+ * Decode the starting LBA (CDW11:CDW10) and the block count (the
+ * zero-based value in the low 16 bits of CDW12) of an NVM command
+ * that addresses an LBA range.
+ *
+ * Returns true with *lba and *len filled in for the opcodes listed
+ * below.  For any other opcode both outputs are zeroed and false is
+ * returned.
+ */
+static bool
+ctl_nvme_get_lba_len(struct ctl_nvmeio *ctnio, uint64_t *lba, uint32_t *len)
+{
+	uint32_t lba_lo, lba_hi, nlb;
+
+	CTL_IO_ASSERT(ctnio, NVME);
+
+	switch (ctnio->cmd.opc) {
+	case NVME_OPC_READ:
+	case NVME_OPC_WRITE:
+	case NVME_OPC_WRITE_UNCORRECTABLE:
+	case NVME_OPC_WRITE_ZEROES:
+	case NVME_OPC_COMPARE:
+	case NVME_OPC_VERIFY:
+		break;
+	default:
+		/* Not a ranged command: hand back an empty range. */
+		*lba = 0;
+		*len = 0;
+		return (false);
+	}
+
+	lba_lo = le32toh(ctnio->cmd.cdw10);
+	lba_hi = le32toh(ctnio->cmd.cdw11);
+	nlb = le32toh(ctnio->cmd.cdw12) & 0xffff;
+
+	*lba = ((uint64_t)lba_hi << 32) | lba_lo;
+	*len = nlb + 1;
+	return (true);
+}
+
+/*
+ * Report whether bit 30 of CDW12 is set in the command.  Callers in
+ * this file translate a true result into CTL_LLF_FUA.
+ */
+static bool
+ctl_nvme_fua(struct ctl_nvmeio *ctnio)
+{
+	const uint32_t cdw12 = le32toh(ctnio->cmd.cdw12);
+
+	return (((cdw12 >> 30) & 1) != 0);
+}
+
+/*
+ * Execute an NVMe Identify admin command.
+ *
+ * A 4096-byte kernel buffer is allocated for the response.  With a LUN
+ * attached, the request is handed to the backend's config_read method
+ * and that method's return value is passed through.  Without a LUN,
+ * only CNS == 0 is answered (with an all-zero page); any other CNS
+ * value is completed with an Invalid Field status.
+ *
+ * Returns CTL_RETVAL_COMPLETE on both no-LUN paths.
+ */
+int
+ctl_nvme_identify(struct ctl_nvmeio *ctnio)
+{
+	struct ctl_lun *lun = CTL_LUN(ctnio);
+	size_t len;
+	uint8_t cns;
+
+	CTL_DEBUG_PRINT(("ctl_nvme_identify\n"));
+
+	CTL_IO_ASSERT(ctnio, NVME_ADMIN);
+	MPASS(ctnio->cmd.opc == NVME_OPC_IDENTIFY);
+
+	/*
+	 * Without a LUN only CNS == 0 can be serviced.  Fail anything
+	 * else before allocating the data buffer, and complete the I/O
+	 * here via ctl_done(): no data move is started on this path, so
+	 * nothing else would ever finish the request.
+	 */
+	if (lun == NULL) {
+		cns = le32toh(ctnio->cmd.cdw10) & 0xff;
+		if (cns != 0) {
+			ctl_nvme_set_invalid_field(ctnio);
+			ctl_done((union ctl_io *)ctnio);
+			return (CTL_RETVAL_COMPLETE);
+		}
+	}
+
+	/*
+	 * The data buffer for Identify is always 4096 bytes, see
+	 * 5.51.1 in NVMe base specification 1.4.
+	 */
+	len = 4096;
+
+	ctnio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK);
+	ctnio->kern_data_len = len;
+	ctnio->kern_total_len = len;
+	ctnio->kern_rel_offset = 0;
+	ctnio->kern_sg_entries = 0;
+
+	ctl_nvme_set_success(ctnio);
+	ctnio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
+	ctnio->be_move_done = ctl_config_move_done;
+
+	/*
+	 * If we don't have a LUN, return an empty result for CNS == 0.
+	 */
+	if (lun == NULL) {
+		memset(ctnio->kern_data_ptr, 0, len);
+		ctl_datamove((union ctl_io *)ctnio);
+		return (CTL_RETVAL_COMPLETE);
+	}
+
+	return (lun->backend->config_read((union ctl_io *)ctnio));
+}
+
+/*
+ * Execute an NVMe Flush command by forwarding it to the backend's
+ * config_write method.  No LBA range is decoded: an NVMe flush covers
+ * the whole namespace.
+ *
+ * Returns whatever the backend's config_write method returns.
+ */
+int
+ctl_nvme_flush(struct ctl_nvmeio *ctnio)
+{
+	struct ctl_lun *lun;
+
+	CTL_DEBUG_PRINT(("ctl_nvme_flush\n"));
+
+	CTL_IO_ASSERT(ctnio, NVME);
+	MPASS(ctnio->cmd.opc == NVME_OPC_FLUSH);
+
+	lun = CTL_LUN(ctnio);
+	return (lun->backend->config_write((union ctl_io *)ctnio));
+}
+
+/*
+ * Execute an NVMe Read or Write command.
+ *
+ * The LBA range is decoded and bounds-checked against the LUN, the
+ * range and transfer flags are stored in the I/O's CTL_PRIV_LBA_LEN
+ * private area, and the request is submitted through the backend's
+ * data_submit method.
+ *
+ * Returns CTL_RETVAL_COMPLETE after failing an out-of-range request,
+ * otherwise the backend's data_submit return value.
+ */
+int
+ctl_nvme_read_write(struct ctl_nvmeio *ctnio)
+{
+	struct ctl_lun *lun = CTL_LUN(ctnio);
+	struct ctl_lba_len_flags *range;
+	uint64_t first_lba, end_lba;
+	uint32_t nblocks;
+	int llf;
+	bool reading;
+
+	CTL_DEBUG_PRINT(("ctl_nvme_read_write: command: %#x\n",
+	    ctnio->cmd.opc));
+
+	CTL_IO_ASSERT(ctnio, NVME);
+	MPASS(ctnio->cmd.opc == NVME_OPC_WRITE ||
+	    ctnio->cmd.opc == NVME_OPC_READ);
+
+	reading = (ctnio->cmd.opc == NVME_OPC_READ);
+	ctl_nvme_get_lba_len(ctnio, &first_lba, &nblocks);
+
+	/*
+	 * Fail the request if the end of the range wrapped around or
+	 * lies beyond the last LBA of the LUN.
+	 */
+	end_lba = first_lba + nblocks;
+	if (end_lba < first_lba || end_lba > lun->be_lun->maxlba + 1) {
+		ctl_nvme_set_lba_out_of_range(ctnio);
+		ctl_done((union ctl_io *)ctnio);
+		return (CTL_RETVAL_COMPLETE);
+	}
+
+	/*
+	 * Carry the command's FUA bit through to the backend; a read
+	 * with FUA additionally gets DPO.
+	 *
+	 * For a read this may not be quite correct for the block
+	 * backend as any earlier writes to the LBA range should be
+	 * flushed to backing store as part of the read.
+	 */
+	llf = reading ? CTL_LLF_READ : CTL_LLF_WRITE;
+	if (ctl_nvme_fua(ctnio)) {
+		llf |= CTL_LLF_FUA;
+		if (reading)
+			llf |= CTL_LLF_DPO;
+	}
+
+	range = (struct ctl_lba_len_flags *)
+	    &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
+	range->lba = first_lba;
+	range->len = nblocks;
+	range->flags = llf;
+
+	ctnio->kern_total_len = nblocks * lun->be_lun->blocksize;
+	ctnio->kern_rel_offset = 0;
+
+	CTL_DEBUG_PRINT(("ctl_nvme_read_write: calling data_submit()\n"));
+
+	return (lun->backend->data_submit((union ctl_io *)ctnio));
+}
+
+/*
+ * Execute an NVMe Write Uncorrectable command.
+ *
+ * The LBA range is decoded and bounds-checked, recorded (with no
+ * flags) in the I/O's CTL_PRIV_LBA_LEN private area, and the request
+ * is passed to the backend's config_write method.
+ *
+ * Returns CTL_RETVAL_COMPLETE after failing an out-of-range request,
+ * otherwise the backend's config_write return value.
+ */
+int
+ctl_nvme_write_uncorrectable(struct ctl_nvmeio *ctnio)
+{
+	struct ctl_lun *lun = CTL_LUN(ctnio);
+	struct ctl_lba_len_flags *range;
+	uint64_t first_lba, end_lba;
+	uint32_t nblocks;
+
+	CTL_DEBUG_PRINT(("ctl_nvme_write_uncorrectable\n"));
+
+	CTL_IO_ASSERT(ctnio, NVME);
+	MPASS(ctnio->cmd.opc == NVME_OPC_WRITE_UNCORRECTABLE);
+
+	ctl_nvme_get_lba_len(ctnio, &first_lba, &nblocks);
+
+	/*
+	 * Fail the request if the end of the range wrapped around or
+	 * lies beyond the last LBA of the LUN.
+	 */
+	end_lba = first_lba + nblocks;
+	if (end_lba < first_lba || end_lba > lun->be_lun->maxlba + 1) {
+		ctl_nvme_set_lba_out_of_range(ctnio);
+		ctl_done((union ctl_io *)ctnio);
+		return (CTL_RETVAL_COMPLETE);
+	}
+
+	range = (struct ctl_lba_len_flags *)
+	    &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
+	range->lba = first_lba;
+	range->len = nblocks;
+	range->flags = 0;
+
+	return (lun->backend->config_write((union ctl_io *)ctnio));
+}
+
+/*
+ * Execute an NVMe Compare command.
+ *
+ * The LBA range is decoded and bounds-checked, recorded together with
+ * CTL_LLF_COMPARE (plus CTL_LLF_FUA when the command requests it) in
+ * the I/O's CTL_PRIV_LBA_LEN private area, and the request is
+ * submitted through the backend's data_submit method.
+ *
+ * Returns CTL_RETVAL_COMPLETE after failing an out-of-range request,
+ * otherwise the backend's data_submit return value.
+ */
+int
+ctl_nvme_compare(struct ctl_nvmeio *ctnio)
+{
+	struct ctl_lun *lun = CTL_LUN(ctnio);
+	struct ctl_lba_len_flags *range;
+	uint64_t first_lba, end_lba;
+	uint32_t nblocks;
+	int llf;
+
+	CTL_DEBUG_PRINT(("ctl_nvme_compare\n"));
+
+	CTL_IO_ASSERT(ctnio, NVME);
+	MPASS(ctnio->cmd.opc == NVME_OPC_COMPARE);
+
+	ctl_nvme_get_lba_len(ctnio, &first_lba, &nblocks);
+
+	llf = CTL_LLF_COMPARE;
+	if (ctl_nvme_fua(ctnio))
+		llf |= CTL_LLF_FUA;
+
+	/*
+	 * Fail the request if the end of the range wrapped around or
+	 * lies beyond the last LBA of the LUN.
+	 */
+	end_lba = first_lba + nblocks;
+	if (end_lba < first_lba || end_lba > lun->be_lun->maxlba + 1) {
+		ctl_nvme_set_lba_out_of_range(ctnio);
+		ctl_done((union ctl_io *)ctnio);
+		return (CTL_RETVAL_COMPLETE);
+	}
+
+	range = (struct ctl_lba_len_flags *)
+	    &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
+	range->lba = first_lba;
+	range->len = nblocks;
+	range->flags = llf;
+
+	ctnio->kern_total_len = nblocks * lun->be_lun->blocksize;
+	ctnio->kern_rel_offset = 0;
+
+	CTL_DEBUG_PRINT(("ctl_nvme_compare: calling data_submit()\n"));
+	return (lun->backend->data_submit((union ctl_io *)ctnio));
+}
+
+/*
+ * Execute an NVMe Write Zeroes command.
+ *
+ * The LBA range is decoded and bounds-checked, recorded (with no
+ * flags) in the I/O's CTL_PRIV_LBA_LEN private area, and the request
+ * is passed to the backend's config_write method.
+ *
+ * Returns CTL_RETVAL_COMPLETE after failing an out-of-range request,
+ * otherwise the backend's config_write return value.
+ */
+int
+ctl_nvme_write_zeroes(struct ctl_nvmeio *ctnio)
+{
+	struct ctl_lun *lun = CTL_LUN(ctnio);
+	struct ctl_lba_len_flags *range;
+	uint64_t first_lba, end_lba;
+	uint32_t nblocks;
+
+	CTL_DEBUG_PRINT(("ctl_nvme_write_zeroes\n"));
+
+	CTL_IO_ASSERT(ctnio, NVME);
+	MPASS(ctnio->cmd.opc == NVME_OPC_WRITE_ZEROES);
+
+	ctl_nvme_get_lba_len(ctnio, &first_lba, &nblocks);
+
+	/*
+	 * Fail the request if the end of the range wrapped around or
+	 * lies beyond the last LBA of the LUN.
+	 */
+	end_lba = first_lba + nblocks;
+	if (end_lba < first_lba || end_lba > lun->be_lun->maxlba + 1) {
+		ctl_nvme_set_lba_out_of_range(ctnio);
+		ctl_done((union ctl_io *)ctnio);
+		return (CTL_RETVAL_COMPLETE);
+	}
+
+	range = (struct ctl_lba_len_flags *)
+	    &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
+	range->lba = first_lba;
+	range->len = nblocks;
+	range->flags = 0;
+
+	return (lun->backend->config_write((union ctl_io *)ctnio));
+}
+
+/*
+ * Execute an NVMe Dataset Management command.
+ *
+ * The function is entered twice.  On the first pass (CTL_FLAG_ALLOCATED
+ * clear) it allocates a buffer for the range list and starts a data
+ * move to fetch it, with ctl_config_move_done as the completion hook.
+ * On the second pass it checks that the list arrived as one flat
+ * buffer of the expected size, bounds-checks every range against the
+ * LUN, and hands the request to the backend's config_write method.
+ *
+ * Returns CTL_RETVAL_COMPLETE after starting the data move or after
+ * failing an out-of-range request, CTL_RETVAL_ERROR if the buffer does
+ * not have the expected shape, otherwise the backend's config_write
+ * return value.
+ */
+int
+ctl_nvme_dataset_management(struct ctl_nvmeio *ctnio)
+{
+	struct ctl_lun *lun = CTL_LUN(ctnio);
+	struct nvme_dsm_range *r;
+	uint64_t lba;
+	uint32_t len, num_blocks;
+	u_int i, ranges;
+
+	CTL_DEBUG_PRINT(("ctl_nvme_dataset_management\n"));
+
+	CTL_IO_ASSERT(ctnio, NVME);
+	MPASS(ctnio->cmd.opc == NVME_OPC_DATASET_MANAGEMENT);
+
+	/*
+	 * The Number of Ranges field (CDW10 bits 7:0) is a 0's based
+	 * value, so a field value of N describes N + 1 ranges.
+	 */
+	ranges = (le32toh(ctnio->cmd.cdw10) & 0xff) + 1;
+	len = ranges * sizeof(struct nvme_dsm_range);
+
+	/*
+	 * If we've got a kernel request that hasn't been malloced yet,
+	 * malloc it and tell the caller the data buffer is here.
+	 */
+	if ((ctnio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) {
+		ctnio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK);
+		ctnio->kern_data_len = len;
+		ctnio->kern_total_len = len;
+		ctnio->kern_rel_offset = 0;
+		ctnio->kern_sg_entries = 0;
+		ctnio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
+		ctnio->be_move_done = ctl_config_move_done;
+		ctl_datamove((union ctl_io *)ctnio);
+
+		return (CTL_RETVAL_COMPLETE);
+	}
+
+	/*
+	 * Require a flat buffer of the correct size.
+	 */
+	if (ctnio->kern_sg_entries > 0 ||
+	    ctnio->kern_total_len - ctnio->kern_data_resid != len)
+		return (CTL_RETVAL_ERROR);
+
+	/*
+	 * Verify that none of the ranges are out of bounds.  The second
+	 * half of the test catches wrap-around of lba + num_blocks.
+	 */
+	r = (struct nvme_dsm_range *)ctnio->kern_data_ptr;
+	for (i = 0; i < ranges; i++) {
+		lba = le64toh(r[i].starting_lba);
+		num_blocks = le32toh(r[i].length);
+		if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
+		    || ((lba + num_blocks) < lba)) {
+			ctl_nvme_set_lba_out_of_range(ctnio);
+			ctl_done((union ctl_io *)ctnio);
+			return (CTL_RETVAL_COMPLETE);
+		}
+	}
+
+	CTL_DEBUG_PRINT(("ctl_nvme_dataset_management: calling config_write()\n"));
+	return (lun->backend->config_write((union ctl_io *)ctnio));
+}
+
+/*
+ * Execute an NVMe Verify command.
+ *
+ * The LBA range is decoded and bounds-checked, recorded together with
+ * CTL_LLF_VERIFY (plus CTL_LLF_FUA when the command requests it) in
+ * the I/O's CTL_PRIV_LBA_LEN private area, and the request is
+ * submitted through the backend's data_submit method with a zero
+ * kernel transfer length.
+ *
+ * Returns CTL_RETVAL_COMPLETE after failing an out-of-range request,
+ * otherwise the backend's data_submit return value.
+ */
+int
+ctl_nvme_verify(struct ctl_nvmeio *ctnio)
+{
+	struct ctl_lun *lun = CTL_LUN(ctnio);
+	struct ctl_lba_len_flags *range;
+	uint64_t first_lba, end_lba;
+	uint32_t nblocks;
+	int llf;
+
+	CTL_DEBUG_PRINT(("ctl_nvme_verify\n"));
+
+	CTL_IO_ASSERT(ctnio, NVME);
+	MPASS(ctnio->cmd.opc == NVME_OPC_VERIFY);
+
+	ctl_nvme_get_lba_len(ctnio, &first_lba, &nblocks);
+
+	llf = CTL_LLF_VERIFY;
+	if (ctl_nvme_fua(ctnio))
+		llf |= CTL_LLF_FUA;
+
+	/*
+	 * Fail the request if the end of the range wrapped around or
+	 * lies beyond the last LBA of the LUN.
+	 */
+	end_lba = first_lba + nblocks;
+	if (end_lba < first_lba || end_lba > lun->be_lun->maxlba + 1) {
+		ctl_nvme_set_lba_out_of_range(ctnio);
+		ctl_done((union ctl_io *)ctnio);
+		return (CTL_RETVAL_COMPLETE);
+	}
+
+	range = (struct ctl_lba_len_flags *)
+	    &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
+	range->lba = first_lba;
+	range->len = nblocks;
+	range->flags = llf;
+
+	ctnio->kern_total_len = 0;
+	ctnio->kern_rel_offset = 0;
+
+	CTL_DEBUG_PRINT(("ctl_nvme_verify: calling data_submit()\n"));
+	return (lun->backend->data_submit((union ctl_io *)ctnio));
+}
+
+/*
+ * Look up the command table entry for an NVMe I/O.  The table is
+ * selected by I/O type (NVM vs. admin) and indexed by the command's
+ * opcode.  Any other I/O type is treated as unreachable.
+ */
+static const struct ctl_nvme_cmd_entry *
+ctl_nvme_get_cmd_entry(struct ctl_nvmeio *ctnio)
+{
+	const struct ctl_nvme_cmd_entry *table_entry;
+
+	if (ctnio->io_hdr.io_type == CTL_IO_NVME)
+		table_entry = &nvme_nvm_cmd_table[ctnio->cmd.opc];
+	else if (ctnio->io_hdr.io_type == CTL_IO_NVME_ADMIN)
+		table_entry = &nvme_admin_cmd_table[ctnio->cmd.opc];
+	else
+		__assert_unreachable();
+
+	return (table_entry);
+}
+
+/*
+ * Validate an incoming NVMe command before it is scheduled.
+ *
+ * The command must have an execute handler in its command table, and
+ * its fuse field must be consistent with the one fused operation that
+ * is supported, Compare and Write: the first half must be an NVM
+ * Compare and the second half must be an NVM Write.
+ *
+ * Returns the command table entry on success.  On failure an error
+ * status is set, the I/O is passed to ctl_done(), and NULL is
+ * returned.
+ */
+static const struct ctl_nvme_cmd_entry *
+ctl_nvme_validate_command(struct ctl_nvmeio *ctnio)
+{
+	const struct ctl_nvme_cmd_entry *entry;
+
+	entry = ctl_nvme_get_cmd_entry(ctnio);
+	if (entry->execute == NULL) {
+		ctl_nvme_set_invalid_opcode(ctnio);
+		ctl_done((union ctl_io *)ctnio);
+		return (NULL);
+	}
+
+	/* Validate fused commands. */
+	switch (NVMEV(NVME_CMD_FUSE, ctnio->cmd.fuse)) {
+	case NVME_FUSE_NORMAL:
+		break;
+	case NVME_FUSE_FIRST:
+		/* The first half of Compare and Write is the Compare. */
+		if (ctnio->io_hdr.io_type != CTL_IO_NVME ||
+		    ctnio->cmd.opc != NVME_OPC_COMPARE) {
+			ctl_nvme_set_invalid_field(ctnio);
+			ctl_done((union ctl_io *)ctnio);
+			return (NULL);
+		}
+		break;
+	case NVME_FUSE_SECOND:
+		/* The second half of Compare and Write is the Write. */
+		if (ctnio->io_hdr.io_type != CTL_IO_NVME ||
+		    ctnio->cmd.opc != NVME_OPC_WRITE) {
+			ctl_nvme_set_invalid_field(ctnio);
+			ctl_done((union ctl_io *)ctnio);
+			return (NULL);
+		}
+		break;
+	default:
+		ctl_nvme_set_invalid_field(ctnio);
+		ctl_done((union ctl_io *)ctnio);
+		return (NULL);
+	}
+
+	return (entry);
+}
+
+/*
+ * Reduced counterpart of ctl_scsiio_lun_check: the only condition
+ * tested is whether the LUN currently has usable media.
+ *
+ * A command whose table entry carries CTL_CMD_FLAG_OK_ON_NO_MEDIA
+ * always passes.  Otherwise, if the LUN is ejected, has no media, or
+ * is stopped, a Namespace Not Ready status is set on the I/O.
+ *
+ * Must be called with the LUN lock held.  Returns true if an error
+ * status was set (the caller is expected to complete the I/O), false
+ * if the command may proceed.
+ */
+static bool
+ctl_nvmeio_lun_check(struct ctl_lun *lun,
+    const struct ctl_nvme_cmd_entry *entry, struct ctl_nvmeio *ctnio)
+{
+	mtx_assert(&lun->lun_lock, MA_OWNED);
+
+	if ((entry->flags & CTL_CMD_FLAG_OK_ON_NO_MEDIA) != 0)
+		return (false);
+
+	if ((lun->flags & (CTL_LUN_EJECTED | CTL_LUN_NO_MEDIA |
+	    CTL_LUN_STOPPED)) == 0)
+		return (false);
+
+	ctl_nvme_set_namespace_not_ready(ctnio);
+	return (true);
+}
+
+/*
+ * Check for blockage against the OOA (Order Of Arrival) queue.
+ * Assumptions:
+ * - pending_io is generally either incoming, or on the blocked queue
+ * - starting I/O is the I/O we want to start the check with.
+ *
+ * Must be called with the LUN lock held.
+ *
+ * Return values:
+ * - CTL_ACTION_PASS: pending_io is aborted, or no second half of a
+ *   fused operation was found from *starting_io onward (*starting_io
+ *   is set to NULL in the latter case).
+ * - CTL_ACTION_FUSED: *starting_io is the first half of a fused
+ *   operation and pending_io is its second half.
+ * - CTL_ACTION_SKIP: pending_io is a first half with no first half
+ *   directly before it, or a second half without a first half (in
+ *   which case *aborted_io is set to pending_io).
+ * - CTL_ACTION_BLOCK: *starting_io has been updated to the second
+ *   half of a fused operation that pending_io must wait for.
+ *
+ * *aborted_io is always written: NULL, or an I/O on which an error
+ * status has been set here but ctl_done() has not yet been called.
+ */
+static ctl_action
+ctl_nvme_check_ooa(struct ctl_lun *lun, union ctl_io *pending_io,
+ union ctl_io **starting_io, union ctl_io **aborted_io)
+{
+ union ctl_io *ooa_io = *starting_io;
+
+ CTL_IO_ASSERT(pending_io, NVME, NVME_ADMIN);
+
+ mtx_assert(&lun->lun_lock, MA_OWNED);
+
+ *aborted_io = NULL;
+
+ /*
+ * Aborted commands are not going to be executed and may even
+ * not report completion, so we don't care about their order.
+ * Let them complete ASAP to clean the OOA queue.
+ */
+ if (__predict_false(pending_io->io_hdr.flags & CTL_FLAG_ABORT))
+ return (CTL_ACTION_PASS);
+
+ /*
+ * NVMe has rather simple command ordering requirements. In
+ * particular, there is no requirement on the controller to
+ * enforce a specific order for overlapping LBAs. The only
+ * constraint is that fused operations (Compare and Write),
+ * must be completed as a unit.
+ *
+ * To support fused operations, the following strategy is used:
+ * - the first half of a fused command is not enqueued to rtr
+ * until the second half is enqueued
+ * - the second half of a fused command blocks on the first
+ * half of a fuse command
+ * - subsequent commands block on the second half of the
+ * fused command
+ */
+
+ /*
+ * Is the previously submitted command the first half of a
+ * fused operation?
+ */
+ if (ooa_io != NULL &&
+ NVMEV(NVME_CMD_FUSE, ooa_io->nvmeio.cmd.fuse) == NVME_FUSE_FIRST) {
+ /*
+ * If this is the second half, enqueue the first half
+ * and block the second half on the first half.
+ */
+ if (NVMEV(NVME_CMD_FUSE, pending_io->nvmeio.cmd.fuse) ==
+ NVME_FUSE_SECOND) {
+ /*
+ * XXX: Do we need to wait for other rtr requests
+ * to drain so this is truly atomic?
+ */
+ return (CTL_ACTION_FUSED);
+ }
+
+ /*
+ * Abort the first half.
+ *
+ * NOTE(review): control falls through to the OOA scan
+ * below without looking at pending_io's own fuse field,
+ * so a pending first half is not answered with
+ * CTL_ACTION_SKIP on this path -- confirm this is
+ * intended.
+ */
+ ctl_nvme_set_missing_fused_command(&ooa_io->nvmeio);
+ *aborted_io = ooa_io;
+ } else {
+ switch (NVMEV(NVME_CMD_FUSE, pending_io->nvmeio.cmd.fuse)) {
+ case NVME_FUSE_FIRST:
+ /* First half, wait for the second half. */
+ return (CTL_ACTION_SKIP);
+ case NVME_FUSE_SECOND:
+ /* Second half without a matching first half, abort. */
+ ctl_nvme_set_missing_fused_command(&pending_io->nvmeio);
+ *aborted_io = pending_io;
+ return (CTL_ACTION_SKIP);
+ }
+ }
+
+ /*
+ * Scan the OOA queue looking for the most recent second half
+ * of a fused op.
+ */
+ for (; ooa_io != NULL;
+ ooa_io = (union ctl_io *)LIST_NEXT(&ooa_io->io_hdr, ooa_links)) {
+ if (NVMEV(NVME_CMD_FUSE, ooa_io->nvmeio.cmd.fuse) ==
+ NVME_FUSE_SECOND) {
+ *starting_io = ooa_io;
+ return (CTL_ACTION_BLOCK);
+ }
+ }
+
+ *starting_io = NULL;
+ return (CTL_ACTION_PASS);
+}
+
+/*
+ * First-stage processing of an incoming NVMe I/O: resolve the target
+ * LUN, insert the I/O on that LUN's OOA queue, validate the command,
+ * and then either queue it for execution, leave it held or blocked for
+ * fused-operation ordering, or complete it with an error.
+ *
+ * The LUN lock is taken here when a usable LUN is found and is dropped
+ * again on every return path.
+ */
+static void
+ctl_nvmeio_precheck(struct ctl_nvmeio *ctnio)
+{
+ struct ctl_softc *softc = CTL_SOFTC(ctnio);
+ struct ctl_lun *lun;
+ const struct ctl_nvme_cmd_entry *entry;
+ union ctl_io *bio, *aborted_io;
+ uint32_t targ_lun;
+
+ lun = NULL;
+ targ_lun = ctnio->io_hdr.nexus.targ_mapped_lun;
+ if (targ_lun < ctl_max_luns)
+ lun = softc->ctl_luns[targ_lun];
+ if (lun != NULL) {
+ /*
+ * If the LUN is invalid, pretend that it doesn't exist.
+ * It will go away as soon as all pending I/O has been
+ * completed.
+ */
+ mtx_lock(&lun->lun_lock);
+ if (lun->flags & CTL_LUN_DISABLED) {
+ mtx_unlock(&lun->lun_lock);
+ lun = NULL;
+ }
+ }
+ CTL_LUN(ctnio) = lun;
+ if (lun != NULL) {
+ CTL_BACKEND_LUN(ctnio) = lun->be_lun;
+
+ /*
+ * Every I/O goes into the OOA queue for a particular LUN,
+ * and stays there until completion.
+ */
+#ifdef CTL_TIME_IO
+ if (LIST_EMPTY(&lun->ooa_queue))
+ lun->idle_time += getsbinuptime() - lun->last_busy;
+#endif
+ LIST_INSERT_HEAD(&lun->ooa_queue, &ctnio->io_hdr, ooa_links);
+ }
+
+ /* Get command entry and return error if it is unsupported. */
+ entry = ctl_nvme_validate_command(ctnio);
+ if (entry == NULL) {
+ /* ctl_nvme_validate_command() already called ctl_done(). */
+ if (lun)
+ mtx_unlock(&lun->lun_lock);
+ return;
+ }
+
+ /* Replace the I/O's data direction flags with the table entry's. */
+ ctnio->io_hdr.flags &= ~CTL_FLAG_DATA_MASK;
+ ctnio->io_hdr.flags |= entry->flags & CTL_FLAG_DATA_MASK;
+
+ /* All NVMe commands other than IDENTIFY require a LUN. */
+ if (lun == NULL) {
+ if (entry->flags & CTL_CMD_FLAG_OK_ON_NO_LUN) {
+ ctnio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
+ ctl_enqueue_rtr((union ctl_io *)ctnio);
+ return;
+ }
+
+ ctl_nvme_set_invalid_namespace(ctnio);
+ ctl_done((union ctl_io *)ctnio);
+ CTL_DEBUG_PRINT(("ctl_nvmeio_precheck: bailing out due to invalid LUN\n"));
+ return;
+ } else {
+ /*
+ * NVMe namespaces can only be backed by T_DIRECT LUNs.
+ */
+ if (lun->be_lun->lun_type != T_DIRECT) {
+ mtx_unlock(&lun->lun_lock);
+ ctl_nvme_set_invalid_namespace(ctnio);
+ ctl_done((union ctl_io *)ctnio);
+ return;
+ }
+ }
+
+ if (ctl_nvmeio_lun_check(lun, entry, ctnio) != 0) {
+ mtx_unlock(&lun->lun_lock);
+ ctl_done((union ctl_io *)ctnio);
+ return;
+ }
+
+ /*
+ * This I/O was inserted at the head of the OOA queue above, so
+ * its successor is the command that arrived just before it.
+ */
+ bio = (union ctl_io *)LIST_NEXT(&ctnio->io_hdr, ooa_links);
+ switch (ctl_nvme_check_ooa(lun, (union ctl_io *)ctnio, &bio,
+ &aborted_io)) {
+ case CTL_ACTION_PASS:
+ ctnio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
+ mtx_unlock(&lun->lun_lock);
+ ctl_enqueue_rtr((union ctl_io *)ctnio);
+ break;
+ case CTL_ACTION_FUSED:
+ /* Block the second half on the first half. */
+ ctnio->io_hdr.blocker = bio;
+ TAILQ_INSERT_TAIL(&bio->io_hdr.blocked_queue, &ctnio->io_hdr,
+ blocked_links);
+
+ /* Pass the first half. */
+ bio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
+ mtx_unlock(&lun->lun_lock);
+ ctl_enqueue_rtr(bio);
+ break;
+ case CTL_ACTION_SKIP:
+ /* Neither queued nor blocked here; just drop the lock. */
+ mtx_unlock(&lun->lun_lock);
+ break;
+ case CTL_ACTION_BLOCK:
+ /* Wait behind the I/O that ctl_nvme_check_ooa() returned in bio. */
+ ctnio->io_hdr.blocker = bio;
+ TAILQ_INSERT_TAIL(&bio->io_hdr.blocked_queue, &ctnio->io_hdr,
+ blocked_links);
+ mtx_unlock(&lun->lun_lock);
+ break;
+ default:
+ __assert_unreachable();
+ }
+ /*
+ * Complete any I/O that ctl_nvme_check_ooa() marked with an error,
+ * now that the LUN lock has been released.
+ */
+ if (aborted_io != NULL)
+ ctl_done(aborted_io);
+}
+
+/*
+ * Execute an NVMe I/O that has already been through
+ * ctl_nvmeio_precheck().
+ *
+ * An I/O flagged CTL_FLAG_ABORT is completed through ctl_done()
+ * without being executed and CTL_RETVAL_COMPLETE is returned.
+ * Otherwise the execute handler from the command table is invoked and
+ * its return value is passed through.
+ */
+static int
+ctl_nvmeio(struct ctl_nvmeio *ctnio)
+{
+	const struct ctl_nvme_cmd_entry *cmd;
+
+	CTL_DEBUG_PRINT(("ctl_nvmeio %s opc=%02X\n",
+	    ctnio->io_hdr.io_type == CTL_IO_NVME ? "nvm" : "admin",
+	    ctnio->cmd.opc));
+
+	cmd = ctl_nvme_get_cmd_entry(ctnio);
+	MPASS(cmd != NULL);
+
+	/* Aborted I/O goes straight to completion. */
+	if ((ctnio->io_hdr.flags & CTL_FLAG_ABORT) != 0) {
+		ctl_done((union ctl_io *)ctnio);
+		return (CTL_RETVAL_COMPLETE);
+	}
+
+	/*
+	 * All the checks should have been handled by
+	 * ctl_nvmeio_precheck(), so just run the handler.
+	 */
+	return (cmd->execute(ctnio));
+}
+
/*
* For known CDB types, parse the LBA and length.
*/
@@ -11016,7 +11747,7 @@
* we know for sure that the blocker I/O does no longer count.
*/
static void
-ctl_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip)
+ctl_scsi_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip)
{
struct ctl_softc *softc = lun->ctl_softc;
union ctl_io *bio, *obio;
@@ -11111,6 +11842,72 @@
}
}
+/*
+ * Try to release an NVMe I/O from the blocked queue of its blocker.
+ *
+ * Does nothing if the I/O has no blocker.  Otherwise the I/O is taken
+ * off the blocker's blocked queue and is either completed with an
+ * error (the second half of a fused operation whose first half did
+ * not succeed, or a LUN that fails ctl_nvmeio_lun_check()) or queued
+ * for execution.
+ *
+ * Must be called with the LUN lock held.  The skip argument is not
+ * used by this function.
+ */
+static void
+ctl_nvme_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip)
+{
+ union ctl_io *bio;
+ const struct ctl_nvme_cmd_entry *entry;
+
+ CTL_IO_ASSERT(io, NVME, NVME_ADMIN);
+
+ mtx_assert(&lun->lun_lock, MA_OWNED);
+
+ if (io->io_hdr.blocker == NULL)
+ return;
+
+ /*
+ * If this is the second half of a fused operation, it should
+ * be the only io on the blocked list. If the first half
+ * failed, complete the second half with an appropriate error.
+ */
+ bio = io->io_hdr.blocker;
+ if (NVMEV(NVME_CMD_FUSE, io->nvmeio.cmd.fuse) == NVME_FUSE_SECOND) {
+ MPASS(io ==
+ (union ctl_io *)TAILQ_FIRST(&bio->io_hdr.blocked_queue));
+ MPASS(TAILQ_NEXT(&io->io_hdr, blocked_links) == NULL);
+
+ TAILQ_REMOVE(&bio->io_hdr.blocked_queue, &io->io_hdr,
+ blocked_links);
+ /*
+ * NOTE(review): this compares the whole status word rather
+ * than a masked status value -- confirm no flag bits can be
+ * set in bio's status here.
+ */
+ if (bio->io_hdr.status != CTL_SUCCESS) {
+ ctl_nvme_set_failed_fused_command(&io->nvmeio);
+ ctl_done(io);
+ return;
+ }
+ } else {
+ /*
+ * This must be a command that was blocked on the
+ * second half of a fused operation.
+ */
+ MPASS(NVMEV(NVME_CMD_FUSE, bio->nvmeio.cmd.fuse) ==
+ NVME_FUSE_SECOND);
+ TAILQ_REMOVE(&bio->io_hdr.blocked_queue, &io->io_hdr,
+ blocked_links);
+ }
+
+ /*
+ * NOTE(review): io->io_hdr.blocker still points at bio after the
+ * TAILQ_REMOVE above -- confirm nothing later relies on it having
+ * been reset to NULL.
+ */
+
+ /* Re-check LUN state, which may have changed while blocked. */
+ entry = ctl_nvme_get_cmd_entry(&io->nvmeio);
+ if (ctl_nvmeio_lun_check(lun, entry, &io->nvmeio) != 0) {
+ ctl_done(io);
+ return;
+ }
+
+ io->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
+ ctl_enqueue_rtr(io);
+}
+
+/*
+ * Try to unblock a single blocked I/O by dispatching to the SCSI or
+ * NVMe implementation according to the I/O type.  Any other I/O type
+ * is treated as unreachable.
+ */
+static void
+ctl_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip)
+{
+	/*
+	 * Both helpers return void, so call them as plain statements:
+	 * a return statement with an expression is not allowed in a
+	 * function returning void.
+	 */
+	switch (io->io_hdr.io_type) {
+	case CTL_IO_SCSI:
+		ctl_scsi_try_unblock_io(lun, io, skip);
+		break;
+	case CTL_IO_NVME:
+	case CTL_IO_NVME_ADMIN:
+		ctl_nvme_try_unblock_io(lun, io, skip);
+		break;
+	default:
+		__assert_unreachable();
+	}
+}
+
/*
* Try to unblock I/Os blocked by the specified I/O.
*
@@ -11824,6 +12621,7 @@
*/
LIST_FOREACH(xioh, &lun->ooa_queue, ooa_links) {
union ctl_io *xio = (union ctl_io *)xioh;
+
if ((targ_port == UINT32_MAX ||
targ_port == xioh->nexus.targ_port) &&
(init_id == UINT32_MAX ||
@@ -13196,7 +13994,22 @@
{
struct ctl_port *port = CTL_PORT(io);
- CTL_DEBUG_PRINT(("ctl_queue cdb[0]=%02X\n", io->scsiio.cdb[0]));
+ switch (io->io_hdr.io_type) {
+ case CTL_IO_SCSI:
+ case CTL_IO_TASK:
+ CTL_DEBUG_PRINT(("ctl_queue cdb[0]=%02X\n", io->scsiio.cdb[0]));
+ break;
+ case CTL_IO_NVME:
+ CTL_DEBUG_PRINT(("ctl_queue nvme nvm cmd=%02X\n",
+ io->nvmeio.cmd.opc));
+ break;
+ case CTL_IO_NVME_ADMIN:
+ CTL_DEBUG_PRINT(("ctl_queue nvme admin cmd=%02X\n",
+ io->nvmeio.cmd.opc));
+ break;
+ default:
+ break;
+ }
#ifdef CTL_TIME_IO
io->io_hdr.start_time = time_uptime;
@@ -13210,6 +14023,8 @@
switch (io->io_hdr.io_type) {
case CTL_IO_SCSI:
case CTL_IO_TASK:
+ case CTL_IO_NVME:
+ case CTL_IO_NVME_ADMIN:
if (ctl_debug & CTL_DEBUG_CDB)
ctl_io_print(io);
ctl_enqueue_incoming(io);
@@ -13249,6 +14064,12 @@
ctl_io_print(io);
ctl_run_task(io);
break;
+ case CTL_IO_NVME:
+ case CTL_IO_NVME_ADMIN:
+ if (ctl_debug & CTL_DEBUG_CDB)
+ ctl_io_print(io);
+ ctl_nvmeio_precheck(&io->nvmeio);
+ break;
default:
printf("ctl_run: unknown I/O type %d\n", io->io_hdr.io_type);
return (EINVAL);
@@ -13418,19 +14239,41 @@
if (io != NULL) {
STAILQ_REMOVE_HEAD(&thr->incoming_queue, links);
mtx_unlock(&thr->queue_lock);
- if (io->io_hdr.io_type == CTL_IO_TASK)
+ switch (io->io_hdr.io_type) {
+ case CTL_IO_TASK:
ctl_run_task(io);
- else
+ break;
+ case CTL_IO_SCSI:
ctl_scsiio_precheck(&io->scsiio);
+ break;
+ case CTL_IO_NVME:
+ case CTL_IO_NVME_ADMIN:
+ ctl_nvmeio_precheck(&io->nvmeio);
+ break;
+ default:
+ __assert_unreachable();
+ }
continue;
}
io = (union ctl_io *)STAILQ_FIRST(&thr->rtr_queue);
if (io != NULL) {
STAILQ_REMOVE_HEAD(&thr->rtr_queue, links);
mtx_unlock(&thr->queue_lock);
- retval = ctl_scsiio(&io->scsiio);
- if (retval != CTL_RETVAL_COMPLETE)
- CTL_DEBUG_PRINT(("ctl_scsiio failed\n"));
+ switch (io->io_hdr.io_type) {
+ case CTL_IO_SCSI:
+ retval = ctl_scsiio(&io->scsiio);
+ if (retval != CTL_RETVAL_COMPLETE)
+ CTL_DEBUG_PRINT(("ctl_scsiio failed\n"));
+ break;
+ case CTL_IO_NVME:
+ case CTL_IO_NVME_ADMIN:
+ retval = ctl_nvmeio(&io->nvmeio);
+ if (retval != CTL_RETVAL_COMPLETE)
+ CTL_DEBUG_PRINT(("ctl_nvmeio failed\n"));
+ break;
+ default:
+ __assert_unreachable();
+ }
continue;
}
diff --git a/sys/cam/ctl/ctl_nvme_cmd_table.c b/sys/cam/ctl/ctl_nvme_cmd_table.c
new file mode 100644
--- /dev/null
+++ b/sys/cam/ctl/ctl_nvme_cmd_table.c
@@ -0,0 +1,35 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Chelsio Communications, Inc.
+ */
+
+#include <dev/nvme/nvme.h>
+
+#include <cam/ctl/ctl.h>
+#include <cam/ctl/ctl_ha.h>
+#include <cam/ctl/ctl_io.h>
+#include <cam/ctl/ctl_ioctl.h>
+#include <cam/ctl/ctl_private.h>
+
+/*
+ * Administrative Command Set (CTL_IO_NVME_ADMIN).
+ *
+ * Indexed by opcode.  Opcodes not listed here are left with a NULL
+ * execute handler.  Each entry names the handler and its flags: a
+ * CTL_FLAG_DATA_* data direction, optionally ORed with CTL_CMD_FLAG_*
+ * bits.
+ */
+const struct ctl_nvme_cmd_entry nvme_admin_cmd_table[256] =
+{
+ [NVME_OPC_IDENTIFY] = { ctl_nvme_identify, CTL_FLAG_DATA_IN |
+ CTL_CMD_FLAG_OK_ON_NO_LUN },
+};
+
+/*
+ * NVM Command Set (CTL_IO_NVME).
+ *
+ * Indexed by opcode.  Opcodes not listed here are left with a NULL
+ * execute handler.  Each entry names the handler and the data
+ * direction flag for the command.
+ */
+const struct ctl_nvme_cmd_entry nvme_nvm_cmd_table[256] =
+{
+ [NVME_OPC_FLUSH] = { ctl_nvme_flush, CTL_FLAG_DATA_NONE },
+ [NVME_OPC_WRITE] = { ctl_nvme_read_write, CTL_FLAG_DATA_OUT },
+ [NVME_OPC_READ] = { ctl_nvme_read_write, CTL_FLAG_DATA_IN },
+ [NVME_OPC_WRITE_UNCORRECTABLE] = { ctl_nvme_write_uncorrectable,
+ CTL_FLAG_DATA_NONE },
+ [NVME_OPC_COMPARE] = { ctl_nvme_compare, CTL_FLAG_DATA_OUT },
+ [NVME_OPC_WRITE_ZEROES] = { ctl_nvme_write_zeroes, CTL_FLAG_DATA_NONE },
+ [NVME_OPC_DATASET_MANAGEMENT] = { ctl_nvme_dataset_management,
+ CTL_FLAG_DATA_OUT },
+ [NVME_OPC_VERIFY] = { ctl_nvme_verify, CTL_FLAG_DATA_NONE },
+};
diff --git a/sys/cam/ctl/ctl_private.h b/sys/cam/ctl/ctl_private.h
--- a/sys/cam/ctl/ctl_private.h
+++ b/sys/cam/ctl/ctl_private.h
@@ -78,7 +78,8 @@
CTL_ACTION_SKIP,
CTL_ACTION_BLOCK,
CTL_ACTION_OVERLAP,
- CTL_ACTION_OVERLAP_TAG
+ CTL_ACTION_OVERLAP_TAG,
+ CTL_ACTION_FUSED,
} ctl_action;
/*
@@ -139,6 +140,12 @@
* after the opcode byte. */
};
+/*
+ * Command table entry for an NVMe opcode.
+ *
+ * execute is the handler for the opcode; a NULL handler marks the
+ * opcode as unsupported.  flags holds the CTL_FLAG_DATA_* direction
+ * bits, which are copied into the I/O header, and may also carry
+ * CTL_CMD_FLAG_OK_ON_NO_LUN and CTL_CMD_FLAG_OK_ON_NO_MEDIA, which are
+ * tested when the command is prechecked.
+ */
+struct ctl_nvme_cmd_entry {
+ int (*execute)(struct ctl_nvmeio *);
+ ctl_io_flags flags;
+};
+
typedef enum {
CTL_LUN_NONE = 0x000,
CTL_LUN_CONTROL = 0x001,
@@ -412,6 +419,8 @@
#ifdef _KERNEL
extern const struct ctl_cmd_entry ctl_cmd_table[256];
+extern const struct ctl_nvme_cmd_entry nvme_admin_cmd_table[256];
+extern const struct ctl_nvme_cmd_entry nvme_nvm_cmd_table[256];
uint32_t ctl_get_initindex(struct ctl_nexus *nexus);
int ctl_lun_map_init(struct ctl_port *port);
@@ -459,6 +468,15 @@
int ctl_report_timestamp(struct ctl_scsiio *ctsio);
int ctl_get_lba_status(struct ctl_scsiio *ctsio);
+int ctl_nvme_identify(struct ctl_nvmeio *ctnio);
+int ctl_nvme_flush(struct ctl_nvmeio *ctnio);
+int ctl_nvme_read_write(struct ctl_nvmeio *ctnio);
+int ctl_nvme_write_uncorrectable(struct ctl_nvmeio *ctnio);
+int ctl_nvme_compare(struct ctl_nvmeio *ctnio);
+int ctl_nvme_write_zeroes(struct ctl_nvmeio *ctnio);
+int ctl_nvme_dataset_management(struct ctl_nvmeio *ctnio);
+int ctl_nvme_verify(struct ctl_nvmeio *ctnio);
+
void ctl_tpc_init(struct ctl_softc *softc);
void ctl_tpc_shutdown(struct ctl_softc *softc);
void ctl_tpc_lun_init(struct ctl_lun *lun);
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -109,6 +109,7 @@
cam/ctl/ctl_frontend_iscsi.c optional ctl cfiscsi
cam/ctl/ctl_ha.c optional ctl
cam/ctl/ctl_nvme_all.c optional ctl
+cam/ctl/ctl_nvme_cmd_table.c optional ctl
cam/ctl/ctl_scsi_all.c optional ctl
cam/ctl/ctl_tpc.c optional ctl
cam/ctl/ctl_tpc_local.c optional ctl
diff --git a/sys/modules/ctl/Makefile b/sys/modules/ctl/Makefile
--- a/sys/modules/ctl/Makefile
+++ b/sys/modules/ctl/Makefile
@@ -13,6 +13,7 @@
SRCS+= ctl_frontend_ioctl.c
SRCS+= ctl_ha.c
SRCS+= ctl_nvme_all.c
+SRCS+= ctl_nvme_cmd_table.c
SRCS+= ctl_scsi_all.c
SRCS+= ctl_tpc.c
SRCS+= ctl_tpc_local.c
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Jan 15, 3:18 AM (10 h, 40 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15805391
Default Alt Text
D44720.diff (28 KB)
Attached To
Mode
D44720: ctl: Support for NVMe commands
Attached
Detach File
Event Timeline
Log In to Comment