diff --git a/sys/dev/nvme/nvme.h b/sys/dev/nvme/nvme.h
--- a/sys/dev/nvme/nvme.h
+++ b/sys/dev/nvme/nvme.h
@@ -1902,6 +1902,7 @@
 struct nvme_namespace;
 struct nvme_controller;
 struct nvme_consumer;
+struct nvme_passthru_cmd;
 
 typedef void (*nvme_cb_fn_t)(void *, const struct nvme_completion *);
 
@@ -1921,6 +1922,11 @@
 				   uint32_t nsid, int is_user_buffer,
 				   int is_admin_cmd);
 
+int nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
+				   struct nvme_passthru_cmd *npc,
+				   uint32_t nsid, bool is_user,
+				   bool is_admin);
+
 /* Admin functions */
 void nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr,
 				   uint8_t feature, uint32_t cdw11,
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -43,6 +43,7 @@
 #include
 
 #include "nvme_private.h"
+#include "nvme_linux.h"
 
 #define B4_CHK_RDY_DELAY_MS	2300 /* work around controller bug */
 
@@ -1269,7 +1270,7 @@
 				ret = EFAULT;
 				goto err;
 			}
-			req = nvme_allocate_request_vaddr(buf->b_data, pt->len, 
+			req = nvme_allocate_request_vaddr(buf->b_data, pt->len,
 			    nvme_pt_done, pt);
 		} else
 			req = nvme_allocate_request_vaddr(pt->buf, pt->len,
@@ -1314,6 +1315,136 @@
 	return (ret);
 }
 
+/*
+ * Completion callback for Linux-compatible pass-through commands.
+ *
+ * Records completion dword 0 in npc->result, then clears npc->metadata and
+ * wakes the thread sleeping on npc.  While a command is in flight,
+ * npc->metadata holds the address of the mutex the submitter sleeps on.
+ */
+static void
+nvme_npc_done(void *arg, const struct nvme_completion *cpl)
+{
+	struct nvme_passthru_cmd *npc = arg;
+	struct mtx *mtx = (void *)(uintptr_t)npc->metadata;
+
+	npc->result = cpl->cdw0;	/* cpl in host order by now */
+	mtx_lock(mtx);
+	npc->metadata = 0;
+	wakeup(npc);
+	mtx_unlock(mtx);
+}
+
+/*
+ * Submit a Linux-compatible pass-through command and sleep until it completes.
+ *
+ * ctrlr:	controller the request is submitted to.
+ * npc:		command in struct nvme_passthru_cmd layout.  npc->result
+ *		receives completion dword 0; npc->metadata is borrowed as
+ *		scratch while the command is in flight and is 0 on return.
+ * nsid:	namespace ID placed in the command.
+ * is_user:	true if npc->addr must be mapped with vmapbuf() before use;
+ *		false if it is used directly as the data address.
+ * is_admin:	true to submit with nvme_ctrlr_submit_admin_request(), false
+ *		to submit with nvme_ctrlr_submit_io_request().
+ *
+ * Returns 0 once the command has completed, EIO if metadata is requested or
+ * data_len exceeds ctrlr->max_xfer_size, EINVAL if a data buffer is supplied
+ * with an opcode whose low two bits are 00 or 11, or EFAULT if vmapbuf()
+ * fails.  The completion status is not examined.
+ *
+ * XXX refactor?
+ */
+int
+nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
+    struct nvme_passthru_cmd *npc, uint32_t nsid, bool is_user, bool is_admin)
+{
+	struct nvme_request *req;
+	struct mtx *mtx;
+	struct buf *buf = NULL;
+	int ret = 0;
+
+	/*
+	 * We don't support metadata.
+	 */
+	if (npc->metadata != 0 || npc->metadata_len != 0)
+		return (EIO);
+
+	if (npc->data_len > 0 && npc->addr != 0) {
+		if (npc->data_len > ctrlr->max_xfer_size) {
+			nvme_printf(ctrlr,
+			    "npc->data_len (%d) exceeds max_xfer_size (%d)\n",
+			    npc->data_len, ctrlr->max_xfer_size);
+			return (EIO);
+		}
+		/* We only support data out or data in commands, but not both at once. */
+		if ((npc->opcode & 0x3) == 0 || (npc->opcode & 0x3) == 3)
+			return (EINVAL);
+		if (is_user) {
+			/*
+			 * Ensure the user buffer is wired for the duration of
+			 * this pass-through command.
+			 */
+			PHOLD(curproc);
+			buf = uma_zalloc(pbuf_zone, M_WAITOK);
+			buf->b_iocmd = npc->opcode & 1 ? BIO_WRITE : BIO_READ;
+			if (vmapbuf(buf, (void *)(uintptr_t)npc->addr,
+			    npc->data_len, 1) < 0) {
+				ret = EFAULT;
+				goto err;
+			}
+			req = nvme_allocate_request_vaddr(buf->b_data, npc->data_len,
+			    nvme_npc_done, npc);
+		} else
+			req = nvme_allocate_request_vaddr(
+			    (void *)(uintptr_t)npc->addr, npc->data_len,
+			    nvme_npc_done, npc);
+	} else
+		req = nvme_allocate_request_null(nvme_npc_done, npc);
+
+	req->cmd.opc = npc->opcode;
+	req->cmd.fuse = npc->flags;
+	/* npc->cdw2 and npc->cdw3 are 32 bits wide; htole16() would truncate. */
+	req->cmd.rsvd2 = htole32(npc->cdw2);
+	req->cmd.rsvd3 = htole32(npc->cdw3);
+	req->cmd.cdw10 = htole32(npc->cdw10);
+	req->cmd.cdw11 = htole32(npc->cdw11);
+	req->cmd.cdw12 = htole32(npc->cdw12);
+	req->cmd.cdw13 = htole32(npc->cdw13);
+	req->cmd.cdw14 = htole32(npc->cdw14);
+	req->cmd.cdw15 = htole32(npc->cdw15);
+
+	req->cmd.nsid = htole32(nsid);
+
+	/*
+	 * npc->metadata is known to be 0 here, so reuse it to pass the sleep
+	 * mutex to nvme_npc_done(), which clears it again to signal completion.
+	 */
+	mtx = mtx_pool_find(mtxpool_sleep, npc);
+	npc->metadata = (uintptr_t) mtx;
+
+	/* XXX no timeout passed down */
+	if (is_admin)
+		nvme_ctrlr_submit_admin_request(ctrlr, req);
+	else
+		nvme_ctrlr_submit_io_request(ctrlr, req);
+
+	mtx_lock(mtx);
+	while (npc->metadata != 0)
+		mtx_sleep(npc, mtx, PRIBIO, "nvme_npc", 0);
+	mtx_unlock(mtx);
+
+	if (buf != NULL) {
+		vunmapbuf(buf);
+err:
+		/* The vmapbuf() failure path enters here, skipping vunmapbuf(). */
+		uma_zfree(pbuf_zone, buf);
+		PRELE(curproc);
+	}
+
+	return (ret);
+}
+
 static int
 nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
     struct thread *td)
@@ -1324,6 +1455,7 @@
 	ctrlr = cdev->si_drv1;
 
 	switch (cmd) {
+	case NVME_IOCTL_RESET: /* Linux compat */
 	case NVME_RESET_CONTROLLER:
 		nvme_ctrlr_reset(ctrlr);
 		break;
@@ -1342,6 +1474,19 @@
 	case NVME_GET_MAX_XFER_SIZE:
 		*(uint64_t *)arg = ctrlr->max_xfer_size;
 		break;
+	/* Linux Compatible (see nvme_linux.h) */
+	case NVME_IOCTL_ID:
+		td->td_retval[0] = 0xfffffffful;
+		return (0);
+
+	case NVME_IOCTL_ADMIN_CMD:
+	case NVME_IOCTL_IO_CMD: {
+		struct nvme_passthru_cmd *npc = (struct nvme_passthru_cmd *)arg;
+
+		return (nvme_ctrlr_linux_passthru_cmd(ctrlr, npc, npc->nsid, true,
+		    cmd == NVME_IOCTL_ADMIN_CMD));
+	}
+
 	default:
 		return (ENOTTY);
 	}
diff --git a/sys/dev/nvme/nvme_linux.h b/sys/dev/nvme/nvme_linux.h
new file mode 100644
--- /dev/null
+++ b/sys/dev/nvme/nvme_linux.h
@@ -0,0 +1,67 @@
+/*-
+ * Copyright (c) 2024, Netflix Inc.
+ * Written by Warner Losh
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+/*
+ * Linux compatible NVME ioctls. So far we just support ID, ADMIN_CMD, IO_CMD
+ * and RESET. The rest are not supported.
+ */
+
+#ifndef _NVME_LINUX_H_
+#define _NVME_LINUX_H_
+
+#include <sys/ioccom.h>
+#include <sys/_types.h>
+
+/*
+ * Argument for NVME_IOCTL_ADMIN_CMD and NVME_IOCTL_IO_CMD.
+ * NOTE(review): the layout presumably has to match Linux's struct of the same
+ * name for the ioctls to be compatible -- confirm before changing any field.
+ */
+struct nvme_passthru_cmd {
+	__uint8_t	opcode;
+	__uint8_t	flags;
+	__uint16_t	rsvd1;
+	__uint32_t	nsid;
+	__uint32_t	cdw2;
+	__uint32_t	cdw3;
+	__uint64_t	metadata;
+	__uint64_t	addr;
+	__uint32_t	metadata_len;
+	__uint32_t	data_len;
+	__uint32_t	cdw10;
+	__uint32_t	cdw11;
+	__uint32_t	cdw12;
+	__uint32_t	cdw13;
+	__uint32_t	cdw14;
+	__uint32_t	cdw15;
+	__uint32_t	timeout_ms;
+	__uint32_t	result;
+};
+
+#define nvme_admin_cmd nvme_passthru_cmd
+
+/*
+ * Linux nvme ioctls, commented out ones are not supported
+ */
+#define NVME_IOCTL_ID		_IO('N', 0x40)
+#define NVME_IOCTL_ADMIN_CMD	_IOWR('N', 0x41, struct nvme_admin_cmd)
+/* #define NVME_IOCTL_SUBMIT_IO	_IOW('N', 0x42, struct nvme_user_io) */
+#define NVME_IOCTL_IO_CMD	_IOWR('N', 0x43, struct nvme_passthru_cmd)
+#define NVME_IOCTL_RESET	_IO('N', 0x44)
+/* #define NVME_IOCTL_SUBSYS_RESET	_IO('N', 0x45) */
+/* #define NVME_IOCTL_RESCAN	_IO('N', 0x46) */
+/* #define NVME_IOCTL_ADMIN64_CMD	_IOWR('N', 0x47, struct nvme_passthru_cmd64) */
+/* #define NVME_IOCTL_IO64_CMD	_IOWR('N', 0x48, struct nvme_passthru_cmd64) */
+/* #define NVME_IOCTL_IO64_CMD_VEC	_IOWR('N', 0x49, struct nvme_passthru_cmd64) */
+
+/* io_uring async commands: */
+/* #define NVME_URING_CMD_IO		_IOWR('N', 0x80, struct nvme_uring_cmd) */
+/* #define NVME_URING_CMD_IO_VEC	_IOWR('N', 0x81, struct nvme_uring_cmd) */
+/* #define NVME_URING_CMD_ADMIN		_IOWR('N', 0x82, struct nvme_uring_cmd) */
+/* #define NVME_URING_CMD_ADMIN_VEC	_IOWR('N', 0x83, struct nvme_uring_cmd) */
+
+#endif /* !_NVME_LINUX_H_ */
diff --git a/sys/dev/nvme/nvme_ns.c b/sys/dev/nvme/nvme_ns.c
--- a/sys/dev/nvme/nvme_ns.c
+++ b/sys/dev/nvme/nvme_ns.c
@@ -43,6 +43,7 @@
 #include
 
 #include "nvme_private.h"
+#include "nvme_linux.h"
 
 static void		nvme_bio_child_inbed(struct bio *parent, int bio_error);
 static void		nvme_bio_child_done(void *arg,
@@ -93,6 +94,18 @@
 	case DIOCGSECTORSIZE:
 		*(u_int *)arg = nvme_ns_get_sector_size(ns);
 		break;
+	/* Linux Compatible (see nvme_linux.h) */
+	case NVME_IOCTL_ID:
+		td->td_retval[0] = ns->id;
+		return (0);
+
+	case NVME_IOCTL_ADMIN_CMD:
+	case NVME_IOCTL_IO_CMD: {
+		struct nvme_passthru_cmd *npc = (struct nvme_passthru_cmd *)arg;
+
+		return (nvme_ctrlr_linux_passthru_cmd(ctrlr, npc, ns->id, true,
+		    cmd == NVME_IOCTL_ADMIN_CMD));
+	}
 	default:
 		return (ENOTTY);
 	}
@@ -610,7 +623,6 @@
 		return (ENXIO);
 	ns->cdev->si_drv2 = make_dev_alias(ns->cdev, "%sns%d",
 	    device_get_nameunit(ctrlr->dev), ns->id);
-	ns->cdev->si_flags |= SI_UNMAPPED;
 
 	return (0);