Changeset View
Standalone View
sys/dev/nvd/nvd.c
Show All 26 Lines | |||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/bio.h> | #include <sys/bio.h> | ||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/module.h> | #include <sys/module.h> | ||||
#include <sys/sysctl.h> | |||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/taskqueue.h> | #include <sys/taskqueue.h> | ||||
#include <geom/geom.h> | #include <geom/geom.h> | ||||
#include <geom/geom_disk.h> | #include <geom/geom_disk.h> | ||||
#include <dev/nvme/nvme.h> | #include <dev/nvme/nvme.h> | ||||
Show All 10 Lines | |||||
static void *nvd_new_controller(struct nvme_controller *ctrlr); | static void *nvd_new_controller(struct nvme_controller *ctrlr); | ||||
static void nvd_controller_fail(void *ctrlr); | static void nvd_controller_fail(void *ctrlr); | ||||
static int nvd_load(void); | static int nvd_load(void); | ||||
static void nvd_unload(void); | static void nvd_unload(void); | ||||
MALLOC_DEFINE(M_NVD, "nvd", "nvd(4) allocations"); | MALLOC_DEFINE(M_NVD, "nvd", "nvd(4) allocations"); | ||||
SYSCTL_NODE(_kern, OID_AUTO, nvd, CTLFLAG_RD, 0, "NVM Express disk driver"); | |||||
/* | |||||
* Intel NVMe controllers have a slow path for I/Os that span a 128KB | |||||
* stripe boundary but ZFS limits ashift, which is derived from | |||||
* d_stripesize, to 13 (8KB) so we limit the stripesize reported to | |||||
* geom(8) to 4KB by default. | |||||
* | |||||
* This may result in a small number of additional I/Os to require | |||||
* splitting in nvme(4), however the NVMe I/O path is very efficient | |||||
* so these additional I/Os will cause very minimal (if any) difference | |||||
* in performance or CPU utilisation. | |||||
*/ | |||||
static int nvd_max_stripesize = 1<<12; | |||||
SYSCTL_INT(_kern_nvd, OID_AUTO, max_stripsize, CTLFLAG_RWTUN, | |||||
&nvd_max_stripesize, 0, "The maximum stripe size reported to geom(8)"); | |||||
struct nvme_consumer *consumer_handle; | struct nvme_consumer *consumer_handle; | ||||
struct nvd_disk { | struct nvd_disk { | ||||
struct bio_queue_head bioq; | struct bio_queue_head bioq; | ||||
struct task bioqtask; | struct task bioqtask; | ||||
struct mtx bioqlock; | struct mtx bioqlock; | ||||
▲ Show 20 Lines • Show All 205 Lines • ▼ Show 20 Lines | nvd_new_disk(struct nvme_namespace *ns, void *ctrlr_arg) | ||||
disk->d_ioctl = nvd_ioctl; | disk->d_ioctl = nvd_ioctl; | ||||
disk->d_name = NVD_STR; | disk->d_name = NVD_STR; | ||||
disk->d_drv1 = ndisk; | disk->d_drv1 = ndisk; | ||||
disk->d_maxsize = nvme_ns_get_max_io_xfer_size(ns); | disk->d_maxsize = nvme_ns_get_max_io_xfer_size(ns); | ||||
disk->d_sectorsize = nvme_ns_get_sector_size(ns); | disk->d_sectorsize = nvme_ns_get_sector_size(ns); | ||||
disk->d_mediasize = (off_t)nvme_ns_get_size(ns); | disk->d_mediasize = (off_t)nvme_ns_get_size(ns); | ||||
disk->d_delmaxsize = (off_t)nvme_ns_get_size(ns); | disk->d_delmaxsize = (off_t)nvme_ns_get_size(ns); | ||||
disk->d_stripesize = nvme_ns_get_stripesize(ns); | disk->d_stripesize = nvd_max_stripesize == 0 ? | ||||
nvme_ns_get_stripesize(ns) : | |||||
MIN(nvme_ns_get_stripesize(ns), nvd_max_stripesize); | |||||
if (TAILQ_EMPTY(&disk_head)) | if (TAILQ_EMPTY(&disk_head)) | ||||
disk->d_unit = 0; | disk->d_unit = 0; | ||||
else | else | ||||
disk->d_unit = | disk->d_unit = | ||||
TAILQ_LAST(&disk_head, disk_list)->disk->d_unit + 1; | TAILQ_LAST(&disk_head, disk_list)->disk->d_unit + 1; | ||||
disk->d_flags = 0; | disk->d_flags = 0; | ||||
imp: Any chance you can make this a quirk? Or something in nvme_ns_get_stripesize?
I'd hate to… | |||||
Done Inline Actions The setting of stripesize is already a quirk, although a hardcoded one; see nvme_ns.c:489. Jim's idea of passing this up to geom was to allow the upper layers to perform alignment and hence obtain optimum performance. I've made the limit into a sysctl but kept the default at 4KB to ensure maximum compatibility; is that OK? smh: The setting of stripesize is already a quirk, although a hardcoded one; see nvme_ns.c:489. | |||||
impUnsubmitted Done Inline ActionsNo. It's not OK. I don't want to replicate this logic in CAM attached nda I'm working on. Why can't we just fix it in nvme_ns.c? imp: No. It's not OK. I don't want to replicate this logic in CAM attached nda I'm working on. Why… | |||||
smhAuthorUnsubmitted Done Inline Actions rgr I misunderstood your original comment. I'll look to move it, thanks for the update. smh: rgr I misunderstood your original comment. I'll look to move it, thanks for the update. | |||||
impUnsubmitted Done Inline ActionsThanks Steve. nvme_ns.c needs a more robust quirking mechanism, but this is good for now. imp: Thanks Steve. nvme_ns.c needs a more robust quirking mechanism, but this is good for now. | |||||
jimharrisUnsubmitted Done Inline Actions Putting this in nvme_ns.c is OK, but we should change the name of nvme_ns_get_stripesize() then (since it is no longer reporting the stripe size quirk verbatim). How about nvme_ns_get_optimal_sector_size()? When stripe size == 0, this just returns nvme_ns_get_sector_size(). Otherwise it returns min(stripesize, 4KB). jimharris: Putting this in nvme_ns.c is OK, but we should change the name of nvme_ns_get_stripesize() then… | |||||
jimharrisUnsubmitted Not Done Inline ActionsAnd I meant to add that then this gets used for d_sectorsize, and we don't set d_stripesize at all. jimharris: And I meant to add that then this gets used for d_sectorsize, and we don't set d_stripesize at… | |||||
Not Done Inline Actions While d_sectorsize is a bit misused here I don't want to prevent things from getting the true sectorsize so I elected to still set d_stripesize just to nvme_ns_get_optimal_sector_size instead. smh: While d_sectorsize is a bit misused here I don't want to prevent things from getting the true… | |||||
if (nvme_ns_get_flags(ns) & NVME_NS_DEALLOCATE_SUPPORTED) | if (nvme_ns_get_flags(ns) & NVME_NS_DEALLOCATE_SUPPORTED) | ||||
disk->d_flags |= DISKFLAG_CANDELETE; | disk->d_flags |= DISKFLAG_CANDELETE; | ||||
if (nvme_ns_get_flags(ns) & NVME_NS_FLUSH_SUPPORTED) | if (nvme_ns_get_flags(ns) & NVME_NS_FLUSH_SUPPORTED) | ||||
disk->d_flags |= DISKFLAG_CANFLUSHCACHE; | disk->d_flags |= DISKFLAG_CANFLUSHCACHE; | ||||
/* ifdef used here to ease porting to stable branches at a later point. */ | /* ifdef used here to ease porting to stable branches at a later point. */ | ||||
#ifdef DISKFLAG_UNMAPPED_BIO | #ifdef DISKFLAG_UNMAPPED_BIO | ||||
▲ Show 20 Lines • Show All 95 Lines • Show Last 20 Lines |
Any chance you can make this a quirk? Or something in nvme_ns_get_stripesize?
I'd hate to duplicate this in my NVMe CAM attachment.