Index: sys/dev/nvd/nvd.c
===================================================================
--- sys/dev/nvd/nvd.c
+++ sys/dev/nvd/nvd.c
@@ -32,6 +32,7 @@
 #include
 #include
 #include
+#include <sys/sysctl.h>
 #include
 #include
 
@@ -58,6 +59,25 @@
 
 MALLOC_DEFINE(M_NVD, "nvd", "nvd(4) allocations");
 
+SYSCTL_NODE(_kern, OID_AUTO, nvd, CTLFLAG_RD, 0, "NVM Express disk driver");
+/*
+ * Intel NVMe controllers have a slow path for I/Os that span a 128KB
+ * stripe boundary but ZFS limits ashift, which is derived from
+ * d_stripesize, to 13 (8KB) so we limit the stripesize reported to
+ * geom(8) to 4KB by default.
+ *
+ * This may result in a small number of additional I/Os requiring
+ * splitting in nvme(4), however the NVMe I/O path is very efficient
+ * so these additional I/Os will cause minimal (if any) difference
+ * in performance or CPU utilisation.
+ *
+ * A value of 0 disables the limit, reporting the namespace's native
+ * stripe size unmodified.
+ */
+static u_int nvd_max_stripesize = 1<<12;
+SYSCTL_UINT(_kern_nvd, OID_AUTO, max_stripesize, CTLFLAG_RWTUN,
+    &nvd_max_stripesize, 0, "The maximum stripe size reported to geom(8)");
+
 struct nvme_consumer *consumer_handle;
 
 struct nvd_disk {
@@ -279,7 +299,9 @@
 	disk->d_sectorsize = nvme_ns_get_sector_size(ns);
 	disk->d_mediasize = (off_t)nvme_ns_get_size(ns);
 	disk->d_delmaxsize = (off_t)nvme_ns_get_size(ns);
-	disk->d_stripesize = nvme_ns_get_stripesize(ns);
+	disk->d_stripesize = nvd_max_stripesize == 0 ?
+	    nvme_ns_get_stripesize(ns) :
+	    MIN(nvme_ns_get_stripesize(ns), nvd_max_stripesize);
 
 	if (TAILQ_EMPTY(&disk_head))
 		disk->d_unit = 0;