D50222.id158222.diff

diff --git a/lib/libnvmf/libnvmf.h b/lib/libnvmf/libnvmf.h
--- a/lib/libnvmf/libnvmf.h
+++ b/lib/libnvmf/libnvmf.h
@@ -342,7 +342,8 @@
*/
int nvmf_handoff_host(const struct nvme_discovery_log_entry *dle,
const char *hostnqn, struct nvmf_qpair *admin_qp, u_int num_queues,
- struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata);
+ struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata,
+ uint32_t reconnect_delay, uint32_t controller_loss_timeout);
/*
* Disconnect an active host association previously handed off to the
@@ -370,7 +371,8 @@
*/
int nvmf_reconnect_host(int fd, const struct nvme_discovery_log_entry *dle,
const char *hostnqn, struct nvmf_qpair *admin_qp, u_int num_queues,
- struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata);
+ struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata,
+ uint32_t reconnect_delay, uint32_t controller_loss_timeout);
/*
* Fetch connection status from an existing kernel host.
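[Note: both nvmf_handoff_host() and nvmf_reconnect_host() gain two trailing
uint32_t parameters. A minimal, hypothetical call sketch showing how a libnvmf
consumer passes the new values after this change; the surrounding variables and
error handling are assumptions, not part of the diff. Both values are in
seconds, and zero disables the corresponding timer:

    error = nvmf_handoff_host(dle, hostnqn, admin_qp, num_queues,
        io_queues, &cdata, /* reconnect_delay */ 10,
        /* controller_loss_timeout */ 600);
    if (error != 0)
        warnc(error, "Failed to handoff queues to kernel");
]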
diff --git a/lib/libnvmf/nvmf_host.c b/lib/libnvmf/nvmf_host.c
--- a/lib/libnvmf/nvmf_host.c
+++ b/lib/libnvmf/nvmf_host.c
@@ -792,7 +792,8 @@
prepare_queues_for_handoff(struct nvmf_ioc_nv *nv,
const struct nvme_discovery_log_entry *dle, const char *hostnqn,
struct nvmf_qpair *admin_qp, u_int num_queues,
- struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata)
+ struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata,
+ uint32_t reconnect_delay, uint32_t controller_loss_timeout)
{
const struct nvmf_association *na = admin_qp->nq_association;
nvlist_t *nvl, *nvl_qp, *nvl_rparams;
@@ -820,6 +821,9 @@
nvlist_add_string(nvl_rparams, "hostnqn", hostnqn);
nvlist_add_number(nvl_rparams, "num_io_queues", num_queues);
nvlist_add_number(nvl_rparams, "kato", admin_qp->nq_kato);
+ nvlist_add_number(nvl_rparams, "reconnect_delay", reconnect_delay);
+ nvlist_add_number(nvl_rparams, "controller_loss_timeout",
+ controller_loss_timeout);
nvlist_add_number(nvl_rparams, "io_qsize", io_queues[0]->nq_qsize);
nvlist_add_bool(nvl_rparams, "sq_flow_control",
na->na_params.sq_flow_control);
@@ -842,6 +846,9 @@
nvl = nvlist_create(0);
nvlist_add_number(nvl, "trtype", na->na_trtype);
nvlist_add_number(nvl, "kato", admin_qp->nq_kato);
+ nvlist_add_number(nvl, "reconnect_delay", reconnect_delay);
+ nvlist_add_number(nvl, "controller_loss_timeout",
+ controller_loss_timeout);
nvlist_move_nvlist(nvl, "rparams", nvl_rparams);
/* First, the admin queue. */
@@ -872,7 +879,8 @@
int
nvmf_handoff_host(const struct nvme_discovery_log_entry *dle,
const char *hostnqn, struct nvmf_qpair *admin_qp, u_int num_queues,
- struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata)
+ struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata,
+ uint32_t reconnect_delay, uint32_t controller_loss_timeout)
{
struct nvmf_ioc_nv nv;
u_int i;
@@ -885,7 +893,8 @@
}
error = prepare_queues_for_handoff(&nv, dle, hostnqn, admin_qp,
- num_queues, io_queues, cdata);
+ num_queues, io_queues, cdata, reconnect_delay,
+ controller_loss_timeout);
if (error != 0)
goto out;
@@ -981,14 +990,16 @@
int
nvmf_reconnect_host(int fd, const struct nvme_discovery_log_entry *dle,
const char *hostnqn, struct nvmf_qpair *admin_qp, u_int num_queues,
- struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata)
+ struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata,
+ uint32_t reconnect_delay, uint32_t controller_loss_timeout)
{
struct nvmf_ioc_nv nv;
u_int i;
int error;
error = prepare_queues_for_handoff(&nv, dle, hostnqn, admin_qp,
- num_queues, io_queues, cdata);
+ num_queues, io_queues, cdata, reconnect_delay,
+ controller_loss_timeout);
if (error != 0)
goto out;
diff --git a/sbin/devd/Makefile b/sbin/devd/Makefile
--- a/sbin/devd/Makefile
+++ b/sbin/devd/Makefile
@@ -46,6 +46,11 @@
HYPERVPACKAGE= hyperv-tools
.endif
+CONFGROUPS+= NVME
+NVMEDIR= ${DEVDDIR}
+NVME+= nvmf.conf
+NVMEPACKAGE= nvme-tools
+
.if ${MK_USB} != "no"
DEVD+= uath.conf ulpt.conf
.endif
diff --git a/sbin/devd/devd.conf.5 b/sbin/devd/devd.conf.5
--- a/sbin/devd/devd.conf.5
+++ b/sbin/devd/devd.conf.5
@@ -38,7 +38,7 @@
.\" ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
.\" SOFTWARE.
.\"
-.Dd July 8, 2025
+.Dd July 9, 2025
.Dt DEVD.CONF 5
.Os
.Sh NAME
@@ -517,6 +517,8 @@
representing the start of a controller reset, the successful completion of a
controller reset, or a timeout while waiting for the controller to reset,
respectively.
+.It Li nvme Ta Li controller Ta Li RECONNECT Ta
+An NVMe over Fabrics host has disconnected and is requesting a reconnect.
.El
.Pp
.Bl -column "SYSTEM" "SUBSYSTEM" "SHUTDOWN-THRESHOLD" -compact
diff --git a/sbin/devd/nvmf.conf b/sbin/devd/nvmf.conf
new file mode 100644
--- /dev/null
+++ b/sbin/devd/nvmf.conf
@@ -0,0 +1,7 @@
+# Attempt to reconnect NVMeoF host devices when requested
+notify 100 {
+ match "system" "nvme";
+ match "subsystem" "controller";
+ match "type" "RECONNECT";
+ action "nvmecontrol reconnect $name";
+};
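[Note: with this rule installed, a lost association leads the kernel to emit a
devctl notification roughly of the following form. The raw event line and the
unit number are illustrative; the system/subsystem/type and the name variable
match the devctl_notify() call added in sys/dev/nvmf/host/nvmf.c below:

    !system=nvme subsystem=controller type=RECONNECT name="nvme0"

devd then expands the action to:

    nvmecontrol reconnect nvme0

This is re-requested every reconnect_delay seconds until a new association is
established or the controller loss timer deletes the device.]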
diff --git a/sbin/nvmecontrol/connect.c b/sbin/nvmecontrol/connect.c
--- a/sbin/nvmecontrol/connect.c
+++ b/sbin/nvmecontrol/connect.c
@@ -31,6 +31,8 @@
const char *subnqn;
const char *hostnqn;
uint32_t kato;
+ uint32_t reconnect_delay;
+ uint32_t controller_loss_timeout;
uint16_t num_io_queues;
uint16_t queue_size;
bool data_digests;
@@ -43,6 +45,8 @@
.subnqn = NULL,
.hostnqn = NULL,
.kato = NVMF_KATO_DEFAULT / 1000,
+ .reconnect_delay = NVMF_DEFAULT_RECONNECT_DELAY,
+ .controller_loss_timeout = NVMF_DEFAULT_CONTROLLER_LOSS,
.num_io_queues = 1,
.queue_size = 0,
.data_digests = false,
@@ -107,7 +111,7 @@
}
error = nvmf_handoff_host(dle, hostnqn, admin, opt.num_io_queues, io,
- &cdata);
+ &cdata, opt.reconnect_delay, opt.controller_loss_timeout);
if (error != 0) {
warnc(error, "Failed to handoff queues to kernel");
free(io);
@@ -259,6 +263,11 @@
"Number of entries in each I/O queue"),
OPT("keep-alive-tmo", 'k', arg_uint32, opt, kato,
"Keep Alive timeout (in seconds)"),
+ OPT("reconnect-delay", 'r', arg_uint32, opt, reconnect_delay,
+ "Delay between reconnect attempts after connection loss "
+ "(in seconds)"),
+ OPT("ctrl-loss-tmo", 'l', arg_uint32, opt, controller_loss_timeout,
+ "Controller loss timeout after connection loss (in seconds)"),
OPT("hostnqn", 'q', arg_string, opt, hostnqn,
"Host NQN"),
OPT("flow_control", 'F', arg_none, opt, flow_control,
diff --git a/sbin/nvmecontrol/nvmecontrol.8 b/sbin/nvmecontrol/nvmecontrol.8
--- a/sbin/nvmecontrol/nvmecontrol.8
+++ b/sbin/nvmecontrol/nvmecontrol.8
@@ -33,7 +33,7 @@
.\"
.\" Author: Jim Harris <jimharris@FreeBSD.org>
.\"
-.Dd April 29, 2025
+.Dd July 9, 2025
.Dt NVMECONTROL 8
.Os
.Sh NAME
@@ -216,6 +216,8 @@
.Op Fl c Ar cntl-id
.Op Fl i Ar queues
.Op Fl k Ar seconds
+.Op Fl l Ar seconds
+.Op Fl r Ar seconds
.Op Fl t Ar transport
.Op Fl q Ar HostNQN
.Op Fl Q Ar entries
@@ -226,6 +228,8 @@
.Op Fl FGg
.Op Fl i Ar queues
.Op Fl k Ar seconds
+.Op Fl l Ar seconds
+.Op Fl r Ar seconds
.Op Fl t Ar transport
.Op Fl q Ar HostNQN
.Op Fl Q Ar entries
@@ -241,6 +245,8 @@
.Op Fl FGg
.Op Fl i Ar queues
.Op Fl k Ar seconds
+.Op Fl l Ar seconds
+.Op Fl r Ar seconds
.Op Fl t Ar transport
.Op Fl q Ar HostNQN
.Op Fl Q Ar entries
@@ -786,6 +792,29 @@
.It Fl k Ar seconds
Keep Alive timer duration in seconds.
The default is 120.
+.It Fl l Ar seconds
+Controller Loss timer duration in seconds.
+The default is 600.
+.Pp
+This timer starts when an association is lost with a remote I/O controller
+and is cancelled when a new association is established.
+If the timer expires, the controller device is deleted.
+A setting of zero disables this timer.
+.It Fl r Ar seconds
+Reconnect timer duration in seconds.
+The default is 10.
+.Pp
+When an association is lost with a remote I/O controller,
+the controller device will request reconnection via periodic
+.Xr devctl 4
+notifications until either a new association is established or the controller
+device is deleted.
+This timer sets the interval between each
+.Xr devctl 4
+notification.
+Note that the first notification is triggered immediately after an association
+is lost.
+A setting of zero disables this timer.
.It Fl t Ar transport
Transport to use.
The default is
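[Note: as a usage sketch, the new flags combine with an ordinary connect
invocation; the address and SubNQN here are placeholders, not values from this
change:

    nvmecontrol connect -t tcp -r 5 -l 300 192.0.2.1:4420 \
        nqn.2014-08.org.example:controller0

With the defaults (-r 10, -l 600), a disconnected host asks devd to reconnect
every 10 seconds and deletes the controller device after 600 seconds, i.e.
after roughly 60 reconnect requests, the first of which fires immediately on
disconnect.]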
diff --git a/sbin/nvmecontrol/reconnect.c b/sbin/nvmecontrol/reconnect.c
--- a/sbin/nvmecontrol/reconnect.c
+++ b/sbin/nvmecontrol/reconnect.c
@@ -27,6 +27,8 @@
const char *transport;
const char *hostnqn;
uint32_t kato;
+ uint32_t reconnect_delay;
+ uint32_t controller_loss_timeout;
uint16_t num_io_queues;
uint16_t queue_size;
bool data_digests;
@@ -37,6 +39,8 @@
.transport = "tcp",
.hostnqn = NULL,
.kato = NVMF_KATO_DEFAULT / 1000,
+ .reconnect_delay = NVMF_DEFAULT_RECONNECT_DELAY,
+ .controller_loss_timeout = NVMF_DEFAULT_CONTROLLER_LOSS,
.num_io_queues = 1,
.queue_size = 0,
.data_digests = false,
@@ -59,6 +63,7 @@
reconnect_nvm_controller(int fd, const struct nvmf_association_params *aparams,
enum nvmf_trtype trtype, int adrfam, const char *address, const char *port,
uint16_t cntlid, const char *subnqn, const char *hostnqn, uint32_t kato,
+ uint32_t reconnect_delay, uint32_t controller_loss_timeout,
u_int num_io_queues, u_int queue_size,
const struct nvme_discovery_log_entry *dle)
{
@@ -88,7 +93,7 @@
}
error = nvmf_reconnect_host(fd, dle, hostnqn, admin, num_io_queues, io,
- &cdata);
+ &cdata, reconnect_delay, controller_loss_timeout);
if (error != 0) {
warnc(error, "Failed to handoff queues to kernel");
free(io);
@@ -137,7 +142,8 @@
error = reconnect_nvm_controller(fd, &aparams, trtype, AF_UNSPEC,
address, port, le16toh(dle->cntlid), subnqn, hostnqn,
- opt.kato * 1000, opt.num_io_queues, opt.queue_size, NULL);
+ opt.kato * 1000, opt.reconnect_delay, opt.controller_loss_timeout,
+ opt.num_io_queues, opt.queue_size, NULL);
free(subnqn);
free(tofree);
return (error);
@@ -196,6 +202,8 @@
address, port, le16toh(dle->cntlid), dle->subnqn,
nvlist_get_string(rparams, "hostnqn"),
dnvlist_get_number(rparams, "kato", 0),
+ dnvlist_get_number(rparams, "reconnect_delay", 0),
+ dnvlist_get_number(rparams, "controller_loss_timeout", 0),
nvlist_get_number(rparams, "num_io_queues"),
nvlist_get_number(rparams, "io_qsize"), dle);
free(subnqn);
@@ -291,6 +299,11 @@
"Number of entries in each I/O queue"),
OPT("keep-alive-tmo", 'k', arg_uint32, opt, kato,
"Keep Alive timeout (in seconds)"),
+ OPT("reconnect-delay", 'r', arg_uint32, opt, reconnect_delay,
+ "Delay between reconnect attempts after connection loss "
+ "(in seconds)"),
+ OPT("ctrl-loss-tmo", 'l', arg_uint32, opt, controller_loss_timeout,
+ "Controller loss timeout after connection loss (in seconds)"),
OPT("hostnqn", 'q', arg_string, opt, hostnqn,
"Host NQN"),
OPT("flow_control", 'F', arg_none, opt, flow_control,
diff --git a/sys/dev/nvmf/host/nvmf.c b/sys/dev/nvmf/host/nvmf.c
--- a/sys/dev/nvmf/host/nvmf.c
+++ b/sys/dev/nvmf/host/nvmf.c
@@ -27,6 +27,7 @@
#include <dev/nvmf/host/nvmf_var.h>
static struct cdevsw nvmf_cdevsw;
+static struct taskqueue *nvmf_tq;
bool nvmf_fail_disconnect = false;
SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
@@ -34,7 +35,10 @@
MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host");
+static void nvmf_controller_loss_task(void *arg, int pending);
static void nvmf_disconnect_task(void *arg, int pending);
+static void nvmf_request_reconnect(struct nvmf_softc *sc);
+static void nvmf_request_reconnect_task(void *arg, int pending);
static void nvmf_shutdown_pre_sync(void *arg, int howto);
static void nvmf_shutdown_post_sync(void *arg, int howto);
@@ -294,6 +298,9 @@
admin = nvlist_get_nvlist(nvl, "admin");
io = nvlist_get_nvlist_array(nvl, "io", &num_io_queues);
kato = dnvlist_get_number(nvl, "kato", 0);
+ sc->reconnect_delay = dnvlist_get_number(nvl, "reconnect_delay", 0);
+ sc->controller_loss_timeout = dnvlist_get_number(nvl,
+ "controller_loss_timeout", 0);
/* Setup the admin queue. */
sc->admin = nvmf_init_qp(sc, trtype, admin, "admin queue", 0);
@@ -504,6 +511,10 @@
callout_init(&sc->ka_tx_timer, 1);
sx_init(&sc->connection_lock, "nvmf connection");
TASK_INIT(&sc->disconnect_task, 0, nvmf_disconnect_task, sc);
+ TIMEOUT_TASK_INIT(nvmf_tq, &sc->controller_loss_task, 0,
+ nvmf_controller_loss_task, sc);
+ TIMEOUT_TASK_INIT(nvmf_tq, &sc->request_reconnect_task, 0,
+ nvmf_request_reconnect_task, sc);
oid = SYSCTL_ADD_NODE(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ioq",
@@ -603,7 +614,9 @@
nvmf_destroy_aer(sc);
- taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
+ taskqueue_drain_timeout(nvmf_tq, &sc->request_reconnect_task);
+ taskqueue_drain_timeout(nvmf_tq, &sc->controller_loss_task);
+ taskqueue_drain(nvmf_tq, &sc->disconnect_task);
sx_destroy(&sc->connection_lock);
nvlist_destroy(sc->rparams);
free(sc->cdata, M_NVMF);
@@ -613,7 +626,7 @@
void
nvmf_disconnect(struct nvmf_softc *sc)
{
- taskqueue_enqueue(taskqueue_thread, &sc->disconnect_task);
+ taskqueue_enqueue(nvmf_tq, &sc->disconnect_task);
}
static void
@@ -676,6 +689,74 @@
nvmf_destroy_qp(sc->admin);
sc->admin = NULL;
+ if (sc->reconnect_delay != 0)
+ nvmf_request_reconnect(sc);
+ if (sc->controller_loss_timeout != 0)
+ taskqueue_enqueue_timeout(nvmf_tq,
+ &sc->controller_loss_task, sc->controller_loss_timeout *
+ hz);
+
+ sx_xunlock(&sc->connection_lock);
+}
+
+static void
+nvmf_controller_loss_task(void *arg, int pending)
+{
+ struct nvmf_softc *sc = arg;
+ device_t dev;
+ int error;
+
+ bus_topo_lock();
+ sx_xlock(&sc->connection_lock);
+ if (sc->admin != NULL || sc->detaching) {
+ /* Reconnected or already detaching. */
+ sx_xunlock(&sc->connection_lock);
+ bus_topo_unlock();
+ return;
+ }
+
+ sc->controller_timedout = true;
+ sx_xunlock(&sc->connection_lock);
+
+ /*
+ * XXX: Doing this from here is a bit ugly. We don't have an
+ * extra reference on `dev` but bus_topo_lock should block any
+ * concurrent device_delete_child invocations.
+ */
+ dev = sc->dev;
+ error = device_delete_child(root_bus, dev);
+ if (error != 0)
+ device_printf(dev,
+ "failed to detach after controller loss: %d\n", error);
+ bus_topo_unlock();
+}
+
+static void
+nvmf_request_reconnect(struct nvmf_softc *sc)
+{
+ char buf[64];
+
+ sx_assert(&sc->connection_lock, SX_LOCKED);
+
+ snprintf(buf, sizeof(buf), "name=\"%s\"", device_get_nameunit(sc->dev));
+ devctl_notify("nvme", "controller", "RECONNECT", buf);
+ taskqueue_enqueue_timeout(nvmf_tq, &sc->request_reconnect_task,
+ sc->reconnect_delay * hz);
+}
+
+static void
+nvmf_request_reconnect_task(void *arg, int pending)
+{
+ struct nvmf_softc *sc = arg;
+
+ sx_xlock(&sc->connection_lock);
+ if (sc->admin != NULL || sc->detaching || sc->controller_timedout) {
+ /* Reconnected or already detaching. */
+ sx_xunlock(&sc->connection_lock);
+ return;
+ }
+
+ nvmf_request_reconnect(sc);
sx_xunlock(&sc->connection_lock);
}
@@ -699,7 +780,7 @@
}
sx_xlock(&sc->connection_lock);
- if (sc->admin != NULL || sc->detaching) {
+ if (sc->admin != NULL || sc->detaching || sc->controller_timedout) {
error = EBUSY;
goto out;
}
@@ -745,6 +826,9 @@
nvmf_reconnect_sim(sc);
nvmf_rescan_all_ns(sc);
+
+ taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task, NULL);
+ taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task, NULL);
out:
sx_xunlock(&sc->connection_lock);
nvlist_destroy(nvl);
@@ -852,7 +936,21 @@
}
free(sc->io, M_NVMF);
- taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
+ taskqueue_drain(nvmf_tq, &sc->disconnect_task);
+ if (taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task,
+ NULL) != 0)
+ taskqueue_drain_timeout(nvmf_tq, &sc->request_reconnect_task);
+
+ /*
+ * Don't cancel/drain the controller loss task if that task
+ * has fired and is triggering the detach.
+ */
+ if (!sc->controller_timedout) {
+ if (taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task,
+ NULL) != 0)
+ taskqueue_drain_timeout(nvmf_tq,
+ &sc->controller_loss_task);
+ }
if (sc->admin != NULL)
nvmf_destroy_qp(sc->admin);
@@ -1154,14 +1252,25 @@
static int
nvmf_modevent(module_t mod, int what, void *arg)
{
+ int error;
+
switch (what) {
case MOD_LOAD:
- return (nvmf_ctl_load());
+ error = nvmf_ctl_load();
+ if (error != 0)
+ return (error);
+
+ nvmf_tq = taskqueue_create("nvmf", M_WAITOK | M_ZERO,
+ taskqueue_thread_enqueue, &nvmf_tq);
+ taskqueue_start_threads(&nvmf_tq, 1, PWAIT, "nvmf taskq");
+ return (0);
case MOD_QUIESCE:
return (0);
case MOD_UNLOAD:
nvmf_ctl_unload();
destroy_dev_drain(&nvmf_cdevsw);
+ if (nvmf_tq != NULL)
+ taskqueue_free(nvmf_tq);
return (0);
default:
return (EOPNOTSUPP);
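[Note: the reconnect and controller-loss work now runs on a dedicated "nvmf"
taskqueue rather than the shared taskqueue_thread, which lets attach, detach,
and reconnect arm, cancel, and drain these timeout tasks independently of
unrelated global work. A condensed sketch of the arm/disarm lifecycle these
hunks implement; field names are from the diff, but this is an abridged
composite, not a verbatim excerpt:

    /* Association lost: prod devd now, then arm both timers. */
    if (sc->reconnect_delay != 0)
        nvmf_request_reconnect(sc);    /* notifies devd, re-enqueues itself */
    if (sc->controller_loss_timeout != 0)
        taskqueue_enqueue_timeout(nvmf_tq, &sc->controller_loss_task,
            sc->controller_loss_timeout * hz);

    /* Association re-established: disarm both timers. */
    taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task, NULL);
    taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task, NULL);
]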
diff --git a/sys/dev/nvmf/host/nvmf_var.h b/sys/dev/nvmf/host/nvmf_var.h
--- a/sys/dev/nvmf/host/nvmf_var.h
+++ b/sys/dev/nvmf/host/nvmf_var.h
@@ -75,9 +75,15 @@
struct callout ka_rx_timer;
sbintime_t ka_rx_sbt;
+ struct timeout_task request_reconnect_task;
+ struct timeout_task controller_loss_task;
+ uint32_t reconnect_delay;
+ uint32_t controller_loss_timeout;
+
struct sx connection_lock;
struct task disconnect_task;
bool detaching;
+ bool controller_timedout;
u_int num_aer;
struct nvmf_aer *aer;
diff --git a/sys/dev/nvmf/nvmf.h b/sys/dev/nvmf/nvmf.h
--- a/sys/dev/nvmf/nvmf.h
+++ b/sys/dev/nvmf/nvmf.h
@@ -26,6 +26,13 @@
#define NVMF_NN (1024)
+/*
+ * Default timeouts for Fabrics hosts. These match values used by
+ * Linux.
+ */
+#define NVMF_DEFAULT_RECONNECT_DELAY 10
+#define NVMF_DEFAULT_CONTROLLER_LOSS 600
+
/*
* (data, size) is the userspace buffer for a packed nvlist.
*
@@ -68,6 +75,8 @@
*
* number trtype
* number kato (optional)
+ * number reconnect_delay (optional)
+ * number controller_loss_timeout (optional)
* qpair handoff nvlist admin
* qpair handoff nvlist array io
* binary cdata struct nvme_controller_data
@@ -81,6 +90,8 @@
* string hostnqn
* number num_io_queues
* number kato (optional)
+ * number reconnect_delay (optional)
+ * number controller_loss_timeout (optional)
* number io_qsize
* bool sq_flow_control
*
