D21493.diff

Index: head/sys/dev/nvme/nvme.c
===================================================================
--- head/sys/dev/nvme/nvme.c
+++ head/sys/dev/nvme/nvme.c
@@ -137,9 +137,10 @@
}
/*
- * Reset controller twice to ensure we do a transition from cc.en==1
- * to cc.en==0. This is because we don't really know what status
- * the controller was left in when boot handed off to OS.
+ * Reset controller twice to ensure we do a transition from cc.en==1 to
+ * cc.en==0. This is because we don't really know what status the
+ * controller was left in when boot handed off to OS. Linux doesn't do
+ * this, however. If we adopt that policy, see also nvme_ctrlr_resume().
*/
status = nvme_ctrlr_hw_reset(ctrlr);
if (status != 0) {
Index: head/sys/dev/nvme/nvme_ctrlr.c
===================================================================
--- head/sys/dev/nvme/nvme_ctrlr.c
+++ head/sys/dev/nvme/nvme_ctrlr.c
@@ -118,8 +118,8 @@
/*
* Our best estimate for the maximum number of I/Os that we should
- * noramlly have in flight at one time. This should be viewed as a hint,
- * not a hard limit and will need to be revisitted when the upper layers
+ * normally have in flight at one time. This should be viewed as a hint,
+ * not a hard limit and will need to be revisited when the upper layers
* of the storage system grows multi-queue support.
*/
ctrlr->max_hw_pend_io = num_trackers * ctrlr->num_io_queues * 3 / 4;
@@ -344,10 +344,10 @@
return (nvme_ctrlr_wait_for_ready(ctrlr, 1));
}
-int
-nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr)
+static void
+nvme_ctrlr_disable_qpairs(struct nvme_controller *ctrlr)
{
- int i, err;
+ int i;
nvme_admin_qpair_disable(&ctrlr->adminq);
/*
@@ -359,7 +359,15 @@
for (i = 0; i < ctrlr->num_io_queues; i++)
nvme_io_qpair_disable(&ctrlr->ioq[i]);
}
+}
+int
+nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr)
+{
+ int err;
+
+ nvme_ctrlr_disable_qpairs(ctrlr);
+
DELAY(100*1000);
err = nvme_ctrlr_disable(ctrlr);
@@ -481,7 +489,7 @@
}
static int
-nvme_ctrlr_destroy_qpairs(struct nvme_controller *ctrlr)
+nvme_ctrlr_delete_qpairs(struct nvme_controller *ctrlr)
{
struct nvme_completion_poll_status status;
struct nvme_qpair *qpair;
@@ -820,7 +828,7 @@
}
static void
-nvme_ctrlr_start(void *ctrlr_arg)
+nvme_ctrlr_start(void *ctrlr_arg, bool resetting)
{
struct nvme_controller *ctrlr = ctrlr_arg;
uint32_t old_num_io_queues;
@@ -833,7 +841,7 @@
* the number of I/O queues supported, so cannot reset
* the adminq again here.
*/
- if (ctrlr->is_resetting)
+ if (resetting)
nvme_qpair_reset(&ctrlr->adminq);
for (i = 0; i < ctrlr->num_io_queues; i++)
@@ -854,7 +862,7 @@
* explicit specify how many queues it will use. This value should
* never change between resets, so panic if somehow that does happen.
*/
- if (ctrlr->is_resetting) {
+ if (resetting) {
old_num_io_queues = ctrlr->num_io_queues;
if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) {
nvme_ctrlr_fail(ctrlr);
@@ -894,7 +902,7 @@
if (nvme_ctrlr_set_num_qpairs(ctrlr) == 0 &&
nvme_ctrlr_construct_io_qpairs(ctrlr) == 0)
- nvme_ctrlr_start(ctrlr);
+ nvme_ctrlr_start(ctrlr, false);
else
nvme_ctrlr_fail(ctrlr);
@@ -923,7 +931,7 @@
*/
pause("nvmereset", hz / 10);
if (status == 0)
- nvme_ctrlr_start(ctrlr);
+ nvme_ctrlr_start(ctrlr, true);
else
nvme_ctrlr_fail(ctrlr);
@@ -946,7 +954,7 @@
}
/*
- * Poll the single-vector intertrupt case: num_io_queues will be 1 and
+ * Poll the single-vector interrupt case: num_io_queues will be 1 and
* there's only a single vector. While we're polling, we mask further
* interrupts in the controller.
*/
@@ -1012,7 +1020,7 @@
if (is_user_buffer) {
/*
* Ensure the user buffer is wired for the duration of
- * this passthrough command.
+ * this pass-through command.
*/
PHOLD(curproc);
buf = uma_zalloc(pbuf_zone, M_WAITOK);
@@ -1031,7 +1039,7 @@
} else
req = nvme_allocate_request_null(nvme_pt_done, pt);
- /* Assume userspace already converted to little-endian */
+ /* Assume user space already converted to little-endian */
req->cmd.opc = pt->cmd.opc;
req->cmd.fuse = pt->cmd.fuse;
req->cmd.rsvd2 = pt->cmd.rsvd2;
@@ -1206,7 +1214,7 @@
if (ctrlr->is_initialized) {
if (!gone)
- nvme_ctrlr_destroy_qpairs(ctrlr);
+ nvme_ctrlr_delete_qpairs(ctrlr);
for (i = 0; i < ctrlr->num_io_queues; i++)
nvme_io_qpair_destroy(&ctrlr->ioq[i]);
free(ctrlr->ioq, M_NVME);
@@ -1305,4 +1313,88 @@
{
return (&ctrlr->cdata);
+}
+
+int
+nvme_ctrlr_suspend(struct nvme_controller *ctrlr)
+{
+ int to = hz;
+
+ /*
+ * Can't touch a failed controller, so it's effectively already suspended.
+ */
+ if (ctrlr->is_failed)
+ return (0);
+
+ /*
+ * We don't want the reset taskqueue running, since it does similar
+ * things, so prevent it from running after we start. Wait for any reset
+ * that may have been started to complete. The reset process we follow
+ * will ensure that any new I/O will queue and be given to the hardware
+ * after we resume (though there should be none).
+ */
+ while (atomic_cmpset_32(&ctrlr->is_resetting, 0, 1) == 0 && to-- > 0)
+ pause("nvmesusp", 1);
+ if (to <= 0) {
+ nvme_printf(ctrlr,
+ "Competing reset task didn't finish. Try again later.\n");
+ return (EWOULDBLOCK);
+ }
+
+ /*
+ * Per Section 7.6.2 of NVMe spec 1.4, to properly suspend, we need to
+ * delete the hardware I/O queues, and then shutdown. This properly
+ * flushes any metadata the drive may have stored so it can survive
+ * having its power removed and prevents the unsafe shutdown count from
+ * incrementing. Once we delete the qpairs, we have to disable them
+ * before shutting down. The delay is out of paranoia in
+ * nvme_ctrlr_hw_reset, and is repeated here (though we should have no
+ * pending I/O that the delay copes with).
+ */
+ nvme_ctrlr_delete_qpairs(ctrlr);
+ nvme_ctrlr_disable_qpairs(ctrlr);
+ DELAY(100*1000);
+ nvme_ctrlr_shutdown(ctrlr);
+
+ return (0);
+}
+
+int
+nvme_ctrlr_resume(struct nvme_controller *ctrlr)
+{
+
+ /*
+ * Can't touch failed controllers, so nothing to do to resume.
+ */
+ if (ctrlr->is_failed)
+ return (0);
+
+ /*
+ * Have to reset the hardware twice, just like we do on attach. See
+ * nvme_attach() for why.
+ */
+ if (nvme_ctrlr_hw_reset(ctrlr) != 0)
+ goto fail;
+ if (nvme_ctrlr_hw_reset(ctrlr) != 0)
+ goto fail;
+
+ /*
+ * Now that we've reset the hardware, we can restart the controller. Any
+ * I/O that was pending is requeued. Any admin commands are aborted with
+ * an error. Once we've restarted, take the controller out of reset.
+ */
+ nvme_ctrlr_start(ctrlr, true);
+ atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
+
+ return (0);
+fail:
+ /*
+ * Since we can't bring the controller out of reset, announce and fail
+ * the controller. However, we have to return success for the resume
+ * itself, due to questionable APIs.
+ */
+ nvme_printf(ctrlr, "Failed to reset on resume, failing.\n");
+ nvme_ctrlr_fail(ctrlr);
+ atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
+ return (0);
}
Index: head/sys/dev/nvme/nvme_pci.c
===================================================================
--- head/sys/dev/nvme/nvme_pci.c
+++ head/sys/dev/nvme/nvme_pci.c
@@ -43,6 +43,8 @@
static int nvme_pci_probe(device_t);
static int nvme_pci_attach(device_t);
static int nvme_pci_detach(device_t);
+static int nvme_pci_suspend(device_t);
+static int nvme_pci_resume(device_t);
static void nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr);
@@ -51,6 +53,8 @@
DEVMETHOD(device_probe, nvme_pci_probe),
DEVMETHOD(device_attach, nvme_pci_attach),
DEVMETHOD(device_detach, nvme_pci_detach),
+ DEVMETHOD(device_suspend, nvme_pci_suspend),
+ DEVMETHOD(device_resume, nvme_pci_resume),
DEVMETHOD(device_shutdown, nvme_shutdown),
{ 0, 0 }
};
@@ -331,4 +335,22 @@
}
ctrlr->msix_enabled = 1;
+}
+
+static int
+nvme_pci_suspend(device_t dev)
+{
+ struct nvme_controller *ctrlr;
+
+ ctrlr = DEVICE2SOFTC(dev);
+ return (nvme_ctrlr_suspend(ctrlr));
+}
+
+static int
+nvme_pci_resume(device_t dev)
+{
+ struct nvme_controller *ctrlr;
+
+ ctrlr = DEVICE2SOFTC(dev);
+ return (nvme_ctrlr_resume(ctrlr));
}
Index: head/sys/dev/nvme/nvme_private.h
===================================================================
--- head/sys/dev/nvme/nvme_private.h
+++ head/sys/dev/nvme/nvme_private.h
@@ -556,4 +556,7 @@
void nvme_ctrlr_intx_handler(void *arg);
void nvme_ctrlr_poll(struct nvme_controller *ctrlr);
+int nvme_ctrlr_suspend(struct nvme_controller *ctrlr);
+int nvme_ctrlr_resume(struct nvme_controller *ctrlr);
+
#endif /* __NVME_PRIVATE_H__ */
