Changeset View
Standalone View
sys/dev/nvme/nvme_ctrlr.c
Show First 20 Lines • Show All 238 Lines • ▼ Show 20 Lines | |||||
static int | static int | ||||
nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr, int desired_val) | nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr, int desired_val) | ||||
{ | { | ||||
int ms_waited; | int ms_waited; | ||||
union cc_register cc; | union cc_register cc; | ||||
union csts_register csts; | union csts_register csts; | ||||
cc.raw = nvme_mmio_read_4(ctrlr, cc); | cc.raw = nvme_mmio_read_4(ctrlr, cc); | ||||
imp: This is now unused, remove it. | |||||
csts.raw = nvme_mmio_read_4(ctrlr, csts); | csts.raw = nvme_mmio_read_4(ctrlr, csts); | ||||
if (cc.bits.en != desired_val) { | ms_waited = 0; | ||||
nvme_printf(ctrlr, "%s called with desired_val = %d " | while (cc.bits.en != desired_val) { | ||||
"but cc.en = %d\n", __func__, desired_val, cc.bits.en); | if (ms_waited++ > ctrlr->enable_timeout_in_ms) { | ||||
nvme_printf(ctrlr, "controller enable did not become %d " | |||||
"within %d ms\n", desired_val, ctrlr->enable_timeout_in_ms); | |||||
return (ENXIO); | return (ENXIO); | ||||
} | } | ||||
DELAY(1000); | |||||
cc.raw = nvme_mmio_read_4(ctrlr, cc); | |||||
Not Done Inline ActionsI suspect what the code was trying to do here was follow the CC.EN guidance in the spec (section 3.1.5 of the 1.3 spec) that transitioning CC.EN from 0 to 1 when CSTS.RDY is 1 or transitioning CC.EN from 1 to 0 when CSTS.RDY is 0 "has undefined results". If that is the case, there should be code similar to: if (csts.bits.rdy == desired) { nvme_printf(ctrlr, "%s: cannot transition in undefined state " "(cc.en -> %u when csts.rdy = %u)\n", __func__, desired, csts.bits.rdy); return (ENXIO); } in nvme_ctrlr_disable() and nvme_ctrlr_enable(), but not here. chuck: I suspect what the code was trying to do here was follow the CC.EN guidance in the spec… | |||||
Not Done Inline ActionsThe old code didn't do that at all... But this is a good suggestion. imp: The old code didn't do that at all... But this is a good suggestion. | |||||
Not Done Inline ActionsAh, I see what you are saying... The code was trying to enforce that as a precondition.... However, since we just changed things, there's a race here, I think, so I'll leave this check out. imp: Ah, I see what you are saying... The code was trying to enforce that as a precondition.... | |||||
} | |||||
ms_waited = 0; | ms_waited = 0; | ||||
while (csts.bits.rdy != desired_val) { | while (csts.bits.rdy != desired_val) { | ||||
DELAY(1000); | |||||
if (ms_waited++ > ctrlr->ready_timeout_in_ms) { | if (ms_waited++ > ctrlr->ready_timeout_in_ms) { | ||||
nvme_printf(ctrlr, "controller ready did not become %d " | nvme_printf(ctrlr, "controller ready did not become %d " | ||||
"within %d ms\n", desired_val, ctrlr->ready_timeout_in_ms); | "within %d ms\n", desired_val, ctrlr->ready_timeout_in_ms); | ||||
return (ENXIO); | return (ENXIO); | ||||
} | } | ||||
DELAY(1000); | |||||
csts.raw = nvme_mmio_read_4(ctrlr, csts); | csts.raw = nvme_mmio_read_4(ctrlr, csts); | ||||
} | } | ||||
return (0); | return (0); | ||||
} | } | ||||
static void | static int | ||||
nvme_ctrlr_disable(struct nvme_controller *ctrlr) | nvme_ctrlr_disable(struct nvme_controller *ctrlr) | ||||
{ | { | ||||
union cc_register cc; | union cc_register cc; | ||||
union csts_register csts; | union csts_register csts; | ||||
int err; | |||||
cc.raw = nvme_mmio_read_4(ctrlr, cc); | cc.raw = nvme_mmio_read_4(ctrlr, cc); | ||||
csts.raw = nvme_mmio_read_4(ctrlr, csts); | csts.raw = nvme_mmio_read_4(ctrlr, csts); | ||||
if (cc.bits.en == 1 && csts.bits.rdy == 0) | if (cc.bits.en == 1 && csts.bits.rdy == 0) { | ||||
nvme_ctrlr_wait_for_ready(ctrlr, 1); | err = nvme_ctrlr_wait_for_ready(ctrlr, 1); | ||||
if (err != 0) | |||||
return (err); | |||||
cc.bits.en = 0; | cc.bits.en = 0; | ||||
nvme_mmio_write_4(ctrlr, cc, cc.raw); | nvme_mmio_write_4(ctrlr, cc, cc.raw); | ||||
DELAY(5000); | return (nvme_ctrlr_wait_for_ready(ctrlr, 0)); | ||||
nvme_ctrlr_wait_for_ready(ctrlr, 0); | |||||
} | } | ||||
static int | static int | ||||
nvme_ctrlr_enable(struct nvme_controller *ctrlr) | nvme_ctrlr_enable(struct nvme_controller *ctrlr) | ||||
Not Done Inline ActionsIf EN and RDY are out of sync, the user probably needs to issue a reset to recover the card. Would it be useful to log this? chuck: If EN and RDY are out of sync, the user probably needs to issue a reset to recover the card. | |||||
{ | { | ||||
union cc_register cc; | union cc_register cc; | ||||
union csts_register csts; | union csts_register csts; | ||||
union aqa_register aqa; | union aqa_register aqa; | ||||
cc.raw = nvme_mmio_read_4(ctrlr, cc); | cc.raw = nvme_mmio_read_4(ctrlr, cc); | ||||
csts.raw = nvme_mmio_read_4(ctrlr, csts); | csts.raw = nvme_mmio_read_4(ctrlr, csts); | ||||
if (cc.bits.en == 1) { | if (cc.bits.en == 1) { | ||||
if (csts.bits.rdy == 1) | if (csts.bits.rdy == 1) | ||||
return (0); | return (0); | ||||
else | else | ||||
return (nvme_ctrlr_wait_for_ready(ctrlr, 1)); | return (nvme_ctrlr_wait_for_ready(ctrlr, 1)); | ||||
} | } | ||||
Not Done Inline ActionsShould this check CSTS.RDY? Something like if (csts.bit.rdy == 1) { /* EN == 0 already wait for RDY == 0 or fail */ err = nvme_ctrlr_wait_for_ready(ctrlr, 0); if (err != 0) return (err); } chuck: Should this check CSTS.RDY? Something like
```
if (csts.bit.rdy == 1) {
/* EN == 0… | |||||
nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr); | nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr); | ||||
DELAY(5000); | DELAY(5000); | ||||
nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr); | nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr); | ||||
DELAY(5000); | DELAY(5000); | ||||
aqa.raw = 0; | aqa.raw = 0; | ||||
/* acqs and asqs are 0-based. */ | /* acqs and asqs are 0-based. */ | ||||
aqa.bits.acqs = ctrlr->adminq.num_entries-1; | aqa.bits.acqs = ctrlr->adminq.num_entries-1; | ||||
aqa.bits.asqs = ctrlr->adminq.num_entries-1; | aqa.bits.asqs = ctrlr->adminq.num_entries-1; | ||||
nvme_mmio_write_4(ctrlr, aqa, aqa.raw); | nvme_mmio_write_4(ctrlr, aqa, aqa.raw); | ||||
DELAY(5000); | DELAY(5000); | ||||
cc.bits.en = 1; | cc.bits.en = 1; | ||||
cc.bits.css = 0; | cc.bits.css = 0; | ||||
cc.bits.ams = 0; | cc.bits.ams = 0; | ||||
cc.bits.shn = 0; | cc.bits.shn = 0; | ||||
cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */ | cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */ | ||||
cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */ | cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */ | ||||
/* This evaluates to 0, which is according to spec. */ | /* This evaluates to 0, which is according to spec. */ | ||||
cc.bits.mps = (PAGE_SIZE >> 13); | cc.bits.mps = (PAGE_SIZE >> 13); | ||||
nvme_mmio_write_4(ctrlr, cc, cc.raw); | nvme_mmio_write_4(ctrlr, cc, cc.raw); | ||||
DELAY(5000); | |||||
return (nvme_ctrlr_wait_for_ready(ctrlr, 1)); | return (nvme_ctrlr_wait_for_ready(ctrlr, 1)); | ||||
} | } | ||||
int | int | ||||
nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr) | nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr) | ||||
{ | { | ||||
int i; | int i; | ||||
nvme_admin_qpair_disable(&ctrlr->adminq); | nvme_admin_qpair_disable(&ctrlr->adminq); | ||||
/* | /* | ||||
* I/O queues are not allocated before the initial HW | * I/O queues are not allocated before the initial HW | ||||
* reset, so do not try to disable them. Use is_initialized | * reset, so do not try to disable them. Use is_initialized | ||||
* to determine if this is the initial HW reset. | * to determine if this is the initial HW reset. | ||||
*/ | */ | ||||
if (ctrlr->is_initialized) { | if (ctrlr->is_initialized) { | ||||
for (i = 0; i < ctrlr->num_io_queues; i++) | for (i = 0; i < ctrlr->num_io_queues; i++) | ||||
nvme_io_qpair_disable(&ctrlr->ioq[i]); | nvme_io_qpair_disable(&ctrlr->ioq[i]); | ||||
} | } | ||||
DELAY(100*1000); | DELAY(100*1000); | ||||
nvme_ctrlr_disable(ctrlr); | err = nvme_ctrlr_disable(ctrlr); | ||||
if (err != 0) | |||||
return err; | |||||
return (nvme_ctrlr_enable(ctrlr)); | return (nvme_ctrlr_enable(ctrlr)); | ||||
} | } | ||||
void | void | ||||
nvme_ctrlr_reset(struct nvme_controller *ctrlr) | nvme_ctrlr_reset(struct nvme_controller *ctrlr) | ||||
{ | { | ||||
int cmpset; | int cmpset; | ||||
▲ Show 20 Lines • Show All 757 Lines • ▼ Show 20 Lines | nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev) | ||||
if (cap_hi.bits.dstrd != 0) | if (cap_hi.bits.dstrd != 0) | ||||
return (ENXIO); | return (ENXIO); | ||||
ctrlr->min_page_size = 1 << (12 + cap_hi.bits.mpsmin); | ctrlr->min_page_size = 1 << (12 + cap_hi.bits.mpsmin); | ||||
/* Get ready timeout value from controller, in units of 500ms. */ | /* Get ready timeout value from controller, in units of 500ms. */ | ||||
cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo); | cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo); | ||||
ctrlr->ready_timeout_in_ms = cap_lo.bits.to * 500; | ctrlr->ready_timeout_in_ms = cap_lo.bits.to * 500; | ||||
ctrlr->enable_timeout_in_ms = 2500; /* Wait up to 2.5s for EN bit delta */ | |||||
Not Done Inline ActionsVendors specify the maximum time for CSTS.RDY and CSTS.SHST transitions in CAP.TO, and by in large, test that their products meet this value. While the other changes seem good, this change forces the driver to wait more than the vendor specifies and longer than the spec mandates. The original code here is what I would do and have done. chuck: Vendors specify the maximum time for CSTS.RDY and CSTS.SHST transitions in CAP.TO, and by in… | |||||
Not Done Inline ActionsI'm not sure I get this comment. We still use ready_timeout_in_ms which is derived from CAP.TO. What enable_timeout does is for the CC.EN readback after we've set it to make sure that it transitions before we start the timer on the RDY transition (which is still bounded by ready_timeout_in_ms). I couldn't find a spec for how long it will take for EN to transition. imp: I'm not sure I get this comment.
We still use ready_timeout_in_ms which is derived from CAP.TO. | |||||
Not Done Inline ActionsBah, I misunderstood what the code was trying to do. Best I can tell, the Linux code doesn't check that the register write to CC completed for either the enable or disable case. My interpretation of the Linux code is that it allows extra time for the quirked device to set CSTS.RDY == 0. Since NVMe doesn't specify when implementation must update the EN field, there's an argument that says the code which waits for this field to be set is not guaranteed to be correct for compliant implementations. This would argue for removing the check of cc.bits.en altogether. Also, wouldn't PCIe transaction ordering rules prevent the read of CSTS (non-posted) from passing the write to CC (posted)? If the troublesome device needs additional time to disable, wouldn't it be better to add a quirk for the device and then add an additional DELAY() in nvme_ctrlr_disable() for those devices? chuck: Bah, I misunderstood what the code was trying to do.
Best I can tell, the Linux code doesn't… | |||||
Not Done Inline ActionsI hate quirks. They are super ugly. Linux always sleeps 2.3s on disable for these devices. Also, it's hard to know that you need to wait until you've had one FAIL, notice that the failure was because the timeout was insufficient, and add a quirk. And if your OPS group is lame, it may mean you have to wait for several failures to happen before it makes it back to you for FA. imp: I hate quirks. They are super ugly. Linux always sleeps 2.3s on disable for these devices. | |||||
timeout_period = NVME_DEFAULT_TIMEOUT_PERIOD; | timeout_period = NVME_DEFAULT_TIMEOUT_PERIOD; | ||||
TUNABLE_INT_FETCH("hw.nvme.timeout_period", &timeout_period); | TUNABLE_INT_FETCH("hw.nvme.timeout_period", &timeout_period); | ||||
timeout_period = min(timeout_period, NVME_MAX_TIMEOUT_PERIOD); | timeout_period = min(timeout_period, NVME_MAX_TIMEOUT_PERIOD); | ||||
timeout_period = max(timeout_period, NVME_MIN_TIMEOUT_PERIOD); | timeout_period = max(timeout_period, NVME_MIN_TIMEOUT_PERIOD); | ||||
ctrlr->timeout_period = timeout_period; | ctrlr->timeout_period = timeout_period; | ||||
nvme_retry_count = NVME_DEFAULT_RETRY_COUNT; | nvme_retry_count = NVME_DEFAULT_RETRY_COUNT; | ||||
▲ Show 20 Lines • Show All 138 Lines • Show Last 20 Lines |
This is now unused, remove it.