diff --git a/sys/dev/mpr/mpr.c b/sys/dev/mpr/mpr.c --- a/sys/dev/mpr/mpr.c +++ b/sys/dev/mpr/mpr.c @@ -1141,7 +1141,8 @@ if (++sc->io_cmds_active > sc->io_cmds_highwater) sc->io_cmds_highwater++; - KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, ("command not busy\n")); + KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, + ("command not busy, state = %u\n", cm->cm_state)); cm->cm_state = MPR_CM_STATE_INQUEUE; if (sc->atomic_desc_capable) { @@ -1917,6 +1918,11 @@ CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_NEEDGIANT, sc, 0, mpr_dump_reqs, "I", "Dump Active Requests"); + SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), + OID_AUTO, "dump_reqs_alltypes", CTLFLAG_RW, + &sc->dump_reqs_alltypes, 0, + "dump all request types not just inqueue"); + SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "use_phy_num", CTLFLAG_RD, &sc->use_phynum, 0, "Use the phy number for enumeration"); @@ -2101,7 +2107,7 @@ /* Best effort, no locking */ for (i = smid; i < numreqs; i++) { cm = &sc->commands[i]; - if (cm->cm_state != state) + if ((sc->dump_reqs_alltypes == 0) && (cm->cm_state != state)) continue; hdr.smid = i; hdr.state = cm->cm_state; @@ -2365,6 +2371,8 @@ return; } + KASSERT(cm->cm_state == MPR_CM_STATE_INQUEUE, + ("command not inqueue, state = %u\n", cm->cm_state)); cm->cm_state = MPR_CM_STATE_BUSY; if (cm->cm_flags & MPR_CM_FLAGS_POLLED) cm->cm_flags |= MPR_CM_FLAGS_COMPLETE; @@ -2544,9 +2552,6 @@ case MPI25_RPY_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO_SUCCESS: case MPI26_RPY_DESCRIPT_FLAGS_PCIE_ENCAPSULATED_SUCCESS: cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)]; - KASSERT(cm->cm_state == MPR_CM_STATE_INQUEUE, - ("command not inqueue\n")); - cm->cm_state = MPR_CM_STATE_BUSY; cm->cm_reply = NULL; break; case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY: diff --git a/sys/dev/mpr/mpr_sas.c b/sys/dev/mpr/mpr_sas.c --- a/sys/dev/mpr/mpr_sas.c +++ b/sys/dev/mpr/mpr_sas.c @@ -1194,7 +1194,7 @@ "out\n", tm); KASSERT(tm->cm_state == MPR_CM_STATE_INQUEUE, - ("command not inqueue\n")); + ("command not inqueue, state = %u\n", tm->cm_state)); tm->cm_state = MPR_CM_STATE_BUSY; mpr_reinit(sc); @@ -2437,7 +2437,7 @@ if (cm->cm_flags & MPR_CM_FLAGS_ON_RECOVERY) { TAILQ_REMOVE(&cm->cm_targ->timedout_commands, cm, cm_recovery); KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, - ("Not busy for CM_FLAGS_TIMEDOUT: %d\n", cm->cm_state)); + ("Not busy for CM_FLAGS_TIMEDOUT: %u\n", cm->cm_state)); cm->cm_flags &= ~MPR_CM_FLAGS_ON_RECOVERY; if (cm->cm_reply != NULL) mprsas_log_command(cm, MPR_RECOVERY, diff --git a/sys/dev/mpr/mpr_sas_lsi.c b/sys/dev/mpr/mpr_sas_lsi.c --- a/sys/dev/mpr/mpr_sas_lsi.c +++ b/sys/dev/mpr/mpr_sas_lsi.c @@ -125,6 +125,7 @@ static int mprsas_get_sata_identify(struct mpr_softc *sc, u16 handle, Mpi2SataPassthroughReply_t *mpi_reply, char *id_buffer, int sz, u32 devinfo); +static void mprsas_ata_id_complete(struct mpr_softc *, struct mpr_command *); static void mprsas_ata_id_timeout(struct mpr_softc *, struct mpr_command *); int mprsas_get_sas_address_for_sata_disk(struct mpr_softc *sc, u64 *sas_address, u16 handle, u32 device_info, u8 *is_SATA_SSD); @@ -1005,7 +1006,8 @@ * An Abort Task TM should be used instead of a Target Reset, but that * would be much more difficult because targets have not been fully * discovered yet, and LUN's haven't been setup. So, just reset the - * target instead of the LUN. + * target instead of the LUN. The commands should complete once + * the target has been reset. */ for (i = 1; i < sc->num_reqs; i++) { cm = &sc->commands[i]; @@ -1033,16 +1035,6 @@ } } out: - /* - * Free the commands that may not have been freed from the SATA ID call - */ - for (i = 1; i < sc->num_reqs; i++) { - cm = &sc->commands[i]; - if (cm->cm_flags & MPR_CM_FLAGS_SATA_ID_TIMEOUT) { - free(cm->cm_data, M_MPR); - mpr_free_command(sc, cm); - } - } mprsas_startup_decrement(sassc); return (error); } @@ -1218,8 +1210,8 @@ out: /* * If the SATA_ID_TIMEOUT flag has been set for this command, don't free - * it. The command and buffer will be freed after sending an Abort - * Task TM. + * it. The command and buffer will be freed after we send a Target + * Reset TM and the command comes back from the controller. */ if ((cm->cm_flags & MPR_CM_FLAGS_SATA_ID_TIMEOUT) == 0) { mpr_free_command(sc, cm); @@ -1228,6 +1220,22 @@ return (error); } +/* + * This is completion handler to make sure that commands and allocated + * buffers get freed when timed out SATA ID commands finally complete after + * we've reset the target. In the normal case, we wait for the command to + * complete. + */ +static void +mprsas_ata_id_complete(struct mpr_softc *sc, struct mpr_command *cm) +{ + mpr_dprint(sc, MPR_INFO, "%s ATA ID completed late cm %p sc %p\n", + __func__, cm, sc); + + free(cm->cm_data, M_MPR); + mpr_free_command(sc, cm); +} + static void mprsas_ata_id_timeout(struct mpr_softc *sc, struct mpr_command *cm) { @@ -1242,7 +1250,12 @@ * this command has timed out, it's no longer in the queue. */ cm->cm_flags |= MPR_CM_FLAGS_SATA_ID_TIMEOUT; - cm->cm_state = MPR_CM_STATE_BUSY; + + /* + * Since we will no longer be waiting for the command to complete, + * set a completion handler to make sure we free all resources. + */ + cm->cm_complete = mprsas_ata_id_complete; } static int diff --git a/sys/dev/mpr/mprvar.h b/sys/dev/mpr/mprvar.h --- a/sys/dev/mpr/mprvar.h +++ b/sys/dev/mpr/mprvar.h @@ -367,6 +367,7 @@ u_int enable_ssu; int spinup_wait_time; int use_phynum; + int dump_reqs_alltypes; uint64_t chain_alloc_fail; uint64_t prp_page_alloc_fail; struct sysctl_ctx_list sysctl_ctx; @@ -617,7 +618,8 @@ struct mpr_chain *chain, *chain_temp; struct mpr_prp_page *prp_page, *prp_page_temp; - KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, ("state not busy\n")); + KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, + ("state not busy, state = %u\n", cm->cm_state)); if (cm->cm_reply != NULL) mpr_free_reply(sc, cm->cm_reply_data); @@ -658,7 +660,7 @@ return (NULL); KASSERT(cm->cm_state == MPR_CM_STATE_FREE, - ("mpr: Allocating busy command\n")); + ("mpr: Allocating busy command, state = %u\n", cm->cm_state)); TAILQ_REMOVE(&sc->req_list, cm, cm_link); cm->cm_state = MPR_CM_STATE_BUSY; @@ -671,7 +673,8 @@ { struct mpr_chain *chain, *chain_temp; - KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, ("state not busy\n")); + KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, + ("state not busy, state = %u\n", cm->cm_state)); if (cm->cm_reply != NULL) mpr_free_reply(sc, cm->cm_reply_data); @@ -700,7 +703,7 @@ return (NULL); KASSERT(cm->cm_state == MPR_CM_STATE_FREE, - ("mpr: Allocating busy command\n")); + ("mpr: Allocating busy command, state = %u\n", cm->cm_state)); TAILQ_REMOVE(&sc->high_priority_req_list, cm, cm_link); cm->cm_state = MPR_CM_STATE_BUSY; diff --git a/sys/dev/mps/mps.c b/sys/dev/mps/mps.c --- a/sys/dev/mps/mps.c +++ b/sys/dev/mps/mps.c @@ -1112,7 +1112,8 @@ rd.u.high = cm->cm_desc.Words.High; rd.word = htole64(rd.word); - KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, ("command not busy\n")); + KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, + ("command not busy, state = %u\n", cm->cm_state)); cm->cm_state = MPS_CM_STATE_INQUEUE; /* TODO-We may need to make below regwrite atomic */ @@ -1774,6 +1775,11 @@ CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_NEEDGIANT, sc, 0, mps_dump_reqs, "I", "Dump Active Requests"); + SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), + OID_AUTO, "dump_reqs_alltypes", CTLFLAG_RW, + &sc->dump_reqs_alltypes, 0, + "dump all request types not just inqueue"); + SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "use_phy_num", CTLFLAG_RD, &sc->use_phynum, 0, "Use the phy number for enumeration"); @@ -1947,7 +1953,7 @@ /* Best effort, no locking */ for (i = smid; i < numreqs; i++) { cm = &sc->commands[i]; - if (cm->cm_state != state) + if ((sc->dump_reqs_alltypes == 0) && (cm->cm_state != state)) continue; hdr.smid = i; hdr.state = cm->cm_state; @@ -2206,6 +2212,9 @@ return; } + KASSERT(cm->cm_state == MPS_CM_STATE_INQUEUE, + ("command not inqueue, state = %u\n", cm->cm_state)); + cm->cm_state = MPS_CM_STATE_BUSY; if (cm->cm_flags & MPS_CM_FLAGS_POLLED) cm->cm_flags |= MPS_CM_FLAGS_COMPLETE; @@ -2382,9 +2391,6 @@ switch (flags) { case MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS: cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)]; - KASSERT(cm->cm_state == MPS_CM_STATE_INQUEUE, - ("command not inqueue\n")); - cm->cm_state = MPS_CM_STATE_BUSY; cm->cm_reply = NULL; break; case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY: @@ -2460,7 +2466,6 @@ cm = &sc->commands[ le16toh(desc->AddressReply.SMID)]; if (cm->cm_state == MPS_CM_STATE_INQUEUE) { - cm->cm_state = MPS_CM_STATE_BUSY; cm->cm_reply = reply; cm->cm_reply_data = le32toh( desc->AddressReply.ReplyFrameAddress); diff --git a/sys/dev/mps/mps_sas.c b/sys/dev/mps/mps_sas.c --- a/sys/dev/mps/mps_sas.c +++ b/sys/dev/mps/mps_sas.c @@ -1170,7 +1170,7 @@ "task mgmt %p timed out\n", tm); KASSERT(tm->cm_state == MPS_CM_STATE_INQUEUE, - ("command not inqueue\n")); + ("command not inqueue, state = %u\n", tm->cm_state)); tm->cm_state = MPS_CM_STATE_BUSY; mps_reinit(sc); @@ -2015,7 +2015,7 @@ if (cm->cm_flags & MPS_CM_FLAGS_ON_RECOVERY) { TAILQ_REMOVE(&cm->cm_targ->timedout_commands, cm, cm_recovery); KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, - ("Not busy for CM_FLAGS_TIMEDOUT: %d\n", cm->cm_state)); + ("Not busy for CM_FLAGS_TIMEDOUT: %u\n", cm->cm_state)); cm->cm_flags &= ~MPS_CM_FLAGS_ON_RECOVERY; if (cm->cm_reply != NULL) mpssas_log_command(cm, MPS_RECOVERY, diff --git a/sys/dev/mps/mps_sas_lsi.c b/sys/dev/mps/mps_sas_lsi.c --- a/sys/dev/mps/mps_sas_lsi.c +++ b/sys/dev/mps/mps_sas_lsi.c @@ -123,6 +123,7 @@ static int mpssas_get_sata_identify(struct mps_softc *sc, u16 handle, Mpi2SataPassthroughReply_t *mpi_reply, char *id_buffer, int sz, u32 devinfo); +static void mpssas_ata_id_complete(struct mps_softc *, struct mps_command *); static void mpssas_ata_id_timeout(struct mps_softc *, struct mps_command *); int mpssas_get_sas_address_for_sata_disk(struct mps_softc *sc, u64 *sas_address, u16 handle, u32 device_info, u8 *is_SATA_SSD); @@ -780,7 +781,8 @@ * An Abort Task TM should be used instead of a Target Reset, but that * would be much more difficult because targets have not been fully * discovered yet, and LUN's haven't been setup. So, just reset the - * target instead of the LUN. + * target instead of the LUN. The commands should complete once the + * target has been reset. */ for (i = 1; i < sc->num_reqs; i++) { cm = &sc->commands[i]; @@ -808,16 +810,6 @@ } } out: - /* - * Free the commands that may not have been freed from the SATA ID call - */ - for (i = 1; i < sc->num_reqs; i++) { - cm = &sc->commands[i]; - if (cm->cm_flags & MPS_CM_FLAGS_SATA_ID_TIMEOUT) { - free(cm->cm_data, M_MPT2); - mps_free_command(sc, cm); - } - } mpssas_startup_decrement(sassc); return (error); } @@ -993,8 +985,8 @@ out: /* * If the SATA_ID_TIMEOUT flag has been set for this command, don't free - * it. The command and buffer will be freed after sending an Abort - * Task TM. + * it. The command and buffer will be freed after we send a Target + * Reset TM and the command comes back from the controller. */ if ((cm->cm_flags & MPS_CM_FLAGS_SATA_ID_TIMEOUT) == 0) { mps_free_command(sc, cm); @@ -1003,21 +995,41 @@ return (error); } +/* + * This is completion handler to make sure that commands and allocated + * buffers get freed when timed out SATA ID commands finally complete after + * we've reset the target. In the normal case, we wait for the command to + * complete. + */ static void -mpssas_ata_id_timeout(struct mps_softc *sc, struct mps_command *cm) +mpssas_ata_id_complete(struct mps_softc *sc, struct mps_command *cm) { + mps_dprint(sc, MPS_INFO, "%s ATA ID completed late cm %p sc %p\n", + __func__, cm, sc); + + free(cm->cm_data, M_MPT2); + mps_free_command(sc, cm); +} + +static void +mpssas_ata_id_timeout(struct mps_softc *sc, struct mps_command *cm) +{ mps_dprint(sc, MPS_INFO, "%s ATA ID command timeout cm %p sc %p\n", __func__, cm, sc); /* * The Abort Task cannot be sent from here because the driver has not * completed setting up targets. Instead, the command is flagged so - * that special handling will be used to send the abort. Now that - * this command has timed out, it's no longer in the queue. + * that special handling will be used to send a target reset. */ cm->cm_flags |= MPS_CM_FLAGS_SATA_ID_TIMEOUT; - cm->cm_state = MPS_CM_STATE_BUSY; + + /* + * Since we will no longer be waiting for the command to complete, + * set a completion handler to make sure we free all resources. + */ + cm->cm_complete = mpssas_ata_id_complete; } static int diff --git a/sys/dev/mps/mpsvar.h b/sys/dev/mps/mpsvar.h --- a/sys/dev/mps/mpsvar.h +++ b/sys/dev/mps/mpsvar.h @@ -326,6 +326,7 @@ u_int enable_ssu; int spinup_wait_time; int use_phynum; + int dump_reqs_alltypes; uint64_t chain_alloc_fail; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; @@ -548,7 +549,7 @@ struct mps_chain *chain, *chain_temp; KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, - ("state not busy: %d\n", cm->cm_state)); + ("state not busy: %u\n", cm->cm_state)); if (cm->cm_reply != NULL) mps_free_reply(sc, cm->cm_reply_data); @@ -584,7 +585,7 @@ return (NULL); KASSERT(cm->cm_state == MPS_CM_STATE_FREE, - ("mps: Allocating busy command: %d\n", cm->cm_state)); + ("mps: Allocating busy command: %u\n", cm->cm_state)); TAILQ_REMOVE(&sc->req_list, cm, cm_link); cm->cm_state = MPS_CM_STATE_BUSY; @@ -598,7 +599,7 @@ struct mps_chain *chain, *chain_temp; KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, - ("state not busy: %d\n", cm->cm_state)); + ("state not busy: %u\n", cm->cm_state)); if (cm->cm_reply != NULL) mps_free_reply(sc, cm->cm_reply_data); @@ -627,7 +628,7 @@ return (NULL); KASSERT(cm->cm_state == MPS_CM_STATE_FREE, - ("mps: Allocating high priority busy command: %d\n", cm->cm_state)); + ("mps: Allocating high priority busy command: %u\n", cm->cm_state)); TAILQ_REMOVE(&sc->high_priority_req_list, cm, cm_link); cm->cm_state = MPS_CM_STATE_BUSY;