Index: head/sys/dev/mpr/mpr.c =================================================================== --- head/sys/dev/mpr/mpr.c +++ head/sys/dev/mpr/mpr.c @@ -2617,12 +2617,17 @@ } else { cm = &sc->commands[ le16toh(desc->AddressReply.SMID)]; - if (cm->cm_state != MPR_CM_STATE_TIMEDOUT) - cm->cm_state = MPR_CM_STATE_BUSY; - cm->cm_reply = reply; - cm->cm_reply_data = - le32toh(desc->AddressReply. - ReplyFrameAddress); + if (cm->cm_state == MPR_CM_STATE_INQUEUE) { + cm->cm_reply = reply; + cm->cm_reply_data = + le32toh(desc->AddressReply. + ReplyFrameAddress); + } else { + mpr_dprint(sc, MPR_RECOVERY, + "Bad state for ADDRESS_REPLY status," + " ignoring state %d cm %p\n", + cm->cm_state, cm); + } } break; } Index: head/sys/dev/mpr/mpr_sas.c =================================================================== --- head/sys/dev/mpr/mpr_sas.c +++ head/sys/dev/mpr/mpr_sas.c @@ -1692,7 +1692,7 @@ * and been re-used, though this is unlikely. */ mpr_intr_locked(sc); - if (cm->cm_state != MPR_CM_STATE_INQUEUE) { + if (cm->cm_flags & MPR_CM_FLAGS_ON_RECOVERY) { mprsas_log_command(cm, MPR_XINFO, "SCSI command %p almost timed out\n", cm); return; @@ -1721,7 +1721,7 @@ * operational. if not, do a diag reset. */ mprsas_set_ccbstatus(cm->cm_ccb, CAM_CMD_TIMEOUT); - cm->cm_state = MPR_CM_STATE_TIMEDOUT; + cm->cm_flags |= MPR_CM_FLAGS_ON_RECOVERY | MPR_CM_FLAGS_TIMEDOUT; TAILQ_INSERT_TAIL(&targ->timedout_commands, cm, cm_recovery); if (targ->tm != NULL) { @@ -2529,9 +2529,11 @@ TAILQ_REMOVE(&cm->cm_targ->commands, cm, cm_link); ccb->ccb_h.status &= ~(CAM_STATUS_MASK | CAM_SIM_QUEUED); - if (cm->cm_state == MPR_CM_STATE_TIMEDOUT) { + if (cm->cm_flags & MPR_CM_FLAGS_ON_RECOVERY) { TAILQ_REMOVE(&cm->cm_targ->timedout_commands, cm, cm_recovery); - cm->cm_state = MPR_CM_STATE_BUSY; + KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, + ("Not busy for CM_FLAGS_TIMEDOUT: %d\n", cm->cm_state)); + cm->cm_flags &= ~MPR_CM_FLAGS_ON_RECOVERY; if (cm->cm_reply != NULL) mprsas_log_command(cm, MPR_RECOVERY, "completed timedout cm %p ccb %p during recovery " @@ -2817,7 +2819,7 @@ * retry counter), the only difference is what gets printed * on the console. */ - if (cm->cm_state == MPR_CM_STATE_TIMEDOUT) + if (cm->cm_flags & MPR_CM_FLAGS_TIMEDOUT) mprsas_set_ccbstatus(ccb, CAM_CMD_TIMEOUT); else mprsas_set_ccbstatus(ccb, CAM_REQ_ABORTED); Index: head/sys/dev/mpr/mpr_sas_lsi.c =================================================================== --- head/sys/dev/mpr/mpr_sas_lsi.c +++ head/sys/dev/mpr/mpr_sas_lsi.c @@ -1017,7 +1017,7 @@ cm = &sc->commands[i]; if (cm->cm_flags & MPR_CM_FLAGS_SATA_ID_TIMEOUT) { targ->timeouts++; - cm->cm_state = MPR_CM_STATE_TIMEDOUT; + cm->cm_flags |= MPR_CM_FLAGS_TIMEDOUT; if ((targ->tm = mprsas_alloc_tm(sc)) != NULL) { mpr_dprint(sc, MPR_INFO, "%s: sending Target " @@ -1244,9 +1244,11 @@ /* * The Abort Task cannot be sent from here because the driver has not * completed setting up targets. Instead, the command is flagged so - * that special handling will be used to send the abort. + * that special handling will be used to send the abort. Now that + * this command has timed out, it's no longer in the queue. */ cm->cm_flags |= MPR_CM_FLAGS_SATA_ID_TIMEOUT; + cm->cm_state = MPR_CM_STATE_BUSY; } static int Index: head/sys/dev/mpr/mprvar.h =================================================================== --- head/sys/dev/mpr/mprvar.h +++ head/sys/dev/mpr/mprvar.h @@ -275,11 +275,12 @@ #define MPR_CM_FLAGS_ERROR_MASK MPR_CM_FLAGS_CHAIN_FAILED #define MPR_CM_FLAGS_USE_CCB (1 << 9) #define MPR_CM_FLAGS_SATA_ID_TIMEOUT (1 << 10) +#define MPR_CM_FLAGS_ON_RECOVERY (1 << 12) +#define MPR_CM_FLAGS_TIMEDOUT (1 << 13) u_int cm_state; #define MPR_CM_STATE_FREE 0 #define MPR_CM_STATE_BUSY 1 -#define MPR_CM_STATE_TIMEDOUT 2 -#define MPR_CM_STATE_INQUEUE 3 +#define MPR_CM_STATE_INQUEUE 2 bus_dmamap_t cm_dmamap; struct scsi_sense_data *cm_sense; uint64_t *nvme_error_response; Index: head/sys/dev/mps/mps.c =================================================================== --- head/sys/dev/mps/mps.c +++ head/sys/dev/mps/mps.c @@ -2479,13 +2479,24 @@ (MPI2_EVENT_NOTIFICATION_REPLY *) reply); } else { + /* + * Ignore commands not in INQUEUE state + * since they've already been completed + * via another path. + */ cm = &sc->commands[ le16toh(desc->AddressReply.SMID)]; - if (cm->cm_state != MPS_CM_STATE_TIMEDOUT) + if (cm->cm_state == MPS_CM_STATE_INQUEUE) { cm->cm_state = MPS_CM_STATE_BUSY; - cm->cm_reply = reply; - cm->cm_reply_data = le32toh( - desc->AddressReply.ReplyFrameAddress); + cm->cm_reply = reply; + cm->cm_reply_data = le32toh( + desc->AddressReply.ReplyFrameAddress); + } else { + mps_dprint(sc, MPS_RECOVERY, + "Bad state for ADDRESS_REPLY status," + " ignoring state %d cm %p\n", + cm->cm_state, cm); + } } break; } Index: head/sys/dev/mps/mps_sas.c =================================================================== --- head/sys/dev/mps/mps_sas.c +++ head/sys/dev/mps/mps_sas.c @@ -1602,7 +1602,7 @@ * and been re-used, though this is unlikely. */ mps_intr_locked(sc); - if (cm->cm_state != MPS_CM_STATE_INQUEUE) { + if (cm->cm_flags & MPS_CM_FLAGS_ON_RECOVERY) { mpssas_log_command(cm, MPS_XINFO, "SCSI command %p almost timed out\n", cm); return; @@ -1626,7 +1626,7 @@ * operational. if not, do a diag reset. */ mpssas_set_ccbstatus(cm->cm_ccb, CAM_CMD_TIMEOUT); - cm->cm_state = MPS_CM_STATE_TIMEDOUT; + cm->cm_flags |= MPS_CM_FLAGS_ON_RECOVERY | MPS_CM_FLAGS_TIMEDOUT; TAILQ_INSERT_TAIL(&targ->timedout_commands, cm, cm_recovery); if (targ->tm != NULL) { @@ -2040,9 +2040,11 @@ biotrack(ccb->csio.bio, __func__); #endif - if (cm->cm_state == MPS_CM_STATE_TIMEDOUT) { + if (cm->cm_flags & MPS_CM_FLAGS_ON_RECOVERY) { TAILQ_REMOVE(&cm->cm_targ->timedout_commands, cm, cm_recovery); - cm->cm_state = MPS_CM_STATE_BUSY; + KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, + ("Not busy for CM_FLAGS_TIMEDOUT: %d\n", cm->cm_state)); + cm->cm_flags &= ~MPS_CM_FLAGS_ON_RECOVERY; if (cm->cm_reply != NULL) mpssas_log_command(cm, MPS_RECOVERY, "completed timedout cm %p ccb %p during recovery " @@ -2325,7 +2327,7 @@ * retry counter), the only difference is what gets printed * on the console. */ - if (cm->cm_state == MPS_CM_STATE_TIMEDOUT) + if (cm->cm_flags & MPS_CM_FLAGS_TIMEDOUT) mpssas_set_ccbstatus(ccb, CAM_CMD_TIMEOUT); else mpssas_set_ccbstatus(ccb, CAM_REQ_ABORTED); Index: head/sys/dev/mps/mps_sas_lsi.c =================================================================== --- head/sys/dev/mps/mps_sas_lsi.c +++ head/sys/dev/mps/mps_sas_lsi.c @@ -790,7 +790,7 @@ cm = &sc->commands[i]; if (cm->cm_flags & MPS_CM_FLAGS_SATA_ID_TIMEOUT) { targ->timeouts++; - cm->cm_state = MPS_CM_STATE_TIMEDOUT; + cm->cm_flags |= MPS_CM_FLAGS_TIMEDOUT; if ((targ->tm = mpssas_alloc_tm(sc)) != NULL) { mps_dprint(sc, MPS_INFO, "%s: sending Target " @@ -1017,9 +1017,11 @@ /* * The Abort Task cannot be sent from here because the driver has not * completed setting up targets. Instead, the command is flagged so - * that special handling will be used to send the abort. + * that special handling will be used to send the abort. Now that + * this command has timed out, it's no longer in the queue. */ cm->cm_flags |= MPS_CM_FLAGS_SATA_ID_TIMEOUT; + cm->cm_state = MPS_CM_STATE_BUSY; } static int Index: head/sys/dev/mps/mpsvar.h =================================================================== --- head/sys/dev/mps/mpsvar.h +++ head/sys/dev/mps/mpsvar.h @@ -242,11 +242,12 @@ #define MPS_CM_FLAGS_ERROR_MASK MPS_CM_FLAGS_CHAIN_FAILED #define MPS_CM_FLAGS_USE_CCB (1 << 10) #define MPS_CM_FLAGS_SATA_ID_TIMEOUT (1 << 11) +#define MPS_CM_FLAGS_ON_RECOVERY (1 << 12) +#define MPS_CM_FLAGS_TIMEDOUT (1 << 13) u_int cm_state; #define MPS_CM_STATE_FREE 0 #define MPS_CM_STATE_BUSY 1 -#define MPS_CM_STATE_TIMEDOUT 2 -#define MPS_CM_STATE_INQUEUE 3 +#define MPS_CM_STATE_INQUEUE 2 bus_dmamap_t cm_dmamap; struct scsi_sense_data *cm_sense; TAILQ_HEAD(, mps_chain) cm_chain_list; @@ -545,7 +546,8 @@ { struct mps_chain *chain, *chain_temp; - KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, ("state not busy\n")); + KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, + ("state not busy: %d\n", cm->cm_state)); if (cm->cm_reply != NULL) mps_free_reply(sc, cm->cm_reply_data); @@ -581,7 +583,7 @@ return (NULL); KASSERT(cm->cm_state == MPS_CM_STATE_FREE, - ("mps: Allocating busy command\n")); + ("mps: Allocating busy command: %d\n", cm->cm_state)); TAILQ_REMOVE(&sc->req_list, cm, cm_link); cm->cm_state = MPS_CM_STATE_BUSY; @@ -594,7 +596,8 @@ { struct mps_chain *chain, *chain_temp; - KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, ("state not busy\n")); + KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, + ("state not busy: %d\n", cm->cm_state)); if (cm->cm_reply != NULL) mps_free_reply(sc, cm->cm_reply_data); @@ -623,7 +626,7 @@ return (NULL); KASSERT(cm->cm_state == MPS_CM_STATE_FREE, - ("mps: Allocating busy command\n")); + ("mps: Allocating high priority busy command: %d\n", cm->cm_state)); TAILQ_REMOVE(&sc->high_priority_req_list, cm, cm_link); cm->cm_state = MPS_CM_STATE_BUSY;