diff --git a/sys/dev/bnxt/bnxt_en/bnxt.h b/sys/dev/bnxt/bnxt_en/bnxt.h --- a/sys/dev/bnxt/bnxt_en/bnxt.h +++ b/sys/dev/bnxt/bnxt_en/bnxt.h @@ -144,6 +144,23 @@ #define BNXT_EVENT_THERMAL_CURRENT_TEMP(data2) \ ((data2) & HWRM_ASYNC_EVENT_CMPL_ERROR_REPORT_THERMAL_EVENT_DATA2_CURRENT_TEMP_MASK) +#define EVENT_DATA1_RESET_NOTIFY_FW_ACTIVATION(data1) \ + (((data1) & \ + HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK) ==\ + HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_ACTIVATION) + +#define EVENT_DATA2_RESET_NOTIFY_FW_STATUS_CODE(data2) \ + ((data2) & \ + HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA2_FW_STATUS_CODE_MASK) + +#define EVENT_DATA1_RECOVERY_ENABLED(data1) \ + !!((data1) & \ + HWRM_ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_RECOVERY_ENABLED) + +#define EVENT_DATA1_RECOVERY_MASTER_FUNC(data1) \ + !!((data1) & \ + HWRM_ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASTER_FUNC) + #define INVALID_STATS_CTX_ID -1 /* Maximum numbers of RX and TX descriptors. 
iflib requires this to be a power @@ -834,6 +851,124 @@ uint32_t vector; }; +enum bnxt_health_severity { + SEVERITY_NORMAL = 0, + SEVERITY_WARNING, + SEVERITY_RECOVERABLE, + SEVERITY_FATAL, +}; + +enum bnxt_health_remedy { + REMEDY_DEVLINK_RECOVER, + REMEDY_POWER_CYCLE_DEVICE, + REMEDY_POWER_CYCLE_HOST, + REMEDY_FW_UPDATE, + REMEDY_HW_REPLACE, +}; + +struct bnxt_fw_health { + u32 flags; + u32 polling_dsecs; + u32 master_func_wait_dsecs; + u32 normal_func_wait_dsecs; + u32 post_reset_wait_dsecs; + u32 post_reset_max_wait_dsecs; + u32 regs[4]; + u32 mapped_regs[4]; +#define BNXT_FW_HEALTH_REG 0 +#define BNXT_FW_HEARTBEAT_REG 1 +#define BNXT_FW_RESET_CNT_REG 2 +#define BNXT_FW_RESET_INPROG_REG 3 + u32 fw_reset_inprog_reg_mask; + u32 last_fw_heartbeat; + u32 last_fw_reset_cnt; + u8 enabled:1; + u8 primary:1; + u8 status_reliable:1; + u8 resets_reliable:1; + u8 tmr_multiplier; + u8 tmr_counter; + u8 fw_reset_seq_cnt; + u32 fw_reset_seq_regs[16]; + u32 fw_reset_seq_vals[16]; + u32 fw_reset_seq_delay_msec[16]; + u32 echo_req_data1; + u32 echo_req_data2; + struct devlink_health_reporter *fw_reporter; + struct mutex lock; + enum bnxt_health_severity severity; + enum bnxt_health_remedy remedy; + u32 arrests; + u32 discoveries; + u32 survivals; + u32 fatalities; + u32 diagnoses; +}; + +#define BNXT_FW_HEALTH_REG_TYPE_MASK 3 +#define BNXT_FW_HEALTH_REG_TYPE_CFG 0 +#define BNXT_FW_HEALTH_REG_TYPE_GRC 1 +#define BNXT_FW_HEALTH_REG_TYPE_BAR0 2 +#define BNXT_FW_HEALTH_REG_TYPE_BAR1 3 + +#define BNXT_FW_HEALTH_REG_TYPE(reg) ((reg) & BNXT_FW_HEALTH_REG_TYPE_MASK) +#define BNXT_FW_HEALTH_REG_OFF(reg) ((reg) & ~BNXT_FW_HEALTH_REG_TYPE_MASK) + +#define BNXT_FW_HEALTH_WIN_BASE 0x3000 +#define BNXT_FW_HEALTH_WIN_MAP_OFF 8 + +#define BNXT_FW_HEALTH_WIN_OFF(reg) (BNXT_FW_HEALTH_WIN_BASE + \ + ((reg) & BNXT_GRC_OFFSET_MASK)) + +#define BNXT_FW_STATUS_HEALTH_MSK 0xffff +#define BNXT_FW_STATUS_HEALTHY 0x8000 +#define BNXT_FW_STATUS_SHUTDOWN 0x100000 +#define BNXT_FW_STATUS_RECOVERING 0x400000 
+ +#define BNXT_FW_IS_HEALTHY(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) ==\ + BNXT_FW_STATUS_HEALTHY) + +#define BNXT_FW_IS_BOOTING(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) < \ + BNXT_FW_STATUS_HEALTHY) + +#define BNXT_FW_IS_ERR(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) > \ + BNXT_FW_STATUS_HEALTHY) + +#define BNXT_FW_IS_RECOVERING(sts) (BNXT_FW_IS_ERR(sts) && \ + ((sts) & BNXT_FW_STATUS_RECOVERING)) + +#define BNXT_FW_RETRY 5 +#define BNXT_FW_IF_RETRY 10 +#define BNXT_FW_SLOT_RESET_RETRY 4 + +#define BNXT_GRCPF_REG_CHIMP_COMM 0x0 +#define BNXT_GRCPF_REG_CHIMP_COMM_TRIGGER 0x100 +#define BNXT_GRCPF_REG_WINDOW_BASE_OUT 0x400 +#define BNXT_GRCPF_REG_SYNC_TIME 0x480 +#define BNXT_GRCPF_REG_SYNC_TIME_ADJ 0x488 +#define BNXT_GRCPF_REG_SYNC_TIME_ADJ_PER_MSK 0xffffffUL +#define BNXT_GRCPF_REG_SYNC_TIME_ADJ_PER_SFT 0 +#define BNXT_GRCPF_REG_SYNC_TIME_ADJ_VAL_MSK 0x1f000000UL +#define BNXT_GRCPF_REG_SYNC_TIME_ADJ_VAL_SFT 24 +#define BNXT_GRCPF_REG_SYNC_TIME_ADJ_SIGN_MSK 0x20000000UL +#define BNXT_GRCPF_REG_SYNC_TIME_ADJ_SIGN_SFT 29 + +#define BNXT_GRC_REG_STATUS_P5 0x520 + +#define BNXT_GRCPF_REG_KONG_COMM 0xA00 +#define BNXT_GRCPF_REG_KONG_COMM_TRIGGER 0xB00 + +#define BNXT_CAG_REG_LEGACY_INT_STATUS 0x4014 +#define BNXT_CAG_REG_BASE 0x300000 + +#define BNXT_GRC_REG_CHIP_NUM 0x48 +#define BNXT_GRC_REG_BASE 0x260000 + +#define BNXT_TS_REG_TIMESYNC_TS0_LOWER 0x640180c +#define BNXT_TS_REG_TIMESYNC_TS0_UPPER 0x6401810 + +#define BNXT_GRC_BASE_MASK 0xfffff000 +#define BNXT_GRC_OFFSET_MASK 0x00000ffc struct bnxt_softc { device_t dev; if_ctx_t ctx; @@ -1080,7 +1215,49 @@ test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) struct pci_dev *pdev; - int fw_reset_state; + struct work_struct sp_task; + unsigned long sp_event; +#define BNXT_RX_MASK_SP_EVENT 0 +#define BNXT_RX_NTP_FLTR_SP_EVENT 1 +#define BNXT_LINK_CHNG_SP_EVENT 2 +#define BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT 3 +#define BNXT_VXLAN_ADD_PORT_SP_EVENT 4 +#define BNXT_VXLAN_DEL_PORT_SP_EVENT 5 +#define BNXT_RESET_TASK_SP_EVENT 6 
+#define BNXT_RST_RING_SP_EVENT 7 +#define BNXT_HWRM_PF_UNLOAD_SP_EVENT 8 +#define BNXT_PERIODIC_STATS_SP_EVENT 9 +#define BNXT_HWRM_PORT_MODULE_SP_EVENT 10 +#define BNXT_RESET_TASK_SILENT_SP_EVENT 11 +#define BNXT_GENEVE_ADD_PORT_SP_EVENT 12 +#define BNXT_GENEVE_DEL_PORT_SP_EVENT 13 +#define BNXT_LINK_SPEED_CHNG_SP_EVENT 14 +#define BNXT_FLOW_STATS_SP_EVENT 15 +#define BNXT_UPDATE_PHY_SP_EVENT 16 +#define BNXT_RING_COAL_NOW_SP_EVENT 17 +#define BNXT_FW_RESET_NOTIFY_SP_EVENT 18 +#define BNXT_FW_EXCEPTION_SP_EVENT 19 +#define BNXT_VF_VNIC_CHANGE_SP_EVENT 20 +#define BNXT_LINK_CFG_CHANGE_SP_EVENT 21 +#define BNXT_PTP_CURRENT_TIME_EVENT 22 +#define BNXT_FW_ECHO_REQUEST_SP_EVENT 23 +#define BNXT_VF_CFG_CHNG_SP_EVENT 24 + + struct delayed_work fw_reset_task; + int fw_reset_state; +#define BNXT_FW_RESET_STATE_POLL_VF 1 +#define BNXT_FW_RESET_STATE_RESET_FW 2 +#define BNXT_FW_RESET_STATE_ENABLE_DEV 3 +#define BNXT_FW_RESET_STATE_POLL_FW 4 +#define BNXT_FW_RESET_STATE_OPENING 5 +#define BNXT_FW_RESET_STATE_POLL_FW_DOWN 6 + u16 fw_reset_min_dsecs; +#define BNXT_DFLT_FW_RST_MIN_DSECS 20 + u16 fw_reset_max_dsecs; +#define BNXT_DFLT_FW_RST_MAX_DSECS 60 + unsigned long fw_reset_timestamp; + + struct bnxt_fw_health *fw_health; }; struct bnxt_filter_info { diff --git a/sys/dev/bnxt/bnxt_en/bnxt_hwrm.h b/sys/dev/bnxt/bnxt_en/bnxt_hwrm.h --- a/sys/dev/bnxt/bnxt_en/bnxt_hwrm.h +++ b/sys/dev/bnxt/bnxt_en/bnxt_hwrm.h @@ -32,10 +32,11 @@ #define BNXT_PAUSE_TX (HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_TX) #define BNXT_PAUSE_RX (HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_RX) -#define BNXT_AUTO_PAUSE_AUTONEG_PAUSE \ - (HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_PAUSE_AUTONEG_PAUSE) +#define BNXT_AUTO_PAUSE_AUTONEG_PAUSE \ + (HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_PAUSE_AUTONEG_PAUSE) #define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input) #define BNXT_BACKING_STORE_CFG_LEGACY_LEN 256 +#define SHORT_HWRM_CMD_TIMEOUT 500 /* HWRM Function Prototypes */ int @@ -48,7 +49,8 @@ struct bnxt_ring *ring, int cmpl_ring_id); int 
bnxt_hwrm_ver_get(struct bnxt_softc *softc); int bnxt_hwrm_queue_qportcfg(struct bnxt_softc *softc, uint32_t path_dir); -int bnxt_hwrm_func_drv_rgtr(struct bnxt_softc *softc); +int bnxt_hwrm_func_drv_rgtr(struct bnxt_softc *bp, unsigned long *bmap, int bmap_size, + bool async_only); int bnxt_hwrm_func_drv_unrgtr(struct bnxt_softc *softc, bool shutdown); int bnxt_hwrm_func_qcaps(struct bnxt_softc *softc); int bnxt_hwrm_func_qcfg(struct bnxt_softc *softc); diff --git a/sys/dev/bnxt/bnxt_en/bnxt_hwrm.c b/sys/dev/bnxt/bnxt_en/bnxt_hwrm.c --- a/sys/dev/bnxt/bnxt_en/bnxt_hwrm.c +++ b/sys/dev/bnxt/bnxt_en/bnxt_hwrm.c @@ -933,22 +933,99 @@ return rc; } -int -bnxt_hwrm_func_drv_rgtr(struct bnxt_softc *softc) -{ +static const u16 bnxt_async_events_arr[] = { + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DEFAULT_VNIC_CHANGE, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE, +}; + +int bnxt_hwrm_func_drv_rgtr(struct bnxt_softc *bp, unsigned long *bmap, int bmap_size, + bool async_only) +{ + DECLARE_BITMAP(async_events_bmap, 256); + u32 *events = (u32 *)async_events_bmap; + struct hwrm_func_drv_rgtr_output *resp = + (void *)bp->hwrm_cmd_resp.idi_vaddr; struct hwrm_func_drv_rgtr_input req = {0}; + u32 flags = 0; + int rc; + int i; - 
bnxt_hwrm_cmd_hdr_init(softc, &req, HWRM_FUNC_DRV_RGTR); - - req.enables = htole32(HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_VER | - HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_OS_TYPE); - req.os_type = htole16(HWRM_FUNC_DRV_RGTR_INPUT_OS_TYPE_FREEBSD); - + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_DRV_RGTR); req.ver_maj = HWRM_VERSION_MAJOR; req.ver_min = HWRM_VERSION_MINOR; req.ver_upd = HWRM_VERSION_UPDATE; - return hwrm_send_message(softc, &req, sizeof(req)); + req.enables = htole32(HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_OS_TYPE | + HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_VER | + HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_ASYNC_EVENT_FWD); + + if (bp->fw_cap & BNXT_FW_CAP_HOT_RESET) + flags |= HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_HOT_RESET_SUPPORT; + if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) + flags |= HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_ERROR_RECOVERY_SUPPORT | + HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_MASTER_SUPPORT; + if (bp->fw_cap & BNXT_FW_CAP_NPAR_1_2) + flags |= HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_NPAR_1_2_SUPPORT; + flags |= HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_ASYM_QUEUE_CFG_SUPPORT; + req.flags = htole32(flags); + req.os_type = htole16(HWRM_FUNC_DRV_RGTR_INPUT_OS_TYPE_FREEBSD); + + if (BNXT_PF(bp)) { + req.enables |= + htole32(HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_VF_REQ_FWD); + } + + if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) + req.flags |= cpu_to_le32(HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_FLOW_HANDLE_64BIT_MODE); + + memset(async_events_bmap, 0, sizeof(async_events_bmap)); + for (i = 0; i < ARRAY_SIZE(bnxt_async_events_arr); i++) { + u16 event_id = bnxt_async_events_arr[i]; + + if (event_id == HWRM_ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY && + !(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)) { + continue; + } + __set_bit(bnxt_async_events_arr[i], async_events_bmap); + } + if (bmap && bmap_size) { + for (i = 0; i < bmap_size; i++) { + if (test_bit(i, bmap)) + __set_bit(i, async_events_bmap); + } + } + for (i = 0; i < 8; i++) + req.async_event_fwd[i] |= htole32(events[i]); + + if (async_only) + req.enables = + 
htole32(HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_ASYNC_EVENT_FWD); + + rc = hwrm_send_message(bp, &req, sizeof(req)); + + if (!rc) { + if (resp->flags & + le32toh(HWRM_FUNC_DRV_RGTR_OUTPUT_FLAGS_IF_CHANGE_SUPPORTED)) + bp->fw_cap |= BNXT_FW_CAP_IF_CHANGE; + } + + + return rc; } int @@ -2994,56 +3071,6 @@ return rc; } -int bnxt_hwrm_func_rgtr_async_events(struct bnxt_softc *softc, unsigned long *bmap, - int bmap_size) -{ - struct hwrm_func_drv_rgtr_input req = {0}; - struct hwrm_func_drv_rgtr_output *resp = - (void *)softc->hwrm_cmd_resp.idi_vaddr; - bitstr_t *async_events_bmap; - uint32_t *events; - int i, rc = 0; - -#define BNXT_MAX_NUM_ASYNC_EVENTS 256 - async_events_bmap = bit_alloc(BNXT_MAX_NUM_ASYNC_EVENTS, M_DEVBUF, - M_WAITOK|M_ZERO); - events = (uint32_t *)async_events_bmap; - - bnxt_hwrm_cmd_hdr_init(softc, &req, HWRM_FUNC_DRV_RGTR); - - req.enables = - htole32(HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_ASYNC_EVENT_FWD); - - memset(async_events_bmap, 0, sizeof(BNXT_MAX_NUM_ASYNC_EVENTS / 8)); - - bit_set(async_events_bmap, HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE); - bit_set(async_events_bmap, HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD); - bit_set(async_events_bmap, HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED); - bit_set(async_events_bmap, HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE); - bit_set(async_events_bmap, HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE); - - if (bmap && bmap_size) { - for (i = 0; i < bmap_size; i++) { - if (bit_test(bmap, i)) - bit_set(async_events_bmap, i); - } - } - - for (i = 0; i < 8; i++) - req.async_event_fwd[i] |= htole32(events[i]); - - free(async_events_bmap, M_DEVBUF); - - rc = hwrm_send_message(softc, &req, sizeof(req)); - if (!rc) { - if (resp->flags & - le32toh(HWRM_FUNC_DRV_RGTR_OUTPUT_FLAGS_IF_CHANGE_SUPPORTED)) - softc->fw_cap |= BNXT_FW_CAP_IF_CHANGE; - } - - return rc; -} - void bnxt_hwrm_ring_info_get(struct bnxt_softc *softc, uint8_t ring_type, uint32_t ring_id, uint32_t *prod, uint32_t *cons) { diff 
--git a/sys/dev/bnxt/bnxt_en/bnxt_sysctl.c b/sys/dev/bnxt/bnxt_en/bnxt_sysctl.c --- a/sys/dev/bnxt/bnxt_en/bnxt_sysctl.c +++ b/sys/dev/bnxt/bnxt_en/bnxt_sysctl.c @@ -29,11 +29,17 @@ #include #include #include +#include #include "bnxt.h" #include "bnxt_hwrm.h" #include "bnxt_sysctl.h" +DEFINE_MUTEX(tmp_mutex); /* mutex lock for driver */ +extern void bnxt_fw_reset(struct bnxt_softc *bp); +extern void bnxt_queue_sp_work(struct bnxt_softc *bp); +extern void +process_nq(struct bnxt_softc *softc, uint16_t nqid); /* * We want to create: * dev.bnxt.0.hwstats.txq0 @@ -1549,6 +1555,46 @@ return rc; } +static +void simulate_reset(struct bnxt_softc *bp, char *fwcli_string) +{ + struct hwrm_dbg_fw_cli_input req = {0}; + int rc = 0; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_DBG_FW_CLI); + req.cmpl_ring = -1; + req.target_id = -1; + req.cli_cmd_len = strlen(fwcli_string); + req.host_buf_len = 64 * 1024; + strcpy((char *)req.cli_cmd, fwcli_string); + + BNXT_HWRM_LOCK(bp); + rc = _hwrm_send_message(bp, &req, sizeof(req)); + if (rc) { + device_printf(bp->dev, " Manual FW fault failed, rc:%x\n", rc); + } + BNXT_HWRM_UNLOCK(bp); +} + +static int +bnxt_reset_ctrl(SYSCTL_HANDLER_ARGS) { + struct bnxt_softc *softc = arg1; + int rc = 0; + char buf[50] = {0}; + + if (softc == NULL) + return EBUSY; + + rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); + if (rc || req->newptr == NULL) + return rc; + + if (BNXT_CHIP_P5(softc)) + simulate_reset(softc, buf); + + return rc; +} + int bnxt_create_config_sysctls_pre(struct bnxt_softc *softc) { @@ -1606,6 +1652,10 @@ SYSCTL_ADD_U64(ctx, children, OID_AUTO, "fw_cap", CTLFLAG_RD, &softc->fw_cap, 0, "FW caps"); + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, + "reset_ctrl", CTLTYPE_STRING | CTLFLAG_RWTUN, softc, + 0, bnxt_reset_ctrl, "A", + "Issue controller reset: 0 / 1"); return 0; } diff --git a/sys/dev/bnxt/bnxt_en/bnxt_ulp.c b/sys/dev/bnxt/bnxt_en/bnxt_ulp.c --- a/sys/dev/bnxt/bnxt_en/bnxt_ulp.c +++ b/sys/dev/bnxt/bnxt_en/bnxt_ulp.c @@ -49,6 
+49,8 @@ #include "bnxt_hwrm.h" #include "bnxt_ulp.h" +void bnxt_destroy_irq(struct bnxt_softc *softc); + static int bnxt_register_dev(struct bnxt_en_dev *edev, int ulp_id, struct bnxt_ulp_ops *ulp_ops, void *handle) { @@ -379,12 +381,12 @@ ulp->async_events_bmap = events_bmap; wmb(); ulp->max_async_event_id = max_id; - bnxt_hwrm_func_drv_rgtr(bp); + bnxt_hwrm_func_drv_rgtr(bp, events_bmap, max_id + 1, true); mtx_unlock(&bp->en_ops_lock); return 0; } -static void bnxt_destroy_irq(struct bnxt_softc *softc) +void bnxt_destroy_irq(struct bnxt_softc *softc) { kfree(softc->irq_tbl); } diff --git a/sys/dev/bnxt/bnxt_en/if_bnxt.c b/sys/dev/bnxt/bnxt_en/if_bnxt.c --- a/sys/dev/bnxt/bnxt_en/if_bnxt.c +++ b/sys/dev/bnxt/bnxt_en/if_bnxt.c @@ -169,6 +169,8 @@ SLIST_HEAD(softc_list, bnxt_softc_list) pf_list; int bnxt_num_pfs = 0; +void +process_nq(struct bnxt_softc *softc, uint16_t nqid); static void *bnxt_register(device_t dev); /* Soft queue setup and teardown */ @@ -236,7 +238,10 @@ static void bnxt_get_port_module_status(struct bnxt_softc *softc); static void bnxt_rdma_aux_device_init(struct bnxt_softc *softc); static void bnxt_rdma_aux_device_uninit(struct bnxt_softc *softc); +static void bnxt_queue_fw_reset_work(struct bnxt_softc *bp, unsigned long delay); +void bnxt_queue_sp_work(struct bnxt_softc *bp); +void bnxt_fw_reset(struct bnxt_softc *bp); /* * Device Interface Declaration */ @@ -268,6 +273,27 @@ IFLIB_PNP_INFO(pci, bnxt, bnxt_vendor_info_array); +void writel_fbsd(struct bnxt_softc *bp, u32, u8, u32); +u32 readl_fbsd(struct bnxt_softc *bp, u32, u8); + +u32 readl_fbsd(struct bnxt_softc *bp, u32 reg_off, u8 bar_idx) +{ + + if (!bar_idx) + return bus_space_read_4(bp->doorbell_bar.tag, bp->doorbell_bar.handle, reg_off); + else + return bus_space_read_4(bp->hwrm_bar.tag, bp->hwrm_bar.handle, reg_off); +} + +void writel_fbsd(struct bnxt_softc *bp, u32 reg_off, u8 bar_idx, u32 val) +{ + + if (!bar_idx) + bus_space_write_4(bp->doorbell_bar.tag, bp->doorbell_bar.handle, 
reg_off, htole32(val)); + else + bus_space_write_4(bp->hwrm_bar.tag, bp->hwrm_bar.handle, reg_off, htole32(val)); +} + static DEFINE_IDA(bnxt_aux_dev_ids); static device_method_t bnxt_iflib_methods[] = { @@ -356,6 +382,11 @@ .isc_driver_version = bnxt_driver_version, }; +#define PCI_SUBSYSTEM_ID 0x2e +static struct workqueue_struct *bnxt_pf_wq; + +extern void bnxt_destroy_irq(struct bnxt_softc *softc); + /* * Device Methods */ @@ -665,7 +696,6 @@ "Unable to allocate space for TPA\n"); goto tpa_alloc_fail; } - /* Allocate the AG ring */ softc->ag_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->ag_rings[i].softc = softc; @@ -1040,7 +1070,7 @@ ctx->flags &= ~BNXT_CTX_FLAG_INITED; kfree(ctx); - softc->ctx = NULL; + softc->ctx_mem = NULL; } static int bnxt_alloc_ctx_mem(struct bnxt_softc *softc) @@ -1351,6 +1381,685 @@ softc->max_lltc = min(softc->max_lltc, lltc); } +static int bnxt_hwrm_poll(struct bnxt_softc *bp) +{ + struct hwrm_ver_get_output *resp = + (void *)bp->hwrm_cmd_resp.idi_vaddr; + struct hwrm_ver_get_input req = {0}; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VER_GET); + + req.hwrm_intf_maj = HWRM_VERSION_MAJOR; + req.hwrm_intf_min = HWRM_VERSION_MINOR; + req.hwrm_intf_upd = HWRM_VERSION_UPDATE; + + rc = _hwrm_send_message(bp, &req, sizeof(req)); + if (rc) + return rc; + + if (resp->flags & HWRM_VER_GET_OUTPUT_FLAGS_DEV_NOT_RDY) + rc = -EAGAIN; + + return rc; +} + +static void bnxt_rtnl_lock_sp(struct bnxt_softc *bp) +{ + /* We are called from bnxt_sp_task which has BNXT_STATE_IN_SP_TASK + * set. If the device is being closed, bnxt_close() may be holding + * rtnl() and waiting for BNXT_STATE_IN_SP_TASK to clear. So we + * must clear BNXT_STATE_IN_SP_TASK before holding rtnl(). 
+ */ + clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); + rtnl_lock(); +} + +static void bnxt_rtnl_unlock_sp(struct bnxt_softc *bp) +{ + set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); + rtnl_unlock(); +} + +static void bnxt_fw_fatal_close(struct bnxt_softc *softc) +{ + bnxt_disable_intr(softc->ctx); + if (pci_is_enabled(softc->pdev)) + pci_disable_device(softc->pdev); +} + +static u32 bnxt_fw_health_readl(struct bnxt_softc *bp, int reg_idx) +{ + struct bnxt_fw_health *fw_health = bp->fw_health; + u32 reg = fw_health->regs[reg_idx]; + u32 reg_type, reg_off, val = 0; + + reg_type = BNXT_FW_HEALTH_REG_TYPE(reg); + reg_off = BNXT_FW_HEALTH_REG_OFF(reg); + switch (reg_type) { + case BNXT_FW_HEALTH_REG_TYPE_CFG: + pci_read_config_dword(bp->pdev, reg_off, &val); + break; + case BNXT_FW_HEALTH_REG_TYPE_GRC: + reg_off = fw_health->mapped_regs[reg_idx]; + fallthrough; + case BNXT_FW_HEALTH_REG_TYPE_BAR0: + val = readl_fbsd(bp, reg_off, 0); + break; + case BNXT_FW_HEALTH_REG_TYPE_BAR1: + val = readl_fbsd(bp, reg_off, 2); + break; + } + if (reg_idx == BNXT_FW_RESET_INPROG_REG) + val &= fw_health->fw_reset_inprog_reg_mask; + return val; +} + +static void bnxt_fw_reset_close(struct bnxt_softc *bp) +{ + int i; + bnxt_ulp_stop(bp); + /* When firmware is in fatal state, quiesce device and disable + * bus master to prevent any potential bad DMAs before freeing + * kernel memory. 
+ */ + if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) { + u16 val = 0; + + val = pci_read_config(bp->dev, PCI_SUBSYSTEM_ID, 2); + if (val == 0xffff) { + bp->fw_reset_min_dsecs = 0; + } + bnxt_fw_fatal_close(bp); + } + + iflib_request_reset(bp->ctx); + bnxt_stop(bp->ctx); + bnxt_hwrm_func_drv_unrgtr(bp, false); + + for (i = bp->nrxqsets-1; i>=0; i--) { + if (BNXT_CHIP_P5(bp)) + iflib_irq_free(bp->ctx, &bp->nq_rings[i].irq); + else + iflib_irq_free(bp->ctx, &bp->rx_cp_rings[i].irq); + + } + if (pci_is_enabled(bp->pdev)) + pci_disable_device(bp->pdev); + pci_disable_busmaster(bp->dev); + bnxt_free_ctx_mem(bp); +} + +static bool is_bnxt_fw_ok(struct bnxt_softc *bp) +{ + struct bnxt_fw_health *fw_health = bp->fw_health; + bool no_heartbeat = false, has_reset = false; + u32 val; + + val = bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG); + if (val == fw_health->last_fw_heartbeat) + no_heartbeat = true; + + val = bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); + if (val != fw_health->last_fw_reset_cnt) + has_reset = true; + + if (!no_heartbeat && has_reset) + return true; + + return false; +} + +void bnxt_fw_reset(struct bnxt_softc *bp) +{ + bnxt_rtnl_lock_sp(bp); + if (test_bit(BNXT_STATE_OPEN, &bp->state) && + !test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { + int tmo; + set_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + bnxt_fw_reset_close(bp); + + if ((bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD)) { + bp->fw_reset_state = BNXT_FW_RESET_STATE_POLL_FW_DOWN; + tmo = HZ / 10; + } else { + bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV; + tmo = bp->fw_reset_min_dsecs * HZ /10; + } + bnxt_queue_fw_reset_work(bp, tmo); + } + bnxt_rtnl_unlock_sp(bp); +} + +static void bnxt_queue_fw_reset_work(struct bnxt_softc *bp, unsigned long delay) +{ + if (!(test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))) + return; + + if (BNXT_PF(bp)) + queue_delayed_work(bnxt_pf_wq, &bp->fw_reset_task, delay); + else + schedule_delayed_work(&bp->fw_reset_task, delay); +} + +void 
bnxt_queue_sp_work(struct bnxt_softc *bp) +{ + if (BNXT_PF(bp)) + queue_work(bnxt_pf_wq, &bp->sp_task); + else + schedule_work(&bp->sp_task); +} + +static void bnxt_fw_reset_writel(struct bnxt_softc *bp, int reg_idx) +{ + struct bnxt_fw_health *fw_health = bp->fw_health; + u32 reg = fw_health->fw_reset_seq_regs[reg_idx]; + u32 val = fw_health->fw_reset_seq_vals[reg_idx]; + u32 reg_type, reg_off, delay_msecs; + + delay_msecs = fw_health->fw_reset_seq_delay_msec[reg_idx]; + reg_type = BNXT_FW_HEALTH_REG_TYPE(reg); + reg_off = BNXT_FW_HEALTH_REG_OFF(reg); + switch (reg_type) { + case BNXT_FW_HEALTH_REG_TYPE_CFG: + pci_write_config_dword(bp->pdev, reg_off, val); + break; + case BNXT_FW_HEALTH_REG_TYPE_GRC: + writel_fbsd(bp, BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4, 0, reg_off & BNXT_GRC_BASE_MASK); + reg_off = (reg_off & BNXT_GRC_OFFSET_MASK) + 0x2000; + fallthrough; + case BNXT_FW_HEALTH_REG_TYPE_BAR0: + writel_fbsd(bp, reg_off, 0, val); + break; + case BNXT_FW_HEALTH_REG_TYPE_BAR1: + writel_fbsd(bp, reg_off, 2, val); + break; + } + if (delay_msecs) { + pci_read_config_dword(bp->pdev, 0, &val); + msleep(delay_msecs); + } +} + +static void bnxt_reset_all(struct bnxt_softc *bp) +{ + struct bnxt_fw_health *fw_health = bp->fw_health; + int i, rc; + + if (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD) { + bp->fw_reset_timestamp = jiffies; + return; + } + + if (fw_health->flags & HWRM_ERROR_RECOVERY_QCFG_OUTPUT_FLAGS_HOST) { + for (i = 0; i < fw_health->fw_reset_seq_cnt; i++) + bnxt_fw_reset_writel(bp, i); + } else if (fw_health->flags & HWRM_ERROR_RECOVERY_QCFG_OUTPUT_FLAGS_CO_CPU) { + struct hwrm_fw_reset_input req = {0}; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET); + req.target_id = htole16(HWRM_TARGET_ID_KONG); + req.embedded_proc_type = HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_CHIP; + req.selfrst_status = HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTASAP; + req.flags = HWRM_FW_RESET_INPUT_FLAGS_RESET_GRACEFUL; + rc = hwrm_send_message(bp, &req, sizeof(req)); + + if (rc != 
-ENODEV) + device_printf(bp->dev, "Unable to reset FW rc=%d\n", rc); + } + bp->fw_reset_timestamp = jiffies; +} + +static int __bnxt_alloc_fw_health(struct bnxt_softc *bp) +{ + if (bp->fw_health) + return 0; + + bp->fw_health = kzalloc(sizeof(*bp->fw_health), GFP_KERNEL); + if (!bp->fw_health) + return -ENOMEM; + + mutex_init(&bp->fw_health->lock); + return 0; +} + +static int bnxt_alloc_fw_health(struct bnxt_softc *bp) +{ + int rc; + + if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET) && + !(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)) + return 0; + + rc = __bnxt_alloc_fw_health(bp); + if (rc) { + bp->fw_cap &= ~BNXT_FW_CAP_HOT_RESET; + bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY; + return rc; + } + + return 0; +} + +static inline void __bnxt_map_fw_health_reg(struct bnxt_softc *bp, u32 reg) +{ + writel_fbsd(bp, BNXT_GRCPF_REG_WINDOW_BASE_OUT + BNXT_FW_HEALTH_WIN_MAP_OFF, 0, reg & BNXT_GRC_BASE_MASK); +} + +static int bnxt_map_fw_health_regs(struct bnxt_softc *bp) +{ + struct bnxt_fw_health *fw_health = bp->fw_health; + u32 reg_base = 0xffffffff; + int i; + + bp->fw_health->status_reliable = false; + bp->fw_health->resets_reliable = false; + /* Only pre-map the monitoring GRC registers using window 3 */ + for (i = 0; i < 4; i++) { + u32 reg = fw_health->regs[i]; + + if (BNXT_FW_HEALTH_REG_TYPE(reg) != BNXT_FW_HEALTH_REG_TYPE_GRC) + continue; + if (reg_base == 0xffffffff) + reg_base = reg & BNXT_GRC_BASE_MASK; + if ((reg & BNXT_GRC_BASE_MASK) != reg_base) + return -ERANGE; + fw_health->mapped_regs[i] = BNXT_FW_HEALTH_WIN_OFF(reg); + } + bp->fw_health->status_reliable = true; + bp->fw_health->resets_reliable = true; + if (reg_base == 0xffffffff) + return 0; + + __bnxt_map_fw_health_reg(bp, reg_base); + return 0; +} + +static void bnxt_inv_fw_health_reg(struct bnxt_softc *bp) +{ + struct bnxt_fw_health *fw_health = bp->fw_health; + u32 reg_type; + + if (!fw_health) + return; + + reg_type = BNXT_FW_HEALTH_REG_TYPE(fw_health->regs[BNXT_FW_HEALTH_REG]); + if (reg_type == 
BNXT_FW_HEALTH_REG_TYPE_GRC) + fw_health->status_reliable = false; + + reg_type = BNXT_FW_HEALTH_REG_TYPE(fw_health->regs[BNXT_FW_RESET_CNT_REG]); + if (reg_type == BNXT_FW_HEALTH_REG_TYPE_GRC) + fw_health->resets_reliable = false; +} + +static int bnxt_hwrm_error_recovery_qcfg(struct bnxt_softc *bp) +{ + struct bnxt_fw_health *fw_health = bp->fw_health; + struct hwrm_error_recovery_qcfg_output *resp = + (void *)bp->hwrm_cmd_resp.idi_vaddr; + struct hwrm_error_recovery_qcfg_input req = {0}; + int rc, i; + + if (!(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)) + return 0; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_ERROR_RECOVERY_QCFG); + rc = _hwrm_send_message(bp, &req, sizeof(req)); + + if (rc) + goto err_recovery_out; + fw_health->flags = le32toh(resp->flags); + if ((fw_health->flags & HWRM_ERROR_RECOVERY_QCFG_OUTPUT_FLAGS_CO_CPU) && + !(bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL)) { + rc = -EINVAL; + goto err_recovery_out; + } + fw_health->polling_dsecs = le32toh(resp->driver_polling_freq); + fw_health->master_func_wait_dsecs = + le32toh(resp->master_func_wait_period); + fw_health->normal_func_wait_dsecs = + le32toh(resp->normal_func_wait_period); + fw_health->post_reset_wait_dsecs = + le32toh(resp->master_func_wait_period_after_reset); + fw_health->post_reset_max_wait_dsecs = + le32toh(resp->max_bailout_time_after_reset); + fw_health->regs[BNXT_FW_HEALTH_REG] = + le32toh(resp->fw_health_status_reg); + fw_health->regs[BNXT_FW_HEARTBEAT_REG] = + le32toh(resp->fw_heartbeat_reg); + fw_health->regs[BNXT_FW_RESET_CNT_REG] = + le32toh(resp->fw_reset_cnt_reg); + fw_health->regs[BNXT_FW_RESET_INPROG_REG] = + le32toh(resp->reset_inprogress_reg); + fw_health->fw_reset_inprog_reg_mask = + le32toh(resp->reset_inprogress_reg_mask); + fw_health->fw_reset_seq_cnt = resp->reg_array_cnt; + if (fw_health->fw_reset_seq_cnt >= 16) { + rc = -EINVAL; + goto err_recovery_out; + } + for (i = 0; i < fw_health->fw_reset_seq_cnt; i++) { + fw_health->fw_reset_seq_regs[i] = + 
 le32toh(resp->reset_reg[i]); + fw_health->fw_reset_seq_vals[i] = + le32toh(resp->reset_reg_val[i]); + fw_health->fw_reset_seq_delay_msec[i] = + le32toh(resp->delay_after_reset[i]); + } +err_recovery_out: + if (!rc) + rc = bnxt_map_fw_health_regs(bp); + if (rc) + bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY; + return rc; +} + +static int bnxt_drv_rgtr(struct bnxt_softc *bp) +{ + int rc; + + /* determine whether we can support error recovery before + * registering with FW + */ + if (bnxt_alloc_fw_health(bp)) { + device_printf(bp->dev, "no memory for firmware error recovery\n"); + } else { + rc = bnxt_hwrm_error_recovery_qcfg(bp); + if (rc) + device_printf(bp->dev, "hwrm query error recovery failure rc: %d\n", + rc); + } + rc = bnxt_hwrm_func_drv_rgtr(bp, NULL, 0, false); + if (rc) + return -ENODEV; + return 0; +} + +static bool bnxt_fw_reset_timeout(struct bnxt_softc *bp) +{ + return time_after(jiffies, bp->fw_reset_timestamp + + (bp->fw_reset_max_dsecs * HZ / 10)); +} + +static int bnxt_open(struct bnxt_softc *bp) +{ + int rc = 0; + if (BNXT_PF(bp)) + rc = bnxt_hwrm_nvm_get_dev_info(bp, &bp->nvm_info->mfg_id, + &bp->nvm_info->device_id, &bp->nvm_info->sector_size, + &bp->nvm_info->size, &bp->nvm_info->reserved_size, + &bp->nvm_info->available_size); + + /* Get the queue config */ + rc = bnxt_hwrm_queue_qportcfg(bp, HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_TX); + if (rc) { + device_printf(bp->dev, "reinit: hwrm qportcfg (tx) failed\n"); + return rc; + } + if (bp->is_asym_q) { + rc = bnxt_hwrm_queue_qportcfg(bp, + HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_RX); + if (rc) { + device_printf(bp->dev, "re-init: hwrm qportcfg (rx) failed\n"); + return rc; + } + bnxt_verify_asym_queues(bp); + } else { + bp->rx_max_q = bp->tx_max_q; + memcpy(bp->rx_q_info, bp->tx_q_info, sizeof(bp->rx_q_info)); + memcpy(bp->rx_q_ids, bp->tx_q_ids, sizeof(bp->rx_q_ids)); + } + /* Get the HW capabilities */ + rc = bnxt_hwrm_func_qcaps(bp); + if (rc) + return rc; + + /* 
Register the driver with the FW */ + rc = bnxt_drv_rgtr(bp); + if (rc) + return rc; + if (bp->hwrm_spec_code >= 0x10803) { + rc = bnxt_alloc_ctx_mem(bp); + if (rc) { + device_printf(bp->dev, "attach: alloc_ctx_mem failed\n"); + return rc; + } + rc = bnxt_hwrm_func_resc_qcaps(bp, true); + if (!rc) + bp->flags |= BNXT_FLAG_FW_CAP_NEW_RM; + } + + if (BNXT_CHIP_P5(bp)) + bnxt_hwrm_reserve_pf_rings(bp); + /* Get the current configuration of this function */ + rc = bnxt_hwrm_func_qcfg(bp); + if (rc) { + device_printf(bp->dev, "re-init: hwrm func qcfg failed\n"); + return rc; + } + + bnxt_msix_intr_assign(bp->ctx, 0); + bnxt_init(bp->ctx); + bnxt_intr_enable(bp->ctx); + + if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) { + if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { + bnxt_ulp_start(bp, 0); + } + } + + device_printf(bp->dev, "Network interface is UP and operational\n"); + + return rc; +} +static void bnxt_fw_reset_abort(struct bnxt_softc *bp, int rc) +{ + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) { + bnxt_ulp_start(bp, rc); + } + bp->fw_reset_state = 0; +} + +static void bnxt_fw_reset_task(struct work_struct *work) +{ + struct bnxt_softc *bp = container_of(work, struct bnxt_softc, fw_reset_task.work); + int rc = 0; + + if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { + device_printf(bp->dev, "bnxt_fw_reset_task() called when not in fw reset mode!\n"); + return; + } + + switch (bp->fw_reset_state) { + case BNXT_FW_RESET_STATE_POLL_FW_DOWN: { + u32 val; + + val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); + if (!(val & BNXT_FW_STATUS_SHUTDOWN) && + !bnxt_fw_reset_timeout(bp)) { + bnxt_queue_fw_reset_work(bp, HZ / 5); + return; + } + + if (!bp->fw_health->primary) { + u32 wait_dsecs = bp->fw_health->normal_func_wait_dsecs; + + bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV; + bnxt_queue_fw_reset_work(bp, wait_dsecs * HZ / 10); + return; + } + bp->fw_reset_state = 
BNXT_FW_RESET_STATE_RESET_FW; + } + fallthrough; + case BNXT_FW_RESET_STATE_RESET_FW: + bnxt_reset_all(bp); + bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV; + bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10); + return; + case BNXT_FW_RESET_STATE_ENABLE_DEV: + bnxt_inv_fw_health_reg(bp); + if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) && + !bp->fw_reset_min_dsecs) { + u16 val; + + val = pci_read_config(bp->dev, PCI_SUBSYSTEM_ID, 2); + if (val == 0xffff) { + if (bnxt_fw_reset_timeout(bp)) { + device_printf(bp->dev, "Firmware reset aborted, PCI config space invalid\n"); + rc = -ETIMEDOUT; + goto fw_reset_abort; + } + bnxt_queue_fw_reset_work(bp, HZ / 1000); + return; + } + } + clear_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); + clear_bit(BNXT_STATE_FW_NON_FATAL_COND, &bp->state); + if (!pci_is_enabled(bp->pdev)) { + if (pci_enable_device(bp->pdev)) { + device_printf(bp->dev, "Cannot re-enable PCI device\n"); + rc = -ENODEV; + goto fw_reset_abort; + } + } + pci_set_master(bp->pdev); + bp->fw_reset_state = BNXT_FW_RESET_STATE_POLL_FW; + fallthrough; + case BNXT_FW_RESET_STATE_POLL_FW: + bp->hwrm_cmd_timeo = SHORT_HWRM_CMD_TIMEOUT; + rc = bnxt_hwrm_poll(bp); + if (rc) { + if (bnxt_fw_reset_timeout(bp)) { + device_printf(bp->dev, "Firmware reset aborted\n"); + goto fw_reset_abort_status; + } + bnxt_queue_fw_reset_work(bp, HZ / 5); + return; + } + bp->hwrm_cmd_timeo = DFLT_HWRM_CMD_TIMEOUT; + bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING; + fallthrough; + case BNXT_FW_RESET_STATE_OPENING: + rc = bnxt_open(bp); + if (rc) { + device_printf(bp->dev, "bnxt_open() failed during FW reset\n"); + bnxt_fw_reset_abort(bp, rc); + rtnl_unlock(); + return; + } + + if ((bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) && + bp->fw_health->enabled) { + bp->fw_health->last_fw_reset_cnt = + bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); + } + bp->fw_reset_state = 0; + smp_mb__before_atomic(); + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + bnxt_ulp_start(bp, 0); + 
clear_bit(BNXT_STATE_FW_ACTIVATE, &bp->state); + set_bit(BNXT_STATE_OPEN, &bp->state); + rtnl_unlock(); + } + return; + +fw_reset_abort_status: + if (bp->fw_health->status_reliable || + (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)) { + u32 sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); + + device_printf(bp->dev, "fw_health_status 0x%x\n", sts); + } +fw_reset_abort: + rtnl_lock(); + bnxt_fw_reset_abort(bp, rc); + rtnl_unlock(); +} + +static void bnxt_force_fw_reset(struct bnxt_softc *bp) +{ + struct bnxt_fw_health *fw_health = bp->fw_health; + u32 wait_dsecs; + + if (!test_bit(BNXT_STATE_OPEN, &bp->state) || + test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) + return; + bnxt_fw_reset_close(bp); + wait_dsecs = fw_health->master_func_wait_dsecs; + if (fw_health->primary) { + if (fw_health->flags & HWRM_ERROR_RECOVERY_QCFG_OUTPUT_FLAGS_CO_CPU) + wait_dsecs = 0; + bp->fw_reset_state = BNXT_FW_RESET_STATE_RESET_FW; + } else { + bp->fw_reset_timestamp = jiffies + wait_dsecs * HZ / 10; + wait_dsecs = fw_health->normal_func_wait_dsecs; + bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV; + } + + bp->fw_reset_min_dsecs = fw_health->post_reset_wait_dsecs; + bp->fw_reset_max_dsecs = fw_health->post_reset_max_wait_dsecs; + bnxt_queue_fw_reset_work(bp, wait_dsecs * HZ / 10); +} + +static void bnxt_fw_exception(struct bnxt_softc *bp) +{ + device_printf(bp->dev, "Detected firmware fatal condition, initiating reset\n"); + set_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); + bnxt_rtnl_lock_sp(bp); + bnxt_force_fw_reset(bp); + bnxt_rtnl_unlock_sp(bp); +} + +static void __bnxt_fw_recover(struct bnxt_softc *bp) +{ + if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) || + test_bit(BNXT_STATE_FW_NON_FATAL_COND, &bp->state)) + bnxt_fw_reset(bp); + else + bnxt_fw_exception(bp); +} + +static void bnxt_devlink_health_fw_report(struct bnxt_softc *bp) +{ + struct bnxt_fw_health *fw_health = bp->fw_health; + + if (!fw_health) + return; + + if (!fw_health->fw_reporter) { + __bnxt_fw_recover(bp); + 
return; + } +} + +static void bnxt_sp_task(struct work_struct *work) +{ + struct bnxt_softc *bp = container_of(work, struct bnxt_softc, sp_task); + + set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); + smp_mb__after_atomic(); + if (!test_bit(BNXT_STATE_OPEN, &bp->state)) { + clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); + return; + } + + if (test_and_clear_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, &bp->sp_event)) { + if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) || + test_bit(BNXT_STATE_FW_NON_FATAL_COND, &bp->state)) + bnxt_devlink_health_fw_report(bp); + else + bnxt_fw_reset(bp); + } + + if (test_and_clear_bit(BNXT_FW_EXCEPTION_SP_EVENT, &bp->sp_event)) { + if (!is_bnxt_fw_ok(bp)) + bnxt_devlink_health_fw_report(bp); + } + smp_mb__before_atomic(); + clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); +} + /* Device setup and teardown */ static int bnxt_attach_pre(if_ctx_t ctx) @@ -1475,6 +2184,16 @@ /* Get NVRAM info */ if (BNXT_PF(softc)) { + if (!bnxt_pf_wq) { + bnxt_pf_wq = + create_singlethread_workqueue("bnxt_pf_wq"); + if (!bnxt_pf_wq) { + device_printf(softc->dev, "Unable to create workqueue.\n"); + rc = -ENOMEM; + goto nvm_alloc_fail; + } + } + softc->nvm_info = malloc(sizeof(struct bnxt_nvram_info), M_DEVBUF, M_NOWAIT | M_ZERO); if (softc->nvm_info == NULL) { @@ -1503,18 +2222,6 @@ softc->db_ops.bnxt_db_tx_cq = bnxt_cuw_db_cq; } - /* Register the driver with the FW */ - rc = bnxt_hwrm_func_drv_rgtr(softc); - if (rc) { - device_printf(softc->dev, "attach: hwrm drv rgtr failed\n"); - goto drv_rgtr_fail; - } - - rc = bnxt_hwrm_func_rgtr_async_events(softc, NULL, 0); - if (rc) { - device_printf(softc->dev, "attach: hwrm rgtr async evts failed\n"); - goto drv_rgtr_fail; - } /* Get the queue config */ rc = bnxt_hwrm_queue_qportcfg(softc, HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_TX); @@ -1541,6 +2248,14 @@ if (rc) goto failed; + /* + * Register the driver with the FW + * Register the async events with the FW + */ + rc = bnxt_drv_rgtr(softc); + if (rc) + goto failed; + if 
(softc->hwrm_spec_code >= 0x10803) { rc = bnxt_alloc_ctx_mem(softc); if (rc) { @@ -1701,6 +2416,10 @@ if (rc) goto failed; + set_bit(BNXT_STATE_OPEN, &softc->state); + INIT_WORK(&softc->sp_task, bnxt_sp_task); + INIT_DELAYED_WORK(&softc->fw_reset_task, bnxt_fw_reset_task); + /* Initialize the vlan list */ SLIST_INIT(&softc->vnic_info.vlan_tags); softc->vnic_info.vlan_tag_list.idi_vaddr = NULL; @@ -1713,7 +2432,6 @@ bnxt_free_sysctl_ctx(softc); init_sysctl_failed: bnxt_hwrm_func_drv_unrgtr(softc, false); -drv_rgtr_fail: if (BNXT_PF(softc)) free(softc->nvm_info, M_DEVBUF); nvm_alloc_fail: @@ -1778,6 +2496,8 @@ int i; bnxt_rdma_aux_device_uninit(softc); + cancel_delayed_work_sync(&softc->fw_reset_task); + cancel_work_sync(&softc->sp_task); bnxt_dcb_free(softc); SLIST_REMOVE(&pf_list, &softc->list, bnxt_softc_list, next); bnxt_num_pfs--; @@ -1816,6 +2536,9 @@ bnxt_free_hwrm_short_cmd_req(softc); BNXT_HWRM_LOCK_DESTROY(softc); + if (!bnxt_num_pfs && bnxt_pf_wq) + destroy_workqueue(bnxt_pf_wq); + if (softc->pdev) linux_pci_detach_device(softc->pdev); free(softc->state_bv, M_DEVBUF); @@ -2035,7 +2758,6 @@ device_printf(softc->dev, "Failed to add auxiliary device for ROCE\n"); msleep(1000 * 2); ida_free(&bnxt_aux_dev_ids, softc->aux_dev->id); - bnxt_aux_dev_free(softc); } device_printf(softc->dev, "%s:%d Added auxiliary device (id %d) for ROCE \n", __func__, __LINE__, softc->aux_dev->id); @@ -2637,7 +3359,7 @@ } } -static void +void process_nq(struct bnxt_softc *softc, uint16_t nqid) { struct bnxt_cp_ring *cpr = &softc->nq_rings[nqid]; @@ -3759,6 +4481,9 @@ struct hwrm_async_event_cmpl *ae = (void *)cmpl; uint16_t async_id = le16toh(ae->event_id); struct ifmediareq ifmr; + char *type_str; + char *status_desc; + struct bnxt_fw_health *fw_health; u32 data1 = le32toh(ae->event_data1); u32 data2 = le32toh(ae->event_data2); @@ -3778,6 +4503,79 @@ case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE: break; 
+ case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: { + type_str = "Solicited"; + + if (!softc->fw_health) + goto async_event_process_exit; + + softc->fw_reset_timestamp = jiffies; + softc->fw_reset_min_dsecs = ae->timestamp_lo; + if (!softc->fw_reset_min_dsecs) + softc->fw_reset_min_dsecs = BNXT_DFLT_FW_RST_MIN_DSECS; + softc->fw_reset_max_dsecs = le16toh(ae->timestamp_hi); + if (!softc->fw_reset_max_dsecs) + softc->fw_reset_max_dsecs = BNXT_DFLT_FW_RST_MAX_DSECS; + if (EVENT_DATA1_RESET_NOTIFY_FW_ACTIVATION(data1)) { + set_bit(BNXT_STATE_FW_ACTIVATE_RESET, &softc->state); + } else if (EVENT_DATA1_RESET_NOTIFY_FATAL(data1)) { + type_str = "Fatal"; + softc->fw_health->fatalities++; + set_bit(BNXT_STATE_FW_FATAL_COND, &softc->state); + } else if (data2 && BNXT_FW_STATUS_HEALTHY != + EVENT_DATA2_RESET_NOTIFY_FW_STATUS_CODE(data2)) { + type_str = "Non-fatal"; + softc->fw_health->survivals++; + set_bit(BNXT_STATE_FW_NON_FATAL_COND, &softc->state); + } + device_printf(softc->dev, + "%s firmware reset event, data1: 0x%x, data2: 0x%x, min wait %u ms, max wait %u ms\n", + type_str, data1, data2, + softc->fw_reset_min_dsecs * 100, + softc->fw_reset_max_dsecs * 100); + set_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, &softc->sp_event); + break; + } + case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY: { + fw_health = softc->fw_health; + status_desc = "healthy"; + u32 status; + + if (!fw_health) + goto async_event_process_exit; + + if (!EVENT_DATA1_RECOVERY_ENABLED(data1)) { + fw_health->enabled = false; + device_printf(softc->dev, "Driver recovery watchdog is disabled\n"); + break; + } + fw_health->primary = EVENT_DATA1_RECOVERY_MASTER_FUNC(data1); + fw_health->tmr_multiplier = + DIV_ROUND_UP(fw_health->polling_dsecs * HZ, + HZ * 10); + fw_health->tmr_counter = fw_health->tmr_multiplier; + if (!fw_health->enabled) + fw_health->last_fw_heartbeat = + bnxt_fw_health_readl(softc, BNXT_FW_HEARTBEAT_REG); + fw_health->last_fw_reset_cnt = + bnxt_fw_health_readl(softc, BNXT_FW_RESET_CNT_REG); 
+ status = bnxt_fw_health_readl(softc, BNXT_FW_HEALTH_REG); + if (status != BNXT_FW_STATUS_HEALTHY) + status_desc = "unhealthy"; + device_printf(softc->dev, + "Driver recovery watchdog, role: %s, firmware status: 0x%x (%s), resets: %u\n", + fw_health->primary ? "primary" : "backup", status, + status_desc, fw_health->last_fw_reset_cnt); + if (!fw_health->enabled) { + /* Make sure tmr_counter is set and seen by + * bnxt_health_check() before setting enabled + */ + smp_mb(); + fw_health->enabled = true; + } + goto async_event_process_exit; + } + case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED: @@ -3799,6 +4597,8 @@ "Unknown async completion type %u\n", async_id); break; } + bnxt_queue_sp_work(softc); + async_event_process_exit: bnxt_ulp_async_events(softc, ae); }