diff --git a/sys/dev/ice/ice_rdma.c b/sys/dev/ice/ice_rdma.c
index 8443cab1e98e..411bfb41dcec 100644
--- a/sys/dev/ice/ice_rdma.c
+++ b/sys/dev/ice/ice_rdma.c
@@ -1,867 +1,916 @@
/* SPDX-License-Identifier: BSD-3-Clause */
/* Copyright (c) 2023, Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * @file ice_rdma.c
 * @brief RDMA client driver interface
 *
 * Functions to interface with the RDMA client driver, for enabling RDMA
 * functionality for the ice driver.
 *
 * The RDMA client interface is based on a simple kobject interface which is
 * defined by the irdma_if.m and irdma_di_if.m interfaces.
 *
 * The ice device driver provides the irdma_di_if.m interface methods, while
 * the client RDMA driver provides the irdma_if.m interface methods as an
 * extension on top of the irdma_di_if kobject.
 *
 * The initial connection between drivers is done via the RDMA client driver
 * calling ice_rdma_register.
 */

#include "ice_iflib.h"
#include "ice_rdma_internal.h"
#include "irdma_if.h"
#include "irdma_di_if.h"

/**
 * @var ice_rdma
 * @brief global RDMA driver state
 *
 * Contains global state the driver uses to connect to a client RDMA interface
 * driver.
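 *
 * As a minimal sketch of how that connection is made (illustration only,
 * not part of this change), a client such as the irdma driver fills a
 * struct ice_rdma_info with the interface version it was built against
 * plus its kobject class, then calls ice_rdma_register() at module load:
 *
 *	static struct ice_rdma_info irdma_info = {
 *		.major_version = ICE_RDMA_MAJOR_VERSION,
 *		.minor_version = ICE_RDMA_MINOR_VERSION,
 *		.patch_version = ICE_RDMA_PATCH_VERSION,
 *		.rdma_class = &irdma_class,
 *	};
 *	error = ice_rdma_register(&irdma_info);
 *
 * irdma_class is a placeholder name for whatever kobject class the client
 * defines on top of ice_rdma_di_class.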
*/ static struct ice_rdma_state ice_rdma; /* * Helper function prototypes */ static int ice_rdma_pf_attach_locked(struct ice_softc *sc); static void ice_rdma_pf_detach_locked(struct ice_softc *sc); static int ice_rdma_check_version(struct ice_rdma_info *info); static void ice_rdma_cp_qos_info(struct ice_hw *hw, struct ice_dcbx_cfg *dcbx_cfg, struct ice_qos_params *qos_info); /* * RDMA Device Interface prototypes */ static int ice_rdma_pf_reset(struct ice_rdma_peer *peer); static int ice_rdma_pf_msix_init(struct ice_rdma_peer *peer, struct ice_rdma_msix_mapping *msix_info); static int ice_rdma_qset_register_request(struct ice_rdma_peer *peer, struct ice_rdma_qset_update *res); static int ice_rdma_update_vsi_filter(struct ice_rdma_peer *peer_dev, bool enable); static void ice_rdma_request_handler(struct ice_rdma_peer *peer, struct ice_rdma_request *req); /** * @var ice_rdma_di_methods * @brief RDMA driver interface methods * * Kobject methods implementing the driver-side interface for the RDMA peer * clients. This method table contains the operations which the client can * request from the driver. * * The client driver will then extend this kobject class with methods that the * driver can request from the client. */ static kobj_method_t ice_rdma_di_methods[] = { KOBJMETHOD(irdma_di_reset, ice_rdma_pf_reset), KOBJMETHOD(irdma_di_msix_init, ice_rdma_pf_msix_init), KOBJMETHOD(irdma_di_qset_register_request, ice_rdma_qset_register_request), KOBJMETHOD(irdma_di_vsi_filter_update, ice_rdma_update_vsi_filter), KOBJMETHOD(irdma_di_req_handler, ice_rdma_request_handler), KOBJMETHOD_END }; /* Define ice_rdma_di class which will be extended by the iRDMA driver */ DEFINE_CLASS_0(ice_rdma_di, ice_rdma_di_class, ice_rdma_di_methods, sizeof(struct ice_rdma_peer)); /** * ice_rdma_pf_reset - RDMA client interface requested a reset * @peer: the RDMA peer client structure * * Implements IRDMA_DI_RESET, called by the RDMA client driver to request * a reset of an ice driver device. */ static int ice_rdma_pf_reset(struct ice_rdma_peer *peer) { struct ice_softc *sc = ice_rdma_peer_to_sc(peer); - /* - * Request that the driver re-initialize by bringing the interface - * down and up. - */ - ice_request_stack_reinit(sc); + /* Tell the base driver that RDMA is requesting a PFR */ + ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ); + + /* XXX: Base driver will notify RDMA when it's done */ return (0); } /** * ice_rdma_pf_msix_init - RDMA client interface request MSI-X initialization * @peer: the RDMA peer client structure * @msix_info: requested MSI-X mapping * * Implements IRDMA_DI_MSIX_INIT, called by the RDMA client driver to * initialize the MSI-X resources required for RDMA functionality. 
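 *
 * For illustration only (the handler below is still a stub), a client
 * could describe the vector layout it wants with a struct
 * ice_rdma_msix_mapping before invoking this method through the
 * irdma_di_if.m wrapper, for example:
 *
 *	int ceq_vec = peer->msix.base + 1;
 *	struct ice_rdma_msix_mapping map = {
 *		.itr_indx = 0,
 *		.aeq_vector = peer->msix.base,
 *		.ceq_cnt = 1,
 *		.ceq_vector = &ceq_vec,
 *	};
 *	error = IRDMA_DI_MSIX_INIT(peer, &map);
 *
 * The ITR index and vector assignments above are assumptions made for the
 * sketch; a real client derives them from the MSI-X range reserved for it
 * in peer->msix.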
*/ static int ice_rdma_pf_msix_init(struct ice_rdma_peer *peer, struct ice_rdma_msix_mapping __unused *msix_info) { struct ice_softc *sc = ice_rdma_peer_to_sc(peer); MPASS(msix_info != NULL); device_printf(sc->dev, "%s: iRDMA MSI-X initialization request is not yet implemented\n", __func__); /* TODO: implement MSI-X initialization for RDMA */ return (ENOSYS); } /** * ice_rdma_register_request - RDMA client interface request qset * registration or unregistration * @peer: the RDMA peer client structure * @res: resources to be registered or unregistered */ static int ice_rdma_qset_register_request(struct ice_rdma_peer *peer, struct ice_rdma_qset_update *res) { struct ice_softc *sc = ice_rdma_peer_to_sc(peer); struct ice_vsi *vsi = NULL; struct ice_dcbx_cfg *dcbx_cfg; struct ice_hw *hw = &sc->hw; enum ice_status status; int count, i, ret = 0; uint32_t *qset_teid; uint16_t *qs_handle; uint16_t max_rdmaqs[ICE_MAX_TRAFFIC_CLASS]; uint16_t vsi_id; uint8_t ena_tc = 0; if (!res) return -EINVAL; if (res->cnt_req > ICE_MAX_TXQ_PER_TXQG) return -EINVAL; switch(res->res_type) { case ICE_RDMA_QSET_ALLOC: count = res->cnt_req; vsi_id = peer->pf_vsi_num; break; case ICE_RDMA_QSET_FREE: count = res->res_allocated; vsi_id = res->qsets.vsi_id; break; default: return -EINVAL; } qset_teid = (uint32_t *)ice_calloc(hw, count, sizeof(*qset_teid)); if (!qset_teid) return -ENOMEM; qs_handle = (uint16_t *)ice_calloc(hw, count, sizeof(*qs_handle)); if (!qs_handle) { ice_free(hw, qset_teid); return -ENOMEM; } ice_for_each_traffic_class(i) max_rdmaqs[i] = 0; for (i = 0; i < sc->num_available_vsi; i++) { if (sc->all_vsi[i] && ice_get_hw_vsi_num(hw, sc->all_vsi[i]->idx) == vsi_id) { vsi = sc->all_vsi[i]; break; } } if (!vsi) { ice_debug(hw, ICE_DBG_RDMA, "RDMA QSet invalid VSI\n"); ret = -EINVAL; goto out; } if (sc != vsi->sc) { ice_debug(hw, ICE_DBG_RDMA, "VSI is tied to unexpected device\n"); ret = -EXDEV; goto out; } for (i = 0; i < count; i++) { struct ice_rdma_qset_params *qset; qset = &res->qsets; if (qset->vsi_id != peer->pf_vsi_num) { ice_debug(hw, ICE_DBG_RDMA, "RDMA QSet invalid VSI requested %d %d\n", qset->vsi_id, peer->pf_vsi_num); ret = -EINVAL; goto out; } max_rdmaqs[qset->tc]++; qs_handle[i] = qset->qs_handle; qset_teid[i] = qset->teid; } switch(res->res_type) { case ICE_RDMA_QSET_ALLOC: dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg; ena_tc = ice_dcb_get_tc_map(dcbx_cfg); ice_debug(hw, ICE_DBG_RDMA, "%s:%d ena_tc=%x\n", __func__, __LINE__, ena_tc); status = ice_cfg_vsi_rdma(hw->port_info, vsi->idx, ena_tc, max_rdmaqs); if (status) { ice_debug(hw, ICE_DBG_RDMA, "Failed VSI RDMA qset config\n"); ret = -EINVAL; goto out; } for (i = 0; i < count; i++) { struct ice_rdma_qset_params *qset; qset = &res->qsets; status = ice_ena_vsi_rdma_qset(hw->port_info, vsi->idx, qset->tc, &qs_handle[i], 1, &qset_teid[i]); if (status) { ice_debug(hw, ICE_DBG_RDMA, "Failed VSI RDMA qset enable\n"); ret = -EINVAL; goto out; } qset->teid = qset_teid[i]; } break; case ICE_RDMA_QSET_FREE: status = ice_dis_vsi_rdma_qset(hw->port_info, count, qset_teid, qs_handle); if (status) ret = -EINVAL; break; default: ret = -EINVAL; break; } out: ice_free(hw, qs_handle); ice_free(hw, qset_teid); return ret; } /** * ice_rdma_update_vsi_filter - configure vsi information * when opening or closing rdma driver * @peer: the RDMA peer client structure * @enable: enable or disable the rdma filter */ static int ice_rdma_update_vsi_filter(struct ice_rdma_peer *peer, bool enable) { struct ice_softc *sc = ice_rdma_peer_to_sc(peer); struct ice_vsi *vsi; 
int ret; vsi = &sc->pf_vsi; if (!vsi) return -EINVAL; ret = ice_cfg_iwarp_fltr(&sc->hw, vsi->idx, enable); if (ret) { device_printf(sc->dev, "Failed to %sable iWARP filtering\n", enable ? "en" : "dis"); } else { if (enable) vsi->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN; else vsi->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN; } return ret; } /** * ice_rdma_request_handler - handle requests incoming from RDMA driver * @peer: the RDMA peer client structure * @req: structure containing request */ static void ice_rdma_request_handler(struct ice_rdma_peer *peer, struct ice_rdma_request *req) { if (!req || !peer) { log(LOG_WARNING, "%s: peer or req are not valid\n", __func__); return; } switch(req->type) { case ICE_RDMA_EVENT_RESET: + ice_rdma_pf_reset(peer); break; case ICE_RDMA_EVENT_QSET_REGISTER: ice_rdma_qset_register_request(peer, &req->res); break; case ICE_RDMA_EVENT_VSI_FILTER_UPDATE: ice_rdma_update_vsi_filter(peer, req->enable_filter); break; default: log(LOG_WARNING, "%s: Event %d not supported\n", __func__, req->type); break; } } /** * ice_rdma_cp_qos_info - gather current QOS/DCB settings in LAN to pass * to RDMA driver * @hw: ice hw structure * @dcbx_cfg: current DCB settings in ice driver * @qos_info: destination of the DCB settings */ static void ice_rdma_cp_qos_info(struct ice_hw *hw, struct ice_dcbx_cfg *dcbx_cfg, struct ice_qos_params *qos_info) { u32 up2tc; u8 j; u8 num_tc = 0; u8 val_tc = 0; /* number of TC for validation */ u8 cnt_tc = 0; /* setup qos_info fields with defaults */ qos_info->num_apps = 0; qos_info->num_tc = 1; for (j = 0; j < ICE_TC_MAX_USER_PRIORITY; j++) qos_info->up2tc[j] = 0; qos_info->tc_info[0].rel_bw = 100; for (j = 1; j < IEEE_8021QAZ_MAX_TCS; j++) qos_info->tc_info[j].rel_bw = 0; /* gather current values */ up2tc = rd32(hw, PRTDCB_TUP2TC); qos_info->num_apps = dcbx_cfg->numapps; for (j = 0; j < ICE_MAX_TRAFFIC_CLASS; j++) { num_tc |= BIT(dcbx_cfg->etscfg.prio_table[j]); } for (j = 0; j < ICE_MAX_TRAFFIC_CLASS; j++) { if (num_tc & BIT(j)) { cnt_tc++; val_tc |= BIT(j); } else { break; } } qos_info->num_tc = (val_tc == num_tc && num_tc != 0) ? cnt_tc : 1; for (j = 0; j < ICE_TC_MAX_USER_PRIORITY; j++) qos_info->up2tc[j] = (up2tc >> (j * 3)) & 0x7; for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) qos_info->tc_info[j].rel_bw = dcbx_cfg->etscfg.tcbwtable[j]; for (j = 0; j < qos_info->num_apps; j++) { qos_info->apps[j].priority = dcbx_cfg->app[j].priority; qos_info->apps[j].prot_id = dcbx_cfg->app[j].prot_id; qos_info->apps[j].selector = dcbx_cfg->app[j].selector; } /* Gather DSCP-to-TC mapping and QoS/PFC mode */ memcpy(qos_info->dscp_map, dcbx_cfg->dscp_map, sizeof(qos_info->dscp_map)); qos_info->pfc_mode = dcbx_cfg->pfc_mode; } /** * ice_rdma_check_version - Check that the provided RDMA version is compatible * @info: the RDMA client information structure * * Verify that the client RDMA driver provided a version that is compatible * with the driver interface. */ static int ice_rdma_check_version(struct ice_rdma_info *info) { /* Make sure the MAJOR version matches */ if (info->major_version != ICE_RDMA_MAJOR_VERSION) { log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports major version %d.x.x\n", __func__, info->major_version, info->minor_version, info->patch_version, ICE_RDMA_MAJOR_VERSION); return (ENOTSUP); } /* * Make sure that the MINOR version is compatible. 
* * This means that the RDMA client driver version MUST not be greater * than the version provided by the driver, as it would indicate that * the RDMA client expects features which are not supported by the * main driver. */ if (info->minor_version > ICE_RDMA_MINOR_VERSION) { log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports up to minor version %d.%d.x\n", __func__, info->major_version, info->minor_version, info->patch_version, ICE_RDMA_MAJOR_VERSION, ICE_RDMA_MINOR_VERSION); return (ENOTSUP); } /* * Make sure that the PATCH version is compatible. * * This means that the RDMA client version MUST not be greater than * the version provided by the driver, as it may indicate that the * RDMA client expects certain backwards compatible bug fixes which * are not implemented by this version of the main driver. */ if ((info->minor_version == ICE_RDMA_MINOR_VERSION) && (info->patch_version > ICE_RDMA_PATCH_VERSION)) { log(LOG_WARNING, "%s: the iRDMA driver requested version %d.%d.%d, but this driver only supports up to patch version %d.%d.%d\n", __func__, info->major_version, info->minor_version, info->patch_version, ICE_RDMA_MAJOR_VERSION, ICE_RDMA_MINOR_VERSION, ICE_RDMA_PATCH_VERSION); return (ENOTSUP); } /* Make sure that the kobject class is initialized */ if (info->rdma_class == NULL) { log(LOG_WARNING, "%s: the iRDMA driver did not specify a kobject interface\n", __func__); return (EINVAL); } return (0); } /** * ice_rdma_register - Register an RDMA client driver * @info: the RDMA client information structure * * Called by the RDMA client driver on load. Used to initialize the RDMA * client driver interface and enable interop between the ice driver and the * RDMA client driver. * * The RDMA client driver must provide the version number it expects, along * with a pointer to a kobject class that extends the irdma_di_if class, and * implements the irdma_if class interface. */ int ice_rdma_register(struct ice_rdma_info *info) { struct ice_rdma_entry *entry; struct ice_softc *sc; int err = 0; sx_xlock(&ice_rdma.mtx); if (!ice_enable_irdma) { log(LOG_INFO, "%s: The iRDMA driver interface has been disabled\n", __func__); err = (ECONNREFUSED); goto return_unlock; } if (ice_rdma.registered) { log(LOG_WARNING, "%s: iRDMA driver already registered\n", __func__); err = (EBUSY); goto return_unlock; } /* Make sure the iRDMA version is compatible */ err = ice_rdma_check_version(info); if (err) goto return_unlock; log(LOG_INFO, "%s: iRDMA driver registered using version %d.%d.%d\n", __func__, info->major_version, info->minor_version, info->patch_version); ice_rdma.peer_class = info->rdma_class; /* * Initialize the kobject interface and notify the RDMA client of each * existing PF interface. */ LIST_FOREACH(entry, &ice_rdma.peers, node) { kobj_init((kobj_t)&entry->peer, ice_rdma.peer_class); /* Gather DCB/QOS info into peer */ sc = __containerof(entry, struct ice_softc, rdma_entry); memset(&entry->peer.initial_qos_info, 0, sizeof(entry->peer.initial_qos_info)); ice_rdma_cp_qos_info(&sc->hw, &sc->hw.port_info->qos_cfg.local_dcbx_cfg, &entry->peer.initial_qos_info); IRDMA_PROBE(&entry->peer); if (entry->initiated) IRDMA_OPEN(&entry->peer); } ice_rdma.registered = true; return_unlock: sx_xunlock(&ice_rdma.mtx); return (err); } /** * ice_rdma_unregister - Unregister an RDMA client driver * * Called by the RDMA client driver on unload. 
Used to de-initialize the RDMA * client driver interface and shut down communication between the ice driver * and the RDMA client driver. */ int ice_rdma_unregister(void) { struct ice_rdma_entry *entry; sx_xlock(&ice_rdma.mtx); if (!ice_rdma.registered) { log(LOG_WARNING, "%s: iRDMA driver was not previously registered\n", __func__); sx_xunlock(&ice_rdma.mtx); return (ENOENT); } log(LOG_INFO, "%s: iRDMA driver unregistered\n", __func__); ice_rdma.registered = false; ice_rdma.peer_class = NULL; /* * Release the kobject interface for each of the existing PF * interfaces. Note that we do not notify the client about removing * each PF, as it is assumed that the client will have already cleaned * up any associated resources when it is unregistered. */ LIST_FOREACH(entry, &ice_rdma.peers, node) kobj_delete((kobj_t)&entry->peer, NULL); sx_xunlock(&ice_rdma.mtx); return (0); } /** * ice_rdma_init - RDMA driver init routine * * Called during ice driver module initialization to setup the RDMA client * interface mutex and RDMA peer structure list. */ void ice_rdma_init(void) { LIST_INIT(&ice_rdma.peers); sx_init_flags(&ice_rdma.mtx, "ice rdma interface", SX_DUPOK); ice_rdma.registered = false; ice_rdma.peer_class = NULL; } /** * ice_rdma_exit - RDMA driver exit routine * * Called during ice driver module exit to shutdown the RDMA client interface * mutex. */ void ice_rdma_exit(void) { MPASS(LIST_EMPTY(&ice_rdma.peers)); sx_destroy(&ice_rdma.mtx); } /** * ice_rdma_pf_attach_locked - Prepare a PF for RDMA connections * @sc: the ice driver softc * * Initialize a peer entry for this PF and add it to the RDMA interface list. * Notify the client RDMA driver of a new PF device. * * @pre must be called while holding the ice_rdma mutex. */ static int ice_rdma_pf_attach_locked(struct ice_softc *sc) { struct ice_rdma_entry *entry; /* Do not attach the PF unless RDMA is supported */ if (!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) return (0); entry = &sc->rdma_entry; if (entry->attached) { device_printf(sc->dev, "iRDMA peer entry already exists\n"); return (EEXIST); } entry->attached = true; entry->peer.dev = sc->dev; entry->peer.ifp = sc->ifp; entry->peer.pf_id = sc->hw.pf_id; entry->peer.pci_mem = sc->bar0.res; entry->peer.pf_vsi_num = ice_get_hw_vsi_num(&sc->hw, sc->pf_vsi.idx); if (sc->rdma_imap && sc->rdma_imap[0] != ICE_INVALID_RES_IDX && sc->irdma_vectors > 0) { entry->peer.msix.base = sc->rdma_imap[0]; entry->peer.msix.count = sc->irdma_vectors; } /* Gather DCB/QOS info into peer */ memset(&entry->peer.initial_qos_info, 0, sizeof(entry->peer.initial_qos_info)); ice_rdma_cp_qos_info(&sc->hw, &sc->hw.port_info->qos_cfg.local_dcbx_cfg, &entry->peer.initial_qos_info); /* * If the RDMA client driver has already registered, initialize the * kobject and notify the client of a new PF */ if (ice_rdma.registered) { kobj_init((kobj_t)&entry->peer, ice_rdma.peer_class); IRDMA_PROBE(&entry->peer); } LIST_INSERT_HEAD(&ice_rdma.peers, entry, node); ice_set_bit(ICE_FEATURE_RDMA, sc->feat_en); return (0); } /** * ice_rdma_pf_attach - Notify the RDMA client of a new PF * @sc: the ice driver softc * * Called during PF attach to notify the RDMA client of a new PF. 
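 *
 * In terms of the per-PF lifecycle described throughout this file, the
 * calls are expected to arrive roughly in this order:
 *
 *	ice_rdma_init()       - driver module load
 *	ice_rdma_pf_attach()  - PF attach (this function)
 *	ice_rdma_pf_init()    - PF brought up, IRDMA_OPEN
 *	ice_rdma_pf_stop()    - PF stopped, IRDMA_CLOSE
 *	ice_rdma_pf_detach()  - PF detach
 *	ice_rdma_exit()       - driver module unload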
*/ int ice_rdma_pf_attach(struct ice_softc *sc) { int err; sx_xlock(&ice_rdma.mtx); err = ice_rdma_pf_attach_locked(sc); sx_xunlock(&ice_rdma.mtx); return (err); } /** * ice_rdma_pf_detach_locked - Notify the RDMA client on PF detach * @sc: the ice driver softc * * Notify the RDMA peer client driver of removal of a PF, and release any * RDMA-specific resources associated with that PF. Remove the PF from the * list of available RDMA entries. * * @pre must be called while holding the ice_rdma mutex. */ static void ice_rdma_pf_detach_locked(struct ice_softc *sc) { struct ice_rdma_entry *entry; /* No need to detach the PF if RDMA is not enabled */ if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_RDMA)) return; entry = &sc->rdma_entry; if (!entry->attached) { device_printf(sc->dev, "iRDMA peer entry was not attached\n"); return; } /* * If the RDMA client driver is registered, notify the client that * a PF has been removed, and release the kobject reference. */ if (ice_rdma.registered) { IRDMA_REMOVE(&entry->peer); kobj_delete((kobj_t)&entry->peer, NULL); } LIST_REMOVE(entry, node); entry->attached = false; ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_en); } /** * ice_rdma_pf_detach - Notify the RDMA client of a PF detaching * @sc: the ice driver softc * * Take the ice_rdma mutex and then notify the RDMA client that a PF has been * removed. */ void ice_rdma_pf_detach(struct ice_softc *sc) { sx_xlock(&ice_rdma.mtx); ice_rdma_pf_detach_locked(sc); sx_xunlock(&ice_rdma.mtx); } /** * ice_rdma_pf_init - Notify the RDMA client that a PF has initialized * @sc: the ice driver softc * * Called by the ice driver when a PF has been initialized. Notifies the RDMA * client that a PF is up and ready to operate. */ int ice_rdma_pf_init(struct ice_softc *sc) { struct ice_rdma_peer *peer = &sc->rdma_entry.peer; sx_xlock(&ice_rdma.mtx); /* Update the MTU */ peer->mtu = sc->ifp->if_mtu; sc->rdma_entry.initiated = true; if (sc->rdma_entry.attached && ice_rdma.registered) { sx_xunlock(&ice_rdma.mtx); return IRDMA_OPEN(peer); } sx_xunlock(&ice_rdma.mtx); return (0); } /** * ice_rdma_pf_stop - Notify the RDMA client of a stopped PF device * @sc: the ice driver softc * * Called by the ice driver when a PF is stopped. Notifies the RDMA client * driver that the PF has stopped and is not ready to operate. */ int ice_rdma_pf_stop(struct ice_softc *sc) { sx_xlock(&ice_rdma.mtx); sc->rdma_entry.initiated = false; if (sc->rdma_entry.attached && ice_rdma.registered) { sx_xunlock(&ice_rdma.mtx); return IRDMA_CLOSE(&sc->rdma_entry.peer); } sx_xunlock(&ice_rdma.mtx); return (0); } /** * ice_rdma_link_change - Notify RDMA client of a change in link status * @sc: the ice driver softc * @linkstate: the link status * @baudrate: the link rate in bits per second * * Notify the RDMA client of a link status change, by sending it the new link * state and baudrate. * * The link state is represented the same was as in the ifnet structure. It * should be LINK_STATE_UNKNOWN, LINK_STATE_DOWN, or LINK_STATE_UP. 
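 *
 * As a hedged sketch of the receiving side (not part of this change), a
 * client's event handler, invoked via IRDMA_EVENT_HANDLER(), might consume
 * the notification like this:
 *
 *	static void
 *	my_event_handler(struct ice_rdma_peer *peer,
 *			 struct ice_rdma_event *event)
 *	{
 *		if (event->type == ICE_RDMA_EVENT_LINK_CHANGE &&
 *		    event->linkstate == LINK_STATE_UP)
 *			device_printf(peer->dev, "link up, %ju bps\n",
 *			    (uintmax_t)event->baudrate);
 *	}
 *
 * my_event_handler is a placeholder name; the real client implements the
 * irdma_if.m event handler method and may react however it needs to.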
*/ void ice_rdma_link_change(struct ice_softc *sc, int linkstate, uint64_t baudrate) { struct ice_rdma_peer *peer = &sc->rdma_entry.peer; struct ice_rdma_event event; memset(&event, 0, sizeof(struct ice_rdma_event)); event.type = ICE_RDMA_EVENT_LINK_CHANGE; event.linkstate = linkstate; event.baudrate = baudrate; sx_xlock(&ice_rdma.mtx); if (sc->rdma_entry.attached && ice_rdma.registered) IRDMA_EVENT_HANDLER(peer, &event); sx_xunlock(&ice_rdma.mtx); } /** * ice_rdma_notify_dcb_qos_change - notify RDMA driver to pause traffic * @sc: the ice driver softc * * Notify the RDMA driver that QOS/DCB settings are about to change. * Once the function return, all the QPs should be suspended. */ void ice_rdma_notify_dcb_qos_change(struct ice_softc *sc) { struct ice_rdma_peer *peer = &sc->rdma_entry.peer; struct ice_rdma_event event; memset(&event, 0, sizeof(struct ice_rdma_event)); event.type = ICE_RDMA_EVENT_TC_CHANGE; /* pre-event */ event.prep = true; sx_xlock(&ice_rdma.mtx); if (sc->rdma_entry.attached && ice_rdma.registered) IRDMA_EVENT_HANDLER(peer, &event); sx_xunlock(&ice_rdma.mtx); } /** * ice_rdma_dcb_qos_update - pass the changed dcb settings to RDMA driver * @sc: the ice driver softc * @pi: the port info structure * * Pass the changed DCB settings to RDMA traffic. This function should be * called only after ice_rdma_notify_dcb_qos_change has been called and * returned before. After the function returns, all the RDMA traffic * should be resumed. */ void ice_rdma_dcb_qos_update(struct ice_softc *sc, struct ice_port_info *pi) { struct ice_rdma_peer *peer = &sc->rdma_entry.peer; struct ice_rdma_event event; memset(&event, 0, sizeof(struct ice_rdma_event)); event.type = ICE_RDMA_EVENT_TC_CHANGE; /* post-event */ event.prep = false; /* gather current configuration */ ice_rdma_cp_qos_info(&sc->hw, &pi->qos_cfg.local_dcbx_cfg, &event.port_qos); sx_xlock(&ice_rdma.mtx); if (sc->rdma_entry.attached && ice_rdma.registered) IRDMA_EVENT_HANDLER(peer, &event); sx_xunlock(&ice_rdma.mtx); } + +/** + * ice_rdma_notify_pe_intr - notify irdma on incoming interrupts regarding PE + * @sc: the ice driver softc + * @oicr: interrupt cause + * + * Pass the information about received interrupt to RDMA driver if it was + * relating to PE. Specifically PE_CRITERR and HMC_ERR. + * The irdma driver shall decide what should be done upon these interrupts. + */ +void +ice_rdma_notify_pe_intr(struct ice_softc *sc, uint32_t oicr) +{ + struct ice_rdma_peer *peer = &sc->rdma_entry.peer; + struct ice_rdma_event event; + + memset(&event, 0, sizeof(struct ice_rdma_event)); + event.type = ICE_RDMA_EVENT_CRIT_ERR; + event.oicr_reg = oicr; + + sx_xlock(&ice_rdma.mtx); + if (sc->rdma_entry.attached && ice_rdma.registered) + IRDMA_EVENT_HANDLER(peer, &event); + sx_xunlock(&ice_rdma.mtx); +} + +/** + * ice_rdma_notify_reset - notify irdma on incoming pf-reset + * @sc: the ice driver softc + * + * Inform irdma driver of an incoming PF reset. + * The irdma driver shall set its state to reset, and avoid using CQP + * anymore. Next step should be to call ice_rdma_pf_stop in order to + * remove resources. 
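+ *
+ * A rough sketch of the reset handshake, as far as this file shows it
+ * (the exact call site of this notification in the base driver is not
+ * visible in this change and is assumed here):
+ *
+ *	client requests a reset (IRDMA_DI_RESET / ICE_RDMA_EVENT_RESET)
+ *	  -> ice_rdma_pf_reset() sets ICE_STATE_RESET_PFR_REQ
+ *	base driver starts handling the PFR and
+ *	  -> ice_rdma_notify_reset(): client stops issuing CQP commands
+ *	  -> ice_rdma_pf_stop(): RDMA resources are released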
+ */ +void +ice_rdma_notify_reset(struct ice_softc *sc) +{ + struct ice_rdma_peer *peer = &sc->rdma_entry.peer; + struct ice_rdma_event event; + + memset(&event, 0, sizeof(struct ice_rdma_event)); + event.type = ICE_RDMA_EVENT_RESET; + + sx_xlock(&ice_rdma.mtx); + if (sc->rdma_entry.attached && ice_rdma.registered) + IRDMA_EVENT_HANDLER(peer, &event); + sx_xunlock(&ice_rdma.mtx); +} diff --git a/sys/dev/ice/ice_rdma.h b/sys/dev/ice/ice_rdma.h index f83c30b33f6c..38e2ef491e8e 100644 --- a/sys/dev/ice/ice_rdma.h +++ b/sys/dev/ice/ice_rdma.h @@ -1,310 +1,311 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2023, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /** * @file ice_rdma.h * @brief header file for RDMA client interface functions * * Contains definitions and function calls shared by the ice driver and the * RDMA client interface driver. * * Since these definitions are shared between drivers it is important that any * changes are considered carefully for backwards compatibility. */ #ifndef _ICE_RDMA_H_ #define _ICE_RDMA_H_ /* * The RDMA client interface version is used to help determine * incompatibilities between the interface definition shared between the main * driver and the client driver. * * It will follows the semantic version guidelines, that is: * Given the version number MAJOR.MINOR.PATCH, increment the: * * MAJOR version when you make incompatible changes, * MINOR version when you add functionality in a backwards-compatible manner, and * PATCH version when you make backwards-compatible bug fixes. * * Any change to this file, or one of the kobject interface files must come * with an associated change in one of the MAJOR, MINOR, or PATCH versions, * and care must be taken that backwards incompatible changes MUST increment * the MAJOR version. * * Note: Until the MAJOR version is set to at least 1, the above semantic * version guarantees may not hold, and this interface should not be * considered stable. 
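 *
 * As a concrete example, this change only adds functionality (such as the
 * oicr_reg payload used with ICE_RDMA_EVENT_CRIT_ERR) in a
 * backwards-compatible way, so the MINOR version below moves from 0 to 1
 * while the MAJOR and PATCH versions are unchanged.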
*/ #define ICE_RDMA_MAJOR_VERSION 1 -#define ICE_RDMA_MINOR_VERSION 0 +#define ICE_RDMA_MINOR_VERSION 1 #define ICE_RDMA_PATCH_VERSION 0 /** * @def ICE_RDMA_MAX_MSIX * @brief Maximum number of MSI-X vectors that will be reserved * * Defines the maximum number of MSI-X vectors that an RDMA interface will * have reserved in advance. Does not guarantee that many vectors have * actually been enabled. */ #define ICE_RDMA_MAX_MSIX 64 /** * @struct ice_rdma_info * @brief RDMA information from the client driver * * The RDMA client driver will fill in this structure and pass its contents * back to the main driver using the ice_rdma_register function. * * It should fill the version in with the ICE_RDMA_* versions as defined in * the ice_rdma.h header. * * Additionally it must provide a pointer to a kobject class which extends the * ice_rdma_di_class with the operations defined in the rdma_if.m interface. * * If the version specified is not compatible, then the registration will * of the RDMA driver will fail. */ struct ice_rdma_info { uint16_t major_version; uint16_t minor_version; uint16_t patch_version; kobj_class_t rdma_class; }; #define ICE_RDMA_MAX_USER_PRIORITY 8 #define ICE_RDMA_MAX_MSIX 64 /* Declare the ice_rdma_di kobject class */ DECLARE_CLASS(ice_rdma_di_class); /** * @struct ice_rdma_msix_mapping * @brief MSI-X mapping requested by the peer RDMA driver * * Defines a mapping for MSI-X vectors being requested by the peer RDMA driver * for a given PF. */ struct ice_rdma_msix_mapping { uint8_t itr_indx; int aeq_vector; int ceq_cnt; int *ceq_vector; }; /** * @struct ice_rdma_msix * @brief RDMA MSI-X vectors reserved for the peer RDMA driver * * Defines the segment of the MSI-X vectors for use by the RDMA driver. These * are reserved by the PF when it initializes. */ struct ice_rdma_msix { int base; int count; }; /** * @struct ice_qos_info * @brief QoS information to be shared with RDMA driver */ struct ice_qos_info { uint64_t tc_ctx; uint8_t rel_bw; uint8_t prio_type; uint8_t egress_virt_up; uint8_t ingress_virt_up; }; /** * @struct ice_qos_app_priority_table * @brief Application priority data */ struct ice_qos_app_priority_table { uint16_t prot_id; uint8_t priority; uint8_t selector; }; #define IEEE_8021QAZ_MAX_TCS 8 #define ICE_TC_MAX_USER_PRIORITY 8 #define ICE_QOS_MAX_APPS 32 #define ICE_QOS_DSCP_NUM_VAL 64 /** * @struct ice_qos_params * @brief Holds all necessary data for RDMA to work with DCB * * Struct to hold QoS info */ struct ice_qos_params { struct ice_qos_info tc_info[IEEE_8021QAZ_MAX_TCS]; uint8_t up2tc[ICE_TC_MAX_USER_PRIORITY]; uint8_t vsi_relative_bw; uint8_t vsi_priority_type; uint32_t num_apps; uint8_t pfc_mode; uint8_t dscp_map[ICE_QOS_DSCP_NUM_VAL]; struct ice_qos_app_priority_table apps[ICE_QOS_MAX_APPS]; uint8_t num_tc; }; /** * @struct ice_rdma_peer * @brief RDMA driver information * * Shared structure used by the RDMA client driver when talking with the main * device driver. * * Because the definition of this structure is shared between the two drivers, * its ABI should be handled carefully. */ struct ice_rdma_peer { /** * The KOBJ_FIELDS macro must come first, in order for it to be used * as a kobject. */ KOBJ_FIELDS; struct ifnet *ifp; device_t dev; struct resource *pci_mem; struct ice_qos_params initial_qos_info; struct ice_rdma_msix msix; uint16_t mtu; uint16_t pf_vsi_num; uint8_t pf_id; }; /** * @enum ice_res_type * @brief enum for type of resource registration * * enum for type of resource registration. 
* created for plausible compatibility with IDC */ enum ice_res_type { ICE_INVAL_RES = 0x0, ICE_RDMA_QSET_ALLOC = 0x8, ICE_RDMA_QSET_FREE = 0x18, }; /** * @struct ice_rdma_qset_params * @brief struct to hold per RDMA Qset info */ struct ice_rdma_qset_params { uint32_t teid; /* qset TEID */ uint16_t qs_handle; /* RDMA driver provides this */ uint16_t vsi_id; /* VSI index */ uint8_t tc; /* TC branch the QSet should belong to */ uint8_t reserved[3]; }; #define ICE_MAX_TXQ_PER_TXQG 128 /** * @struct ice_rdma_qset_update * @brief struct used to register and unregister qsets for RDMA driver */ struct ice_rdma_qset_update { enum ice_res_type res_type; uint16_t cnt_req; uint16_t res_allocated; uint32_t res_handle; struct ice_rdma_qset_params qsets; }; /** * @enum ice_rdma_event_type * @brief enum for type of event from base driver */ enum ice_rdma_event_type { ICE_RDMA_EVENT_NONE = 0, ICE_RDMA_EVENT_LINK_CHANGE, ICE_RDMA_EVENT_MTU_CHANGE, ICE_RDMA_EVENT_TC_CHANGE, ICE_RDMA_EVENT_API_CHANGE, ICE_RDMA_EVENT_CRIT_ERR, ICE_RDMA_EVENT_RESET, ICE_RDMA_EVENT_QSET_REGISTER, ICE_RDMA_EVENT_VSI_FILTER_UPDATE, ICE_RDMA_EVENT_LAST }; /** * @struct ice_rdma_event * @brief struct for event information to pass to RDMA driver */ struct ice_rdma_event { enum ice_rdma_event_type type; union { /* link change event */ struct { int linkstate; uint64_t baudrate; }; /* MTU change event */ - struct { - int mtu; - }; + int mtu; /* * TC/QoS/DCB change event - * RESET event use prep variable only * prep: if true, this is a pre-event, post-event otherwise */ struct { struct ice_qos_params port_qos; bool prep; }; + /* + * CRIT_ERR event + */ + uint32_t oicr_reg; }; }; /** * @struct ice_rdma_request * @brief struct with data for a request from the RDMA driver */ struct ice_rdma_request { enum ice_rdma_event_type type; union { struct { struct ice_rdma_qset_update res; }; struct { bool enable_filter; }; }; }; int ice_rdma_register(struct ice_rdma_info *info); int ice_rdma_unregister(void); #endif diff --git a/sys/dev/ice/ice_rdma_internal.h b/sys/dev/ice/ice_rdma_internal.h index c80d4540194a..b36544609f45 100644 --- a/sys/dev/ice/ice_rdma_internal.h +++ b/sys/dev/ice/ice_rdma_internal.h @@ -1,101 +1,103 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2023, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /** * @file ice_rdma_internal.h * @brief internal header for the RMDA driver interface setup * * Contains the definitions and functions used by the ice driver to setup the * RDMA driver interface. Functions and definitions in this file are not * shared with the RDMA client driver. */ #ifndef _ICE_RDMA_INTERNAL_H_ #define _ICE_RDMA_INTERNAL_H_ #include "ice_rdma.h" /* Forward declare the softc structure */ struct ice_softc; /* Global sysctl variable indicating if the RDMA client interface is enabled */ extern bool ice_enable_irdma; /** * @struct ice_rdma_entry * @brief RDMA peer list node * * Structure used to store peer entries for each PF in a linked list. */ struct ice_rdma_entry { LIST_ENTRY(ice_rdma_entry) node; struct ice_rdma_peer peer; bool attached; bool initiated; }; #define ice_rdma_peer_to_entry(p) __containerof(p, struct ice_rdma_entry, peer) #define ice_rdma_entry_to_sc(e) __containerof(e, struct ice_softc, rdma_entry) #define ice_rdma_peer_to_sc(p) ice_rdma_entry_to_sc(ice_rdma_peer_to_entry(p)) /** * @struct ice_rdma_peers * @brief Head list structure for the RDMA entry list * * Type defining the head of the linked list of RDMA entries. */ LIST_HEAD(ice_rdma_peers, ice_rdma_entry); /** * @struct ice_rdma_state * @brief global driver state for RDMA * * Contains global state shared across all PFs by the device driver, such as * the kobject class of the currently connected peer driver, and the linked * list of peer entries for each PF. */ struct ice_rdma_state { bool registered; kobj_class_t peer_class; struct sx mtx; struct ice_rdma_peers peers; }; void ice_rdma_init(void); void ice_rdma_exit(void); int ice_rdma_pf_attach(struct ice_softc *sc); void ice_rdma_pf_detach(struct ice_softc *sc); int ice_rdma_pf_init(struct ice_softc *sc); int ice_rdma_pf_stop(struct ice_softc *sc); void ice_rdma_link_change(struct ice_softc *sc, int linkstate, uint64_t baudrate); void ice_rdma_notify_dcb_qos_change(struct ice_softc *sc); void ice_rdma_dcb_qos_update(struct ice_softc *sc, struct ice_port_info *pi); +void ice_rdma_notify_pe_intr(struct ice_softc *sc, uint32_t oicr); +void ice_rdma_notify_reset(struct ice_softc *sc); #endif diff --git a/sys/dev/ice/if_ice_iflib.c b/sys/dev/ice/if_ice_iflib.c index be1aa86234c7..d8542e383564 100644 --- a/sys/dev/ice/if_ice_iflib.c +++ b/sys/dev/ice/if_ice_iflib.c @@ -1,3106 +1,3105 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2023, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. 
Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /** * @file if_ice_iflib.c * @brief iflib driver implementation * * Contains the main entry point for the iflib driver implementation. It * implements the various ifdi driver methods, and sets up the module and * driver values to load an iflib driver. */ #include "ice_iflib.h" #include "ice_drv_info.h" #include "ice_switch.h" #include "ice_sched.h" #include #include #include #include #include /* * Device method prototypes */ static void *ice_register(device_t); static int ice_if_attach_pre(if_ctx_t); static int ice_attach_pre_recovery_mode(struct ice_softc *sc); static int ice_if_attach_post(if_ctx_t); static void ice_attach_post_recovery_mode(struct ice_softc *sc); static int ice_if_detach(if_ctx_t); static int ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix); static void ice_if_queues_free(if_ctx_t ctx); static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void ice_if_intr_enable(if_ctx_t ctx); static void ice_if_intr_disable(if_ctx_t ctx); static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static int ice_if_promisc_set(if_ctx_t ctx, int flags); static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr); static int ice_if_media_change(if_ctx_t ctx); static void ice_if_init(if_ctx_t ctx); static void ice_if_timer(if_ctx_t ctx, uint16_t qid); static void ice_if_update_admin_status(if_ctx_t ctx); static void ice_if_multi_set(if_ctx_t ctx); static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag); static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static void ice_if_stop(if_ctx_t ctx); static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter); static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data); static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req); static int ice_if_suspend(if_ctx_t ctx); static int ice_if_resume(if_ctx_t ctx); static bool ice_if_needs_restart(if_ctx_t, enum iflib_restart_event); static int ice_msix_que(void *arg); static int ice_msix_admin(void *arg); /* * Helper function prototypes */ static int ice_pci_mapping(struct ice_softc *sc); static void ice_free_pci_mapping(struct ice_softc *sc); static void ice_update_link_status(struct ice_softc *sc, bool update_media); static void ice_init_device_features(struct ice_softc *sc); static void 
ice_init_tx_tracking(struct ice_vsi *vsi); static void ice_handle_reset_event(struct ice_softc *sc); static void ice_handle_pf_reset_request(struct ice_softc *sc); static void ice_prepare_for_reset(struct ice_softc *sc); static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc); static void ice_rebuild(struct ice_softc *sc); static void ice_rebuild_recovery_mode(struct ice_softc *sc); static void ice_free_irqvs(struct ice_softc *sc); static void ice_update_rx_mbuf_sz(struct ice_softc *sc); static void ice_poll_for_media_avail(struct ice_softc *sc); static void ice_setup_scctx(struct ice_softc *sc); static int ice_allocate_msix(struct ice_softc *sc); static void ice_admin_timer(void *arg); static void ice_transition_recovery_mode(struct ice_softc *sc); static void ice_transition_safe_mode(struct ice_softc *sc); /* * Device Interface Declaration */ /** * @var ice_methods * @brief ice driver method entry points * * List of device methods implementing the generic device interface used by * the device stack to interact with the ice driver. Since this is an iflib * driver, most of the methods point to the generic iflib implementation. */ static device_method_t ice_methods[] = { /* Device interface */ DEVMETHOD(device_register, ice_register), DEVMETHOD(device_probe, iflib_device_probe_vendor), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; /** * @var ice_iflib_methods * @brief iflib method entry points * * List of device methods used by the iflib stack to interact with this * driver. These are the real main entry points used to interact with this * driver. */ static device_method_t ice_iflib_methods[] = { DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre), DEVMETHOD(ifdi_attach_post, ice_if_attach_post), DEVMETHOD(ifdi_detach, ice_if_detach), DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc), DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign), DEVMETHOD(ifdi_queues_free, ice_if_queues_free), DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set), DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable), DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable), DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable), DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set), DEVMETHOD(ifdi_media_status, ice_if_media_status), DEVMETHOD(ifdi_media_change, ice_if_media_change), DEVMETHOD(ifdi_init, ice_if_init), DEVMETHOD(ifdi_stop, ice_if_stop), DEVMETHOD(ifdi_timer, ice_if_timer), DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status), DEVMETHOD(ifdi_multi_set, ice_if_multi_set), DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, ice_if_get_counter), DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl), DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req), DEVMETHOD(ifdi_suspend, ice_if_suspend), DEVMETHOD(ifdi_resume, ice_if_resume), DEVMETHOD(ifdi_needs_restart, ice_if_needs_restart), DEVMETHOD_END }; /** * @var ice_driver * @brief driver structure for the generic device stack * * driver_t definition used to setup the generic device methods. 
*/ static driver_t ice_driver = { .name = "ice", .methods = ice_methods, .size = sizeof(struct ice_softc), }; /** * @var ice_iflib_driver * @brief driver structure for the iflib stack * * driver_t definition used to setup the iflib device methods. */ static driver_t ice_iflib_driver = { .name = "ice", .methods = ice_iflib_methods, .size = sizeof(struct ice_softc), }; extern struct if_txrx ice_txrx; extern struct if_txrx ice_recovery_txrx; /** * @var ice_sctx * @brief ice driver shared context * * Structure defining shared values (context) that is used by all instances of * the device. Primarily used to setup details about how the iflib stack * should treat this driver. Also defines the default, minimum, and maximum * number of descriptors in each ring. */ static struct if_shared_ctx ice_sctx = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = ICE_MAX_FRAME_SIZE, /* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but * that doesn't make sense since that would be larger than the maximum * size of a single packet. */ .isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE, /* XXX: This is only used by iflib to ensure that * scctx->isc_tx_tso_size_max + the VLAN header is a valid size. */ .isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header), /* XXX: This is used by iflib to set the number of segments in the TSO * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the * related ifnet parameter. */ .isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE, .isc_rx_maxsize = ICE_MAX_FRAME_SIZE, .isc_rx_nsegments = ICE_MAX_RX_SEGS, .isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE, .isc_nfl = 1, .isc_ntxqs = 1, .isc_nrxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = ice_vendor_info_array, .isc_driver_version = __DECONST(char *, ice_driver_version), .isc_driver = &ice_iflib_driver, /* * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available * for hardware checksum offload * * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the * IP sum field, required by our hardware to calculate valid TSO * checksums. * * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs * even when the interface is down. * * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X * vectors manually instead of relying on iflib code to do this. */ .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP | IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX, .isc_nrxd_min = {ICE_MIN_DESC_COUNT}, .isc_ntxd_min = {ICE_MIN_DESC_COUNT}, .isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT}, .isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT}, .isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT}, .isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT}, }; /** * @var ice_devclass * @brief ice driver device class * * device class used to setup the ice driver module kobject class. */ devclass_t ice_devclass; DRIVER_MODULE(ice, pci, ice_driver, ice_devclass, ice_module_event_handler, 0); MODULE_VERSION(ice, 1); MODULE_DEPEND(ice, pci, 1, 1, 1); MODULE_DEPEND(ice, ether, 1, 1, 1); MODULE_DEPEND(ice, iflib, 1, 1, 1); IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array); /* Static driver-wide sysctls */ #include "ice_iflib_sysctls.h" /** * ice_pci_mapping - Map PCI BAR memory * @sc: device private softc * * Map PCI BAR 0 for device operation. 
*/ static int ice_pci_mapping(struct ice_softc *sc) { int rc; /* Map BAR0 */ rc = ice_map_bar(sc->dev, &sc->bar0, 0); if (rc) return rc; return 0; } /** * ice_free_pci_mapping - Release PCI BAR memory * @sc: device private softc * * Release PCI BARs which were previously mapped by ice_pci_mapping(). */ static void ice_free_pci_mapping(struct ice_softc *sc) { /* Free BAR0 */ ice_free_bar(sc->dev, &sc->bar0); } /* * Device methods */ /** * ice_register - register device method callback * @dev: the device being registered * * Returns a pointer to the shared context structure, which is used by iflib. */ static void * ice_register(device_t dev __unused) { return &ice_sctx; } /* ice_register */ /** * ice_setup_scctx - Setup the iflib softc context structure * @sc: the device private structure * * Setup the parameters in if_softc_ctx_t structure used by the iflib stack * when loading. */ static void ice_setup_scctx(struct ice_softc *sc) { if_softc_ctx_t scctx = sc->scctx; struct ice_hw *hw = &sc->hw; bool safe_mode, recovery_mode; safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE); recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE); /* * If the driver loads in Safe mode or Recovery mode, limit iflib to * a single queue pair. */ if (safe_mode || recovery_mode) { scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets_max = 1; scctx->isc_nrxqsets_max = 1; } else { /* * iflib initially sets the isc_ntxqsets and isc_nrxqsets to * the values of the override sysctls. Cache these initial * values so that the driver can be aware of what the iflib * sysctl value is when setting up MSI-X vectors. */ sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets; sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets; if (scctx->isc_ntxqsets == 0) scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size; if (scctx->isc_nrxqsets == 0) scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size; scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq; scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq; /* * Sanity check that the iflib sysctl values are within the * maximum supported range. */ if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max) sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max; if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max) sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max; } scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct ice_tx_desc), DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN); scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS; scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS; scctx->isc_tx_tso_size_max = ICE_TSO_SIZE; scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE; scctx->isc_msix_bar = PCIR_BAR(ICE_MSIX_BAR); scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size; /* * If the driver loads in recovery mode, disable Tx/Rx functionality */ if (recovery_mode) scctx->isc_txrx = &ice_recovery_txrx; else scctx->isc_txrx = &ice_txrx; /* * If the driver loads in Safe mode or Recovery mode, disable * advanced features including hardware offloads. */ if (safe_mode || recovery_mode) { scctx->isc_capenable = ICE_SAFE_CAPS; scctx->isc_tx_csum_flags = 0; } else { scctx->isc_capenable = ICE_FULL_CAPS; scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD; } scctx->isc_capabilities = scctx->isc_capenable; } /* ice_setup_scctx */ /** * ice_if_attach_pre - Early device attach logic * @ctx: the iflib context structure * * Called by iflib during the attach process. 
Earliest main driver entry * point which performs necessary hardware and driver initialization. Called * before the Tx and Rx queues are allocated. */ static int ice_if_attach_pre(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_fw_modes fw_mode; enum ice_status status; if_softc_ctx_t scctx; struct ice_hw *hw; device_t dev; int err; device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n"); ice_set_state(&sc->state, ICE_STATE_ATTACHING); sc->ctx = ctx; sc->media = iflib_get_media(ctx); sc->sctx = iflib_get_sctx(ctx); sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx); dev = sc->dev = iflib_get_dev(ctx); scctx = sc->scctx = iflib_get_softc_ctx(ctx); hw = &sc->hw; hw->back = sc; snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name), "%s:admin", device_get_nameunit(dev)); mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF); callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0); ASSERT_CTX_LOCKED(sc); if (ice_pci_mapping(sc)) { err = (ENXIO); goto destroy_admin_timer; } /* Save off the PCI information */ ice_save_pci_info(hw, dev); /* create tunables as early as possible */ ice_add_device_tunables(sc); /* Setup ControlQ lengths */ ice_set_ctrlq_len(hw); reinit_hw: fw_mode = ice_get_fw_mode(hw); if (fw_mode == ICE_FW_MODE_REC) { device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); err = ice_attach_pre_recovery_mode(sc); if (err) goto free_pci_mapping; return (0); } /* Initialize the hw data structure */ status = ice_init_hw(hw); if (status) { if (status == ICE_ERR_FW_API_VER) { /* Enter recovery mode, so that the driver remains * loaded. This way, if the system administrator * cannot update the driver, they may still attempt to * downgrade the NVM. */ err = ice_attach_pre_recovery_mode(sc); if (err) goto free_pci_mapping; return (0); } else { err = EIO; device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } goto free_pci_mapping; } ice_init_device_features(sc); /* Notify firmware of the device driver version */ err = ice_send_version(sc); if (err) goto deinit_hw; /* * Success indicates a change was made that requires a reinitialization * of the hardware */ err = ice_load_pkg_file(sc); if (err == ICE_SUCCESS) { ice_deinit_hw(hw); goto reinit_hw; } err = ice_init_link_events(sc); if (err) { device_printf(dev, "ice_init_link_events failed: %s\n", ice_err_str(err)); goto deinit_hw; } /* Initialize VLAN mode in FW; if dual VLAN mode is supported by the package * and firmware, this will force them to use single VLAN mode. 
*/ status = ice_set_vlan_mode(hw); if (status) { err = EIO; device_printf(dev, "Unable to initialize VLAN mode, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); goto deinit_hw; } ice_print_nvm_version(sc); /* Setup the MAC address */ iflib_set_mac(ctx, hw->port_info->mac.lan_addr); /* Setup the iflib softc context structure */ ice_setup_scctx(sc); /* Initialize the Tx queue manager */ err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq); if (err) { device_printf(dev, "Unable to initialize Tx queue manager: %s\n", ice_err_str(err)); goto deinit_hw; } /* Initialize the Rx queue manager */ err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq); if (err) { device_printf(dev, "Unable to initialize Rx queue manager: %s\n", ice_err_str(err)); goto free_tx_qmgr; } /* Initialize the interrupt resource manager */ err = ice_alloc_intr_tracking(sc); if (err) /* Errors are already printed */ goto free_rx_qmgr; /* Determine maximum number of VSIs we'll prepare for */ sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE, hw->func_caps.guar_num_vsi); if (!sc->num_available_vsi) { err = EIO; device_printf(dev, "No VSIs allocated to host\n"); goto free_intr_tracking; } /* Allocate storage for the VSI pointers */ sc->all_vsi = (struct ice_vsi **) malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi, M_ICE, M_WAITOK | M_ZERO); if (!sc->all_vsi) { err = ENOMEM; device_printf(dev, "Unable to allocate VSI array\n"); goto free_intr_tracking; } /* * Prepare the statically allocated primary PF VSI in the softc * structure. Other VSIs will be dynamically allocated as needed. */ ice_setup_pf_vsi(sc); err = ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max, scctx->isc_nrxqsets_max); if (err) { device_printf(dev, "Unable to allocate VSI Queue maps\n"); goto free_main_vsi; } /* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */ err = ice_allocate_msix(sc); if (err) goto free_main_vsi; return 0; free_main_vsi: /* ice_release_vsi will free the queue maps if they were allocated */ ice_release_vsi(&sc->pf_vsi); free(sc->all_vsi, M_ICE); sc->all_vsi = NULL; free_intr_tracking: ice_free_intr_tracking(sc); free_rx_qmgr: ice_resmgr_destroy(&sc->rx_qmgr); free_tx_qmgr: ice_resmgr_destroy(&sc->tx_qmgr); deinit_hw: ice_deinit_hw(hw); free_pci_mapping: ice_free_pci_mapping(sc); destroy_admin_timer: mtx_lock(&sc->admin_mtx); callout_stop(&sc->admin_timer); mtx_unlock(&sc->admin_mtx); mtx_destroy(&sc->admin_mtx); return err; } /* ice_if_attach_pre */ /** * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery * @sc: the device private softc * * Loads the device driver in limited Firmware Recovery mode, intended to * allow users to update the firmware to attempt to recover the device. * * @remark We may enter recovery mode in case either (a) the firmware is * detected to be in an invalid state and must be re-programmed, or (b) the * driver detects that the loaded firmware has a non-compatible API version * that the driver cannot operate with. 
*/ static int ice_attach_pre_recovery_mode(struct ice_softc *sc) { ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE); /* Setup the iflib softc context */ ice_setup_scctx(sc); /* Setup the PF VSI back pointer */ sc->pf_vsi.sc = sc; /* * We still need to allocate MSI-X vectors since we need one vector to * run the administrative admin interrupt */ return ice_allocate_msix(sc); } /** * ice_update_link_status - notify OS of link state change * @sc: device private softc structure * @update_media: true if we should update media even if link didn't change * * Called to notify iflib core of link status changes. Should be called once * during attach_post, and whenever link status changes during runtime. * * This call only updates the currently supported media types if the link * status changed, or if update_media is set to true. */ static void ice_update_link_status(struct ice_softc *sc, bool update_media) { struct ice_hw *hw = &sc->hw; enum ice_status status; /* Never report link up when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* Report link status to iflib only once each time it changes */ if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) { if (sc->link_up) { /* link is up */ uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info); ice_set_default_local_lldp_mib(sc); iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate); ice_rdma_link_change(sc, LINK_STATE_UP, baudrate); ice_link_up_msg(sc); } else { /* link is down */ iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0); ice_rdma_link_change(sc, LINK_STATE_DOWN, 0); } update_media = true; } /* Update the supported media types */ if (update_media) { status = ice_add_media_types(sc, sc->media); if (status) device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } } /** * ice_if_attach_post - Late device attach logic * @ctx: the iflib context structure * * Called by iflib to finish up attaching the device. Performs any attach * logic which must wait until after the Tx and Rx queues have been * allocated. */ static int ice_if_attach_post(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); int err; ASSERT_CTX_LOCKED(sc); /* We don't yet support loading if MSI-X is not supported */ if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) { device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n"); return (ENOTSUP); } /* The ifnet structure hasn't yet been initialized when the attach_pre * handler is called, so wait until attach_post to setup the * isc_max_frame_size. */ sc->ifp = ifp; sc->scctx->isc_max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; /* * If we are in recovery mode, only perform a limited subset of * initialization to support NVM recovery. 
*/ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { ice_attach_post_recovery_mode(sc); return (0); } sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size; err = ice_initialize_vsi(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to initialize Main VSI: %s\n", ice_err_str(err)); return err; } /* Enable FW health event reporting */ ice_init_health_events(sc); /* Configure the main PF VSI for RSS */ err = ice_config_rss(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to configure RSS for the main VSI, err %s\n", ice_err_str(err)); return err; } /* Configure switch to drop transmitted LLDP and PAUSE frames */ err = ice_cfg_pf_ethertype_filters(sc); if (err) return err; ice_get_and_print_bus_info(sc); ice_set_link_management_mode(sc); ice_init_saved_phy_cfg(sc); ice_cfg_pba_num(sc); ice_add_device_sysctls(sc); /* Get DCBX/LLDP state and start DCBX agent */ ice_init_dcb_setup(sc); /* Setup link configuration parameters */ ice_init_link_configuration(sc); ice_update_link_status(sc, true); /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); err = ice_rdma_pf_attach(sc); if (err) return (err); /* Start the admin timer */ mtx_lock(&sc->admin_mtx); callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc); mtx_unlock(&sc->admin_mtx); ice_clear_state(&sc->state, ICE_STATE_ATTACHING); return 0; } /* ice_if_attach_post */ /** * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery * @sc: the device private softc * * Performs minimal work to prepare the driver to recover an NVM in case the * firmware is in recovery mode. */ static void ice_attach_post_recovery_mode(struct ice_softc *sc) { /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); /* Start the admin timer */ mtx_lock(&sc->admin_mtx); callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc); mtx_unlock(&sc->admin_mtx); ice_clear_state(&sc->state, ICE_STATE_ATTACHING); } /** * ice_free_irqvs - Free IRQ vector memory * @sc: the device private softc structure * * Free IRQ vector memory allocated during ice_if_msix_intr_assign. */ static void ice_free_irqvs(struct ice_softc *sc) { struct ice_vsi *vsi = &sc->pf_vsi; if_ctx_t ctx = sc->ctx; int i; /* If the irqvs array is NULL, then there are no vectors to free */ if (sc->irqvs == NULL) return; /* Free the IRQ vectors */ for (i = 0; i < sc->num_irq_vectors; i++) iflib_irq_free(ctx, &sc->irqvs[i].irq); /* Clear the irqv pointers */ for (i = 0; i < vsi->num_rx_queues; i++) vsi->rx_queues[i].irqv = NULL; for (i = 0; i < vsi->num_tx_queues; i++) vsi->tx_queues[i].irqv = NULL; /* Release the vector array memory */ free(sc->irqvs, M_ICE); sc->irqvs = NULL; sc->num_irq_vectors = 0; } /** * ice_if_detach - Device driver detach logic * @ctx: iflib context structure * * Perform device shutdown logic to detach the device driver. * * Note that there is no guarantee of the ordering of ice_if_queues_free() and * ice_if_detach(). It is possible for the functions to be called in either * order, and they must not assume to have a strict ordering. 
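 *
 * As an illustrative sketch (not something introduced by this change), both
 * teardown paths therefore rely on the same idempotent-free idiom, so that
 * whichever of the two runs second simply finds nothing left to release:
 *
 *	if (vsi->tx_queues != NULL) {
 *		free(vsi->tx_queues, M_ICE);
 *		vsi->tx_queues = NULL;
 *	}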
*/ static int ice_if_detach(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; int i; ASSERT_CTX_LOCKED(sc); /* Indicate that we're detaching */ ice_set_state(&sc->state, ICE_STATE_DETACHING); /* Stop the admin timer */ mtx_lock(&sc->admin_mtx); callout_stop(&sc->admin_timer); mtx_unlock(&sc->admin_mtx); mtx_destroy(&sc->admin_mtx); ice_rdma_pf_detach(sc); /* Free allocated media types */ ifmedia_removeall(sc->media); /* Free the Tx and Rx sysctl contexts, and assign NULL to the node * pointers. Note, the calls here and those in ice_if_queues_free() * are *BOTH* necessary, as we cannot guarantee which path will be * run first */ ice_vsi_del_txqs_ctx(vsi); ice_vsi_del_rxqs_ctx(vsi); /* Release MSI-X resources */ ice_free_irqvs(sc); for (i = 0; i < sc->num_available_vsi; i++) { if (sc->all_vsi[i]) ice_release_vsi(sc->all_vsi[i]); } if (sc->all_vsi) { free(sc->all_vsi, M_ICE); sc->all_vsi = NULL; } /* Release MSI-X memory */ pci_release_msi(sc->dev); if (sc->msix_table != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, rman_get_rid(sc->msix_table), sc->msix_table); sc->msix_table = NULL; } ice_free_intr_tracking(sc); /* Destroy the queue managers */ ice_resmgr_destroy(&sc->tx_qmgr); ice_resmgr_destroy(&sc->rx_qmgr); if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) ice_deinit_hw(&sc->hw); ice_free_pci_mapping(sc); return 0; } /* ice_if_detach */ /** * ice_if_tx_queues_alloc - Allocate Tx queue memory * @ctx: iflib context structure * @vaddrs: virtual addresses for the queue memory * @paddrs: physical addresses for the queue memory * @ntxqs: the number of Tx queues per set (should always be 1) * @ntxqsets: the number of Tx queue sets to allocate * * Called by iflib to allocate Tx queues for the device. Allocates driver * memory to track each queue, the status arrays used for descriptor * status reporting, and Tx queue sysctls. 
*/ static int ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int __invariant_only ntxqs, int ntxqsets) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_tx_queue *txq; int err, i, j; MPASS(ntxqs == 1); MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT); ASSERT_CTX_LOCKED(sc); /* Do not bother allocating queues if we're in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); /* Allocate queue structure memory */ if (!(vsi->tx_queues = (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) { device_printf(sc->dev, "Unable to allocate Tx queue memory\n"); return (ENOMEM); } /* Allocate report status arrays */ for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { if (!(txq->tx_rsq = (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_NOWAIT))) { device_printf(sc->dev, "Unable to allocate tx_rsq memory\n"); err = ENOMEM; goto free_tx_queues; } /* Initialize report status array */ for (j = 0; j < sc->scctx->isc_ntxd[0]; j++) txq->tx_rsq[j] = QIDX_INVALID; } /* Assign queues from PF space to the main VSI */ err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets); if (err) { device_printf(sc->dev, "Unable to assign PF queues: %s\n", ice_err_str(err)); goto free_tx_queues; } vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS; /* Add Tx queue sysctls context */ ice_vsi_add_txqs_ctx(vsi); for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { /* q_handle == me when only one TC */ txq->me = txq->q_handle = i; txq->vsi = vsi; /* store the queue size for easier access */ txq->desc_count = sc->scctx->isc_ntxd[0]; /* get the virtual and physical address of the hardware queues */ txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]); txq->tx_base = (struct ice_tx_desc *)vaddrs[i]; txq->tx_paddr = paddrs[i]; ice_add_txq_sysctls(txq); } vsi->num_tx_queues = ntxqsets; return (0); free_tx_queues: for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { if (txq->tx_rsq != NULL) { free(txq->tx_rsq, M_ICE); txq->tx_rsq = NULL; } } free(vsi->tx_queues, M_ICE); vsi->tx_queues = NULL; return err; } /** * ice_if_rx_queues_alloc - Allocate Rx queue memory * @ctx: iflib context structure * @vaddrs: virtual addresses for the queue memory * @paddrs: physical addresses for the queue memory * @nrxqs: number of Rx queues per set (should always be 1) * @nrxqsets: number of Rx queue sets to allocate * * Called by iflib to allocate Rx queues for the device. Allocates driver * memory to track each queue, as well as sets up the Rx queue sysctls. 
*/ static int ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int __invariant_only nrxqs, int nrxqsets) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_rx_queue *rxq; int err, i; MPASS(nrxqs == 1); MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT); ASSERT_CTX_LOCKED(sc); /* Do not bother allocating queues if we're in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); /* Allocate queue structure memory */ if (!(vsi->rx_queues = (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) { device_printf(sc->dev, "Unable to allocate Rx queue memory\n"); return (ENOMEM); } /* Assign queues from PF space to the main VSI */ err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets); if (err) { device_printf(sc->dev, "Unable to assign PF queues: %s\n", ice_err_str(err)); goto free_rx_queues; } vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS; /* Add Rx queue sysctls context */ ice_vsi_add_rxqs_ctx(vsi); for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) { rxq->me = i; rxq->vsi = vsi; /* store the queue size for easier access */ rxq->desc_count = sc->scctx->isc_nrxd[0]; /* get the virtual and physical address of the hardware queues */ rxq->tail = QRX_TAIL(vsi->rx_qmap[i]); rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i]; rxq->rx_paddr = paddrs[i]; ice_add_rxq_sysctls(rxq); } vsi->num_rx_queues = nrxqsets; return (0); free_rx_queues: free(vsi->rx_queues, M_ICE); vsi->rx_queues = NULL; return err; } /** * ice_if_queues_free - Free queue memory * @ctx: the iflib context structure * * Free queue memory allocated by ice_if_tx_queues_alloc() and * ice_if_rx_queues_alloc(). * * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be * called in the same order. It's possible for ice_if_queues_free() to be * called prior to ice_if_detach(), and vice versa. * * For this reason, the main VSI is a static member of the ice_softc, which is * not free'd until after iflib finishes calling both of these functions. * * Thus, care must be taken in how we manage the memory being freed by this * function, and in what tasks it can and must perform. */ static void ice_if_queues_free(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_tx_queue *txq; int i; /* Free the Tx and Rx sysctl contexts, and assign NULL to the node * pointers. Note, the calls here and those in ice_if_detach() * are *BOTH* necessary, as we cannot guarantee which path will be * run first */ ice_vsi_del_txqs_ctx(vsi); ice_vsi_del_rxqs_ctx(vsi); /* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */ ice_free_irqvs(sc); if (vsi->tx_queues != NULL) { /* free the tx_rsq arrays */ for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) { if (txq->tx_rsq != NULL) { free(txq->tx_rsq, M_ICE); txq->tx_rsq = NULL; } } free(vsi->tx_queues, M_ICE); vsi->tx_queues = NULL; vsi->num_tx_queues = 0; } if (vsi->rx_queues != NULL) { free(vsi->rx_queues, M_ICE); vsi->rx_queues = NULL; vsi->num_rx_queues = 0; } } /** * ice_msix_que - Fast interrupt handler for MSI-X receive queues * @arg: The Rx queue memory * * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when * an MSI-X interrupt for a given queue is triggered. Currently this just asks * iflib to schedule the main Rx thread. 
*/ static int ice_msix_que(void *arg) { struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg; /* TODO: dynamic ITR algorithm?? */ return (FILTER_SCHEDULE_THREAD); } /** * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt * @arg: pointer to device softc memory * * Called by iflib when an administrative interrupt occurs. Should perform any * fast logic for handling the interrupt cause, and then indicate whether the * admin task needs to be queued. */ static int ice_msix_admin(void *arg) { struct ice_softc *sc = (struct ice_softc *)arg; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; u32 oicr; /* There is no safe way to modify the enabled miscellaneous causes of * the OICR vector at runtime, as doing so would be prone to race * conditions. Reading PFINT_OICR will unmask the associated interrupt * causes and allow future interrupts to occur. The admin interrupt * vector will not be re-enabled until after we exit this function, * but any delayed tasks must be resilient against possible "late * arrival" interrupts that occur while we're already handling the * task. This is done by using state bits and serializing these * delayed tasks via the admin status task function. */ oicr = rd32(hw, PFINT_OICR); /* Processing multiple controlq interrupts on a single vector does not * provide an indication of which controlq triggered the interrupt. * We might try reading the INTEVENT bit of the respective PFINT_*_CTL * registers. However, the INTEVENT bit is not guaranteed to be set as * it gets automatically cleared when the hardware acknowledges the * interrupt. * * This means we don't really have a good indication of whether or * which controlq triggered this interrupt. We'll just notify the * admin task that it should check all the controlqs. */ ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING); if (oicr & PFINT_OICR_VFLR_M) { ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING); } if (oicr & PFINT_OICR_MAL_DETECT_M) { ice_set_state(&sc->state, ICE_STATE_MDD_PENDING); } if (oicr & PFINT_OICR_GRST_M) { u32 reset; reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >> GLGEN_RSTAT_RESET_TYPE_S; if (reset == ICE_RESET_CORER) sc->soft_stats.corer_count++; else if (reset == ICE_RESET_GLOBR) sc->soft_stats.globr_count++; else sc->soft_stats.empr_count++; /* There are a couple of bits at play for handling resets. * First, the ICE_STATE_RESET_OICR_RECV bit is used to * indicate that the driver has received an OICR with a reset * bit active, indicating that a CORER/GLOBR/EMPR is about to * happen. Second, we set hw->reset_ongoing to indicate that * the hardware is in reset. We will set this back to false as * soon as the driver has determined that the hardware is out * of reset. * * If the driver wishes to trigger a request, it can set one of * the ICE_STATE_RESET_*_REQ bits, which will trigger the * correct type of reset. 
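		 *
		 * As a hedged illustration (not new logic in this change), a
		 * code path that wants a PF reset only has to mark the
		 * request and let the admin task act on it, roughly:
		 *
		 *	ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
		 *	iflib_admin_intr_deferred(sc->ctx);
		 *
		 * ice_handle_pf_reset_request() then performs the actual
		 * prepare, reset, and rebuild sequence from the admin task.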
		 */
		if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
			hw->reset_ongoing = true;
	}

	if (oicr & PFINT_OICR_ECC_ERR_M) {
		device_printf(dev, "ECC Error detected!\n");
		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
	}

-	if (oicr & PFINT_OICR_PE_CRITERR_M) {
-		device_printf(dev, "Critical Protocol Engine Error detected!\n");
-		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
+	if (oicr & (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M)) {
+		if (oicr & PFINT_OICR_HMC_ERR_M)
+			/* Log the HMC errors */
+			ice_log_hmc_error(hw, dev);
+		ice_rdma_notify_pe_intr(sc, oicr);
	}

	if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
		device_printf(dev, "PCI Exception detected!\n");
		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
	}

-	if (oicr & PFINT_OICR_HMC_ERR_M) {
-		/* Log the HMC errors, but don't disable the interrupt cause */
-		ice_log_hmc_error(hw, dev);
-	}
-
	return (FILTER_SCHEDULE_THREAD);
}

/**
 * ice_allocate_msix - Allocate MSI-X vectors for the interface
 * @sc: the device private softc
 *
 * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
 *
 * First, determine a suitable total number of vectors based on the number
 * of CPUs, RSS buckets, the administrative vector, and other demands such as
 * RDMA.
 *
 * Request the desired amount of vectors, and see how many we obtain. If we
 * don't obtain as many as desired, reduce the demands by lowering the number
 * of requested queues or reducing the demand from other features such as
 * RDMA.
 *
 * @remark This function is required because the driver sets the
 * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
 * manually.
 *
 * @remark This driver will only use MSI-X vectors. If this is not possible,
 * neither MSI nor legacy interrupts will be tried.
 *
 * @post on success this function must set the following scctx parameters:
 * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
 *
 * @returns zero on success or an error code on failure.
 */
static int
ice_allocate_msix(struct ice_softc *sc)
{
	bool iflib_override_queue_count = false;
	if_softc_ctx_t scctx = sc->scctx;
	device_t dev = sc->dev;
	cpuset_t cpus;
	int bar, queues, vectors, requested;
	int err = 0;
	int rdma;

	/* Allocate the MSI-X bar */
	bar = scctx->isc_msix_bar;
	sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar,
	    RF_ACTIVE);
	if (!sc->msix_table) {
		device_printf(dev, "Unable to map MSI-X table\n");
		return (ENOMEM);
	}

	/* Check if the iflib queue count sysctls have been set */
	if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
		iflib_override_queue_count = true;

	err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
	if (err) {
		device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
		    __func__, ice_err_str(err));
		CPU_COPY(&all_cpus, &cpus);
	}

	/* Attempt to mimic behavior of iflib_msix_init */
	if (iflib_override_queue_count) {
		/*
		 * If the override sysctls have been set, limit the queues to
		 * the number of logical CPUs.
		 */
		queues = mp_ncpus;
	} else {
		/*
		 * Otherwise, limit the queue count to the CPUs associated
		 * with the NUMA node the device is associated with.
		 */
		queues = CPU_COUNT(&cpus);
	}

	/* Clamp to the number of RSS buckets */
	queues = imin(queues, rss_getnumbuckets());

	/*
	 * Clamp the number of queue pairs to the minimum of the requested Tx
	 * and Rx queues.
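	 *
	 * Illustrative numbers only (not taken from this change): with 8
	 * usable CPUs and an RDMA reservation that works out to 4 vectors,
	 * the driver requests rdma + queues + 1 = 4 + 8 + 1 = 13 vectors
	 * (the +1 being the admin vector). If the OS grants only 10, the
	 * 3-vector shortfall is absorbed by the RDMA reservation first
	 * (leaving rdma = 1); the LAN queue count is reduced only when RDMA
	 * alone cannot cover the difference.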
*/ queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets); queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets); if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) { /* * Choose a number of RDMA vectors based on the number of CPUs * up to a maximum */ rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX); /* Further limit by the user configurable tunable */ rdma = min(rdma, ice_rdma_max_msix); } else { rdma = 0; } /* * Determine the number of vectors to request. Note that we also need * to allocate one vector for administrative tasks. */ requested = rdma + queues + 1; vectors = requested; err = pci_alloc_msix(dev, &vectors); if (err) { device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n", vectors, ice_err_str(err)); goto err_free_msix_table; } /* If we don't receive enough vectors, reduce demands */ if (vectors < requested) { int diff = requested - vectors; device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n", requested, vectors); /* * The OS didn't grant us the requested number of vectors. * Check to see if we can reduce demands by limiting the * number of vectors allocated to certain features. */ if (rdma >= diff) { /* Reduce the number of RDMA vectors we reserve */ rdma -= diff; diff = 0; } else { /* Disable RDMA and reduce the difference */ ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap); diff -= rdma; rdma = 0; } /* * If we still have a difference, we need to reduce the number * of queue pairs. * * However, we still need at least one vector for the admin * interrupt and one queue pair. */ if (queues <= diff) { device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n"); err = (ERANGE); goto err_pci_release_msi; } queues -= diff; } device_printf(dev, "Using %d Tx and Rx queues\n", queues); if (rdma) device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n", rdma); device_printf(dev, "Using MSI-X interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; scctx->isc_nrxqsets = queues; scctx->isc_ntxqsets = queues; scctx->isc_intr = IFLIB_INTR_MSIX; sc->irdma_vectors = rdma; /* Interrupt allocation tracking isn't required in recovery mode, * since neither RDMA nor VFs are enabled. */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); /* Keep track of which interrupt indices are being used for what */ sc->lan_vectors = vectors - rdma; err = ice_resmgr_assign_contiguous(&sc->imgr, sc->pf_imap, sc->lan_vectors); if (err) { device_printf(dev, "Unable to assign PF interrupt mapping: %s\n", ice_err_str(err)); goto err_pci_release_msi; } err = ice_resmgr_assign_contiguous(&sc->imgr, sc->rdma_imap, rdma); if (err) { device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n", ice_err_str(err)); ice_resmgr_release_map(&sc->imgr, sc->pf_imap, sc->lan_vectors); goto err_pci_release_msi; } return (0); err_pci_release_msi: pci_release_msi(dev); err_free_msix_table: if (sc->msix_table != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, rman_get_rid(sc->msix_table), sc->msix_table); sc->msix_table = NULL; } return (err); } /** * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues * @ctx: the iflib context structure * @msix: the number of vectors we were assigned * * Called by iflib to assign MSI-X vectors to queues. Currently requires that * we get at least the same number of vectors as we have queues, and that we * always have the same number of Tx and Rx queues. * * Tx queues use a softirq instead of using their own hardware interrupt. 
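 *
 * Sketch of the resulting layout, assuming a hypothetical 4 queue pairs:
 *
 *	vector 0 -> admin/OICR interrupt (ice_msix_admin)
 *	vector 1 -> rxq0, with txq0 sharing it via a softirq
 *	vector 2 -> rxq1 and txq1
 *	vector 3 -> rxq2 and txq2
 *	vector 4 -> rxq3 and txq3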
*/ static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; int err, i, vector; ASSERT_CTX_LOCKED(sc); if (vsi->num_rx_queues != vsi->num_tx_queues) { device_printf(sc->dev, "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n", vsi->num_tx_queues, vsi->num_rx_queues); return (EOPNOTSUPP); } if (msix < (vsi->num_rx_queues + 1)) { device_printf(sc->dev, "Not enough MSI-X vectors to assign one vector to each queue pair\n"); return (EOPNOTSUPP); } /* Save the number of vectors for future use */ sc->num_irq_vectors = vsi->num_rx_queues + 1; /* Allocate space to store the IRQ vector data */ if (!(sc->irqvs = (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors), M_ICE, M_NOWAIT))) { device_printf(sc->dev, "Unable to allocate irqv memory\n"); return (ENOMEM); } /* Administrative interrupt events will use vector 0 */ err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN, ice_msix_admin, sc, 0, "admin"); if (err) { device_printf(sc->dev, "Failed to register Admin queue handler: %s\n", ice_err_str(err)); goto free_irqvs; } sc->irqvs[0].me = 0; /* Do not allocate queue interrupts when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) { struct ice_rx_queue *rxq = &vsi->rx_queues[i]; struct ice_tx_queue *txq = &vsi->tx_queues[i]; int rid = vector + 1; char irq_name[16]; snprintf(irq_name, sizeof(irq_name), "rxq%d", i); err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid, IFLIB_INTR_RXTX, ice_msix_que, rxq, rxq->me, irq_name); if (err) { device_printf(sc->dev, "Failed to allocate q int %d err: %s\n", i, ice_err_str(err)); vector--; i--; goto fail; } sc->irqvs[vector].me = vector; rxq->irqv = &sc->irqvs[vector]; bzero(irq_name, sizeof(irq_name)); snprintf(irq_name, sizeof(irq_name), "txq%d", i); iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq, IFLIB_INTR_TX, txq, txq->me, irq_name); txq->irqv = &sc->irqvs[vector]; } return (0); fail: for (; i >= 0; i--, vector--) iflib_irq_free(ctx, &sc->irqvs[vector].irq); iflib_irq_free(ctx, &sc->irqvs[0].irq); free_irqvs: free(sc->irqvs, M_ICE); sc->irqvs = NULL; return err; } /** * ice_if_mtu_set - Set the device MTU * @ctx: iflib context structure * @mtu: the MTU requested * * Called by iflib to configure the device's Maximum Transmission Unit (MTU). * * @pre assumes the caller holds the iflib CTX lock */ static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); ASSERT_CTX_LOCKED(sc); /* Do not support configuration when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU) return (EINVAL); sc->scctx->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size; return (0); } /** * ice_if_intr_enable - Enable device interrupts * @ctx: iflib context structure * * Called by iflib to request enabling device interrupts. 
*/ static void ice_if_intr_enable(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; ASSERT_CTX_LOCKED(sc); /* Enable ITR 0 */ ice_enable_intr(hw, sc->irqvs[0].me); /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* Enable all queue interrupts */ for (int i = 0; i < vsi->num_rx_queues; i++) ice_enable_intr(hw, vsi->rx_queues[i].irqv->me); } /** * ice_if_intr_disable - Disable device interrupts * @ctx: iflib context structure * * Called by iflib to request disabling device interrupts. */ static void ice_if_intr_disable(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_hw *hw = &sc->hw; unsigned int i; ASSERT_CTX_LOCKED(sc); /* IFDI_INTR_DISABLE may be called prior to interrupts actually being * assigned to queues. Instead of assuming that the interrupt * assignment in the rx_queues structure is valid, just disable all * possible interrupts * * Note that we choose not to disable ITR 0 because this handles the * AdminQ interrupts, and we want to keep processing these even when * the interface is offline. */ for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++) ice_disable_intr(hw, i); } /** * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt * @ctx: iflib context structure * @rxqid: the Rx queue to enable * * Enable a specific Rx queue interrupt. * * This function is not protected by the iflib CTX lock. */ static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me); return (0); } /** * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt * @ctx: iflib context structure * @txqid: the Tx queue to enable * * Enable a specific Tx queue interrupt. * * This function is not protected by the iflib CTX lock. */ static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me); return (0); } /** * ice_if_promisc_set - Set device promiscuous mode * @ctx: iflib context structure * @flags: promiscuous flags to configure * * Called by iflib to configure device promiscuous mode. 
* * @remark Calls to this function will always overwrite the previous setting */ static int ice_if_promisc_set(if_ctx_t ctx, int flags) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_status status; bool promisc_enable = flags & IFF_PROMISC; bool multi_enable = flags & IFF_ALLMULTI; /* Do not support configuration when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); if (multi_enable) return (EOPNOTSUPP); if (promisc_enable) { status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx, ICE_VSI_PROMISC_MASK, 0); if (status && status != ICE_ERR_ALREADY_EXISTS) { device_printf(dev, "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } else { status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx, ICE_VSI_PROMISC_MASK, 0); if (status) { device_printf(dev, "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } return (0); } /** * ice_if_media_change - Change device media * @ctx: device ctx structure * * Called by iflib when a media change is requested. This operation is not * supported by the hardware, so we just return an error code. */ static int ice_if_media_change(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); device_printf(sc->dev, "Media change is not supported.\n"); return (ENODEV); } /** * ice_if_media_status - Report current device media * @ctx: iflib context structure * @ifmr: ifmedia request structure to update * * Updates the provided ifmr with current device media status, including link * status and media type. */ static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_link_status *li = &sc->hw.port_info->phy.link_info; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; /* Never report link up or media types when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; if (!sc->link_up) return; ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_FDX; if (li->phy_type_low) ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low); else if (li->phy_type_high) ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high); else ifmr->ifm_active |= IFM_UNKNOWN; /* Report flow control status as well */ if (li->an_info & ICE_AQ_LINK_PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; if (li->an_info & ICE_AQ_LINK_PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; } /** * ice_init_tx_tracking - Initialize Tx queue software tracking values * @vsi: the VSI to initialize * * Initialize Tx queue software tracking values, including the Report Status * queue, and related software tracking values. */ static void ice_init_tx_tracking(struct ice_vsi *vsi) { struct ice_tx_queue *txq; size_t j; int i; for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) { txq->tx_rs_cidx = txq->tx_rs_pidx = 0; /* Initialize the last processed descriptor to be the end of * the ring, rather than the start, so that we avoid an * off-by-one error in ice_ift_txd_credits_update for the * first packet. 
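		 *
		 * Worked example (generic ring arithmetic, not a quote of
		 * that function): with a hypothetical 1024-entry ring the
		 * index starts at 1023, one slot "behind" descriptor 0 in
		 * ring order. When descriptor 0 is the first descriptor
		 * completed, the consumed count is (0 - 1023) mod 1024 = 1;
		 * starting at 0 instead would yield 0 and under-count the
		 * first packet.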
*/ txq->tx_cidx_processed = txq->desc_count - 1; for (j = 0; j < txq->desc_count; j++) txq->tx_rsq[j] = QIDX_INVALID; } } /** * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues * @sc: the device softc * * Called to update the Rx queue mbuf_sz parameter for configuring the receive * buffer sizes when programming hardware. */ static void ice_update_rx_mbuf_sz(struct ice_softc *sc) { uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx); struct ice_vsi *vsi = &sc->pf_vsi; MPASS(mbuf_sz <= UINT16_MAX); vsi->mbuf_sz = mbuf_sz; } /** * ice_if_init - Initialize the device * @ctx: iflib ctx structure * * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes * device filters and prepares the Tx and Rx engines. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_init(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); device_t dev = sc->dev; int err; ASSERT_CTX_LOCKED(sc); /* * We've seen an issue with 11.3/12.1 where sideband routines are * called after detach is called. This would call routines after * if_stop, causing issues with the teardown process. This has * seemingly been fixed in STABLE snapshots, but it seems like a * good idea to have this guard here regardless. */ if (ice_driver_is_detaching(sc)) return; if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) { device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n"); return; } if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) { device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n"); return; } ice_update_rx_mbuf_sz(sc); /* Update the MAC address... User might use a LAA */ err = ice_update_laa_mac(sc); if (err) { device_printf(dev, "LAA address change failed, err %s\n", ice_err_str(err)); return; } /* Initialize software Tx tracking values */ ice_init_tx_tracking(&sc->pf_vsi); err = ice_cfg_vsi_for_tx(&sc->pf_vsi); if (err) { device_printf(dev, "Unable to configure the main VSI for Tx: %s\n", ice_err_str(err)); return; } err = ice_cfg_vsi_for_rx(&sc->pf_vsi); if (err) { device_printf(dev, "Unable to configure the main VSI for Rx: %s\n", ice_err_str(err)); goto err_cleanup_tx; } err = ice_control_all_rx_queues(&sc->pf_vsi, true); if (err) { device_printf(dev, "Unable to enable Rx rings for transmit: %s\n", ice_err_str(err)); goto err_cleanup_tx; } err = ice_cfg_pf_default_mac_filters(sc); if (err) { device_printf(dev, "Unable to configure default MAC filters: %s\n", ice_err_str(err)); goto err_stop_rx; } /* We use software interrupts for Tx, so we only program the hardware * interrupts for Rx. */ ice_configure_all_rxq_interrupts(&sc->pf_vsi); ice_configure_rx_itr(&sc->pf_vsi); /* Configure promiscuous mode */ ice_if_promisc_set(ctx, if_getflags(sc->ifp)); ice_rdma_pf_init(sc); ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED); return; err_stop_rx: ice_control_all_rx_queues(&sc->pf_vsi, false); err_cleanup_tx: ice_vsi_disable_tx(&sc->pf_vsi); } /** * ice_poll_for_media_avail - Re-enable link if media is detected * @sc: device private structure * * Intended to be called from the driver's timer function, this function * sends the Get Link Status AQ command and re-enables HW link if the * command says that media is available. 
* * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing, * since media removal events are supposed to be sent to the driver through * a link status event. */ static void ice_poll_for_media_avail(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; struct ice_port_info *pi = hw->port_info; if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) { pi->phy.get_link_info = true; ice_get_link_status(pi, &sc->link_up); if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { enum ice_status status; /* Re-enable link and re-apply user link settings */ ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC); /* Update the OS about changes in media capability */ status = ice_add_media_types(sc, sc->media); if (status) device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA); } } } /** * ice_if_timer - called by iflib periodically * @ctx: iflib ctx structure * @qid: the queue this timer was called for * * This callback is triggered by iflib periodically. We use it to update the * hw statistics. * * @remark this function is not protected by the iflib CTX lock. */ static void ice_if_timer(if_ctx_t ctx, uint16_t qid) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx; if (qid != 0) return; /* Do not attempt to update stats when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* Update device statistics */ ice_update_pf_stats(sc); /* * For proper watchdog management, the iflib stack needs to know if * we've been paused during the last interval. Check if the * link_xoff_rx stat changed, and set the isc_pause_frames, if so. */ if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx) sc->scctx->isc_pause_frames = 1; /* Update the primary VSI stats */ ice_update_vsi_hw_stats(&sc->pf_vsi); } /** * ice_admin_timer - called periodically to trigger the admin task * @arg: callout(9) argument pointing to the device private softc structure * * Timer function used as part of a callout(9) timer that will periodically * trigger the admin task, even when the interface is down. * * @remark this function is not called by iflib and is not protected by the * iflib CTX lock. * * @remark because this is a callout function, it cannot sleep and should not * attempt taking the iflib CTX lock. */ static void ice_admin_timer(void *arg) { struct ice_softc *sc = (struct ice_softc *)arg; /* * There is a point where callout routines are no longer * cancelable. So there exists a window of time where the * driver enters detach() and tries to cancel the callout, but the * callout routine has passed the cancellation point. The detach() * routine is unaware of this and tries to free resources that the * callout routine needs. So we check for the detach state flag to * at least shrink the window of opportunity. */ if (ice_driver_is_detaching(sc)) return; /* Fire off the admin task */ iflib_admin_intr_deferred(sc->ctx); /* Reschedule the admin timer */ callout_schedule(&sc->admin_timer, hz/2); } /** * ice_transition_recovery_mode - Transition to recovery mode * @sc: the device private softc * * Called when the driver detects that the firmware has entered recovery mode * at run time. */ static void ice_transition_recovery_mode(struct ice_softc *sc) { struct ice_vsi *vsi = &sc->pf_vsi; int i; device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. 
Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); /* Tell the stack that the link has gone down */ iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0); /* Request that the device be re-initialized */ ice_request_stack_reinit(sc); ice_rdma_pf_detach(sc); ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap); ice_vsi_del_txqs_ctx(vsi); ice_vsi_del_rxqs_ctx(vsi); for (i = 0; i < sc->num_available_vsi; i++) { if (sc->all_vsi[i]) ice_release_vsi(sc->all_vsi[i]); } sc->num_available_vsi = 0; if (sc->all_vsi) { free(sc->all_vsi, M_ICE); sc->all_vsi = NULL; } /* Destroy the interrupt manager */ ice_resmgr_destroy(&sc->imgr); /* Destroy the queue managers */ ice_resmgr_destroy(&sc->tx_qmgr); ice_resmgr_destroy(&sc->rx_qmgr); ice_deinit_hw(&sc->hw); } /** * ice_transition_safe_mode - Transition to safe mode * @sc: the device private softc * * Called when the driver attempts to reload the DDP package during a device * reset, and the new download fails. If so, we must transition to safe mode * at run time. * * @remark although safe mode normally allocates only a single queue, we can't * change the number of queues dynamically when using iflib. Due to this, we * do not attempt to reduce the number of queues. */ static void ice_transition_safe_mode(struct ice_softc *sc) { /* Indicate that we are in Safe mode */ ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap); ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en); ice_rdma_pf_detach(sc); ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap); ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap); ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en); } /** * ice_if_update_admin_status - update admin status * @ctx: iflib ctx structure * * Called by iflib to update the admin status. For our purposes, this means * check the adminq, and update the link status. It's ultimately triggered by * our admin interrupt, or by the ice_if_timer periodically. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_update_admin_status(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_fw_modes fw_mode; bool reschedule = false; u16 pending = 0; ASSERT_CTX_LOCKED(sc); /* Check if the firmware entered recovery mode at run time */ fw_mode = ice_get_fw_mode(&sc->hw); if (fw_mode == ICE_FW_MODE_REC) { if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { /* If we just entered recovery mode, log a warning to * the system administrator and deinit driver state * that is no longer functional. */ ice_transition_recovery_mode(sc); } } else if (fw_mode == ICE_FW_MODE_ROLLBACK) { if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) { /* Rollback mode isn't fatal, but we don't want to * repeatedly post a message about it. */ ice_print_rollback_msg(&sc->hw); } } /* Handle global reset events */ ice_handle_reset_event(sc); /* Handle PF reset requests */ ice_handle_pf_reset_request(sc); /* Handle MDD events */ ice_handle_mdd_event(sc); if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) || ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) || ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { /* * If we know the control queues are disabled, skip processing * the control queues entirely. 
		 */
		;
	} else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) {
		ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending);
		if (pending > 0)
			reschedule = true;

		ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending);
		if (pending > 0)
			reschedule = true;
	}

	/* Poll for link up */
	ice_poll_for_media_avail(sc);

	/* Check and update link status */
	ice_update_link_status(sc, false);

	/*
	 * If there are still messages to process, we need to reschedule
	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
	 * woken up at the next interrupt or timer event.
	 */
	if (reschedule) {
		ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
		iflib_admin_intr_deferred(ctx);
	} else {
		ice_enable_intr(&sc->hw, sc->irqvs[0].me);
	}
}

/**
 * ice_prepare_for_reset - Prepare device for an impending reset
 * @sc: The device private softc
 *
 * Prepare the driver for an impending reset, shutting down VSIs, clearing the
 * scheduler setup, and shutting down controlqs. Uses the
 * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the
 * driver for reset or not.
 */
static void
ice_prepare_for_reset(struct ice_softc *sc)
{
	struct ice_hw *hw = &sc->hw;

	/* If we're already prepared, there's nothing to do */
	if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET))
		return;

	log(LOG_INFO, "%s: preparing to reset device logic\n",
	    sc->ifp->if_xname);

	/* In recovery mode, hardware is not initialized */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return;

+	/* inform the RDMA client */
+	ice_rdma_notify_reset(sc);
	/* stop the RDMA client */
	ice_rdma_pf_stop(sc);

	/* Release the main PF VSI queue mappings */
	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
	    sc->pf_vsi.num_tx_queues);
	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
	    sc->pf_vsi.num_rx_queues);

	ice_clear_hw_tbls(hw);

	if (hw->port_info)
		ice_sched_clear_port(hw->port_info);

	ice_shutdown_all_ctrlq(hw, false);
}

/**
 * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping
 * @sc: the device softc pointer
 *
 * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue
 * mapping after a reset occurred.
*/ static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc) { struct ice_vsi *vsi = &sc->pf_vsi; struct ice_tx_queue *txq; struct ice_rx_queue *rxq; int err, i; /* Re-assign Tx queues from PF space to the main VSI */ err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, vsi->num_tx_queues); if (err) { device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n", ice_err_str(err)); return (err); } /* Re-assign Rx queues from PF space to this VSI */ err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, vsi->num_rx_queues); if (err) { device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n", ice_err_str(err)); goto err_release_tx_queues; } vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS; /* Re-assign Tx queue tail pointers */ for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]); /* Re-assign Rx queue tail pointers */ for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++) rxq->tail = QRX_TAIL(vsi->rx_qmap[i]); return (0); err_release_tx_queues: ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap, sc->pf_vsi.num_tx_queues); return (err); } /* determine if the iflib context is active */ #define CTX_ACTIVE(ctx) ((if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)) /** * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode * @sc: The device private softc * * Handle a driver rebuild while in recovery mode. This will only rebuild the * limited functionality supported while in recovery mode. */ static void ice_rebuild_recovery_mode(struct ice_softc *sc) { device_t dev = sc->dev; /* enable PCIe bus master */ pci_enable_busmaster(dev); /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); /* Now that the rebuild is finished, we're no longer prepared to reset */ ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET); log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname); /* In order to completely restore device functionality, the iflib core * needs to be reset. We need to request an iflib reset. Additionally, * because the state of IFC_DO_RESET is cached within task_fn_admin in * the iflib core, we also want re-run the admin task so that iflib * resets immediately instead of waiting for the next interrupt. */ ice_request_stack_reinit(sc); return; } /** * ice_rebuild - Rebuild driver state post reset * @sc: The device private softc * * Restore driver state after a reset occurred. Restart the controlqs, setup * the hardware port, and re-enable the VSIs. */ static void ice_rebuild(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_ddp_state pkg_state; enum ice_status status; int err; sc->rebuild_ticks = ticks; /* If we're rebuilding, then a reset has succeeded. */ ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED); /* * If the firmware is in recovery mode, only restore the limited * functionality supported by recovery mode. 
*/ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { ice_rebuild_recovery_mode(sc); return; } /* enable PCIe bus master */ pci_enable_busmaster(dev); status = ice_init_all_ctrlq(hw); if (status) { device_printf(dev, "failed to re-init controlqs, err %s\n", ice_status_str(status)); goto err_shutdown_ctrlq; } /* Query the allocated resources for Tx scheduler */ status = ice_sched_query_res_alloc(hw); if (status) { device_printf(dev, "Failed to query scheduler resources, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); goto err_shutdown_ctrlq; } /* Re-enable FW logging. Keep going even if this fails */ status = ice_fwlog_set(hw, &hw->fwlog_cfg); if (!status) { /* * We should have the most updated cached copy of the * configuration, regardless of whether we're rebuilding * or not. So we'll simply check to see if logging was * enabled pre-rebuild. */ if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) { status = ice_fwlog_register(hw); if (status) device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } } else device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); err = ice_send_version(sc); if (err) goto err_shutdown_ctrlq; err = ice_init_link_events(sc); if (err) { device_printf(dev, "ice_init_link_events failed: %s\n", ice_err_str(err)); goto err_shutdown_ctrlq; } status = ice_clear_pf_cfg(hw); if (status) { device_printf(dev, "failed to clear PF configuration, err %s\n", ice_status_str(status)); goto err_shutdown_ctrlq; } ice_clear_pxe_mode(hw); status = ice_get_caps(hw); if (status) { device_printf(dev, "failed to get capabilities, err %s\n", ice_status_str(status)); goto err_shutdown_ctrlq; } status = ice_sched_init_port(hw->port_info); if (status) { device_printf(dev, "failed to initialize port, err %s\n", ice_status_str(status)); goto err_sched_cleanup; } /* If we previously loaded the package, it needs to be reloaded now */ if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) { pkg_state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size); if (!ice_is_init_pkg_successful(pkg_state)) { ice_log_pkg_init(sc, pkg_state); ice_transition_safe_mode(sc); } } ice_reset_pf_stats(sc); err = ice_rebuild_pf_vsi_qmap(sc); if (err) { device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n", ice_err_str(err)); goto err_sched_cleanup; } err = ice_initialize_vsi(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n", ice_err_str(err)); goto err_release_queue_allocations; } /* Replay all VSI configuration */ err = ice_replay_all_vsi_cfg(sc); if (err) goto err_deinit_pf_vsi; /* Re-enable FW health event reporting */ ice_init_health_events(sc); /* Reconfigure the main PF VSI for RSS */ err = ice_config_rss(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to reconfigure RSS for the main VSI, err %s\n", ice_err_str(err)); goto err_deinit_pf_vsi; } /* Refresh link status */ ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED); sc->hw.port_info->phy.get_link_info = true; ice_get_link_status(sc->hw.port_info, &sc->link_up); ice_update_link_status(sc, true); /* RDMA interface will be restarted by the stack re-init */ /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, 
sc->irqvs[0].me); /* Now that the rebuild is finished, we're no longer prepared to reset */ ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET); log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname); /* In order to completely restore device functionality, the iflib core * needs to be reset. We need to request an iflib reset. Additionally, * because the state of IFC_DO_RESET is cached within task_fn_admin in * the iflib core, we also want re-run the admin task so that iflib * resets immediately instead of waiting for the next interrupt. */ ice_request_stack_reinit(sc); return; err_deinit_pf_vsi: ice_deinit_vsi(&sc->pf_vsi); err_release_queue_allocations: ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap, sc->pf_vsi.num_tx_queues); ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap, sc->pf_vsi.num_rx_queues); err_sched_cleanup: ice_sched_cleanup_all(hw); err_shutdown_ctrlq: ice_shutdown_all_ctrlq(hw, false); ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET); ice_set_state(&sc->state, ICE_STATE_RESET_FAILED); device_printf(dev, "Driver rebuild failed, please reload the device driver\n"); } /** * ice_handle_reset_event - Handle reset events triggered by OICR * @sc: The device private softc * * Handle reset events triggered by an OICR notification. This includes CORER, * GLOBR, and EMPR resets triggered by software on this or any other PF or by * firmware. * * @pre assumes the iflib context lock is held, and will unlock it while * waiting for the hardware to finish reset. */ static void ice_handle_reset_event(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; enum ice_status status; device_t dev = sc->dev; /* When a CORER, GLOBR, or EMPR is about to happen, the hardware will * trigger an OICR interrupt. Our OICR handler will determine when * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as * appropriate. */ if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV)) return; ice_prepare_for_reset(sc); /* * Release the iflib context lock and wait for the device to finish * resetting. */ IFLIB_CTX_UNLOCK(sc); status = ice_check_reset(hw); IFLIB_CTX_LOCK(sc); if (status) { device_printf(dev, "Device never came out of reset, err %s\n", ice_status_str(status)); ice_set_state(&sc->state, ICE_STATE_RESET_FAILED); return; } /* We're done with the reset, so we can rebuild driver state */ sc->hw.reset_ongoing = false; ice_rebuild(sc); /* In the unlikely event that a PF reset request occurs at the same * time as a global reset, clear the request now. This avoids * resetting a second time right after we reset due to a global event. */ if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ)) device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n"); } /** * ice_handle_pf_reset_request - Initiate PF reset requested by software * @sc: The device private softc * * Initiate a PF reset requested by software. We handle this in the admin task * so that only one thread actually handles driver preparation and cleanup, * rather than having multiple threads possibly attempt to run this code * simultaneously. * * @pre assumes the iflib context lock is held and will unlock it while * waiting for the PF reset to complete. 
*/ static void ice_handle_pf_reset_request(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; enum ice_status status; /* Check for PF reset requests */ if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ)) return; /* Make sure we're prepared for reset */ ice_prepare_for_reset(sc); /* * Release the iflib context lock and wait for the device to finish * resetting. */ IFLIB_CTX_UNLOCK(sc); status = ice_reset(hw, ICE_RESET_PFR); IFLIB_CTX_LOCK(sc); if (status) { device_printf(sc->dev, "device PF reset failed, err %s\n", ice_status_str(status)); ice_set_state(&sc->state, ICE_STATE_RESET_FAILED); return; } sc->soft_stats.pfr_count++; ice_rebuild(sc); } /** * ice_init_device_features - Init device driver features * @sc: driver softc structure * * @pre assumes that the function capabilities bits have been set up by * ice_init_hw(). */ static void ice_init_device_features(struct ice_softc *sc) { /* Set capabilities that all devices support */ ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap); ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap); ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap); ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap); ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap); ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap); ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap); ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap); ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap); ice_set_bit(ICE_FEATURE_DCB, sc->feat_cap); ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap); /* Disable features due to hardware limitations... */ if (!sc->hw.func_caps.common_cap.rss_table_size) ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap); if (!sc->hw.func_caps.common_cap.iwarp || !ice_enable_irdma) ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap); if (!sc->hw.func_caps.common_cap.dcb) ice_clear_bit(ICE_FEATURE_DCB, sc->feat_cap); /* Disable features due to firmware limitations... */ if (!ice_is_fw_health_report_supported(&sc->hw)) ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap); if (!ice_fwlog_supported(&sc->hw)) ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap); if (sc->hw.fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) { if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING)) ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en); else ice_fwlog_unregister(&sc->hw); } /* Disable capabilities not supported by the OS */ ice_disable_unsupported_features(sc->feat_cap); /* RSS is always enabled for iflib */ if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS)) ice_set_bit(ICE_FEATURE_RSS, sc->feat_en); /* Disable features based on sysctl settings */ if (!ice_tx_balance_en) ice_clear_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap); } /** * ice_if_multi_set - Callback to update Multicast filters in HW * @ctx: iflib ctx structure * * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search * the if_multiaddrs list and determine which filters have been added or * removed from the list, and update HW programming to reflect the new list. 
* * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_multi_set(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); int err; ASSERT_CTX_LOCKED(sc); /* Do not handle multicast configuration in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; err = ice_sync_multicast_filters(sc); if (err) { device_printf(sc->dev, "Failed to synchronize multicast filter list: %s\n", ice_err_str(err)); return; } } /** * ice_if_vlan_register - Register a VLAN with the hardware * @ctx: iflib ctx pointer * @vtag: VLAN to add * * Programs the main PF VSI with a hardware filter for the given VLAN. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_status status; ASSERT_CTX_LOCKED(sc); /* Do not handle VLAN configuration in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag); if (status) { device_printf(sc->dev, "Failure adding VLAN %d to main VSI, err %s aq_err %s\n", vtag, ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); } } /** * ice_if_vlan_unregister - Remove a VLAN filter from the hardware * @ctx: iflib ctx pointer * @vtag: VLAN to remove * * Removes the previously programmed VLAN filter from the main PF VSI. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_status status; ASSERT_CTX_LOCKED(sc); /* Do not handle VLAN configuration in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag); if (status) { device_printf(sc->dev, "Failure removing VLAN %d from main VSI, err %s aq_err %s\n", vtag, ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); } } /** * ice_if_stop - Stop the device * @ctx: iflib context structure * * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0 * down) * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_stop(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); ASSERT_CTX_LOCKED(sc); /* * The iflib core may call IFDI_STOP prior to the first call to * IFDI_INIT. This will cause us to attempt to remove MAC filters we * don't have, and disable Tx queues which aren't yet configured. * Although it is likely these extra operations are harmless, they do * cause spurious warning messages to be displayed, which may confuse * users. * * To avoid these messages, we use a state bit indicating if we've * been initialized. It will be set when ice_if_init is called, and * cleared here in ice_if_stop. */ if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED)) return; if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) { device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n"); return; } if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) { device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n"); return; } ice_rdma_pf_stop(sc); /* Remove the MAC filters, stop Tx, and stop Rx. We don't check the * return of these functions because there's nothing we can really do * if they fail, and the functions already print error messages. * Just try to shut down as much as we can.
*/ ice_rm_pf_default_mac_filters(sc); /* Dissociate the Tx and Rx queues from the interrupts */ ice_flush_txq_interrupts(&sc->pf_vsi); ice_flush_rxq_interrupts(&sc->pf_vsi); /* Disable the Tx and Rx queues */ ice_vsi_disable_tx(&sc->pf_vsi); ice_control_all_rx_queues(&sc->pf_vsi, false); } /** * ice_if_get_counter - Get current value of an ifnet statistic * @ctx: iflib context pointer * @counter: ifnet counter to read * * Reads the current value of an ifnet counter for the device. * * This function is not protected by the iflib CTX lock. */ static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); /* Return the counter for the main PF VSI */ return ice_get_ifnet_counter(&sc->pf_vsi, counter); } /** * ice_request_stack_reinit - Request that iflib re-initialize * @sc: the device private softc * * Request that the device be brought down and up, to re-initialize. For * example, this may be called when a device reset occurs, or when Tx and Rx * queues need to be re-initialized. * * This is required because the iflib state is outside the driver, and must be * re-initialized if we need to restart Tx and Rx queues. */ void ice_request_stack_reinit(struct ice_softc *sc) { if (CTX_ACTIVE(sc->ctx)) { iflib_request_reset(sc->ctx); iflib_admin_intr_deferred(sc->ctx); } } /** * ice_driver_is_detaching - Check if the driver is detaching/unloading * @sc: device private softc * * Returns true if the driver is detaching, false otherwise. * * @remark on newer kernels, take advantage of iflib_in_detach in order to * report detachment correctly as early as possible. * * @remark this function is used by various code paths that want to avoid * running if the driver is about to be removed. This includes sysctls and * other driver access points. Note that it does not fully resolve * detach-based race conditions as it is possible for a thread to race with * iflib_in_detach. */ bool ice_driver_is_detaching(struct ice_softc *sc) { return (ice_test_state(&sc->state, ICE_STATE_DETACHING) || iflib_in_detach(sc->ctx)); } /** * ice_if_priv_ioctl - Device private ioctl handler * @ctx: iflib context pointer * @command: The ioctl command issued * @data: ioctl specific data * * iflib callback for handling custom driver specific ioctls. * * @pre Assumes that the iflib context lock is held. */ static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ifdrv *ifd; device_t dev = sc->dev; if (data == NULL) return (EINVAL); ASSERT_CTX_LOCKED(sc); /* Make sure the command type is valid */ switch (command) { case SIOCSDRVSPEC: case SIOCGDRVSPEC: /* Accepted commands */ break; case SIOCGPRIVATE_0: /* * Although we do not support this ioctl command, it's * expected that iflib will forward it to the IFDI_PRIV_IOCTL * handler. Do not print a message in this case */ return (ENOTSUP); default: /* * If we get a different command for this function, it's * definitely unexpected, so log a message indicating what * command we got for debugging purposes.
*/ device_printf(dev, "%s: unexpected ioctl command %08lx\n", __func__, command); return (EINVAL); } ifd = (struct ifdrv *)data; switch (ifd->ifd_cmd) { case ICE_NVM_ACCESS: return ice_handle_nvm_access_ioctl(sc, ifd); case ICE_DEBUG_DUMP: return ice_handle_debug_dump_ioctl(sc, ifd); default: return EINVAL; } } /** * ice_if_i2c_req - I2C request handler for iflib * @ctx: iflib context pointer * @req: The I2C parameters to use * * Read from the port's I2C eeprom using the parameters from the ioctl. * * @remark The iflib-only part is pretty simple. */ static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); return ice_handle_i2c_req(sc, req); } /** * ice_if_suspend - PCI device suspend handler for iflib * @ctx: iflib context pointer * * Deinitializes the driver and clears HW resources in preparation for * suspend or an FLR. * * @returns 0; this return value is ignored */ static int ice_if_suspend(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); /* At least a PFR is always going to happen after this; * either via FLR or during the D3->D0 transition. */ ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ); ice_prepare_for_reset(sc); return (0); } /** * ice_if_resume - PCI device resume handler for iflib * @ctx: iflib context pointer * * Reinitializes the driver and the HW after PCI resume or after * an FLR. An init is performed by iflib after this function is finished. * * @returns 0; this return value is ignored */ static int ice_if_resume(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); ice_rebuild(sc); return (0); } /* ice_if_needs_restart - Tell iflib when the driver needs to be reinitialized * @ctx: iflib context * @event: event code to check * * Defaults to returning false for unknown events. * * @returns true if iflib needs to reinit the interface */ static bool ice_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event) { switch (event) { case IFLIB_RESTART_VLAN_CONFIG: default: return (false); } } diff --git a/sys/dev/irdma/icrdma.c b/sys/dev/irdma/icrdma.c index 848f8126e57c..8efd9d5c6034 100644 --- a/sys/dev/irdma/icrdma.c +++ b/sys/dev/irdma/icrdma.c @@ -1,820 +1,816 @@ /*- * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB * * Copyright (c) 2021 - 2023 Intel Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenFabrics.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include "ice_rdma.h" #include "irdma_main.h" #include "icrdma_hw.h" #include "irdma_if.h" #include "irdma_di_if.h" /** * Driver version */ char irdma_driver_version[] = "1.2.17-k"; /** * irdma_init_tunable - prepare tunables * @rf: RDMA PCI function * @pf_id: id of the pf */ static void irdma_init_tunable(struct irdma_pci_f *rf, uint8_t pf_id) { struct sysctl_oid_list *irdma_oid_list; struct irdma_tunable_info *t_info = &rf->tun_info; char pf_name[16]; snprintf(pf_name, 15, "irdma%d", pf_id); sysctl_ctx_init(&t_info->irdma_sysctl_ctx); t_info->irdma_sysctl_tree = SYSCTL_ADD_NODE(&t_info->irdma_sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev), OID_AUTO, pf_name, CTLFLAG_RD, NULL, ""); irdma_oid_list = SYSCTL_CHILDREN(t_info->irdma_sysctl_tree); t_info->sws_sysctl_tree = SYSCTL_ADD_NODE(&t_info->irdma_sysctl_ctx, irdma_oid_list, OID_AUTO, "sw_stats", CTLFLAG_RD, NULL, ""); /* * debug mask setting */ SYSCTL_ADD_S32(&t_info->irdma_sysctl_ctx, irdma_oid_list, OID_AUTO, "debug", CTLFLAG_RWTUN, &rf->sc_dev.debug_mask, 0, "irdma debug"); /* * RoCEv2/iWARP setting RoCEv2 the default mode */ t_info->roce_ena = 1; SYSCTL_ADD_U8(&t_info->irdma_sysctl_ctx, irdma_oid_list, OID_AUTO, "roce_enable", CTLFLAG_RDTUN, &t_info->roce_ena, 0, "RoCEv2 mode enable"); rf->protocol_used = IRDMA_IWARP_PROTOCOL_ONLY; if (t_info->roce_ena == 1) rf->protocol_used = IRDMA_ROCE_PROTOCOL_ONLY; else if (t_info->roce_ena != 0) printf("%s:%d wrong roce_enable value (%d), using iWARP\n", __func__, __LINE__, t_info->roce_ena); printf("%s:%d protocol: %s, roce_enable value: %d\n", __func__, __LINE__, (rf->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) ? 
"iWARP" : "RoCEv2", t_info->roce_ena); snprintf(t_info->drv_ver, IRDMA_VER_LEN, "%s", irdma_driver_version); SYSCTL_ADD_STRING(&t_info->irdma_sysctl_ctx, irdma_oid_list, OID_AUTO, "drv_ver", CTLFLAG_RDTUN, t_info->drv_ver, IRDMA_VER_LEN, "driver version"); irdma_dcqcn_tunables_init(rf); irdma_sysctl_settings(rf); } /** * irdma_find_handler - obtain hdl object to identify pf * @p_dev: the peer interface structure */ static struct irdma_handler * irdma_find_handler(struct ice_rdma_peer *p_dev) { struct irdma_handler *hdl; unsigned long flags; spin_lock_irqsave(&irdma_handler_lock, flags); list_for_each_entry(hdl, &irdma_handlers, list) { if (!hdl->iwdev->rf->peer_info) continue; if (hdl->iwdev->rf->peer_info->dev == p_dev->dev) { spin_unlock_irqrestore(&irdma_handler_lock, flags); return hdl; } } spin_unlock_irqrestore(&irdma_handler_lock, flags); return NULL; } /** * peer_to_iwdev - return iwdev based on peer * @peer: the peer interface structure */ static struct irdma_device * peer_to_iwdev(struct ice_rdma_peer *peer) { struct irdma_handler *hdl; hdl = irdma_find_handler(peer); if (!hdl) { printf("%s:%d rdma handler not found\n", __func__, __LINE__); return NULL; } return hdl->iwdev; } /** * irdma_get_qos_info - save qos info from parameters to internal struct * @l2params: destination, qos, tc, mtu info structure * @qos_info: source, DCB settings structure */ static void irdma_get_qos_info(struct irdma_pci_f *rf, struct irdma_l2params *l2params, struct ice_qos_params *qos_info) { int i; char txt[7][128] = {"", "", "", "", "", "", ""}; u8 len; l2params->num_tc = qos_info->num_tc; l2params->num_apps = qos_info->num_apps; l2params->vsi_prio_type = qos_info->vsi_priority_type; l2params->vsi_rel_bw = qos_info->vsi_relative_bw; for (i = 0; i < l2params->num_tc; i++) { l2params->tc_info[i].egress_virt_up = qos_info->tc_info[i].egress_virt_up; l2params->tc_info[i].ingress_virt_up = qos_info->tc_info[i].ingress_virt_up; l2params->tc_info[i].prio_type = qos_info->tc_info[i].prio_type; l2params->tc_info[i].rel_bw = qos_info->tc_info[i].rel_bw; l2params->tc_info[i].tc_ctx = qos_info->tc_info[i].tc_ctx; } for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) l2params->up2tc[i] = qos_info->up2tc[i]; if (qos_info->pfc_mode == IRDMA_QOS_MODE_DSCP) { l2params->dscp_mode = true; memcpy(l2params->dscp_map, qos_info->dscp_map, sizeof(l2params->dscp_map)); } if (!(rf->sc_dev.debug_mask & IRDMA_DEBUG_DCB)) return; for (i = 0; i < l2params->num_tc; i++) { len = strlen(txt[0]); snprintf(txt[0] + len, sizeof(txt[0]) - 5, " %d", l2params->tc_info[i].egress_virt_up); len = strlen(txt[1]); snprintf(txt[1] + len, sizeof(txt[1]) - 5, " %d", l2params->tc_info[i].ingress_virt_up); len = strlen(txt[2]); snprintf(txt[2] + len, sizeof(txt[2]) - 5, " %d", l2params->tc_info[i].prio_type); len = strlen(txt[3]); snprintf(txt[3] + len, sizeof(txt[3]) - 5, " %d", l2params->tc_info[i].rel_bw); len = strlen(txt[4]); snprintf(txt[4] + len, sizeof(txt[4]) - 5, " %lu", l2params->tc_info[i].tc_ctx); } len = strlen(txt[5]); for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) len += snprintf(txt[5] + len, sizeof(txt[5]) - 5, " %d", l2params->up2tc[i]); len = strlen(txt[6]); for (i = 0; i < IRDMA_DSCP_NUM_VAL; i++) len += snprintf(txt[6] + len, sizeof(txt[6]) - 5, " %d", l2params->dscp_map[i]); irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "num_tc: %d\n", l2params->num_tc); irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "num_apps: %d\n", l2params->num_apps); irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "vsi_prio_type: %d\n", l2params->vsi_prio_type); 
irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "vsi_rel_bw: %d\n", l2params->vsi_rel_bw); irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "egress_virt_up: %s\n", txt[0]); irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "ingress_virt_up:%s\n", txt[1]); irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "prio_type: %s\n", txt[2]); irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "rel_bw: %s\n", txt[3]); irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "tc_ctx: %s\n", txt[4]); irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "up2tc: %s\n", txt[5]); irdma_debug(&rf->sc_dev, IRDMA_DEBUG_DCB, "dscp_mode: %s\n", txt[6]); irdma_debug_buf(&rf->sc_dev, IRDMA_DEBUG_DCB, "l2params", l2params, sizeof(*l2params)); } /** * irdma_log_invalid_mtu - check mtu setting validity * @mtu: mtu value * @dev: hardware control device structure */ static void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev) { if (mtu < IRDMA_MIN_MTU_IPV4) irdma_dev_warn(to_ibdev(dev), "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 576 for IPv4\n", mtu); else if (mtu < IRDMA_MIN_MTU_IPV6) irdma_dev_warn(to_ibdev(dev), "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 1280 for IPv6\n", mtu); } /** * irdma_get_event_name - convert type enum to string * @type: event type enum */ static const char * irdma_get_event_name(enum ice_rdma_event_type type) { switch (type) { case ICE_RDMA_EVENT_LINK_CHANGE: return "LINK CHANGE"; case ICE_RDMA_EVENT_MTU_CHANGE: return "MTU CHANGE"; case ICE_RDMA_EVENT_TC_CHANGE: return "TC CHANGE"; case ICE_RDMA_EVENT_API_CHANGE: return "API CHANGE"; case ICE_RDMA_EVENT_CRIT_ERR: return "CRITICAL ERROR"; case ICE_RDMA_EVENT_RESET: return "RESET"; case ICE_RDMA_EVENT_QSET_REGISTER: return "QSET REGISTER"; case ICE_RDMA_EVENT_VSI_FILTER_UPDATE: return "VSI FILTER UPDATE"; default: return "UNKNOWN"; } } /** * irdma_event_handler - handling events from lan driver * @peer: the peer interface structure * @event: event info structure */ static void irdma_event_handler(struct ice_rdma_peer *peer, struct ice_rdma_event *event) { struct irdma_device *iwdev; struct irdma_l2params l2params = {}; printf("%s:%d event_handler %s (%x) on pf %d (%d)\n", __func__, __LINE__, irdma_get_event_name(event->type), event->type, peer->pf_id, if_getdunit(peer->ifp)); iwdev = peer_to_iwdev(peer); if (!iwdev) { printf("%s:%d rdma device not found\n", __func__, __LINE__); return; } switch (event->type) { case ICE_RDMA_EVENT_LINK_CHANGE: printf("%s:%d PF: %x (%x), state: %d, speed: %lu\n", __func__, __LINE__, peer->pf_id, if_getdunit(peer->ifp), event->linkstate, event->baudrate); break; case ICE_RDMA_EVENT_MTU_CHANGE: if (iwdev->vsi.mtu != event->mtu) { l2params.mtu = event->mtu; l2params.mtu_changed = true; irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev); irdma_change_l2params(&iwdev->vsi, &l2params); } break; case ICE_RDMA_EVENT_TC_CHANGE: /* * 1. check if it is pre or post 2. check if it is currently being done */ if (event->prep == iwdev->vsi.tc_change_pending) { printf("%s:%d can't process %s TC change if TC change is %spending\n", __func__, __LINE__, event->prep ? "pre" : "post", event->prep ? 
" " : "not "); goto done; } if (!atomic_inc_not_zero(&iwdev->rf->dev_ctx.event_rfcnt)) { printf("%s:%d (%d) EVENT_TC_CHANGE received, but not processed %d\n", __func__, __LINE__, if_getdunit(peer->ifp), atomic_read(&iwdev->rf->dev_ctx.event_rfcnt)); break; } if (event->prep) { iwdev->vsi.tc_change_pending = true; irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_SUSPEND); wait_event_timeout(iwdev->suspend_wq, !atomic_read(&iwdev->vsi.qp_suspend_reqs), IRDMA_EVENT_TIMEOUT_MS * 10); irdma_ws_reset(&iwdev->vsi); printf("%s:%d TC change preparation done\n", __func__, __LINE__); } else { l2params.tc_changed = true; irdma_get_qos_info(iwdev->rf, &l2params, &event->port_qos); if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode; irdma_check_fc_for_tc_update(&iwdev->vsi, &l2params); irdma_change_l2params(&iwdev->vsi, &l2params); printf("%s:%d TC change done\n", __func__, __LINE__); } atomic_dec(&iwdev->rf->dev_ctx.event_rfcnt); break; case ICE_RDMA_EVENT_CRIT_ERR: -#ifdef EVNT_HNDLR_CRITERR if (event->oicr_reg & IRDMAPFINT_OICR_PE_CRITERR_M) { u32 pe_criterr; #define IRDMA_Q1_RESOURCE_ERR 0x0001024d pe_criterr = readl(iwdev->rf->sc_dev.hw_regs[IRDMA_GLPE_CRITERR]); if (pe_criterr != IRDMA_Q1_RESOURCE_ERR) { irdma_pr_err("critical PE Error, GLPE_CRITERR=0x%08x\n", pe_criterr); iwdev->rf->reset = true; } else { irdma_dev_warn(to_ibdev(&iwdev->rf->sc_dev), "Q1 Resource Check\n"); } } if (event->oicr_reg & IRDMAPFINT_OICR_HMC_ERR_M) { irdma_pr_err("HMC Error\n"); iwdev->rf->reset = true; } if (iwdev->rf->reset) iwdev->rf->gen_ops.request_reset(iwdev->rf); -#else - printf("%s:%d event type received: %d\n", __func__, __LINE__, event->type); -#endif break; case ICE_RDMA_EVENT_RESET: iwdev->rf->reset = true; break; default: printf("%s:%d event type unsupported: %d\n", __func__, __LINE__, event->type); } done: return; } /** * irdma_link_change - Callback for link state change * @peer: the peer interface structure * @linkstate: state of the link * @baudrate: speed of the link */ static void irdma_link_change(struct ice_rdma_peer *peer, int linkstate, uint64_t baudrate) { printf("%s:%d PF: %x (%x), state: %d, speed: %lu\n", __func__, __LINE__, peer->pf_id, if_getdunit(peer->ifp), linkstate, baudrate); } /** * irdma_finalize_task - Finish open or close phase in a separate thread * @context: instance holding peer and iwdev information * * Triggered from irdma_open or irdma_close to perform rt_init_hw or * rt_deinit_hw respectively. Does registration and unregistration of * the device. 
*/ static void irdma_finalize_task(void *context, int pending) { struct irdma_task_arg *task_arg = (struct irdma_task_arg *)context; struct irdma_device *iwdev = task_arg->iwdev; struct irdma_pci_f *rf = iwdev->rf; struct ice_rdma_peer *peer = task_arg->peer; struct irdma_l2params l2params = {{{0}}}; struct ice_rdma_request req = {0}; int status = 0; if (iwdev->iw_status) { irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT, "Starting deferred closing %d (%d)\n", rf->peer_info->pf_id, if_getdunit(peer->ifp)); atomic_dec(&rf->dev_ctx.event_rfcnt); wait_event_timeout(iwdev->suspend_wq, !atomic_read(&rf->dev_ctx.event_rfcnt), IRDMA_MAX_TIMEOUT); if (atomic_read(&rf->dev_ctx.event_rfcnt) != 0) { printf("%s:%d (%d) waiting for event_rfcnt (%d) timeout, proceed with unload\n", __func__, __LINE__, if_getdunit(peer->ifp), atomic_read(&rf->dev_ctx.event_rfcnt)); } irdma_dereg_ipaddr_event_cb(rf); irdma_ib_unregister_device(iwdev); req.type = ICE_RDMA_EVENT_VSI_FILTER_UPDATE; req.enable_filter = false; IRDMA_DI_REQ_HANDLER(peer, &req); irdma_cleanup_dead_qps(&iwdev->vsi); irdma_rt_deinit_hw(iwdev); } else { irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT, "Starting deferred opening %d (%d)\n", rf->peer_info->pf_id, if_getdunit(peer->ifp)); irdma_get_qos_info(iwdev->rf, &l2params, &peer->initial_qos_info); if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode; l2params.mtu = peer->mtu; status = irdma_rt_init_hw(iwdev, &l2params); if (status) { irdma_pr_err("RT init failed %d\n", status); ib_dealloc_device(&iwdev->ibdev); return; } status = irdma_ib_register_device(iwdev); if (status) { irdma_pr_err("Registration failed %d\n", status); irdma_rt_deinit_hw(iwdev); ib_dealloc_device(&iwdev->ibdev); } irdma_sw_stats_tunables_init(rf); req.type = ICE_RDMA_EVENT_VSI_FILTER_UPDATE; req.enable_filter = true; IRDMA_DI_REQ_HANDLER(peer, &req); irdma_reg_ipaddr_event_cb(rf); atomic_inc(&rf->dev_ctx.event_rfcnt); irdma_debug(&rf->sc_dev, IRDMA_DEBUG_INIT, "Deferred opening finished %d (%d)\n", rf->peer_info->pf_id, if_getdunit(peer->ifp)); } } /** * irdma_alloc_pcidev - allocate memory for pcidev and populate data * @peer: the new peer interface structure * @rf: RDMA PCI function */ static int irdma_alloc_pcidev(struct ice_rdma_peer *peer, struct irdma_pci_f *rf) { rf->pcidev = kzalloc(sizeof(struct pci_dev), GFP_KERNEL); if (!rf->pcidev) { return -ENOMEM; } if (linux_pci_attach_device(rf->dev_ctx.dev, NULL, NULL, rf->pcidev)) return -ENOMEM; return 0; } /** * irdma_dealloc_pcidev - deallocate memory for pcidev * @rf: RDMA PCI function */ static void irdma_dealloc_pcidev(struct irdma_pci_f *rf) { linux_pci_detach_device(rf->pcidev); kfree(rf->pcidev); } /** * irdma_fill_device_info - assign initial values to rf variables * @iwdev: irdma device * @peer: the peer interface structure */ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_rdma_peer *peer) { struct irdma_pci_f *rf = iwdev->rf; rf->peer_info = peer; rf->gen_ops.register_qset = irdma_register_qset; rf->gen_ops.unregister_qset = irdma_unregister_qset; rf->rdma_ver = IRDMA_GEN_2; rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_2; rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT; rf->rst_to = IRDMA_RST_TIMEOUT_HZ; rf->check_fc = irdma_check_fc_for_qp; rf->gen_ops.request_reset = irdma_request_reset; irdma_set_rf_user_cfg_params(rf); rf->default_vsi.vsi_idx = peer->pf_vsi_num; rf->dev_ctx.dev = peer->dev; rf->dev_ctx.mem_bus_space_tag = rman_get_bustag(peer->pci_mem); 
rf->dev_ctx.mem_bus_space_handle = rman_get_bushandle(peer->pci_mem); rf->dev_ctx.mem_bus_space_size = rman_get_size(peer->pci_mem); rf->hw.dev_context = &rf->dev_ctx; rf->hw.hw_addr = (u8 *)rman_get_virtual(peer->pci_mem); rf->msix_count = peer->msix.count; rf->msix_info.entry = peer->msix.base; rf->msix_info.vector = peer->msix.count; printf("%s:%d msix_info: %d %d %d\n", __func__, __LINE__, rf->msix_count, rf->msix_info.entry, rf->msix_info.vector); rf->iwdev = iwdev; iwdev->netdev = peer->ifp; iwdev->init_state = INITIAL_STATE; iwdev->vsi_num = peer->pf_vsi_num; iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED; iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE; iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT; iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT; iwdev->roce_rtomin = 5; if (rf->protocol_used == IRDMA_ROCE_PROTOCOL_ONLY) { iwdev->roce_mode = true; } } /** * irdma_probe - Callback to probe a new RDMA peer device * @peer: the new peer interface structure * * Callback implementing the RDMA_PROBE function. Called by the ice driver to * notify the RDMA client driver that a new device has been created */ static int irdma_probe(struct ice_rdma_peer *peer) { struct irdma_device *iwdev; struct irdma_pci_f *rf; struct irdma_handler *hdl; int err = 0; irdma_pr_info("probe: irdma-%s peer=%p, peer->pf_id=%d, peer->ifp=%p, peer->ifp->if_dunit=%d, peer->pci_mem->r_bustag=%p\n", irdma_driver_version, peer, peer->pf_id, peer->ifp, if_getdunit(peer->ifp), (void *)(uintptr_t)peer->pci_mem->r_bustag); hdl = irdma_find_handler(peer); if (hdl) return -EBUSY; hdl = kzalloc(sizeof(*hdl), GFP_KERNEL); if (!hdl) return -ENOMEM; iwdev = (struct irdma_device *)ib_alloc_device(sizeof(*iwdev)); if (!iwdev) { kfree(hdl); return -ENOMEM; } iwdev->rf = kzalloc(sizeof(*rf), GFP_KERNEL); if (!iwdev->rf) { ib_dealloc_device(&iwdev->ibdev); kfree(hdl); return -ENOMEM; } hdl->iwdev = iwdev; iwdev->hdl = hdl; irdma_init_tunable(iwdev->rf, if_getdunit(peer->ifp)); irdma_fill_device_info(iwdev, peer); rf = iwdev->rf; if (irdma_alloc_pcidev(peer, rf)) goto err_pcidev; irdma_add_handler(hdl); if (irdma_ctrl_init_hw(rf)) { err = -EIO; goto err_ctrl_init; } rf->dev_ctx.task_arg.peer = peer; rf->dev_ctx.task_arg.iwdev = iwdev; rf->dev_ctx.task_arg.peer = peer; TASK_INIT(&hdl->deferred_task, 0, irdma_finalize_task, &rf->dev_ctx.task_arg); hdl->deferred_tq = taskqueue_create_fast("irdma_defer", M_NOWAIT, taskqueue_thread_enqueue, &hdl->deferred_tq); taskqueue_start_threads(&hdl->deferred_tq, 1, PI_NET, "irdma_defer_t"); taskqueue_enqueue(hdl->deferred_tq, &hdl->deferred_task); return 0; err_ctrl_init: irdma_del_handler(hdl); irdma_dealloc_pcidev(rf); err_pcidev: kfree(iwdev->rf); ib_dealloc_device(&iwdev->ibdev); kfree(hdl); return err; } /** * irdma_remove - Callback to remove an RDMA peer device * @peer: the new peer interface structure * * Callback implementing the RDMA_REMOVE function. 
Called by the ice driver to * notify the RDMA client driver that the device will be deleted */ static int irdma_remove(struct ice_rdma_peer *peer) { struct irdma_handler *hdl; struct irdma_device *iwdev; irdma_debug((struct irdma_sc_dev *)NULL, IRDMA_DEBUG_INIT, "removing %s irdma%d\n", __func__, if_getdunit(peer->ifp)); hdl = irdma_find_handler(peer); if (!hdl) return 0; iwdev = hdl->iwdev; if (iwdev->vsi.tc_change_pending) { iwdev->vsi.tc_change_pending = false; irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_RESUME); } taskqueue_enqueue(hdl->deferred_tq, &hdl->deferred_task); taskqueue_drain(hdl->deferred_tq, &hdl->deferred_task); taskqueue_free(hdl->deferred_tq); hdl->iwdev->rf->dev_ctx.task_arg.iwdev = NULL; hdl->iwdev->rf->dev_ctx.task_arg.peer = NULL; sysctl_ctx_free(&iwdev->rf->tun_info.irdma_sysctl_ctx); hdl->iwdev->rf->tun_info.irdma_sysctl_tree = NULL; hdl->iwdev->rf->tun_info.sws_sysctl_tree = NULL; irdma_ctrl_deinit_hw(iwdev->rf); irdma_dealloc_pcidev(iwdev->rf); irdma_del_handler(iwdev->hdl); kfree(iwdev->hdl); kfree(iwdev->rf); ib_dealloc_device(&iwdev->ibdev); irdma_pr_info("IRDMA hardware deinitialization complete irdma%d\n", if_getdunit(peer->ifp)); return 0; } /** * irdma_open - Callback for operation open for RDMA device * @peer: the new peer interface structure * * Callback implementing the RDMA_OPEN function. Called by the ice driver to * notify the RDMA client driver that a new device has been initialized. */ static int irdma_open(struct ice_rdma_peer *peer) { struct irdma_device *iwdev; struct ice_rdma_event event = {0}; iwdev = peer_to_iwdev(peer); if (iwdev) { event.type = ICE_RDMA_EVENT_MTU_CHANGE; event.mtu = peer->mtu; irdma_event_handler(peer, &event); } else { irdma_probe(peer); } return 0; } /** * irdma_close - Callback to notify that a peer device is down * @peer: the RDMA peer device being stopped * * Callback implementing the RDMA_CLOSE function. Called by the ice driver to * notify the RDMA client driver that a peer device is being stopped. */ static int irdma_close(struct ice_rdma_peer *peer) { /* * This is called when ifconfig down or pf-reset is about to happen.
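 * Only a pending reset (rf->reset) leads to a full teardown via irdma_remove();
 * a plain interface down leaves the RDMA device registered.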
*/ struct irdma_device *iwdev; iwdev = peer_to_iwdev(peer); if (iwdev && iwdev->rf->reset) irdma_remove(peer); return 0; } /** * irdma_prep_for_unregister - ensure the driver is ready to unregister */ static void irdma_prep_for_unregister(void) { struct irdma_handler *hdl; unsigned long flags; bool hdl_valid; do { hdl_valid = false; spin_lock_irqsave(&irdma_handler_lock, flags); list_for_each_entry(hdl, &irdma_handlers, list) { if (!hdl->iwdev->rf->peer_info) continue; hdl_valid = true; break; } spin_unlock_irqrestore(&irdma_handler_lock, flags); if (!hdl || !hdl_valid) break; IRDMA_CLOSE(hdl->iwdev->rf->peer_info); IRDMA_REMOVE(hdl->iwdev->rf->peer_info); } while (1); } static kobj_method_t irdma_methods[] = { KOBJMETHOD(irdma_probe, irdma_probe), KOBJMETHOD(irdma_open, irdma_open), KOBJMETHOD(irdma_close, irdma_close), KOBJMETHOD(irdma_remove, irdma_remove), KOBJMETHOD(irdma_link_change, irdma_link_change), KOBJMETHOD(irdma_event_handler, irdma_event_handler), KOBJMETHOD_END }; /* declare irdma_class which extends the ice_rdma_di class */ DEFINE_CLASS_1(irdma, irdma_class, irdma_methods, sizeof(struct ice_rdma_peer), ice_rdma_di_class); static struct ice_rdma_info irdma_info = { .major_version = ICE_RDMA_MAJOR_VERSION, .minor_version = ICE_RDMA_MINOR_VERSION, .patch_version = ICE_RDMA_PATCH_VERSION, .rdma_class = &irdma_class, }; /** * irdma_module_event_handler - Module event handler callback * @mod: unused mod argument * @what: the module event to handle * @arg: unused module event argument * * Callback used by the FreeBSD module stack to notify the driver of module * events. Used to implement custom handling for certain module events such as * load and unload. */ static int irdma_module_event_handler(module_t __unused mod, int what, void __unused * arg) { switch (what) { case MOD_LOAD: printf("Loading irdma module\n"); return ice_rdma_register(&irdma_info); case MOD_UNLOAD: printf("Unloading irdma module\n"); irdma_prep_for_unregister(); ice_rdma_unregister(); return (0); default: return (EOPNOTSUPP); } return (0); } static moduledata_t irdma_moduledata = { "irdma", irdma_module_event_handler, NULL }; DECLARE_MODULE(irdma, irdma_moduledata, SI_SUB_LAST, SI_ORDER_ANY); MODULE_VERSION(irdma, 1); MODULE_DEPEND(irdma, ice, 1, 1, 1); MODULE_DEPEND(irdma, ibcore, 1, 1, 1);