diff --git a/sys/dev/ahci/ahci.c b/sys/dev/ahci/ahci.c index 8991d9c23fbc..054250d21668 100644 --- a/sys/dev/ahci/ahci.c +++ b/sys/dev/ahci/ahci.c @@ -1,2906 +1,2906 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009-2012 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ahci.h" #include #include #include #include #include /* local prototypes */ static void ahci_intr(void *data); static void ahci_intr_one(void *data); static void ahci_intr_one_edge(void *data); static int ahci_ch_init(device_t dev); static int ahci_ch_deinit(device_t dev); static int ahci_ch_suspend(device_t dev); static int ahci_ch_resume(device_t dev); static void ahci_ch_pm(void *arg); static void ahci_ch_intr(void *arg); static void ahci_ch_intr_direct(void *arg); static void ahci_ch_intr_main(struct ahci_channel *ch, uint32_t istatus); static void ahci_begin_transaction(struct ahci_channel *ch, union ccb *ccb); static void ahci_dmasetprd(void *arg, bus_dma_segment_t *segs, int nsegs, int error); static void ahci_execute_transaction(struct ahci_slot *slot); static void ahci_timeout(void *arg); static void ahci_end_transaction(struct ahci_slot *slot, enum ahci_err_type et); static int ahci_setup_fis(struct ahci_channel *ch, struct ahci_cmd_tab *ctp, union ccb *ccb, int tag); static void ahci_dmainit(device_t dev); static void ahci_dmasetupc_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int error); static void ahci_dmafini(device_t dev); static void ahci_slotsalloc(device_t dev); static void ahci_slotsfree(device_t dev); static void ahci_reset(struct ahci_channel *ch); static void ahci_start(struct ahci_channel *ch, int fbs); static void ahci_stop(struct ahci_channel *ch); static void ahci_clo(struct ahci_channel *ch); static void ahci_start_fr(struct ahci_channel *ch); static void ahci_stop_fr(struct ahci_channel *ch); static int ahci_phy_check_events(struct ahci_channel *ch, u_int32_t serr); static uint32_t ahci_ch_detval(struct ahci_channel *ch, uint32_t val); static int ahci_sata_connect(struct ahci_channel *ch); static int ahci_sata_phy_reset(struct ahci_channel *ch); static int ahci_wait_ready(struct ahci_channel *ch, int t, int t0); static void ahci_issue_recovery(struct ahci_channel *ch); static void ahci_process_read_log(struct ahci_channel *ch, union ccb *ccb); static void ahci_process_request_sense(struct ahci_channel *ch, union ccb *ccb); static void ahciaction(struct cam_sim *sim, union ccb *ccb); static void ahcipoll(struct cam_sim *sim); static MALLOC_DEFINE(M_AHCI, "AHCI driver", "AHCI driver data buffers"); #define recovery_type spriv_field0 #define RECOVERY_NONE 0 #define RECOVERY_READ_LOG 1 #define RECOVERY_REQUEST_SENSE 2 #define recovery_slot spriv_field1 static uint32_t ahci_ch_detval(struct ahci_channel *ch, uint32_t val) { return ch->disablephy ? ATA_SC_DET_DISABLE : val; } int ahci_ctlr_setup(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); /* Clear interrupts */ ATA_OUTL(ctlr->r_mem, AHCI_IS, ATA_INL(ctlr->r_mem, AHCI_IS)); /* Configure CCC */ if (ctlr->ccc) { ATA_OUTL(ctlr->r_mem, AHCI_CCCP, ATA_INL(ctlr->r_mem, AHCI_PI)); ATA_OUTL(ctlr->r_mem, AHCI_CCCC, (ctlr->ccc << AHCI_CCCC_TV_SHIFT) | (4 << AHCI_CCCC_CC_SHIFT) | AHCI_CCCC_EN); ctlr->cccv = (ATA_INL(ctlr->r_mem, AHCI_CCCC) & AHCI_CCCC_INT_MASK) >> AHCI_CCCC_INT_SHIFT; if (bootverbose) { device_printf(dev, "CCC with %dms/4cmd enabled on vector %d\n", ctlr->ccc, ctlr->cccv); } } /* Enable AHCI interrupts */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, ATA_INL(ctlr->r_mem, AHCI_GHC) | AHCI_GHC_IE); return (0); } int ahci_ctlr_reset(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); uint32_t v; int timeout; /* BIOS/OS Handoff */ if ((ATA_INL(ctlr->r_mem, AHCI_VS) >= 0x00010200) && (ATA_INL(ctlr->r_mem, AHCI_CAP2) & AHCI_CAP2_BOH) && ((v = ATA_INL(ctlr->r_mem, AHCI_BOHC)) & AHCI_BOHC_OOS) == 0) { /* Request OS ownership. */ ATA_OUTL(ctlr->r_mem, AHCI_BOHC, v | AHCI_BOHC_OOS); /* Wait up to 2s for BIOS ownership release. */ for (timeout = 0; timeout < 80; timeout++) { DELAY(25000); v = ATA_INL(ctlr->r_mem, AHCI_BOHC); if ((v & AHCI_BOHC_BOS) == 0) break; if ((v & AHCI_BOHC_BB) == 0) break; } } /* Enable AHCI mode */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, AHCI_GHC_AE); /* Reset AHCI controller */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, AHCI_GHC_AE|AHCI_GHC_HR); for (timeout = 1000; timeout > 0; timeout--) { DELAY(1000); if ((ATA_INL(ctlr->r_mem, AHCI_GHC) & AHCI_GHC_HR) == 0) break; } if (timeout == 0) { device_printf(dev, "AHCI controller reset failure\n"); return (ENXIO); } /* Reenable AHCI mode */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, AHCI_GHC_AE); if (ctlr->quirks & AHCI_Q_RESTORE_CAP) { /* * Restore capability field. * This is write to a read-only register to restore its state. * On fully standard-compliant hardware this is not needed and * this operation shall not take place. See ahci_pci.c for * platforms using this quirk. */ ATA_OUTL(ctlr->r_mem, AHCI_CAP, ctlr->caps); } return (0); } int ahci_attach(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); int error, i, speed, unit; uint32_t u, version; device_t child; ctlr->dev = dev; ctlr->ccc = 0; resource_int_value(device_get_name(dev), device_get_unit(dev), "ccc", &ctlr->ccc); mtx_init(&ctlr->ch_mtx, "AHCI channels lock", NULL, MTX_DEF); /* Setup our own memory management for channels. */ ctlr->sc_iomem.rm_start = rman_get_start(ctlr->r_mem); ctlr->sc_iomem.rm_end = rman_get_end(ctlr->r_mem); ctlr->sc_iomem.rm_type = RMAN_ARRAY; ctlr->sc_iomem.rm_descr = "I/O memory addresses"; if ((error = rman_init(&ctlr->sc_iomem)) != 0) { ahci_free_mem(dev); return (error); } if ((error = rman_manage_region(&ctlr->sc_iomem, rman_get_start(ctlr->r_mem), rman_get_end(ctlr->r_mem))) != 0) { ahci_free_mem(dev); rman_fini(&ctlr->sc_iomem); return (error); } /* Get the HW capabilities */ version = ATA_INL(ctlr->r_mem, AHCI_VS); ctlr->caps = ATA_INL(ctlr->r_mem, AHCI_CAP); if (version >= 0x00010200) ctlr->caps2 = ATA_INL(ctlr->r_mem, AHCI_CAP2); if (ctlr->caps & AHCI_CAP_EMS) ctlr->capsem = ATA_INL(ctlr->r_mem, AHCI_EM_CTL); if (ctlr->quirks & AHCI_Q_FORCE_PI) { /* * Enable ports. * The spec says that BIOS sets up bits corresponding to * available ports. On platforms where this information * is missing, the driver can define available ports on its own. */ int nports = (ctlr->caps & AHCI_CAP_NPMASK) + 1; int nmask = (1 << nports) - 1; ATA_OUTL(ctlr->r_mem, AHCI_PI, nmask); device_printf(dev, "Forcing PI to %d ports (mask = %x)\n", nports, nmask); } ctlr->ichannels = ATA_INL(ctlr->r_mem, AHCI_PI); /* Identify and set separate quirks for HBA and RAID f/w Marvells. */ if ((ctlr->quirks & AHCI_Q_ALTSIG) && (ctlr->caps & AHCI_CAP_SPM) == 0) ctlr->quirks |= AHCI_Q_NOBSYRES; if (ctlr->quirks & AHCI_Q_1CH) { ctlr->caps &= ~AHCI_CAP_NPMASK; ctlr->ichannels &= 0x01; } if (ctlr->quirks & AHCI_Q_2CH) { ctlr->caps &= ~AHCI_CAP_NPMASK; ctlr->caps |= 1; ctlr->ichannels &= 0x03; } if (ctlr->quirks & AHCI_Q_4CH) { ctlr->caps &= ~AHCI_CAP_NPMASK; ctlr->caps |= 3; ctlr->ichannels &= 0x0f; } ctlr->channels = MAX(flsl(ctlr->ichannels), (ctlr->caps & AHCI_CAP_NPMASK) + 1); if (ctlr->quirks & AHCI_Q_NOPMP) ctlr->caps &= ~AHCI_CAP_SPM; if (ctlr->quirks & AHCI_Q_NONCQ) ctlr->caps &= ~AHCI_CAP_SNCQ; if ((ctlr->caps & AHCI_CAP_CCCS) == 0) ctlr->ccc = 0; ctlr->emloc = ATA_INL(ctlr->r_mem, AHCI_EM_LOC); /* Create controller-wide DMA tag. */ if (bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, (ctlr->caps & AHCI_CAP_64BIT) ? BUS_SPACE_MAXADDR : BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, ctlr->dma_coherent ? BUS_DMA_COHERENT : 0, NULL, NULL, &ctlr->dma_tag)) { ahci_free_mem(dev); rman_fini(&ctlr->sc_iomem); return (ENXIO); } ahci_ctlr_setup(dev); /* Setup interrupts. */ if ((error = ahci_setup_interrupt(dev)) != 0) { bus_dma_tag_destroy(ctlr->dma_tag); ahci_free_mem(dev); rman_fini(&ctlr->sc_iomem); return (error); } i = 0; for (u = ctlr->ichannels; u != 0; u >>= 1) i += (u & 1); ctlr->direct = (ctlr->msi && (ctlr->numirqs > 1 || i <= 3)); resource_int_value(device_get_name(dev), device_get_unit(dev), "direct", &ctlr->direct); /* Announce HW capabilities. */ speed = (ctlr->caps & AHCI_CAP_ISS) >> AHCI_CAP_ISS_SHIFT; device_printf(dev, "AHCI v%x.%02x with %d %sGbps ports, Port Multiplier %s%s\n", ((version >> 20) & 0xf0) + ((version >> 16) & 0x0f), ((version >> 4) & 0xf0) + (version & 0x0f), (ctlr->caps & AHCI_CAP_NPMASK) + 1, ((speed == 1) ? "1.5":((speed == 2) ? "3": ((speed == 3) ? "6":"?"))), (ctlr->caps & AHCI_CAP_SPM) ? "supported" : "not supported", (ctlr->caps & AHCI_CAP_FBSS) ? " with FBS" : ""); if (ctlr->quirks != 0) { device_printf(dev, "quirks=0x%b\n", ctlr->quirks, AHCI_Q_BIT_STRING); } if (bootverbose) { device_printf(dev, "Caps:%s%s%s%s%s%s%s%s %sGbps", (ctlr->caps & AHCI_CAP_64BIT) ? " 64bit":"", (ctlr->caps & AHCI_CAP_SNCQ) ? " NCQ":"", (ctlr->caps & AHCI_CAP_SSNTF) ? " SNTF":"", (ctlr->caps & AHCI_CAP_SMPS) ? " MPS":"", (ctlr->caps & AHCI_CAP_SSS) ? " SS":"", (ctlr->caps & AHCI_CAP_SALP) ? " ALP":"", (ctlr->caps & AHCI_CAP_SAL) ? " AL":"", (ctlr->caps & AHCI_CAP_SCLO) ? " CLO":"", ((speed == 1) ? "1.5":((speed == 2) ? "3": ((speed == 3) ? "6":"?")))); printf("%s%s%s%s%s%s %dcmd%s%s%s %dports\n", (ctlr->caps & AHCI_CAP_SAM) ? " AM":"", (ctlr->caps & AHCI_CAP_SPM) ? " PM":"", (ctlr->caps & AHCI_CAP_FBSS) ? " FBS":"", (ctlr->caps & AHCI_CAP_PMD) ? " PMD":"", (ctlr->caps & AHCI_CAP_SSC) ? " SSC":"", (ctlr->caps & AHCI_CAP_PSC) ? " PSC":"", ((ctlr->caps & AHCI_CAP_NCS) >> AHCI_CAP_NCS_SHIFT) + 1, (ctlr->caps & AHCI_CAP_CCCS) ? " CCC":"", (ctlr->caps & AHCI_CAP_EMS) ? " EM":"", (ctlr->caps & AHCI_CAP_SXS) ? " eSATA":"", (ctlr->caps & AHCI_CAP_NPMASK) + 1); } if (bootverbose && version >= 0x00010200) { device_printf(dev, "Caps2:%s%s%s%s%s%s\n", (ctlr->caps2 & AHCI_CAP2_DESO) ? " DESO":"", (ctlr->caps2 & AHCI_CAP2_SADM) ? " SADM":"", (ctlr->caps2 & AHCI_CAP2_SDS) ? " SDS":"", (ctlr->caps2 & AHCI_CAP2_APST) ? " APST":"", (ctlr->caps2 & AHCI_CAP2_NVMP) ? " NVMP":"", (ctlr->caps2 & AHCI_CAP2_BOH) ? " BOH":""); } /* Attach all channels on this controller */ for (unit = 0; unit < ctlr->channels; unit++) { child = device_add_child(dev, "ahcich", -1); if (child == NULL) { device_printf(dev, "failed to add channel device\n"); continue; } device_set_ivars(child, (void *)(intptr_t)unit); if ((ctlr->ichannels & (1 << unit)) == 0) device_disable(child); } /* Attach any remapped NVME device */ for (; unit < ctlr->channels + ctlr->remapped_devices; unit++) { child = device_add_child(dev, "nvme", -1); if (child == NULL) { device_printf(dev, "failed to add remapped NVMe device"); continue; } device_set_ivars(child, (void *)(intptr_t)(unit | AHCI_REMAPPED_UNIT)); } if (ctlr->caps & AHCI_CAP_EMS) { child = device_add_child(dev, "ahciem", -1); if (child == NULL) device_printf(dev, "failed to add enclosure device\n"); else device_set_ivars(child, (void *)(intptr_t)AHCI_EM_UNIT); } bus_generic_attach(dev); return (0); } int ahci_detach(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); int i; /* Detach & delete all children */ device_delete_children(dev); /* Free interrupts. */ for (i = 0; i < ctlr->numirqs; i++) { if (ctlr->irqs[i].r_irq) { bus_teardown_intr(dev, ctlr->irqs[i].r_irq, ctlr->irqs[i].handle); bus_release_resource(dev, SYS_RES_IRQ, ctlr->irqs[i].r_irq_rid, ctlr->irqs[i].r_irq); } } bus_dma_tag_destroy(ctlr->dma_tag); /* Free memory. */ rman_fini(&ctlr->sc_iomem); ahci_free_mem(dev); mtx_destroy(&ctlr->ch_mtx); return (0); } void ahci_free_mem(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); /* Release memory resources */ if (ctlr->r_mem) bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); if (ctlr->r_msix_table) bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_msix_tab_rid, ctlr->r_msix_table); if (ctlr->r_msix_pba) bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_msix_pba_rid, ctlr->r_msix_pba); ctlr->r_msix_pba = ctlr->r_mem = ctlr->r_msix_table = NULL; } int ahci_setup_interrupt(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); int i; /* Check for single MSI vector fallback. */ if (ctlr->numirqs > 1 && (ATA_INL(ctlr->r_mem, AHCI_GHC) & AHCI_GHC_MRSM) != 0) { device_printf(dev, "Falling back to one MSI\n"); ctlr->numirqs = 1; } /* Ensure we don't overrun irqs. */ if (ctlr->numirqs > AHCI_MAX_IRQS) { device_printf(dev, "Too many irqs %d > %d (clamping)\n", ctlr->numirqs, AHCI_MAX_IRQS); ctlr->numirqs = AHCI_MAX_IRQS; } /* Allocate all IRQs. */ for (i = 0; i < ctlr->numirqs; i++) { ctlr->irqs[i].ctlr = ctlr; ctlr->irqs[i].r_irq_rid = i + (ctlr->msi ? 1 : 0); if (ctlr->channels == 1 && !ctlr->ccc && ctlr->msi) ctlr->irqs[i].mode = AHCI_IRQ_MODE_ONE; else if (ctlr->numirqs == 1 || i >= ctlr->channels || (ctlr->ccc && i == ctlr->cccv)) ctlr->irqs[i].mode = AHCI_IRQ_MODE_ALL; else if (ctlr->channels > ctlr->numirqs && i == ctlr->numirqs - 1) ctlr->irqs[i].mode = AHCI_IRQ_MODE_AFTER; else ctlr->irqs[i].mode = AHCI_IRQ_MODE_ONE; if (!(ctlr->irqs[i].r_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ctlr->irqs[i].r_irq_rid, RF_SHAREABLE | RF_ACTIVE))) { device_printf(dev, "unable to map interrupt\n"); return (ENXIO); } if ((bus_setup_intr(dev, ctlr->irqs[i].r_irq, ATA_INTR_FLAGS, NULL, (ctlr->irqs[i].mode != AHCI_IRQ_MODE_ONE) ? ahci_intr : ((ctlr->quirks & AHCI_Q_EDGEIS) ? ahci_intr_one_edge : ahci_intr_one), &ctlr->irqs[i], &ctlr->irqs[i].handle))) { /* SOS XXX release r_irq */ device_printf(dev, "unable to setup interrupt\n"); return (ENXIO); } if (ctlr->numirqs > 1) { bus_describe_intr(dev, ctlr->irqs[i].r_irq, ctlr->irqs[i].handle, ctlr->irqs[i].mode == AHCI_IRQ_MODE_ONE ? "ch%d" : "%d", i); } } return (0); } /* * Common case interrupt handler. */ static void ahci_intr(void *data) { struct ahci_controller_irq *irq = data; struct ahci_controller *ctlr = irq->ctlr; u_int32_t is, ise = 0; void *arg; int unit; if (irq->mode == AHCI_IRQ_MODE_ALL) { unit = 0; if (ctlr->ccc) is = ctlr->ichannels; else is = ATA_INL(ctlr->r_mem, AHCI_IS); } else { /* AHCI_IRQ_MODE_AFTER */ unit = irq->r_irq_rid - 1; is = ATA_INL(ctlr->r_mem, AHCI_IS); is &= (0xffffffff << unit); } /* CCC interrupt is edge triggered. */ if (ctlr->ccc) ise = 1 << ctlr->cccv; /* Some controllers have edge triggered IS. */ if (ctlr->quirks & AHCI_Q_EDGEIS) ise |= is; if (ise != 0) ATA_OUTL(ctlr->r_mem, AHCI_IS, ise); for (; unit < ctlr->channels; unit++) { if ((is & (1 << unit)) != 0 && (arg = ctlr->interrupt[unit].argument)) { ctlr->interrupt[unit].function(arg); } } for (; unit < ctlr->channels + ctlr->remapped_devices; unit++) { if ((arg = ctlr->interrupt[unit].argument)) { ctlr->interrupt[unit].function(arg); } } /* AHCI declares level triggered IS. */ if (!(ctlr->quirks & AHCI_Q_EDGEIS)) ATA_OUTL(ctlr->r_mem, AHCI_IS, is); ATA_RBL(ctlr->r_mem, AHCI_IS); } /* * Simplified interrupt handler for multivector MSI mode. */ static void ahci_intr_one(void *data) { struct ahci_controller_irq *irq = data; struct ahci_controller *ctlr = irq->ctlr; void *arg; int unit; unit = irq->r_irq_rid - 1; if ((arg = ctlr->interrupt[unit].argument)) ctlr->interrupt[unit].function(arg); /* AHCI declares level triggered IS. */ ATA_OUTL(ctlr->r_mem, AHCI_IS, 1 << unit); ATA_RBL(ctlr->r_mem, AHCI_IS); } static void ahci_intr_one_edge(void *data) { struct ahci_controller_irq *irq = data; struct ahci_controller *ctlr = irq->ctlr; void *arg; int unit; unit = irq->r_irq_rid - 1; /* Some controllers have edge triggered IS. */ ATA_OUTL(ctlr->r_mem, AHCI_IS, 1 << unit); if ((arg = ctlr->interrupt[unit].argument)) ctlr->interrupt[unit].function(arg); ATA_RBL(ctlr->r_mem, AHCI_IS); } struct resource * ahci_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct ahci_controller *ctlr = device_get_softc(dev); struct resource *res; rman_res_t st; int offset, size, unit; bool is_em, is_remapped; unit = (intptr_t)device_get_ivars(child); is_em = is_remapped = false; if (unit & AHCI_REMAPPED_UNIT) { unit &= AHCI_UNIT; unit -= ctlr->channels; is_remapped = true; } else if (unit & AHCI_EM_UNIT) { unit &= AHCI_UNIT; is_em = true; } res = NULL; switch (type) { case SYS_RES_MEMORY: if (is_remapped) { offset = ctlr->remap_offset + unit * ctlr->remap_size; size = ctlr->remap_size; } else if (!is_em) { offset = AHCI_OFFSET + (unit << 7); size = 128; } else if (*rid == 0) { offset = AHCI_EM_CTL; size = 4; } else { offset = (ctlr->emloc & 0xffff0000) >> 14; size = (ctlr->emloc & 0x0000ffff) << 2; if (*rid != 1) { if (*rid == 2 && (ctlr->capsem & (AHCI_EM_XMT | AHCI_EM_SMB)) == 0) offset += size; else break; } } st = rman_get_start(ctlr->r_mem); res = rman_reserve_resource(&ctlr->sc_iomem, st + offset, st + offset + size - 1, size, RF_ACTIVE, child); if (res) { bus_space_handle_t bsh; bus_space_tag_t bst; bsh = rman_get_bushandle(ctlr->r_mem); bst = rman_get_bustag(ctlr->r_mem); bus_space_subregion(bst, bsh, offset, 128, &bsh); rman_set_bushandle(res, bsh); rman_set_bustag(res, bst); } break; case SYS_RES_IRQ: if (*rid == ATA_IRQ_RID) res = ctlr->irqs[0].r_irq; break; } return (res); } int ahci_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { switch (type) { case SYS_RES_MEMORY: rman_release_resource(r); return (0); case SYS_RES_IRQ: if (rid != ATA_IRQ_RID) return (ENOENT); return (0); } return (EINVAL); } int ahci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags, driver_filter_t *filter, driver_intr_t *function, void *argument, void **cookiep) { struct ahci_controller *ctlr = device_get_softc(dev); int unit = (intptr_t)device_get_ivars(child) & AHCI_UNIT; if (filter != NULL) { printf("ahci.c: we cannot use a filter here\n"); return (EINVAL); } ctlr->interrupt[unit].function = function; ctlr->interrupt[unit].argument = argument; return (0); } int ahci_teardown_intr(device_t dev, device_t child, struct resource *irq, void *cookie) { struct ahci_controller *ctlr = device_get_softc(dev); int unit = (intptr_t)device_get_ivars(child) & AHCI_UNIT; ctlr->interrupt[unit].function = NULL; ctlr->interrupt[unit].argument = NULL; return (0); } int ahci_print_child(device_t dev, device_t child) { intptr_t ivars; int retval; retval = bus_print_child_header(dev, child); ivars = (intptr_t)device_get_ivars(child); if ((ivars & AHCI_EM_UNIT) == 0) retval += printf(" at channel %d", (int)ivars & AHCI_UNIT); retval += bus_print_child_footer(dev, child); return (retval); } int ahci_child_location_str(device_t dev, device_t child, char *buf, size_t buflen) { intptr_t ivars; ivars = (intptr_t)device_get_ivars(child); if ((ivars & AHCI_EM_UNIT) == 0) snprintf(buf, buflen, "channel=%d", (int)ivars & AHCI_UNIT); return (0); } bus_dma_tag_t ahci_get_dma_tag(device_t dev, device_t child) { struct ahci_controller *ctlr = device_get_softc(dev); return (ctlr->dma_tag); } void ahci_attached(device_t dev, struct ahci_channel *ch) { struct ahci_controller *ctlr = device_get_softc(dev); mtx_lock(&ctlr->ch_mtx); ctlr->ch[ch->unit] = ch; mtx_unlock(&ctlr->ch_mtx); } void ahci_detached(device_t dev, struct ahci_channel *ch) { struct ahci_controller *ctlr = device_get_softc(dev); mtx_lock(&ctlr->ch_mtx); mtx_lock(&ch->mtx); ctlr->ch[ch->unit] = NULL; mtx_unlock(&ch->mtx); mtx_unlock(&ctlr->ch_mtx); } struct ahci_channel * ahci_getch(device_t dev, int n) { struct ahci_controller *ctlr = device_get_softc(dev); struct ahci_channel *ch; KASSERT(n >= 0 && n < AHCI_MAX_PORTS, ("Bad channel number %d", n)); mtx_lock(&ctlr->ch_mtx); ch = ctlr->ch[n]; if (ch != NULL) mtx_lock(&ch->mtx); mtx_unlock(&ctlr->ch_mtx); return (ch); } void ahci_putch(struct ahci_channel *ch) { mtx_unlock(&ch->mtx); } static int ahci_ch_probe(device_t dev) { device_set_desc_copy(dev, "AHCI channel"); return (BUS_PROBE_DEFAULT); } static int ahci_ch_disablephy_proc(SYSCTL_HANDLER_ARGS) { struct ahci_channel *ch; int error, value; ch = arg1; value = ch->disablephy; error = sysctl_handle_int(oidp, &value, 0, req); if (error != 0 || req->newptr == NULL || (value != 0 && value != 1)) return (error); mtx_lock(&ch->mtx); ch->disablephy = value; if (value) { ahci_ch_deinit(ch->dev); } else { ahci_ch_init(ch->dev); ahci_phy_check_events(ch, ATA_SE_PHY_CHANGED | ATA_SE_EXCHANGED); } mtx_unlock(&ch->mtx); return (0); } static int ahci_ch_attach(device_t dev) { struct ahci_controller *ctlr = device_get_softc(device_get_parent(dev)); struct ahci_channel *ch = device_get_softc(dev); struct cam_devq *devq; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; int rid, error, i, sata_rev = 0; u_int32_t version; ch->dev = dev; ch->unit = (intptr_t)device_get_ivars(dev); ch->caps = ctlr->caps; ch->caps2 = ctlr->caps2; ch->start = ctlr->ch_start; ch->quirks = ctlr->quirks; ch->vendorid = ctlr->vendorid; ch->deviceid = ctlr->deviceid; ch->subvendorid = ctlr->subvendorid; ch->subdeviceid = ctlr->subdeviceid; ch->numslots = ((ch->caps & AHCI_CAP_NCS) >> AHCI_CAP_NCS_SHIFT) + 1; mtx_init(&ch->mtx, "AHCI channel lock", NULL, MTX_DEF); ch->pm_level = 0; resource_int_value(device_get_name(dev), device_get_unit(dev), "pm_level", &ch->pm_level); STAILQ_INIT(&ch->doneq); if (ch->pm_level > 3) callout_init_mtx(&ch->pm_timer, &ch->mtx, 0); callout_init_mtx(&ch->reset_timer, &ch->mtx, 0); /* JMicron external ports (0) sometimes limited */ if ((ctlr->quirks & AHCI_Q_SATA1_UNIT0) && ch->unit == 0) sata_rev = 1; if (ch->quirks & AHCI_Q_SATA2) sata_rev = 2; resource_int_value(device_get_name(dev), device_get_unit(dev), "sata_rev", &sata_rev); for (i = 0; i < 16; i++) { ch->user[i].revision = sata_rev; ch->user[i].mode = 0; ch->user[i].bytecount = 8192; ch->user[i].tags = ch->numslots; ch->user[i].caps = 0; ch->curr[i] = ch->user[i]; if (ch->pm_level) { ch->user[i].caps = CTS_SATA_CAPS_H_PMREQ | CTS_SATA_CAPS_H_APST | CTS_SATA_CAPS_D_PMREQ | CTS_SATA_CAPS_D_APST; } ch->user[i].caps |= CTS_SATA_CAPS_H_DMAAA | CTS_SATA_CAPS_H_AN; } rid = 0; if (!(ch->r_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE))) return (ENXIO); ch->chcaps = ATA_INL(ch->r_mem, AHCI_P_CMD); version = ATA_INL(ctlr->r_mem, AHCI_VS); if (version < 0x00010200 && (ctlr->caps & AHCI_CAP_FBSS)) ch->chcaps |= AHCI_P_CMD_FBSCP; if (ch->caps2 & AHCI_CAP2_SDS) ch->chscaps = ATA_INL(ch->r_mem, AHCI_P_DEVSLP); if (bootverbose) { device_printf(dev, "Caps:%s%s%s%s%s%s\n", (ch->chcaps & AHCI_P_CMD_HPCP) ? " HPCP":"", (ch->chcaps & AHCI_P_CMD_MPSP) ? " MPSP":"", (ch->chcaps & AHCI_P_CMD_CPD) ? " CPD":"", (ch->chcaps & AHCI_P_CMD_ESP) ? " ESP":"", (ch->chcaps & AHCI_P_CMD_FBSCP) ? " FBSCP":"", (ch->chscaps & AHCI_P_DEVSLP_DSP) ? " DSP":""); } ahci_dmainit(dev); ahci_slotsalloc(dev); mtx_lock(&ch->mtx); ahci_ch_init(dev); rid = ATA_IRQ_RID; if (!(ch->r_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE))) { device_printf(dev, "Unable to map interrupt\n"); error = ENXIO; goto err0; } if ((bus_setup_intr(dev, ch->r_irq, ATA_INTR_FLAGS, NULL, ctlr->direct ? ahci_ch_intr_direct : ahci_ch_intr, ch, &ch->ih))) { device_printf(dev, "Unable to setup interrupt\n"); error = ENXIO; goto err1; } /* Create the device queue for our SIM. */ devq = cam_simq_alloc(ch->numslots); if (devq == NULL) { device_printf(dev, "Unable to allocate simq\n"); error = ENOMEM; goto err1; } /* Construct SIM entry */ ch->sim = cam_sim_alloc(ahciaction, ahcipoll, "ahcich", ch, device_get_unit(dev), (struct mtx *)&ch->mtx, (ch->quirks & AHCI_Q_NOCCS) ? 1 : min(2, ch->numslots), (ch->caps & AHCI_CAP_SNCQ) ? ch->numslots : 0, devq); if (ch->sim == NULL) { cam_simq_free(devq); device_printf(dev, "unable to allocate sim\n"); error = ENOMEM; goto err1; } if (xpt_bus_register(ch->sim, dev, 0) != CAM_SUCCESS) { device_printf(dev, "unable to register xpt bus\n"); error = ENXIO; goto err2; } if (xpt_create_path(&ch->path, /*periph*/NULL, cam_sim_path(ch->sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { device_printf(dev, "unable to create path\n"); error = ENXIO; goto err3; } if (ch->pm_level > 3) { callout_reset(&ch->pm_timer, (ch->pm_level == 4) ? hz / 1000 : hz / 8, ahci_ch_pm, ch); } mtx_unlock(&ch->mtx); ahci_attached(device_get_parent(dev), ch); ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "disable_phy", - CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_NEEDGIANT, ch, + CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_MPSAFE, ch, 0, ahci_ch_disablephy_proc, "IU", "Disable PHY"); return (0); err3: xpt_bus_deregister(cam_sim_path(ch->sim)); err2: cam_sim_free(ch->sim, /*free_devq*/TRUE); err1: bus_release_resource(dev, SYS_RES_IRQ, ATA_IRQ_RID, ch->r_irq); err0: bus_release_resource(dev, SYS_RES_MEMORY, ch->unit, ch->r_mem); mtx_unlock(&ch->mtx); mtx_destroy(&ch->mtx); return (error); } static int ahci_ch_detach(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); ahci_detached(device_get_parent(dev), ch); mtx_lock(&ch->mtx); xpt_async(AC_LOST_DEVICE, ch->path, NULL); /* Forget about reset. */ if (ch->resetting) { ch->resetting = 0; xpt_release_simq(ch->sim, TRUE); } xpt_free_path(ch->path); xpt_bus_deregister(cam_sim_path(ch->sim)); cam_sim_free(ch->sim, /*free_devq*/TRUE); mtx_unlock(&ch->mtx); if (ch->pm_level > 3) callout_drain(&ch->pm_timer); callout_drain(&ch->reset_timer); bus_teardown_intr(dev, ch->r_irq, ch->ih); bus_release_resource(dev, SYS_RES_IRQ, ATA_IRQ_RID, ch->r_irq); ahci_ch_deinit(dev); ahci_slotsfree(dev); ahci_dmafini(dev); bus_release_resource(dev, SYS_RES_MEMORY, ch->unit, ch->r_mem); mtx_destroy(&ch->mtx); return (0); } static int ahci_ch_init(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); uint64_t work; /* Disable port interrupts */ ATA_OUTL(ch->r_mem, AHCI_P_IE, 0); /* Setup work areas */ work = ch->dma.work_bus + AHCI_CL_OFFSET; ATA_OUTL(ch->r_mem, AHCI_P_CLB, work & 0xffffffff); ATA_OUTL(ch->r_mem, AHCI_P_CLBU, work >> 32); work = ch->dma.rfis_bus; ATA_OUTL(ch->r_mem, AHCI_P_FB, work & 0xffffffff); ATA_OUTL(ch->r_mem, AHCI_P_FBU, work >> 32); /* Activate the channel and power/spin up device */ ATA_OUTL(ch->r_mem, AHCI_P_CMD, (AHCI_P_CMD_ACTIVE | AHCI_P_CMD_POD | AHCI_P_CMD_SUD | ((ch->pm_level == 2 || ch->pm_level == 3) ? AHCI_P_CMD_ALPE : 0) | ((ch->pm_level > 2) ? AHCI_P_CMD_ASP : 0 ))); ahci_start_fr(ch); ahci_start(ch, 1); return (0); } static int ahci_ch_deinit(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); /* Disable port interrupts. */ ATA_OUTL(ch->r_mem, AHCI_P_IE, 0); /* Reset command register. */ ahci_stop(ch); ahci_stop_fr(ch); ATA_OUTL(ch->r_mem, AHCI_P_CMD, 0); /* Allow everything, including partial and slumber modes. */ ATA_OUTL(ch->r_mem, AHCI_P_SCTL, 0); /* Request slumber mode transition and give some time to get there. */ ATA_OUTL(ch->r_mem, AHCI_P_CMD, AHCI_P_CMD_SLUMBER); DELAY(100); /* Disable PHY. */ ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_DISABLE); return (0); } static int ahci_ch_suspend(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); mtx_lock(&ch->mtx); xpt_freeze_simq(ch->sim, 1); /* Forget about reset. */ if (ch->resetting) { ch->resetting = 0; callout_stop(&ch->reset_timer); xpt_release_simq(ch->sim, TRUE); } while (ch->oslots) msleep(ch, &ch->mtx, PRIBIO, "ahcisusp", hz/100); ahci_ch_deinit(dev); mtx_unlock(&ch->mtx); return (0); } static int ahci_ch_resume(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); mtx_lock(&ch->mtx); ahci_ch_init(dev); ahci_reset(ch); xpt_release_simq(ch->sim, TRUE); mtx_unlock(&ch->mtx); return (0); } devclass_t ahcich_devclass; static device_method_t ahcich_methods[] = { DEVMETHOD(device_probe, ahci_ch_probe), DEVMETHOD(device_attach, ahci_ch_attach), DEVMETHOD(device_detach, ahci_ch_detach), DEVMETHOD(device_suspend, ahci_ch_suspend), DEVMETHOD(device_resume, ahci_ch_resume), DEVMETHOD_END }; static driver_t ahcich_driver = { "ahcich", ahcich_methods, sizeof(struct ahci_channel) }; DRIVER_MODULE(ahcich, ahci, ahcich_driver, ahcich_devclass, NULL, NULL); struct ahci_dc_cb_args { bus_addr_t maddr; int error; }; static void ahci_dmainit(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); struct ahci_dc_cb_args dcba; size_t rfsize; int error; /* Command area. */ error = bus_dma_tag_create(bus_get_dma_tag(dev), 1024, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, AHCI_WORK_SIZE, 1, AHCI_WORK_SIZE, 0, NULL, NULL, &ch->dma.work_tag); if (error != 0) goto error; error = bus_dmamem_alloc(ch->dma.work_tag, (void **)&ch->dma.work, BUS_DMA_ZERO, &ch->dma.work_map); if (error != 0) goto error; error = bus_dmamap_load(ch->dma.work_tag, ch->dma.work_map, ch->dma.work, AHCI_WORK_SIZE, ahci_dmasetupc_cb, &dcba, BUS_DMA_NOWAIT); if (error != 0 || (error = dcba.error) != 0) { bus_dmamem_free(ch->dma.work_tag, ch->dma.work, ch->dma.work_map); goto error; } ch->dma.work_bus = dcba.maddr; /* FIS receive area. */ if (ch->chcaps & AHCI_P_CMD_FBSCP) rfsize = 4096; else rfsize = 256; error = bus_dma_tag_create(bus_get_dma_tag(dev), rfsize, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, rfsize, 1, rfsize, 0, NULL, NULL, &ch->dma.rfis_tag); if (error != 0) goto error; error = bus_dmamem_alloc(ch->dma.rfis_tag, (void **)&ch->dma.rfis, 0, &ch->dma.rfis_map); if (error != 0) goto error; error = bus_dmamap_load(ch->dma.rfis_tag, ch->dma.rfis_map, ch->dma.rfis, rfsize, ahci_dmasetupc_cb, &dcba, BUS_DMA_NOWAIT); if (error != 0 || (error = dcba.error) != 0) { bus_dmamem_free(ch->dma.rfis_tag, ch->dma.rfis, ch->dma.rfis_map); goto error; } ch->dma.rfis_bus = dcba.maddr; /* Data area. */ error = bus_dma_tag_create(bus_get_dma_tag(dev), 2, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, AHCI_SG_ENTRIES * PAGE_SIZE, AHCI_SG_ENTRIES, AHCI_PRD_MAX, 0, busdma_lock_mutex, &ch->mtx, &ch->dma.data_tag); if (error != 0) goto error; return; error: device_printf(dev, "WARNING - DMA initialization failed, error %d\n", error); ahci_dmafini(dev); } static void ahci_dmasetupc_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int error) { struct ahci_dc_cb_args *dcba = (struct ahci_dc_cb_args *)xsc; if (!(dcba->error = error)) dcba->maddr = segs[0].ds_addr; } static void ahci_dmafini(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); if (ch->dma.data_tag) { bus_dma_tag_destroy(ch->dma.data_tag); ch->dma.data_tag = NULL; } if (ch->dma.rfis_bus) { bus_dmamap_unload(ch->dma.rfis_tag, ch->dma.rfis_map); bus_dmamem_free(ch->dma.rfis_tag, ch->dma.rfis, ch->dma.rfis_map); ch->dma.rfis_bus = 0; ch->dma.rfis = NULL; } if (ch->dma.work_bus) { bus_dmamap_unload(ch->dma.work_tag, ch->dma.work_map); bus_dmamem_free(ch->dma.work_tag, ch->dma.work, ch->dma.work_map); ch->dma.work_bus = 0; ch->dma.work = NULL; } if (ch->dma.work_tag) { bus_dma_tag_destroy(ch->dma.work_tag); ch->dma.work_tag = NULL; } } static void ahci_slotsalloc(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); int i; /* Alloc and setup command/dma slots */ bzero(ch->slot, sizeof(ch->slot)); for (i = 0; i < ch->numslots; i++) { struct ahci_slot *slot = &ch->slot[i]; slot->ch = ch; slot->slot = i; slot->state = AHCI_SLOT_EMPTY; slot->ct_offset = AHCI_CT_OFFSET + AHCI_CT_SIZE * i; slot->ccb = NULL; callout_init_mtx(&slot->timeout, &ch->mtx, 0); if (bus_dmamap_create(ch->dma.data_tag, 0, &slot->dma.data_map)) device_printf(ch->dev, "FAILURE - create data_map\n"); } } static void ahci_slotsfree(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); int i; /* Free all dma slots */ for (i = 0; i < ch->numslots; i++) { struct ahci_slot *slot = &ch->slot[i]; callout_drain(&slot->timeout); if (slot->dma.data_map) { bus_dmamap_destroy(ch->dma.data_tag, slot->dma.data_map); slot->dma.data_map = NULL; } } } static int ahci_phy_check_events(struct ahci_channel *ch, u_int32_t serr) { if (((ch->pm_level == 0) && (serr & ATA_SE_PHY_CHANGED)) || ((ch->pm_level != 0 || ch->listening) && (serr & ATA_SE_EXCHANGED))) { u_int32_t status = ATA_INL(ch->r_mem, AHCI_P_SSTS); union ccb *ccb; if (bootverbose) { if ((status & ATA_SS_DET_MASK) != ATA_SS_DET_NO_DEVICE) device_printf(ch->dev, "CONNECT requested\n"); else device_printf(ch->dev, "DISCONNECT requested\n"); } ahci_reset(ch); if ((ccb = xpt_alloc_ccb_nowait()) == NULL) return (0); if (xpt_create_path(&ccb->ccb_h.path, NULL, cam_sim_path(ch->sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_free_ccb(ccb); return (0); } xpt_rescan(ccb); return (1); } return (0); } static void ahci_cpd_check_events(struct ahci_channel *ch) { u_int32_t status; union ccb *ccb; device_t dev; if (ch->pm_level == 0) return; status = ATA_INL(ch->r_mem, AHCI_P_CMD); if ((status & AHCI_P_CMD_CPD) == 0) return; if (bootverbose) { dev = ch->dev; if (status & AHCI_P_CMD_CPS) { device_printf(dev, "COLD CONNECT requested\n"); } else device_printf(dev, "COLD DISCONNECT requested\n"); } ahci_reset(ch); if ((ccb = xpt_alloc_ccb_nowait()) == NULL) return; if (xpt_create_path(&ccb->ccb_h.path, NULL, cam_sim_path(ch->sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_free_ccb(ccb); return; } xpt_rescan(ccb); } static void ahci_notify_events(struct ahci_channel *ch, u_int32_t status) { struct cam_path *dpath; int i; if (ch->caps & AHCI_CAP_SSNTF) ATA_OUTL(ch->r_mem, AHCI_P_SNTF, status); if (bootverbose) device_printf(ch->dev, "SNTF 0x%04x\n", status); for (i = 0; i < 16; i++) { if ((status & (1 << i)) == 0) continue; if (xpt_create_path(&dpath, NULL, xpt_path_path_id(ch->path), i, 0) == CAM_REQ_CMP) { xpt_async(AC_SCSI_AEN, dpath, NULL); xpt_free_path(dpath); } } } static void ahci_done(struct ahci_channel *ch, union ccb *ccb) { mtx_assert(&ch->mtx, MA_OWNED); if ((ccb->ccb_h.func_code & XPT_FC_QUEUED) == 0 || ch->batch == 0) { xpt_done(ccb); return; } STAILQ_INSERT_TAIL(&ch->doneq, &ccb->ccb_h, sim_links.stqe); } static void ahci_ch_intr(void *arg) { struct ahci_channel *ch = (struct ahci_channel *)arg; uint32_t istatus; /* Read interrupt statuses. */ istatus = ATA_INL(ch->r_mem, AHCI_P_IS); mtx_lock(&ch->mtx); ahci_ch_intr_main(ch, istatus); mtx_unlock(&ch->mtx); } static void ahci_ch_intr_direct(void *arg) { struct ahci_channel *ch = (struct ahci_channel *)arg; struct ccb_hdr *ccb_h; uint32_t istatus; STAILQ_HEAD(, ccb_hdr) tmp_doneq = STAILQ_HEAD_INITIALIZER(tmp_doneq); /* Read interrupt statuses. */ istatus = ATA_INL(ch->r_mem, AHCI_P_IS); mtx_lock(&ch->mtx); ch->batch = 1; ahci_ch_intr_main(ch, istatus); ch->batch = 0; /* * Prevent the possibility of issues caused by processing the queue * while unlocked below by moving the contents to a local queue. */ STAILQ_CONCAT(&tmp_doneq, &ch->doneq); mtx_unlock(&ch->mtx); while ((ccb_h = STAILQ_FIRST(&tmp_doneq)) != NULL) { STAILQ_REMOVE_HEAD(&tmp_doneq, sim_links.stqe); xpt_done_direct((union ccb *)ccb_h); } } static void ahci_ch_pm(void *arg) { struct ahci_channel *ch = (struct ahci_channel *)arg; uint32_t work; if (ch->numrslots != 0) return; work = ATA_INL(ch->r_mem, AHCI_P_CMD); if (ch->pm_level == 4) work |= AHCI_P_CMD_PARTIAL; else work |= AHCI_P_CMD_SLUMBER; ATA_OUTL(ch->r_mem, AHCI_P_CMD, work); } static void ahci_ch_intr_main(struct ahci_channel *ch, uint32_t istatus) { uint32_t cstatus, serr = 0, sntf = 0, ok, err; enum ahci_err_type et; int i, ccs, port, reset = 0; /* Clear interrupt statuses. */ ATA_OUTL(ch->r_mem, AHCI_P_IS, istatus); /* Read command statuses. */ if (ch->numtslots != 0) cstatus = ATA_INL(ch->r_mem, AHCI_P_SACT); else cstatus = 0; if (ch->numrslots != ch->numtslots) cstatus |= ATA_INL(ch->r_mem, AHCI_P_CI); /* Read SNTF in one of possible ways. */ if ((istatus & AHCI_P_IX_SDB) && (ch->pm_present || ch->curr[0].atapi != 0)) { if (ch->caps & AHCI_CAP_SSNTF) sntf = ATA_INL(ch->r_mem, AHCI_P_SNTF); else if (ch->fbs_enabled) { u_int8_t *fis = ch->dma.rfis + 0x58; for (i = 0; i < 16; i++) { if (fis[1] & 0x80) { fis[1] &= 0x7f; sntf |= 1 << i; } fis += 256; } } else { u_int8_t *fis = ch->dma.rfis + 0x58; if (fis[1] & 0x80) sntf = (1 << (fis[1] & 0x0f)); } } /* Process PHY events */ if (istatus & (AHCI_P_IX_PC | AHCI_P_IX_PRC | AHCI_P_IX_OF | AHCI_P_IX_IF | AHCI_P_IX_HBD | AHCI_P_IX_HBF | AHCI_P_IX_TFE)) { serr = ATA_INL(ch->r_mem, AHCI_P_SERR); if (serr) { ATA_OUTL(ch->r_mem, AHCI_P_SERR, serr); reset = ahci_phy_check_events(ch, serr); } } /* Process cold presence detection events */ if ((istatus & AHCI_P_IX_CPD) && !reset) ahci_cpd_check_events(ch); /* Process command errors */ if (istatus & (AHCI_P_IX_OF | AHCI_P_IX_IF | AHCI_P_IX_HBD | AHCI_P_IX_HBF | AHCI_P_IX_TFE)) { if (ch->quirks & AHCI_Q_NOCCS) { /* * ASMedia chips sometimes report failed commands as * completed. Count all running commands as failed. */ cstatus |= ch->rslots; /* They also report wrong CCS, so try to guess one. */ ccs = powerof2(cstatus) ? ffs(cstatus) - 1 : -1; } else { ccs = (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CCS_MASK) >> AHCI_P_CMD_CCS_SHIFT; } //device_printf(dev, "%s ERROR is %08x cs %08x ss %08x rs %08x tfd %02x serr %08x fbs %08x ccs %d\n", // __func__, istatus, cstatus, sstatus, ch->rslots, ATA_INL(ch->r_mem, AHCI_P_TFD), // serr, ATA_INL(ch->r_mem, AHCI_P_FBS), ccs); port = -1; if (ch->fbs_enabled) { uint32_t fbs = ATA_INL(ch->r_mem, AHCI_P_FBS); if (fbs & AHCI_P_FBS_SDE) { port = (fbs & AHCI_P_FBS_DWE) >> AHCI_P_FBS_DWE_SHIFT; } else { for (i = 0; i < 16; i++) { if (ch->numrslotspd[i] == 0) continue; if (port == -1) port = i; else if (port != i) { port = -2; break; } } } } err = ch->rslots & cstatus; } else { ccs = 0; err = 0; port = -1; } /* Complete all successful commands. */ ok = ch->rslots & ~cstatus; for (i = 0; i < ch->numslots; i++) { if ((ok >> i) & 1) ahci_end_transaction(&ch->slot[i], AHCI_ERR_NONE); } /* On error, complete the rest of commands with error statuses. */ if (err) { if (ch->frozen) { union ccb *fccb = ch->frozen; ch->frozen = NULL; fccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_RELEASE_SIMQ; if (!(fccb->ccb_h.status & CAM_DEV_QFRZN)) { xpt_freeze_devq(fccb->ccb_h.path, 1); fccb->ccb_h.status |= CAM_DEV_QFRZN; } ahci_done(ch, fccb); } for (i = 0; i < ch->numslots; i++) { /* XXX: reqests in loading state. */ if (((err >> i) & 1) == 0) continue; if (port >= 0 && ch->slot[i].ccb->ccb_h.target_id != port) continue; if (istatus & AHCI_P_IX_TFE) { if (port != -2) { /* Task File Error */ if (ch->numtslotspd[ ch->slot[i].ccb->ccb_h.target_id] == 0) { /* Untagged operation. */ if (i == ccs) et = AHCI_ERR_TFE; else et = AHCI_ERR_INNOCENT; } else { /* Tagged operation. */ et = AHCI_ERR_NCQ; } } else { et = AHCI_ERR_TFE; ch->fatalerr = 1; } } else if (istatus & AHCI_P_IX_IF) { if (ch->numtslots == 0 && i != ccs && port != -2) et = AHCI_ERR_INNOCENT; else et = AHCI_ERR_SATA; } else et = AHCI_ERR_INVALID; ahci_end_transaction(&ch->slot[i], et); } /* * We can't reinit port if there are some other * commands active, use resume to complete them. */ if (ch->rslots != 0 && !ch->recoverycmd) ATA_OUTL(ch->r_mem, AHCI_P_FBS, AHCI_P_FBS_EN | AHCI_P_FBS_DEC); } /* Process NOTIFY events */ if (sntf) ahci_notify_events(ch, sntf); } /* Must be called with channel locked. */ static int ahci_check_collision(struct ahci_channel *ch, union ccb *ccb) { int t = ccb->ccb_h.target_id; if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) { /* Tagged command while we have no supported tag free. */ if (((~ch->oslots) & (0xffffffff >> (32 - ch->curr[t].tags))) == 0) return (1); /* If we have FBS */ if (ch->fbs_enabled) { /* Tagged command while untagged are active. */ if (ch->numrslotspd[t] != 0 && ch->numtslotspd[t] == 0) return (1); } else { /* Tagged command while untagged are active. */ if (ch->numrslots != 0 && ch->numtslots == 0) return (1); /* Tagged command while tagged to other target is active. */ if (ch->numtslots != 0 && ch->taggedtarget != ccb->ccb_h.target_id) return (1); } } else { /* If we have FBS */ if (ch->fbs_enabled) { /* Untagged command while tagged are active. */ if (ch->numrslotspd[t] != 0 && ch->numtslotspd[t] != 0) return (1); } else { /* Untagged command while tagged are active. */ if (ch->numrslots != 0 && ch->numtslots != 0) return (1); } } if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & (CAM_ATAIO_CONTROL | CAM_ATAIO_NEEDRESULT))) { /* Atomic command while anything active. */ if (ch->numrslots != 0) return (1); } /* We have some atomic command running. */ if (ch->aslots != 0) return (1); return (0); } /* Must be called with channel locked. */ static void ahci_begin_transaction(struct ahci_channel *ch, union ccb *ccb) { struct ahci_slot *slot; int tag, tags; /* Choose empty slot. */ tags = ch->numslots; if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) tags = ch->curr[ccb->ccb_h.target_id].tags; if (ch->lastslot + 1 < tags) tag = ffs(~(ch->oslots >> (ch->lastslot + 1))); else tag = 0; if (tag == 0 || tag + ch->lastslot >= tags) tag = ffs(~ch->oslots) - 1; else tag += ch->lastslot; ch->lastslot = tag; /* Occupy chosen slot. */ slot = &ch->slot[tag]; slot->ccb = ccb; /* Stop PM timer. */ if (ch->numrslots == 0 && ch->pm_level > 3) callout_stop(&ch->pm_timer); /* Update channel stats. */ ch->oslots |= (1 << tag); ch->numrslots++; ch->numrslotspd[ccb->ccb_h.target_id]++; if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) { ch->numtslots++; ch->numtslotspd[ccb->ccb_h.target_id]++; ch->taggedtarget = ccb->ccb_h.target_id; } if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & (CAM_ATAIO_CONTROL | CAM_ATAIO_NEEDRESULT))) ch->aslots |= (1 << tag); if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) { slot->state = AHCI_SLOT_LOADING; bus_dmamap_load_ccb(ch->dma.data_tag, slot->dma.data_map, ccb, ahci_dmasetprd, slot, 0); } else { slot->dma.nsegs = 0; ahci_execute_transaction(slot); } } /* Locked by busdma engine. */ static void ahci_dmasetprd(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct ahci_slot *slot = arg; struct ahci_channel *ch = slot->ch; struct ahci_cmd_tab *ctp; struct ahci_dma_prd *prd; int i; if (error) { device_printf(ch->dev, "DMA load error\n"); ahci_end_transaction(slot, AHCI_ERR_INVALID); return; } KASSERT(nsegs <= AHCI_SG_ENTRIES, ("too many DMA segment entries\n")); /* Get a piece of the workspace for this request */ ctp = (struct ahci_cmd_tab *)(ch->dma.work + slot->ct_offset); /* Fill S/G table */ prd = &ctp->prd_tab[0]; for (i = 0; i < nsegs; i++) { prd[i].dba = htole64(segs[i].ds_addr); prd[i].dbc = htole32((segs[i].ds_len - 1) & AHCI_PRD_MASK); } slot->dma.nsegs = nsegs; bus_dmamap_sync(ch->dma.data_tag, slot->dma.data_map, ((slot->ccb->ccb_h.flags & CAM_DIR_IN) ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE)); ahci_execute_transaction(slot); } /* Must be called with channel locked. */ static void ahci_execute_transaction(struct ahci_slot *slot) { struct ahci_channel *ch = slot->ch; struct ahci_cmd_tab *ctp; struct ahci_cmd_list *clp; union ccb *ccb = slot->ccb; int port = ccb->ccb_h.target_id & 0x0f; int fis_size, i, softreset; uint8_t *fis = ch->dma.rfis + 0x40; uint8_t val; uint16_t cmd_flags; /* Get a piece of the workspace for this request */ ctp = (struct ahci_cmd_tab *)(ch->dma.work + slot->ct_offset); /* Setup the FIS for this request */ if (!(fis_size = ahci_setup_fis(ch, ctp, ccb, slot->slot))) { device_printf(ch->dev, "Setting up SATA FIS failed\n"); ahci_end_transaction(slot, AHCI_ERR_INVALID); return; } /* Setup the command list entry */ clp = (struct ahci_cmd_list *) (ch->dma.work + AHCI_CL_OFFSET + (AHCI_CL_SIZE * slot->slot)); cmd_flags = (ccb->ccb_h.flags & CAM_DIR_OUT ? AHCI_CMD_WRITE : 0) | (ccb->ccb_h.func_code == XPT_SCSI_IO ? (AHCI_CMD_ATAPI | AHCI_CMD_PREFETCH) : 0) | (fis_size / sizeof(u_int32_t)) | (port << 12); clp->prd_length = htole16(slot->dma.nsegs); /* Special handling for Soft Reset command. */ if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL)) { if (ccb->ataio.cmd.control & ATA_A_RESET) { softreset = 1; /* Kick controller into sane state */ ahci_stop(ch); ahci_clo(ch); ahci_start(ch, 0); cmd_flags |= AHCI_CMD_RESET | AHCI_CMD_CLR_BUSY; } else { softreset = 2; /* Prepare FIS receive area for check. */ for (i = 0; i < 20; i++) fis[i] = 0xff; } } else softreset = 0; clp->bytecount = 0; clp->cmd_flags = htole16(cmd_flags); clp->cmd_table_phys = htole64(ch->dma.work_bus + slot->ct_offset); bus_dmamap_sync(ch->dma.work_tag, ch->dma.work_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map, BUS_DMASYNC_PREREAD); /* Set ACTIVE bit for NCQ commands. */ if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) { ATA_OUTL(ch->r_mem, AHCI_P_SACT, 1 << slot->slot); } /* If FBS is enabled, set PMP port. */ if (ch->fbs_enabled) { ATA_OUTL(ch->r_mem, AHCI_P_FBS, AHCI_P_FBS_EN | (port << AHCI_P_FBS_DEV_SHIFT)); } /* Issue command to the controller. */ slot->state = AHCI_SLOT_RUNNING; ch->rslots |= (1 << slot->slot); ATA_OUTL(ch->r_mem, AHCI_P_CI, (1 << slot->slot)); /* Device reset commands doesn't interrupt. Poll them. */ if (ccb->ccb_h.func_code == XPT_ATA_IO && (ccb->ataio.cmd.command == ATA_DEVICE_RESET || softreset)) { int count, timeout = ccb->ccb_h.timeout * 100; enum ahci_err_type et = AHCI_ERR_NONE; for (count = 0; count < timeout; count++) { DELAY(10); if (!(ATA_INL(ch->r_mem, AHCI_P_CI) & (1 << slot->slot))) break; if ((ATA_INL(ch->r_mem, AHCI_P_TFD) & ATA_S_ERROR) && softreset != 1) { #if 0 device_printf(ch->dev, "Poll error on slot %d, TFD: %04x\n", slot->slot, ATA_INL(ch->r_mem, AHCI_P_TFD)); #endif et = AHCI_ERR_TFE; break; } /* Workaround for ATI SB600/SB700 chipsets. */ if (ccb->ccb_h.target_id == 15 && (ch->quirks & AHCI_Q_ATI_PMP_BUG) && (ATA_INL(ch->r_mem, AHCI_P_IS) & AHCI_P_IX_IPM)) { et = AHCI_ERR_TIMEOUT; break; } } /* * Some Marvell controllers require additional time * after soft reset to work properly. Setup delay * to 50ms after soft reset. */ if (ch->quirks & AHCI_Q_MRVL_SR_DEL) DELAY(50000); /* * Marvell HBAs with non-RAID firmware do not wait for * readiness after soft reset, so we have to wait here. * Marvell RAIDs do not have this problem, but instead * sometimes forget to update FIS receive area, breaking * this wait. */ if ((ch->quirks & AHCI_Q_NOBSYRES) == 0 && (ch->quirks & AHCI_Q_ATI_PMP_BUG) == 0 && softreset == 2 && et == AHCI_ERR_NONE) { for ( ; count < timeout; count++) { bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map, BUS_DMASYNC_POSTREAD); val = fis[2]; bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map, BUS_DMASYNC_PREREAD); if ((val & ATA_S_BUSY) == 0) break; DELAY(10); } } if (timeout && (count >= timeout)) { device_printf(ch->dev, "Poll timeout on slot %d port %d\n", slot->slot, port); device_printf(ch->dev, "is %08x cs %08x ss %08x " "rs %08x tfd %02x serr %08x cmd %08x\n", ATA_INL(ch->r_mem, AHCI_P_IS), ATA_INL(ch->r_mem, AHCI_P_CI), ATA_INL(ch->r_mem, AHCI_P_SACT), ch->rslots, ATA_INL(ch->r_mem, AHCI_P_TFD), ATA_INL(ch->r_mem, AHCI_P_SERR), ATA_INL(ch->r_mem, AHCI_P_CMD)); et = AHCI_ERR_TIMEOUT; } /* Kick controller into sane state and enable FBS. */ if (softreset == 2) ch->eslots |= (1 << slot->slot); ahci_end_transaction(slot, et); return; } /* Start command execution timeout */ callout_reset_sbt(&slot->timeout, SBT_1MS * ccb->ccb_h.timeout / 2, 0, ahci_timeout, slot, 0); return; } /* Must be called with channel locked. */ static void ahci_process_timeout(struct ahci_channel *ch) { int i; mtx_assert(&ch->mtx, MA_OWNED); /* Handle the rest of commands. */ for (i = 0; i < ch->numslots; i++) { /* Do we have a running request on slot? */ if (ch->slot[i].state < AHCI_SLOT_RUNNING) continue; ahci_end_transaction(&ch->slot[i], AHCI_ERR_TIMEOUT); } } /* Must be called with channel locked. */ static void ahci_rearm_timeout(struct ahci_channel *ch) { int i; mtx_assert(&ch->mtx, MA_OWNED); for (i = 0; i < ch->numslots; i++) { struct ahci_slot *slot = &ch->slot[i]; /* Do we have a running request on slot? */ if (slot->state < AHCI_SLOT_RUNNING) continue; if ((ch->toslots & (1 << i)) == 0) continue; callout_reset_sbt(&slot->timeout, SBT_1MS * slot->ccb->ccb_h.timeout / 2, 0, ahci_timeout, slot, 0); } } /* Locked by callout mechanism. */ static void ahci_timeout(void *arg) { struct ahci_slot *slot = arg; struct ahci_channel *ch = slot->ch; device_t dev = ch->dev; uint32_t sstatus; int ccs; int i; /* Check for stale timeout. */ if (slot->state < AHCI_SLOT_RUNNING) return; /* Check if slot was not being executed last time we checked. */ if (slot->state < AHCI_SLOT_EXECUTING) { /* Check if slot started executing. */ sstatus = ATA_INL(ch->r_mem, AHCI_P_SACT); ccs = (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CCS_MASK) >> AHCI_P_CMD_CCS_SHIFT; if ((sstatus & (1 << slot->slot)) != 0 || ccs == slot->slot || ch->fbs_enabled || ch->wrongccs) slot->state = AHCI_SLOT_EXECUTING; else if ((ch->rslots & (1 << ccs)) == 0) { ch->wrongccs = 1; slot->state = AHCI_SLOT_EXECUTING; } callout_reset_sbt(&slot->timeout, SBT_1MS * slot->ccb->ccb_h.timeout / 2, 0, ahci_timeout, slot, 0); return; } device_printf(dev, "Timeout on slot %d port %d\n", slot->slot, slot->ccb->ccb_h.target_id & 0x0f); device_printf(dev, "is %08x cs %08x ss %08x rs %08x tfd %02x " "serr %08x cmd %08x\n", ATA_INL(ch->r_mem, AHCI_P_IS), ATA_INL(ch->r_mem, AHCI_P_CI), ATA_INL(ch->r_mem, AHCI_P_SACT), ch->rslots, ATA_INL(ch->r_mem, AHCI_P_TFD), ATA_INL(ch->r_mem, AHCI_P_SERR), ATA_INL(ch->r_mem, AHCI_P_CMD)); /* Handle frozen command. */ if (ch->frozen) { union ccb *fccb = ch->frozen; ch->frozen = NULL; fccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_RELEASE_SIMQ; if (!(fccb->ccb_h.status & CAM_DEV_QFRZN)) { xpt_freeze_devq(fccb->ccb_h.path, 1); fccb->ccb_h.status |= CAM_DEV_QFRZN; } ahci_done(ch, fccb); } if (!ch->fbs_enabled && !ch->wrongccs) { /* Without FBS we know real timeout source. */ ch->fatalerr = 1; /* Handle command with timeout. */ ahci_end_transaction(&ch->slot[slot->slot], AHCI_ERR_TIMEOUT); /* Handle the rest of commands. */ for (i = 0; i < ch->numslots; i++) { /* Do we have a running request on slot? */ if (ch->slot[i].state < AHCI_SLOT_RUNNING) continue; ahci_end_transaction(&ch->slot[i], AHCI_ERR_INNOCENT); } } else { /* With FBS we wait for other commands timeout and pray. */ if (ch->toslots == 0) xpt_freeze_simq(ch->sim, 1); ch->toslots |= (1 << slot->slot); if ((ch->rslots & ~ch->toslots) == 0) ahci_process_timeout(ch); else device_printf(dev, " ... waiting for slots %08x\n", ch->rslots & ~ch->toslots); } } /* Must be called with channel locked. */ static void ahci_end_transaction(struct ahci_slot *slot, enum ahci_err_type et) { struct ahci_channel *ch = slot->ch; union ccb *ccb = slot->ccb; struct ahci_cmd_list *clp; int lastto; uint32_t sig; bus_dmamap_sync(ch->dma.work_tag, ch->dma.work_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); clp = (struct ahci_cmd_list *) (ch->dma.work + AHCI_CL_OFFSET + (AHCI_CL_SIZE * slot->slot)); /* Read result registers to the result struct * May be incorrect if several commands finished same time, * so read only when sure or have to. */ if (ccb->ccb_h.func_code == XPT_ATA_IO) { struct ata_res *res = &ccb->ataio.res; if ((et == AHCI_ERR_TFE) || (ccb->ataio.cmd.flags & CAM_ATAIO_NEEDRESULT)) { u_int8_t *fis = ch->dma.rfis + 0x40; bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map, BUS_DMASYNC_POSTREAD); if (ch->fbs_enabled) { fis += ccb->ccb_h.target_id * 256; res->status = fis[2]; res->error = fis[3]; } else { uint16_t tfd = ATA_INL(ch->r_mem, AHCI_P_TFD); res->status = tfd; res->error = tfd >> 8; } res->lba_low = fis[4]; res->lba_mid = fis[5]; res->lba_high = fis[6]; res->device = fis[7]; res->lba_low_exp = fis[8]; res->lba_mid_exp = fis[9]; res->lba_high_exp = fis[10]; res->sector_count = fis[12]; res->sector_count_exp = fis[13]; /* * Some weird controllers do not return signature in * FIS receive area. Read it from PxSIG register. */ if ((ch->quirks & AHCI_Q_ALTSIG) && (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) && (ccb->ataio.cmd.control & ATA_A_RESET) == 0) { sig = ATA_INL(ch->r_mem, AHCI_P_SIG); res->lba_high = sig >> 24; res->lba_mid = sig >> 16; res->lba_low = sig >> 8; res->sector_count = sig; } } else bzero(res, sizeof(*res)); if ((ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) == 0 && (ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE && (ch->quirks & AHCI_Q_NOCOUNT) == 0) { ccb->ataio.resid = ccb->ataio.dxfer_len - le32toh(clp->bytecount); } } else { if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE && (ch->quirks & AHCI_Q_NOCOUNT) == 0) { ccb->csio.resid = ccb->csio.dxfer_len - le32toh(clp->bytecount); } } if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) { bus_dmamap_sync(ch->dma.data_tag, slot->dma.data_map, (ccb->ccb_h.flags & CAM_DIR_IN) ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ch->dma.data_tag, slot->dma.data_map); } if (et != AHCI_ERR_NONE) ch->eslots |= (1 << slot->slot); /* In case of error, freeze device for proper recovery. */ if ((et != AHCI_ERR_NONE) && (!ch->recoverycmd) && !(ccb->ccb_h.status & CAM_DEV_QFRZN)) { xpt_freeze_devq(ccb->ccb_h.path, 1); ccb->ccb_h.status |= CAM_DEV_QFRZN; } /* Set proper result status. */ ccb->ccb_h.status &= ~CAM_STATUS_MASK; switch (et) { case AHCI_ERR_NONE: ccb->ccb_h.status |= CAM_REQ_CMP; if (ccb->ccb_h.func_code == XPT_SCSI_IO) ccb->csio.scsi_status = SCSI_STATUS_OK; break; case AHCI_ERR_INVALID: ch->fatalerr = 1; ccb->ccb_h.status |= CAM_REQ_INVALID; break; case AHCI_ERR_INNOCENT: ccb->ccb_h.status |= CAM_REQUEUE_REQ; break; case AHCI_ERR_TFE: case AHCI_ERR_NCQ: if (ccb->ccb_h.func_code == XPT_SCSI_IO) { ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; ccb->csio.scsi_status = SCSI_STATUS_CHECK_COND; } else { ccb->ccb_h.status |= CAM_ATA_STATUS_ERROR; } break; case AHCI_ERR_SATA: ch->fatalerr = 1; if (!ch->recoverycmd) { xpt_freeze_simq(ch->sim, 1); ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= CAM_RELEASE_SIMQ; } ccb->ccb_h.status |= CAM_UNCOR_PARITY; break; case AHCI_ERR_TIMEOUT: if (!ch->recoverycmd) { xpt_freeze_simq(ch->sim, 1); ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= CAM_RELEASE_SIMQ; } ccb->ccb_h.status |= CAM_CMD_TIMEOUT; break; default: ch->fatalerr = 1; ccb->ccb_h.status |= CAM_REQ_CMP_ERR; } /* Free slot. */ ch->oslots &= ~(1 << slot->slot); ch->rslots &= ~(1 << slot->slot); ch->aslots &= ~(1 << slot->slot); slot->state = AHCI_SLOT_EMPTY; slot->ccb = NULL; /* Update channel stats. */ ch->numrslots--; ch->numrslotspd[ccb->ccb_h.target_id]--; if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) { ch->numtslots--; ch->numtslotspd[ccb->ccb_h.target_id]--; } /* Cancel timeout state if request completed normally. */ if (et != AHCI_ERR_TIMEOUT) { lastto = (ch->toslots == (1 << slot->slot)); ch->toslots &= ~(1 << slot->slot); if (lastto) xpt_release_simq(ch->sim, TRUE); } /* If it was first request of reset sequence and there is no error, * proceed to second request. */ if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) && (ccb->ataio.cmd.control & ATA_A_RESET) && et == AHCI_ERR_NONE) { ccb->ataio.cmd.control &= ~ATA_A_RESET; ahci_begin_transaction(ch, ccb); return; } /* If it was our READ LOG command - process it. */ if (ccb->ccb_h.recovery_type == RECOVERY_READ_LOG) { ahci_process_read_log(ch, ccb); /* If it was our REQUEST SENSE command - process it. */ } else if (ccb->ccb_h.recovery_type == RECOVERY_REQUEST_SENSE) { ahci_process_request_sense(ch, ccb); /* If it was NCQ or ATAPI command error, put result on hold. */ } else if (et == AHCI_ERR_NCQ || ((ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_SCSI_STATUS_ERROR && (ccb->ccb_h.flags & CAM_DIS_AUTOSENSE) == 0)) { ch->hold[slot->slot] = ccb; ch->numhslots++; } else ahci_done(ch, ccb); /* If we have no other active commands, ... */ if (ch->rslots == 0) { /* if there was fatal error - reset port. */ if (ch->toslots != 0 || ch->fatalerr) { ahci_reset(ch); } else { /* if we have slots in error, we can reinit port. */ if (ch->eslots != 0) { ahci_stop(ch); ahci_clo(ch); ahci_start(ch, 1); } /* if there commands on hold, we can do READ LOG. */ if (!ch->recoverycmd && ch->numhslots) ahci_issue_recovery(ch); } /* If all the rest of commands are in timeout - give them chance. */ } else if ((ch->rslots & ~ch->toslots) == 0 && et != AHCI_ERR_TIMEOUT) ahci_rearm_timeout(ch); /* Unfreeze frozen command. */ if (ch->frozen && !ahci_check_collision(ch, ch->frozen)) { union ccb *fccb = ch->frozen; ch->frozen = NULL; ahci_begin_transaction(ch, fccb); xpt_release_simq(ch->sim, TRUE); } /* Start PM timer. */ if (ch->numrslots == 0 && ch->pm_level > 3 && (ch->curr[ch->pm_present ? 15 : 0].caps & CTS_SATA_CAPS_D_PMREQ)) { callout_schedule(&ch->pm_timer, (ch->pm_level == 4) ? hz / 1000 : hz / 8); } } static void ahci_issue_recovery(struct ahci_channel *ch) { union ccb *ccb; struct ccb_ataio *ataio; struct ccb_scsiio *csio; int i; /* Find some held command. */ for (i = 0; i < ch->numslots; i++) { if (ch->hold[i]) break; } ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { device_printf(ch->dev, "Unable to allocate recovery command\n"); completeall: /* We can't do anything -- complete held commands. */ for (i = 0; i < ch->numslots; i++) { if (ch->hold[i] == NULL) continue; ch->hold[i]->ccb_h.status &= ~CAM_STATUS_MASK; ch->hold[i]->ccb_h.status |= CAM_RESRC_UNAVAIL; ahci_done(ch, ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; } ahci_reset(ch); return; } ccb->ccb_h = ch->hold[i]->ccb_h; /* Reuse old header. */ if (ccb->ccb_h.func_code == XPT_ATA_IO) { /* READ LOG */ ccb->ccb_h.recovery_type = RECOVERY_READ_LOG; ccb->ccb_h.func_code = XPT_ATA_IO; ccb->ccb_h.flags = CAM_DIR_IN; ccb->ccb_h.timeout = 1000; /* 1s should be enough. */ ataio = &ccb->ataio; ataio->data_ptr = malloc(512, M_AHCI, M_NOWAIT); if (ataio->data_ptr == NULL) { xpt_free_ccb(ccb); device_printf(ch->dev, "Unable to allocate memory for READ LOG command\n"); goto completeall; } ataio->dxfer_len = 512; bzero(&ataio->cmd, sizeof(ataio->cmd)); ataio->cmd.flags = CAM_ATAIO_48BIT; ataio->cmd.command = 0x2F; /* READ LOG EXT */ ataio->cmd.sector_count = 1; ataio->cmd.sector_count_exp = 0; ataio->cmd.lba_low = 0x10; ataio->cmd.lba_mid = 0; ataio->cmd.lba_mid_exp = 0; } else { /* REQUEST SENSE */ ccb->ccb_h.recovery_type = RECOVERY_REQUEST_SENSE; ccb->ccb_h.recovery_slot = i; ccb->ccb_h.func_code = XPT_SCSI_IO; ccb->ccb_h.flags = CAM_DIR_IN; ccb->ccb_h.status = 0; ccb->ccb_h.timeout = 1000; /* 1s should be enough. */ csio = &ccb->csio; csio->data_ptr = (void *)&ch->hold[i]->csio.sense_data; csio->dxfer_len = ch->hold[i]->csio.sense_len; csio->cdb_len = 6; bzero(&csio->cdb_io, sizeof(csio->cdb_io)); csio->cdb_io.cdb_bytes[0] = 0x03; csio->cdb_io.cdb_bytes[4] = csio->dxfer_len; } /* Freeze SIM while doing recovery. */ ch->recoverycmd = 1; xpt_freeze_simq(ch->sim, 1); ahci_begin_transaction(ch, ccb); } static void ahci_process_read_log(struct ahci_channel *ch, union ccb *ccb) { uint8_t *data; struct ata_res *res; int i; ch->recoverycmd = 0; data = ccb->ataio.data_ptr; if ((ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP && (data[0] & 0x80) == 0) { for (i = 0; i < ch->numslots; i++) { if (!ch->hold[i]) continue; if (ch->hold[i]->ccb_h.func_code != XPT_ATA_IO) continue; if ((data[0] & 0x1F) == i) { res = &ch->hold[i]->ataio.res; res->status = data[2]; res->error = data[3]; res->lba_low = data[4]; res->lba_mid = data[5]; res->lba_high = data[6]; res->device = data[7]; res->lba_low_exp = data[8]; res->lba_mid_exp = data[9]; res->lba_high_exp = data[10]; res->sector_count = data[12]; res->sector_count_exp = data[13]; } else { ch->hold[i]->ccb_h.status &= ~CAM_STATUS_MASK; ch->hold[i]->ccb_h.status |= CAM_REQUEUE_REQ; } ahci_done(ch, ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; } } else { if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) device_printf(ch->dev, "Error while READ LOG EXT\n"); else if ((data[0] & 0x80) == 0) { device_printf(ch->dev, "Non-queued command error in READ LOG EXT\n"); } for (i = 0; i < ch->numslots; i++) { if (!ch->hold[i]) continue; if (ch->hold[i]->ccb_h.func_code != XPT_ATA_IO) continue; ahci_done(ch, ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; } } free(ccb->ataio.data_ptr, M_AHCI); xpt_free_ccb(ccb); xpt_release_simq(ch->sim, TRUE); } static void ahci_process_request_sense(struct ahci_channel *ch, union ccb *ccb) { int i; ch->recoverycmd = 0; i = ccb->ccb_h.recovery_slot; if ((ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) { ch->hold[i]->ccb_h.status |= CAM_AUTOSNS_VALID; } else { ch->hold[i]->ccb_h.status &= ~CAM_STATUS_MASK; ch->hold[i]->ccb_h.status |= CAM_AUTOSENSE_FAIL; } ahci_done(ch, ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; xpt_free_ccb(ccb); xpt_release_simq(ch->sim, TRUE); } static void ahci_start(struct ahci_channel *ch, int fbs) { u_int32_t cmd; /* Run the channel start callback, if any. */ if (ch->start) ch->start(ch); /* Clear SATA error register */ ATA_OUTL(ch->r_mem, AHCI_P_SERR, 0xFFFFFFFF); /* Clear any interrupts pending on this channel */ ATA_OUTL(ch->r_mem, AHCI_P_IS, 0xFFFFFFFF); /* Configure FIS-based switching if supported. */ if (ch->chcaps & AHCI_P_CMD_FBSCP) { ch->fbs_enabled = (fbs && ch->pm_present) ? 1 : 0; ATA_OUTL(ch->r_mem, AHCI_P_FBS, ch->fbs_enabled ? AHCI_P_FBS_EN : 0); } /* Start operations on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); cmd &= ~AHCI_P_CMD_PMA; ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd | AHCI_P_CMD_ST | (ch->pm_present ? AHCI_P_CMD_PMA : 0)); } static void ahci_stop(struct ahci_channel *ch) { u_int32_t cmd; int timeout; /* Kill all activity on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd & ~AHCI_P_CMD_ST); /* Wait for activity stop. */ timeout = 0; do { DELAY(10); if (timeout++ > 50000) { device_printf(ch->dev, "stopping AHCI engine failed\n"); break; } } while (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CR); ch->eslots = 0; } static void ahci_clo(struct ahci_channel *ch) { u_int32_t cmd; int timeout; /* Issue Command List Override if supported */ if (ch->caps & AHCI_CAP_SCLO) { cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); cmd |= AHCI_P_CMD_CLO; ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd); timeout = 0; do { DELAY(10); if (timeout++ > 50000) { device_printf(ch->dev, "executing CLO failed\n"); break; } } while (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CLO); } } static void ahci_stop_fr(struct ahci_channel *ch) { u_int32_t cmd; int timeout; /* Kill all FIS reception on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd & ~AHCI_P_CMD_FRE); /* Wait for FIS reception stop. */ timeout = 0; do { DELAY(10); if (timeout++ > 50000) { device_printf(ch->dev, "stopping AHCI FR engine failed\n"); break; } } while (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_FR); } static void ahci_start_fr(struct ahci_channel *ch) { u_int32_t cmd; /* Start FIS reception on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd | AHCI_P_CMD_FRE); } static int ahci_wait_ready(struct ahci_channel *ch, int t, int t0) { int timeout = 0; uint32_t val; while ((val = ATA_INL(ch->r_mem, AHCI_P_TFD)) & (ATA_S_BUSY | ATA_S_DRQ)) { if (timeout > t) { if (t != 0) { device_printf(ch->dev, "AHCI reset: device not ready after %dms " "(tfd = %08x)\n", MAX(t, 0) + t0, val); } return (EBUSY); } DELAY(1000); timeout++; } if (bootverbose) device_printf(ch->dev, "AHCI reset: device ready after %dms\n", timeout + t0); return (0); } static void ahci_reset_to(void *arg) { struct ahci_channel *ch = arg; if (ch->resetting == 0) return; ch->resetting--; if (ahci_wait_ready(ch, ch->resetting == 0 ? -1 : 0, (310 - ch->resetting) * 100) == 0) { ch->resetting = 0; ahci_start(ch, 1); xpt_release_simq(ch->sim, TRUE); return; } if (ch->resetting == 0) { ahci_clo(ch); ahci_start(ch, 1); xpt_release_simq(ch->sim, TRUE); return; } callout_schedule(&ch->reset_timer, hz / 10); } static void ahci_reset(struct ahci_channel *ch) { struct ahci_controller *ctlr = device_get_softc(device_get_parent(ch->dev)); int i; xpt_freeze_simq(ch->sim, 1); if (bootverbose) device_printf(ch->dev, "AHCI reset...\n"); /* Forget about previous reset. */ if (ch->resetting) { ch->resetting = 0; callout_stop(&ch->reset_timer); xpt_release_simq(ch->sim, TRUE); } /* Requeue freezed command. */ if (ch->frozen) { union ccb *fccb = ch->frozen; ch->frozen = NULL; fccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_RELEASE_SIMQ; if (!(fccb->ccb_h.status & CAM_DEV_QFRZN)) { xpt_freeze_devq(fccb->ccb_h.path, 1); fccb->ccb_h.status |= CAM_DEV_QFRZN; } ahci_done(ch, fccb); } /* Kill the engine and requeue all running commands. */ ahci_stop(ch); for (i = 0; i < ch->numslots; i++) { /* Do we have a running request on slot? */ if (ch->slot[i].state < AHCI_SLOT_RUNNING) continue; /* XXX; Commands in loading state. */ ahci_end_transaction(&ch->slot[i], AHCI_ERR_INNOCENT); } for (i = 0; i < ch->numslots; i++) { if (!ch->hold[i]) continue; ahci_done(ch, ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; } if (ch->toslots != 0) xpt_release_simq(ch->sim, TRUE); ch->eslots = 0; ch->toslots = 0; ch->wrongccs = 0; ch->fatalerr = 0; /* Tell the XPT about the event */ xpt_async(AC_BUS_RESET, ch->path, NULL); /* Disable port interrupts */ ATA_OUTL(ch->r_mem, AHCI_P_IE, 0); /* Reset and reconnect PHY, */ if (!ahci_sata_phy_reset(ch)) { if (bootverbose) device_printf(ch->dev, "AHCI reset: device not found\n"); ch->devices = 0; /* Enable wanted port interrupts */ ATA_OUTL(ch->r_mem, AHCI_P_IE, (((ch->pm_level != 0) ? AHCI_P_IX_CPD | AHCI_P_IX_MP : 0) | AHCI_P_IX_PRC | AHCI_P_IX_PC)); xpt_release_simq(ch->sim, TRUE); return; } if (bootverbose) device_printf(ch->dev, "AHCI reset: device found\n"); /* Wait for clearing busy status. */ if (ahci_wait_ready(ch, dumping ? 31000 : 0, 0)) { if (dumping) ahci_clo(ch); else ch->resetting = 310; } ch->devices = 1; /* Enable wanted port interrupts */ ATA_OUTL(ch->r_mem, AHCI_P_IE, (((ch->pm_level != 0) ? AHCI_P_IX_CPD | AHCI_P_IX_MP : 0) | AHCI_P_IX_TFE | AHCI_P_IX_HBF | AHCI_P_IX_HBD | AHCI_P_IX_IF | AHCI_P_IX_OF | ((ch->pm_level == 0) ? AHCI_P_IX_PRC : 0) | AHCI_P_IX_PC | AHCI_P_IX_DP | AHCI_P_IX_UF | (ctlr->ccc ? 0 : AHCI_P_IX_SDB) | AHCI_P_IX_DS | AHCI_P_IX_PS | (ctlr->ccc ? 0 : AHCI_P_IX_DHR))); if (ch->resetting) callout_reset(&ch->reset_timer, hz / 10, ahci_reset_to, ch); else { ahci_start(ch, 1); xpt_release_simq(ch->sim, TRUE); } } static int ahci_setup_fis(struct ahci_channel *ch, struct ahci_cmd_tab *ctp, union ccb *ccb, int tag) { u_int8_t *fis = &ctp->cfis[0]; bzero(fis, 20); fis[0] = 0x27; /* host to device */ fis[1] = (ccb->ccb_h.target_id & 0x0f); if (ccb->ccb_h.func_code == XPT_SCSI_IO) { fis[1] |= 0x80; fis[2] = ATA_PACKET_CMD; if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE && ch->curr[ccb->ccb_h.target_id].mode >= ATA_DMA) fis[3] = ATA_F_DMA; else { fis[5] = ccb->csio.dxfer_len; fis[6] = ccb->csio.dxfer_len >> 8; } fis[7] = ATA_D_LBA; fis[15] = ATA_A_4BIT; bcopy((ccb->ccb_h.flags & CAM_CDB_POINTER) ? ccb->csio.cdb_io.cdb_ptr : ccb->csio.cdb_io.cdb_bytes, ctp->acmd, ccb->csio.cdb_len); bzero(ctp->acmd + ccb->csio.cdb_len, 32 - ccb->csio.cdb_len); } else if ((ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) == 0) { fis[1] |= 0x80; fis[2] = ccb->ataio.cmd.command; fis[3] = ccb->ataio.cmd.features; fis[4] = ccb->ataio.cmd.lba_low; fis[5] = ccb->ataio.cmd.lba_mid; fis[6] = ccb->ataio.cmd.lba_high; fis[7] = ccb->ataio.cmd.device; fis[8] = ccb->ataio.cmd.lba_low_exp; fis[9] = ccb->ataio.cmd.lba_mid_exp; fis[10] = ccb->ataio.cmd.lba_high_exp; fis[11] = ccb->ataio.cmd.features_exp; fis[12] = ccb->ataio.cmd.sector_count; if (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) { fis[12] &= 0x07; fis[12] |= tag << 3; } fis[13] = ccb->ataio.cmd.sector_count_exp; if (ccb->ataio.ata_flags & ATA_FLAG_ICC) fis[14] = ccb->ataio.icc; fis[15] = ATA_A_4BIT; if (ccb->ataio.ata_flags & ATA_FLAG_AUX) { fis[16] = ccb->ataio.aux & 0xff; fis[17] = (ccb->ataio.aux >> 8) & 0xff; fis[18] = (ccb->ataio.aux >> 16) & 0xff; fis[19] = (ccb->ataio.aux >> 24) & 0xff; } } else { fis[15] = ccb->ataio.cmd.control; } return (20); } static int ahci_sata_connect(struct ahci_channel *ch) { u_int32_t status; int timeout, found = 0; /* Wait up to 100ms for "connect well" */ for (timeout = 0; timeout < 1000 ; timeout++) { status = ATA_INL(ch->r_mem, AHCI_P_SSTS); if ((status & ATA_SS_DET_MASK) != ATA_SS_DET_NO_DEVICE) found = 1; if (((status & ATA_SS_DET_MASK) == ATA_SS_DET_PHY_ONLINE) && ((status & ATA_SS_SPD_MASK) != ATA_SS_SPD_NO_SPEED) && ((status & ATA_SS_IPM_MASK) == ATA_SS_IPM_ACTIVE)) break; if ((status & ATA_SS_DET_MASK) == ATA_SS_DET_PHY_OFFLINE) { if (bootverbose) { device_printf(ch->dev, "SATA offline status=%08x\n", status); } return (0); } if (found == 0 && timeout >= 100) break; DELAY(100); } if (timeout >= 1000 || !found) { if (bootverbose) { device_printf(ch->dev, "SATA connect timeout time=%dus status=%08x\n", timeout * 100, status); } return (0); } if (bootverbose) { device_printf(ch->dev, "SATA connect time=%dus status=%08x\n", timeout * 100, status); } /* Clear SATA error register */ ATA_OUTL(ch->r_mem, AHCI_P_SERR, 0xffffffff); return (1); } static int ahci_sata_phy_reset(struct ahci_channel *ch) { int sata_rev; uint32_t val, detval; if (ch->listening) { val = ATA_INL(ch->r_mem, AHCI_P_CMD); val |= AHCI_P_CMD_SUD; ATA_OUTL(ch->r_mem, AHCI_P_CMD, val); ch->listening = 0; } sata_rev = ch->user[ch->pm_present ? 15 : 0].revision; if (sata_rev == 1) val = ATA_SC_SPD_SPEED_GEN1; else if (sata_rev == 2) val = ATA_SC_SPD_SPEED_GEN2; else if (sata_rev == 3) val = ATA_SC_SPD_SPEED_GEN3; else val = 0; detval = ahci_ch_detval(ch, ATA_SC_DET_RESET); ATA_OUTL(ch->r_mem, AHCI_P_SCTL, detval | val | ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER); DELAY(1000); detval = ahci_ch_detval(ch, ATA_SC_DET_IDLE); ATA_OUTL(ch->r_mem, AHCI_P_SCTL, detval | val | ((ch->pm_level > 0) ? 0 : (ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER))); if (!ahci_sata_connect(ch)) { if (ch->caps & AHCI_CAP_SSS) { val = ATA_INL(ch->r_mem, AHCI_P_CMD); val &= ~AHCI_P_CMD_SUD; ATA_OUTL(ch->r_mem, AHCI_P_CMD, val); ch->listening = 1; } else if (ch->pm_level > 0) ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_DISABLE); return (0); } return (1); } static int ahci_check_ids(struct ahci_channel *ch, union ccb *ccb) { if (ccb->ccb_h.target_id > ((ch->caps & AHCI_CAP_SPM) ? 15 : 0)) { ccb->ccb_h.status = CAM_TID_INVALID; ahci_done(ch, ccb); return (-1); } if (ccb->ccb_h.target_lun != 0) { ccb->ccb_h.status = CAM_LUN_INVALID; ahci_done(ch, ccb); return (-1); } return (0); } static void ahciaction(struct cam_sim *sim, union ccb *ccb) { struct ahci_channel *ch; CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE, ("ahciaction func_code=%x\n", ccb->ccb_h.func_code)); ch = (struct ahci_channel *)cam_sim_softc(sim); switch (ccb->ccb_h.func_code) { /* Common cases first */ case XPT_ATA_IO: /* Execute the requested I/O operation */ case XPT_SCSI_IO: if (ahci_check_ids(ch, ccb)) return; if (ch->devices == 0 || (ch->pm_present == 0 && ccb->ccb_h.target_id > 0 && ccb->ccb_h.target_id < 15)) { ccb->ccb_h.status = CAM_SEL_TIMEOUT; break; } ccb->ccb_h.recovery_type = RECOVERY_NONE; /* Check for command collision. */ if (ahci_check_collision(ch, ccb)) { /* Freeze command. */ ch->frozen = ccb; /* We have only one frozen slot, so freeze simq also. */ xpt_freeze_simq(ch->sim, 1); return; } ahci_begin_transaction(ch, ccb); return; case XPT_ABORT: /* Abort the specified CCB */ /* XXX Implement */ ccb->ccb_h.status = CAM_REQ_INVALID; break; case XPT_SET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; struct ahci_device *d; if (ahci_check_ids(ch, ccb)) return; if (cts->type == CTS_TYPE_CURRENT_SETTINGS) d = &ch->curr[ccb->ccb_h.target_id]; else d = &ch->user[ccb->ccb_h.target_id]; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_REVISION) d->revision = cts->xport_specific.sata.revision; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_MODE) d->mode = cts->xport_specific.sata.mode; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_BYTECOUNT) d->bytecount = min(8192, cts->xport_specific.sata.bytecount); if (cts->xport_specific.sata.valid & CTS_SATA_VALID_TAGS) d->tags = min(ch->numslots, cts->xport_specific.sata.tags); if (cts->xport_specific.sata.valid & CTS_SATA_VALID_PM) ch->pm_present = cts->xport_specific.sata.pm_present; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_ATAPI) d->atapi = cts->xport_specific.sata.atapi; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_CAPS) d->caps = cts->xport_specific.sata.caps; ccb->ccb_h.status = CAM_REQ_CMP; break; } case XPT_GET_TRAN_SETTINGS: /* Get default/user set transfer settings for the target */ { struct ccb_trans_settings *cts = &ccb->cts; struct ahci_device *d; uint32_t status; if (ahci_check_ids(ch, ccb)) return; if (cts->type == CTS_TYPE_CURRENT_SETTINGS) d = &ch->curr[ccb->ccb_h.target_id]; else d = &ch->user[ccb->ccb_h.target_id]; cts->protocol = PROTO_UNSPECIFIED; cts->protocol_version = PROTO_VERSION_UNSPECIFIED; cts->transport = XPORT_SATA; cts->transport_version = XPORT_VERSION_UNSPECIFIED; cts->proto_specific.valid = 0; cts->xport_specific.sata.valid = 0; if (cts->type == CTS_TYPE_CURRENT_SETTINGS && (ccb->ccb_h.target_id == 15 || (ccb->ccb_h.target_id == 0 && !ch->pm_present))) { status = ATA_INL(ch->r_mem, AHCI_P_SSTS) & ATA_SS_SPD_MASK; if (status & 0x0f0) { cts->xport_specific.sata.revision = (status & 0x0f0) >> 4; cts->xport_specific.sata.valid |= CTS_SATA_VALID_REVISION; } cts->xport_specific.sata.caps = d->caps & CTS_SATA_CAPS_D; if (ch->pm_level) { if (ch->caps & (AHCI_CAP_PSC | AHCI_CAP_SSC)) cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_PMREQ; if (ch->caps2 & AHCI_CAP2_APST) cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_APST; } if ((ch->caps & AHCI_CAP_SNCQ) && (ch->quirks & AHCI_Q_NOAA) == 0) cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_DMAAA; cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_AN; cts->xport_specific.sata.caps &= ch->user[ccb->ccb_h.target_id].caps; cts->xport_specific.sata.valid |= CTS_SATA_VALID_CAPS; } else { cts->xport_specific.sata.revision = d->revision; cts->xport_specific.sata.valid |= CTS_SATA_VALID_REVISION; cts->xport_specific.sata.caps = d->caps; cts->xport_specific.sata.valid |= CTS_SATA_VALID_CAPS; } cts->xport_specific.sata.mode = d->mode; cts->xport_specific.sata.valid |= CTS_SATA_VALID_MODE; cts->xport_specific.sata.bytecount = d->bytecount; cts->xport_specific.sata.valid |= CTS_SATA_VALID_BYTECOUNT; cts->xport_specific.sata.pm_present = ch->pm_present; cts->xport_specific.sata.valid |= CTS_SATA_VALID_PM; cts->xport_specific.sata.tags = d->tags; cts->xport_specific.sata.valid |= CTS_SATA_VALID_TAGS; cts->xport_specific.sata.atapi = d->atapi; cts->xport_specific.sata.valid |= CTS_SATA_VALID_ATAPI; ccb->ccb_h.status = CAM_REQ_CMP; break; } case XPT_RESET_BUS: /* Reset the specified SCSI bus */ case XPT_RESET_DEV: /* Bus Device Reset the specified SCSI device */ ahci_reset(ch); ccb->ccb_h.status = CAM_REQ_CMP; break; case XPT_TERM_IO: /* Terminate the I/O process */ /* XXX Implement */ ccb->ccb_h.status = CAM_REQ_INVALID; break; case XPT_PATH_INQ: /* Path routing inquiry */ { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; /* XXX??? */ cpi->hba_inquiry = PI_SDTR_ABLE; if (ch->caps & AHCI_CAP_SNCQ) cpi->hba_inquiry |= PI_TAG_ABLE; if (ch->caps & AHCI_CAP_SPM) cpi->hba_inquiry |= PI_SATAPM; cpi->target_sprt = 0; cpi->hba_misc = PIM_SEQSCAN | PIM_UNMAPPED; if ((ch->quirks & AHCI_Q_NOAUX) == 0) cpi->hba_misc |= PIM_ATA_EXT; cpi->hba_eng_cnt = 0; if (ch->caps & AHCI_CAP_SPM) cpi->max_target = 15; else cpi->max_target = 0; cpi->max_lun = 0; cpi->initiator_id = 0; cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 150000; strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strlcpy(cpi->hba_vid, "AHCI", HBA_IDLEN); strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->transport = XPORT_SATA; cpi->transport_version = XPORT_VERSION_UNSPECIFIED; cpi->protocol = PROTO_ATA; cpi->protocol_version = PROTO_VERSION_UNSPECIFIED; cpi->maxio = ctob(AHCI_SG_ENTRIES - 1); /* ATI SB600 can't handle 256 sectors with FPDMA (NCQ). */ if (ch->quirks & AHCI_Q_MAXIO_64K) cpi->maxio = min(cpi->maxio, 128 * 512); cpi->hba_vendor = ch->vendorid; cpi->hba_device = ch->deviceid; cpi->hba_subvendor = ch->subvendorid; cpi->hba_subdevice = ch->subdeviceid; cpi->ccb_h.status = CAM_REQ_CMP; break; } default: ccb->ccb_h.status = CAM_REQ_INVALID; break; } ahci_done(ch, ccb); } static void ahcipoll(struct cam_sim *sim) { struct ahci_channel *ch = (struct ahci_channel *)cam_sim_softc(sim); uint32_t istatus; /* Read interrupt statuses and process if any. */ istatus = ATA_INL(ch->r_mem, AHCI_P_IS); if (istatus != 0) ahci_ch_intr_main(ch, istatus); if (ch->resetting != 0 && (--ch->resetpolldiv <= 0 || !callout_pending(&ch->reset_timer))) { ch->resetpolldiv = 1000; ahci_reset_to(ch); } } devclass_t ahci_devclass; MODULE_VERSION(ahci, 1); MODULE_DEPEND(ahci, cam, 1, 1, 1); diff --git a/sys/dev/ioat/ioat.c b/sys/dev/ioat/ioat.c index 7e6a33a4285b..45a6c273822d 100644 --- a/sys/dev/ioat/ioat.c +++ b/sys/dev/ioat/ioat.c @@ -1,2146 +1,2146 @@ /*- * Copyright (C) 2012 Intel Corporation * All rights reserved. * Copyright (C) 2018 Alexander Motin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include "ioat.h" #include "ioat_hw.h" #include "ioat_internal.h" #ifndef BUS_SPACE_MAXADDR_40BIT #define BUS_SPACE_MAXADDR_40BIT MIN(BUS_SPACE_MAXADDR, 0xFFFFFFFFFFULL) #endif #ifndef BUS_SPACE_MAXADDR_46BIT #define BUS_SPACE_MAXADDR_46BIT MIN(BUS_SPACE_MAXADDR, 0x3FFFFFFFFFFFULL) #endif static int ioat_probe(device_t device); static int ioat_attach(device_t device); static int ioat_detach(device_t device); static int ioat_setup_intr(struct ioat_softc *ioat); static int ioat_teardown_intr(struct ioat_softc *ioat); static int ioat3_attach(device_t device); static int ioat_start_channel(struct ioat_softc *ioat); static int ioat_map_pci_bar(struct ioat_softc *ioat); static void ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error); static void ioat_interrupt_handler(void *arg); static boolean_t ioat_model_resets_msix(struct ioat_softc *ioat); static int chanerr_to_errno(uint32_t); static void ioat_process_events(struct ioat_softc *ioat, boolean_t intr); static inline uint32_t ioat_get_active(struct ioat_softc *ioat); static inline uint32_t ioat_get_ring_space(struct ioat_softc *ioat); static void ioat_free_ring(struct ioat_softc *, uint32_t size, struct ioat_descriptor *); static int ioat_reserve_space(struct ioat_softc *, uint32_t, int mflags); static union ioat_hw_descriptor *ioat_get_descriptor(struct ioat_softc *, uint32_t index); static struct ioat_descriptor *ioat_get_ring_entry(struct ioat_softc *, uint32_t index); static void ioat_halted_debug(struct ioat_softc *, uint32_t); static void ioat_poll_timer_callback(void *arg); static void dump_descriptor(void *hw_desc); static void ioat_submit_single(struct ioat_softc *ioat); static void ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg, int error); static int ioat_reset_hw(struct ioat_softc *ioat); static void ioat_reset_hw_task(void *, int); static void ioat_setup_sysctl(device_t device); static int sysctl_handle_reset(SYSCTL_HANDLER_ARGS); static void ioat_get(struct ioat_softc *); static void ioat_put(struct ioat_softc *); static void ioat_drain_locked(struct ioat_softc *); #define ioat_log_message(v, ...) do { \ if ((v) <= g_ioat_debug_level) { \ device_printf(ioat->device, __VA_ARGS__); \ } \ } while (0) MALLOC_DEFINE(M_IOAT, "ioat", "ioat driver memory allocations"); SYSCTL_NODE(_hw, OID_AUTO, ioat, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "ioat node"); static int g_force_legacy_interrupts; SYSCTL_INT(_hw_ioat, OID_AUTO, force_legacy_interrupts, CTLFLAG_RDTUN, &g_force_legacy_interrupts, 0, "Set to non-zero to force MSI-X disabled"); int g_ioat_debug_level = 0; SYSCTL_INT(_hw_ioat, OID_AUTO, debug_level, CTLFLAG_RWTUN, &g_ioat_debug_level, 0, "Set log level (0-3) for ioat(4). Higher is more verbose."); unsigned g_ioat_ring_order = 13; SYSCTL_UINT(_hw_ioat, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ioat_ring_order, 0, "Set IOAT ring order. (1 << this) == ring size."); /* * OS <-> Driver interface structures */ static device_method_t ioat_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ioat_probe), DEVMETHOD(device_attach, ioat_attach), DEVMETHOD(device_detach, ioat_detach), DEVMETHOD_END }; static driver_t ioat_pci_driver = { "ioat", ioat_pci_methods, sizeof(struct ioat_softc), }; static devclass_t ioat_devclass; DRIVER_MODULE(ioat, pci, ioat_pci_driver, ioat_devclass, 0, 0); MODULE_VERSION(ioat, 1); /* * Private data structures */ static struct ioat_softc *ioat_channel[IOAT_MAX_CHANNELS]; static unsigned ioat_channel_index = 0; SYSCTL_UINT(_hw_ioat, OID_AUTO, channels, CTLFLAG_RD, &ioat_channel_index, 0, "Number of IOAT channels attached"); static struct mtx ioat_list_mtx; MTX_SYSINIT(ioat_list_mtx, &ioat_list_mtx, "ioat list mtx", MTX_DEF); static struct _pcsid { u_int32_t type; const char *desc; } pci_ids[] = { { 0x34308086, "TBG IOAT Ch0" }, { 0x34318086, "TBG IOAT Ch1" }, { 0x34328086, "TBG IOAT Ch2" }, { 0x34338086, "TBG IOAT Ch3" }, { 0x34298086, "TBG IOAT Ch4" }, { 0x342a8086, "TBG IOAT Ch5" }, { 0x342b8086, "TBG IOAT Ch6" }, { 0x342c8086, "TBG IOAT Ch7" }, { 0x37108086, "JSF IOAT Ch0" }, { 0x37118086, "JSF IOAT Ch1" }, { 0x37128086, "JSF IOAT Ch2" }, { 0x37138086, "JSF IOAT Ch3" }, { 0x37148086, "JSF IOAT Ch4" }, { 0x37158086, "JSF IOAT Ch5" }, { 0x37168086, "JSF IOAT Ch6" }, { 0x37178086, "JSF IOAT Ch7" }, { 0x37188086, "JSF IOAT Ch0 (RAID)" }, { 0x37198086, "JSF IOAT Ch1 (RAID)" }, { 0x3c208086, "SNB IOAT Ch0" }, { 0x3c218086, "SNB IOAT Ch1" }, { 0x3c228086, "SNB IOAT Ch2" }, { 0x3c238086, "SNB IOAT Ch3" }, { 0x3c248086, "SNB IOAT Ch4" }, { 0x3c258086, "SNB IOAT Ch5" }, { 0x3c268086, "SNB IOAT Ch6" }, { 0x3c278086, "SNB IOAT Ch7" }, { 0x3c2e8086, "SNB IOAT Ch0 (RAID)" }, { 0x3c2f8086, "SNB IOAT Ch1 (RAID)" }, { 0x0e208086, "IVB IOAT Ch0" }, { 0x0e218086, "IVB IOAT Ch1" }, { 0x0e228086, "IVB IOAT Ch2" }, { 0x0e238086, "IVB IOAT Ch3" }, { 0x0e248086, "IVB IOAT Ch4" }, { 0x0e258086, "IVB IOAT Ch5" }, { 0x0e268086, "IVB IOAT Ch6" }, { 0x0e278086, "IVB IOAT Ch7" }, { 0x0e2e8086, "IVB IOAT Ch0 (RAID)" }, { 0x0e2f8086, "IVB IOAT Ch1 (RAID)" }, { 0x2f208086, "HSW IOAT Ch0" }, { 0x2f218086, "HSW IOAT Ch1" }, { 0x2f228086, "HSW IOAT Ch2" }, { 0x2f238086, "HSW IOAT Ch3" }, { 0x2f248086, "HSW IOAT Ch4" }, { 0x2f258086, "HSW IOAT Ch5" }, { 0x2f268086, "HSW IOAT Ch6" }, { 0x2f278086, "HSW IOAT Ch7" }, { 0x2f2e8086, "HSW IOAT Ch0 (RAID)" }, { 0x2f2f8086, "HSW IOAT Ch1 (RAID)" }, { 0x0c508086, "BWD IOAT Ch0" }, { 0x0c518086, "BWD IOAT Ch1" }, { 0x0c528086, "BWD IOAT Ch2" }, { 0x0c538086, "BWD IOAT Ch3" }, { 0x6f508086, "BDXDE IOAT Ch0" }, { 0x6f518086, "BDXDE IOAT Ch1" }, { 0x6f528086, "BDXDE IOAT Ch2" }, { 0x6f538086, "BDXDE IOAT Ch3" }, { 0x6f208086, "BDX IOAT Ch0" }, { 0x6f218086, "BDX IOAT Ch1" }, { 0x6f228086, "BDX IOAT Ch2" }, { 0x6f238086, "BDX IOAT Ch3" }, { 0x6f248086, "BDX IOAT Ch4" }, { 0x6f258086, "BDX IOAT Ch5" }, { 0x6f268086, "BDX IOAT Ch6" }, { 0x6f278086, "BDX IOAT Ch7" }, { 0x6f2e8086, "BDX IOAT Ch0 (RAID)" }, { 0x6f2f8086, "BDX IOAT Ch1 (RAID)" }, { 0x20218086, "SKX IOAT" }, }; MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ioat, pci_ids, nitems(pci_ids)); /* * OS <-> Driver linkage functions */ static int ioat_probe(device_t device) { struct _pcsid *ep; u_int32_t type; type = pci_get_devid(device); for (ep = pci_ids; ep < &pci_ids[nitems(pci_ids)]; ep++) { if (ep->type == type) { device_set_desc(device, ep->desc); return (0); } } return (ENXIO); } static int ioat_attach(device_t device) { struct ioat_softc *ioat; int error, i; ioat = DEVICE2SOFTC(device); ioat->device = device; if (bus_get_domain(device, &ioat->domain) != 0) ioat->domain = 0; ioat->cpu = CPU_FFS(&cpuset_domain[ioat->domain]) - 1; if (ioat->cpu < 0) ioat->cpu = CPU_FIRST(); error = ioat_map_pci_bar(ioat); if (error != 0) goto err; ioat->version = ioat_read_cbver(ioat); if (ioat->version < IOAT_VER_3_0) { error = ENODEV; goto err; } error = ioat3_attach(device); if (error != 0) goto err; error = pci_enable_busmaster(device); if (error != 0) goto err; error = ioat_setup_intr(ioat); if (error != 0) goto err; error = ioat_reset_hw(ioat); if (error != 0) goto err; ioat_process_events(ioat, FALSE); ioat_setup_sysctl(device); mtx_lock(&ioat_list_mtx); for (i = 0; i < IOAT_MAX_CHANNELS; i++) { if (ioat_channel[i] == NULL) break; } if (i >= IOAT_MAX_CHANNELS) { mtx_unlock(&ioat_list_mtx); device_printf(device, "Too many I/OAT devices in system\n"); error = ENXIO; goto err; } ioat->chan_idx = i; ioat_channel[i] = ioat; if (i >= ioat_channel_index) ioat_channel_index = i + 1; mtx_unlock(&ioat_list_mtx); ioat_test_attach(); err: if (error != 0) ioat_detach(device); return (error); } static inline int ioat_bus_dmamap_destroy(struct ioat_softc *ioat, const char *func, bus_dma_tag_t dmat, bus_dmamap_t map) { int error; error = bus_dmamap_destroy(dmat, map); if (error != 0) { ioat_log_message(0, "%s: bus_dmamap_destroy failed %d\n", func, error); } return (error); } static int ioat_detach(device_t device) { struct ioat_softc *ioat; int i, error; ioat = DEVICE2SOFTC(device); mtx_lock(&ioat_list_mtx); ioat_channel[ioat->chan_idx] = NULL; while (ioat_channel_index > 0 && ioat_channel[ioat_channel_index - 1] == NULL) ioat_channel_index--; mtx_unlock(&ioat_list_mtx); ioat_test_detach(); taskqueue_drain(taskqueue_thread, &ioat->reset_task); mtx_lock(&ioat->submit_lock); ioat->quiescing = TRUE; ioat->destroying = TRUE; wakeup(&ioat->quiescing); wakeup(&ioat->resetting); ioat_drain_locked(ioat); mtx_unlock(&ioat->submit_lock); mtx_lock(&ioat->cleanup_lock); while (ioat_get_active(ioat) > 0) msleep(&ioat->tail, &ioat->cleanup_lock, 0, "ioat_drain", 1); mtx_unlock(&ioat->cleanup_lock); ioat_teardown_intr(ioat); callout_drain(&ioat->poll_timer); pci_disable_busmaster(device); if (ioat->pci_resource != NULL) bus_release_resource(device, SYS_RES_MEMORY, ioat->pci_resource_id, ioat->pci_resource); if (ioat->data_tag != NULL) { for (i = 0; i < 1 << ioat->ring_size_order; i++) { error = ioat_bus_dmamap_destroy(ioat, __func__, ioat->data_tag, ioat->ring[i].src_dmamap); if (error != 0) return (error); } for (i = 0; i < 1 << ioat->ring_size_order; i++) { error = ioat_bus_dmamap_destroy(ioat, __func__, ioat->data_tag, ioat->ring[i].dst_dmamap); if (error != 0) return (error); } for (i = 0; i < 1 << ioat->ring_size_order; i++) { error = ioat_bus_dmamap_destroy(ioat, __func__, ioat->data_tag, ioat->ring[i].src2_dmamap); if (error != 0) return (error); } for (i = 0; i < 1 << ioat->ring_size_order; i++) { error = ioat_bus_dmamap_destroy(ioat, __func__, ioat->data_tag, ioat->ring[i].dst2_dmamap); if (error != 0) return (error); } bus_dma_tag_destroy(ioat->data_tag); } if (ioat->ring != NULL) ioat_free_ring(ioat, 1 << ioat->ring_size_order, ioat->ring); if (ioat->comp_update != NULL) { bus_dmamap_unload(ioat->comp_update_tag, ioat->comp_update_map); bus_dmamem_free(ioat->comp_update_tag, ioat->comp_update, ioat->comp_update_map); bus_dma_tag_destroy(ioat->comp_update_tag); } if (ioat->hw_desc_ring != NULL) { bus_dmamap_unload(ioat->hw_desc_tag, ioat->hw_desc_map); bus_dmamem_free(ioat->hw_desc_tag, ioat->hw_desc_ring, ioat->hw_desc_map); bus_dma_tag_destroy(ioat->hw_desc_tag); } return (0); } static int ioat_teardown_intr(struct ioat_softc *ioat) { if (ioat->tag != NULL) bus_teardown_intr(ioat->device, ioat->res, ioat->tag); if (ioat->res != NULL) bus_release_resource(ioat->device, SYS_RES_IRQ, rman_get_rid(ioat->res), ioat->res); pci_release_msi(ioat->device); return (0); } static int ioat_start_channel(struct ioat_softc *ioat) { struct ioat_dma_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct bus_dmadesc *dmadesc; uint64_t status; uint32_t chanerr; int i; ioat_acquire(&ioat->dmaengine); /* Submit 'NULL' operation manually to avoid quiescing flag */ desc = ioat_get_ring_entry(ioat, ioat->head); hw_desc = &ioat_get_descriptor(ioat, ioat->head)->dma; dmadesc = &desc->bus_dmadesc; dmadesc->callback_fn = NULL; dmadesc->callback_arg = NULL; hw_desc->u.control_raw = 0; hw_desc->u.control_generic.op = IOAT_OP_COPY; hw_desc->u.control_generic.completion_update = 1; hw_desc->size = 8; hw_desc->src_addr = 0; hw_desc->dest_addr = 0; hw_desc->u.control.null = 1; ioat_submit_single(ioat); ioat_release(&ioat->dmaengine); for (i = 0; i < 100; i++) { DELAY(1); status = ioat_get_chansts(ioat); if (is_ioat_idle(status)) return (0); } chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET); ioat_log_message(0, "could not start channel: " "status = %#jx error = %b\n", (uintmax_t)status, (int)chanerr, IOAT_CHANERR_STR); return (ENXIO); } /* * Initialize Hardware */ static int ioat3_attach(device_t device) { struct ioat_softc *ioat; struct ioat_descriptor *ring; struct ioat_dma_hw_descriptor *dma_hw_desc; void *hw_desc; bus_addr_t lowaddr; size_t ringsz; int i, num_descriptors; int error; uint8_t xfercap; error = 0; ioat = DEVICE2SOFTC(device); ioat->capabilities = ioat_read_dmacapability(ioat); ioat_log_message(0, "Capabilities: %b\n", (int)ioat->capabilities, IOAT_DMACAP_STR); xfercap = ioat_read_xfercap(ioat); ioat->max_xfer_size = 1 << xfercap; ioat->intrdelay_supported = (ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) & IOAT_INTRDELAY_SUPPORTED) != 0; if (ioat->intrdelay_supported) ioat->intrdelay_max = IOAT_INTRDELAY_US_MASK; /* TODO: need to check DCA here if we ever do XOR/PQ */ mtx_init(&ioat->submit_lock, "ioat_submit", NULL, MTX_DEF); mtx_init(&ioat->cleanup_lock, "ioat_cleanup", NULL, MTX_DEF); callout_init(&ioat->poll_timer, 1); TASK_INIT(&ioat->reset_task, 0, ioat_reset_hw_task, ioat); /* Establish lock order for Witness */ mtx_lock(&ioat->cleanup_lock); mtx_lock(&ioat->submit_lock); mtx_unlock(&ioat->submit_lock); mtx_unlock(&ioat->cleanup_lock); ioat->is_submitter_processing = FALSE; if (ioat->version >= IOAT_VER_3_3) lowaddr = BUS_SPACE_MAXADDR_48BIT; else if (ioat->version >= IOAT_VER_3_2) lowaddr = BUS_SPACE_MAXADDR_46BIT; else lowaddr = BUS_SPACE_MAXADDR_40BIT; error = bus_dma_tag_create(bus_get_dma_tag(ioat->device), sizeof(uint64_t), 0x0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(uint64_t), 1, sizeof(uint64_t), 0, NULL, NULL, &ioat->comp_update_tag); if (error != 0) return (error); error = bus_dmamem_alloc(ioat->comp_update_tag, (void **)&ioat->comp_update, BUS_DMA_ZERO | BUS_DMA_WAITOK, &ioat->comp_update_map); if (error != 0) return (error); error = bus_dmamap_load(ioat->comp_update_tag, ioat->comp_update_map, ioat->comp_update, sizeof(uint64_t), ioat_comp_update_map, ioat, BUS_DMA_NOWAIT); if (error != 0) return (error); ioat->ring_size_order = g_ioat_ring_order; num_descriptors = 1 << ioat->ring_size_order; ringsz = sizeof(struct ioat_dma_hw_descriptor) * num_descriptors; error = bus_dma_tag_create(bus_get_dma_tag(ioat->device), 2 * 1024 * 1024, 0x0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1, ringsz, 0, NULL, NULL, &ioat->hw_desc_tag); if (error != 0) return (error); error = bus_dmamem_alloc(ioat->hw_desc_tag, &hw_desc, BUS_DMA_ZERO | BUS_DMA_WAITOK, &ioat->hw_desc_map); if (error != 0) return (error); error = bus_dmamap_load(ioat->hw_desc_tag, ioat->hw_desc_map, hw_desc, ringsz, ioat_dmamap_cb, &ioat->hw_desc_bus_addr, BUS_DMA_NOWAIT); if (error) return (error); ioat->hw_desc_ring = hw_desc; error = bus_dma_tag_create(bus_get_dma_tag(ioat->device), 1, 0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL, ioat->max_xfer_size, 1, ioat->max_xfer_size, 0, NULL, NULL, &ioat->data_tag); if (error != 0) return (error); ioat->ring = malloc_domainset(num_descriptors * sizeof(*ring), M_IOAT, DOMAINSET_PREF(ioat->domain), M_ZERO | M_WAITOK); ring = ioat->ring; for (i = 0; i < num_descriptors; i++) { memset(&ring[i].bus_dmadesc, 0, sizeof(ring[i].bus_dmadesc)); ring[i].id = i; error = bus_dmamap_create(ioat->data_tag, 0, &ring[i].src_dmamap); if (error != 0) { ioat_log_message(0, "%s: bus_dmamap_create failed %d\n", __func__, error); return (error); } error = bus_dmamap_create(ioat->data_tag, 0, &ring[i].dst_dmamap); if (error != 0) { ioat_log_message(0, "%s: bus_dmamap_create failed %d\n", __func__, error); return (error); } error = bus_dmamap_create(ioat->data_tag, 0, &ring[i].src2_dmamap); if (error != 0) { ioat_log_message(0, "%s: bus_dmamap_create failed %d\n", __func__, error); return (error); } error = bus_dmamap_create(ioat->data_tag, 0, &ring[i].dst2_dmamap); if (error != 0) { ioat_log_message(0, "%s: bus_dmamap_create failed %d\n", __func__, error); return (error); } } for (i = 0; i < num_descriptors; i++) { dma_hw_desc = &ioat->hw_desc_ring[i].dma; dma_hw_desc->next = RING_PHYS_ADDR(ioat, i + 1); } ioat->tail = ioat->head = 0; *ioat->comp_update = ioat->last_seen = RING_PHYS_ADDR(ioat, ioat->tail - 1); return (0); } static int ioat_map_pci_bar(struct ioat_softc *ioat) { ioat->pci_resource_id = PCIR_BAR(0); ioat->pci_resource = bus_alloc_resource_any(ioat->device, SYS_RES_MEMORY, &ioat->pci_resource_id, RF_ACTIVE); if (ioat->pci_resource == NULL) { ioat_log_message(0, "unable to allocate pci resource\n"); return (ENODEV); } ioat->pci_bus_tag = rman_get_bustag(ioat->pci_resource); ioat->pci_bus_handle = rman_get_bushandle(ioat->pci_resource); return (0); } static void ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg, int error) { struct ioat_softc *ioat = arg; KASSERT(error == 0, ("%s: error:%d", __func__, error)); ioat->comp_update_bus_addr = seg[0].ds_addr; } static void ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *baddr; KASSERT(error == 0, ("%s: error:%d", __func__, error)); baddr = arg; *baddr = segs->ds_addr; } /* * Interrupt setup and handlers */ static int ioat_setup_intr(struct ioat_softc *ioat) { uint32_t num_vectors; int error; boolean_t use_msix; boolean_t force_legacy_interrupts; use_msix = FALSE; force_legacy_interrupts = FALSE; if (!g_force_legacy_interrupts && pci_msix_count(ioat->device) >= 1) { num_vectors = 1; pci_alloc_msix(ioat->device, &num_vectors); if (num_vectors == 1) use_msix = TRUE; } if (use_msix) { ioat->rid = 1; ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ, &ioat->rid, RF_ACTIVE); } else { ioat->rid = 0; ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ, &ioat->rid, RF_SHAREABLE | RF_ACTIVE); } if (ioat->res == NULL) { ioat_log_message(0, "bus_alloc_resource failed\n"); return (ENOMEM); } ioat->tag = NULL; error = bus_setup_intr(ioat->device, ioat->res, INTR_MPSAFE | INTR_TYPE_MISC, NULL, ioat_interrupt_handler, ioat, &ioat->tag); if (error != 0) { ioat_log_message(0, "bus_setup_intr failed\n"); return (error); } ioat_write_intrctrl(ioat, IOAT_INTRCTRL_MASTER_INT_EN); return (0); } static boolean_t ioat_model_resets_msix(struct ioat_softc *ioat) { u_int32_t pciid; pciid = pci_get_devid(ioat->device); switch (pciid) { /* BWD: */ case 0x0c508086: case 0x0c518086: case 0x0c528086: case 0x0c538086: /* BDXDE: */ case 0x6f508086: case 0x6f518086: case 0x6f528086: case 0x6f538086: return (TRUE); } return (FALSE); } static void ioat_interrupt_handler(void *arg) { struct ioat_softc *ioat = arg; ioat->stats.interrupts++; ioat_process_events(ioat, TRUE); } static int chanerr_to_errno(uint32_t chanerr) { if (chanerr == 0) return (0); if ((chanerr & (IOAT_CHANERR_XSADDERR | IOAT_CHANERR_XDADDERR)) != 0) return (EFAULT); if ((chanerr & (IOAT_CHANERR_RDERR | IOAT_CHANERR_WDERR)) != 0) return (EIO); /* This one is probably our fault: */ if ((chanerr & IOAT_CHANERR_NDADDERR) != 0) return (EIO); return (EIO); } static void ioat_process_events(struct ioat_softc *ioat, boolean_t intr) { struct ioat_descriptor *desc; struct bus_dmadesc *dmadesc; uint64_t comp_update, status; uint32_t completed, chanerr; int error; if (intr) { mtx_lock(&ioat->cleanup_lock); } else { if (!mtx_trylock(&ioat->cleanup_lock)) return; } /* * Don't run while the hardware is being reset. Reset is responsible * for blocking new work and draining & completing existing work, so * there is nothing to do until new work is queued after reset anyway. */ if (ioat->resetting_cleanup) { mtx_unlock(&ioat->cleanup_lock); return; } completed = 0; comp_update = *ioat->comp_update; status = comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK; if (status < ioat->hw_desc_bus_addr || status >= ioat->hw_desc_bus_addr + (1 << ioat->ring_size_order) * sizeof(struct ioat_generic_hw_descriptor)) panic("Bogus completion address %jx (channel %u)", (uintmax_t)status, ioat->chan_idx); if (status == ioat->last_seen) { /* * If we landed in process_events and nothing has been * completed, check for a timeout due to channel halt. */ goto out; } CTR4(KTR_IOAT, "%s channel=%u hw_status=0x%lx last_seen=0x%lx", __func__, ioat->chan_idx, comp_update, ioat->last_seen); while (RING_PHYS_ADDR(ioat, ioat->tail - 1) != status) { desc = ioat_get_ring_entry(ioat, ioat->tail); dmadesc = &desc->bus_dmadesc; CTR5(KTR_IOAT, "channel=%u completing desc idx %u (%p) ok cb %p(%p)", ioat->chan_idx, ioat->tail, dmadesc, dmadesc->callback_fn, dmadesc->callback_arg); bus_dmamap_unload(ioat->data_tag, desc->src_dmamap); bus_dmamap_unload(ioat->data_tag, desc->dst_dmamap); bus_dmamap_unload(ioat->data_tag, desc->src2_dmamap); bus_dmamap_unload(ioat->data_tag, desc->dst2_dmamap); if (dmadesc->callback_fn != NULL) dmadesc->callback_fn(dmadesc->callback_arg, 0); completed++; ioat->tail++; } CTR5(KTR_IOAT, "%s channel=%u head=%u tail=%u active=%u", __func__, ioat->chan_idx, ioat->head, ioat->tail, ioat_get_active(ioat)); if (completed != 0) { ioat->last_seen = RING_PHYS_ADDR(ioat, ioat->tail - 1); ioat->stats.descriptors_processed += completed; wakeup(&ioat->tail); } out: ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN); mtx_unlock(&ioat->cleanup_lock); /* * The device doesn't seem to reliably push suspend/halt statuses to * the channel completion memory address, so poll the device register * here. For performance reasons skip it on interrupts, do it only * on much more rare polling events. */ if (!intr) comp_update = ioat_get_chansts(ioat) & IOAT_CHANSTS_STATUS; if (!is_ioat_halted(comp_update) && !is_ioat_suspended(comp_update)) return; ioat->stats.channel_halts++; /* * Fatal programming error on this DMA channel. Flush any outstanding * work with error status and restart the engine. */ mtx_lock(&ioat->submit_lock); ioat->quiescing = TRUE; mtx_unlock(&ioat->submit_lock); /* * This is safe to do here because the submit queue is quiesced. We * know that we will drain all outstanding events, so ioat_reset_hw * can't deadlock. It is necessary to protect other ioat_process_event * threads from racing ioat_reset_hw, reading an indeterminate hw * state, and attempting to continue issuing completions. */ mtx_lock(&ioat->cleanup_lock); ioat->resetting_cleanup = TRUE; chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET); if (1 <= g_ioat_debug_level) ioat_halted_debug(ioat, chanerr); ioat->stats.last_halt_chanerr = chanerr; while (ioat_get_active(ioat) > 0) { desc = ioat_get_ring_entry(ioat, ioat->tail); dmadesc = &desc->bus_dmadesc; CTR5(KTR_IOAT, "channel=%u completing desc idx %u (%p) err cb %p(%p)", ioat->chan_idx, ioat->tail, dmadesc, dmadesc->callback_fn, dmadesc->callback_arg); if (dmadesc->callback_fn != NULL) dmadesc->callback_fn(dmadesc->callback_arg, chanerr_to_errno(chanerr)); ioat->tail++; ioat->stats.descriptors_processed++; ioat->stats.descriptors_error++; } CTR5(KTR_IOAT, "%s channel=%u head=%u tail=%u active=%u", __func__, ioat->chan_idx, ioat->head, ioat->tail, ioat_get_active(ioat)); /* Clear error status */ ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr); mtx_unlock(&ioat->cleanup_lock); ioat_log_message(0, "Resetting channel to recover from error\n"); error = taskqueue_enqueue(taskqueue_thread, &ioat->reset_task); KASSERT(error == 0, ("%s: taskqueue_enqueue failed: %d", __func__, error)); } static void ioat_reset_hw_task(void *ctx, int pending __unused) { struct ioat_softc *ioat; int error; ioat = ctx; ioat_log_message(1, "%s: Resetting channel\n", __func__); error = ioat_reset_hw(ioat); KASSERT(error == 0, ("%s: reset failed: %d", __func__, error)); (void)error; } /* * User API functions */ unsigned ioat_get_nchannels(void) { return (ioat_channel_index); } bus_dmaengine_t ioat_get_dmaengine(uint32_t index, int flags) { struct ioat_softc *ioat; KASSERT((flags & ~(M_NOWAIT | M_WAITOK)) == 0, ("invalid flags: 0x%08x", flags)); KASSERT((flags & (M_NOWAIT | M_WAITOK)) != (M_NOWAIT | M_WAITOK), ("invalid wait | nowait")); mtx_lock(&ioat_list_mtx); if (index >= ioat_channel_index || (ioat = ioat_channel[index]) == NULL) { mtx_unlock(&ioat_list_mtx); return (NULL); } mtx_lock(&ioat->submit_lock); mtx_unlock(&ioat_list_mtx); if (ioat->destroying) { mtx_unlock(&ioat->submit_lock); return (NULL); } ioat_get(ioat); if (ioat->quiescing) { if ((flags & M_NOWAIT) != 0) { ioat_put(ioat); mtx_unlock(&ioat->submit_lock); return (NULL); } while (ioat->quiescing && !ioat->destroying) msleep(&ioat->quiescing, &ioat->submit_lock, 0, "getdma", 0); if (ioat->destroying) { ioat_put(ioat); mtx_unlock(&ioat->submit_lock); return (NULL); } } mtx_unlock(&ioat->submit_lock); return (&ioat->dmaengine); } void ioat_put_dmaengine(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); mtx_lock(&ioat->submit_lock); ioat_put(ioat); mtx_unlock(&ioat->submit_lock); } int ioat_get_hwversion(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); return (ioat->version); } size_t ioat_get_max_io_size(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); return (ioat->max_xfer_size); } uint32_t ioat_get_capabilities(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); return (ioat->capabilities); } int ioat_get_domain(bus_dmaengine_t dmaengine, int *domain) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); return (bus_get_domain(ioat->device, domain)); } int ioat_set_interrupt_coalesce(bus_dmaengine_t dmaengine, uint16_t delay) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); if (!ioat->intrdelay_supported) return (ENODEV); if (delay > ioat->intrdelay_max) return (ERANGE); ioat_write_2(ioat, IOAT_INTRDELAY_OFFSET, delay); ioat->cached_intrdelay = ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) & IOAT_INTRDELAY_US_MASK; return (0); } uint16_t ioat_get_max_coalesce_period(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); return (ioat->intrdelay_max); } void ioat_acquire(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); mtx_lock(&ioat->submit_lock); CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx); ioat->acq_head = ioat->head; } int ioat_acquire_reserve(bus_dmaengine_t dmaengine, unsigned n, int mflags) { struct ioat_softc *ioat; int error; ioat = to_ioat_softc(dmaengine); ioat_acquire(dmaengine); error = ioat_reserve_space(ioat, n, mflags); if (error != 0) ioat_release(dmaengine); return (error); } void ioat_release(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); CTR3(KTR_IOAT, "%s channel=%u dispatch1 head=%u", __func__, ioat->chan_idx, ioat->head); KFAIL_POINT_CODE(DEBUG_FP, ioat_release, /* do nothing */); CTR3(KTR_IOAT, "%s channel=%u dispatch2 head=%u", __func__, ioat->chan_idx, ioat->head); if (ioat->acq_head != ioat->head) { ioat_write_2(ioat, IOAT_DMACOUNT_OFFSET, (uint16_t)ioat->head); if (!callout_pending(&ioat->poll_timer)) { callout_reset_on(&ioat->poll_timer, 1, ioat_poll_timer_callback, ioat, ioat->cpu); } } mtx_unlock(&ioat->submit_lock); } static struct ioat_descriptor * ioat_op_generic(struct ioat_softc *ioat, uint8_t op, uint32_t size, uint64_t src, uint64_t dst, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_generic_hw_descriptor *hw_desc; struct ioat_descriptor *desc; bus_dma_segment_t seg; int mflags, nseg, error; mtx_assert(&ioat->submit_lock, MA_OWNED); KASSERT((flags & ~_DMA_GENERIC_FLAGS) == 0, ("Unrecognized flag(s): %#x", flags & ~_DMA_GENERIC_FLAGS)); KASSERT(size <= ioat->max_xfer_size, ("%s: size too big (%u > %u)", __func__, (unsigned)size, ioat->max_xfer_size)); if ((flags & DMA_NO_WAIT) != 0) mflags = M_NOWAIT; else mflags = M_WAITOK; if (ioat_reserve_space(ioat, 1, mflags) != 0) return (NULL); desc = ioat_get_ring_entry(ioat, ioat->head); hw_desc = &ioat_get_descriptor(ioat, ioat->head)->generic; hw_desc->u.control_raw = 0; hw_desc->u.control_generic.op = op; hw_desc->u.control_generic.completion_update = 1; if ((flags & DMA_INT_EN) != 0) hw_desc->u.control_generic.int_enable = 1; if ((flags & DMA_FENCE) != 0) hw_desc->u.control_generic.fence = 1; hw_desc->size = size; if (src != 0) { nseg = -1; error = _bus_dmamap_load_phys(ioat->data_tag, desc->src_dmamap, src, size, 0, &seg, &nseg); if (error != 0) { ioat_log_message(0, "%s: _bus_dmamap_load_phys" " failed %d\n", __func__, error); return (NULL); } hw_desc->src_addr = seg.ds_addr; } if (dst != 0) { nseg = -1; error = _bus_dmamap_load_phys(ioat->data_tag, desc->dst_dmamap, dst, size, 0, &seg, &nseg); if (error != 0) { ioat_log_message(0, "%s: _bus_dmamap_load_phys" " failed %d\n", __func__, error); return (NULL); } hw_desc->dest_addr = seg.ds_addr; } desc->bus_dmadesc.callback_fn = callback_fn; desc->bus_dmadesc.callback_arg = callback_arg; return (desc); } struct bus_dmadesc * ioat_null(bus_dmaengine_t dmaengine, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_dma_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx); desc = ioat_op_generic(ioat, IOAT_OP_COPY, 8, 0, 0, callback_fn, callback_arg, flags); if (desc == NULL) return (NULL); hw_desc = &ioat_get_descriptor(ioat, desc->id)->dma; hw_desc->u.control.null = 1; ioat_submit_single(ioat); return (&desc->bus_dmadesc); } struct bus_dmadesc * ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst, bus_addr_t src, bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_dma_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); desc = ioat_op_generic(ioat, IOAT_OP_COPY, len, src, dst, callback_fn, callback_arg, flags); if (desc == NULL) return (NULL); hw_desc = &ioat_get_descriptor(ioat, desc->id)->dma; if (g_ioat_debug_level >= 3) dump_descriptor(hw_desc); ioat_submit_single(ioat); CTR6(KTR_IOAT, "%s channel=%u desc=%p dest=%lx src=%lx len=%lx", __func__, ioat->chan_idx, &desc->bus_dmadesc, dst, src, len); return (&desc->bus_dmadesc); } struct bus_dmadesc * ioat_copy_8k_aligned(bus_dmaengine_t dmaengine, bus_addr_t dst1, bus_addr_t dst2, bus_addr_t src1, bus_addr_t src2, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_dma_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; bus_size_t src1_len, dst1_len; bus_dma_segment_t seg; int nseg, error; ioat = to_ioat_softc(dmaengine); CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx); KASSERT(((src1 | src2 | dst1 | dst2) & PAGE_MASK) == 0, ("%s: addresses are not page-aligned", __func__)); desc = ioat_op_generic(ioat, IOAT_OP_COPY, 2 * PAGE_SIZE, 0, 0, callback_fn, callback_arg, flags); if (desc == NULL) return (NULL); hw_desc = &ioat_get_descriptor(ioat, desc->id)->dma; src1_len = (src2 != src1 + PAGE_SIZE) ? PAGE_SIZE : 2 * PAGE_SIZE; nseg = -1; error = _bus_dmamap_load_phys(ioat->data_tag, desc->src_dmamap, src1, src1_len, 0, &seg, &nseg); if (error != 0) { ioat_log_message(0, "%s: _bus_dmamap_load_phys" " failed %d\n", __func__, error); return (NULL); } hw_desc->src_addr = seg.ds_addr; if (src1_len != 2 * PAGE_SIZE) { hw_desc->u.control.src_page_break = 1; nseg = -1; error = _bus_dmamap_load_phys(ioat->data_tag, desc->src2_dmamap, src2, PAGE_SIZE, 0, &seg, &nseg); if (error != 0) { ioat_log_message(0, "%s: _bus_dmamap_load_phys" " failed %d\n", __func__, error); return (NULL); } hw_desc->next_src_addr = seg.ds_addr; } dst1_len = (dst2 != dst1 + PAGE_SIZE) ? PAGE_SIZE : 2 * PAGE_SIZE; nseg = -1; error = _bus_dmamap_load_phys(ioat->data_tag, desc->dst_dmamap, dst1, dst1_len, 0, &seg, &nseg); if (error != 0) { ioat_log_message(0, "%s: _bus_dmamap_load_phys" " failed %d\n", __func__, error); return (NULL); } hw_desc->dest_addr = seg.ds_addr; if (dst1_len != 2 * PAGE_SIZE) { hw_desc->u.control.dest_page_break = 1; nseg = -1; error = _bus_dmamap_load_phys(ioat->data_tag, desc->dst2_dmamap, dst2, PAGE_SIZE, 0, &seg, &nseg); if (error != 0) { ioat_log_message(0, "%s: _bus_dmamap_load_phys" " failed %d\n", __func__, error); return (NULL); } hw_desc->next_dest_addr = seg.ds_addr; } if (g_ioat_debug_level >= 3) dump_descriptor(hw_desc); ioat_submit_single(ioat); return (&desc->bus_dmadesc); } struct bus_dmadesc * ioat_copy_crc(bus_dmaengine_t dmaengine, bus_addr_t dst, bus_addr_t src, bus_size_t len, uint32_t *initialseed, bus_addr_t crcptr, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_crc32_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; uint32_t teststore; uint8_t op; bus_dma_segment_t seg; int nseg, error; ioat = to_ioat_softc(dmaengine); CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx); KASSERT((ioat->capabilities & IOAT_DMACAP_MOVECRC) != 0, ("%s: device lacks MOVECRC capability", __func__)); teststore = (flags & _DMA_CRC_TESTSTORE); KASSERT(teststore != _DMA_CRC_TESTSTORE, ("%s: TEST and STORE invalid", __func__)); KASSERT(teststore != 0 || (flags & DMA_CRC_INLINE) == 0, ("%s: INLINE invalid without TEST or STORE", __func__)); switch (teststore) { case DMA_CRC_STORE: op = IOAT_OP_MOVECRC_STORE; break; case DMA_CRC_TEST: op = IOAT_OP_MOVECRC_TEST; break; default: KASSERT(teststore == 0, ("bogus")); op = IOAT_OP_MOVECRC; break; } desc = ioat_op_generic(ioat, op, len, src, dst, callback_fn, callback_arg, flags & ~_DMA_CRC_FLAGS); if (desc == NULL) return (NULL); hw_desc = &ioat_get_descriptor(ioat, desc->id)->crc32; if ((flags & DMA_CRC_INLINE) == 0) { nseg = -1; error = _bus_dmamap_load_phys(ioat->data_tag, desc->dst2_dmamap, crcptr, sizeof(uint32_t), 0, &seg, &nseg); if (error != 0) { ioat_log_message(0, "%s: _bus_dmamap_load_phys" " failed %d\n", __func__, error); return (NULL); } hw_desc->crc_address = seg.ds_addr; } else hw_desc->u.control.crc_location = 1; if (initialseed != NULL) { hw_desc->u.control.use_seed = 1; hw_desc->seed = *initialseed; } if (g_ioat_debug_level >= 3) dump_descriptor(hw_desc); ioat_submit_single(ioat); return (&desc->bus_dmadesc); } struct bus_dmadesc * ioat_crc(bus_dmaengine_t dmaengine, bus_addr_t src, bus_size_t len, uint32_t *initialseed, bus_addr_t crcptr, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_crc32_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; uint32_t teststore; uint8_t op; bus_dma_segment_t seg; int nseg, error; ioat = to_ioat_softc(dmaengine); CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx); KASSERT((ioat->capabilities & IOAT_DMACAP_CRC) != 0, ("%s: device lacks CRC capability", __func__)); teststore = (flags & _DMA_CRC_TESTSTORE); KASSERT(teststore != _DMA_CRC_TESTSTORE, ("%s: TEST and STORE invalid", __func__)); KASSERT(teststore != 0 || (flags & DMA_CRC_INLINE) == 0, ("%s: INLINE invalid without TEST or STORE", __func__)); switch (teststore) { case DMA_CRC_STORE: op = IOAT_OP_CRC_STORE; break; case DMA_CRC_TEST: op = IOAT_OP_CRC_TEST; break; default: KASSERT(teststore == 0, ("bogus")); op = IOAT_OP_CRC; break; } desc = ioat_op_generic(ioat, op, len, src, 0, callback_fn, callback_arg, flags & ~_DMA_CRC_FLAGS); if (desc == NULL) return (NULL); hw_desc = &ioat_get_descriptor(ioat, desc->id)->crc32; if ((flags & DMA_CRC_INLINE) == 0) { nseg = -1; error = _bus_dmamap_load_phys(ioat->data_tag, desc->dst2_dmamap, crcptr, sizeof(uint32_t), 0, &seg, &nseg); if (error != 0) { ioat_log_message(0, "%s: _bus_dmamap_load_phys" " failed %d\n", __func__, error); return (NULL); } hw_desc->crc_address = seg.ds_addr; } else hw_desc->u.control.crc_location = 1; if (initialseed != NULL) { hw_desc->u.control.use_seed = 1; hw_desc->seed = *initialseed; } if (g_ioat_debug_level >= 3) dump_descriptor(hw_desc); ioat_submit_single(ioat); return (&desc->bus_dmadesc); } struct bus_dmadesc * ioat_blockfill(bus_dmaengine_t dmaengine, bus_addr_t dst, uint64_t fillpattern, bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_fill_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx); KASSERT((ioat->capabilities & IOAT_DMACAP_BFILL) != 0, ("%s: device lacks BFILL capability", __func__)); desc = ioat_op_generic(ioat, IOAT_OP_FILL, len, 0, dst, callback_fn, callback_arg, flags); if (desc == NULL) return (NULL); hw_desc = &ioat_get_descriptor(ioat, desc->id)->fill; hw_desc->src_data = fillpattern; if (g_ioat_debug_level >= 3) dump_descriptor(hw_desc); ioat_submit_single(ioat); return (&desc->bus_dmadesc); } /* * Ring Management */ static inline uint32_t ioat_get_active(struct ioat_softc *ioat) { return ((ioat->head - ioat->tail) & ((1 << ioat->ring_size_order) - 1)); } static inline uint32_t ioat_get_ring_space(struct ioat_softc *ioat) { return ((1 << ioat->ring_size_order) - ioat_get_active(ioat) - 1); } /* * Reserves space in this IOAT descriptor ring by ensuring enough slots remain * for 'num_descs'. * * If mflags contains M_WAITOK, blocks until enough space is available. * * Returns zero on success, or an errno on error. If num_descs is beyond the * maximum ring size, returns EINVAl; if allocation would block and mflags * contains M_NOWAIT, returns EAGAIN. * * Must be called with the submit_lock held; returns with the lock held. The * lock may be dropped to allocate the ring. * * (The submit_lock is needed to add any entries to the ring, so callers are * assured enough room is available.) */ static int ioat_reserve_space(struct ioat_softc *ioat, uint32_t num_descs, int mflags) { boolean_t dug; int error; mtx_assert(&ioat->submit_lock, MA_OWNED); error = 0; dug = FALSE; if (num_descs < 1 || num_descs >= (1 << ioat->ring_size_order)) { error = EINVAL; goto out; } for (;;) { if (ioat->quiescing) { error = ENXIO; goto out; } if (ioat_get_ring_space(ioat) >= num_descs) goto out; CTR3(KTR_IOAT, "%s channel=%u starved (%u)", __func__, ioat->chan_idx, num_descs); if (!dug && !ioat->is_submitter_processing) { ioat->is_submitter_processing = TRUE; mtx_unlock(&ioat->submit_lock); CTR2(KTR_IOAT, "%s channel=%u attempting to process events", __func__, ioat->chan_idx); ioat_process_events(ioat, FALSE); mtx_lock(&ioat->submit_lock); dug = TRUE; KASSERT(ioat->is_submitter_processing == TRUE, ("is_submitter_processing")); ioat->is_submitter_processing = FALSE; wakeup(&ioat->tail); continue; } if ((mflags & M_WAITOK) == 0) { error = EAGAIN; break; } CTR2(KTR_IOAT, "%s channel=%u blocking on completions", __func__, ioat->chan_idx); msleep(&ioat->tail, &ioat->submit_lock, 0, "ioat_full", 0); continue; } out: mtx_assert(&ioat->submit_lock, MA_OWNED); KASSERT(!ioat->quiescing || error == ENXIO, ("reserved during quiesce")); return (error); } static void ioat_free_ring(struct ioat_softc *ioat, uint32_t size, struct ioat_descriptor *ring) { free(ring, M_IOAT); } static struct ioat_descriptor * ioat_get_ring_entry(struct ioat_softc *ioat, uint32_t index) { return (&ioat->ring[index % (1 << ioat->ring_size_order)]); } static union ioat_hw_descriptor * ioat_get_descriptor(struct ioat_softc *ioat, uint32_t index) { return (&ioat->hw_desc_ring[index % (1 << ioat->ring_size_order)]); } static void ioat_halted_debug(struct ioat_softc *ioat, uint32_t chanerr) { union ioat_hw_descriptor *desc; ioat_log_message(0, "Channel halted (%b)\n", (int)chanerr, IOAT_CHANERR_STR); if (chanerr == 0) return; mtx_assert(&ioat->cleanup_lock, MA_OWNED); desc = ioat_get_descriptor(ioat, ioat->tail + 0); dump_descriptor(desc); desc = ioat_get_descriptor(ioat, ioat->tail + 1); dump_descriptor(desc); } static void ioat_poll_timer_callback(void *arg) { struct ioat_softc *ioat; ioat = arg; CTR1(KTR_IOAT, "%s", __func__); ioat_process_events(ioat, FALSE); mtx_lock(&ioat->submit_lock); if (ioat_get_active(ioat) > 0) callout_schedule(&ioat->poll_timer, 1); mtx_unlock(&ioat->submit_lock); } /* * Support Functions */ static void ioat_submit_single(struct ioat_softc *ioat) { mtx_assert(&ioat->submit_lock, MA_OWNED); ioat->head++; CTR4(KTR_IOAT, "%s channel=%u head=%u tail=%u", __func__, ioat->chan_idx, ioat->head, ioat->tail); ioat->stats.descriptors_submitted++; } static int ioat_reset_hw(struct ioat_softc *ioat) { uint64_t status; uint32_t chanerr; unsigned timeout; int error; CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx); mtx_lock(&ioat->submit_lock); while (ioat->resetting && !ioat->destroying) msleep(&ioat->resetting, &ioat->submit_lock, 0, "IRH_drain", 0); if (ioat->destroying) { mtx_unlock(&ioat->submit_lock); return (ENXIO); } ioat->resetting = TRUE; ioat->quiescing = TRUE; mtx_unlock(&ioat->submit_lock); mtx_lock(&ioat->cleanup_lock); while (ioat_get_active(ioat) > 0) msleep(&ioat->tail, &ioat->cleanup_lock, 0, "ioat_drain", 1); /* * Suspend ioat_process_events while the hardware and softc are in an * indeterminate state. */ ioat->resetting_cleanup = TRUE; mtx_unlock(&ioat->cleanup_lock); CTR2(KTR_IOAT, "%s channel=%u quiesced and drained", __func__, ioat->chan_idx); status = ioat_get_chansts(ioat); if (is_ioat_active(status) || is_ioat_idle(status)) ioat_suspend(ioat); /* Wait at most 20 ms */ for (timeout = 0; (is_ioat_active(status) || is_ioat_idle(status)) && timeout < 20; timeout++) { DELAY(1000); status = ioat_get_chansts(ioat); } if (timeout == 20) { error = ETIMEDOUT; goto out; } KASSERT(ioat_get_active(ioat) == 0, ("active after quiesce")); chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET); ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr); CTR2(KTR_IOAT, "%s channel=%u hardware suspended", __func__, ioat->chan_idx); /* * IOAT v3 workaround - CHANERRMSK_INT with 3E07h to masks out errors * that can cause stability issues for IOAT v3. */ pci_write_config(ioat->device, IOAT_CFG_CHANERRMASK_INT_OFFSET, 0x3e07, 4); chanerr = pci_read_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, 4); pci_write_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, chanerr, 4); /* * BDXDE and BWD models reset MSI-X registers on device reset. * Save/restore their contents manually. */ if (ioat_model_resets_msix(ioat)) { ioat_log_message(1, "device resets MSI-X registers; saving\n"); pci_save_state(ioat->device); } ioat_reset(ioat); CTR2(KTR_IOAT, "%s channel=%u hardware reset", __func__, ioat->chan_idx); /* Wait at most 20 ms */ for (timeout = 0; ioat_reset_pending(ioat) && timeout < 20; timeout++) DELAY(1000); if (timeout == 20) { error = ETIMEDOUT; goto out; } if (ioat_model_resets_msix(ioat)) { ioat_log_message(1, "device resets registers; restored\n"); pci_restore_state(ioat->device); } /* Reset attempts to return the hardware to "halted." */ status = ioat_get_chansts(ioat); if (is_ioat_active(status) || is_ioat_idle(status)) { /* So this really shouldn't happen... */ ioat_log_message(0, "Device is active after a reset?\n"); ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN); error = 0; goto out; } chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET); if (chanerr != 0) { mtx_lock(&ioat->cleanup_lock); ioat_halted_debug(ioat, chanerr); mtx_unlock(&ioat->cleanup_lock); error = EIO; goto out; } /* * Bring device back online after reset. Writing CHAINADDR brings the * device back to active. * * The internal ring counter resets to zero, so we have to start over * at zero as well. */ ioat->tail = ioat->head = 0; *ioat->comp_update = ioat->last_seen = RING_PHYS_ADDR(ioat, ioat->tail - 1); ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN); ioat_write_chancmp(ioat, ioat->comp_update_bus_addr); ioat_write_chainaddr(ioat, RING_PHYS_ADDR(ioat, 0)); error = 0; CTR2(KTR_IOAT, "%s channel=%u configured channel", __func__, ioat->chan_idx); out: /* Enqueues a null operation and ensures it completes. */ if (error == 0) { error = ioat_start_channel(ioat); CTR2(KTR_IOAT, "%s channel=%u started channel", __func__, ioat->chan_idx); } /* * Resume completions now that ring state is consistent. */ mtx_lock(&ioat->cleanup_lock); ioat->resetting_cleanup = FALSE; mtx_unlock(&ioat->cleanup_lock); /* Unblock submission of new work */ mtx_lock(&ioat->submit_lock); ioat->quiescing = FALSE; wakeup(&ioat->quiescing); ioat->resetting = FALSE; wakeup(&ioat->resetting); CTR2(KTR_IOAT, "%s channel=%u reset done", __func__, ioat->chan_idx); mtx_unlock(&ioat->submit_lock); return (error); } static int sysctl_handle_chansts(SYSCTL_HANDLER_ARGS) { struct ioat_softc *ioat; struct sbuf sb; uint64_t status; int error; ioat = arg1; status = ioat_get_chansts(ioat) & IOAT_CHANSTS_STATUS; sbuf_new_for_sysctl(&sb, NULL, 256, req); switch (status) { case IOAT_CHANSTS_ACTIVE: sbuf_printf(&sb, "ACTIVE"); break; case IOAT_CHANSTS_IDLE: sbuf_printf(&sb, "IDLE"); break; case IOAT_CHANSTS_SUSPENDED: sbuf_printf(&sb, "SUSPENDED"); break; case IOAT_CHANSTS_HALTED: sbuf_printf(&sb, "HALTED"); break; case IOAT_CHANSTS_ARMED: sbuf_printf(&sb, "ARMED"); break; default: sbuf_printf(&sb, "UNKNOWN"); break; } error = sbuf_finish(&sb); sbuf_delete(&sb); if (error != 0 || req->newptr == NULL) return (error); return (EINVAL); } static int sysctl_handle_dpi(SYSCTL_HANDLER_ARGS) { struct ioat_softc *ioat; struct sbuf sb; #define PRECISION "1" const uintmax_t factor = 10; uintmax_t rate; int error; ioat = arg1; sbuf_new_for_sysctl(&sb, NULL, 16, req); if (ioat->stats.interrupts == 0) { sbuf_printf(&sb, "NaN"); goto out; } rate = ioat->stats.descriptors_processed * factor / ioat->stats.interrupts; sbuf_printf(&sb, "%ju.%." PRECISION "ju", rate / factor, rate % factor); #undef PRECISION out: error = sbuf_finish(&sb); sbuf_delete(&sb); if (error != 0 || req->newptr == NULL) return (error); return (EINVAL); } static int sysctl_handle_reset(SYSCTL_HANDLER_ARGS) { struct ioat_softc *ioat; int error, arg; ioat = arg1; arg = 0; error = SYSCTL_OUT(req, &arg, sizeof(arg)); if (error != 0 || req->newptr == NULL) return (error); error = SYSCTL_IN(req, &arg, sizeof(arg)); if (error != 0) return (error); if (arg != 0) error = ioat_reset_hw(ioat); return (error); } static void dump_descriptor(void *hw_desc) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 8; j++) printf("%08x ", ((uint32_t *)hw_desc)[i * 8 + j]); printf("\n"); } } static void ioat_setup_sysctl(device_t device) { struct sysctl_oid_list *par, *statpar, *state, *hammer; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree, *tmp; struct ioat_softc *ioat; ioat = DEVICE2SOFTC(device); ctx = device_get_sysctl_ctx(device); tree = device_get_sysctl_tree(device); par = SYSCTL_CHILDREN(tree); SYSCTL_ADD_INT(ctx, par, OID_AUTO, "version", CTLFLAG_RD, &ioat->version, 0, "HW version (0xMM form)"); SYSCTL_ADD_UINT(ctx, par, OID_AUTO, "max_xfer_size", CTLFLAG_RD, &ioat->max_xfer_size, 0, "HW maximum transfer size"); SYSCTL_ADD_INT(ctx, par, OID_AUTO, "intrdelay_supported", CTLFLAG_RD, &ioat->intrdelay_supported, 0, "Is INTRDELAY supported"); SYSCTL_ADD_U16(ctx, par, OID_AUTO, "intrdelay_max", CTLFLAG_RD, &ioat->intrdelay_max, 0, "Maximum configurable INTRDELAY on this channel (microseconds)"); tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "state", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IOAT channel internal state"); state = SYSCTL_CHILDREN(tmp); SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "ring_size_order", CTLFLAG_RD, &ioat->ring_size_order, 0, "SW descriptor ring size order"); SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "head", CTLFLAG_RD, &ioat->head, 0, "SW descriptor head pointer index"); SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "tail", CTLFLAG_RD, &ioat->tail, 0, "SW descriptor tail pointer index"); SYSCTL_ADD_UQUAD(ctx, state, OID_AUTO, "last_completion", CTLFLAG_RD, ioat->comp_update, "HW addr of last completion"); SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_submitter_processing", CTLFLAG_RD, &ioat->is_submitter_processing, 0, "submitter processing"); SYSCTL_ADD_PROC(ctx, state, OID_AUTO, "chansts", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, ioat, 0, + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, ioat, 0, sysctl_handle_chansts, "A", "String of the channel status"); SYSCTL_ADD_U16(ctx, state, OID_AUTO, "intrdelay", CTLFLAG_RD, &ioat->cached_intrdelay, 0, "Current INTRDELAY on this channel (cached, microseconds)"); tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "hammer", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Big hammers (mostly for testing)"); hammer = SYSCTL_CHILDREN(tmp); SYSCTL_ADD_PROC(ctx, hammer, OID_AUTO, "force_hw_reset", - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, ioat, 0, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, ioat, 0, sysctl_handle_reset, "I", "Set to non-zero to reset the hardware"); tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IOAT channel statistics"); statpar = SYSCTL_CHILDREN(tmp); SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "interrupts", CTLFLAG_RW | CTLFLAG_STATS, &ioat->stats.interrupts, "Number of interrupts processed on this channel"); SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "descriptors", CTLFLAG_RW | CTLFLAG_STATS, &ioat->stats.descriptors_processed, "Number of descriptors processed on this channel"); SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "submitted", CTLFLAG_RW | CTLFLAG_STATS, &ioat->stats.descriptors_submitted, "Number of descriptors submitted to this channel"); SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "errored", CTLFLAG_RW | CTLFLAG_STATS, &ioat->stats.descriptors_error, "Number of descriptors failed by channel errors"); SYSCTL_ADD_U32(ctx, statpar, OID_AUTO, "halts", CTLFLAG_RW | CTLFLAG_STATS, &ioat->stats.channel_halts, 0, "Number of times the channel has halted"); SYSCTL_ADD_U32(ctx, statpar, OID_AUTO, "last_halt_chanerr", CTLFLAG_RW | CTLFLAG_STATS, &ioat->stats.last_halt_chanerr, 0, "The raw CHANERR when the channel was last halted"); SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "desc_per_interrupt", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, ioat, 0, + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, ioat, 0, sysctl_handle_dpi, "A", "Descriptors per interrupt"); } static void ioat_get(struct ioat_softc *ioat) { mtx_assert(&ioat->submit_lock, MA_OWNED); KASSERT(ioat->refcnt < UINT32_MAX, ("refcnt overflow")); ioat->refcnt++; } static void ioat_put(struct ioat_softc *ioat) { mtx_assert(&ioat->submit_lock, MA_OWNED); KASSERT(ioat->refcnt >= 1, ("refcnt error")); if (--ioat->refcnt == 0) wakeup(&ioat->refcnt); } static void ioat_drain_locked(struct ioat_softc *ioat) { mtx_assert(&ioat->submit_lock, MA_OWNED); while (ioat->refcnt > 0) msleep(&ioat->refcnt, &ioat->submit_lock, 0, "ioat_drain", 0); } #ifdef DDB #define _db_show_lock(lo) LOCK_CLASS(lo)->lc_ddb_show(lo) #define db_show_lock(lk) _db_show_lock(&(lk)->lock_object) DB_SHOW_COMMAND(ioat, db_show_ioat) { struct ioat_softc *sc; unsigned idx; if (!have_addr) goto usage; idx = (unsigned)addr; if (idx >= ioat_channel_index) goto usage; sc = ioat_channel[idx]; db_printf("ioat softc at %p\n", sc); if (sc == NULL) return; db_printf(" version: %d\n", sc->version); db_printf(" chan_idx: %u\n", sc->chan_idx); db_printf(" submit_lock: "); db_show_lock(&sc->submit_lock); db_printf(" capabilities: %b\n", (int)sc->capabilities, IOAT_DMACAP_STR); db_printf(" cached_intrdelay: %u\n", sc->cached_intrdelay); db_printf(" *comp_update: 0x%jx\n", (uintmax_t)*sc->comp_update); db_printf(" poll_timer:\n"); db_printf(" c_time: %ju\n", (uintmax_t)sc->poll_timer.c_time); db_printf(" c_arg: %p\n", sc->poll_timer.c_arg); db_printf(" c_func: %p\n", sc->poll_timer.c_func); db_printf(" c_lock: %p\n", sc->poll_timer.c_lock); db_printf(" c_flags: 0x%x\n", (unsigned)sc->poll_timer.c_flags); db_printf(" quiescing: %d\n", (int)sc->quiescing); db_printf(" destroying: %d\n", (int)sc->destroying); db_printf(" is_submitter_processing: %d\n", (int)sc->is_submitter_processing); db_printf(" intrdelay_supported: %d\n", (int)sc->intrdelay_supported); db_printf(" resetting: %d\n", (int)sc->resetting); db_printf(" head: %u\n", sc->head); db_printf(" tail: %u\n", sc->tail); db_printf(" ring_size_order: %u\n", sc->ring_size_order); db_printf(" last_seen: 0x%lx\n", sc->last_seen); db_printf(" ring: %p\n", sc->ring); db_printf(" descriptors: %p\n", sc->hw_desc_ring); db_printf(" descriptors (phys): 0x%jx\n", (uintmax_t)sc->hw_desc_bus_addr); db_printf(" ring[%u] (tail):\n", sc->tail % (1 << sc->ring_size_order)); db_printf(" id: %u\n", ioat_get_ring_entry(sc, sc->tail)->id); db_printf(" addr: 0x%lx\n", RING_PHYS_ADDR(sc, sc->tail)); db_printf(" next: 0x%lx\n", ioat_get_descriptor(sc, sc->tail)->generic.next); db_printf(" ring[%u] (head - 1):\n", (sc->head - 1) % (1 << sc->ring_size_order)); db_printf(" id: %u\n", ioat_get_ring_entry(sc, sc->head - 1)->id); db_printf(" addr: 0x%lx\n", RING_PHYS_ADDR(sc, sc->head - 1)); db_printf(" next: 0x%lx\n", ioat_get_descriptor(sc, sc->head - 1)->generic.next); db_printf(" ring[%u] (head):\n", (sc->head) % (1 << sc->ring_size_order)); db_printf(" id: %u\n", ioat_get_ring_entry(sc, sc->head)->id); db_printf(" addr: 0x%lx\n", RING_PHYS_ADDR(sc, sc->head)); db_printf(" next: 0x%lx\n", ioat_get_descriptor(sc, sc->head)->generic.next); for (idx = 0; idx < (1 << sc->ring_size_order); idx++) if ((*sc->comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK) == RING_PHYS_ADDR(sc, idx)) db_printf(" ring[%u] == hardware tail\n", idx); db_printf(" cleanup_lock: "); db_show_lock(&sc->cleanup_lock); db_printf(" refcnt: %u\n", sc->refcnt); db_printf(" stats:\n"); db_printf(" interrupts: %lu\n", sc->stats.interrupts); db_printf(" descriptors_processed: %lu\n", sc->stats.descriptors_processed); db_printf(" descriptors_error: %lu\n", sc->stats.descriptors_error); db_printf(" descriptors_submitted: %lu\n", sc->stats.descriptors_submitted); db_printf(" channel_halts: %u\n", sc->stats.channel_halts); db_printf(" last_halt_chanerr: %u\n", sc->stats.last_halt_chanerr); if (db_pager_quit) return; db_printf(" hw status:\n"); db_printf(" status: 0x%lx\n", ioat_get_chansts(sc)); db_printf(" chanctrl: 0x%x\n", (unsigned)ioat_read_2(sc, IOAT_CHANCTRL_OFFSET)); db_printf(" chancmd: 0x%x\n", (unsigned)ioat_read_1(sc, IOAT_CHANCMD_OFFSET)); db_printf(" dmacount: 0x%x\n", (unsigned)ioat_read_2(sc, IOAT_DMACOUNT_OFFSET)); db_printf(" chainaddr: 0x%lx\n", ioat_read_double_4(sc, IOAT_CHAINADDR_OFFSET_LOW)); db_printf(" chancmp: 0x%lx\n", ioat_read_double_4(sc, IOAT_CHANCMP_OFFSET_LOW)); db_printf(" chanerr: %b\n", (int)ioat_read_4(sc, IOAT_CHANERR_OFFSET), IOAT_CHANERR_STR); return; usage: db_printf("usage: show ioat <0-%u>\n", ioat_channel_index); return; } #endif /* DDB */ diff --git a/sys/dev/isp/isp_freebsd.c b/sys/dev/isp/isp_freebsd.c index 657d3c4446ed..d5e70e96fc44 100644 --- a/sys/dev/isp/isp_freebsd.c +++ b/sys/dev/isp/isp_freebsd.c @@ -1,3494 +1,3494 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009-2020 Alexander Motin * Copyright (c) 1997-2009 by Matthew Jacob * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Platform (FreeBSD) dependent common attachment code for Qlogic adapters. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(isp, 1); MODULE_DEPEND(isp, cam, 1, 1, 1); int isp_announced = 0; int isp_loop_down_limit = 60; /* default loop down limit */ int isp_quickboot_time = 7; /* don't wait more than N secs for loop up */ int isp_gone_device_time = 30; /* grace time before reporting device lost */ static const char prom3[] = "Chan %d [%u] PortID 0x%06x Departed because of %s"; static void isp_freeze_loopdown(ispsoftc_t *, int); static void isp_loop_changed(ispsoftc_t *isp, int chan); static void isp_rq_check_above(ispsoftc_t *); static void isp_rq_check_below(ispsoftc_t *); static d_ioctl_t ispioctl; static void isp_poll(struct cam_sim *); static callout_func_t isp_watchdog; static callout_func_t isp_gdt; static task_fn_t isp_gdt_task; static void isp_kthread(void *); static void isp_action(struct cam_sim *, union ccb *); static int isp_timer_count; static void isp_timer(void *); static struct cdevsw isp_cdevsw = { .d_version = D_VERSION, .d_ioctl = ispioctl, .d_name = "isp", }; static int isp_role_sysctl(SYSCTL_HANDLER_ARGS) { ispsoftc_t *isp = (ispsoftc_t *)arg1; int chan = arg2; int error, old, value; value = FCPARAM(isp, chan)->role; error = sysctl_handle_int(oidp, &value, 0, req); if ((error != 0) || (req->newptr == NULL)) return (error); if (value < ISP_ROLE_NONE || value > ISP_ROLE_BOTH) return (EINVAL); ISP_LOCK(isp); old = FCPARAM(isp, chan)->role; /* We don't allow target mode switch from here. */ value = (old & ISP_ROLE_TARGET) | (value & ISP_ROLE_INITIATOR); /* If nothing has changed -- we are done. */ if (value == old) { ISP_UNLOCK(isp); return (0); } /* Actually change the role. */ error = isp_control(isp, ISPCTL_CHANGE_ROLE, chan, value); ISP_UNLOCK(isp); return (error); } static int isp_attach_chan(ispsoftc_t *isp, struct cam_devq *devq, int chan) { fcparam *fcp = FCPARAM(isp, chan); struct isp_fc *fc = ISP_FC_PC(isp, chan); struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(isp->isp_osinfo.dev); struct sysctl_oid *tree = device_get_sysctl_tree(isp->isp_osinfo.dev); char name[16]; struct cam_sim *sim; struct cam_path *path; #ifdef ISP_TARGET_MODE int i; #endif sim = cam_sim_alloc(isp_action, isp_poll, "isp", isp, device_get_unit(isp->isp_dev), &isp->isp_lock, isp->isp_maxcmds, isp->isp_maxcmds, devq); if (sim == NULL) return (ENOMEM); if (xpt_bus_register(sim, isp->isp_dev, chan) != CAM_SUCCESS) { cam_sim_free(sim, FALSE); return (EIO); } if (xpt_create_path(&path, NULL, cam_sim_path(sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_bus_deregister(cam_sim_path(sim)); cam_sim_free(sim, FALSE); return (ENXIO); } ISP_LOCK(isp); fc->sim = sim; fc->path = path; fc->isp = isp; fc->ready = 1; fcp->isp_use_gft_id = 1; fcp->isp_use_gff_id = 1; callout_init_mtx(&fc->gdt, &isp->isp_lock, 0); TASK_INIT(&fc->gtask, 1, isp_gdt_task, fc); #ifdef ISP_TARGET_MODE TAILQ_INIT(&fc->waitq); STAILQ_INIT(&fc->ntfree); for (i = 0; i < ATPDPSIZE; i++) STAILQ_INSERT_TAIL(&fc->ntfree, &fc->ntpool[i], next); LIST_INIT(&fc->atfree); for (i = ATPDPSIZE-1; i >= 0; i--) LIST_INSERT_HEAD(&fc->atfree, &fc->atpool[i], next); for (i = 0; i < ATPDPHASHSIZE; i++) LIST_INIT(&fc->atused[i]); #endif isp_loop_changed(isp, chan); ISP_UNLOCK(isp); if (kproc_create(isp_kthread, fc, &fc->kproc, 0, 0, "%s_%d", device_get_nameunit(isp->isp_osinfo.dev), chan)) { xpt_free_path(fc->path); xpt_bus_deregister(cam_sim_path(fc->sim)); cam_sim_free(fc->sim, FALSE); return (ENOMEM); } fc->num_threads += 1; if (chan > 0) { snprintf(name, sizeof(name), "chan%d", chan); tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, name, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Virtual channel"); } SYSCTL_ADD_QUAD(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "wwnn", CTLFLAG_RD, &fcp->isp_wwnn, "World Wide Node Name"); SYSCTL_ADD_QUAD(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "wwpn", CTLFLAG_RD, &fcp->isp_wwpn, "World Wide Port Name"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "loop_down_limit", CTLFLAG_RW, &fc->loop_down_limit, 0, "Loop Down Limit"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "gone_device_time", CTLFLAG_RW, &fc->gone_device_time, 0, "Gone Device Time"); #if defined(ISP_TARGET_MODE) && defined(DEBUG) SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "inject_lost_data_frame", CTLFLAG_RW, &fc->inject_lost_data_frame, 0, "Cause a Lost Frame on a Read"); #endif SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "role", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + "role", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, isp, chan, isp_role_sysctl, "I", "Current role"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "speed", CTLFLAG_RD, &fcp->isp_gbspeed, 0, "Connection speed in gigabits"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "linkstate", CTLFLAG_RD, &fcp->isp_linkstate, 0, "Link state"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "fwstate", CTLFLAG_RD, &fcp->isp_fwstate, 0, "Firmware state"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "loopstate", CTLFLAG_RD, &fcp->isp_loopstate, 0, "Loop state"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "topo", CTLFLAG_RD, &fcp->isp_topo, 0, "Connection topology"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "use_gft_id", CTLFLAG_RWTUN, &fcp->isp_use_gft_id, 0, "Use GFT_ID during fabric scan"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "use_gff_id", CTLFLAG_RWTUN, &fcp->isp_use_gff_id, 0, "Use GFF_ID during fabric scan"); return (0); } static void isp_detach_chan(ispsoftc_t *isp, int chan) { struct isp_fc *fc = ISP_FC_PC(isp, chan); xpt_free_path(fc->path); xpt_bus_deregister(cam_sim_path(fc->sim)); cam_sim_free(fc->sim, FALSE); /* Wait for the channel's spawned threads to exit. */ wakeup(fc); while (fc->num_threads != 0) mtx_sleep(&fc->num_threads, &isp->isp_lock, PRIBIO, "isp_reap", 0); } int isp_attach(ispsoftc_t *isp) { const char *nu = device_get_nameunit(isp->isp_osinfo.dev); int du = device_get_unit(isp->isp_dev); int chan; /* * Create the device queue for our SIM(s). */ isp->isp_osinfo.devq = cam_simq_alloc(isp->isp_maxcmds); if (isp->isp_osinfo.devq == NULL) { return (EIO); } for (chan = 0; chan < isp->isp_nchan; chan++) { if (isp_attach_chan(isp, isp->isp_osinfo.devq, chan)) { goto unwind; } } callout_init_mtx(&isp->isp_osinfo.tmo, &isp->isp_lock, 0); isp_timer_count = hz >> 2; callout_reset(&isp->isp_osinfo.tmo, isp_timer_count, isp_timer, isp); isp->isp_osinfo.cdev = make_dev(&isp_cdevsw, du, UID_ROOT, GID_OPERATOR, 0600, "%s", nu); if (isp->isp_osinfo.cdev) { isp->isp_osinfo.cdev->si_drv1 = isp; } return (0); unwind: ISP_LOCK(isp); isp->isp_osinfo.is_exiting = 1; while (--chan >= 0) isp_detach_chan(isp, chan); ISP_UNLOCK(isp); cam_simq_free(isp->isp_osinfo.devq); isp->isp_osinfo.devq = NULL; return (-1); } int isp_detach(ispsoftc_t *isp) { int chan; if (isp->isp_osinfo.cdev) { destroy_dev(isp->isp_osinfo.cdev); isp->isp_osinfo.cdev = NULL; } ISP_LOCK(isp); /* Tell spawned threads that we're exiting. */ isp->isp_osinfo.is_exiting = 1; for (chan = isp->isp_nchan - 1; chan >= 0; chan -= 1) isp_detach_chan(isp, chan); ISP_UNLOCK(isp); callout_drain(&isp->isp_osinfo.tmo); cam_simq_free(isp->isp_osinfo.devq); return (0); } static void isp_freeze_loopdown(ispsoftc_t *isp, int chan) { struct isp_fc *fc = ISP_FC_PC(isp, chan); if (fc->sim == NULL) return; if (fc->simqfrozen == 0) { isp_prt(isp, ISP_LOGDEBUG0, "Chan %d Freeze simq (loopdown)", chan); fc->simqfrozen = SIMQFRZ_LOOPDOWN; xpt_hold_boot(); xpt_freeze_simq(fc->sim, 1); } else { isp_prt(isp, ISP_LOGDEBUG0, "Chan %d Mark simq frozen (loopdown)", chan); fc->simqfrozen |= SIMQFRZ_LOOPDOWN; } } static void isp_unfreeze_loopdown(ispsoftc_t *isp, int chan) { struct isp_fc *fc = ISP_FC_PC(isp, chan); if (fc->sim == NULL) return; int wasfrozen = fc->simqfrozen & SIMQFRZ_LOOPDOWN; fc->simqfrozen &= ~SIMQFRZ_LOOPDOWN; if (wasfrozen && fc->simqfrozen == 0) { isp_prt(isp, ISP_LOGDEBUG0, "Chan %d Release simq", chan); xpt_release_simq(fc->sim, 1); xpt_release_boot(); } } /* * Functions to protect from request queue overflow by freezing SIM queue. * XXX: freezing only one arbitrary SIM, since they all share the queue. */ static void isp_rq_check_above(ispsoftc_t *isp) { struct isp_fc *fc = ISP_FC_PC(isp, 0); if (isp->isp_rqovf || fc->sim == NULL) return; if (!isp_rqentry_avail(isp, QENTRY_MAX)) { xpt_freeze_simq(fc->sim, 1); isp->isp_rqovf = 1; } } static void isp_rq_check_below(ispsoftc_t *isp) { struct isp_fc *fc = ISP_FC_PC(isp, 0); if (!isp->isp_rqovf || fc->sim == NULL) return; if (isp_rqentry_avail(isp, QENTRY_MAX)) { xpt_release_simq(fc->sim, 0); isp->isp_rqovf = 0; } } static int ispioctl(struct cdev *dev, u_long c, caddr_t addr, int flags, struct thread *td) { ispsoftc_t *isp; int nr, chan, retval = ENOTTY; isp = dev->si_drv1; switch (c) { case ISP_SDBLEV: { int olddblev = isp->isp_dblev; isp->isp_dblev = *(int *)addr; *(int *)addr = olddblev; retval = 0; break; } case ISP_GETROLE: chan = *(int *)addr; if (chan < 0 || chan >= isp->isp_nchan) { retval = -ENXIO; break; } *(int *)addr = FCPARAM(isp, chan)->role; retval = 0; break; case ISP_SETROLE: nr = *(int *)addr; chan = nr >> 8; if (chan < 0 || chan >= isp->isp_nchan) { retval = -ENXIO; break; } nr &= 0xff; if (nr & ~(ISP_ROLE_INITIATOR|ISP_ROLE_TARGET)) { retval = EINVAL; break; } ISP_LOCK(isp); *(int *)addr = FCPARAM(isp, chan)->role; retval = isp_control(isp, ISPCTL_CHANGE_ROLE, chan, nr); ISP_UNLOCK(isp); break; case ISP_RESETHBA: ISP_LOCK(isp); isp_reinit(isp, 0); ISP_UNLOCK(isp); retval = 0; break; case ISP_RESCAN: chan = *(intptr_t *)addr; if (chan < 0 || chan >= isp->isp_nchan) { retval = -ENXIO; break; } ISP_LOCK(isp); if (isp_fc_runstate(isp, chan, 5 * 1000000) != LOOP_READY) { retval = EIO; } else { retval = 0; } ISP_UNLOCK(isp); break; case ISP_FC_LIP: chan = *(intptr_t *)addr; if (chan < 0 || chan >= isp->isp_nchan) { retval = -ENXIO; break; } ISP_LOCK(isp); if (isp_control(isp, ISPCTL_SEND_LIP, chan)) { retval = EIO; } else { retval = 0; } ISP_UNLOCK(isp); break; case ISP_FC_GETDINFO: { struct isp_fc_device *ifc = (struct isp_fc_device *) addr; fcportdb_t *lp; if (ifc->loopid >= MAX_FC_TARG) { retval = EINVAL; break; } lp = &FCPARAM(isp, ifc->chan)->portdb[ifc->loopid]; if (lp->state != FC_PORTDB_STATE_NIL) { ifc->role = (lp->prli_word3 & SVC3_ROLE_MASK) >> SVC3_ROLE_SHIFT; ifc->loopid = lp->handle; ifc->portid = lp->portid; ifc->node_wwn = lp->node_wwn; ifc->port_wwn = lp->port_wwn; retval = 0; } else { retval = ENODEV; } break; } case ISP_FC_GETHINFO: { struct isp_hba_device *hba = (struct isp_hba_device *) addr; int chan = hba->fc_channel; if (chan < 0 || chan >= isp->isp_nchan) { retval = ENXIO; break; } hba->fc_fw_major = ISP_FW_MAJORX(isp->isp_fwrev); hba->fc_fw_minor = ISP_FW_MINORX(isp->isp_fwrev); hba->fc_fw_micro = ISP_FW_MICROX(isp->isp_fwrev); hba->fc_nchannels = isp->isp_nchan; hba->fc_nports = MAX_FC_TARG; hba->fc_speed = FCPARAM(isp, hba->fc_channel)->isp_gbspeed; hba->fc_topology = FCPARAM(isp, chan)->isp_topo + 1; hba->fc_loopid = FCPARAM(isp, chan)->isp_loopid; hba->nvram_node_wwn = FCPARAM(isp, chan)->isp_wwnn_nvram; hba->nvram_port_wwn = FCPARAM(isp, chan)->isp_wwpn_nvram; hba->active_node_wwn = FCPARAM(isp, chan)->isp_wwnn; hba->active_port_wwn = FCPARAM(isp, chan)->isp_wwpn; retval = 0; break; } case ISP_TSK_MGMT: { int needmarker; struct isp_fc_tsk_mgmt *fct = (struct isp_fc_tsk_mgmt *) addr; uint16_t nphdl; isp24xx_tmf_t tmf; isp24xx_statusreq_t sp; fcparam *fcp; fcportdb_t *lp; int i; chan = fct->chan; if (chan < 0 || chan >= isp->isp_nchan) { retval = -ENXIO; break; } needmarker = retval = 0; nphdl = fct->loopid; ISP_LOCK(isp); fcp = FCPARAM(isp, chan); for (i = 0; i < MAX_FC_TARG; i++) { lp = &fcp->portdb[i]; if (lp->handle == nphdl) { break; } } if (i == MAX_FC_TARG) { retval = ENXIO; ISP_UNLOCK(isp); break; } ISP_MEMZERO(&tmf, sizeof(tmf)); tmf.tmf_header.rqs_entry_type = RQSTYPE_TSK_MGMT; tmf.tmf_header.rqs_entry_count = 1; tmf.tmf_nphdl = lp->handle; tmf.tmf_delay = 2; tmf.tmf_timeout = 4; tmf.tmf_tidlo = lp->portid; tmf.tmf_tidhi = lp->portid >> 16; tmf.tmf_vpidx = ISP_GET_VPIDX(isp, chan); tmf.tmf_lun[1] = fct->lun & 0xff; if (fct->lun >= 256) { tmf.tmf_lun[0] = 0x40 | (fct->lun >> 8); } switch (fct->action) { case IPT_CLEAR_ACA: tmf.tmf_flags = ISP24XX_TMF_CLEAR_ACA; break; case IPT_TARGET_RESET: tmf.tmf_flags = ISP24XX_TMF_TARGET_RESET; needmarker = 1; break; case IPT_LUN_RESET: tmf.tmf_flags = ISP24XX_TMF_LUN_RESET; needmarker = 1; break; case IPT_CLEAR_TASK_SET: tmf.tmf_flags = ISP24XX_TMF_CLEAR_TASK_SET; needmarker = 1; break; case IPT_ABORT_TASK_SET: tmf.tmf_flags = ISP24XX_TMF_ABORT_TASK_SET; needmarker = 1; break; default: retval = EINVAL; break; } if (retval) { ISP_UNLOCK(isp); break; } retval = isp_exec_entry_queue(isp, &tmf, &sp, 5); if (retval != 0) { isp_prt(isp, ISP_LOGERR, "%s: TMF of chan %d error %d", __func__, chan, retval); ISP_UNLOCK(isp); break; } if (sp.req_completion_status != 0) retval = EIO; else if (needmarker) fcp->sendmarker = 1; ISP_UNLOCK(isp); break; } default: break; } return (retval); } /* * Local Inlines */ static ISP_INLINE int isp_get_pcmd(ispsoftc_t *, union ccb *); static ISP_INLINE void isp_free_pcmd(ispsoftc_t *, union ccb *); static ISP_INLINE int isp_get_pcmd(ispsoftc_t *isp, union ccb *ccb) { ISP_PCMD(ccb) = isp->isp_osinfo.pcmd_free; if (ISP_PCMD(ccb) == NULL) { return (-1); } isp->isp_osinfo.pcmd_free = ((struct isp_pcmd *)ISP_PCMD(ccb))->next; return (0); } static ISP_INLINE void isp_free_pcmd(ispsoftc_t *isp, union ccb *ccb) { if (ISP_PCMD(ccb)) { #ifdef ISP_TARGET_MODE PISP_PCMD(ccb)->datalen = 0; #endif PISP_PCMD(ccb)->next = isp->isp_osinfo.pcmd_free; isp->isp_osinfo.pcmd_free = ISP_PCMD(ccb); ISP_PCMD(ccb) = NULL; } } /* * Put the target mode functions here, because some are inlines */ #ifdef ISP_TARGET_MODE static ISP_INLINE tstate_t *get_lun_statep(ispsoftc_t *, int, lun_id_t); static atio_private_data_t *isp_get_atpd(ispsoftc_t *, int, uint32_t); static atio_private_data_t *isp_find_atpd(ispsoftc_t *, int, uint32_t); static void isp_put_atpd(ispsoftc_t *, int, atio_private_data_t *); static inot_private_data_t *isp_get_ntpd(ispsoftc_t *, int); static inot_private_data_t *isp_find_ntpd(ispsoftc_t *, int, uint32_t, uint32_t); static void isp_put_ntpd(ispsoftc_t *, int, inot_private_data_t *); static tstate_t *create_lun_state(ispsoftc_t *, int, struct cam_path *); static void destroy_lun_state(ispsoftc_t *, int, tstate_t *); static void isp_enable_lun(ispsoftc_t *, union ccb *); static void isp_disable_lun(ispsoftc_t *, union ccb *); static callout_func_t isp_refire_notify_ack; static void isp_complete_ctio(ispsoftc_t *isp, union ccb *); enum Start_Ctio_How { FROM_CAM, FROM_TIMER, FROM_SRR, FROM_CTIO_DONE }; static void isp_target_start_ctio(ispsoftc_t *, union ccb *, enum Start_Ctio_How); static void isp_handle_platform_atio7(ispsoftc_t *, at7_entry_t *); static void isp_handle_platform_ctio(ispsoftc_t *, ct7_entry_t *); static int isp_handle_platform_target_notify_ack(ispsoftc_t *, isp_notify_t *, uint32_t rsp); static void isp_handle_platform_target_tmf(ispsoftc_t *, isp_notify_t *); static void isp_target_mark_aborted_early(ispsoftc_t *, int chan, tstate_t *, uint32_t); static ISP_INLINE tstate_t * get_lun_statep(ispsoftc_t *isp, int bus, lun_id_t lun) { struct isp_fc *fc = ISP_FC_PC(isp, bus); tstate_t *tptr; SLIST_FOREACH(tptr, &fc->lun_hash[LUN_HASH_FUNC(lun)], next) { if (tptr->ts_lun == lun) return (tptr); } return (NULL); } static int isp_atio_restart(ispsoftc_t *isp, int bus, tstate_t *tptr) { inot_private_data_t *ntp; struct ntpdlist rq; if (STAILQ_EMPTY(&tptr->restart_queue)) return (0); STAILQ_INIT(&rq); STAILQ_CONCAT(&rq, &tptr->restart_queue); while ((ntp = STAILQ_FIRST(&rq)) != NULL) { STAILQ_REMOVE_HEAD(&rq, next); isp_prt(isp, ISP_LOGTDEBUG0, "%s: restarting resrc deprived %x", __func__, ((at7_entry_t *)ntp->data)->at_rxid); isp_handle_platform_atio7(isp, (at7_entry_t *) ntp->data); isp_put_ntpd(isp, bus, ntp); if (!STAILQ_EMPTY(&tptr->restart_queue)) break; } if (!STAILQ_EMPTY(&rq)) { STAILQ_CONCAT(&rq, &tptr->restart_queue); STAILQ_CONCAT(&tptr->restart_queue, &rq); } return (!STAILQ_EMPTY(&tptr->restart_queue)); } static void isp_tmcmd_restart(ispsoftc_t *isp) { struct isp_fc *fc; tstate_t *tptr; union ccb *ccb; int bus, i; for (bus = 0; bus < isp->isp_nchan; bus++) { fc = ISP_FC_PC(isp, bus); for (i = 0; i < LUN_HASH_SIZE; i++) { SLIST_FOREACH(tptr, &fc->lun_hash[i], next) isp_atio_restart(isp, bus, tptr); } /* * We only need to do this once per channel. */ ccb = (union ccb *)TAILQ_FIRST(&fc->waitq); if (ccb != NULL) { TAILQ_REMOVE(&fc->waitq, &ccb->ccb_h, sim_links.tqe); isp_target_start_ctio(isp, ccb, FROM_TIMER); } } isp_rq_check_above(isp); isp_rq_check_below(isp); } static atio_private_data_t * isp_get_atpd(ispsoftc_t *isp, int chan, uint32_t tag) { struct isp_fc *fc = ISP_FC_PC(isp, chan); atio_private_data_t *atp; atp = LIST_FIRST(&fc->atfree); if (atp) { LIST_REMOVE(atp, next); atp->tag = tag; LIST_INSERT_HEAD(&fc->atused[ATPDPHASH(tag)], atp, next); } return (atp); } static atio_private_data_t * isp_find_atpd(ispsoftc_t *isp, int chan, uint32_t tag) { struct isp_fc *fc = ISP_FC_PC(isp, chan); atio_private_data_t *atp; LIST_FOREACH(atp, &fc->atused[ATPDPHASH(tag)], next) { if (atp->tag == tag) return (atp); } return (NULL); } static void isp_put_atpd(ispsoftc_t *isp, int chan, atio_private_data_t *atp) { struct isp_fc *fc = ISP_FC_PC(isp, chan); if (atp->ests) isp_put_ecmd(isp, atp->ests); LIST_REMOVE(atp, next); memset(atp, 0, sizeof (*atp)); LIST_INSERT_HEAD(&fc->atfree, atp, next); } static void isp_dump_atpd(ispsoftc_t *isp, int chan) { struct isp_fc *fc = ISP_FC_PC(isp, chan); atio_private_data_t *atp; const char *states[8] = { "Free", "ATIO", "CAM", "CTIO", "LAST_CTIO", "PDON", "?6", "7" }; for (atp = fc->atpool; atp < &fc->atpool[ATPDPSIZE]; atp++) { if (atp->state == ATPD_STATE_FREE) continue; isp_prt(isp, ISP_LOGALL, "Chan %d ATP [0x%x] origdlen %u bytes_xfrd %u lun %jx nphdl 0x%04x s_id 0x%06x d_id 0x%06x oxid 0x%04x state %s", chan, atp->tag, atp->orig_datalen, atp->bytes_xfered, (uintmax_t)atp->lun, atp->nphdl, atp->sid, atp->did, atp->oxid, states[atp->state & 0x7]); } } static inot_private_data_t * isp_get_ntpd(ispsoftc_t *isp, int chan) { struct isp_fc *fc = ISP_FC_PC(isp, chan); inot_private_data_t *ntp; ntp = STAILQ_FIRST(&fc->ntfree); if (ntp) STAILQ_REMOVE_HEAD(&fc->ntfree, next); return (ntp); } static inot_private_data_t * isp_find_ntpd(ispsoftc_t *isp, int chan, uint32_t tag_id, uint32_t seq_id) { struct isp_fc *fc = ISP_FC_PC(isp, chan); inot_private_data_t *ntp; for (ntp = fc->ntpool; ntp < &fc->ntpool[ATPDPSIZE]; ntp++) { if (ntp->tag_id == tag_id && ntp->seq_id == seq_id) return (ntp); } return (NULL); } static void isp_put_ntpd(ispsoftc_t *isp, int chan, inot_private_data_t *ntp) { struct isp_fc *fc = ISP_FC_PC(isp, chan); ntp->tag_id = ntp->seq_id = 0; STAILQ_INSERT_HEAD(&fc->ntfree, ntp, next); } tstate_t * create_lun_state(ispsoftc_t *isp, int bus, struct cam_path *path) { struct isp_fc *fc = ISP_FC_PC(isp, bus); lun_id_t lun; tstate_t *tptr; lun = xpt_path_lun_id(path); tptr = malloc(sizeof (tstate_t), M_DEVBUF, M_NOWAIT|M_ZERO); if (tptr == NULL) return (NULL); tptr->ts_lun = lun; SLIST_INIT(&tptr->atios); SLIST_INIT(&tptr->inots); STAILQ_INIT(&tptr->restart_queue); SLIST_INSERT_HEAD(&fc->lun_hash[LUN_HASH_FUNC(lun)], tptr, next); ISP_PATH_PRT(isp, ISP_LOGTDEBUG0, path, "created tstate\n"); return (tptr); } static void destroy_lun_state(ispsoftc_t *isp, int bus, tstate_t *tptr) { struct isp_fc *fc = ISP_FC_PC(isp, bus); union ccb *ccb; inot_private_data_t *ntp; while ((ccb = (union ccb *)SLIST_FIRST(&tptr->atios)) != NULL) { SLIST_REMOVE_HEAD(&tptr->atios, sim_links.sle); ccb->ccb_h.status = CAM_REQ_ABORTED; xpt_done(ccb); }; while ((ccb = (union ccb *)SLIST_FIRST(&tptr->inots)) != NULL) { SLIST_REMOVE_HEAD(&tptr->inots, sim_links.sle); ccb->ccb_h.status = CAM_REQ_ABORTED; xpt_done(ccb); } while ((ntp = STAILQ_FIRST(&tptr->restart_queue)) != NULL) { isp_endcmd(isp, ntp->data, NIL_HANDLE, bus, SCSI_STATUS_BUSY, 0); STAILQ_REMOVE_HEAD(&tptr->restart_queue, next); isp_put_ntpd(isp, bus, ntp); } SLIST_REMOVE(&fc->lun_hash[LUN_HASH_FUNC(tptr->ts_lun)], tptr, tstate, next); free(tptr, M_DEVBUF); } static void isp_enable_lun(ispsoftc_t *isp, union ccb *ccb) { tstate_t *tptr; int bus = XS_CHANNEL(ccb); target_id_t target = ccb->ccb_h.target_id; lun_id_t lun = ccb->ccb_h.target_lun; /* * We only support either target and lun both wildcard * or target and lun both non-wildcard. */ ISP_PATH_PRT(isp, ISP_LOGTDEBUG0|ISP_LOGCONFIG, ccb->ccb_h.path, "enabling lun %jx\n", (uintmax_t)lun); if ((target == CAM_TARGET_WILDCARD) != (lun == CAM_LUN_WILDCARD)) { ccb->ccb_h.status = CAM_LUN_INVALID; xpt_done(ccb); return; } /* Create the state pointer. It should not already exist. */ tptr = get_lun_statep(isp, bus, lun); if (tptr) { ccb->ccb_h.status = CAM_LUN_ALRDY_ENA; xpt_done(ccb); return; } tptr = create_lun_state(isp, bus, ccb->ccb_h.path); if (tptr == NULL) { ccb->ccb_h.status = CAM_RESRC_UNAVAIL; xpt_done(ccb); return; } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); } static void isp_disable_lun(ispsoftc_t *isp, union ccb *ccb) { tstate_t *tptr; int bus = XS_CHANNEL(ccb); target_id_t target = ccb->ccb_h.target_id; lun_id_t lun = ccb->ccb_h.target_lun; ISP_PATH_PRT(isp, ISP_LOGTDEBUG0|ISP_LOGCONFIG, ccb->ccb_h.path, "disabling lun %jx\n", (uintmax_t)lun); if ((target == CAM_TARGET_WILDCARD) != (lun == CAM_LUN_WILDCARD)) { ccb->ccb_h.status = CAM_LUN_INVALID; xpt_done(ccb); return; } /* Find the state pointer. */ if ((tptr = get_lun_statep(isp, bus, lun)) == NULL) { ccb->ccb_h.status = CAM_PATH_INVALID; xpt_done(ccb); return; } destroy_lun_state(isp, bus, tptr); ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); } static void isp_target_start_ctio(ispsoftc_t *isp, union ccb *ccb, enum Start_Ctio_How how) { int fctape, sendstatus, resid; fcparam *fcp; atio_private_data_t *atp; struct ccb_scsiio *cso; struct isp_ccbq *waitq; uint32_t dmaresult, handle, xfrlen, sense_length, tmp; ct7_entry_t local, *cto = &local; isp_prt(isp, ISP_LOGTDEBUG0, "%s: ENTRY[0x%x] how %u xfrlen %u sendstatus %d sense_len %u", __func__, ccb->csio.tag_id, how, ccb->csio.dxfer_len, (ccb->ccb_h.flags & CAM_SEND_STATUS) != 0, ((ccb->ccb_h.flags & CAM_SEND_SENSE)? ccb->csio.sense_len : 0)); waitq = &ISP_FC_PC(isp, XS_CHANNEL(ccb))->waitq; switch (how) { case FROM_CAM: /* * Insert at the tail of the list, if any, waiting CTIO CCBs */ TAILQ_INSERT_TAIL(waitq, &ccb->ccb_h, sim_links.tqe); break; case FROM_TIMER: case FROM_SRR: case FROM_CTIO_DONE: TAILQ_INSERT_HEAD(waitq, &ccb->ccb_h, sim_links.tqe); break; } while ((ccb = (union ccb *) TAILQ_FIRST(waitq)) != NULL) { TAILQ_REMOVE(waitq, &ccb->ccb_h, sim_links.tqe); cso = &ccb->csio; xfrlen = cso->dxfer_len; if (xfrlen == 0) { if ((ccb->ccb_h.flags & CAM_SEND_STATUS) == 0) { ISP_PATH_PRT(isp, ISP_LOGERR, ccb->ccb_h.path, "a data transfer length of zero but no status to send is wrong\n"); ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); continue; } } atp = isp_find_atpd(isp, XS_CHANNEL(ccb), cso->tag_id); if (atp == NULL) { isp_prt(isp, ISP_LOGERR, "%s: [0x%x] cannot find private data adjunct in %s", __func__, cso->tag_id, __func__); isp_dump_atpd(isp, XS_CHANNEL(ccb)); ccb->ccb_h.status = CAM_REQ_CMP_ERR; xpt_done(ccb); continue; } /* * Is this command a dead duck? */ if (atp->dead) { isp_prt(isp, ISP_LOGERR, "%s: [0x%x] not sending a CTIO for a dead command", __func__, cso->tag_id); ccb->ccb_h.status = CAM_REQ_ABORTED; xpt_done(ccb); continue; } /* * Check to make sure we're still in target mode. */ fcp = FCPARAM(isp, XS_CHANNEL(ccb)); if ((fcp->role & ISP_ROLE_TARGET) == 0) { isp_prt(isp, ISP_LOGERR, "%s: [0x%x] stopping sending a CTIO because we're no longer in target mode", __func__, cso->tag_id); ccb->ccb_h.status = CAM_PROVIDE_FAIL; xpt_done(ccb); continue; } /* * We're only handling ATPD_CCB_OUTSTANDING outstanding CCB at a time (one of which * could be split into two CTIOs to split data and status). */ if (atp->ctcnt >= ATPD_CCB_OUTSTANDING) { isp_prt(isp, ISP_LOGTINFO, "[0x%x] handling only %d CCBs at a time (flags for this ccb: 0x%x)", cso->tag_id, ATPD_CCB_OUTSTANDING, ccb->ccb_h.flags); TAILQ_INSERT_HEAD(waitq, &ccb->ccb_h, sim_links.tqe); break; } /* * Does the initiator expect FC-Tape style responses? */ if ((atp->word3 & PRLI_WD3_RETRY) && fcp->fctape_enabled) { fctape = 1; } else { fctape = 0; } /* * If we already did the data xfer portion of a CTIO that sends data * and status, don't do it again and do the status portion now. */ if (atp->sendst) { isp_prt(isp, ISP_LOGTDEBUG0, "[0x%x] now sending synthesized status orig_dl=%u xfered=%u bit=%u", cso->tag_id, atp->orig_datalen, atp->bytes_xfered, atp->bytes_in_transit); xfrlen = 0; /* we already did the data transfer */ atp->sendst = 0; } if (ccb->ccb_h.flags & CAM_SEND_STATUS) { sendstatus = 1; } else { sendstatus = 0; } if (ccb->ccb_h.flags & CAM_SEND_SENSE) { KASSERT((sendstatus != 0), ("how can you have CAM_SEND_SENSE w/o CAM_SEND_STATUS?")); /* * Sense length is not the entire sense data structure size. Periph * drivers don't seem to be setting sense_len to reflect the actual * size. We'll peek inside to get the right amount. */ sense_length = cso->sense_len; /* * This 'cannot' happen */ if (sense_length > (XCMD_SIZE - MIN_FCP_RESPONSE_SIZE)) { sense_length = XCMD_SIZE - MIN_FCP_RESPONSE_SIZE; } } else { sense_length = 0; } /* * Check for overflow */ tmp = atp->bytes_xfered + atp->bytes_in_transit; if (xfrlen > 0 && tmp > atp->orig_datalen) { isp_prt(isp, ISP_LOGERR, "%s: [0x%x] data overflow by %u bytes", __func__, cso->tag_id, tmp + xfrlen - atp->orig_datalen); ccb->ccb_h.status = CAM_DATA_RUN_ERR; xpt_done(ccb); continue; } if (xfrlen > atp->orig_datalen - tmp) { xfrlen = atp->orig_datalen - tmp; if (xfrlen == 0 && !sendstatus) { cso->resid = cso->dxfer_len; ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); continue; } } memset(cto, 0, QENTRY_LEN); cto->ct_header.rqs_entry_type = RQSTYPE_CTIO7; cto->ct_header.rqs_entry_count = 1; cto->ct_header.rqs_seqno |= ATPD_SEQ_NOTIFY_CAM; ATPD_SET_SEQNO(cto, atp); cto->ct_nphdl = atp->nphdl; cto->ct_rxid = atp->tag; cto->ct_iid_lo = atp->sid; cto->ct_iid_hi = atp->sid >> 16; cto->ct_oxid = atp->oxid; cto->ct_vpidx = ISP_GET_VPIDX(isp, XS_CHANNEL(ccb)); cto->ct_timeout = XS_TIME(ccb); cto->ct_flags = atp->tattr << CT7_TASK_ATTR_SHIFT; /* * Mode 1, status, no data. Only possible when we are sending status, have * no data to transfer, and any sense data can fit into a ct7_entry_t. * * Mode 2, status, no data. We have to use this in the case that * the sense data won't fit into a ct7_entry_t. * */ if (sendstatus && xfrlen == 0) { cto->ct_flags |= CT7_SENDSTATUS | CT7_NO_DATA; resid = atp->orig_datalen - atp->bytes_xfered - atp->bytes_in_transit; if (sense_length <= MAXRESPLEN_24XX) { cto->ct_flags |= CT7_FLAG_MODE1; cto->ct_scsi_status = cso->scsi_status; if (resid < 0) { cto->ct_resid = -resid; cto->ct_scsi_status |= (FCP_RESID_OVERFLOW << 8); } else if (resid > 0) { cto->ct_resid = resid; cto->ct_scsi_status |= (FCP_RESID_UNDERFLOW << 8); } if (fctape) { cto->ct_flags |= CT7_CONFIRM|CT7_EXPLCT_CONF; } if (sense_length) { cto->ct_scsi_status |= (FCP_SNSLEN_VALID << 8); cto->rsp.m1.ct_resplen = cto->ct_senselen = sense_length; memcpy(cto->rsp.m1.ct_resp, &cso->sense_data, sense_length); } } else { bus_addr_t addr; fcp_rsp_iu_t rp; if (atp->ests == NULL) { atp->ests = isp_get_ecmd(isp); if (atp->ests == NULL) { TAILQ_INSERT_HEAD(waitq, &ccb->ccb_h, sim_links.tqe); break; } } memset(&rp, 0, sizeof(rp)); if (fctape) { cto->ct_flags |= CT7_CONFIRM|CT7_EXPLCT_CONF; rp.fcp_rsp_bits |= FCP_CONF_REQ; } cto->ct_flags |= CT7_FLAG_MODE2; rp.fcp_rsp_scsi_status = cso->scsi_status; if (resid < 0) { rp.fcp_rsp_resid = -resid; rp.fcp_rsp_bits |= FCP_RESID_OVERFLOW; } else if (resid > 0) { rp.fcp_rsp_resid = resid; rp.fcp_rsp_bits |= FCP_RESID_UNDERFLOW; } if (sense_length) { rp.fcp_rsp_snslen = sense_length; cto->ct_senselen = sense_length; rp.fcp_rsp_bits |= FCP_SNSLEN_VALID; isp_put_fcp_rsp_iu(isp, &rp, atp->ests); memcpy(((fcp_rsp_iu_t *)atp->ests)->fcp_rsp_extra, &cso->sense_data, sense_length); } else { isp_put_fcp_rsp_iu(isp, &rp, atp->ests); } if (isp->isp_dblev & ISP_LOGTDEBUG1) { isp_print_bytes(isp, "FCP Response Frame After Swizzling", MIN_FCP_RESPONSE_SIZE + sense_length, atp->ests); } bus_dmamap_sync(isp->isp_osinfo.ecmd_dmat, isp->isp_osinfo.ecmd_map, BUS_DMASYNC_PREWRITE); addr = isp->isp_osinfo.ecmd_dma; addr += ((((isp_ecmd_t *)atp->ests) - isp->isp_osinfo.ecmd_base) * XCMD_SIZE); isp_prt(isp, ISP_LOGTDEBUG0, "%s: ests base %p vaddr %p ecmd_dma %jx addr %jx len %u", __func__, isp->isp_osinfo.ecmd_base, atp->ests, (uintmax_t) isp->isp_osinfo.ecmd_dma, (uintmax_t)addr, MIN_FCP_RESPONSE_SIZE + sense_length); cto->rsp.m2.ct_datalen = MIN_FCP_RESPONSE_SIZE + sense_length; cto->rsp.m2.ct_fcp_rsp_iudata.ds_base = DMA_LO32(addr); cto->rsp.m2.ct_fcp_rsp_iudata.ds_basehi = DMA_HI32(addr); cto->rsp.m2.ct_fcp_rsp_iudata.ds_count = MIN_FCP_RESPONSE_SIZE + sense_length; } if (sense_length) { isp_prt(isp, ISP_LOGTDEBUG0, "%s: CTIO7[0x%x] seq %u nc %d CDB0=%x sstatus=0x%x flags=0x%x resid=%d slen %u sense: %x %x/%x/%x", __func__, cto->ct_rxid, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), atp->cdb0, cto->ct_scsi_status, cto->ct_flags, cto->ct_resid, sense_length, cso->sense_data.error_code, cso->sense_data.sense_buf[1], cso->sense_data.sense_buf[11], cso->sense_data.sense_buf[12]); } else { isp_prt(isp, ISP_LOGDEBUG0, "%s: CTIO7[0x%x] seq %u nc %d CDB0=%x sstatus=0x%x flags=0x%x resid=%d", __func__, cto->ct_rxid, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), atp->cdb0, cto->ct_scsi_status, cto->ct_flags, cto->ct_resid); } atp->state = ATPD_STATE_LAST_CTIO; } /* * Mode 0 data transfers, *possibly* with status. */ if (xfrlen != 0) { cto->ct_flags |= CT7_FLAG_MODE0; if ((cso->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { cto->ct_flags |= CT7_DATA_IN; } else { cto->ct_flags |= CT7_DATA_OUT; } cto->rsp.m0.reloff = atp->bytes_xfered + atp->bytes_in_transit; cto->rsp.m0.ct_xfrlen = xfrlen; #ifdef DEBUG if (ISP_FC_PC(isp, XS_CHANNEL(ccb))->inject_lost_data_frame && xfrlen > ISP_FC_PC(isp, XS_CHANNEL(ccb))->inject_lost_data_frame) { isp_prt(isp, ISP_LOGWARN, "%s: truncating data frame with xfrlen %d to %d", __func__, xfrlen, xfrlen - (xfrlen >> 2)); ISP_FC_PC(isp, XS_CHANNEL(ccb))->inject_lost_data_frame = 0; cto->rsp.m0.ct_xfrlen -= xfrlen >> 2; } #endif if (sendstatus) { resid = atp->orig_datalen - atp->bytes_xfered - xfrlen; if (cso->scsi_status == SCSI_STATUS_OK && resid == 0 /* && fctape == 0 */) { cto->ct_flags |= CT7_SENDSTATUS; atp->state = ATPD_STATE_LAST_CTIO; if (fctape) { cto->ct_flags |= CT7_CONFIRM|CT7_EXPLCT_CONF; } } else { atp->sendst = 1; /* send status later */ cto->ct_header.rqs_seqno &= ~ATPD_SEQ_NOTIFY_CAM; atp->state = ATPD_STATE_CTIO; } } else { atp->state = ATPD_STATE_CTIO; } isp_prt(isp, ISP_LOGTDEBUG0, "%s: CTIO7[0x%x] seq %u nc %d CDB0=%x sstatus=0x%x flags=0x%x xfrlen=%u off=%u", __func__, cto->ct_rxid, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), atp->cdb0, cto->ct_scsi_status, cto->ct_flags, xfrlen, atp->bytes_xfered); } if (isp_get_pcmd(isp, ccb)) { ISP_PATH_PRT(isp, ISP_LOGWARN, ccb->ccb_h.path, "out of PCMDs\n"); TAILQ_INSERT_HEAD(waitq, &ccb->ccb_h, sim_links.tqe); break; } handle = isp_allocate_handle(isp, ccb, ISP_HANDLE_TARGET); if (handle == 0) { ISP_PATH_PRT(isp, ISP_LOGWARN, ccb->ccb_h.path, "No XFLIST pointers for %s\n", __func__); TAILQ_INSERT_HEAD(waitq, &ccb->ccb_h, sim_links.tqe); isp_free_pcmd(isp, ccb); break; } atp->bytes_in_transit += xfrlen; PISP_PCMD(ccb)->datalen = xfrlen; /* * Call the dma setup routines for this entry (and any subsequent * CTIOs) if there's data to move, and then tell the f/w it's got * new things to play with. As with isp_start's usage of DMA setup, * any swizzling is done in the machine dependent layer. Because * of this, we put the request onto the queue area first in native * format. */ cto->ct_syshandle = handle; dmaresult = ISP_DMASETUP(isp, cso, cto); if (dmaresult != 0) { isp_destroy_handle(isp, handle); isp_free_pcmd(isp, ccb); if (dmaresult == CMD_EAGAIN) { TAILQ_INSERT_HEAD(waitq, &ccb->ccb_h, sim_links.tqe); break; } ccb->ccb_h.status = CAM_REQ_CMP_ERR; xpt_done(ccb); continue; } ccb->ccb_h.status = CAM_REQ_INPROG | CAM_SIM_QUEUED; if (xfrlen) { ccb->ccb_h.spriv_field0 = atp->bytes_xfered; } else { ccb->ccb_h.spriv_field0 = ~0; } atp->ctcnt++; atp->seqno++; } } static void isp_refire_notify_ack(void *arg) { isp_tna_t *tp = arg; ispsoftc_t *isp = tp->isp; ISP_ASSERT_LOCKED(isp); if (isp_notify_ack(isp, tp->not)) { callout_schedule(&tp->timer, 5); } else { free(tp, M_DEVBUF); } } static void isp_complete_ctio(ispsoftc_t *isp, union ccb *ccb) { isp_rq_check_below(isp); ccb->ccb_h.status &= ~CAM_SIM_QUEUED; xpt_done(ccb); } static void isp_handle_platform_atio7(ispsoftc_t *isp, at7_entry_t *aep) { int cdbxlen; lun_id_t lun; uint16_t chan, nphdl = NIL_HANDLE; uint32_t did, sid; fcportdb_t *lp; tstate_t *tptr; struct ccb_accept_tio *atiop; atio_private_data_t *atp = NULL; atio_private_data_t *oatp; inot_private_data_t *ntp; did = (aep->at_hdr.d_id[0] << 16) | (aep->at_hdr.d_id[1] << 8) | aep->at_hdr.d_id[2]; sid = (aep->at_hdr.s_id[0] << 16) | (aep->at_hdr.s_id[1] << 8) | aep->at_hdr.s_id[2]; lun = CAM_EXTLUN_BYTE_SWIZZLE(be64dec(aep->at_cmnd.fcp_cmnd_lun)); if (ISP_CAP_MULTI_ID(isp) && isp->isp_nchan > 1) { /* Channel has to be derived from D_ID */ isp_find_chan_by_did(isp, did, &chan); if (chan == ISP_NOCHAN) { isp_prt(isp, ISP_LOGWARN, "%s: [RX_ID 0x%x] D_ID %x not found on any channel", __func__, aep->at_rxid, did); isp_endcmd(isp, aep, NIL_HANDLE, ISP_NOCHAN, ECMD_TERMINATE, 0); return; } } else { chan = 0; } /* * Find the PDB entry for this initiator */ if (isp_find_pdb_by_portid(isp, chan, sid, &lp) == 0) { /* * If we're not in the port database terminate the exchange. */ isp_prt(isp, ISP_LOGTINFO, "%s: [RX_ID 0x%x] D_ID 0x%06x found on Chan %d for S_ID 0x%06x wasn't in PDB already", __func__, aep->at_rxid, did, chan, sid); isp_dump_portdb(isp, chan); isp_endcmd(isp, aep, NIL_HANDLE, chan, ECMD_TERMINATE, 0); return; } nphdl = lp->handle; /* * Get the tstate pointer */ tptr = get_lun_statep(isp, chan, lun); if (tptr == NULL) { tptr = get_lun_statep(isp, chan, CAM_LUN_WILDCARD); if (tptr == NULL) { isp_prt(isp, ISP_LOGWARN, "%s: [0x%x] no state pointer for lun %jx or wildcard", __func__, aep->at_rxid, (uintmax_t)lun); if (lun == 0) { isp_endcmd(isp, aep, nphdl, chan, SCSI_STATUS_BUSY, 0); } else { isp_endcmd(isp, aep, nphdl, chan, SCSI_STATUS_CHECK_COND | ECMD_SVALID | (0x5 << 12) | (0x25 << 16), 0); } return; } } /* * Start any commands pending resources first. */ if (isp_atio_restart(isp, chan, tptr)) goto noresrc; /* * If the f/w is out of resources, just send a BUSY status back. */ if (aep->at_rxid == AT7_NORESRC_RXID) { isp_endcmd(isp, aep, nphdl, chan, SCSI_BUSY, 0); return; } /* * If we're out of resources, just send a BUSY status back. */ atiop = (struct ccb_accept_tio *) SLIST_FIRST(&tptr->atios); if (atiop == NULL) { isp_prt(isp, ISP_LOGTDEBUG0, "[0x%x] out of atios", aep->at_rxid); goto noresrc; } oatp = isp_find_atpd(isp, chan, aep->at_rxid); if (oatp) { isp_prt(isp, oatp->state == ATPD_STATE_LAST_CTIO ? ISP_LOGTDEBUG0 : ISP_LOGWARN, "[0x%x] tag wraparound (N-Port Handle " "0x%04x S_ID 0x%04x OX_ID 0x%04x) oatp state %d", aep->at_rxid, nphdl, sid, aep->at_hdr.ox_id, oatp->state); /* * It's not a "no resource" condition- but we can treat it like one */ goto noresrc; } atp = isp_get_atpd(isp, chan, aep->at_rxid); if (atp == NULL) { isp_prt(isp, ISP_LOGTDEBUG0, "[0x%x] out of atps", aep->at_rxid); isp_endcmd(isp, aep, nphdl, chan, SCSI_BUSY, 0); return; } atp->word3 = lp->prli_word3; atp->state = ATPD_STATE_ATIO; SLIST_REMOVE_HEAD(&tptr->atios, sim_links.sle); ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, atiop->ccb_h.path, "Take FREE ATIO\n"); atiop->init_id = FC_PORTDB_TGT(isp, chan, lp); atiop->ccb_h.target_id = ISP_MAX_TARGETS(isp); atiop->ccb_h.target_lun = lun; atiop->sense_len = 0; cdbxlen = aep->at_cmnd.fcp_cmnd_alen_datadir >> FCP_CMND_ADDTL_CDBLEN_SHIFT; if (cdbxlen) { isp_prt(isp, ISP_LOGWARN, "additional CDBLEN ignored"); } cdbxlen = sizeof (aep->at_cmnd.cdb_dl.sf.fcp_cmnd_cdb); ISP_MEMCPY(atiop->cdb_io.cdb_bytes, aep->at_cmnd.cdb_dl.sf.fcp_cmnd_cdb, cdbxlen); atiop->cdb_len = cdbxlen; atiop->ccb_h.status = CAM_CDB_RECVD; atiop->tag_id = atp->tag; switch (aep->at_cmnd.fcp_cmnd_task_attribute & FCP_CMND_TASK_ATTR_MASK) { case FCP_CMND_TASK_ATTR_SIMPLE: atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID; atiop->tag_action = MSG_SIMPLE_TASK; break; case FCP_CMND_TASK_ATTR_HEAD: atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID; atiop->tag_action = MSG_HEAD_OF_QUEUE_TASK; break; case FCP_CMND_TASK_ATTR_ORDERED: atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID; atiop->tag_action = MSG_ORDERED_TASK; break; case FCP_CMND_TASK_ATTR_ACA: atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID; atiop->tag_action = MSG_ACA_TASK; break; case FCP_CMND_TASK_ATTR_UNTAGGED: default: atiop->tag_action = 0; break; } atiop->priority = (aep->at_cmnd.fcp_cmnd_task_attribute & FCP_CMND_PRIO_MASK) >> FCP_CMND_PRIO_SHIFT; atp->orig_datalen = aep->at_cmnd.cdb_dl.sf.fcp_cmnd_dl; atp->bytes_xfered = 0; atp->lun = lun; atp->nphdl = nphdl; atp->sid = sid; atp->did = did; atp->oxid = aep->at_hdr.ox_id; atp->rxid = aep->at_hdr.rx_id; atp->cdb0 = atiop->cdb_io.cdb_bytes[0]; atp->tattr = aep->at_cmnd.fcp_cmnd_task_attribute & FCP_CMND_TASK_ATTR_MASK; atp->state = ATPD_STATE_CAM; isp_prt(isp, ISP_LOGTDEBUG0, "ATIO7[0x%x] CDB=0x%x lun %jx datalen %u", aep->at_rxid, atp->cdb0, (uintmax_t)lun, atp->orig_datalen); xpt_done((union ccb *)atiop); return; noresrc: KASSERT(atp == NULL, ("%s: atp is not NULL on noresrc!\n", __func__)); ntp = isp_get_ntpd(isp, chan); if (ntp == NULL) { isp_endcmd(isp, aep, nphdl, chan, SCSI_STATUS_BUSY, 0); return; } memcpy(ntp->data, aep, QENTRY_LEN); STAILQ_INSERT_TAIL(&tptr->restart_queue, ntp, next); } /* * Handle starting an SRR (sequence retransmit request) * We get here when we've gotten the immediate notify * and the return of all outstanding CTIOs for this * transaction. */ static void isp_handle_srr_start(ispsoftc_t *isp, atio_private_data_t *atp) { in_fcentry_24xx_t *inot; uint32_t srr_off, ccb_off, ccb_len, ccb_end; union ccb *ccb; inot = (in_fcentry_24xx_t *)atp->srr; srr_off = inot->in_srr_reloff_lo | (inot->in_srr_reloff_hi << 16); ccb = atp->srr_ccb; atp->srr_ccb = NULL; atp->nsrr++; if (ccb == NULL) { isp_prt(isp, ISP_LOGWARN, "SRR[0x%x] null ccb", atp->tag); goto fail; } ccb_off = ccb->ccb_h.spriv_field0; ccb_len = ccb->csio.dxfer_len; ccb_end = (ccb_off == ~0)? ~0 : ccb_off + ccb_len; switch (inot->in_srr_iu) { case R_CTL_INFO_SOLICITED_DATA: /* * We have to restart a FCP_DATA data out transaction */ atp->sendst = 0; atp->bytes_xfered = srr_off; if (ccb_len == 0) { isp_prt(isp, ISP_LOGWARN, "SRR[0x%x] SRR offset 0x%x but current CCB doesn't transfer data", atp->tag, srr_off); goto mdp; } if (srr_off < ccb_off || ccb_off > srr_off + ccb_len) { isp_prt(isp, ISP_LOGWARN, "SRR[0x%x] SRR offset 0x%x not covered by current CCB data range [0x%x..0x%x]", atp->tag, srr_off, ccb_off, ccb_end); goto mdp; } isp_prt(isp, ISP_LOGWARN, "SRR[0x%x] SRR offset 0x%x covered by current CCB data range [0x%x..0x%x]", atp->tag, srr_off, ccb_off, ccb_end); break; case R_CTL_INFO_COMMAND_STATUS: isp_prt(isp, ISP_LOGTINFO, "SRR[0x%x] Got an FCP RSP SRR- resending status", atp->tag); atp->sendst = 1; /* * We have to restart a FCP_RSP IU transaction */ break; case R_CTL_INFO_DATA_DESCRIPTOR: /* * We have to restart an FCP DATA in transaction */ isp_prt(isp, ISP_LOGWARN, "Got an FCP DATA IN SRR- dropping"); goto fail; default: isp_prt(isp, ISP_LOGWARN, "Got an unknown information (%x) SRR- dropping", inot->in_srr_iu); goto fail; } /* * We can't do anything until this is acked, so we might as well start it now. * We aren't going to do the usual asynchronous ack issue because we need * to make sure this gets on the wire first. */ if (isp_notify_ack(isp, inot)) { isp_prt(isp, ISP_LOGWARN, "could not push positive ack for SRR- you lose"); goto fail; } isp_target_start_ctio(isp, ccb, FROM_SRR); return; fail: inot->in_reserved = 1; isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot); ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= CAM_REQ_CMP_ERR; isp_complete_ctio(isp, ccb); return; mdp: if (isp_notify_ack(isp, inot)) { isp_prt(isp, ISP_LOGWARN, "could not push positive ack for SRR- you lose"); goto fail; } ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= CAM_MESSAGE_RECV; /* * This is not a strict interpretation of MDP, but it's close */ ccb->csio.msg_ptr = &ccb->csio.sense_data.sense_buf[SSD_FULL_SIZE - 16]; ccb->csio.msg_len = 7; ccb->csio.msg_ptr[0] = MSG_EXTENDED; ccb->csio.msg_ptr[1] = 5; ccb->csio.msg_ptr[2] = 0; /* modify data pointer */ ccb->csio.msg_ptr[3] = srr_off >> 24; ccb->csio.msg_ptr[4] = srr_off >> 16; ccb->csio.msg_ptr[5] = srr_off >> 8; ccb->csio.msg_ptr[6] = srr_off; isp_complete_ctio(isp, ccb); } static void isp_handle_platform_srr(ispsoftc_t *isp, isp_notify_t *notify) { in_fcentry_24xx_t *inot = notify->nt_lreserved; atio_private_data_t *atp; uint32_t tag = notify->nt_tagval & 0xffffffff; atp = isp_find_atpd(isp, notify->nt_channel, tag); if (atp == NULL) { isp_prt(isp, ISP_LOGERR, "%s: cannot find adjunct for %x in SRR Notify", __func__, tag); isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot); return; } atp->srr_notify_rcvd = 1; memcpy(atp->srr, inot, sizeof (atp->srr)); isp_prt(isp, ISP_LOGTINFO, "SRR[0x%x] flags 0x%x srr_iu %x reloff 0x%x", inot->in_rxid, inot->in_flags, inot->in_srr_iu, ((uint32_t)inot->in_srr_reloff_hi << 16) | inot->in_srr_reloff_lo); if (atp->srr_ccb) isp_handle_srr_start(isp, atp); } static void isp_handle_platform_ctio(ispsoftc_t *isp, ct7_entry_t *ct) { union ccb *ccb; int sentstatus = 0, ok = 0, notify_cam = 0, failure = 0; atio_private_data_t *atp = NULL; int bus; uint32_t handle, data_requested, resid; handle = ct->ct_syshandle; ccb = isp_find_xs(isp, handle); if (ccb == NULL) { isp_print_bytes(isp, "null ccb in isp_handle_platform_ctio", QENTRY_LEN, ct); return; } isp_destroy_handle(isp, handle); resid = data_requested = PISP_PCMD(ccb)->datalen; isp_free_pcmd(isp, ccb); bus = XS_CHANNEL(ccb); atp = isp_find_atpd(isp, bus, ct->ct_rxid); if (atp == NULL) { /* * XXX: isp_clear_commands() generates fake CTIO with zero * ct_rxid value, filling only ct_syshandle. Workaround * that using tag_id from the CCB, pointed by ct_syshandle. */ atp = isp_find_atpd(isp, bus, ccb->csio.tag_id); } if (atp == NULL) { isp_prt(isp, ISP_LOGERR, "%s: cannot find adjunct for %x after I/O", __func__, ccb->csio.tag_id); return; } KASSERT((atp->ctcnt > 0), ("ctio count not greater than zero")); atp->bytes_in_transit -= data_requested; atp->ctcnt -= 1; ccb->ccb_h.status &= ~CAM_STATUS_MASK; if (ct->ct_nphdl == CT7_SRR) { atp->srr_ccb = ccb; if (atp->srr_notify_rcvd) isp_handle_srr_start(isp, atp); return; } if (ct->ct_nphdl == CT_HBA_RESET) { sentstatus = (ccb->ccb_h.flags & CAM_SEND_STATUS) && (atp->sendst == 0); failure = CAM_UNREC_HBA_ERROR; } else { sentstatus = ct->ct_flags & CT7_SENDSTATUS; ok = (ct->ct_nphdl == CT7_OK); notify_cam = (ct->ct_header.rqs_seqno & ATPD_SEQ_NOTIFY_CAM) != 0; if ((ct->ct_flags & CT7_DATAMASK) != CT7_NO_DATA) resid = ct->ct_resid; } isp_prt(isp, ok? ISP_LOGTDEBUG0 : ISP_LOGWARN, "%s: CTIO7[%x] seq %u nc %d sts 0x%x flg 0x%x sns %d resid %d %s", __func__, ct->ct_rxid, ATPD_GET_SEQNO(ct), notify_cam, ct->ct_nphdl, ct->ct_flags, (ccb->ccb_h.status & CAM_SENT_SENSE) != 0, resid, sentstatus? "FIN" : "MID"); if (ok) { if (data_requested > 0) { atp->bytes_xfered += data_requested - resid; ccb->csio.resid = ccb->csio.dxfer_len - (data_requested - resid); } if (sentstatus && (ccb->ccb_h.flags & CAM_SEND_SENSE)) ccb->ccb_h.status |= CAM_SENT_SENSE; ccb->ccb_h.status |= CAM_REQ_CMP; } else { notify_cam = 1; if (failure == CAM_UNREC_HBA_ERROR) ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR; else ccb->ccb_h.status |= CAM_REQ_CMP_ERR; } atp->state = ATPD_STATE_PDON; /* * We never *not* notify CAM when there has been any error (ok == 0), * so we never need to do an ATIO putback if we're not notifying CAM. */ isp_prt(isp, ISP_LOGTDEBUG0, "%s CTIO[0x%x] done (ok=%d nc=%d nowsendstatus=%d ccb ss=%d)", (sentstatus)? " FINAL " : "MIDTERM ", atp->tag, ok, notify_cam, atp->sendst, (ccb->ccb_h.flags & CAM_SEND_STATUS) != 0); if (notify_cam == 0) { if (atp->sendst) { isp_target_start_ctio(isp, ccb, FROM_CTIO_DONE); } return; } /* * We are done with this ATIO if we successfully sent status. * In all other cases expect either another CTIO or XPT_ABORT. */ if (ok && sentstatus) isp_put_atpd(isp, bus, atp); /* * We're telling CAM we're done with this CTIO transaction. * * 24XX cards never need an ATIO put back. */ isp_complete_ctio(isp, ccb); } static int isp_handle_platform_target_notify_ack(ispsoftc_t *isp, isp_notify_t *mp, uint32_t rsp) { ct7_entry_t local, *cto = &local; if (isp->isp_state != ISP_RUNSTATE) { isp_prt(isp, ISP_LOGTINFO, "Notify Code 0x%x (qevalid=%d) acked- h/w not ready (dropping)", mp->nt_ncode, mp->nt_lreserved != NULL); return (0); } /* * This case is for a Task Management Function, which shows up as an ATIO7 entry. */ if (mp->nt_lreserved && ((isphdr_t *)mp->nt_lreserved)->rqs_entry_type == RQSTYPE_ATIO) { at7_entry_t *aep = (at7_entry_t *)mp->nt_lreserved; fcportdb_t *lp; uint32_t sid; uint16_t nphdl; sid = (aep->at_hdr.s_id[0] << 16) | (aep->at_hdr.s_id[1] << 8) | aep->at_hdr.s_id[2]; if (isp_find_pdb_by_portid(isp, mp->nt_channel, sid, &lp)) { nphdl = lp->handle; } else { nphdl = NIL_HANDLE; } ISP_MEMZERO(cto, sizeof (ct7_entry_t)); cto->ct_header.rqs_entry_type = RQSTYPE_CTIO7; cto->ct_header.rqs_entry_count = 1; cto->ct_nphdl = nphdl; cto->ct_rxid = aep->at_rxid; cto->ct_vpidx = mp->nt_channel; cto->ct_iid_lo = sid; cto->ct_iid_hi = sid >> 16; cto->ct_oxid = aep->at_hdr.ox_id; cto->ct_flags = CT7_SENDSTATUS|CT7_NOACK|CT7_NO_DATA|CT7_FLAG_MODE1; cto->ct_flags |= (aep->at_ta_len >> 12) << CT7_TASK_ATTR_SHIFT; if (rsp != 0) { cto->ct_scsi_status |= (FCP_RSPLEN_VALID << 8); cto->rsp.m1.ct_resplen = 4; ISP_MEMZERO(cto->rsp.m1.ct_resp, sizeof (cto->rsp.m1.ct_resp)); cto->rsp.m1.ct_resp[0] = rsp & 0xff; cto->rsp.m1.ct_resp[1] = (rsp >> 8) & 0xff; cto->rsp.m1.ct_resp[2] = (rsp >> 16) & 0xff; cto->rsp.m1.ct_resp[3] = (rsp >> 24) & 0xff; } return (isp_send_entry(isp, cto)); } /* * This case is for a responding to an ABTS frame */ if (mp->nt_lreserved && ((isphdr_t *)mp->nt_lreserved)->rqs_entry_type == RQSTYPE_ABTS_RCVD) { /* * Overload nt_need_ack here to mark whether we've terminated the associated command. */ if (mp->nt_need_ack) { abts_t *abts = (abts_t *)mp->nt_lreserved; ISP_MEMZERO(cto, sizeof (ct7_entry_t)); isp_prt(isp, ISP_LOGTDEBUG0, "%s: [%x] terminating after ABTS received", __func__, abts->abts_rxid_task); cto->ct_header.rqs_entry_type = RQSTYPE_CTIO7; cto->ct_header.rqs_entry_count = 1; cto->ct_nphdl = mp->nt_nphdl; cto->ct_rxid = abts->abts_rxid_task; cto->ct_iid_lo = mp->nt_sid; cto->ct_iid_hi = mp->nt_sid >> 16; cto->ct_oxid = abts->abts_ox_id; cto->ct_vpidx = mp->nt_channel; cto->ct_flags = CT7_NOACK|CT7_TERMINATE; if (isp_send_entry(isp, cto)) { return (ENOMEM); } mp->nt_need_ack = 0; } return (isp_acknak_abts(isp, mp->nt_lreserved, 0)); } /* * General purpose acknowledgement */ if (mp->nt_need_ack) { isp_prt(isp, ISP_LOGTINFO, "Notify Code 0x%x (qevalid=%d) being acked", mp->nt_ncode, mp->nt_lreserved != NULL); /* * Don't need to use the guaranteed send because the caller can retry */ return (isp_notify_ack(isp, mp->nt_lreserved)); } return (0); } /* * Handle task management functions. * * We show up here with a notify structure filled out. * * The nt_lreserved tag points to the original queue entry */ static void isp_handle_platform_target_tmf(ispsoftc_t *isp, isp_notify_t *notify) { tstate_t *tptr; fcportdb_t *lp; struct ccb_immediate_notify *inot; inot_private_data_t *ntp = NULL; atio_private_data_t *atp; lun_id_t lun; isp_prt(isp, ISP_LOGTDEBUG0, "%s: code 0x%x sid 0x%x tagval 0x%016llx chan %d lun %jx", __func__, notify->nt_ncode, notify->nt_sid, (unsigned long long) notify->nt_tagval, notify->nt_channel, notify->nt_lun); if (notify->nt_lun == LUN_ANY) { if (notify->nt_tagval == TAG_ANY) { lun = CAM_LUN_WILDCARD; } else { atp = isp_find_atpd(isp, notify->nt_channel, notify->nt_tagval & 0xffffffff); lun = atp ? atp->lun : CAM_LUN_WILDCARD; } } else { lun = notify->nt_lun; } tptr = get_lun_statep(isp, notify->nt_channel, lun); if (tptr == NULL) { tptr = get_lun_statep(isp, notify->nt_channel, CAM_LUN_WILDCARD); if (tptr == NULL) { isp_prt(isp, ISP_LOGWARN, "%s: no state pointer found for chan %d lun %#jx", __func__, notify->nt_channel, (uintmax_t)lun); goto bad; } } inot = (struct ccb_immediate_notify *) SLIST_FIRST(&tptr->inots); if (inot == NULL) { isp_prt(isp, ISP_LOGWARN, "%s: out of immediate notify structures for chan %d lun %#jx", __func__, notify->nt_channel, (uintmax_t)lun); goto bad; } inot->ccb_h.target_id = ISP_MAX_TARGETS(isp); inot->ccb_h.target_lun = lun; if (isp_find_pdb_by_portid(isp, notify->nt_channel, notify->nt_sid, &lp) == 0 && isp_find_pdb_by_handle(isp, notify->nt_channel, notify->nt_nphdl, &lp) == 0) { inot->initiator_id = CAM_TARGET_WILDCARD; } else { inot->initiator_id = FC_PORTDB_TGT(isp, notify->nt_channel, lp); } inot->seq_id = notify->nt_tagval; inot->tag_id = notify->nt_tagval >> 32; switch (notify->nt_ncode) { case NT_ABORT_TASK: isp_target_mark_aborted_early(isp, notify->nt_channel, tptr, inot->tag_id); inot->arg = MSG_ABORT_TASK; break; case NT_ABORT_TASK_SET: isp_target_mark_aborted_early(isp, notify->nt_channel, tptr, TAG_ANY); inot->arg = MSG_ABORT_TASK_SET; break; case NT_CLEAR_ACA: inot->arg = MSG_CLEAR_ACA; break; case NT_CLEAR_TASK_SET: inot->arg = MSG_CLEAR_TASK_SET; break; case NT_LUN_RESET: inot->arg = MSG_LOGICAL_UNIT_RESET; break; case NT_TARGET_RESET: inot->arg = MSG_TARGET_RESET; break; case NT_QUERY_TASK_SET: inot->arg = MSG_QUERY_TASK_SET; break; case NT_QUERY_ASYNC_EVENT: inot->arg = MSG_QUERY_ASYNC_EVENT; break; default: isp_prt(isp, ISP_LOGWARN, "%s: unknown TMF code 0x%x for chan %d lun %#jx", __func__, notify->nt_ncode, notify->nt_channel, (uintmax_t)lun); goto bad; } ntp = isp_get_ntpd(isp, notify->nt_channel); if (ntp == NULL) { isp_prt(isp, ISP_LOGWARN, "%s: out of inotify private structures", __func__); goto bad; } ISP_MEMCPY(&ntp->nt, notify, sizeof (isp_notify_t)); if (notify->nt_lreserved) { ISP_MEMCPY(&ntp->data, notify->nt_lreserved, QENTRY_LEN); ntp->nt.nt_lreserved = &ntp->data; } ntp->seq_id = notify->nt_tagval; ntp->tag_id = notify->nt_tagval >> 32; SLIST_REMOVE_HEAD(&tptr->inots, sim_links.sle); ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, inot->ccb_h.path, "Take FREE INOT\n"); inot->ccb_h.status = CAM_MESSAGE_RECV; xpt_done((union ccb *)inot); return; bad: if (notify->nt_need_ack) { if (((isphdr_t *)notify->nt_lreserved)->rqs_entry_type == RQSTYPE_ABTS_RCVD) { if (isp_acknak_abts(isp, notify->nt_lreserved, ENOMEM)) { isp_prt(isp, ISP_LOGWARN, "you lose- unable to send an ACKNAK"); } } else { isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, notify->nt_lreserved); } } } static void isp_target_mark_aborted_early(ispsoftc_t *isp, int chan, tstate_t *tptr, uint32_t tag_id) { struct isp_fc *fc = ISP_FC_PC(isp, chan); atio_private_data_t *atp; inot_private_data_t *ntp, *tmp; uint32_t this_tag_id; /* * First, clean any commands pending restart */ STAILQ_FOREACH_SAFE(ntp, &tptr->restart_queue, next, tmp) { this_tag_id = ((at7_entry_t *)ntp->data)->at_rxid; if ((uint64_t)tag_id == TAG_ANY || tag_id == this_tag_id) { isp_endcmd(isp, ntp->data, NIL_HANDLE, chan, ECMD_TERMINATE, 0); isp_put_ntpd(isp, chan, ntp); STAILQ_REMOVE(&tptr->restart_queue, ntp, inot_private_data, next); } } /* * Now mark other ones dead as well. */ for (atp = fc->atpool; atp < &fc->atpool[ATPDPSIZE]; atp++) { if (atp->lun != tptr->ts_lun) continue; if ((uint64_t)tag_id == TAG_ANY || atp->tag == tag_id) atp->dead = 1; } } #endif static void isp_poll(struct cam_sim *sim) { ispsoftc_t *isp = cam_sim_softc(sim); ISP_RUN_ISR(isp); } static void isp_watchdog(void *arg) { struct ccb_scsiio *xs = arg; ispsoftc_t *isp; uint32_t ohandle = ISP_HANDLE_FREE, handle; isp = XS_ISP(xs); handle = isp_find_handle(isp, xs); /* * Hand crank the interrupt code just to be sure the command isn't stuck somewhere. */ if (handle != ISP_HANDLE_FREE) { ISP_RUN_ISR(isp); ohandle = handle; handle = isp_find_handle(isp, xs); } if (handle != ISP_HANDLE_FREE) { /* * Try and make sure the command is really dead before * we release the handle (and DMA resources) for reuse. * * If we are successful in aborting the command then * we're done here because we'll get the command returned * back separately. */ if (isp_control(isp, ISPCTL_ABORT_CMD, xs) == 0) { return; } /* * Note that after calling the above, the command may in * fact have been completed. */ xs = isp_find_xs(isp, handle); /* * If the command no longer exists, then we won't * be able to find the xs again with this handle. */ if (xs == NULL) { return; } /* * After this point, the command is really dead. */ ISP_DMAFREE(isp, xs); isp_destroy_handle(isp, handle); isp_prt(isp, ISP_LOGERR, "%s: timeout for handle 0x%x", __func__, handle); XS_SETERR(xs, CAM_CMD_TIMEOUT); isp_done(xs); } else { if (ohandle != ISP_HANDLE_FREE) { isp_prt(isp, ISP_LOGWARN, "%s: timeout for handle 0x%x, recovered during interrupt", __func__, ohandle); } else { isp_prt(isp, ISP_LOGWARN, "%s: timeout for handle already free", __func__); } } } static void isp_make_here(ispsoftc_t *isp, fcportdb_t *fcp, int chan, int tgt) { union ccb *ccb; struct isp_fc *fc = ISP_FC_PC(isp, chan); /* * Allocate a CCB, create a wildcard path for this target and schedule a rescan. */ ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { isp_prt(isp, ISP_LOGWARN, "Chan %d unable to alloc CCB for rescan", chan); return; } if (xpt_create_path(&ccb->ccb_h.path, NULL, cam_sim_path(fc->sim), tgt, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { isp_prt(isp, ISP_LOGWARN, "unable to create path for rescan"); xpt_free_ccb(ccb); return; } xpt_rescan(ccb); } static void isp_make_gone(ispsoftc_t *isp, fcportdb_t *fcp, int chan, int tgt) { struct cam_path *tp; struct isp_fc *fc = ISP_FC_PC(isp, chan); if (xpt_create_path(&tp, NULL, cam_sim_path(fc->sim), tgt, CAM_LUN_WILDCARD) == CAM_REQ_CMP) { xpt_async(AC_LOST_DEVICE, tp, NULL); xpt_free_path(tp); } } /* * Gone Device Timer Function- when we have decided that a device has gone * away, we wait a specific period of time prior to telling the OS it has * gone away. * * This timer function fires once a second and then scans the port database * for devices that are marked dead but still have a virtual target assigned. * We decrement a counter for that port database entry, and when it hits zero, * we tell the OS the device has gone away. */ static void isp_gdt(void *arg) { struct isp_fc *fc = arg; taskqueue_enqueue(taskqueue_thread, &fc->gtask); } static void isp_gdt_task(void *arg, int pending) { struct isp_fc *fc = arg; ispsoftc_t *isp = fc->isp; int chan = fc - ISP_FC_PC(isp, 0); fcportdb_t *lp; struct ac_contract ac; struct ac_device_changed *adc; int dbidx, more_to_do = 0; ISP_LOCK(isp); isp_prt(isp, ISP_LOGDEBUG0, "Chan %d GDT timer expired", chan); for (dbidx = 0; dbidx < MAX_FC_TARG; dbidx++) { lp = &FCPARAM(isp, chan)->portdb[dbidx]; if (lp->state != FC_PORTDB_STATE_ZOMBIE) { continue; } if (lp->gone_timer != 0) { lp->gone_timer -= 1; more_to_do++; continue; } isp_prt(isp, ISP_LOGCONFIG, prom3, chan, dbidx, lp->portid, "Gone Device Timeout"); if (lp->is_target) { lp->is_target = 0; isp_make_gone(isp, lp, chan, dbidx); } if (lp->is_initiator) { lp->is_initiator = 0; ac.contract_number = AC_CONTRACT_DEV_CHG; adc = (struct ac_device_changed *) ac.contract_data; adc->wwpn = lp->port_wwn; adc->port = lp->portid; adc->target = dbidx; adc->arrived = 0; xpt_async(AC_CONTRACT, fc->path, &ac); } lp->state = FC_PORTDB_STATE_NIL; } if (fc->ready) { if (more_to_do) { callout_reset(&fc->gdt, hz, isp_gdt, fc); } else { callout_deactivate(&fc->gdt); isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Stopping Gone Device Timer @ %lu", chan, (unsigned long) time_uptime); } } ISP_UNLOCK(isp); } /* * When loop goes down we remember the time and freeze CAM command queue. * During some time period we are trying to reprobe the loop. But if we * fail, we tell the OS that devices have gone away and drop the freeze. * * We don't clear the devices out of our port database because, when loop * come back up, we have to do some actual cleanup with the chip at that * point (implicit PLOGO, e.g., to get the chip's port database state right). */ static void isp_loop_changed(ispsoftc_t *isp, int chan) { fcparam *fcp = FCPARAM(isp, chan); struct isp_fc *fc = ISP_FC_PC(isp, chan); if (fc->loop_down_time) return; isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Chan %d Loop changed", chan); if (fcp->role & ISP_ROLE_INITIATOR) isp_freeze_loopdown(isp, chan); fc->loop_down_time = time_uptime; wakeup(fc); } static void isp_loop_up(ispsoftc_t *isp, int chan) { struct isp_fc *fc = ISP_FC_PC(isp, chan); isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Chan %d Loop is up", chan); fc->loop_seen_once = 1; fc->loop_down_time = 0; isp_unfreeze_loopdown(isp, chan); } static void isp_loop_dead(ispsoftc_t *isp, int chan) { fcparam *fcp = FCPARAM(isp, chan); struct isp_fc *fc = ISP_FC_PC(isp, chan); fcportdb_t *lp; struct ac_contract ac; struct ac_device_changed *adc; int dbidx, i; isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Chan %d Loop is dead", chan); /* * Notify to the OS all targets who we now consider have departed. */ for (dbidx = 0; dbidx < MAX_FC_TARG; dbidx++) { lp = &fcp->portdb[dbidx]; if (lp->state == FC_PORTDB_STATE_NIL) continue; for (i = 0; i < ISP_HANDLE_NUM(isp); i++) { struct ccb_scsiio *xs; if (ISP_H2HT(isp->isp_xflist[i].handle) != ISP_HANDLE_INITIATOR) { continue; } if ((xs = isp->isp_xflist[i].cmd) == NULL) { continue; } if (dbidx != XS_TGT(xs)) { continue; } isp_prt(isp, ISP_LOGWARN, "command handle 0x%x for %d.%d.%jx orphaned by loop down timeout", isp->isp_xflist[i].handle, chan, XS_TGT(xs), (uintmax_t)XS_LUN(xs)); /* * Just like in isp_watchdog, abort the outstanding * command or immediately free its resources if it is * not active */ if (isp_control(isp, ISPCTL_ABORT_CMD, xs) == 0) { continue; } ISP_DMAFREE(isp, xs); isp_destroy_handle(isp, isp->isp_xflist[i].handle); isp_prt(isp, ISP_LOGWARN, "command handle 0x%x for %d.%d.%jx could not be aborted and was destroyed", isp->isp_xflist[i].handle, chan, XS_TGT(xs), (uintmax_t)XS_LUN(xs)); XS_SETERR(xs, HBA_BUSRESET); isp_done(xs); } isp_prt(isp, ISP_LOGCONFIG, prom3, chan, dbidx, lp->portid, "Loop Down Timeout"); if (lp->is_target) { lp->is_target = 0; isp_make_gone(isp, lp, chan, dbidx); } if (lp->is_initiator) { lp->is_initiator = 0; ac.contract_number = AC_CONTRACT_DEV_CHG; adc = (struct ac_device_changed *) ac.contract_data; adc->wwpn = lp->port_wwn; adc->port = lp->portid; adc->target = dbidx; adc->arrived = 0; xpt_async(AC_CONTRACT, fc->path, &ac); } } isp_unfreeze_loopdown(isp, chan); fc->loop_down_time = 0; } static void isp_kthread(void *arg) { struct isp_fc *fc = arg; ispsoftc_t *isp = fc->isp; int chan = fc - ISP_FC_PC(isp, 0); int slp = 0, d; int lb, lim; ISP_LOCK(isp); while (isp->isp_osinfo.is_exiting == 0) { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Chan %d Checking FC state", chan); lb = isp_fc_runstate(isp, chan, 250000); isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Chan %d FC got to %s state", chan, isp_fc_loop_statename(lb)); /* * Our action is different based upon whether we're supporting * Initiator mode or not. If we are, we might freeze the simq * when loop is down and set all sorts of different delays to * check again. * * If not, we simply just wait for loop to come up. */ if (lb == LOOP_READY || lb < 0) { slp = 0; } else { /* * If we've never seen loop up and we've waited longer * than quickboot time, or we've seen loop up but we've * waited longer than loop_down_limit, give up and go * to sleep until loop comes up. */ if (fc->loop_seen_once == 0) lim = isp_quickboot_time; else lim = fc->loop_down_limit; d = time_uptime - fc->loop_down_time; if (d >= lim) slp = 0; else if (d < 10) slp = 1; else if (d < 30) slp = 5; else if (d < 60) slp = 10; else if (d < 120) slp = 20; else slp = 30; } if (slp == 0) { if (lb == LOOP_READY) isp_loop_up(isp, chan); else isp_loop_dead(isp, chan); } isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Chan %d sleep for %d seconds", chan, slp); msleep(fc, &isp->isp_lock, PRIBIO, "ispf", slp * hz); } fc->num_threads -= 1; wakeup(&fc->num_threads); ISP_UNLOCK(isp); kthread_exit(); } #ifdef ISP_TARGET_MODE static void isp_abort_atio(ispsoftc_t *isp, union ccb *ccb) { atio_private_data_t *atp; union ccb *accb = ccb->cab.abort_ccb; struct ccb_hdr *sccb; tstate_t *tptr; tptr = get_lun_statep(isp, XS_CHANNEL(accb), XS_LUN(accb)); if (tptr != NULL) { /* Search for the ATIO among queueued. */ SLIST_FOREACH(sccb, &tptr->atios, sim_links.sle) { if (sccb != &accb->ccb_h) continue; SLIST_REMOVE(&tptr->atios, sccb, ccb_hdr, sim_links.sle); ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, sccb->path, "Abort FREE ATIO\n"); accb->ccb_h.status = CAM_REQ_ABORTED; xpt_done(accb); ccb->ccb_h.status = CAM_REQ_CMP; return; } } /* Search for the ATIO among running. */ atp = isp_find_atpd(isp, XS_CHANNEL(accb), accb->atio.tag_id); if (atp != NULL) { /* Send TERMINATE to firmware. */ if (!atp->dead) { uint8_t storage[QENTRY_LEN]; ct7_entry_t *cto = (ct7_entry_t *) storage; ISP_MEMZERO(cto, sizeof (ct7_entry_t)); cto->ct_header.rqs_entry_type = RQSTYPE_CTIO7; cto->ct_header.rqs_entry_count = 1; cto->ct_nphdl = atp->nphdl; cto->ct_rxid = atp->tag; cto->ct_iid_lo = atp->sid; cto->ct_iid_hi = atp->sid >> 16; cto->ct_oxid = atp->oxid; cto->ct_vpidx = XS_CHANNEL(accb); cto->ct_flags = CT7_NOACK|CT7_TERMINATE; isp_send_entry(isp, cto); } isp_put_atpd(isp, XS_CHANNEL(accb), atp); ccb->ccb_h.status = CAM_REQ_CMP; } else { ccb->ccb_h.status = CAM_UA_ABORT; } } static void isp_abort_inot(ispsoftc_t *isp, union ccb *ccb) { inot_private_data_t *ntp; union ccb *accb = ccb->cab.abort_ccb; struct ccb_hdr *sccb; tstate_t *tptr; tptr = get_lun_statep(isp, XS_CHANNEL(accb), XS_LUN(accb)); if (tptr != NULL) { /* Search for the INOT among queueued. */ SLIST_FOREACH(sccb, &tptr->inots, sim_links.sle) { if (sccb != &accb->ccb_h) continue; SLIST_REMOVE(&tptr->inots, sccb, ccb_hdr, sim_links.sle); ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, sccb->path, "Abort FREE INOT\n"); accb->ccb_h.status = CAM_REQ_ABORTED; xpt_done(accb); ccb->ccb_h.status = CAM_REQ_CMP; return; } } /* Search for the INOT among running. */ ntp = isp_find_ntpd(isp, XS_CHANNEL(accb), accb->cin1.tag_id, accb->cin1.seq_id); if (ntp != NULL) { if (ntp->nt.nt_need_ack) { isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, ntp->nt.nt_lreserved); } isp_put_ntpd(isp, XS_CHANNEL(accb), ntp); ccb->ccb_h.status = CAM_REQ_CMP; } else { ccb->ccb_h.status = CAM_UA_ABORT; return; } } #endif static void isp_action(struct cam_sim *sim, union ccb *ccb) { int bus, tgt, error; ispsoftc_t *isp; fcparam *fcp; struct ccb_trans_settings *cts; sbintime_t ts; CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE, ("isp_action\n")); isp = (ispsoftc_t *)cam_sim_softc(sim); ISP_ASSERT_LOCKED(isp); bus = cam_sim_bus(sim); isp_prt(isp, ISP_LOGDEBUG2, "isp_action code %x", ccb->ccb_h.func_code); ISP_PCMD(ccb) = NULL; switch (ccb->ccb_h.func_code) { case XPT_SCSI_IO: /* Execute the requested I/O operation */ /* * Do a couple of preliminary checks... */ if ((ccb->ccb_h.flags & CAM_CDB_POINTER) != 0) { if ((ccb->ccb_h.flags & CAM_CDB_PHYS) != 0) { ccb->ccb_h.status = CAM_REQ_INVALID; isp_done((struct ccb_scsiio *) ccb); break; } } #ifdef DIAGNOSTIC if (ccb->ccb_h.target_id >= ISP_MAX_TARGETS(isp)) { xpt_print(ccb->ccb_h.path, "invalid target\n"); ccb->ccb_h.status = CAM_PATH_INVALID; } if (ccb->ccb_h.status == CAM_PATH_INVALID) { xpt_done(ccb); break; } #endif ccb->csio.scsi_status = SCSI_STATUS_OK; if (isp_get_pcmd(isp, ccb)) { isp_prt(isp, ISP_LOGWARN, "out of PCMDs"); cam_freeze_devq(ccb->ccb_h.path); cam_release_devq(ccb->ccb_h.path, RELSIM_RELEASE_AFTER_TIMEOUT, 0, 250, 0); ccb->ccb_h.status = CAM_REQUEUE_REQ; xpt_done(ccb); break; } error = isp_start((XS_T *) ccb); isp_rq_check_above(isp); switch (error) { case 0: ccb->ccb_h.status |= CAM_SIM_QUEUED; if (ccb->ccb_h.timeout == CAM_TIME_INFINITY) break; /* Give firmware extra 10s to handle timeout. */ ts = SBT_1MS * ccb->ccb_h.timeout + 10 * SBT_1S; callout_reset_sbt(&PISP_PCMD(ccb)->wdog, ts, 0, isp_watchdog, ccb, 0); break; case CMD_RQLATER: isp_prt(isp, ISP_LOGDEBUG0, "%d.%jx retry later", XS_TGT(ccb), (uintmax_t)XS_LUN(ccb)); cam_freeze_devq(ccb->ccb_h.path); cam_release_devq(ccb->ccb_h.path, RELSIM_RELEASE_AFTER_TIMEOUT, 0, 1000, 0); ccb->ccb_h.status = CAM_REQUEUE_REQ; isp_free_pcmd(isp, ccb); xpt_done(ccb); break; case CMD_EAGAIN: isp_free_pcmd(isp, ccb); cam_freeze_devq(ccb->ccb_h.path); cam_release_devq(ccb->ccb_h.path, RELSIM_RELEASE_AFTER_TIMEOUT, 0, 10, 0); ccb->ccb_h.status = CAM_REQUEUE_REQ; xpt_done(ccb); break; case CMD_COMPLETE: isp_done((struct ccb_scsiio *) ccb); break; default: isp_prt(isp, ISP_LOGERR, "What's this? 0x%x at %d in file %s", error, __LINE__, __FILE__); ccb->ccb_h.status = CAM_REQUEUE_REQ; isp_free_pcmd(isp, ccb); xpt_done(ccb); } break; #ifdef ISP_TARGET_MODE case XPT_EN_LUN: /* Enable/Disable LUN as a target */ if (ccb->cel.enable) { isp_enable_lun(isp, ccb); } else { isp_disable_lun(isp, ccb); } break; case XPT_IMMEDIATE_NOTIFY: /* Add Immediate Notify Resource */ case XPT_ACCEPT_TARGET_IO: /* Add Accept Target IO Resource */ { tstate_t *tptr = get_lun_statep(isp, XS_CHANNEL(ccb), ccb->ccb_h.target_lun); if (tptr == NULL) { const char *str; if (ccb->ccb_h.func_code == XPT_IMMEDIATE_NOTIFY) str = "XPT_IMMEDIATE_NOTIFY"; else str = "XPT_ACCEPT_TARGET_IO"; ISP_PATH_PRT(isp, ISP_LOGWARN, ccb->ccb_h.path, "%s: no state pointer found for %s\n", __func__, str); ccb->ccb_h.status = CAM_DEV_NOT_THERE; xpt_done(ccb); break; } if (ccb->ccb_h.func_code == XPT_ACCEPT_TARGET_IO) { ccb->atio.tag_id = 0; SLIST_INSERT_HEAD(&tptr->atios, &ccb->ccb_h, sim_links.sle); ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, ccb->ccb_h.path, "Put FREE ATIO\n"); } else if (ccb->ccb_h.func_code == XPT_IMMEDIATE_NOTIFY) { ccb->cin1.seq_id = ccb->cin1.tag_id = 0; SLIST_INSERT_HEAD(&tptr->inots, &ccb->ccb_h, sim_links.sle); ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, ccb->ccb_h.path, "Put FREE INOT\n"); } ccb->ccb_h.status = CAM_REQ_INPROG; break; } case XPT_NOTIFY_ACKNOWLEDGE: /* notify ack */ { inot_private_data_t *ntp; /* * XXX: Because we cannot guarantee that the path information in the notify acknowledge ccb * XXX: matches that for the immediate notify, we have to *search* for the notify structure */ /* * All the relevant path information is in the associated immediate notify */ ISP_PATH_PRT(isp, ISP_LOGTDEBUG0, ccb->ccb_h.path, "%s: [0x%x] NOTIFY ACKNOWLEDGE for 0x%x seen\n", __func__, ccb->cna2.tag_id, ccb->cna2.seq_id); ntp = isp_find_ntpd(isp, XS_CHANNEL(ccb), ccb->cna2.tag_id, ccb->cna2.seq_id); if (ntp == NULL) { ISP_PATH_PRT(isp, ISP_LOGWARN, ccb->ccb_h.path, "%s: [0x%x] XPT_NOTIFY_ACKNOWLEDGE of 0x%x cannot find ntp private data\n", __func__, ccb->cna2.tag_id, ccb->cna2.seq_id); ccb->ccb_h.status = CAM_DEV_NOT_THERE; xpt_done(ccb); break; } if (isp_handle_platform_target_notify_ack(isp, &ntp->nt, (ccb->ccb_h.flags & CAM_SEND_STATUS) ? ccb->cna2.arg : 0)) { cam_freeze_devq(ccb->ccb_h.path); cam_release_devq(ccb->ccb_h.path, RELSIM_RELEASE_AFTER_TIMEOUT, 0, 10, 0); ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= CAM_REQUEUE_REQ; break; } isp_put_ntpd(isp, XS_CHANNEL(ccb), ntp); ccb->ccb_h.status = CAM_REQ_CMP; ISP_PATH_PRT(isp, ISP_LOGTDEBUG0, ccb->ccb_h.path, "%s: [0x%x] calling xpt_done for tag 0x%x\n", __func__, ccb->cna2.tag_id, ccb->cna2.seq_id); xpt_done(ccb); break; } case XPT_CONT_TARGET_IO: isp_target_start_ctio(isp, ccb, FROM_CAM); isp_rq_check_above(isp); break; #endif case XPT_RESET_DEV: /* BDR the specified SCSI device */ tgt = ccb->ccb_h.target_id; tgt |= (bus << 16); error = isp_control(isp, ISPCTL_RESET_DEV, bus, tgt); if (error) { ccb->ccb_h.status = CAM_REQ_CMP_ERR; } else { /* * If we have a FC device, reset the Command * Reference Number, because the target will expect * that we re-start the CRN at 1 after a reset. */ isp_fcp_reset_crn(isp, bus, tgt, /*tgt_set*/ 1); ccb->ccb_h.status = CAM_REQ_CMP; } xpt_done(ccb); break; case XPT_ABORT: /* Abort the specified CCB */ { union ccb *accb = ccb->cab.abort_ccb; switch (accb->ccb_h.func_code) { #ifdef ISP_TARGET_MODE case XPT_ACCEPT_TARGET_IO: isp_abort_atio(isp, ccb); break; case XPT_IMMEDIATE_NOTIFY: isp_abort_inot(isp, ccb); break; #endif case XPT_SCSI_IO: error = isp_control(isp, ISPCTL_ABORT_CMD, accb); if (error) { ccb->ccb_h.status = CAM_UA_ABORT; } else { ccb->ccb_h.status = CAM_REQ_CMP; } break; default: ccb->ccb_h.status = CAM_REQ_INVALID; break; } /* * This is not a queued CCB, so the caller expects it to be * complete when control is returned. */ break; } #define IS_CURRENT_SETTINGS(c) (c->type == CTS_TYPE_CURRENT_SETTINGS) case XPT_SET_TRAN_SETTINGS: /* Nexus Settings */ cts = &ccb->cts; if (!IS_CURRENT_SETTINGS(cts)) { ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); break; } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings_scsi *scsi; struct ccb_trans_settings_fc *fc; cts = &ccb->cts; scsi = &cts->proto_specific.scsi; fc = &cts->xport_specific.fc; tgt = cts->ccb_h.target_id; fcp = FCPARAM(isp, bus); cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_2; cts->transport = XPORT_FC; cts->transport_version = 0; scsi->valid = CTS_SCSI_VALID_TQ; scsi->flags = CTS_SCSI_FLAGS_TAG_ENB; fc->valid = CTS_FC_VALID_SPEED; fc->bitrate = fcp->isp_gbspeed * 100000; if (tgt < MAX_FC_TARG) { fcportdb_t *lp = &fcp->portdb[tgt]; fc->wwnn = lp->node_wwn; fc->wwpn = lp->port_wwn; fc->port = lp->portid; fc->valid |= CTS_FC_VALID_WWNN | CTS_FC_VALID_WWPN | CTS_FC_VALID_PORT; } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; } case XPT_CALC_GEOMETRY: cam_calc_geometry(&ccb->ccg, 1); xpt_done(ccb); break; case XPT_RESET_BUS: /* Reset the specified bus */ error = isp_control(isp, ISPCTL_RESET_BUS, bus); if (error) { ccb->ccb_h.status = CAM_REQ_CMP_ERR; xpt_done(ccb); break; } if (bootverbose) { xpt_print(ccb->ccb_h.path, "reset bus on channel %d\n", bus); } xpt_async(AC_BUS_RESET, ISP_FC_PC(isp, bus)->path, 0); ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; case XPT_TERM_IO: /* Terminate the I/O process */ ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); break; case XPT_SET_SIM_KNOB: /* Set SIM knobs */ { struct ccb_sim_knob *kp = &ccb->knob; fcparam *fcp = FCPARAM(isp, bus); if (kp->xport_specific.fc.valid & KNOB_VALID_ADDRESS) { fcp->isp_wwnn = ISP_FC_PC(isp, bus)->def_wwnn = kp->xport_specific.fc.wwnn; fcp->isp_wwpn = ISP_FC_PC(isp, bus)->def_wwpn = kp->xport_specific.fc.wwpn; isp_prt(isp, ISP_LOGALL, "Setting Channel %d wwns to 0x%jx 0x%jx", bus, fcp->isp_wwnn, fcp->isp_wwpn); } ccb->ccb_h.status = CAM_REQ_CMP; if (kp->xport_specific.fc.valid & KNOB_VALID_ROLE) { int rchange = 0; int newrole = 0; switch (kp->xport_specific.fc.role) { case KNOB_ROLE_NONE: if (fcp->role != ISP_ROLE_NONE) { rchange = 1; newrole = ISP_ROLE_NONE; } break; case KNOB_ROLE_TARGET: if (fcp->role != ISP_ROLE_TARGET) { rchange = 1; newrole = ISP_ROLE_TARGET; } break; case KNOB_ROLE_INITIATOR: if (fcp->role != ISP_ROLE_INITIATOR) { rchange = 1; newrole = ISP_ROLE_INITIATOR; } break; case KNOB_ROLE_BOTH: if (fcp->role != ISP_ROLE_BOTH) { rchange = 1; newrole = ISP_ROLE_BOTH; } break; } if (rchange) { ISP_PATH_PRT(isp, ISP_LOGCONFIG, ccb->ccb_h.path, "changing role on from %d to %d\n", fcp->role, newrole); if (isp_control(isp, ISPCTL_CHANGE_ROLE, bus, newrole) != 0) { ccb->ccb_h.status = CAM_REQ_CMP_ERR; xpt_done(ccb); break; } } } xpt_done(ccb); break; } case XPT_GET_SIM_KNOB_OLD: /* Get SIM knobs -- compat value */ case XPT_GET_SIM_KNOB: /* Get SIM knobs */ { struct ccb_sim_knob *kp = &ccb->knob; fcparam *fcp = FCPARAM(isp, bus); kp->xport_specific.fc.wwnn = fcp->isp_wwnn; kp->xport_specific.fc.wwpn = fcp->isp_wwpn; switch (fcp->role) { case ISP_ROLE_NONE: kp->xport_specific.fc.role = KNOB_ROLE_NONE; break; case ISP_ROLE_TARGET: kp->xport_specific.fc.role = KNOB_ROLE_TARGET; break; case ISP_ROLE_INITIATOR: kp->xport_specific.fc.role = KNOB_ROLE_INITIATOR; break; case ISP_ROLE_BOTH: kp->xport_specific.fc.role = KNOB_ROLE_BOTH; break; } kp->xport_specific.fc.valid = KNOB_VALID_ADDRESS | KNOB_VALID_ROLE; ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; } case XPT_PATH_INQ: /* Path routing inquiry */ { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; #ifdef ISP_TARGET_MODE cpi->target_sprt = PIT_PROCESSOR | PIT_DISCONNECT | PIT_TERM_IO; #else cpi->target_sprt = 0; #endif cpi->hba_eng_cnt = 0; cpi->max_target = ISP_MAX_TARGETS(isp) - 1; cpi->max_lun = 255; cpi->bus_id = cam_sim_bus(sim); cpi->maxio = (ISP_NSEG64_MAX - 1) * PAGE_SIZE; fcp = FCPARAM(isp, bus); cpi->hba_misc = PIM_NOBUSRESET | PIM_UNMAPPED; cpi->hba_misc |= PIM_EXTLUNS | PIM_NOSCAN; /* * Because our loop ID can shift from time to time, * make our initiator ID out of range of our bus. */ cpi->initiator_id = cpi->max_target + 1; /* * Set base transfer capabilities for Fibre Channel, for this HBA. */ if (IS_25XX(isp)) cpi->base_transfer_speed = 8000000; else cpi->base_transfer_speed = 4000000; cpi->hba_inquiry = PI_TAG_ABLE; cpi->transport = XPORT_FC; cpi->transport_version = 0; cpi->xport_specific.fc.wwnn = fcp->isp_wwnn; cpi->xport_specific.fc.wwpn = fcp->isp_wwpn; cpi->xport_specific.fc.port = fcp->isp_portid; cpi->xport_specific.fc.bitrate = fcp->isp_gbspeed * 1000; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_2; strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strlcpy(cpi->hba_vid, "Qlogic", HBA_IDLEN); strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; } default: ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); break; } } void isp_done(XS_T *sccb) { ispsoftc_t *isp = XS_ISP(sccb); uint32_t status; if (XS_NOERR(sccb)) XS_SETERR(sccb, CAM_REQ_CMP); if ((sccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP && (sccb->scsi_status != SCSI_STATUS_OK)) { sccb->ccb_h.status &= ~CAM_STATUS_MASK; if ((sccb->scsi_status == SCSI_STATUS_CHECK_COND) && (sccb->ccb_h.status & CAM_AUTOSNS_VALID) == 0) { sccb->ccb_h.status |= CAM_AUTOSENSE_FAIL; } else { sccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; } } sccb->ccb_h.status &= ~CAM_SIM_QUEUED; status = sccb->ccb_h.status & CAM_STATUS_MASK; if (status != CAM_REQ_CMP && (sccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { sccb->ccb_h.status |= CAM_DEV_QFRZN; xpt_freeze_devq(sccb->ccb_h.path, 1); } if (ISP_PCMD(sccb)) { if (callout_active(&PISP_PCMD(sccb)->wdog)) callout_stop(&PISP_PCMD(sccb)->wdog); isp_free_pcmd(isp, (union ccb *) sccb); } isp_rq_check_below(isp); xpt_done((union ccb *) sccb); } void isp_async(ispsoftc_t *isp, ispasync_t cmd, ...) { int bus; static const char prom[] = "Chan %d [%d] WWPN 0x%16jx PortID 0x%06x handle 0x%x %s %s"; char buf[64]; char *msg = NULL; target_id_t tgt = 0; fcportdb_t *lp; struct isp_fc *fc; struct ac_contract ac; struct ac_device_changed *adc; va_list ap; switch (cmd) { case ISPASYNC_LOOP_RESET: { uint16_t lipp; fcparam *fcp; va_start(ap, cmd); bus = va_arg(ap, int); va_end(ap); lipp = ISP_READ(isp, OUTMAILBOX1); fcp = FCPARAM(isp, bus); isp_prt(isp, ISP_LOGINFO, "Chan %d LOOP Reset, LIP primitive %x", bus, lipp); /* * Per FCP-4, a Reset LIP should result in a CRN reset. Other * LIPs and loop up/down events should never reset the CRN. For * an as of yet unknown reason, 24xx series cards (and * potentially others) can interrupt with a LIP Reset status * when no LIP reset came down the wire. Additionally, the LIP * primitive accompanying this status would not be a valid LIP * Reset primitive, but some variation of an invalid AL_PA * LIP. As a result, we have to verify the AL_PD in the LIP * addresses our port before blindly resetting. */ if (FCP_IS_DEST_ALPD(fcp, (lipp & 0x00FF))) isp_fcp_reset_crn(isp, bus, /*tgt*/0, /*tgt_set*/ 0); isp_loop_changed(isp, bus); break; } case ISPASYNC_LIP: if (msg == NULL) msg = "LIP Received"; /* FALLTHROUGH */ case ISPASYNC_LOOP_DOWN: if (msg == NULL) msg = "LOOP Down"; /* FALLTHROUGH */ case ISPASYNC_LOOP_UP: if (msg == NULL) msg = "LOOP Up"; va_start(ap, cmd); bus = va_arg(ap, int); va_end(ap); isp_loop_changed(isp, bus); isp_prt(isp, ISP_LOGINFO, "Chan %d %s", bus, msg); break; case ISPASYNC_DEV_ARRIVED: va_start(ap, cmd); bus = va_arg(ap, int); lp = va_arg(ap, fcportdb_t *); va_end(ap); fc = ISP_FC_PC(isp, bus); tgt = FC_PORTDB_TGT(isp, bus, lp); isp_gen_role_str(buf, sizeof (buf), lp->prli_word3); isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->portid, lp->handle, buf, "arrived"); if ((FCPARAM(isp, bus)->role & ISP_ROLE_INITIATOR) && (lp->prli_word3 & PRLI_WD3_TARGET_FUNCTION)) { lp->is_target = 1; isp_fcp_reset_crn(isp, bus, tgt, /*tgt_set*/ 1); isp_make_here(isp, lp, bus, tgt); } if ((FCPARAM(isp, bus)->role & ISP_ROLE_TARGET) && (lp->prli_word3 & PRLI_WD3_INITIATOR_FUNCTION)) { lp->is_initiator = 1; ac.contract_number = AC_CONTRACT_DEV_CHG; adc = (struct ac_device_changed *) ac.contract_data; adc->wwpn = lp->port_wwn; adc->port = lp->portid; adc->target = tgt; adc->arrived = 1; xpt_async(AC_CONTRACT, fc->path, &ac); } break; case ISPASYNC_DEV_CHANGED: case ISPASYNC_DEV_STAYED: { int crn_reset_done; crn_reset_done = 0; va_start(ap, cmd); bus = va_arg(ap, int); lp = va_arg(ap, fcportdb_t *); va_end(ap); fc = ISP_FC_PC(isp, bus); tgt = FC_PORTDB_TGT(isp, bus, lp); isp_gen_role_str(buf, sizeof (buf), lp->new_prli_word3); if (cmd == ISPASYNC_DEV_CHANGED) isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->new_portid, lp->handle, buf, "changed"); else isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->portid, lp->handle, buf, "stayed"); if (lp->is_target != ((FCPARAM(isp, bus)->role & ISP_ROLE_INITIATOR) && (lp->new_prli_word3 & PRLI_WD3_TARGET_FUNCTION))) { lp->is_target = !lp->is_target; if (lp->is_target) { if (cmd == ISPASYNC_DEV_CHANGED) { isp_fcp_reset_crn(isp, bus, tgt, /*tgt_set*/ 1); crn_reset_done = 1; } isp_make_here(isp, lp, bus, tgt); } else { isp_make_gone(isp, lp, bus, tgt); if (cmd == ISPASYNC_DEV_CHANGED) { isp_fcp_reset_crn(isp, bus, tgt, /*tgt_set*/ 1); crn_reset_done = 1; } } } if (lp->is_initiator != ((FCPARAM(isp, bus)->role & ISP_ROLE_TARGET) && (lp->new_prli_word3 & PRLI_WD3_INITIATOR_FUNCTION))) { lp->is_initiator = !lp->is_initiator; ac.contract_number = AC_CONTRACT_DEV_CHG; adc = (struct ac_device_changed *) ac.contract_data; adc->wwpn = lp->port_wwn; adc->port = lp->portid; adc->target = tgt; adc->arrived = lp->is_initiator; xpt_async(AC_CONTRACT, fc->path, &ac); } if ((cmd == ISPASYNC_DEV_CHANGED) && (crn_reset_done == 0)) isp_fcp_reset_crn(isp, bus, tgt, /*tgt_set*/ 1); break; } case ISPASYNC_DEV_GONE: va_start(ap, cmd); bus = va_arg(ap, int); lp = va_arg(ap, fcportdb_t *); va_end(ap); fc = ISP_FC_PC(isp, bus); tgt = FC_PORTDB_TGT(isp, bus, lp); /* * If this has a virtual target or initiator set the isp_gdt * timer running on it to delay its departure. */ isp_gen_role_str(buf, sizeof (buf), lp->prli_word3); if (lp->is_target || lp->is_initiator) { lp->state = FC_PORTDB_STATE_ZOMBIE; lp->gone_timer = fc->gone_device_time; isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->portid, lp->handle, buf, "gone zombie"); if (fc->ready && !callout_active(&fc->gdt)) { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Chan %d Starting Gone Device Timer with %u seconds time now %lu", bus, lp->gone_timer, (unsigned long)time_uptime); callout_reset(&fc->gdt, hz, isp_gdt, fc); } break; } isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->portid, lp->handle, buf, "gone"); break; case ISPASYNC_CHANGE_NOTIFY: { char *msg; int evt, nphdl, nlstate, portid, reason; va_start(ap, cmd); bus = va_arg(ap, int); evt = va_arg(ap, int); if (evt == ISPASYNC_CHANGE_PDB) { nphdl = va_arg(ap, int); nlstate = va_arg(ap, int); reason = va_arg(ap, int); } else if (evt == ISPASYNC_CHANGE_SNS) { portid = va_arg(ap, int); } else { nphdl = NIL_HANDLE; nlstate = reason = 0; } va_end(ap); if (evt == ISPASYNC_CHANGE_PDB) { int tgt_set = 0; msg = "Port Database Changed"; isp_prt(isp, ISP_LOGINFO, "Chan %d %s (nphdl 0x%x state 0x%x reason 0x%x)", bus, msg, nphdl, nlstate, reason); /* * Port database syncs are not sufficient for * determining that logins or logouts are done on the * loop, but this information is directly available from * the reason code from the incoming mbox. We must reset * the fcp crn on these events according to FCP-4 */ switch (reason) { case PDB24XX_AE_IMPL_LOGO_1: case PDB24XX_AE_IMPL_LOGO_2: case PDB24XX_AE_IMPL_LOGO_3: case PDB24XX_AE_PLOGI_RCVD: case PDB24XX_AE_PRLI_RCVD: case PDB24XX_AE_PRLO_RCVD: case PDB24XX_AE_LOGO_RCVD: case PDB24XX_AE_PLOGI_DONE: case PDB24XX_AE_PRLI_DONE: /* * If the event is not global, twiddle tgt and * tgt_set to nominate only the target * associated with the nphdl. */ if (nphdl != PDB24XX_AE_GLOBAL) { /* Break if we don't yet have the pdb */ if (!isp_find_pdb_by_handle(isp, bus, nphdl, &lp)) break; tgt = FC_PORTDB_TGT(isp, bus, lp); tgt_set = 1; } isp_fcp_reset_crn(isp, bus, tgt, tgt_set); break; default: break; /* NOP */ } } else if (evt == ISPASYNC_CHANGE_SNS) { msg = "Name Server Database Changed"; isp_prt(isp, ISP_LOGINFO, "Chan %d %s (PortID 0x%06x)", bus, msg, portid); } else { msg = "Other Change Notify"; isp_prt(isp, ISP_LOGINFO, "Chan %d %s", bus, msg); } isp_loop_changed(isp, bus); break; } #ifdef ISP_TARGET_MODE case ISPASYNC_TARGET_NOTIFY: { isp_notify_t *notify; va_start(ap, cmd); notify = va_arg(ap, isp_notify_t *); va_end(ap); switch (notify->nt_ncode) { case NT_ABORT_TASK: case NT_ABORT_TASK_SET: case NT_CLEAR_ACA: case NT_CLEAR_TASK_SET: case NT_LUN_RESET: case NT_TARGET_RESET: case NT_QUERY_TASK_SET: case NT_QUERY_ASYNC_EVENT: /* * These are task management functions. */ isp_handle_platform_target_tmf(isp, notify); break; case NT_LIP_RESET: case NT_LINK_UP: case NT_LINK_DOWN: case NT_HBA_RESET: /* * No action need be taken here. */ break; case NT_SRR: isp_handle_platform_srr(isp, notify); break; default: isp_prt(isp, ISP_LOGALL, "target notify code 0x%x", notify->nt_ncode); isp_handle_platform_target_notify_ack(isp, notify, 0); break; } break; } case ISPASYNC_TARGET_NOTIFY_ACK: { void *inot; va_start(ap, cmd); inot = va_arg(ap, void *); va_end(ap); if (isp_notify_ack(isp, inot)) { isp_tna_t *tp = malloc(sizeof (*tp), M_DEVBUF, M_NOWAIT); if (tp) { tp->isp = isp; memcpy(tp->data, inot, sizeof (tp->data)); tp->not = tp->data; callout_init_mtx(&tp->timer, &isp->isp_lock, 0); callout_reset(&tp->timer, 5, isp_refire_notify_ack, tp); } else { isp_prt(isp, ISP_LOGERR, "you lose- cannot allocate a notify refire"); } } break; } case ISPASYNC_TARGET_ACTION: { isphdr_t *hp; va_start(ap, cmd); hp = va_arg(ap, isphdr_t *); va_end(ap); switch (hp->rqs_entry_type) { case RQSTYPE_ATIO: isp_handle_platform_atio7(isp, (at7_entry_t *)hp); break; case RQSTYPE_CTIO7: isp_handle_platform_ctio(isp, (ct7_entry_t *)hp); break; default: isp_prt(isp, ISP_LOGWARN, "%s: unhandled target action 0x%x", __func__, hp->rqs_entry_type); break; } break; } #endif case ISPASYNC_FW_CRASH: { uint16_t mbox1; mbox1 = ISP_READ(isp, OUTMAILBOX1); isp_prt(isp, ISP_LOGERR, "Internal Firmware Error @ RISC Address 0x%x", mbox1); #if 0 isp_reinit(isp, 1); isp_async(isp, ISPASYNC_FW_RESTARTED, NULL); #endif break; } default: isp_prt(isp, ISP_LOGERR, "unknown isp_async event %d", cmd); break; } } uint64_t isp_default_wwn(ispsoftc_t * isp, int chan, int isactive, int iswwnn) { uint64_t seed; struct isp_fc *fc = ISP_FC_PC(isp, chan); /* First try to use explicitly configured WWNs. */ seed = iswwnn ? fc->def_wwnn : fc->def_wwpn; if (seed) return (seed); /* Otherwise try to use WWNs from NVRAM. */ if (isactive) { seed = iswwnn ? FCPARAM(isp, chan)->isp_wwnn_nvram : FCPARAM(isp, chan)->isp_wwpn_nvram; if (seed) return (seed); } /* If still no WWNs, try to steal them from the first channel. */ if (chan > 0) { seed = iswwnn ? ISP_FC_PC(isp, 0)->def_wwnn : ISP_FC_PC(isp, 0)->def_wwpn; if (seed == 0) { seed = iswwnn ? FCPARAM(isp, 0)->isp_wwnn_nvram : FCPARAM(isp, 0)->isp_wwpn_nvram; } } /* If still nothing -- improvise. */ if (seed == 0) { seed = 0x400000007F000000ull + device_get_unit(isp->isp_dev); if (!iswwnn) seed ^= 0x0100000000000000ULL; } /* For additional channels we have to improvise even more. */ if (!iswwnn && chan > 0) { /* * We'll stick our channel number plus one first into bits * 57..59 and thence into bits 52..55 which allows for 8 bits * of channel which is enough for our maximum of 255 channels. */ seed ^= 0x0100000000000000ULL; seed ^= ((uint64_t) (chan + 1) & 0xf) << 56; seed ^= ((uint64_t) ((chan + 1) >> 4) & 0xf) << 52; } return (seed); } void isp_prt(ispsoftc_t *isp, int level, const char *fmt, ...) { int loc; char lbuf[200]; va_list ap; if (level != ISP_LOGALL && (level & isp->isp_dblev) == 0) { return; } snprintf(lbuf, sizeof (lbuf), "%s: ", device_get_nameunit(isp->isp_dev)); loc = strlen(lbuf); va_start(ap, fmt); vsnprintf(&lbuf[loc], sizeof (lbuf) - loc - 1, fmt, ap); va_end(ap); printf("%s\n", lbuf); } void isp_xs_prt(ispsoftc_t *isp, XS_T *xs, int level, const char *fmt, ...) { va_list ap; if (level != ISP_LOGALL && (level & isp->isp_dblev) == 0) { return; } xpt_print_path(xs->ccb_h.path); va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("\n"); } uint64_t isp_nanotime_sub(struct timespec *b, struct timespec *a) { uint64_t elapsed; struct timespec x; timespecsub(b, a, &x); elapsed = GET_NANOSEC(&x); if (elapsed == 0) elapsed++; return (elapsed); } int isp_fc_scratch_acquire(ispsoftc_t *isp, int chan) { struct isp_fc *fc = ISP_FC_PC(isp, chan); if (fc->fcbsy) return (-1); fc->fcbsy = 1; return (0); } void isp_platform_intr(void *arg) { ispsoftc_t *isp = arg; ISP_LOCK(isp); ISP_RUN_ISR(isp); ISP_UNLOCK(isp); } void isp_platform_intr_resp(void *arg) { ispsoftc_t *isp = arg; ISP_LOCK(isp); isp_intr_respq(isp); ISP_UNLOCK(isp); /* We have handshake enabled, so explicitly complete interrupt */ ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_CLEAR_RISC_INT); } void isp_platform_intr_atio(void *arg) { ispsoftc_t *isp = arg; ISP_LOCK(isp); #ifdef ISP_TARGET_MODE isp_intr_atioq(isp); #endif ISP_UNLOCK(isp); /* We have handshake enabled, so explicitly complete interrupt */ ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_CLEAR_RISC_INT); } typedef struct { ispsoftc_t *isp; struct ccb_scsiio *csio; void *qe; int error; } mush_t; static void isp_dma2(void *arg, bus_dma_segment_t *dm_segs, int nseg, int error) { mush_t *mp = (mush_t *) arg; ispsoftc_t *isp= mp->isp; struct ccb_scsiio *csio = mp->csio; bus_dmasync_op_t op; if (error) { mp->error = error; return; } if ((csio->ccb_h.func_code == XPT_CONT_TARGET_IO) ^ ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN)) op = BUS_DMASYNC_PREREAD; else op = BUS_DMASYNC_PREWRITE; bus_dmamap_sync(isp->isp_osinfo.dmat, PISP_PCMD(csio)->dmap, op); mp->error = ISP_SEND_CMD(isp, mp->qe, dm_segs, nseg); if (mp->error) isp_dmafree(isp, csio); } int isp_dmasetup(ispsoftc_t *isp, struct ccb_scsiio *csio, void *qe) { mush_t mp; int error; if (XS_XFRLEN(csio)) { mp.isp = isp; mp.csio = csio; mp.qe = qe; mp.error = 0; error = bus_dmamap_load_ccb(isp->isp_osinfo.dmat, PISP_PCMD(csio)->dmap, (union ccb *)csio, isp_dma2, &mp, BUS_DMA_NOWAIT); if (error == 0) error = mp.error; } else { error = ISP_SEND_CMD(isp, qe, NULL, 0); } switch (error) { case 0: case CMD_COMPLETE: case CMD_EAGAIN: case CMD_RQLATER: break; case ENOMEM: error = CMD_EAGAIN; break; case EINVAL: case EFBIG: csio->ccb_h.status = CAM_REQ_INVALID; error = CMD_COMPLETE; break; default: csio->ccb_h.status = CAM_UNREC_HBA_ERROR; error = CMD_COMPLETE; break; } return (error); } void isp_dmafree(ispsoftc_t *isp, struct ccb_scsiio *csio) { bus_dmasync_op_t op; if (XS_XFRLEN(csio) == 0) return; if ((csio->ccb_h.func_code == XPT_CONT_TARGET_IO) ^ ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN)) op = BUS_DMASYNC_POSTREAD; else op = BUS_DMASYNC_POSTWRITE; bus_dmamap_sync(isp->isp_osinfo.dmat, PISP_PCMD(csio)->dmap, op); bus_dmamap_unload(isp->isp_osinfo.dmat, PISP_PCMD(csio)->dmap); } /* * Reset the command reference number for all LUNs on a specific target * (needed when a target arrives again) or for all targets on a port * (needed for events like a LIP). */ void isp_fcp_reset_crn(ispsoftc_t *isp, int chan, uint32_t tgt, int tgt_set) { struct isp_fc *fc = ISP_FC_PC(isp, chan); struct isp_nexus *nxp; int i; if (tgt_set == 0) isp_prt(isp, ISP_LOGDEBUG0, "Chan %d resetting CRN on all targets", chan); else isp_prt(isp, ISP_LOGDEBUG0, "Chan %d resetting CRN on target %u", chan, tgt); for (i = 0; i < NEXUS_HASH_WIDTH; i++) { for (nxp = fc->nexus_hash[i]; nxp != NULL; nxp = nxp->next) { if (tgt_set == 0 || tgt == nxp->tgt) nxp->crnseed = 0; } } } int isp_fcp_next_crn(ispsoftc_t *isp, uint8_t *crnp, XS_T *cmd) { lun_id_t lun; uint32_t chan, tgt; struct isp_fc *fc; struct isp_nexus *nxp; int idx; chan = XS_CHANNEL(cmd); tgt = XS_TGT(cmd); lun = XS_LUN(cmd); fc = ISP_FC_PC(isp, chan); idx = NEXUS_HASH(tgt, lun); nxp = fc->nexus_hash[idx]; while (nxp) { if (nxp->tgt == tgt && nxp->lun == lun) break; nxp = nxp->next; } if (nxp == NULL) { nxp = fc->nexus_free_list; if (nxp == NULL) { nxp = malloc(sizeof (struct isp_nexus), M_DEVBUF, M_ZERO|M_NOWAIT); if (nxp == NULL) { return (-1); } } else { fc->nexus_free_list = nxp->next; } nxp->tgt = tgt; nxp->lun = lun; nxp->next = fc->nexus_hash[idx]; fc->nexus_hash[idx] = nxp; } if (nxp->crnseed == 0) nxp->crnseed = 1; *crnp = nxp->crnseed++; return (0); } /* * We enter with the lock held */ void isp_timer(void *arg) { ispsoftc_t *isp = arg; #ifdef ISP_TARGET_MODE isp_tmcmd_restart(isp); #endif callout_reset(&isp->isp_osinfo.tmo, isp_timer_count, isp_timer, isp); } #ifdef ISP_TARGET_MODE isp_ecmd_t * isp_get_ecmd(ispsoftc_t *isp) { isp_ecmd_t *ecmd = isp->isp_osinfo.ecmd_free; if (ecmd) { isp->isp_osinfo.ecmd_free = ecmd->next; } return (ecmd); } void isp_put_ecmd(ispsoftc_t *isp, isp_ecmd_t *ecmd) { ecmd->next = isp->isp_osinfo.ecmd_free; isp->isp_osinfo.ecmd_free = ecmd; } #endif diff --git a/sys/dev/mpr/mpr.c b/sys/dev/mpr/mpr.c index 5d57be27f9b2..264123ed62ec 100644 --- a/sys/dev/mpr/mpr.c +++ b/sys/dev/mpr/mpr.c @@ -1,4014 +1,4014 @@ /*- * Copyright (c) 2009 Yahoo! Inc. * Copyright (c) 2011-2015 LSI Corp. * Copyright (c) 2013-2016 Avago Technologies * Copyright 2000-2020 Broadcom Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Broadcom Inc. (LSI) MPT-Fusion Host Adapter FreeBSD * */ #include __FBSDID("$FreeBSD$"); /* Communications core for Avago Technologies (LSI) MPT3 */ /* TODO Move headers to mprvar */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int mpr_diag_reset(struct mpr_softc *sc, int sleep_flag); static int mpr_init_queues(struct mpr_softc *sc); static void mpr_resize_queues(struct mpr_softc *sc); static int mpr_message_unit_reset(struct mpr_softc *sc, int sleep_flag); static int mpr_transition_operational(struct mpr_softc *sc); static int mpr_iocfacts_allocate(struct mpr_softc *sc, uint8_t attaching); static void mpr_iocfacts_free(struct mpr_softc *sc); static void mpr_startup(void *arg); static int mpr_send_iocinit(struct mpr_softc *sc); static int mpr_alloc_queues(struct mpr_softc *sc); static int mpr_alloc_hw_queues(struct mpr_softc *sc); static int mpr_alloc_replies(struct mpr_softc *sc); static int mpr_alloc_requests(struct mpr_softc *sc); static int mpr_alloc_nvme_prp_pages(struct mpr_softc *sc); static int mpr_attach_log(struct mpr_softc *sc); static __inline void mpr_complete_command(struct mpr_softc *sc, struct mpr_command *cm); static void mpr_dispatch_event(struct mpr_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *reply); static void mpr_config_complete(struct mpr_softc *sc, struct mpr_command *cm); static void mpr_periodic(void *); static int mpr_reregister_events(struct mpr_softc *sc); static void mpr_enqueue_request(struct mpr_softc *sc, struct mpr_command *cm); static int mpr_get_iocfacts(struct mpr_softc *sc, MPI2_IOC_FACTS_REPLY *facts); static int mpr_wait_db_ack(struct mpr_softc *sc, int timeout, int sleep_flag); static int mpr_debug_sysctl(SYSCTL_HANDLER_ARGS); static int mpr_dump_reqs(SYSCTL_HANDLER_ARGS); static void mpr_parse_debug(struct mpr_softc *sc, char *list); static void adjust_iocfacts_endianness(MPI2_IOC_FACTS_REPLY *facts); SYSCTL_NODE(_hw, OID_AUTO, mpr, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "MPR Driver Parameters"); MALLOC_DEFINE(M_MPR, "mpr", "mpr driver memory"); /* * Do a "Diagnostic Reset" aka a hard reset. This should get the chip out of * any state and back to its initialization state machine. */ static char mpt2_reset_magic[] = { 0x00, 0x0f, 0x04, 0x0b, 0x02, 0x07, 0x0d }; /* * Added this union to smoothly convert le64toh cm->cm_desc.Words. * Compiler only supports uint64_t to be passed as an argument. * Otherwise it will throw this error: * "aggregate value used where an integer was expected" */ typedef union { u64 word; struct { u32 low; u32 high; } u; } request_descriptor_t; /* Rate limit chain-fail messages to 1 per minute */ static struct timeval mpr_chainfail_interval = { 60, 0 }; /* * sleep_flag can be either CAN_SLEEP or NO_SLEEP. * If this function is called from process context, it can sleep * and there is no harm to sleep, in case if this fuction is called * from Interrupt handler, we can not sleep and need NO_SLEEP flag set. * based on sleep flags driver will call either msleep, pause or DELAY. * msleep and pause are of same variant, but pause is used when mpr_mtx * is not hold by driver. */ static int mpr_diag_reset(struct mpr_softc *sc,int sleep_flag) { uint32_t reg; int i, error, tries = 0; uint8_t first_wait_done = FALSE; mpr_dprint(sc, MPR_INIT, "%s entered\n", __func__); /* Clear any pending interrupts */ mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); /* * Force NO_SLEEP for threads prohibited to sleep * e.a Thread from interrupt handler are prohibited to sleep. */ if (curthread->td_no_sleeping) sleep_flag = NO_SLEEP; mpr_dprint(sc, MPR_INIT, "sequence start, sleep_flag=%d\n", sleep_flag); /* Push the magic sequence */ error = ETIMEDOUT; while (tries++ < 20) { for (i = 0; i < sizeof(mpt2_reset_magic); i++) mpr_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET, mpt2_reset_magic[i]); /* wait 100 msec */ if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0, "mprdiag", hz/10); else if (sleep_flag == CAN_SLEEP) pause("mprdiag", hz/10); else DELAY(100 * 1000); reg = mpr_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET); if (reg & MPI2_DIAG_DIAG_WRITE_ENABLE) { error = 0; break; } } if (error) { mpr_dprint(sc, MPR_INIT, "sequence failed, error=%d, exit\n", error); return (error); } /* Send the actual reset. XXX need to refresh the reg? */ reg |= MPI2_DIAG_RESET_ADAPTER; mpr_dprint(sc, MPR_INIT, "sequence success, sending reset, reg= 0x%x\n", reg); mpr_regwrite(sc, MPI2_HOST_DIAGNOSTIC_OFFSET, reg); /* Wait up to 300 seconds in 50ms intervals */ error = ETIMEDOUT; for (i = 0; i < 6000; i++) { /* * Wait 50 msec. If this is the first time through, wait 256 * msec to satisfy Diag Reset timing requirements. */ if (first_wait_done) { if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0, "mprdiag", hz/20); else if (sleep_flag == CAN_SLEEP) pause("mprdiag", hz/20); else DELAY(50 * 1000); } else { DELAY(256 * 1000); first_wait_done = TRUE; } /* * Check for the RESET_ADAPTER bit to be cleared first, then * wait for the RESET state to be cleared, which takes a little * longer. */ reg = mpr_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET); if (reg & MPI2_DIAG_RESET_ADAPTER) { continue; } reg = mpr_regread(sc, MPI2_DOORBELL_OFFSET); if ((reg & MPI2_IOC_STATE_MASK) != MPI2_IOC_STATE_RESET) { error = 0; break; } } if (error) { mpr_dprint(sc, MPR_INIT, "reset failed, error= %d, exit\n", error); return (error); } mpr_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET, 0x0); mpr_dprint(sc, MPR_INIT, "diag reset success, exit\n"); return (0); } static int mpr_message_unit_reset(struct mpr_softc *sc, int sleep_flag) { int error; MPR_FUNCTRACE(sc); mpr_dprint(sc, MPR_INIT, "%s entered\n", __func__); error = 0; mpr_regwrite(sc, MPI2_DOORBELL_OFFSET, MPI2_FUNCTION_IOC_MESSAGE_UNIT_RESET << MPI2_DOORBELL_FUNCTION_SHIFT); if (mpr_wait_db_ack(sc, 5, sleep_flag) != 0) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "Doorbell handshake failed\n"); error = ETIMEDOUT; } mpr_dprint(sc, MPR_INIT, "%s exit\n", __func__); return (error); } static int mpr_transition_ready(struct mpr_softc *sc) { uint32_t reg, state; int error, tries = 0; int sleep_flags; MPR_FUNCTRACE(sc); /* If we are in attach call, do not sleep */ sleep_flags = (sc->mpr_flags & MPR_FLAGS_ATTACH_DONE) ? CAN_SLEEP : NO_SLEEP; error = 0; mpr_dprint(sc, MPR_INIT, "%s entered, sleep_flags= %d\n", __func__, sleep_flags); while (tries++ < 1200) { reg = mpr_regread(sc, MPI2_DOORBELL_OFFSET); mpr_dprint(sc, MPR_INIT, " Doorbell= 0x%x\n", reg); /* * Ensure the IOC is ready to talk. If it's not, try * resetting it. */ if (reg & MPI2_DOORBELL_USED) { mpr_dprint(sc, MPR_INIT, " Not ready, sending diag " "reset\n"); mpr_diag_reset(sc, sleep_flags); DELAY(50000); continue; } /* Is the adapter owned by another peer? */ if ((reg & MPI2_DOORBELL_WHO_INIT_MASK) == (MPI2_WHOINIT_PCI_PEER << MPI2_DOORBELL_WHO_INIT_SHIFT)) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "IOC is under the " "control of another peer host, aborting " "initialization.\n"); error = ENXIO; break; } state = reg & MPI2_IOC_STATE_MASK; if (state == MPI2_IOC_STATE_READY) { /* Ready to go! */ error = 0; break; } else if (state == MPI2_IOC_STATE_FAULT) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "IOC in fault " "state 0x%x, resetting\n", state & MPI2_DOORBELL_FAULT_CODE_MASK); mpr_diag_reset(sc, sleep_flags); } else if (state == MPI2_IOC_STATE_OPERATIONAL) { /* Need to take ownership */ mpr_message_unit_reset(sc, sleep_flags); } else if (state == MPI2_IOC_STATE_RESET) { /* Wait a bit, IOC might be in transition */ mpr_dprint(sc, MPR_INIT|MPR_FAULT, "IOC in unexpected reset state\n"); } else { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "IOC in unknown state 0x%x\n", state); error = EINVAL; break; } /* Wait 50ms for things to settle down. */ DELAY(50000); } if (error) mpr_dprint(sc, MPR_INIT|MPR_FAULT, "Cannot transition IOC to ready\n"); mpr_dprint(sc, MPR_INIT, "%s exit\n", __func__); return (error); } static int mpr_transition_operational(struct mpr_softc *sc) { uint32_t reg, state; int error; MPR_FUNCTRACE(sc); error = 0; reg = mpr_regread(sc, MPI2_DOORBELL_OFFSET); mpr_dprint(sc, MPR_INIT, "%s entered, Doorbell= 0x%x\n", __func__, reg); state = reg & MPI2_IOC_STATE_MASK; if (state != MPI2_IOC_STATE_READY) { mpr_dprint(sc, MPR_INIT, "IOC not ready\n"); if ((error = mpr_transition_ready(sc)) != 0) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "failed to transition ready, exit\n"); return (error); } } error = mpr_send_iocinit(sc); mpr_dprint(sc, MPR_INIT, "%s exit\n", __func__); return (error); } static void mpr_resize_queues(struct mpr_softc *sc) { u_int reqcr, prireqcr, maxio, sges_per_frame, chain_seg_size; /* * Size the queues. Since the reply queues always need one free * entry, we'll deduct one reply message here. The LSI documents * suggest instead to add a count to the request queue, but I think * that it's better to deduct from reply queue. */ prireqcr = MAX(1, sc->max_prireqframes); prireqcr = MIN(prireqcr, sc->facts->HighPriorityCredit); reqcr = MAX(2, sc->max_reqframes); reqcr = MIN(reqcr, sc->facts->RequestCredit); sc->num_reqs = prireqcr + reqcr; sc->num_prireqs = prireqcr; sc->num_replies = MIN(sc->max_replyframes + sc->max_evtframes, sc->facts->MaxReplyDescriptorPostQueueDepth) - 1; /* Store the request frame size in bytes rather than as 32bit words */ sc->reqframesz = sc->facts->IOCRequestFrameSize * 4; /* * Gen3 and beyond uses the IOCMaxChainSegmentSize from IOC Facts to * get the size of a Chain Frame. Previous versions use the size as a * Request Frame for the Chain Frame size. If IOCMaxChainSegmentSize * is 0, use the default value. The IOCMaxChainSegmentSize is the * number of 16-byte elelements that can fit in a Chain Frame, which is * the size of an IEEE Simple SGE. */ if (sc->facts->MsgVersion >= MPI2_VERSION_02_05) { chain_seg_size = sc->facts->IOCMaxChainSegmentSize; if (chain_seg_size == 0) chain_seg_size = MPR_DEFAULT_CHAIN_SEG_SIZE; sc->chain_frame_size = chain_seg_size * MPR_MAX_CHAIN_ELEMENT_SIZE; } else { sc->chain_frame_size = sc->reqframesz; } /* * Max IO Size is Page Size * the following: * ((SGEs per frame - 1 for chain element) * Max Chain Depth) * + 1 for no chain needed in last frame * * If user suggests a Max IO size to use, use the smaller of the * user's value and the calculated value as long as the user's * value is larger than 0. The user's value is in pages. */ sges_per_frame = sc->chain_frame_size/sizeof(MPI2_IEEE_SGE_SIMPLE64)-1; maxio = (sges_per_frame * sc->facts->MaxChainDepth + 1) * PAGE_SIZE; /* * If I/O size limitation requested then use it and pass up to CAM. * If not, use maxphys as an optimization hint, but report HW limit. */ if (sc->max_io_pages > 0) { maxio = min(maxio, sc->max_io_pages * PAGE_SIZE); sc->maxio = maxio; } else { sc->maxio = maxio; maxio = min(maxio, maxphys); } sc->num_chains = (maxio / PAGE_SIZE + sges_per_frame - 2) / sges_per_frame * reqcr; if (sc->max_chains > 0 && sc->max_chains < sc->num_chains) sc->num_chains = sc->max_chains; /* * Figure out the number of MSIx-based queues. If the firmware or * user has done something crazy and not allowed enough credit for * the queues to be useful then don't enable multi-queue. */ if (sc->facts->MaxMSIxVectors < 2) sc->msi_msgs = 1; if (sc->msi_msgs > 1) { sc->msi_msgs = MIN(sc->msi_msgs, mp_ncpus); sc->msi_msgs = MIN(sc->msi_msgs, sc->facts->MaxMSIxVectors); if (sc->num_reqs / sc->msi_msgs < 2) sc->msi_msgs = 1; } mpr_dprint(sc, MPR_INIT, "Sized queues to q=%d reqs=%d replies=%d\n", sc->msi_msgs, sc->num_reqs, sc->num_replies); } /* * This is called during attach and when re-initializing due to a Diag Reset. * IOC Facts is used to allocate many of the structures needed by the driver. * If called from attach, de-allocation is not required because the driver has * not allocated any structures yet, but if called from a Diag Reset, previously * allocated structures based on IOC Facts will need to be freed and re- * allocated bases on the latest IOC Facts. */ static int mpr_iocfacts_allocate(struct mpr_softc *sc, uint8_t attaching) { int error; Mpi2IOCFactsReply_t saved_facts; uint8_t saved_mode, reallocating; mpr_dprint(sc, MPR_INIT|MPR_TRACE, "%s entered\n", __func__); /* Save old IOC Facts and then only reallocate if Facts have changed */ if (!attaching) { bcopy(sc->facts, &saved_facts, sizeof(MPI2_IOC_FACTS_REPLY)); } /* * Get IOC Facts. In all cases throughout this function, panic if doing * a re-initialization and only return the error if attaching so the OS * can handle it. */ if ((error = mpr_get_iocfacts(sc, sc->facts)) != 0) { if (attaching) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "Failed to get " "IOC Facts with error %d, exit\n", error); return (error); } else { panic("%s failed to get IOC Facts with error %d\n", __func__, error); } } MPR_DPRINT_PAGE(sc, MPR_XINFO, iocfacts, sc->facts); snprintf(sc->fw_version, sizeof(sc->fw_version), "%02d.%02d.%02d.%02d", sc->facts->FWVersion.Struct.Major, sc->facts->FWVersion.Struct.Minor, sc->facts->FWVersion.Struct.Unit, sc->facts->FWVersion.Struct.Dev); snprintf(sc->msg_version, sizeof(sc->msg_version), "%d.%d", (sc->facts->MsgVersion & MPI2_IOCFACTS_MSGVERSION_MAJOR_MASK) >> MPI2_IOCFACTS_MSGVERSION_MAJOR_SHIFT, (sc->facts->MsgVersion & MPI2_IOCFACTS_MSGVERSION_MINOR_MASK) >> MPI2_IOCFACTS_MSGVERSION_MINOR_SHIFT); mpr_dprint(sc, MPR_INFO, "Firmware: %s, Driver: %s\n", sc->fw_version, MPR_DRIVER_VERSION); mpr_dprint(sc, MPR_INFO, "IOCCapabilities: %b\n", sc->facts->IOCCapabilities, "\20" "\3ScsiTaskFull" "\4DiagTrace" "\5SnapBuf" "\6ExtBuf" "\7EEDP" "\10BiDirTarg" "\11Multicast" "\14TransRetry" "\15IR" "\16EventReplay" "\17RaidAccel" "\20MSIXIndex" "\21HostDisc" "\22FastPath" "\23RDPQArray" "\24AtomicReqDesc" "\25PCIeSRIOV"); /* * If the chip doesn't support event replay then a hard reset will be * required to trigger a full discovery. Do the reset here then * retransition to Ready. A hard reset might have already been done, * but it doesn't hurt to do it again. Only do this if attaching, not * for a Diag Reset. */ if (attaching && ((sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EVENT_REPLAY) == 0)) { mpr_dprint(sc, MPR_INIT, "No event replay, resetting\n"); mpr_diag_reset(sc, NO_SLEEP); if ((error = mpr_transition_ready(sc)) != 0) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "Failed to " "transition to ready with error %d, exit\n", error); return (error); } } /* * Set flag if IR Firmware is loaded. If the RAID Capability has * changed from the previous IOC Facts, log a warning, but only if * checking this after a Diag Reset and not during attach. */ saved_mode = sc->ir_firmware; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_INTEGRATED_RAID) sc->ir_firmware = 1; if (!attaching) { if (sc->ir_firmware != saved_mode) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "new IR/IT mode " "in IOC Facts does not match previous mode\n"); } } /* Only deallocate and reallocate if relevant IOC Facts have changed */ reallocating = FALSE; sc->mpr_flags &= ~MPR_FLAGS_REALLOCATED; if ((!attaching) && ((saved_facts.MsgVersion != sc->facts->MsgVersion) || (saved_facts.HeaderVersion != sc->facts->HeaderVersion) || (saved_facts.MaxChainDepth != sc->facts->MaxChainDepth) || (saved_facts.RequestCredit != sc->facts->RequestCredit) || (saved_facts.ProductID != sc->facts->ProductID) || (saved_facts.IOCCapabilities != sc->facts->IOCCapabilities) || (saved_facts.IOCRequestFrameSize != sc->facts->IOCRequestFrameSize) || (saved_facts.IOCMaxChainSegmentSize != sc->facts->IOCMaxChainSegmentSize) || (saved_facts.MaxTargets != sc->facts->MaxTargets) || (saved_facts.MaxSasExpanders != sc->facts->MaxSasExpanders) || (saved_facts.MaxEnclosures != sc->facts->MaxEnclosures) || (saved_facts.HighPriorityCredit != sc->facts->HighPriorityCredit) || (saved_facts.MaxReplyDescriptorPostQueueDepth != sc->facts->MaxReplyDescriptorPostQueueDepth) || (saved_facts.ReplyFrameSize != sc->facts->ReplyFrameSize) || (saved_facts.MaxVolumes != sc->facts->MaxVolumes) || (saved_facts.MaxPersistentEntries != sc->facts->MaxPersistentEntries))) { reallocating = TRUE; /* Record that we reallocated everything */ sc->mpr_flags |= MPR_FLAGS_REALLOCATED; } /* * Some things should be done if attaching or re-allocating after a Diag * Reset, but are not needed after a Diag Reset if the FW has not * changed. */ if (attaching || reallocating) { /* * Check if controller supports FW diag buffers and set flag to * enable each type. */ if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_DIAG_TRACE_BUFFER) sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_TRACE]. enabled = TRUE; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_SNAPSHOT_BUFFER) sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_SNAPSHOT]. enabled = TRUE; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EXTENDED_BUFFER) sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_EXTENDED]. enabled = TRUE; /* * Set flags for some supported items. */ if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EEDP) sc->eedp_enabled = TRUE; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_TLR) sc->control_TLR = TRUE; if ((sc->facts->IOCCapabilities & MPI26_IOCFACTS_CAPABILITY_ATOMIC_REQ) && (sc->mpr_flags & MPR_FLAGS_SEA_IOC)) sc->atomic_desc_capable = TRUE; mpr_resize_queues(sc); /* * Initialize all Tail Queues */ TAILQ_INIT(&sc->req_list); TAILQ_INIT(&sc->high_priority_req_list); TAILQ_INIT(&sc->chain_list); TAILQ_INIT(&sc->prp_page_list); TAILQ_INIT(&sc->tm_list); } /* * If doing a Diag Reset and the FW is significantly different * (reallocating will be set above in IOC Facts comparison), then all * buffers based on the IOC Facts will need to be freed before they are * reallocated. */ if (reallocating) { mpr_iocfacts_free(sc); mprsas_realloc_targets(sc, saved_facts.MaxTargets + saved_facts.MaxVolumes); } /* * Any deallocation has been completed. Now start reallocating * if needed. Will only need to reallocate if attaching or if the new * IOC Facts are different from the previous IOC Facts after a Diag * Reset. Targets have already been allocated above if needed. */ error = 0; while (attaching || reallocating) { if ((error = mpr_alloc_hw_queues(sc)) != 0) break; if ((error = mpr_alloc_replies(sc)) != 0) break; if ((error = mpr_alloc_requests(sc)) != 0) break; if ((error = mpr_alloc_queues(sc)) != 0) break; break; } if (error) { mpr_dprint(sc, MPR_INIT|MPR_ERROR, "Failed to alloc queues with error %d\n", error); mpr_free(sc); return (error); } /* Always initialize the queues */ bzero(sc->free_queue, sc->fqdepth * 4); mpr_init_queues(sc); /* * Always get the chip out of the reset state, but only panic if not * attaching. If attaching and there is an error, that is handled by * the OS. */ error = mpr_transition_operational(sc); if (error != 0) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "Failed to " "transition to operational with error %d\n", error); mpr_free(sc); return (error); } /* * Finish the queue initialization. * These are set here instead of in mpr_init_queues() because the * IOC resets these values during the state transition in * mpr_transition_operational(). The free index is set to 1 * because the corresponding index in the IOC is set to 0, and the * IOC treats the queues as full if both are set to the same value. * Hence the reason that the queue can't hold all of the possible * replies. */ sc->replypostindex = 0; mpr_regwrite(sc, MPI2_REPLY_FREE_HOST_INDEX_OFFSET, sc->replyfreeindex); mpr_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, 0); /* * Attach the subsystems so they can prepare their event masks. * XXX Should be dynamic so that IM/IR and user modules can attach */ error = 0; while (attaching) { mpr_dprint(sc, MPR_INIT, "Attaching subsystems\n"); if ((error = mpr_attach_log(sc)) != 0) break; if ((error = mpr_attach_sas(sc)) != 0) break; if ((error = mpr_attach_user(sc)) != 0) break; break; } if (error) { mpr_dprint(sc, MPR_INIT|MPR_ERROR, "Failed to attach all subsystems: error %d\n", error); mpr_free(sc); return (error); } /* * XXX If the number of MSI-X vectors changes during re-init, this * won't see it and adjust. */ if (attaching && (error = mpr_pci_setup_interrupts(sc)) != 0) { mpr_dprint(sc, MPR_INIT|MPR_ERROR, "Failed to setup interrupts\n"); mpr_free(sc); return (error); } return (error); } /* * This is called if memory is being free (during detach for example) and when * buffers need to be reallocated due to a Diag Reset. */ static void mpr_iocfacts_free(struct mpr_softc *sc) { struct mpr_command *cm; int i; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); if (sc->free_busaddr != 0) bus_dmamap_unload(sc->queues_dmat, sc->queues_map); if (sc->free_queue != NULL) bus_dmamem_free(sc->queues_dmat, sc->free_queue, sc->queues_map); if (sc->queues_dmat != NULL) bus_dma_tag_destroy(sc->queues_dmat); if (sc->chain_frames != NULL) { bus_dmamap_unload(sc->chain_dmat, sc->chain_map); bus_dmamem_free(sc->chain_dmat, sc->chain_frames, sc->chain_map); } if (sc->chain_dmat != NULL) bus_dma_tag_destroy(sc->chain_dmat); if (sc->sense_busaddr != 0) bus_dmamap_unload(sc->sense_dmat, sc->sense_map); if (sc->sense_frames != NULL) bus_dmamem_free(sc->sense_dmat, sc->sense_frames, sc->sense_map); if (sc->sense_dmat != NULL) bus_dma_tag_destroy(sc->sense_dmat); if (sc->prp_page_busaddr != 0) bus_dmamap_unload(sc->prp_page_dmat, sc->prp_page_map); if (sc->prp_pages != NULL) bus_dmamem_free(sc->prp_page_dmat, sc->prp_pages, sc->prp_page_map); if (sc->prp_page_dmat != NULL) bus_dma_tag_destroy(sc->prp_page_dmat); if (sc->reply_busaddr != 0) bus_dmamap_unload(sc->reply_dmat, sc->reply_map); if (sc->reply_frames != NULL) bus_dmamem_free(sc->reply_dmat, sc->reply_frames, sc->reply_map); if (sc->reply_dmat != NULL) bus_dma_tag_destroy(sc->reply_dmat); if (sc->req_busaddr != 0) bus_dmamap_unload(sc->req_dmat, sc->req_map); if (sc->req_frames != NULL) bus_dmamem_free(sc->req_dmat, sc->req_frames, sc->req_map); if (sc->req_dmat != NULL) bus_dma_tag_destroy(sc->req_dmat); if (sc->chains != NULL) free(sc->chains, M_MPR); if (sc->prps != NULL) free(sc->prps, M_MPR); if (sc->commands != NULL) { for (i = 1; i < sc->num_reqs; i++) { cm = &sc->commands[i]; bus_dmamap_destroy(sc->buffer_dmat, cm->cm_dmamap); } free(sc->commands, M_MPR); } if (sc->buffer_dmat != NULL) bus_dma_tag_destroy(sc->buffer_dmat); mpr_pci_free_interrupts(sc); free(sc->queues, M_MPR); sc->queues = NULL; } /* * The terms diag reset and hard reset are used interchangeably in the MPI * docs to mean resetting the controller chip. In this code diag reset * cleans everything up, and the hard reset function just sends the reset * sequence to the chip. This should probably be refactored so that every * subsystem gets a reset notification of some sort, and can clean up * appropriately. */ int mpr_reinit(struct mpr_softc *sc) { int error; struct mprsas_softc *sassc; sassc = sc->sassc; MPR_FUNCTRACE(sc); mtx_assert(&sc->mpr_mtx, MA_OWNED); mpr_dprint(sc, MPR_INIT|MPR_INFO, "Reinitializing controller\n"); if (sc->mpr_flags & MPR_FLAGS_DIAGRESET) { mpr_dprint(sc, MPR_INIT, "Reset already in progress\n"); return 0; } /* * Make sure the completion callbacks can recognize they're getting * a NULL cm_reply due to a reset. */ sc->mpr_flags |= MPR_FLAGS_DIAGRESET; /* * Mask interrupts here. */ mpr_dprint(sc, MPR_INIT, "Masking interrupts and resetting\n"); mpr_mask_intr(sc); error = mpr_diag_reset(sc, CAN_SLEEP); if (error != 0) { panic("%s hard reset failed with error %d\n", __func__, error); } /* Restore the PCI state, including the MSI-X registers */ mpr_pci_restore(sc); /* Give the I/O subsystem special priority to get itself prepared */ mprsas_handle_reinit(sc); /* * Get IOC Facts and allocate all structures based on this information. * The attach function will also call mpr_iocfacts_allocate at startup. * If relevant values have changed in IOC Facts, this function will free * all of the memory based on IOC Facts and reallocate that memory. */ if ((error = mpr_iocfacts_allocate(sc, FALSE)) != 0) { panic("%s IOC Facts based allocation failed with error %d\n", __func__, error); } /* * Mapping structures will be re-allocated after getting IOC Page8, so * free these structures here. */ mpr_mapping_exit(sc); /* * The static page function currently read is IOC Page8. Others can be * added in future. It's possible that the values in IOC Page8 have * changed after a Diag Reset due to user modification, so always read * these. Interrupts are masked, so unmask them before getting config * pages. */ mpr_unmask_intr(sc); sc->mpr_flags &= ~MPR_FLAGS_DIAGRESET; mpr_base_static_config_pages(sc); /* * Some mapping info is based in IOC Page8 data, so re-initialize the * mapping tables. */ mpr_mapping_initialize(sc); /* * Restart will reload the event masks clobbered by the reset, and * then enable the port. */ mpr_reregister_events(sc); /* the end of discovery will release the simq, so we're done. */ mpr_dprint(sc, MPR_INIT|MPR_XINFO, "Finished sc %p post %u free %u\n", sc, sc->replypostindex, sc->replyfreeindex); mprsas_release_simq_reinit(sassc); mpr_dprint(sc, MPR_INIT, "%s exit error= %d\n", __func__, error); return 0; } /* Wait for the chip to ACK a word that we've put into its FIFO * Wait for seconds. In single loop wait for busy loop * for 500 microseconds. * Total is [ 0.5 * (2000 * ) ] in miliseconds. * */ static int mpr_wait_db_ack(struct mpr_softc *sc, int timeout, int sleep_flag) { u32 cntdn, count; u32 int_status; u32 doorbell; count = 0; cntdn = (sleep_flag == CAN_SLEEP) ? 1000*timeout : 2000*timeout; do { int_status = mpr_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET); if (!(int_status & MPI2_HIS_SYS2IOC_DB_STATUS)) { mpr_dprint(sc, MPR_TRACE, "%s: successful count(%d), " "timeout(%d)\n", __func__, count, timeout); return 0; } else if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) { doorbell = mpr_regread(sc, MPI2_DOORBELL_OFFSET); if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { mpr_dprint(sc, MPR_FAULT, "fault_state(0x%04x)!\n", doorbell); return (EFAULT); } } else if (int_status == 0xFFFFFFFF) goto out; /* * If it can sleep, sleep for 1 milisecond, else busy loop for * 0.5 milisecond */ if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0, "mprdba", hz/1000); else if (sleep_flag == CAN_SLEEP) pause("mprdba", hz/1000); else DELAY(500); count++; } while (--cntdn); out: mpr_dprint(sc, MPR_FAULT, "%s: failed due to timeout count(%d), " "int_status(%x)!\n", __func__, count, int_status); return (ETIMEDOUT); } /* Wait for the chip to signal that the next word in its FIFO can be fetched */ static int mpr_wait_db_int(struct mpr_softc *sc) { int retry; for (retry = 0; retry < MPR_DB_MAX_WAIT; retry++) { if ((mpr_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET) & MPI2_HIS_IOC2SYS_DB_STATUS) != 0) return (0); DELAY(2000); } return (ETIMEDOUT); } /* Step through the synchronous command state machine, i.e. "Doorbell mode" */ static int mpr_request_sync(struct mpr_softc *sc, void *req, MPI2_DEFAULT_REPLY *reply, int req_sz, int reply_sz, int timeout) { uint32_t *data32; uint16_t *data16; int i, count, ioc_sz, residual; int sleep_flags = CAN_SLEEP; if (curthread->td_no_sleeping) sleep_flags = NO_SLEEP; /* Step 1 */ mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); /* Step 2 */ if (mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) return (EBUSY); /* Step 3 * Announce that a message is coming through the doorbell. Messages * are pushed at 32bit words, so round up if needed. */ count = (req_sz + 3) / 4; mpr_regwrite(sc, MPI2_DOORBELL_OFFSET, (MPI2_FUNCTION_HANDSHAKE << MPI2_DOORBELL_FUNCTION_SHIFT) | (count << MPI2_DOORBELL_ADD_DWORDS_SHIFT)); /* Step 4 */ if (mpr_wait_db_int(sc) || (mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) == 0) { mpr_dprint(sc, MPR_FAULT, "Doorbell failed to activate\n"); return (ENXIO); } mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); if (mpr_wait_db_ack(sc, 5, sleep_flags) != 0) { mpr_dprint(sc, MPR_FAULT, "Doorbell handshake failed\n"); return (ENXIO); } /* Step 5 */ /* Clock out the message data synchronously in 32-bit dwords*/ data32 = (uint32_t *)req; for (i = 0; i < count; i++) { mpr_regwrite(sc, MPI2_DOORBELL_OFFSET, htole32(data32[i])); if (mpr_wait_db_ack(sc, 5, sleep_flags) != 0) { mpr_dprint(sc, MPR_FAULT, "Timeout while writing doorbell\n"); return (ENXIO); } } /* Step 6 */ /* Clock in the reply in 16-bit words. The total length of the * message is always in the 4th byte, so clock out the first 2 words * manually, then loop the rest. */ data16 = (uint16_t *)reply; if (mpr_wait_db_int(sc) != 0) { mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell 0\n"); return (ENXIO); } /* * If in a BE platform, swap bytes using le16toh to not * disturb 8 bit field neighbors in destination structure * pointed by data16. */ data16[0] = le16toh(mpr_regread(sc, MPI2_DOORBELL_OFFSET)) & MPI2_DOORBELL_DATA_MASK; mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); if (mpr_wait_db_int(sc) != 0) { mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell 1\n"); return (ENXIO); } data16[1] = le16toh(mpr_regread(sc, MPI2_DOORBELL_OFFSET)) & MPI2_DOORBELL_DATA_MASK; mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); /* Number of 32bit words in the message */ ioc_sz = reply->MsgLength; /* * Figure out how many 16bit words to clock in without overrunning. * The precision loss with dividing reply_sz can safely be * ignored because the messages can only be multiples of 32bits. */ residual = 0; count = MIN((reply_sz / 4), ioc_sz) * 2; if (count < ioc_sz * 2) { residual = ioc_sz * 2 - count; mpr_dprint(sc, MPR_ERROR, "Driver error, throwing away %d " "residual message words\n", residual); } for (i = 2; i < count; i++) { if (mpr_wait_db_int(sc) != 0) { mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell %d\n", i); return (ENXIO); } data16[i] = le16toh(mpr_regread(sc, MPI2_DOORBELL_OFFSET)) & MPI2_DOORBELL_DATA_MASK; mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); } /* * Pull out residual words that won't fit into the provided buffer. * This keeps the chip from hanging due to a driver programming * error. */ while (residual--) { if (mpr_wait_db_int(sc) != 0) { mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell\n"); return (ENXIO); } (void)mpr_regread(sc, MPI2_DOORBELL_OFFSET); mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); } /* Step 7 */ if (mpr_wait_db_int(sc) != 0) { mpr_dprint(sc, MPR_FAULT, "Timeout waiting to exit doorbell\n"); return (ENXIO); } if (mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) mpr_dprint(sc, MPR_FAULT, "Warning, doorbell still active\n"); mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); return (0); } static void mpr_enqueue_request(struct mpr_softc *sc, struct mpr_command *cm) { request_descriptor_t rd; MPR_FUNCTRACE(sc); mpr_dprint(sc, MPR_TRACE, "SMID %u cm %p ccb %p\n", cm->cm_desc.Default.SMID, cm, cm->cm_ccb); if (sc->mpr_flags & MPR_FLAGS_ATTACH_DONE && !(sc->mpr_flags & MPR_FLAGS_SHUTDOWN)) mtx_assert(&sc->mpr_mtx, MA_OWNED); if (++sc->io_cmds_active > sc->io_cmds_highwater) sc->io_cmds_highwater++; KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, ("command not busy\n")); cm->cm_state = MPR_CM_STATE_INQUEUE; if (sc->atomic_desc_capable) { rd.u.low = cm->cm_desc.Words.Low; mpr_regwrite(sc, MPI26_ATOMIC_REQUEST_DESCRIPTOR_POST_OFFSET, rd.u.low); } else { rd.u.low = htole32(cm->cm_desc.Words.Low); rd.u.high = htole32(cm->cm_desc.Words.High); mpr_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_LOW_OFFSET, rd.u.low); mpr_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_HIGH_OFFSET, rd.u.high); } } /* * Ioc facts are read in 16 bit words and and stored with le16toh, * this takes care of proper U8 fields endianness in * MPI2_IOC_FACTS_REPLY, but we still need to swap back U16 fields. */ static void adjust_iocfacts_endianness(MPI2_IOC_FACTS_REPLY *facts) { facts->HeaderVersion = le16toh(facts->HeaderVersion); facts->Reserved1 = le16toh(facts->Reserved1); facts->IOCExceptions = le16toh(facts->IOCExceptions); facts->IOCStatus = le16toh(facts->IOCStatus); facts->IOCLogInfo = le32toh(facts->IOCLogInfo); facts->RequestCredit = le16toh(facts->RequestCredit); facts->ProductID = le16toh(facts->ProductID); facts->IOCCapabilities = le32toh(facts->IOCCapabilities); facts->IOCRequestFrameSize = le16toh(facts->IOCRequestFrameSize); facts->IOCMaxChainSegmentSize = le16toh(facts->IOCMaxChainSegmentSize); facts->MaxInitiators = le16toh(facts->MaxInitiators); facts->MaxTargets = le16toh(facts->MaxTargets); facts->MaxSasExpanders = le16toh(facts->MaxSasExpanders); facts->MaxEnclosures = le16toh(facts->MaxEnclosures); facts->ProtocolFlags = le16toh(facts->ProtocolFlags); facts->HighPriorityCredit = le16toh(facts->HighPriorityCredit); facts->MaxReplyDescriptorPostQueueDepth = le16toh(facts->MaxReplyDescriptorPostQueueDepth); facts->MaxDevHandle = le16toh(facts->MaxDevHandle); facts->MaxPersistentEntries = le16toh(facts->MaxPersistentEntries); facts->MinDevHandle = le16toh(facts->MinDevHandle); } /* * Just the FACTS, ma'am. */ static int mpr_get_iocfacts(struct mpr_softc *sc, MPI2_IOC_FACTS_REPLY *facts) { MPI2_DEFAULT_REPLY *reply; MPI2_IOC_FACTS_REQUEST request; int error, req_sz, reply_sz; MPR_FUNCTRACE(sc); mpr_dprint(sc, MPR_INIT, "%s entered\n", __func__); req_sz = sizeof(MPI2_IOC_FACTS_REQUEST); reply_sz = sizeof(MPI2_IOC_FACTS_REPLY); reply = (MPI2_DEFAULT_REPLY *)facts; bzero(&request, req_sz); request.Function = MPI2_FUNCTION_IOC_FACTS; error = mpr_request_sync(sc, &request, reply, req_sz, reply_sz, 5); adjust_iocfacts_endianness(facts); mpr_dprint(sc, MPR_TRACE, "facts->IOCCapabilities 0x%x\n", facts->IOCCapabilities); mpr_dprint(sc, MPR_INIT, "%s exit, error= %d\n", __func__, error); return (error); } static int mpr_send_iocinit(struct mpr_softc *sc) { MPI2_IOC_INIT_REQUEST init; MPI2_DEFAULT_REPLY reply; int req_sz, reply_sz, error; struct timeval now; uint64_t time_in_msec; MPR_FUNCTRACE(sc); mpr_dprint(sc, MPR_INIT, "%s entered\n", __func__); /* Do a quick sanity check on proper initialization */ if ((sc->pqdepth == 0) || (sc->fqdepth == 0) || (sc->reqframesz == 0) || (sc->replyframesz == 0)) { mpr_dprint(sc, MPR_INIT|MPR_ERROR, "Driver not fully initialized for IOCInit\n"); return (EINVAL); } req_sz = sizeof(MPI2_IOC_INIT_REQUEST); reply_sz = sizeof(MPI2_IOC_INIT_REPLY); bzero(&init, req_sz); bzero(&reply, reply_sz); /* * Fill in the init block. Note that most addresses are * deliberately in the lower 32bits of memory. This is a micro- * optimzation for PCI/PCIX, though it's not clear if it helps PCIe. */ init.Function = MPI2_FUNCTION_IOC_INIT; init.WhoInit = MPI2_WHOINIT_HOST_DRIVER; init.MsgVersion = htole16(MPI2_VERSION); init.HeaderVersion = htole16(MPI2_HEADER_VERSION); init.SystemRequestFrameSize = htole16((uint16_t)(sc->reqframesz / 4)); init.ReplyDescriptorPostQueueDepth = htole16(sc->pqdepth); init.ReplyFreeQueueDepth = htole16(sc->fqdepth); init.SenseBufferAddressHigh = 0; init.SystemReplyAddressHigh = 0; init.SystemRequestFrameBaseAddress.High = 0; init.SystemRequestFrameBaseAddress.Low = htole32((uint32_t)sc->req_busaddr); init.ReplyDescriptorPostQueueAddress.High = 0; init.ReplyDescriptorPostQueueAddress.Low = htole32((uint32_t)sc->post_busaddr); init.ReplyFreeQueueAddress.High = 0; init.ReplyFreeQueueAddress.Low = htole32((uint32_t)sc->free_busaddr); getmicrotime(&now); time_in_msec = (now.tv_sec * 1000 + now.tv_usec/1000); init.TimeStamp.High = htole32((time_in_msec >> 32) & 0xFFFFFFFF); init.TimeStamp.Low = htole32(time_in_msec & 0xFFFFFFFF); init.HostPageSize = HOST_PAGE_SIZE_4K; error = mpr_request_sync(sc, &init, &reply, req_sz, reply_sz, 5); if ((le16toh(reply.IOCStatus) & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS) error = ENXIO; mpr_dprint(sc, MPR_INIT, "IOCInit status= 0x%x\n", le16toh(reply.IOCStatus)); mpr_dprint(sc, MPR_INIT, "%s exit\n", __func__); return (error); } void mpr_memaddr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { bus_addr_t *addr; addr = arg; *addr = segs[0].ds_addr; } void mpr_memaddr_wait_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct mpr_busdma_context *ctx; int need_unload, need_free; ctx = (struct mpr_busdma_context *)arg; need_unload = 0; need_free = 0; mpr_lock(ctx->softc); ctx->error = error; ctx->completed = 1; if ((error == 0) && (ctx->abandoned == 0)) { *ctx->addr = segs[0].ds_addr; } else { if (nsegs != 0) need_unload = 1; if (ctx->abandoned != 0) need_free = 1; } if (need_free == 0) wakeup(ctx); mpr_unlock(ctx->softc); if (need_unload != 0) { bus_dmamap_unload(ctx->buffer_dmat, ctx->buffer_dmamap); *ctx->addr = 0; } if (need_free != 0) free(ctx, M_MPR); } static int mpr_alloc_queues(struct mpr_softc *sc) { struct mpr_queue *q; int nq, i; nq = sc->msi_msgs; mpr_dprint(sc, MPR_INIT|MPR_XINFO, "Allocating %d I/O queues\n", nq); sc->queues = malloc(sizeof(struct mpr_queue) * nq, M_MPR, M_NOWAIT|M_ZERO); if (sc->queues == NULL) return (ENOMEM); for (i = 0; i < nq; i++) { q = &sc->queues[i]; mpr_dprint(sc, MPR_INIT, "Configuring queue %d %p\n", i, q); q->sc = sc; q->qnum = i; } return (0); } static int mpr_alloc_hw_queues(struct mpr_softc *sc) { bus_dma_template_t t; bus_addr_t queues_busaddr; uint8_t *queues; int qsize, fqsize, pqsize; /* * The reply free queue contains 4 byte entries in multiples of 16 and * aligned on a 16 byte boundary. There must always be an unused entry. * This queue supplies fresh reply frames for the firmware to use. * * The reply descriptor post queue contains 8 byte entries in * multiples of 16 and aligned on a 16 byte boundary. This queue * contains filled-in reply frames sent from the firmware to the host. * * These two queues are allocated together for simplicity. */ sc->fqdepth = roundup2(sc->num_replies + 1, 16); sc->pqdepth = roundup2(sc->num_replies + 1, 16); fqsize= sc->fqdepth * 4; pqsize = sc->pqdepth * 8; qsize = fqsize + pqsize; bus_dma_template_init(&t, sc->mpr_parent_dmat); BUS_DMA_TEMPLATE_FILL(&t, BD_ALIGNMENT(16), BD_MAXSIZE(qsize), BD_MAXSEGSIZE(qsize), BD_NSEGMENTS(1), BD_LOWADDR(BUS_SPACE_MAXADDR_32BIT)); if (bus_dma_template_tag(&t, &sc->queues_dmat)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate queues DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->queues_dmat, (void **)&queues, BUS_DMA_NOWAIT, &sc->queues_map)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate queues memory\n"); return (ENOMEM); } bzero(queues, qsize); bus_dmamap_load(sc->queues_dmat, sc->queues_map, queues, qsize, mpr_memaddr_cb, &queues_busaddr, 0); sc->free_queue = (uint32_t *)queues; sc->free_busaddr = queues_busaddr; sc->post_queue = (MPI2_REPLY_DESCRIPTORS_UNION *)(queues + fqsize); sc->post_busaddr = queues_busaddr + fqsize; mpr_dprint(sc, MPR_INIT, "free queue busaddr= %#016jx size= %d\n", (uintmax_t)sc->free_busaddr, fqsize); mpr_dprint(sc, MPR_INIT, "reply queue busaddr= %#016jx size= %d\n", (uintmax_t)sc->post_busaddr, pqsize); return (0); } static int mpr_alloc_replies(struct mpr_softc *sc) { bus_dma_template_t t; int rsize, num_replies; /* Store the reply frame size in bytes rather than as 32bit words */ sc->replyframesz = sc->facts->ReplyFrameSize * 4; /* * sc->num_replies should be one less than sc->fqdepth. We need to * allocate space for sc->fqdepth replies, but only sc->num_replies * replies can be used at once. */ num_replies = max(sc->fqdepth, sc->num_replies); rsize = sc->replyframesz * num_replies; bus_dma_template_init(&t, sc->mpr_parent_dmat); BUS_DMA_TEMPLATE_FILL(&t, BD_ALIGNMENT(4), BD_MAXSIZE(rsize), BD_MAXSEGSIZE(rsize), BD_NSEGMENTS(1), BD_LOWADDR(BUS_SPACE_MAXADDR_32BIT)); if (bus_dma_template_tag(&t, &sc->reply_dmat)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate replies DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->reply_dmat, (void **)&sc->reply_frames, BUS_DMA_NOWAIT, &sc->reply_map)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate replies memory\n"); return (ENOMEM); } bzero(sc->reply_frames, rsize); bus_dmamap_load(sc->reply_dmat, sc->reply_map, sc->reply_frames, rsize, mpr_memaddr_cb, &sc->reply_busaddr, 0); mpr_dprint(sc, MPR_INIT, "reply frames busaddr= %#016jx size= %d\n", (uintmax_t)sc->reply_busaddr, rsize); return (0); } static void mpr_load_chains_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct mpr_softc *sc = arg; struct mpr_chain *chain; bus_size_t bo; int i, o, s; if (error != 0) return; for (i = 0, o = 0, s = 0; s < nsegs; s++) { for (bo = 0; bo + sc->chain_frame_size <= segs[s].ds_len; bo += sc->chain_frame_size) { chain = &sc->chains[i++]; chain->chain =(MPI2_SGE_IO_UNION *)(sc->chain_frames+o); chain->chain_busaddr = segs[s].ds_addr + bo; o += sc->chain_frame_size; mpr_free_chain(sc, chain); } if (bo != segs[s].ds_len) o += segs[s].ds_len - bo; } sc->chain_free_lowwater = i; } static int mpr_alloc_requests(struct mpr_softc *sc) { bus_dma_template_t t; struct mpr_command *cm; int i, rsize, nsegs; rsize = sc->reqframesz * sc->num_reqs; bus_dma_template_init(&t, sc->mpr_parent_dmat); BUS_DMA_TEMPLATE_FILL(&t, BD_ALIGNMENT(16), BD_MAXSIZE(rsize), BD_MAXSEGSIZE(rsize), BD_NSEGMENTS(1), BD_LOWADDR(BUS_SPACE_MAXADDR_32BIT)); if (bus_dma_template_tag(&t, &sc->req_dmat)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate request DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->req_dmat, (void **)&sc->req_frames, BUS_DMA_NOWAIT, &sc->req_map)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate request memory\n"); return (ENOMEM); } bzero(sc->req_frames, rsize); bus_dmamap_load(sc->req_dmat, sc->req_map, sc->req_frames, rsize, mpr_memaddr_cb, &sc->req_busaddr, 0); mpr_dprint(sc, MPR_INIT, "request frames busaddr= %#016jx size= %d\n", (uintmax_t)sc->req_busaddr, rsize); sc->chains = malloc(sizeof(struct mpr_chain) * sc->num_chains, M_MPR, M_NOWAIT | M_ZERO); if (!sc->chains) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate chain memory\n"); return (ENOMEM); } rsize = sc->chain_frame_size * sc->num_chains; bus_dma_template_init(&t, sc->mpr_parent_dmat); BUS_DMA_TEMPLATE_FILL(&t, BD_ALIGNMENT(16), BD_MAXSIZE(rsize), BD_MAXSEGSIZE(rsize), BD_NSEGMENTS((howmany(rsize, PAGE_SIZE)))); if (bus_dma_template_tag(&t, &sc->chain_dmat)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate chain DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->chain_dmat, (void **)&sc->chain_frames, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &sc->chain_map)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate chain memory\n"); return (ENOMEM); } if (bus_dmamap_load(sc->chain_dmat, sc->chain_map, sc->chain_frames, rsize, mpr_load_chains_cb, sc, BUS_DMA_NOWAIT)) { mpr_dprint(sc, MPR_ERROR, "Cannot load chain memory\n"); bus_dmamem_free(sc->chain_dmat, sc->chain_frames, sc->chain_map); return (ENOMEM); } rsize = MPR_SENSE_LEN * sc->num_reqs; bus_dma_template_clone(&t, sc->req_dmat); BUS_DMA_TEMPLATE_FILL(&t, BD_ALIGNMENT(1), BD_MAXSIZE(rsize), BD_MAXSEGSIZE(rsize)); if (bus_dma_template_tag(&t, &sc->sense_dmat)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate sense DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->sense_dmat, (void **)&sc->sense_frames, BUS_DMA_NOWAIT, &sc->sense_map)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate sense memory\n"); return (ENOMEM); } bzero(sc->sense_frames, rsize); bus_dmamap_load(sc->sense_dmat, sc->sense_map, sc->sense_frames, rsize, mpr_memaddr_cb, &sc->sense_busaddr, 0); mpr_dprint(sc, MPR_INIT, "sense frames busaddr= %#016jx size= %d\n", (uintmax_t)sc->sense_busaddr, rsize); /* * Allocate NVMe PRP Pages for NVMe SGL support only if the FW supports * these devices. */ if ((sc->facts->MsgVersion >= MPI2_VERSION_02_06) && (sc->facts->ProtocolFlags & MPI2_IOCFACTS_PROTOCOL_NVME_DEVICES)) { if (mpr_alloc_nvme_prp_pages(sc) == ENOMEM) return (ENOMEM); } nsegs = (sc->maxio / PAGE_SIZE) + 1; bus_dma_template_init(&t, sc->mpr_parent_dmat); BUS_DMA_TEMPLATE_FILL(&t, BD_MAXSIZE(BUS_SPACE_MAXSIZE_32BIT), BD_NSEGMENTS(nsegs), BD_MAXSEGSIZE(BUS_SPACE_MAXSIZE_32BIT), BD_FLAGS(BUS_DMA_ALLOCNOW), BD_LOCKFUNC(busdma_lock_mutex), BD_LOCKFUNCARG(&sc->mpr_mtx)); if (bus_dma_template_tag(&t, &sc->buffer_dmat)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate buffer DMA tag\n"); return (ENOMEM); } /* * SMID 0 cannot be used as a free command per the firmware spec. * Just drop that command instead of risking accounting bugs. */ sc->commands = malloc(sizeof(struct mpr_command) * sc->num_reqs, M_MPR, M_WAITOK | M_ZERO); for (i = 1; i < sc->num_reqs; i++) { cm = &sc->commands[i]; cm->cm_req = sc->req_frames + i * sc->reqframesz; cm->cm_req_busaddr = sc->req_busaddr + i * sc->reqframesz; cm->cm_sense = &sc->sense_frames[i]; cm->cm_sense_busaddr = sc->sense_busaddr + i * MPR_SENSE_LEN; cm->cm_desc.Default.SMID = htole16(i); cm->cm_sc = sc; cm->cm_state = MPR_CM_STATE_BUSY; TAILQ_INIT(&cm->cm_chain_list); TAILQ_INIT(&cm->cm_prp_page_list); callout_init_mtx(&cm->cm_callout, &sc->mpr_mtx, 0); /* XXX Is a failure here a critical problem? */ if (bus_dmamap_create(sc->buffer_dmat, 0, &cm->cm_dmamap) == 0) { if (i <= sc->num_prireqs) mpr_free_high_priority_command(sc, cm); else mpr_free_command(sc, cm); } else { panic("failed to allocate command %d\n", i); sc->num_reqs = i; break; } } return (0); } /* * Allocate contiguous buffers for PCIe NVMe devices for building native PRPs, * which are scatter/gather lists for NVMe devices. * * This buffer must be contiguous due to the nature of how NVMe PRPs are built * and translated by FW. * * returns ENOMEM if memory could not be allocated, otherwise returns 0. */ static int mpr_alloc_nvme_prp_pages(struct mpr_softc *sc) { bus_dma_template_t t; struct mpr_prp_page *prp_page; int PRPs_per_page, PRPs_required, pages_required; int rsize, i; /* * Assuming a MAX_IO_SIZE of 1MB and a PAGE_SIZE of 4k, the max number * of PRPs (NVMe's Scatter/Gather Element) needed per I/O is: * MAX_IO_SIZE / PAGE_SIZE = 256 * * 1 PRP entry in main frame for PRP list pointer still leaves 255 PRPs * required for the remainder of the 1MB I/O. 512 PRPs can fit into one * page (4096 / 8 = 512), so only one page is required for each I/O. * * Each of these buffers will need to be contiguous. For simplicity, * only one buffer is allocated here, which has all of the space * required for the NVMe Queue Depth. If there are problems allocating * this one buffer, this function will need to change to allocate * individual, contiguous NVME_QDEPTH buffers. * * The real calculation will use the real max io size. Above is just an * example. * */ PRPs_required = sc->maxio / PAGE_SIZE; PRPs_per_page = (PAGE_SIZE / PRP_ENTRY_SIZE) - 1; pages_required = (PRPs_required / PRPs_per_page) + 1; sc->prp_buffer_size = PAGE_SIZE * pages_required; rsize = sc->prp_buffer_size * NVME_QDEPTH; bus_dma_template_init(&t, sc->mpr_parent_dmat); BUS_DMA_TEMPLATE_FILL(&t, BD_ALIGNMENT(4), BD_MAXSIZE(rsize), BD_MAXSEGSIZE(rsize), BD_NSEGMENTS(1), BD_LOWADDR(BUS_SPACE_MAXADDR_32BIT)); if (bus_dma_template_tag(&t, &sc->prp_page_dmat)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate NVMe PRP DMA " "tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->prp_page_dmat, (void **)&sc->prp_pages, BUS_DMA_NOWAIT, &sc->prp_page_map)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate NVMe PRP memory\n"); return (ENOMEM); } bzero(sc->prp_pages, rsize); bus_dmamap_load(sc->prp_page_dmat, sc->prp_page_map, sc->prp_pages, rsize, mpr_memaddr_cb, &sc->prp_page_busaddr, 0); sc->prps = malloc(sizeof(struct mpr_prp_page) * NVME_QDEPTH, M_MPR, M_WAITOK | M_ZERO); for (i = 0; i < NVME_QDEPTH; i++) { prp_page = &sc->prps[i]; prp_page->prp_page = (uint64_t *)(sc->prp_pages + i * sc->prp_buffer_size); prp_page->prp_page_busaddr = (uint64_t)(sc->prp_page_busaddr + i * sc->prp_buffer_size); mpr_free_prp_page(sc, prp_page); sc->prp_pages_free_lowwater++; } return (0); } static int mpr_init_queues(struct mpr_softc *sc) { int i; memset((uint8_t *)sc->post_queue, 0xff, sc->pqdepth * 8); /* * According to the spec, we need to use one less reply than we * have space for on the queue. So sc->num_replies (the number we * use) should be less than sc->fqdepth (allocated size). */ if (sc->num_replies >= sc->fqdepth) return (EINVAL); /* * Initialize all of the free queue entries. */ for (i = 0; i < sc->fqdepth; i++) { sc->free_queue[i] = htole32(sc->reply_busaddr + (i * sc->replyframesz)); } sc->replyfreeindex = sc->num_replies; return (0); } /* Get the driver parameter tunables. Lowest priority are the driver defaults. * Next are the global settings, if they exist. Highest are the per-unit * settings, if they exist. */ void mpr_get_tunables(struct mpr_softc *sc) { char tmpstr[80], mpr_debug[80]; /* XXX default to some debugging for now */ sc->mpr_debug = MPR_INFO | MPR_FAULT; sc->disable_msix = 0; sc->disable_msi = 0; sc->max_msix = MPR_MSIX_MAX; sc->max_chains = MPR_CHAIN_FRAMES; sc->max_io_pages = MPR_MAXIO_PAGES; sc->enable_ssu = MPR_SSU_ENABLE_SSD_DISABLE_HDD; sc->spinup_wait_time = DEFAULT_SPINUP_WAIT; sc->use_phynum = 1; sc->max_reqframes = MPR_REQ_FRAMES; sc->max_prireqframes = MPR_PRI_REQ_FRAMES; sc->max_replyframes = MPR_REPLY_FRAMES; sc->max_evtframes = MPR_EVT_REPLY_FRAMES; /* * Grab the global variables. */ bzero(mpr_debug, 80); if (TUNABLE_STR_FETCH("hw.mpr.debug_level", mpr_debug, 80) != 0) mpr_parse_debug(sc, mpr_debug); TUNABLE_INT_FETCH("hw.mpr.disable_msix", &sc->disable_msix); TUNABLE_INT_FETCH("hw.mpr.disable_msi", &sc->disable_msi); TUNABLE_INT_FETCH("hw.mpr.max_msix", &sc->max_msix); TUNABLE_INT_FETCH("hw.mpr.max_chains", &sc->max_chains); TUNABLE_INT_FETCH("hw.mpr.max_io_pages", &sc->max_io_pages); TUNABLE_INT_FETCH("hw.mpr.enable_ssu", &sc->enable_ssu); TUNABLE_INT_FETCH("hw.mpr.spinup_wait_time", &sc->spinup_wait_time); TUNABLE_INT_FETCH("hw.mpr.use_phy_num", &sc->use_phynum); TUNABLE_INT_FETCH("hw.mpr.max_reqframes", &sc->max_reqframes); TUNABLE_INT_FETCH("hw.mpr.max_prireqframes", &sc->max_prireqframes); TUNABLE_INT_FETCH("hw.mpr.max_replyframes", &sc->max_replyframes); TUNABLE_INT_FETCH("hw.mpr.max_evtframes", &sc->max_evtframes); /* Grab the unit-instance variables */ snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.debug_level", device_get_unit(sc->mpr_dev)); bzero(mpr_debug, 80); if (TUNABLE_STR_FETCH(tmpstr, mpr_debug, 80) != 0) mpr_parse_debug(sc, mpr_debug); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.disable_msix", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->disable_msix); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.disable_msi", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->disable_msi); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_msix", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_msix); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_chains", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_chains); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_io_pages", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_io_pages); bzero(sc->exclude_ids, sizeof(sc->exclude_ids)); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.exclude_ids", device_get_unit(sc->mpr_dev)); TUNABLE_STR_FETCH(tmpstr, sc->exclude_ids, sizeof(sc->exclude_ids)); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.enable_ssu", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->enable_ssu); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.spinup_wait_time", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->spinup_wait_time); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.use_phy_num", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->use_phynum); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_reqframes", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_reqframes); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_prireqframes", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_prireqframes); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_replyframes", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_replyframes); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_evtframes", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_evtframes); } static void mpr_setup_sysctl(struct mpr_softc *sc) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; char tmpstr[80], tmpstr2[80]; /* * Setup the sysctl variable so the user can change the debug level * on the fly. */ snprintf(tmpstr, sizeof(tmpstr), "MPR controller %d", device_get_unit(sc->mpr_dev)); snprintf(tmpstr2, sizeof(tmpstr2), "%d", device_get_unit(sc->mpr_dev)); sysctl_ctx = device_get_sysctl_ctx(sc->mpr_dev); if (sysctl_ctx != NULL) sysctl_tree = device_get_sysctl_tree(sc->mpr_dev); if (sysctl_tree == NULL) { sysctl_ctx_init(&sc->sysctl_ctx); sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_hw_mpr), OID_AUTO, tmpstr2, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, tmpstr); if (sc->sysctl_tree == NULL) return; sysctl_ctx = &sc->sysctl_ctx; sysctl_tree = sc->sysctl_tree; } SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "debug_level", CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, mpr_debug_sysctl, "A", "mpr debug level"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "disable_msix", CTLFLAG_RD, &sc->disable_msix, 0, "Disable the use of MSI-X interrupts"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_msix", CTLFLAG_RD, &sc->max_msix, 0, "User-defined maximum number of MSIX queues"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "msix_msgs", CTLFLAG_RD, &sc->msi_msgs, 0, "Negotiated number of MSIX queues"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_reqframes", CTLFLAG_RD, &sc->max_reqframes, 0, "Total number of allocated request frames"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_prireqframes", CTLFLAG_RD, &sc->max_prireqframes, 0, "Total number of allocated high priority request frames"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_replyframes", CTLFLAG_RD, &sc->max_replyframes, 0, "Total number of allocated reply frames"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_evtframes", CTLFLAG_RD, &sc->max_evtframes, 0, "Total number of event frames allocated"); SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "firmware_version", CTLFLAG_RD, sc->fw_version, strlen(sc->fw_version), "firmware version"); SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "driver_version", CTLFLAG_RD, MPR_DRIVER_VERSION, strlen(MPR_DRIVER_VERSION), "driver version"); SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "msg_version", CTLFLAG_RD, sc->msg_version, strlen(sc->msg_version), "message interface version"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "io_cmds_active", CTLFLAG_RD, &sc->io_cmds_active, 0, "number of currently active commands"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "io_cmds_highwater", CTLFLAG_RD, &sc->io_cmds_highwater, 0, "maximum active commands seen"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_free", CTLFLAG_RD, &sc->chain_free, 0, "number of free chain elements"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_free_lowwater", CTLFLAG_RD, &sc->chain_free_lowwater, 0,"lowest number of free chain elements"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_chains", CTLFLAG_RD, &sc->max_chains, 0,"maximum chain frames that will be allocated"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_io_pages", CTLFLAG_RD, &sc->max_io_pages, 0,"maximum pages to allow per I/O (if <1 use " "IOCFacts)"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "enable_ssu", CTLFLAG_RW, &sc->enable_ssu, 0, "enable SSU to SATA SSD/HDD at shutdown"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_alloc_fail", CTLFLAG_RD, &sc->chain_alloc_fail, "chain allocation failures"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "spinup_wait_time", CTLFLAG_RD, &sc->spinup_wait_time, DEFAULT_SPINUP_WAIT, "seconds to wait for " "spinup after SATA ID error"); SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "dump_reqs", - CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_NEEDGIANT, + CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, sc, 0, mpr_dump_reqs, "I", "Dump Active Requests"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "use_phy_num", CTLFLAG_RD, &sc->use_phynum, 0, "Use the phy number for enumeration"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "prp_pages_free", CTLFLAG_RD, &sc->prp_pages_free, 0, "number of free PRP pages"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "prp_pages_free_lowwater", CTLFLAG_RD, &sc->prp_pages_free_lowwater, 0,"lowest number of free PRP pages"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "prp_page_alloc_fail", CTLFLAG_RD, &sc->prp_page_alloc_fail, "PRP page allocation failures"); } static struct mpr_debug_string { char *name; int flag; } mpr_debug_strings[] = { {"info", MPR_INFO}, {"fault", MPR_FAULT}, {"event", MPR_EVENT}, {"log", MPR_LOG}, {"recovery", MPR_RECOVERY}, {"error", MPR_ERROR}, {"init", MPR_INIT}, {"xinfo", MPR_XINFO}, {"user", MPR_USER}, {"mapping", MPR_MAPPING}, {"trace", MPR_TRACE} }; enum mpr_debug_level_combiner { COMB_NONE, COMB_ADD, COMB_SUB }; static int mpr_debug_sysctl(SYSCTL_HANDLER_ARGS) { struct mpr_softc *sc; struct mpr_debug_string *string; struct sbuf *sbuf; char *buffer; size_t sz; int i, len, debug, error; sc = (struct mpr_softc *)arg1; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); debug = sc->mpr_debug; sbuf_printf(sbuf, "%#x", debug); sz = sizeof(mpr_debug_strings) / sizeof(mpr_debug_strings[0]); for (i = 0; i < sz; i++) { string = &mpr_debug_strings[i]; if (debug & string->flag) sbuf_printf(sbuf, ",%s", string->name); } error = sbuf_finish(sbuf); sbuf_delete(sbuf); if (error || req->newptr == NULL) return (error); len = req->newlen - req->newidx; if (len == 0) return (0); buffer = malloc(len, M_MPR, M_ZERO|M_WAITOK); error = SYSCTL_IN(req, buffer, len); mpr_parse_debug(sc, buffer); free(buffer, M_MPR); return (error); } static void mpr_parse_debug(struct mpr_softc *sc, char *list) { struct mpr_debug_string *string; enum mpr_debug_level_combiner op; char *token, *endtoken; size_t sz; int flags, i; if (list == NULL || *list == '\0') return; if (*list == '+') { op = COMB_ADD; list++; } else if (*list == '-') { op = COMB_SUB; list++; } else op = COMB_NONE; if (*list == '\0') return; flags = 0; sz = sizeof(mpr_debug_strings) / sizeof(mpr_debug_strings[0]); while ((token = strsep(&list, ":,")) != NULL) { /* Handle integer flags */ flags |= strtol(token, &endtoken, 0); if (token != endtoken) continue; /* Handle text flags */ for (i = 0; i < sz; i++) { string = &mpr_debug_strings[i]; if (strcasecmp(token, string->name) == 0) { flags |= string->flag; break; } } } switch (op) { case COMB_NONE: sc->mpr_debug = flags; break; case COMB_ADD: sc->mpr_debug |= flags; break; case COMB_SUB: sc->mpr_debug &= (~flags); break; } return; } struct mpr_dumpreq_hdr { uint32_t smid; uint32_t state; uint32_t numframes; uint32_t deschi; uint32_t desclo; }; static int mpr_dump_reqs(SYSCTL_HANDLER_ARGS) { struct mpr_softc *sc; struct mpr_chain *chain, *chain1; struct mpr_command *cm; struct mpr_dumpreq_hdr hdr; struct sbuf *sb; uint32_t smid, state; int i, numreqs, error = 0; sc = (struct mpr_softc *)arg1; if ((error = priv_check(curthread, PRIV_DRIVER)) != 0) { printf("priv check error %d\n", error); return (error); } state = MPR_CM_STATE_INQUEUE; smid = 1; numreqs = sc->num_reqs; if (req->newptr != NULL) return (EINVAL); if (smid == 0 || smid > sc->num_reqs) return (EINVAL); if (numreqs <= 0 || (numreqs + smid > sc->num_reqs)) numreqs = sc->num_reqs; sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); /* Best effort, no locking */ for (i = smid; i < numreqs; i++) { cm = &sc->commands[i]; if (cm->cm_state != state) continue; hdr.smid = i; hdr.state = cm->cm_state; hdr.numframes = 1; hdr.deschi = cm->cm_desc.Words.High; hdr.desclo = cm->cm_desc.Words.Low; TAILQ_FOREACH_SAFE(chain, &cm->cm_chain_list, chain_link, chain1) hdr.numframes++; sbuf_bcat(sb, &hdr, sizeof(hdr)); sbuf_bcat(sb, cm->cm_req, 128); TAILQ_FOREACH_SAFE(chain, &cm->cm_chain_list, chain_link, chain1) sbuf_bcat(sb, chain->chain, 128); } error = sbuf_finish(sb); sbuf_delete(sb); return (error); } int mpr_attach(struct mpr_softc *sc) { int error; MPR_FUNCTRACE(sc); mpr_dprint(sc, MPR_INIT, "%s entered\n", __func__); mtx_init(&sc->mpr_mtx, "MPR lock", NULL, MTX_DEF); callout_init_mtx(&sc->periodic, &sc->mpr_mtx, 0); callout_init_mtx(&sc->device_check_callout, &sc->mpr_mtx, 0); TAILQ_INIT(&sc->event_list); timevalclear(&sc->lastfail); if ((error = mpr_transition_ready(sc)) != 0) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "Failed to transition ready\n"); return (error); } sc->facts = malloc(sizeof(MPI2_IOC_FACTS_REPLY), M_MPR, M_ZERO|M_NOWAIT); if (!sc->facts) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "Cannot allocate memory, exit\n"); return (ENOMEM); } /* * Get IOC Facts and allocate all structures based on this information. * A Diag Reset will also call mpr_iocfacts_allocate and re-read the IOC * Facts. If relevant values have changed in IOC Facts, this function * will free all of the memory based on IOC Facts and reallocate that * memory. If this fails, any allocated memory should already be freed. */ if ((error = mpr_iocfacts_allocate(sc, TRUE)) != 0) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "IOC Facts allocation " "failed with error %d\n", error); return (error); } /* Start the periodic watchdog check on the IOC Doorbell */ mpr_periodic(sc); /* * The portenable will kick off discovery events that will drive the * rest of the initialization process. The CAM/SAS module will * hold up the boot sequence until discovery is complete. */ sc->mpr_ich.ich_func = mpr_startup; sc->mpr_ich.ich_arg = sc; if (config_intrhook_establish(&sc->mpr_ich) != 0) { mpr_dprint(sc, MPR_INIT|MPR_ERROR, "Cannot establish MPR config hook\n"); error = EINVAL; } /* * Allow IR to shutdown gracefully when shutdown occurs. */ sc->shutdown_eh = EVENTHANDLER_REGISTER(shutdown_final, mprsas_ir_shutdown, sc, SHUTDOWN_PRI_DEFAULT); if (sc->shutdown_eh == NULL) mpr_dprint(sc, MPR_INIT|MPR_ERROR, "shutdown event registration failed\n"); mpr_setup_sysctl(sc); sc->mpr_flags |= MPR_FLAGS_ATTACH_DONE; mpr_dprint(sc, MPR_INIT, "%s exit error= %d\n", __func__, error); return (error); } /* Run through any late-start handlers. */ static void mpr_startup(void *arg) { struct mpr_softc *sc; sc = (struct mpr_softc *)arg; mpr_dprint(sc, MPR_INIT, "%s entered\n", __func__); mpr_lock(sc); mpr_unmask_intr(sc); /* initialize device mapping tables */ mpr_base_static_config_pages(sc); mpr_mapping_initialize(sc); mprsas_startup(sc); mpr_unlock(sc); mpr_dprint(sc, MPR_INIT, "disestablish config intrhook\n"); config_intrhook_disestablish(&sc->mpr_ich); sc->mpr_ich.ich_arg = NULL; mpr_dprint(sc, MPR_INIT, "%s exit\n", __func__); } /* Periodic watchdog. Is called with the driver lock already held. */ static void mpr_periodic(void *arg) { struct mpr_softc *sc; uint32_t db; sc = (struct mpr_softc *)arg; if (sc->mpr_flags & MPR_FLAGS_SHUTDOWN) return; db = mpr_regread(sc, MPI2_DOORBELL_OFFSET); if ((db & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { if ((db & MPI2_DOORBELL_FAULT_CODE_MASK) == IFAULT_IOP_OVER_TEMP_THRESHOLD_EXCEEDED) { panic("TEMPERATURE FAULT: STOPPING."); } mpr_dprint(sc, MPR_FAULT, "IOC Fault 0x%08x, Resetting\n", db); mpr_reinit(sc); } callout_reset(&sc->periodic, MPR_PERIODIC_DELAY * hz, mpr_periodic, sc); } static void mpr_log_evt_handler(struct mpr_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *event) { MPI2_EVENT_DATA_LOG_ENTRY_ADDED *entry; MPR_DPRINT_EVENT(sc, generic, event); switch (event->Event) { case MPI2_EVENT_LOG_DATA: mpr_dprint(sc, MPR_EVENT, "MPI2_EVENT_LOG_DATA:\n"); if (sc->mpr_debug & MPR_EVENT) hexdump(event->EventData, event->EventDataLength, NULL, 0); break; case MPI2_EVENT_LOG_ENTRY_ADDED: entry = (MPI2_EVENT_DATA_LOG_ENTRY_ADDED *)event->EventData; mpr_dprint(sc, MPR_EVENT, "MPI2_EVENT_LOG_ENTRY_ADDED event " "0x%x Sequence %d:\n", entry->LogEntryQualifier, entry->LogSequence); break; default: break; } return; } static int mpr_attach_log(struct mpr_softc *sc) { uint8_t events[16]; bzero(events, 16); setbit(events, MPI2_EVENT_LOG_DATA); setbit(events, MPI2_EVENT_LOG_ENTRY_ADDED); mpr_register_events(sc, events, mpr_log_evt_handler, NULL, &sc->mpr_log_eh); return (0); } static int mpr_detach_log(struct mpr_softc *sc) { if (sc->mpr_log_eh != NULL) mpr_deregister_events(sc, sc->mpr_log_eh); return (0); } /* * Free all of the driver resources and detach submodules. Should be called * without the lock held. */ int mpr_free(struct mpr_softc *sc) { int error; mpr_dprint(sc, MPR_INIT, "%s entered\n", __func__); /* Turn off the watchdog */ mpr_lock(sc); sc->mpr_flags |= MPR_FLAGS_SHUTDOWN; mpr_unlock(sc); /* Lock must not be held for this */ callout_drain(&sc->periodic); callout_drain(&sc->device_check_callout); if (((error = mpr_detach_log(sc)) != 0) || ((error = mpr_detach_sas(sc)) != 0)) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "failed to detach " "subsystems, error= %d, exit\n", error); return (error); } mpr_detach_user(sc); /* Put the IOC back in the READY state. */ mpr_lock(sc); if ((error = mpr_transition_ready(sc)) != 0) { mpr_unlock(sc); return (error); } mpr_unlock(sc); if (sc->facts != NULL) free(sc->facts, M_MPR); /* * Free all buffers that are based on IOC Facts. A Diag Reset may need * to free these buffers too. */ mpr_iocfacts_free(sc); if (sc->sysctl_tree != NULL) sysctl_ctx_free(&sc->sysctl_ctx); /* Deregister the shutdown function */ if (sc->shutdown_eh != NULL) EVENTHANDLER_DEREGISTER(shutdown_final, sc->shutdown_eh); mtx_destroy(&sc->mpr_mtx); mpr_dprint(sc, MPR_INIT, "%s exit\n", __func__); return (0); } static __inline void mpr_complete_command(struct mpr_softc *sc, struct mpr_command *cm) { MPR_FUNCTRACE(sc); if (cm == NULL) { mpr_dprint(sc, MPR_ERROR, "Completing NULL command\n"); return; } cm->cm_state = MPR_CM_STATE_BUSY; if (cm->cm_flags & MPR_CM_FLAGS_POLLED) cm->cm_flags |= MPR_CM_FLAGS_COMPLETE; if (cm->cm_complete != NULL) { mpr_dprint(sc, MPR_TRACE, "%s cm %p calling cm_complete %p data %p reply %p\n", __func__, cm, cm->cm_complete, cm->cm_complete_data, cm->cm_reply); cm->cm_complete(sc, cm); } if (cm->cm_flags & MPR_CM_FLAGS_WAKEUP) { mpr_dprint(sc, MPR_TRACE, "waking up %p\n", cm); wakeup(cm); } if (sc->io_cmds_active != 0) { sc->io_cmds_active--; } else { mpr_dprint(sc, MPR_ERROR, "Warning: io_cmds_active is " "out of sync - resynching to 0\n"); } } static void mpr_sas_log_info(struct mpr_softc *sc , u32 log_info) { union loginfo_type { u32 loginfo; struct { u32 subcode:16; u32 code:8; u32 originator:4; u32 bus_type:4; } dw; }; union loginfo_type sas_loginfo; char *originator_str = NULL; sas_loginfo.loginfo = log_info; if (sas_loginfo.dw.bus_type != 3 /*SAS*/) return; /* each nexus loss loginfo */ if (log_info == 0x31170000) return; /* eat the loginfos associated with task aborts */ if ((log_info == 30050000) || (log_info == 0x31140000) || (log_info == 0x31130000)) return; switch (sas_loginfo.dw.originator) { case 0: originator_str = "IOP"; break; case 1: originator_str = "PL"; break; case 2: originator_str = "IR"; break; } mpr_dprint(sc, MPR_LOG, "log_info(0x%08x): originator(%s), " "code(0x%02x), sub_code(0x%04x)\n", log_info, originator_str, sas_loginfo.dw.code, sas_loginfo.dw.subcode); } static void mpr_display_reply_info(struct mpr_softc *sc, uint8_t *reply) { MPI2DefaultReply_t *mpi_reply; u16 sc_status; mpi_reply = (MPI2DefaultReply_t*)reply; sc_status = le16toh(mpi_reply->IOCStatus); if (sc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) mpr_sas_log_info(sc, le32toh(mpi_reply->IOCLogInfo)); } void mpr_intr(void *data) { struct mpr_softc *sc; uint32_t status; sc = (struct mpr_softc *)data; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); /* * Check interrupt status register to flush the bus. This is * needed for both INTx interrupts and driver-driven polling */ status = mpr_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET); if ((status & MPI2_HIS_REPLY_DESCRIPTOR_INTERRUPT) == 0) return; mpr_lock(sc); mpr_intr_locked(data); mpr_unlock(sc); return; } /* * In theory, MSI/MSIX interrupts shouldn't need to read any registers on the * chip. Hopefully this theory is correct. */ void mpr_intr_msi(void *data) { struct mpr_softc *sc; sc = (struct mpr_softc *)data; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); mpr_lock(sc); mpr_intr_locked(data); mpr_unlock(sc); return; } /* * The locking is overly broad and simplistic, but easy to deal with for now. */ void mpr_intr_locked(void *data) { MPI2_REPLY_DESCRIPTORS_UNION *desc; MPI2_DIAG_RELEASE_REPLY *rel_rep; mpr_fw_diagnostic_buffer_t *pBuffer; struct mpr_softc *sc; uint64_t tdesc; struct mpr_command *cm = NULL; uint8_t flags; u_int pq; sc = (struct mpr_softc *)data; pq = sc->replypostindex; mpr_dprint(sc, MPR_TRACE, "%s sc %p starting with replypostindex %u\n", __func__, sc, sc->replypostindex); for ( ;; ) { cm = NULL; desc = &sc->post_queue[sc->replypostindex]; /* * Copy and clear out the descriptor so that any reentry will * immediately know that this descriptor has already been * looked at. There is unfortunate casting magic because the * MPI API doesn't have a cardinal 64bit type. */ tdesc = 0xffffffffffffffff; tdesc = atomic_swap_64((uint64_t *)desc, tdesc); desc = (MPI2_REPLY_DESCRIPTORS_UNION *)&tdesc; flags = desc->Default.ReplyFlags & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK; if ((flags == MPI2_RPY_DESCRIPT_FLAGS_UNUSED) || (le32toh(desc->Words.High) == 0xffffffff)) break; /* increment the replypostindex now, so that event handlers * and cm completion handlers which decide to do a diag * reset can zero it without it getting incremented again * afterwards, and we break out of this loop on the next * iteration since the reply post queue has been cleared to * 0xFF and all descriptors look unused (which they are). */ if (++sc->replypostindex >= sc->pqdepth) sc->replypostindex = 0; switch (flags) { case MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS: case MPI25_RPY_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO_SUCCESS: case MPI26_RPY_DESCRIPT_FLAGS_PCIE_ENCAPSULATED_SUCCESS: cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)]; KASSERT(cm->cm_state == MPR_CM_STATE_INQUEUE, ("command not inqueue\n")); cm->cm_state = MPR_CM_STATE_BUSY; cm->cm_reply = NULL; break; case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY: { uint32_t baddr; uint8_t *reply; /* * Re-compose the reply address from the address * sent back from the chip. The ReplyFrameAddress * is the lower 32 bits of the physical address of * particular reply frame. Convert that address to * host format, and then use that to provide the * offset against the virtual address base * (sc->reply_frames). */ baddr = le32toh(desc->AddressReply.ReplyFrameAddress); reply = sc->reply_frames + (baddr - ((uint32_t)sc->reply_busaddr)); /* * Make sure the reply we got back is in a valid * range. If not, go ahead and panic here, since * we'll probably panic as soon as we deference the * reply pointer anyway. */ if ((reply < sc->reply_frames) || (reply > (sc->reply_frames + (sc->fqdepth * sc->replyframesz)))) { printf("%s: WARNING: reply %p out of range!\n", __func__, reply); printf("%s: reply_frames %p, fqdepth %d, " "frame size %d\n", __func__, sc->reply_frames, sc->fqdepth, sc->replyframesz); printf("%s: baddr %#x,\n", __func__, baddr); /* LSI-TODO. See Linux Code for Graceful exit */ panic("Reply address out of range"); } if (le16toh(desc->AddressReply.SMID) == 0) { if (((MPI2_DEFAULT_REPLY *)reply)->Function == MPI2_FUNCTION_DIAG_BUFFER_POST) { /* * If SMID is 0 for Diag Buffer Post, * this implies that the reply is due to * a release function with a status that * the buffer has been released. Set * the buffer flags accordingly. */ rel_rep = (MPI2_DIAG_RELEASE_REPLY *)reply; if ((le16toh(rel_rep->IOCStatus) & MPI2_IOCSTATUS_MASK) == MPI2_IOCSTATUS_DIAGNOSTIC_RELEASED) { pBuffer = &sc->fw_diag_buffer_list[ rel_rep->BufferType]; pBuffer->valid_data = TRUE; pBuffer->owned_by_firmware = FALSE; pBuffer->immediate = FALSE; } } else mpr_dispatch_event(sc, baddr, (MPI2_EVENT_NOTIFICATION_REPLY *) reply); } else { cm = &sc->commands[ le16toh(desc->AddressReply.SMID)]; if (cm->cm_state == MPR_CM_STATE_INQUEUE) { cm->cm_reply = reply; cm->cm_reply_data = le32toh(desc->AddressReply. ReplyFrameAddress); } else { mpr_dprint(sc, MPR_RECOVERY, "Bad state for ADDRESS_REPLY status," " ignoring state %d cm %p\n", cm->cm_state, cm); } } break; } case MPI2_RPY_DESCRIPT_FLAGS_TARGETASSIST_SUCCESS: case MPI2_RPY_DESCRIPT_FLAGS_TARGET_COMMAND_BUFFER: case MPI2_RPY_DESCRIPT_FLAGS_RAID_ACCELERATOR_SUCCESS: default: /* Unhandled */ mpr_dprint(sc, MPR_ERROR, "Unhandled reply 0x%x\n", desc->Default.ReplyFlags); cm = NULL; break; } if (cm != NULL) { // Print Error reply frame if (cm->cm_reply) mpr_display_reply_info(sc,cm->cm_reply); mpr_complete_command(sc, cm); } } if (pq != sc->replypostindex) { mpr_dprint(sc, MPR_TRACE, "%s sc %p writing postindex %d\n", __func__, sc, sc->replypostindex); mpr_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, sc->replypostindex); } return; } static void mpr_dispatch_event(struct mpr_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *reply) { struct mpr_event_handle *eh; int event, handled = 0; event = le16toh(reply->Event); TAILQ_FOREACH(eh, &sc->event_list, eh_list) { if (isset(eh->mask, event)) { eh->callback(sc, data, reply); handled++; } } if (handled == 0) mpr_dprint(sc, MPR_EVENT, "Unhandled event 0x%x\n", le16toh(event)); /* * This is the only place that the event/reply should be freed. * Anything wanting to hold onto the event data should have * already copied it into their own storage. */ mpr_free_reply(sc, data); } static void mpr_reregister_events_complete(struct mpr_softc *sc, struct mpr_command *cm) { mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); if (cm->cm_reply) MPR_DPRINT_EVENT(sc, generic, (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply); mpr_free_command(sc, cm); /* next, send a port enable */ mprsas_startup(sc); } /* * For both register_events and update_events, the caller supplies a bitmap * of events that it _wants_. These functions then turn that into a bitmask * suitable for the controller. */ int mpr_register_events(struct mpr_softc *sc, uint8_t *mask, mpr_evt_callback_t *cb, void *data, struct mpr_event_handle **handle) { struct mpr_event_handle *eh; int error = 0; eh = malloc(sizeof(struct mpr_event_handle), M_MPR, M_WAITOK|M_ZERO); eh->callback = cb; eh->data = data; TAILQ_INSERT_TAIL(&sc->event_list, eh, eh_list); if (mask != NULL) error = mpr_update_events(sc, eh, mask); *handle = eh; return (error); } int mpr_update_events(struct mpr_softc *sc, struct mpr_event_handle *handle, uint8_t *mask) { MPI2_EVENT_NOTIFICATION_REQUEST *evtreq; MPI2_EVENT_NOTIFICATION_REPLY *reply = NULL; struct mpr_command *cm = NULL; struct mpr_event_handle *eh; int error, i; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); if ((mask != NULL) && (handle != NULL)) bcopy(mask, &handle->mask[0], 16); memset(sc->event_mask, 0xff, 16); TAILQ_FOREACH(eh, &sc->event_list, eh_list) { for (i = 0; i < 16; i++) sc->event_mask[i] &= ~eh->mask[i]; } if ((cm = mpr_alloc_command(sc)) == NULL) return (EBUSY); evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req; evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION; evtreq->MsgFlags = 0; evtreq->SASBroadcastPrimitiveMasks = 0; #ifdef MPR_DEBUG_ALL_EVENTS { u_char fullmask[16]; memset(fullmask, 0x00, 16); bcopy(fullmask, (uint8_t *)&evtreq->EventMasks, 16); } #else for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) evtreq->EventMasks[i] = htole32(sc->event_mask[i]); #endif cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_data = NULL; error = mpr_request_polled(sc, &cm); if (cm != NULL) reply = (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply; if ((reply == NULL) || (reply->IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS) error = ENXIO; if (reply) MPR_DPRINT_EVENT(sc, generic, reply); mpr_dprint(sc, MPR_TRACE, "%s finished error %d\n", __func__, error); if (cm != NULL) mpr_free_command(sc, cm); return (error); } static int mpr_reregister_events(struct mpr_softc *sc) { MPI2_EVENT_NOTIFICATION_REQUEST *evtreq; struct mpr_command *cm; struct mpr_event_handle *eh; int error, i; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); /* first, reregister events */ memset(sc->event_mask, 0xff, 16); TAILQ_FOREACH(eh, &sc->event_list, eh_list) { for (i = 0; i < 16; i++) sc->event_mask[i] &= ~eh->mask[i]; } if ((cm = mpr_alloc_command(sc)) == NULL) return (EBUSY); evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req; evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION; evtreq->MsgFlags = 0; evtreq->SASBroadcastPrimitiveMasks = 0; #ifdef MPR_DEBUG_ALL_EVENTS { u_char fullmask[16]; memset(fullmask, 0x00, 16); bcopy(fullmask, (uint8_t *)&evtreq->EventMasks, 16); } #else for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) evtreq->EventMasks[i] = htole32(sc->event_mask[i]); #endif cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_data = NULL; cm->cm_complete = mpr_reregister_events_complete; error = mpr_map_command(sc, cm); mpr_dprint(sc, MPR_TRACE, "%s finished with error %d\n", __func__, error); return (error); } int mpr_deregister_events(struct mpr_softc *sc, struct mpr_event_handle *handle) { TAILQ_REMOVE(&sc->event_list, handle, eh_list); free(handle, M_MPR); return (mpr_update_events(sc, NULL, NULL)); } /** * mpr_build_nvme_prp - This function is called for NVMe end devices to build a * native SGL (NVMe PRP). The native SGL is built starting in the first PRP entry * of the NVMe message (PRP1). If the data buffer is small enough to be described * entirely using PRP1, then PRP2 is not used. If needed, PRP2 is used to * describe a larger data buffer. If the data buffer is too large to describe * using the two PRP entriess inside the NVMe message, then PRP1 describes the * first data memory segment, and PRP2 contains a pointer to a PRP list located * elsewhere in memory to describe the remaining data memory segments. The PRP * list will be contiguous. * The native SGL for NVMe devices is a Physical Region Page (PRP). A PRP * consists of a list of PRP entries to describe a number of noncontigous * physical memory segments as a single memory buffer, just as a SGL does. Note * however, that this function is only used by the IOCTL call, so the memory * given will be guaranteed to be contiguous. There is no need to translate * non-contiguous SGL into a PRP in this case. All PRPs will describe contiguous * space that is one page size each. * * Each NVMe message contains two PRP entries. The first (PRP1) either contains * a PRP list pointer or a PRP element, depending upon the command. PRP2 contains * the second PRP element if the memory being described fits within 2 PRP * entries, or a PRP list pointer if the PRP spans more than two entries. * * A PRP list pointer contains the address of a PRP list, structured as a linear * array of PRP entries. Each PRP entry in this list describes a segment of * physical memory. * * Each 64-bit PRP entry comprises an address and an offset field. The address * always points to the beginning of a PAGE_SIZE physical memory page, and the * offset describes where within that page the memory segment begins. Only the * first element in a PRP list may contain a non-zero offest, implying that all * memory segments following the first begin at the start of a PAGE_SIZE page. * * Each PRP element normally describes a chunck of PAGE_SIZE physical memory, * with exceptions for the first and last elements in the list. If the memory * being described by the list begins at a non-zero offset within the first page, * then the first PRP element will contain a non-zero offset indicating where the * region begins within the page. The last memory segment may end before the end * of the PAGE_SIZE segment, depending upon the overall size of the memory being * described by the PRP list. * * Since PRP entries lack any indication of size, the overall data buffer length * is used to determine where the end of the data memory buffer is located, and * how many PRP entries are required to describe it. * * Returns nothing. */ void mpr_build_nvme_prp(struct mpr_softc *sc, struct mpr_command *cm, Mpi26NVMeEncapsulatedRequest_t *nvme_encap_request, void *data, uint32_t data_in_sz, uint32_t data_out_sz) { int prp_size = PRP_ENTRY_SIZE; uint64_t *prp_entry, *prp1_entry, *prp2_entry; uint64_t *prp_entry_phys, *prp_page, *prp_page_phys; uint32_t offset, entry_len, page_mask_result, page_mask; bus_addr_t paddr; size_t length; struct mpr_prp_page *prp_page_info = NULL; /* * Not all commands require a data transfer. If no data, just return * without constructing any PRP. */ if (!data_in_sz && !data_out_sz) return; /* * Set pointers to PRP1 and PRP2, which are in the NVMe command. PRP1 is * located at a 24 byte offset from the start of the NVMe command. Then * set the current PRP entry pointer to PRP1. */ prp1_entry = (uint64_t *)(nvme_encap_request->NVMe_Command + NVME_CMD_PRP1_OFFSET); prp2_entry = (uint64_t *)(nvme_encap_request->NVMe_Command + NVME_CMD_PRP2_OFFSET); prp_entry = prp1_entry; /* * For the PRP entries, use the specially allocated buffer of * contiguous memory. PRP Page allocation failures should not happen * because there should be enough PRP page buffers to account for the * possible NVMe QDepth. */ prp_page_info = mpr_alloc_prp_page(sc); KASSERT(prp_page_info != NULL, ("%s: There are no PRP Pages left to be " "used for building a native NVMe SGL.\n", __func__)); prp_page = (uint64_t *)prp_page_info->prp_page; prp_page_phys = (uint64_t *)(uintptr_t)prp_page_info->prp_page_busaddr; /* * Insert the allocated PRP page into the command's PRP page list. This * will be freed when the command is freed. */ TAILQ_INSERT_TAIL(&cm->cm_prp_page_list, prp_page_info, prp_page_link); /* * Check if we are within 1 entry of a page boundary we don't want our * first entry to be a PRP List entry. */ page_mask = PAGE_SIZE - 1; page_mask_result = (uintptr_t)((uint8_t *)prp_page + prp_size) & page_mask; if (!page_mask_result) { /* Bump up to next page boundary. */ prp_page = (uint64_t *)((uint8_t *)prp_page + prp_size); prp_page_phys = (uint64_t *)((uint8_t *)prp_page_phys + prp_size); } /* * Set PRP physical pointer, which initially points to the current PRP * DMA memory page. */ prp_entry_phys = prp_page_phys; /* Get physical address and length of the data buffer. */ paddr = (bus_addr_t)(uintptr_t)data; if (data_in_sz) length = data_in_sz; else length = data_out_sz; /* Loop while the length is not zero. */ while (length) { /* * Check if we need to put a list pointer here if we are at page * boundary - prp_size (8 bytes). */ page_mask_result = (uintptr_t)((uint8_t *)prp_entry_phys + prp_size) & page_mask; if (!page_mask_result) { /* * This is the last entry in a PRP List, so we need to * put a PRP list pointer here. What this does is: * - bump the current memory pointer to the next * address, which will be the next full page. * - set the PRP Entry to point to that page. This is * now the PRP List pointer. * - bump the PRP Entry pointer the start of the next * page. Since all of this PRP memory is contiguous, * no need to get a new page - it's just the next * address. */ prp_entry_phys++; *prp_entry = htole64((uint64_t)(uintptr_t)prp_entry_phys); prp_entry++; } /* Need to handle if entry will be part of a page. */ offset = (uint32_t)paddr & page_mask; entry_len = PAGE_SIZE - offset; if (prp_entry == prp1_entry) { /* * Must fill in the first PRP pointer (PRP1) before * moving on. */ *prp1_entry = htole64((uint64_t)paddr); /* * Now point to the second PRP entry within the * command (PRP2). */ prp_entry = prp2_entry; } else if (prp_entry == prp2_entry) { /* * Should the PRP2 entry be a PRP List pointer or just a * regular PRP pointer? If there is more than one more * page of data, must use a PRP List pointer. */ if (length > PAGE_SIZE) { /* * PRP2 will contain a PRP List pointer because * more PRP's are needed with this command. The * list will start at the beginning of the * contiguous buffer. */ *prp2_entry = htole64( (uint64_t)(uintptr_t)prp_entry_phys); /* * The next PRP Entry will be the start of the * first PRP List. */ prp_entry = prp_page; } else { /* * After this, the PRP Entries are complete. * This command uses 2 PRP's and no PRP list. */ *prp2_entry = htole64((uint64_t)paddr); } } else { /* * Put entry in list and bump the addresses. * * After PRP1 and PRP2 are filled in, this will fill in * all remaining PRP entries in a PRP List, one per each * time through the loop. */ *prp_entry = htole64((uint64_t)paddr); prp_entry++; prp_entry_phys++; } /* * Bump the phys address of the command's data buffer by the * entry_len. */ paddr += entry_len; /* Decrement length accounting for last partial page. */ if (entry_len > length) length = 0; else length -= entry_len; } } /* * mpr_check_pcie_native_sgl - This function is called for PCIe end devices to * determine if the driver needs to build a native SGL. If so, that native SGL * is built in the contiguous buffers allocated especially for PCIe SGL * creation. If the driver will not build a native SGL, return TRUE and a * normal IEEE SGL will be built. Currently this routine supports NVMe devices * only. * * Returns FALSE (0) if native SGL was built, TRUE (1) if no SGL was built. */ static int mpr_check_pcie_native_sgl(struct mpr_softc *sc, struct mpr_command *cm, bus_dma_segment_t *segs, int segs_left) { uint32_t i, sge_dwords, length, offset, entry_len; uint32_t num_entries, buff_len = 0, sges_in_segment; uint32_t page_mask, page_mask_result, *curr_buff; uint32_t *ptr_sgl, *ptr_first_sgl, first_page_offset; uint32_t first_page_data_size, end_residual; uint64_t *msg_phys; bus_addr_t paddr; int build_native_sgl = 0, first_prp_entry; int prp_size = PRP_ENTRY_SIZE; Mpi25IeeeSgeChain64_t *main_chain_element = NULL; struct mpr_prp_page *prp_page_info = NULL; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); /* * Add up the sizes of each segment length to get the total transfer * size, which will be checked against the Maximum Data Transfer Size. * If the data transfer length exceeds the MDTS for this device, just * return 1 so a normal IEEE SGL will be built. F/W will break the I/O * up into multiple I/O's. [nvme_mdts = 0 means unlimited] */ for (i = 0; i < segs_left; i++) buff_len += htole32(segs[i].ds_len); if ((cm->cm_targ->MDTS > 0) && (buff_len > cm->cm_targ->MDTS)) return 1; /* Create page_mask (to get offset within page) */ page_mask = PAGE_SIZE - 1; /* * Check if the number of elements exceeds the max number that can be * put in the main message frame (H/W can only translate an SGL that * is contained entirely in the main message frame). */ sges_in_segment = (sc->reqframesz - offsetof(Mpi25SCSIIORequest_t, SGL)) / sizeof(MPI25_SGE_IO_UNION); if (segs_left > sges_in_segment) build_native_sgl = 1; else { /* * NVMe uses one PRP for each physical page (or part of physical * page). * if 4 pages or less then IEEE is OK * if > 5 pages then we need to build a native SGL * if > 4 and <= 5 pages, then check the physical address of * the first SG entry, then if this first size in the page * is >= the residual beyond 4 pages then use IEEE, * otherwise use native SGL */ if (buff_len > (PAGE_SIZE * 5)) build_native_sgl = 1; else if ((buff_len > (PAGE_SIZE * 4)) && (buff_len <= (PAGE_SIZE * 5)) ) { msg_phys = (uint64_t *)(uintptr_t)segs[0].ds_addr; first_page_offset = ((uint32_t)(uint64_t)(uintptr_t)msg_phys & page_mask); first_page_data_size = PAGE_SIZE - first_page_offset; end_residual = buff_len % PAGE_SIZE; /* * If offset into first page pushes the end of the data * beyond end of the 5th page, we need the extra PRP * list. */ if (first_page_data_size < end_residual) build_native_sgl = 1; /* * Check if first SG entry size is < residual beyond 4 * pages. */ if (htole32(segs[0].ds_len) < (buff_len - (PAGE_SIZE * 4))) build_native_sgl = 1; } } /* check if native SGL is needed */ if (!build_native_sgl) return 1; /* * Native SGL is needed. * Put a chain element in main message frame that points to the first * chain buffer. * * NOTE: The ChainOffset field must be 0 when using a chain pointer to * a native SGL. */ /* Set main message chain element pointer */ main_chain_element = (pMpi25IeeeSgeChain64_t)cm->cm_sge; /* * For NVMe the chain element needs to be the 2nd SGL entry in the main * message. */ main_chain_element = (Mpi25IeeeSgeChain64_t *) ((uint8_t *)main_chain_element + sizeof(MPI25_IEEE_SGE_CHAIN64)); /* * For the PRP entries, use the specially allocated buffer of * contiguous memory. PRP Page allocation failures should not happen * because there should be enough PRP page buffers to account for the * possible NVMe QDepth. */ prp_page_info = mpr_alloc_prp_page(sc); KASSERT(prp_page_info != NULL, ("%s: There are no PRP Pages left to be " "used for building a native NVMe SGL.\n", __func__)); curr_buff = (uint32_t *)prp_page_info->prp_page; msg_phys = (uint64_t *)(uintptr_t)prp_page_info->prp_page_busaddr; /* * Insert the allocated PRP page into the command's PRP page list. This * will be freed when the command is freed. */ TAILQ_INSERT_TAIL(&cm->cm_prp_page_list, prp_page_info, prp_page_link); /* * Check if we are within 1 entry of a page boundary we don't want our * first entry to be a PRP List entry. */ page_mask_result = (uintptr_t)((uint8_t *)curr_buff + prp_size) & page_mask; if (!page_mask_result) { /* Bump up to next page boundary. */ curr_buff = (uint32_t *)((uint8_t *)curr_buff + prp_size); msg_phys = (uint64_t *)((uint8_t *)msg_phys + prp_size); } /* Fill in the chain element and make it an NVMe segment type. */ main_chain_element->Address.High = htole32((uint32_t)((uint64_t)(uintptr_t)msg_phys >> 32)); main_chain_element->Address.Low = htole32((uint32_t)(uintptr_t)msg_phys); main_chain_element->NextChainOffset = 0; main_chain_element->Flags = MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT | MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR | MPI26_IEEE_SGE_FLAGS_NSF_NVME_PRP; /* Set SGL pointer to start of contiguous PCIe buffer. */ ptr_sgl = curr_buff; sge_dwords = 2; num_entries = 0; /* * NVMe has a very convoluted PRP format. One PRP is required for each * page or partial page. We need to split up OS SG entries if they are * longer than one page or cross a page boundary. We also have to insert * a PRP list pointer entry as the last entry in each physical page of * the PRP list. * * NOTE: The first PRP "entry" is actually placed in the first SGL entry * in the main message in IEEE 64 format. The 2nd entry in the main * message is the chain element, and the rest of the PRP entries are * built in the contiguous PCIe buffer. */ first_prp_entry = 1; ptr_first_sgl = (uint32_t *)cm->cm_sge; for (i = 0; i < segs_left; i++) { /* Get physical address and length of this SG entry. */ paddr = segs[i].ds_addr; length = segs[i].ds_len; /* * Check whether a given SGE buffer lies on a non-PAGED * boundary if this is not the first page. If so, this is not * expected so have FW build the SGL. */ if ((i != 0) && (((uint32_t)paddr & page_mask) != 0)) { mpr_dprint(sc, MPR_ERROR, "Unaligned SGE while " "building NVMe PRPs, low address is 0x%x\n", (uint32_t)paddr); return 1; } /* Apart from last SGE, if any other SGE boundary is not page * aligned then it means that hole exists. Existence of hole * leads to data corruption. So fallback to IEEE SGEs. */ if (i != (segs_left - 1)) { if (((uint32_t)paddr + length) & page_mask) { mpr_dprint(sc, MPR_ERROR, "Unaligned SGE " "boundary while building NVMe PRPs, low " "address: 0x%x and length: %u\n", (uint32_t)paddr, length); return 1; } } /* Loop while the length is not zero. */ while (length) { /* * Check if we need to put a list pointer here if we are * at page boundary - prp_size. */ page_mask_result = (uintptr_t)((uint8_t *)ptr_sgl + prp_size) & page_mask; if (!page_mask_result) { /* * Need to put a PRP list pointer here. */ msg_phys = (uint64_t *)((uint8_t *)msg_phys + prp_size); *ptr_sgl = htole32((uintptr_t)msg_phys); *(ptr_sgl+1) = htole32((uint64_t)(uintptr_t) msg_phys >> 32); ptr_sgl += sge_dwords; num_entries++; } /* Need to handle if entry will be part of a page. */ offset = (uint32_t)paddr & page_mask; entry_len = PAGE_SIZE - offset; if (first_prp_entry) { /* * Put IEEE entry in first SGE in main message. * (Simple element, System addr, not end of * list.) */ *ptr_first_sgl = htole32((uint32_t)paddr); *(ptr_first_sgl + 1) = htole32((uint32_t)((uint64_t)paddr >> 32)); *(ptr_first_sgl + 2) = htole32(entry_len); *(ptr_first_sgl + 3) = 0; /* No longer the first PRP entry. */ first_prp_entry = 0; } else { /* Put entry in list. */ *ptr_sgl = htole32((uint32_t)paddr); *(ptr_sgl + 1) = htole32((uint32_t)((uint64_t)paddr >> 32)); /* Bump ptr_sgl, msg_phys, and num_entries. */ ptr_sgl += sge_dwords; msg_phys = (uint64_t *)((uint8_t *)msg_phys + prp_size); num_entries++; } /* Bump the phys address by the entry_len. */ paddr += entry_len; /* Decrement length accounting for last partial page. */ if (entry_len > length) length = 0; else length -= entry_len; } } /* Set chain element Length. */ main_chain_element->Length = htole32(num_entries * prp_size); /* Return 0, indicating we built a native SGL. */ return 0; } /* * Add a chain element as the next SGE for the specified command. * Reset cm_sge and cm_sgesize to indicate all the available space. Chains are * only required for IEEE commands. Therefore there is no code for commands * that have the MPR_CM_FLAGS_SGE_SIMPLE flag set (and those commands * shouldn't be requesting chains). */ static int mpr_add_chain(struct mpr_command *cm, int segsleft) { struct mpr_softc *sc = cm->cm_sc; MPI2_REQUEST_HEADER *req; MPI25_IEEE_SGE_CHAIN64 *ieee_sgc; struct mpr_chain *chain; int sgc_size, current_segs, rem_segs, segs_per_frame; uint8_t next_chain_offset = 0; /* * Fail if a command is requesting a chain for SIMPLE SGE's. For SAS3 * only IEEE commands should be requesting chains. Return some error * code other than 0. */ if (cm->cm_flags & MPR_CM_FLAGS_SGE_SIMPLE) { mpr_dprint(sc, MPR_ERROR, "A chain element cannot be added to " "an MPI SGL.\n"); return(ENOBUFS); } sgc_size = sizeof(MPI25_IEEE_SGE_CHAIN64); if (cm->cm_sglsize < sgc_size) panic("MPR: Need SGE Error Code\n"); chain = mpr_alloc_chain(cm->cm_sc); if (chain == NULL) return (ENOBUFS); /* * Note: a double-linked list is used to make it easier to walk for * debugging. */ TAILQ_INSERT_TAIL(&cm->cm_chain_list, chain, chain_link); /* * Need to know if the number of frames left is more than 1 or not. If * more than 1 frame is required, NextChainOffset will need to be set, * which will just be the last segment of the frame. */ rem_segs = 0; if (cm->cm_sglsize < (sgc_size * segsleft)) { /* * rem_segs is the number of segements remaining after the * segments that will go into the current frame. Since it is * known that at least one more frame is required, account for * the chain element. To know if more than one more frame is * required, just check if there will be a remainder after using * the current frame (with this chain) and the next frame. If * so the NextChainOffset must be the last element of the next * frame. */ current_segs = (cm->cm_sglsize / sgc_size) - 1; rem_segs = segsleft - current_segs; segs_per_frame = sc->chain_frame_size / sgc_size; if (rem_segs > segs_per_frame) { next_chain_offset = segs_per_frame - 1; } } ieee_sgc = &((MPI25_SGE_IO_UNION *)cm->cm_sge)->IeeeChain; ieee_sgc->Length = next_chain_offset ? htole32((uint32_t)sc->chain_frame_size) : htole32((uint32_t)rem_segs * (uint32_t)sgc_size); ieee_sgc->NextChainOffset = next_chain_offset; ieee_sgc->Flags = (MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT | MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR); ieee_sgc->Address.Low = htole32(chain->chain_busaddr); ieee_sgc->Address.High = htole32(chain->chain_busaddr >> 32); cm->cm_sge = &((MPI25_SGE_IO_UNION *)chain->chain)->IeeeSimple; req = (MPI2_REQUEST_HEADER *)cm->cm_req; req->ChainOffset = (sc->chain_frame_size - sgc_size) >> 4; cm->cm_sglsize = sc->chain_frame_size; return (0); } /* * Add one scatter-gather element to the scatter-gather list for a command. * Maintain cm_sglsize and cm_sge as the remaining size and pointer to the * next SGE to fill in, respectively. In Gen3, the MPI SGL does not have a * chain, so don't consider any chain additions. */ int mpr_push_sge(struct mpr_command *cm, MPI2_SGE_SIMPLE64 *sge, size_t len, int segsleft) { uint32_t saved_buf_len, saved_address_low, saved_address_high; u32 sge_flags; /* * case 1: >=1 more segment, no room for anything (error) * case 2: 1 more segment and enough room for it */ if (cm->cm_sglsize < (segsleft * sizeof(MPI2_SGE_SIMPLE64))) { mpr_dprint(cm->cm_sc, MPR_ERROR, "%s: warning: Not enough room for MPI SGL in frame.\n", __func__); return(ENOBUFS); } KASSERT(segsleft == 1, ("segsleft cannot be more than 1 for an MPI SGL; segsleft = %d\n", segsleft)); /* * There is one more segment left to add for the MPI SGL and there is * enough room in the frame to add it. This is the normal case because * MPI SGL's don't have chains, otherwise something is wrong. * * If this is a bi-directional request, need to account for that * here. Save the pre-filled sge values. These will be used * either for the 2nd SGL or for a single direction SGL. If * cm_out_len is non-zero, this is a bi-directional request, so * fill in the OUT SGL first, then the IN SGL, otherwise just * fill in the IN SGL. Note that at this time, when filling in * 2 SGL's for a bi-directional request, they both use the same * DMA buffer (same cm command). */ saved_buf_len = sge->FlagsLength & 0x00FFFFFF; saved_address_low = sge->Address.Low; saved_address_high = sge->Address.High; if (cm->cm_out_len) { sge->FlagsLength = cm->cm_out_len | ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC | MPI2_SGE_FLAGS_64_BIT_ADDRESSING) << MPI2_SGE_FLAGS_SHIFT); cm->cm_sglsize -= len; /* Endian Safe code */ sge_flags = sge->FlagsLength; sge->FlagsLength = htole32(sge_flags); bcopy(sge, cm->cm_sge, len); cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); } sge->FlagsLength = saved_buf_len | ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_LIST | MPI2_SGE_FLAGS_64_BIT_ADDRESSING) << MPI2_SGE_FLAGS_SHIFT); if (cm->cm_flags & MPR_CM_FLAGS_DATAIN) { sge->FlagsLength |= ((uint32_t)(MPI2_SGE_FLAGS_IOC_TO_HOST) << MPI2_SGE_FLAGS_SHIFT); } else { sge->FlagsLength |= ((uint32_t)(MPI2_SGE_FLAGS_HOST_TO_IOC) << MPI2_SGE_FLAGS_SHIFT); } sge->Address.Low = saved_address_low; sge->Address.High = saved_address_high; cm->cm_sglsize -= len; /* Endian Safe code */ sge_flags = sge->FlagsLength; sge->FlagsLength = htole32(sge_flags); bcopy(sge, cm->cm_sge, len); cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); return (0); } /* * Add one IEEE scatter-gather element (chain or simple) to the IEEE scatter- * gather list for a command. Maintain cm_sglsize and cm_sge as the * remaining size and pointer to the next SGE to fill in, respectively. */ int mpr_push_ieee_sge(struct mpr_command *cm, void *sgep, int segsleft) { MPI2_IEEE_SGE_SIMPLE64 *sge = sgep; int error, ieee_sge_size = sizeof(MPI25_SGE_IO_UNION); uint32_t saved_buf_len, saved_address_low, saved_address_high; uint32_t sge_length; /* * case 1: No room for chain or segment (error). * case 2: Two or more segments left but only room for chain. * case 3: Last segment and room for it, so set flags. */ /* * There should be room for at least one element, or there is a big * problem. */ if (cm->cm_sglsize < ieee_sge_size) panic("MPR: Need SGE Error Code\n"); if ((segsleft >= 2) && (cm->cm_sglsize < (ieee_sge_size * 2))) { if ((error = mpr_add_chain(cm, segsleft)) != 0) return (error); } if (segsleft == 1) { /* * If this is a bi-directional request, need to account for that * here. Save the pre-filled sge values. These will be used * either for the 2nd SGL or for a single direction SGL. If * cm_out_len is non-zero, this is a bi-directional request, so * fill in the OUT SGL first, then the IN SGL, otherwise just * fill in the IN SGL. Note that at this time, when filling in * 2 SGL's for a bi-directional request, they both use the same * DMA buffer (same cm command). */ saved_buf_len = sge->Length; saved_address_low = sge->Address.Low; saved_address_high = sge->Address.High; if (cm->cm_out_len) { sge->Length = cm->cm_out_len; sge->Flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR); cm->cm_sglsize -= ieee_sge_size; /* Endian Safe code */ sge_length = sge->Length; sge->Length = htole32(sge_length); bcopy(sgep, cm->cm_sge, ieee_sge_size); cm->cm_sge = (MPI25_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + ieee_sge_size); } sge->Length = saved_buf_len; sge->Flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR | MPI25_IEEE_SGE_FLAGS_END_OF_LIST); sge->Address.Low = saved_address_low; sge->Address.High = saved_address_high; } cm->cm_sglsize -= ieee_sge_size; /* Endian Safe code */ sge_length = sge->Length; sge->Length = htole32(sge_length); bcopy(sgep, cm->cm_sge, ieee_sge_size); cm->cm_sge = (MPI25_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + ieee_sge_size); return (0); } /* * Add one dma segment to the scatter-gather list for a command. */ int mpr_add_dmaseg(struct mpr_command *cm, vm_paddr_t pa, size_t len, u_int flags, int segsleft) { MPI2_SGE_SIMPLE64 sge; MPI2_IEEE_SGE_SIMPLE64 ieee_sge; if (!(cm->cm_flags & MPR_CM_FLAGS_SGE_SIMPLE)) { ieee_sge.Flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR); ieee_sge.Length = len; mpr_from_u64(pa, &ieee_sge.Address); return (mpr_push_ieee_sge(cm, &ieee_sge, segsleft)); } else { /* * This driver always uses 64-bit address elements for * simplicity. */ flags |= MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_64_BIT_ADDRESSING; /* Set Endian safe macro in mpr_push_sge */ sge.FlagsLength = len | (flags << MPI2_SGE_FLAGS_SHIFT); mpr_from_u64(pa, &sge.Address); return (mpr_push_sge(cm, &sge, sizeof sge, segsleft)); } } static void mpr_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct mpr_softc *sc; struct mpr_command *cm; u_int i, dir, sflags; cm = (struct mpr_command *)arg; sc = cm->cm_sc; /* * In this case, just print out a warning and let the chip tell the * user they did the wrong thing. */ if ((cm->cm_max_segs != 0) && (nsegs > cm->cm_max_segs)) { mpr_dprint(sc, MPR_ERROR, "%s: warning: busdma returned %d " "segments, more than the %d allowed\n", __func__, nsegs, cm->cm_max_segs); } /* * Set up DMA direction flags. Bi-directional requests are also handled * here. In that case, both direction flags will be set. */ sflags = 0; if (cm->cm_flags & MPR_CM_FLAGS_SMP_PASS) { /* * We have to add a special case for SMP passthrough, there * is no easy way to generically handle it. The first * S/G element is used for the command (therefore the * direction bit needs to be set). The second one is used * for the reply. We'll leave it to the caller to make * sure we only have two buffers. */ /* * Even though the busdma man page says it doesn't make * sense to have both direction flags, it does in this case. * We have one s/g element being accessed in each direction. */ dir = BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD; /* * Set the direction flag on the first buffer in the SMP * passthrough request. We'll clear it for the second one. */ sflags |= MPI2_SGE_FLAGS_DIRECTION | MPI2_SGE_FLAGS_END_OF_BUFFER; } else if (cm->cm_flags & MPR_CM_FLAGS_DATAOUT) { sflags |= MPI2_SGE_FLAGS_HOST_TO_IOC; dir = BUS_DMASYNC_PREWRITE; } else dir = BUS_DMASYNC_PREREAD; /* Check if a native SG list is needed for an NVMe PCIe device. */ if (cm->cm_targ && cm->cm_targ->is_nvme && mpr_check_pcie_native_sgl(sc, cm, segs, nsegs) == 0) { /* A native SG list was built, skip to end. */ goto out; } for (i = 0; i < nsegs; i++) { if ((cm->cm_flags & MPR_CM_FLAGS_SMP_PASS) && (i != 0)) { sflags &= ~MPI2_SGE_FLAGS_DIRECTION; } error = mpr_add_dmaseg(cm, segs[i].ds_addr, segs[i].ds_len, sflags, nsegs - i); if (error != 0) { /* Resource shortage, roll back! */ if (ratecheck(&sc->lastfail, &mpr_chainfail_interval)) mpr_dprint(sc, MPR_INFO, "Out of chain frames, " "consider increasing hw.mpr.max_chains.\n"); cm->cm_flags |= MPR_CM_FLAGS_CHAIN_FAILED; mpr_complete_command(sc, cm); return; } } out: bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap, dir); mpr_enqueue_request(sc, cm); return; } static void mpr_data_cb2(void *arg, bus_dma_segment_t *segs, int nsegs, bus_size_t mapsize, int error) { mpr_data_cb(arg, segs, nsegs, error); } /* * This is the routine to enqueue commands ansynchronously. * Note that the only error path here is from bus_dmamap_load(), which can * return EINPROGRESS if it is waiting for resources. Other than this, it's * assumed that if you have a command in-hand, then you have enough credits * to use it. */ int mpr_map_command(struct mpr_softc *sc, struct mpr_command *cm) { int error = 0; if (cm->cm_flags & MPR_CM_FLAGS_USE_UIO) { error = bus_dmamap_load_uio(sc->buffer_dmat, cm->cm_dmamap, &cm->cm_uio, mpr_data_cb2, cm, 0); } else if (cm->cm_flags & MPR_CM_FLAGS_USE_CCB) { error = bus_dmamap_load_ccb(sc->buffer_dmat, cm->cm_dmamap, cm->cm_data, mpr_data_cb, cm, 0); } else if ((cm->cm_data != NULL) && (cm->cm_length != 0)) { error = bus_dmamap_load(sc->buffer_dmat, cm->cm_dmamap, cm->cm_data, cm->cm_length, mpr_data_cb, cm, 0); } else { /* Add a zero-length element as needed */ if (cm->cm_sge != NULL) mpr_add_dmaseg(cm, 0, 0, 0, 1); mpr_enqueue_request(sc, cm); } return (error); } /* * This is the routine to enqueue commands synchronously. An error of * EINPROGRESS from mpr_map_command() is ignored since the command will * be executed and enqueued automatically. Other errors come from msleep(). */ int mpr_wait_command(struct mpr_softc *sc, struct mpr_command **cmp, int timeout, int sleep_flag) { int error, rc; struct timeval cur_time, start_time; struct mpr_command *cm = *cmp; if (sc->mpr_flags & MPR_FLAGS_DIAGRESET) return EBUSY; cm->cm_complete = NULL; cm->cm_flags |= (MPR_CM_FLAGS_WAKEUP + MPR_CM_FLAGS_POLLED); error = mpr_map_command(sc, cm); if ((error != 0) && (error != EINPROGRESS)) return (error); // Check for context and wait for 50 mSec at a time until time has // expired or the command has finished. If msleep can't be used, need // to poll. if (curthread->td_no_sleeping) sleep_flag = NO_SLEEP; getmicrouptime(&start_time); if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP) { error = msleep(cm, &sc->mpr_mtx, 0, "mprwait", timeout*hz); if (error == EWOULDBLOCK) { /* * Record the actual elapsed time in the case of a * timeout for the message below. */ getmicrouptime(&cur_time); timevalsub(&cur_time, &start_time); } } else { while ((cm->cm_flags & MPR_CM_FLAGS_COMPLETE) == 0) { mpr_intr_locked(sc); if (sleep_flag == CAN_SLEEP) pause("mprwait", hz/20); else DELAY(50000); getmicrouptime(&cur_time); timevalsub(&cur_time, &start_time); if (cur_time.tv_sec > timeout) { error = EWOULDBLOCK; break; } } } if (error == EWOULDBLOCK) { if (cm->cm_timeout_handler == NULL) { mpr_dprint(sc, MPR_FAULT, "Calling Reinit from %s, timeout=%d," " elapsed=%jd\n", __func__, timeout, (intmax_t)cur_time.tv_sec); rc = mpr_reinit(sc); mpr_dprint(sc, MPR_FAULT, "Reinit %s\n", (rc == 0) ? "success" : "failed"); } else cm->cm_timeout_handler(sc, cm); if (sc->mpr_flags & MPR_FLAGS_REALLOCATED) { /* * Tell the caller that we freed the command in a * reinit. */ *cmp = NULL; } error = ETIMEDOUT; } return (error); } /* * This is the routine to enqueue a command synchonously and poll for * completion. Its use should be rare. */ int mpr_request_polled(struct mpr_softc *sc, struct mpr_command **cmp) { int error, rc; struct timeval cur_time, start_time; struct mpr_command *cm = *cmp; error = 0; cm->cm_flags |= MPR_CM_FLAGS_POLLED; cm->cm_complete = NULL; mpr_map_command(sc, cm); getmicrouptime(&start_time); while ((cm->cm_flags & MPR_CM_FLAGS_COMPLETE) == 0) { mpr_intr_locked(sc); if (mtx_owned(&sc->mpr_mtx)) msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0, "mprpoll", hz/20); else pause("mprpoll", hz/20); /* * Check for real-time timeout and fail if more than 60 seconds. */ getmicrouptime(&cur_time); timevalsub(&cur_time, &start_time); if (cur_time.tv_sec > 60) { mpr_dprint(sc, MPR_FAULT, "polling failed\n"); error = ETIMEDOUT; break; } } cm->cm_state = MPR_CM_STATE_BUSY; if (error) { mpr_dprint(sc, MPR_FAULT, "Calling Reinit from %s\n", __func__); rc = mpr_reinit(sc); mpr_dprint(sc, MPR_FAULT, "Reinit %s\n", (rc == 0) ? "success" : "failed"); if (sc->mpr_flags & MPR_FLAGS_REALLOCATED) { /* * Tell the caller that we freed the command in a * reinit. */ *cmp = NULL; } } return (error); } /* * The MPT driver had a verbose interface for config pages. In this driver, * reduce it to much simpler terms, similar to the Linux driver. */ int mpr_read_config_page(struct mpr_softc *sc, struct mpr_config_params *params) { MPI2_CONFIG_REQUEST *req; struct mpr_command *cm; int error; if (sc->mpr_flags & MPR_FLAGS_BUSY) { return (EBUSY); } cm = mpr_alloc_command(sc); if (cm == NULL) { return (EBUSY); } req = (MPI2_CONFIG_REQUEST *)cm->cm_req; req->Function = MPI2_FUNCTION_CONFIG; req->Action = params->action; req->SGLFlags = 0; req->ChainOffset = 0; req->PageAddress = params->page_address; if (params->hdr.Struct.PageType == MPI2_CONFIG_PAGETYPE_EXTENDED) { MPI2_CONFIG_EXTENDED_PAGE_HEADER *hdr; hdr = ¶ms->hdr.Ext; req->ExtPageType = hdr->ExtPageType; req->ExtPageLength = hdr->ExtPageLength; req->Header.PageType = MPI2_CONFIG_PAGETYPE_EXTENDED; req->Header.PageLength = 0; /* Must be set to zero */ req->Header.PageNumber = hdr->PageNumber; req->Header.PageVersion = hdr->PageVersion; } else { MPI2_CONFIG_PAGE_HEADER *hdr; hdr = ¶ms->hdr.Struct; req->Header.PageType = hdr->PageType; req->Header.PageNumber = hdr->PageNumber; req->Header.PageLength = hdr->PageLength; req->Header.PageVersion = hdr->PageVersion; } cm->cm_data = params->buffer; cm->cm_length = params->length; if (cm->cm_data != NULL) { cm->cm_sge = &req->PageBufferSGE; cm->cm_sglsize = sizeof(MPI2_SGE_IO_UNION); cm->cm_flags = MPR_CM_FLAGS_SGE_SIMPLE | MPR_CM_FLAGS_DATAIN; } else cm->cm_sge = NULL; cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_complete_data = params; if (params->callback != NULL) { cm->cm_complete = mpr_config_complete; return (mpr_map_command(sc, cm)); } else { error = mpr_wait_command(sc, &cm, 0, CAN_SLEEP); if (error) { mpr_dprint(sc, MPR_FAULT, "Error %d reading config page\n", error); if (cm != NULL) mpr_free_command(sc, cm); return (error); } mpr_config_complete(sc, cm); } return (0); } int mpr_write_config_page(struct mpr_softc *sc, struct mpr_config_params *params) { return (EINVAL); } static void mpr_config_complete(struct mpr_softc *sc, struct mpr_command *cm) { MPI2_CONFIG_REPLY *reply; struct mpr_config_params *params; MPR_FUNCTRACE(sc); params = cm->cm_complete_data; if (cm->cm_data != NULL) { bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->buffer_dmat, cm->cm_dmamap); } /* * XXX KDM need to do more error recovery? This results in the * device in question not getting probed. */ if ((cm->cm_flags & MPR_CM_FLAGS_ERROR_MASK) != 0) { params->status = MPI2_IOCSTATUS_BUSY; goto done; } reply = (MPI2_CONFIG_REPLY *)cm->cm_reply; if (reply == NULL) { params->status = MPI2_IOCSTATUS_BUSY; goto done; } params->status = reply->IOCStatus; if (params->hdr.Struct.PageType == MPI2_CONFIG_PAGETYPE_EXTENDED) { params->hdr.Ext.ExtPageType = reply->ExtPageType; params->hdr.Ext.ExtPageLength = reply->ExtPageLength; params->hdr.Ext.PageType = reply->Header.PageType; params->hdr.Ext.PageNumber = reply->Header.PageNumber; params->hdr.Ext.PageVersion = reply->Header.PageVersion; } else { params->hdr.Struct.PageType = reply->Header.PageType; params->hdr.Struct.PageNumber = reply->Header.PageNumber; params->hdr.Struct.PageLength = reply->Header.PageLength; params->hdr.Struct.PageVersion = reply->Header.PageVersion; } done: mpr_free_command(sc, cm); if (params->callback != NULL) params->callback(sc, params); return; } diff --git a/sys/dev/mps/mps.c b/sys/dev/mps/mps.c index e4d79b10e358..935b20a1bc5a 100644 --- a/sys/dev/mps/mps.c +++ b/sys/dev/mps/mps.c @@ -1,3235 +1,3235 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Yahoo! Inc. * Copyright (c) 2011-2015 LSI Corp. * Copyright (c) 2013-2015 Avago Technologies * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Avago Technologies (LSI) MPT-Fusion Host Adapter FreeBSD * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); /* Communications core for Avago Technologies (LSI) MPT2 */ /* TODO Move headers to mpsvar */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int mps_diag_reset(struct mps_softc *sc, int sleep_flag); static int mps_init_queues(struct mps_softc *sc); static void mps_resize_queues(struct mps_softc *sc); static int mps_message_unit_reset(struct mps_softc *sc, int sleep_flag); static int mps_transition_operational(struct mps_softc *sc); static int mps_iocfacts_allocate(struct mps_softc *sc, uint8_t attaching); static void mps_iocfacts_free(struct mps_softc *sc); static void mps_startup(void *arg); static int mps_send_iocinit(struct mps_softc *sc); static int mps_alloc_queues(struct mps_softc *sc); static int mps_alloc_hw_queues(struct mps_softc *sc); static int mps_alloc_replies(struct mps_softc *sc); static int mps_alloc_requests(struct mps_softc *sc); static int mps_attach_log(struct mps_softc *sc); static __inline void mps_complete_command(struct mps_softc *sc, struct mps_command *cm); static void mps_dispatch_event(struct mps_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *reply); static void mps_config_complete(struct mps_softc *sc, struct mps_command *cm); static void mps_periodic(void *); static int mps_reregister_events(struct mps_softc *sc); static void mps_enqueue_request(struct mps_softc *sc, struct mps_command *cm); static int mps_get_iocfacts(struct mps_softc *sc, MPI2_IOC_FACTS_REPLY *facts); static int mps_wait_db_ack(struct mps_softc *sc, int timeout, int sleep_flag); static int mps_debug_sysctl(SYSCTL_HANDLER_ARGS); static int mps_dump_reqs(SYSCTL_HANDLER_ARGS); static void mps_parse_debug(struct mps_softc *sc, char *list); SYSCTL_NODE(_hw, OID_AUTO, mps, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "MPS Driver Parameters"); MALLOC_DEFINE(M_MPT2, "mps", "mpt2 driver memory"); MALLOC_DECLARE(M_MPSUSER); /* * Do a "Diagnostic Reset" aka a hard reset. This should get the chip out of * any state and back to its initialization state machine. */ static char mpt2_reset_magic[] = { 0x00, 0x0f, 0x04, 0x0b, 0x02, 0x07, 0x0d }; /* Added this union to smoothly convert le64toh cm->cm_desc.Words. * Compiler only support unint64_t to be passed as argument. * Otherwise it will throw below error * "aggregate value used where an integer was expected" */ typedef union { u64 word; struct { u32 low; u32 high; } u; } request_descriptor_t; /* Rate limit chain-fail messages to 1 per minute */ static struct timeval mps_chainfail_interval = { 60, 0 }; /* * sleep_flag can be either CAN_SLEEP or NO_SLEEP. * If this function is called from process context, it can sleep * and there is no harm to sleep, in case if this fuction is called * from Interrupt handler, we can not sleep and need NO_SLEEP flag set. * based on sleep flags driver will call either msleep, pause or DELAY. * msleep and pause are of same variant, but pause is used when mps_mtx * is not hold by driver. * */ static int mps_diag_reset(struct mps_softc *sc,int sleep_flag) { uint32_t reg; int i, error, tries = 0; uint8_t first_wait_done = FALSE; mps_dprint(sc, MPS_INIT, "%s entered\n", __func__); /* Clear any pending interrupts */ mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); /* * Force NO_SLEEP for threads prohibited to sleep * e.a Thread from interrupt handler are prohibited to sleep. */ if (curthread->td_no_sleeping != 0) sleep_flag = NO_SLEEP; mps_dprint(sc, MPS_INIT, "sequence start, sleep_flag= %d\n", sleep_flag); /* Push the magic sequence */ error = ETIMEDOUT; while (tries++ < 20) { for (i = 0; i < sizeof(mpt2_reset_magic); i++) mps_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET, mpt2_reset_magic[i]); /* wait 100 msec */ if (mtx_owned(&sc->mps_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mps_mtx, 0, "mpsdiag", hz/10); else if (sleep_flag == CAN_SLEEP) pause("mpsdiag", hz/10); else DELAY(100 * 1000); reg = mps_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET); if (reg & MPI2_DIAG_DIAG_WRITE_ENABLE) { error = 0; break; } } if (error) { mps_dprint(sc, MPS_INIT, "sequence failed, error=%d, exit\n", error); return (error); } /* Send the actual reset. XXX need to refresh the reg? */ reg |= MPI2_DIAG_RESET_ADAPTER; mps_dprint(sc, MPS_INIT, "sequence success, sending reset, reg= 0x%x\n", reg); mps_regwrite(sc, MPI2_HOST_DIAGNOSTIC_OFFSET, reg); /* Wait up to 300 seconds in 50ms intervals */ error = ETIMEDOUT; for (i = 0; i < 6000; i++) { /* * Wait 50 msec. If this is the first time through, wait 256 * msec to satisfy Diag Reset timing requirements. */ if (first_wait_done) { if (mtx_owned(&sc->mps_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mps_mtx, 0, "mpsdiag", hz/20); else if (sleep_flag == CAN_SLEEP) pause("mpsdiag", hz/20); else DELAY(50 * 1000); } else { DELAY(256 * 1000); first_wait_done = TRUE; } /* * Check for the RESET_ADAPTER bit to be cleared first, then * wait for the RESET state to be cleared, which takes a little * longer. */ reg = mps_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET); if (reg & MPI2_DIAG_RESET_ADAPTER) { continue; } reg = mps_regread(sc, MPI2_DOORBELL_OFFSET); if ((reg & MPI2_IOC_STATE_MASK) != MPI2_IOC_STATE_RESET) { error = 0; break; } } if (error) { mps_dprint(sc, MPS_INIT, "reset failed, error= %d, exit\n", error); return (error); } mps_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET, 0x0); mps_dprint(sc, MPS_INIT, "diag reset success, exit\n"); return (0); } static int mps_message_unit_reset(struct mps_softc *sc, int sleep_flag) { int error; MPS_FUNCTRACE(sc); mps_dprint(sc, MPS_INIT, "%s entered\n", __func__); error = 0; mps_regwrite(sc, MPI2_DOORBELL_OFFSET, MPI2_FUNCTION_IOC_MESSAGE_UNIT_RESET << MPI2_DOORBELL_FUNCTION_SHIFT); if (mps_wait_db_ack(sc, 5, sleep_flag) != 0) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "Doorbell handshake failed\n"); error = ETIMEDOUT; } mps_dprint(sc, MPS_INIT, "%s exit\n", __func__); return (error); } static int mps_transition_ready(struct mps_softc *sc) { uint32_t reg, state; int error, tries = 0; int sleep_flags; MPS_FUNCTRACE(sc); /* If we are in attach call, do not sleep */ sleep_flags = (sc->mps_flags & MPS_FLAGS_ATTACH_DONE) ? CAN_SLEEP:NO_SLEEP; error = 0; mps_dprint(sc, MPS_INIT, "%s entered, sleep_flags= %d\n", __func__, sleep_flags); while (tries++ < 1200) { reg = mps_regread(sc, MPI2_DOORBELL_OFFSET); mps_dprint(sc, MPS_INIT, " Doorbell= 0x%x\n", reg); /* * Ensure the IOC is ready to talk. If it's not, try * resetting it. */ if (reg & MPI2_DOORBELL_USED) { mps_dprint(sc, MPS_INIT, " Not ready, sending diag " "reset\n"); mps_diag_reset(sc, sleep_flags); DELAY(50000); continue; } /* Is the adapter owned by another peer? */ if ((reg & MPI2_DOORBELL_WHO_INIT_MASK) == (MPI2_WHOINIT_PCI_PEER << MPI2_DOORBELL_WHO_INIT_SHIFT)) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "IOC is under the " "control of another peer host, aborting " "initialization.\n"); error = ENXIO; break; } state = reg & MPI2_IOC_STATE_MASK; if (state == MPI2_IOC_STATE_READY) { /* Ready to go! */ error = 0; break; } else if (state == MPI2_IOC_STATE_FAULT) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "IOC in fault " "state 0x%x, resetting\n", state & MPI2_DOORBELL_FAULT_CODE_MASK); mps_diag_reset(sc, sleep_flags); } else if (state == MPI2_IOC_STATE_OPERATIONAL) { /* Need to take ownership */ mps_message_unit_reset(sc, sleep_flags); } else if (state == MPI2_IOC_STATE_RESET) { /* Wait a bit, IOC might be in transition */ mps_dprint(sc, MPS_INIT|MPS_FAULT, "IOC in unexpected reset state\n"); } else { mps_dprint(sc, MPS_INIT|MPS_FAULT, "IOC in unknown state 0x%x\n", state); error = EINVAL; break; } /* Wait 50ms for things to settle down. */ DELAY(50000); } if (error) mps_dprint(sc, MPS_INIT|MPS_FAULT, "Cannot transition IOC to ready\n"); mps_dprint(sc, MPS_INIT, "%s exit\n", __func__); return (error); } static int mps_transition_operational(struct mps_softc *sc) { uint32_t reg, state; int error; MPS_FUNCTRACE(sc); error = 0; reg = mps_regread(sc, MPI2_DOORBELL_OFFSET); mps_dprint(sc, MPS_INIT, "%s entered, Doorbell= 0x%x\n", __func__, reg); state = reg & MPI2_IOC_STATE_MASK; if (state != MPI2_IOC_STATE_READY) { mps_dprint(sc, MPS_INIT, "IOC not ready\n"); if ((error = mps_transition_ready(sc)) != 0) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "failed to transition ready, exit\n"); return (error); } } error = mps_send_iocinit(sc); mps_dprint(sc, MPS_INIT, "%s exit\n", __func__); return (error); } static void mps_resize_queues(struct mps_softc *sc) { u_int reqcr, prireqcr, maxio, sges_per_frame; /* * Size the queues. Since the reply queues always need one free * entry, we'll deduct one reply message here. The LSI documents * suggest instead to add a count to the request queue, but I think * that it's better to deduct from reply queue. */ prireqcr = MAX(1, sc->max_prireqframes); prireqcr = MIN(prireqcr, sc->facts->HighPriorityCredit); reqcr = MAX(2, sc->max_reqframes); reqcr = MIN(reqcr, sc->facts->RequestCredit); sc->num_reqs = prireqcr + reqcr; sc->num_prireqs = prireqcr; sc->num_replies = MIN(sc->max_replyframes + sc->max_evtframes, sc->facts->MaxReplyDescriptorPostQueueDepth) - 1; /* Store the request frame size in bytes rather than as 32bit words */ sc->reqframesz = sc->facts->IOCRequestFrameSize * 4; /* * Max IO Size is Page Size * the following: * ((SGEs per frame - 1 for chain element) * Max Chain Depth) * + 1 for no chain needed in last frame * * If user suggests a Max IO size to use, use the smaller of the * user's value and the calculated value as long as the user's * value is larger than 0. The user's value is in pages. */ sges_per_frame = sc->reqframesz / sizeof(MPI2_SGE_SIMPLE64) - 1; maxio = (sges_per_frame * sc->facts->MaxChainDepth + 1) * PAGE_SIZE; /* * If I/O size limitation requested, then use it and pass up to CAM. * If not, use maxphys as an optimization hint, but report HW limit. */ if (sc->max_io_pages > 0) { maxio = min(maxio, sc->max_io_pages * PAGE_SIZE); sc->maxio = maxio; } else { sc->maxio = maxio; maxio = min(maxio, maxphys); } sc->num_chains = (maxio / PAGE_SIZE + sges_per_frame - 2) / sges_per_frame * reqcr; if (sc->max_chains > 0 && sc->max_chains < sc->num_chains) sc->num_chains = sc->max_chains; /* * Figure out the number of MSIx-based queues. If the firmware or * user has done something crazy and not allowed enough credit for * the queues to be useful then don't enable multi-queue. */ if (sc->facts->MaxMSIxVectors < 2) sc->msi_msgs = 1; if (sc->msi_msgs > 1) { sc->msi_msgs = MIN(sc->msi_msgs, mp_ncpus); sc->msi_msgs = MIN(sc->msi_msgs, sc->facts->MaxMSIxVectors); if (sc->num_reqs / sc->msi_msgs < 2) sc->msi_msgs = 1; } mps_dprint(sc, MPS_INIT, "Sized queues to q=%d reqs=%d replies=%d\n", sc->msi_msgs, sc->num_reqs, sc->num_replies); } /* * This is called during attach and when re-initializing due to a Diag Reset. * IOC Facts is used to allocate many of the structures needed by the driver. * If called from attach, de-allocation is not required because the driver has * not allocated any structures yet, but if called from a Diag Reset, previously * allocated structures based on IOC Facts will need to be freed and re- * allocated bases on the latest IOC Facts. */ static int mps_iocfacts_allocate(struct mps_softc *sc, uint8_t attaching) { int error; Mpi2IOCFactsReply_t saved_facts; uint8_t saved_mode, reallocating; mps_dprint(sc, MPS_INIT|MPS_TRACE, "%s entered\n", __func__); /* Save old IOC Facts and then only reallocate if Facts have changed */ if (!attaching) { bcopy(sc->facts, &saved_facts, sizeof(MPI2_IOC_FACTS_REPLY)); } /* * Get IOC Facts. In all cases throughout this function, panic if doing * a re-initialization and only return the error if attaching so the OS * can handle it. */ if ((error = mps_get_iocfacts(sc, sc->facts)) != 0) { if (attaching) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "Failed to get " "IOC Facts with error %d, exit\n", error); return (error); } else { panic("%s failed to get IOC Facts with error %d\n", __func__, error); } } MPS_DPRINT_PAGE(sc, MPS_XINFO, iocfacts, sc->facts); snprintf(sc->fw_version, sizeof(sc->fw_version), "%02d.%02d.%02d.%02d", sc->facts->FWVersion.Struct.Major, sc->facts->FWVersion.Struct.Minor, sc->facts->FWVersion.Struct.Unit, sc->facts->FWVersion.Struct.Dev); snprintf(sc->msg_version, sizeof(sc->msg_version), "%d.%d", (sc->facts->MsgVersion & MPI2_IOCFACTS_MSGVERSION_MAJOR_MASK) >> MPI2_IOCFACTS_MSGVERSION_MAJOR_SHIFT, (sc->facts->MsgVersion & MPI2_IOCFACTS_MSGVERSION_MINOR_MASK) >> MPI2_IOCFACTS_MSGVERSION_MINOR_SHIFT); mps_dprint(sc, MPS_INFO, "Firmware: %s, Driver: %s\n", sc->fw_version, MPS_DRIVER_VERSION); mps_dprint(sc, MPS_INFO, "IOCCapabilities: %b\n", sc->facts->IOCCapabilities, "\20" "\3ScsiTaskFull" "\4DiagTrace" "\5SnapBuf" "\6ExtBuf" "\7EEDP" "\10BiDirTarg" "\11Multicast" "\14TransRetry" "\15IR" "\16EventReplay" "\17RaidAccel" "\20MSIXIndex" "\21HostDisc"); /* * If the chip doesn't support event replay then a hard reset will be * required to trigger a full discovery. Do the reset here then * retransition to Ready. A hard reset might have already been done, * but it doesn't hurt to do it again. Only do this if attaching, not * for a Diag Reset. */ if (attaching && ((sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EVENT_REPLAY) == 0)) { mps_dprint(sc, MPS_INIT, "No event replay, reseting\n"); mps_diag_reset(sc, NO_SLEEP); if ((error = mps_transition_ready(sc)) != 0) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "Failed to " "transition to ready with error %d, exit\n", error); return (error); } } /* * Set flag if IR Firmware is loaded. If the RAID Capability has * changed from the previous IOC Facts, log a warning, but only if * checking this after a Diag Reset and not during attach. */ saved_mode = sc->ir_firmware; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_INTEGRATED_RAID) sc->ir_firmware = 1; if (!attaching) { if (sc->ir_firmware != saved_mode) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "new IR/IT mode " "in IOC Facts does not match previous mode\n"); } } /* Only deallocate and reallocate if relevant IOC Facts have changed */ reallocating = FALSE; sc->mps_flags &= ~MPS_FLAGS_REALLOCATED; if ((!attaching) && ((saved_facts.MsgVersion != sc->facts->MsgVersion) || (saved_facts.HeaderVersion != sc->facts->HeaderVersion) || (saved_facts.MaxChainDepth != sc->facts->MaxChainDepth) || (saved_facts.RequestCredit != sc->facts->RequestCredit) || (saved_facts.ProductID != sc->facts->ProductID) || (saved_facts.IOCCapabilities != sc->facts->IOCCapabilities) || (saved_facts.IOCRequestFrameSize != sc->facts->IOCRequestFrameSize) || (saved_facts.MaxTargets != sc->facts->MaxTargets) || (saved_facts.MaxSasExpanders != sc->facts->MaxSasExpanders) || (saved_facts.MaxEnclosures != sc->facts->MaxEnclosures) || (saved_facts.HighPriorityCredit != sc->facts->HighPriorityCredit) || (saved_facts.MaxReplyDescriptorPostQueueDepth != sc->facts->MaxReplyDescriptorPostQueueDepth) || (saved_facts.ReplyFrameSize != sc->facts->ReplyFrameSize) || (saved_facts.MaxVolumes != sc->facts->MaxVolumes) || (saved_facts.MaxPersistentEntries != sc->facts->MaxPersistentEntries))) { reallocating = TRUE; /* Record that we reallocated everything */ sc->mps_flags |= MPS_FLAGS_REALLOCATED; } /* * Some things should be done if attaching or re-allocating after a Diag * Reset, but are not needed after a Diag Reset if the FW has not * changed. */ if (attaching || reallocating) { /* * Check if controller supports FW diag buffers and set flag to * enable each type. */ if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_DIAG_TRACE_BUFFER) sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_TRACE]. enabled = TRUE; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_SNAPSHOT_BUFFER) sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_SNAPSHOT]. enabled = TRUE; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EXTENDED_BUFFER) sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_EXTENDED]. enabled = TRUE; /* * Set flag if EEDP is supported and if TLR is supported. */ if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EEDP) sc->eedp_enabled = TRUE; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_TLR) sc->control_TLR = TRUE; mps_resize_queues(sc); /* * Initialize all Tail Queues */ TAILQ_INIT(&sc->req_list); TAILQ_INIT(&sc->high_priority_req_list); TAILQ_INIT(&sc->chain_list); TAILQ_INIT(&sc->tm_list); } /* * If doing a Diag Reset and the FW is significantly different * (reallocating will be set above in IOC Facts comparison), then all * buffers based on the IOC Facts will need to be freed before they are * reallocated. */ if (reallocating) { mps_iocfacts_free(sc); mpssas_realloc_targets(sc, saved_facts.MaxTargets + saved_facts.MaxVolumes); } /* * Any deallocation has been completed. Now start reallocating * if needed. Will only need to reallocate if attaching or if the new * IOC Facts are different from the previous IOC Facts after a Diag * Reset. Targets have already been allocated above if needed. */ error = 0; while (attaching || reallocating) { if ((error = mps_alloc_hw_queues(sc)) != 0) break; if ((error = mps_alloc_replies(sc)) != 0) break; if ((error = mps_alloc_requests(sc)) != 0) break; if ((error = mps_alloc_queues(sc)) != 0) break; break; } if (error) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "Failed to alloc queues with error %d\n", error); mps_free(sc); return (error); } /* Always initialize the queues */ bzero(sc->free_queue, sc->fqdepth * 4); mps_init_queues(sc); /* * Always get the chip out of the reset state, but only panic if not * attaching. If attaching and there is an error, that is handled by * the OS. */ error = mps_transition_operational(sc); if (error != 0) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "Failed to " "transition to operational with error %d\n", error); mps_free(sc); return (error); } /* * Finish the queue initialization. * These are set here instead of in mps_init_queues() because the * IOC resets these values during the state transition in * mps_transition_operational(). The free index is set to 1 * because the corresponding index in the IOC is set to 0, and the * IOC treats the queues as full if both are set to the same value. * Hence the reason that the queue can't hold all of the possible * replies. */ sc->replypostindex = 0; mps_regwrite(sc, MPI2_REPLY_FREE_HOST_INDEX_OFFSET, sc->replyfreeindex); mps_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, 0); /* * Attach the subsystems so they can prepare their event masks. * XXX Should be dynamic so that IM/IR and user modules can attach */ error = 0; while (attaching) { mps_dprint(sc, MPS_INIT, "Attaching subsystems\n"); if ((error = mps_attach_log(sc)) != 0) break; if ((error = mps_attach_sas(sc)) != 0) break; if ((error = mps_attach_user(sc)) != 0) break; break; } if (error) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "Failed to attach all " "subsystems: error %d\n", error); mps_free(sc); return (error); } /* * XXX If the number of MSI-X vectors changes during re-init, this * won't see it and adjust. */ if (attaching && (error = mps_pci_setup_interrupts(sc)) != 0) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "Failed to setup " "interrupts\n"); mps_free(sc); return (error); } /* * Set flag if this is a WD controller. This shouldn't ever change, but * reset it after a Diag Reset, just in case. */ sc->WD_available = FALSE; if (pci_get_device(sc->mps_dev) == MPI2_MFGPAGE_DEVID_SSS6200) sc->WD_available = TRUE; return (error); } /* * This is called if memory is being free (during detach for example) and when * buffers need to be reallocated due to a Diag Reset. */ static void mps_iocfacts_free(struct mps_softc *sc) { struct mps_command *cm; int i; mps_dprint(sc, MPS_TRACE, "%s\n", __func__); if (sc->free_busaddr != 0) bus_dmamap_unload(sc->queues_dmat, sc->queues_map); if (sc->free_queue != NULL) bus_dmamem_free(sc->queues_dmat, sc->free_queue, sc->queues_map); if (sc->queues_dmat != NULL) bus_dma_tag_destroy(sc->queues_dmat); if (sc->chain_frames != NULL) { bus_dmamap_unload(sc->chain_dmat, sc->chain_map); bus_dmamem_free(sc->chain_dmat, sc->chain_frames, sc->chain_map); } if (sc->chain_dmat != NULL) bus_dma_tag_destroy(sc->chain_dmat); if (sc->sense_busaddr != 0) bus_dmamap_unload(sc->sense_dmat, sc->sense_map); if (sc->sense_frames != NULL) bus_dmamem_free(sc->sense_dmat, sc->sense_frames, sc->sense_map); if (sc->sense_dmat != NULL) bus_dma_tag_destroy(sc->sense_dmat); if (sc->reply_busaddr != 0) bus_dmamap_unload(sc->reply_dmat, sc->reply_map); if (sc->reply_frames != NULL) bus_dmamem_free(sc->reply_dmat, sc->reply_frames, sc->reply_map); if (sc->reply_dmat != NULL) bus_dma_tag_destroy(sc->reply_dmat); if (sc->req_busaddr != 0) bus_dmamap_unload(sc->req_dmat, sc->req_map); if (sc->req_frames != NULL) bus_dmamem_free(sc->req_dmat, sc->req_frames, sc->req_map); if (sc->req_dmat != NULL) bus_dma_tag_destroy(sc->req_dmat); if (sc->chains != NULL) free(sc->chains, M_MPT2); if (sc->commands != NULL) { for (i = 1; i < sc->num_reqs; i++) { cm = &sc->commands[i]; bus_dmamap_destroy(sc->buffer_dmat, cm->cm_dmamap); } free(sc->commands, M_MPT2); } if (sc->buffer_dmat != NULL) bus_dma_tag_destroy(sc->buffer_dmat); mps_pci_free_interrupts(sc); free(sc->queues, M_MPT2); sc->queues = NULL; } /* * The terms diag reset and hard reset are used interchangeably in the MPI * docs to mean resetting the controller chip. In this code diag reset * cleans everything up, and the hard reset function just sends the reset * sequence to the chip. This should probably be refactored so that every * subsystem gets a reset notification of some sort, and can clean up * appropriately. */ int mps_reinit(struct mps_softc *sc) { int error; struct mpssas_softc *sassc; sassc = sc->sassc; MPS_FUNCTRACE(sc); mtx_assert(&sc->mps_mtx, MA_OWNED); mps_dprint(sc, MPS_INIT|MPS_INFO, "Reinitializing controller\n"); if (sc->mps_flags & MPS_FLAGS_DIAGRESET) { mps_dprint(sc, MPS_INIT, "Reset already in progress\n"); return 0; } /* make sure the completion callbacks can recognize they're getting * a NULL cm_reply due to a reset. */ sc->mps_flags |= MPS_FLAGS_DIAGRESET; /* * Mask interrupts here. */ mps_dprint(sc, MPS_INIT, "masking interrupts and resetting\n"); mps_mask_intr(sc); error = mps_diag_reset(sc, CAN_SLEEP); if (error != 0) { /* XXXSL No need to panic here */ panic("%s hard reset failed with error %d\n", __func__, error); } /* Restore the PCI state, including the MSI-X registers */ mps_pci_restore(sc); /* Give the I/O subsystem special priority to get itself prepared */ mpssas_handle_reinit(sc); /* * Get IOC Facts and allocate all structures based on this information. * The attach function will also call mps_iocfacts_allocate at startup. * If relevant values have changed in IOC Facts, this function will free * all of the memory based on IOC Facts and reallocate that memory. */ if ((error = mps_iocfacts_allocate(sc, FALSE)) != 0) { panic("%s IOC Facts based allocation failed with error %d\n", __func__, error); } /* * Mapping structures will be re-allocated after getting IOC Page8, so * free these structures here. */ mps_mapping_exit(sc); /* * The static page function currently read is IOC Page8. Others can be * added in future. It's possible that the values in IOC Page8 have * changed after a Diag Reset due to user modification, so always read * these. Interrupts are masked, so unmask them before getting config * pages. */ mps_unmask_intr(sc); sc->mps_flags &= ~MPS_FLAGS_DIAGRESET; mps_base_static_config_pages(sc); /* * Some mapping info is based in IOC Page8 data, so re-initialize the * mapping tables. */ mps_mapping_initialize(sc); /* * Restart will reload the event masks clobbered by the reset, and * then enable the port. */ mps_reregister_events(sc); /* the end of discovery will release the simq, so we're done. */ mps_dprint(sc, MPS_INIT|MPS_XINFO, "Finished sc %p post %u free %u\n", sc, sc->replypostindex, sc->replyfreeindex); mpssas_release_simq_reinit(sassc); mps_dprint(sc, MPS_INIT, "%s exit\n", __func__); return 0; } /* Wait for the chip to ACK a word that we've put into its FIFO * Wait for seconds. In single loop wait for busy loop * for 500 microseconds. * Total is [ 0.5 * (2000 * ) ] in miliseconds. * */ static int mps_wait_db_ack(struct mps_softc *sc, int timeout, int sleep_flag) { u32 cntdn, count; u32 int_status; u32 doorbell; count = 0; cntdn = (sleep_flag == CAN_SLEEP) ? 1000*timeout : 2000*timeout; do { int_status = mps_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET); if (!(int_status & MPI2_HIS_SYS2IOC_DB_STATUS)) { mps_dprint(sc, MPS_TRACE, "%s: successful count(%d), timeout(%d)\n", __func__, count, timeout); return 0; } else if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) { doorbell = mps_regread(sc, MPI2_DOORBELL_OFFSET); if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { mps_dprint(sc, MPS_FAULT, "fault_state(0x%04x)!\n", doorbell); return (EFAULT); } } else if (int_status == 0xFFFFFFFF) goto out; /* If it can sleep, sleep for 1 milisecond, else busy loop for * 0.5 milisecond */ if (mtx_owned(&sc->mps_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mps_mtx, 0, "mpsdba", hz/1000); else if (sleep_flag == CAN_SLEEP) pause("mpsdba", hz/1000); else DELAY(500); count++; } while (--cntdn); out: mps_dprint(sc, MPS_FAULT, "%s: failed due to timeout count(%d), " "int_status(%x)!\n", __func__, count, int_status); return (ETIMEDOUT); } /* Wait for the chip to signal that the next word in its FIFO can be fetched */ static int mps_wait_db_int(struct mps_softc *sc) { int retry; for (retry = 0; retry < MPS_DB_MAX_WAIT; retry++) { if ((mps_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET) & MPI2_HIS_IOC2SYS_DB_STATUS) != 0) return (0); DELAY(2000); } return (ETIMEDOUT); } /* Step through the synchronous command state machine, i.e. "Doorbell mode" */ static int mps_request_sync(struct mps_softc *sc, void *req, MPI2_DEFAULT_REPLY *reply, int req_sz, int reply_sz, int timeout) { uint32_t *data32; uint16_t *data16; int i, count, ioc_sz, residual; int sleep_flags = CAN_SLEEP; if (curthread->td_no_sleeping != 0) sleep_flags = NO_SLEEP; /* Step 1 */ mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); /* Step 2 */ if (mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) return (EBUSY); /* Step 3 * Announce that a message is coming through the doorbell. Messages * are pushed at 32bit words, so round up if needed. */ count = (req_sz + 3) / 4; mps_regwrite(sc, MPI2_DOORBELL_OFFSET, (MPI2_FUNCTION_HANDSHAKE << MPI2_DOORBELL_FUNCTION_SHIFT) | (count << MPI2_DOORBELL_ADD_DWORDS_SHIFT)); /* Step 4 */ if (mps_wait_db_int(sc) || (mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) == 0) { mps_dprint(sc, MPS_FAULT, "Doorbell failed to activate\n"); return (ENXIO); } mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); if (mps_wait_db_ack(sc, 5, sleep_flags) != 0) { mps_dprint(sc, MPS_FAULT, "Doorbell handshake failed\n"); return (ENXIO); } /* Step 5 */ /* Clock out the message data synchronously in 32-bit dwords*/ data32 = (uint32_t *)req; for (i = 0; i < count; i++) { mps_regwrite(sc, MPI2_DOORBELL_OFFSET, htole32(data32[i])); if (mps_wait_db_ack(sc, 5, sleep_flags) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout while writing doorbell\n"); return (ENXIO); } } /* Step 6 */ /* Clock in the reply in 16-bit words. The total length of the * message is always in the 4th byte, so clock out the first 2 words * manually, then loop the rest. */ data16 = (uint16_t *)reply; if (mps_wait_db_int(sc) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout reading doorbell 0\n"); return (ENXIO); } data16[0] = mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK; mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); if (mps_wait_db_int(sc) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout reading doorbell 1\n"); return (ENXIO); } data16[1] = mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK; mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); /* Number of 32bit words in the message */ ioc_sz = reply->MsgLength; /* * Figure out how many 16bit words to clock in without overrunning. * The precision loss with dividing reply_sz can safely be * ignored because the messages can only be multiples of 32bits. */ residual = 0; count = MIN((reply_sz / 4), ioc_sz) * 2; if (count < ioc_sz * 2) { residual = ioc_sz * 2 - count; mps_dprint(sc, MPS_ERROR, "Driver error, throwing away %d " "residual message words\n", residual); } for (i = 2; i < count; i++) { if (mps_wait_db_int(sc) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout reading doorbell %d\n", i); return (ENXIO); } data16[i] = mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK; mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); } /* * Pull out residual words that won't fit into the provided buffer. * This keeps the chip from hanging due to a driver programming * error. */ while (residual--) { if (mps_wait_db_int(sc) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout reading doorbell\n"); return (ENXIO); } (void)mps_regread(sc, MPI2_DOORBELL_OFFSET); mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); } /* Step 7 */ if (mps_wait_db_int(sc) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout waiting to exit doorbell\n"); return (ENXIO); } if (mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) mps_dprint(sc, MPS_FAULT, "Warning, doorbell still active\n"); mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); return (0); } static void mps_enqueue_request(struct mps_softc *sc, struct mps_command *cm) { request_descriptor_t rd; MPS_FUNCTRACE(sc); mps_dprint(sc, MPS_TRACE, "SMID %u cm %p ccb %p\n", cm->cm_desc.Default.SMID, cm, cm->cm_ccb); if (sc->mps_flags & MPS_FLAGS_ATTACH_DONE && !(sc->mps_flags & MPS_FLAGS_SHUTDOWN)) mtx_assert(&sc->mps_mtx, MA_OWNED); if (++sc->io_cmds_active > sc->io_cmds_highwater) sc->io_cmds_highwater++; rd.u.low = cm->cm_desc.Words.Low; rd.u.high = cm->cm_desc.Words.High; rd.word = htole64(rd.word); KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, ("command not busy\n")); cm->cm_state = MPS_CM_STATE_INQUEUE; /* TODO-We may need to make below regwrite atomic */ mps_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_LOW_OFFSET, rd.u.low); mps_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_HIGH_OFFSET, rd.u.high); } /* * Just the FACTS, ma'am. */ static int mps_get_iocfacts(struct mps_softc *sc, MPI2_IOC_FACTS_REPLY *facts) { MPI2_DEFAULT_REPLY *reply; MPI2_IOC_FACTS_REQUEST request; int error, req_sz, reply_sz; MPS_FUNCTRACE(sc); mps_dprint(sc, MPS_INIT, "%s entered\n", __func__); req_sz = sizeof(MPI2_IOC_FACTS_REQUEST); reply_sz = sizeof(MPI2_IOC_FACTS_REPLY); reply = (MPI2_DEFAULT_REPLY *)facts; bzero(&request, req_sz); request.Function = MPI2_FUNCTION_IOC_FACTS; error = mps_request_sync(sc, &request, reply, req_sz, reply_sz, 5); mps_dprint(sc, MPS_INIT, "%s exit error= %d\n", __func__, error); return (error); } static int mps_send_iocinit(struct mps_softc *sc) { MPI2_IOC_INIT_REQUEST init; MPI2_DEFAULT_REPLY reply; int req_sz, reply_sz, error; struct timeval now; uint64_t time_in_msec; MPS_FUNCTRACE(sc); mps_dprint(sc, MPS_INIT, "%s entered\n", __func__); /* Do a quick sanity check on proper initialization */ if ((sc->pqdepth == 0) || (sc->fqdepth == 0) || (sc->reqframesz == 0) || (sc->replyframesz == 0)) { mps_dprint(sc, MPS_INIT|MPS_ERROR, "Driver not fully initialized for IOCInit\n"); return (EINVAL); } req_sz = sizeof(MPI2_IOC_INIT_REQUEST); reply_sz = sizeof(MPI2_IOC_INIT_REPLY); bzero(&init, req_sz); bzero(&reply, reply_sz); /* * Fill in the init block. Note that most addresses are * deliberately in the lower 32bits of memory. This is a micro- * optimzation for PCI/PCIX, though it's not clear if it helps PCIe. */ init.Function = MPI2_FUNCTION_IOC_INIT; init.WhoInit = MPI2_WHOINIT_HOST_DRIVER; init.MsgVersion = htole16(MPI2_VERSION); init.HeaderVersion = htole16(MPI2_HEADER_VERSION); init.SystemRequestFrameSize = htole16((uint16_t)(sc->reqframesz / 4)); init.ReplyDescriptorPostQueueDepth = htole16(sc->pqdepth); init.ReplyFreeQueueDepth = htole16(sc->fqdepth); init.SenseBufferAddressHigh = 0; init.SystemReplyAddressHigh = 0; init.SystemRequestFrameBaseAddress.High = 0; init.SystemRequestFrameBaseAddress.Low = htole32((uint32_t)sc->req_busaddr); init.ReplyDescriptorPostQueueAddress.High = 0; init.ReplyDescriptorPostQueueAddress.Low = htole32((uint32_t)sc->post_busaddr); init.ReplyFreeQueueAddress.High = 0; init.ReplyFreeQueueAddress.Low = htole32((uint32_t)sc->free_busaddr); getmicrotime(&now); time_in_msec = (now.tv_sec * 1000 + now.tv_usec/1000); init.TimeStamp.High = htole32((time_in_msec >> 32) & 0xFFFFFFFF); init.TimeStamp.Low = htole32(time_in_msec & 0xFFFFFFFF); error = mps_request_sync(sc, &init, &reply, req_sz, reply_sz, 5); if ((reply.IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS) error = ENXIO; mps_dprint(sc, MPS_INIT, "IOCInit status= 0x%x\n", reply.IOCStatus); mps_dprint(sc, MPS_INIT, "%s exit\n", __func__); return (error); } void mps_memaddr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { bus_addr_t *addr; addr = arg; *addr = segs[0].ds_addr; } void mps_memaddr_wait_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct mps_busdma_context *ctx; int need_unload, need_free; ctx = (struct mps_busdma_context *)arg; need_unload = 0; need_free = 0; mps_lock(ctx->softc); ctx->error = error; ctx->completed = 1; if ((error == 0) && (ctx->abandoned == 0)) { *ctx->addr = segs[0].ds_addr; } else { if (nsegs != 0) need_unload = 1; if (ctx->abandoned != 0) need_free = 1; } if (need_free == 0) wakeup(ctx); mps_unlock(ctx->softc); if (need_unload != 0) { bus_dmamap_unload(ctx->buffer_dmat, ctx->buffer_dmamap); *ctx->addr = 0; } if (need_free != 0) free(ctx, M_MPSUSER); } static int mps_alloc_queues(struct mps_softc *sc) { struct mps_queue *q; u_int nq, i; nq = sc->msi_msgs; mps_dprint(sc, MPS_INIT|MPS_XINFO, "Allocating %d I/O queues\n", nq); sc->queues = malloc(sizeof(struct mps_queue) * nq, M_MPT2, M_NOWAIT|M_ZERO); if (sc->queues == NULL) return (ENOMEM); for (i = 0; i < nq; i++) { q = &sc->queues[i]; mps_dprint(sc, MPS_INIT, "Configuring queue %d %p\n", i, q); q->sc = sc; q->qnum = i; } return (0); } static int mps_alloc_hw_queues(struct mps_softc *sc) { bus_dma_template_t t; bus_addr_t queues_busaddr; uint8_t *queues; int qsize, fqsize, pqsize; /* * The reply free queue contains 4 byte entries in multiples of 16 and * aligned on a 16 byte boundary. There must always be an unused entry. * This queue supplies fresh reply frames for the firmware to use. * * The reply descriptor post queue contains 8 byte entries in * multiples of 16 and aligned on a 16 byte boundary. This queue * contains filled-in reply frames sent from the firmware to the host. * * These two queues are allocated together for simplicity. */ sc->fqdepth = roundup2(sc->num_replies + 1, 16); sc->pqdepth = roundup2(sc->num_replies + 1, 16); fqsize= sc->fqdepth * 4; pqsize = sc->pqdepth * 8; qsize = fqsize + pqsize; bus_dma_template_init(&t, sc->mps_parent_dmat); BUS_DMA_TEMPLATE_FILL(&t, BD_ALIGNMENT(16), BD_MAXSIZE(qsize), BD_MAXSEGSIZE(qsize), BD_NSEGMENTS(1), BD_LOWADDR(BUS_SPACE_MAXADDR_32BIT)); if (bus_dma_template_tag(&t, &sc->queues_dmat)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate queues DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->queues_dmat, (void **)&queues, BUS_DMA_NOWAIT, &sc->queues_map)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate queues memory\n"); return (ENOMEM); } bzero(queues, qsize); bus_dmamap_load(sc->queues_dmat, sc->queues_map, queues, qsize, mps_memaddr_cb, &queues_busaddr, 0); sc->free_queue = (uint32_t *)queues; sc->free_busaddr = queues_busaddr; sc->post_queue = (MPI2_REPLY_DESCRIPTORS_UNION *)(queues + fqsize); sc->post_busaddr = queues_busaddr + fqsize; mps_dprint(sc, MPS_INIT, "free queue busaddr= %#016jx size= %d\n", (uintmax_t)sc->free_busaddr, fqsize); mps_dprint(sc, MPS_INIT, "reply queue busaddr= %#016jx size= %d\n", (uintmax_t)sc->post_busaddr, pqsize); return (0); } static int mps_alloc_replies(struct mps_softc *sc) { bus_dma_template_t t; int rsize, num_replies; /* Store the reply frame size in bytes rather than as 32bit words */ sc->replyframesz = sc->facts->ReplyFrameSize * 4; /* * sc->num_replies should be one less than sc->fqdepth. We need to * allocate space for sc->fqdepth replies, but only sc->num_replies * replies can be used at once. */ num_replies = max(sc->fqdepth, sc->num_replies); rsize = sc->replyframesz * num_replies; bus_dma_template_init(&t, sc->mps_parent_dmat); BUS_DMA_TEMPLATE_FILL(&t, BD_ALIGNMENT(4), BD_MAXSIZE(rsize), BD_MAXSEGSIZE(rsize), BD_NSEGMENTS(1), BD_LOWADDR(BUS_SPACE_MAXADDR_32BIT)); if (bus_dma_template_tag(&t, &sc->reply_dmat)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate replies DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->reply_dmat, (void **)&sc->reply_frames, BUS_DMA_NOWAIT, &sc->reply_map)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate replies memory\n"); return (ENOMEM); } bzero(sc->reply_frames, rsize); bus_dmamap_load(sc->reply_dmat, sc->reply_map, sc->reply_frames, rsize, mps_memaddr_cb, &sc->reply_busaddr, 0); mps_dprint(sc, MPS_INIT, "reply frames busaddr= %#016jx size= %d\n", (uintmax_t)sc->reply_busaddr, rsize); return (0); } static void mps_load_chains_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct mps_softc *sc = arg; struct mps_chain *chain; bus_size_t bo; int i, o, s; if (error != 0) return; for (i = 0, o = 0, s = 0; s < nsegs; s++) { for (bo = 0; bo + sc->reqframesz <= segs[s].ds_len; bo += sc->reqframesz) { chain = &sc->chains[i++]; chain->chain =(MPI2_SGE_IO_UNION *)(sc->chain_frames+o); chain->chain_busaddr = segs[s].ds_addr + bo; o += sc->reqframesz; mps_free_chain(sc, chain); } if (bo != segs[s].ds_len) o += segs[s].ds_len - bo; } sc->chain_free_lowwater = i; } static int mps_alloc_requests(struct mps_softc *sc) { bus_dma_template_t t; struct mps_command *cm; int i, rsize, nsegs; rsize = sc->reqframesz * sc->num_reqs; bus_dma_template_init(&t, sc->mps_parent_dmat); BUS_DMA_TEMPLATE_FILL(&t, BD_ALIGNMENT(16), BD_MAXSIZE(rsize), BD_MAXSEGSIZE(rsize), BD_NSEGMENTS(1), BD_LOWADDR(BUS_SPACE_MAXADDR_32BIT)); if (bus_dma_template_tag(&t, &sc->req_dmat)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate request DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->req_dmat, (void **)&sc->req_frames, BUS_DMA_NOWAIT, &sc->req_map)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate request memory\n"); return (ENOMEM); } bzero(sc->req_frames, rsize); bus_dmamap_load(sc->req_dmat, sc->req_map, sc->req_frames, rsize, mps_memaddr_cb, &sc->req_busaddr, 0); mps_dprint(sc, MPS_INIT, "request frames busaddr= %#016jx size= %d\n", (uintmax_t)sc->req_busaddr, rsize); sc->chains = malloc(sizeof(struct mps_chain) * sc->num_chains, M_MPT2, M_NOWAIT | M_ZERO); if (!sc->chains) { mps_dprint(sc, MPS_ERROR, "Cannot allocate chain memory\n"); return (ENOMEM); } rsize = sc->reqframesz * sc->num_chains; bus_dma_template_clone(&t, sc->req_dmat); BUS_DMA_TEMPLATE_FILL(&t, BD_MAXSIZE(rsize), BD_MAXSEGSIZE(rsize), BD_NSEGMENTS(howmany(rsize, PAGE_SIZE))); if (bus_dma_template_tag(&t, &sc->chain_dmat)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate chain DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->chain_dmat, (void **)&sc->chain_frames, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &sc->chain_map)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate chain memory\n"); return (ENOMEM); } if (bus_dmamap_load(sc->chain_dmat, sc->chain_map, sc->chain_frames, rsize, mps_load_chains_cb, sc, BUS_DMA_NOWAIT)) { mps_dprint(sc, MPS_ERROR, "Cannot load chain memory\n"); bus_dmamem_free(sc->chain_dmat, sc->chain_frames, sc->chain_map); return (ENOMEM); } rsize = MPS_SENSE_LEN * sc->num_reqs; bus_dma_template_clone(&t, sc->req_dmat); BUS_DMA_TEMPLATE_FILL(&t, BD_ALIGNMENT(1), BD_MAXSIZE(rsize), BD_MAXSEGSIZE(rsize)); if (bus_dma_template_tag(&t, &sc->sense_dmat)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate sense DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->sense_dmat, (void **)&sc->sense_frames, BUS_DMA_NOWAIT, &sc->sense_map)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate sense memory\n"); return (ENOMEM); } bzero(sc->sense_frames, rsize); bus_dmamap_load(sc->sense_dmat, sc->sense_map, sc->sense_frames, rsize, mps_memaddr_cb, &sc->sense_busaddr, 0); mps_dprint(sc, MPS_INIT, "sense frames busaddr= %#016jx size= %d\n", (uintmax_t)sc->sense_busaddr, rsize); nsegs = (sc->maxio / PAGE_SIZE) + 1; bus_dma_template_init(&t, sc->mps_parent_dmat); BUS_DMA_TEMPLATE_FILL(&t, BD_MAXSIZE(BUS_SPACE_MAXSIZE_32BIT), BD_NSEGMENTS(nsegs), BD_MAXSEGSIZE(BUS_SPACE_MAXSIZE_24BIT), BD_FLAGS(BUS_DMA_ALLOCNOW), BD_LOCKFUNC(busdma_lock_mutex), BD_LOCKFUNCARG(&sc->mps_mtx)); if (bus_dma_template_tag(&t, &sc->buffer_dmat)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate buffer DMA tag\n"); return (ENOMEM); } /* * SMID 0 cannot be used as a free command per the firmware spec. * Just drop that command instead of risking accounting bugs. */ sc->commands = malloc(sizeof(struct mps_command) * sc->num_reqs, M_MPT2, M_WAITOK | M_ZERO); for (i = 1; i < sc->num_reqs; i++) { cm = &sc->commands[i]; cm->cm_req = sc->req_frames + i * sc->reqframesz; cm->cm_req_busaddr = sc->req_busaddr + i * sc->reqframesz; cm->cm_sense = &sc->sense_frames[i]; cm->cm_sense_busaddr = sc->sense_busaddr + i * MPS_SENSE_LEN; cm->cm_desc.Default.SMID = i; cm->cm_sc = sc; cm->cm_state = MPS_CM_STATE_BUSY; TAILQ_INIT(&cm->cm_chain_list); callout_init_mtx(&cm->cm_callout, &sc->mps_mtx, 0); /* XXX Is a failure here a critical problem? */ if (bus_dmamap_create(sc->buffer_dmat, 0, &cm->cm_dmamap) == 0) if (i <= sc->num_prireqs) mps_free_high_priority_command(sc, cm); else mps_free_command(sc, cm); else { panic("failed to allocate command %d\n", i); sc->num_reqs = i; break; } } return (0); } static int mps_init_queues(struct mps_softc *sc) { int i; memset((uint8_t *)sc->post_queue, 0xff, sc->pqdepth * 8); /* * According to the spec, we need to use one less reply than we * have space for on the queue. So sc->num_replies (the number we * use) should be less than sc->fqdepth (allocated size). */ if (sc->num_replies >= sc->fqdepth) return (EINVAL); /* * Initialize all of the free queue entries. */ for (i = 0; i < sc->fqdepth; i++) sc->free_queue[i] = sc->reply_busaddr + (i * sc->replyframesz); sc->replyfreeindex = sc->num_replies; return (0); } /* Get the driver parameter tunables. Lowest priority are the driver defaults. * Next are the global settings, if they exist. Highest are the per-unit * settings, if they exist. */ void mps_get_tunables(struct mps_softc *sc) { char tmpstr[80], mps_debug[80]; /* XXX default to some debugging for now */ sc->mps_debug = MPS_INFO|MPS_FAULT; sc->disable_msix = 0; sc->disable_msi = 0; sc->max_msix = MPS_MSIX_MAX; sc->max_chains = MPS_CHAIN_FRAMES; sc->max_io_pages = MPS_MAXIO_PAGES; sc->enable_ssu = MPS_SSU_ENABLE_SSD_DISABLE_HDD; sc->spinup_wait_time = DEFAULT_SPINUP_WAIT; sc->use_phynum = 1; sc->max_reqframes = MPS_REQ_FRAMES; sc->max_prireqframes = MPS_PRI_REQ_FRAMES; sc->max_replyframes = MPS_REPLY_FRAMES; sc->max_evtframes = MPS_EVT_REPLY_FRAMES; /* * Grab the global variables. */ bzero(mps_debug, 80); if (TUNABLE_STR_FETCH("hw.mps.debug_level", mps_debug, 80) != 0) mps_parse_debug(sc, mps_debug); TUNABLE_INT_FETCH("hw.mps.disable_msix", &sc->disable_msix); TUNABLE_INT_FETCH("hw.mps.disable_msi", &sc->disable_msi); TUNABLE_INT_FETCH("hw.mps.max_msix", &sc->max_msix); TUNABLE_INT_FETCH("hw.mps.max_chains", &sc->max_chains); TUNABLE_INT_FETCH("hw.mps.max_io_pages", &sc->max_io_pages); TUNABLE_INT_FETCH("hw.mps.enable_ssu", &sc->enable_ssu); TUNABLE_INT_FETCH("hw.mps.spinup_wait_time", &sc->spinup_wait_time); TUNABLE_INT_FETCH("hw.mps.use_phy_num", &sc->use_phynum); TUNABLE_INT_FETCH("hw.mps.max_reqframes", &sc->max_reqframes); TUNABLE_INT_FETCH("hw.mps.max_prireqframes", &sc->max_prireqframes); TUNABLE_INT_FETCH("hw.mps.max_replyframes", &sc->max_replyframes); TUNABLE_INT_FETCH("hw.mps.max_evtframes", &sc->max_evtframes); /* Grab the unit-instance variables */ snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.debug_level", device_get_unit(sc->mps_dev)); bzero(mps_debug, 80); if (TUNABLE_STR_FETCH(tmpstr, mps_debug, 80) != 0) mps_parse_debug(sc, mps_debug); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.disable_msix", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->disable_msix); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.disable_msi", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->disable_msi); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.max_msix", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_msix); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.max_chains", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_chains); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.max_io_pages", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_io_pages); bzero(sc->exclude_ids, sizeof(sc->exclude_ids)); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.exclude_ids", device_get_unit(sc->mps_dev)); TUNABLE_STR_FETCH(tmpstr, sc->exclude_ids, sizeof(sc->exclude_ids)); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.enable_ssu", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->enable_ssu); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.spinup_wait_time", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->spinup_wait_time); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.use_phy_num", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->use_phynum); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.max_reqframes", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_reqframes); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.max_prireqframes", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_prireqframes); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.max_replyframes", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_replyframes); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.max_evtframes", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_evtframes); } static void mps_setup_sysctl(struct mps_softc *sc) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; char tmpstr[80], tmpstr2[80]; /* * Setup the sysctl variable so the user can change the debug level * on the fly. */ snprintf(tmpstr, sizeof(tmpstr), "MPS controller %d", device_get_unit(sc->mps_dev)); snprintf(tmpstr2, sizeof(tmpstr2), "%d", device_get_unit(sc->mps_dev)); sysctl_ctx = device_get_sysctl_ctx(sc->mps_dev); if (sysctl_ctx != NULL) sysctl_tree = device_get_sysctl_tree(sc->mps_dev); if (sysctl_tree == NULL) { sysctl_ctx_init(&sc->sysctl_ctx); sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_hw_mps), OID_AUTO, tmpstr2, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, tmpstr); if (sc->sysctl_tree == NULL) return; sysctl_ctx = &sc->sysctl_ctx; sysctl_tree = sc->sysctl_tree; } SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "debug_level", CTLTYPE_STRING | CTLFLAG_RW |CTLFLAG_MPSAFE, sc, 0, mps_debug_sysctl, "A", "mps debug level"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "disable_msix", CTLFLAG_RD, &sc->disable_msix, 0, "Disable the use of MSI-X interrupts"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "disable_msi", CTLFLAG_RD, &sc->disable_msi, 0, "Disable the use of MSI interrupts"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_msix", CTLFLAG_RD, &sc->max_msix, 0, "User-defined maximum number of MSIX queues"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "msix_msgs", CTLFLAG_RD, &sc->msi_msgs, 0, "Negotiated number of MSIX queues"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_reqframes", CTLFLAG_RD, &sc->max_reqframes, 0, "Total number of allocated request frames"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_prireqframes", CTLFLAG_RD, &sc->max_prireqframes, 0, "Total number of allocated high priority request frames"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_replyframes", CTLFLAG_RD, &sc->max_replyframes, 0, "Total number of allocated reply frames"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_evtframes", CTLFLAG_RD, &sc->max_evtframes, 0, "Total number of event frames allocated"); SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "firmware_version", CTLFLAG_RD, sc->fw_version, strlen(sc->fw_version), "firmware version"); SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "driver_version", CTLFLAG_RD, MPS_DRIVER_VERSION, strlen(MPS_DRIVER_VERSION), "driver version"); SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "msg_version", CTLFLAG_RD, sc->msg_version, strlen(sc->msg_version), "message interface version"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "io_cmds_active", CTLFLAG_RD, &sc->io_cmds_active, 0, "number of currently active commands"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "io_cmds_highwater", CTLFLAG_RD, &sc->io_cmds_highwater, 0, "maximum active commands seen"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_free", CTLFLAG_RD, &sc->chain_free, 0, "number of free chain elements"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_free_lowwater", CTLFLAG_RD, &sc->chain_free_lowwater, 0,"lowest number of free chain elements"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_chains", CTLFLAG_RD, &sc->max_chains, 0,"maximum chain frames that will be allocated"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_io_pages", CTLFLAG_RD, &sc->max_io_pages, 0,"maximum pages to allow per I/O (if <1 use " "IOCFacts)"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "enable_ssu", CTLFLAG_RW, &sc->enable_ssu, 0, "enable SSU to SATA SSD/HDD at shutdown"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_alloc_fail", CTLFLAG_RD, &sc->chain_alloc_fail, "chain allocation failures"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "spinup_wait_time", CTLFLAG_RD, &sc->spinup_wait_time, DEFAULT_SPINUP_WAIT, "seconds to wait for " "spinup after SATA ID error"); SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "mapping_table_dump", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, 0, + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, mps_mapping_dump, "A", "Mapping Table Dump"); SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "encl_table_dump", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, 0, + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, mps_mapping_encl_dump, "A", "Enclosure Table Dump"); SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "dump_reqs", - CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_NEEDGIANT, + CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, sc, 0, mps_dump_reqs, "I", "Dump Active Requests"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "use_phy_num", CTLFLAG_RD, &sc->use_phynum, 0, "Use the phy number for enumeration"); } static struct mps_debug_string { char *name; int flag; } mps_debug_strings[] = { {"info", MPS_INFO}, {"fault", MPS_FAULT}, {"event", MPS_EVENT}, {"log", MPS_LOG}, {"recovery", MPS_RECOVERY}, {"error", MPS_ERROR}, {"init", MPS_INIT}, {"xinfo", MPS_XINFO}, {"user", MPS_USER}, {"mapping", MPS_MAPPING}, {"trace", MPS_TRACE} }; enum mps_debug_level_combiner { COMB_NONE, COMB_ADD, COMB_SUB }; static int mps_debug_sysctl(SYSCTL_HANDLER_ARGS) { struct mps_softc *sc; struct mps_debug_string *string; struct sbuf *sbuf; char *buffer; size_t sz; int i, len, debug, error; sc = (struct mps_softc *)arg1; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); debug = sc->mps_debug; sbuf_printf(sbuf, "%#x", debug); sz = sizeof(mps_debug_strings) / sizeof(mps_debug_strings[0]); for (i = 0; i < sz; i++) { string = &mps_debug_strings[i]; if (debug & string->flag) sbuf_printf(sbuf, ",%s", string->name); } error = sbuf_finish(sbuf); sbuf_delete(sbuf); if (error || req->newptr == NULL) return (error); len = req->newlen - req->newidx; if (len == 0) return (0); buffer = malloc(len, M_MPT2, M_ZERO|M_WAITOK); error = SYSCTL_IN(req, buffer, len); mps_parse_debug(sc, buffer); free(buffer, M_MPT2); return (error); } static void mps_parse_debug(struct mps_softc *sc, char *list) { struct mps_debug_string *string; enum mps_debug_level_combiner op; char *token, *endtoken; size_t sz; int flags, i; if (list == NULL || *list == '\0') return; if (*list == '+') { op = COMB_ADD; list++; } else if (*list == '-') { op = COMB_SUB; list++; } else op = COMB_NONE; if (*list == '\0') return; flags = 0; sz = sizeof(mps_debug_strings) / sizeof(mps_debug_strings[0]); while ((token = strsep(&list, ":,")) != NULL) { /* Handle integer flags */ flags |= strtol(token, &endtoken, 0); if (token != endtoken) continue; /* Handle text flags */ for (i = 0; i < sz; i++) { string = &mps_debug_strings[i]; if (strcasecmp(token, string->name) == 0) { flags |= string->flag; break; } } } switch (op) { case COMB_NONE: sc->mps_debug = flags; break; case COMB_ADD: sc->mps_debug |= flags; break; case COMB_SUB: sc->mps_debug &= (~flags); break; } return; } struct mps_dumpreq_hdr { uint32_t smid; uint32_t state; uint32_t numframes; uint32_t deschi; uint32_t desclo; }; static int mps_dump_reqs(SYSCTL_HANDLER_ARGS) { struct mps_softc *sc; struct mps_chain *chain, *chain1; struct mps_command *cm; struct mps_dumpreq_hdr hdr; struct sbuf *sb; uint32_t smid, state; int i, numreqs, error = 0; sc = (struct mps_softc *)arg1; if ((error = priv_check(curthread, PRIV_DRIVER)) != 0) { printf("priv check error %d\n", error); return (error); } state = MPS_CM_STATE_INQUEUE; smid = 1; numreqs = sc->num_reqs; if (req->newptr != NULL) return (EINVAL); if (smid == 0 || smid > sc->num_reqs) return (EINVAL); if (numreqs <= 0 || (numreqs + smid > sc->num_reqs)) numreqs = sc->num_reqs; sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); /* Best effort, no locking */ for (i = smid; i < numreqs; i++) { cm = &sc->commands[i]; if (cm->cm_state != state) continue; hdr.smid = i; hdr.state = cm->cm_state; hdr.numframes = 1; hdr.deschi = cm->cm_desc.Words.High; hdr.desclo = cm->cm_desc.Words.Low; TAILQ_FOREACH_SAFE(chain, &cm->cm_chain_list, chain_link, chain1) hdr.numframes++; sbuf_bcat(sb, &hdr, sizeof(hdr)); sbuf_bcat(sb, cm->cm_req, 128); TAILQ_FOREACH_SAFE(chain, &cm->cm_chain_list, chain_link, chain1) sbuf_bcat(sb, chain->chain, 128); } error = sbuf_finish(sb); sbuf_delete(sb); return (error); } int mps_attach(struct mps_softc *sc) { int error; MPS_FUNCTRACE(sc); mps_dprint(sc, MPS_INIT, "%s entered\n", __func__); mtx_init(&sc->mps_mtx, "MPT2SAS lock", NULL, MTX_DEF); callout_init_mtx(&sc->periodic, &sc->mps_mtx, 0); callout_init_mtx(&sc->device_check_callout, &sc->mps_mtx, 0); TAILQ_INIT(&sc->event_list); timevalclear(&sc->lastfail); if ((error = mps_transition_ready(sc)) != 0) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "failed to transition " "ready\n"); return (error); } sc->facts = malloc(sizeof(MPI2_IOC_FACTS_REPLY), M_MPT2, M_ZERO|M_NOWAIT); if(!sc->facts) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "Cannot allocate memory, " "exit\n"); return (ENOMEM); } /* * Get IOC Facts and allocate all structures based on this information. * A Diag Reset will also call mps_iocfacts_allocate and re-read the IOC * Facts. If relevant values have changed in IOC Facts, this function * will free all of the memory based on IOC Facts and reallocate that * memory. If this fails, any allocated memory should already be freed. */ if ((error = mps_iocfacts_allocate(sc, TRUE)) != 0) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "IOC Facts based allocation " "failed with error %d, exit\n", error); return (error); } /* Start the periodic watchdog check on the IOC Doorbell */ mps_periodic(sc); /* * The portenable will kick off discovery events that will drive the * rest of the initialization process. The CAM/SAS module will * hold up the boot sequence until discovery is complete. */ sc->mps_ich.ich_func = mps_startup; sc->mps_ich.ich_arg = sc; if (config_intrhook_establish(&sc->mps_ich) != 0) { mps_dprint(sc, MPS_INIT|MPS_ERROR, "Cannot establish MPS config hook\n"); error = EINVAL; } /* * Allow IR to shutdown gracefully when shutdown occurs. */ sc->shutdown_eh = EVENTHANDLER_REGISTER(shutdown_final, mpssas_ir_shutdown, sc, SHUTDOWN_PRI_DEFAULT); if (sc->shutdown_eh == NULL) mps_dprint(sc, MPS_INIT|MPS_ERROR, "shutdown event registration failed\n"); mps_setup_sysctl(sc); sc->mps_flags |= MPS_FLAGS_ATTACH_DONE; mps_dprint(sc, MPS_INIT, "%s exit error= %d\n", __func__, error); return (error); } /* Run through any late-start handlers. */ static void mps_startup(void *arg) { struct mps_softc *sc; sc = (struct mps_softc *)arg; mps_dprint(sc, MPS_INIT, "%s entered\n", __func__); mps_lock(sc); mps_unmask_intr(sc); /* initialize device mapping tables */ mps_base_static_config_pages(sc); mps_mapping_initialize(sc); mpssas_startup(sc); mps_unlock(sc); mps_dprint(sc, MPS_INIT, "disestablish config intrhook\n"); config_intrhook_disestablish(&sc->mps_ich); sc->mps_ich.ich_arg = NULL; mps_dprint(sc, MPS_INIT, "%s exit\n", __func__); } /* Periodic watchdog. Is called with the driver lock already held. */ static void mps_periodic(void *arg) { struct mps_softc *sc; uint32_t db; sc = (struct mps_softc *)arg; if (sc->mps_flags & MPS_FLAGS_SHUTDOWN) return; db = mps_regread(sc, MPI2_DOORBELL_OFFSET); if ((db & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { mps_dprint(sc, MPS_FAULT, "IOC Fault 0x%08x, Resetting\n", db); mps_reinit(sc); } callout_reset(&sc->periodic, MPS_PERIODIC_DELAY * hz, mps_periodic, sc); } static void mps_log_evt_handler(struct mps_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *event) { MPI2_EVENT_DATA_LOG_ENTRY_ADDED *entry; MPS_DPRINT_EVENT(sc, generic, event); switch (event->Event) { case MPI2_EVENT_LOG_DATA: mps_dprint(sc, MPS_EVENT, "MPI2_EVENT_LOG_DATA:\n"); if (sc->mps_debug & MPS_EVENT) hexdump(event->EventData, event->EventDataLength, NULL, 0); break; case MPI2_EVENT_LOG_ENTRY_ADDED: entry = (MPI2_EVENT_DATA_LOG_ENTRY_ADDED *)event->EventData; mps_dprint(sc, MPS_EVENT, "MPI2_EVENT_LOG_ENTRY_ADDED event " "0x%x Sequence %d:\n", entry->LogEntryQualifier, entry->LogSequence); break; default: break; } return; } static int mps_attach_log(struct mps_softc *sc) { u32 events[MPI2_EVENT_NOTIFY_EVENTMASK_WORDS]; bzero(events, 16); setbit(events, MPI2_EVENT_LOG_DATA); setbit(events, MPI2_EVENT_LOG_ENTRY_ADDED); mps_register_events(sc, events, mps_log_evt_handler, NULL, &sc->mps_log_eh); return (0); } static int mps_detach_log(struct mps_softc *sc) { if (sc->mps_log_eh != NULL) mps_deregister_events(sc, sc->mps_log_eh); return (0); } /* * Free all of the driver resources and detach submodules. Should be called * without the lock held. */ int mps_free(struct mps_softc *sc) { int error; mps_dprint(sc, MPS_INIT, "%s entered\n", __func__); /* Turn off the watchdog */ mps_lock(sc); sc->mps_flags |= MPS_FLAGS_SHUTDOWN; mps_unlock(sc); /* Lock must not be held for this */ callout_drain(&sc->periodic); callout_drain(&sc->device_check_callout); if (((error = mps_detach_log(sc)) != 0) || ((error = mps_detach_sas(sc)) != 0)) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "failed to detach " "subsystems, exit\n"); return (error); } mps_detach_user(sc); /* Put the IOC back in the READY state. */ mps_lock(sc); if ((error = mps_transition_ready(sc)) != 0) { mps_unlock(sc); return (error); } mps_unlock(sc); if (sc->facts != NULL) free(sc->facts, M_MPT2); /* * Free all buffers that are based on IOC Facts. A Diag Reset may need * to free these buffers too. */ mps_iocfacts_free(sc); if (sc->sysctl_tree != NULL) sysctl_ctx_free(&sc->sysctl_ctx); /* Deregister the shutdown function */ if (sc->shutdown_eh != NULL) EVENTHANDLER_DEREGISTER(shutdown_final, sc->shutdown_eh); mtx_destroy(&sc->mps_mtx); mps_dprint(sc, MPS_INIT, "%s exit\n", __func__); return (0); } static __inline void mps_complete_command(struct mps_softc *sc, struct mps_command *cm) { MPS_FUNCTRACE(sc); if (cm == NULL) { mps_dprint(sc, MPS_ERROR, "Completing NULL command\n"); return; } if (cm->cm_flags & MPS_CM_FLAGS_POLLED) cm->cm_flags |= MPS_CM_FLAGS_COMPLETE; if (cm->cm_complete != NULL) { mps_dprint(sc, MPS_TRACE, "%s cm %p calling cm_complete %p data %p reply %p\n", __func__, cm, cm->cm_complete, cm->cm_complete_data, cm->cm_reply); cm->cm_complete(sc, cm); } if (cm->cm_flags & MPS_CM_FLAGS_WAKEUP) { mps_dprint(sc, MPS_TRACE, "waking up %p\n", cm); wakeup(cm); } if (cm->cm_sc->io_cmds_active != 0) { cm->cm_sc->io_cmds_active--; } else { mps_dprint(sc, MPS_ERROR, "Warning: io_cmds_active is " "out of sync - resynching to 0\n"); } } static void mps_sas_log_info(struct mps_softc *sc , u32 log_info) { union loginfo_type { u32 loginfo; struct { u32 subcode:16; u32 code:8; u32 originator:4; u32 bus_type:4; } dw; }; union loginfo_type sas_loginfo; char *originator_str = NULL; sas_loginfo.loginfo = log_info; if (sas_loginfo.dw.bus_type != 3 /*SAS*/) return; /* each nexus loss loginfo */ if (log_info == 0x31170000) return; /* eat the loginfos associated with task aborts */ if ((log_info == 30050000 || log_info == 0x31140000 || log_info == 0x31130000)) return; switch (sas_loginfo.dw.originator) { case 0: originator_str = "IOP"; break; case 1: originator_str = "PL"; break; case 2: originator_str = "IR"; break; } mps_dprint(sc, MPS_LOG, "log_info(0x%08x): originator(%s), " "code(0x%02x), sub_code(0x%04x)\n", log_info, originator_str, sas_loginfo.dw.code, sas_loginfo.dw.subcode); } static void mps_display_reply_info(struct mps_softc *sc, uint8_t *reply) { MPI2DefaultReply_t *mpi_reply; u16 sc_status; mpi_reply = (MPI2DefaultReply_t*)reply; sc_status = le16toh(mpi_reply->IOCStatus); if (sc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) mps_sas_log_info(sc, le32toh(mpi_reply->IOCLogInfo)); } void mps_intr(void *data) { struct mps_softc *sc; uint32_t status; sc = (struct mps_softc *)data; mps_dprint(sc, MPS_TRACE, "%s\n", __func__); /* * Check interrupt status register to flush the bus. This is * needed for both INTx interrupts and driver-driven polling */ status = mps_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET); if ((status & MPI2_HIS_REPLY_DESCRIPTOR_INTERRUPT) == 0) return; mps_lock(sc); mps_intr_locked(data); mps_unlock(sc); return; } /* * In theory, MSI/MSIX interrupts shouldn't need to read any registers on the * chip. Hopefully this theory is correct. */ void mps_intr_msi(void *data) { struct mps_softc *sc; sc = (struct mps_softc *)data; mps_dprint(sc, MPS_TRACE, "%s\n", __func__); mps_lock(sc); mps_intr_locked(data); mps_unlock(sc); return; } /* * The locking is overly broad and simplistic, but easy to deal with for now. */ void mps_intr_locked(void *data) { MPI2_REPLY_DESCRIPTORS_UNION *desc; MPI2_DIAG_RELEASE_REPLY *rel_rep; mps_fw_diagnostic_buffer_t *pBuffer; struct mps_softc *sc; struct mps_command *cm = NULL; uint64_t tdesc; uint8_t flags; u_int pq; sc = (struct mps_softc *)data; pq = sc->replypostindex; mps_dprint(sc, MPS_TRACE, "%s sc %p starting with replypostindex %u\n", __func__, sc, sc->replypostindex); for ( ;; ) { cm = NULL; desc = &sc->post_queue[sc->replypostindex]; /* * Copy and clear out the descriptor so that any reentry will * immediately know that this descriptor has already been * looked at. There is unfortunate casting magic because the * MPI API doesn't have a cardinal 64bit type. */ tdesc = 0xffffffffffffffff; tdesc = atomic_swap_64((uint64_t *)desc, tdesc); desc = (MPI2_REPLY_DESCRIPTORS_UNION *)&tdesc; flags = desc->Default.ReplyFlags & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK; if ((flags == MPI2_RPY_DESCRIPT_FLAGS_UNUSED) || (le32toh(desc->Words.High) == 0xffffffff)) break; /* increment the replypostindex now, so that event handlers * and cm completion handlers which decide to do a diag * reset can zero it without it getting incremented again * afterwards, and we break out of this loop on the next * iteration since the reply post queue has been cleared to * 0xFF and all descriptors look unused (which they are). */ if (++sc->replypostindex >= sc->pqdepth) sc->replypostindex = 0; switch (flags) { case MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS: cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)]; KASSERT(cm->cm_state == MPS_CM_STATE_INQUEUE, ("command not inqueue\n")); cm->cm_state = MPS_CM_STATE_BUSY; cm->cm_reply = NULL; break; case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY: { uint32_t baddr; uint8_t *reply; /* * Re-compose the reply address from the address * sent back from the chip. The ReplyFrameAddress * is the lower 32 bits of the physical address of * particular reply frame. Convert that address to * host format, and then use that to provide the * offset against the virtual address base * (sc->reply_frames). */ baddr = le32toh(desc->AddressReply.ReplyFrameAddress); reply = sc->reply_frames + (baddr - ((uint32_t)sc->reply_busaddr)); /* * Make sure the reply we got back is in a valid * range. If not, go ahead and panic here, since * we'll probably panic as soon as we deference the * reply pointer anyway. */ if ((reply < sc->reply_frames) || (reply > (sc->reply_frames + (sc->fqdepth * sc->replyframesz)))) { printf("%s: WARNING: reply %p out of range!\n", __func__, reply); printf("%s: reply_frames %p, fqdepth %d, " "frame size %d\n", __func__, sc->reply_frames, sc->fqdepth, sc->replyframesz); printf("%s: baddr %#x,\n", __func__, baddr); /* LSI-TODO. See Linux Code for Graceful exit */ panic("Reply address out of range"); } if (le16toh(desc->AddressReply.SMID) == 0) { if (((MPI2_DEFAULT_REPLY *)reply)->Function == MPI2_FUNCTION_DIAG_BUFFER_POST) { /* * If SMID is 0 for Diag Buffer Post, * this implies that the reply is due to * a release function with a status that * the buffer has been released. Set * the buffer flags accordingly. */ rel_rep = (MPI2_DIAG_RELEASE_REPLY *)reply; if ((le16toh(rel_rep->IOCStatus) & MPI2_IOCSTATUS_MASK) == MPI2_IOCSTATUS_DIAGNOSTIC_RELEASED) { pBuffer = &sc->fw_diag_buffer_list[ rel_rep->BufferType]; pBuffer->valid_data = TRUE; pBuffer->owned_by_firmware = FALSE; pBuffer->immediate = FALSE; } } else mps_dispatch_event(sc, baddr, (MPI2_EVENT_NOTIFICATION_REPLY *) reply); } else { /* * Ignore commands not in INQUEUE state * since they've already been completed * via another path. */ cm = &sc->commands[ le16toh(desc->AddressReply.SMID)]; if (cm->cm_state == MPS_CM_STATE_INQUEUE) { cm->cm_state = MPS_CM_STATE_BUSY; cm->cm_reply = reply; cm->cm_reply_data = le32toh( desc->AddressReply.ReplyFrameAddress); } else { mps_dprint(sc, MPS_RECOVERY, "Bad state for ADDRESS_REPLY status," " ignoring state %d cm %p\n", cm->cm_state, cm); } } break; } case MPI2_RPY_DESCRIPT_FLAGS_TARGETASSIST_SUCCESS: case MPI2_RPY_DESCRIPT_FLAGS_TARGET_COMMAND_BUFFER: case MPI2_RPY_DESCRIPT_FLAGS_RAID_ACCELERATOR_SUCCESS: default: /* Unhandled */ mps_dprint(sc, MPS_ERROR, "Unhandled reply 0x%x\n", desc->Default.ReplyFlags); cm = NULL; break; } if (cm != NULL) { // Print Error reply frame if (cm->cm_reply) mps_display_reply_info(sc,cm->cm_reply); mps_complete_command(sc, cm); } } if (pq != sc->replypostindex) { mps_dprint(sc, MPS_TRACE, "%s sc %p writing postindex %d\n", __func__, sc, sc->replypostindex); mps_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, sc->replypostindex); } return; } static void mps_dispatch_event(struct mps_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *reply) { struct mps_event_handle *eh; int event, handled = 0; event = le16toh(reply->Event); TAILQ_FOREACH(eh, &sc->event_list, eh_list) { if (isset(eh->mask, event)) { eh->callback(sc, data, reply); handled++; } } if (handled == 0) mps_dprint(sc, MPS_EVENT, "Unhandled event 0x%x\n", le16toh(event)); /* * This is the only place that the event/reply should be freed. * Anything wanting to hold onto the event data should have * already copied it into their own storage. */ mps_free_reply(sc, data); } static void mps_reregister_events_complete(struct mps_softc *sc, struct mps_command *cm) { mps_dprint(sc, MPS_TRACE, "%s\n", __func__); if (cm->cm_reply) MPS_DPRINT_EVENT(sc, generic, (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply); mps_free_command(sc, cm); /* next, send a port enable */ mpssas_startup(sc); } /* * For both register_events and update_events, the caller supplies a bitmap * of events that it _wants_. These functions then turn that into a bitmask * suitable for the controller. */ int mps_register_events(struct mps_softc *sc, u32 *mask, mps_evt_callback_t *cb, void *data, struct mps_event_handle **handle) { struct mps_event_handle *eh; int error = 0; eh = malloc(sizeof(struct mps_event_handle), M_MPT2, M_WAITOK|M_ZERO); eh->callback = cb; eh->data = data; TAILQ_INSERT_TAIL(&sc->event_list, eh, eh_list); if (mask != NULL) error = mps_update_events(sc, eh, mask); *handle = eh; return (error); } int mps_update_events(struct mps_softc *sc, struct mps_event_handle *handle, u32 *mask) { MPI2_EVENT_NOTIFICATION_REQUEST *evtreq; MPI2_EVENT_NOTIFICATION_REPLY *reply = NULL; struct mps_command *cm; int error, i; mps_dprint(sc, MPS_TRACE, "%s\n", __func__); if ((mask != NULL) && (handle != NULL)) bcopy(mask, &handle->mask[0], sizeof(u32) * MPI2_EVENT_NOTIFY_EVENTMASK_WORDS); for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) sc->event_mask[i] = -1; for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) sc->event_mask[i] &= ~handle->mask[i]; if ((cm = mps_alloc_command(sc)) == NULL) return (EBUSY); evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req; evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION; evtreq->MsgFlags = 0; evtreq->SASBroadcastPrimitiveMasks = 0; #ifdef MPS_DEBUG_ALL_EVENTS { u_char fullmask[16]; memset(fullmask, 0x00, 16); bcopy(fullmask, &evtreq->EventMasks[0], sizeof(u32) * MPI2_EVENT_NOTIFY_EVENTMASK_WORDS); } #else for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) evtreq->EventMasks[i] = htole32(sc->event_mask[i]); #endif cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_data = NULL; error = mps_wait_command(sc, &cm, 60, 0); if (cm != NULL) reply = (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply; if ((reply == NULL) || (reply->IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS) error = ENXIO; if (reply) MPS_DPRINT_EVENT(sc, generic, reply); mps_dprint(sc, MPS_TRACE, "%s finished error %d\n", __func__, error); if (cm != NULL) mps_free_command(sc, cm); return (error); } static int mps_reregister_events(struct mps_softc *sc) { MPI2_EVENT_NOTIFICATION_REQUEST *evtreq; struct mps_command *cm; struct mps_event_handle *eh; int error, i; mps_dprint(sc, MPS_TRACE, "%s\n", __func__); /* first, reregister events */ for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) sc->event_mask[i] = -1; TAILQ_FOREACH(eh, &sc->event_list, eh_list) { for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) sc->event_mask[i] &= ~eh->mask[i]; } if ((cm = mps_alloc_command(sc)) == NULL) return (EBUSY); evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req; evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION; evtreq->MsgFlags = 0; evtreq->SASBroadcastPrimitiveMasks = 0; #ifdef MPS_DEBUG_ALL_EVENTS { u_char fullmask[16]; memset(fullmask, 0x00, 16); bcopy(fullmask, &evtreq->EventMasks[0], sizeof(u32) * MPI2_EVENT_NOTIFY_EVENTMASK_WORDS); } #else for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) evtreq->EventMasks[i] = htole32(sc->event_mask[i]); #endif cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_data = NULL; cm->cm_complete = mps_reregister_events_complete; error = mps_map_command(sc, cm); mps_dprint(sc, MPS_TRACE, "%s finished with error %d\n", __func__, error); return (error); } void mps_deregister_events(struct mps_softc *sc, struct mps_event_handle *handle) { TAILQ_REMOVE(&sc->event_list, handle, eh_list); free(handle, M_MPT2); } /* * Add a chain element as the next SGE for the specified command. * Reset cm_sge and cm_sgesize to indicate all the available space. */ static int mps_add_chain(struct mps_command *cm) { MPI2_SGE_CHAIN32 *sgc; struct mps_chain *chain; u_int space; if (cm->cm_sglsize < MPS_SGC_SIZE) panic("MPS: Need SGE Error Code\n"); chain = mps_alloc_chain(cm->cm_sc); if (chain == NULL) return (ENOBUFS); space = cm->cm_sc->reqframesz; /* * Note: a double-linked list is used to make it easier to * walk for debugging. */ TAILQ_INSERT_TAIL(&cm->cm_chain_list, chain, chain_link); sgc = (MPI2_SGE_CHAIN32 *)&cm->cm_sge->MpiChain; sgc->Length = htole16(space); sgc->NextChainOffset = 0; /* TODO Looks like bug in Setting sgc->Flags. * sgc->Flags = ( MPI2_SGE_FLAGS_CHAIN_ELEMENT | MPI2_SGE_FLAGS_64_BIT_ADDRESSING | * MPI2_SGE_FLAGS_SYSTEM_ADDRESS) << MPI2_SGE_FLAGS_SHIFT * This is fine.. because we are not using simple element. In case of * MPI2_SGE_CHAIN32, we have separate Length and Flags feild. */ sgc->Flags = MPI2_SGE_FLAGS_CHAIN_ELEMENT; sgc->Address = htole32(chain->chain_busaddr); cm->cm_sge = (MPI2_SGE_IO_UNION *)&chain->chain->MpiSimple; cm->cm_sglsize = space; return (0); } /* * Add one scatter-gather element (chain, simple, transaction context) * to the scatter-gather list for a command. Maintain cm_sglsize and * cm_sge as the remaining size and pointer to the next SGE to fill * in, respectively. */ int mps_push_sge(struct mps_command *cm, void *sgep, size_t len, int segsleft) { MPI2_SGE_TRANSACTION_UNION *tc = sgep; MPI2_SGE_SIMPLE64 *sge = sgep; int error, type; uint32_t saved_buf_len, saved_address_low, saved_address_high; type = (tc->Flags & MPI2_SGE_FLAGS_ELEMENT_MASK); #ifdef INVARIANTS switch (type) { case MPI2_SGE_FLAGS_TRANSACTION_ELEMENT: { if (len != tc->DetailsLength + 4) panic("TC %p length %u or %zu?", tc, tc->DetailsLength + 4, len); } break; case MPI2_SGE_FLAGS_CHAIN_ELEMENT: /* Driver only uses 32-bit chain elements */ if (len != MPS_SGC_SIZE) panic("CHAIN %p length %u or %zu?", sgep, MPS_SGC_SIZE, len); break; case MPI2_SGE_FLAGS_SIMPLE_ELEMENT: /* Driver only uses 64-bit SGE simple elements */ if (len != MPS_SGE64_SIZE) panic("SGE simple %p length %u or %zu?", sge, MPS_SGE64_SIZE, len); if (((le32toh(sge->FlagsLength) >> MPI2_SGE_FLAGS_SHIFT) & MPI2_SGE_FLAGS_ADDRESS_SIZE) == 0) panic("SGE simple %p not marked 64-bit?", sge); break; default: panic("Unexpected SGE %p, flags %02x", tc, tc->Flags); } #endif /* * case 1: 1 more segment, enough room for it * case 2: 2 more segments, enough room for both * case 3: >=2 more segments, only enough room for 1 and a chain * case 4: >=1 more segment, enough room for only a chain * case 5: >=1 more segment, no room for anything (error) */ /* * There should be room for at least a chain element, or this * code is buggy. Case (5). */ if (cm->cm_sglsize < MPS_SGC_SIZE) panic("MPS: Need SGE Error Code\n"); if (segsleft >= 1 && cm->cm_sglsize < len + MPS_SGC_SIZE) { /* * 1 or more segment, enough room for only a chain. * Hope the previous element wasn't a Simple entry * that needed to be marked with * MPI2_SGE_FLAGS_LAST_ELEMENT. Case (4). */ if ((error = mps_add_chain(cm)) != 0) return (error); } if (segsleft >= 2 && cm->cm_sglsize < len + MPS_SGC_SIZE + MPS_SGE64_SIZE) { /* * There are 2 or more segments left to add, and only * enough room for 1 and a chain. Case (3). * * Mark as last element in this chain if necessary. */ if (type == MPI2_SGE_FLAGS_SIMPLE_ELEMENT) { sge->FlagsLength |= htole32( MPI2_SGE_FLAGS_LAST_ELEMENT << MPI2_SGE_FLAGS_SHIFT); } /* * Add the item then a chain. Do the chain now, * rather than on the next iteration, to simplify * understanding the code. */ cm->cm_sglsize -= len; bcopy(sgep, cm->cm_sge, len); cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); return (mps_add_chain(cm)); } #ifdef INVARIANTS /* Case 1: 1 more segment, enough room for it. */ if (segsleft == 1 && cm->cm_sglsize < len) panic("1 seg left and no room? %u versus %zu", cm->cm_sglsize, len); /* Case 2: 2 more segments, enough room for both */ if (segsleft == 2 && cm->cm_sglsize < len + MPS_SGE64_SIZE) panic("2 segs left and no room? %u versus %zu", cm->cm_sglsize, len); #endif if (segsleft == 1 && type == MPI2_SGE_FLAGS_SIMPLE_ELEMENT) { /* * If this is a bi-directional request, need to account for that * here. Save the pre-filled sge values. These will be used * either for the 2nd SGL or for a single direction SGL. If * cm_out_len is non-zero, this is a bi-directional request, so * fill in the OUT SGL first, then the IN SGL, otherwise just * fill in the IN SGL. Note that at this time, when filling in * 2 SGL's for a bi-directional request, they both use the same * DMA buffer (same cm command). */ saved_buf_len = le32toh(sge->FlagsLength) & 0x00FFFFFF; saved_address_low = sge->Address.Low; saved_address_high = sge->Address.High; if (cm->cm_out_len) { sge->FlagsLength = htole32(cm->cm_out_len | ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC | MPI2_SGE_FLAGS_64_BIT_ADDRESSING) << MPI2_SGE_FLAGS_SHIFT)); cm->cm_sglsize -= len; bcopy(sgep, cm->cm_sge, len); cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); } saved_buf_len |= ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_LIST | MPI2_SGE_FLAGS_64_BIT_ADDRESSING) << MPI2_SGE_FLAGS_SHIFT); if (cm->cm_flags & MPS_CM_FLAGS_DATAIN) { saved_buf_len |= ((uint32_t)(MPI2_SGE_FLAGS_IOC_TO_HOST) << MPI2_SGE_FLAGS_SHIFT); } else { saved_buf_len |= ((uint32_t)(MPI2_SGE_FLAGS_HOST_TO_IOC) << MPI2_SGE_FLAGS_SHIFT); } sge->FlagsLength = htole32(saved_buf_len); sge->Address.Low = saved_address_low; sge->Address.High = saved_address_high; } cm->cm_sglsize -= len; bcopy(sgep, cm->cm_sge, len); cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); return (0); } /* * Add one dma segment to the scatter-gather list for a command. */ int mps_add_dmaseg(struct mps_command *cm, vm_paddr_t pa, size_t len, u_int flags, int segsleft) { MPI2_SGE_SIMPLE64 sge; /* * This driver always uses 64-bit address elements for simplicity. */ bzero(&sge, sizeof(sge)); flags |= MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_64_BIT_ADDRESSING; sge.FlagsLength = htole32(len | (flags << MPI2_SGE_FLAGS_SHIFT)); mps_from_u64(pa, &sge.Address); return (mps_push_sge(cm, &sge, sizeof sge, segsleft)); } static void mps_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct mps_softc *sc; struct mps_command *cm; u_int i, dir, sflags; cm = (struct mps_command *)arg; sc = cm->cm_sc; /* * In this case, just print out a warning and let the chip tell the * user they did the wrong thing. */ if ((cm->cm_max_segs != 0) && (nsegs > cm->cm_max_segs)) { mps_dprint(sc, MPS_ERROR, "%s: warning: busdma returned %d segments, " "more than the %d allowed\n", __func__, nsegs, cm->cm_max_segs); } /* * Set up DMA direction flags. Bi-directional requests are also handled * here. In that case, both direction flags will be set. */ sflags = 0; if (cm->cm_flags & MPS_CM_FLAGS_SMP_PASS) { /* * We have to add a special case for SMP passthrough, there * is no easy way to generically handle it. The first * S/G element is used for the command (therefore the * direction bit needs to be set). The second one is used * for the reply. We'll leave it to the caller to make * sure we only have two buffers. */ /* * Even though the busdma man page says it doesn't make * sense to have both direction flags, it does in this case. * We have one s/g element being accessed in each direction. */ dir = BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD; /* * Set the direction flag on the first buffer in the SMP * passthrough request. We'll clear it for the second one. */ sflags |= MPI2_SGE_FLAGS_DIRECTION | MPI2_SGE_FLAGS_END_OF_BUFFER; } else if (cm->cm_flags & MPS_CM_FLAGS_DATAOUT) { sflags |= MPI2_SGE_FLAGS_HOST_TO_IOC; dir = BUS_DMASYNC_PREWRITE; } else dir = BUS_DMASYNC_PREREAD; for (i = 0; i < nsegs; i++) { if ((cm->cm_flags & MPS_CM_FLAGS_SMP_PASS) && (i != 0)) { sflags &= ~MPI2_SGE_FLAGS_DIRECTION; } error = mps_add_dmaseg(cm, segs[i].ds_addr, segs[i].ds_len, sflags, nsegs - i); if (error != 0) { /* Resource shortage, roll back! */ if (ratecheck(&sc->lastfail, &mps_chainfail_interval)) mps_dprint(sc, MPS_INFO, "Out of chain frames, " "consider increasing hw.mps.max_chains.\n"); cm->cm_flags |= MPS_CM_FLAGS_CHAIN_FAILED; mps_complete_command(sc, cm); return; } } bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap, dir); mps_enqueue_request(sc, cm); return; } static void mps_data_cb2(void *arg, bus_dma_segment_t *segs, int nsegs, bus_size_t mapsize, int error) { mps_data_cb(arg, segs, nsegs, error); } /* * This is the routine to enqueue commands ansynchronously. * Note that the only error path here is from bus_dmamap_load(), which can * return EINPROGRESS if it is waiting for resources. Other than this, it's * assumed that if you have a command in-hand, then you have enough credits * to use it. */ int mps_map_command(struct mps_softc *sc, struct mps_command *cm) { int error = 0; if (cm->cm_flags & MPS_CM_FLAGS_USE_UIO) { error = bus_dmamap_load_uio(sc->buffer_dmat, cm->cm_dmamap, &cm->cm_uio, mps_data_cb2, cm, 0); } else if (cm->cm_flags & MPS_CM_FLAGS_USE_CCB) { error = bus_dmamap_load_ccb(sc->buffer_dmat, cm->cm_dmamap, cm->cm_data, mps_data_cb, cm, 0); } else if ((cm->cm_data != NULL) && (cm->cm_length != 0)) { error = bus_dmamap_load(sc->buffer_dmat, cm->cm_dmamap, cm->cm_data, cm->cm_length, mps_data_cb, cm, 0); } else { /* Add a zero-length element as needed */ if (cm->cm_sge != NULL) mps_add_dmaseg(cm, 0, 0, 0, 1); mps_enqueue_request(sc, cm); } return (error); } /* * This is the routine to enqueue commands synchronously. An error of * EINPROGRESS from mps_map_command() is ignored since the command will * be executed and enqueued automatically. Other errors come from msleep(). */ int mps_wait_command(struct mps_softc *sc, struct mps_command **cmp, int timeout, int sleep_flag) { int error, rc; struct timeval cur_time, start_time; struct mps_command *cm = *cmp; if (sc->mps_flags & MPS_FLAGS_DIAGRESET) return EBUSY; cm->cm_complete = NULL; cm->cm_flags |= MPS_CM_FLAGS_POLLED; error = mps_map_command(sc, cm); if ((error != 0) && (error != EINPROGRESS)) return (error); /* * Check for context and wait for 50 mSec at a time until time has * expired or the command has finished. If msleep can't be used, need * to poll. */ if (curthread->td_no_sleeping != 0) sleep_flag = NO_SLEEP; getmicrouptime(&start_time); if (mtx_owned(&sc->mps_mtx) && sleep_flag == CAN_SLEEP) { cm->cm_flags |= MPS_CM_FLAGS_WAKEUP; error = msleep(cm, &sc->mps_mtx, 0, "mpswait", timeout*hz); if (error == EWOULDBLOCK) { /* * Record the actual elapsed time in the case of a * timeout for the message below. */ getmicrouptime(&cur_time); timevalsub(&cur_time, &start_time); } } else { while ((cm->cm_flags & MPS_CM_FLAGS_COMPLETE) == 0) { mps_intr_locked(sc); if (sleep_flag == CAN_SLEEP) pause("mpswait", hz/20); else DELAY(50000); getmicrouptime(&cur_time); timevalsub(&cur_time, &start_time); if (cur_time.tv_sec > timeout) { error = EWOULDBLOCK; break; } } } if (error == EWOULDBLOCK) { if (cm->cm_timeout_handler == NULL) { mps_dprint(sc, MPS_FAULT, "Calling Reinit from %s, timeout=%d," " elapsed=%jd\n", __func__, timeout, (intmax_t)cur_time.tv_sec); rc = mps_reinit(sc); mps_dprint(sc, MPS_FAULT, "Reinit %s\n", (rc == 0) ? "success" : "failed"); } else cm->cm_timeout_handler(sc, cm); if (sc->mps_flags & MPS_FLAGS_REALLOCATED) { /* * Tell the caller that we freed the command in a * reinit. */ *cmp = NULL; } error = ETIMEDOUT; } return (error); } /* * The MPT driver had a verbose interface for config pages. In this driver, * reduce it to much simpler terms, similar to the Linux driver. */ int mps_read_config_page(struct mps_softc *sc, struct mps_config_params *params) { MPI2_CONFIG_REQUEST *req; struct mps_command *cm; int error; if (sc->mps_flags & MPS_FLAGS_BUSY) { return (EBUSY); } cm = mps_alloc_command(sc); if (cm == NULL) { return (EBUSY); } req = (MPI2_CONFIG_REQUEST *)cm->cm_req; req->Function = MPI2_FUNCTION_CONFIG; req->Action = params->action; req->SGLFlags = 0; req->ChainOffset = 0; req->PageAddress = params->page_address; if (params->hdr.Struct.PageType == MPI2_CONFIG_PAGETYPE_EXTENDED) { MPI2_CONFIG_EXTENDED_PAGE_HEADER *hdr; hdr = ¶ms->hdr.Ext; req->ExtPageType = hdr->ExtPageType; req->ExtPageLength = hdr->ExtPageLength; req->Header.PageType = MPI2_CONFIG_PAGETYPE_EXTENDED; req->Header.PageLength = 0; /* Must be set to zero */ req->Header.PageNumber = hdr->PageNumber; req->Header.PageVersion = hdr->PageVersion; } else { MPI2_CONFIG_PAGE_HEADER *hdr; hdr = ¶ms->hdr.Struct; req->Header.PageType = hdr->PageType; req->Header.PageNumber = hdr->PageNumber; req->Header.PageLength = hdr->PageLength; req->Header.PageVersion = hdr->PageVersion; } cm->cm_data = params->buffer; cm->cm_length = params->length; if (cm->cm_data != NULL) { cm->cm_sge = &req->PageBufferSGE; cm->cm_sglsize = sizeof(MPI2_SGE_IO_UNION); cm->cm_flags = MPS_CM_FLAGS_SGE_SIMPLE | MPS_CM_FLAGS_DATAIN; } else cm->cm_sge = NULL; cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_complete_data = params; if (params->callback != NULL) { cm->cm_complete = mps_config_complete; return (mps_map_command(sc, cm)); } else { error = mps_wait_command(sc, &cm, 0, CAN_SLEEP); if (error) { mps_dprint(sc, MPS_FAULT, "Error %d reading config page\n", error); if (cm != NULL) mps_free_command(sc, cm); return (error); } mps_config_complete(sc, cm); } return (0); } int mps_write_config_page(struct mps_softc *sc, struct mps_config_params *params) { return (EINVAL); } static void mps_config_complete(struct mps_softc *sc, struct mps_command *cm) { MPI2_CONFIG_REPLY *reply; struct mps_config_params *params; MPS_FUNCTRACE(sc); params = cm->cm_complete_data; if (cm->cm_data != NULL) { bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->buffer_dmat, cm->cm_dmamap); } /* * XXX KDM need to do more error recovery? This results in the * device in question not getting probed. */ if ((cm->cm_flags & MPS_CM_FLAGS_ERROR_MASK) != 0) { params->status = MPI2_IOCSTATUS_BUSY; goto done; } reply = (MPI2_CONFIG_REPLY *)cm->cm_reply; if (reply == NULL) { params->status = MPI2_IOCSTATUS_BUSY; goto done; } params->status = reply->IOCStatus; if (params->hdr.Struct.PageType == MPI2_CONFIG_PAGETYPE_EXTENDED) { params->hdr.Ext.ExtPageType = reply->ExtPageType; params->hdr.Ext.ExtPageLength = reply->ExtPageLength; params->hdr.Ext.PageType = reply->Header.PageType; params->hdr.Ext.PageNumber = reply->Header.PageNumber; params->hdr.Ext.PageVersion = reply->Header.PageVersion; } else { params->hdr.Struct.PageType = reply->Header.PageType; params->hdr.Struct.PageNumber = reply->Header.PageNumber; params->hdr.Struct.PageLength = reply->Header.PageLength; params->hdr.Struct.PageVersion = reply->Header.PageVersion; } done: mps_free_command(sc, cm); if (params->callback != NULL) params->callback(sc, params); return; } diff --git a/sys/dev/mpt/mpt_raid.c b/sys/dev/mpt/mpt_raid.c index 28667d85cf4d..8350122bc337 100644 --- a/sys/dev/mpt/mpt_raid.c +++ b/sys/dev/mpt/mpt_raid.c @@ -1,1838 +1,1832 @@ /*- * Routines for handling the integrated RAID features LSI MPT Fusion adapters. * * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2005, WHEEL Sp. z o.o. * Copyright (c) 2005 Justin T. Gibbs. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon including * a substantially similar Disclaimer requirement for further binary * redistribution. * 3. Neither the names of the above listed copyright holders nor the names * of any contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF THE COPYRIGHT * OWNER OR CONTRIBUTOR IS ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Some Breakage and Bug Fixing added later. * Copyright (c) 2006, by Matthew Jacob * All Rights Reserved * * Support from LSI-Logic has also gone a great deal toward making this a * workable subsystem and is gratefully acknowledged. */ #include __FBSDID("$FreeBSD$"); #include #include #include "dev/mpt/mpilib/mpi_ioc.h" /* XXX Fix Event Handling!!! */ #include "dev/mpt/mpilib/mpi_raid.h" #include #include #include #include #include #include #include #include #include struct mpt_raid_action_result { union { MPI_RAID_VOL_INDICATOR indicator_struct; uint32_t new_settings; uint8_t phys_disk_num; } action_data; uint16_t action_status; }; #define REQ_TO_RAID_ACTION_RESULT(req) ((struct mpt_raid_action_result *) \ (((MSG_RAID_ACTION_REQUEST *)(req->req_vbuf)) + 1)) #define REQ_IOCSTATUS(req) ((req)->IOCStatus & MPI_IOCSTATUS_MASK) static mpt_probe_handler_t mpt_raid_probe; static mpt_attach_handler_t mpt_raid_attach; static mpt_enable_handler_t mpt_raid_enable; static mpt_event_handler_t mpt_raid_event; static mpt_shutdown_handler_t mpt_raid_shutdown; static mpt_reset_handler_t mpt_raid_ioc_reset; static mpt_detach_handler_t mpt_raid_detach; static struct mpt_personality mpt_raid_personality = { .name = "mpt_raid", .probe = mpt_raid_probe, .attach = mpt_raid_attach, .enable = mpt_raid_enable, .event = mpt_raid_event, .reset = mpt_raid_ioc_reset, .shutdown = mpt_raid_shutdown, .detach = mpt_raid_detach, }; DECLARE_MPT_PERSONALITY(mpt_raid, SI_ORDER_THIRD); MPT_PERSONALITY_DEPEND(mpt_raid, mpt_cam, 1, 1, 1); static mpt_reply_handler_t mpt_raid_reply_handler; static int mpt_raid_reply_frame_handler(struct mpt_softc *mpt, request_t *req, MSG_DEFAULT_REPLY *reply_frame); static int mpt_spawn_raid_thread(struct mpt_softc *mpt); static void mpt_terminate_raid_thread(struct mpt_softc *mpt); static void mpt_raid_thread(void *arg); static callout_func_t mpt_raid_timer; #if 0 static void mpt_enable_vol(struct mpt_softc *mpt, struct mpt_raid_volume *mpt_vol, int enable); #endif static void mpt_verify_mwce(struct mpt_softc *, struct mpt_raid_volume *); static void mpt_adjust_queue_depth(struct mpt_softc *, struct mpt_raid_volume *, struct cam_path *); static void mpt_raid_sysctl_attach(struct mpt_softc *); static const char *mpt_vol_type(struct mpt_raid_volume *vol); static const char *mpt_vol_state(struct mpt_raid_volume *vol); static const char *mpt_disk_state(struct mpt_raid_disk *disk); static void mpt_vol_prt(struct mpt_softc *mpt, struct mpt_raid_volume *vol, const char *fmt, ...); static void mpt_disk_prt(struct mpt_softc *mpt, struct mpt_raid_disk *disk, const char *fmt, ...); static int mpt_issue_raid_req(struct mpt_softc *mpt, struct mpt_raid_volume *vol, struct mpt_raid_disk *disk, request_t *req, u_int Action, uint32_t ActionDataWord, bus_addr_t addr, bus_size_t len, int write, int wait); static int mpt_refresh_raid_data(struct mpt_softc *mpt); static void mpt_schedule_raid_refresh(struct mpt_softc *mpt); static uint32_t raid_handler_id = MPT_HANDLER_ID_NONE; static const char * mpt_vol_type(struct mpt_raid_volume *vol) { switch (vol->config_page->VolumeType) { case MPI_RAID_VOL_TYPE_IS: return ("RAID-0"); case MPI_RAID_VOL_TYPE_IME: return ("RAID-1E"); case MPI_RAID_VOL_TYPE_IM: return ("RAID-1"); default: return ("Unknown"); } } static const char * mpt_vol_state(struct mpt_raid_volume *vol) { switch (vol->config_page->VolumeStatus.State) { case MPI_RAIDVOL0_STATUS_STATE_OPTIMAL: return ("Optimal"); case MPI_RAIDVOL0_STATUS_STATE_DEGRADED: return ("Degraded"); case MPI_RAIDVOL0_STATUS_STATE_FAILED: return ("Failed"); default: return ("Unknown"); } } static const char * mpt_disk_state(struct mpt_raid_disk *disk) { switch (disk->config_page.PhysDiskStatus.State) { case MPI_PHYSDISK0_STATUS_ONLINE: return ("Online"); case MPI_PHYSDISK0_STATUS_MISSING: return ("Missing"); case MPI_PHYSDISK0_STATUS_NOT_COMPATIBLE: return ("Incompatible"); case MPI_PHYSDISK0_STATUS_FAILED: return ("Failed"); case MPI_PHYSDISK0_STATUS_INITIALIZING: return ("Initializing"); case MPI_PHYSDISK0_STATUS_OFFLINE_REQUESTED: return ("Offline Requested"); case MPI_PHYSDISK0_STATUS_FAILED_REQUESTED: return ("Failed per Host Request"); case MPI_PHYSDISK0_STATUS_OTHER_OFFLINE: return ("Offline"); default: return ("Unknown"); } } static void mpt_vol_prt(struct mpt_softc *mpt, struct mpt_raid_volume *vol, const char *fmt, ...) { va_list ap; printf("%s:vol%d(%s:%d:%d): ", device_get_nameunit(mpt->dev), (u_int)(vol - mpt->raid_volumes), device_get_nameunit(mpt->dev), vol->config_page->VolumeBus, vol->config_page->VolumeID); va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); } static void mpt_disk_prt(struct mpt_softc *mpt, struct mpt_raid_disk *disk, const char *fmt, ...) { va_list ap; if (disk->volume != NULL) { printf("(%s:vol%d:%d): ", device_get_nameunit(mpt->dev), disk->volume->config_page->VolumeID, disk->member_number); } else { printf("(%s:%d:%d): ", device_get_nameunit(mpt->dev), disk->config_page.PhysDiskBus, disk->config_page.PhysDiskID); } va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); } static void mpt_raid_async(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg) { struct mpt_softc *mpt; mpt = (struct mpt_softc*)callback_arg; switch (code) { case AC_FOUND_DEVICE: { struct ccb_getdev *cgd; struct mpt_raid_volume *mpt_vol; cgd = (struct ccb_getdev *)arg; if (cgd == NULL) { break; } mpt_lprt(mpt, MPT_PRT_DEBUG, "Callback for %d\n", cgd->ccb_h.target_id); RAID_VOL_FOREACH(mpt, mpt_vol) { if ((mpt_vol->flags & MPT_RVF_ACTIVE) == 0) continue; if (mpt_vol->config_page->VolumeID == cgd->ccb_h.target_id) { mpt_adjust_queue_depth(mpt, mpt_vol, path); break; } } } default: break; } } static int mpt_raid_probe(struct mpt_softc *mpt) { if (mpt->ioc_page2 == NULL || mpt->ioc_page2->MaxPhysDisks == 0) { return (ENODEV); } return (0); } static int mpt_raid_attach(struct mpt_softc *mpt) { struct ccb_setasync csa; mpt_handler_t handler; int error; mpt_callout_init(mpt, &mpt->raid_timer); error = mpt_spawn_raid_thread(mpt); if (error != 0) { mpt_prt(mpt, "Unable to spawn RAID thread!\n"); goto cleanup; } MPT_LOCK(mpt); handler.reply_handler = mpt_raid_reply_handler; error = mpt_register_handler(mpt, MPT_HANDLER_REPLY, handler, &raid_handler_id); if (error != 0) { mpt_prt(mpt, "Unable to register RAID haandler!\n"); goto cleanup; } xpt_setup_ccb(&csa.ccb_h, mpt->path, 5); csa.ccb_h.func_code = XPT_SASYNC_CB; csa.event_enable = AC_FOUND_DEVICE; csa.callback = mpt_raid_async; csa.callback_arg = mpt; xpt_action((union ccb *)&csa); if (csa.ccb_h.status != CAM_REQ_CMP) { mpt_prt(mpt, "mpt_raid_attach: Unable to register " "CAM async handler.\n"); } MPT_UNLOCK(mpt); mpt_raid_sysctl_attach(mpt); return (0); cleanup: MPT_UNLOCK(mpt); mpt_raid_detach(mpt); return (error); } static int mpt_raid_enable(struct mpt_softc *mpt) { return (0); } static void mpt_raid_detach(struct mpt_softc *mpt) { struct ccb_setasync csa; mpt_handler_t handler; mpt_callout_drain(mpt, &mpt->raid_timer); MPT_LOCK(mpt); mpt_terminate_raid_thread(mpt); handler.reply_handler = mpt_raid_reply_handler; mpt_deregister_handler(mpt, MPT_HANDLER_REPLY, handler, raid_handler_id); xpt_setup_ccb(&csa.ccb_h, mpt->path, /*priority*/5); csa.ccb_h.func_code = XPT_SASYNC_CB; csa.event_enable = 0; csa.callback = mpt_raid_async; csa.callback_arg = mpt; xpt_action((union ccb *)&csa); MPT_UNLOCK(mpt); } static void mpt_raid_ioc_reset(struct mpt_softc *mpt, int type) { /* Nothing to do yet. */ } static const char *raid_event_txt[] = { "Volume Created", "Volume Deleted", "Volume Settings Changed", "Volume Status Changed", "Volume Physical Disk Membership Changed", "Physical Disk Created", "Physical Disk Deleted", "Physical Disk Settings Changed", "Physical Disk Status Changed", "Domain Validation Required", "SMART Data Received", "Replace Action Started", }; static int mpt_raid_event(struct mpt_softc *mpt, request_t *req, MSG_EVENT_NOTIFY_REPLY *msg) { EVENT_DATA_RAID *raid_event; struct mpt_raid_volume *mpt_vol; struct mpt_raid_disk *mpt_disk; CONFIG_PAGE_RAID_VOL_0 *vol_pg; int i; int print_event; if (msg->Event != MPI_EVENT_INTEGRATED_RAID) { return (0); } raid_event = (EVENT_DATA_RAID *)&msg->Data; mpt_vol = NULL; vol_pg = NULL; if (mpt->raid_volumes != NULL && mpt->ioc_page2 != NULL) { for (i = 0; i < mpt->ioc_page2->MaxVolumes; i++) { mpt_vol = &mpt->raid_volumes[i]; vol_pg = mpt_vol->config_page; if ((mpt_vol->flags & MPT_RVF_ACTIVE) == 0) continue; if (vol_pg->VolumeID == raid_event->VolumeID && vol_pg->VolumeBus == raid_event->VolumeBus) break; } if (i >= mpt->ioc_page2->MaxVolumes) { mpt_vol = NULL; vol_pg = NULL; } } mpt_disk = NULL; if (raid_event->PhysDiskNum != 0xFF && mpt->raid_disks != NULL) { mpt_disk = mpt->raid_disks + raid_event->PhysDiskNum; if ((mpt_disk->flags & MPT_RDF_ACTIVE) == 0) { mpt_disk = NULL; } } print_event = 1; switch(raid_event->ReasonCode) { case MPI_EVENT_RAID_RC_VOLUME_CREATED: case MPI_EVENT_RAID_RC_VOLUME_DELETED: break; case MPI_EVENT_RAID_RC_VOLUME_STATUS_CHANGED: if (mpt_vol != NULL) { if ((mpt_vol->flags & MPT_RVF_UP2DATE) != 0) { mpt_vol->flags &= ~MPT_RVF_UP2DATE; } else { /* * Coalesce status messages into one * per background run of our RAID thread. * This removes "spurious" status messages * from our output. */ print_event = 0; } } break; case MPI_EVENT_RAID_RC_VOLUME_SETTINGS_CHANGED: case MPI_EVENT_RAID_RC_VOLUME_PHYSDISK_CHANGED: mpt->raid_rescan++; if (mpt_vol != NULL) { mpt_vol->flags &= ~(MPT_RVF_UP2DATE|MPT_RVF_ANNOUNCED); } break; case MPI_EVENT_RAID_RC_PHYSDISK_CREATED: case MPI_EVENT_RAID_RC_PHYSDISK_DELETED: mpt->raid_rescan++; break; case MPI_EVENT_RAID_RC_PHYSDISK_SETTINGS_CHANGED: case MPI_EVENT_RAID_RC_PHYSDISK_STATUS_CHANGED: mpt->raid_rescan++; if (mpt_disk != NULL) { mpt_disk->flags &= ~MPT_RDF_UP2DATE; } break; case MPI_EVENT_RAID_RC_DOMAIN_VAL_NEEDED: mpt->raid_rescan++; break; case MPI_EVENT_RAID_RC_SMART_DATA: case MPI_EVENT_RAID_RC_REPLACE_ACTION_STARTED: break; } if (print_event) { if (mpt_disk != NULL) { mpt_disk_prt(mpt, mpt_disk, ""); } else if (mpt_vol != NULL) { mpt_vol_prt(mpt, mpt_vol, ""); } else { mpt_prt(mpt, "Volume(%d:%d", raid_event->VolumeBus, raid_event->VolumeID); if (raid_event->PhysDiskNum != 0xFF) mpt_prtc(mpt, ":%d): ", raid_event->PhysDiskNum); else mpt_prtc(mpt, "): "); } if (raid_event->ReasonCode >= NUM_ELEMENTS(raid_event_txt)) mpt_prtc(mpt, "Unhandled RaidEvent %#x\n", raid_event->ReasonCode); else mpt_prtc(mpt, "%s\n", raid_event_txt[raid_event->ReasonCode]); } if (raid_event->ReasonCode == MPI_EVENT_RAID_RC_SMART_DATA) { /* XXX Use CAM's print sense for this... */ if (mpt_disk != NULL) mpt_disk_prt(mpt, mpt_disk, ""); else mpt_prt(mpt, "Volume(%d:%d:%d: ", raid_event->VolumeBus, raid_event->VolumeID, raid_event->PhysDiskNum); mpt_prtc(mpt, "ASC 0x%x, ASCQ 0x%x)\n", raid_event->ASC, raid_event->ASCQ); } mpt_raid_wakeup(mpt); return (1); } static void mpt_raid_shutdown(struct mpt_softc *mpt) { struct mpt_raid_volume *mpt_vol; if (mpt->raid_mwce_setting != MPT_RAID_MWCE_REBUILD_ONLY) { return; } mpt->raid_mwce_setting = MPT_RAID_MWCE_OFF; RAID_VOL_FOREACH(mpt, mpt_vol) { mpt_verify_mwce(mpt, mpt_vol); } } static int mpt_raid_reply_handler(struct mpt_softc *mpt, request_t *req, uint32_t reply_desc, MSG_DEFAULT_REPLY *reply_frame) { int free_req; if (req == NULL) return (TRUE); free_req = TRUE; if (reply_frame != NULL) free_req = mpt_raid_reply_frame_handler(mpt, req, reply_frame); #ifdef NOTYET else if (req->ccb != NULL) { /* Complete Quiesce CCB with error... */ } #endif req->state &= ~REQ_STATE_QUEUED; req->state |= REQ_STATE_DONE; TAILQ_REMOVE(&mpt->request_pending_list, req, links); if ((req->state & REQ_STATE_NEED_WAKEUP) != 0) { wakeup(req); } else if (free_req) { mpt_free_request(mpt, req); } return (TRUE); } /* * Parse additional completion information in the reply * frame for RAID I/O requests. */ static int mpt_raid_reply_frame_handler(struct mpt_softc *mpt, request_t *req, MSG_DEFAULT_REPLY *reply_frame) { MSG_RAID_ACTION_REPLY *reply; struct mpt_raid_action_result *action_result; MSG_RAID_ACTION_REQUEST *rap; reply = (MSG_RAID_ACTION_REPLY *)reply_frame; req->IOCStatus = le16toh(reply->IOCStatus); rap = (MSG_RAID_ACTION_REQUEST *)req->req_vbuf; switch (rap->Action) { case MPI_RAID_ACTION_QUIESCE_PHYS_IO: mpt_prt(mpt, "QUIESCE PHYSIO DONE\n"); break; case MPI_RAID_ACTION_ENABLE_PHYS_IO: mpt_prt(mpt, "ENABLY PHYSIO DONE\n"); break; default: break; } action_result = REQ_TO_RAID_ACTION_RESULT(req); memcpy(&action_result->action_data, &reply->ActionData, sizeof(action_result->action_data)); action_result->action_status = le16toh(reply->ActionStatus); return (TRUE); } /* * Utiltity routine to perform a RAID action command; */ static int mpt_issue_raid_req(struct mpt_softc *mpt, struct mpt_raid_volume *vol, struct mpt_raid_disk *disk, request_t *req, u_int Action, uint32_t ActionDataWord, bus_addr_t addr, bus_size_t len, int write, int wait) { MSG_RAID_ACTION_REQUEST *rap; SGE_SIMPLE32 *se; rap = req->req_vbuf; memset(rap, 0, sizeof *rap); rap->Action = Action; rap->ActionDataWord = htole32(ActionDataWord); rap->Function = MPI_FUNCTION_RAID_ACTION; rap->VolumeID = vol->config_page->VolumeID; rap->VolumeBus = vol->config_page->VolumeBus; if (disk != NULL) rap->PhysDiskNum = disk->config_page.PhysDiskNum; else rap->PhysDiskNum = 0xFF; se = (SGE_SIMPLE32 *)&rap->ActionDataSGE; se->Address = htole32(addr); MPI_pSGE_SET_LENGTH(se, len); MPI_pSGE_SET_FLAGS(se, (MPI_SGE_FLAGS_SIMPLE_ELEMENT | MPI_SGE_FLAGS_LAST_ELEMENT | MPI_SGE_FLAGS_END_OF_BUFFER | MPI_SGE_FLAGS_END_OF_LIST | (write ? MPI_SGE_FLAGS_HOST_TO_IOC : MPI_SGE_FLAGS_IOC_TO_HOST))); se->FlagsLength = htole32(se->FlagsLength); rap->MsgContext = htole32(req->index | raid_handler_id); mpt_check_doorbell(mpt); mpt_send_cmd(mpt, req); if (wait) { return (mpt_wait_req(mpt, req, REQ_STATE_DONE, REQ_STATE_DONE, /*sleep_ok*/FALSE, /*time_ms*/2000)); } else { return (0); } } /*************************** RAID Status Monitoring ***************************/ static int mpt_spawn_raid_thread(struct mpt_softc *mpt) { int error; /* * Freeze out any CAM transactions until our thread * is able to run at least once. We need to update * our RAID pages before acception I/O or we may * reject I/O to an ID we later determine is for a * hidden physdisk. */ MPT_LOCK(mpt); xpt_freeze_simq(mpt->phydisk_sim, 1); MPT_UNLOCK(mpt); error = kproc_create(mpt_raid_thread, mpt, &mpt->raid_thread, /*flags*/0, /*altstack*/0, "mpt_raid%d", mpt->unit); if (error != 0) { MPT_LOCK(mpt); xpt_release_simq(mpt->phydisk_sim, /*run_queue*/FALSE); MPT_UNLOCK(mpt); } return (error); } static void mpt_terminate_raid_thread(struct mpt_softc *mpt) { if (mpt->raid_thread == NULL) { return; } mpt->shutdwn_raid = 1; wakeup(&mpt->raid_volumes); /* * Sleep on a slightly different location * for this interlock just for added safety. */ mpt_sleep(mpt, &mpt->raid_thread, PUSER, "thtrm", 0); } static void mpt_raid_thread(void *arg) { struct mpt_softc *mpt; int firstrun; mpt = (struct mpt_softc *)arg; firstrun = 1; MPT_LOCK(mpt); while (mpt->shutdwn_raid == 0) { if (mpt->raid_wakeup == 0) { mpt_sleep(mpt, &mpt->raid_volumes, PUSER, "idle", 0); continue; } mpt->raid_wakeup = 0; if (mpt_refresh_raid_data(mpt)) { mpt_schedule_raid_refresh(mpt); /* XX NOT QUITE RIGHT */ continue; } /* * Now that we have our first snapshot of RAID data, * allow CAM to access our physical disk bus. */ if (firstrun) { firstrun = 0; xpt_release_simq(mpt->phydisk_sim, TRUE); } if (mpt->raid_rescan != 0) { union ccb *ccb; int error; mpt->raid_rescan = 0; MPT_UNLOCK(mpt); ccb = xpt_alloc_ccb(); MPT_LOCK(mpt); error = xpt_create_path(&ccb->ccb_h.path, NULL, cam_sim_path(mpt->phydisk_sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD); if (error != CAM_REQ_CMP) { xpt_free_ccb(ccb); mpt_prt(mpt, "Unable to rescan RAID Bus!\n"); } else { xpt_rescan(ccb); } } } mpt->raid_thread = NULL; wakeup(&mpt->raid_thread); MPT_UNLOCK(mpt); kproc_exit(0); } #if 0 static void mpt_raid_quiesce_timeout(void *arg) { /* Complete the CCB with error */ /* COWWWW */ } static timeout_t mpt_raid_quiesce_timeout; cam_status mpt_raid_quiesce_disk(struct mpt_softc *mpt, struct mpt_raid_disk *mpt_disk, request_t *req) { union ccb *ccb; ccb = req->ccb; if ((mpt_disk->flags & MPT_RDF_QUIESCED) != 0) return (CAM_REQ_CMP); if ((mpt_disk->flags & MPT_RDF_QUIESCING) == 0) { int rv; mpt_disk->flags |= MPT_RDF_QUIESCING; xpt_freeze_devq(ccb->ccb_h.path, 1); rv = mpt_issue_raid_req(mpt, mpt_disk->volume, mpt_disk, req, MPI_RAID_ACTION_QUIESCE_PHYS_IO, /*ActionData*/0, /*addr*/0, /*len*/0, /*write*/FALSE, /*wait*/FALSE); if (rv != 0) return (CAM_REQ_CMP_ERR); mpt_req_timeout(req, mpt_raid_quiesce_timeout, ccb, 5 * hz); #if 0 if (rv == ETIMEDOUT) { mpt_disk_prt(mpt, mpt_disk, "mpt_raid_quiesce_disk: " "Quiece Timed-out\n"); xpt_release_devq(ccb->ccb_h.path, 1, /*run*/0); return (CAM_REQ_CMP_ERR); } ar = REQ_TO_RAID_ACTION_RESULT(req); if (rv != 0 || REQ_IOCSTATUS(req) != MPI_IOCSTATUS_SUCCESS || (ar->action_status != MPI_RAID_ACTION_ASTATUS_SUCCESS)) { mpt_disk_prt(mpt, mpt_disk, "Quiece Failed" "%d:%x:%x\n", rv, req->IOCStatus, ar->action_status); xpt_release_devq(ccb->ccb_h.path, 1, /*run*/0); return (CAM_REQ_CMP_ERR); } #endif return (CAM_REQ_INPROG); } return (CAM_REQUEUE_REQ); } #endif /* XXX Ignores that there may be multiple buses/IOCs involved. */ cam_status mpt_map_physdisk(struct mpt_softc *mpt, union ccb *ccb, target_id_t *tgt) { struct mpt_raid_disk *mpt_disk; mpt_disk = mpt->raid_disks + ccb->ccb_h.target_id; if (ccb->ccb_h.target_id < mpt->raid_max_disks && (mpt_disk->flags & MPT_RDF_ACTIVE) != 0) { *tgt = mpt_disk->config_page.PhysDiskID; return (0); } mpt_lprt(mpt, MPT_PRT_DEBUG1, "mpt_map_physdisk(%d) - Not Active\n", ccb->ccb_h.target_id); return (-1); } /* XXX Ignores that there may be multiple buses/IOCs involved. */ int mpt_is_raid_member(struct mpt_softc *mpt, target_id_t tgt) { struct mpt_raid_disk *mpt_disk; int i; if (mpt->ioc_page2 == NULL || mpt->ioc_page2->MaxPhysDisks == 0) return (0); for (i = 0; i < mpt->ioc_page2->MaxPhysDisks; i++) { mpt_disk = &mpt->raid_disks[i]; if ((mpt_disk->flags & MPT_RDF_ACTIVE) != 0 && mpt_disk->config_page.PhysDiskID == tgt) return (1); } return (0); } /* XXX Ignores that there may be multiple buses/IOCs involved. */ int mpt_is_raid_volume(struct mpt_softc *mpt, target_id_t tgt) { CONFIG_PAGE_IOC_2_RAID_VOL *ioc_vol; CONFIG_PAGE_IOC_2_RAID_VOL *ioc_last_vol; if (mpt->ioc_page2 == NULL || mpt->ioc_page2->MaxPhysDisks == 0) { return (0); } ioc_vol = mpt->ioc_page2->RaidVolume; ioc_last_vol = ioc_vol + mpt->ioc_page2->NumActiveVolumes; for (;ioc_vol != ioc_last_vol; ioc_vol++) { if (ioc_vol->VolumeID == tgt) { return (1); } } return (0); } #if 0 static void mpt_enable_vol(struct mpt_softc *mpt, struct mpt_raid_volume *mpt_vol, int enable) { request_t *req; struct mpt_raid_action_result *ar; CONFIG_PAGE_RAID_VOL_0 *vol_pg; int enabled; int rv; vol_pg = mpt_vol->config_page; enabled = vol_pg->VolumeStatus.Flags & MPI_RAIDVOL0_STATUS_FLAG_ENABLED; /* * If the setting matches the configuration, * there is nothing to do. */ if ((enabled && enable) || (!enabled && !enable)) return; req = mpt_get_request(mpt, /*sleep_ok*/TRUE); if (req == NULL) { mpt_vol_prt(mpt, mpt_vol, "mpt_enable_vol: Get request failed!\n"); return; } rv = mpt_issue_raid_req(mpt, mpt_vol, /*disk*/NULL, req, enable ? MPI_RAID_ACTION_ENABLE_VOLUME : MPI_RAID_ACTION_DISABLE_VOLUME, /*data*/0, /*addr*/0, /*len*/0, /*write*/FALSE, /*wait*/TRUE); if (rv == ETIMEDOUT) { mpt_vol_prt(mpt, mpt_vol, "mpt_enable_vol: " "%s Volume Timed-out\n", enable ? "Enable" : "Disable"); return; } ar = REQ_TO_RAID_ACTION_RESULT(req); if (rv != 0 || REQ_IOCSTATUS(req) != MPI_IOCSTATUS_SUCCESS || (ar->action_status != MPI_RAID_ACTION_ASTATUS_SUCCESS)) { mpt_vol_prt(mpt, mpt_vol, "%s Volume Failed: %d:%x:%x\n", enable ? "Enable" : "Disable", rv, req->IOCStatus, ar->action_status); } mpt_free_request(mpt, req); } #endif static void mpt_verify_mwce(struct mpt_softc *mpt, struct mpt_raid_volume *mpt_vol) { request_t *req; struct mpt_raid_action_result *ar; CONFIG_PAGE_RAID_VOL_0 *vol_pg; uint32_t data; int rv; int resyncing; int mwce; vol_pg = mpt_vol->config_page; resyncing = vol_pg->VolumeStatus.Flags & MPI_RAIDVOL0_STATUS_FLAG_RESYNC_IN_PROGRESS; mwce = vol_pg->VolumeSettings.Settings & MPI_RAIDVOL0_SETTING_WRITE_CACHING_ENABLE; /* * If the setting matches the configuration, * there is nothing to do. */ switch (mpt->raid_mwce_setting) { case MPT_RAID_MWCE_REBUILD_ONLY: if ((resyncing && mwce) || (!resyncing && !mwce)) { return; } mpt_vol->flags ^= MPT_RVF_WCE_CHANGED; if ((mpt_vol->flags & MPT_RVF_WCE_CHANGED) == 0) { /* * Wait one more status update to see if * resyncing gets enabled. It gets disabled * temporarilly when WCE is changed. */ return; } break; case MPT_RAID_MWCE_ON: if (mwce) return; break; case MPT_RAID_MWCE_OFF: if (!mwce) return; break; case MPT_RAID_MWCE_NC: return; } req = mpt_get_request(mpt, /*sleep_ok*/TRUE); if (req == NULL) { mpt_vol_prt(mpt, mpt_vol, "mpt_verify_mwce: Get request failed!\n"); return; } vol_pg->VolumeSettings.Settings ^= MPI_RAIDVOL0_SETTING_WRITE_CACHING_ENABLE; memcpy(&data, &vol_pg->VolumeSettings, sizeof(data)); vol_pg->VolumeSettings.Settings ^= MPI_RAIDVOL0_SETTING_WRITE_CACHING_ENABLE; rv = mpt_issue_raid_req(mpt, mpt_vol, /*disk*/NULL, req, MPI_RAID_ACTION_CHANGE_VOLUME_SETTINGS, data, /*addr*/0, /*len*/0, /*write*/FALSE, /*wait*/TRUE); if (rv == ETIMEDOUT) { mpt_vol_prt(mpt, mpt_vol, "mpt_verify_mwce: " "Write Cache Enable Timed-out\n"); return; } ar = REQ_TO_RAID_ACTION_RESULT(req); if (rv != 0 || REQ_IOCSTATUS(req) != MPI_IOCSTATUS_SUCCESS || (ar->action_status != MPI_RAID_ACTION_ASTATUS_SUCCESS)) { mpt_vol_prt(mpt, mpt_vol, "Write Cache Enable Failed: " "%d:%x:%x\n", rv, req->IOCStatus, ar->action_status); } else { vol_pg->VolumeSettings.Settings ^= MPI_RAIDVOL0_SETTING_WRITE_CACHING_ENABLE; } mpt_free_request(mpt, req); } static void mpt_verify_resync_rate(struct mpt_softc *mpt, struct mpt_raid_volume *mpt_vol) { request_t *req; struct mpt_raid_action_result *ar; CONFIG_PAGE_RAID_VOL_0 *vol_pg; u_int prio; int rv; vol_pg = mpt_vol->config_page; if (mpt->raid_resync_rate == MPT_RAID_RESYNC_RATE_NC) return; /* * If the current RAID resync rate does not * match our configured rate, update it. */ prio = vol_pg->VolumeSettings.Settings & MPI_RAIDVOL0_SETTING_PRIORITY_RESYNC; if (vol_pg->ResyncRate != 0 && vol_pg->ResyncRate != mpt->raid_resync_rate) { req = mpt_get_request(mpt, /*sleep_ok*/TRUE); if (req == NULL) { mpt_vol_prt(mpt, mpt_vol, "mpt_verify_resync_rate: " "Get request failed!\n"); return; } rv = mpt_issue_raid_req(mpt, mpt_vol, /*disk*/NULL, req, MPI_RAID_ACTION_SET_RESYNC_RATE, mpt->raid_resync_rate, /*addr*/0, /*len*/0, /*write*/FALSE, /*wait*/TRUE); if (rv == ETIMEDOUT) { mpt_vol_prt(mpt, mpt_vol, "mpt_refresh_raid_data: " "Resync Rate Setting Timed-out\n"); return; } ar = REQ_TO_RAID_ACTION_RESULT(req); if (rv != 0 || REQ_IOCSTATUS(req) != MPI_IOCSTATUS_SUCCESS || (ar->action_status != MPI_RAID_ACTION_ASTATUS_SUCCESS)) { mpt_vol_prt(mpt, mpt_vol, "Resync Rate Setting Failed: " "%d:%x:%x\n", rv, req->IOCStatus, ar->action_status); } else vol_pg->ResyncRate = mpt->raid_resync_rate; mpt_free_request(mpt, req); } else if ((prio && mpt->raid_resync_rate < 128) || (!prio && mpt->raid_resync_rate >= 128)) { uint32_t data; req = mpt_get_request(mpt, /*sleep_ok*/TRUE); if (req == NULL) { mpt_vol_prt(mpt, mpt_vol, "mpt_verify_resync_rate: " "Get request failed!\n"); return; } vol_pg->VolumeSettings.Settings ^= MPI_RAIDVOL0_SETTING_PRIORITY_RESYNC; memcpy(&data, &vol_pg->VolumeSettings, sizeof(data)); vol_pg->VolumeSettings.Settings ^= MPI_RAIDVOL0_SETTING_PRIORITY_RESYNC; rv = mpt_issue_raid_req(mpt, mpt_vol, /*disk*/NULL, req, MPI_RAID_ACTION_CHANGE_VOLUME_SETTINGS, data, /*addr*/0, /*len*/0, /*write*/FALSE, /*wait*/TRUE); if (rv == ETIMEDOUT) { mpt_vol_prt(mpt, mpt_vol, "mpt_refresh_raid_data: " "Resync Rate Setting Timed-out\n"); return; } ar = REQ_TO_RAID_ACTION_RESULT(req); if (rv != 0 || REQ_IOCSTATUS(req) != MPI_IOCSTATUS_SUCCESS || (ar->action_status != MPI_RAID_ACTION_ASTATUS_SUCCESS)) { mpt_vol_prt(mpt, mpt_vol, "Resync Rate Setting Failed: " "%d:%x:%x\n", rv, req->IOCStatus, ar->action_status); } else { vol_pg->VolumeSettings.Settings ^= MPI_RAIDVOL0_SETTING_PRIORITY_RESYNC; } mpt_free_request(mpt, req); } } static void mpt_adjust_queue_depth(struct mpt_softc *mpt, struct mpt_raid_volume *mpt_vol, struct cam_path *path) { struct ccb_relsim crs; xpt_setup_ccb(&crs.ccb_h, path, /*priority*/5); crs.ccb_h.func_code = XPT_REL_SIMQ; crs.ccb_h.flags = CAM_DEV_QFREEZE; crs.release_flags = RELSIM_ADJUST_OPENINGS; crs.openings = mpt->raid_queue_depth; xpt_action((union ccb *)&crs); if (crs.ccb_h.status != CAM_REQ_CMP) mpt_vol_prt(mpt, mpt_vol, "mpt_adjust_queue_depth failed " "with CAM status %#x\n", crs.ccb_h.status); } static void mpt_announce_vol(struct mpt_softc *mpt, struct mpt_raid_volume *mpt_vol) { CONFIG_PAGE_RAID_VOL_0 *vol_pg; u_int i; vol_pg = mpt_vol->config_page; mpt_vol_prt(mpt, mpt_vol, "Settings ("); for (i = 1; i <= 0x8000; i <<= 1) { switch (vol_pg->VolumeSettings.Settings & i) { case MPI_RAIDVOL0_SETTING_WRITE_CACHING_ENABLE: mpt_prtc(mpt, " Member-WCE"); break; case MPI_RAIDVOL0_SETTING_OFFLINE_ON_SMART: mpt_prtc(mpt, " Offline-On-SMART-Err"); break; case MPI_RAIDVOL0_SETTING_AUTO_CONFIGURE: mpt_prtc(mpt, " Hot-Plug-Spares"); break; case MPI_RAIDVOL0_SETTING_PRIORITY_RESYNC: mpt_prtc(mpt, " High-Priority-ReSync"); break; default: break; } } mpt_prtc(mpt, " )\n"); if (vol_pg->VolumeSettings.HotSparePool != 0) { mpt_vol_prt(mpt, mpt_vol, "Using Spare Pool%s", powerof2(vol_pg->VolumeSettings.HotSparePool) ? ":" : "s:"); for (i = 0; i < 8; i++) { u_int mask; mask = 0x1 << i; if ((vol_pg->VolumeSettings.HotSparePool & mask) == 0) continue; mpt_prtc(mpt, " %d", i); } mpt_prtc(mpt, "\n"); } mpt_vol_prt(mpt, mpt_vol, "%d Members:\n", vol_pg->NumPhysDisks); for (i = 0; i < vol_pg->NumPhysDisks; i++){ struct mpt_raid_disk *mpt_disk; CONFIG_PAGE_RAID_PHYS_DISK_0 *disk_pg; int pt_bus = cam_sim_bus(mpt->phydisk_sim); U8 f, s; mpt_disk = mpt->raid_disks + vol_pg->PhysDisk[i].PhysDiskNum; disk_pg = &mpt_disk->config_page; mpt_prtc(mpt, " "); mpt_prtc(mpt, "(%s:%d:%d:0): ", device_get_nameunit(mpt->dev), pt_bus, disk_pg->PhysDiskID); if (vol_pg->VolumeType == MPI_RAID_VOL_TYPE_IM) { mpt_prtc(mpt, "%s", mpt_disk->member_number == 0? "Primary" : "Secondary"); } else { mpt_prtc(mpt, "Stripe Position %d", mpt_disk->member_number); } f = disk_pg->PhysDiskStatus.Flags; s = disk_pg->PhysDiskStatus.State; if (f & MPI_PHYSDISK0_STATUS_FLAG_OUT_OF_SYNC) { mpt_prtc(mpt, " Out of Sync"); } if (f & MPI_PHYSDISK0_STATUS_FLAG_QUIESCED) { mpt_prtc(mpt, " Quiesced"); } if (f & MPI_PHYSDISK0_STATUS_FLAG_INACTIVE_VOLUME) { mpt_prtc(mpt, " Inactive"); } if (f & MPI_PHYSDISK0_STATUS_FLAG_OPTIMAL_PREVIOUS) { mpt_prtc(mpt, " Was Optimal"); } if (f & MPI_PHYSDISK0_STATUS_FLAG_NOT_OPTIMAL_PREVIOUS) { mpt_prtc(mpt, " Was Non-Optimal"); } switch (s) { case MPI_PHYSDISK0_STATUS_ONLINE: mpt_prtc(mpt, " Online"); break; case MPI_PHYSDISK0_STATUS_MISSING: mpt_prtc(mpt, " Missing"); break; case MPI_PHYSDISK0_STATUS_NOT_COMPATIBLE: mpt_prtc(mpt, " Incompatible"); break; case MPI_PHYSDISK0_STATUS_FAILED: mpt_prtc(mpt, " Failed"); break; case MPI_PHYSDISK0_STATUS_INITIALIZING: mpt_prtc(mpt, " Initializing"); break; case MPI_PHYSDISK0_STATUS_OFFLINE_REQUESTED: mpt_prtc(mpt, " Requested Offline"); break; case MPI_PHYSDISK0_STATUS_FAILED_REQUESTED: mpt_prtc(mpt, " Requested Failed"); break; case MPI_PHYSDISK0_STATUS_OTHER_OFFLINE: default: mpt_prtc(mpt, " Offline Other (%x)", s); break; } mpt_prtc(mpt, "\n"); } } static void mpt_announce_disk(struct mpt_softc *mpt, struct mpt_raid_disk *mpt_disk) { CONFIG_PAGE_RAID_PHYS_DISK_0 *disk_pg; int rd_bus = cam_sim_bus(mpt->sim); int pt_bus = cam_sim_bus(mpt->phydisk_sim); u_int i; disk_pg = &mpt_disk->config_page; mpt_disk_prt(mpt, mpt_disk, "Physical (%s:%d:%d:0), Pass-thru (%s:%d:%d:0)\n", device_get_nameunit(mpt->dev), rd_bus, disk_pg->PhysDiskID, device_get_nameunit(mpt->dev), pt_bus, mpt_disk - mpt->raid_disks); if (disk_pg->PhysDiskSettings.HotSparePool == 0) return; mpt_disk_prt(mpt, mpt_disk, "Member of Hot Spare Pool%s", powerof2(disk_pg->PhysDiskSettings.HotSparePool) ? ":" : "s:"); for (i = 0; i < 8; i++) { u_int mask; mask = 0x1 << i; if ((disk_pg->PhysDiskSettings.HotSparePool & mask) == 0) continue; mpt_prtc(mpt, " %d", i); } mpt_prtc(mpt, "\n"); } static void mpt_refresh_raid_disk(struct mpt_softc *mpt, struct mpt_raid_disk *mpt_disk, IOC_3_PHYS_DISK *ioc_disk) { int rv; rv = mpt_read_cfg_header(mpt, MPI_CONFIG_PAGETYPE_RAID_PHYSDISK, /*PageNumber*/0, ioc_disk->PhysDiskNum, &mpt_disk->config_page.Header, /*sleep_ok*/TRUE, /*timeout_ms*/5000); if (rv != 0) { mpt_prt(mpt, "mpt_refresh_raid_disk: " "Failed to read RAID Disk Hdr(%d)\n", ioc_disk->PhysDiskNum); return; } rv = mpt_read_cur_cfg_page(mpt, ioc_disk->PhysDiskNum, &mpt_disk->config_page.Header, sizeof(mpt_disk->config_page), /*sleep_ok*/TRUE, /*timeout_ms*/5000); if (rv != 0) mpt_prt(mpt, "mpt_refresh_raid_disk: " "Failed to read RAID Disk Page(%d)\n", ioc_disk->PhysDiskNum); mpt2host_config_page_raid_phys_disk_0(&mpt_disk->config_page); } static void mpt_refresh_raid_vol(struct mpt_softc *mpt, struct mpt_raid_volume *mpt_vol, CONFIG_PAGE_IOC_2_RAID_VOL *ioc_vol) { CONFIG_PAGE_RAID_VOL_0 *vol_pg; struct mpt_raid_action_result *ar; request_t *req; int rv; int i; vol_pg = mpt_vol->config_page; mpt_vol->flags &= ~MPT_RVF_UP2DATE; rv = mpt_read_cfg_header(mpt, MPI_CONFIG_PAGETYPE_RAID_VOLUME, 0, ioc_vol->VolumePageNumber, &vol_pg->Header, TRUE, 5000); if (rv != 0) { mpt_vol_prt(mpt, mpt_vol, "mpt_refresh_raid_vol: Failed to read RAID Vol Hdr(%d)\n", ioc_vol->VolumePageNumber); return; } rv = mpt_read_cur_cfg_page(mpt, ioc_vol->VolumePageNumber, &vol_pg->Header, mpt->raid_page0_len, TRUE, 5000); if (rv != 0) { mpt_vol_prt(mpt, mpt_vol, "mpt_refresh_raid_vol: Failed to read RAID Vol Page(%d)\n", ioc_vol->VolumePageNumber); return; } mpt2host_config_page_raid_vol_0(vol_pg); mpt_vol->flags |= MPT_RVF_ACTIVE; /* Update disk entry array data. */ for (i = 0; i < vol_pg->NumPhysDisks; i++) { struct mpt_raid_disk *mpt_disk; mpt_disk = mpt->raid_disks + vol_pg->PhysDisk[i].PhysDiskNum; mpt_disk->volume = mpt_vol; mpt_disk->member_number = vol_pg->PhysDisk[i].PhysDiskMap; if (vol_pg->VolumeType == MPI_RAID_VOL_TYPE_IM) { mpt_disk->member_number--; } } if ((vol_pg->VolumeStatus.Flags & MPI_RAIDVOL0_STATUS_FLAG_RESYNC_IN_PROGRESS) == 0) return; req = mpt_get_request(mpt, TRUE); if (req == NULL) { mpt_vol_prt(mpt, mpt_vol, "mpt_refresh_raid_vol: Get request failed!\n"); return; } rv = mpt_issue_raid_req(mpt, mpt_vol, NULL, req, MPI_RAID_ACTION_INDICATOR_STRUCT, 0, 0, 0, FALSE, TRUE); if (rv == ETIMEDOUT) { mpt_vol_prt(mpt, mpt_vol, "mpt_refresh_raid_vol: Progress Indicator fetch timeout\n"); mpt_free_request(mpt, req); return; } ar = REQ_TO_RAID_ACTION_RESULT(req); if (rv == 0 && ar->action_status == MPI_RAID_ACTION_ASTATUS_SUCCESS && REQ_IOCSTATUS(req) == MPI_IOCSTATUS_SUCCESS) { memcpy(&mpt_vol->sync_progress, &ar->action_data.indicator_struct, sizeof(mpt_vol->sync_progress)); mpt2host_mpi_raid_vol_indicator(&mpt_vol->sync_progress); } else { mpt_vol_prt(mpt, mpt_vol, "mpt_refresh_raid_vol: Progress indicator fetch failed!\n"); } mpt_free_request(mpt, req); } /* * Update in-core information about RAID support. We update any entries * that didn't previously exists or have been marked as needing to * be updated by our event handler. Interesting changes are displayed * to the console. */ static int mpt_refresh_raid_data(struct mpt_softc *mpt) { CONFIG_PAGE_IOC_2_RAID_VOL *ioc_vol; CONFIG_PAGE_IOC_2_RAID_VOL *ioc_last_vol; IOC_3_PHYS_DISK *ioc_disk; IOC_3_PHYS_DISK *ioc_last_disk; CONFIG_PAGE_RAID_VOL_0 *vol_pg; size_t len; int rv; int i; u_int nonopt_volumes; if (mpt->ioc_page2 == NULL || mpt->ioc_page3 == NULL) { return (0); } /* * Mark all items as unreferenced by the configuration. * This allows us to find, report, and discard stale * entries. */ for (i = 0; i < mpt->ioc_page2->MaxPhysDisks; i++) { mpt->raid_disks[i].flags &= ~MPT_RDF_REFERENCED; } for (i = 0; i < mpt->ioc_page2->MaxVolumes; i++) { mpt->raid_volumes[i].flags &= ~MPT_RVF_REFERENCED; } /* * Get Physical Disk information. */ len = mpt->ioc_page3->Header.PageLength * sizeof(uint32_t); rv = mpt_read_cur_cfg_page(mpt, /*PageAddress*/0, &mpt->ioc_page3->Header, len, /*sleep_ok*/TRUE, /*timeout_ms*/5000); if (rv) { mpt_prt(mpt, "mpt_refresh_raid_data: Failed to read IOC Page 3\n"); return (-1); } mpt2host_config_page_ioc3(mpt->ioc_page3); ioc_disk = mpt->ioc_page3->PhysDisk; ioc_last_disk = ioc_disk + mpt->ioc_page3->NumPhysDisks; for (; ioc_disk != ioc_last_disk; ioc_disk++) { struct mpt_raid_disk *mpt_disk; mpt_disk = mpt->raid_disks + ioc_disk->PhysDiskNum; mpt_disk->flags |= MPT_RDF_REFERENCED; if ((mpt_disk->flags & (MPT_RDF_ACTIVE|MPT_RDF_UP2DATE)) != (MPT_RDF_ACTIVE|MPT_RDF_UP2DATE)) { mpt_refresh_raid_disk(mpt, mpt_disk, ioc_disk); } mpt_disk->flags |= MPT_RDF_ACTIVE; mpt->raid_rescan++; } /* * Refresh volume data. */ len = mpt->ioc_page2->Header.PageLength * sizeof(uint32_t); rv = mpt_read_cur_cfg_page(mpt, /*PageAddress*/0, &mpt->ioc_page2->Header, len, /*sleep_ok*/TRUE, /*timeout_ms*/5000); if (rv) { mpt_prt(mpt, "mpt_refresh_raid_data: " "Failed to read IOC Page 2\n"); return (-1); } mpt2host_config_page_ioc2(mpt->ioc_page2); ioc_vol = mpt->ioc_page2->RaidVolume; ioc_last_vol = ioc_vol + mpt->ioc_page2->NumActiveVolumes; for (;ioc_vol != ioc_last_vol; ioc_vol++) { struct mpt_raid_volume *mpt_vol; mpt_vol = mpt->raid_volumes + ioc_vol->VolumePageNumber; mpt_vol->flags |= MPT_RVF_REFERENCED; vol_pg = mpt_vol->config_page; if (vol_pg == NULL) continue; if (((mpt_vol->flags & (MPT_RVF_ACTIVE|MPT_RVF_UP2DATE)) != (MPT_RVF_ACTIVE|MPT_RVF_UP2DATE)) || (vol_pg->VolumeStatus.Flags & MPI_RAIDVOL0_STATUS_FLAG_RESYNC_IN_PROGRESS) != 0) { mpt_refresh_raid_vol(mpt, mpt_vol, ioc_vol); } mpt_vol->flags |= MPT_RVF_ACTIVE; } nonopt_volumes = 0; for (i = 0; i < mpt->ioc_page2->MaxVolumes; i++) { struct mpt_raid_volume *mpt_vol; uint64_t total; uint64_t left; int m; u_int prio; mpt_vol = &mpt->raid_volumes[i]; if ((mpt_vol->flags & MPT_RVF_ACTIVE) == 0) { continue; } vol_pg = mpt_vol->config_page; if ((mpt_vol->flags & (MPT_RVF_REFERENCED|MPT_RVF_ANNOUNCED)) == MPT_RVF_ANNOUNCED) { mpt_vol_prt(mpt, mpt_vol, "No longer configured\n"); mpt_vol->flags = 0; continue; } if ((mpt_vol->flags & MPT_RVF_ANNOUNCED) == 0) { mpt_announce_vol(mpt, mpt_vol); mpt_vol->flags |= MPT_RVF_ANNOUNCED; } if (vol_pg->VolumeStatus.State != MPI_RAIDVOL0_STATUS_STATE_OPTIMAL) nonopt_volumes++; if ((mpt_vol->flags & MPT_RVF_UP2DATE) != 0) continue; mpt_vol->flags |= MPT_RVF_UP2DATE; mpt_vol_prt(mpt, mpt_vol, "%s - %s\n", mpt_vol_type(mpt_vol), mpt_vol_state(mpt_vol)); mpt_verify_mwce(mpt, mpt_vol); if (vol_pg->VolumeStatus.Flags == 0) { continue; } mpt_vol_prt(mpt, mpt_vol, "Status ("); for (m = 1; m <= 0x80; m <<= 1) { switch (vol_pg->VolumeStatus.Flags & m) { case MPI_RAIDVOL0_STATUS_FLAG_ENABLED: mpt_prtc(mpt, " Enabled"); break; case MPI_RAIDVOL0_STATUS_FLAG_QUIESCED: mpt_prtc(mpt, " Quiesced"); break; case MPI_RAIDVOL0_STATUS_FLAG_RESYNC_IN_PROGRESS: mpt_prtc(mpt, " Re-Syncing"); break; case MPI_RAIDVOL0_STATUS_FLAG_VOLUME_INACTIVE: mpt_prtc(mpt, " Inactive"); break; default: break; } } mpt_prtc(mpt, " )\n"); if ((vol_pg->VolumeStatus.Flags & MPI_RAIDVOL0_STATUS_FLAG_RESYNC_IN_PROGRESS) == 0) continue; mpt_verify_resync_rate(mpt, mpt_vol); left = MPT_U64_2_SCALAR(mpt_vol->sync_progress.BlocksRemaining); total = MPT_U64_2_SCALAR(mpt_vol->sync_progress.TotalBlocks); if (vol_pg->ResyncRate != 0) { prio = ((u_int)vol_pg->ResyncRate * 100000) / 0xFF; mpt_vol_prt(mpt, mpt_vol, "Rate %d.%d%%\n", prio / 1000, prio % 1000); } else { prio = vol_pg->VolumeSettings.Settings & MPI_RAIDVOL0_SETTING_PRIORITY_RESYNC; mpt_vol_prt(mpt, mpt_vol, "%s Priority Re-Sync\n", prio ? "High" : "Low"); } mpt_vol_prt(mpt, mpt_vol, "%ju of %ju " "blocks remaining\n", (uintmax_t)left, (uintmax_t)total); /* Periodically report on sync progress. */ mpt_schedule_raid_refresh(mpt); } for (i = 0; i < mpt->ioc_page2->MaxPhysDisks; i++) { struct mpt_raid_disk *mpt_disk; CONFIG_PAGE_RAID_PHYS_DISK_0 *disk_pg; int m; mpt_disk = &mpt->raid_disks[i]; disk_pg = &mpt_disk->config_page; if ((mpt_disk->flags & MPT_RDF_ACTIVE) == 0) continue; if ((mpt_disk->flags & (MPT_RDF_REFERENCED|MPT_RDF_ANNOUNCED)) == MPT_RDF_ANNOUNCED) { mpt_disk_prt(mpt, mpt_disk, "No longer configured\n"); mpt_disk->flags = 0; mpt->raid_rescan++; continue; } if ((mpt_disk->flags & MPT_RDF_ANNOUNCED) == 0) { mpt_announce_disk(mpt, mpt_disk); mpt_disk->flags |= MPT_RVF_ANNOUNCED; } if ((mpt_disk->flags & MPT_RDF_UP2DATE) != 0) continue; mpt_disk->flags |= MPT_RDF_UP2DATE; mpt_disk_prt(mpt, mpt_disk, "%s\n", mpt_disk_state(mpt_disk)); if (disk_pg->PhysDiskStatus.Flags == 0) continue; mpt_disk_prt(mpt, mpt_disk, "Status ("); for (m = 1; m <= 0x80; m <<= 1) { switch (disk_pg->PhysDiskStatus.Flags & m) { case MPI_PHYSDISK0_STATUS_FLAG_OUT_OF_SYNC: mpt_prtc(mpt, " Out-Of-Sync"); break; case MPI_PHYSDISK0_STATUS_FLAG_QUIESCED: mpt_prtc(mpt, " Quiesced"); break; default: break; } } mpt_prtc(mpt, " )\n"); } mpt->raid_nonopt_volumes = nonopt_volumes; return (0); } static void mpt_raid_timer(void *arg) { struct mpt_softc *mpt; mpt = (struct mpt_softc *)arg; MPT_LOCK_ASSERT(mpt); mpt_raid_wakeup(mpt); } static void mpt_schedule_raid_refresh(struct mpt_softc *mpt) { callout_reset(&mpt->raid_timer, MPT_RAID_SYNC_REPORT_INTERVAL, mpt_raid_timer, mpt); } void mpt_raid_free_mem(struct mpt_softc *mpt) { if (mpt->raid_volumes) { struct mpt_raid_volume *mpt_raid; int i; for (i = 0; i < mpt->raid_max_volumes; i++) { mpt_raid = &mpt->raid_volumes[i]; if (mpt_raid->config_page) { free(mpt_raid->config_page, M_DEVBUF); mpt_raid->config_page = NULL; } } free(mpt->raid_volumes, M_DEVBUF); mpt->raid_volumes = NULL; } if (mpt->raid_disks) { free(mpt->raid_disks, M_DEVBUF); mpt->raid_disks = NULL; } if (mpt->ioc_page2) { free(mpt->ioc_page2, M_DEVBUF); mpt->ioc_page2 = NULL; } if (mpt->ioc_page3) { free(mpt->ioc_page3, M_DEVBUF); mpt->ioc_page3 = NULL; } mpt->raid_max_volumes = 0; mpt->raid_max_disks = 0; } static int mpt_raid_set_vol_resync_rate(struct mpt_softc *mpt, u_int rate) { struct mpt_raid_volume *mpt_vol; if ((rate > MPT_RAID_RESYNC_RATE_MAX || rate < MPT_RAID_RESYNC_RATE_MIN) && rate != MPT_RAID_RESYNC_RATE_NC) return (EINVAL); MPT_LOCK(mpt); mpt->raid_resync_rate = rate; RAID_VOL_FOREACH(mpt, mpt_vol) { if ((mpt_vol->flags & MPT_RVF_ACTIVE) == 0) { continue; } mpt_verify_resync_rate(mpt, mpt_vol); } MPT_UNLOCK(mpt); return (0); } static int mpt_raid_set_vol_queue_depth(struct mpt_softc *mpt, u_int vol_queue_depth) { struct mpt_raid_volume *mpt_vol; if (vol_queue_depth > 255 || vol_queue_depth < 1) return (EINVAL); MPT_LOCK(mpt); mpt->raid_queue_depth = vol_queue_depth; RAID_VOL_FOREACH(mpt, mpt_vol) { struct cam_path *path; int error; if ((mpt_vol->flags & MPT_RVF_ACTIVE) == 0) continue; mpt->raid_rescan = 0; error = xpt_create_path(&path, NULL, cam_sim_path(mpt->sim), mpt_vol->config_page->VolumeID, /*lun*/0); if (error != CAM_REQ_CMP) { mpt_vol_prt(mpt, mpt_vol, "Unable to allocate path!\n"); continue; } mpt_adjust_queue_depth(mpt, mpt_vol, path); xpt_free_path(path); } MPT_UNLOCK(mpt); return (0); } static int mpt_raid_set_vol_mwce(struct mpt_softc *mpt, mpt_raid_mwce_t mwce) { struct mpt_raid_volume *mpt_vol; int force_full_resync; MPT_LOCK(mpt); if (mwce == mpt->raid_mwce_setting) { MPT_UNLOCK(mpt); return (0); } /* * Catch MWCE being left on due to a failed shutdown. Since * sysctls cannot be set by the loader, we treat the first * setting of this varible specially and force a full volume * resync if MWCE is enabled and a resync is in progress. */ force_full_resync = 0; if (mpt->raid_mwce_set == 0 && mpt->raid_mwce_setting == MPT_RAID_MWCE_NC && mwce == MPT_RAID_MWCE_REBUILD_ONLY) force_full_resync = 1; mpt->raid_mwce_setting = mwce; RAID_VOL_FOREACH(mpt, mpt_vol) { CONFIG_PAGE_RAID_VOL_0 *vol_pg; int resyncing; int mwce; if ((mpt_vol->flags & MPT_RVF_ACTIVE) == 0) continue; vol_pg = mpt_vol->config_page; resyncing = vol_pg->VolumeStatus.Flags & MPI_RAIDVOL0_STATUS_FLAG_RESYNC_IN_PROGRESS; mwce = vol_pg->VolumeSettings.Settings & MPI_RAIDVOL0_SETTING_WRITE_CACHING_ENABLE; if (force_full_resync && resyncing && mwce) { /* * XXX disable/enable volume should force a resync, * but we'll need to queice, drain, and restart * I/O to do that. */ mpt_vol_prt(mpt, mpt_vol, "WARNING - Unsafe shutdown " "detected. Suggest full resync.\n"); } mpt_verify_mwce(mpt, mpt_vol); } mpt->raid_mwce_set = 1; MPT_UNLOCK(mpt); return (0); } static const char *mpt_vol_mwce_strs[] = { "On", "Off", "On-During-Rebuild", "NC" }; static int mpt_raid_sysctl_vol_member_wce(SYSCTL_HANDLER_ARGS) { char inbuf[20]; struct mpt_softc *mpt; const char *str; int error; u_int size; u_int i; - GIANT_REQUIRED; - mpt = (struct mpt_softc *)arg1; str = mpt_vol_mwce_strs[mpt->raid_mwce_setting]; error = SYSCTL_OUT(req, str, strlen(str) + 1); if (error || !req->newptr) { return (error); } size = req->newlen - req->newidx; if (size >= sizeof(inbuf)) { return (EINVAL); } error = SYSCTL_IN(req, inbuf, size); if (error) { return (error); } inbuf[size] = '\0'; for (i = 0; i < NUM_ELEMENTS(mpt_vol_mwce_strs); i++) { if (strcmp(mpt_vol_mwce_strs[i], inbuf) == 0) { return (mpt_raid_set_vol_mwce(mpt, i)); } } return (EINVAL); } static int mpt_raid_sysctl_vol_resync_rate(SYSCTL_HANDLER_ARGS) { struct mpt_softc *mpt; u_int raid_resync_rate; int error; - GIANT_REQUIRED; - mpt = (struct mpt_softc *)arg1; raid_resync_rate = mpt->raid_resync_rate; error = sysctl_handle_int(oidp, &raid_resync_rate, 0, req); if (error || !req->newptr) { return error; } return (mpt_raid_set_vol_resync_rate(mpt, raid_resync_rate)); } static int mpt_raid_sysctl_vol_queue_depth(SYSCTL_HANDLER_ARGS) { struct mpt_softc *mpt; u_int raid_queue_depth; int error; - GIANT_REQUIRED; - mpt = (struct mpt_softc *)arg1; raid_queue_depth = mpt->raid_queue_depth; error = sysctl_handle_int(oidp, &raid_queue_depth, 0, req); if (error || !req->newptr) { return error; } return (mpt_raid_set_vol_queue_depth(mpt, raid_queue_depth)); } static void mpt_raid_sysctl_attach(struct mpt_softc *mpt) { struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(mpt->dev); struct sysctl_oid *tree = device_get_sysctl_tree(mpt->dev); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "vol_member_wce", CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + "vol_member_wce", CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, mpt, 0, mpt_raid_sysctl_vol_member_wce, "A", "volume member WCE(On,Off,On-During-Rebuild,NC)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "vol_queue_depth", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + "vol_queue_depth", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, mpt, 0, mpt_raid_sysctl_vol_queue_depth, "I", "default volume queue depth"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "vol_resync_rate", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + "vol_resync_rate", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, mpt, 0, mpt_raid_sysctl_vol_resync_rate, "I", "volume resync priority (0 == NC, 1 - 255)"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "nonoptimal_volumes", CTLFLAG_RD, &mpt->raid_nonopt_volumes, 0, "number of nonoptimal volumes"); } diff --git a/sys/dev/nvme/nvme_sysctl.c b/sys/dev/nvme/nvme_sysctl.c index e1771febea9b..9ec1f14511f5 100644 --- a/sys/dev/nvme/nvme_sysctl.c +++ b/sys/dev/nvme/nvme_sysctl.c @@ -1,358 +1,358 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2012-2016 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_nvme.h" #include #include #include #include "nvme_private.h" #ifndef NVME_USE_NVD #define NVME_USE_NVD 1 #endif int nvme_use_nvd = NVME_USE_NVD; bool nvme_verbose_cmd_dump = false; SYSCTL_NODE(_hw, OID_AUTO, nvme, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "NVMe sysctl tunables"); SYSCTL_INT(_hw_nvme, OID_AUTO, use_nvd, CTLFLAG_RDTUN, &nvme_use_nvd, 1, "1 = Create NVD devices, 0 = Create NDA devices"); SYSCTL_BOOL(_hw_nvme, OID_AUTO, verbose_cmd_dump, CTLFLAG_RWTUN, &nvme_verbose_cmd_dump, 0, "enable verbose command printting when a command fails"); static void nvme_dump_queue(struct nvme_qpair *qpair) { struct nvme_completion *cpl; struct nvme_command *cmd; int i; printf("id:%04Xh phase:%d\n", qpair->id, qpair->phase); printf("Completion queue:\n"); for (i = 0; i < qpair->num_entries; i++) { cpl = &qpair->cpl[i]; printf("%05d: ", i); nvme_dump_completion(cpl); } printf("Submission queue:\n"); for (i = 0; i < qpair->num_entries; i++) { cmd = &qpair->cmd[i]; printf("%05d: ", i); nvme_dump_command(cmd); } } static int nvme_sysctl_dump_debug(SYSCTL_HANDLER_ARGS) { struct nvme_qpair *qpair = arg1; uint32_t val = 0; int error = sysctl_handle_int(oidp, &val, 0, req); if (error) return (error); if (val != 0) nvme_dump_queue(qpair); return (0); } static int nvme_sysctl_int_coal_time(SYSCTL_HANDLER_ARGS) { struct nvme_controller *ctrlr = arg1; uint32_t oldval = ctrlr->int_coal_time; int error = sysctl_handle_int(oidp, &ctrlr->int_coal_time, 0, req); if (error) return (error); if (oldval != ctrlr->int_coal_time) nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL, NULL); return (0); } static int nvme_sysctl_int_coal_threshold(SYSCTL_HANDLER_ARGS) { struct nvme_controller *ctrlr = arg1; uint32_t oldval = ctrlr->int_coal_threshold; int error = sysctl_handle_int(oidp, &ctrlr->int_coal_threshold, 0, req); if (error) return (error); if (oldval != ctrlr->int_coal_threshold) nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, ctrlr->int_coal_time, ctrlr->int_coal_threshold, NULL, NULL); return (0); } static int nvme_sysctl_timeout_period(SYSCTL_HANDLER_ARGS) { struct nvme_controller *ctrlr = arg1; uint32_t newval = ctrlr->timeout_period; int error = sysctl_handle_int(oidp, &newval, 0, req); if (error || (req->newptr == NULL)) return (error); if (newval > NVME_MAX_TIMEOUT_PERIOD || newval < NVME_MIN_TIMEOUT_PERIOD) { return (EINVAL); } else { ctrlr->timeout_period = newval; } return (0); } static void nvme_qpair_reset_stats(struct nvme_qpair *qpair) { qpair->num_cmds = 0; qpair->num_intr_handler_calls = 0; qpair->num_retries = 0; qpair->num_failures = 0; } static int nvme_sysctl_num_cmds(SYSCTL_HANDLER_ARGS) { struct nvme_controller *ctrlr = arg1; int64_t num_cmds = 0; int i; num_cmds = ctrlr->adminq.num_cmds; for (i = 0; i < ctrlr->num_io_queues; i++) num_cmds += ctrlr->ioq[i].num_cmds; return (sysctl_handle_64(oidp, &num_cmds, 0, req)); } static int nvme_sysctl_num_intr_handler_calls(SYSCTL_HANDLER_ARGS) { struct nvme_controller *ctrlr = arg1; int64_t num_intr_handler_calls = 0; int i; num_intr_handler_calls = ctrlr->adminq.num_intr_handler_calls; for (i = 0; i < ctrlr->num_io_queues; i++) num_intr_handler_calls += ctrlr->ioq[i].num_intr_handler_calls; return (sysctl_handle_64(oidp, &num_intr_handler_calls, 0, req)); } static int nvme_sysctl_num_retries(SYSCTL_HANDLER_ARGS) { struct nvme_controller *ctrlr = arg1; int64_t num_retries = 0; int i; num_retries = ctrlr->adminq.num_retries; for (i = 0; i < ctrlr->num_io_queues; i++) num_retries += ctrlr->ioq[i].num_retries; return (sysctl_handle_64(oidp, &num_retries, 0, req)); } static int nvme_sysctl_num_failures(SYSCTL_HANDLER_ARGS) { struct nvme_controller *ctrlr = arg1; int64_t num_failures = 0; int i; num_failures = ctrlr->adminq.num_failures; for (i = 0; i < ctrlr->num_io_queues; i++) num_failures += ctrlr->ioq[i].num_failures; return (sysctl_handle_64(oidp, &num_failures, 0, req)); } static int nvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS) { struct nvme_controller *ctrlr = arg1; uint32_t i, val = 0; int error = sysctl_handle_int(oidp, &val, 0, req); if (error) return (error); if (val != 0) { nvme_qpair_reset_stats(&ctrlr->adminq); for (i = 0; i < ctrlr->num_io_queues; i++) nvme_qpair_reset_stats(&ctrlr->ioq[i]); } return (0); } static void nvme_sysctl_initialize_queue(struct nvme_qpair *qpair, struct sysctl_ctx_list *ctrlr_ctx, struct sysctl_oid *que_tree) { struct sysctl_oid_list *que_list = SYSCTL_CHILDREN(que_tree); SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_entries", CTLFLAG_RD, &qpair->num_entries, 0, "Number of entries in hardware queue"); SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_trackers", CTLFLAG_RD, &qpair->num_trackers, 0, "Number of trackers pre-allocated for this queue pair"); SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_head", CTLFLAG_RD, &qpair->sq_head, 0, "Current head of submission queue (as observed by driver)"); SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_tail", CTLFLAG_RD, &qpair->sq_tail, 0, "Current tail of submission queue (as observed by driver)"); SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "cq_head", CTLFLAG_RD, &qpair->cq_head, 0, "Current head of completion queue (as observed by driver)"); SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_cmds", CTLFLAG_RD, &qpair->num_cmds, "Number of commands submitted"); SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_intr_handler_calls", CTLFLAG_RD, &qpair->num_intr_handler_calls, "Number of times interrupt handler was invoked (will typically be " "less than number of actual interrupts generated due to " "coalescing)"); SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_retries", CTLFLAG_RD, &qpair->num_retries, "Number of commands retried"); SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_failures", CTLFLAG_RD, &qpair->num_failures, "Number of commands ending in failure after all retries"); SYSCTL_ADD_PROC(ctrlr_ctx, que_list, OID_AUTO, - "dump_debug", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + "dump_debug", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, qpair, 0, nvme_sysctl_dump_debug, "IU", "Dump debug data"); } void nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr) { struct sysctl_ctx_list *ctrlr_ctx; struct sysctl_oid *ctrlr_tree, *que_tree; struct sysctl_oid_list *ctrlr_list; #define QUEUE_NAME_LENGTH 16 char queue_name[QUEUE_NAME_LENGTH]; int i; ctrlr_ctx = device_get_sysctl_ctx(ctrlr->dev); ctrlr_tree = device_get_sysctl_tree(ctrlr->dev); ctrlr_list = SYSCTL_CHILDREN(ctrlr_tree); SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "num_io_queues", CTLFLAG_RD, &ctrlr->num_io_queues, 0, "Number of I/O queue pairs"); SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, - "int_coal_time", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + "int_coal_time", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr, 0, nvme_sysctl_int_coal_time, "IU", "Interrupt coalescing timeout (in microseconds)"); SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, "int_coal_threshold", - CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, ctrlr, 0, + CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr, 0, nvme_sysctl_int_coal_threshold, "IU", "Interrupt coalescing threshold"); SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, - "timeout_period", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + "timeout_period", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr, 0, nvme_sysctl_timeout_period, "IU", "Timeout period (in seconds)"); SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, - "num_cmds", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_NEEDGIANT, + "num_cmds", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, ctrlr, 0, nvme_sysctl_num_cmds, "IU", "Number of commands submitted"); SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, "num_intr_handler_calls", - CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_NEEDGIANT, ctrlr, 0, + CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, ctrlr, 0, nvme_sysctl_num_intr_handler_calls, "IU", "Number of times interrupt handler was invoked (will " "typically be less than number of actual interrupts " "generated due to coalescing)"); SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, - "num_retries", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_NEEDGIANT, + "num_retries", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, ctrlr, 0, nvme_sysctl_num_retries, "IU", "Number of commands retried"); SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, - "num_failures", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_NEEDGIANT, + "num_failures", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, ctrlr, 0, nvme_sysctl_num_failures, "IU", "Number of commands ending in failure after all retries"); SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, - "reset_stats", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, ctrlr, + "reset_stats", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr, 0, nvme_sysctl_reset_stats, "IU", "Reset statistics to zero"); que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO, "adminq", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Admin Queue"); nvme_sysctl_initialize_queue(&ctrlr->adminq, ctrlr_ctx, que_tree); for (i = 0; i < ctrlr->num_io_queues; i++) { snprintf(queue_name, QUEUE_NAME_LENGTH, "ioq%d", i); que_tree = SYSCTL_ADD_NODE(ctrlr_ctx, ctrlr_list, OID_AUTO, queue_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IO Queue"); nvme_sysctl_initialize_queue(&ctrlr->ioq[i], ctrlr_ctx, que_tree); } }