Index: projects/powernv/arm/arm/hdmi_if.m =================================================================== --- projects/powernv/arm/arm/hdmi_if.m (revision 290990) +++ projects/powernv/arm/arm/hdmi_if.m (revision 290991) @@ -1,57 +1,59 @@ #- # Copyright (c) 2015 Oleksandr Tymoshenko # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # #include #include #include INTERFACE hdmi; HEADER { #include - typedef void (*hdmi_event_hook)(void *, int); + typedef void (*hdmi_event_hook)(void *, device_t, int); EVENTHANDLER_DECLARE(hdmi_event, hdmi_event_hook); + + #define HDMI_EVENT_CONNECTED 0 } # # Get EDID info # METHOD int get_edid { device_t dev; uint8_t **edid; uint32_t *edid_length; }; # # Set videomode # METHOD int set_videomode { device_t dev; const struct videomode *videomode; }; Index: projects/powernv/arm/arm/pl310.c =================================================================== --- projects/powernv/arm/arm/pl310.c (revision 290990) +++ projects/powernv/arm/arm/pl310.c (revision 290991) @@ -1,549 +1,549 @@ /*- * Copyright (c) 2012 Olivier Houchard * Copyright (c) 2011 * Ben Gray . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY BEN GRAY ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL BEN GRAY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*
 * Define this if you need to disable PL310 for debugging purposes
 * Spec:
 * http://infocenter.arm.com/help/topic/com.arm.doc.ddi0246e/DDI0246E_l2c310_r3p1_trm.pdf
 */

/*
 * Hardcode errata for now
 * http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0246b/pr01s02s02.html
 */
#define PL310_ERRATA_588369
#define PL310_ERRATA_753970
#define PL310_ERRATA_727915

#define PL310_LOCK(sc) do {		\
	mtx_lock_spin(&(sc)->sc_mtx);	\
} while(0)
#define PL310_UNLOCK(sc) do {		\
	mtx_unlock_spin(&(sc)->sc_mtx);	\
} while(0)

static int pl310_enabled = 1;
TUNABLE_INT("hw.pl310.enabled", &pl310_enabled);

static uint32_t g_l2cache_way_mask;
static const uint32_t g_l2cache_line_size = 32;
static const uint32_t g_l2cache_align_mask = (32 - 1);
static uint32_t g_l2cache_size;
static uint32_t g_way_size;
static uint32_t g_ways_assoc;

static struct pl310_softc *pl310_softc;

static struct ofw_compat_data compat_data[] = {
	{"arm,pl310", true}, /* Non-standard, FreeBSD. */
	{"arm,pl310-cache", true},
	{NULL, false}
};

-void
+static void
 pl310_print_config(struct pl310_softc *sc)
{
	uint32_t aux, prefetch;
	const char *dis = "disabled";
	const char *ena = "enabled";

	aux = pl310_read4(sc, PL310_AUX_CTRL);
	prefetch = pl310_read4(sc, PL310_PREFETCH_CTRL);

	device_printf(sc->sc_dev, "Early BRESP response: %s\n",
	    (aux & AUX_CTRL_EARLY_BRESP) ? ena : dis);
	device_printf(sc->sc_dev, "Instruction prefetch: %s\n",
	    (aux & AUX_CTRL_INSTR_PREFETCH) ? ena : dis);
	device_printf(sc->sc_dev, "Data prefetch: %s\n",
	    (aux & AUX_CTRL_DATA_PREFETCH) ? ena : dis);
	device_printf(sc->sc_dev, "Non-secure interrupt control: %s\n",
	    (aux & AUX_CTRL_NS_INT_CTRL) ? ena : dis);
	device_printf(sc->sc_dev, "Non-secure lockdown: %s\n",
	    (aux & AUX_CTRL_NS_LOCKDOWN) ? ena : dis);
	device_printf(sc->sc_dev, "Share override: %s\n",
	    (aux & AUX_CTRL_SHARE_OVERRIDE) ? ena : dis);
	device_printf(sc->sc_dev, "Double linefill: %s\n",
	    (prefetch & PREFETCH_CTRL_DL) ? ena : dis);
	device_printf(sc->sc_dev, "Instruction prefetch: %s\n",
	    (prefetch & PREFETCH_CTRL_INSTR_PREFETCH) ? ena : dis);
	device_printf(sc->sc_dev, "Data prefetch: %s\n",
	    (prefetch & PREFETCH_CTRL_DATA_PREFETCH) ? ena : dis);
	device_printf(sc->sc_dev, "Double linefill on WRAP request: %s\n",
	    (prefetch & PREFETCH_CTRL_DL_ON_WRAP) ? ena : dis);
	device_printf(sc->sc_dev, "Prefetch drop: %s\n",
	    (prefetch & PREFETCH_CTRL_PREFETCH_DROP) ? ena : dis);
	device_printf(sc->sc_dev, "Incr double Linefill: %s\n",
	    (prefetch & PREFETCH_CTRL_INCR_DL) ? ena : dis);
	device_printf(sc->sc_dev, "Not same ID on exclusive sequence: %s\n",
	    (prefetch & PREFETCH_CTRL_NOTSAMEID) ?
ena : dis); device_printf(sc->sc_dev, "Prefetch offset: %d\n", (prefetch & PREFETCH_CTRL_OFFSET_MASK)); } void pl310_set_ram_latency(struct pl310_softc *sc, uint32_t which_reg, uint32_t read, uint32_t write, uint32_t setup) { uint32_t v; KASSERT(which_reg == PL310_TAG_RAM_CTRL || which_reg == PL310_DATA_RAM_CTRL, ("bad pl310 ram latency register address")); v = pl310_read4(sc, which_reg); if (setup != 0) { KASSERT(setup <= 8, ("bad pl310 setup latency: %d", setup)); v &= ~RAM_CTRL_SETUP_MASK; v |= (setup - 1) << RAM_CTRL_SETUP_SHIFT; } if (read != 0) { KASSERT(read <= 8, ("bad pl310 read latency: %d", read)); v &= ~RAM_CTRL_READ_MASK; v |= (read - 1) << RAM_CTRL_READ_SHIFT; } if (write != 0) { KASSERT(write <= 8, ("bad pl310 write latency: %d", write)); v &= ~RAM_CTRL_WRITE_MASK; v |= (write - 1) << RAM_CTRL_WRITE_SHIFT; } pl310_write4(sc, which_reg, v); } static int pl310_filter(void *arg) { struct pl310_softc *sc = arg; uint32_t intr; intr = pl310_read4(sc, PL310_INTR_MASK); if (!sc->sc_enabled && (intr & INTR_MASK_ECNTR)) { /* * This is for debug purpose, so be blunt about it * We disable PL310 only when something fishy is going * on and we need to make sure L2 cache is 100% disabled */ panic("pl310: caches disabled but cache event detected\n"); } return (FILTER_HANDLED); } static __inline void pl310_wait_background_op(uint32_t off, uint32_t mask) { while (pl310_read4(pl310_softc, off) & mask) continue; } /** * pl310_cache_sync - performs a cache sync operation * * According to the TRM: * * "Before writing to any other register you must perform an explicit * Cache Sync operation. This is particularly important when the cache is * enabled and changes to how the cache allocates new lines are to be made." * * */ static __inline void pl310_cache_sync(void) { if ((pl310_softc == NULL) || !pl310_softc->sc_enabled) return; #ifdef PL310_ERRATA_753970 if (pl310_softc->sc_rtl_revision == CACHE_ID_RELEASE_r3p0) /* Write uncached PL310 register */ pl310_write4(pl310_softc, 0x740, 0xffffffff); else #endif pl310_write4(pl310_softc, PL310_CACHE_SYNC, 0xffffffff); } static void pl310_wbinv_all(void) { if ((pl310_softc == NULL) || !pl310_softc->sc_enabled) return; PL310_LOCK(pl310_softc); #ifdef PL310_ERRATA_727915 if (pl310_softc->sc_rtl_revision == CACHE_ID_RELEASE_r2p0) { int i, j; for (i = 0; i < g_ways_assoc; i++) { for (j = 0; j < g_way_size / g_l2cache_line_size; j++) { pl310_write4(pl310_softc, PL310_CLEAN_INV_LINE_IDX, (i << 28 | j << 5)); } } pl310_cache_sync(); PL310_UNLOCK(pl310_softc); return; } if (pl310_softc->sc_rtl_revision == CACHE_ID_RELEASE_r3p0) platform_pl310_write_debug(pl310_softc, 3); #endif pl310_write4(pl310_softc, PL310_CLEAN_INV_WAY, g_l2cache_way_mask); pl310_wait_background_op(PL310_CLEAN_INV_WAY, g_l2cache_way_mask); pl310_cache_sync(); #ifdef PL310_ERRATA_727915 if (pl310_softc->sc_rtl_revision == CACHE_ID_RELEASE_r3p0) platform_pl310_write_debug(pl310_softc, 0); #endif PL310_UNLOCK(pl310_softc); } static void pl310_wbinv_range(vm_paddr_t start, vm_size_t size) { if ((pl310_softc == NULL) || !pl310_softc->sc_enabled) return; PL310_LOCK(pl310_softc); if (start & g_l2cache_align_mask) { size += start & g_l2cache_align_mask; start &= ~g_l2cache_align_mask; } if (size & g_l2cache_align_mask) { size &= ~g_l2cache_align_mask; size += g_l2cache_line_size; } #ifdef PL310_ERRATA_727915 platform_pl310_write_debug(pl310_softc, 3); #endif while (size > 0) { #ifdef PL310_ERRATA_588369 if (pl310_softc->sc_rtl_revision <= CACHE_ID_RELEASE_r1p0) { /* * Errata 588369 says that clean + 
inv may keep the
			 * cache line if it was clean, the recommended
			 * workaround is to clean then invalidate the cache
			 * line, with write-back and cache linefill disabled.
			 */
			pl310_write4(pl310_softc, PL310_CLEAN_LINE_PA, start);
			pl310_write4(pl310_softc, PL310_INV_LINE_PA, start);
		} else
#endif
			pl310_write4(pl310_softc, PL310_CLEAN_INV_LINE_PA, start);

		start += g_l2cache_line_size;
		size -= g_l2cache_line_size;
	}
#ifdef PL310_ERRATA_727915
	platform_pl310_write_debug(pl310_softc, 0);
#endif
	pl310_cache_sync();
	PL310_UNLOCK(pl310_softc);
}

static void
pl310_wb_range(vm_paddr_t start, vm_size_t size)
{

	if ((pl310_softc == NULL) || !pl310_softc->sc_enabled)
		return;
	PL310_LOCK(pl310_softc);
	if (start & g_l2cache_align_mask) {
		size += start & g_l2cache_align_mask;
		start &= ~g_l2cache_align_mask;
	}
	if (size & g_l2cache_align_mask) {
		size &= ~g_l2cache_align_mask;
		size += g_l2cache_line_size;
	}
	while (size > 0) {
		pl310_write4(pl310_softc, PL310_CLEAN_LINE_PA, start);
		start += g_l2cache_line_size;
		size -= g_l2cache_line_size;
	}
	pl310_cache_sync();
	PL310_UNLOCK(pl310_softc);
}

static void
pl310_inv_range(vm_paddr_t start, vm_size_t size)
{

	if ((pl310_softc == NULL) || !pl310_softc->sc_enabled)
		return;
	PL310_LOCK(pl310_softc);
	if (start & g_l2cache_align_mask) {
		size += start & g_l2cache_align_mask;
		start &= ~g_l2cache_align_mask;
	}
	if (size & g_l2cache_align_mask) {
		size &= ~g_l2cache_align_mask;
		size += g_l2cache_line_size;
	}
	while (size > 0) {
		pl310_write4(pl310_softc, PL310_INV_LINE_PA, start);
		start += g_l2cache_line_size;
		size -= g_l2cache_line_size;
	}
	pl310_cache_sync();
	PL310_UNLOCK(pl310_softc);
}

static void
pl310_drain_writebuf(void)
{

	if ((pl310_softc == NULL) || !pl310_softc->sc_enabled)
		return;
	PL310_LOCK(pl310_softc);
	pl310_cache_sync();
	PL310_UNLOCK(pl310_softc);
}

static void
pl310_set_way_sizes(struct pl310_softc *sc)
{
	uint32_t aux_value;

	aux_value = pl310_read4(sc, PL310_AUX_CTRL);
	g_way_size = (aux_value & AUX_CTRL_WAY_SIZE_MASK) >>
	    AUX_CTRL_WAY_SIZE_SHIFT;
	g_way_size = 1 << (g_way_size + 13);
	if (aux_value & (1 << AUX_CTRL_ASSOCIATIVITY_SHIFT))
		g_ways_assoc = 16;
	else
		g_ways_assoc = 8;
	g_l2cache_way_mask = (1 << g_ways_assoc) - 1;
	g_l2cache_size = g_way_size * g_ways_assoc;
}

/*
 * Setup interrupt handling. This is done only if the cache controller is
 * disabled, for debugging. We set counters so when a cache event happens
 * we'll get interrupted and be warned that something is wrong, because no
 * cache events should happen if we're disabled.
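 * (Counter 0 is configured below to count cache-line evictions and
 * counter 1 data-read requests; any increment raises the ECNTR interrupt
 * and pl310_filter() panics to flag the stray L2 activity.)
 */

/*
 * Illustrative sketch only (hypothetical, not part of this commit): reading
 * the two event counters armed by pl310_config_intr() back out of the
 * controller would look like this.
 */
#ifdef notyet
static uint32_t
pl310_read_event_counters(struct pl310_softc *sc, uint32_t *c1)
{

	/* Counter 1 value, then counter 0 value. */
	*c1 = pl310_read4(sc, PL310_EVENT_COUNTER1_VAL);
	return (pl310_read4(sc, PL310_EVENT_COUNTER0_VAL));
}
#endif

/*
 * The config_intrhook handler below arms the counters once interrupt
 * delivery is available: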
*/ static void pl310_config_intr(void *arg) { struct pl310_softc * sc; sc = arg; /* activate the interrupt */ bus_setup_intr(sc->sc_dev, sc->sc_irq_res, INTR_TYPE_MISC | INTR_MPSAFE, pl310_filter, NULL, sc, &sc->sc_irq_h); /* Cache Line Eviction for Counter 0 */ pl310_write4(sc, PL310_EVENT_COUNTER0_CONF, EVENT_COUNTER_CONF_INCR | EVENT_COUNTER_CONF_CO); /* Data Read Request for Counter 1 */ pl310_write4(sc, PL310_EVENT_COUNTER1_CONF, EVENT_COUNTER_CONF_INCR | EVENT_COUNTER_CONF_DRREQ); /* Enable and clear pending interrupts */ pl310_write4(sc, PL310_INTR_CLEAR, INTR_MASK_ECNTR); pl310_write4(sc, PL310_INTR_MASK, INTR_MASK_ALL); /* Enable counters and reset C0 and C1 */ pl310_write4(sc, PL310_EVENT_COUNTER_CTRL, EVENT_COUNTER_CTRL_ENABLED | EVENT_COUNTER_CTRL_C0_RESET | EVENT_COUNTER_CTRL_C1_RESET); config_intrhook_disestablish(sc->sc_ich); free(sc->sc_ich, M_DEVBUF); sc->sc_ich = NULL; } static int pl310_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_search_compatible(dev, compat_data)->ocd_data) return (ENXIO); device_set_desc(dev, "PL310 L2 cache controller"); return (0); } static int pl310_attach(device_t dev) { struct pl310_softc *sc = device_get_softc(dev); int rid; uint32_t cache_id, debug_ctrl; sc->sc_dev = dev; rid = 0; sc->sc_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->sc_mem_res == NULL) panic("%s: Cannot map registers", device_get_name(dev)); /* Allocate an IRQ resource */ rid = 0; sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | RF_SHAREABLE); if (sc->sc_irq_res == NULL) { device_printf(dev, "cannot allocate IRQ, not using interrupt\n"); } pl310_softc = sc; mtx_init(&sc->sc_mtx, "pl310lock", NULL, MTX_SPIN); cache_id = pl310_read4(sc, PL310_CACHE_ID); sc->sc_rtl_revision = (cache_id >> CACHE_ID_RELEASE_SHIFT) & CACHE_ID_RELEASE_MASK; device_printf(dev, "Part number: 0x%x, release: 0x%x\n", (cache_id >> CACHE_ID_PARTNUM_SHIFT) & CACHE_ID_PARTNUM_MASK, (cache_id >> CACHE_ID_RELEASE_SHIFT) & CACHE_ID_RELEASE_MASK); /* * If L2 cache is already enabled then something has violated the rules, * because caches are supposed to be off at kernel entry. The cache * must be disabled to write the configuration registers without * triggering an access error (SLVERR), but there's no documented safe * procedure for disabling the L2 cache in the manual. So we'll try to * invent one: * - Use the debug register to force write-through mode and prevent * linefills (allocation of new lines on read); now anything we do * will not cause new data to come into the L2 cache. * - Writeback and invalidate the current contents. * - Disable the controller. * - Restore the original debug settings. 
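	 * (Forcing write-through with linefills disabled is the same
	 * debug-register trick the errata 727915 workaround above relies on
	 * when it passes 3, i.e. DEBUG_CTRL_DISABLE_WRITEBACK |
	 * DEBUG_CTRL_DISABLE_LINEFILL, to platform_pl310_write_debug().)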
*/ if (pl310_read4(sc, PL310_CTRL) & CTRL_ENABLED) { device_printf(dev, "Warning: L2 Cache should not already be " "active; trying to de-activate and re-initialize...\n"); sc->sc_enabled = 1; debug_ctrl = pl310_read4(sc, PL310_DEBUG_CTRL); platform_pl310_write_debug(sc, debug_ctrl | DEBUG_CTRL_DISABLE_WRITEBACK | DEBUG_CTRL_DISABLE_LINEFILL); pl310_set_way_sizes(sc); pl310_wbinv_all(); platform_pl310_write_ctrl(sc, CTRL_DISABLED); platform_pl310_write_debug(sc, debug_ctrl); } sc->sc_enabled = pl310_enabled; if (sc->sc_enabled) { platform_pl310_init(sc); pl310_set_way_sizes(sc); /* platform init might change these */ pl310_write4(pl310_softc, PL310_INV_WAY, 0xffff); pl310_wait_background_op(PL310_INV_WAY, 0xffff); platform_pl310_write_ctrl(sc, CTRL_ENABLED); device_printf(dev, "L2 Cache enabled: %uKB/%dB %d ways\n", (g_l2cache_size / 1024), g_l2cache_line_size, g_ways_assoc); if (bootverbose) pl310_print_config(sc); } else { if (sc->sc_irq_res != NULL) { sc->sc_ich = malloc(sizeof(*sc->sc_ich), M_DEVBUF, M_WAITOK); sc->sc_ich->ich_func = pl310_config_intr; sc->sc_ich->ich_arg = sc; if (config_intrhook_establish(sc->sc_ich) != 0) { device_printf(dev, "config_intrhook_establish failed\n"); free(sc->sc_ich, M_DEVBUF); return(ENXIO); } } device_printf(dev, "L2 Cache disabled\n"); } /* Set the l2 functions in the set of cpufuncs */ cpufuncs.cf_l2cache_wbinv_all = pl310_wbinv_all; cpufuncs.cf_l2cache_wbinv_range = pl310_wbinv_range; cpufuncs.cf_l2cache_inv_range = pl310_inv_range; cpufuncs.cf_l2cache_wb_range = pl310_wb_range; cpufuncs.cf_l2cache_drain_writebuf = pl310_drain_writebuf; return (0); } static device_method_t pl310_methods[] = { DEVMETHOD(device_probe, pl310_probe), DEVMETHOD(device_attach, pl310_attach), DEVMETHOD_END }; static driver_t pl310_driver = { "l2cache", pl310_methods, sizeof(struct pl310_softc), }; static devclass_t pl310_devclass; EARLY_DRIVER_MODULE(pl310, simplebus, pl310_driver, pl310_devclass, 0, 0, BUS_PASS_CPU + BUS_PASS_ORDER_MIDDLE); Index: projects/powernv/arm/freescale/imx/imx6_ccm.c =================================================================== --- projects/powernv/arm/freescale/imx/imx6_ccm.c (revision 290990) +++ projects/powernv/arm/freescale/imx/imx6_ccm.c (revision 290991) @@ -1,356 +1,384 @@ /*- * Copyright (c) 2013 Ian Lepore * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Clocks and power control driver for Freescale i.MX6 family of SoCs. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef CCGR_CLK_MODE_ALWAYS #define CCGR_CLK_MODE_OFF 0 #define CCGR_CLK_MODE_RUNMODE 1 #define CCGR_CLK_MODE_ALWAYS 3 #endif struct ccm_softc { device_t dev; struct resource *mem_res; }; static struct ccm_softc *ccm_sc; static inline uint32_t RD4(struct ccm_softc *sc, bus_size_t off) { return (bus_read_4(sc->mem_res, off)); } static inline void WR4(struct ccm_softc *sc, bus_size_t off, uint32_t val) { bus_write_4(sc->mem_res, off, val); } /* * Until we have a fully functional ccm driver which implements the fdt_clock * interface, use the age-old workaround of unconditionally enabling the clocks * for devices we might need to use. The SoC defaults to most clocks enabled, * but the rom boot code and u-boot disable a few of them. We turn on only * what's needed to run the chip plus devices we have drivers for, and turn off * devices we don't yet have drivers for. (Note that USB is not turned on here * because that is one we do when the driver asks for it.) */ static void ccm_init_gates(struct ccm_softc *sc) { - /* Turns on... 
*/ - WR4(sc, CCM_CCGR0, 0x0000003f); /* ahpbdma, aipstz 1 & 2 busses */ - WR4(sc, CCM_CCGR1, 0x00300c00); /* gpt, enet */ - WR4(sc, CCM_CCGR2, 0x0fffffc0); /* ipmux & ipsync (bridges), iomux, i2c */ - WR4(sc, CCM_CCGR3, 0x3ff00000); /* DDR memory controller */ - WR4(sc, CCM_CCGR4, 0x0000f300); /* pl301 bus crossbar */ - WR4(sc, CCM_CCGR5, 0x0ffc00c0); /* uarts, ssi, sdma */ - WR4(sc, CCM_CCGR6, 0x000003ff); /* usdhc 1-4, usboh3 */ + uint32_t reg; + + /* ahpbdma, aipstz 1 & 2 busses */ + reg = CCGR0_AIPS_TZ1 | CCGR0_AIPS_TZ2 | CCGR0_ABPHDMA; + WR4(sc, CCM_CCGR0, reg); + + /* gpt, enet */ + reg = CCGR1_ENET | CCGR1_GPT; + WR4(sc, CCM_CCGR1, reg); + + /* ipmux & ipsync (bridges), iomux, i2c */ + reg = CCGR2_I2C1 | CCGR2_I2C2 | CCGR2_I2C3 | CCGR2_IIM | + CCGR2_IOMUX_IPT | CCGR2_IPMUX1 | CCGR2_IPMUX2 | CCGR2_IPMUX3 | + CCGR2_IPSYNC_IP2APB_TZASC1 | CCGR2_IPSYNC_IP2APB_TZASC2 | + CCGR2_IPSYNC_VDOA; + WR4(sc, CCM_CCGR2, reg); + + /* DDR memory controller */ + reg = CCGR3_OCRAM | CCGR3_MMDC_CORE_IPG | + CCGR3_MMDC_CORE_ACLK_FAST | CCGR3_CG11 | CCGR3_CG13; + WR4(sc, CCM_CCGR3, reg); + + /* pl301 bus crossbar */ + reg = CCGR4_PL301_MX6QFAST1_S133 | + CCGR4_PL301_MX6QPER1_BCH | CCGR4_PL301_MX6QPER2_MAIN; + WR4(sc, CCM_CCGR4, reg); + + /* uarts, ssi, sdma */ + reg = CCGR5_SDMA | CCGR5_SSI1 | CCGR5_SSI2 | CCGR5_SSI3 | + CCGR5_UART | CCGR5_UART_SERIAL; + WR4(sc, CCM_CCGR5, reg); + + /* usdhc 1-4, usboh3 */ + reg = CCGR6_USBOH3 | CCGR6_USDHC1 | CCGR6_USDHC2 | + CCGR6_USDHC3 | CCGR6_USDHC4; + WR4(sc, CCM_CCGR6, reg); } static int ccm_detach(device_t dev) { struct ccm_softc *sc; sc = device_get_softc(dev); if (sc->mem_res != NULL) bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->mem_res); return (0); } static int ccm_attach(device_t dev) { struct ccm_softc *sc; int err, rid; uint32_t reg; sc = device_get_softc(dev); err = 0; /* Allocate bus_space resources. */ rid = 0; sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res == NULL) { device_printf(dev, "Cannot allocate memory resources\n"); err = ENXIO; goto out; } ccm_sc = sc; /* * Configure the Low Power Mode setting to leave the ARM core power on * when a WFI instruction is executed. This lets the MPCore timers and * GIC continue to run, which is helpful when the only thing that can * wake you up is an MPCore Private Timer interrupt delivered via GIC. * * XXX Based on the docs, setting CCM_CGPR_INT_MEM_CLK_LPM shouldn't be * required when the LPM bits are set to LPM_RUN. But experimentally * I've experienced a fairly rare lockup when not setting it. I was * unable to prove conclusively that the lockup was related to power * management or that this definitively fixes it. Revisit this. */ reg = RD4(sc, CCM_CGPR); reg |= CCM_CGPR_INT_MEM_CLK_LPM; WR4(sc, CCM_CGPR, reg); reg = RD4(sc, CCM_CLPCR); reg = (reg & ~CCM_CLPCR_LPM_MASK) | CCM_CLPCR_LPM_RUN; WR4(sc, CCM_CLPCR, reg); ccm_init_gates(sc); err = 0; out: if (err != 0) ccm_detach(dev); return (err); } static int ccm_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (ofw_bus_is_compatible(dev, "fsl,imx6q-ccm") == 0) return (ENXIO); device_set_desc(dev, "Freescale i.MX6 Clock Control Module"); return (BUS_PROBE_DEFAULT); } void imx_ccm_ssi_configure(device_t _ssidev) { struct ccm_softc *sc; uint32_t reg; sc = ccm_sc; /* * Select PLL4 (Audio PLL) clock multiplexer as source. * PLL output frequency = Fref * (DIV_SELECT + NUM/DENOM). 
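	 * As an illustrative example (values are hypothetical, not from this
	 * commit): with the usual 24 MHz reference and DIV_SELECT = 32,
	 * NUM = 0, PLL4 runs at 24 MHz * 32 = 768 MHz; the PRED (/4) and
	 * PODF (/2) dividers programmed below then take that down to 96 MHz.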
*/ reg = RD4(sc, CCM_CSCMR1); reg &= ~(SSI_CLK_SEL_M << SSI1_CLK_SEL_S); reg |= (SSI_CLK_SEL_PLL4 << SSI1_CLK_SEL_S); reg &= ~(SSI_CLK_SEL_M << SSI2_CLK_SEL_S); reg |= (SSI_CLK_SEL_PLL4 << SSI2_CLK_SEL_S); reg &= ~(SSI_CLK_SEL_M << SSI3_CLK_SEL_S); reg |= (SSI_CLK_SEL_PLL4 << SSI3_CLK_SEL_S); WR4(sc, CCM_CSCMR1, reg); /* * Ensure we have set hardware-default values * for pre and post dividers. */ /* SSI1 and SSI3 */ reg = RD4(sc, CCM_CS1CDR); /* Divide by 2 */ reg &= ~(SSI_CLK_PODF_MASK << SSI1_CLK_PODF_SHIFT); reg &= ~(SSI_CLK_PODF_MASK << SSI3_CLK_PODF_SHIFT); reg |= (0x1 << SSI1_CLK_PODF_SHIFT); reg |= (0x1 << SSI3_CLK_PODF_SHIFT); /* Divide by 4 */ reg &= ~(SSI_CLK_PRED_MASK << SSI1_CLK_PRED_SHIFT); reg &= ~(SSI_CLK_PRED_MASK << SSI3_CLK_PRED_SHIFT); reg |= (0x3 << SSI1_CLK_PRED_SHIFT); reg |= (0x3 << SSI3_CLK_PRED_SHIFT); WR4(sc, CCM_CS1CDR, reg); /* SSI2 */ reg = RD4(sc, CCM_CS2CDR); /* Divide by 2 */ reg &= ~(SSI_CLK_PODF_MASK << SSI2_CLK_PODF_SHIFT); reg |= (0x1 << SSI2_CLK_PODF_SHIFT); /* Divide by 4 */ reg &= ~(SSI_CLK_PRED_MASK << SSI2_CLK_PRED_SHIFT); reg |= (0x3 << SSI2_CLK_PRED_SHIFT); WR4(sc, CCM_CS2CDR, reg); } void imx_ccm_usb_enable(device_t _usbdev) { /* * For imx6, the USBOH3 clock gate is bits 0-1 of CCGR6, so no need for * shifting and masking here, just set the low-order two bits to ALWAYS. */ WR4(ccm_sc, CCM_CCGR6, RD4(ccm_sc, CCM_CCGR6) | CCGR_CLK_MODE_ALWAYS); } void imx_ccm_usbphy_enable(device_t _phydev) { /* * XXX Which unit? * Right now it's not clear how to figure from fdt data which phy unit * we're supposed to operate on. Until this is worked out, just enable * both PHYs. */ #if 0 int phy_num, regoff; phy_num = 0; /* XXX */ switch (phy_num) { case 0: regoff = 0; break; case 1: regoff = 0x10; break; default: device_printf(ccm_sc->dev, "Bad PHY number %u,\n", phy_num); return; } imx6_anatop_write_4(IMX6_ANALOG_CCM_PLL_USB1 + regoff, IMX6_ANALOG_CCM_PLL_USB_ENABLE | IMX6_ANALOG_CCM_PLL_USB_POWER | IMX6_ANALOG_CCM_PLL_USB_EN_USB_CLKS); #else imx6_anatop_write_4(IMX6_ANALOG_CCM_PLL_USB1 + 0, IMX6_ANALOG_CCM_PLL_USB_ENABLE | IMX6_ANALOG_CCM_PLL_USB_POWER | IMX6_ANALOG_CCM_PLL_USB_EN_USB_CLKS); imx6_anatop_write_4(IMX6_ANALOG_CCM_PLL_USB1 + 0x10, IMX6_ANALOG_CCM_PLL_USB_ENABLE | IMX6_ANALOG_CCM_PLL_USB_POWER | IMX6_ANALOG_CCM_PLL_USB_EN_USB_CLKS); #endif } uint32_t imx_ccm_ipg_hz(void) { return (66000000); } uint32_t imx_ccm_perclk_hz(void) { return (66000000); } uint32_t imx_ccm_sdhci_hz(void) { return (200000000); } uint32_t imx_ccm_uart_hz(void) { return (80000000); } uint32_t imx_ccm_ahb_hz(void) { return (132000000); } uint32_t imx_ccm_get_cacrr(void) { return (RD4(ccm_sc, CCM_CACCR)); } void imx_ccm_set_cacrr(uint32_t divisor) { WR4(ccm_sc, CCM_CACCR, divisor); } static device_method_t ccm_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ccm_probe), DEVMETHOD(device_attach, ccm_attach), DEVMETHOD(device_detach, ccm_detach), DEVMETHOD_END }; static driver_t ccm_driver = { "ccm", ccm_methods, sizeof(struct ccm_softc) }; static devclass_t ccm_devclass; EARLY_DRIVER_MODULE(ccm, simplebus, ccm_driver, ccm_devclass, 0, 0, BUS_PASS_CPU + BUS_PASS_ORDER_EARLY); Index: projects/powernv/arm/freescale/imx/imx6_ccmreg.h =================================================================== --- projects/powernv/arm/freescale/imx/imx6_ccmreg.h (revision 290990) +++ projects/powernv/arm/freescale/imx/imx6_ccmreg.h (revision 290991) @@ -1,69 +1,114 @@ /*- * Copyright (c) 2013 Ian Lepore * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef IMX6_CCMREG_H #define IMX6_CCMREG_H #define CCM_CACCR 0x010 #define CCM_CSCMR1 0x01C #define SSI1_CLK_SEL_S 10 #define SSI2_CLK_SEL_S 12 #define SSI3_CLK_SEL_S 14 #define SSI_CLK_SEL_M 0x3 #define SSI_CLK_SEL_508_PFD 0 #define SSI_CLK_SEL_454_PFD 1 #define SSI_CLK_SEL_PLL4 2 #define CCM_CSCMR2 0x020 #define CCM_CS1CDR 0x028 #define SSI1_CLK_PODF_SHIFT 0 #define SSI1_CLK_PRED_SHIFT 6 #define SSI3_CLK_PODF_SHIFT 16 #define SSI3_CLK_PRED_SHIFT 22 #define SSI_CLK_PODF_MASK 0x3f #define SSI_CLK_PRED_MASK 0x7 #define CCM_CS2CDR 0x02C #define SSI2_CLK_PODF_SHIFT 0 #define SSI2_CLK_PRED_SHIFT 6 #define CCM_CSCDR2 0x038 #define CCM_CLPCR 0x054 #define CCM_CLPCR_LPM_MASK 0x03 #define CCM_CLPCR_LPM_RUN 0x00 #define CCM_CLPCR_LPM_WAIT 0x01 #define CCM_CLPCR_LPM_STOP 0x02 #define CCM_CGPR 0x064 #define CCM_CGPR_INT_MEM_CLK_LPM (1 << 17) #define CCM_CCGR0 0x068 +#define CCGR0_AIPS_TZ1 (0x3 << 0) +#define CCGR0_AIPS_TZ2 (0x3 << 2) +#define CCGR0_ABPHDMA (0x3 << 4) #define CCM_CCGR1 0x06C +#define CCGR1_ENET (0x3 << 10) +#define CCGR1_GPT (0x3 << 20) #define CCM_CCGR2 0x070 +#define CCGR2_HDMI_TX (0x3 << 0) +#define CCGR2_HDMI_TX_ISFR (0x3 << 4) +#define CCGR2_I2C1 (0x3 << 6) +#define CCGR2_I2C2 (0x3 << 8) +#define CCGR2_I2C3 (0x3 << 10) +#define CCGR2_IIM (0x3 << 12) +#define CCGR2_IOMUX_IPT (0x3 << 14) +#define CCGR2_IPMUX1 (0x3 << 16) +#define CCGR2_IPMUX2 (0x3 << 18) +#define CCGR2_IPMUX3 (0x3 << 20) +#define CCGR2_IPSYNC_IP2APB_TZASC1 (0x3 << 22) +#define CCGR2_IPSYNC_IP2APB_TZASC2 (0x3 << 24) +#define CCGR2_IPSYNC_VDOA (0x3 << 26) #define CCM_CCGR3 0x074 +#define CCGR3_IPU1_IPU (0x3 << 0) +#define CCGR3_IPU1_DI0 (0x3 << 2) +#define CCGR3_IPU1_DI1 (0x3 << 4) +#define CCGR3_IPU2_IPU (0x3 << 6) +#define CCGR3_IPU2_DI0 (0x3 << 8) +#define CCGR3_IPU2_DI1 (0x3 << 10) +#define CCGR3_LDB_DI0 (0x3 << 12) +#define CCGR3_LDB_DI1 (0x3 << 14) +#define CCGR3_MMDC_CORE_ACLK_FAST (0x3 << 20) +#define CCGR3_CG11 (0x3 << 22) +#define CCGR3_MMDC_CORE_IPG (0x3 << 24) +#define CCGR3_CG13 (0x3 << 26) +#define CCGR3_OCRAM (0x3 << 28) #define CCM_CCGR4 0x078 +#define CCGR4_PL301_MX6QFAST1_S133 (0x3 << 8) +#define CCGR4_PL301_MX6QPER1_BCH (0x3 << 12) +#define CCGR4_PL301_MX6QPER2_MAIN (0x3 << 14) #define CCM_CCGR5 0x07C +#define CCGR5_SDMA (0x3 << 6) +#define 
CCGR5_SSI1 (0x3 << 18)
+#define CCGR5_SSI2 (0x3 << 20)
+#define CCGR5_SSI3 (0x3 << 22)
+#define CCGR5_UART (0x3 << 24)
+#define CCGR5_UART_SERIAL (0x3 << 26)
#define CCM_CCGR6 0x080
+#define CCGR6_USBOH3 (0x3 << 0)
+#define CCGR6_USDHC1 (0x3 << 2)
+#define CCGR6_USDHC2 (0x3 << 4)
+#define CCGR6_USDHC3 (0x3 << 6)
+#define CCGR6_USDHC4 (0x3 << 8)
#define CCM_CMEOR 0x088

#endif

Index: projects/powernv/arm/include/pcb.h
===================================================================
--- projects/powernv/arm/include/pcb.h	(revision 290990)
+++ projects/powernv/arm/include/pcb.h	(revision 290991)
@@ -1,85 +1,88 @@
/* $NetBSD: pcb.h,v 1.10 2003/10/13 21:46:39 scw Exp $ */

/*-
 * Copyright (c) 2001 Matt Thomas .
 * Copyright (c) 1994 Mark Brinicombe.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by the RiscBSD team.
 * 4. The name "RiscBSD" nor the name of the author may be used to
 *    endorse or promote products derived from this software without specific
 *    prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY RISCBSD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL RISCBSD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _MACHINE_PCB_H_
#define _MACHINE_PCB_H_

#include
#include

/*
 * WARNING!
 * Keep pcb_regs first for faster access in switch.S
 */
struct pcb {
	struct switchframe pcb_regs;	/* CPU state */
	u_int	pcb_flags;
#define PCB_OWNFPU	0x00000001
#define PCB_NOALIGNFLT	0x00000002
	caddr_t	pcb_onfault;		/* On fault handler */
-#ifdef ARM_NEW_PMAP
-	uint32_t pcb_pagedir;		/* TTB0 value */
-#else
-	vm_offset_t pcb_pagedir;	/* PT hooks */
+	vm_offset_t pcb_pagedir;	/* TTB0 value */
+	/*
+	 * XXX:
+	 * Variables pcb_pl1vec, pcb_l1vec, pcb_dacr are used solely
+	 * by old PMAP. Keep them here for PCB binary compatibility
+	 * between old and new PMAP.
+	 */
	uint32_t *pcb_pl1vec;		/* PTR to vector_base L1 entry*/
	uint32_t pcb_l1vec;		/* Value to stuff on ctx sw */
	u_int	pcb_dacr;		/* Domain Access Control Reg */
-#endif
+
	struct vfp_state pcb_vfpstate;	/* VFP/NEON state */
	u_int	pcb_vfpcpu;		/* VFP/NEON last cpu */
} __aligned(8);
/*
 * We need the PCB to be aligned on 8 bytes, as we may
 * access it using ldrd/strd, and the ARM ABI requires it
 * to be aligned on 8 bytes.
 */

/*
 * No additional data for core dumps.
 */
*/ struct md_coredump { int md_empty; }; void makectx(struct trapframe *tf, struct pcb *pcb); #ifdef _KERNEL void savectx(struct pcb *) __returns_twice; #endif /* _KERNEL */ #endif /* !_MACHINE_PCB_H_ */ Index: projects/powernv/arm/include/pl310.h =================================================================== --- projects/powernv/arm/include/pl310.h (revision 290990) +++ projects/powernv/arm/include/pl310.h (revision 290991) @@ -1,188 +1,187 @@ /*- * Copyright (c) 2012 Olivier Houchard. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * $FreeBSD$ */ #ifndef PL310_H_ #define PL310_H_ /** * PL310 - L2 Cache Controller register offsets. 
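 * (Register map as documented in the ARM L2C-310 TRM, DDI 0246.)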
* */ #define PL310_CACHE_ID 0x000 #define CACHE_ID_RELEASE_SHIFT 0 #define CACHE_ID_RELEASE_MASK 0x3f #define CACHE_ID_RELEASE_r0p0 0x00 #define CACHE_ID_RELEASE_r1p0 0x02 #define CACHE_ID_RELEASE_r2p0 0x04 #define CACHE_ID_RELEASE_r3p0 0x05 #define CACHE_ID_RELEASE_r3p1 0x06 #define CACHE_ID_RELEASE_r3p2 0x08 #define CACHE_ID_RELEASE_r3p3 0x09 #define CACHE_ID_PARTNUM_SHIFT 6 #define CACHE_ID_PARTNUM_MASK 0xf #define CACHE_ID_PARTNUM_VALUE 0x3 #define PL310_CACHE_TYPE 0x004 #define PL310_CTRL 0x100 #define CTRL_ENABLED 0x01 #define CTRL_DISABLED 0x00 #define PL310_AUX_CTRL 0x104 #define AUX_CTRL_MASK 0xc0000fff #define AUX_CTRL_ASSOCIATIVITY_SHIFT 16 #define AUX_CTRL_WAY_SIZE_SHIFT 17 #define AUX_CTRL_WAY_SIZE_MASK (0x7 << 17) #define AUX_CTRL_SHARE_OVERRIDE (1 << 22) #define AUX_CTRL_NS_LOCKDOWN (1 << 26) #define AUX_CTRL_NS_INT_CTRL (1 << 27) #define AUX_CTRL_DATA_PREFETCH (1 << 28) #define AUX_CTRL_INSTR_PREFETCH (1 << 29) #define AUX_CTRL_EARLY_BRESP (1 << 30) #define PL310_TAG_RAM_CTRL 0x108 #define PL310_DATA_RAM_CTRL 0x10C #define RAM_CTRL_WRITE_SHIFT 8 #define RAM_CTRL_WRITE_MASK (0x7 << 8) #define RAM_CTRL_READ_SHIFT 4 #define RAM_CTRL_READ_MASK (0x7 << 4) #define RAM_CTRL_SETUP_SHIFT 0 #define RAM_CTRL_SETUP_MASK (0x7 << 0) #define PL310_EVENT_COUNTER_CTRL 0x200 #define EVENT_COUNTER_CTRL_ENABLED (1 << 0) #define EVENT_COUNTER_CTRL_C0_RESET (1 << 1) #define EVENT_COUNTER_CTRL_C1_RESET (1 << 2) #define PL310_EVENT_COUNTER1_CONF 0x204 #define PL310_EVENT_COUNTER0_CONF 0x208 #define EVENT_COUNTER_CONF_NOINTR 0 #define EVENT_COUNTER_CONF_INCR 1 #define EVENT_COUNTER_CONF_OVFW 2 #define EVENT_COUNTER_CONF_NOEV (0 << 2) #define EVENT_COUNTER_CONF_CO (1 << 2) #define EVENT_COUNTER_CONF_DRHIT (2 << 2) #define EVENT_COUNTER_CONF_DRREQ (3 << 2) #define EVENT_COUNTER_CONF_DWHIT (4 << 2) #define EVENT_COUNTER_CONF_DWREQ (5 << 2) #define EVENT_COUNTER_CONF_DWTREQ (6 << 2) #define EVENT_COUNTER_CONF_DIRHIT (7 << 2) #define EVENT_COUNTER_CONF_DIRREQ (8 << 2) #define EVENT_COUNTER_CONF_WA (9 << 2) #define PL310_EVENT_COUNTER1_VAL 0x20C #define PL310_EVENT_COUNTER0_VAL 0x210 #define PL310_INTR_MASK 0x214 #define PL310_MASKED_INTR_STAT 0x218 #define PL310_RAW_INTR_STAT 0x21C #define PL310_INTR_CLEAR 0x220 #define INTR_MASK_ALL ((1 << 9) - 1) #define INTR_MASK_ECNTR (1 << 0) #define INTR_MASK_PARRT (1 << 1) #define INTR_MASK_PARRD (1 << 2) #define INTR_MASK_ERRWT (1 << 3) #define INTR_MASK_ERRWD (1 << 4) #define INTR_MASK_ERRRT (1 << 5) #define INTR_MASK_ERRRD (1 << 6) #define INTR_MASK_SLVERR (1 << 7) #define INTR_MASK_DECERR (1 << 8) #define PL310_CACHE_SYNC 0x730 #define PL310_INV_LINE_PA 0x770 #define PL310_INV_WAY 0x77C #define PL310_CLEAN_LINE_PA 0x7B0 #define PL310_CLEAN_LINE_IDX 0x7B8 #define PL310_CLEAN_WAY 0x7BC #define PL310_CLEAN_INV_LINE_PA 0x7F0 #define PL310_CLEAN_INV_LINE_IDX 0x7F8 #define PL310_CLEAN_INV_WAY 0x7FC #define PL310_LOCKDOWN_D_WAY(x) (0x900 + ((x) * 8)) #define PL310_LOCKDOWN_I_WAY(x) (0x904 + ((x) * 8)) #define PL310_LOCKDOWN_LINE_ENABLE 0x950 #define PL310_UNLOCK_ALL_LINES_WAY 0x954 #define PL310_ADDR_FILTER_STAR 0xC00 #define PL310_ADDR_FILTER_END 0xC04 #define PL310_DEBUG_CTRL 0xF40 #define DEBUG_CTRL_DISABLE_LINEFILL (1 << 0) #define DEBUG_CTRL_DISABLE_WRITEBACK (1 << 1) #define DEBUG_CTRL_SPNIDEN (1 << 2) #define PL310_PREFETCH_CTRL 0xF60 #define PREFETCH_CTRL_OFFSET_MASK (0x1f) #define PREFETCH_CTRL_NOTSAMEID (1 << 21) #define PREFETCH_CTRL_INCR_DL (1 << 23) #define PREFETCH_CTRL_PREFETCH_DROP (1 << 24) #define PREFETCH_CTRL_DL_ON_WRAP (1 << 27) #define 
PREFETCH_CTRL_DATA_PREFETCH (1 << 28)
#define PREFETCH_CTRL_INSTR_PREFETCH (1 << 29)
#define PREFETCH_CTRL_DL (1 << 30)
#define PL310_POWER_CTRL 0xF80
#define POWER_CTRL_ENABLE_GATING (1 << 0)
#define POWER_CTRL_ENABLE_STANDBY (1 << 1)

struct intr_config_hook;

struct pl310_softc {
	device_t	sc_dev;
	struct resource	*sc_mem_res;
	struct resource	*sc_irq_res;
	void*		sc_irq_h;
	int		sc_enabled;
	struct mtx	sc_mtx;
	u_int		sc_rtl_revision;
	struct intr_config_hook *sc_ich;
};

/**
 * pl310_read4 - read a 32-bit value from the PL310 registers
 * pl310_write4 - write a 32-bit value to the PL310 registers
 * @off: byte offset within the register set to read from
 * @val: the value to write into the register
 *
 * LOCKING:
 * None
 *
 * RETURNS:
 * Nothing for the write function; the read function returns the value read.
 */
static __inline uint32_t
pl310_read4(struct pl310_softc *sc, bus_size_t off)
{

	return bus_read_4(sc->sc_mem_res, off);
}

static __inline void
pl310_write4(struct pl310_softc *sc, bus_size_t off, uint32_t val)
{

	bus_write_4(sc->sc_mem_res, off, val);
}

-void pl310_print_config(struct pl310_softc *sc);
 void pl310_set_ram_latency(struct pl310_softc *sc, uint32_t which_reg,
    uint32_t read, uint32_t write, uint32_t setup);

void platform_pl310_init(struct pl310_softc *);
void platform_pl310_write_ctrl(struct pl310_softc *, uint32_t);
void platform_pl310_write_debug(struct pl310_softc *, uint32_t);

#endif /* PL310_H_ */

Index: projects/powernv/arm/samsung/exynos/exynos5_ehci.c
===================================================================
--- projects/powernv/arm/samsung/exynos/exynos5_ehci.c	(revision 290990)
+++ projects/powernv/arm/samsung/exynos/exynos5_ehci.c	(revision 290991)
@@ -1,394 +1,394 @@
/*-
 * Copyright (c) 2013-2014 Ruslan Bukin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
*/ #include __FBSDID("$FreeBSD$"); #include "opt_bus.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "gpio_if.h" #include "opt_platform.h" /* GPIO control */ #define GPIO_OUTPUT 1 #define GPIO_INPUT 0 #define PIN_USB 161 /* SYSREG */ #define EXYNOS5_SYSREG_USB2_PHY 0x0 #define USB2_MODE_HOST 0x1 /* USB HOST */ #define HOST_CTRL_CLK_24MHZ (5 << 16) #define HOST_CTRL_CLK_MASK (7 << 16) #define HOST_CTRL_SIDDQ (1 << 6) #define HOST_CTRL_SLEEP (1 << 5) #define HOST_CTRL_SUSPEND (1 << 4) #define HOST_CTRL_RESET_LINK (1 << 1) #define HOST_CTRL_RESET_PHY (1 << 0) #define HOST_CTRL_RESET_PHY_ALL (1U << 31) /* Forward declarations */ static int exynos_ehci_attach(device_t dev); static int exynos_ehci_detach(device_t dev); static int exynos_ehci_probe(device_t dev); struct exynos_ehci_softc { device_t dev; ehci_softc_t base; struct resource *res[4]; bus_space_tag_t host_bst; bus_space_tag_t sysreg_bst; bus_space_handle_t host_bsh; bus_space_handle_t sysreg_bsh; }; static struct resource_spec exynos_ehci_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, { SYS_RES_MEMORY, 1, RF_ACTIVE }, { SYS_RES_MEMORY, 2, RF_ACTIVE }, { SYS_RES_IRQ, 0, RF_ACTIVE }, { -1, 0 } }; static device_method_t ehci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, exynos_ehci_probe), DEVMETHOD(device_attach, exynos_ehci_attach), DEVMETHOD(device_detach, exynos_ehci_detach), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), DEVMETHOD(device_shutdown, bus_generic_shutdown), /* Bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), { 0, 0 } }; /* kobj_class definition */ static driver_t ehci_driver = { "ehci", ehci_methods, - sizeof(ehci_softc_t) + sizeof(struct exynos_ehci_softc) }; static devclass_t ehci_devclass; DRIVER_MODULE(ehci, simplebus, ehci_driver, ehci_devclass, 0, 0); MODULE_DEPEND(ehci, usb, 1, 1, 1); /* * Public methods */ static int exynos_ehci_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (ofw_bus_is_compatible(dev, "exynos,usb-ehci") == 0) return (ENXIO); device_set_desc(dev, "Exynos integrated USB controller"); return (BUS_PROBE_DEFAULT); } static int gpio_ctrl(struct exynos_ehci_softc *esc, int dir, int power) { device_t gpio_dev; /* Get the GPIO device, we need this to give power to USB */ gpio_dev = devclass_get_device(devclass_find("gpio"), 0); if (gpio_dev == NULL) { device_printf(esc->dev, "cant find gpio_dev\n"); return (1); } if (power) GPIO_PIN_SET(gpio_dev, PIN_USB, GPIO_PIN_HIGH); else GPIO_PIN_SET(gpio_dev, PIN_USB, GPIO_PIN_LOW); if (dir) GPIO_PIN_SETFLAGS(gpio_dev, PIN_USB, GPIO_PIN_OUTPUT); else GPIO_PIN_SETFLAGS(gpio_dev, PIN_USB, GPIO_PIN_INPUT); return (0); } static int reset_hsic_hub(struct exynos_ehci_softc *esc, phandle_t hub) { device_t gpio_dev; pcell_t pin; /* TODO: check that hub is compatible with "smsc,usb3503" */ if (!OF_hasprop(hub, "freebsd,reset-gpio")) { return (1); } if (OF_getencprop(hub, "freebsd,reset-gpio", &pin, sizeof(pin)) < 0) { device_printf(esc->dev, "failed to decode reset GPIO pin number for HSIC hub\n"); return (1); } /* Get the GPIO device, we need this to give power to USB */ gpio_dev = devclass_get_device(devclass_find("gpio"), 0); if (gpio_dev == NULL) { device_printf(esc->dev, "Cant find gpio device\n"); return (1); } GPIO_PIN_SET(gpio_dev, pin, GPIO_PIN_LOW); DELAY(100); GPIO_PIN_SET(gpio_dev, pin, 
GPIO_PIN_HIGH);

	return (0);
}

static int
phy_init(struct exynos_ehci_softc *esc)
{
	int reg;
	phandle_t hub;

	gpio_ctrl(esc, GPIO_INPUT, 1);

	/* set USB HOST mode */
	bus_space_write_4(esc->sysreg_bst, esc->sysreg_bsh,
	    EXYNOS5_SYSREG_USB2_PHY, USB2_MODE_HOST);

	/* Power ON phy */
	usb2_phy_power_on();

	reg = bus_space_read_4(esc->host_bst, esc->host_bsh, 0x0);
	reg &= ~(HOST_CTRL_CLK_MASK |
	    HOST_CTRL_RESET_PHY |
	    HOST_CTRL_RESET_PHY_ALL |
	    HOST_CTRL_SIDDQ |
	    HOST_CTRL_SUSPEND |
	    HOST_CTRL_SLEEP);
	reg |= (HOST_CTRL_CLK_24MHZ |
	    HOST_CTRL_RESET_LINK);
	bus_space_write_4(esc->host_bst, esc->host_bsh, 0x0, reg);

	DELAY(10);

	reg = bus_space_read_4(esc->host_bst, esc->host_bsh, 0x0);
	reg &= ~(HOST_CTRL_RESET_LINK);
	bus_space_write_4(esc->host_bst, esc->host_bsh, 0x0, reg);

	if ((hub = OF_finddevice("/hsichub")) != -1) {
		reset_hsic_hub(esc, hub);
	}

	gpio_ctrl(esc, GPIO_OUTPUT, 1);

	return (0);
}

static int
exynos_ehci_attach(device_t dev)
{
	struct exynos_ehci_softc *esc;
	ehci_softc_t *sc;
	bus_space_handle_t bsh;
	int err;

	esc = device_get_softc(dev);
	esc->dev = dev;

	sc = &esc->base;
	sc->sc_bus.parent = dev;
	sc->sc_bus.devices = sc->sc_devices;
	sc->sc_bus.devices_max = EHCI_MAX_DEVICES;
	sc->sc_bus.dma_bits = 32;

	if (bus_alloc_resources(dev, exynos_ehci_spec, esc->res)) {
		device_printf(dev, "could not allocate resources\n");
		return (ENXIO);
	}

	/* EHCI registers */
	sc->sc_io_tag = rman_get_bustag(esc->res[0]);
	bsh = rman_get_bushandle(esc->res[0]);
	sc->sc_io_size = rman_get_size(esc->res[0]);

	/* EHCI HOST ctrl registers */
	esc->host_bst = rman_get_bustag(esc->res[1]);
	esc->host_bsh = rman_get_bushandle(esc->res[1]);

	/* SYSREG */
	esc->sysreg_bst = rman_get_bustag(esc->res[2]);
	esc->sysreg_bsh = rman_get_bushandle(esc->res[2]);

	/* get all DMA memory */
	if (usb_bus_mem_alloc_all(&sc->sc_bus, USB_GET_DMA_TAG(dev),
	    &ehci_iterate_hw_softc))
		return (ENXIO);

	/*
	 * Set handle to USB related registers subregion used by
	 * generic EHCI driver.
	 */
	err = bus_space_subregion(sc->sc_io_tag, bsh, 0x0,
	    sc->sc_io_size, &sc->sc_io_hdl);
	if (err != 0)
		return (ENXIO);

	phy_init(esc);

	/* Setup interrupt handler */
	err = bus_setup_intr(dev, esc->res[3], INTR_TYPE_BIO | INTR_MPSAFE,
	    NULL, (driver_intr_t *)ehci_interrupt, sc, &sc->sc_intr_hdl);
	if (err) {
		device_printf(dev, "Could not setup irq, "
		    "%d\n", err);
		return (1);
	}

	/* Add USB device */
	sc->sc_bus.bdev = device_add_child(dev, "usbus", -1);
	if (!sc->sc_bus.bdev) {
		device_printf(dev, "Could not add USB device\n");
		err = bus_teardown_intr(dev, esc->res[3],
		    sc->sc_intr_hdl);
		if (err)
			device_printf(dev, "Could not tear down irq,"
			    " %d\n", err);
		return (1);
	}

	device_set_ivars(sc->sc_bus.bdev, &sc->sc_bus);
	strlcpy(sc->sc_vendor, "Samsung", sizeof(sc->sc_vendor));

	err = ehci_init(sc);
	if (!err) {
		sc->sc_flags |= EHCI_SCFLG_DONEINIT;
		err = device_probe_and_attach(sc->sc_bus.bdev);
	} else {
		device_printf(dev, "USB init failed err=%d\n", err);
		device_delete_child(dev, sc->sc_bus.bdev);
		sc->sc_bus.bdev = NULL;
		err = bus_teardown_intr(dev, esc->res[3],
		    sc->sc_intr_hdl);
		if (err)
			device_printf(dev, "Could not tear down irq,"
			    " %d\n", err);
		return (1);
	}

	return (0);
}

static int
exynos_ehci_detach(device_t dev)
{
	struct exynos_ehci_softc *esc;
	ehci_softc_t *sc;
	int err;

	esc = device_get_softc(dev);
	sc = &esc->base;

	/*
	 * only call ehci_detach() after ehci_init()
	 */
	if (sc->sc_flags & EHCI_SCFLG_DONEINIT) {
		ehci_detach(sc);
		sc->sc_flags &= ~EHCI_SCFLG_DONEINIT;
	}

	/*
	 * Disable interrupts that might have been switched on in
	 * ehci_init.
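	 * (ehci_init() unmasks these in the EHCI_USBINTR register; writing
	 * zero below masks every interrupt source again.)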
*/ if (sc->sc_io_tag && sc->sc_io_hdl) bus_space_write_4(sc->sc_io_tag, sc->sc_io_hdl, EHCI_USBINTR, 0); if (esc->res[3] && sc->sc_intr_hdl) { err = bus_teardown_intr(dev, esc->res[3], sc->sc_intr_hdl); if (err) { device_printf(dev, "Could not tear down irq," " %d\n", err); return (err); } sc->sc_intr_hdl = NULL; } if (sc->sc_bus.bdev) { device_delete_child(dev, sc->sc_bus.bdev); sc->sc_bus.bdev = NULL; } /* During module unload there are lots of children leftover */ device_delete_children(dev); bus_release_resources(dev, exynos_ehci_spec, esc->res); return (0); } Index: projects/powernv/arm/ti/am335x/am335x_lcd.c =================================================================== --- projects/powernv/arm/ti/am335x/am335x_lcd.c (revision 290990) +++ projects/powernv/arm/ti/am335x/am335x_lcd.c (revision 290991) @@ -1,1047 +1,1047 @@ /*- * Copyright 2013 Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_syscons.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_SC #include #else /* VT */ #include #endif #include #include #include "am335x_lcd.h" #include "am335x_pwm.h" #include "fb_if.h" #include "hdmi_if.h" #define LCD_PID 0x00 #define LCD_CTRL 0x04 #define CTRL_DIV_MASK 0xff #define CTRL_DIV_SHIFT 8 #define CTRL_AUTO_UFLOW_RESTART (1 << 1) #define CTRL_RASTER_MODE 1 #define CTRL_LIDD_MODE 0 #define LCD_LIDD_CTRL 0x0C #define LCD_LIDD_CS0_CONF 0x10 #define LCD_LIDD_CS0_ADDR 0x14 #define LCD_LIDD_CS0_DATA 0x18 #define LCD_LIDD_CS1_CONF 0x1C #define LCD_LIDD_CS1_ADDR 0x20 #define LCD_LIDD_CS1_DATA 0x24 #define LCD_RASTER_CTRL 0x28 #define RASTER_CTRL_TFT24_UNPACKED (1 << 26) #define RASTER_CTRL_TFT24 (1 << 25) #define RASTER_CTRL_STN565 (1 << 24) #define RASTER_CTRL_TFTPMAP (1 << 23) #define RASTER_CTRL_NIBMODE (1 << 22) #define RASTER_CTRL_PALMODE_SHIFT 20 #define PALETTE_PALETTE_AND_DATA 0x00 #define PALETTE_PALETTE_ONLY 0x01 #define PALETTE_DATA_ONLY 0x02 #define RASTER_CTRL_REQDLY_SHIFT 12 #define RASTER_CTRL_MONO8B (1 << 9) #define RASTER_CTRL_RBORDER (1 << 8) #define RASTER_CTRL_LCDTFT (1 << 7) #define RASTER_CTRL_LCDBW (1 << 1) #define RASTER_CTRL_LCDEN (1 << 0) #define LCD_RASTER_TIMING_0 0x2C #define RASTER_TIMING_0_HBP_SHIFT 24 #define RASTER_TIMING_0_HFP_SHIFT 16 #define RASTER_TIMING_0_HSW_SHIFT 10 #define RASTER_TIMING_0_PPLLSB_SHIFT 4 #define RASTER_TIMING_0_PPLMSB_SHIFT 3 #define LCD_RASTER_TIMING_1 0x30 #define RASTER_TIMING_1_VBP_SHIFT 24 #define RASTER_TIMING_1_VFP_SHIFT 16 #define RASTER_TIMING_1_VSW_SHIFT 10 #define RASTER_TIMING_1_LPP_SHIFT 0 #define LCD_RASTER_TIMING_2 0x34 #define RASTER_TIMING_2_HSWHI_SHIFT 27 #define RASTER_TIMING_2_LPP_B10_SHIFT 26 #define RASTER_TIMING_2_PHSVS (1 << 25) #define RASTER_TIMING_2_PHSVS_RISE (1 << 24) #define RASTER_TIMING_2_PHSVS_FALL (0 << 24) #define RASTER_TIMING_2_IOE (1 << 23) #define RASTER_TIMING_2_IPC (1 << 22) #define RASTER_TIMING_2_IHS (1 << 21) #define RASTER_TIMING_2_IVS (1 << 20) #define RASTER_TIMING_2_ACBI_SHIFT 16 #define RASTER_TIMING_2_ACB_SHIFT 8 #define RASTER_TIMING_2_HBPHI_SHIFT 4 #define RASTER_TIMING_2_HFPHI_SHIFT 0 #define LCD_RASTER_SUBPANEL 0x38 #define LCD_RASTER_SUBPANEL2 0x3C #define LCD_LCDDMA_CTRL 0x40 #define LCDDMA_CTRL_DMA_MASTER_PRIO_SHIFT 16 #define LCDDMA_CTRL_TH_FIFO_RDY_SHIFT 8 #define LCDDMA_CTRL_BURST_SIZE_SHIFT 4 #define LCDDMA_CTRL_BYTES_SWAP (1 << 3) #define LCDDMA_CTRL_BE (1 << 1) #define LCDDMA_CTRL_FB0_ONLY 0 #define LCDDMA_CTRL_FB0_FB1 (1 << 0) #define LCD_LCDDMA_FB0_BASE 0x44 #define LCD_LCDDMA_FB0_CEILING 0x48 #define LCD_LCDDMA_FB1_BASE 0x4C #define LCD_LCDDMA_FB1_CEILING 0x50 #define LCD_SYSCONFIG 0x54 #define SYSCONFIG_STANDBY_FORCE (0 << 4) #define SYSCONFIG_STANDBY_NONE (1 << 4) #define SYSCONFIG_STANDBY_SMART (2 << 4) #define SYSCONFIG_IDLE_FORCE (0 << 2) #define SYSCONFIG_IDLE_NONE (1 << 2) #define SYSCONFIG_IDLE_SMART (2 << 2) #define LCD_IRQSTATUS_RAW 0x58 #define LCD_IRQSTATUS 0x5C #define LCD_IRQENABLE_SET 0x60 #define LCD_IRQENABLE_CLEAR 0x64 #define IRQ_EOF1 (1 << 9) #define IRQ_EOF0 (1 << 8) #define IRQ_PL (1 << 6) #define IRQ_FUF (1 << 5) #define IRQ_ACB (1 << 3) #define IRQ_SYNC_LOST (1 << 2) #define IRQ_RASTER_DONE (1 << 1) #define IRQ_FRAME_DONE (1 << 0) #define LCD_END_OF_INT_IND 0x68 #define LCD_CLKC_ENABLE 0x6C #define CLKC_ENABLE_DMA (1 
<< 2)
#define CLKC_ENABLE_LDID (1 << 1)
#define CLKC_ENABLE_CORE (1 << 0)
#define LCD_CLKC_RESET 0x70
#define CLKC_RESET_MAIN (1 << 3)
#define CLKC_RESET_DMA (1 << 2)
#define CLKC_RESET_LDID (1 << 1)
#define CLKC_RESET_CORE (1 << 0)

#define LCD_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx)
#define LCD_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx)
#define LCD_LOCK_INIT(_sc) mtx_init(&(_sc)->sc_mtx, \
    device_get_nameunit(_sc->sc_dev), "am335x_lcd", MTX_DEF)
#define LCD_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->sc_mtx)

#define LCD_READ4(_sc, reg) bus_read_4((_sc)->sc_mem_res, reg)
#define LCD_WRITE4(_sc, reg, value) \
    bus_write_4((_sc)->sc_mem_res, reg, value)

/* Backlight is controlled by eCAP interface on PWM unit 0 */
#define PWM_UNIT 0
#define PWM_PERIOD 100

#define MODE_HBP(mode) ((mode)->htotal - (mode)->hsync_end)
#define MODE_HFP(mode) ((mode)->hsync_start - (mode)->hdisplay)
#define MODE_HSW(mode) ((mode)->hsync_end - (mode)->hsync_start)
#define MODE_VBP(mode) ((mode)->vtotal - (mode)->vsync_end)
#define MODE_VFP(mode) ((mode)->vsync_start - (mode)->vdisplay)
#define MODE_VSW(mode) ((mode)->vsync_end - (mode)->vsync_start)

#define MAX_PIXEL_CLOCK 126000
#define MAX_BANDWIDTH (1280*1024*60)

struct am335x_lcd_softc {
	device_t	sc_dev;
	struct fb_info	sc_fb_info;
	struct resource	*sc_mem_res;
	struct resource	*sc_irq_res;
	void		*sc_intr_hl;
	struct mtx	sc_mtx;
	int		sc_backlight;
	struct sysctl_oid *sc_oid;

	struct panel_info sc_panel;

	/* Framebuffer */
	bus_dma_tag_t	sc_dma_tag;
	bus_dmamap_t	sc_dma_map;
	size_t		sc_fb_size;
	bus_addr_t	sc_fb_phys;
	uint8_t		*sc_fb_base;

	/* HDMI framer */
	phandle_t	sc_hdmi_framer;
	eventhandler_tag sc_hdmi_evh;
};

static void
am335x_fb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err)
{
	bus_addr_t *addr;

	if (err)
		return;

	addr = (bus_addr_t*)arg;
	*addr = segs[0].ds_addr;
}

static uint32_t
am335x_lcd_calc_divisor(uint32_t reference, uint32_t freq)
{
	uint32_t div, i;
	uint32_t delta, min_delta;

	min_delta = freq;
	div = 255;

	/* Raster mode case: divisors are in range from 2 to 255 */
	for (i = 2; i < 255; i++) {
		delta = abs(reference/i - freq);
		if (delta < min_delta) {
			div = i;
			min_delta = delta;
		}
	}

	return (div);
}

static int
am335x_lcd_sysctl_backlight(SYSCTL_HANDLER_ARGS)
{
	struct am335x_lcd_softc *sc = (struct am335x_lcd_softc*)arg1;
	int error;
	int backlight;

	backlight = sc->sc_backlight;
	error = sysctl_handle_int(oidp, &backlight, 0, req);

	if (error != 0 || req->newptr == NULL)
		return (error);

	if (backlight < 0)
		backlight = 0;
	if (backlight > 100)
		backlight = 100;

	LCD_LOCK(sc);
	error = am335x_pwm_config_ecap(PWM_UNIT, PWM_PERIOD,
	    backlight*PWM_PERIOD/100);
	if (error == 0)
		sc->sc_backlight = backlight;
	LCD_UNLOCK(sc);

	return (error);
}

static uint32_t
am335x_mode_vrefresh(const struct videomode *mode)
{
	uint32_t refresh;

	/* Calculate vertical refresh rate */
	refresh = (mode->dot_clock * 1000 / mode->htotal);
	refresh = (refresh + mode->vtotal / 2) / mode->vtotal;

	if (mode->flags & VID_INTERLACE)
		refresh *= 2;
	if (mode->flags & VID_DBLSCAN)
		refresh /= 2;

	return refresh;
}

static int
am335x_mode_is_valid(const struct videomode *mode)
{
	uint32_t hbp, hfp, hsw;
	uint32_t vbp, vfp, vsw;

	if (mode->dot_clock > MAX_PIXEL_CLOCK)
		return (0);

	if (mode->hdisplay & 0xf)
		return (0);

	if (mode->vdisplay > 2048)
		return (0);

	/* Check ranges for timing parameters */
	hbp = MODE_HBP(mode) - 1;
	hfp = MODE_HFP(mode) - 1;
	hsw = MODE_HSW(mode) - 1;
	vbp = MODE_VBP(mode);
	vfp = MODE_VFP(mode);
	vsw = MODE_VSW(mode) - 1;

	if (hbp > 0x3ff)
		return (0);
	if (hfp > 0x3ff)
		return (0);
	if (hsw > 0x3ff)
		return (0);
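
	/*
	 * The three horizontal fields above live in 10-bit registers; the
	 * vertical porches below are 8 bits wide and the vsync width only
	 * 6 bits, hence the different limits.
	 */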
if (vbp > 0xff) return (0); if (vfp > 0xff) return (0); if (vsw > 0x3f) return (0); if (mode->vdisplay*mode->hdisplay*am335x_mode_vrefresh(mode) > MAX_BANDWIDTH) return (0); return (1); } static void am335x_read_hdmi_property(device_t dev) { phandle_t node; phandle_t hdmi_xref; struct am335x_lcd_softc *sc; sc = device_get_softc(dev); node = ofw_bus_get_node(dev); if (OF_getencprop(node, "hdmi", &hdmi_xref, sizeof(hdmi_xref)) == -1) sc->sc_hdmi_framer = 0; else sc->sc_hdmi_framer = hdmi_xref; } static int am335x_read_property(device_t dev, phandle_t node, const char *name, uint32_t *val) { pcell_t cell; if ((OF_getprop(node, name, &cell, sizeof(cell))) <= 0) { device_printf(dev, "missing '%s' attribute in LCD panel info\n", name); return (ENXIO); } *val = fdt32_to_cpu(cell); return (0); } static int am335x_read_timing(device_t dev, phandle_t node, struct panel_info *panel) { int error; phandle_t timings_node, timing_node, native; timings_node = ofw_bus_find_child(node, "display-timings"); if (timings_node == 0) { device_printf(dev, "no \"display-timings\" node\n"); return (-1); } if (OF_searchencprop(timings_node, "native-mode", &native, sizeof(native)) == -1) { device_printf(dev, "no \"native-mode\" reference in \"timings\" node\n"); return (-1); } timing_node = OF_node_from_xref(native); error = 0; if ((error = am335x_read_property(dev, timing_node, "hactive", &panel->panel_width))) goto out; if ((error = am335x_read_property(dev, timing_node, "vactive", &panel->panel_height))) goto out; if ((error = am335x_read_property(dev, timing_node, "hfront-porch", &panel->panel_hfp))) goto out; if ((error = am335x_read_property(dev, timing_node, "hback-porch", &panel->panel_hbp))) goto out; if ((error = am335x_read_property(dev, timing_node, "hsync-len", &panel->panel_hsw))) goto out; if ((error = am335x_read_property(dev, timing_node, "vfront-porch", &panel->panel_vfp))) goto out; if ((error = am335x_read_property(dev, timing_node, "vback-porch", &panel->panel_vbp))) goto out; if ((error = am335x_read_property(dev, timing_node, "vsync-len", &panel->panel_vsw))) goto out; if ((error = am335x_read_property(dev, timing_node, "clock-frequency", &panel->panel_pxl_clk))) goto out; if ((error = am335x_read_property(dev, timing_node, "pixelclk-active", &panel->pixelclk_active))) goto out; if ((error = am335x_read_property(dev, timing_node, "hsync-active", &panel->hsync_active))) goto out; if ((error = am335x_read_property(dev, timing_node, "vsync-active", &panel->vsync_active))) goto out; out: return (error); } static int am335x_read_panel_info(device_t dev, phandle_t node, struct panel_info *panel) { phandle_t panel_info_node; panel_info_node = ofw_bus_find_child(node, "panel-info"); if (panel_info_node == 0) return (-1); am335x_read_property(dev, panel_info_node, "ac-bias", &panel->ac_bias); am335x_read_property(dev, panel_info_node, "ac-bias-intrpt", &panel->ac_bias_intrpt); am335x_read_property(dev, panel_info_node, "dma-burst-sz", &panel->dma_burst_sz); am335x_read_property(dev, panel_info_node, "bpp", &panel->bpp); am335x_read_property(dev, panel_info_node, "fdd", &panel->fdd); am335x_read_property(dev, panel_info_node, "sync-edge", &panel->sync_edge); am335x_read_property(dev, panel_info_node, "sync-ctrl", &panel->sync_ctrl); return (0); } static void am335x_lcd_intr(void *arg) { struct am335x_lcd_softc *sc = arg; uint32_t reg; reg = LCD_READ4(sc, LCD_IRQSTATUS); LCD_WRITE4(sc, LCD_IRQSTATUS, reg); /* Read value back to make sure it reached the hardware */ reg = LCD_READ4(sc, LCD_IRQSTATUS); if 
(reg & IRQ_SYNC_LOST) { reg = LCD_READ4(sc, LCD_RASTER_CTRL); reg &= ~RASTER_CTRL_LCDEN; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); reg = LCD_READ4(sc, LCD_RASTER_CTRL); reg |= RASTER_CTRL_LCDEN; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); goto done; } if (reg & IRQ_PL) { reg = LCD_READ4(sc, LCD_RASTER_CTRL); reg &= ~RASTER_CTRL_LCDEN; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); reg = LCD_READ4(sc, LCD_RASTER_CTRL); reg |= RASTER_CTRL_LCDEN; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); goto done; } if (reg & IRQ_EOF0) { LCD_WRITE4(sc, LCD_LCDDMA_FB0_BASE, sc->sc_fb_phys); LCD_WRITE4(sc, LCD_LCDDMA_FB0_CEILING, sc->sc_fb_phys + sc->sc_fb_size - 1); reg &= ~IRQ_EOF0; } if (reg & IRQ_EOF1) { LCD_WRITE4(sc, LCD_LCDDMA_FB1_BASE, sc->sc_fb_phys); LCD_WRITE4(sc, LCD_LCDDMA_FB1_CEILING, sc->sc_fb_phys + sc->sc_fb_size - 1); reg &= ~IRQ_EOF1; } if (reg & IRQ_FUF) { /* TODO: Handle FUF */ } if (reg & IRQ_ACB) { /* TODO: Handle ACB */ } done: LCD_WRITE4(sc, LCD_END_OF_INT_IND, 0); /* Read value back to make sure it reached the hardware */ reg = LCD_READ4(sc, LCD_END_OF_INT_IND); } static const struct videomode * am335x_lcd_pick_mode(struct edid_info *ei) { const struct videomode *videomode; const struct videomode *m; int n; /* Nothing picked yet; standard VGA is the later fallback */ videomode = NULL; /* * Pick a mode. */ if (ei->edid_preferred_mode != NULL) { if (am335x_mode_is_valid(ei->edid_preferred_mode)) videomode = ei->edid_preferred_mode; } if (videomode == NULL) { m = ei->edid_modes; sort_modes(ei->edid_modes, &ei->edid_preferred_mode, ei->edid_nmodes); for (n = 0; n < ei->edid_nmodes; n++) if (am335x_mode_is_valid(&m[n])) { videomode = &m[n]; break; } } return videomode; } static int am335x_lcd_configure(struct am335x_lcd_softc *sc) { int div; uint32_t reg, timing0, timing1, timing2; uint32_t burst_log; size_t dma_size; uint32_t hbp, hfp, hsw; uint32_t vbp, vfp, vsw; uint32_t width, height; unsigned int ref_freq; int err; /* * Try to adjust the clock to get double the requested frequency; * HDMI/DVI displays are very sensitive to error in the frequency value. */ if (ti_prcm_clk_set_source_freq(LCDC_CLK, sc->sc_panel.panel_pxl_clk*2)) { device_printf(sc->sc_dev, "can't set source frequency\n"); return (ENXIO); } if (ti_prcm_clk_get_source_freq(LCDC_CLK, &ref_freq)) { device_printf(sc->sc_dev, "can't get reference frequency\n"); return (ENXIO); } /* Panel initialization */ dma_size = round_page(sc->sc_panel.panel_width*sc->sc_panel.panel_height*sc->sc_panel.bpp/8); /* * Now allocate framebuffer memory */ err = bus_dma_tag_create( bus_get_dma_tag(sc->sc_dev), 4, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ dma_size, 1, /* maxsize, nsegments */ dma_size, 0, /* maxsegsize, flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->sc_dma_tag); if (err) goto done; err = bus_dmamem_alloc(sc->sc_dma_tag, (void **)&sc->sc_fb_base, BUS_DMA_COHERENT, &sc->sc_dma_map); if (err) { device_printf(sc->sc_dev, "cannot allocate framebuffer\n"); goto done; } err = bus_dmamap_load(sc->sc_dma_tag, sc->sc_dma_map, sc->sc_fb_base, dma_size, am335x_fb_dmamap_cb, &sc->sc_fb_phys, BUS_DMA_NOWAIT); if (err) { device_printf(sc->sc_dev, "cannot load DMA map\n"); goto done; } /* Make sure it's blank */ memset(sc->sc_fb_base, 0x0, dma_size); /* Calculate actual FB Size */ sc->sc_fb_size = sc->sc_panel.panel_width*sc->sc_panel.panel_height*sc->sc_panel.bpp/8; /* Only raster mode is supported */ reg = CTRL_RASTER_MODE; div = am335x_lcd_calc_divisor(ref_freq, sc->sc_panel.panel_pxl_clk); reg |= (div <<
CTRL_DIV_SHIFT); LCD_WRITE4(sc, LCD_CTRL, reg); /* Set timing */ timing0 = timing1 = timing2 = 0; hbp = sc->sc_panel.panel_hbp - 1; hfp = sc->sc_panel.panel_hfp - 1; hsw = sc->sc_panel.panel_hsw - 1; vbp = sc->sc_panel.panel_vbp; vfp = sc->sc_panel.panel_vfp; vsw = sc->sc_panel.panel_vsw - 1; height = sc->sc_panel.panel_height - 1; width = sc->sc_panel.panel_width - 1; /* Horizontal back porch */ timing0 |= (hbp & 0xff) << RASTER_TIMING_0_HBP_SHIFT; timing2 |= ((hbp >> 8) & 3) << RASTER_TIMING_2_HBPHI_SHIFT; /* Horizontal front porch */ timing0 |= (hfp & 0xff) << RASTER_TIMING_0_HFP_SHIFT; timing2 |= ((hfp >> 8) & 3) << RASTER_TIMING_2_HFPHI_SHIFT; /* Horizontal sync width */ timing0 |= (hsw & 0x3f) << RASTER_TIMING_0_HSW_SHIFT; timing2 |= ((hsw >> 6) & 0xf) << RASTER_TIMING_2_HSWHI_SHIFT; /* Vertical back porch, front porch, sync width */ timing1 |= (vbp & 0xff) << RASTER_TIMING_1_VBP_SHIFT; timing1 |= (vfp & 0xff) << RASTER_TIMING_1_VFP_SHIFT; timing1 |= (vsw & 0x3f) << RASTER_TIMING_1_VSW_SHIFT; /* Pixels per line */ timing0 |= ((width >> 10) & 1) << RASTER_TIMING_0_PPLMSB_SHIFT; timing0 |= ((width >> 4) & 0x3f) << RASTER_TIMING_0_PPLLSB_SHIFT; /* Lines per panel */ timing1 |= (height & 0x3ff) << RASTER_TIMING_1_LPP_SHIFT; timing2 |= ((height >> 10 ) & 1) << RASTER_TIMING_2_LPP_B10_SHIFT; /* clock signal settings */ if (sc->sc_panel.sync_ctrl) timing2 |= RASTER_TIMING_2_PHSVS; if (sc->sc_panel.sync_edge) timing2 |= RASTER_TIMING_2_PHSVS_RISE; else timing2 |= RASTER_TIMING_2_PHSVS_FALL; if (sc->sc_panel.hsync_active == 0) timing2 |= RASTER_TIMING_2_IHS; if (sc->sc_panel.vsync_active == 0) timing2 |= RASTER_TIMING_2_IVS; if (sc->sc_panel.pixelclk_active == 0) timing2 |= RASTER_TIMING_2_IPC; /* AC bias */ timing2 |= (sc->sc_panel.ac_bias << RASTER_TIMING_2_ACB_SHIFT); timing2 |= (sc->sc_panel.ac_bias_intrpt << RASTER_TIMING_2_ACBI_SHIFT); LCD_WRITE4(sc, LCD_RASTER_TIMING_0, timing0); LCD_WRITE4(sc, LCD_RASTER_TIMING_1, timing1); LCD_WRITE4(sc, LCD_RASTER_TIMING_2, timing2); /* DMA settings */ reg = LCDDMA_CTRL_FB0_FB1; /* Find power of 2 for current burst size */ switch (sc->sc_panel.dma_burst_sz) { case 1: burst_log = 0; break; case 2: burst_log = 1; break; case 4: burst_log = 2; break; case 8: burst_log = 3; break; case 16: default: burst_log = 4; break; } reg |= (burst_log << LCDDMA_CTRL_BURST_SIZE_SHIFT); /* XXX: FIFO TH */ reg |= (0 << LCDDMA_CTRL_TH_FIFO_RDY_SHIFT); LCD_WRITE4(sc, LCD_LCDDMA_CTRL, reg); LCD_WRITE4(sc, LCD_LCDDMA_FB0_BASE, sc->sc_fb_phys); LCD_WRITE4(sc, LCD_LCDDMA_FB0_CEILING, sc->sc_fb_phys + sc->sc_fb_size - 1); LCD_WRITE4(sc, LCD_LCDDMA_FB1_BASE, sc->sc_fb_phys); LCD_WRITE4(sc, LCD_LCDDMA_FB1_CEILING, sc->sc_fb_phys + sc->sc_fb_size - 1); /* Enable LCD */ reg = RASTER_CTRL_LCDTFT; reg |= (sc->sc_panel.fdd << RASTER_CTRL_REQDLY_SHIFT); reg |= (PALETTE_DATA_ONLY << RASTER_CTRL_PALMODE_SHIFT); if (sc->sc_panel.bpp >= 24) reg |= RASTER_CTRL_TFT24; if (sc->sc_panel.bpp == 32) reg |= RASTER_CTRL_TFT24_UNPACKED; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); LCD_WRITE4(sc, LCD_CLKC_ENABLE, CLKC_ENABLE_DMA | CLKC_ENABLE_LDID | CLKC_ENABLE_CORE); LCD_WRITE4(sc, LCD_CLKC_RESET, CLKC_RESET_MAIN); DELAY(100); LCD_WRITE4(sc, LCD_CLKC_RESET, 0); reg = IRQ_EOF1 | IRQ_EOF0 | IRQ_FUF | IRQ_PL | IRQ_ACB | IRQ_SYNC_LOST | IRQ_RASTER_DONE | IRQ_FRAME_DONE; LCD_WRITE4(sc, LCD_IRQENABLE_SET, reg); reg = LCD_READ4(sc, LCD_RASTER_CTRL); reg |= RASTER_CTRL_LCDEN; LCD_WRITE4(sc, LCD_RASTER_CTRL, reg); LCD_WRITE4(sc, LCD_SYSCONFIG, SYSCONFIG_STANDBY_SMART | SYSCONFIG_IDLE_SMART); 
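/*
 * Note: FB0 and FB1 were pointed at the same physical range above, so the
 * controller scans out of a single buffer; the EOF0/EOF1 handlers in
 * am335x_lcd_intr() simply re-arm the same base/ceiling pair.
 */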
sc->sc_fb_info.fb_name = device_get_nameunit(sc->sc_dev); sc->sc_fb_info.fb_vbase = (intptr_t)sc->sc_fb_base; sc->sc_fb_info.fb_pbase = sc->sc_fb_phys; sc->sc_fb_info.fb_size = sc->sc_fb_size; sc->sc_fb_info.fb_bpp = sc->sc_fb_info.fb_depth = sc->sc_panel.bpp; sc->sc_fb_info.fb_stride = sc->sc_panel.panel_width*sc->sc_panel.bpp / 8; sc->sc_fb_info.fb_width = sc->sc_panel.panel_width; sc->sc_fb_info.fb_height = sc->sc_panel.panel_height; #ifdef DEV_SC err = (sc_attach_unit(device_get_unit(sc->sc_dev), device_get_flags(sc->sc_dev) | SC_AUTODETECT_KBD)); if (err) { device_printf(sc->sc_dev, "failed to attach syscons\n"); goto done; } am335x_lcd_syscons_setup((vm_offset_t)sc->sc_fb_base, sc->sc_fb_phys, &sc->sc_panel); #else /* VT */ device_t fbd = device_add_child(sc->sc_dev, "fbd", device_get_unit(sc->sc_dev)); if (fbd != NULL) { if (device_probe_and_attach(fbd) != 0) device_printf(sc->sc_dev, "failed to attach fbd device\n"); } else device_printf(sc->sc_dev, "failed to add fbd child\n"); #endif done: return (err); } static void -am335x_lcd_hdmi_event(void *arg) +am335x_lcd_hdmi_event(void *arg, device_t hdmi, int event) { struct am335x_lcd_softc *sc; const struct videomode *videomode; struct videomode hdmi_mode; device_t hdmi_dev; uint8_t *edid; uint32_t edid_len; struct edid_info ei; sc = arg; /* Nothing to work with */ if (!sc->sc_hdmi_framer) { device_printf(sc->sc_dev, "HDMI event without HDMI framer set\n"); return; } hdmi_dev = OF_device_from_xref(sc->sc_hdmi_framer); if (!hdmi_dev) { device_printf(sc->sc_dev, "no actual device for \"hdmi\" property\n"); return; } edid = NULL; edid_len = 0; if (HDMI_GET_EDID(hdmi_dev, &edid, &edid_len) != 0) { device_printf(sc->sc_dev, "failed to get EDID info from HDMI framer\n"); return; } videomode = NULL; if (edid_parse(edid, &ei) == 0) { edid_print(&ei); videomode = am335x_lcd_pick_mode(&ei); } else device_printf(sc->sc_dev, "failed to parse EDID\n"); /* Use standard VGA as fallback */ if (videomode == NULL) videomode = pick_mode_by_ref(640, 480, 60); if (videomode == NULL) { device_printf(sc->sc_dev, "failed to find usable videomode\n"); return; } device_printf(sc->sc_dev, "detected videomode: %dx%d @ %dHz\n", videomode->hdisplay, videomode->vdisplay, am335x_mode_vrefresh(videomode)); sc->sc_panel.panel_width = videomode->hdisplay; sc->sc_panel.panel_height = videomode->vdisplay; sc->sc_panel.panel_hfp = videomode->hsync_start - videomode->hdisplay; sc->sc_panel.panel_hbp = videomode->htotal - videomode->hsync_end; sc->sc_panel.panel_hsw = videomode->hsync_end - videomode->hsync_start; sc->sc_panel.panel_vfp = videomode->vsync_start - videomode->vdisplay; sc->sc_panel.panel_vbp = videomode->vtotal - videomode->vsync_end; sc->sc_panel.panel_vsw = videomode->vsync_end - videomode->vsync_start; sc->sc_panel.pixelclk_active = 1; /* The polarity logic for HSYNC is reversed */ if (videomode->flags & VID_NHSYNC) sc->sc_panel.hsync_active = 1; else sc->sc_panel.hsync_active = 0; if (videomode->flags & VID_NVSYNC) sc->sc_panel.vsync_active = 0; else sc->sc_panel.vsync_active = 1; sc->sc_panel.panel_pxl_clk = videomode->dot_clock * 1000; am335x_lcd_configure(sc); memcpy(&hdmi_mode, videomode, sizeof(hdmi_mode)); hdmi_mode.hskew = videomode->hsync_end - videomode->hsync_start; hdmi_mode.flags |= VID_HSKEW; HDMI_SET_VIDEOMODE(hdmi_dev, &hdmi_mode); } static int am335x_lcd_probe(device_t dev) { #ifdef DEV_SC int err; #endif if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_is_compatible(dev, "ti,am33xx-tilcdc")) return (ENXIO); device_set_desc(dev, "AM335x LCD
controller"); #ifdef DEV_SC err = sc_probe_unit(device_get_unit(dev), device_get_flags(dev) | SC_AUTODETECT_KBD); if (err != 0) return (err); #endif return (BUS_PROBE_DEFAULT); } static int am335x_lcd_attach(device_t dev) { struct am335x_lcd_softc *sc; int err; int rid; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; phandle_t root, panel_node; err = 0; sc = device_get_softc(dev); sc->sc_dev = dev; am335x_read_hdmi_property(dev); root = OF_finddevice("/"); if (root == 0) { device_printf(dev, "failed to get FDT root node\n"); return (ENXIO); } sc->sc_panel.ac_bias = 255; sc->sc_panel.ac_bias_intrpt = 0; sc->sc_panel.dma_burst_sz = 16; sc->sc_panel.bpp = 16; sc->sc_panel.fdd = 128; sc->sc_panel.sync_edge = 0; sc->sc_panel.sync_ctrl = 1; panel_node = fdt_find_compatible(root, "ti,tilcdc,panel", 1); if (panel_node != 0) { device_printf(dev, "using static panel info\n"); if (am335x_read_panel_info(dev, panel_node, &sc->sc_panel)) { device_printf(dev, "failed to read panel info\n"); return (ENXIO); } if (am335x_read_timing(dev, panel_node, &sc->sc_panel)) { device_printf(dev, "failed to read timings\n"); return (ENXIO); } } ti_prcm_clk_enable(LCDC_CLK); rid = 0; sc->sc_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!sc->sc_mem_res) { device_printf(dev, "cannot allocate memory window\n"); return (ENXIO); } rid = 0; sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (!sc->sc_irq_res) { bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mem_res); device_printf(dev, "cannot allocate interrupt\n"); return (ENXIO); } if (bus_setup_intr(dev, sc->sc_irq_res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, am335x_lcd_intr, sc, &sc->sc_intr_hl) != 0) { bus_release_resource(dev, SYS_RES_IRQ, rid, sc->sc_irq_res); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->sc_mem_res); device_printf(dev, "Unable to setup the irq handler.\n"); return (ENXIO); } LCD_LOCK_INIT(sc); /* Init backlight interface */ ctx = device_get_sysctl_ctx(sc->sc_dev); tree = device_get_sysctl_tree(sc->sc_dev); sc->sc_oid = SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "backlight", CTLTYPE_INT | CTLFLAG_RW, sc, 0, am335x_lcd_sysctl_backlight, "I", "LCD backlight"); sc->sc_backlight = 0; /* Check if eCAS interface is available at this point */ if (am335x_pwm_config_ecap(PWM_UNIT, PWM_PERIOD, PWM_PERIOD) == 0) sc->sc_backlight = 100; if (panel_node != 0) am335x_lcd_configure(sc); else sc->sc_hdmi_evh = EVENTHANDLER_REGISTER(hdmi_event, - am335x_lcd_hdmi_event, sc, 0); + am335x_lcd_hdmi_event, sc, EVENTHANDLER_PRI_ANY); return (0); } static int am335x_lcd_detach(device_t dev) { /* Do not let unload driver */ return (EBUSY); } static struct fb_info * am335x_lcd_fb_getinfo(device_t dev) { struct am335x_lcd_softc *sc; sc = device_get_softc(dev); return (&sc->sc_fb_info); } static device_method_t am335x_lcd_methods[] = { DEVMETHOD(device_probe, am335x_lcd_probe), DEVMETHOD(device_attach, am335x_lcd_attach), DEVMETHOD(device_detach, am335x_lcd_detach), /* Framebuffer service methods */ DEVMETHOD(fb_getinfo, am335x_lcd_fb_getinfo), DEVMETHOD_END }; static driver_t am335x_lcd_driver = { "fb", am335x_lcd_methods, sizeof(struct am335x_lcd_softc), }; static devclass_t am335x_lcd_devclass; DRIVER_MODULE(am335x_lcd, simplebus, am335x_lcd_driver, am335x_lcd_devclass, 0, 0); MODULE_VERSION(am335x_lcd, 1); MODULE_DEPEND(am335x_lcd, simplebus, 1, 1, 1); Index: projects/powernv/arm/ti/am335x/tda19988.c =================================================================== --- 
projects/powernv/arm/ti/am335x/tda19988.c (revision 290990) +++ projects/powernv/arm/ti/am335x/tda19988.c (revision 290991) @@ -1,809 +1,809 @@ /*- * Copyright (c) 2015 Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * NXP TDA19988 HDMI encoder */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "iicbus_if.h" #include "hdmi_if.h" #define MKREG(page, addr) (((page) << 8) | (addr)) #define REGPAGE(reg) (((reg) >> 8) & 0xff) #define REGADDR(reg) ((reg) & 0xff) #define TDA_VERSION MKREG(0x00, 0x00) #define TDA_MAIN_CNTRL0 MKREG(0x00, 0x01) #define MAIN_CNTRL0_SR (1 << 0) #define TDA_VERSION_MSB MKREG(0x00, 0x02) #define TDA_SOFTRESET MKREG(0x00, 0x0a) #define SOFTRESET_I2C (1 << 1) #define SOFTRESET_AUDIO (1 << 0) #define TDA_DDC_CTRL MKREG(0x00, 0x0b) #define DDC_ENABLE 0 #define TDA_CCLK MKREG(0x00, 0x0c) #define CCLK_ENABLE 1 #define TDA_INT_FLAGS_2 MKREG(0x00, 0x11) #define INT_FLAGS_2_EDID_BLK_RD (1 << 1) #define TDA_VIP_CNTRL_0 MKREG(0x00, 0x20) #define TDA_VIP_CNTRL_1 MKREG(0x00, 0x21) #define TDA_VIP_CNTRL_2 MKREG(0x00, 0x22) #define TDA_VIP_CNTRL_3 MKREG(0x00, 0x23) #define VIP_CNTRL_3_SYNC_HS (2 << 4) #define VIP_CNTRL_3_V_TGL (1 << 2) #define VIP_CNTRL_3_H_TGL (1 << 1) #define TDA_VIP_CNTRL_4 MKREG(0x00, 0x24) #define VIP_CNTRL_4_BLANKIT_NDE (0 << 2) #define VIP_CNTRL_4_BLANKIT_HS_VS (1 << 2) #define VIP_CNTRL_4_BLANKIT_NHS_VS (2 << 2) #define VIP_CNTRL_4_BLANKIT_HE_VE (3 << 2) #define VIP_CNTRL_4_BLC_NONE (0 << 0) #define VIP_CNTRL_4_BLC_RGB444 (1 << 0) #define VIP_CNTRL_4_BLC_YUV444 (2 << 0) #define VIP_CNTRL_4_BLC_YUV422 (3 << 0) #define TDA_VIP_CNTRL_5 MKREG(0x00, 0x25) #define VIP_CNTRL_5_SP_CNT(n) (((n) & 3) << 1) #define TDA_MUX_VP_VIP_OUT MKREG(0x00, 0x27) #define TDA_MAT_CONTRL MKREG(0x00, 0x80) #define MAT_CONTRL_MAT_BP (1 << 2) #define TDA_VIDFORMAT MKREG(0x00, 0xa0) #define TDA_REFPIX_MSB MKREG(0x00, 0xa1) #define TDA_REFPIX_LSB MKREG(0x00, 0xa2) #define TDA_REFLINE_MSB MKREG(0x00, 0xa3) #define TDA_REFLINE_LSB MKREG(0x00, 0xa4) #define TDA_NPIX_MSB MKREG(0x00, 0xa5) #define TDA_NPIX_LSB MKREG(0x00, 0xa6) #define TDA_NLINE_MSB MKREG(0x00, 0xa7) #define TDA_NLINE_LSB MKREG(0x00, 0xa8) 
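/*
 * A note on this register map: the TDA19988 exposes its registers in
 * pages, so MKREG() packs the page number into bits 15:8 and the in-page
 * offset into bits 7:0, and REGPAGE()/REGADDR() split them apart again.
 * For example, TDA_PLL_SERIAL_2 is MKREG(0x02, 0x01) == 0x0201: the
 * accessors below first select page 0x02 via TDA_CURPAGE_ADDR (when
 * sc_current_page differs) and then address register 0x01 in that page.
 */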
#define TDA_VS_LINE_STRT_1_MSB MKREG(0x00, 0xa9) #define TDA_VS_LINE_STRT_1_LSB MKREG(0x00, 0xaa) #define TDA_VS_PIX_STRT_1_MSB MKREG(0x00, 0xab) #define TDA_VS_PIX_STRT_1_LSB MKREG(0x00, 0xac) #define TDA_VS_LINE_END_1_MSB MKREG(0x00, 0xad) #define TDA_VS_LINE_END_1_LSB MKREG(0x00, 0xae) #define TDA_VS_PIX_END_1_MSB MKREG(0x00, 0xaf) #define TDA_VS_PIX_END_1_LSB MKREG(0x00, 0xb0) #define TDA_VS_LINE_STRT_2_MSB MKREG(0x00, 0xb1) #define TDA_VS_LINE_STRT_2_LSB MKREG(0x00, 0xb2) #define TDA_VS_PIX_STRT_2_MSB MKREG(0x00, 0xb3) #define TDA_VS_PIX_STRT_2_LSB MKREG(0x00, 0xb4) #define TDA_VS_LINE_END_2_MSB MKREG(0x00, 0xb5) #define TDA_VS_LINE_END_2_LSB MKREG(0x00, 0xb6) #define TDA_VS_PIX_END_2_MSB MKREG(0x00, 0xb7) #define TDA_VS_PIX_END_2_LSB MKREG(0x00, 0xb8) #define TDA_HS_PIX_START_MSB MKREG(0x00, 0xb9) #define TDA_HS_PIX_START_LSB MKREG(0x00, 0xba) #define TDA_HS_PIX_STOP_MSB MKREG(0x00, 0xbb) #define TDA_HS_PIX_STOP_LSB MKREG(0x00, 0xbc) #define TDA_VWIN_START_1_MSB MKREG(0x00, 0xbd) #define TDA_VWIN_START_1_LSB MKREG(0x00, 0xbe) #define TDA_VWIN_END_1_MSB MKREG(0x00, 0xbf) #define TDA_VWIN_END_1_LSB MKREG(0x00, 0xc0) #define TDA_VWIN_START_2_MSB MKREG(0x00, 0xc1) #define TDA_VWIN_START_2_LSB MKREG(0x00, 0xc2) #define TDA_VWIN_END_2_MSB MKREG(0x00, 0xc3) #define TDA_VWIN_END_2_LSB MKREG(0x00, 0xc4) #define TDA_DE_START_MSB MKREG(0x00, 0xc5) #define TDA_DE_START_LSB MKREG(0x00, 0xc6) #define TDA_DE_STOP_MSB MKREG(0x00, 0xc7) #define TDA_DE_STOP_LSB MKREG(0x00, 0xc8) #define TDA_TBG_CNTRL_0 MKREG(0x00, 0xca) #define TBG_CNTRL_0_SYNC_ONCE (1 << 7) #define TBG_CNTRL_0_SYNC_MTHD (1 << 6) #define TDA_TBG_CNTRL_1 MKREG(0x00, 0xcb) #define TBG_CNTRL_1_DWIN_DIS (1 << 6) #define TBG_CNTRL_1_TGL_EN (1 << 2) #define TBG_CNTRL_1_V_TGL (1 << 1) #define TBG_CNTRL_1_H_TGL (1 << 0) #define TDA_HVF_CNTRL_0 MKREG(0x00, 0xe4) #define HVF_CNTRL_0_PREFIL_NONE (0 << 2) #define HVF_CNTRL_0_INTPOL_BYPASS (0 << 0) #define TDA_HVF_CNTRL_1 MKREG(0x00, 0xe5) #define HVF_CNTRL_1_VQR(x) (((x) & 3) << 2) #define HVF_CNTRL_1_VQR_FULL HVF_CNTRL_1_VQR(0) #define TDA_ENABLE_SPACE MKREG(0x00, 0xd6) #define TDA_RPT_CNTRL MKREG(0x00, 0xf0) #define TDA_PLL_SERIAL_1 MKREG(0x02, 0x00) #define PLL_SERIAL_1_SRL_MAN_IP (1 << 6) #define TDA_PLL_SERIAL_2 MKREG(0x02, 0x01) #define PLL_SERIAL_2_SRL_PR(x) (((x) & 0xf) << 4) #define PLL_SERIAL_2_SRL_NOSC(x) (((x) & 0x3) << 0) #define TDA_PLL_SERIAL_3 MKREG(0x02, 0x02) #define PLL_SERIAL_3_SRL_PXIN_SEL (1 << 4) #define PLL_SERIAL_3_SRL_DE (1 << 2) #define PLL_SERIAL_3_SRL_CCIR (1 << 0) #define TDA_SERIALIZER MKREG(0x02, 0x03) #define TDA_BUFFER_OUT MKREG(0x02, 0x04) #define TDA_PLL_SCG1 MKREG(0x02, 0x05) #define TDA_PLL_SCG2 MKREG(0x02, 0x06) #define TDA_PLL_SCGN1 MKREG(0x02, 0x07) #define TDA_PLL_SCGN2 MKREG(0x02, 0x08) #define TDA_PLL_SCGR1 MKREG(0x02, 0x09) #define TDA_PLL_SCGR2 MKREG(0x02, 0x0a) #define TDA_SEL_CLK MKREG(0x02, 0x11) #define SEL_CLK_ENA_SC_CLK (1 << 3) #define SEL_CLK_SEL_VRF_CLK(x) (((x) & 3) << 1) #define SEL_CLK_SEL_CLK1 (1 << 0) #define TDA_ANA_GENERAL MKREG(0x02, 0x12) #define TDA_EDID_DATA0 MKREG(0x09, 0x00) #define TDA_EDID_CTRL MKREG(0x09, 0xfa) #define TDA_DDC_ADDR MKREG(0x09, 0xfb) #define TDA_DDC_OFFS MKREG(0x09, 0xfc) #define TDA_DDC_SEGM_ADDR MKREG(0x09, 0xfd) #define TDA_DDC_SEGM MKREG(0x09, 0xfe) #define TDA_IF_VSP MKREG(0x10, 0x20) #define TDA_IF_AVI MKREG(0x10, 0x40) #define TDA_IF_SPD MKREG(0x10, 0x60) #define TDA_IF_AUD MKREG(0x10, 0x80) #define TDA_IF_MPS MKREG(0x10, 0xa0) #define TDA_ENC_CNTRL MKREG(0x11, 0x0d) #define ENC_CNTRL_DVI_MODE (0 << 2) 
#define ENC_CNTRL_HDMI_MODE (1 << 2) #define TDA_DIP_IF_FLAGS MKREG(0x11, 0x0f) #define DIP_IF_FLAGS_IF5 (1 << 5) #define DIP_IF_FLAGS_IF4 (1 << 4) #define DIP_IF_FLAGS_IF3 (1 << 3) #define DIP_IF_FLAGS_IF2 (1 << 2) /* AVI IF on page 10h */ #define DIP_IF_FLAGS_IF1 (1 << 1) #define TDA_TX3 MKREG(0x12, 0x9a) #define TDA_TX4 MKREG(0x12, 0x9b) #define TX4_PD_RAM (1 << 1) #define TDA_HDCP_TX33 MKREG(0x12, 0xb8) #define HDCP_TX33_HDMI (1 << 1) #define TDA_CURPAGE_ADDR 0xff #define TDA_CEC_ENAMODS 0xff #define ENAMODS_RXSENS (1 << 2) #define ENAMODS_HDMI (1 << 1) #define TDA_CEC_FRO_IM_CLK_CTRL 0xfb #define CEC_FRO_IM_CLK_CTRL_GHOST_DIS (1 << 7) #define CEC_FRO_IM_CLK_CTRL_IMCLK_SEL (1 << 1) /* EDID reading */ #define EDID_LENGTH 0x80 #define MAX_READ_ATTEMPTS 100 /* EDID fields */ #define EDID_MODES0 35 #define EDID_MODES1 36 #define EDID_TIMING_START 38 #define EDID_TIMING_END 54 #define EDID_TIMING_X(v) (((v) + 31) * 8) #define EDID_FREQ(v) (((v) & 0x3f) + 60) #define EDID_RATIO(v) (((v) >> 6) & 0x3) #define EDID_RATIO_10x16 0 #define EDID_RATIO_3x4 1 #define EDID_RATIO_4x5 2 #define EDID_RATIO_9x16 3 #define TDA19988 0x0301 struct tda19988_softc { device_t sc_dev; uint32_t sc_addr; uint32_t sc_cec_addr; uint16_t sc_version; struct intr_config_hook enum_hook; int sc_current_page; uint8_t *sc_edid; uint32_t sc_edid_len; }; static int tda19988_set_page(struct tda19988_softc *sc, uint8_t page) { uint8_t addr = TDA_CURPAGE_ADDR; uint8_t cmd[2]; int result; struct iic_msg msg[] = { { sc->sc_addr, IIC_M_WR, 2, cmd }, }; cmd[0] = addr; cmd[1] = page; result = (iicbus_transfer(sc->sc_dev, msg, 1)); if (result) printf("tda19988_set_page failed: %d\n", result); else sc->sc_current_page = page; return (result); } static int tda19988_cec_read(struct tda19988_softc *sc, uint8_t addr, uint8_t *data) { int result; struct iic_msg msg[] = { { sc->sc_cec_addr, IIC_M_WR, 1, &addr }, { sc->sc_cec_addr, IIC_M_RD, 1, data }, }; result = iicbus_transfer(sc->sc_dev, msg, 2); if (result) printf("tda19988_cec_read failed: %d\n", result); return (result); } static int tda19988_cec_write(struct tda19988_softc *sc, uint8_t address, uint8_t data) { uint8_t cmd[2]; int result; struct iic_msg msg[] = { { sc->sc_cec_addr, IIC_M_WR, 2, cmd }, }; cmd[0] = address; cmd[1] = data; result = iicbus_transfer(sc->sc_dev, msg, 1); if (result) printf("tda19988_cec_write failed: %d\n", result); return (result); } static int tda19988_block_read(struct tda19988_softc *sc, uint16_t addr, uint8_t *data, int len) { uint8_t reg; int result; struct iic_msg msg[] = { { sc->sc_addr, IIC_M_WR, 1, &reg }, { sc->sc_addr, IIC_M_RD, len, data }, }; reg = REGADDR(addr); if (sc->sc_current_page != REGPAGE(addr)) tda19988_set_page(sc, REGPAGE(addr)); result = (iicbus_transfer(sc->sc_dev, msg, 2)); if (result) device_printf(sc->sc_dev, "tda19988_block_read failed: %d\n", result); return (result); } static int tda19988_reg_read(struct tda19988_softc *sc, uint16_t addr, uint8_t *data) { uint8_t reg; int result; struct iic_msg msg[] = { { sc->sc_addr, IIC_M_WR, 1, &reg }, { sc->sc_addr, IIC_M_RD, 1, data }, }; reg = REGADDR(addr); if (sc->sc_current_page != REGPAGE(addr)) tda19988_set_page(sc, REGPAGE(addr)); result = (iicbus_transfer(sc->sc_dev, msg, 2)); if (result) device_printf(sc->sc_dev, "tda19988_reg_read failed: %d\n", result); return (result); } static int tda19988_reg_write(struct tda19988_softc *sc, uint16_t address, uint8_t data) { uint8_t cmd[2]; int result; struct iic_msg msg[] = { { sc->sc_addr, IIC_M_WR, 2, cmd }, }; cmd[0] = REGADDR(address);
cmd[1] = data; if (sc->sc_current_page != REGPAGE(address)) tda19988_set_page(sc, REGPAGE(address)); result = iicbus_transfer(sc->sc_dev, msg, 1); if (result) device_printf(sc->sc_dev, "tda19988_reg_write failed: %d\n", result); return (result); } static int tda19988_reg_write2(struct tda19988_softc *sc, uint16_t address, uint16_t data) { uint8_t cmd[3]; int result; struct iic_msg msg[] = { { sc->sc_addr, IIC_M_WR, 3, cmd }, }; cmd[0] = REGADDR(address); cmd[1] = (data >> 8); cmd[2] = (data & 0xff); if (sc->sc_current_page != REGPAGE(address)) tda19988_set_page(sc, REGPAGE(address)); result = iicbus_transfer(sc->sc_dev, msg, 1); if (result) device_printf(sc->sc_dev, "tda19988_reg_write2 failed: %d\n", result); return (result); } static void tda19988_reg_set(struct tda19988_softc *sc, uint16_t addr, uint8_t flags) { uint8_t data; tda19988_reg_read(sc, addr, &data); data |= flags; tda19988_reg_write(sc, addr, data); } static void tda19988_reg_clear(struct tda19988_softc *sc, uint16_t addr, uint8_t flags) { uint8_t data; tda19988_reg_read(sc, addr, &data); data &= ~flags; tda19988_reg_write(sc, addr, data); } static int tda19988_probe(device_t dev) { if (!ofw_bus_is_compatible(dev, "nxp,tda998x")) return (ENXIO); return (BUS_PROBE_DEFAULT); } static void tda19988_init_encoder(struct tda19988_softc *sc, const struct videomode *mode) { uint16_t ref_pix, ref_line, n_pix, n_line; uint16_t hs_pix_start, hs_pix_stop; uint16_t vs1_pix_start, vs1_pix_stop; uint16_t vs1_line_start, vs1_line_end; uint16_t vs2_pix_start, vs2_pix_stop; uint16_t vs2_line_start, vs2_line_end; uint16_t vwin1_line_start, vwin1_line_end; uint16_t vwin2_line_start, vwin2_line_end; uint16_t de_start, de_stop; uint8_t reg, div; n_pix = mode->htotal; n_line = mode->vtotal; hs_pix_stop = mode->hsync_end - mode->hdisplay; hs_pix_start = mode->hsync_start - mode->hdisplay; de_stop = mode->htotal; de_start = mode->htotal - mode->hdisplay; ref_pix = hs_pix_start + 3; if (mode->flags & VID_HSKEW) ref_pix += mode->hskew; if ((mode->flags & VID_INTERLACE) == 0) { ref_line = 1 + mode->vsync_start - mode->vdisplay; vwin1_line_start = mode->vtotal - mode->vdisplay - 1; vwin1_line_end = vwin1_line_start + mode->vdisplay; vs1_pix_start = vs1_pix_stop = hs_pix_start; vs1_line_start = mode->vsync_start - mode->vdisplay; vs1_line_end = vs1_line_start + mode->vsync_end - mode->vsync_start; vwin2_line_start = vwin2_line_end = 0; vs2_pix_start = vs2_pix_stop = 0; vs2_line_start = vs2_line_end = 0; } else { ref_line = 1 + (mode->vsync_start - mode->vdisplay)/2; vwin1_line_start = (mode->vtotal - mode->vdisplay)/2; vwin1_line_end = vwin1_line_start + mode->vdisplay/2; vs1_pix_start = vs1_pix_stop = hs_pix_start; vs1_line_start = (mode->vsync_start - mode->vdisplay)/2; vs1_line_end = vs1_line_start + (mode->vsync_end - mode->vsync_start)/2; vwin2_line_start = vwin1_line_start + mode->vtotal/2; vwin2_line_end = vwin2_line_start + mode->vdisplay/2; vs2_pix_start = vs2_pix_stop = hs_pix_start + mode->htotal/2; vs2_line_start = vs1_line_start + mode->vtotal/2 ; vs2_line_end = vs2_line_start + (mode->vsync_end - mode->vsync_start)/2; } div = 148500 / mode->dot_clock; if (div != 0) { div--; if (div > 3) div = 3; } /* set HDMI HDCP mode off */ tda19988_reg_set(sc, TDA_TBG_CNTRL_1, TBG_CNTRL_1_DWIN_DIS); tda19988_reg_clear(sc, TDA_HDCP_TX33, HDCP_TX33_HDMI); tda19988_reg_write(sc, TDA_ENC_CNTRL, ENC_CNTRL_DVI_MODE); /* no pre-filter or interpolator */ tda19988_reg_write(sc, TDA_HVF_CNTRL_0, HVF_CNTRL_0_INTPOL_BYPASS | HVF_CNTRL_0_PREFIL_NONE); 
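/*
 * The divisor computed above is (148500 / dot_clock) - 1, clamped to the
 * 0..3 range that PLL_SERIAL_2_SRL_NOSC() accepts (a 74.25 MHz mode, for
 * instance, yields div == 1); it is written to TDA_PLL_SERIAL_2 below.
 */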
tda19988_reg_write(sc, TDA_VIP_CNTRL_5, VIP_CNTRL_5_SP_CNT(0)); tda19988_reg_write(sc, TDA_VIP_CNTRL_4, VIP_CNTRL_4_BLANKIT_NDE | VIP_CNTRL_4_BLC_NONE); tda19988_reg_clear(sc, TDA_PLL_SERIAL_3, PLL_SERIAL_3_SRL_CCIR); tda19988_reg_clear(sc, TDA_PLL_SERIAL_1, PLL_SERIAL_1_SRL_MAN_IP); tda19988_reg_clear(sc, TDA_PLL_SERIAL_3, PLL_SERIAL_3_SRL_DE); tda19988_reg_write(sc, TDA_SERIALIZER, 0); tda19988_reg_write(sc, TDA_HVF_CNTRL_1, HVF_CNTRL_1_VQR_FULL); tda19988_reg_write(sc, TDA_RPT_CNTRL, 0); tda19988_reg_write(sc, TDA_SEL_CLK, SEL_CLK_SEL_VRF_CLK(0) | SEL_CLK_SEL_CLK1 | SEL_CLK_ENA_SC_CLK); tda19988_reg_write(sc, TDA_PLL_SERIAL_2, PLL_SERIAL_2_SRL_NOSC(div) | PLL_SERIAL_2_SRL_PR(0)); tda19988_reg_set(sc, TDA_MAT_CONTRL, MAT_CONTRL_MAT_BP); tda19988_reg_write(sc, TDA_ANA_GENERAL, 0x09); tda19988_reg_clear(sc, TDA_TBG_CNTRL_0, TBG_CNTRL_0_SYNC_MTHD); /* * Sync on rising HSYNC/VSYNC */ reg = VIP_CNTRL_3_SYNC_HS; if (mode->flags & VID_NHSYNC) reg |= VIP_CNTRL_3_H_TGL; if (mode->flags & VID_NVSYNC) reg |= VIP_CNTRL_3_V_TGL; tda19988_reg_write(sc, TDA_VIP_CNTRL_3, reg); reg = TBG_CNTRL_1_TGL_EN; if (mode->flags & VID_NHSYNC) reg |= TBG_CNTRL_1_H_TGL; if (mode->flags & VID_NVSYNC) reg |= TBG_CNTRL_1_V_TGL; tda19988_reg_write(sc, TDA_TBG_CNTRL_1, reg); /* Program timing */ tda19988_reg_write(sc, TDA_VIDFORMAT, 0x00); tda19988_reg_write2(sc, TDA_REFPIX_MSB, ref_pix); tda19988_reg_write2(sc, TDA_REFLINE_MSB, ref_line); tda19988_reg_write2(sc, TDA_NPIX_MSB, n_pix); tda19988_reg_write2(sc, TDA_NLINE_MSB, n_line); tda19988_reg_write2(sc, TDA_VS_LINE_STRT_1_MSB, vs1_line_start); tda19988_reg_write2(sc, TDA_VS_PIX_STRT_1_MSB, vs1_pix_start); tda19988_reg_write2(sc, TDA_VS_LINE_END_1_MSB, vs1_line_end); tda19988_reg_write2(sc, TDA_VS_PIX_END_1_MSB, vs1_pix_stop); tda19988_reg_write2(sc, TDA_VS_LINE_STRT_2_MSB, vs2_line_start); tda19988_reg_write2(sc, TDA_VS_PIX_STRT_2_MSB, vs2_pix_start); tda19988_reg_write2(sc, TDA_VS_LINE_END_2_MSB, vs2_line_end); tda19988_reg_write2(sc, TDA_VS_PIX_END_2_MSB, vs2_pix_stop); tda19988_reg_write2(sc, TDA_HS_PIX_START_MSB, hs_pix_start); tda19988_reg_write2(sc, TDA_HS_PIX_STOP_MSB, hs_pix_stop); tda19988_reg_write2(sc, TDA_VWIN_START_1_MSB, vwin1_line_start); tda19988_reg_write2(sc, TDA_VWIN_END_1_MSB, vwin1_line_end); tda19988_reg_write2(sc, TDA_VWIN_START_2_MSB, vwin2_line_start); tda19988_reg_write2(sc, TDA_VWIN_END_2_MSB, vwin2_line_end); tda19988_reg_write2(sc, TDA_DE_START_MSB, de_start); tda19988_reg_write2(sc, TDA_DE_STOP_MSB, de_stop); if (sc->sc_version == TDA19988) tda19988_reg_write(sc, TDA_ENABLE_SPACE, 0x00); /* must be last register set */ tda19988_reg_clear(sc, TDA_TBG_CNTRL_0, TBG_CNTRL_0_SYNC_ONCE); } static int tda19988_read_edid_block(struct tda19988_softc *sc, uint8_t *buf, int block) { int attempt, err; uint8_t data; err = 0; tda19988_reg_set(sc, TDA_INT_FLAGS_2, INT_FLAGS_2_EDID_BLK_RD); /* Block 0 */ tda19988_reg_write(sc, TDA_DDC_ADDR, 0xa0); tda19988_reg_write(sc, TDA_DDC_OFFS, (block % 2) ? 
128 : 0); tda19988_reg_write(sc, TDA_DDC_SEGM_ADDR, 0x60); tda19988_reg_write(sc, TDA_DDC_SEGM, block / 2); tda19988_reg_write(sc, TDA_EDID_CTRL, 1); tda19988_reg_write(sc, TDA_EDID_CTRL, 0); data = 0; for (attempt = 0; attempt < MAX_READ_ATTEMPTS; attempt++) { tda19988_reg_read(sc, TDA_INT_FLAGS_2, &data); if (data & INT_FLAGS_2_EDID_BLK_RD) break; pause("EDID", 1); } if (attempt == MAX_READ_ATTEMPTS) { err = -1; goto done; } if (tda19988_block_read(sc, TDA_EDID_DATA0, buf, EDID_LENGTH) != 0) { err = -1; goto done; } done: tda19988_reg_clear(sc, TDA_INT_FLAGS_2, INT_FLAGS_2_EDID_BLK_RD); return (err); } static int tda19988_read_edid(struct tda19988_softc *sc) { int err; int blocks, i; uint8_t *buf; err = 0; if (sc->sc_version == TDA19988) tda19988_reg_clear(sc, TDA_TX4, TX4_PD_RAM); err = tda19988_read_edid_block(sc, sc->sc_edid, 0); if (err) goto done; blocks = sc->sc_edid[0x7e]; if (blocks > 0) { sc->sc_edid = realloc(sc->sc_edid, EDID_LENGTH*(blocks+1), M_DEVBUF, M_WAITOK); sc->sc_edid_len = EDID_LENGTH*(blocks+1); for (i = 0; i < blocks; i++) { /* TODO: check validity */ buf = sc->sc_edid + EDID_LENGTH*(i+1); err = tda19988_read_edid_block(sc, buf, i); if (err) goto done; } } - EVENTHANDLER_INVOKE(hdmi_event, 0); + EVENTHANDLER_INVOKE(hdmi_event, sc->sc_dev, HDMI_EVENT_CONNECTED); done: if (sc->sc_version == TDA19988) tda19988_reg_set(sc, TDA_TX4, TX4_PD_RAM); return (err); } static void tda19988_start(void *xdev) { struct tda19988_softc *sc; device_t dev = (device_t)xdev; uint8_t data; uint16_t version; sc = device_get_softc(dev); tda19988_cec_write(sc, TDA_CEC_ENAMODS, ENAMODS_RXSENS | ENAMODS_HDMI); DELAY(1000); tda19988_cec_read(sc, 0xfe, &data); /* Reset core */ tda19988_reg_set(sc, TDA_SOFTRESET, 3); DELAY(100); tda19988_reg_clear(sc, TDA_SOFTRESET, 3); DELAY(100); /* reset transmitter: */ tda19988_reg_set(sc, TDA_MAIN_CNTRL0, MAIN_CNTRL0_SR); tda19988_reg_clear(sc, TDA_MAIN_CNTRL0, MAIN_CNTRL0_SR); /* PLL registers common configuration */ tda19988_reg_write(sc, TDA_PLL_SERIAL_1, 0x00); tda19988_reg_write(sc, TDA_PLL_SERIAL_2, PLL_SERIAL_2_SRL_NOSC(1)); tda19988_reg_write(sc, TDA_PLL_SERIAL_3, 0x00); tda19988_reg_write(sc, TDA_SERIALIZER, 0x00); tda19988_reg_write(sc, TDA_BUFFER_OUT, 0x00); tda19988_reg_write(sc, TDA_PLL_SCG1, 0x00); tda19988_reg_write(sc, TDA_SEL_CLK, SEL_CLK_SEL_CLK1 | SEL_CLK_ENA_SC_CLK); tda19988_reg_write(sc, TDA_PLL_SCGN1, 0xfa); tda19988_reg_write(sc, TDA_PLL_SCGN2, 0x00); tda19988_reg_write(sc, TDA_PLL_SCGR1, 0x5b); tda19988_reg_write(sc, TDA_PLL_SCGR2, 0x00); tda19988_reg_write(sc, TDA_PLL_SCG2, 0x10); /* Write the default value MUX register */ tda19988_reg_write(sc, TDA_MUX_VP_VIP_OUT, 0x24); version = 0; tda19988_reg_read(sc, TDA_VERSION, &data); version |= data; tda19988_reg_read(sc, TDA_VERSION_MSB, &data); version |= (data << 8); /* Clear feature bits */ sc->sc_version = version & ~0x30; switch (sc->sc_version) { case TDA19988: device_printf(dev, "TDA19988\n"); break; default: device_printf(dev, "Unknown device: %04x\n", sc->sc_version); goto done; } tda19988_reg_write(sc, TDA_DDC_CTRL, DDC_ENABLE); tda19988_reg_write(sc, TDA_TX3, 39); tda19988_cec_write(sc, TDA_CEC_FRO_IM_CLK_CTRL, CEC_FRO_IM_CLK_CTRL_GHOST_DIS | CEC_FRO_IM_CLK_CTRL_IMCLK_SEL); if (tda19988_read_edid(sc) < 0) { device_printf(dev, "failed to read EDID\n"); goto done; } /* Default values for RGB 4:4:4 mapping */ tda19988_reg_write(sc, TDA_VIP_CNTRL_0, 0x23); tda19988_reg_write(sc, TDA_VIP_CNTRL_1, 0x01); tda19988_reg_write(sc, TDA_VIP_CNTRL_2, 0x45); done: 
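/* Always release the config intrhook, even on failure, so boot can proceed. */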
config_intrhook_disestablish(&sc->enum_hook); } static int tda19988_attach(device_t dev) { struct tda19988_softc *sc; phandle_t node; sc = device_get_softc(dev); sc->sc_dev = dev; sc->sc_addr = iicbus_get_addr(dev); sc->sc_cec_addr = (0x34 << 1); /* hardcoded */ sc->sc_edid = malloc(EDID_LENGTH, M_DEVBUF, M_WAITOK | M_ZERO); sc->sc_edid_len = EDID_LENGTH; device_set_desc(dev, "NXP TDA19988 HDMI transmitter"); sc->enum_hook.ich_func = tda19988_start; sc->enum_hook.ich_arg = dev; if (config_intrhook_establish(&sc->enum_hook) != 0) return (ENOMEM); node = ofw_bus_get_node(dev); OF_device_register_xref(OF_xref_from_node(node), dev); return (0); } static int tda19988_detach(device_t dev) { /* XXX: Do not let the driver unload */ return (EBUSY); } static int tda19988_get_edid(device_t dev, uint8_t **edid, uint32_t *edid_len) { struct tda19988_softc *sc; sc = device_get_softc(dev); if (sc->sc_edid) { *edid = sc->sc_edid; *edid_len = sc->sc_edid_len; } else return (ENXIO); return (0); } static int tda19988_set_videomode(device_t dev, const struct videomode *mode) { struct tda19988_softc *sc; sc = device_get_softc(dev); tda19988_init_encoder(sc, mode); return (0); } static device_method_t tda_methods[] = { DEVMETHOD(device_probe, tda19988_probe), DEVMETHOD(device_attach, tda19988_attach), DEVMETHOD(device_detach, tda19988_detach), /* HDMI methods */ DEVMETHOD(hdmi_get_edid, tda19988_get_edid), DEVMETHOD(hdmi_set_videomode, tda19988_set_videomode), {0, 0}, }; static driver_t tda_driver = { "tda", tda_methods, sizeof(struct tda19988_softc), }; static devclass_t tda_devclass; DRIVER_MODULE(tda, iicbus, tda_driver, tda_devclass, 0, 0); MODULE_VERSION(tda, 1); MODULE_DEPEND(tda, iicbus, 1, 1, 1); Index: projects/powernv/conf/kern.pre.mk =================================================================== --- projects/powernv/conf/kern.pre.mk (revision 290990) +++ projects/powernv/conf/kern.pre.mk (revision 290991) @@ -1,261 +1,261 @@ # $FreeBSD$ # Part of a unified Makefile for building kernels. This part contains all # of the definitions that need to be before %BEFORE_DEPEND. # Allow user to configure things that only affect src tree builds. # Note: This is duplicated from src.sys.mk to ensure that we include # /etc/src.conf when building the kernel. Kernels can be built without # the rest of /usr/src, but they still always process SRCCONF even though # the normal mechanisms to prevent that (compiling out of tree) won't # work. To ensure they do work, we have to duplicate these few lines here.
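# (Illustrative: this include is why a kernel-only build still honors
# /etc/src.conf; a WITHOUT_* knob set there, e.g. WITHOUT_CDDL, can affect
# the module build even when the rest of /usr/src is absent.)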
SRCCONF?= /etc/src.conf .if (exists(${SRCCONF}) || ${SRCCONF} != "/etc/src.conf") && !target(_srcconf_included_) .include "${SRCCONF}" _srcconf_included_: .endif .include .include .include "kern.opts.mk" # Can be overridden by makeoptions or /etc/make.conf KERNEL_KO?= kernel KERNEL?= kernel KODIR?= /boot/${KERNEL} LDSCRIPT_NAME?= ldscript.$M LDSCRIPT?= $S/conf/${LDSCRIPT_NAME} M= ${MACHINE} AWK?= awk CP?= cp LINT?= lint NM?= nm OBJCOPY?= objcopy SIZE?= size .if defined(DEBUG) _MINUS_O= -O CTFFLAGS+= -g .else .if ${MACHINE_CPUARCH} == "powerpc" _MINUS_O= -O # gcc miscompiles some code at -O2 .else _MINUS_O= -O2 .endif .endif .if ${MACHINE_CPUARCH} == "amd64" .if ${COMPILER_TYPE} == "clang" COPTFLAGS?=-O2 -pipe .else COPTFLAGS?=-O2 -frename-registers -pipe .endif .else COPTFLAGS?=${_MINUS_O} -pipe .endif .if !empty(COPTFLAGS:M-O[23s]) && empty(COPTFLAGS:M-fno-strict-aliasing) COPTFLAGS+= -fno-strict-aliasing .endif .if !defined(NO_CPU_COPTFLAGS) COPTFLAGS+= ${_CPUCFLAGS} .endif NOSTDINC= -nostdinc INCLUDES= ${NOSTDINC} ${INCLMAGIC} -I. -I$S .if make(depend) || make(kernel-depend) # This hack lets us use the ipfilter code without spamming a new # include path into contrib'ed source files. INCLUDES+= -I$S/contrib/ipfilter # ... and the same for ath INCLUDES+= -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal # ... and the same for the NgATM stuff INCLUDES+= -I$S/contrib/ngatm # ... and the same for vchiq INCLUDES+= -I$S/contrib/vchiq # ... and the same for twa INCLUDES+= -I$S/dev/twa # ... and the same for cxgb and cxgbe INCLUDES+= -I$S/dev/cxgb -I$S/dev/cxgbe .endif CFLAGS= ${COPTFLAGS} ${DEBUG} CFLAGS+= ${INCLUDES} -D_KERNEL -DHAVE_KERNEL_OPTION_HEADERS -include opt_global.h CFLAGS_PARAM_INLINE_UNIT_GROWTH?=100 CFLAGS_PARAM_LARGE_FUNCTION_GROWTH?=1000 .if ${MACHINE_CPUARCH} == "mips" CFLAGS_ARCH_PARAMS?=--param max-inline-insns-single=1000 .endif CFLAGS.gcc+= -fno-common -fms-extensions -finline-limit=${INLINE_LIMIT} CFLAGS.gcc+= --param inline-unit-growth=${CFLAGS_PARAM_INLINE_UNIT_GROWTH} CFLAGS.gcc+= --param large-function-growth=${CFLAGS_PARAM_LARGE_FUNCTION_GROWTH} .if defined(CFLAGS_ARCH_PARAMS) CFLAGS.gcc+=${CFLAGS_ARCH_PARAMS} .endif WERROR?= -Werror # XXX LOCORE means "don't declare C stuff" not "for locore.s". ASM_CFLAGS= -x assembler-with-cpp -DLOCORE ${CFLAGS} ${ASM_CFLAGS.${.IMPSRC:T}} .if defined(PROFLEVEL) && ${PROFLEVEL} >= 1 CFLAGS+= -DGPROF CFLAGS.gcc+= -falign-functions=16 .if ${PROFLEVEL} >= 2 CFLAGS+= -DGPROF4 -DGUPROF PROF= -pg .if ${COMPILER_TYPE} == "gcc" PROF+= -mprofiler-epilogue .endif .else PROF= -pg .endif .endif DEFINED_PROF= ${PROF} # Put configuration-specific C flags last (except for ${PROF}) so that they # can override the others. CFLAGS+= ${CONF_CFLAGS} # Optional linting. This can be overridden in /etc/make.conf. 
LINTFLAGS= ${LINTOBJKERNFLAGS} NORMAL_C= ${CC} -c ${CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} NORMAL_S= ${CC:N${CCACHE_BIN}} -c ${ASM_CFLAGS} ${WERROR} ${.IMPSRC} PROFILE_C= ${CC} -c ${CFLAGS} ${WERROR} ${.IMPSRC} NORMAL_C_NOWERROR= ${CC} -c ${CFLAGS} ${PROF} ${.IMPSRC} NORMAL_M= ${AWK} -f $S/tools/makeobjops.awk ${.IMPSRC} -c ; \ ${CC} -c ${CFLAGS} ${WERROR} ${PROF} ${.PREFIX}.c NORMAL_FW= uudecode -o ${.TARGET} ${.ALLSRC} NORMAL_FWO= ${LD} -b binary --no-warn-mismatch -d -warn-common -r \ -o ${.TARGET} ${.ALLSRC:M*.fw} # Common for dtrace / zfs CDDL_CFLAGS= -DFREEBSD_NAMECACHE -nostdinc -I$S/cddl/compat/opensolaris -I$S/cddl/contrib/opensolaris/uts/common -I$S -I$S/cddl/contrib/opensolaris/common ${CFLAGS} -Wno-unknown-pragmas -Wno-missing-prototypes -Wno-undef -Wno-strict-prototypes -Wno-cast-qual -Wno-parentheses -Wno-redundant-decls -Wno-missing-braces -Wno-uninitialized -Wno-unused -Wno-inline -Wno-switch -Wno-pointer-arith -Wno-unknown-pragmas CDDL_CFLAGS+= -include $S/cddl/compat/opensolaris/sys/debug_compat.h CDDL_C= ${CC} -c ${CDDL_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} # Special flags for managing the compat compiles for ZFS ZFS_CFLAGS= -DBUILDING_ZFS -I$S/cddl/contrib/opensolaris/uts/common/fs/zfs -I$S/cddl/contrib/opensolaris/uts/common/zmod -I$S/cddl/contrib/opensolaris/common/zfs ${CDDL_CFLAGS} ZFS_ASM_CFLAGS= -x assembler-with-cpp -DLOCORE ${ZFS_CFLAGS} ZFS_C= ${CC} -c ${ZFS_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} ZFS_S= ${CC} -c ${ZFS_ASM_CFLAGS} ${WERROR} ${.IMPSRC} # Special flags for managing the compat compiles for DTrace DTRACE_CFLAGS= -DBUILDING_DTRACE ${CDDL_CFLAGS} -I$S/cddl/dev/dtrace -I$S/cddl/dev/dtrace/${MACHINE_CPUARCH} .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" DTRACE_CFLAGS+= -I$S/cddl/contrib/opensolaris/uts/intel -I$S/cddl/dev/dtrace/x86 .endif DTRACE_CFLAGS+= -I$S/cddl/contrib/opensolaris/common/util -I$S -DDIS_MEM -DSMP DTRACE_ASM_CFLAGS= -x assembler-with-cpp -DLOCORE ${DTRACE_CFLAGS} -DTRACE_C= ${CC} -c ${DTRACE_CFLAGS} ${CDDL_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} -DTRACE_S= ${CC} -c ${DTRACE_ASM_CFLAGS} ${CDDL_CFLAGS} ${WERROR} ${.IMPSRC} +DTRACE_C= ${CC} -c ${DTRACE_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} +DTRACE_S= ${CC} -c ${DTRACE_ASM_CFLAGS} ${WERROR} ${.IMPSRC} # Special flags for managing the compat compiles for DTrace/FBT FBT_CFLAGS= -DBUILDING_DTRACE -nostdinc -I$S/cddl/dev/fbt/${MACHINE_CPUARCH} -I$S/cddl/dev/fbt -I$S/cddl/compat/opensolaris -I$S/cddl/contrib/opensolaris/uts/common -I$S ${CDDL_CFLAGS} .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" FBT_CFLAGS+= -I$S/cddl/dev/fbt/x86 .endif -FBT_C= ${CC} -c ${FBT_CFLAGS} ${CDDL_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} +FBT_C= ${CC} -c ${FBT_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC} .if ${MK_CTF} != "no" NORMAL_CTFCONVERT= ${CTFCONVERT} ${CTFFLAGS} ${.TARGET} .elif ${MAKE_VERSION} >= 5201111300 NORMAL_CTFCONVERT= .else NORMAL_CTFCONVERT= @: .endif NORMAL_LINT= ${LINT} ${LINTFLAGS} ${CFLAGS:M-[DIU]*} ${.IMPSRC} # Linux Kernel Programming Interface C-flags LINUXKPI_INCLUDES= -I$S/compat/linuxkpi/common/include LINUXKPI_C= ${NORMAL_C} ${LINUXKPI_INCLUDES} # Infiniband C flags. Correct include paths and omit errors that linux # does not honor. 
OFEDINCLUDES= -I$S/ofed/include ${LINUXKPI_INCLUDES} OFEDNOERR= -Wno-cast-qual -Wno-pointer-arith OFEDCFLAGS= ${CFLAGS:N-I*} ${OFEDINCLUDES} ${CFLAGS:M-I*} ${OFEDNOERR} OFED_C_NOIMP= ${CC} -c -o ${.TARGET} ${OFEDCFLAGS} ${WERROR} ${PROF} OFED_C= ${OFED_C_NOIMP} ${.IMPSRC} GEN_CFILES= $S/$M/$M/genassym.c ${MFILES:T:S/.m$/.c/} SYSTEM_CFILES= config.c env.c hints.c vnode_if.c SYSTEM_DEP= Makefile ${SYSTEM_OBJS} SYSTEM_OBJS= locore.o ${MDOBJS} ${OBJS} SYSTEM_OBJS+= ${SYSTEM_CFILES:.c=.o} SYSTEM_OBJS+= hack.So .if ${MFS_IMAGE:Uno} != "no" SYSTEM_OBJS+= embedfs_${MFS_IMAGE:T:R}.o .endif SYSTEM_LD= @${LD} -Bdynamic -T ${LDSCRIPT} ${_LDFLAGS} --no-warn-mismatch \ --warn-common --export-dynamic --dynamic-linker /red/herring \ -o ${.TARGET} -X ${SYSTEM_OBJS} vers.o SYSTEM_LD_TAIL= @${OBJCOPY} --strip-symbol gcc2_compiled. ${.TARGET} ; \ ${SIZE} ${.TARGET} ; chmod 755 ${.TARGET} SYSTEM_DEP+= ${LDSCRIPT} # Calculate path for .m files early, if needed. .if !defined(_MPATH) __MPATH!=find ${S:tA}/ -name \*_if.m _MPATH=${__MPATH:H:O:u} .endif # MKMODULESENV is set here so that port makefiles can augment # them. MKMODULESENV+= MAKEOBJDIRPREFIX=${.OBJDIR}/modules KMODDIR=${KODIR} MKMODULESENV+= MACHINE_CPUARCH=${MACHINE_CPUARCH} MKMODULESENV+= MACHINE=${MACHINE} MACHINE_ARCH=${MACHINE_ARCH} MKMODULESENV+= MODULES_EXTRA="${MODULES_EXTRA}" WITHOUT_MODULES="${WITHOUT_MODULES}" .if (${KERN_IDENT} == LINT) MKMODULESENV+= ALL_MODULES=LINT .endif .if defined(MODULES_OVERRIDE) MKMODULESENV+= MODULES_OVERRIDE="${MODULES_OVERRIDE}" .endif .if defined(DEBUG) MKMODULESENV+= DEBUG_FLAGS="${DEBUG}" .endif MKMODULESENV+= _MPATH="${_MPATH}" # Architecture and output format arguments for objdump to convert image to # object file .if ${MFS_IMAGE:Uno} != "no" .if !defined(EMBEDFS_FORMAT.${MACHINE_ARCH}) EMBEDFS_FORMAT.${MACHINE_ARCH}!= awk -F'"' '/OUTPUT_FORMAT/ {print $$2}' ${LDSCRIPT} .if empty(EMBEDFS_FORMAT.${MACHINE_ARCH}) .undef EMBEDFS_FORMAT.${MACHINE_ARCH} .endif .endif .if !defined(EMBEDFS_ARCH.${MACHINE_ARCH}) EMBEDFS_ARCH.${MACHINE_ARCH}!= sed -n '/OUTPUT_ARCH/s/.*(\(.*\)).*/\1/p' ${LDSCRIPT} .if empty(EMBEDFS_ARCH.${MACHINE_ARCH}) .undef EMBEDFS_ARCH.${MACHINE_ARCH} .endif .endif EMBEDFS_FORMAT.arm?= elf32-littlearm EMBEDFS_FORMAT.armv6?= elf32-littlearm EMBEDFS_FORMAT.mips?= elf32-tradbigmips EMBEDFS_FORMAT.mipsel?= elf32-tradlittlemips EMBEDFS_FORMAT.mips64?= elf64-tradbigmips EMBEDFS_FORMAT.mips64el?= elf64-tradlittlemips .endif # Detect kernel config options that force stack frames to be turned on. DDB_ENABLED!= grep DDB opt_ddb.h || true ; echo DTR_ENABLED!= grep KDTRACE_FRAME opt_kdtrace.h || true ; echo HWPMC_ENABLED!= grep HWPMC opt_hwpmc_hooks.h || true ; echo Index: projects/powernv/conf =================================================================== --- projects/powernv/conf (revision 290990) +++ projects/powernv/conf (revision 290991) Property changes on: projects/powernv/conf ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/conf:r290829-290990 Index: projects/powernv/dev/ata/chipsets/ata-intel.c =================================================================== --- projects/powernv/dev/ata/chipsets/ata-intel.c (revision 290990) +++ projects/powernv/dev/ata/chipsets/ata-intel.c (revision 290991) @@ -1,926 +1,926 @@ /*- * Copyright (c) 1998 - 2008 Søren Schmidt * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* local prototypes */ static int ata_intel_chipinit(device_t dev); static int ata_intel_chipdeinit(device_t dev); static int ata_intel_ch_attach(device_t dev); static void ata_intel_reset(device_t dev); static int ata_intel_old_setmode(device_t dev, int target, int mode); static int ata_intel_new_setmode(device_t dev, int target, int mode); static int ata_intel_sch_setmode(device_t dev, int target, int mode); static int ata_intel_sata_getrev(device_t dev, int target); static int ata_intel_sata_status(device_t dev); static int ata_intel_sata_ahci_read(device_t dev, int port, int reg, u_int32_t *result); static int ata_intel_sata_cscr_read(device_t dev, int port, int reg, u_int32_t *result); static int ata_intel_sata_sidpr_read(device_t dev, int port, int reg, u_int32_t *result); static int ata_intel_sata_ahci_write(device_t dev, int port, int reg, u_int32_t result); static int ata_intel_sata_cscr_write(device_t dev, int port, int reg, u_int32_t result); static int ata_intel_sata_sidpr_write(device_t dev, int port, int reg, u_int32_t result); static int ata_intel_sata_sidpr_test(device_t dev); static int ata_intel_31244_ch_attach(device_t dev); static int ata_intel_31244_ch_detach(device_t dev); static int ata_intel_31244_status(device_t dev); static void ata_intel_31244_tf_write(struct ata_request *request); static void ata_intel_31244_reset(device_t dev); /* misc defines */ #define INTEL_ICH5 2 #define INTEL_6CH 4 #define INTEL_6CH2 8 #define INTEL_ICH7 16 struct ata_intel_data { struct mtx lock; u_char smap[4]; }; #define ATA_INTEL_SMAP(ctlr, ch) \ &((struct ata_intel_data *)((ctlr)->chipset_data))->smap[(ch)->unit * 2] #define ATA_INTEL_LOCK(ctlr) \ mtx_lock(&((struct ata_intel_data *)((ctlr)->chipset_data))->lock) #define ATA_INTEL_UNLOCK(ctlr) \ mtx_unlock(&((struct ata_intel_data *)((ctlr)->chipset_data))->lock) /* * Intel chipset support functions */ static int ata_intel_probe(device_t dev) { struct ata_pci_controller *ctlr = device_get_softc(dev); static const struct ata_chip_id ids[] = {{ 
ATA_I82371FB, 0, 0, 2, ATA_WDMA2, "PIIX" }, { ATA_I82371SB, 0, 0, 2, ATA_WDMA2, "PIIX3" }, { ATA_I82371AB, 0, 0, 2, ATA_UDMA2, "PIIX4" }, { ATA_I82443MX, 0, 0, 2, ATA_UDMA2, "PIIX4" }, { ATA_I82451NX, 0, 0, 2, ATA_UDMA2, "PIIX4" }, { ATA_I82801AB, 0, 0, 2, ATA_UDMA2, "ICH0" }, { ATA_I82801AA, 0, 0, 2, ATA_UDMA4, "ICH" }, { ATA_I82372FB, 0, 0, 2, ATA_UDMA4, "ICH" }, { ATA_I82801BA, 0, 0, 2, ATA_UDMA5, "ICH2" }, { ATA_I82801BA_1, 0, 0, 2, ATA_UDMA5, "ICH2" }, { ATA_I82801CA, 0, 0, 2, ATA_UDMA5, "ICH3" }, { ATA_I82801CA_1, 0, 0, 2, ATA_UDMA5, "ICH3" }, { ATA_I82801DB, 0, 0, 2, ATA_UDMA5, "ICH4" }, { ATA_I82801DB_1, 0, 0, 2, ATA_UDMA5, "ICH4" }, { ATA_I82801EB, 0, 0, 2, ATA_UDMA5, "ICH5" }, { ATA_I82801EB_S1, 0, INTEL_ICH5, 2, ATA_SA150, "ICH5" }, { ATA_I82801EB_R1, 0, INTEL_ICH5, 2, ATA_SA150, "ICH5" }, { ATA_I6300ESB, 0, 0, 2, ATA_UDMA5, "6300ESB" }, { ATA_I6300ESB_S1, 0, INTEL_ICH5, 2, ATA_SA150, "6300ESB" }, { ATA_I6300ESB_R1, 0, INTEL_ICH5, 2, ATA_SA150, "6300ESB" }, { ATA_I82801FB, 0, 0, 2, ATA_UDMA5, "ICH6" }, { ATA_I82801FB_S1, 0, 0, 0, ATA_SA150, "ICH6" }, { ATA_I82801FB_R1, 0, 0, 0, ATA_SA150, "ICH6" }, { ATA_I82801FBM, 0, 0, 0, ATA_SA150, "ICH6M" }, { ATA_I82801GB, 0, 0, 1, ATA_UDMA5, "ICH7" }, { ATA_I82801GB_S1, 0, INTEL_ICH7, 0, ATA_SA300, "ICH7" }, { ATA_I82801GBM_S1, 0, INTEL_ICH7, 0, ATA_SA150, "ICH7M" }, { ATA_I63XXESB2, 0, 0, 1, ATA_UDMA5, "63XXESB2" }, { ATA_I63XXESB2_S1, 0, 0, 0, ATA_SA300, "63XXESB2" }, { ATA_I82801HB_S1, 0, INTEL_6CH, 0, ATA_SA300, "ICH8" }, { ATA_I82801HB_S2, 0, INTEL_6CH2, 0, ATA_SA300, "ICH8" }, { ATA_I82801HBM, 0, 0, 1, ATA_UDMA5, "ICH8M" }, { ATA_I82801HBM_S1, 0, INTEL_6CH, 0, ATA_SA300, "ICH8M" }, { ATA_I82801IB_S1, 0, INTEL_6CH, 0, ATA_SA300, "ICH9" }, { ATA_I82801IB_S2, 0, INTEL_6CH2, 0, ATA_SA300, "ICH9" }, { ATA_I82801IB_S3, 0, INTEL_6CH2, 0, ATA_SA300, "ICH9" }, { ATA_I82801IBM_S1, 0, INTEL_6CH2, 0, ATA_SA300, "ICH9M" }, { ATA_I82801IBM_S2, 0, INTEL_6CH2, 0, ATA_SA300, "ICH9M" }, { ATA_I82801JIB_S1, 0, INTEL_6CH, 0, ATA_SA300, "ICH10" }, { ATA_I82801JIB_S2, 0, INTEL_6CH2, 0, ATA_SA300, "ICH10" }, { ATA_I82801JD_S1, 0, INTEL_6CH, 0, ATA_SA300, "ICH10" }, { ATA_I82801JD_S2, 0, INTEL_6CH2, 0, ATA_SA300, "ICH10" }, { ATA_I82801JI_S1, 0, INTEL_6CH, 0, ATA_SA300, "ICH10" }, { ATA_I82801JI_S2, 0, INTEL_6CH2, 0, ATA_SA300, "ICH10" }, { ATA_5Series_S1, 0, INTEL_6CH, 0, ATA_SA300, "5 Series/3400 Series PCH" }, { ATA_5Series_S2, 0, INTEL_6CH2, 0, ATA_SA300, "5 Series/3400 Series PCH" }, { ATA_5Series_S3, 0, INTEL_6CH2, 0, ATA_SA300, "5 Series/3400 Series PCH" }, { ATA_5Series_S4, 0, INTEL_6CH, 0, ATA_SA300, "5 Series/3400 Series PCH" }, { ATA_5Series_S5, 0, INTEL_6CH2, 0, ATA_SA300, "5 Series/3400 Series PCH" }, { ATA_5Series_S6, 0, INTEL_6CH, 0, ATA_SA300, "5 Series/3400 Series PCH" }, { ATA_CPT_S1, 0, INTEL_6CH, 0, ATA_SA600, "Cougar Point" }, { ATA_CPT_S2, 0, INTEL_6CH, 0, ATA_SA600, "Cougar Point" }, { ATA_CPT_S3, 0, INTEL_6CH2, 0, ATA_SA300, "Cougar Point" }, { ATA_CPT_S4, 0, INTEL_6CH2, 0, ATA_SA300, "Cougar Point" }, { ATA_PBG_S1, 0, INTEL_6CH, 0, ATA_SA600, "Patsburg" }, { ATA_PBG_S2, 0, INTEL_6CH2, 0, ATA_SA300, "Patsburg" }, { ATA_PPT_S1, 0, INTEL_6CH, 0, ATA_SA600, "Panther Point" }, { ATA_PPT_S2, 0, INTEL_6CH, 0, ATA_SA600, "Panther Point" }, { ATA_PPT_S3, 0, INTEL_6CH2, 0, ATA_SA300, "Panther Point" }, { ATA_PPT_S4, 0, INTEL_6CH2, 0, ATA_SA300, "Panther Point" }, { ATA_AVOTON_S1, 0, INTEL_6CH, 0, ATA_SA600, "Avoton" }, { ATA_AVOTON_S2, 0, INTEL_6CH, 0, ATA_SA600, "Avoton" }, { ATA_AVOTON_S3, 0, INTEL_6CH2, 0, ATA_SA300, "Avoton" }, { 
ATA_AVOTON_S4, 0, INTEL_6CH2, 0, ATA_SA300, "Avoton" }, { ATA_LPT_S1, 0, INTEL_6CH, 0, ATA_SA600, "Lynx Point" }, { ATA_LPT_S2, 0, INTEL_6CH, 0, ATA_SA600, "Lynx Point" }, { ATA_LPT_S3, 0, INTEL_6CH2, 0, ATA_SA600, "Lynx Point" }, { ATA_LPT_S4, 0, INTEL_6CH2, 0, ATA_SA600, "Lynx Point" }, { ATA_WCPT_S1, 0, INTEL_6CH, 0, ATA_SA600, "Wildcat Point" }, { ATA_WCPT_S2, 0, INTEL_6CH, 0, ATA_SA600, "Wildcat Point" }, { ATA_WCPT_S3, 0, INTEL_6CH2, 0, ATA_SA600, "Wildcat Point" }, { ATA_WCPT_S4, 0, INTEL_6CH2, 0, ATA_SA600, "Wildcat Point" }, { ATA_WELLS_S1, 0, INTEL_6CH, 0, ATA_SA600, "Wellsburg" }, { ATA_WELLS_S2, 0, INTEL_6CH2, 0, ATA_SA600, "Wellsburg" }, { ATA_WELLS_S3, 0, INTEL_6CH, 0, ATA_SA600, "Wellsburg" }, { ATA_WELLS_S4, 0, INTEL_6CH2, 0, ATA_SA600, "Wellsburg" }, { ATA_LPTLP_S1, 0, INTEL_6CH, 0, ATA_SA600, "Lynx Point-LP" }, { ATA_LPTLP_S2, 0, INTEL_6CH, 0, ATA_SA600, "Lynx Point-LP" }, { ATA_LPTLP_S3, 0, INTEL_6CH2, 0, ATA_SA300, "Lynx Point-LP" }, { ATA_LPTLP_S4, 0, INTEL_6CH2, 0, ATA_SA300, "Lynx Point-LP" }, { ATA_I31244, 0, 0, 2, ATA_SA150, "31244" }, { ATA_ISCH, 0, 0, 1, ATA_UDMA5, "SCH" }, { ATA_COLETOCRK_S1, 0, INTEL_6CH2, 0, ATA_SA300, "COLETOCRK" }, { ATA_COLETOCRK_S2, 0, INTEL_6CH2, 0, ATA_SA300, "COLETOCRK" }, { 0, 0, 0, 0, 0, 0}}; if (pci_get_vendor(dev) != ATA_INTEL_ID) return ENXIO; if (!(ctlr->chip = ata_match_chip(dev, ids))) return ENXIO; ata_set_desc(dev); ctlr->chipinit = ata_intel_chipinit; ctlr->chipdeinit = ata_intel_chipdeinit; return (BUS_PROBE_LOW_PRIORITY); } static int ata_intel_chipinit(device_t dev) { struct ata_pci_controller *ctlr = device_get_softc(dev); struct ata_intel_data *data; if (ata_setup_interrupt(dev, ata_generic_intr)) return ENXIO; data = malloc(sizeof(struct ata_intel_data), M_ATAPCI, M_WAITOK | M_ZERO); mtx_init(&data->lock, "Intel SATA lock", NULL, MTX_DEF); ctlr->chipset_data = (void *)data; /* good old PIIX needs special treatment (not implemented) */ if (ctlr->chip->chipid == ATA_I82371FB) { ctlr->setmode = ata_intel_old_setmode; } /* the Intel 31244 needs special care if in DPA mode */ else if (ctlr->chip->chipid == ATA_I31244) { if (pci_get_subclass(dev) != PCIS_STORAGE_IDE) { ctlr->r_type2 = SYS_RES_MEMORY; ctlr->r_rid2 = PCIR_BAR(0); if (!(ctlr->r_res2 = bus_alloc_resource_any(dev, ctlr->r_type2, &ctlr->r_rid2, RF_ACTIVE))) return ENXIO; ctlr->channels = 4; ctlr->ch_attach = ata_intel_31244_ch_attach; ctlr->ch_detach = ata_intel_31244_ch_detach; ctlr->reset = ata_intel_31244_reset; } ctlr->setmode = ata_sata_setmode; ctlr->getrev = ata_sata_getrev; } /* SCH */ else if (ctlr->chip->chipid == ATA_ISCH) { ctlr->channels = 1; ctlr->ch_attach = ata_intel_ch_attach; ctlr->ch_detach = ata_pci_ch_detach; ctlr->setmode = ata_intel_sch_setmode; } /* non-SATA Intel chips go here */ else if (ctlr->chip->max_dma < ATA_SA150) { ctlr->channels = ctlr->chip->cfg2; ctlr->ch_attach = ata_intel_ch_attach; ctlr->ch_detach = ata_pci_ch_detach; ctlr->setmode = ata_intel_new_setmode; } /* SATA parts can be either compat or AHCI */ else { /* force all ports active "the legacy way" */ pci_write_config(dev, 0x92, pci_read_config(dev, 0x92, 2) | 0x0f, 2); ctlr->ch_attach = ata_intel_ch_attach; ctlr->ch_detach = ata_pci_ch_detach; ctlr->reset = ata_intel_reset; /* BAR(5) may point to SATA interface registers */ if ((ctlr->chip->cfg1 & INTEL_ICH7)) { ctlr->r_type2 = SYS_RES_MEMORY; ctlr->r_rid2 = PCIR_BAR(5); ctlr->r_res2 = bus_alloc_resource_any(dev, ctlr->r_type2, &ctlr->r_rid2, RF_ACTIVE); if (ctlr->r_res2 != NULL) { /* Set SCRAE bit to enable registers
access. */ pci_write_config(dev, 0x94, pci_read_config(dev, 0x94, 4) | (1 << 9), 4); /* Set Ports Implemented register bits. */ ATA_OUTL(ctlr->r_res2, 0x0C, ATA_INL(ctlr->r_res2, 0x0C) | 0xf); } /* Skip BAR(5) on ICH8M Apples, system locks up on access. */ } else if (ctlr->chip->chipid != ATA_I82801HBM_S1 || pci_get_subvendor(dev) != 0x106b) { ctlr->r_type2 = SYS_RES_IOPORT; ctlr->r_rid2 = PCIR_BAR(5); ctlr->r_res2 = bus_alloc_resource_any(dev, ctlr->r_type2, &ctlr->r_rid2, RF_ACTIVE); } if (ctlr->r_res2 != NULL || (ctlr->chip->cfg1 & INTEL_ICH5)) ctlr->getrev = ata_intel_sata_getrev; ctlr->setmode = ata_sata_setmode; } return 0; } static int ata_intel_chipdeinit(device_t dev) { struct ata_pci_controller *ctlr = device_get_softc(dev); struct ata_intel_data *data; data = ctlr->chipset_data; mtx_destroy(&data->lock); free(data, M_ATAPCI); ctlr->chipset_data = NULL; return (0); } static int ata_intel_ch_attach(device_t dev) { struct ata_pci_controller *ctlr; struct ata_channel *ch; u_char *smap; u_int map; /* setup the usual register normal pci style */ if (ata_pci_ch_attach(dev)) return (ENXIO); ctlr = device_get_softc(device_get_parent(dev)); ch = device_get_softc(dev); /* if r_res2 is valid it points to SATA interface registers */ if (ctlr->r_res2) { ch->r_io[ATA_IDX_ADDR].res = ctlr->r_res2; ch->r_io[ATA_IDX_ADDR].offset = 0x00; ch->r_io[ATA_IDX_DATA].res = ctlr->r_res2; ch->r_io[ATA_IDX_DATA].offset = 0x04; } ch->flags |= ATA_ALWAYS_DMASTAT; if (ctlr->chip->max_dma >= ATA_SA150) { smap = ATA_INTEL_SMAP(ctlr, ch); map = pci_read_config(device_get_parent(dev), 0x90, 1); if (ctlr->chip->cfg1 & INTEL_ICH5) { map &= 0x07; if ((map & 0x04) == 0) { ch->flags |= ATA_SATA; ch->flags |= ATA_NO_SLAVE; smap[0] = (map & 0x01) ^ ch->unit; smap[1] = 0; } else if ((map & 0x02) == 0 && ch->unit == 0) { ch->flags |= ATA_SATA; smap[0] = (map & 0x01) ? 1 : 0; smap[1] = (map & 0x01) ? 0 : 1; } else if ((map & 0x02) != 0 && ch->unit == 1) { ch->flags |= ATA_SATA; smap[0] = (map & 0x01) ? 1 : 0; smap[1] = (map & 0x01) ? 0 : 1; } } else if (ctlr->chip->cfg1 & INTEL_6CH2) { ch->flags |= ATA_SATA; ch->flags |= ATA_NO_SLAVE; smap[0] = (ch->unit == 0) ? 0 : 1; smap[1] = 0; } else { map &= 0x03; if (map == 0x00) { ch->flags |= ATA_SATA; smap[0] = (ch->unit == 0) ? 0 : 1; smap[1] = (ch->unit == 0) ? 
2 : 3; } else if (map == 0x02 && ch->unit == 0) { ch->flags |= ATA_SATA; smap[0] = 0; smap[1] = 2; } else if (map == 0x01 && ch->unit == 1) { ch->flags |= ATA_SATA; smap[0] = 1; smap[1] = 3; } } if (ch->flags & ATA_SATA) { if ((ctlr->chip->cfg1 & INTEL_ICH5)) { ch->hw.pm_read = ata_intel_sata_cscr_read; ch->hw.pm_write = ata_intel_sata_cscr_write; } else if (ctlr->r_res2) { if ((ctlr->chip->cfg1 & INTEL_ICH7)) { ch->hw.pm_read = ata_intel_sata_ahci_read; ch->hw.pm_write = ata_intel_sata_ahci_write; } else if (ata_intel_sata_sidpr_test(dev)) { ch->hw.pm_read = ata_intel_sata_sidpr_read; ch->hw.pm_write = ata_intel_sata_sidpr_write; }; } if (ch->hw.pm_write != NULL) { ch->flags |= ATA_PERIODIC_POLL; ch->hw.status = ata_intel_sata_status; ata_sata_scr_write(ch, 0, ATA_SERROR, 0xffffffff); if ((ch->flags & ATA_NO_SLAVE) == 0) { ata_sata_scr_write(ch, 1, ATA_SERROR, 0xffffffff); } } } else ctlr->setmode = ata_intel_new_setmode; if (ctlr->chip->max_dma >= ATA_SA600) ch->flags |= ATA_USE_16BIT; } else if (ctlr->chip->chipid != ATA_ISCH) ch->flags |= ATA_CHECKS_CABLE; return (0); } static void ata_intel_reset(device_t dev) { device_t parent = device_get_parent(dev); struct ata_pci_controller *ctlr = device_get_softc(parent); struct ata_channel *ch = device_get_softc(dev); int mask, pshift, timeout, devs; u_char *smap; uint16_t pcs; /* In combined mode, skip SATA stuff for PATA channel. */ if ((ch->flags & ATA_SATA) == 0) return (ata_generic_reset(dev)); /* Do hard-reset on respective SATA ports. */ smap = ATA_INTEL_SMAP(ctlr, ch); mask = 1 << smap[0]; if ((ch->flags & ATA_NO_SLAVE) == 0) mask |= (1 << smap[1]); pci_write_config(parent, 0x92, pci_read_config(parent, 0x92, 2) & ~mask, 2); - DELAY(10); + DELAY(100); pci_write_config(parent, 0x92, pci_read_config(parent, 0x92, 2) | mask, 2); /* Wait up to 1 sec for "connect well". */ if (ctlr->chip->cfg1 & (INTEL_6CH | INTEL_6CH2)) pshift = 8; else pshift = 4; for (timeout = 0; timeout < 100 ; timeout++) { pcs = (pci_read_config(parent, 0x92, 2) >> pshift) & mask; if ((pcs == mask) && (ATA_IDX_INB(ch, ATA_STATUS) != 0xff)) break; ata_udelay(10000); } if (bootverbose) device_printf(dev, "SATA reset: ports status=0x%02x\n", pcs); /* If any device found, do soft-reset. */ if (ch->hw.pm_read != NULL) { devs = ata_sata_phy_reset(dev, 0, 2) ? ATA_ATA_MASTER : 0; if ((ch->flags & ATA_NO_SLAVE) == 0) devs |= ata_sata_phy_reset(dev, 1, 2) ? ATA_ATA_SLAVE : 0; } else { devs = (pcs & (1 << smap[0])) ? ATA_ATA_MASTER : 0; if ((ch->flags & ATA_NO_SLAVE) == 0) devs |= (pcs & (1 << smap[1])) ? ATA_ATA_SLAVE : 0; } if (devs) { ata_generic_reset(dev); /* Reset may give fake slave when only ATAPI master present. 
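 * The masking that follows assumes the usual FreeBSD ata(4) encoding of
 * ch->devices, in which the ATAPI bits are the ATA bits shifted left by
 * two (ATA_ATAPI_MASTER == ATA_ATA_MASTER << 2). If that holds, 'devs'
 * marks the master/slave positions where the reset actually saw a
 * device, and (devs | devs * ATA_ATAPI_MASTER) keeps both the ATA bit
 * and its ATAPI counterpart for exactly those positions, discarding any
 * phantom slave reported by the generic reset.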
*/ ch->devices &= (devs | (devs * ATA_ATAPI_MASTER)); } else ch->devices = 0; } static int ata_intel_old_setmode(device_t dev, int target, int mode) { device_t parent = device_get_parent(dev); struct ata_pci_controller *ctlr = device_get_softc(parent); mode = min(mode, ctlr->chip->max_dma); return (mode); } static int ata_intel_new_setmode(device_t dev, int target, int mode) { device_t parent = device_get_parent(dev); struct ata_pci_controller *ctlr = device_get_softc(parent); struct ata_channel *ch = device_get_softc(dev); int devno = (ch->unit << 1) + target; int piomode; u_int32_t reg40 = pci_read_config(parent, 0x40, 4); u_int8_t reg44 = pci_read_config(parent, 0x44, 1); u_int8_t reg48 = pci_read_config(parent, 0x48, 1); u_int16_t reg4a = pci_read_config(parent, 0x4a, 2); u_int16_t reg54 = pci_read_config(parent, 0x54, 2); u_int32_t mask40 = 0, new40 = 0; u_int8_t mask44 = 0, new44 = 0; static const uint8_t timings[] = { 0x00, 0x00, 0x10, 0x21, 0x23, 0x00, 0x21, 0x23 }; static const uint8_t utimings[] = { 0x00, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02 }; /* In combined mode, skip PATA stuff for SATA channel. */ if (ch->flags & ATA_SATA) return (ata_sata_setmode(dev, target, mode)); mode = min(mode, ctlr->chip->max_dma); if (ata_dma_check_80pin && mode > ATA_UDMA2 && !(reg54 & (0x10 << devno))) { ata_print_cable(dev, "controller"); mode = ATA_UDMA2; } /* Enable/disable UDMA and set timings. */ if (mode >= ATA_UDMA0) { pci_write_config(parent, 0x48, reg48 | (0x0001 << devno), 2); pci_write_config(parent, 0x4a, (reg4a & ~(0x3 << (devno << 2))) | (utimings[mode & ATA_MODE_MASK] << (devno<<2)), 2); piomode = ATA_PIO4; } else { pci_write_config(parent, 0x48, reg48 & ~(0x0001 << devno), 2); pci_write_config(parent, 0x4a, (reg4a & ~(0x3 << (devno << 2))),2); piomode = mode; } reg54 |= 0x0400; /* Set UDMA reference clock (33/66/133MHz). */ reg54 &= ~(0x1001 << devno); if (mode >= ATA_UDMA5) reg54 |= (0x1000 << devno); else if (mode >= ATA_UDMA3) reg54 |= (0x1 << devno); pci_write_config(parent, 0x54, reg54, 2); /* Allow PIO/WDMA timing controls. */ reg40 &= ~0x00ff00ff; reg40 |= 0x40774077; /* Set PIO/WDMA timings. */ if (target == 0) { mask40 = 0x3300; new40 = timings[ata_mode2idx(piomode)] << 8; } else { mask44 = 0x0f; new44 = ((timings[ata_mode2idx(piomode)] & 0x30) >> 2) | (timings[ata_mode2idx(piomode)] & 0x03); } if (ch->unit) { mask40 <<= 16; new40 <<= 16; mask44 <<= 4; new44 <<= 4; } pci_write_config(parent, 0x40, (reg40 & ~mask40) | new40, 4); pci_write_config(parent, 0x44, (reg44 & ~mask44) | new44, 1); return (mode); } static int ata_intel_sch_setmode(device_t dev, int target, int mode) { device_t parent = device_get_parent(dev); struct ata_pci_controller *ctlr = device_get_softc(parent); u_int8_t dtim = 0x80 + (target << 2); u_int32_t tim = pci_read_config(parent, dtim, 4); int piomode; mode = min(mode, ctlr->chip->max_dma); if (mode >= ATA_UDMA0) { tim |= (0x1 << 31); tim &= ~(0x7 << 16); tim |= ((mode & ATA_MODE_MASK) << 16); piomode = ATA_PIO4; } else if (mode >= ATA_WDMA0) { tim &= ~(0x1 << 31); tim &= ~(0x3 << 8); tim |= ((mode & ATA_MODE_MASK) << 8); piomode = (mode == ATA_WDMA0) ? ATA_PIO0 : (mode == ATA_WDMA1) ? 
ATA_PIO3 : ATA_PIO4; } else piomode = mode; tim &= ~(0x7); tim |= (piomode & 0x7); pci_write_config(parent, dtim, tim, 4); return (mode); } static int ata_intel_sata_getrev(device_t dev, int target) { struct ata_channel *ch = device_get_softc(dev); uint32_t status; if (ata_sata_scr_read(ch, target, ATA_SSTATUS, &status) == 0) return ((status & 0x0f0) >> 4); return (0xff); } static int ata_intel_sata_status(device_t dev) { struct ata_channel *ch = device_get_softc(dev); ata_sata_phy_check_events(dev, 0); if ((ch->flags & ATA_NO_SLAVE) == 0) ata_sata_phy_check_events(dev, 1); return ata_pci_status(dev); } static int ata_intel_sata_ahci_read(device_t dev, int port, int reg, u_int32_t *result) { struct ata_pci_controller *ctlr; struct ata_channel *ch; device_t parent; u_char *smap; int offset; parent = device_get_parent(dev); ctlr = device_get_softc(parent); ch = device_get_softc(dev); port = (port == 1) ? 1 : 0; smap = ATA_INTEL_SMAP(ctlr, ch); offset = 0x100 + smap[port] * 0x80; switch (reg) { case ATA_SSTATUS: reg = 0x28; break; case ATA_SCONTROL: reg = 0x2c; break; case ATA_SERROR: reg = 0x30; break; default: return (EINVAL); } *result = ATA_INL(ctlr->r_res2, offset + reg); return (0); } static int ata_intel_sata_cscr_read(device_t dev, int port, int reg, u_int32_t *result) { struct ata_pci_controller *ctlr; struct ata_channel *ch; device_t parent; u_char *smap; parent = device_get_parent(dev); ctlr = device_get_softc(parent); ch = device_get_softc(dev); smap = ATA_INTEL_SMAP(ctlr, ch); port = (port == 1) ? 1 : 0; switch (reg) { case ATA_SSTATUS: reg = 0; break; case ATA_SERROR: reg = 1; break; case ATA_SCONTROL: reg = 2; break; default: return (EINVAL); } ATA_INTEL_LOCK(ctlr); pci_write_config(parent, 0xa0, 0x50 + smap[port] * 0x10 + reg * 4, 4); *result = pci_read_config(parent, 0xa4, 4); ATA_INTEL_UNLOCK(ctlr); return (0); } static int ata_intel_sata_sidpr_read(device_t dev, int port, int reg, u_int32_t *result) { struct ata_pci_controller *ctlr; struct ata_channel *ch; device_t parent; parent = device_get_parent(dev); ctlr = device_get_softc(parent); ch = device_get_softc(dev); port = (port == 1) ? 1 : 0; switch (reg) { case ATA_SSTATUS: reg = 0; break; case ATA_SCONTROL: reg = 1; break; case ATA_SERROR: reg = 2; break; default: return (EINVAL); } ATA_INTEL_LOCK(ctlr); ATA_IDX_OUTL(ch, ATA_IDX_ADDR, ((ch->unit * 2 + port) << 8) + reg); *result = ATA_IDX_INL(ch, ATA_IDX_DATA); ATA_INTEL_UNLOCK(ctlr); return (0); } static int ata_intel_sata_ahci_write(device_t dev, int port, int reg, u_int32_t value) { struct ata_pci_controller *ctlr; struct ata_channel *ch; device_t parent; u_char *smap; int offset; parent = device_get_parent(dev); ctlr = device_get_softc(parent); ch = device_get_softc(dev); port = (port == 1) ? 1 : 0; smap = ATA_INTEL_SMAP(ctlr, ch); offset = 0x100 + smap[port] * 0x80; switch (reg) { case ATA_SSTATUS: reg = 0x28; break; case ATA_SCONTROL: reg = 0x2c; break; case ATA_SERROR: reg = 0x30; break; default: return (EINVAL); } ATA_OUTL(ctlr->r_res2, offset + reg, value); return (0); } static int ata_intel_sata_cscr_write(device_t dev, int port, int reg, u_int32_t value) { struct ata_pci_controller *ctlr; struct ata_channel *ch; device_t parent; u_char *smap; parent = device_get_parent(dev); ctlr = device_get_softc(parent); ch = device_get_softc(dev); smap = ATA_INTEL_SMAP(ctlr, ch); port = (port == 1) ? 
1 : 0; switch (reg) { case ATA_SSTATUS: reg = 0; break; case ATA_SERROR: reg = 1; break; case ATA_SCONTROL: reg = 2; break; default: return (EINVAL); } ATA_INTEL_LOCK(ctlr); pci_write_config(parent, 0xa0, 0x50 + smap[port] * 0x10 + reg * 4, 4); pci_write_config(parent, 0xa4, value, 4); ATA_INTEL_UNLOCK(ctlr); return (0); } static int ata_intel_sata_sidpr_write(device_t dev, int port, int reg, u_int32_t value) { struct ata_pci_controller *ctlr; struct ata_channel *ch; device_t parent; parent = device_get_parent(dev); ctlr = device_get_softc(parent); ch = device_get_softc(dev); port = (port == 1) ? 1 : 0; switch (reg) { case ATA_SSTATUS: reg = 0; break; case ATA_SCONTROL: reg = 1; break; case ATA_SERROR: reg = 2; break; default: return (EINVAL); } ATA_INTEL_LOCK(ctlr); ATA_IDX_OUTL(ch, ATA_IDX_ADDR, ((ch->unit * 2 + port) << 8) + reg); ATA_IDX_OUTL(ch, ATA_IDX_DATA, value); ATA_INTEL_UNLOCK(ctlr); return (0); } static int ata_intel_sata_sidpr_test(device_t dev) { struct ata_channel *ch = device_get_softc(dev); int port; uint32_t val; port = (ch->flags & ATA_NO_SLAVE) ? 0 : 1; for (; port >= 0; port--) { ata_intel_sata_sidpr_read(dev, port, ATA_SCONTROL, &val); if ((val & ATA_SC_IPM_MASK) == (ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER)) return (1); val |= ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER; ata_intel_sata_sidpr_write(dev, port, ATA_SCONTROL, val); ata_intel_sata_sidpr_read(dev, port, ATA_SCONTROL, &val); if ((val & ATA_SC_IPM_MASK) == (ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER)) return (1); } if (bootverbose) device_printf(dev, "SControl registers are not functional: %08x\n", val); return (0); } static int ata_intel_31244_ch_attach(device_t dev) { struct ata_pci_controller *ctlr = device_get_softc(device_get_parent(dev)); struct ata_channel *ch = device_get_softc(dev); int i; int ch_offset; ata_pci_dmainit(dev); ch_offset = 0x200 + ch->unit * 0x200; for (i = ATA_DATA; i < ATA_MAX_RES; i++) ch->r_io[i].res = ctlr->r_res2; /* setup ATA registers */ ch->r_io[ATA_DATA].offset = ch_offset + 0x00; ch->r_io[ATA_FEATURE].offset = ch_offset + 0x06; ch->r_io[ATA_COUNT].offset = ch_offset + 0x08; ch->r_io[ATA_SECTOR].offset = ch_offset + 0x0c; ch->r_io[ATA_CYL_LSB].offset = ch_offset + 0x10; ch->r_io[ATA_CYL_MSB].offset = ch_offset + 0x14; ch->r_io[ATA_DRIVE].offset = ch_offset + 0x18; ch->r_io[ATA_COMMAND].offset = ch_offset + 0x1d; ch->r_io[ATA_ERROR].offset = ch_offset + 0x04; ch->r_io[ATA_STATUS].offset = ch_offset + 0x1c; ch->r_io[ATA_ALTSTAT].offset = ch_offset + 0x28; ch->r_io[ATA_CONTROL].offset = ch_offset + 0x29; /* setup SATA registers */ ch->r_io[ATA_SSTATUS].offset = ch_offset + 0x100; ch->r_io[ATA_SERROR].offset = ch_offset + 0x104; ch->r_io[ATA_SCONTROL].offset = ch_offset + 0x108; /* setup DMA registers */ ch->r_io[ATA_BMCMD_PORT].offset = ch_offset + 0x70; ch->r_io[ATA_BMSTAT_PORT].offset = ch_offset + 0x72; ch->r_io[ATA_BMDTP_PORT].offset = ch_offset + 0x74; ch->flags |= ATA_NO_SLAVE; ch->flags |= ATA_SATA; ata_pci_hw(dev); ch->hw.status = ata_intel_31244_status; ch->hw.tf_write = ata_intel_31244_tf_write; /* enable PHY state change interrupt */ ATA_OUTL(ctlr->r_res2, 0x4, ATA_INL(ctlr->r_res2, 0x04) | (0x01 << (ch->unit << 3))); return 0; } static int ata_intel_31244_ch_detach(device_t dev) { ata_pci_dmafini(dev); return (0); } static int ata_intel_31244_status(device_t dev) { /* do we have any PHY events ? */ ata_sata_phy_check_events(dev, -1); /* any drive action to take care of ?
*/ return ata_pci_status(dev); } static void ata_intel_31244_tf_write(struct ata_request *request) { struct ata_channel *ch = device_get_softc(request->parent); if (request->flags & ATA_R_48BIT) { ATA_IDX_OUTW(ch, ATA_FEATURE, request->u.ata.feature); ATA_IDX_OUTW(ch, ATA_COUNT, request->u.ata.count); ATA_IDX_OUTW(ch, ATA_SECTOR, ((request->u.ata.lba >> 16) & 0xff00) | (request->u.ata.lba & 0x00ff)); ATA_IDX_OUTW(ch, ATA_CYL_LSB, ((request->u.ata.lba >> 24) & 0xff00) | ((request->u.ata.lba >> 8) & 0x00ff)); ATA_IDX_OUTW(ch, ATA_CYL_MSB, ((request->u.ata.lba >> 32) & 0xff00) | ((request->u.ata.lba >> 16) & 0x00ff)); ATA_IDX_OUTW(ch, ATA_DRIVE, ATA_D_LBA | ATA_DEV(request->unit)); } else { ATA_IDX_OUTB(ch, ATA_FEATURE, request->u.ata.feature); ATA_IDX_OUTB(ch, ATA_COUNT, request->u.ata.count); ATA_IDX_OUTB(ch, ATA_SECTOR, request->u.ata.lba); ATA_IDX_OUTB(ch, ATA_CYL_LSB, request->u.ata.lba >> 8); ATA_IDX_OUTB(ch, ATA_CYL_MSB, request->u.ata.lba >> 16); ATA_IDX_OUTB(ch, ATA_DRIVE, ATA_D_IBM | ATA_D_LBA | ATA_DEV(request->unit) | ((request->u.ata.lba >> 24) & 0x0f)); } } static void ata_intel_31244_reset(device_t dev) { struct ata_channel *ch = device_get_softc(dev); if (ata_sata_phy_reset(dev, -1, 1)) ata_generic_reset(dev); else ch->devices = 0; } ATA_DECLARE_DRIVER(ata_intel); Index: projects/powernv/dev/hwpmc/hwpmc_mod.c =================================================================== --- projects/powernv/dev/hwpmc/hwpmc_mod.c (revision 290990) +++ projects/powernv/dev/hwpmc/hwpmc_mod.c (revision 290991) @@ -1,5191 +1,5211 @@ /*- * Copyright (c) 2003-2008 Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* needs to be after */ #include #include #include #include #include #include #include #include "hwpmc_soft.h" /* * Types */ enum pmc_flags { PMC_FLAG_NONE = 0x00, /* do nothing */ PMC_FLAG_REMOVE = 0x01, /* atomically remove entry from hash */ PMC_FLAG_ALLOCATE = 0x02, /* add entry to hash if not found */ }; /* * The offset in sysent where the syscall is allocated. */ static int pmc_syscall_num = NO_SYSCALL; struct pmc_cpu **pmc_pcpu; /* per-cpu state */ pmc_value_t *pmc_pcpu_saved; /* saved PMC values: CSW handling */ #define PMC_PCPU_SAVED(C,R) pmc_pcpu_saved[(R) + md->pmd_npmc*(C)] struct mtx_pool *pmc_mtxpool; static int *pmc_pmcdisp; /* PMC row dispositions */ #define PMC_ROW_DISP_IS_FREE(R) (pmc_pmcdisp[(R)] == 0) #define PMC_ROW_DISP_IS_THREAD(R) (pmc_pmcdisp[(R)] > 0) #define PMC_ROW_DISP_IS_STANDALONE(R) (pmc_pmcdisp[(R)] < 0) #define PMC_MARK_ROW_FREE(R) do { \ pmc_pmcdisp[(R)] = 0; \ } while (0) #define PMC_MARK_ROW_STANDALONE(R) do { \ KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ atomic_add_int(&pmc_pmcdisp[(R)], -1); \ KASSERT(pmc_pmcdisp[(R)] >= (-pmc_cpu_max_active()), \ ("[pmc,%d] row disposition error", __LINE__)); \ } while (0) #define PMC_UNMARK_ROW_STANDALONE(R) do { \ atomic_add_int(&pmc_pmcdisp[(R)], 1); \ KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ } while (0) #define PMC_MARK_ROW_THREAD(R) do { \ KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ atomic_add_int(&pmc_pmcdisp[(R)], 1); \ } while (0) #define PMC_UNMARK_ROW_THREAD(R) do { \ atomic_add_int(&pmc_pmcdisp[(R)], -1); \ KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \ __LINE__)); \ } while (0) /* various event handlers */ static eventhandler_tag pmc_exit_tag, pmc_fork_tag, pmc_kld_load_tag, pmc_kld_unload_tag; /* Module statistics */ struct pmc_op_getdriverstats pmc_stats; /* Machine/processor dependent operations */ static struct pmc_mdep *md; /* * Hash tables mapping owner processes and target threads to PMCs. */ struct mtx pmc_processhash_mtx; /* spin mutex */ static u_long pmc_processhashmask; static LIST_HEAD(pmc_processhash, pmc_process) *pmc_processhash; /* * Hash table of PMC owner descriptors. This table is protected by * the shared PMC "sx" lock. */ static u_long pmc_ownerhashmask; static LIST_HEAD(pmc_ownerhash, pmc_owner) *pmc_ownerhash; /* * List of PMC owners with system-wide sampling PMCs. */ static LIST_HEAD(, pmc_owner) pmc_ss_owners; /* * A map of row indices to classdep structures. 
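 * Each global row index is owned by exactly one PMC class; given a row
 * index, pmc_ri_to_classdep() (defined further below) consults this map
 * to return the owning class descriptor along with the row index
 * rebased to that class (adjri = ri - pcd->pcd_ri).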
*/ static struct pmc_classdep **pmc_rowindex_to_classdep; /* * Prototypes */ #ifdef HWPMC_DEBUG static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS); static int pmc_debugflags_parse(char *newstr, char *fence); #endif static int load(struct module *module, int cmd, void *arg); static int pmc_attach_process(struct proc *p, struct pmc *pm); static struct pmc *pmc_allocate_pmc_descriptor(void); static struct pmc_owner *pmc_allocate_owner_descriptor(struct proc *p); static int pmc_attach_one_process(struct proc *p, struct pmc *pm); static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu); static int pmc_can_attach(struct pmc *pm, struct proc *p); static void pmc_capture_user_callchain(int cpu, int soft, struct trapframe *tf); static void pmc_cleanup(void); static int pmc_detach_process(struct proc *p, struct pmc *pm); static int pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags); static void pmc_destroy_owner_descriptor(struct pmc_owner *po); static void pmc_destroy_pmc_descriptor(struct pmc *pm); static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p); static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm); static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmc); static struct pmc_process *pmc_find_process_descriptor(struct proc *p, uint32_t mode); static void pmc_force_context_switch(void); static void pmc_link_target_process(struct pmc *pm, struct pmc_process *pp); static void pmc_log_all_process_mappings(struct pmc_owner *po); static void pmc_log_kernel_mappings(struct pmc *pm); static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p); static void pmc_maybe_remove_owner(struct pmc_owner *po); static void pmc_process_csw_in(struct thread *td); static void pmc_process_csw_out(struct thread *td); static void pmc_process_exit(void *arg, struct proc *p); static void pmc_process_fork(void *arg, struct proc *p1, struct proc *p2, int n); static void pmc_process_samples(int cpu, int soft); static void pmc_release_pmc_descriptor(struct pmc *pmc); static void pmc_remove_owner(struct pmc_owner *po); static void pmc_remove_process_descriptor(struct pmc_process *pp); static void pmc_restore_cpu_binding(struct pmc_binding *pb); static void pmc_save_cpu_binding(struct pmc_binding *pb); static void pmc_select_cpu(int cpu); static int pmc_start(struct pmc *pm); static int pmc_stop(struct pmc *pm); static int pmc_syscall_handler(struct thread *td, void *syscall_args); static void pmc_unlink_target_process(struct pmc *pmc, struct pmc_process *pp); static int generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp); static int generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp); static struct pmc_mdep *pmc_generic_cpu_initialize(void); static void pmc_generic_cpu_finalize(struct pmc_mdep *md); /* * Kernel tunables and sysctl(8) interface. 
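 * The RDTUN knobs below are boot-time loader tunables, while the RWTUN
 * ones may also be changed at runtime with sysctl(8). A hypothetical
 * loader.conf(5) fragment raising the sampling limits (the values here
 * are illustrative only) would look like:
 *
 *	kern.hwpmc.nsamples=2048
 *	kern.hwpmc.callchaindepth=32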
*/ SYSCTL_DECL(_kern_hwpmc); static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH; SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_RDTUN, &pmc_callchaindepth, 0, "depth of call chain records"); #ifdef HWPMC_DEBUG struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS; char pmc_debugstr[PMC_DEBUG_STRSIZE]; TUNABLE_STR(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr, sizeof(pmc_debugstr)); SYSCTL_PROC(_kern_hwpmc, OID_AUTO, debugflags, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NOFETCH, 0, 0, pmc_debugflags_sysctl_handler, "A", "debug flags"); #endif /* * kern.hwpmc.hashsize -- determines the number of rows in the * hash table used to look up threads */ static int pmc_hashsize = PMC_HASH_SIZE; SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_RDTUN, &pmc_hashsize, 0, "rows in hash tables"); /* * kern.hwpmc.nsamples -- number of PC samples/callchain stacks per CPU */ static int pmc_nsamples = PMC_NSAMPLES; SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_RDTUN, &pmc_nsamples, 0, "number of PC samples per CPU"); /* * kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool. */ static int pmc_mtxpool_size = PMC_MTXPOOL_SIZE; SYSCTL_INT(_kern_hwpmc, OID_AUTO, mtxpoolsize, CTLFLAG_RDTUN, &pmc_mtxpool_size, 0, "size of spin mutex pool"); /* * security.bsd.unprivileged_syspmcs -- allow non-root processes to * allocate system-wide PMCs. * * Allowing unprivileged processes to allocate system PMCs is convenient * if system-wide measurements need to be taken concurrently with other * per-process measurements. This feature is turned off by default. */ static int pmc_unprivileged_syspmcs = 0; SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_syspmcs, CTLFLAG_RWTUN, &pmc_unprivileged_syspmcs, 0, "allow unprivileged processes to allocate system PMCs"); /* * Hash function. Discard the lower 2 bits of the pointer since * these are always zero for our uses. The hash multiplier is * round((2^LONG_BIT) * ((sqrt(5)-1)/2)).
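 * Worked out: for LONG_BIT == 32, round(2^32 * 0.6180339887...) ==
 * 2654435769, and for LONG_BIT == 64, round(2^64 * 0.6180339887...) ==
 * 11400714819323198486, matching the _PMC_HM constants defined below.
 * This is Fibonacci hashing: multiplying by a constant derived from the
 * golden ratio scatters consecutive pointer values across the table,
 * and PMC_HASH_PTR() masks the product down to the table size.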
*/ #if LONG_BIT == 64 #define _PMC_HM 11400714819323198486u #elif LONG_BIT == 32 #define _PMC_HM 2654435769u #else #error Must know the size of 'long' to compile #endif #define PMC_HASH_PTR(P,M) ((((unsigned long) (P) >> 2) * _PMC_HM) & (M)) /* * Syscall structures */ /* The `sysent' for the new syscall */ static struct sysent pmc_sysent = { 2, /* sy_narg */ pmc_syscall_handler /* sy_call */ }; static struct syscall_module_data pmc_syscall_mod = { load, NULL, &pmc_syscall_num, &pmc_sysent, #if (__FreeBSD_version >= 1100000) { 0, NULL }, SY_THR_STATIC_KLD, #else { 0, NULL } #endif }; static moduledata_t pmc_mod = { PMC_MODULE_NAME, syscall_module_handler, &pmc_syscall_mod }; DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY); MODULE_VERSION(pmc, PMC_VERSION); #ifdef HWPMC_DEBUG enum pmc_dbgparse_state { PMCDS_WS, /* in whitespace */ PMCDS_MAJOR, /* seen a major keyword */ PMCDS_MINOR }; static int pmc_debugflags_parse(char *newstr, char *fence) { char c, *p, *q; struct pmc_debugflags *tmpflags; int error, found, *newbits, tmp; size_t kwlen; tmpflags = malloc(sizeof(*tmpflags), M_PMC, M_WAITOK|M_ZERO); p = newstr; error = 0; for (; p < fence && (c = *p); p++) { /* skip white space */ if (c == ' ' || c == '\t') continue; /* look for a keyword followed by "=" */ for (q = p; p < fence && (c = *p) && c != '='; p++) ; if (c != '=') { error = EINVAL; goto done; } kwlen = p - q; newbits = NULL; /* lookup flag group name */ #define DBG_SET_FLAG_MAJ(S,F) \ if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \ newbits = &tmpflags->pdb_ ## F; DBG_SET_FLAG_MAJ("cpu", CPU); DBG_SET_FLAG_MAJ("csw", CSW); DBG_SET_FLAG_MAJ("logging", LOG); DBG_SET_FLAG_MAJ("module", MOD); DBG_SET_FLAG_MAJ("md", MDP); DBG_SET_FLAG_MAJ("owner", OWN); DBG_SET_FLAG_MAJ("pmc", PMC); DBG_SET_FLAG_MAJ("process", PRC); DBG_SET_FLAG_MAJ("sampling", SAM); if (newbits == NULL) { error = EINVAL; goto done; } p++; /* skip the '=' */ /* Now parse the individual flags */ tmp = 0; newflag: for (q = p; p < fence && (c = *p); p++) if (c == ' ' || c == '\t' || c == ',') break; /* p == fence or c == ws or c == "," or c == 0 */ if ((kwlen = p - q) == 0) { *newbits = tmp; continue; } found = 0; #define DBG_SET_FLAG_MIN(S,F) \ if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \ tmp |= found = (1 << PMC_DEBUG_MIN_ ## F) /* a '*' denotes all possible flags in the group */ if (kwlen == 1 && *q == '*') tmp = found = ~0; /* look for individual flag names */ DBG_SET_FLAG_MIN("allocaterow", ALR); DBG_SET_FLAG_MIN("allocate", ALL); DBG_SET_FLAG_MIN("attach", ATT); DBG_SET_FLAG_MIN("bind", BND); DBG_SET_FLAG_MIN("config", CFG); DBG_SET_FLAG_MIN("exec", EXC); DBG_SET_FLAG_MIN("exit", EXT); DBG_SET_FLAG_MIN("find", FND); DBG_SET_FLAG_MIN("flush", FLS); DBG_SET_FLAG_MIN("fork", FRK); DBG_SET_FLAG_MIN("getbuf", GTB); DBG_SET_FLAG_MIN("hook", PMH); DBG_SET_FLAG_MIN("init", INI); DBG_SET_FLAG_MIN("intr", INT); DBG_SET_FLAG_MIN("linktarget", TLK); DBG_SET_FLAG_MIN("mayberemove", OMR); DBG_SET_FLAG_MIN("ops", OPS); DBG_SET_FLAG_MIN("read", REA); DBG_SET_FLAG_MIN("register", REG); DBG_SET_FLAG_MIN("release", REL); DBG_SET_FLAG_MIN("remove", ORM); DBG_SET_FLAG_MIN("sample", SAM); DBG_SET_FLAG_MIN("scheduleio", SIO); DBG_SET_FLAG_MIN("select", SEL); DBG_SET_FLAG_MIN("signal", SIG); DBG_SET_FLAG_MIN("swi", SWI); DBG_SET_FLAG_MIN("swo", SWO); DBG_SET_FLAG_MIN("start", STA); DBG_SET_FLAG_MIN("stop", STO); DBG_SET_FLAG_MIN("syscall", PMS); DBG_SET_FLAG_MIN("unlinktarget", TUL); DBG_SET_FLAG_MIN("write", WRI); if (found == 0) { /* unrecognized flag name */ 
error = EINVAL; goto done; } if (c == 0 || c == ' ' || c == '\t') { /* end of flag group */ *newbits = tmp; continue; } p++; goto newflag; } /* save the new flag set */ bcopy(tmpflags, &pmc_debugflags, sizeof(pmc_debugflags)); done: free(tmpflags, M_PMC); return error; } static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS) { char *fence, *newstr; int error; unsigned int n; (void) arg1; (void) arg2; /* unused parameters */ n = sizeof(pmc_debugstr); newstr = malloc(n, M_PMC, M_WAITOK|M_ZERO); (void) strlcpy(newstr, pmc_debugstr, n); error = sysctl_handle_string(oidp, newstr, n, req); /* if there is a new string, parse and copy it */ if (error == 0 && req->newptr != NULL) { fence = newstr + (n < req->newlen ? n : req->newlen + 1); if ((error = pmc_debugflags_parse(newstr, fence)) == 0) (void) strlcpy(pmc_debugstr, newstr, sizeof(pmc_debugstr)); } free(newstr, M_PMC); return error; } #endif /* * Map a row index to a classdep structure and return the adjusted row * index for the PMC class index. */ static struct pmc_classdep * pmc_ri_to_classdep(struct pmc_mdep *md, int ri, int *adjri) { struct pmc_classdep *pcd; (void) md; KASSERT(ri >= 0 && ri < md->pmd_npmc, ("[pmc,%d] illegal row-index %d", __LINE__, ri)); pcd = pmc_rowindex_to_classdep[ri]; KASSERT(pcd != NULL, ("[pmc,%d] ri %d null pcd", __LINE__, ri)); *adjri = ri - pcd->pcd_ri; KASSERT(*adjri >= 0 && *adjri < pcd->pcd_num, ("[pmc,%d] adjusted row-index %d", __LINE__, *adjri)); return (pcd); } /* * Concurrency Control * * The driver manages the following data structures: * * - target process descriptors, one per target process * - owner process descriptors (and attached lists), one per owner process * - lookup hash tables for owner and target processes * - PMC descriptors (and attached lists) * - per-cpu hardware state * - the 'hook' variable through which the kernel calls into * this module * - the machine hardware state (managed by the MD layer) * * These data structures are accessed from: * * - thread context-switch code * - interrupt handlers (possibly on multiple cpus) * - kernel threads on multiple cpus running on behalf of user * processes doing system calls * - this driver's private kernel threads * * = Locks and Locking strategy = * * The driver uses four locking strategies for its operation: * * - The global SX lock "pmc_sx" is used to protect internal * data structures. * * Calls into the module by syscall() start with this lock being * held in exclusive mode. Depending on the requested operation, * the lock may be downgraded to 'shared' mode to allow more * concurrent readers into the module. Calls into the module from * other parts of the kernel acquire the lock in shared mode. * * This SX lock is held in exclusive mode for any operations that * modify the linkages between the driver's internal data structures. * * The 'pmc_hook' function pointer is also protected by this lock. * It is only examined with the sx lock held in exclusive mode. The * kernel module is allowed to be unloaded only with the sx lock held * in exclusive mode. In normal syscall handling, after acquiring the * pmc_sx lock we first check that 'pmc_hook' is non-null before * proceeding. This prevents races between the thread unloading the module * and other threads seeking to use the module. * * - Lookups of target process structures and owner process structures * cannot use the global "pmc_sx" SX lock because these lookups need * to happen during context switches and in other critical sections * where sleeping is not allowed. 
We protect these lookup tables * with their own private spin-mutexes, "pmc_processhash_mtx" and * "pmc_ownerhash_mtx". * * - Interrupt handlers work in a lock free manner. At interrupt * time, handlers look at the PMC pointer (phw->phw_pmc) configured * when the PMC was started. If this pointer is NULL, the interrupt * is ignored after updating driver statistics. We ensure that this * pointer is set (using an atomic operation if necessary) before the * PMC hardware is started. Conversely, this pointer is unset atomically * only after the PMC hardware is stopped. * * We ensure that everything needed for the operation of an * interrupt handler is available without it needing to acquire any * locks. We also ensure that a PMC's software state is destroyed only * after the PMC is taken off hardware (on all CPUs). * * - Context-switch handling with process-private PMCs needs more * care. * * A given process may be the target of multiple PMCs. For example, * PMCATTACH and PMCDETACH may be requested by a process on one CPU * while the target process is running on another. A PMC could also * be getting released because its owner is exiting. We tackle * these situations in the following manner: * * - each target process structure 'pmc_process' has an array * of 'struct pmc *' pointers, one for each hardware PMC. * * - At context switch IN time, each "target" PMC in RUNNING state * gets started on hardware and a pointer to each PMC is copied into * the per-cpu phw array. The 'runcount' for the PMC is * incremented. * * - At context switch OUT time, all process-virtual PMCs are stopped * on hardware. The saved value is added to the PMCs value field * only if the PMC is in a non-deleted state (the PMCs state could * have changed during the current time slice). * * Note that since in-between a switch IN on a processor and a switch * OUT, the PMC could have been released on another CPU. Therefore * context switch OUT always looks at the hardware state to turn * OFF PMCs and will update a PMC's saved value only if reachable * from the target process record. * * - OP PMCRELEASE could be called on a PMC at any time (the PMC could * be attached to many processes at the time of the call and could * be active on multiple CPUs). * * We prevent further scheduling of the PMC by marking it as in * state 'DELETED'. If the runcount of the PMC is non-zero then * this PMC is currently running on a CPU somewhere. The thread * doing the PMCRELEASE operation waits by repeatedly doing a * pause() till the runcount comes to zero. * * The contents of a PMC descriptor (struct pmc) are protected using * a spin-mutex. In order to save space, we use a mutex pool. * * In terms of lock types used by witness(4), we use: * - Type "pmc-sx", used by the global SX lock. * - Type "pmc-sleep", for sleep mutexes used by logger threads. * - Type "pmc-per-proc", for protecting PMC owner descriptors. * - Type "pmc-leaf", used for all other spin mutexes. 
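 * As a minimal sketch of the DELETED-then-drain protocol described
 * above (the wmesg and the atomic read are illustrative; the real
 * release path in pmc_release_pmc_descriptor() performs additional
 * teardown):
 *
 *	pm->pm_state = PMC_STATE_DELETED;		-- block new scheduling
 *	while (atomic_load_acq_int(&pm->pm_runcount) != 0)
 *		pause("pmcrls", 1);			-- wait out running CPUs
 *
 * Only once pm_runcount has drained to zero is it safe to free the
 * PMC's software state.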
*/ /* * save the cpu binding of the current kthread */ static void pmc_save_cpu_binding(struct pmc_binding *pb) { PMCDBG0(CPU,BND,2, "save-cpu"); thread_lock(curthread); pb->pb_bound = sched_is_bound(curthread); pb->pb_cpu = curthread->td_oncpu; thread_unlock(curthread); PMCDBG1(CPU,BND,2, "save-cpu cpu=%d", pb->pb_cpu); } /* * restore the cpu binding of the current thread */ static void pmc_restore_cpu_binding(struct pmc_binding *pb) { PMCDBG2(CPU,BND,2, "restore-cpu curcpu=%d restore=%d", curthread->td_oncpu, pb->pb_cpu); thread_lock(curthread); if (pb->pb_bound) sched_bind(curthread, pb->pb_cpu); else sched_unbind(curthread); thread_unlock(curthread); PMCDBG0(CPU,BND,2, "restore-cpu done"); } /* * move execution to the specified cpu and bind it there. */ static void pmc_select_cpu(int cpu) { KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] bad cpu number %d", __LINE__, cpu)); /* Never move to an inactive CPU. */ KASSERT(pmc_cpu_is_active(cpu), ("[pmc,%d] selecting inactive " "CPU %d", __LINE__, cpu)); PMCDBG1(CPU,SEL,2, "select-cpu cpu=%d", cpu); thread_lock(curthread); sched_bind(curthread, cpu); thread_unlock(curthread); KASSERT(curthread->td_oncpu == cpu, ("[pmc,%d] CPU not bound [cpu=%d, curr=%d]", __LINE__, cpu, curthread->td_oncpu)); PMCDBG1(CPU,SEL,2, "select-cpu cpu=%d ok", cpu); } /* * Force a context switch. * * We do this by pause'ing for 1 tick -- invoking mi_switch() is not * guaranteed to force a context switch. */ static void pmc_force_context_switch(void) { pause("pmcctx", 1); } /* * Get the file name for an executable. This is a simple wrapper * around vn_fullpath(9). */ static void pmc_getfilename(struct vnode *v, char **fullpath, char **freepath) { *fullpath = "unknown"; *freepath = NULL; vn_fullpath(curthread, v, fullpath, freepath); } /* * remove a process owning PMCs */ void pmc_remove_owner(struct pmc_owner *po) { struct pmc *pm, *tmp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG1(OWN,ORM,1, "remove-owner po=%p", po); /* Remove descriptor from the owner hash table */ LIST_REMOVE(po, po_next); /* release all owned PMC descriptors */ LIST_FOREACH_SAFE(pm, &po->po_pmcs, pm_next, tmp) { PMCDBG1(OWN,ORM,2, "pmc=%p", pm); KASSERT(pm->pm_owner == po, ("[pmc,%d] owner %p != po %p", __LINE__, pm->pm_owner, po)); pmc_release_pmc_descriptor(pm); /* will unlink from the list */ pmc_destroy_pmc_descriptor(pm); } KASSERT(po->po_sscount == 0, ("[pmc,%d] SS count not zero", __LINE__)); KASSERT(LIST_EMPTY(&po->po_pmcs), ("[pmc,%d] PMC list not empty", __LINE__)); /* de-configure the log file if present */ if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_deconfigure_log(po); } /* * remove an owner process record if all conditions are met. */ static void pmc_maybe_remove_owner(struct pmc_owner *po) { PMCDBG1(OWN,OMR,1, "maybe-remove-owner po=%p", po); /* * Remove owner record if * - this process does not own any PMCs * - this process has not allocated a system-wide sampling buffer */ if (LIST_EMPTY(&po->po_pmcs) && ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } } /* * Add an association between a target process and a PMC.
*/ static void pmc_link_target_process(struct pmc *pm, struct pmc_process *pp) { int ri; struct pmc_target *pt; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL && pp != NULL, ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp)); KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] Attaching a non-process-virtual pmc=%p to pid=%d", __LINE__, pm, pp->pp_proc->p_pid)); KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= ((int) md->pmd_npmc - 1), ("[pmc,%d] Illegal reference count %d for process record %p", __LINE__, pp->pp_refcnt, (void *) pp)); ri = PMC_TO_ROWINDEX(pm); PMCDBG3(PRC,TLK,1, "link-target pmc=%p ri=%d pmc-process=%p", pm, ri, pp); #ifdef HWPMC_DEBUG LIST_FOREACH(pt, &pm->pm_targets, pt_next) if (pt->pt_process == pp) KASSERT(0, ("[pmc,%d] pp %p already in pmc %p targets", __LINE__, pp, pm)); #endif pt = malloc(sizeof(struct pmc_target), M_PMC, M_WAITOK|M_ZERO); pt->pt_process = pp; LIST_INSERT_HEAD(&pm->pm_targets, pt, pt_next); atomic_store_rel_ptr((uintptr_t *)&pp->pp_pmcs[ri].pp_pmc, (uintptr_t)pm); if (pm->pm_owner->po_owner == pp->pp_proc) pm->pm_flags |= PMC_F_ATTACHED_TO_OWNER; /* * Initialize the per-process values at this row index. */ pp->pp_pmcs[ri].pp_pmcval = PMC_TO_MODE(pm) == PMC_MODE_TS ? pm->pm_sc.pm_reloadcount : 0; pp->pp_refcnt++; } /* * Removes the association between a target process and a PMC. */ static void pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp) { int ri; struct proc *p; struct pmc_target *ptgt; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL && pp != NULL, ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp)); KASSERT(pp->pp_refcnt >= 1 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal ref count %d on process record %p", __LINE__, pp->pp_refcnt, (void *) pp)); ri = PMC_TO_ROWINDEX(pm); PMCDBG3(PRC,TUL,1, "unlink-target pmc=%p ri=%d pmc-process=%p", pm, ri, pp); KASSERT(pp->pp_pmcs[ri].pp_pmc == pm, ("[pmc,%d] PMC ri %d mismatch pmc %p pp->[ri] %p", __LINE__, ri, pm, pp->pp_pmcs[ri].pp_pmc)); pp->pp_pmcs[ri].pp_pmc = NULL; pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t) 0; /* Remove owner-specific flags */ if (pm->pm_owner->po_owner == pp->pp_proc) { pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS; pm->pm_flags &= ~PMC_F_ATTACHED_TO_OWNER; } pp->pp_refcnt--; /* Remove the target process from the PMC structure */ LIST_FOREACH(ptgt, &pm->pm_targets, pt_next) if (ptgt->pt_process == pp) break; KASSERT(ptgt != NULL, ("[pmc,%d] process %p (pp: %p) not found " "in pmc %p", __LINE__, pp->pp_proc, pp, pm)); LIST_REMOVE(ptgt, pt_next); free(ptgt, M_PMC); /* if the PMC now lacks targets, send the owner a SIGIO */ if (LIST_EMPTY(&pm->pm_targets)) { p = pm->pm_owner->po_owner; PROC_LOCK(p); kern_psignal(p, SIGIO); PROC_UNLOCK(p); PMCDBG2(PRC,SIG,2, "signalling proc=%p signal=%d", p, SIGIO); } } /* * Check if PMC 'pm' may be attached to target process 't'. */ static int pmc_can_attach(struct pmc *pm, struct proc *t) { struct proc *o; /* pmc owner */ struct ucred *oc, *tc; /* owner, target credentials */ int decline_attach, i; /* * A PMC's owner can always attach that PMC to itself. */ if ((o = pm->pm_owner->po_owner) == t) return 0; PROC_LOCK(o); oc = o->p_ucred; crhold(oc); PROC_UNLOCK(o); PROC_LOCK(t); tc = t->p_ucred; crhold(tc); PROC_UNLOCK(t); /* * The effective uid of the PMC owner should match at least one * of the {effective,real,saved} uids of the target process. 
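 * For example (illustrative uids): an owner with euid 1001 passes this
 * check against a target whose euid/ruid/svuid are 0/1001/1001, since
 * the real uid matches, but fails against a target running as 0/0/0.
 * The group checks below must then also pass before the attach is
 * allowed.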
*/ decline_attach = oc->cr_uid != tc->cr_uid && oc->cr_uid != tc->cr_svuid && oc->cr_uid != tc->cr_ruid; /* * Every one of the target's group ids must be in the owner's * group list. */ for (i = 0; !decline_attach && i < tc->cr_ngroups; i++) decline_attach = !groupmember(tc->cr_groups[i], oc); /* check the real and saved gids too */ if (decline_attach == 0) decline_attach = !groupmember(tc->cr_rgid, oc) || !groupmember(tc->cr_svgid, oc); crfree(tc); crfree(oc); return !decline_attach; } /* * Attach a process to a PMC. */ static int pmc_attach_one_process(struct proc *p, struct pmc *pm) { int ri; char *fullpath, *freepath; struct pmc_process *pp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG5(PRC,ATT,2, "attach-one pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); /* * Locate the process descriptor corresponding to process 'p', * allocating space as needed. * * Verify that rowindex 'pm_rowindex' is free in the process * descriptor. * * If not, allocate space for a descriptor and link the * process descriptor and PMC. */ ri = PMC_TO_ROWINDEX(pm); if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL) return ENOMEM; if (pp->pp_pmcs[ri].pp_pmc == pm) /* already present at slot [ri] */ return EEXIST; if (pp->pp_pmcs[ri].pp_pmc != NULL) return EBUSY; pmc_link_target_process(pm, pp); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) && (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) == 0) pm->pm_flags |= PMC_F_NEEDS_LOGFILE; pm->pm_flags |= PMC_F_ATTACH_DONE; /* mark as attached */ /* issue an attach event to a configured log file */ if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) { pmc_getfilename(p->p_textvp, &fullpath, &freepath); if (p->p_flag & P_KTHREAD) { fullpath = kernelname; freepath = NULL; } else pmclog_process_pmcattach(pm, p->p_pid, fullpath); if (freepath) free(freepath, M_TEMP); if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmc_log_process_mappings(pm->pm_owner, p); } /* mark process as using HWPMCs */ PROC_LOCK(p); p->p_flag |= P_HWPMC; PROC_UNLOCK(p); return 0; } /* * Attach a process and optionally its children */ static int pmc_attach_process(struct proc *p, struct pmc *pm) { int error; struct proc *top; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG5(PRC,ATT,1, "attach pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); /* * If this PMC successfully allowed a GETMSR operation * in the past, disallow further ATTACHes. */ if ((pm->pm_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0) return EPERM; if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0) return pmc_attach_one_process(p, pm); /* * Traverse all child processes, attaching them to * this PMC. */ sx_slock(&proctree_lock); top = p; for (;;) { if ((error = pmc_attach_one_process(p, pm)) != 0) break; if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } if (error) (void) pmc_detach_process(top, pm); done: sx_sunlock(&proctree_lock); return error; } /* * Detach a process from a PMC. If there are no other PMCs tracking * this process, remove the process structure from its hash table. If * 'flags' contains PMC_FLAG_REMOVE, then free the process structure.
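 * A typical invocation, as in pmc_detach_process() below, is
 *
 *	(void) pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE);
 *
 * which unlinks the target and, when this was the last PMC tracking
 * the process, also frees its descriptor.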
*/ static int pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags) { int ri; struct pmc_process *pp; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm != NULL, ("[pmc,%d] null pm pointer", __LINE__)); ri = PMC_TO_ROWINDEX(pm); PMCDBG6(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x", pm, ri, p, p->p_pid, p->p_comm, flags); if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) return ESRCH; if (pp->pp_pmcs[ri].pp_pmc != pm) return EINVAL; pmc_unlink_target_process(pm, pp); /* Issue a detach entry if a log file is configured */ if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_pmcdetach(pm, p->p_pid); /* * If there are no PMCs targeting this process, we remove its * descriptor from the target hash table and unset the P_HWPMC * flag in the struct proc. */ KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal refcnt %d for process struct %p", __LINE__, pp->pp_refcnt, pp)); if (pp->pp_refcnt != 0) /* still a target of some PMC */ return 0; pmc_remove_process_descriptor(pp); if (flags & PMC_FLAG_REMOVE) free(pp, M_PMC); PROC_LOCK(p); p->p_flag &= ~P_HWPMC; PROC_UNLOCK(p); return 0; } /* * Detach a process and optionally its descendants from a PMC. */ static int pmc_detach_process(struct proc *p, struct pmc *pm) { struct proc *top; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG5(PRC,ATT,1, "detach pm=%p ri=%d proc=%p (%d, %s)", pm, PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0) return pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE); /* * Traverse all children, detaching them from this PMC. We * ignore errors since we could be detaching a PMC from a * partially attached proc tree. */ sx_slock(&proctree_lock); top = p; for (;;) { (void) pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE); if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } done: sx_sunlock(&proctree_lock); if (LIST_EMPTY(&pm->pm_targets)) pm->pm_flags &= ~PMC_F_ATTACH_DONE; return 0; } /* * Thread context switch IN */ static void pmc_process_csw_in(struct thread *td) { int cpu; unsigned int adjri, ri; struct pmc *pm; struct proc *p; struct pmc_cpu *pc; struct pmc_hw *phw; pmc_value_t newvalue; struct pmc_process *pp; struct pmc_classdep *pcd; p = td->td_proc; if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL) return; KASSERT(pp->pp_proc == td->td_proc, ("[pmc,%d] not my thread state", __LINE__)); critical_enter(); /* no preemption from this point */ cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */ PMCDBG5(CSW,SWI,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p, p->p_pid, p->p_comm, pp); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] weird CPU id %d", __LINE__, cpu)); pc = pmc_pcpu[cpu]; for (ri = 0; ri < md->pmd_npmc; ri++) { if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL) continue; KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] Target PMC in non-virtual mode (%d)", __LINE__, PMC_TO_MODE(pm))); KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] Row index mismatch pmc %d != ri %d", __LINE__, PMC_TO_ROWINDEX(pm), ri)); /* * Only PMCs that are marked as 'RUNNING' need * be placed on hardware. */ if (pm->pm_state != PMC_STATE_RUNNING) continue; /* increment PMC runcount */ atomic_add_rel_int(&pm->pm_runcount, 1); /* configure the HWPMC we are going to use.
*/ pcd = pmc_ri_to_classdep(md, ri, &adjri); pcd->pcd_config_pmc(cpu, adjri, pm); phw = pc->pc_hwpmcs[ri]; KASSERT(phw != NULL, ("[pmc,%d] null hw pointer", __LINE__)); KASSERT(phw->phw_pmc == pm, ("[pmc,%d] hw->pmc %p != pmc %p", __LINE__, phw->phw_pmc, pm)); /* * Write out saved value and start the PMC. * * Sampling PMCs use a per-process value, while * counting mode PMCs use a per-pmc value that is * inherited across descendants. */ if (PMC_TO_MODE(pm) == PMC_MODE_TS) { mtx_pool_lock_spin(pmc_mtxpool, pm); + + /* + * Use the saved value calculated after the most recent + * thread switch out to start this counter. Reset + * the saved count in case another thread from this + * process switches in before any threads switch out. + */ newvalue = PMC_PCPU_SAVED(cpu,ri) = pp->pp_pmcs[ri].pp_pmcval; + pp->pp_pmcs[ri].pp_pmcval = pm->pm_sc.pm_reloadcount; mtx_pool_unlock_spin(pmc_mtxpool, pm); } else { KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC, ("[pmc,%d] illegal mode=%d", __LINE__, PMC_TO_MODE(pm))); mtx_pool_lock_spin(pmc_mtxpool, pm); newvalue = PMC_PCPU_SAVED(cpu, ri) = pm->pm_gv.pm_savedvalue; mtx_pool_unlock_spin(pmc_mtxpool, pm); } PMCDBG3(CSW,SWI,1,"cpu=%d ri=%d new=%jd", cpu, ri, newvalue); pcd->pcd_write_pmc(cpu, adjri, newvalue); /* If a sampling mode PMC, reset stalled state. */ if (PMC_TO_MODE(pm) == PMC_MODE_TS) CPU_CLR_ATOMIC(cpu, &pm->pm_stalled); /* Indicate that we desire this to run. */ CPU_SET_ATOMIC(cpu, &pm->pm_cpustate); /* Start the PMC. */ pcd->pcd_start_pmc(cpu, adjri); } /* * perform any other architecture/cpu dependent thread * switch-in actions. */ (void) (*md->pmd_switch_in)(pc, pp); critical_exit(); } /* * Thread context switch OUT. */ static void pmc_process_csw_out(struct thread *td) { int cpu; int64_t tmp; struct pmc *pm; struct proc *p; enum pmc_mode mode; struct pmc_cpu *pc; pmc_value_t newvalue; unsigned int adjri, ri; struct pmc_process *pp; struct pmc_classdep *pcd; /* * Locate our process descriptor; this may be NULL if * this process is exiting and we have already removed * the process from the target process table. * * Note that due to kernel preemption, multiple * context switches may happen while the process is * exiting. * * Note also that if the target process cannot be * found we still need to deconfigure any PMCs that * are currently running on hardware. */ p = td->td_proc; pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE); /* * save PMCs */ critical_enter(); cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */ PMCDBG5(CSW,SWO,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p, p->p_pid, p->p_comm, pp); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] weird CPU id %d", __LINE__, cpu)); pc = pmc_pcpu[cpu]; /* * When a PMC gets unlinked from a target process, it will * be removed from the target's pp_pmcs[] array. * * However, on an MP system, the target could have been * executing on another CPU at the time of the unlink. * So, at context switch OUT time, we need to look at * the hardware to determine if a PMC is scheduled on * it. */ for (ri = 0; ri < md->pmd_npmc; ri++) { pcd = pmc_ri_to_classdep(md, ri, &adjri); pm = NULL; (void) (*pcd->pcd_get_config)(cpu, adjri, &pm); if (pm == NULL) /* nothing at this row index */ continue; mode = PMC_TO_MODE(pm); if (!PMC_IS_VIRTUAL_MODE(mode)) continue; /* not a process virtual PMC */ KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] ri mismatch pmc(%d) ri(%d)", __LINE__, PMC_TO_ROWINDEX(pm), ri)); /* * Change desired state, and then stop if not stalled.
* This two-step dance should avoid race conditions where * an interrupt re-enables the PMC after this code has * already checked the pm_stalled flag. */ CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate); if (!CPU_ISSET(cpu, &pm->pm_stalled)) pcd->pcd_stop_pmc(cpu, adjri); /* reduce this PMC's runcount */ atomic_subtract_rel_int(&pm->pm_runcount, 1); /* * If this PMC is associated with this process, * save the reading. */ if (pp != NULL && pp->pp_pmcs[ri].pp_pmc != NULL) { KASSERT(pm == pp->pp_pmcs[ri].pp_pmc, ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc)); KASSERT(pp->pp_refcnt > 0, ("[pmc,%d] pp refcnt = %d", __LINE__, pp->pp_refcnt)); pcd->pcd_read_pmc(cpu, adjri, &newvalue); - tmp = newvalue - PMC_PCPU_SAVED(cpu,ri); - - PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd", cpu, ri, - tmp); - if (mode == PMC_MODE_TS) { + PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd (samp)", + cpu, ri, PMC_PCPU_SAVED(cpu,ri) - newvalue); /* * For sampling process-virtual PMCs, - * we expect the count to be - * decreasing as the 'value' - * programmed into the PMC is the - * number of events to be seen till - * the next sampling interrupt. + * newvalue is the number of events to be seen + * until the next sampling interrupt. + * We can just add the events left from this + * invocation to the counter, then adjust + * in case we overflow our range. + * + * (Recall that we reload the counter every + * time we use it.) */ - if (tmp < 0) - tmp += pm->pm_sc.pm_reloadcount; mtx_pool_lock_spin(pmc_mtxpool, pm); - pp->pp_pmcs[ri].pp_pmcval -= tmp; - if ((int64_t) pp->pp_pmcs[ri].pp_pmcval <= 0) - pp->pp_pmcs[ri].pp_pmcval += + + pp->pp_pmcs[ri].pp_pmcval += newvalue; + if (pp->pp_pmcs[ri].pp_pmcval > + pm->pm_sc.pm_reloadcount) + pp->pp_pmcs[ri].pp_pmcval -= pm->pm_sc.pm_reloadcount; + KASSERT(pp->pp_pmcs[ri].pp_pmcval > 0 && + pp->pp_pmcs[ri].pp_pmcval <= + pm->pm_sc.pm_reloadcount, + ("[pmc,%d] pp_pmcval outside of expected " + "range cpu=%d ri=%d pp_pmcval=%jx " + "pm_reloadcount=%jx", __LINE__, cpu, ri, + pp->pp_pmcs[ri].pp_pmcval, + pm->pm_sc.pm_reloadcount)); mtx_pool_unlock_spin(pmc_mtxpool, pm); } else { + tmp = newvalue - PMC_PCPU_SAVED(cpu,ri); + + PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd (count)", + cpu, ri, tmp); /* * For counting process-virtual PMCs, * we expect the count to be * increasing monotonically, modulo a 64 * bit wraparound. */ KASSERT((int64_t) tmp >= 0, ("[pmc,%d] negative increment cpu=%d " "ri=%d newvalue=%jx saved=%jx " "incr=%jx", __LINE__, cpu, ri, newvalue, PMC_PCPU_SAVED(cpu,ri), tmp)); mtx_pool_lock_spin(pmc_mtxpool, pm); pm->pm_gv.pm_savedvalue += tmp; pp->pp_pmcs[ri].pp_pmcval += tmp; mtx_pool_unlock_spin(pmc_mtxpool, pm); if (pm->pm_flags & PMC_F_LOG_PROCCSW) pmclog_process_proccsw(pm, pp, tmp); } } /* mark hardware as free */ pcd->pcd_config_pmc(cpu, adjri, NULL); } /* * perform any other architecture/cpu dependent thread * switch out functions. */ (void) (*md->pmd_switch_out)(pc, pp); critical_exit(); } /* * A mapping change for a process. */ static void pmc_process_mmap(struct thread *td, struct pmckern_map_in *pkm) { int ri; pid_t pid; char *fullpath, *freepath; const struct pmc *pm; struct pmc_owner *po; const struct pmc_process *pp; freepath = fullpath = NULL; pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath); pid = td->td_proc->p_pid; /* Inform owners of all system-wide sampling PMCs. 
*/ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_in(po, pid, pkm->pm_address, fullpath); if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) goto done; /* * Inform sampling PMC owners tracking this process. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmclog_process_map_in(pm->pm_owner, pid, pkm->pm_address, fullpath); done: if (freepath) free(freepath, M_TEMP); } /* * Log an munmap request. */ static void pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm) { int ri; pid_t pid; struct pmc_owner *po; const struct pmc *pm; const struct pmc_process *pp; pid = td->td_proc->p_pid; LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_out(po, pid, pkm->pm_address, pkm->pm_address + pkm->pm_size); if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) return; for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmclog_process_map_out(pm->pm_owner, pid, pkm->pm_address, pkm->pm_address + pkm->pm_size); } /* * Log mapping information about the kernel. */ static void pmc_log_kernel_mappings(struct pmc *pm) { struct pmc_owner *po; struct pmckern_map_in *km, *kmbase; sx_assert(&pmc_sx, SX_LOCKED); KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] non-sampling PMC (%p) desires mapping information", __LINE__, (void *) pm)); po = pm->pm_owner; if (po->po_flags & PMC_PO_INITIAL_MAPPINGS_DONE) return; /* * Log the current set of kernel modules. */ kmbase = linker_hwpmc_list_objects(); for (km = kmbase; km->pm_file != NULL; km++) { PMCDBG2(LOG,REG,1,"%s %p", (char *) km->pm_file, (void *) km->pm_address); pmclog_process_map_in(po, (pid_t) -1, km->pm_address, km->pm_file); } free(kmbase, M_LINKER); po->po_flags |= PMC_PO_INITIAL_MAPPINGS_DONE; } /* * Log the mappings for a single process. */ static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p) { vm_map_t map; struct vnode *vp; struct vmspace *vm; vm_map_entry_t entry; vm_offset_t last_end; u_int last_timestamp; struct vnode *last_vp; vm_offset_t start_addr; vm_object_t obj, lobj, tobj; char *fullpath, *freepath; last_vp = NULL; last_end = (vm_offset_t) 0; fullpath = freepath = NULL; if ((vm = vmspace_acquire_ref(p)) == NULL) return; map = &vm->vm_map; vm_map_lock_read(map); for (entry = map->header.next; entry != &map->header; entry = entry->next) { if (entry == NULL) { PMCDBG2(LOG,OPS,2, "hwpmc: vm_map entry unexpectedly " "NULL! pid=%d vm_map=%p\n", p->p_pid, map); break; } /* * We only care about executable map entries. */ if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) || !(entry->protection & VM_PROT_EXECUTE) || (entry->object.vm_object == NULL)) { continue; } obj = entry->object.vm_object; VM_OBJECT_RLOCK(obj); /* * Walk the backing_object list to find the base * (non-shadowed) vm_object. */ for (lobj = tobj = obj; tobj != NULL; tobj = tobj->backing_object) { if (tobj != obj) VM_OBJECT_RLOCK(tobj); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); lobj = tobj; } /* * At this point lobj is the base vm_object and it is locked. */ if (lobj == NULL) { PMCDBG3(LOG,OPS,2, "hwpmc: lobj unexpectedly NULL! 
pid=%d " "vm_map=%p vm_obj=%p\n", p->p_pid, map, obj); VM_OBJECT_RUNLOCK(obj); continue; } vp = vm_object_vnode(lobj); if (vp == NULL) { if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); continue; } /* * Skip contiguous regions that point to the same * vnode, so we don't emit redundant MAP-IN * directives. */ if (entry->start == last_end && vp == last_vp) { last_end = entry->end; if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); continue; } /* * We don't want to keep the proc's vm_map or this * vm_object locked while we walk the pathname, since * vn_fullpath() can sleep. However, if we drop the * lock, it's possible for concurrent activity to * modify the vm_map list. To protect against this, * we save the vm_map timestamp before we release the * lock, and check it after we reacquire the lock * below. */ start_addr = entry->start; last_end = entry->end; last_timestamp = map->timestamp; vm_map_unlock_read(map); vref(vp); if (lobj != obj) VM_OBJECT_RUNLOCK(lobj); VM_OBJECT_RUNLOCK(obj); freepath = NULL; pmc_getfilename(vp, &fullpath, &freepath); last_vp = vp; vrele(vp); vp = NULL; pmclog_process_map_in(po, p->p_pid, start_addr, fullpath); if (freepath) free(freepath, M_TEMP); vm_map_lock_read(map); /* * If our saved timestamp doesn't match, this means * that the vm_map was modified out from under us and * we can't trust our current "entry" pointer. Do a * new lookup for this entry. If there is no entry * for this address range, vm_map_lookup_entry() will * return the previous one, so we always want to go to * entry->next on the next loop iteration. * * There is an edge condition here that can occur if * there is no entry at or before this address. In * this situation, vm_map_lookup_entry returns * &map->header, which would cause our loop to abort * without processing the rest of the map. However, * in practice this will never happen for process * vm_map. This is because the executable's text * segment is the first mapping in the proc's address * space, and this mapping is never removed until the * process exits, so there will always be a non-header * entry at or before the requested address for * vm_map_lookup_entry to return. */ if (map->timestamp != last_timestamp) vm_map_lookup_entry(map, last_end - 1, &entry); } vm_map_unlock_read(map); vmspace_free(vm); return; } /* * Log mappings for all processes in the system. 
*/ static void pmc_log_all_process_mappings(struct pmc_owner *po) { struct proc *p, *top; sx_assert(&pmc_sx, SX_XLOCKED); if ((p = pfind(1)) == NULL) panic("[pmc,%d] Cannot find init", __LINE__); PROC_UNLOCK(p); sx_slock(&proctree_lock); top = p; for (;;) { pmc_log_process_mappings(po, p); if (!LIST_EMPTY(&p->p_children)) p = LIST_FIRST(&p->p_children); else for (;;) { if (p == top) goto done; if (LIST_NEXT(p, p_sibling)) { p = LIST_NEXT(p, p_sibling); break; } p = p->p_pptr; } } done: sx_sunlock(&proctree_lock); } /* * The 'hook' invoked from the kernel proper */ #ifdef HWPMC_DEBUG const char *pmc_hooknames[] = { /* these strings correspond to PMC_FN_* in */ "", "EXEC", "CSW-IN", "CSW-OUT", "SAMPLE", "UNUSED1", "UNUSED2", "MMAP", "MUNMAP", "CALLCHAIN-NMI", "CALLCHAIN-SOFT", "SOFTSAMPLING" }; #endif static int pmc_hook_handler(struct thread *td, int function, void *arg) { PMCDBG4(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p", td, function, pmc_hooknames[function], arg); switch (function) { /* * Process exec() */ case PMC_FN_PROCESS_EXEC: { char *fullpath, *freepath; unsigned int ri; int is_using_hwpmcs; struct pmc *pm; struct proc *p; struct pmc_owner *po; struct pmc_process *pp; struct pmckern_procexec *pk; sx_assert(&pmc_sx, SX_XLOCKED); p = td->td_proc; pmc_getfilename(p->p_textvp, &fullpath, &freepath); pk = (struct pmckern_procexec *) arg; /* Inform owners of SS mode PMCs of the exec event. */ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procexec(po, PMC_ID_INVALID, p->p_pid, pk->pm_entryaddr, fullpath); PROC_LOCK(p); is_using_hwpmcs = p->p_flag & P_HWPMC; PROC_UNLOCK(p); if (!is_using_hwpmcs) { if (freepath) free(freepath, M_TEMP); break; } /* * PMCs are not inherited across an exec(): remove any * PMCs that this process is the owner of. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } /* * If the process being exec'ed is not the target of any * PMC, we are done. */ if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) { if (freepath) free(freepath, M_TEMP); break; } /* * Log the exec event to all monitoring owners. Skip * owners who have already received the event because * they had system sampling PMCs active. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) { po = pm->pm_owner; if (po->po_sscount == 0 && po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procexec(po, pm->pm_id, p->p_pid, pk->pm_entryaddr, fullpath); } if (freepath) free(freepath, M_TEMP); PMCDBG4(PRC,EXC,1, "exec proc=%p (%d, %s) cred-changed=%d", p, p->p_pid, p->p_comm, pk->pm_credentialschanged); if (pk->pm_credentialschanged == 0) /* no change */ break; /* * If the newly exec()'ed process has a different credential * than before, allow it to be the target of a PMC only if * the PMC's owner has sufficient privilege. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) if (pmc_can_attach(pm, td->td_proc) != 0) pmc_detach_one_process(td->td_proc, pm, PMC_FLAG_NONE); KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc, ("[pmc,%d] Illegal ref count %d on pp %p", __LINE__, pp->pp_refcnt, pp)); /* * If this process is no longer the target of any * PMCs, we can remove the process entry and free * up space.
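/*
 * The process-tree traversal in pmc_log_all_process_mappings() above,
 * restated over a generic first-child/next-sibling/parent node so the
 * control flow is easier to follow (illustration only; 'visit' is a
 * placeholder).
 */
struct xnode {
	struct xnode *parent, *child, *sibling;
};

static void
example_preorder(struct xnode *top, void (*visit)(struct xnode *))
{
	struct xnode *n;

	n = top;
	for (;;) {
		visit(n);
		if (n->child != NULL)		/* descend first */
			n = n->child;
		else
			for (;;) {		/* then across, then up */
				if (n == top)
					return;
				if (n->sibling != NULL) {
					n = n->sibling;
					break;
				}
				n = n->parent;
			}
	}
}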
*/ if (pp->pp_refcnt == 0) { pmc_remove_process_descriptor(pp); free(pp, M_PMC); break; } } break; case PMC_FN_CSW_IN: pmc_process_csw_in(td); break; case PMC_FN_CSW_OUT: pmc_process_csw_out(td); break; /* * Process accumulated PC samples. * * This function is expected to be called by hardclock() for * each CPU that has accumulated PC samples. * * This function is to be executed on the CPU whose samples * are being processed. */ case PMC_FN_DO_SAMPLES: /* * Clear the cpu specific bit in the CPU mask before * doing the rest of the processing. If the NMI handler * gets invoked after the "CPU_CLR_ATOMIC()" call * below but before "pmc_process_samples()" gets * around to processing the interrupt, then we will * come back here at the next hardclock() tick (and * may find nothing to do if "pmc_process_samples()" * had already processed the interrupt). We don't * lose the interrupt sample. */ CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmc_cpumask); pmc_process_samples(PCPU_GET(cpuid), PMC_HR); pmc_process_samples(PCPU_GET(cpuid), PMC_SR); break; case PMC_FN_MMAP: sx_assert(&pmc_sx, SX_LOCKED); pmc_process_mmap(td, (struct pmckern_map_in *) arg); break; case PMC_FN_MUNMAP: sx_assert(&pmc_sx, SX_LOCKED); pmc_process_munmap(td, (struct pmckern_map_out *) arg); break; case PMC_FN_USER_CALLCHAIN: /* * Record a call chain. */ KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_HR, (struct trapframe *) arg); td->td_pflags &= ~TDP_CALLCHAIN; break; case PMC_FN_USER_CALLCHAIN_SOFT: /* * Record a call chain. */ KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_SR, (struct trapframe *) arg); td->td_pflags &= ~TDP_CALLCHAIN; break; case PMC_FN_SOFT_SAMPLING: /* * Call soft PMC sampling intr. */ pmc_soft_intr((struct pmckern_soft *) arg); break; default: #ifdef HWPMC_DEBUG KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function)); #endif break; } return 0; } /* * allocate a 'struct pmc_owner' descriptor in the owner hash table. */ static struct pmc_owner * pmc_allocate_owner_descriptor(struct proc *p) { uint32_t hindex; struct pmc_owner *po; struct pmc_ownerhash *poh; hindex = PMC_HASH_PTR(p, pmc_ownerhashmask); poh = &pmc_ownerhash[hindex]; /* allocate space for N pointers and one descriptor struct */ po = malloc(sizeof(struct pmc_owner), M_PMC, M_WAITOK|M_ZERO); po->po_owner = p; LIST_INSERT_HEAD(poh, po, po_next); /* insert into hash table */ TAILQ_INIT(&po->po_logbuffers); mtx_init(&po->po_mtx, "pmc-owner-mtx", "pmc-per-proc", MTX_SPIN); PMCDBG4(OWN,ALL,1, "allocate-owner proc=%p (%d, %s) pmc-owner=%p", p, p->p_pid, p->p_comm, po); return po; } static void pmc_destroy_owner_descriptor(struct pmc_owner *po) { PMCDBG4(OWN,REL,1, "destroy-owner po=%p proc=%p (%d, %s)", po, po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm); mtx_destroy(&po->po_mtx); free(po, M_PMC); } /* * find the descriptor corresponding to process 'p', adding or removing it * as specified by 'mode'. */ static struct pmc_process * pmc_find_process_descriptor(struct proc *p, uint32_t mode) { uint32_t hindex; struct pmc_process *pp, *ppnew; struct pmc_processhash *pph; hindex = PMC_HASH_PTR(p, pmc_processhashmask); pph = &pmc_processhash[hindex]; ppnew = NULL; /* * Pre-allocate memory in the PMC_FLAG_ALLOCATE case since we * cannot call malloc(9) once we hold a spin lock.
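/*
 * The pre-allocation pattern named in the comment above, in miniature
 * (a sketch with invented 'xitem' types): allocate with M_WAITOK while
 * sleeping is still legal, publish under the spin lock, and free the
 * spare allocation if the entry already existed.
 */
struct xitem {
	LIST_ENTRY(xitem) link;
	int key;
};
LIST_HEAD(xitemhead, xitem);

static struct xitem *
example_find_or_add(struct mtx *m, struct xitemhead *head, int key)
{
	struct xitem *it, *newit;

	newit = malloc(sizeof(*newit), M_TEMP, M_WAITOK | M_ZERO);

	mtx_lock_spin(m);
	LIST_FOREACH(it, head, link)
		if (it->key == key)
			break;
	if (it == NULL) {
		newit->key = key;
		LIST_INSERT_HEAD(head, newit, link);
		it = newit;
		newit = NULL;		/* consumed by the list */
	}
	mtx_unlock_spin(m);

	if (newit != NULL)		/* entry already existed; discard spare */
		free(newit, M_TEMP);
	return (it);
}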
*/ if (mode & PMC_FLAG_ALLOCATE) ppnew = malloc(sizeof(struct pmc_process) + md->pmd_npmc * sizeof(struct pmc_targetstate), M_PMC, M_WAITOK|M_ZERO); mtx_lock_spin(&pmc_processhash_mtx); LIST_FOREACH(pp, pph, pp_next) if (pp->pp_proc == p) break; if ((mode & PMC_FLAG_REMOVE) && pp != NULL) LIST_REMOVE(pp, pp_next); if ((mode & PMC_FLAG_ALLOCATE) && pp == NULL && ppnew != NULL) { ppnew->pp_proc = p; LIST_INSERT_HEAD(pph, ppnew, pp_next); pp = ppnew; ppnew = NULL; } mtx_unlock_spin(&pmc_processhash_mtx); if (pp != NULL && ppnew != NULL) free(ppnew, M_PMC); return pp; } /* * remove a process descriptor from the process hash table. */ static void pmc_remove_process_descriptor(struct pmc_process *pp) { KASSERT(pp->pp_refcnt == 0, ("[pmc,%d] Removing process descriptor %p with count %d", __LINE__, pp, pp->pp_refcnt)); mtx_lock_spin(&pmc_processhash_mtx); LIST_REMOVE(pp, pp_next); mtx_unlock_spin(&pmc_processhash_mtx); } /* * find an owner descriptor corresponding to proc 'p' */ static struct pmc_owner * pmc_find_owner_descriptor(struct proc *p) { uint32_t hindex; struct pmc_owner *po; struct pmc_ownerhash *poh; hindex = PMC_HASH_PTR(p, pmc_ownerhashmask); poh = &pmc_ownerhash[hindex]; po = NULL; LIST_FOREACH(po, poh, po_next) if (po->po_owner == p) break; PMCDBG5(OWN,FND,1, "find-owner proc=%p (%d, %s) hindex=0x%x -> " "pmc-owner=%p", p, p->p_pid, p->p_comm, hindex, po); return po; } /* * pmc_allocate_pmc_descriptor * * Allocate a pmc descriptor and initialize its * fields. */ static struct pmc * pmc_allocate_pmc_descriptor(void) { struct pmc *pmc; pmc = malloc(sizeof(struct pmc), M_PMC, M_WAITOK|M_ZERO); PMCDBG1(PMC,ALL,1, "allocate-pmc -> pmc=%p", pmc); return pmc; } /* * Destroy a pmc descriptor. */ static void pmc_destroy_pmc_descriptor(struct pmc *pm) { KASSERT(pm->pm_state == PMC_STATE_DELETED || pm->pm_state == PMC_STATE_FREE, ("[pmc,%d] destroying non-deleted PMC", __LINE__)); KASSERT(LIST_EMPTY(&pm->pm_targets), ("[pmc,%d] destroying pmc with targets", __LINE__)); KASSERT(pm->pm_owner == NULL, ("[pmc,%d] destroying pmc attached to an owner", __LINE__)); KASSERT(pm->pm_runcount == 0, ("[pmc,%d] pmc has non-zero run count %d", __LINE__, pm->pm_runcount)); free(pm, M_PMC); } static void pmc_wait_for_pmc_idle(struct pmc *pm) { #ifdef HWPMC_DEBUG volatile int maxloop; maxloop = 100 * pmc_cpu_max(); #endif /* * Loop (with a forced context switch) till the PMC's runcount * comes down to zero. */ while (atomic_load_acq_32(&pm->pm_runcount) > 0) { #ifdef HWPMC_DEBUG maxloop--; KASSERT(maxloop > 0, ("[pmc,%d] (ri%d, rc%d) waiting too long for " "pmc to be free", __LINE__, PMC_TO_ROWINDEX(pm), pm->pm_runcount)); #endif pmc_force_context_switch(); } } /* * This function does the following things: * * - detaches the PMC from hardware * - unlinks all target threads that were attached to it * - removes the PMC from its owner's list * - destroys the PMC private mutex * * Once this function completes, the given pmc pointer can be freed by * calling pmc_destroy_pmc_descriptor(). */ static void pmc_release_pmc_descriptor(struct pmc *pm) { enum pmc_mode mode; struct pmc_hw *phw; u_int adjri, ri, cpu; struct pmc_owner *po; struct pmc_binding pb; struct pmc_process *pp; struct pmc_classdep *pcd; struct pmc_target *ptgt, *tmp; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(pm, ("[pmc,%d] null pmc", __LINE__)); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); mode = PMC_TO_MODE(pm); PMCDBG3(PMC,REL,1, "release-pmc pmc=%p ri=%d mode=%d", pm, ri, mode); /* * First, we take the PMC off hardware. 
*/ cpu = 0; if (PMC_IS_SYSTEM_MODE(mode)) { /* * A system mode PMC runs on a specific CPU. Switch * to this CPU and turn hardware off. */ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); pmc_select_cpu(cpu); /* switch off non-stalled CPUs */ CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate); if (pm->pm_state == PMC_STATE_RUNNING && !CPU_ISSET(cpu, &pm->pm_stalled)) { phw = pmc_pcpu[cpu]->pc_hwpmcs[ri]; KASSERT(phw->phw_pmc == pm, ("[pmc, %d] pmc ptr ri(%d) hw(%p) pm(%p)", __LINE__, ri, phw->phw_pmc, pm)); PMCDBG2(PMC,REL,2, "stopping cpu=%d ri=%d", cpu, ri); critical_enter(); pcd->pcd_stop_pmc(cpu, adjri); critical_exit(); } PMCDBG2(PMC,REL,2, "decfg cpu=%d ri=%d", cpu, ri); critical_enter(); pcd->pcd_config_pmc(cpu, adjri, NULL); critical_exit(); /* adjust the global and process count of SS mode PMCs */ if (mode == PMC_MODE_SS && pm->pm_state == PMC_STATE_RUNNING) { po = pm->pm_owner; po->po_sscount--; if (po->po_sscount == 0) { atomic_subtract_rel_int(&pmc_ss_count, 1); LIST_REMOVE(po, po_ssnext); } } pm->pm_state = PMC_STATE_DELETED; pmc_restore_cpu_binding(&pb); /* * We could have references to this PMC structure in * the per-cpu sample queues. Wait for the queue to * drain. */ pmc_wait_for_pmc_idle(pm); } else if (PMC_IS_VIRTUAL_MODE(mode)) { /* * A virtual PMC could be running on multiple CPUs at * a given instant. * * By marking its state as DELETED, we ensure that * this PMC is never further scheduled on hardware. * * Then we wait till all CPUs are done with this PMC. */ pm->pm_state = PMC_STATE_DELETED; /* Wait for the PMCs runcount to come to zero. */ pmc_wait_for_pmc_idle(pm); /* * At this point the PMC is off all CPUs and cannot be * freshly scheduled onto a CPU. It is now safe to * unlink all targets from this PMC. If a * process-record's refcount falls to zero, we remove * it from the hash table. The module-wide SX lock * protects us from races. */ LIST_FOREACH_SAFE(ptgt, &pm->pm_targets, pt_next, tmp) { pp = ptgt->pt_process; pmc_unlink_target_process(pm, pp); /* frees 'ptgt' */ PMCDBG1(PMC,REL,3, "pp->refcnt=%d", pp->pp_refcnt); /* * If the target process record shows that no * PMCs are attached to it, reclaim its space. */ if (pp->pp_refcnt == 0) { pmc_remove_process_descriptor(pp); free(pp, M_PMC); } } cpu = curthread->td_oncpu; /* setup cpu for pmd_release() */ } /* * Release any MD resources */ (void) pcd->pcd_release_pmc(cpu, adjri, pm); /* * Update row disposition */ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) PMC_UNMARK_ROW_STANDALONE(ri); else PMC_UNMARK_ROW_THREAD(ri); /* unlink from the owner's list */ if (pm->pm_owner) { LIST_REMOVE(pm, pm_next); pm->pm_owner = NULL; } } /* * Register an owner and a pmc. */ static int pmc_register_owner(struct proc *p, struct pmc *pmc) { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(p)) == NULL) if ((po = pmc_allocate_owner_descriptor(p)) == NULL) return ENOMEM; KASSERT(pmc->pm_owner == NULL, ("[pmc,%d] attempting to own an initialized PMC", __LINE__)); pmc->pm_owner = po; LIST_INSERT_HEAD(&po->po_pmcs, pmc, pm_next); PROC_LOCK(p); p->p_flag |= P_HWPMC; PROC_UNLOCK(p); if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_pmcallocate(pmc); PMCDBG2(PMC,REG,1, "register-owner pmc-owner=%p pmc=%p", po, pmc); return 0; } /* * Return the current row disposition: * == 0 => FREE * > 0 => PROCESS MODE * < 0 => SYSTEM MODE */ int pmc_getrowdisp(int ri) { return pmc_pmcdisp[ri]; } /* * Check if a PMC at row index 'ri' can be allocated to the current * process. 
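/*
 * How the row disposition encoding above works: pmc_pmcdisp[] is a
 * signed reference count whose sign doubles as the mode tag.  A sketch
 * of the mark helpers implied by PMC_MARK_ROW_THREAD and friends
 * (invented names; callers are assumed to hold the module's sx lock).
 */
#define	EXAMPLE_NPMC	8
static int example_disp[EXAMPLE_NPMC]; /* 0 free, >0 thread, <0 standalone */

static void
example_mark_row_thread(int ri)
{
	KASSERT(example_disp[ri] >= 0,
	    ("row %d already claimed for system mode", ri));
	example_disp[ri]++;
}

static void
example_mark_row_standalone(int ri)
{
	KASSERT(example_disp[ri] <= 0,
	    ("row %d already claimed for process mode", ri));
	example_disp[ri]--;
}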
* * Allocation can fail if: * - the current process is already being profiled by a PMC at index 'ri', * attached to it via OP_PMCATTACH. * - the current process has already allocated a PMC at index 'ri' * via OP_ALLOCATE. */ static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu) { enum pmc_mode mode; struct pmc *pm; struct pmc_owner *po; struct pmc_process *pp; PMCDBG5(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d " "cpu=%d", p, p->p_pid, p->p_comm, ri, cpu); /* * We shouldn't have already allocated a process-mode PMC at * row index 'ri'. * * We shouldn't have allocated a system-wide PMC on the same * CPU and same RI. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) LIST_FOREACH(pm, &po->po_pmcs, pm_next) { if (PMC_TO_ROWINDEX(pm) == ri) { mode = PMC_TO_MODE(pm); if (PMC_IS_VIRTUAL_MODE(mode)) return EEXIST; if (PMC_IS_SYSTEM_MODE(mode) && (int) PMC_TO_CPU(pm) == cpu) return EEXIST; } } /* * We also shouldn't be the target of any PMC at this index * since otherwise a PMC_ATTACH to ourselves will fail. */ if ((pp = pmc_find_process_descriptor(p, 0)) != NULL) if (pp->pp_pmcs[ri].pp_pmc) return EEXIST; PMCDBG4(PMC,ALR,2, "can-allocate-rowindex proc=%p (%d, %s) ri=%d ok", p, p->p_pid, p->p_comm, ri); return 0; } /* * Check if a given PMC at row index 'ri' can be currently used in * mode 'mode'. */ static int pmc_can_allocate_row(int ri, enum pmc_mode mode) { enum pmc_disp disp; sx_assert(&pmc_sx, SX_XLOCKED); PMCDBG2(PMC,ALR,1, "can-allocate-row ri=%d mode=%d", ri, mode); if (PMC_IS_SYSTEM_MODE(mode)) disp = PMC_DISP_STANDALONE; else disp = PMC_DISP_THREAD; /* * check disposition for PMC row 'ri': * * Expected disposition Row-disposition Result * * STANDALONE STANDALONE or FREE proceed * STANDALONE THREAD fail * THREAD THREAD or FREE proceed * THREAD STANDALONE fail */ if (!PMC_ROW_DISP_IS_FREE(ri) && !(disp == PMC_DISP_THREAD && PMC_ROW_DISP_IS_THREAD(ri)) && !(disp == PMC_DISP_STANDALONE && PMC_ROW_DISP_IS_STANDALONE(ri))) return EBUSY; /* * All OK */ PMCDBG2(PMC,ALR,2, "can-allocate-row ri=%d mode=%d ok", ri, mode); return 0; } /* * Find a PMC descriptor with user handle 'pmcid' for thread 'td'. */ static struct pmc * pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmcid) { struct pmc *pm; KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc, ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__, PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc)); LIST_FOREACH(pm, &po->po_pmcs, pm_next) if (pm->pm_id == pmcid) return pm; return NULL; } static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pmc) { struct pmc *pm, *opm; struct pmc_owner *po; struct pmc_process *pp; KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc, ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__, PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc)); PMCDBG1(PMC,FND,1, "find-pmc id=%d", pmcid); if ((po = pmc_find_owner_descriptor(curthread->td_proc)) == NULL) { /* * In case of PMC_F_DESCENDANTS child processes we will not find * the current process in the owners hash list. Find the owner * process first and from there lookup the po. 
*/ if ((pp = pmc_find_process_descriptor(curthread->td_proc, PMC_FLAG_NONE)) == NULL) { return ESRCH; } else { opm = pp->pp_pmcs[PMC_ID_TO_ROWINDEX(pmcid)].pp_pmc; if (opm == NULL) return ESRCH; if ((opm->pm_flags & (PMC_F_ATTACHED_TO_OWNER| PMC_F_DESCENDANTS)) != (PMC_F_ATTACHED_TO_OWNER| PMC_F_DESCENDANTS)) return ESRCH; po = opm->pm_owner; } } if ((pm = pmc_find_pmc_descriptor_in_process(po, pmcid)) == NULL) return EINVAL; PMCDBG2(PMC,FND,2, "find-pmc id=%d -> pmc=%p", pmcid, pm); *pmc = pm; return 0; } /* * Start a PMC. */ static int pmc_start(struct pmc *pm) { enum pmc_mode mode; struct pmc_owner *po; struct pmc_binding pb; struct pmc_classdep *pcd; int adjri, error, cpu, ri; KASSERT(pm != NULL, ("[pmc,%d] null pm", __LINE__)); mode = PMC_TO_MODE(pm); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); error = 0; PMCDBG3(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, mode, ri); po = pm->pm_owner; /* * Disallow PMCSTART if a logfile is required but has not been * configured yet. */ if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) && (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) return (EDOOFUS); /* programming error */ /* * If this is a sampling mode PMC, log mapping information for * the kernel modules that are currently loaded. */ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pmc_log_kernel_mappings(pm); if (PMC_IS_VIRTUAL_MODE(mode)) { /* * If a PMCATTACH has never been done on this PMC, * attach it to its owner process. */ if (LIST_EMPTY(&pm->pm_targets)) error = (pm->pm_flags & PMC_F_ATTACH_DONE) ? ESRCH : pmc_attach_process(po->po_owner, pm); /* * If the PMC is attached to its owner, then force a context * switch to ensure that the MD state gets set correctly. */ if (error == 0) { pm->pm_state = PMC_STATE_RUNNING; if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) pmc_force_context_switch(); } return (error); } /* * A system-wide PMC. * * Add the owner to the global list if this is a system-wide * sampling PMC. */ if (mode == PMC_MODE_SS) { if (po->po_sscount == 0) { LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext); atomic_add_rel_int(&pmc_ss_count, 1); PMCDBG1(PMC,OPS,1, "po=%p in global list", po); } po->po_sscount++; /* * Log mapping information for all existing processes in the * system. Subsequent mappings are logged as they happen; * see pmc_process_mmap(). */ if (po->po_logprocmaps == 0) { pmc_log_all_process_mappings(po); po->po_logprocmaps = 1; } } /* * Move to the CPU associated with this * PMC, and start the hardware. */ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); if (!pmc_cpu_is_active(cpu)) return (ENXIO); pmc_select_cpu(cpu); /* * global PMCs are configured at allocation time * so write out the initial value and start the PMC. */ pm->pm_state = PMC_STATE_RUNNING; critical_enter(); if ((error = pcd->pcd_write_pmc(cpu, adjri, PMC_IS_SAMPLING_MODE(mode) ? pm->pm_sc.pm_reloadcount : pm->pm_sc.pm_initial)) == 0) { /* If a sampling mode PMC, reset stalled state. */ if (PMC_IS_SAMPLING_MODE(mode)) CPU_CLR_ATOMIC(cpu, &pm->pm_stalled); /* Indicate that we desire this to run. Start it. */ CPU_SET_ATOMIC(cpu, &pm->pm_cpustate); error = pcd->pcd_start_pmc(cpu, adjri); } critical_exit(); pmc_restore_cpu_binding(&pb); return (error); } /* * Stop a PMC. 
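/*
 * Shape of the bind/operate/unbind sequence that pmc_start() and
 * pmc_stop() rely on, assuming the pmc_save_cpu_binding() helpers wrap
 * sched_bind()/sched_unbind() (a sketch, not the driver's definitions).
 */
static void
example_run_on_cpu(int cpu, void (*op)(int))
{
	struct thread *td;

	td = curthread;
	thread_lock(td);
	sched_bind(td, cpu);		/* migrate to the target CPU */
	thread_unlock(td);

	KASSERT(curthread->td_oncpu == cpu, ("bound to the wrong CPU"));
	op(cpu);			/* touch the per-CPU hardware */

	thread_lock(td);
	sched_unbind(td);		/* allow migration again */
	thread_unlock(td);
}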
*/ static int pmc_stop(struct pmc *pm) { struct pmc_owner *po; struct pmc_binding pb; struct pmc_classdep *pcd; int adjri, cpu, error, ri; KASSERT(pm != NULL, ("[pmc,%d] null pmc", __LINE__)); PMCDBG3(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm, PMC_TO_MODE(pm), PMC_TO_ROWINDEX(pm)); pm->pm_state = PMC_STATE_STOPPED; /* * If the PMC is a virtual mode one, changing the state to * non-RUNNING is enough to ensure that the PMC never gets * scheduled. * * If this PMC is currently running on a CPU, then it will * be handled correctly at the time its target process is context * switched out. */ if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) return 0; /* * A system-mode PMC. Move to the CPU associated with * this PMC, and stop the hardware. We update the * 'initial count' so that a subsequent PMCSTART will * resume counting from the current hardware count. */ pmc_save_cpu_binding(&pb); cpu = PMC_TO_CPU(pm); KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[pmc,%d] illegal cpu=%d", __LINE__, cpu)); if (!pmc_cpu_is_active(cpu)) return ENXIO; pmc_select_cpu(cpu); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate); critical_enter(); if ((error = pcd->pcd_stop_pmc(cpu, adjri)) == 0) error = pcd->pcd_read_pmc(cpu, adjri, &pm->pm_sc.pm_initial); critical_exit(); pmc_restore_cpu_binding(&pb); po = pm->pm_owner; /* remove this owner from the global list of SS PMC owners */ if (PMC_TO_MODE(pm) == PMC_MODE_SS) { po->po_sscount--; if (po->po_sscount == 0) { atomic_subtract_rel_int(&pmc_ss_count, 1); LIST_REMOVE(po, po_ssnext); PMCDBG1(PMC,OPS,2,"po=%p removed from global list", po); } } return (error); } #ifdef HWPMC_DEBUG static const char *pmc_op_to_name[] = { #undef __PMC_OP #define __PMC_OP(N, D) #N , __PMC_OPS() NULL }; #endif /* * The syscall interface */ #define PMC_GET_SX_XLOCK(...) do { \ sx_xlock(&pmc_sx); \ if (pmc_hook == NULL) { \ sx_xunlock(&pmc_sx); \ return __VA_ARGS__; \ } \ } while (0) #define PMC_DOWNGRADE_SX() do { \ sx_downgrade(&pmc_sx); \ is_sx_downgraded = 1; \ } while (0) static int pmc_syscall_handler(struct thread *td, void *syscall_args) { int error, is_sx_downgraded, is_sx_locked, op; struct pmc_syscall_args *c; void *arg; PMC_GET_SX_XLOCK(ENOSYS); DROP_GIANT(); is_sx_downgraded = 0; is_sx_locked = 1; c = (struct pmc_syscall_args *) syscall_args; op = c->pmop_code; arg = c->pmop_data; PMCDBG3(MOD,PMS,1, "syscall op=%d \"%s\" arg=%p", op, pmc_op_to_name[op], arg); error = 0; atomic_add_int(&pmc_stats.pm_syscalls, 1); switch(op) { /* * Configure a log file. * * XXX This OP will be reworked. */ case PMC_OP_CONFIGURELOG: { struct proc *p; struct pmc *pm; struct pmc_owner *po; struct pmc_op_configurelog cl; sx_assert(&pmc_sx, SX_XLOCKED); if ((error = copyin(arg, &cl, sizeof(cl))) != 0) break; /* mark this process as owning a log file */ p = td->td_proc; if ((po = pmc_find_owner_descriptor(p)) == NULL) if ((po = pmc_allocate_owner_descriptor(p)) == NULL) { error = ENOMEM; break; } /* * If a valid fd was passed in, try to configure that, * otherwise if 'fd' was less than zero and there was * a log file configured, flush its buffers and * de-configure it.
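/*
 * Every OP below follows the same copyin/validate/act/copyout shape.
 * A hypothetical handler showing just that skeleton -- PMC_OP_EXAMPLE,
 * struct pmc_op_example, EXAMPLE_MAX and example_compute() are all
 * invented for illustration:
 *
 *	case PMC_OP_EXAMPLE:
 *	{
 *		struct pmc_op_example ex;
 *
 *		PMC_DOWNGRADE_SX();	// a read-only OP can share the lock
 *
 *		if ((error = copyin(arg, &ex, sizeof(ex))) != 0)
 *			break;
 *		if (ex.pm_request >= EXAMPLE_MAX) {	// validate
 *			error = EINVAL;
 *			break;
 *		}
 *		ex.pm_reply = example_compute(ex.pm_request);
 *		error = copyout(&ex, arg, sizeof(ex));
 *	}
 *	break;
 */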
*/ if (cl.pm_logfd >= 0) { sx_xunlock(&pmc_sx); is_sx_locked = 0; error = pmclog_configure_log(md, po, cl.pm_logfd); } else if (po->po_flags & PMC_PO_OWNS_LOGFILE) { pmclog_process_closelog(po); error = pmclog_close(po); if (error == 0) { LIST_FOREACH(pm, &po->po_pmcs, pm_next) if (pm->pm_flags & PMC_F_NEEDS_LOGFILE && pm->pm_state == PMC_STATE_RUNNING) pmc_stop(pm); error = pmclog_deconfigure_log(po); } } else error = EINVAL; if (error) break; } break; /* * Flush a log file. */ case PMC_OP_FLUSHLOG: { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } error = pmclog_flush(po); } break; /* * Close a log file. */ case PMC_OP_CLOSELOG: { struct pmc_owner *po; sx_assert(&pmc_sx, SX_XLOCKED); if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } error = pmclog_close(po); } break; /* * Retrieve hardware configuration. */ case PMC_OP_GETCPUINFO: /* CPU information */ { struct pmc_op_getcpuinfo gci; struct pmc_classinfo *pci; struct pmc_classdep *pcd; int cl; gci.pm_cputype = md->pmd_cputype; gci.pm_ncpu = pmc_cpu_max(); gci.pm_npmc = md->pmd_npmc; gci.pm_nclass = md->pmd_nclass; pci = gci.pm_classes; pcd = md->pmd_classdep; for (cl = 0; cl < md->pmd_nclass; cl++, pci++, pcd++) { pci->pm_caps = pcd->pcd_caps; pci->pm_class = pcd->pcd_class; pci->pm_width = pcd->pcd_width; pci->pm_num = pcd->pcd_num; } error = copyout(&gci, arg, sizeof(gci)); } break; /* * Retrieve soft events list. */ case PMC_OP_GETDYNEVENTINFO: { enum pmc_class cl; enum pmc_event ev; struct pmc_op_getdyneventinfo *gei; struct pmc_dyn_event_descr dev; struct pmc_soft *ps; uint32_t nevent; sx_assert(&pmc_sx, SX_LOCKED); gei = (struct pmc_op_getdyneventinfo *) arg; if ((error = copyin(&gei->pm_class, &cl, sizeof(cl))) != 0) break; /* Only SOFT class is dynamic. */ if (cl != PMC_CLASS_SOFT) { error = EINVAL; break; } nevent = 0; for (ev = PMC_EV_SOFT_FIRST; (int)ev <= PMC_EV_SOFT_LAST; ev++) { ps = pmc_soft_ev_acquire(ev); if (ps == NULL) continue; bcopy(&ps->ps_ev, &dev, sizeof(dev)); pmc_soft_ev_release(ps); error = copyout(&dev, &gei->pm_events[nevent], sizeof(struct pmc_dyn_event_descr)); if (error != 0) break; nevent++; } if (error != 0) break; error = copyout(&nevent, &gei->pm_nevent, sizeof(nevent)); } break; /* * Get module statistics */ case PMC_OP_GETDRIVERSTATS: { struct pmc_op_getdriverstats gms; bcopy(&pmc_stats, &gms, sizeof(gms)); error = copyout(&gms, arg, sizeof(gms)); } break; /* * Retrieve module version number */ case PMC_OP_GETMODULEVERSION: { uint32_t cv, modv; /* retrieve the client's idea of the ABI version */ if ((error = copyin(arg, &cv, sizeof(uint32_t))) != 0) break; /* don't service clients newer than our driver */ modv = PMC_VERSION; if ((cv & 0xFFFF0000) > (modv & 0xFFFF0000)) { error = EPROGMISMATCH; break; } error = copyout(&modv, arg, sizeof(int)); } break; /* * Retrieve the state of all the PMCs on a given * CPU. 
*/ case PMC_OP_GETPMCINFO: { int ari; struct pmc *pm; size_t pmcinfo_size; uint32_t cpu, n, npmc; struct pmc_owner *po; struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc_info *p, *pmcinfo; struct pmc_op_getpmcinfo *gpi; PMC_DOWNGRADE_SX(); gpi = (struct pmc_op_getpmcinfo *) arg; if ((error = copyin(&gpi->pm_cpu, &cpu, sizeof(cpu))) != 0) break; if (cpu >= pmc_cpu_max()) { error = EINVAL; break; } if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } /* switch to CPU 'cpu' */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); npmc = md->pmd_npmc; pmcinfo_size = npmc * sizeof(struct pmc_info); pmcinfo = malloc(pmcinfo_size, M_PMC, M_WAITOK); p = pmcinfo; for (n = 0; n < md->pmd_npmc; n++, p++) { pcd = pmc_ri_to_classdep(md, n, &ari); KASSERT(pcd != NULL, ("[pmc,%d] null pcd ri=%d", __LINE__, n)); if ((error = pcd->pcd_describe(cpu, ari, p, &pm)) != 0) break; if (PMC_ROW_DISP_IS_STANDALONE(n)) p->pm_rowdisp = PMC_DISP_STANDALONE; else if (PMC_ROW_DISP_IS_THREAD(n)) p->pm_rowdisp = PMC_DISP_THREAD; else p->pm_rowdisp = PMC_DISP_FREE; p->pm_ownerpid = -1; if (pm == NULL) /* no PMC associated */ continue; po = pm->pm_owner; KASSERT(po->po_owner != NULL, ("[pmc,%d] pmc_owner had a null proc pointer", __LINE__)); p->pm_ownerpid = po->po_owner->p_pid; p->pm_mode = PMC_TO_MODE(pm); p->pm_event = pm->pm_event; p->pm_flags = pm->pm_flags; if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) p->pm_reloadcount = pm->pm_sc.pm_reloadcount; } pmc_restore_cpu_binding(&pb); /* now copy out the PMC info collected */ if (error == 0) error = copyout(pmcinfo, &gpi->pm_pmcs, pmcinfo_size); free(pmcinfo, M_PMC); } break; /* * Set the administrative state of a PMC. I.e. whether * the PMC is to be used or not. */ case PMC_OP_PMCADMIN: { int cpu, ri; enum pmc_state request; struct pmc_cpu *pc; struct pmc_hw *phw; struct pmc_op_pmcadmin pma; struct pmc_binding pb; sx_assert(&pmc_sx, SX_XLOCKED); KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); error = priv_check(td, PRIV_PMC_MANAGE); if (error) break; if ((error = copyin(arg, &pma, sizeof(pma))) != 0) break; cpu = pma.pm_cpu; if (cpu < 0 || cpu >= (int) pmc_cpu_max()) { error = EINVAL; break; } if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } request = pma.pm_state; if (request != PMC_STATE_DISABLED && request != PMC_STATE_FREE) { error = EINVAL; break; } ri = pma.pm_pmc; /* pmc id == row index */ if (ri < 0 || ri >= (int) md->pmd_npmc) { error = EINVAL; break; } /* * We can't disable a PMC with a row-index allocated * for process virtual PMCs. */ if (PMC_ROW_DISP_IS_THREAD(ri) && request == PMC_STATE_DISABLED) { error = EBUSY; break; } /* * otherwise, this PMC on this CPU is either free or * in system-wide mode. */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); pc = pmc_pcpu[cpu]; phw = pc->pc_hwpmcs[ri]; /* * XXX do we need some kind of 'forced' disable? */ if (phw->phw_pmc == NULL) { if (request == PMC_STATE_DISABLED && (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED)) { phw->phw_state &= ~PMC_PHW_FLAG_IS_ENABLED; PMC_MARK_ROW_STANDALONE(ri); } else if (request == PMC_STATE_FREE && (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0) { phw->phw_state |= PMC_PHW_FLAG_IS_ENABLED; PMC_UNMARK_ROW_STANDALONE(ri); } /* other cases are a no-op */ } else error = EBUSY; pmc_restore_cpu_binding(&pb); } break; /* * Allocate a PMC. 
*/ case PMC_OP_PMCALLOCATE: { int adjri, n; u_int cpu; uint32_t caps; struct pmc *pmc; enum pmc_mode mode; struct pmc_hw *phw; struct pmc_binding pb; struct pmc_classdep *pcd; struct pmc_op_pmcallocate pa; if ((error = copyin(arg, &pa, sizeof(pa))) != 0) break; caps = pa.pm_caps; mode = pa.pm_mode; cpu = pa.pm_cpu; if ((mode != PMC_MODE_SS && mode != PMC_MODE_SC && mode != PMC_MODE_TS && mode != PMC_MODE_TC) || (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max())) { error = EINVAL; break; } /* * Virtual PMCs should only ask for a default CPU. * System mode PMCs need to specify a non-default CPU. */ if ((PMC_IS_VIRTUAL_MODE(mode) && cpu != (u_int) PMC_CPU_ANY) || (PMC_IS_SYSTEM_MODE(mode) && cpu == (u_int) PMC_CPU_ANY)) { error = EINVAL; break; } /* * Check that an inactive CPU is not being asked for. */ if (PMC_IS_SYSTEM_MODE(mode) && !pmc_cpu_is_active(cpu)) { error = ENXIO; break; } /* * Refuse an allocation for a system-wide PMC if this * process has been jailed, or if this process lacks * super-user credentials and the sysctl tunable * 'security.bsd.unprivileged_syspmcs' is zero. */ if (PMC_IS_SYSTEM_MODE(mode)) { if (jailed(curthread->td_ucred)) { error = EPERM; break; } if (!pmc_unprivileged_syspmcs) { error = priv_check(curthread, PRIV_PMC_SYSTEM); if (error) break; } } /* * Look for valid values for 'pm_flags' */ if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN)) != 0) { error = EINVAL; break; } /* process logging options are not allowed for system PMCs */ if (PMC_IS_SYSTEM_MODE(mode) && (pa.pm_flags & (PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT))) { error = EINVAL; break; } /* * All sampling mode PMCs need to be able to interrupt the * CPU. */ if (PMC_IS_SAMPLING_MODE(mode)) caps |= PMC_CAP_INTERRUPT; /* A valid class specifier should have been passed in. */ for (n = 0; n < md->pmd_nclass; n++) if (md->pmd_classdep[n].pcd_class == pa.pm_class) break; if (n == md->pmd_nclass) { error = EINVAL; break; } /* The requested PMC capabilities should be feasible. 
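/*
 * "Feasible" above is a bitmask subset test: every requested
 * capability bit must also be present in the class's capability mask.
 * Worked example with invented values:
 *
 *	class caps = 0x0313
 *	request 0x0110: (0x0313 & 0x0110) == 0x0110 -> allowed
 *	request 0x0410: (0x0313 & 0x0410) == 0x0010 -> rejected
 */
static int
example_caps_feasible(uint32_t class_caps, uint32_t requested)
{
	return ((class_caps & requested) == requested);
}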
*/ if ((md->pmd_classdep[n].pcd_caps & caps) != caps) { error = EOPNOTSUPP; break; } PMCDBG4(PMC,ALL,2, "event=%d caps=0x%x mode=%d cpu=%d", pa.pm_ev, caps, mode, cpu); pmc = pmc_allocate_pmc_descriptor(); pmc->pm_id = PMC_ID_MAKE_ID(cpu,pa.pm_mode,pa.pm_class, PMC_ID_INVALID); pmc->pm_event = pa.pm_ev; pmc->pm_state = PMC_STATE_FREE; pmc->pm_caps = caps; pmc->pm_flags = pa.pm_flags; /* switch thread to CPU 'cpu' */ pmc_save_cpu_binding(&pb); #define PMC_IS_SHAREABLE_PMC(cpu, n) \ (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_state & \ PMC_PHW_FLAG_IS_SHAREABLE) #define PMC_IS_UNALLOCATED(cpu, n) \ (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_pmc == NULL) if (PMC_IS_SYSTEM_MODE(mode)) { pmc_select_cpu(cpu); for (n = 0; n < (int) md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); if (pmc_can_allocate_row(n, mode) == 0 && pmc_can_allocate_rowindex( curthread->td_proc, n, cpu) == 0 && (PMC_IS_UNALLOCATED(cpu, n) || PMC_IS_SHAREABLE_PMC(cpu, n)) && pcd->pcd_allocate_pmc(cpu, adjri, pmc, &pa) == 0) break; } } else { /* Process virtual mode */ for (n = 0; n < (int) md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); if (pmc_can_allocate_row(n, mode) == 0 && pmc_can_allocate_rowindex( curthread->td_proc, n, PMC_CPU_ANY) == 0 && pcd->pcd_allocate_pmc(curthread->td_oncpu, adjri, pmc, &pa) == 0) break; } } #undef PMC_IS_UNALLOCATED #undef PMC_IS_SHAREABLE_PMC pmc_restore_cpu_binding(&pb); if (n == (int) md->pmd_npmc) { pmc_destroy_pmc_descriptor(pmc); pmc = NULL; error = EINVAL; break; } /* Fill in the correct value in the ID field */ pmc->pm_id = PMC_ID_MAKE_ID(cpu,mode,pa.pm_class,n); PMCDBG5(PMC,ALL,2, "ev=%d class=%d mode=%d n=%d -> pmcid=%x", pmc->pm_event, pa.pm_class, mode, n, pmc->pm_id); /* Process mode PMCs with logging enabled need log files */ if (pmc->pm_flags & (PMC_F_LOG_PROCEXIT | PMC_F_LOG_PROCCSW)) pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; /* All system mode sampling PMCs require a log file */ if (PMC_IS_SAMPLING_MODE(mode) && PMC_IS_SYSTEM_MODE(mode)) pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; /* * Configure global pmc's immediately */ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pmc))) { pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); phw = pmc_pcpu[cpu]->pc_hwpmcs[n]; pcd = pmc_ri_to_classdep(md, n, &adjri); if ((phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0 || (error = pcd->pcd_config_pmc(cpu, adjri, pmc)) != 0) { (void) pcd->pcd_release_pmc(cpu, adjri, pmc); pmc_destroy_pmc_descriptor(pmc); pmc = NULL; pmc_restore_cpu_binding(&pb); error = EPERM; break; } pmc_restore_cpu_binding(&pb); } pmc->pm_state = PMC_STATE_ALLOCATED; /* * mark row disposition */ if (PMC_IS_SYSTEM_MODE(mode)) PMC_MARK_ROW_STANDALONE(n); else PMC_MARK_ROW_THREAD(n); /* * Register this PMC with the current thread as its owner. */ if ((error = pmc_register_owner(curthread->td_proc, pmc)) != 0) { pmc_release_pmc_descriptor(pmc); pmc_destroy_pmc_descriptor(pmc); pmc = NULL; break; } /* * Return the allocated index. */ pa.pm_pmcid = pmc->pm_id; error = copyout(&pa, arg, sizeof(pa)); } break; /* * Attach a PMC to a process. 
*/ case PMC_OP_PMCATTACH: { struct pmc *pm; struct proc *p; struct pmc_op_pmcattach a; sx_assert(&pmc_sx, SX_XLOCKED); if ((error = copyin(arg, &a, sizeof(a))) != 0) break; if (a.pm_pid < 0) { error = EINVAL; break; } else if (a.pm_pid == 0) a.pm_pid = td->td_proc->p_pid; if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) break; if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { error = EINVAL; break; } /* PMCs may be (re)attached only when allocated or stopped */ if (pm->pm_state == PMC_STATE_RUNNING) { error = EBUSY; break; } else if (pm->pm_state != PMC_STATE_ALLOCATED && pm->pm_state != PMC_STATE_STOPPED) { error = EINVAL; break; } /* lookup pid */ if ((p = pfind(a.pm_pid)) == NULL) { error = ESRCH; break; } /* * Ignore processes that are working on exiting. */ if (p->p_flag & P_WEXIT) { error = ESRCH; PROC_UNLOCK(p); /* pfind() returns a locked process */ break; } /* * we are allowed to attach a PMC to a process if * we can debug it. */ error = p_candebug(curthread, p); PROC_UNLOCK(p); if (error == 0) error = pmc_attach_process(p, pm); } break; /* * Detach an attached PMC from a process. */ case PMC_OP_PMCDETACH: { struct pmc *pm; struct proc *p; struct pmc_op_pmcattach a; if ((error = copyin(arg, &a, sizeof(a))) != 0) break; if (a.pm_pid < 0) { error = EINVAL; break; } else if (a.pm_pid == 0) a.pm_pid = td->td_proc->p_pid; if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) break; if ((p = pfind(a.pm_pid)) == NULL) { error = ESRCH; break; } /* * Treat processes that are in the process of exiting * as if they were not present. */ if (p->p_flag & P_WEXIT) error = ESRCH; PROC_UNLOCK(p); /* pfind() returns a locked process */ if (error == 0) error = pmc_detach_process(p, pm); } break; /* * Retrieve the MSR number associated with the counter * 'pmc_id'. This allows processes to directly use RDPMC * instructions to read their PMCs, without the overhead of a * system call. */ case PMC_OP_PMCGETMSR: { int adjri, ri; struct pmc *pm; struct pmc_target *pt; struct pmc_op_getmsr gm; struct pmc_classdep *pcd; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &gm, sizeof(gm))) != 0) break; if ((error = pmc_find_pmc(gm.pm_pmcid, &pm)) != 0) break; /* * The allocated PMC has to be a process virtual PMC, * i.e., of type MODE_T[CS]. Global PMCs can only be * read using the PMCREAD operation since they may be * allocated on a different CPU than the one we could * be running on at the time of the RDPMC instruction. * * The GETMSR operation is not allowed for PMCs that * are inherited across processes. */ if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) || (pm->pm_flags & PMC_F_DESCENDANTS)) { error = EINVAL; break; } /* * It only makes sense to use a RDPMC (or its * equivalent instruction on non-x86 architectures) on * a process that has allocated and attached a PMC to * itself. Conversely the PMC is only allowed to have * one process attached to it -- its owner. */ if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL || LIST_NEXT(pt, pt_next) != NULL || pt->pt_process->pp_proc != pm->pm_owner->po_owner) { error = EINVAL; break; } ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); /* PMC class has no 'GETMSR' support */ if (pcd->pcd_get_msr == NULL) { error = ENOSYS; break; } if ((error = (*pcd->pcd_get_msr)(adjri, &gm.pm_msr)) < 0) break; if ((error = copyout(&gm, arg, sizeof(gm))) < 0) break; /* * Mark our process as using MSRs. Update machine * state using a forced context switch. 
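/*
 * What GETMSR buys the process: once it knows the counter's MSR
 * number it can read the PMC without a system call.  An x86-flavored
 * sketch of such a read ('index' would come from gm.pm_msr above; the
 * exact encoding is machine-dependent and this helper is illustrative
 * only):
 */
static __inline uint64_t
example_rdpmc(uint32_t index)
{
	uint32_t lo, hi;

	/* RDPMC: counter index in %ecx, result in %edx:%eax. */
	__asm __volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (index));
	return (((uint64_t)hi << 32) | lo);
}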
*/ pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS; pmc_force_context_switch(); } break; /* * Release an allocated PMC */ case PMC_OP_PMCRELEASE: { pmc_id_t pmcid; struct pmc *pm; struct pmc_owner *po; struct pmc_op_simple sp; /* * Find PMC pointer for the named PMC. * * Use pmc_release_pmc_descriptor() to switch off the * PMC, remove all its target threads, and remove the * PMC from its owner's list. * * Remove the owner record if this is the last PMC * owned. * * Free up space. */ if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; po = pm->pm_owner; pmc_release_pmc_descriptor(pm); pmc_maybe_remove_owner(po); pmc_destroy_pmc_descriptor(pm); } break; /* * Read and/or write a PMC. */ case PMC_OP_PMCRW: { int adjri; struct pmc *pm; uint32_t cpu, ri; pmc_value_t oldvalue; struct pmc_binding pb; struct pmc_op_pmcrw prw; struct pmc_classdep *pcd; struct pmc_op_pmcrw *pprw; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &prw, sizeof(prw))) != 0) break; ri = 0; PMCDBG2(PMC,OPS,1, "rw id=%d flags=0x%x", prw.pm_pmcid, prw.pm_flags); /* must have at least one flag set */ if ((prw.pm_flags & (PMC_F_OLDVALUE|PMC_F_NEWVALUE)) == 0) { error = EINVAL; break; } /* locate pmc descriptor */ if ((error = pmc_find_pmc(prw.pm_pmcid, &pm)) != 0) break; /* Can't read a PMC that hasn't been started. */ if (pm->pm_state != PMC_STATE_ALLOCATED && pm->pm_state != PMC_STATE_STOPPED && pm->pm_state != PMC_STATE_RUNNING) { error = EINVAL; break; } /* writing a new value is allowed only for 'STOPPED' pmcs */ if (pm->pm_state == PMC_STATE_RUNNING && (prw.pm_flags & PMC_F_NEWVALUE)) { error = EBUSY; break; } if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) { /* * If this PMC is attached to its owner (i.e., * the process requesting this operation) and * is running, then attempt to get an * up-to-date reading from hardware for a READ. * Writes are only allowed when the PMC is * stopped, so only update the saved value * field. * * If the PMC is not running, or is not * attached to its owner, read/write to the * savedvalue field.
*/ ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); mtx_pool_lock_spin(pmc_mtxpool, pm); cpu = curthread->td_oncpu; if (prw.pm_flags & PMC_F_OLDVALUE) { if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) && (pm->pm_state == PMC_STATE_RUNNING)) error = (*pcd->pcd_read_pmc)(cpu, adjri, &oldvalue); else oldvalue = pm->pm_gv.pm_savedvalue; } if (prw.pm_flags & PMC_F_NEWVALUE) pm->pm_gv.pm_savedvalue = prw.pm_value; mtx_pool_unlock_spin(pmc_mtxpool, pm); } else { /* System mode PMCs */ cpu = PMC_TO_CPU(pm); ri = PMC_TO_ROWINDEX(pm); pcd = pmc_ri_to_classdep(md, ri, &adjri); if (!pmc_cpu_is_active(cpu)) { error = ENXIO; break; } /* move this thread to CPU 'cpu' */ pmc_save_cpu_binding(&pb); pmc_select_cpu(cpu); critical_enter(); /* save old value */ if (prw.pm_flags & PMC_F_OLDVALUE) if ((error = (*pcd->pcd_read_pmc)(cpu, adjri, &oldvalue))) goto error; /* write out new value */ if (prw.pm_flags & PMC_F_NEWVALUE) error = (*pcd->pcd_write_pmc)(cpu, adjri, prw.pm_value); error: critical_exit(); pmc_restore_cpu_binding(&pb); if (error) break; } pprw = (struct pmc_op_pmcrw *) arg; #ifdef HWPMC_DEBUG if (prw.pm_flags & PMC_F_NEWVALUE) PMCDBG3(PMC,OPS,2, "rw id=%d new %jx -> old %jx", ri, prw.pm_value, oldvalue); else if (prw.pm_flags & PMC_F_OLDVALUE) PMCDBG2(PMC,OPS,2, "rw id=%d -> old %jx", ri, oldvalue); #endif /* return old value if requested */ if (prw.pm_flags & PMC_F_OLDVALUE) if ((error = copyout(&oldvalue, &pprw->pm_value, sizeof(prw.pm_value)))) break; } break; /* * Set the sampling rate for a sampling mode PMC and the * initial count for a counting mode PMC. */ case PMC_OP_PMCSETCOUNT: { struct pmc *pm; struct pmc_op_pmcsetcount sc; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &sc, sizeof(sc))) != 0) break; if ((error = pmc_find_pmc(sc.pm_pmcid, &pm)) != 0) break; if (pm->pm_state == PMC_STATE_RUNNING) { error = EBUSY; break; } if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) pm->pm_sc.pm_reloadcount = sc.pm_count; else pm->pm_sc.pm_initial = sc.pm_count; } break; /* * Start a PMC. */ case PMC_OP_PMCSTART: { pmc_id_t pmcid; struct pmc *pm; struct pmc_op_simple sp; sx_assert(&pmc_sx, SX_XLOCKED); if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; KASSERT(pmcid == pm->pm_id, ("[pmc,%d] pmcid %x != id %x", __LINE__, pm->pm_id, pmcid)); if (pm->pm_state == PMC_STATE_RUNNING) /* already running */ break; else if (pm->pm_state != PMC_STATE_STOPPED && pm->pm_state != PMC_STATE_ALLOCATED) { error = EINVAL; break; } error = pmc_start(pm); } break; /* * Stop a PMC. */ case PMC_OP_PMCSTOP: { pmc_id_t pmcid; struct pmc *pm; struct pmc_op_simple sp; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &sp, sizeof(sp))) != 0) break; pmcid = sp.pm_pmcid; /* * Mark the PMC as inactive and invoke the MD stop * routines if needed. */ if ((error = pmc_find_pmc(pmcid, &pm)) != 0) break; KASSERT(pmcid == pm->pm_id, ("[pmc,%d] pmc id %x != pmcid %x", __LINE__, pm->pm_id, pmcid)); if (pm->pm_state == PMC_STATE_STOPPED) /* already stopped */ break; else if (pm->pm_state != PMC_STATE_RUNNING) { error = EINVAL; break; } error = pmc_stop(pm); } break; /* * Write a user supplied value to the log file. 
*/ case PMC_OP_WRITELOG: { struct pmc_op_writelog wl; struct pmc_owner *po; PMC_DOWNGRADE_SX(); if ((error = copyin(arg, &wl, sizeof(wl))) != 0) break; if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { error = EINVAL; break; } if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) { error = EINVAL; break; } error = pmclog_process_userlog(po, &wl); } break; default: error = EINVAL; break; } if (is_sx_locked != 0) { if (is_sx_downgraded) sx_sunlock(&pmc_sx); else sx_xunlock(&pmc_sx); } if (error) atomic_add_int(&pmc_stats.pm_syscall_errors, 1); PICKUP_GIANT(); return error; } /* * Helper functions */ /* * Mark the thread as needing callchain capture and post an AST. The * actual callchain capture will be done in a context where it is safe * to take page faults. */ static void pmc_post_callchain_callback(void) { struct thread *td; td = curthread; /* * If there are multiple PMCs for the same interrupt, ignore the new post */ if (td->td_pflags & TDP_CALLCHAIN) return; /* * Mark this thread as needing callchain capture. * `td->td_pflags' will be safe to touch because this thread * was in user space when it was interrupted. */ td->td_pflags |= TDP_CALLCHAIN; /* * Don't let this thread migrate between CPUs until callchain * capture completes. */ sched_pin(); return; } /* * Interrupt processing. * * Find a free slot in the per-cpu array of samples and capture the * current callchain there. If a sample was successfully added, a bit * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook * needs to be invoked from the clock handler. * * This function is meant to be called from an NMI handler. It cannot * use any of the locking primitives supplied by the OS. */ int pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf, int inuserspace) { int error, callchaindepth; struct thread *td; struct pmc_sample *ps; struct pmc_samplebuffer *psb; error = 0; /* * Allocate space for a sample buffer. */ psb = pmc_pcpu[cpu]->pc_sb[ring]; ps = psb->ps_write; if (ps->ps_nsamples) { /* in use, reader hasn't caught up */ CPU_SET_ATOMIC(cpu, &pm->pm_stalled); atomic_add_int(&pmc_stats.pm_intr_bufferfull, 1); PMCDBG6(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm, (void *) tf, inuserspace, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); callchaindepth = 1; error = ENOMEM; goto done; } /* Fill in entry. */ PMCDBG6(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm, (void *) tf, inuserspace, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); KASSERT(pm->pm_runcount >= 0, ("[pmc,%d] pm=%p runcount %d", __LINE__, (void *) pm, pm->pm_runcount)); atomic_add_rel_int(&pm->pm_runcount, 1); /* hold onto PMC */ ps->ps_pmc = pm; if ((td = curthread) && td->td_proc) ps->ps_pid = td->td_proc->p_pid; else ps->ps_pid = -1; ps->ps_cpu = cpu; ps->ps_td = td; ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0; callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ? pmc_callchaindepth : 1; if (callchaindepth == 1) ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf); else { /* * Kernel stack traversals can be done immediately, * while we defer to an AST for user space traversals.
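/*
 * The sample buffer used above is a single-producer (interrupt) /
 * single-consumer (hardclock sweep) ring in which ps_nsamples doubles
 * as the per-slot full/empty flag.  Reduced sketch with invented
 * 'xring'/'xslot' types mirroring struct pmc_samplebuffer:
 */
struct xslot {
	int nsamples;			/* 0 means free */
	int payload;
};
struct xring {
	struct xslot *write, *read, *base, *fence;
};

static int
example_ring_put(struct xring *r, int payload)
{
	struct xslot *s;

	s = r->write;
	if (s->nsamples != 0)		/* reader has not caught up */
		return (ENOMEM);	/* caller marks the PMC stalled */
	s->payload = payload;
	s->nsamples = 1;		/* publish: slot now in use */
	r->write = (s + 1 == r->fence) ? r->base : s + 1;
	return (0);
}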
*/ if (!inuserspace) { callchaindepth = pmc_save_kernel_callchain(ps->ps_pc, callchaindepth, tf); } else { pmc_post_callchain_callback(); callchaindepth = PMC_SAMPLE_INUSE; } } ps->ps_nsamples = callchaindepth; /* mark entry as in use */ /* increment write pointer, modulo ring buffer size */ ps++; if (ps == psb->ps_fence) psb->ps_write = psb->ps_samples; else psb->ps_write = ps; done: /* mark CPU as needing processing */ if (callchaindepth != PMC_SAMPLE_INUSE) CPU_SET_ATOMIC(cpu, &pmc_cpumask); return (error); } /* * Capture a user call chain. This function will be called from ast() * before control returns to userland and before the process gets * rescheduled. */ static void pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf) { struct pmc *pm; struct thread *td; struct pmc_sample *ps, *ps_end; struct pmc_samplebuffer *psb; #ifdef INVARIANTS int ncallchains; #endif psb = pmc_pcpu[cpu]->pc_sb[ring]; td = curthread; KASSERT(td->td_pflags & TDP_CALLCHAIN, ("[pmc,%d] Retrieving callchain for thread that doesn't want it", __LINE__)); #ifdef INVARIANTS ncallchains = 0; #endif /* * Iterate through all deferred callchain requests. * Walk from the current read pointer to the current * write pointer. */ ps = psb->ps_read; ps_end = psb->ps_write; do { if (ps->ps_nsamples != PMC_SAMPLE_INUSE) goto next; if (ps->ps_td != td) goto next; KASSERT(ps->ps_cpu == cpu, ("[pmc,%d] cpu mismatch ps_cpu=%d pcpu=%d", __LINE__, ps->ps_cpu, PCPU_GET(cpuid))); pm = ps->ps_pmc; KASSERT(pm->pm_flags & PMC_F_CALLCHAIN, ("[pmc,%d] Retrieving callchain for PMC that doesn't " "want it", __LINE__)); KASSERT(pm->pm_runcount > 0, ("[pmc,%d] runcount %d", __LINE__, pm->pm_runcount)); /* * Retrieve the callchain and mark the sample buffer * as 'processable' by the timer tick sweep code. */ ps->ps_nsamples = pmc_save_user_callchain(ps->ps_pc, pmc_callchaindepth, tf); #ifdef INVARIANTS ncallchains++; #endif next: /* increment the pointer, modulo sample ring size */ if (++ps == psb->ps_fence) ps = psb->ps_samples; } while (ps != ps_end); KASSERT(ncallchains > 0, ("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__, cpu)); KASSERT(td->td_pinned == 1, ("[pmc,%d] invalid td_pinned value", __LINE__)); sched_unpin(); /* Can migrate safely now. */ /* mark CPU as needing processing */ CPU_SET_ATOMIC(cpu, &pmc_cpumask); return; } /* * Process saved PC samples. */ static void pmc_process_samples(int cpu, int ring) { struct pmc *pm; int adjri, n; struct thread *td; struct pmc_owner *po; struct pmc_sample *ps; struct pmc_classdep *pcd; struct pmc_samplebuffer *psb; KASSERT(PCPU_GET(cpuid) == cpu, ("[pmc,%d] not on the correct CPU pcpu=%d cpu=%d", __LINE__, PCPU_GET(cpuid), cpu)); psb = pmc_pcpu[cpu]->pc_sb[ring]; for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */ ps = psb->ps_read; if (ps->ps_nsamples == PMC_SAMPLE_FREE) break; pm = ps->ps_pmc; KASSERT(pm->pm_runcount > 0, ("[pmc,%d] pm=%p runcount %d", __LINE__, (void *) pm, pm->pm_runcount)); po = pm->pm_owner; KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), ("[pmc,%d] pmc=%p non-sampling mode=%d", __LINE__, pm, PMC_TO_MODE(pm))); /* Ignore PMCs that have been switched off */ if (pm->pm_state != PMC_STATE_RUNNING) goto entrydone; /* If there is a pending AST wait for completion */ if (ps->ps_nsamples == PMC_SAMPLE_INUSE) { /* Need a rescan at a later time. 
*/ CPU_SET_ATOMIC(cpu, &pmc_cpumask); break; } PMCDBG6(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu, pm, ps->ps_nsamples, ps->ps_flags, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); /* * If this is a process-mode PMC that is attached to * its owner, and if the PC is in user mode, update * profiling statistics like timer-based profiling * would have done. */ if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) { if (ps->ps_flags & PMC_CC_F_USERSPACE) { td = FIRST_THREAD_IN_PROC(po->po_owner); addupc_intr(td, ps->ps_pc[0], 1); } goto entrydone; } /* * Otherwise, this is either a sampling mode PMC that * is attached to a different process than its owner, * or a system-wide sampling PMC. Dispatch a log * entry to the PMC's owner process. */ pmclog_process_callchain(pm, ps); entrydone: ps->ps_nsamples = 0; /* mark entry as free */ atomic_subtract_rel_int(&pm->pm_runcount, 1); /* increment read pointer, modulo sample ring size */ if (++ps == psb->ps_fence) psb->ps_read = psb->ps_samples; else psb->ps_read = ps; } atomic_add_int(&pmc_stats.pm_log_sweeps, 1); /* Do not re-enable stalled PMCs if we failed to process any samples */ if (n == 0) return; /* * Restart any stalled sampling PMCs on this CPU. * * If the NMI handler sets the pm_stalled field of a PMC after * the check below, we'll end up processing the stalled PMC at * the next hardclock tick. */ for (n = 0; n < md->pmd_npmc; n++) { pcd = pmc_ri_to_classdep(md, n, &adjri); KASSERT(pcd != NULL, ("[pmc,%d] null pcd ri=%d", __LINE__, n)); (void) (*pcd->pcd_get_config)(cpu,adjri,&pm); if (pm == NULL || /* !cfg'ed */ pm->pm_state != PMC_STATE_RUNNING || /* !active */ !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || /* !sampling */ !CPU_ISSET(cpu, &pm->pm_cpustate) || /* !desired */ !CPU_ISSET(cpu, &pm->pm_stalled)) /* !stalled */ continue; CPU_CLR_ATOMIC(cpu, &pm->pm_stalled); (*pcd->pcd_start_pmc)(cpu, adjri); } } /* * Event handlers. */ /* * Handle a process exit. * * Remove this process from all hash tables. If this process * owned any PMCs, turn off those PMCs and deallocate them, * removing any associations with target processes. * * This function will be called by the last 'thread' of a * process. * * XXX This eventhandler gets called early in the exit process. * Consider using a 'hook' invocation from thread_exit() or equivalent * spot. Another negative is that kse_exit doesn't seem to call * exit1() [??]. * */ static void pmc_process_exit(void *arg __unused, struct proc *p) { struct pmc *pm; int adjri, cpu; unsigned int ri; int is_using_hwpmcs; struct pmc_owner *po; struct pmc_process *pp; struct pmc_classdep *pcd; pmc_value_t newvalue, tmp; PROC_LOCK(p); is_using_hwpmcs = p->p_flag & P_HWPMC; PROC_UNLOCK(p); /* * Log a sysexit event to all SS PMC owners. */ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_sysexit(po, p->p_pid); if (!is_using_hwpmcs) return; PMC_GET_SX_XLOCK(); PMCDBG3(PRC,EXT,1,"process-exit proc=%p (%d, %s)", p, p->p_pid, p->p_comm); /* * Since this code is invoked by the last thread in an exiting * process, we would have context switched IN at some prior * point. However, with PREEMPTION, kernel mode context * switches may happen any time, so we want to disable a * context switch OUT till we get any PMCs targeting this * process off the hardware. * * We also need to atomically remove this process' * entry from our target process hash table, using * PMC_FLAG_REMOVE.
*/ PMCDBG3(PRC,EXT,1, "process-exit proc=%p (%d, %s)", p, p->p_pid, p->p_comm); critical_enter(); /* no preemption */ cpu = curthread->td_oncpu; if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_REMOVE)) != NULL) { PMCDBG2(PRC,EXT,2, "process-exit proc=%p pmc-process=%p", p, pp); /* * The exiting process could be the target of * some PMCs which will be running on the * currently executing CPU. * * We need to turn these PMCs off like we * would do at context switch OUT time. */ for (ri = 0; ri < md->pmd_npmc; ri++) { /* * Pick up the pmc pointer from hardware * state similar to the CSW_OUT code. */ pm = NULL; pcd = pmc_ri_to_classdep(md, ri, &adjri); (void) (*pcd->pcd_get_config)(cpu, adjri, &pm); PMCDBG2(PRC,EXT,2, "ri=%d pm=%p", ri, pm); if (pm == NULL || !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) continue; PMCDBG4(PRC,EXT,2, "ppmcs[%d]=%p pm=%p " "state=%d", ri, pp->pp_pmcs[ri].pp_pmc, pm, pm->pm_state); KASSERT(PMC_TO_ROWINDEX(pm) == ri, ("[pmc,%d] ri mismatch pmc(%d) ri(%d)", __LINE__, PMC_TO_ROWINDEX(pm), ri)); KASSERT(pm == pp->pp_pmcs[ri].pp_pmc, ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc)); KASSERT(pm->pm_runcount > 0, ("[pmc,%d] bad runcount ri %d rc %d", __LINE__, ri, pm->pm_runcount)); /* * Change desired state, and then stop if not * stalled. This two-step dance should avoid * race conditions where an interrupt re-enables * the PMC after this code has already checked * the pm_stalled flag. */ if (CPU_ISSET(cpu, &pm->pm_cpustate)) { CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate); if (!CPU_ISSET(cpu, &pm->pm_stalled)) { (void) pcd->pcd_stop_pmc(cpu, adjri); pcd->pcd_read_pmc(cpu, adjri, &newvalue); tmp = newvalue - PMC_PCPU_SAVED(cpu,ri); mtx_pool_lock_spin(pmc_mtxpool, pm); pm->pm_gv.pm_savedvalue += tmp; pp->pp_pmcs[ri].pp_pmcval += tmp; mtx_pool_unlock_spin(pmc_mtxpool, pm); } } atomic_subtract_rel_int(&pm->pm_runcount,1); KASSERT((int) pm->pm_runcount >= 0, ("[pmc,%d] runcount is %d", __LINE__, ri)); (void) pcd->pcd_config_pmc(cpu, adjri, NULL); } /* * Inform the MD layer of this pseudo "context switch * out" */ (void) md->pmd_switch_out(pmc_pcpu[cpu], pp); critical_exit(); /* ok to be pre-empted now */ /* * Unlink this process from the PMCs that are * targeting it. This will send a signal to * all PMC owners whose PMCs are orphaned. * * Log PMC value at exit time if requested. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) { if (pm->pm_flags & PMC_F_NEEDS_LOGFILE && PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm))) pmclog_process_procexit(pm, pp); pmc_unlink_target_process(pm, pp); } free(pp, M_PMC); } else critical_exit(); /* pp == NULL */ /* * If the process owned PMCs, free them up and free up * memory. */ if ((po = pmc_find_owner_descriptor(p)) != NULL) { pmc_remove_owner(po); pmc_destroy_owner_descriptor(po); } sx_xunlock(&pmc_sx); } /* * Handle a process fork. * * If the parent process 'p1' is under HWPMC monitoring, then copy * over any attached PMCs that have 'do_descendants' semantics. */ static void pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *newproc, int flags) { int is_using_hwpmcs; unsigned int ri; uint32_t do_descendants; struct pmc *pm; struct pmc_owner *po; struct pmc_process *ppnew, *ppold; (void) flags; /* unused parameter */ PROC_LOCK(p1); is_using_hwpmcs = p1->p_flag & P_HWPMC; PROC_UNLOCK(p1); /* * If there are system-wide sampling PMCs active, we need to * log all fork events to their owner's logs.
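pmc_process_fork() decides whether anything must be inherited by OR-ing the PMC_F_DESCENDANTS flag across every row attached to the parent and bailing out early when the result is zero (the do_descendants scan that follows). A reduced sketch of that scan, with the driver's structures collapsed to bare flag words:

#include <stdint.h>
#include <stdio.h>

#define PMC_F_DESC	0x01	/* stands in for PMC_F_DESCENDANTS */

static uint32_t
wants_descendants(const uint32_t *row_flags, int npmc)
{
	uint32_t do_descendants = 0;
	int ri;

	for (ri = 0; ri < npmc; ri++)
		do_descendants |= row_flags[ri] & PMC_F_DESC;
	return (do_descendants);	/* zero means nothing to inherit */
}

int
main(void)
{
	uint32_t rows[4] = { 0, PMC_F_DESC, 0, 0 };

	printf("inherit: %s\n", wants_descendants(rows, 4) ? "yes" : "no");
	return (0);
}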
*/ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); if (!is_using_hwpmcs) return; PMC_GET_SX_XLOCK(); PMCDBG4(PMC,FRK,1, "process-fork proc=%p (%d, %s) -> %p", p1, p1->p_pid, p1->p_comm, newproc); /* * If the parent process (curthread->td_proc) is a * target of any PMCs, look for PMCs that are to be * inherited, and link these into the new process * descriptor. */ if ((ppold = pmc_find_process_descriptor(curthread->td_proc, PMC_FLAG_NONE)) == NULL) goto done; /* nothing to do */ do_descendants = 0; for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL) do_descendants |= pm->pm_flags & PMC_F_DESCENDANTS; if (do_descendants == 0) /* nothing to do */ goto done; /* allocate a descriptor for the new process */ if ((ppnew = pmc_find_process_descriptor(newproc, PMC_FLAG_ALLOCATE)) == NULL) goto done; /* * Run through all PMCs that were targeting the old process * and which specified F_DESCENDANTS and attach them to the * new process. * * Log the fork event to all owners of PMCs attached to this * process, if not already logged. */ for (ri = 0; ri < md->pmd_npmc; ri++) if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL && (pm->pm_flags & PMC_F_DESCENDANTS)) { pmc_link_target_process(pm, ppnew); po = pm->pm_owner; if (po->po_sscount == 0 && po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); } /* * Now mark the new process as being tracked by this driver. */ PROC_LOCK(newproc); newproc->p_flag |= P_HWPMC; PROC_UNLOCK(newproc); done: sx_xunlock(&pmc_sx); } static void pmc_kld_load(void *arg __unused, linker_file_t lf) { struct pmc_owner *po; sx_slock(&pmc_sx); /* * Notify owners of system sampling PMCs about KLD operations. */ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_in(po, (pid_t) -1, (uintfptr_t) lf->address, lf->filename); /* * TODO: Notify owners of (all) process-sampling PMCs too. */ sx_sunlock(&pmc_sx); } static void pmc_kld_unload(void *arg __unused, const char *filename __unused, caddr_t address, size_t size) { struct pmc_owner *po; sx_slock(&pmc_sx); LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_out(po, (pid_t) -1, (uintfptr_t) address, (uintfptr_t) address + size); /* * TODO: Notify owners of process-sampling PMCs. */ sx_sunlock(&pmc_sx); } /* * initialization */ static const char * pmc_name_of_pmcclass(enum pmc_class class) { switch (class) { #undef __PMC_CLASS #define __PMC_CLASS(S,V,D) \ case PMC_CLASS_##S: \ return #S; __PMC_CLASSES(); default: return (""); } } /* * Base class initializer: allocate structure and set default classes. */ struct pmc_mdep * pmc_mdep_alloc(int nclasses) { struct pmc_mdep *md; int n; /* SOFT + md classes */ n = 1 + nclasses; md = malloc(sizeof(struct pmc_mdep) + n * sizeof(struct pmc_classdep), M_PMC, M_WAITOK|M_ZERO); md->pmd_nclass = n; /* Add base class. 
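pmc_mdep_alloc() sizes one allocation to hold the descriptor header plus 1 + nclasses class rows (the extra row is the SOFT base class). A userland sketch of the same header-plus-trailing-array layout, written with a C99 flexible array member; the names below are local to the sketch:

#include <assert.h>
#include <stdlib.h>

struct classdep {
	int	num;		/* rows contributed by this class */
};

struct mdep {
	int		nclass;
	struct classdep	classes[];	/* flexible array member */
};

static struct mdep *
mdep_alloc(int nclasses)
{
	int n = 1 + nclasses;	/* one extra row for the base class */
	struct mdep *md;

	/* One calloc covers the header and all class descriptors. */
	md = calloc(1, sizeof(*md) + n * sizeof(struct classdep));
	if (md != NULL)
		md->nclass = n;
	return (md);
}

int
main(void)
{
	struct mdep *md = mdep_alloc(2);

	assert(md != NULL && md->nclass == 3);
	free(md);
	return (0);
}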
*/ pmc_soft_initialize(md); return md; } void pmc_mdep_free(struct pmc_mdep *md) { pmc_soft_finalize(md); free(md, M_PMC); } static int generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp) { (void) pc; (void) pp; return (0); } static int generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp) { (void) pc; (void) pp; return (0); } static struct pmc_mdep * pmc_generic_cpu_initialize(void) { struct pmc_mdep *md; md = pmc_mdep_alloc(0); md->pmd_cputype = PMC_CPU_GENERIC; md->pmd_pcpu_init = NULL; md->pmd_pcpu_fini = NULL; md->pmd_switch_in = generic_switch_in; md->pmd_switch_out = generic_switch_out; return (md); } static void pmc_generic_cpu_finalize(struct pmc_mdep *md) { (void) md; } static int pmc_initialize(void) { int c, cpu, error, n, ri; unsigned int maxcpu; struct pmc_binding pb; struct pmc_sample *ps; struct pmc_classdep *pcd; struct pmc_samplebuffer *sb; md = NULL; error = 0; #ifdef HWPMC_DEBUG /* parse debug flags first */ if (TUNABLE_STR_FETCH(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr, sizeof(pmc_debugstr))) pmc_debugflags_parse(pmc_debugstr, pmc_debugstr+strlen(pmc_debugstr)); #endif PMCDBG1(MOD,INI,0, "PMC Initialize (version %x)", PMC_VERSION); /* check kernel version */ if (pmc_kernel_version != PMC_VERSION) { if (pmc_kernel_version == 0) printf("hwpmc: this kernel has not been compiled with " "'options HWPMC_HOOKS'.\n"); else printf("hwpmc: kernel version (0x%x) does not match " "module version (0x%x).\n", pmc_kernel_version, PMC_VERSION); return EPROGMISMATCH; } /* * check sysctl parameters */ if (pmc_hashsize <= 0) { (void) printf("hwpmc: tunable \"hashsize\"=%d must be " "greater than zero.\n", pmc_hashsize); pmc_hashsize = PMC_HASH_SIZE; } if (pmc_nsamples <= 0 || pmc_nsamples > 65535) { (void) printf("hwpmc: tunable \"nsamples\"=%d out of " "range.\n", pmc_nsamples); pmc_nsamples = PMC_NSAMPLES; } if (pmc_callchaindepth <= 0 || pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) { (void) printf("hwpmc: tunable \"callchaindepth\"=%d out of " "range - using %d.\n", pmc_callchaindepth, PMC_CALLCHAIN_DEPTH_MAX); pmc_callchaindepth = PMC_CALLCHAIN_DEPTH_MAX; } md = pmc_md_initialize(); if (md == NULL) { /* Default to generic CPU. */ md = pmc_generic_cpu_initialize(); if (md == NULL) return (ENOSYS); } KASSERT(md->pmd_nclass >= 1 && md->pmd_npmc >= 1, ("[pmc,%d] no classes or pmcs", __LINE__)); /* Compute the map from row-indices to classdep pointers. */ pmc_rowindex_to_classdep = malloc(sizeof(struct pmc_classdep *) * md->pmd_npmc, M_PMC, M_WAITOK|M_ZERO); for (n = 0; n < md->pmd_npmc; n++) pmc_rowindex_to_classdep[n] = NULL; for (ri = c = 0; c < md->pmd_nclass; c++) { pcd = &md->pmd_classdep[c]; for (n = 0; n < pcd->pcd_num; n++, ri++) pmc_rowindex_to_classdep[ri] = pcd; } KASSERT(ri == md->pmd_npmc, ("[pmc,%d] npmc miscomputed: ri=%d, md->npmc=%d", __LINE__, ri, md->pmd_npmc)); maxcpu = pmc_cpu_max(); /* allocate space for the per-cpu array */ pmc_pcpu = malloc(maxcpu * sizeof(struct pmc_cpu *), M_PMC, M_WAITOK|M_ZERO); /* per-cpu 'saved values' for managing process-mode PMCs */ pmc_pcpu_saved = malloc(sizeof(pmc_value_t) * maxcpu * md->pmd_npmc, M_PMC, M_WAITOK); /* Perform CPU-dependent initialization. 
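The row-index map built in pmc_initialize() hands out consecutive row numbers class by class, pcd_num rows per class, and then asserts that the total matches pmd_npmc. A sketch of that fill, storing class indices where the driver stores classdep pointers:

#include <assert.h>

#define NCLASS	2
#define NPMC	6

static int
build_row_map(const int *per_class, int nclass, int *row_to_class)
{
	int c, n, ri = 0;

	for (c = 0; c < nclass; c++)
		for (n = 0; n < per_class[c]; n++)
			row_to_class[ri++] = c;
	return (ri);		/* must equal the total PMC count */
}

int
main(void)
{
	int counts[NCLASS] = { 2, 4 };	/* e.g. SOFT rows, then MD rows */
	int map[NPMC];

	assert(build_row_map(counts, NCLASS, map) == NPMC);
	assert(map[0] == 0 && map[2] == 1 && map[5] == 1);
	return (0);
}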
*/ pmc_save_cpu_binding(&pb); error = 0; for (cpu = 0; error == 0 && cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; pmc_select_cpu(cpu); pmc_pcpu[cpu] = malloc(sizeof(struct pmc_cpu) + md->pmd_npmc * sizeof(struct pmc_hw *), M_PMC, M_WAITOK|M_ZERO); if (md->pmd_pcpu_init) error = md->pmd_pcpu_init(md, cpu); for (n = 0; error == 0 && n < md->pmd_nclass; n++) error = md->pmd_classdep[n].pcd_pcpu_init(md, cpu); } pmc_restore_cpu_binding(&pb); if (error) return (error); /* allocate space for the sample array */ for (cpu = 0; cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; sb = malloc(sizeof(struct pmc_samplebuffer) + pmc_nsamples * sizeof(struct pmc_sample), M_PMC, M_WAITOK|M_ZERO); sb->ps_read = sb->ps_write = sb->ps_samples; sb->ps_fence = sb->ps_samples + pmc_nsamples; KASSERT(pmc_pcpu[cpu] != NULL, ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); sb->ps_callchains = malloc(pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), M_PMC, M_WAITOK|M_ZERO); for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) ps->ps_pc = sb->ps_callchains + (n * pmc_callchaindepth); pmc_pcpu[cpu]->pc_sb[PMC_HR] = sb; sb = malloc(sizeof(struct pmc_samplebuffer) + pmc_nsamples * sizeof(struct pmc_sample), M_PMC, M_WAITOK|M_ZERO); sb->ps_read = sb->ps_write = sb->ps_samples; sb->ps_fence = sb->ps_samples + pmc_nsamples; KASSERT(pmc_pcpu[cpu] != NULL, ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); sb->ps_callchains = malloc(pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), M_PMC, M_WAITOK|M_ZERO); for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) ps->ps_pc = sb->ps_callchains + (n * pmc_callchaindepth); pmc_pcpu[cpu]->pc_sb[PMC_SR] = sb; } /* allocate space for the row disposition array */ pmc_pmcdisp = malloc(sizeof(enum pmc_mode) * md->pmd_npmc, M_PMC, M_WAITOK|M_ZERO); /* mark all PMCs as available */ for (n = 0; n < (int) md->pmd_npmc; n++) PMC_MARK_ROW_FREE(n); /* allocate thread hash tables */ pmc_ownerhash = hashinit(pmc_hashsize, M_PMC, &pmc_ownerhashmask); pmc_processhash = hashinit(pmc_hashsize, M_PMC, &pmc_processhashmask); mtx_init(&pmc_processhash_mtx, "pmc-process-hash", "pmc-leaf", MTX_SPIN); LIST_INIT(&pmc_ss_owners); pmc_ss_count = 0; /* allocate a pool of spin mutexes */ pmc_mtxpool = mtx_pool_create("pmc-leaf", pmc_mtxpool_size, MTX_SPIN); PMCDBG4(MOD,INI,1, "pmc_ownerhash=%p, mask=0x%lx " "targethash=%p mask=0x%lx", pmc_ownerhash, pmc_ownerhashmask, pmc_processhash, pmc_processhashmask); /* register process {exit,fork,exec} handlers */ pmc_exit_tag = EVENTHANDLER_REGISTER(process_exit, pmc_process_exit, NULL, EVENTHANDLER_PRI_ANY); pmc_fork_tag = EVENTHANDLER_REGISTER(process_fork, pmc_process_fork, NULL, EVENTHANDLER_PRI_ANY); /* register kld event handlers */ pmc_kld_load_tag = EVENTHANDLER_REGISTER(kld_load, pmc_kld_load, NULL, EVENTHANDLER_PRI_ANY); pmc_kld_unload_tag = EVENTHANDLER_REGISTER(kld_unload, pmc_kld_unload, NULL, EVENTHANDLER_PRI_ANY); /* initialize logging */ pmclog_initialize(); /* set hook functions */ pmc_intr = md->pmd_intr; pmc_hook = pmc_hook_handler; if (error == 0) { printf(PMC_MODULE_NAME ":"); for (n = 0; n < (int) md->pmd_nclass; n++) { pcd = &md->pmd_classdep[n]; printf(" %s/%d/%d/0x%b", pmc_name_of_pmcclass(pcd->pcd_class), pcd->pcd_num, pcd->pcd_width, pcd->pcd_caps, "\20" "\1INT\2USR\3SYS\4EDG\5THR" "\6REA\7WRI\10INV\11QUA\12PRC" "\13TAG\14CSC"); } printf("\n"); } return (error); } /* prepare to be unloaded */ static void pmc_cleanup(void) { int c, cpu; unsigned int maxcpu; struct pmc_ownerhash 
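Each per-CPU buffer above gets one contiguous callchain arena of pmc_callchaindepth * pmc_nsamples slots, carved into per-sample slices, so the sampling path itself never allocates. A sketch of the carving, with invented names:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

struct samp {
	uintptr_t	*pc;	/* stands in for ps_pc */
};

/* One allocation, one free; each sample points at its own slice. */
static uintptr_t *
attach_callchains(struct samp *samples, int nsamples, int depth)
{
	uintptr_t *arena;
	int n;

	arena = calloc((size_t)nsamples * depth, sizeof(uintptr_t));
	if (arena == NULL)
		return (NULL);
	for (n = 0; n < nsamples; n++)
		samples[n].pc = arena + (size_t)n * depth;
	return (arena);		/* owner frees the arena once */
}

int
main(void)
{
	struct samp s[8];
	uintptr_t *arena = attach_callchains(s, 8, 16);

	assert(arena != NULL && s[3].pc == arena + 3 * 16);
	free(arena);
	return (0);
}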
*ph; struct pmc_owner *po, *tmp; struct pmc_binding pb; #ifdef HWPMC_DEBUG struct pmc_processhash *prh; #endif PMCDBG0(MOD,INI,0, "cleanup"); /* switch off sampling */ CPU_ZERO(&pmc_cpumask); pmc_intr = NULL; sx_xlock(&pmc_sx); if (pmc_hook == NULL) { /* being unloaded already */ sx_xunlock(&pmc_sx); return; } pmc_hook = NULL; /* prevent new threads from entering module */ /* deregister event handlers */ EVENTHANDLER_DEREGISTER(process_fork, pmc_fork_tag); EVENTHANDLER_DEREGISTER(process_exit, pmc_exit_tag); EVENTHANDLER_DEREGISTER(kld_load, pmc_kld_load_tag); EVENTHANDLER_DEREGISTER(kld_unload, pmc_kld_unload_tag); /* send SIGBUS to all owner threads, free up allocations */ if (pmc_ownerhash) for (ph = pmc_ownerhash; ph <= &pmc_ownerhash[pmc_ownerhashmask]; ph++) { LIST_FOREACH_SAFE(po, ph, po_next, tmp) { pmc_remove_owner(po); /* send SIGBUS to owner processes */ PMCDBG3(MOD,INI,2, "cleanup signal proc=%p " "(%d, %s)", po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm); PROC_LOCK(po->po_owner); kern_psignal(po->po_owner, SIGBUS); PROC_UNLOCK(po->po_owner); pmc_destroy_owner_descriptor(po); } } /* reclaim allocated data structures */ if (pmc_mtxpool) mtx_pool_destroy(&pmc_mtxpool); mtx_destroy(&pmc_processhash_mtx); if (pmc_processhash) { #ifdef HWPMC_DEBUG struct pmc_process *pp; PMCDBG0(MOD,INI,3, "destroy process hash"); for (prh = pmc_processhash; prh <= &pmc_processhash[pmc_processhashmask]; prh++) LIST_FOREACH(pp, prh, pp_next) PMCDBG1(MOD,INI,3, "pid=%d", pp->pp_proc->p_pid); #endif hashdestroy(pmc_processhash, M_PMC, pmc_processhashmask); pmc_processhash = NULL; } if (pmc_ownerhash) { PMCDBG0(MOD,INI,3, "destroy owner hash"); hashdestroy(pmc_ownerhash, M_PMC, pmc_ownerhashmask); pmc_ownerhash = NULL; } KASSERT(LIST_EMPTY(&pmc_ss_owners), ("[pmc,%d] Global SS owner list not empty", __LINE__)); KASSERT(pmc_ss_count == 0, ("[pmc,%d] Global SS count not empty", __LINE__)); /* do processor and pmc-class dependent cleanup */ maxcpu = pmc_cpu_max(); PMCDBG0(MOD,INI,3, "md cleanup"); if (md) { pmc_save_cpu_binding(&pb); for (cpu = 0; cpu < maxcpu; cpu++) { PMCDBG2(MOD,INI,1,"pmc-cleanup cpu=%d pcs=%p", cpu, pmc_pcpu[cpu]); if (!pmc_cpu_is_active(cpu) || pmc_pcpu[cpu] == NULL) continue; pmc_select_cpu(cpu); for (c = 0; c < md->pmd_nclass; c++) md->pmd_classdep[c].pcd_pcpu_fini(md, cpu); if (md->pmd_pcpu_fini) md->pmd_pcpu_fini(md, cpu); } if (md->pmd_cputype == PMC_CPU_GENERIC) pmc_generic_cpu_finalize(md); else pmc_md_finalize(md); pmc_mdep_free(md); md = NULL; pmc_restore_cpu_binding(&pb); } /* Free per-cpu descriptors. */ for (cpu = 0; cpu < maxcpu; cpu++) { if (!pmc_cpu_is_active(cpu)) continue; KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_HR] != NULL, ("[pmc,%d] Null hw cpu sample buffer cpu=%d", __LINE__, cpu)); KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_SR] != NULL, ("[pmc,%d] Null sw cpu sample buffer cpu=%d", __LINE__, cpu)); free(pmc_pcpu[cpu]->pc_sb[PMC_HR]->ps_callchains, M_PMC); free(pmc_pcpu[cpu]->pc_sb[PMC_HR], M_PMC); free(pmc_pcpu[cpu]->pc_sb[PMC_SR]->ps_callchains, M_PMC); free(pmc_pcpu[cpu]->pc_sb[PMC_SR], M_PMC); free(pmc_pcpu[cpu], M_PMC); } free(pmc_pcpu, M_PMC); pmc_pcpu = NULL; free(pmc_pcpu_saved, M_PMC); pmc_pcpu_saved = NULL; if (pmc_pmcdisp) { free(pmc_pmcdisp, M_PMC); pmc_pmcdisp = NULL; } if (pmc_rowindex_to_classdep) { free(pmc_rowindex_to_classdep, M_PMC); pmc_rowindex_to_classdep = NULL; } pmclog_shutdown(); sx_xunlock(&pmc_sx); /* we are done */ } /* * The function called at load/unload. 
*/ static int load (struct module *module __unused, int cmd, void *arg __unused) { int error; error = 0; switch (cmd) { case MOD_LOAD : /* initialize the subsystem */ error = pmc_initialize(); if (error != 0) break; PMCDBG2(MOD,INI,1, "syscall=%d maxcpu=%d", pmc_syscall_num, pmc_cpu_max()); break; case MOD_UNLOAD : case MOD_SHUTDOWN: pmc_cleanup(); PMCDBG0(MOD,INI,1, "unloaded"); break; default : error = EINVAL; /* XXX should panic(9) */ break; } return error; } /* memory pool */ MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module"); Index: projects/powernv/dev/isp/isp.c =================================================================== --- projects/powernv/dev/isp/isp.c (revision 290990) +++ projects/powernv/dev/isp/isp.c (revision 290991) @@ -1,8550 +1,8553 @@ /*- * Copyright (c) 1997-2009 by Matthew Jacob * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Machine and OS Independent (well, as best as possible) * code for the Qlogic ISP SCSI and FC-SCSI adapters. */ /* * Inspiration and ideas about this driver are from Erik Moe's Linux driver * (qlogicisp.c) and Dave Miller's SBus version of same (qlogicisp.c). Some * ideas dredged from the Solaris driver. */ /* * Include header file appropriate for platform we're building on. 
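The load() handler above is a standard kld module event handler. Its registration glue is not part of this hunk; for reference, the generic FreeBSD pattern looks like the sketch below. The module name and ordering are placeholders, not hwpmc's actual registration.

#include <sys/param.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/systm.h>

static int
example_modevent(module_t mod __unused, int cmd, void *arg __unused)
{
	switch (cmd) {
	case MOD_LOAD:
		return (0);		/* subsystem init would go here */
	case MOD_UNLOAD:
	case MOD_SHUTDOWN:
		return (0);		/* teardown would go here */
	default:
		return (EOPNOTSUPP);	/* other events not handled */
	}
}

static moduledata_t example_mod = {
	"example",		/* module name */
	example_modevent,	/* event handler */
	NULL			/* extra data */
};
DECLARE_MODULE(example, example_mod, SI_SUB_DRIVERS, SI_ORDER_ANY);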
*/ #ifdef __NetBSD__ #include __KERNEL_RCSID(0, "$NetBSD$"); #include #endif #ifdef __FreeBSD__ #include __FBSDID("$FreeBSD$"); #include #endif #ifdef __OpenBSD__ #include #endif #ifdef __linux__ #include "isp_linux.h" #endif #ifdef __svr4__ #include "isp_solaris.h" #endif /* * General defines */ #define MBOX_DELAY_COUNT 1000000 / 100 #define ISP_MARK_PORTDB(a, b, c) \ do { \ isp_prt(isp, ISP_LOG_SANCFG, \ "Chan %d ISP_MARK_PORTDB@LINE %d", (b), __LINE__); \ isp_mark_portdb((a), (b), (c)); \ } while (0) /* * Local static data */ static const char fconf[] = "Chan %d PortDB[%d] changed:\n current =(0x%x@0x%06x 0x%08x%08x 0x%08x%08x)\n database=(0x%x@0x%06x 0x%08x%08x 0x%08x%08x)"; static const char notresp[] = "Not RESPONSE in RESPONSE Queue (type 0x%x) @ idx %d (next %d) nlooked %d"; static const char topology[] = "Chan %d WWPN 0x%08x%08x PortID 0x%06x handle 0x%x, Connection '%s'"; static const char bun[] = "bad underrun (count %d, resid %d, status %s)"; static const char lipd[] = "Chan %d LIP destroyed %d active commands"; static const char sacq[] = "unable to acquire scratch area"; static const uint8_t alpa_map[] = { 0xef, 0xe8, 0xe4, 0xe2, 0xe1, 0xe0, 0xdc, 0xda, 0xd9, 0xd6, 0xd5, 0xd4, 0xd3, 0xd2, 0xd1, 0xce, 0xcd, 0xcc, 0xcb, 0xca, 0xc9, 0xc7, 0xc6, 0xc5, 0xc3, 0xbc, 0xba, 0xb9, 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1, 0xae, 0xad, 0xac, 0xab, 0xaa, 0xa9, 0xa7, 0xa6, 0xa5, 0xa3, 0x9f, 0x9e, 0x9d, 0x9b, 0x98, 0x97, 0x90, 0x8f, 0x88, 0x84, 0x82, 0x81, 0x80, 0x7c, 0x7a, 0x79, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x67, 0x66, 0x65, 0x63, 0x5c, 0x5a, 0x59, 0x56, 0x55, 0x54, 0x53, 0x52, 0x51, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, 0x47, 0x46, 0x45, 0x43, 0x3c, 0x3a, 0x39, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x27, 0x26, 0x25, 0x23, 0x1f, 0x1e, 0x1d, 0x1b, 0x18, 0x17, 0x10, 0x0f, 0x08, 0x04, 0x02, 0x01, 0x00 }; /* * Local function prototypes. 
*/ static int isp_parse_async(ispsoftc_t *, uint16_t); static int isp_parse_async_fc(ispsoftc_t *, uint16_t); static int isp_handle_other_response(ispsoftc_t *, int, isphdr_t *, uint32_t *); static void isp_parse_status(ispsoftc_t *, ispstatusreq_t *, XS_T *, long *); static void isp_parse_status_24xx(ispsoftc_t *, isp24xx_statusreq_t *, XS_T *, long *); static void isp_fastpost_complete(ispsoftc_t *, uint32_t); static int isp_mbox_continue(ispsoftc_t *); static void isp_scsi_init(ispsoftc_t *); static void isp_scsi_channel_init(ispsoftc_t *, int); static void isp_fibre_init(ispsoftc_t *); static void isp_fibre_init_2400(ispsoftc_t *); static void isp_mark_portdb(ispsoftc_t *, int, int); static int isp_plogx(ispsoftc_t *, int, uint16_t, uint32_t, int, int); static int isp_port_login(ispsoftc_t *, uint16_t, uint32_t); static int isp_port_logout(ispsoftc_t *, uint16_t, uint32_t); static int isp_getpdb(ispsoftc_t *, int, uint16_t, isp_pdb_t *, int); static int isp_gethandles(ispsoftc_t *, int, uint16_t *, int *, int, int); static void isp_dump_chip_portdb(ispsoftc_t *, int, int); static uint64_t isp_get_wwn(ispsoftc_t *, int, int, int); static int isp_fclink_test(ispsoftc_t *, int, int); static int isp_pdb_sync(ispsoftc_t *, int); static int isp_scan_loop(ispsoftc_t *, int); static int isp_gid_ft_sns(ispsoftc_t *, int); static int isp_gid_ft_ct_passthru(ispsoftc_t *, int); static int isp_scan_fabric(ispsoftc_t *, int); static int isp_login_device(ispsoftc_t *, int, uint32_t, isp_pdb_t *, uint16_t *); static int isp_register_fc4_type(ispsoftc_t *, int); static int isp_register_fc4_type_24xx(ispsoftc_t *, int); static uint16_t isp_next_handle(ispsoftc_t *, uint16_t *); static void isp_fw_state(ispsoftc_t *, int); static void isp_mboxcmd_qnw(ispsoftc_t *, mbreg_t *, int); static void isp_mboxcmd(ispsoftc_t *, mbreg_t *); static void isp_spi_update(ispsoftc_t *, int); static void isp_setdfltsdparm(ispsoftc_t *); static void isp_setdfltfcparm(ispsoftc_t *, int); static int isp_read_nvram(ispsoftc_t *, int); static int isp_read_nvram_2400(ispsoftc_t *, uint8_t *); static void isp_rdnvram_word(ispsoftc_t *, int, uint16_t *); static void isp_rd_2400_nvram(ispsoftc_t *, uint32_t, uint32_t *); static void isp_parse_nvram_1020(ispsoftc_t *, uint8_t *); static void isp_parse_nvram_1080(ispsoftc_t *, int, uint8_t *); static void isp_parse_nvram_12160(ispsoftc_t *, int, uint8_t *); static void isp_parse_nvram_2100(ispsoftc_t *, uint8_t *); static void isp_parse_nvram_2400(ispsoftc_t *, uint8_t *); /* * Reset Hardware. * * Hit the chip over the head, download new f/w if available and set it running. * * Locking done elsewhere. */ void isp_reset(ispsoftc_t *isp, int do_load_defaults) { mbreg_t mbs; char *buf; uint64_t fwt; uint32_t code_org, val; int loops, i, dodnld = 1; const char *btype = "????"; static const char dcrc[] = "Downloaded RISC Code Checksum Failure"; isp->isp_state = ISP_NILSTATE; if (isp->isp_dead) { isp_shutdown(isp); ISP_DISABLE_INTS(isp); return; } /* * Basic types (SCSI, FibreChannel and PCI or SBus) * have been set in the MD code. We figure out more * here. Possibly more refined types based upon PCI * identification. Chip revision has been gathered. * * After we've fired this chip up, zero out the conf1 register * for SCSI adapters and do other settings for the 2100. */ ISP_DISABLE_INTS(isp); /* * Pick an initial maxcmds value which will be used * to allocate xflist pointer space. It may be changed * later by the firmware. 
*/ if (IS_24XX(isp)) { isp->isp_maxcmds = 4096; } else if (IS_2322(isp)) { isp->isp_maxcmds = 2048; } else if (IS_23XX(isp) || IS_2200(isp)) { isp->isp_maxcmds = 1024; } else { isp->isp_maxcmds = 512; } /* * Set up DMA for the request and response queues. * * We do this now so we can use the request queue * for dma to load firmware from. */ if (ISP_MBOXDMASETUP(isp) != 0) { isp_prt(isp, ISP_LOGERR, "Cannot setup DMA"); return; } /* * Set up default request/response queue in-pointer/out-pointer * register indices. */ if (IS_24XX(isp)) { isp->isp_rqstinrp = BIU2400_REQINP; isp->isp_rqstoutrp = BIU2400_REQOUTP; isp->isp_respinrp = BIU2400_RSPINP; isp->isp_respoutrp = BIU2400_RSPOUTP; } else if (IS_23XX(isp)) { isp->isp_rqstinrp = BIU_REQINP; isp->isp_rqstoutrp = BIU_REQOUTP; isp->isp_respinrp = BIU_RSPINP; isp->isp_respoutrp = BIU_RSPOUTP; } else { isp->isp_rqstinrp = INMAILBOX4; isp->isp_rqstoutrp = OUTMAILBOX4; isp->isp_respinrp = OUTMAILBOX5; isp->isp_respoutrp = INMAILBOX5; } /* * Put the board into PAUSE mode (so we can read the SXP registers * or write FPM/FBM registers). */ if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_CLEAR_HOST_INT); ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_CLEAR_RISC_INT); ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_PAUSE); } else { ISP_WRITE(isp, HCCR, HCCR_CMD_PAUSE); } if (IS_FC(isp)) { switch (isp->isp_type) { case ISP_HA_FC_2100: btype = "2100"; break; case ISP_HA_FC_2200: btype = "2200"; break; case ISP_HA_FC_2300: btype = "2300"; break; case ISP_HA_FC_2312: btype = "2312"; break; case ISP_HA_FC_2322: btype = "2322"; break; case ISP_HA_FC_2400: btype = "2422"; break; case ISP_HA_FC_2500: btype = "2532"; break; default: break; } if (!IS_24XX(isp)) { /* * While we're paused, reset the FPM module and FBM * fifos. */ ISP_WRITE(isp, BIU2100_CSR, BIU2100_FPM0_REGS); ISP_WRITE(isp, FPM_DIAG_CONFIG, FPM_SOFT_RESET); ISP_WRITE(isp, BIU2100_CSR, BIU2100_FB_REGS); ISP_WRITE(isp, FBM_CMD, FBMCMD_FIFO_RESET_ALL); ISP_WRITE(isp, BIU2100_CSR, BIU2100_RISC_REGS); } } else if (IS_1240(isp)) { sdparam *sdp; btype = "1240"; isp->isp_clock = 60; sdp = SDPARAM(isp, 0); sdp->isp_ultramode = 1; sdp = SDPARAM(isp, 1); sdp->isp_ultramode = 1; /* * XXX: Should probably do some bus sensing. 
*/ } else if (IS_ULTRA3(isp)) { sdparam *sdp = isp->isp_param; isp->isp_clock = 100; if (IS_10160(isp)) btype = "10160"; else if (IS_12160(isp)) btype = "12160"; else btype = ""; sdp->isp_lvdmode = 1; if (IS_DUALBUS(isp)) { sdp++; sdp->isp_lvdmode = 1; } } else if (IS_ULTRA2(isp)) { static const char m[] = "bus %d is in %s Mode"; uint16_t l; sdparam *sdp = SDPARAM(isp, 0); isp->isp_clock = 100; if (IS_1280(isp)) btype = "1280"; else if (IS_1080(isp)) btype = "1080"; else btype = ""; l = ISP_READ(isp, SXP_PINS_DIFF) & ISP1080_MODE_MASK; switch (l) { case ISP1080_LVD_MODE: sdp->isp_lvdmode = 1; isp_prt(isp, ISP_LOGCONFIG, m, 0, "LVD"); break; case ISP1080_HVD_MODE: sdp->isp_diffmode = 1; isp_prt(isp, ISP_LOGCONFIG, m, 0, "Differential"); break; case ISP1080_SE_MODE: sdp->isp_ultramode = 1; isp_prt(isp, ISP_LOGCONFIG, m, 0, "Single-Ended"); break; default: isp_prt(isp, ISP_LOGERR, "unknown mode on bus %d (0x%x)", 0, l); break; } if (IS_DUALBUS(isp)) { sdp = SDPARAM(isp, 1); l = ISP_READ(isp, SXP_PINS_DIFF|SXP_BANK1_SELECT); l &= ISP1080_MODE_MASK; switch (l) { case ISP1080_LVD_MODE: sdp->isp_lvdmode = 1; isp_prt(isp, ISP_LOGCONFIG, m, 1, "LVD"); break; case ISP1080_HVD_MODE: sdp->isp_diffmode = 1; isp_prt(isp, ISP_LOGCONFIG, m, 1, "Differential"); break; case ISP1080_SE_MODE: sdp->isp_ultramode = 1; isp_prt(isp, ISP_LOGCONFIG, m, 1, "Single-Ended"); break; default: isp_prt(isp, ISP_LOGERR, "unknown mode on bus %d (0x%x)", 1, l); break; } } } else { sdparam *sdp = SDPARAM(isp, 0); i = ISP_READ(isp, BIU_CONF0) & BIU_CONF0_HW_MASK; switch (i) { default: isp_prt(isp, ISP_LOGALL, "Unknown Chip Type 0x%x", i); /* FALLTHROUGH */ case 1: btype = "1020"; isp->isp_type = ISP_HA_SCSI_1020; isp->isp_clock = 40; break; case 2: /* * Some 1020A chips are Ultra Capable, but don't * run the clock rate up for that unless told to * do so by the Ultra Capable bits being set. */ btype = "1020A"; isp->isp_type = ISP_HA_SCSI_1020A; isp->isp_clock = 40; break; case 3: btype = "1040"; isp->isp_type = ISP_HA_SCSI_1040; isp->isp_clock = 60; break; case 4: btype = "1040A"; isp->isp_type = ISP_HA_SCSI_1040A; isp->isp_clock = 60; break; case 5: btype = "1040B"; isp->isp_type = ISP_HA_SCSI_1040B; isp->isp_clock = 60; break; case 6: btype = "1040C"; isp->isp_type = ISP_HA_SCSI_1040C; isp->isp_clock = 60; break; } /* * Now, while we're at it, gather info about ultra * and/or differential mode. */ if (ISP_READ(isp, SXP_PINS_DIFF) & SXP_PINS_DIFF_MODE) { isp_prt(isp, ISP_LOGCONFIG, "Differential Mode"); sdp->isp_diffmode = 1; } else { sdp->isp_diffmode = 0; } i = ISP_READ(isp, RISC_PSR); if (isp->isp_bustype == ISP_BT_SBUS) { i &= RISC_PSR_SBUS_ULTRA; } else { i &= RISC_PSR_PCI_ULTRA; } if (i != 0) { isp_prt(isp, ISP_LOGCONFIG, "Ultra Mode Capable"); sdp->isp_ultramode = 1; /* * If we're in Ultra Mode, we have to be 60MHz clock- * even for the SBus version. */ isp->isp_clock = 60; } else { sdp->isp_ultramode = 0; /* * Clock is known. Gronk. */ } /* * Machine dependent clock (if set) overrides * our generic determinations. */ if (isp->isp_mdvec->dv_clock) { if (isp->isp_mdvec->dv_clock < isp->isp_clock) { isp->isp_clock = isp->isp_mdvec->dv_clock; } } } /* * Clear instrumentation */ isp->isp_intcnt = isp->isp_intbogus = 0; /* * Do MD specific pre initialization */ ISP_RESET0(isp); /* * Hit the chip over the head with hammer, * and give it a chance to recover. */ if (IS_SCSI(isp)) { ISP_WRITE(isp, BIU_ICR, BIU_ICR_SOFT_RESET); /* * A slight delay... */ ISP_DELAY(100); /* * Clear data && control DMA engines. 
*/ ISP_WRITE(isp, CDMA_CONTROL, DMA_CNTRL_CLEAR_CHAN | DMA_CNTRL_RESET_INT); ISP_WRITE(isp, DDMA_CONTROL, DMA_CNTRL_CLEAR_CHAN | DMA_CNTRL_RESET_INT); } else if (IS_24XX(isp)) { /* * Stop DMA and wait for it to stop. */ ISP_WRITE(isp, BIU2400_CSR, BIU2400_DMA_STOP|(3 << 4)); for (val = loops = 0; loops < 30000; loops++) { ISP_DELAY(10); val = ISP_READ(isp, BIU2400_CSR); if ((val & BIU2400_DMA_ACTIVE) == 0) { break; } } if (val & BIU2400_DMA_ACTIVE) { ISP_RESET0(isp); isp_prt(isp, ISP_LOGERR, "DMA Failed to Stop on Reset"); return; } /* * Hold it in SOFT_RESET and STOP state for 100us. */ ISP_WRITE(isp, BIU2400_CSR, BIU2400_SOFT_RESET|BIU2400_DMA_STOP|(3 << 4)); ISP_DELAY(100); for (loops = 0; loops < 10000; loops++) { ISP_DELAY(5); val = ISP_READ(isp, OUTMAILBOX0); } for (val = loops = 0; loops < 500000; loops ++) { val = ISP_READ(isp, BIU2400_CSR); if ((val & BIU2400_SOFT_RESET) == 0) { break; } } if (val & BIU2400_SOFT_RESET) { ISP_RESET0(isp); isp_prt(isp, ISP_LOGERR, "Failed to come out of reset"); return; } } else { ISP_WRITE(isp, BIU2100_CSR, BIU2100_SOFT_RESET); /* * A slight delay... */ ISP_DELAY(100); /* * Clear data && control DMA engines. */ ISP_WRITE(isp, CDMA2100_CONTROL, DMA_CNTRL2100_CLEAR_CHAN | DMA_CNTRL2100_RESET_INT); ISP_WRITE(isp, TDMA2100_CONTROL, DMA_CNTRL2100_CLEAR_CHAN | DMA_CNTRL2100_RESET_INT); ISP_WRITE(isp, RDMA2100_CONTROL, DMA_CNTRL2100_CLEAR_CHAN | DMA_CNTRL2100_RESET_INT); } /* * Wait for ISP to be ready to go... */ loops = MBOX_DELAY_COUNT; for (;;) { if (IS_SCSI(isp)) { if (!(ISP_READ(isp, BIU_ICR) & BIU_ICR_SOFT_RESET)) { break; } } else if (IS_24XX(isp)) { if (ISP_READ(isp, OUTMAILBOX0) == 0) { break; } } else { if (!(ISP_READ(isp, BIU2100_CSR) & BIU2100_SOFT_RESET)) break; } ISP_DELAY(100); if (--loops < 0) { ISP_DUMPREGS(isp, "chip reset timed out"); ISP_RESET0(isp); return; } } /* * After we've fired this chip up, zero out the conf1 register * for SCSI adapters and other settings for the 2100. */ if (IS_SCSI(isp)) { ISP_WRITE(isp, BIU_CONF1, 0); } else if (!IS_24XX(isp)) { ISP_WRITE(isp, BIU2100_CSR, 0); } /* * Reset RISC Processor */ if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_RESET); ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_RELEASE); ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_CLEAR_RESET); } else { ISP_WRITE(isp, HCCR, HCCR_CMD_RESET); ISP_DELAY(100); ISP_WRITE(isp, BIU_SEMA, 0); } /* * Post-RISC Reset stuff. */ if (IS_24XX(isp)) { for (val = loops = 0; loops < 5000000; loops++) { ISP_DELAY(5); val = ISP_READ(isp, OUTMAILBOX0); if (val == 0) { break; } } if (val != 0) { ISP_RESET0(isp); isp_prt(isp, ISP_LOGERR, "reset didn't clear"); return; } } else if (IS_SCSI(isp)) { uint16_t tmp = isp->isp_mdvec->dv_conf1; /* * Busted FIFO. Turn off all but burst enables. 
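The DMA-stop and reset waits above all share one shape: re-read a register a bounded number of times, delaying between reads, and give up if the bits never clear. A compact sketch of that bounded poll, with fake register and delay callbacks standing in for ISP_READ/ISP_DELAY:

#include <stdint.h>
#include <stdio.h>

static int
poll_clear(uint32_t (*read_reg)(void), void (*delay_us)(int),
    uint32_t mask, int loops, int us)
{
	while (loops-- > 0) {
		if ((read_reg() & mask) == 0)
			return (0);	/* bits cleared in time */
		delay_us(us);
	}
	return (-1);			/* timed out; caller bails out */
}

/* Fake hardware for the sketch: the busy bit clears on the 3rd read. */
static int reads;
static uint32_t fake_read(void) { return (++reads < 3 ? 0x1 : 0x0); }
static void fake_delay(int us) { (void)us; }

int
main(void)
{
	printf("poll %s\n",
	    poll_clear(fake_read, fake_delay, 0x1, 30000, 10) == 0 ?
	    "ok" : "timed out");
	return (0);
}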
*/ if (isp->isp_type == ISP_HA_SCSI_1040A) { tmp &= BIU_BURST_ENABLE; } ISP_SETBITS(isp, BIU_CONF1, tmp); if (tmp & BIU_BURST_ENABLE) { ISP_SETBITS(isp, CDMA_CONF, DMA_ENABLE_BURST); ISP_SETBITS(isp, DDMA_CONF, DMA_ENABLE_BURST); } if (SDPARAM(isp, 0)->isp_ptisp) { if (SDPARAM(isp, 0)->isp_ultramode) { while (ISP_READ(isp, RISC_MTR) != 0x1313) { ISP_WRITE(isp, RISC_MTR, 0x1313); ISP_WRITE(isp, HCCR, HCCR_CMD_STEP); } } else { ISP_WRITE(isp, RISC_MTR, 0x1212); } /* * PTI specific register */ ISP_WRITE(isp, RISC_EMB, DUAL_BANK); } else { ISP_WRITE(isp, RISC_MTR, 0x1212); } ISP_WRITE(isp, HCCR, HCCR_CMD_RELEASE); } else { ISP_WRITE(isp, RISC_MTR2100, 0x1212); if (IS_2200(isp) || IS_23XX(isp)) { ISP_WRITE(isp, HCCR, HCCR_2X00_DISABLE_PARITY_PAUSE); } ISP_WRITE(isp, HCCR, HCCR_CMD_RELEASE); } ISP_WRITE(isp, isp->isp_rqstinrp, 0); ISP_WRITE(isp, isp->isp_rqstoutrp, 0); ISP_WRITE(isp, isp->isp_respinrp, 0); ISP_WRITE(isp, isp->isp_respoutrp, 0); if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_PRI_REQINP, 0); ISP_WRITE(isp, BIU2400_PRI_REQOUTP, 0); ISP_WRITE(isp, BIU2400_ATIO_RSPINP, 0); ISP_WRITE(isp, BIU2400_ATIO_RSPOUTP, 0); } /* * Do MD specific post initialization */ ISP_RESET1(isp); /* * Wait for everything to finish firing up. * * Avoid doing this on early 2312s because you can generate a PCI * parity error (chip breakage). */ if (IS_2312(isp) && isp->isp_revision < 2) { ISP_DELAY(100); } else { loops = MBOX_DELAY_COUNT; while (ISP_READ(isp, OUTMAILBOX0) == MBOX_BUSY) { ISP_DELAY(100); if (--loops < 0) { ISP_RESET0(isp); isp_prt(isp, ISP_LOGERR, "MBOX_BUSY never cleared on reset"); return; } } } /* * Up until this point we've done everything by just reading or * setting registers. From this point on we rely on at least *some* * kind of firmware running in the card. */ /* * Do some sanity checking by running a NOP command. * If it succeeds, the ROM firmware is now running. */ MBSINIT(&mbs, MBOX_NO_OP, MBLOGALL, 0); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGERR, "NOP command failed (%x)", mbs.param[0]); ISP_RESET0(isp); return; } /* * Do some operational tests */ if (IS_SCSI(isp) || IS_24XX(isp)) { static const uint16_t patterns[MAX_MAILBOX] = { 0x0000, 0xdead, 0xbeef, 0xffff, 0xa5a5, 0x5a5a, 0x7f7f, 0x7ff7, 0x3421, 0xabcd, 0xdcba, 0xfeef, 0xbead, 0xdebe, 0x2222, 0x3333, 0x5555, 0x6666, 0x7777, 0xaaaa, 0xffff, 0xdddd, 0x9999, 0x1fbc, 0x6666, 0x6677, 0x1122, 0x33ff, 0x0000, 0x0001, 0x1000, 0x1010, }; int nmbox = ISP_NMBOX(isp); if (IS_SCSI(isp)) nmbox = 6; MBSINIT(&mbs, MBOX_MAILBOX_REG_TEST, MBLOGALL, 0); for (i = 1; i < nmbox; i++) { mbs.param[i] = patterns[i]; } isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { ISP_RESET0(isp); return; } for (i = 1; i < nmbox; i++) { if (mbs.param[i] != patterns[i]) { ISP_RESET0(isp); isp_prt(isp, ISP_LOGERR, "Register Test Failed at Register %d: should have 0x%04x but got 0x%04x", i, patterns[i], mbs.param[i]); return; } } } /* * Download new Firmware, unless requested not to do so. * This is made slightly trickier in some cases where the * firmware of the ROM revision is newer than the revision * compiled into the driver. So, where we used to compare * versions of our f/w and the ROM f/w, now we just see * whether we have f/w at all and whether a config flag * has disabled our download. 
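The mailbox register test above writes a known pattern into each incoming mailbox, lets the ROM firmware echo it back, and fails on the first mismatch; mailbox 0 carries the command/status word, so the scan starts at 1. A miniature of that echo-and-verify, with a callback simulating the firmware round trip:

#include <stdint.h>
#include <stdio.h>

#define NMBOX	6

static int
mbox_pattern_test(uint16_t *regs, const uint16_t *patterns, int nmbox,
    void (*fw_echo)(uint16_t *, int))
{
	int i;

	for (i = 1; i < nmbox; i++)
		regs[i] = patterns[i];	/* "mailbox in" */
	fw_echo(regs, nmbox);		/* firmware round trip */
	for (i = 1; i < nmbox; i++)
		if (regs[i] != patterns[i])
			return (i);	/* first failing register */
	return (0);
}

static void
good_echo(uint16_t *regs, int nmbox)
{
	(void)regs; (void)nmbox;	/* healthy hardware echoes unchanged */
}

int
main(void)
{
	static const uint16_t patterns[NMBOX] =
	    { 0x0000, 0xdead, 0xbeef, 0xffff, 0xa5a5, 0x5a5a };
	uint16_t regs[NMBOX] = { 0 };
	int bad = mbox_pattern_test(regs, patterns, NMBOX, good_echo);

	if (bad)
		printf("failed at register %d\n", bad);
	else
		printf("register test ok\n");
	return (0);
}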
*/ if ((isp->isp_mdvec->dv_ispfw == NULL) || (isp->isp_confopts & ISP_CFG_NORELOAD)) { dodnld = 0; } if (IS_24XX(isp)) { code_org = ISP_CODE_ORG_2400; } else if (IS_23XX(isp)) { code_org = ISP_CODE_ORG_2300; } else { code_org = ISP_CODE_ORG; } if (dodnld && IS_24XX(isp)) { const uint32_t *ptr = isp->isp_mdvec->dv_ispfw; int wordload; /* * Keep loading until we run out of f/w. */ code_org = ptr[2]; /* 1st load address is our start addr */ wordload = 0; for (;;) { uint32_t la, wi, wl; isp_prt(isp, ISP_LOGDEBUG0, "load 0x%x words of code at load address 0x%x", ptr[3], ptr[2]); wi = 0; la = ptr[2]; wl = ptr[3]; while (wi < ptr[3]) { uint32_t *cp; uint32_t nw; nw = ISP_QUEUE_SIZE(RQUEST_QUEUE_LEN(isp)) >> 2; if (nw > wl) { nw = wl; } cp = isp->isp_rquest; for (i = 0; i < nw; i++) { ISP_IOXPUT_32(isp, ptr[wi++], &cp[i]); wl--; } MEMORYBARRIER(isp, SYNC_REQUEST, 0, ISP_QUEUE_SIZE(RQUEST_QUEUE_LEN(isp)), -1); again: MBSINIT(&mbs, 0, MBLOGALL, 0); if (la < 0x10000 && nw < 0x10000) { mbs.param[0] = MBOX_LOAD_RISC_RAM_2100; mbs.param[1] = la; mbs.param[2] = DMA_WD1(isp->isp_rquest_dma); mbs.param[3] = DMA_WD0(isp->isp_rquest_dma); mbs.param[4] = nw; mbs.param[6] = DMA_WD3(isp->isp_rquest_dma); mbs.param[7] = DMA_WD2(isp->isp_rquest_dma); isp_prt(isp, ISP_LOGDEBUG0, "LOAD RISC RAM 2100 %u words at load address 0x%x", nw, la); } else if (wordload) { union { const uint32_t *cp; uint32_t *np; } ucd; ucd.cp = (const uint32_t *)cp; mbs.param[0] = MBOX_WRITE_RAM_WORD_EXTENDED; mbs.param[1] = la; mbs.param[2] = (*ucd.np); mbs.param[3] = (*ucd.np) >> 16; mbs.param[8] = la >> 16; isp->isp_mbxwrk0 = nw - 1; isp->isp_mbxworkp = ucd.np+1; isp->isp_mbxwrk1 = (la + 1); isp->isp_mbxwrk8 = (la + 1) >> 16; isp_prt(isp, ISP_LOGDEBUG0, "WRITE RAM WORD EXTENDED %u words at load address 0x%x", nw, la); } else { mbs.param[0] = MBOX_LOAD_RISC_RAM; mbs.param[1] = la; mbs.param[2] = DMA_WD1(isp->isp_rquest_dma); mbs.param[3] = DMA_WD0(isp->isp_rquest_dma); mbs.param[4] = nw >> 16; mbs.param[5] = nw; mbs.param[6] = DMA_WD3(isp->isp_rquest_dma); mbs.param[7] = DMA_WD2(isp->isp_rquest_dma); mbs.param[8] = la >> 16; isp_prt(isp, ISP_LOGDEBUG0, "LOAD RISC RAM %u words at load address 0x%x", nw, la); } isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { if (mbs.param[0] == MBOX_HOST_INTERFACE_ERROR) { isp_prt(isp, ISP_LOGERR, "switching to word load"); wordload = 1; goto again; } isp_prt(isp, ISP_LOGERR, "F/W Risc Ram Load Failed"); ISP_RESET0(isp); return; } la += nw; } if (ptr[1] == 0) { break; } ptr += ptr[3]; } isp->isp_loaded_fw = 1; } else if (dodnld && IS_23XX(isp)) { const uint16_t *ptr = isp->isp_mdvec->dv_ispfw; uint16_t wi, wl, segno; uint32_t la; la = code_org; segno = 0; for (;;) { uint32_t nxtaddr; isp_prt(isp, ISP_LOGDEBUG0, "load 0x%x words of code at load address 0x%x", ptr[3], la); wi = 0; wl = ptr[3]; while (wi < ptr[3]) { uint16_t *cp; uint16_t nw; nw = ISP_QUEUE_SIZE(RQUEST_QUEUE_LEN(isp)) >> 1; if (nw > wl) { nw = wl; } if (nw > (1 << 15)) { nw = 1 << 15; } cp = isp->isp_rquest; for (i = 0; i < nw; i++) { ISP_IOXPUT_16(isp, ptr[wi++], &cp[i]); wl--; } MEMORYBARRIER(isp, SYNC_REQUEST, 0, ISP_QUEUE_SIZE(RQUEST_QUEUE_LEN(isp)), -1); MBSINIT(&mbs, 0, MBLOGALL, 0); if (la < 0x10000) { mbs.param[0] = MBOX_LOAD_RISC_RAM_2100; mbs.param[1] = la; mbs.param[2] = DMA_WD1(isp->isp_rquest_dma); mbs.param[3] = DMA_WD0(isp->isp_rquest_dma); mbs.param[4] = nw; mbs.param[6] = DMA_WD3(isp->isp_rquest_dma); mbs.param[7] = DMA_WD2(isp->isp_rquest_dma); isp_prt(isp, ISP_LOGDEBUG1, "LOAD RISC RAM 2100 %u words at 
load address 0x%x\n", nw, la); } else { mbs.param[0] = MBOX_LOAD_RISC_RAM; mbs.param[1] = la; mbs.param[2] = DMA_WD1(isp->isp_rquest_dma); mbs.param[3] = DMA_WD0(isp->isp_rquest_dma); mbs.param[4] = nw; mbs.param[6] = DMA_WD3(isp->isp_rquest_dma); mbs.param[7] = DMA_WD2(isp->isp_rquest_dma); mbs.param[8] = la >> 16; isp_prt(isp, ISP_LOGDEBUG1, "LOAD RISC RAM %u words at load address 0x%x\n", nw, la); } isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGERR, "F/W Risc Ram Load Failed"); ISP_RESET0(isp); return; } la += nw; } if (!IS_2322(isp)) { break; } if (++segno == 3) { break; } /* * If we're a 2322, the firmware actually comes in * three chunks. We loaded the first at the code_org * address. The other two chunks, which follow right * after each other in memory here, get loaded at * addresses specfied at offset 0x9..0xB. */ nxtaddr = ptr[3]; ptr = &ptr[nxtaddr]; la = ptr[5] | ((ptr[4] & 0x3f) << 16); } isp->isp_loaded_fw = 1; } else if (dodnld) { union { const uint16_t *cp; uint16_t *np; } ucd; ucd.cp = isp->isp_mdvec->dv_ispfw; isp->isp_mbxworkp = &ucd.np[1]; isp->isp_mbxwrk0 = ucd.np[3] - 1; isp->isp_mbxwrk1 = code_org + 1; MBSINIT(&mbs, MBOX_WRITE_RAM_WORD, MBLOGNONE, 0); mbs.param[1] = code_org; mbs.param[2] = ucd.np[0]; isp_prt(isp, ISP_LOGDEBUG1, "WRITE RAM %u words at load address 0x%x", ucd.np[3], code_org); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGERR, "F/W download failed at word %d", isp->isp_mbxwrk1 - code_org); ISP_RESET0(isp); return; } } else { isp->isp_loaded_fw = 0; isp_prt(isp, ISP_LOGDEBUG2, "skipping f/w download"); } /* * If we loaded firmware, verify its checksum */ if (isp->isp_loaded_fw) { MBSINIT(&mbs, MBOX_VERIFY_CHECKSUM, MBLOGNONE, 0); mbs.param[0] = MBOX_VERIFY_CHECKSUM; if (IS_24XX(isp)) { mbs.param[1] = code_org >> 16; mbs.param[2] = code_org; } else { mbs.param[1] = code_org; } isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGERR, dcrc); ISP_RESET0(isp); return; } } /* * Now start it rolling. * * If we didn't actually download f/w, * we still need to (re)start it. */ MBSINIT(&mbs, MBOX_EXEC_FIRMWARE, MBLOGALL, 5000000); if (IS_24XX(isp)) { mbs.param[1] = code_org >> 16; mbs.param[2] = code_org; if (isp->isp_loaded_fw) { mbs.param[3] = 0; } else { mbs.param[3] = 1; } if (IS_25XX(isp)) { mbs.ibits |= 0x10; } } else if (IS_2322(isp)) { mbs.param[1] = code_org; if (isp->isp_loaded_fw) { mbs.param[2] = 0; } else { mbs.param[2] = 1; } } else { mbs.param[1] = code_org; } isp_mboxcmd(isp, &mbs); if (IS_2322(isp) || IS_24XX(isp)) { if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { ISP_RESET0(isp); return; } } if (IS_SCSI(isp)) { /* * Set CLOCK RATE, but only if asked to. */ if (isp->isp_clock) { MBSINIT(&mbs, MBOX_SET_CLOCK_RATE, MBLOGALL, 0); mbs.param[1] = isp->isp_clock; isp_mboxcmd(isp, &mbs); /* we will try not to care if this fails */ } } /* * Ask the chip for the current firmware version. * This should prove that the new firmware is working. */ MBSINIT(&mbs, MBOX_ABOUT_FIRMWARE, MBLOGALL, 0); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { ISP_RESET0(isp); return; } /* * The SBus firmware that we are using apparently does not return * major, minor, micro revisions in the mailbox registers, which * is really, really, annoying. 
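The RISC RAM loops above stream the firmware image through the request queue in chunks: copy up to one queue's worth of words into the bounce buffer, issue one load command per chunk, advance the load address, and abort on the first failure. A sketch of that loop, with the mailbox command reduced to a callback:

#include <stdint.h>
#include <stdio.h>

static int
download(const uint32_t *image, uint32_t nwords, uint32_t la,
    uint32_t chunk, int (*issue_load)(const uint32_t *, uint32_t, uint32_t))
{
	uint32_t wi = 0, nw;

	while (wi < nwords) {
		nw = (nwords - wi < chunk) ? nwords - wi : chunk;
		if (issue_load(image + wi, nw, la) != 0)
			return (-1);	/* one failed chunk kills the load */
		wi += nw;
		la += nw;
	}
	return (0);
}

/* Stand-in for the LOAD RISC RAM mailbox command. */
static int
print_load(const uint32_t *words, uint32_t nw, uint32_t la)
{
	(void)words;
	printf("load %u words at 0x%x\n", nw, la);
	return (0);
}

int
main(void)
{
	uint32_t image[10] = { 0 };

	/* Emits three chunks: 4 + 4 + 2 words. */
	return (download(image, 10, 0x100000, 4, print_load));
}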
*/ if (ISP_SBUS_SUPPORTED && isp->isp_bustype == ISP_BT_SBUS) { if (dodnld) { #ifdef ISP_TARGET_MODE isp->isp_fwrev[0] = 7; isp->isp_fwrev[1] = 55; #else isp->isp_fwrev[0] = 1; isp->isp_fwrev[1] = 37; #endif isp->isp_fwrev[2] = 0; } } else { isp->isp_fwrev[0] = mbs.param[1]; isp->isp_fwrev[1] = mbs.param[2]; isp->isp_fwrev[2] = mbs.param[3]; } if (IS_FC(isp)) { /* * We do not believe firmware attributes for 2100 code less * than 1.17.0, unless it's the firmware we specifically * are loading. * * Note that all 22XX and later f/w is greater than 1.X.0. */ if ((ISP_FW_OLDER_THAN(isp, 1, 17, 1))) { #ifdef USE_SMALLER_2100_FIRMWARE isp->isp_fwattr = ISP_FW_ATTR_SCCLUN; #else isp->isp_fwattr = 0; #endif } else { isp->isp_fwattr = mbs.param[6]; } if (IS_24XX(isp)) { isp->isp_fwattr |= ((uint64_t) mbs.param[15]) << 16; if (isp->isp_fwattr & ISP2400_FW_ATTR_EXTNDED) { isp->isp_fwattr |= (((uint64_t) mbs.param[16]) << 32) | (((uint64_t) mbs.param[17]) << 48); } } } else if (IS_SCSI(isp)) { #ifndef ISP_TARGET_MODE isp->isp_fwattr = ISP_FW_ATTR_TMODE; #else isp->isp_fwattr = 0; #endif } isp_prt(isp, ISP_LOGCONFIG, "Board Type %s, Chip Revision 0x%x, %s F/W Revision %d.%d.%d", btype, isp->isp_revision, dodnld? "loaded" : "resident", isp->isp_fwrev[0], isp->isp_fwrev[1], isp->isp_fwrev[2]); fwt = isp->isp_fwattr; if (IS_24XX(isp)) { buf = FCPARAM(isp, 0)->isp_scratch; ISP_SNPRINTF(buf, ISP_FC_SCRLEN, "Attributes:"); if (fwt & ISP2400_FW_ATTR_CLASS2) { fwt ^=ISP2400_FW_ATTR_CLASS2; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s Class2", buf); } if (fwt & ISP2400_FW_ATTR_IP) { fwt ^=ISP2400_FW_ATTR_IP; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s IP", buf); } if (fwt & ISP2400_FW_ATTR_MULTIID) { fwt ^=ISP2400_FW_ATTR_MULTIID; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s MultiID", buf); } if (fwt & ISP2400_FW_ATTR_SB2) { fwt ^=ISP2400_FW_ATTR_SB2; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s SB2", buf); } if (fwt & ISP2400_FW_ATTR_T10CRC) { fwt ^=ISP2400_FW_ATTR_T10CRC; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s T10CRC", buf); } if (fwt & ISP2400_FW_ATTR_VI) { fwt ^=ISP2400_FW_ATTR_VI; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s VI", buf); } if (fwt & ISP2400_FW_ATTR_MQ) { fwt ^=ISP2400_FW_ATTR_MQ; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s MQ", buf); } if (fwt & ISP2400_FW_ATTR_MSIX) { fwt ^=ISP2400_FW_ATTR_MSIX; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s MSIX", buf); } if (fwt & ISP2400_FW_ATTR_FCOE) { fwt ^=ISP2400_FW_ATTR_FCOE; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s FCOE", buf); } if (fwt & ISP2400_FW_ATTR_VP0) { fwt ^= ISP2400_FW_ATTR_VP0; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s VP0_Decoupling", buf); } if (fwt & ISP2400_FW_ATTR_EXPFW) { fwt ^= ISP2400_FW_ATTR_EXPFW; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s (Experimental)", buf); } if (fwt & ISP2400_FW_ATTR_HOTFW) { fwt ^= ISP2400_FW_ATTR_HOTFW; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s HotFW", buf); } fwt &= ~ISP2400_FW_ATTR_EXTNDED; if (fwt & ISP2400_FW_ATTR_EXTVP) { fwt ^= ISP2400_FW_ATTR_EXTVP; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s ExtVP", buf); } if (fwt & ISP2400_FW_ATTR_VN2VN) { fwt ^= ISP2400_FW_ATTR_VN2VN; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s VN2VN", buf); } if (fwt & ISP2400_FW_ATTR_EXMOFF) { fwt ^= ISP2400_FW_ATTR_EXMOFF; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s EXMOFF", buf); } if (fwt & ISP2400_FW_ATTR_NPMOFF) { fwt ^= ISP2400_FW_ATTR_NPMOFF; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s NPMOFF", buf); } 
if (fwt & ISP2400_FW_ATTR_DIFCHOP) { fwt ^= ISP2400_FW_ATTR_DIFCHOP; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s DIFCHOP", buf); } if (fwt & ISP2400_FW_ATTR_SRIOV) { fwt ^= ISP2400_FW_ATTR_SRIOV; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s SRIOV", buf); } if (fwt & ISP2400_FW_ATTR_ASICTMP) { fwt ^= ISP2400_FW_ATTR_ASICTMP; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s ASICTMP", buf); } if (fwt & ISP2400_FW_ATTR_ATIOMQ) { fwt ^= ISP2400_FW_ATTR_ATIOMQ; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s ATIOMQ", buf); } if (fwt) { ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s (unknown 0x%08x%08x)", buf, (uint32_t) (fwt >> 32), (uint32_t) fwt); } isp_prt(isp, ISP_LOGCONFIG, "%s", buf); } else if (IS_FC(isp)) { buf = FCPARAM(isp, 0)->isp_scratch; ISP_SNPRINTF(buf, ISP_FC_SCRLEN, "Attributes:"); if (fwt & ISP_FW_ATTR_TMODE) { fwt ^=ISP_FW_ATTR_TMODE; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s TargetMode", buf); } if (fwt & ISP_FW_ATTR_SCCLUN) { fwt ^=ISP_FW_ATTR_SCCLUN; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s SCC-Lun", buf); } if (fwt & ISP_FW_ATTR_FABRIC) { fwt ^=ISP_FW_ATTR_FABRIC; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s Fabric", buf); } if (fwt & ISP_FW_ATTR_CLASS2) { fwt ^=ISP_FW_ATTR_CLASS2; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s Class2", buf); } if (fwt & ISP_FW_ATTR_FCTAPE) { fwt ^=ISP_FW_ATTR_FCTAPE; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s FC-Tape", buf); } if (fwt & ISP_FW_ATTR_IP) { fwt ^=ISP_FW_ATTR_IP; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s IP", buf); } if (fwt & ISP_FW_ATTR_VI) { fwt ^=ISP_FW_ATTR_VI; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s VI", buf); } if (fwt & ISP_FW_ATTR_VI_SOLARIS) { fwt ^=ISP_FW_ATTR_VI_SOLARIS; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s VI_SOLARIS", buf); } if (fwt & ISP_FW_ATTR_2KLOGINS) { fwt ^=ISP_FW_ATTR_2KLOGINS; ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s 2K-Login", buf); } if (fwt != 0) { ISP_SNPRINTF(buf, ISP_FC_SCRLEN - strlen(buf), "%s (unknown 0x%08x%08x)", buf, (uint32_t) (fwt >> 32), (uint32_t) fwt); } isp_prt(isp, ISP_LOGCONFIG, "%s", buf); } if (IS_24XX(isp)) { MBSINIT(&mbs, MBOX_GET_RESOURCE_COUNT, MBLOGALL, 0); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { ISP_RESET0(isp); return; } if (isp->isp_maxcmds >= mbs.param[3]) { isp->isp_maxcmds = mbs.param[3]; } } else { MBSINIT(&mbs, MBOX_GET_FIRMWARE_STATUS, MBLOGALL, 0); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { ISP_RESET0(isp); return; } if (isp->isp_maxcmds >= mbs.param[2]) { isp->isp_maxcmds = mbs.param[2]; } } isp_prt(isp, ISP_LOGCONFIG, "%d max I/O command limit set", isp->isp_maxcmds); /* * If we don't have Multi-ID f/w loaded, we need to restrict channels to one. * Only make this check for non-SCSI cards (I'm not sure firmware attributes * work for them). */ if (IS_FC(isp) && isp->isp_nchan > 1) { if (!ISP_CAP_MULTI_ID(isp)) { isp_prt(isp, ISP_LOGWARN, "non-MULTIID f/w loaded, " "only can enable 1 of %d channels", isp->isp_nchan); isp->isp_nchan = 1; } else if (!ISP_CAP_VP0(isp)) { isp_prt(isp, ISP_LOGWARN, "We can not use MULTIID " "feature properly without VP0_Decoupling"); isp->isp_nchan = 1; } } for (i = 0; i < isp->isp_nchan; i++) { isp_fw_state(isp, i); } if (isp->isp_dead) { isp_shutdown(isp); ISP_DISABLE_INTS(isp); return; } isp->isp_state = ISP_RESETSTATE; /* * Okay- now that we have new firmware running, we now (re)set our * notion of how many luns we support. 
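The attribute printout above clears each recognized bit as it appends that bit's name, so whatever remains at the end can be reported as unknown. The long if-chains could equally be driven from a table; a sketch of that alternative, with flag values invented for the example:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct flagname {
	uint64_t	 bit;
	const char	*name;
};

static void
decode_attrs(uint64_t fwt, const struct flagname *tab, int ntab,
    char *buf, size_t len)
{
	int i;

	for (i = 0; i < ntab; i++) {
		if ((fwt & tab[i].bit) == 0)
			continue;
		fwt ^= tab[i].bit;	/* clear-as-you-print */
		snprintf(buf + strlen(buf), len - strlen(buf), " %s",
		    tab[i].name);
	}
	if (fwt != 0)
		snprintf(buf + strlen(buf), len - strlen(buf),
		    " (unknown 0x%jx)", (uintmax_t)fwt);
}

int
main(void)
{
	static const struct flagname tab[] = {
		{ 0x1, "Class2" }, { 0x2, "IP" }, { 0x4, "MultiID" },
	};
	char buf[128] = "Attributes:";

	decode_attrs(0x1 | 0x4 | 0x100, tab, 3, buf, sizeof(buf));
	printf("%s\n", buf);	/* Attributes: Class2 MultiID (unknown 0x100) */
	return (0);
}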
This is somewhat tricky because * if we haven't loaded firmware, we sometimes do not have an easy way * of knowing how many luns we support. * * Expanded lun firmware gives you 32 luns for SCSI cards and * 16384 luns for Fibre Channel cards. * * It turns out that even for QLogic 2100s with ROM 1.10 and above * we do get a firmware attributes word returned in mailbox register 6. * * Because the lun is in a different position in the Request Queue * Entry structure for Fibre Channel with expanded lun firmware, we * can only support one lun (lun zero) when we don't know what kind * of firmware we're running. */ if (IS_SCSI(isp)) { if (dodnld) { if (IS_ULTRA2(isp) || IS_ULTRA3(isp)) { isp->isp_maxluns = 32; } else { isp->isp_maxluns = 8; } } else { isp->isp_maxluns = 8; } } else { if (ISP_CAP_SCCFW(isp)) { isp->isp_maxluns = 0; /* No limit -- 2/8 bytes */ } else { isp->isp_maxluns = 16; } } /* * We get some default values established. As a side * effect, NVRAM is read here (unless overriden by * a configuration flag). */ if (do_load_defaults) { if (IS_SCSI(isp)) { isp_setdfltsdparm(isp); } else { for (i = 0; i < isp->isp_nchan; i++) { isp_setdfltfcparm(isp, i); } } } } /* * Initialize Parameters of Hardware to a known state. * * Locks are held before coming here. */ void isp_init(ispsoftc_t *isp) { if (IS_FC(isp)) { if (IS_24XX(isp)) { isp_fibre_init_2400(isp); } else { isp_fibre_init(isp); } } else { isp_scsi_init(isp); } GET_NANOTIME(&isp->isp_init_time); } static void isp_scsi_init(ispsoftc_t *isp) { sdparam *sdp_chan0, *sdp_chan1; mbreg_t mbs; isp->isp_state = ISP_INITSTATE; sdp_chan0 = SDPARAM(isp, 0); sdp_chan1 = sdp_chan0; if (IS_DUALBUS(isp)) { sdp_chan1 = SDPARAM(isp, 1); } /* First do overall per-card settings. */ /* * If we have fast memory timing enabled, turn it on. */ if (sdp_chan0->isp_fast_mttr) { ISP_WRITE(isp, RISC_MTR, 0x1313); } /* * Set Retry Delay and Count. * You set both channels at the same time. */ MBSINIT(&mbs, MBOX_SET_RETRY_COUNT, MBLOGALL, 0); mbs.param[1] = sdp_chan0->isp_retry_count; mbs.param[2] = sdp_chan0->isp_retry_delay; mbs.param[6] = sdp_chan1->isp_retry_count; mbs.param[7] = sdp_chan1->isp_retry_delay; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } /* * Set ASYNC DATA SETUP time. This is very important. */ MBSINIT(&mbs, MBOX_SET_ASYNC_DATA_SETUP_TIME, MBLOGALL, 0); mbs.param[1] = sdp_chan0->isp_async_data_setup; mbs.param[2] = sdp_chan1->isp_async_data_setup; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } /* * Set ACTIVE Negation State. */ MBSINIT(&mbs, MBOX_SET_ACT_NEG_STATE, MBLOGNONE, 0); mbs.param[1] = (sdp_chan0->isp_req_ack_active_neg << 4) | (sdp_chan0->isp_data_line_active_neg << 5); mbs.param[2] = (sdp_chan1->isp_req_ack_active_neg << 4) | (sdp_chan1->isp_data_line_active_neg << 5); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGERR, "failed to set active negation state (%d,%d), (%d,%d)", sdp_chan0->isp_req_ack_active_neg, sdp_chan0->isp_data_line_active_neg, sdp_chan1->isp_req_ack_active_neg, sdp_chan1->isp_data_line_active_neg); /* * But don't return. */ } /* * Set the Tag Aging limit */ MBSINIT(&mbs, MBOX_SET_TAG_AGE_LIMIT, MBLOGALL, 0); mbs.param[1] = sdp_chan0->isp_tag_aging; mbs.param[2] = sdp_chan1->isp_tag_aging; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGERR, "failed to set tag age limit (%d,%d)", sdp_chan0->isp_tag_aging, sdp_chan1->isp_tag_aging); return; } /* * Set selection timeout. 
*/ MBSINIT(&mbs, MBOX_SET_SELECT_TIMEOUT, MBLOGALL, 0); mbs.param[1] = sdp_chan0->isp_selection_timeout; mbs.param[2] = sdp_chan1->isp_selection_timeout; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } /* now do per-channel settings */ isp_scsi_channel_init(isp, 0); if (IS_DUALBUS(isp)) isp_scsi_channel_init(isp, 1); /* * Now enable request/response queues */ if (IS_ULTRA2(isp) || IS_1240(isp)) { MBSINIT(&mbs, MBOX_INIT_RES_QUEUE_A64, MBLOGALL, 0); mbs.param[1] = RESULT_QUEUE_LEN(isp); mbs.param[2] = DMA_WD1(isp->isp_result_dma); mbs.param[3] = DMA_WD0(isp->isp_result_dma); mbs.param[4] = 0; mbs.param[6] = DMA_WD3(isp->isp_result_dma); mbs.param[7] = DMA_WD2(isp->isp_result_dma); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } isp->isp_residx = isp->isp_resodx = mbs.param[5]; MBSINIT(&mbs, MBOX_INIT_REQ_QUEUE_A64, MBLOGALL, 0); mbs.param[1] = RQUEST_QUEUE_LEN(isp); mbs.param[2] = DMA_WD1(isp->isp_rquest_dma); mbs.param[3] = DMA_WD0(isp->isp_rquest_dma); mbs.param[5] = 0; mbs.param[6] = DMA_WD3(isp->isp_result_dma); mbs.param[7] = DMA_WD2(isp->isp_result_dma); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } isp->isp_reqidx = isp->isp_reqodx = mbs.param[4]; } else { MBSINIT(&mbs, MBOX_INIT_RES_QUEUE, MBLOGALL, 0); mbs.param[1] = RESULT_QUEUE_LEN(isp); mbs.param[2] = DMA_WD1(isp->isp_result_dma); mbs.param[3] = DMA_WD0(isp->isp_result_dma); mbs.param[4] = 0; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } isp->isp_residx = isp->isp_resodx = mbs.param[5]; MBSINIT(&mbs, MBOX_INIT_REQ_QUEUE, MBLOGALL, 0); mbs.param[1] = RQUEST_QUEUE_LEN(isp); mbs.param[2] = DMA_WD1(isp->isp_rquest_dma); mbs.param[3] = DMA_WD0(isp->isp_rquest_dma); mbs.param[5] = 0; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } isp->isp_reqidx = isp->isp_reqodx = mbs.param[4]; } /* * Turn on LVD transitions for ULTRA2 or better and other features * * Now that we have 32 bit handles, don't do any fast posting * any more. For Ultra2/Ultra3 cards, we can turn on 32 bit RIO * operation or use fast posting. To be conservative, we'll only * do this for Ultra3 cards now because the other cards are so * rare for this author to find and test with. */ MBSINIT(&mbs, MBOX_SET_FW_FEATURES, MBLOGALL, 0); if (IS_ULTRA2(isp)) mbs.param[1] |= FW_FEATURE_LVD_NOTIFY; #ifdef ISP_NO_RIO if (IS_ULTRA3(isp)) mbs.param[1] |= FW_FEATURE_FAST_POST; #else if (IS_ULTRA3(isp)) mbs.param[1] |= FW_FEATURE_RIO_32BIT; #endif if (mbs.param[1] != 0) { uint16_t sfeat = mbs.param[1]; isp_mboxcmd(isp, &mbs); if (mbs.param[0] == MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGINFO, "Enabled FW features (0x%x)", sfeat); } } isp->isp_state = ISP_RUNSTATE; } static void isp_scsi_channel_init(ispsoftc_t *isp, int chan) { sdparam *sdp; mbreg_t mbs; int tgt; sdp = SDPARAM(isp, chan); /* * Set (possibly new) Initiator ID. */ MBSINIT(&mbs, MBOX_SET_INIT_SCSI_ID, MBLOGALL, 0); mbs.param[1] = (chan << 7) | sdp->isp_initiator_id; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } isp_prt(isp, ISP_LOGINFO, "Chan %d Initiator ID is %d", chan, sdp->isp_initiator_id); /* * Set current per-target parameters to an initial safe minimum. 
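Several mailbox parameters above multiplex fields into a single 16-bit word: (chan << 7) | id for SET INIT SCSI ID, and (chan << 15) | (tgt << 8) | lun for the per-target and per-device commands that follow. A sketch of the bus/target/lun encoding:

#include <assert.h>
#include <stdint.h>

static uint16_t
pack_btl(int chan, int tgt, int lun)
{
	/* bus in the top bit, target in bits 8..14, lun in the low byte */
	return ((uint16_t)((chan << 15) | (tgt << 8) | lun));
}

int
main(void)
{
	assert(pack_btl(1, 3, 2) == (uint16_t)(0x8000 | 0x0300 | 0x02));
	return (0);
}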
*/ for (tgt = 0; tgt < MAX_TARGETS; tgt++) { int lun; uint16_t sdf; if (sdp->isp_devparam[tgt].dev_enable == 0) { continue; } #ifndef ISP_TARGET_MODE sdf = sdp->isp_devparam[tgt].goal_flags; sdf &= DPARM_SAFE_DFLT; /* * It is not quite clear when this changed over so that * we could force narrow and async for 1000/1020 cards, * but assume that this is only the case for loaded * firmware. */ if (isp->isp_loaded_fw) { sdf |= DPARM_NARROW | DPARM_ASYNC; } #else /* * The !$*!)$!$)* f/w uses the same index into some * internal table to decide how to respond to negotiations, * so if we've said "let's be safe" for ID X, and ID X * selects *us*, the negotiations will fall back to 'safe' * (as in narrow/async). What the f/w *should* do is * use the initiator id settings to decide how to respond. */ sdp->isp_devparam[tgt].goal_flags = sdf = DPARM_DEFAULT; #endif MBSINIT(&mbs, MBOX_SET_TARGET_PARAMS, MBLOGNONE, 0); mbs.param[1] = (chan << 15) | (tgt << 8); mbs.param[2] = sdf; if ((sdf & DPARM_SYNC) == 0) { mbs.param[3] = 0; } else { mbs.param[3] = (sdp->isp_devparam[tgt].goal_offset << 8) | (sdp->isp_devparam[tgt].goal_period); } isp_prt(isp, ISP_LOGDEBUG0, "Initial Settings bus%d tgt%d flags 0x%x off 0x%x per 0x%x", chan, tgt, mbs.param[2], mbs.param[3] >> 8, mbs.param[3] & 0xff); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { sdf = DPARM_SAFE_DFLT; MBSINIT(&mbs, MBOX_SET_TARGET_PARAMS, MBLOGALL, 0); mbs.param[1] = (tgt << 8) | (chan << 15); mbs.param[2] = sdf; mbs.param[3] = 0; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { continue; } } /* * We don't update any information directly from the f/w * because we need to run at least one command to cause a * new state to be latched up. So, we just assume that we * converge to the values we just had set. * * Ensure that we don't believe tagged queuing is enabled yet. * It turns out that sometimes the ISP just ignores our * attempts to set parameters for devices that it hasn't * seen yet. */ sdp->isp_devparam[tgt].actv_flags = sdf & ~DPARM_TQING; for (lun = 0; lun < (int) isp->isp_maxluns; lun++) { MBSINIT(&mbs, MBOX_SET_DEV_QUEUE_PARAMS, MBLOGALL, 0); mbs.param[1] = (chan << 15) | (tgt << 8) | lun; mbs.param[2] = sdp->isp_max_queue_depth; mbs.param[3] = sdp->isp_devparam[tgt].exc_throttle; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { break; } } } for (tgt = 0; tgt < MAX_TARGETS; tgt++) { if (sdp->isp_devparam[tgt].dev_refresh) { sdp->sendmarker = 1; sdp->update = 1; break; } } } /* * Fibre Channel specific initialization. */ static void isp_fibre_init(ispsoftc_t *isp) { fcparam *fcp; isp_icb_t local, *icbp = &local; mbreg_t mbs; /* * We only support one channel on non-24XX cards */ fcp = FCPARAM(isp, 0); if (fcp->role == ISP_ROLE_NONE) return; isp->isp_state = ISP_INITSTATE; ISP_MEMZERO(icbp, sizeof (*icbp)); icbp->icb_version = ICB_VERSION1; icbp->icb_fwoptions = fcp->isp_fwoptions; /* * Firmware Options are either retrieved from NVRAM or * are patched elsewhere. We check them for sanity here * and make changes based on board revision, but otherwise * let others decide policy. */ /* * If this is a 2100 < revision 5, we have to turn off FAIRNESS. */ if (IS_2100(isp) && isp->isp_revision < 5) { icbp->icb_fwoptions &= ~ICBOPT_FAIRNESS; } /* * We have to use FULL LOGIN even though it resets the loop too much * because otherwise port database entries don't get updated after * a LIP- this is a known f/w bug for 2100 f/w less than 1.17.0.
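 *
 * (ISP_FW_NEWER_THAN(isp, maj, min, mic) compares the running
 * firmware's revision triple against maj.min.mic, so the check
 * below reads as "f/w not newer than 1.17.0" and keeps FULL LOGIN
 * set for the buggy pre-1.17.0 firmware described above.)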
*/ if (!ISP_FW_NEWER_THAN(isp, 1, 17, 0)) { icbp->icb_fwoptions |= ICBOPT_FULL_LOGIN; } /* * Insist on Port Database Update Async notifications */ icbp->icb_fwoptions |= ICBOPT_PDBCHANGE_AE; /* * Make sure that target role reflects into fwoptions. */ if (fcp->role & ISP_ROLE_TARGET) { icbp->icb_fwoptions |= ICBOPT_TGT_ENABLE; } else { icbp->icb_fwoptions &= ~ICBOPT_TGT_ENABLE; } if (fcp->role & ISP_ROLE_INITIATOR) { icbp->icb_fwoptions &= ~ICBOPT_INI_DISABLE; } else { icbp->icb_fwoptions |= ICBOPT_INI_DISABLE; } icbp->icb_maxfrmlen = DEFAULT_FRAMESIZE(isp); if (icbp->icb_maxfrmlen < ICB_MIN_FRMLEN || icbp->icb_maxfrmlen > ICB_MAX_FRMLEN) { isp_prt(isp, ISP_LOGERR, "bad frame length (%d) from NVRAM- using %d", DEFAULT_FRAMESIZE(isp), ICB_DFLT_FRMLEN); icbp->icb_maxfrmlen = ICB_DFLT_FRMLEN; } icbp->icb_maxalloc = fcp->isp_maxalloc; if (icbp->icb_maxalloc < 1) { isp_prt(isp, ISP_LOGERR, "bad maximum allocation (%d)- using 16", fcp->isp_maxalloc); icbp->icb_maxalloc = 16; } icbp->icb_execthrottle = DEFAULT_EXEC_THROTTLE(isp); if (icbp->icb_execthrottle < 1) { isp_prt(isp, ISP_LOGERR, "bad execution throttle of %d- using %d", DEFAULT_EXEC_THROTTLE(isp), ICB_DFLT_THROTTLE); icbp->icb_execthrottle = ICB_DFLT_THROTTLE; } icbp->icb_retry_delay = fcp->isp_retry_delay; icbp->icb_retry_count = fcp->isp_retry_count; if (fcp->isp_loopid < LOCAL_LOOP_LIM) { icbp->icb_hardaddr = fcp->isp_loopid; if (isp->isp_confopts & ISP_CFG_OWNLOOPID) icbp->icb_fwoptions |= ICBOPT_HARD_ADDRESS; else icbp->icb_fwoptions |= ICBOPT_PREV_ADDRESS; } /* * Right now we just set extended options to prefer point-to-point * over loop based upon some soft config options. * * NB: for the 2300, ICBOPT_EXTENDED is required. */ if (IS_2100(isp)) { /* * We can't have Fast Posting any more- we now * have 32 bit handles. */ icbp->icb_fwoptions &= ~ICBOPT_FAST_POST; } else if (IS_2200(isp) || IS_23XX(isp)) { icbp->icb_fwoptions |= ICBOPT_EXTENDED; icbp->icb_xfwoptions = fcp->isp_xfwoptions; if (ISP_CAP_FCTAPE(isp)) { if (isp->isp_confopts & ISP_CFG_NOFCTAPE) icbp->icb_xfwoptions &= ~ICBXOPT_FCTAPE; if (isp->isp_confopts & ISP_CFG_FCTAPE) icbp->icb_xfwoptions |= ICBXOPT_FCTAPE; if (icbp->icb_xfwoptions & ICBXOPT_FCTAPE) { icbp->icb_fwoptions &= ~ICBOPT_FULL_LOGIN; /* per documents */ icbp->icb_xfwoptions |= ICBXOPT_FCTAPE_CCQ|ICBXOPT_FCTAPE_CONFIRM; FCPARAM(isp, 0)->fctape_enabled = 1; } else { FCPARAM(isp, 0)->fctape_enabled = 0; } } else { icbp->icb_xfwoptions &= ~ICBXOPT_FCTAPE; FCPARAM(isp, 0)->fctape_enabled = 0; } /* * Prefer or force Point-To-Point instead of Loop? */ switch (isp->isp_confopts & ISP_CFG_PORT_PREF) { case ISP_CFG_NPORT: icbp->icb_xfwoptions &= ~ICBXOPT_TOPO_MASK; icbp->icb_xfwoptions |= ICBXOPT_PTP_2_LOOP; break; case ISP_CFG_NPORT_ONLY: icbp->icb_xfwoptions &= ~ICBXOPT_TOPO_MASK; icbp->icb_xfwoptions |= ICBXOPT_PTP_ONLY; break; case ISP_CFG_LPORT_ONLY: icbp->icb_xfwoptions &= ~ICBXOPT_TOPO_MASK; icbp->icb_xfwoptions |= ICBXOPT_LOOP_ONLY; break; default: /* * Let NVRAM settings define it if they are sane */ switch (icbp->icb_xfwoptions & ICBXOPT_TOPO_MASK) { case ICBXOPT_PTP_2_LOOP: case ICBXOPT_PTP_ONLY: case ICBXOPT_LOOP_ONLY: case ICBXOPT_LOOP_2_PTP: break; default: icbp->icb_xfwoptions &= ~ICBXOPT_TOPO_MASK; icbp->icb_xfwoptions |= ICBXOPT_LOOP_2_PTP; } break; } if (IS_2200(isp)) { /* * We can't have Fast Posting any more- we now * have 32 bit handles. * * RIO seemed to have too much breakage. * * Just opt for safety.
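 *
 * (The topology selection above uses the usual clear-then-set
 * idiom for a multi-bit field, e.g. to force N-port only:
 *
 *	icbp->icb_xfwoptions &= ~ICBXOPT_TOPO_MASK;
 *	icbp->icb_xfwoptions |= ICBXOPT_PTP_ONLY;
 *
 * clearing the whole field first so that stale NVRAM topology
 * bits cannot combine with the new setting.)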
*/ icbp->icb_xfwoptions &= ~ICBXOPT_RIO_16BIT; icbp->icb_fwoptions &= ~ICBOPT_FAST_POST; } else { /* * QLogic recommends that FAST Posting be turned * off for 23XX cards and instead allow the HBA * to write response queue entries and interrupt * after a delay (ZIO). */ icbp->icb_fwoptions &= ~ICBOPT_FAST_POST; if ((fcp->isp_xfwoptions & ICBXOPT_TIMER_MASK) == ICBXOPT_ZIO) { icbp->icb_xfwoptions |= ICBXOPT_ZIO; icbp->icb_idelaytimer = 10; } icbp->icb_zfwoptions = fcp->isp_zfwoptions; if (isp->isp_confopts & ISP_CFG_ONEGB) { icbp->icb_zfwoptions &= ~ICBZOPT_RATE_MASK; icbp->icb_zfwoptions |= ICBZOPT_RATE_ONEGB; } else if (isp->isp_confopts & ISP_CFG_TWOGB) { icbp->icb_zfwoptions &= ~ICBZOPT_RATE_MASK; icbp->icb_zfwoptions |= ICBZOPT_RATE_TWOGB; } else { switch (icbp->icb_zfwoptions & ICBZOPT_RATE_MASK) { case ICBZOPT_RATE_ONEGB: case ICBZOPT_RATE_TWOGB: case ICBZOPT_RATE_AUTO: break; default: icbp->icb_zfwoptions &= ~ICBZOPT_RATE_MASK; icbp->icb_zfwoptions |= ICBZOPT_RATE_AUTO; break; } } } } /* * For 22XX > 2.1.26 && 23XX, set some options. */ if (ISP_FW_NEWER_THAN(isp, 2, 26, 0)) { MBSINIT(&mbs, MBOX_SET_FIRMWARE_OPTIONS, MBLOGALL, 0); mbs.param[1] = IFCOPT1_DISF7SWTCH|IFCOPT1_LIPASYNC|IFCOPT1_LIPF8; mbs.param[2] = 0; mbs.param[3] = 0; if (ISP_FW_NEWER_THAN(isp, 3, 16, 0)) { mbs.param[1] |= IFCOPT1_EQFQASYNC|IFCOPT1_CTIO_RETRY; if (fcp->role & ISP_ROLE_TARGET) { if (ISP_FW_NEWER_THAN(isp, 3, 25, 0)) { mbs.param[1] |= IFCOPT1_ENAPURE; } mbs.param[3] = IFCOPT3_NOPRLI; } } isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } } icbp->icb_logintime = ICB_LOGIN_TOV; #ifdef ISP_TARGET_MODE if (IS_23XX(isp) && (icbp->icb_fwoptions & ICBOPT_TGT_ENABLE)) { icbp->icb_lunenables = 0xffff; icbp->icb_ccnt = DFLT_CMND_CNT; icbp->icb_icnt = DFLT_INOT_CNT; icbp->icb_lunetimeout = ICB_LUN_ENABLE_TOV; } #endif if (fcp->isp_wwnn && fcp->isp_wwpn) { icbp->icb_fwoptions |= ICBOPT_BOTH_WWNS; MAKE_NODE_NAME_FROM_WWN(icbp->icb_nodename, fcp->isp_wwnn); MAKE_NODE_NAME_FROM_WWN(icbp->icb_portname, fcp->isp_wwpn); isp_prt(isp, ISP_LOGDEBUG1, "Setting ICB Node 0x%08x%08x Port 0x%08x%08x", ((uint32_t) (fcp->isp_wwnn >> 32)), ((uint32_t) (fcp->isp_wwnn)), ((uint32_t) (fcp->isp_wwpn >> 32)), ((uint32_t) (fcp->isp_wwpn))); } else if (fcp->isp_wwpn) { icbp->icb_fwoptions &= ~ICBOPT_BOTH_WWNS; MAKE_NODE_NAME_FROM_WWN(icbp->icb_portname, fcp->isp_wwpn); isp_prt(isp, ISP_LOGDEBUG1, "Setting ICB Port 0x%08x%08x", ((uint32_t) (fcp->isp_wwpn >> 32)), ((uint32_t) (fcp->isp_wwpn))); } else { isp_prt(isp, ISP_LOGERR, "No valid WWNs to use"); return; } icbp->icb_rqstqlen = RQUEST_QUEUE_LEN(isp); if (icbp->icb_rqstqlen < 1) { isp_prt(isp, ISP_LOGERR, "bad request queue length"); } icbp->icb_rsltqlen = RESULT_QUEUE_LEN(isp); if (icbp->icb_rsltqlen < 1) { isp_prt(isp, ISP_LOGERR, "bad result queue length"); } icbp->icb_rqstaddr[RQRSP_ADDR0015] = DMA_WD0(isp->isp_rquest_dma); icbp->icb_rqstaddr[RQRSP_ADDR1631] = DMA_WD1(isp->isp_rquest_dma); icbp->icb_rqstaddr[RQRSP_ADDR3247] = DMA_WD2(isp->isp_rquest_dma); icbp->icb_rqstaddr[RQRSP_ADDR4863] = DMA_WD3(isp->isp_rquest_dma); icbp->icb_respaddr[RQRSP_ADDR0015] = DMA_WD0(isp->isp_result_dma); icbp->icb_respaddr[RQRSP_ADDR1631] = DMA_WD1(isp->isp_result_dma); icbp->icb_respaddr[RQRSP_ADDR3247] = DMA_WD2(isp->isp_result_dma); icbp->icb_respaddr[RQRSP_ADDR4863] = DMA_WD3(isp->isp_result_dma); if (FC_SCRATCH_ACQUIRE(isp, 0)) { isp_prt(isp, ISP_LOGERR, sacq); return; } isp_prt(isp, ISP_LOGDEBUG0, "isp_fibre_init: fwopt 0x%x xfwopt 0x%x zfwopt 0x%x", icbp->icb_fwoptions, 
icbp->icb_xfwoptions, icbp->icb_zfwoptions); isp_put_icb(isp, icbp, (isp_icb_t *)fcp->isp_scratch); /* * Init the firmware */ MBSINIT(&mbs, MBOX_INIT_FIRMWARE, MBLOGALL, 30000000); mbs.param[1] = 0; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); mbs.logval = MBLOGALL; isp_prt(isp, ISP_LOGDEBUG0, "INIT F/W from %p (%08x%08x)", fcp->isp_scratch, (uint32_t) ((uint64_t)fcp->isp_scdma >> 32), (uint32_t) fcp->isp_scdma); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, sizeof (*icbp), 0); isp_mboxcmd(isp, &mbs); FC_SCRATCH_RELEASE(isp, 0); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_print_bytes(isp, "isp_fibre_init", sizeof (*icbp), icbp); return; } isp->isp_reqidx = 0; isp->isp_reqodx = 0; isp->isp_residx = 0; isp->isp_resodx = 0; /* * Whatever happens, we're now committed to being here. */ isp->isp_state = ISP_RUNSTATE; } static void isp_fibre_init_2400(ispsoftc_t *isp) { fcparam *fcp; isp_icb_2400_t local, *icbp = &local; mbreg_t mbs; int chan; /* * Check to see whether all channels have *some* kind of role */ for (chan = 0; chan < isp->isp_nchan; chan++) { fcp = FCPARAM(isp, chan); if (fcp->role != ISP_ROLE_NONE) { break; } } if (chan == isp->isp_nchan) { isp_prt(isp, ISP_LOG_WARN1, "all %d channels with role 'none'", chan); return; } isp->isp_state = ISP_INITSTATE; /* * Start with channel 0. */ fcp = FCPARAM(isp, 0); /* * Turn on LIP F8 async event (1) */ MBSINIT(&mbs, MBOX_SET_FIRMWARE_OPTIONS, MBLOGALL, 0); mbs.param[1] = 1; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } ISP_MEMZERO(icbp, sizeof (*icbp)); icbp->icb_fwoptions1 = fcp->isp_fwoptions; icbp->icb_fwoptions2 = fcp->isp_xfwoptions; icbp->icb_fwoptions3 = fcp->isp_zfwoptions; if (isp->isp_nchan > 1 && ISP_CAP_VP0(isp)) { icbp->icb_fwoptions1 &= ~ICB2400_OPT1_INI_DISABLE; icbp->icb_fwoptions1 |= ICB2400_OPT1_TGT_ENABLE; } else { if (fcp->role & ISP_ROLE_TARGET) icbp->icb_fwoptions1 |= ICB2400_OPT1_TGT_ENABLE; else icbp->icb_fwoptions1 &= ~ICB2400_OPT1_TGT_ENABLE; if (fcp->role & ISP_ROLE_INITIATOR) icbp->icb_fwoptions1 &= ~ICB2400_OPT1_INI_DISABLE; else icbp->icb_fwoptions1 |= ICB2400_OPT1_INI_DISABLE; } icbp->icb_version = ICB_VERSION1; icbp->icb_maxfrmlen = DEFAULT_FRAMESIZE(isp); if (icbp->icb_maxfrmlen < ICB_MIN_FRMLEN || icbp->icb_maxfrmlen > ICB_MAX_FRMLEN) { isp_prt(isp, ISP_LOGERR, "bad frame length (%d) from NVRAM- using %d", DEFAULT_FRAMESIZE(isp), ICB_DFLT_FRMLEN); icbp->icb_maxfrmlen = ICB_DFLT_FRMLEN; } icbp->icb_execthrottle = DEFAULT_EXEC_THROTTLE(isp); if (icbp->icb_execthrottle < 1) { isp_prt(isp, ISP_LOGERR, "bad execution throttle of %d- using %d", DEFAULT_EXEC_THROTTLE(isp), ICB_DFLT_THROTTLE); icbp->icb_execthrottle = ICB_DFLT_THROTTLE; } /* * Set target exchange count. Take half if we are supporting both roles. 
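 *
 * For example, with isp_maxcmds at a (hypothetical) 1024 and both
 * roles enabled, the target side is given 512 exchanges and the
 * remainder is left for initiator traffic.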
*/ if (icbp->icb_fwoptions1 & ICB2400_OPT1_TGT_ENABLE) { icbp->icb_xchgcnt = isp->isp_maxcmds; if ((icbp->icb_fwoptions1 & ICB2400_OPT1_INI_DISABLE) == 0) icbp->icb_xchgcnt >>= 1; } if (fcp->isp_loopid < LOCAL_LOOP_LIM) { icbp->icb_hardaddr = fcp->isp_loopid; if (isp->isp_confopts & ISP_CFG_OWNLOOPID) icbp->icb_fwoptions1 |= ICB2400_OPT1_HARD_ADDRESS; else icbp->icb_fwoptions1 |= ICB2400_OPT1_PREV_ADDRESS; } if (isp->isp_confopts & ISP_CFG_NOFCTAPE) { icbp->icb_fwoptions2 &= ~ICB2400_OPT2_FCTAPE; } if (isp->isp_confopts & ISP_CFG_FCTAPE) { icbp->icb_fwoptions2 |= ICB2400_OPT2_FCTAPE; } for (chan = 0; chan < isp->isp_nchan; chan++) { if (icbp->icb_fwoptions2 & ICB2400_OPT2_FCTAPE) FCPARAM(isp, chan)->fctape_enabled = 1; else FCPARAM(isp, chan)->fctape_enabled = 0; } switch (isp->isp_confopts & ISP_CFG_PORT_PREF) { case ISP_CFG_NPORT_ONLY: icbp->icb_fwoptions2 &= ~ICB2400_OPT2_TOPO_MASK; icbp->icb_fwoptions2 |= ICB2400_OPT2_PTP_ONLY; break; case ISP_CFG_LPORT_ONLY: icbp->icb_fwoptions2 &= ~ICB2400_OPT2_TOPO_MASK; icbp->icb_fwoptions2 |= ICB2400_OPT2_LOOP_ONLY; break; default: /* ISP_CFG_PTP_2_LOOP not available in 24XX/25XX */ icbp->icb_fwoptions2 &= ~ICB2400_OPT2_TOPO_MASK; icbp->icb_fwoptions2 |= ICB2400_OPT2_LOOP_2_PTP; break; } switch (icbp->icb_fwoptions2 & ICB2400_OPT2_TIMER_MASK) { case ICB2400_OPT2_ZIO: case ICB2400_OPT2_ZIO1: icbp->icb_idelaytimer = 0; break; case 0: break; default: isp_prt(isp, ISP_LOGWARN, "bad value %x in fwopt2 timer field", icbp->icb_fwoptions2 & ICB2400_OPT2_TIMER_MASK); icbp->icb_fwoptions2 &= ~ICB2400_OPT2_TIMER_MASK; break; } if ((icbp->icb_fwoptions3 & ICB2400_OPT3_RSPSZ_MASK) == 0) { icbp->icb_fwoptions3 |= ICB2400_OPT3_RSPSZ_24; } icbp->icb_fwoptions3 &= ~ICB2400_OPT3_RATE_AUTO; if (isp->isp_confopts & ISP_CFG_ONEGB) { icbp->icb_fwoptions3 |= ICB2400_OPT3_RATE_ONEGB; } else if (isp->isp_confopts & ISP_CFG_TWOGB) { icbp->icb_fwoptions3 |= ICB2400_OPT3_RATE_TWOGB; } else if (isp->isp_confopts & ISP_CFG_FOURGB) { icbp->icb_fwoptions3 |= ICB2400_OPT3_RATE_FOURGB; } else if (IS_25XX(isp) && (isp->isp_confopts & ISP_CFG_EIGHTGB)) { icbp->icb_fwoptions3 |= ICB2400_OPT3_RATE_EIGHTGB; } else { icbp->icb_fwoptions3 |= ICB2400_OPT3_RATE_AUTO; } icbp->icb_logintime = ICB_LOGIN_TOV; if (fcp->isp_wwnn && fcp->isp_wwpn) { icbp->icb_fwoptions1 |= ICB2400_OPT1_BOTH_WWNS; MAKE_NODE_NAME_FROM_WWN(icbp->icb_portname, fcp->isp_wwpn); MAKE_NODE_NAME_FROM_WWN(icbp->icb_nodename, fcp->isp_wwnn); isp_prt(isp, ISP_LOGDEBUG1, "Setting ICB Node 0x%08x%08x Port 0x%08x%08x", ((uint32_t) (fcp->isp_wwnn >> 32)), ((uint32_t) (fcp->isp_wwnn)), ((uint32_t) (fcp->isp_wwpn >> 32)), ((uint32_t) (fcp->isp_wwpn))); } else if (fcp->isp_wwpn) { icbp->icb_fwoptions1 &= ~ICB2400_OPT1_BOTH_WWNS; MAKE_NODE_NAME_FROM_WWN(icbp->icb_portname, fcp->isp_wwpn); isp_prt(isp, ISP_LOGDEBUG1, "Setting ICB Node to be same as Port 0x%08x%08x", ((uint32_t) (fcp->isp_wwpn >> 32)), ((uint32_t) (fcp->isp_wwpn))); } else { isp_prt(isp, ISP_LOGERR, "No valid WWNs to use"); return; } icbp->icb_retry_count = fcp->isp_retry_count; icbp->icb_rqstqlen = RQUEST_QUEUE_LEN(isp); if (icbp->icb_rqstqlen < 8) { isp_prt(isp, ISP_LOGERR, "bad request queue length %d", icbp->icb_rqstqlen); return; } icbp->icb_rsltqlen = RESULT_QUEUE_LEN(isp); if (icbp->icb_rsltqlen < 8) { isp_prt(isp, ISP_LOGERR, "bad result queue length %d", icbp->icb_rsltqlen); return; } icbp->icb_rqstaddr[RQRSP_ADDR0015] = DMA_WD0(isp->isp_rquest_dma); icbp->icb_rqstaddr[RQRSP_ADDR1631] = DMA_WD1(isp->isp_rquest_dma); icbp->icb_rqstaddr[RQRSP_ADDR3247] = 
DMA_WD2(isp->isp_rquest_dma); icbp->icb_rqstaddr[RQRSP_ADDR4863] = DMA_WD3(isp->isp_rquest_dma); icbp->icb_respaddr[RQRSP_ADDR0015] = DMA_WD0(isp->isp_result_dma); icbp->icb_respaddr[RQRSP_ADDR1631] = DMA_WD1(isp->isp_result_dma); icbp->icb_respaddr[RQRSP_ADDR3247] = DMA_WD2(isp->isp_result_dma); icbp->icb_respaddr[RQRSP_ADDR4863] = DMA_WD3(isp->isp_result_dma); #ifdef ISP_TARGET_MODE /* unconditionally set up the ATIO queue if we support target mode */ icbp->icb_atioqlen = RESULT_QUEUE_LEN(isp); if (icbp->icb_atioqlen < 8) { isp_prt(isp, ISP_LOGERR, "bad ATIO queue length %d", icbp->icb_atioqlen); return; } icbp->icb_atioqaddr[RQRSP_ADDR0015] = DMA_WD0(isp->isp_atioq_dma); icbp->icb_atioqaddr[RQRSP_ADDR1631] = DMA_WD1(isp->isp_atioq_dma); icbp->icb_atioqaddr[RQRSP_ADDR3247] = DMA_WD2(isp->isp_atioq_dma); icbp->icb_atioqaddr[RQRSP_ADDR4863] = DMA_WD3(isp->isp_atioq_dma); isp_prt(isp, ISP_LOGDEBUG0, "isp_fibre_init_2400: atioq %04x%04x%04x%04x", DMA_WD3(isp->isp_atioq_dma), DMA_WD2(isp->isp_atioq_dma), DMA_WD1(isp->isp_atioq_dma), DMA_WD0(isp->isp_atioq_dma)); #endif isp_prt(isp, ISP_LOGDEBUG0, "isp_fibre_init_2400: fwopt1 0x%x fwopt2 0x%x fwopt3 0x%x", icbp->icb_fwoptions1, icbp->icb_fwoptions2, icbp->icb_fwoptions3); isp_prt(isp, ISP_LOGDEBUG0, "isp_fibre_init_2400: rqst %04x%04x%04x%04x rsp %04x%04x%04x%04x", DMA_WD3(isp->isp_rquest_dma), DMA_WD2(isp->isp_rquest_dma), DMA_WD1(isp->isp_rquest_dma), DMA_WD0(isp->isp_rquest_dma), DMA_WD3(isp->isp_result_dma), DMA_WD2(isp->isp_result_dma), DMA_WD1(isp->isp_result_dma), DMA_WD0(isp->isp_result_dma)); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "isp_fibre_init_2400", sizeof (*icbp), icbp); } if (FC_SCRATCH_ACQUIRE(isp, 0)) { isp_prt(isp, ISP_LOGERR, sacq); return; } ISP_MEMZERO(fcp->isp_scratch, ISP_FC_SCRLEN); isp_put_icb_2400(isp, icbp, fcp->isp_scratch); /* * Now fill in information about any additional channels */ if (isp->isp_nchan > 1) { isp_icb_2400_vpinfo_t vpinfo, *vdst; vp_port_info_t pi, *pdst; size_t amt = 0; uint8_t *off; - vpinfo.vp_global_options = 0; + vpinfo.vp_global_options = ICB2400_VPGOPT_GEN_RIDA; if (ISP_CAP_VP0(isp)) { vpinfo.vp_global_options |= ICB2400_VPGOPT_VP0_DECOUPLE; vpinfo.vp_count = isp->isp_nchan; chan = 0; } else { vpinfo.vp_count = isp->isp_nchan - 1; chan = 1; } off = fcp->isp_scratch; off += ICB2400_VPINFO_OFF; vdst = (isp_icb_2400_vpinfo_t *) off; isp_put_icb_2400_vpinfo(isp, &vpinfo, vdst); amt = ICB2400_VPINFO_OFF + sizeof (isp_icb_2400_vpinfo_t); for (; chan < isp->isp_nchan; chan++) { fcparam *fcp2; ISP_MEMZERO(&pi, sizeof (pi)); fcp2 = FCPARAM(isp, chan); if (fcp2->role != ISP_ROLE_NONE) { - pi.vp_port_options = ICB2400_VPOPT_ENABLED; + pi.vp_port_options = ICB2400_VPOPT_ENABLED | + ICB2400_VPOPT_ENA_SNSLOGIN; if (fcp2->role & ISP_ROLE_INITIATOR) pi.vp_port_options |= ICB2400_VPOPT_INI_ENABLE; if ((fcp2->role & ISP_ROLE_TARGET) == 0) pi.vp_port_options |= ICB2400_VPOPT_TGT_DISABLE; } if (fcp2->isp_loopid < LOCAL_LOOP_LIM) { pi.vp_port_loopid = fcp2->isp_loopid; if (isp->isp_confopts & ISP_CFG_OWNLOOPID) pi.vp_port_options |= ICB2400_VPOPT_HARD_ADDRESS; else pi.vp_port_options |= ICB2400_VPOPT_PREV_ADDRESS; } MAKE_NODE_NAME_FROM_WWN(pi.vp_port_portname, fcp2->isp_wwpn); MAKE_NODE_NAME_FROM_WWN(pi.vp_port_nodename, fcp2->isp_wwnn); off = fcp->isp_scratch; if (ISP_CAP_VP0(isp)) off += ICB2400_VPINFO_PORT_OFF(chan); else off += ICB2400_VPINFO_PORT_OFF(chan - 1); pdst = (vp_port_info_t *) off; isp_put_vp_port_info(isp, &pi, pdst); amt += ICB2400_VPOPT_WRITE_SIZE; } if (isp->isp_dblev & 
ISP_LOGDEBUG1) { isp_print_bytes(isp, "isp_fibre_init_2400", amt - ICB2400_VPINFO_OFF, (char *)fcp->isp_scratch + ICB2400_VPINFO_OFF); } } /* * Init the firmware */ MBSINIT(&mbs, 0, MBLOGALL, 30000000); if (isp->isp_nchan > 1) { mbs.param[0] = MBOX_INIT_FIRMWARE_MULTI_ID; } else { mbs.param[0] = MBOX_INIT_FIRMWARE; } mbs.param[1] = 0; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); isp_prt(isp, ISP_LOGDEBUG0, "INIT F/W from %04x%04x%04x%04x", DMA_WD3(fcp->isp_scdma), DMA_WD2(fcp->isp_scdma), DMA_WD1(fcp->isp_scdma), DMA_WD0(fcp->isp_scdma)); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, sizeof (*icbp), 0); isp_mboxcmd(isp, &mbs); FC_SCRATCH_RELEASE(isp, 0); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return; } isp->isp_reqidx = 0; isp->isp_reqodx = 0; isp->isp_residx = 0; isp->isp_resodx = 0; isp->isp_atioodx = 0; /* * Whatever happens, we're now committed to being here. */ isp->isp_state = ISP_RUNSTATE; } static void isp_mark_portdb(ispsoftc_t *isp, int chan, int disposition) { fcparam *fcp = FCPARAM(isp, chan); fcportdb_t *lp; int i; for (i = 0; i < MAX_FC_TARG; i++) { lp = &fcp->portdb[i]; switch (lp->state) { case FC_PORTDB_STATE_PROBATIONAL: case FC_PORTDB_STATE_DEAD: case FC_PORTDB_STATE_CHANGED: case FC_PORTDB_STATE_PENDING_VALID: case FC_PORTDB_STATE_VALID: if (disposition > 0) lp->state = FC_PORTDB_STATE_PROBATIONAL; else { lp->state = FC_PORTDB_STATE_NIL; isp_async(isp, ISPASYNC_DEV_GONE, chan, lp); } break; case FC_PORTDB_STATE_ZOMBIE: break; case FC_PORTDB_STATE_NIL: case FC_PORTDB_STATE_NEW: default: ISP_MEMZERO(lp, sizeof(*lp)); lp->state = FC_PORTDB_STATE_NIL; break; } } } /* * Perform an IOCB PLOGI or LOGO via EXECUTE IOCB A64 for 24XX cards * or via FABRIC LOGIN/FABRIC LOGOUT for other cards. 
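 *
 * A typical call through this path is the implicit logout issued
 * from isp_pdb_sync() below (a sketch; the return value is 0 or an
 * MBOX_*-style error code):
 *
 *	(void) isp_plogx(isp, chan, lp->handle, lp->portid,
 *	    PLOGX_FLG_CMD_LOGO | PLOGX_FLG_IMPLICIT |
 *	    PLOGX_FLG_FREE_NPHDL, 0);
 *
 * The last argument says whether the caller already holds the FC
 * scratch area (non-zero) or wants it acquired and released here
 * (zero).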
*/ static int isp_plogx(ispsoftc_t *isp, int chan, uint16_t handle, uint32_t portid, int flags, int gs) { mbreg_t mbs; uint8_t q[QENTRY_LEN]; isp_plogx_t *plp; fcparam *fcp; uint8_t *scp; uint32_t sst, parm1; int rval, lev; const char *msg; char buf[64]; if (!IS_24XX(isp)) { int action = flags & PLOGX_FLG_CMD_MASK; if (action == PLOGX_FLG_CMD_PLOGI) { return (isp_port_login(isp, handle, portid)); } else if (action == PLOGX_FLG_CMD_LOGO) { return (isp_port_logout(isp, handle, portid)); } else { return (MBOX_INVALID_COMMAND); } } ISP_MEMZERO(q, QENTRY_LEN); plp = (isp_plogx_t *) q; plp->plogx_header.rqs_entry_count = 1; plp->plogx_header.rqs_entry_type = RQSTYPE_LOGIN; plp->plogx_handle = 0xffffffff; plp->plogx_nphdl = handle; plp->plogx_vphdl = chan; plp->plogx_portlo = portid; plp->plogx_rspsz_porthi = (portid >> 16) & 0xff; plp->plogx_flags = flags; if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "IOCB LOGX", QENTRY_LEN, plp); } if (gs == 0) { if (FC_SCRATCH_ACQUIRE(isp, chan)) { isp_prt(isp, ISP_LOGERR, sacq); return (-1); } } fcp = FCPARAM(isp, chan); scp = fcp->isp_scratch; isp_put_plogx(isp, plp, (isp_plogx_t *) scp); MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 500000); mbs.param[1] = QENTRY_LEN; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, QENTRY_LEN, chan); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { rval = mbs.param[0]; goto out; } MEMORYBARRIER(isp, SYNC_SFORCPU, QENTRY_LEN, QENTRY_LEN, chan); scp += QENTRY_LEN; isp_get_plogx(isp, (isp_plogx_t *) scp, plp); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "IOCB LOGX response", QENTRY_LEN, plp); } if (plp->plogx_status == PLOGX_STATUS_OK) { rval = 0; goto out; } else if (plp->plogx_status != PLOGX_STATUS_IOCBERR) { isp_prt(isp, ISP_LOGWARN, "status 0x%x on port login IOCB channel %d", plp->plogx_status, chan); rval = -1; goto out; } sst = plp->plogx_ioparm[0].lo16 | (plp->plogx_ioparm[0].hi16 << 16); parm1 = plp->plogx_ioparm[1].lo16 | (plp->plogx_ioparm[1].hi16 << 16); rval = -1; lev = ISP_LOGERR; msg = NULL; switch (sst) { case PLOGX_IOCBERR_NOLINK: msg = "no link"; break; case PLOGX_IOCBERR_NOIOCB: msg = "no IOCB buffer"; break; case PLOGX_IOCBERR_NOXGHG: msg = "no Exchange Control Block"; break; case PLOGX_IOCBERR_FAILED: ISP_SNPRINTF(buf, sizeof (buf), "reason 0x%x (last LOGIN state 0x%x)", parm1 & 0xff, (parm1 >> 8) & 0xff); msg = buf; break; case PLOGX_IOCBERR_NOFABRIC: msg = "no fabric"; break; case PLOGX_IOCBERR_NOTREADY: msg = "firmware not ready"; break; case PLOGX_IOCBERR_NOLOGIN: ISP_SNPRINTF(buf, sizeof (buf), "not logged in (last state 0x%x)", parm1); msg = buf; rval = MBOX_NOT_LOGGED_IN; break; case PLOGX_IOCBERR_REJECT: ISP_SNPRINTF(buf, sizeof (buf), "LS_RJT = 0x%x", parm1); msg = buf; break; case PLOGX_IOCBERR_NOPCB: msg = "no PCB allocated"; break; case PLOGX_IOCBERR_EINVAL: ISP_SNPRINTF(buf, sizeof (buf), "invalid parameter at offset 0x%x", parm1); msg = buf; break; case PLOGX_IOCBERR_PORTUSED: lev = ISP_LOG_SANCFG|ISP_LOG_WARN1; ISP_SNPRINTF(buf, sizeof (buf), "already logged in with N-Port handle 0x%x", parm1); msg = buf; rval = MBOX_PORT_ID_USED | (parm1 << 16); break; case PLOGX_IOCBERR_HNDLUSED: lev = ISP_LOG_SANCFG|ISP_LOG_WARN1; ISP_SNPRINTF(buf, sizeof (buf), "handle already used for PortID 0x%06x", parm1); msg = buf; rval = MBOX_LOOP_ID_USED; break; case PLOGX_IOCBERR_NOHANDLE: msg = "no handle 
allocated"; break; case PLOGX_IOCBERR_NOFLOGI: msg = "no FLOGI_ACC"; break; default: ISP_SNPRINTF(buf, sizeof (buf), "status %x from %x", plp->plogx_status, flags); msg = buf; break; } if (msg) { isp_prt(isp, ISP_LOGERR, "Chan %d PLOGX PortID 0x%06x to N-Port handle 0x%x: %s", chan, portid, handle, msg); } out: if (gs == 0) { FC_SCRATCH_RELEASE(isp, chan); } return (rval); } static int isp_port_login(ispsoftc_t *isp, uint16_t handle, uint32_t portid) { mbreg_t mbs; MBSINIT(&mbs, MBOX_FABRIC_LOGIN, MBLOGNONE, 500000); if (ISP_CAP_2KLOGIN(isp)) { mbs.param[1] = handle; mbs.ibits = (1 << 10); } else { mbs.param[1] = handle << 8; } mbs.param[2] = portid >> 16; mbs.param[3] = portid; mbs.logval = MBLOGNONE; mbs.timeout = 500000; isp_mboxcmd(isp, &mbs); switch (mbs.param[0]) { case MBOX_PORT_ID_USED: isp_prt(isp, ISP_LOG_SANCFG|ISP_LOG_WARN1, "isp_port_login: portid 0x%06x already logged in as %u", portid, mbs.param[1]); return (MBOX_PORT_ID_USED | (mbs.param[1] << 16)); case MBOX_LOOP_ID_USED: isp_prt(isp, ISP_LOG_SANCFG|ISP_LOG_WARN1, "isp_port_login: handle 0x%x in use for port id 0x%02xXXXX", handle, mbs.param[1] & 0xff); return (MBOX_LOOP_ID_USED); case MBOX_COMMAND_COMPLETE: return (0); case MBOX_COMMAND_ERROR: isp_prt(isp, ISP_LOG_SANCFG|ISP_LOG_WARN1, "isp_port_login: error 0x%x in PLOGI to port 0x%06x", mbs.param[1], portid); return (MBOX_COMMAND_ERROR); case MBOX_ALL_IDS_USED: isp_prt(isp, ISP_LOG_SANCFG|ISP_LOG_WARN1, "isp_port_login: all IDs used for fabric login"); return (MBOX_ALL_IDS_USED); default: isp_prt(isp, ISP_LOG_SANCFG, "isp_port_login: error 0x%x on port login of 0x%06x@0x%0x", mbs.param[0], portid, handle); return (mbs.param[0]); } } /* * Pre-24XX fabric port logout * * Note that portid is not used */ static int isp_port_logout(ispsoftc_t *isp, uint16_t handle, uint32_t portid) { mbreg_t mbs; MBSINIT(&mbs, MBOX_FABRIC_LOGOUT, MBLOGNONE, 500000); if (ISP_CAP_2KLOGIN(isp)) { mbs.param[1] = handle; mbs.ibits = (1 << 10); } else { mbs.param[1] = handle << 8; } isp_mboxcmd(isp, &mbs); return (mbs.param[0] == MBOX_COMMAND_COMPLETE? 
0 : mbs.param[0]); } static int isp_getpdb(ispsoftc_t *isp, int chan, uint16_t id, isp_pdb_t *pdb, int dolock) { fcparam *fcp = FCPARAM(isp, chan); mbreg_t mbs; union { isp_pdb_21xx_t fred; isp_pdb_24xx_t bill; } un; MBSINIT(&mbs, MBOX_GET_PORT_DB, MBLOGALL & ~MBLOGMASK(MBOX_COMMAND_PARAM_ERROR), 250000); if (IS_24XX(isp)) { mbs.ibits = (1 << 9)|(1 << 10); mbs.param[1] = id; mbs.param[9] = chan; } else if (ISP_CAP_2KLOGIN(isp)) { mbs.param[1] = id; } else { mbs.param[1] = id << 8; } mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); if (dolock) { if (FC_SCRATCH_ACQUIRE(isp, chan)) { isp_prt(isp, ISP_LOGERR, sacq); return (-1); } } MEMORYBARRIER(isp, SYNC_SFORDEV, 0, sizeof (un), chan); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { if (dolock) { FC_SCRATCH_RELEASE(isp, chan); } return (mbs.param[0] | (mbs.param[1] << 16)); } if (IS_24XX(isp)) { isp_get_pdb_24xx(isp, fcp->isp_scratch, &un.bill); pdb->handle = un.bill.pdb_handle; pdb->prli_word3 = un.bill.pdb_prli_svc3; pdb->portid = BITS2WORD_24XX(un.bill.pdb_portid_bits); ISP_MEMCPY(pdb->portname, un.bill.pdb_portname, 8); ISP_MEMCPY(pdb->nodename, un.bill.pdb_nodename, 8); isp_prt(isp, ISP_LOG_SANCFG, "Chan %d handle 0x%x Port 0x%06x flags 0x%x curstate %x", chan, id, pdb->portid, un.bill.pdb_flags, un.bill.pdb_curstate); if (un.bill.pdb_curstate < PDB2400_STATE_PLOGI_DONE || un.bill.pdb_curstate > PDB2400_STATE_LOGGED_IN) { mbs.param[0] = MBOX_NOT_LOGGED_IN; if (dolock) { FC_SCRATCH_RELEASE(isp, chan); } return (mbs.param[0]); } } else { isp_get_pdb_21xx(isp, fcp->isp_scratch, &un.fred); pdb->handle = un.fred.pdb_loopid; pdb->prli_word3 = un.fred.pdb_prli_svc3; pdb->portid = BITS2WORD(un.fred.pdb_portid_bits); ISP_MEMCPY(pdb->portname, un.fred.pdb_portname, 8); ISP_MEMCPY(pdb->nodename, un.fred.pdb_nodename, 8); } if (dolock) { FC_SCRATCH_RELEASE(isp, chan); } return (0); } static int isp_gethandles(ispsoftc_t *isp, int chan, uint16_t *handles, int *num, int dolock, int loop) { fcparam *fcp = FCPARAM(isp, chan); mbreg_t mbs; isp_pnhle_21xx_t el1, *elp1; isp_pnhle_23xx_t el3, *elp3; isp_pnhle_24xx_t el4, *elp4; int i, j; uint32_t p; uint16_t h; MBSINIT(&mbs, MBOX_GET_ID_LIST, MBLOGALL, 250000); if (IS_24XX(isp)) { mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); mbs.param[8] = ISP_FC_SCRLEN; mbs.param[9] = chan; } else { mbs.ibits = (1 << 1)|(1 << 2)|(1 << 3)|(1 << 6); mbs.param[1] = DMA_WD1(fcp->isp_scdma); mbs.param[2] = DMA_WD0(fcp->isp_scdma); mbs.param[3] = DMA_WD3(fcp->isp_scdma); mbs.param[6] = DMA_WD2(fcp->isp_scdma); } if (dolock) { if (FC_SCRATCH_ACQUIRE(isp, chan)) { isp_prt(isp, ISP_LOGERR, sacq); return (-1); } } MEMORYBARRIER(isp, SYNC_SFORDEV, 0, ISP_FC_SCRLEN, chan); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { if (dolock) { FC_SCRATCH_RELEASE(isp, chan); } return (mbs.param[0] | (mbs.param[1] << 16)); } elp1 = fcp->isp_scratch; elp3 = fcp->isp_scratch; elp4 = fcp->isp_scratch; for (i = 0, j = 0; i < mbs.param[1] && j < *num; i++) { if (IS_24XX(isp)) { isp_get_pnhle_24xx(isp, &elp4[i], &el4); p = el4.pnhle_port_id_lo | (el4.pnhle_port_id_hi << 16); h = el4.pnhle_handle; } else if (IS_23XX(isp)) { isp_get_pnhle_23xx(isp, &elp3[i], &el3); p = el3.pnhle_port_id_lo | (el3.pnhle_port_id_hi << 16); h = el3.pnhle_handle; } else { /* 21xx */ isp_get_pnhle_21xx(isp, 
&elp1[i], &el1); p = el1.pnhle_port_id_lo | ((el1.pnhle_port_id_hi_handle & 0xff) << 16); h = el1.pnhle_port_id_hi_handle >> 8; } if (loop && (p >> 8) != (fcp->isp_portid >> 8)) continue; handles[j++] = h; } *num = j; if (dolock) FC_SCRATCH_RELEASE(isp, chan); return (0); } static void isp_dump_chip_portdb(ispsoftc_t *isp, int chan, int dolock) { isp_pdb_t pdb; int lim, loopid; isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGINFO, "Chan %d chip port dump", chan); if (ISP_CAP_2KLOGIN(isp)) { lim = NPH_MAX_2K; } else { lim = NPH_MAX; } for (loopid = 0; loopid != lim; loopid++) { if (isp_getpdb(isp, chan, loopid, &pdb, dolock)) { continue; } isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGINFO, "Chan %d Loopid 0x%04x " "PortID 0x%06x WWPN 0x%02x%02x%02x%02x%02x%02x%02x%02x", chan, loopid, pdb.portid, pdb.portname[0], pdb.portname[1], pdb.portname[2], pdb.portname[3], pdb.portname[4], pdb.portname[5], pdb.portname[6], pdb.portname[7]); } } static uint64_t isp_get_wwn(ispsoftc_t *isp, int chan, int loopid, int nodename) { uint64_t wwn = INI_NONE; fcparam *fcp = FCPARAM(isp, chan); mbreg_t mbs; if (fcp->isp_fwstate < FW_READY || fcp->isp_loopstate < LOOP_PDB_RCVD) { return (wwn); } MBSINIT(&mbs, MBOX_GET_PORT_NAME, MBLOGALL & ~MBLOGMASK(MBOX_COMMAND_PARAM_ERROR), 500000); if (ISP_CAP_2KLOGIN(isp)) { mbs.param[1] = loopid; if (nodename) { mbs.param[10] = 1; } mbs.param[9] = chan; } else { mbs.ibitm = 3; mbs.param[1] = loopid << 8; if (nodename) { mbs.param[1] |= 1; } } isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return (wwn); } if (IS_24XX(isp)) { wwn = (((uint64_t)(mbs.param[2] >> 8)) << 56) | (((uint64_t)(mbs.param[2] & 0xff)) << 48) | (((uint64_t)(mbs.param[3] >> 8)) << 40) | (((uint64_t)(mbs.param[3] & 0xff)) << 32) | (((uint64_t)(mbs.param[6] >> 8)) << 24) | (((uint64_t)(mbs.param[6] & 0xff)) << 16) | (((uint64_t)(mbs.param[7] >> 8)) << 8) | (((uint64_t)(mbs.param[7] & 0xff))); } else { wwn = (((uint64_t)(mbs.param[2] & 0xff)) << 56) | (((uint64_t)(mbs.param[2] >> 8)) << 48) | (((uint64_t)(mbs.param[3] & 0xff)) << 40) | (((uint64_t)(mbs.param[3] >> 8)) << 32) | (((uint64_t)(mbs.param[6] & 0xff)) << 24) | (((uint64_t)(mbs.param[6] >> 8)) << 16) | (((uint64_t)(mbs.param[7] & 0xff)) << 8) | (((uint64_t)(mbs.param[7] >> 8))); } return (wwn); } /* * Make sure we have good FC link. */ static int isp_fclink_test(ispsoftc_t *isp, int chan, int usdelay) { mbreg_t mbs; int check_for_fabric, r; uint8_t lwfs; int loopid; fcparam *fcp; fcportdb_t *lp; isp_pdb_t pdb; NANOTIME_T hra, hrb; fcp = FCPARAM(isp, chan); isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC Link Test Entry", chan); ISP_MARK_PORTDB(isp, chan, 1); /* * Wait up to N microseconds for F/W to go to a ready state. */ lwfs = FW_CONFIG_WAIT; GET_NANOTIME(&hra); while (1) { isp_fw_state(isp, chan); if (lwfs != fcp->isp_fwstate) { isp_prt(isp, ISP_LOGCONFIG|ISP_LOG_SANCFG, "Chan %d Firmware State <%s->%s>", chan, isp_fc_fw_statename((int)lwfs), isp_fc_fw_statename((int)fcp->isp_fwstate)); lwfs = fcp->isp_fwstate; } if (fcp->isp_fwstate == FW_READY) { break; } GET_NANOTIME(&hrb); if ((NANOTIME_SUB(&hrb, &hra) / 1000 + 1000 >= usdelay)) break; ISP_SLEEP(isp, 1000); } /* * If we haven't gone to 'ready' state, return. */ if (fcp->isp_fwstate != FW_READY) { isp_prt(isp, ISP_LOG_SANCFG, "%s: chan %d not at FW_READY state", __func__, chan); return (-1); } /* * Get our Loop ID and Port ID. 
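 *
 * MBOX_GET_LOOP_ID returns the pieces in separate mailbox words:
 * the loop ID comes back in param[1], and the 24-bit Port ID is
 * reassembled below as
 *
 *	portid = param[2] | (param[3] << 16)
 *
 * so param[2] carries the area and AL_PA bytes and param[3]
 * supplies the domain byte.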
*/ MBSINIT(&mbs, MBOX_GET_LOOP_ID, MBLOGALL, 0); mbs.param[9] = chan; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return (-1); } if (ISP_CAP_2KLOGIN(isp)) { fcp->isp_loopid = mbs.param[1]; } else { fcp->isp_loopid = mbs.param[1] & 0xff; } if (IS_2100(isp)) { fcp->isp_topo = TOPO_NL_PORT; } else { int topo = (int) mbs.param[6]; if (topo < TOPO_NL_PORT || topo > TOPO_PTP_STUB) { topo = TOPO_PTP_STUB; } fcp->isp_topo = topo; } fcp->isp_portid = mbs.param[2] | (mbs.param[3] << 16); if (IS_2100(isp)) { /* * Don't bother with fabric if we are using really old * 2100 firmware. It's just not worth it. */ if (ISP_FW_NEWER_THAN(isp, 1, 15, 37)) { check_for_fabric = 1; } else { check_for_fabric = 0; } } else if (fcp->isp_topo == TOPO_FL_PORT || fcp->isp_topo == TOPO_F_PORT) { check_for_fabric = 1; } else { check_for_fabric = 0; } /* * Check to make sure we got a valid loopid * The 24XX seems to mess this up for multiple channels. */ if (fcp->isp_topo == TOPO_FL_PORT || fcp->isp_topo == TOPO_NL_PORT) { uint8_t alpa = fcp->isp_portid; if (alpa == 0) { /* "Cannot Happen" */ isp_prt(isp, ISP_LOGWARN, "Zero AL_PA for Loop Topology?"); } else { int i; for (i = 0; alpa_map[i]; i++) { if (alpa_map[i] == alpa) { break; } } if (alpa_map[i] && fcp->isp_loopid != i) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d deriving loopid %d from AL_PA map (AL_PA 0x%x) and ignoring returned value %d (AL_PA 0x%x)", chan, i, alpa_map[i], fcp->isp_loopid, alpa); fcp->isp_loopid = i; } } } if (IS_24XX(isp)) { /* XXX SHOULDN'T THIS BE FOR 2K F/W? XXX */ loopid = NPH_FL_ID; } else { loopid = FL_ID; } if (check_for_fabric) { r = isp_getpdb(isp, chan, loopid, &pdb, 1); if (r && (fcp->isp_topo == TOPO_F_PORT || fcp->isp_topo == TOPO_FL_PORT)) { isp_prt(isp, ISP_LOGWARN, "fabric topology but cannot get info about fabric controller (0x%x)", r); fcp->isp_topo = TOPO_PTP_STUB; } } else { r = -1; } if (r == 0) { if (IS_2100(isp)) { fcp->isp_topo = TOPO_FL_PORT; } if (pdb.portid == 0) { /* * Crock. */ fcp->isp_topo = TOPO_NL_PORT; goto not_on_fabric; } /* * Save the Fabric controller's port database entry. */ lp = &fcp->portdb[FL_ID]; lp->state = FC_PORTDB_STATE_PENDING_VALID; MAKE_WWN_FROM_NODE_NAME(lp->node_wwn, pdb.nodename); MAKE_WWN_FROM_NODE_NAME(lp->port_wwn, pdb.portname); lp->prli_word3 = pdb.prli_word3; lp->portid = pdb.portid; lp->handle = pdb.handle; lp->new_portid = lp->portid; lp->new_prli_word3 = lp->prli_word3; if (IS_24XX(isp)) { if (check_for_fabric) { /* * The mbs is still hanging out from the MBOX_GET_LOOP_ID above. 
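 *
 * (That is, mbs.param[7] read below was filled in by the
 * MBOX_GET_LOOP_ID command and no other mailbox command has been
 * issued since, so its contents are still valid here.)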
*/ fcp->isp_fabric_params = mbs.param[7]; } else { fcp->isp_fabric_params = 0; } - if (chan) { - fcp->isp_sns_hdl = NPH_RESERVED - chan; - r = isp_plogx(isp, chan, fcp->isp_sns_hdl, SNS_PORT_ID, PLOGX_FLG_CMD_PLOGI | PLOGX_FLG_COND_PLOGI | PLOGX_FLG_SKIP_PRLI, 0); - if (r) { - isp_prt(isp, ISP_LOGWARN, "%s: Chan %d cannot log into SNS", __func__, chan); - return (-1); - } - } else { - fcp->isp_sns_hdl = NPH_SNS_ID; - } + fcp->isp_sns_hdl = NPH_SNS_ID; r = isp_register_fc4_type_24xx(isp, chan); } else { fcp->isp_sns_hdl = SNS_ID; r = isp_register_fc4_type(isp, chan); } if (r) { isp_prt(isp, ISP_LOGWARN|ISP_LOG_SANCFG, "%s: register fc4 type failed", __func__); return (-1); } } else { not_on_fabric: fcp->portdb[FL_ID].state = FC_PORTDB_STATE_NIL; } fcp->isp_gbspeed = 1; if (IS_23XX(isp) || IS_24XX(isp)) { MBSINIT(&mbs, MBOX_GET_SET_DATA_RATE, MBLOGALL, 3000000); mbs.param[1] = MBGSD_GET_RATE; /* mbs.param[2] undefined if we're just getting rate */ isp_mboxcmd(isp, &mbs); if (mbs.param[0] == MBOX_COMMAND_COMPLETE) { if (mbs.param[1] == MBGSD_EIGHTGB) { isp_prt(isp, ISP_LOGINFO, "Chan %d 8Gb link speed", chan); fcp->isp_gbspeed = 8; } else if (mbs.param[1] == MBGSD_FOURGB) { isp_prt(isp, ISP_LOGINFO, "Chan %d 4Gb link speed", chan); fcp->isp_gbspeed = 4; } else if (mbs.param[1] == MBGSD_TWOGB) { isp_prt(isp, ISP_LOGINFO, "Chan %d 2Gb link speed", chan); fcp->isp_gbspeed = 2; } else if (mbs.param[1] == MBGSD_ONEGB) { isp_prt(isp, ISP_LOGINFO, "Chan %d 1Gb link speed", chan); fcp->isp_gbspeed = 1; } } } /* * Announce ourselves, too. */ isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGCONFIG, topology, chan, (uint32_t) (fcp->isp_wwpn >> 32), (uint32_t) fcp->isp_wwpn, fcp->isp_portid, fcp->isp_loopid, isp_fc_toponame(fcp)); isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC Link Test Complete", chan); return (0); } /* * Complete the synchronization of our Port Database. * * At this point, we've scanned the local loop (if any) and the fabric * and performed fabric logins on all new devices. * * Our task here is to go through our port database and remove any entities * that are still marked probational (issuing PLOGO for ones which we had * PLOGI'd into) or are dead. * * Our task here is also to check policy to decide whether devices which * have *changed* in some way should still be kept active. For example, * if a device has just changed PortID, we can either elect to treat it * as an old device or as a newly arrived device (and notify the outer * layer appropriately). * * We also do initiator map target id assignment here for new initiator * devices and refresh old ones to make sure that they point to the correct * entities. */ static int isp_pdb_sync(ispsoftc_t *isp, int chan) { fcparam *fcp = FCPARAM(isp, chan); fcportdb_t *lp; uint16_t dbidx; if (fcp->isp_loopstate == LOOP_READY) { return (0); } /* * Make sure we're okay for doing this right now.
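 *
 * A summary of the dispositions applied in the loop below:
 *
 *	PROBATIONAL, DEAD  ->  NIL    (DEV_GONE announced; implicit
 *	                               LOGO unless f/w auto-login)
 *	NEW                ->  VALID  (DEV_ARRIVED announced)
 *	CHANGED            ->  VALID  (DEV_CHANGED announced)
 *	PENDING_VALID      ->  VALID  (DEV_STAYED announced)
 *	ZOMBIE                 left untouched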
*/ if (fcp->isp_loopstate != LOOP_PDB_RCVD && fcp->isp_loopstate != LOOP_FSCAN_DONE && fcp->isp_loopstate != LOOP_LSCAN_DONE) { isp_prt(isp, ISP_LOGWARN, "isp_pdb_sync: bad loopstate %d", fcp->isp_loopstate); return (-1); } if (fcp->isp_topo == TOPO_FL_PORT || fcp->isp_topo == TOPO_NL_PORT || fcp->isp_topo == TOPO_N_PORT) { if (fcp->isp_loopstate < LOOP_LSCAN_DONE) { if (isp_scan_loop(isp, chan) != 0) { isp_prt(isp, ISP_LOGWARN, "isp_pdb_sync: isp_scan_loop failed"); return (-1); } } } if (fcp->isp_topo == TOPO_F_PORT || fcp->isp_topo == TOPO_FL_PORT) { if (fcp->isp_loopstate < LOOP_FSCAN_DONE) { if (isp_scan_fabric(isp, chan) != 0) { isp_prt(isp, ISP_LOGWARN, "isp_pdb_sync: isp_scan_fabric failed"); return (-1); } } } isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Synchronizing PDBs", chan); fcp->isp_loopstate = LOOP_SYNCING_PDB; for (dbidx = 0; dbidx < MAX_FC_TARG; dbidx++) { lp = &fcp->portdb[dbidx]; if (lp->state == FC_PORTDB_STATE_NIL || lp->state == FC_PORTDB_STATE_VALID) { continue; } switch (lp->state) { case FC_PORTDB_STATE_PROBATIONAL: case FC_PORTDB_STATE_DEAD: lp->state = FC_PORTDB_STATE_NIL; isp_async(isp, ISPASYNC_DEV_GONE, chan, lp); if (lp->autologin == 0) { (void) isp_plogx(isp, chan, lp->handle, lp->portid, PLOGX_FLG_CMD_LOGO | PLOGX_FLG_IMPLICIT | PLOGX_FLG_FREE_NPHDL, 0); } else { lp->autologin = 0; } lp->new_prli_word3 = 0; lp->new_portid = 0; /* * Note that we might come out of this with our state * set to FC_PORTDB_STATE_ZOMBIE. */ break; case FC_PORTDB_STATE_NEW: lp->portid = lp->new_portid; lp->prli_word3 = lp->new_prli_word3; lp->state = FC_PORTDB_STATE_VALID; isp_async(isp, ISPASYNC_DEV_ARRIVED, chan, lp); lp->new_prli_word3 = 0; lp->new_portid = 0; break; case FC_PORTDB_STATE_CHANGED: lp->state = FC_PORTDB_STATE_VALID; isp_async(isp, ISPASYNC_DEV_CHANGED, chan, lp); lp->portid = lp->new_portid; lp->prli_word3 = lp->new_prli_word3; lp->new_prli_word3 = 0; lp->new_portid = 0; break; case FC_PORTDB_STATE_PENDING_VALID: lp->portid = lp->new_portid; lp->prli_word3 = lp->new_prli_word3; lp->state = FC_PORTDB_STATE_VALID; isp_async(isp, ISPASYNC_DEV_STAYED, chan, lp); if (dbidx != FL_ID) { lp->new_prli_word3 = 0; lp->new_portid = 0; } break; case FC_PORTDB_STATE_ZOMBIE: break; default: isp_prt(isp, ISP_LOGWARN, "isp_pdb_sync: state %d for idx %d", lp->state, dbidx); isp_dump_portdb(isp, chan); } } /* * If we get here, we've for sure seen not only a valid loop * but know what is or isn't on it, so mark this for usage * in isp_start. */ fcp->loop_seen_once = 1; fcp->isp_loopstate = LOOP_READY; return (0); } /* * Scan local loop for devices. 
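 *
 * Note that "special" N-port handles are never treated as loop
 * devices: with 2K-login f/w anything at or above NPH_RESERVED is
 * skipped, and with older f/w the FL_ID..SNS_ID range is skipped
 * (see the filter at the top of the scan loop below).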
*/ static int isp_scan_loop(ispsoftc_t *isp, int chan) { fcportdb_t *lp, tmp; fcparam *fcp = FCPARAM(isp, chan); int i, idx, lim, r; isp_pdb_t pdb; uint16_t handles[LOCAL_LOOP_LIM]; uint16_t handle; if (fcp->isp_fwstate < FW_READY || fcp->isp_loopstate < LOOP_PDB_RCVD) { return (-1); } if (fcp->isp_loopstate > LOOP_SCANNING_LOOP) { return (0); } if (fcp->isp_topo != TOPO_NL_PORT && fcp->isp_topo != TOPO_FL_PORT && fcp->isp_topo != TOPO_N_PORT) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d no loop topology to scan", chan); fcp->isp_loopstate = LOOP_LSCAN_DONE; return (0); } fcp->isp_loopstate = LOOP_SCANNING_LOOP; lim = LOCAL_LOOP_LIM; r = isp_gethandles(isp, chan, handles, &lim, 1, 1); if (r != 0) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d getting list of handles failed with %x", chan, r); fail: ISP_MARK_PORTDB(isp, chan, 1); isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC scan loop DONE (bad)", chan); return (-1); } isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC scan loop -- %d ports", chan, lim); /* * Run through the list and get the port database info for each one. */ for (idx = 0; idx < lim; idx++) { handle = handles[idx]; /* * Don't scan "special" ids. */ if (ISP_CAP_2KLOGIN(isp)) { - if (handle >= NPH_RESERVED - isp->isp_nchan) + if (handle >= NPH_RESERVED) continue; } else { if (handle >= FL_ID && handle <= SNS_ID) continue; } /* * In older cards with older f/w GET_PORT_DATABASE has been * known to hang. This trick gets around that problem. */ if (IS_2100(isp) || IS_2200(isp)) { uint64_t node_wwn = isp_get_wwn(isp, chan, handle, 1); if (fcp->isp_loopstate < LOOP_SCANNING_LOOP) goto fail; if (node_wwn == INI_NONE) { continue; } } /* * Get the port database entity for this index. */ r = isp_getpdb(isp, chan, handle, &pdb, 1); if (r != 0) { isp_prt(isp, ISP_LOGDEBUG1, "Chan %d FC scan loop handle %d returned %x", chan, handle, r); if (fcp->isp_loopstate < LOOP_SCANNING_LOOP) goto fail; continue; } if (fcp->isp_loopstate < LOOP_SCANNING_LOOP) goto fail; /* * On *very* old 2100 firmware we would end up sometimes * with the firmware returning the port database entry * for something else. We used to restart this, but * now we just punt. */ if (IS_2100(isp) && pdb.handle != handle) { isp_prt(isp, ISP_LOGWARN, "Chan %d getpdb() returned wrong handle %x != %x", chan, pdb.handle, handle); goto fail; } /* * Save the pertinent info locally. */ MAKE_WWN_FROM_NODE_NAME(tmp.node_wwn, pdb.nodename); MAKE_WWN_FROM_NODE_NAME(tmp.port_wwn, pdb.portname); tmp.prli_word3 = pdb.prli_word3; tmp.portid = pdb.portid; tmp.handle = pdb.handle; /* * Check to make sure it's still a valid entry. The 24XX seems * to return a portid but not a WWPN/WWNN or role for devices * which shift on a loop. */ if (tmp.node_wwn == 0 || tmp.port_wwn == 0 || tmp.portid == 0) { int a, b, c; isp_prt(isp, ISP_LOGWARN, "Chan %d bad pdb (WWNN %016jx, WWPN %016jx, PortID %06x, W3 0x%x, H 0x%x) @ handle 0x%x", chan, tmp.node_wwn, tmp.port_wwn, tmp.portid, tmp.prli_word3, tmp.handle, handle); a = (tmp.node_wwn == 0); b = (tmp.port_wwn == 0); c = (tmp.portid == 0); if (a == 0 && b == 0) { tmp.node_wwn = isp_get_wwn(isp, chan, handle, 1); tmp.port_wwn = isp_get_wwn(isp, chan, handle, 0); if (tmp.node_wwn && tmp.port_wwn) { isp_prt(isp, ISP_LOGWARN, "DODGED!"); goto cont; } } isp_dump_portdb(isp, chan); continue; } cont: /* * Now search the entire port database * for the same Port WWN. */ if (isp_find_pdb_by_wwn(isp, chan, tmp.port_wwn, &lp)) { /* * Okay- we've found a non-nil entry that matches. * Check to make sure it's probational or a zombie. 
*/ if (lp->state != FC_PORTDB_STATE_PROBATIONAL && lp->state != FC_PORTDB_STATE_ZOMBIE && lp->state != FC_PORTDB_STATE_VALID) { isp_prt(isp, ISP_LOGERR, "Chan %d [%d] not probational/zombie (0x%x)", chan, FC_PORTDB_TGT(isp, chan, lp), lp->state); isp_dump_portdb(isp, chan); goto fail; } /* * Mark the device as something the f/w logs into * automatically. */ lp->autologin = 1; lp->node_wwn = tmp.node_wwn; /* * Check to see if it is really still the same * device. If it is, we mark it pending valid. */ if (lp->portid == tmp.portid && lp->handle == tmp.handle && lp->prli_word3 == tmp.prli_word3) { lp->new_portid = tmp.portid; lp->new_prli_word3 = tmp.prli_word3; lp->state = FC_PORTDB_STATE_PENDING_VALID; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Loop Port 0x%06x@0x%04x Pending Valid", chan, tmp.portid, tmp.handle); continue; } /* * We can wipe out the old handle value * here because it's no longer valid. */ lp->handle = tmp.handle; /* * Claim that this has changed and let somebody else * decide what to do. */ isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Loop Port 0x%06x@0x%04x changed", chan, tmp.portid, tmp.handle); lp->state = FC_PORTDB_STATE_CHANGED; lp->new_portid = tmp.portid; lp->new_prli_word3 = tmp.prli_word3; continue; } /* * Ah. A new device entry. Find an empty slot * for it and save info for later disposition. */ for (i = 0; i < MAX_FC_TARG; i++) { if (fcp->portdb[i].state == FC_PORTDB_STATE_NIL) { break; } } if (i == MAX_FC_TARG) { isp_prt(isp, ISP_LOGERR, "Chan %d out of portdb entries", chan); continue; } lp = &fcp->portdb[i]; ISP_MEMZERO(lp, sizeof (fcportdb_t)); lp->autologin = 1; lp->state = FC_PORTDB_STATE_NEW; lp->new_portid = tmp.portid; lp->new_prli_word3 = tmp.prli_word3; lp->handle = tmp.handle; lp->port_wwn = tmp.port_wwn; lp->node_wwn = tmp.node_wwn; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Loop Port 0x%06x@0x%04x is New Entry", chan, tmp.portid, tmp.handle); } fcp->isp_loopstate = LOOP_LSCAN_DONE; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC scan loop DONE", chan); return (0); } /* * Scan the fabric for devices and add them to our port database. * * Use the GID_FT command to get all Port IDs for FC4 SCSI devices the * name server knows about. * * For 2100-23XX cards, we can use the SNS mailbox command to pass simple * name server commands to the switch management server via the QLogic f/w. * * For the 24XX card, we have to use CT Passthrough run via the Execute IOCB * mailbox command. * * The net result is to leave the list of Port IDs sitting untranslated at * offset IGPOFF of the FC scratch area, whereupon we'll canonicalize it to * host order at OGPOFF.
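 *
 * The offsets defined below carve up the FC scratch area roughly
 * as follows (low addresses first; not to scale):
 *
 *	0      .. IGPOFF    two queue entries of workspace
 *	IGPOFF .. +GIDLEN   raw GID_FT response from the f/w
 *	OGPOFF .. +GIDLEN   host-order copy of that response
 *	XTXOFF              CT header and payload staged for the f/w
 *	CTXOFF              CT passthrough request IOCB
 *	ZTXOFF              response IOCB written back by the f/w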
*/ /* * Take less than half of our scratch area to store Port IDs */ #define GIDLEN ((ISP_FC_SCRLEN >> 1) - 16 - SNS_GID_FT_REQ_SIZE) #define NGENT ((GIDLEN - 16) >> 2) #define IGPOFF (2 * QENTRY_LEN) #define OGPOFF (ISP_FC_SCRLEN >> 1) #define ZTXOFF (ISP_FC_SCRLEN - (1 * QENTRY_LEN)) #define CTXOFF (ISP_FC_SCRLEN - (2 * QENTRY_LEN)) #define XTXOFF (ISP_FC_SCRLEN - (3 * QENTRY_LEN)) static int isp_gid_ft_sns(ispsoftc_t *isp, int chan) { union { sns_gid_ft_req_t _x; uint8_t _y[SNS_GID_FT_REQ_SIZE]; } un; fcparam *fcp = FCPARAM(isp, chan); sns_gid_ft_req_t *rq = &un._x; mbreg_t mbs; isp_prt(isp, ISP_LOGDEBUG0, "Chan %d scanning fabric (GID_FT) via SNS", chan); ISP_MEMZERO(rq, SNS_GID_FT_REQ_SIZE); rq->snscb_rblen = GIDLEN >> 1; rq->snscb_addr[RQRSP_ADDR0015] = DMA_WD0(fcp->isp_scdma + IGPOFF); rq->snscb_addr[RQRSP_ADDR1631] = DMA_WD1(fcp->isp_scdma + IGPOFF); rq->snscb_addr[RQRSP_ADDR3247] = DMA_WD2(fcp->isp_scdma + IGPOFF); rq->snscb_addr[RQRSP_ADDR4863] = DMA_WD3(fcp->isp_scdma + IGPOFF); rq->snscb_sblen = 6; rq->snscb_cmd = SNS_GID_FT; rq->snscb_mword_div_2 = NGENT; rq->snscb_fc4_type = FC4_SCSI; isp_put_gid_ft_request(isp, rq, fcp->isp_scratch); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, SNS_GID_FT_REQ_SIZE, chan); MBSINIT(&mbs, MBOX_SEND_SNS, MBLOGALL, 10000000); mbs.param[0] = MBOX_SEND_SNS; mbs.param[1] = SNS_GID_FT_REQ_SIZE >> 1; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { if (mbs.param[0] == MBOX_INVALID_COMMAND) { return (1); } else { return (-1); } } return (0); } static int isp_gid_ft_ct_passthru(ispsoftc_t *isp, int chan) { mbreg_t mbs; fcparam *fcp = FCPARAM(isp, chan); union { isp_ct_pt_t plocal; ct_hdr_t clocal; uint8_t q[QENTRY_LEN]; } un; isp_ct_pt_t *pt; ct_hdr_t *ct; uint32_t *rp; uint8_t *scp = fcp->isp_scratch; isp_prt(isp, ISP_LOGDEBUG0, "Chan %d scanning fabric (GID_FT) via CT", chan); if (!IS_24XX(isp)) { return (1); } /* * Build a Passthrough IOCB in memory. */ pt = &un.plocal; ISP_MEMZERO(un.q, QENTRY_LEN); pt->ctp_header.rqs_entry_count = 1; pt->ctp_header.rqs_entry_type = RQSTYPE_CT_PASSTHRU; pt->ctp_handle = 0xffffffff; pt->ctp_nphdl = fcp->isp_sns_hdl; pt->ctp_cmd_cnt = 1; pt->ctp_vpidx = ISP_GET_VPIDX(isp, chan); pt->ctp_time = 30; pt->ctp_rsp_cnt = 1; pt->ctp_rsp_bcnt = GIDLEN; pt->ctp_cmd_bcnt = sizeof (*ct) + sizeof (uint32_t); pt->ctp_dataseg[0].ds_base = DMA_LO32(fcp->isp_scdma+XTXOFF); pt->ctp_dataseg[0].ds_basehi = DMA_HI32(fcp->isp_scdma+XTXOFF); pt->ctp_dataseg[0].ds_count = sizeof (*ct) + sizeof (uint32_t); pt->ctp_dataseg[1].ds_base = DMA_LO32(fcp->isp_scdma+IGPOFF); pt->ctp_dataseg[1].ds_basehi = DMA_HI32(fcp->isp_scdma+IGPOFF); pt->ctp_dataseg[1].ds_count = GIDLEN; if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "ct IOCB", QENTRY_LEN, pt); } isp_put_ct_pt(isp, pt, (isp_ct_pt_t *) &scp[CTXOFF]); /* * Build the CT header and command in memory. * * Note that the CT header has to end up as Big Endian format in memory. 
*/ ct = &un.clocal; ISP_MEMZERO(ct, sizeof (*ct)); ct->ct_revision = CT_REVISION; ct->ct_fcs_type = CT_FC_TYPE_FC; ct->ct_fcs_subtype = CT_FC_SUBTYPE_NS; ct->ct_cmd_resp = SNS_GID_FT; ct->ct_bcnt_resid = (GIDLEN - 16) >> 2; isp_put_ct_hdr(isp, ct, (ct_hdr_t *) &scp[XTXOFF]); rp = (uint32_t *) &scp[XTXOFF+sizeof (*ct)]; ISP_IOZPUT_32(isp, FC4_SCSI, rp); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "CT HDR + payload after put", sizeof (*ct) + sizeof (uint32_t), &scp[XTXOFF]); } ISP_MEMZERO(&scp[ZTXOFF], QENTRY_LEN); MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 500000); mbs.param[1] = QENTRY_LEN; mbs.param[2] = DMA_WD1(fcp->isp_scdma + CTXOFF); mbs.param[3] = DMA_WD0(fcp->isp_scdma + CTXOFF); mbs.param[6] = DMA_WD3(fcp->isp_scdma + CTXOFF); mbs.param[7] = DMA_WD2(fcp->isp_scdma + CTXOFF); MEMORYBARRIER(isp, SYNC_SFORDEV, XTXOFF, 2 * QENTRY_LEN, chan); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { return (-1); } MEMORYBARRIER(isp, SYNC_SFORCPU, ZTXOFF, QENTRY_LEN, chan); pt = &un.plocal; isp_get_ct_pt(isp, (isp_ct_pt_t *) &scp[ZTXOFF], pt); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "IOCB response", QENTRY_LEN, pt); } if (pt->ctp_status && pt->ctp_status != RQCS_DATA_UNDERRUN) { isp_prt(isp, ISP_LOGWARN, "Chan %d ISP GID FT CT Passthrough returned 0x%x", chan, pt->ctp_status); return (-1); } MEMORYBARRIER(isp, SYNC_SFORCPU, IGPOFF, GIDLEN + 16, chan); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "CT response", GIDLEN+16, &scp[IGPOFF]); } return (0); } static int isp_scan_fabric(ispsoftc_t *isp, int chan) { fcparam *fcp = FCPARAM(isp, chan); uint32_t portid; uint16_t handle, loopid; isp_pdb_t pdb; int portidx, portlim, r; sns_gid_ft_rsp_t *rs0, *rs1; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC Scan Fabric", chan); if (fcp->isp_fwstate != FW_READY || fcp->isp_loopstate < LOOP_LSCAN_DONE) { return (-1); } if (fcp->isp_loopstate > LOOP_SCANNING_FABRIC) { return (0); } if (fcp->isp_topo != TOPO_FL_PORT && fcp->isp_topo != TOPO_F_PORT) { fcp->isp_loopstate = LOOP_FSCAN_DONE; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC Scan Fabric Done (no fabric)", chan); return (0); } fcp->isp_loopstate = LOOP_SCANNING_FABRIC; if (FC_SCRATCH_ACQUIRE(isp, chan)) { isp_prt(isp, ISP_LOGERR, sacq); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } if (fcp->isp_loopstate < LOOP_SCANNING_FABRIC) { FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } /* * Make sure we still are logged into the fabric controller. */ if (IS_24XX(isp)) { /* XXX SHOULDN'T THIS BE TRUE FOR 2K F/W? 
XXX */ loopid = NPH_FL_ID; } else { loopid = FL_ID; } r = isp_getpdb(isp, chan, loopid, &pdb, 0); if ((r & 0xffff) == MBOX_NOT_LOGGED_IN) { isp_dump_chip_portdb(isp, chan, 0); } if (r) { fcp->isp_loopstate = LOOP_PDB_RCVD; FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } if (IS_24XX(isp)) { r = isp_gid_ft_ct_passthru(isp, chan); } else { r = isp_gid_ft_sns(isp, chan); } if (fcp->isp_loopstate < LOOP_SCANNING_FABRIC) { FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } if (r > 0) { fcp->isp_loopstate = LOOP_FSCAN_DONE; FC_SCRATCH_RELEASE(isp, chan); return (0); } else if (r < 0) { fcp->isp_loopstate = LOOP_PDB_RCVD; /* try again */ FC_SCRATCH_RELEASE(isp, chan); return (0); } MEMORYBARRIER(isp, SYNC_SFORCPU, IGPOFF, GIDLEN, chan); rs0 = (sns_gid_ft_rsp_t *) ((uint8_t *)fcp->isp_scratch+IGPOFF); rs1 = (sns_gid_ft_rsp_t *) ((uint8_t *)fcp->isp_scratch+OGPOFF); isp_get_gid_ft_response(isp, rs0, rs1, NGENT); if (fcp->isp_loopstate < LOOP_SCANNING_FABRIC) { FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } if (rs1->snscb_cthdr.ct_cmd_resp != LS_ACC) { int level; if (rs1->snscb_cthdr.ct_reason == 9 && rs1->snscb_cthdr.ct_explanation == 7) { level = ISP_LOG_SANCFG; } else { level = ISP_LOGWARN; } isp_prt(isp, level, "Chan %d Fabric Nameserver rejected GID_FT" " (Reason=0x%x Expl=0x%x)", chan, rs1->snscb_cthdr.ct_reason, rs1->snscb_cthdr.ct_explanation); FC_SCRATCH_RELEASE(isp, chan); fcp->isp_loopstate = LOOP_FSCAN_DONE; return (0); } /* * If we get this far, we certainly still have the fabric controller. */ fcp->portdb[FL_ID].state = FC_PORTDB_STATE_PENDING_VALID; /* * Go through the list and remove duplicate port ids. */ portlim = 0; portidx = 0; for (portidx = 0; portidx < NGENT-1; portidx++) { if (rs1->snscb_ports[portidx].control & 0x80) { break; } } /* * If we're not at the last entry, our list wasn't big enough. */ if ((rs1->snscb_ports[portidx].control & 0x80) == 0) { isp_prt(isp, ISP_LOGWARN, "fabric too big for scratch area: increase ISP_FC_SCRLEN"); } portlim = portidx + 1; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d got %d ports back from name server", chan, portlim); for (portidx = 0; portidx < portlim; portidx++) { int npidx; portid = ((rs1->snscb_ports[portidx].portid[0]) << 16) | ((rs1->snscb_ports[portidx].portid[1]) << 8) | ((rs1->snscb_ports[portidx].portid[2])); for (npidx = portidx + 1; npidx < portlim; npidx++) { uint32_t new_portid = ((rs1->snscb_ports[npidx].portid[0]) << 16) | ((rs1->snscb_ports[npidx].portid[1]) << 8) | ((rs1->snscb_ports[npidx].portid[2])); if (new_portid == portid) { break; } } if (npidx < portlim) { rs1->snscb_ports[npidx].portid[0] = 0; rs1->snscb_ports[npidx].portid[1] = 0; rs1->snscb_ports[npidx].portid[2] = 0; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d removing duplicate PortID 0x%06x entry from list", chan, portid); } } /* * We now have a list of Port IDs for all FC4 SCSI devices * that the Fabric Name server knows about. * * For each entry on this list go through our port database looking * for probational entries- if we find one, then an old entry is * maybe still this one. We get some information to find out. * * Otherwise, it's a new fabric device, and we log into it * (unconditionally). After searching the entire database * again to make sure that we never ever ever ever have more * than one entry that has the same PortID or the same * WWNN/WWPN duple, we enter the device into our database. 
*/ for (portidx = 0; portidx < portlim; portidx++) { fcportdb_t *lp; uint64_t wwnn, wwpn; int dbidx, nr; portid = ((rs1->snscb_ports[portidx].portid[0]) << 16) | ((rs1->snscb_ports[portidx].portid[1]) << 8) | ((rs1->snscb_ports[portidx].portid[2])); if (portid == 0) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d skipping null PortID at idx %d", chan, portidx); continue; } /* * Skip ourselves here and on other channels. If we're * multi-id, we can't check the portids in other FCPARAM * arenas because the resolutions here aren't synchronized. * The best way to do this is to exclude looking at portids * that have the same domain and area code as our own * portid. */ if (ISP_CAP_MULTI_ID(isp) && isp->isp_nchan > 1) { if ((portid >> 8) == (fcp->isp_portid >> 8)) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d skip PortID 0x%06x", chan, portid); continue; } } else if (portid == fcp->isp_portid) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d skip ourselves on @ PortID 0x%06x", chan, portid); continue; } isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Checking Fabric Port 0x%06x", chan, portid); /* * We now search our Port Database for any * probational entries with this PortID. We don't * look for zombies here- only probational * entries (we've already logged out of zombies). */ for (dbidx = 0; dbidx < MAX_FC_TARG; dbidx++) { lp = &fcp->portdb[dbidx]; if (lp->state != FC_PORTDB_STATE_PROBATIONAL) { continue; } if (lp->portid == portid) { break; } } /* * We found a probational entry with this Port ID. */ if (dbidx < MAX_FC_TARG) { int handle_changed = 0; lp = &fcp->portdb[dbidx]; /* * See if we're still logged into it. * * If we aren't, mark it as a dead device and * leave the new portid in the database entry * for somebody further along to decide what to * do (policy choice). * * If we are, check to see if it's the same * device still (it should be). If for some * reason it isn't, mark it as a changed device * and leave the new portid and role in the * database entry for somebody further along to * decide what to do (policy choice). * */ r = isp_getpdb(isp, chan, lp->handle, &pdb, 0); if (fcp->isp_loopstate != LOOP_SCANNING_FABRIC) { FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } if (r != 0) { lp->new_portid = portid; lp->state = FC_PORTDB_STATE_DEAD; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Fabric PortID 0x%06x handle 0x%x is dead (%d)", chan, portid, lp->handle, r); continue; } /* * Check to make sure that handle, portid, WWPN and * WWNN agree. If they don't, then the association * between this PortID and the stated handle has been * broken by the firmware. */ MAKE_WWN_FROM_NODE_NAME(wwnn, pdb.nodename); MAKE_WWN_FROM_NODE_NAME(wwpn, pdb.portname); if (pdb.handle != lp->handle || pdb.portid != portid || wwpn != lp->port_wwn || (lp->node_wwn != 0 && wwnn != lp->node_wwn)) { isp_prt(isp, ISP_LOG_SANCFG, fconf, chan, dbidx, pdb.handle, pdb.portid, (uint32_t) (wwnn >> 32), (uint32_t) wwnn, (uint32_t) (wwpn >> 32), (uint32_t) wwpn, lp->handle, portid, (uint32_t) (lp->node_wwn >> 32), (uint32_t) lp->node_wwn, (uint32_t) (lp->port_wwn >> 32), (uint32_t) lp->port_wwn); /* * Try to re-login to this device using a * new handle. If that fails, mark it dead. * * isp_login_device will check for handle and * portid consistency after re-login. 
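 * (A failed re-login leaves the entry in FC_PORTDB_STATE_DEAD with
 * new_portid recorded, so the policy code further along can decide
 * whether to retire the device or try it again.)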
* */ if ((fcp->role & ISP_ROLE_INITIATOR) == 0 || isp_login_device(isp, chan, portid, &pdb, &FCPARAM(isp, 0)->isp_lasthdl)) { lp->new_portid = portid; lp->state = FC_PORTDB_STATE_DEAD; if (fcp->isp_loopstate != LOOP_SCANNING_FABRIC) { FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } continue; } if (fcp->isp_loopstate != LOOP_SCANNING_FABRIC) { FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } MAKE_WWN_FROM_NODE_NAME(wwnn, pdb.nodename); MAKE_WWN_FROM_NODE_NAME(wwpn, pdb.portname); if (wwpn != lp->port_wwn || (lp->node_wwn != 0 && wwnn != lp->node_wwn)) { isp_prt(isp, ISP_LOGWARN, "changed WWN" " after relogin"); lp->new_portid = portid; lp->state = FC_PORTDB_STATE_DEAD; continue; } lp->handle = pdb.handle; handle_changed++; } nr = pdb.prli_word3; /* * Check to see whether the portid and roles have * stayed the same. If they have stayed the same, * we believe that this is the same device and it * hasn't become disconnected and reconnected, so * mark it as pending valid. * * If they aren't the same, mark the device as a * changed device and save the new port id and role * and let somebody else decide. */ lp->new_portid = portid; lp->new_prli_word3 = nr; if (pdb.portid != lp->portid || nr != lp->prli_word3 || handle_changed) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Fabric Port 0x%06x changed", chan, portid); lp->state = FC_PORTDB_STATE_CHANGED; } else { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Fabric Port 0x%06x Now Pending Valid", chan, portid); lp->state = FC_PORTDB_STATE_PENDING_VALID; } continue; } if ((fcp->role & ISP_ROLE_INITIATOR) == 0) continue; /* * Ah- a new entry. Search the database again for all non-NIL * entries to make sure we never ever make a new database entry * with the same port id. While we're at it, mark where the * last free entry was. */ dbidx = MAX_FC_TARG; for (lp = fcp->portdb; lp < &fcp->portdb[MAX_FC_TARG]; lp++) { if (lp >= &fcp->portdb[FL_ID] && lp <= &fcp->portdb[SNS_ID]) { continue; } if (lp->state == FC_PORTDB_STATE_NIL) { if (dbidx == MAX_FC_TARG) { dbidx = lp - fcp->portdb; } continue; } if (lp->state == FC_PORTDB_STATE_ZOMBIE) { continue; } if (lp->portid == portid) { break; } } if (lp < &fcp->portdb[MAX_FC_TARG]) { isp_prt(isp, ISP_LOGWARN, "Chan %d PortID 0x%06x " "already at %d handle %d state %d", chan, portid, dbidx, lp->handle, lp->state); continue; } /* * We should have the index of the first free entry seen. */ if (dbidx == MAX_FC_TARG) { isp_prt(isp, ISP_LOGERR, "port database too small to login PortID 0x%06x" "- increase MAX_FC_TARG", portid); continue; } /* * Otherwise, point to our new home. */ lp = &fcp->portdb[dbidx]; /* * Try to see if we are logged into this device, * and maybe log into it. * * isp_login_device will check for handle and * portid consistency after login. 
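 * (The shared last-handle cursor lives in channel 0's fcparam --
 * &FCPARAM(isp, 0)->isp_lasthdl -- so handle allocation stays global
 * across channels.)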
*/ if (isp_login_device(isp, chan, portid, &pdb, &FCPARAM(isp, 0)->isp_lasthdl)) { if (fcp->isp_loopstate != LOOP_SCANNING_FABRIC) { FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } continue; } if (fcp->isp_loopstate != LOOP_SCANNING_FABRIC) { FC_SCRATCH_RELEASE(isp, chan); ISP_MARK_PORTDB(isp, chan, 1); return (-1); } handle = pdb.handle; MAKE_WWN_FROM_NODE_NAME(wwnn, pdb.nodename); MAKE_WWN_FROM_NODE_NAME(wwpn, pdb.portname); nr = pdb.prli_word3; /* * And go through the database *one* more time to make sure * that we do not make more than one entry that has the same * WWNN/WWPN duple */ for (dbidx = 0; dbidx < MAX_FC_TARG; dbidx++) { if (dbidx >= FL_ID && dbidx <= SNS_ID) { continue; } if ((fcp->portdb[dbidx].node_wwn == wwnn || fcp->portdb[dbidx].node_wwn == 0) && fcp->portdb[dbidx].port_wwn == wwpn) { break; } } if (dbidx == MAX_FC_TARG) { ISP_MEMZERO(lp, sizeof (fcportdb_t)); lp->handle = handle; lp->node_wwn = wwnn; lp->port_wwn = wwpn; lp->new_portid = portid; lp->new_prli_word3 = nr; lp->state = FC_PORTDB_STATE_NEW; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Fabric Port 0x%06x is a New Entry", chan, portid); continue; } if (fcp->portdb[dbidx].state != FC_PORTDB_STATE_ZOMBIE) { isp_prt(isp, ISP_LOGWARN, "Chan %d PortID 0x%x 0x%08x%08x/0x%08x%08x %ld " "already at idx %d, state 0x%x", chan, portid, (uint32_t) (wwnn >> 32), (uint32_t) wwnn, (uint32_t) (wwpn >> 32), (uint32_t) wwpn, (long) (lp - fcp->portdb), dbidx, fcp->portdb[dbidx].state); continue; } /* * We found a zombie entry that matches us. * Revive it. We know that WWN and WWPN * are the same. For fabric devices, we * don't care that handle is different * as we assign that. If role or portid * are different, it may be a changed device. */ lp = &fcp->portdb[dbidx]; lp->handle = handle; lp->node_wwn = wwnn; lp->new_portid = portid; lp->new_prli_word3 = nr; if (lp->portid != portid || lp->prli_word3 != nr) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Zombie Fabric Port 0x%06x Now Changed", chan, portid); lp->state = FC_PORTDB_STATE_CHANGED; } else { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Zombie Fabric Port 0x%06x Now Pending Valid", chan, portid); lp->state = FC_PORTDB_STATE_PENDING_VALID; } } FC_SCRATCH_RELEASE(isp, chan); if (fcp->isp_loopstate != LOOP_SCANNING_FABRIC) { ISP_MARK_PORTDB(isp, chan, 1); return (-1); } fcp->isp_loopstate = LOOP_FSCAN_DONE; isp_prt(isp, ISP_LOG_SANCFG, "Chan %d FC Scan Fabric Done", chan); return (0); } /* * Find an unused handle and try to use it to log in to a port. */ static int isp_login_device(ispsoftc_t *isp, int chan, uint32_t portid, isp_pdb_t *p, uint16_t *ohp) { int lim, i, r; uint16_t handle; if (ISP_CAP_2KLOGIN(isp)) { lim = NPH_MAX_2K; } else { lim = NPH_MAX; } handle = isp_next_handle(isp, ohp); for (i = 0; i < lim; i++) { /* * See if we're still logged into something with * this handle and that something agrees with this * port id.
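 * (Three outcomes below: the handle already maps to our PortID, so we
 * reuse the login; it maps to some other PortID, so we implicitly LOGO
 * and free it first; or it is unused and we PLOGI fresh.)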
*/ r = isp_getpdb(isp, chan, handle, p, 0); if (r == 0 && p->portid != portid) { (void) isp_plogx(isp, chan, handle, portid, PLOGX_FLG_CMD_LOGO | PLOGX_FLG_IMPLICIT | PLOGX_FLG_FREE_NPHDL, 1); } else if (r == 0) { break; } if (FCPARAM(isp, chan)->isp_loopstate != LOOP_SCANNING_FABRIC) { return (-1); } /* * Now try and log into the device */ r = isp_plogx(isp, chan, handle, portid, PLOGX_FLG_CMD_PLOGI, 1); if (FCPARAM(isp, chan)->isp_loopstate != LOOP_SCANNING_FABRIC) { return (-1); } if (r == 0) { break; } else if ((r & 0xffff) == MBOX_PORT_ID_USED) { /* * If we get here, then the firmware still thinks we're logged into this device, but with a different * handle. We need to break that association. We used to try and just substitute the handle, but then * failed to get any data via isp_getpdb (below). */ if (isp_plogx(isp, chan, r >> 16, portid, PLOGX_FLG_CMD_LOGO | PLOGX_FLG_IMPLICIT | PLOGX_FLG_FREE_NPHDL, 1)) { isp_prt(isp, ISP_LOGERR, "baw... logout of %x failed", r >> 16); } if (FCPARAM(isp, chan)->isp_loopstate != LOOP_SCANNING_FABRIC) { return (-1); } r = isp_plogx(isp, chan, handle, portid, PLOGX_FLG_CMD_PLOGI, 1); if (FCPARAM(isp, chan)->isp_loopstate != LOOP_SCANNING_FABRIC) { return (-1); } if (r != 0) i = lim; break; } else if ((r & 0xffff) == MBOX_LOOP_ID_USED) { /* Try the next loop id. */ handle = isp_next_handle(isp, ohp); } else { /* Give up. */ i = lim; break; } } if (i == lim) { isp_prt(isp, ISP_LOGWARN, "Chan %d PLOGI 0x%06x failed", chan, portid); return (-1); } /* * If we successfully logged into it, get the PDB for it * so we can crosscheck that it is still what we think it * is and that we also have the role it plays */ r = isp_getpdb(isp, chan, handle, p, 0); if (FCPARAM(isp, chan)->isp_loopstate != LOOP_SCANNING_FABRIC) { return (-1); } if (r != 0) { isp_prt(isp, ISP_LOGERR, "Chan %d new device 0x%06x@0x%x disappeared", chan, portid, handle); return (-1); } if (p->handle != handle || p->portid != portid) { isp_prt(isp, ISP_LOGERR, "Chan %d new device 0x%06x@0x%x changed (0x%06x@0x%0x)", chan, portid, handle, p->portid, p->handle); return (-1); } return (0); } static int isp_register_fc4_type(ispsoftc_t *isp, int chan) { fcparam *fcp = FCPARAM(isp, chan); uint8_t local[SNS_RFT_ID_REQ_SIZE]; sns_screq_t *reqp = (sns_screq_t *) local; mbreg_t mbs; ISP_MEMZERO((void *) reqp, SNS_RFT_ID_REQ_SIZE); reqp->snscb_rblen = SNS_RFT_ID_RESP_SIZE >> 1; reqp->snscb_addr[RQRSP_ADDR0015] = DMA_WD0(fcp->isp_scdma + 0x100); reqp->snscb_addr[RQRSP_ADDR1631] = DMA_WD1(fcp->isp_scdma + 0x100); reqp->snscb_addr[RQRSP_ADDR3247] = DMA_WD2(fcp->isp_scdma + 0x100); reqp->snscb_addr[RQRSP_ADDR4863] = DMA_WD3(fcp->isp_scdma + 0x100); reqp->snscb_sblen = 22; reqp->snscb_data[0] = SNS_RFT_ID; reqp->snscb_data[4] = fcp->isp_portid & 0xffff; reqp->snscb_data[5] = (fcp->isp_portid >> 16) & 0xff; reqp->snscb_data[6] = (1 << FC4_SCSI); if (FC_SCRATCH_ACQUIRE(isp, chan)) { isp_prt(isp, ISP_LOGERR, sacq); return (-1); } isp_put_sns_request(isp, reqp, (sns_screq_t *) fcp->isp_scratch); MBSINIT(&mbs, MBOX_SEND_SNS, MBLOGALL, 1000000); mbs.param[1] = SNS_RFT_ID_REQ_SIZE >> 1; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, SNS_RFT_ID_REQ_SIZE, chan); isp_mboxcmd(isp, &mbs); FC_SCRATCH_RELEASE(isp, chan); if (mbs.param[0] == MBOX_COMMAND_COMPLETE) { return (0); } else { return (-1); } } static int isp_register_fc4_type_24xx(ispsoftc_t *isp, int chan) { mbreg_t
mbs; fcparam *fcp = FCPARAM(isp, chan); union { isp_ct_pt_t plocal; rft_id_t clocal; uint8_t q[QENTRY_LEN]; } un; isp_ct_pt_t *pt; ct_hdr_t *ct; rft_id_t *rp; uint8_t *scp = fcp->isp_scratch; if (FC_SCRATCH_ACQUIRE(isp, chan)) { isp_prt(isp, ISP_LOGERR, sacq); return (-1); } /* * Build a Passthrough IOCB in memory. */ ISP_MEMZERO(un.q, QENTRY_LEN); pt = &un.plocal; pt->ctp_header.rqs_entry_count = 1; pt->ctp_header.rqs_entry_type = RQSTYPE_CT_PASSTHRU; pt->ctp_handle = 0xffffffff; pt->ctp_nphdl = fcp->isp_sns_hdl; pt->ctp_cmd_cnt = 1; pt->ctp_vpidx = ISP_GET_VPIDX(isp, chan); pt->ctp_time = 1; pt->ctp_rsp_cnt = 1; pt->ctp_rsp_bcnt = sizeof (ct_hdr_t); pt->ctp_cmd_bcnt = sizeof (rft_id_t); pt->ctp_dataseg[0].ds_base = DMA_LO32(fcp->isp_scdma+XTXOFF); pt->ctp_dataseg[0].ds_basehi = DMA_HI32(fcp->isp_scdma+XTXOFF); pt->ctp_dataseg[0].ds_count = sizeof (rft_id_t); pt->ctp_dataseg[1].ds_base = DMA_LO32(fcp->isp_scdma+IGPOFF); pt->ctp_dataseg[1].ds_basehi = DMA_HI32(fcp->isp_scdma+IGPOFF); pt->ctp_dataseg[1].ds_count = sizeof (ct_hdr_t); isp_put_ct_pt(isp, pt, (isp_ct_pt_t *) &scp[CTXOFF]); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "IOCB CT Request", QENTRY_LEN, pt); } /* * Build the CT header and command in memory. * * Note that the CT header has to end up as Big Endian format in memory. */ ISP_MEMZERO(&un.clocal, sizeof (un.clocal)); ct = &un.clocal.rftid_hdr; ct->ct_revision = CT_REVISION; ct->ct_fcs_type = CT_FC_TYPE_FC; ct->ct_fcs_subtype = CT_FC_SUBTYPE_NS; ct->ct_cmd_resp = SNS_RFT_ID; ct->ct_bcnt_resid = (sizeof (rft_id_t) - sizeof (ct_hdr_t)) >> 2; rp = &un.clocal; rp->rftid_portid[0] = fcp->isp_portid >> 16; rp->rftid_portid[1] = fcp->isp_portid >> 8; rp->rftid_portid[2] = fcp->isp_portid; rp->rftid_fc4types[FC4_SCSI >> 5] = 1 << (FC4_SCSI & 0x1f); isp_put_rft_id(isp, rp, (rft_id_t *) &scp[XTXOFF]); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "CT Header", QENTRY_LEN, &scp[XTXOFF]); } ISP_MEMZERO(&scp[ZTXOFF], sizeof (ct_hdr_t)); MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 1000000); mbs.param[1] = QENTRY_LEN; mbs.param[2] = DMA_WD1(fcp->isp_scdma + CTXOFF); mbs.param[3] = DMA_WD0(fcp->isp_scdma + CTXOFF); mbs.param[6] = DMA_WD3(fcp->isp_scdma + CTXOFF); mbs.param[7] = DMA_WD2(fcp->isp_scdma + CTXOFF); MEMORYBARRIER(isp, SYNC_SFORDEV, XTXOFF, 2 * QENTRY_LEN, chan); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { FC_SCRATCH_RELEASE(isp, chan); return (-1); } MEMORYBARRIER(isp, SYNC_SFORCPU, ZTXOFF, QENTRY_LEN, chan); pt = &un.plocal; isp_get_ct_pt(isp, (isp_ct_pt_t *) &scp[ZTXOFF], pt); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "IOCB response", QENTRY_LEN, pt); } if (pt->ctp_status) { FC_SCRATCH_RELEASE(isp, chan); isp_prt(isp, ISP_LOGWARN, "Chan %d Register FC4 Type CT Passthrough returned 0x%x", chan, pt->ctp_status); return (1); } isp_get_ct_hdr(isp, (ct_hdr_t *) &scp[IGPOFF], ct); FC_SCRATCH_RELEASE(isp, chan); if (ct->ct_cmd_resp == LS_RJT) { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOG_WARN1, "Chan %d Register FC4 Type rejected", chan); return (-1); } else if (ct->ct_cmd_resp == LS_ACC) { isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Register FC4 Type accepted", chan); return (0); } else { isp_prt(isp, ISP_LOGWARN, "Chan %d Register FC4 Type: 0x%x", chan, ct->ct_cmd_resp); return (-1); } } static uint16_t isp_next_handle(ispsoftc_t *isp, uint16_t *ohp) { fcparam *fcp; int i, chan, wrap; uint16_t handle, minh, maxh; handle = *ohp; if (ISP_CAP_2KLOGIN(isp)) { minh = 0; - maxh = NPH_RESERVED - isp->isp_nchan; /* Reserve for 
SNS */ + maxh = NPH_RESERVED - 1; } else { minh = SNS_ID + 1; maxh = NPH_MAX - 1; } wrap = 0; next: if (handle == NIL_HANDLE) { handle = minh; } else { handle++; if (handle > maxh) { if (++wrap >= 2) { isp_prt(isp, ISP_LOGERR, "Out of port handles!"); return (NIL_HANDLE); } handle = minh; } } for (chan = 0; chan < isp->isp_nchan; chan++) { fcp = FCPARAM(isp, chan); if (fcp->role == ISP_ROLE_NONE) continue; for (i = 0; i < MAX_FC_TARG; i++) { if (fcp->portdb[i].state != FC_PORTDB_STATE_NIL && fcp->portdb[i].handle == handle) goto next; } } *ohp = handle; return (handle); } /* * Start a command. Locking is assumed done in the caller. */ int isp_start(XS_T *xs) { ispsoftc_t *isp; uint32_t handle, cdblen; uint8_t local[QENTRY_LEN]; ispreq_t *reqp; void *cdbp, *qep; uint16_t *tptr; fcportdb_t *lp; int target, dmaresult; XS_INITERR(xs); isp = XS_ISP(xs); /* * Check command CDB length, etc.. We really are limited to 16 bytes * for Fibre Channel, but can do up to 44 bytes in parallel SCSI, * but probably only if we're running fairly new firmware (we'll * let the old f/w choke on an extended command queue entry). */ if (XS_CDBLEN(xs) > (IS_FC(isp)? 16 : 44) || XS_CDBLEN(xs) == 0) { isp_prt(isp, ISP_LOGERR, "unsupported cdb length (%d, CDB[0]=0x%x)", XS_CDBLEN(xs), XS_CDBP(xs)[0] & 0xff); XS_SETERR(xs, HBA_BOTCH); return (CMD_COMPLETE); } /* * Translate the target to device handle as appropriate, checking * for correct device state as well. */ target = XS_TGT(xs); if (IS_FC(isp)) { fcparam *fcp = FCPARAM(isp, XS_CHANNEL(xs)); if ((fcp->role & ISP_ROLE_INITIATOR) == 0) { isp_prt(isp, ISP_LOG_WARN1, "%d.%d.%jx I am not an initiator", XS_CHANNEL(xs), target, (uintmax_t)XS_LUN(xs)); XS_SETERR(xs, HBA_SELTIMEOUT); return (CMD_COMPLETE); } if (isp->isp_state != ISP_RUNSTATE) { isp_prt(isp, ISP_LOGERR, "Adapter not at RUNSTATE"); XS_SETERR(xs, HBA_BOTCH); return (CMD_COMPLETE); } /* * Try again later. */ if (fcp->isp_fwstate != FW_READY || fcp->isp_loopstate != LOOP_READY) { return (CMD_RQLATER); } isp_prt(isp, ISP_LOGDEBUG2, "XS_TGT(xs)=%d", target); lp = &fcp->portdb[target]; if (target < 0 || target >= MAX_FC_TARG || lp->is_target == 0) { XS_SETERR(xs, HBA_SELTIMEOUT); return (CMD_COMPLETE); } if (lp->state == FC_PORTDB_STATE_ZOMBIE) { isp_prt(isp, ISP_LOGDEBUG1, "%d.%d.%jx target zombie", XS_CHANNEL(xs), target, (uintmax_t)XS_LUN(xs)); return (CMD_RQLATER); } if (lp->state != FC_PORTDB_STATE_VALID) { isp_prt(isp, ISP_LOGDEBUG1, "%d.%d.%jx bad db port state 0x%x", XS_CHANNEL(xs), target, (uintmax_t)XS_LUN(xs), lp->state); XS_SETERR(xs, HBA_SELTIMEOUT); return (CMD_COMPLETE); } } else { sdparam *sdp = SDPARAM(isp, XS_CHANNEL(xs)); if ((sdp->role & ISP_ROLE_INITIATOR) == 0) { isp_prt(isp, ISP_LOGDEBUG1, "%d.%d.%jx I am not an initiator", XS_CHANNEL(xs), target, (uintmax_t)XS_LUN(xs)); XS_SETERR(xs, HBA_SELTIMEOUT); return (CMD_COMPLETE); } if (isp->isp_state != ISP_RUNSTATE) { isp_prt(isp, ISP_LOGERR, "Adapter not at RUNSTATE"); XS_SETERR(xs, HBA_BOTCH); return (CMD_COMPLETE); } if (sdp->update) { isp_spi_update(isp, XS_CHANNEL(xs)); } lp = NULL; } start_again: qep = isp_getrqentry(isp); if (qep == NULL) { isp_prt(isp, ISP_LOG_WARN1, "Request Queue Overflow"); XS_SETERR(xs, HBA_BOTCH); return (CMD_EAGAIN); } XS_SETERR(xs, HBA_NOERROR); /* * Now see if we need to synchronize the ISP with respect to anything. * We do dual duty here (cough) for synchronizing for busses other * than which we got here to send a command to. 
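 * (If a bus reset or loop event set the per-channel sendmarker flag,
 * a MARKER IOCB with SYNC_ALL is queued first, then we jump back to
 * start_again to build the actual command in a fresh queue entry.)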
*/ reqp = (ispreq_t *) local; ISP_MEMZERO(local, QENTRY_LEN); if (ISP_TST_SENDMARKER(isp, XS_CHANNEL(xs))) { if (IS_24XX(isp)) { isp_marker_24xx_t *m = (isp_marker_24xx_t *) reqp; m->mrk_header.rqs_entry_count = 1; m->mrk_header.rqs_entry_type = RQSTYPE_MARKER; m->mrk_modifier = SYNC_ALL; m->mrk_vphdl = XS_CHANNEL(xs); isp_put_marker_24xx(isp, m, qep); } else { isp_marker_t *m = (isp_marker_t *) reqp; m->mrk_header.rqs_entry_count = 1; m->mrk_header.rqs_entry_type = RQSTYPE_MARKER; m->mrk_target = (XS_CHANNEL(xs) << 7); /* bus # */ m->mrk_modifier = SYNC_ALL; isp_put_marker(isp, m, qep); } ISP_SYNC_REQUEST(isp); ISP_SET_SENDMARKER(isp, XS_CHANNEL(xs), 0); goto start_again; } reqp->req_header.rqs_entry_count = 1; /* * Select and install Header Code. * Note that it might be overridden before going out * if we're on a 64 bit platform. The lower level * code (isp_send_cmd) will select the appropriate * 64 bit variant if it needs to. */ if (IS_24XX(isp)) { reqp->req_header.rqs_entry_type = RQSTYPE_T7RQS; } else if (IS_FC(isp)) { reqp->req_header.rqs_entry_type = RQSTYPE_T2RQS; } else { if (XS_CDBLEN(xs) > 12) { reqp->req_header.rqs_entry_type = RQSTYPE_CMDONLY; } else { reqp->req_header.rqs_entry_type = RQSTYPE_REQUEST; } } /* * Set task attributes */ if (IS_24XX(isp)) { int ttype; if (XS_TAG_P(xs)) { ttype = XS_TAG_TYPE(xs); } else { if (XS_CDBP(xs)[0] == 0x3) { ttype = REQFLAG_HTAG; } else { ttype = REQFLAG_STAG; } } if (ttype == REQFLAG_OTAG) { ttype = FCP_CMND_TASK_ATTR_ORDERED; } else if (ttype == REQFLAG_HTAG) { ttype = FCP_CMND_TASK_ATTR_HEAD; } else { ttype = FCP_CMND_TASK_ATTR_SIMPLE; } ((ispreqt7_t *)reqp)->req_task_attribute = ttype; } else if (IS_FC(isp)) { /* * See comment in isp_intr */ /* XS_SET_RESID(xs, 0); */ /* * Fibre Channel always requires some kind of tag. * The Qlogic drivers seem to be happy not to use a tag, * but this breaks for some devices (IBM drives). */ if (XS_TAG_P(xs)) { ((ispreqt2_t *)reqp)->req_flags = XS_TAG_TYPE(xs); } else { /* * If we don't know what tag to use, use HEAD OF QUEUE * for Request Sense or Simple.
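 * (0x3 is the REQUEST SENSE opcode; head-of-queue keeps sense
 * retrieval from waiting behind ordinary queued I/O.)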
*/ if (XS_CDBP(xs)[0] == 0x3) /* REQUEST SENSE */ ((ispreqt2_t *)reqp)->req_flags = REQFLAG_HTAG; else ((ispreqt2_t *)reqp)->req_flags = REQFLAG_STAG; } } else { sdparam *sdp = SDPARAM(isp, XS_CHANNEL(xs)); if ((sdp->isp_devparam[target].actv_flags & DPARM_TQING) && XS_TAG_P(xs)) { reqp->req_flags = XS_TAG_TYPE(xs); } } tptr = &reqp->req_time; /* * NB: we do not support long CDBs (yet) */ cdblen = XS_CDBLEN(xs); if (IS_SCSI(isp)) { if (cdblen > sizeof (reqp->req_cdb)) { isp_prt(isp, ISP_LOGERR, "Command Length %u too long for this chip", cdblen); XS_SETERR(xs, HBA_BOTCH); return (CMD_COMPLETE); } reqp->req_target = target | (XS_CHANNEL(xs) << 7); reqp->req_lun_trn = XS_LUN(xs); cdbp = reqp->req_cdb; reqp->req_cdblen = cdblen; } else if (IS_24XX(isp)) { ispreqt7_t *t7 = (ispreqt7_t *)local; if (cdblen > sizeof (t7->req_cdb)) { isp_prt(isp, ISP_LOGERR, "Command Length %u too long for this chip", cdblen); XS_SETERR(xs, HBA_BOTCH); return (CMD_COMPLETE); } t7->req_nphdl = lp->handle; t7->req_tidlo = lp->portid; t7->req_tidhi = lp->portid >> 16; t7->req_vpidx = ISP_GET_VPIDX(isp, XS_CHANNEL(xs)); #if __FreeBSD_version >= 1000700 be64enc(t7->req_lun, CAM_EXTLUN_BYTE_SWIZZLE(XS_LUN(xs))); #else if (XS_LUN(xs) >= 256) { t7->req_lun[0] = XS_LUN(xs) >> 8; t7->req_lun[0] |= 0x40; } t7->req_lun[1] = XS_LUN(xs); #endif if (FCPARAM(isp, XS_CHANNEL(xs))->fctape_enabled && (lp->prli_word3 & PRLI_WD3_RETRY)) { if (FCP_NEXT_CRN(isp, &t7->req_crn, xs)) { isp_prt(isp, ISP_LOG_WARN1, "%d.%d.%jx cannot generate next CRN", XS_CHANNEL(xs), target, (uintmax_t)XS_LUN(xs)); XS_SETERR(xs, HBA_BOTCH); return (CMD_EAGAIN); } } tptr = &t7->req_time; cdbp = t7->req_cdb; } else { ispreqt2_t *t2 = (ispreqt2_t *)local; if (cdblen > sizeof t2->req_cdb) { isp_prt(isp, ISP_LOGERR, "Command Length %u too long for this chip", cdblen); XS_SETERR(xs, HBA_BOTCH); return (CMD_COMPLETE); } if (FCPARAM(isp, XS_CHANNEL(xs))->fctape_enabled && (lp->prli_word3 & PRLI_WD3_RETRY)) { if (FCP_NEXT_CRN(isp, &t2->req_crn, xs)) { isp_prt(isp, ISP_LOG_WARN1, "%d.%d.%jx cannot generate next CRN", XS_CHANNEL(xs), target, (uintmax_t)XS_LUN(xs)); XS_SETERR(xs, HBA_BOTCH); return (CMD_EAGAIN); } } if (ISP_CAP_2KLOGIN(isp)) { ispreqt2e_t *t2e = (ispreqt2e_t *)local; t2e->req_target = lp->handle; t2e->req_scclun = XS_LUN(xs); #if __FreeBSD_version < 1000700 if (XS_LUN(xs) >= 256) t2e->req_scclun |= 0x4000; #endif cdbp = t2e->req_cdb; } else if (ISP_CAP_SCCFW(isp)) { ispreqt2_t *t2 = (ispreqt2_t *)local; t2->req_target = lp->handle; t2->req_scclun = XS_LUN(xs); #if __FreeBSD_version < 1000700 if (XS_LUN(xs) >= 256) t2->req_scclun |= 0x4000; #endif cdbp = t2->req_cdb; } else { t2->req_target = lp->handle; t2->req_lun_trn = XS_LUN(xs); cdbp = t2->req_cdb; } } ISP_MEMCPY(cdbp, XS_CDBP(xs), cdblen); *tptr = XS_TIME(xs) / 1000; if (*tptr == 0 && XS_TIME(xs)) { *tptr = 1; } if (IS_24XX(isp) && *tptr > 0x1999) { *tptr = 0x1999; } if (isp_allocate_xs(isp, xs, &handle)) { isp_prt(isp, ISP_LOG_WARN1, "out of xflist pointers"); XS_SETERR(xs, HBA_BOTCH); return (CMD_EAGAIN); } /* Whew. Thankfully the same for type 7 requests */ reqp->req_handle = handle; /* * Set up DMA and/or do any platform dependent swizzling of the request entry * so that the Qlogic F/W understands what is being asked of it. * * The callee is responsible for adding all requests at this point. */ dmaresult = ISP_DMASETUP(isp, xs, reqp); if (dmaresult != CMD_QUEUED) { isp_destroy_handle(isp, handle); /* * dmasetup sets actual error in packet, and * return what we were given to return. 
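 * (Anything other than CMD_QUEUED from ISP_DMASETUP means the request
 * was not added; we free the handle above and hand the disposition
 * straight back to the caller.)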
*/ return (dmaresult); } isp_xs_prt(isp, xs, ISP_LOGDEBUG0, "START cmd cdb[0]=0x%x datalen %ld", XS_CDBP(xs)[0], (long) XS_XFRLEN(xs)); isp->isp_nactive++; return (CMD_QUEUED); } /* * isp control * Locks (ints blocked) assumed held. */ int isp_control(ispsoftc_t *isp, ispctl_t ctl, ...) { XS_T *xs; mbreg_t *mbr, mbs; int chan, tgt; uint32_t handle; va_list ap; switch (ctl) { case ISPCTL_RESET_BUS: /* * Issue a bus reset. */ if (IS_24XX(isp)) { isp_prt(isp, ISP_LOGERR, "BUS RESET NOT IMPLEMENTED"); break; } else if (IS_FC(isp)) { mbs.param[1] = 10; chan = 0; } else { va_start(ap, ctl); chan = va_arg(ap, int); va_end(ap); mbs.param[1] = SDPARAM(isp, chan)->isp_bus_reset_delay; if (mbs.param[1] < 2) { mbs.param[1] = 2; } mbs.param[2] = chan; } MBSINIT(&mbs, MBOX_BUS_RESET, MBLOGALL, 0); ISP_SET_SENDMARKER(isp, chan, 1); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { break; } isp_prt(isp, ISP_LOGINFO, "driver initiated bus reset of bus %d", chan); return (0); case ISPCTL_RESET_DEV: va_start(ap, ctl); chan = va_arg(ap, int); tgt = va_arg(ap, int); va_end(ap); if (IS_24XX(isp)) { uint8_t local[QENTRY_LEN]; isp24xx_tmf_t *tmf; isp24xx_statusreq_t *sp; fcparam *fcp = FCPARAM(isp, chan); fcportdb_t *lp; if (tgt < 0 || tgt >= MAX_FC_TARG) { isp_prt(isp, ISP_LOGWARN, "Chan %d trying to reset bad target %d", chan, tgt); break; } lp = &fcp->portdb[tgt]; if (lp->is_target == 0 || lp->state != FC_PORTDB_STATE_VALID) { isp_prt(isp, ISP_LOGWARN, "Chan %d abort of no longer valid target %d", chan, tgt); break; } tmf = (isp24xx_tmf_t *) local; ISP_MEMZERO(tmf, QENTRY_LEN); tmf->tmf_header.rqs_entry_type = RQSTYPE_TSK_MGMT; tmf->tmf_header.rqs_entry_count = 1; tmf->tmf_nphdl = lp->handle; tmf->tmf_delay = 2; tmf->tmf_timeout = 2; tmf->tmf_flags = ISP24XX_TMF_TARGET_RESET; tmf->tmf_tidlo = lp->portid; tmf->tmf_tidhi = lp->portid >> 16; tmf->tmf_vpidx = ISP_GET_VPIDX(isp, chan); isp_prt(isp, ISP_LOGALL, "Chan %d Reset N-Port Handle 0x%04x @ Port 0x%06x", chan, lp->handle, lp->portid); MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 5000000); mbs.param[1] = QENTRY_LEN; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); if (FC_SCRATCH_ACQUIRE(isp, chan)) { isp_prt(isp, ISP_LOGERR, sacq); break; } isp_put_24xx_tmf(isp, tmf, fcp->isp_scratch); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, QENTRY_LEN, chan); fcp->sendmarker = 1; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { FC_SCRATCH_RELEASE(isp, chan); break; } MEMORYBARRIER(isp, SYNC_SFORCPU, QENTRY_LEN, QENTRY_LEN, chan); sp = (isp24xx_statusreq_t *) local; isp_get_24xx_response(isp, &((isp24xx_statusreq_t *)fcp->isp_scratch)[1], sp); FC_SCRATCH_RELEASE(isp, chan); if (sp->req_completion_status == 0) { return (0); } isp_prt(isp, ISP_LOGWARN, "Chan %d reset of target %d returned 0x%x", chan, tgt, sp->req_completion_status); break; } else if (IS_FC(isp)) { if (ISP_CAP_2KLOGIN(isp)) { mbs.param[1] = tgt; mbs.ibits = (1 << 10); } else { mbs.param[1] = (tgt << 8); } } else { mbs.param[1] = (chan << 15) | (tgt << 8); } MBSINIT(&mbs, MBOX_ABORT_TARGET, MBLOGALL, 0); mbs.param[2] = 3; /* 'delay', in seconds */ isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { break; } isp_prt(isp, ISP_LOGINFO, "Target %d on Bus %d Reset Succeeded", tgt, chan); ISP_SET_SENDMARKER(isp, chan, 1); return (0); case ISPCTL_ABORT_CMD: va_start(ap, ctl); xs = va_arg(ap, XS_T *); va_end(ap); tgt = XS_TGT(xs); chan = XS_CHANNEL(xs); 
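/*
 * Recover the request-queue handle allotted in isp_start(); it is the
 * only name the firmware has for the exchange we want aborted.
 *
 * Illustrative only (not compiled): a platform layer aborts a command
 * roughly like this, passing the XS_T pointer through the varargs:
 */
#if 0
		if (isp_control(isp, ISPCTL_ABORT_CMD, xs) != 0)
			isp_prt(isp, ISP_LOGWARN, "abort request failed");
#endif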
handle = isp_find_handle(isp, xs); if (handle == 0) { isp_prt(isp, ISP_LOGWARN, "cannot find handle for command to abort"); break; } if (IS_24XX(isp)) { isp24xx_abrt_t local, *ab = &local, *ab2; fcparam *fcp; fcportdb_t *lp; fcp = FCPARAM(isp, chan); if (tgt < 0 || tgt >= MAX_FC_TARG) { isp_prt(isp, ISP_LOGWARN, "Chan %d trying to abort bad target %d", chan, tgt); break; } lp = &fcp->portdb[tgt]; if (lp->is_target == 0 || lp->state != FC_PORTDB_STATE_VALID) { isp_prt(isp, ISP_LOGWARN, "Chan %d abort of no longer valid target %d", chan, tgt); break; } isp_prt(isp, ISP_LOGALL, "Chan %d Abort Cmd for N-Port 0x%04x @ Port 0x%06x", chan, lp->handle, lp->portid); ISP_MEMZERO(ab, QENTRY_LEN); ab->abrt_header.rqs_entry_type = RQSTYPE_ABORT_IO; ab->abrt_header.rqs_entry_count = 1; ab->abrt_handle = lp->handle; ab->abrt_cmd_handle = handle; ab->abrt_tidlo = lp->portid; ab->abrt_tidhi = lp->portid >> 16; ab->abrt_vpidx = ISP_GET_VPIDX(isp, chan); ISP_MEMZERO(&mbs, sizeof (mbs)); MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 5000000); mbs.param[1] = QENTRY_LEN; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); if (FC_SCRATCH_ACQUIRE(isp, chan)) { isp_prt(isp, ISP_LOGERR, sacq); break; } isp_put_24xx_abrt(isp, ab, fcp->isp_scratch); ab2 = (isp24xx_abrt_t *) &((uint8_t *)fcp->isp_scratch)[QENTRY_LEN]; ab2->abrt_nphdl = 0xdeaf; MEMORYBARRIER(isp, SYNC_SFORDEV, 0, 2 * QENTRY_LEN, chan); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { FC_SCRATCH_RELEASE(isp, chan); break; } MEMORYBARRIER(isp, SYNC_SFORCPU, QENTRY_LEN, QENTRY_LEN, chan); isp_get_24xx_abrt(isp, ab2, ab); FC_SCRATCH_RELEASE(isp, chan); if (ab->abrt_nphdl == ISP24XX_ABRT_OKAY) { return (0); } isp_prt(isp, ISP_LOGWARN, "Chan %d handle %d abort returned 0x%x", chan, tgt, ab->abrt_nphdl); break; } else if (IS_FC(isp)) { if (ISP_CAP_SCCFW(isp)) { if (ISP_CAP_2KLOGIN(isp)) { mbs.param[1] = tgt; } else { mbs.param[1] = tgt << 8; } mbs.param[6] = XS_LUN(xs); } else { mbs.param[1] = tgt << 8 | XS_LUN(xs); } } else { mbs.param[1] = (chan << 15) | (tgt << 8) | XS_LUN(xs); } MBSINIT(&mbs, MBOX_ABORT, MBLOGALL & ~MBLOGMASK(MBOX_COMMAND_ERROR), 0); mbs.param[2] = handle; isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { break; } return (0); case ISPCTL_UPDATE_PARAMS: va_start(ap, ctl); chan = va_arg(ap, int); va_end(ap); isp_spi_update(isp, chan); return (0); case ISPCTL_FCLINK_TEST: if (IS_FC(isp)) { int usdelay; va_start(ap, ctl); chan = va_arg(ap, int); usdelay = va_arg(ap, int); va_end(ap); if (usdelay == 0) { usdelay = 250000; } return (isp_fclink_test(isp, chan, usdelay)); } break; case ISPCTL_SCAN_FABRIC: if (IS_FC(isp)) { va_start(ap, ctl); chan = va_arg(ap, int); va_end(ap); return (isp_scan_fabric(isp, chan)); } break; case ISPCTL_SCAN_LOOP: if (IS_FC(isp)) { va_start(ap, ctl); chan = va_arg(ap, int); va_end(ap); return (isp_scan_loop(isp, chan)); } break; case ISPCTL_PDB_SYNC: if (IS_FC(isp)) { va_start(ap, ctl); chan = va_arg(ap, int); va_end(ap); return (isp_pdb_sync(isp, chan)); } break; case ISPCTL_SEND_LIP: if (IS_FC(isp) && !IS_24XX(isp)) { MBSINIT(&mbs, MBOX_INIT_LIP, MBLOGALL, 0); if (ISP_CAP_2KLOGIN(isp)) { mbs.ibits = (1 << 10); } isp_mboxcmd(isp, &mbs); if (mbs.param[0] == MBOX_COMMAND_COMPLETE) { return (0); } } break; case ISPCTL_GET_PDB: if (IS_FC(isp)) { isp_pdb_t *pdb; va_start(ap, ctl); chan = va_arg(ap, int); tgt = va_arg(ap, int); pdb = va_arg(ap, isp_pdb_t *); va_end(ap); 
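/*
 * Illustrative only (sketch in a comment): callers fetch a port
 * database entry with the (chan, tgt, pdb) varargs decoded above,
 * e.g.
 *
 *	isp_pdb_t pdb;
 *	if (isp_control(isp, ISPCTL_GET_PDB, chan, tgt, &pdb) == 0)
 *		... pdb.handle and pdb.portid describe the device ...
 */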
return (isp_getpdb(isp, chan, tgt, pdb, 1)); } break; case ISPCTL_GET_NAMES: { uint64_t *wwnn, *wwnp; va_start(ap, ctl); chan = va_arg(ap, int); tgt = va_arg(ap, int); wwnn = va_arg(ap, uint64_t *); wwnp = va_arg(ap, uint64_t *); va_end(ap); if (wwnn == NULL && wwnp == NULL) { break; } if (wwnn) { *wwnn = isp_get_wwn(isp, chan, tgt, 1); if (*wwnn == INI_NONE) { break; } } if (wwnp) { *wwnp = isp_get_wwn(isp, chan, tgt, 0); if (*wwnp == INI_NONE) { break; } } return (0); } case ISPCTL_RUN_MBOXCMD: { va_start(ap, ctl); mbr = va_arg(ap, mbreg_t *); va_end(ap); isp_mboxcmd(isp, mbr); return (0); } case ISPCTL_PLOGX: { isp_plcmd_t *p; int r; va_start(ap, ctl); p = va_arg(ap, isp_plcmd_t *); va_end(ap); if ((p->flags & PLOGX_FLG_CMD_MASK) != PLOGX_FLG_CMD_PLOGI || (p->handle != NIL_HANDLE)) { return (isp_plogx(isp, p->channel, p->handle, p->portid, p->flags, 0)); } do { isp_next_handle(isp, &p->handle); r = isp_plogx(isp, p->channel, p->handle, p->portid, p->flags, 0); if ((r & 0xffff) == MBOX_PORT_ID_USED) { p->handle = r >> 16; r = 0; break; } } while ((r & 0xffff) == MBOX_LOOP_ID_USED); return (r); } case ISPCTL_CHANGE_ROLE: { int role, r; va_start(ap, ctl); chan = va_arg(ap, int); role = va_arg(ap, int); va_end(ap); if (IS_FC(isp)) { r = isp_fc_change_role(isp, chan, role); } else { SDPARAM(isp, chan)->role = role; r = 0; } return (r); } default: isp_prt(isp, ISP_LOGERR, "Unknown Control Opcode 0x%x", ctl); break; } return (-1); } /* * Interrupt Service Routine(s). * * External (OS) framework has done the appropriate locking, * and the locking will be held throughout this function. */ /* * Limit our stack depth by sticking with the max likely number * of completions on a request queue at any one time. */ #ifndef MAX_REQUESTQ_COMPLETIONS #define MAX_REQUESTQ_COMPLETIONS 32 #endif void isp_intr(ispsoftc_t *isp, uint16_t isr, uint16_t sema, uint16_t info) { XS_T *complist[MAX_REQUESTQ_COMPLETIONS], *xs; uint32_t iptr, optr, junk; int i, nlooked = 0, ndone = 0, continuations_expected = 0; int etype, last_etype = 0; again: /* * Is this a mailbox related interrupt? * The mailbox semaphore will be nonzero if so. */ if (sema) { fmbox: if (info & MBOX_COMMAND_COMPLETE) { isp->isp_intmboxc++; if (isp->isp_mboxbsy) { int obits = isp->isp_obits; isp->isp_mboxtmp[0] = info; for (i = 1; i < ISP_NMBOX(isp); i++) { if ((obits & (1 << i)) == 0) { continue; } isp->isp_mboxtmp[i] = ISP_READ(isp, MBOX_OFF(i)); } if (isp->isp_mbxwrk0) { if (isp_mbox_continue(isp) == 0) { return; } } MBOX_NOTIFY_COMPLETE(isp); } else { isp_prt(isp, ISP_LOGWARN, "mailbox cmd (0x%x) with no waiters", info); } } else { i = IS_FC(isp)? isp_parse_async_fc(isp, info) : isp_parse_async(isp, info); if (i < 0) { return; } } if ((IS_FC(isp) && info != ASYNC_RIOZIO_STALL) || isp->isp_state != ISP_RUNSTATE) { goto out; } } /* * We can't be getting this now. */ if (isp->isp_state != ISP_RUNSTATE) { /* * This seems to happen to 23XX and 24XX cards- don't know why. */ if (isp->isp_mboxbsy && isp->isp_lastmbxcmd == MBOX_ABOUT_FIRMWARE) { goto fmbox; } isp_prt(isp, ISP_LOGINFO, "interrupt (ISR=%x SEMA=%x INFO=%x) " "when not ready", isr, sema, info); /* * Thank you very much! *Burrrp*! */ isp->isp_residx = ISP_READ(isp, isp->isp_respinrp); isp->isp_resodx = isp->isp_residx; ISP_WRITE(isp, isp->isp_respoutrp, isp->isp_resodx); if (IS_24XX(isp)) { ISP_DISABLE_INTS(isp); } goto out; } #ifdef ISP_TARGET_MODE /* * Check for ATIO Queue entries. 
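 * (Target mode only: the 24XX delivers ATIOs on a separate queue with
 * its own in/out pointers, drained here before the normal response
 * queue is looked at.)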
*/ if (IS_24XX(isp) && (isr == ISPR2HST_ATIO_UPDATE || isr == ISPR2HST_ATIO_RSPQ_UPDATE || isr == ISPR2HST_ATIO_UPDATE2)) { iptr = ISP_READ(isp, BIU2400_ATIO_RSPINP); optr = isp->isp_atioodx; while (optr != iptr) { uint8_t qe[QENTRY_LEN]; isphdr_t *hp; uint32_t oop; void *addr; oop = optr; MEMORYBARRIER(isp, SYNC_ATIOQ, oop, QENTRY_LEN, -1); addr = ISP_QUEUE_ENTRY(isp->isp_atioq, oop); isp_get_hdr(isp, addr, (isphdr_t *)qe); hp = (isphdr_t *)qe; switch (hp->rqs_entry_type) { case RQSTYPE_NOTIFY: case RQSTYPE_ATIO: (void) isp_target_notify(isp, addr, &oop); break; default: isp_print_qentry(isp, "?ATIOQ entry?", oop, addr); break; } optr = ISP_NXT_QENTRY(oop, RESULT_QUEUE_LEN(isp)); } if (isp->isp_atioodx != optr) { ISP_WRITE(isp, BIU2400_ATIO_RSPOUTP, optr); isp->isp_atioodx = optr; } } #endif /* * You *must* read the Response Queue In Pointer * prior to clearing the RISC interrupt. * * Debounce the 2300 if revision less than 2. */ if (IS_2100(isp) || (IS_2300(isp) && isp->isp_revision < 2)) { i = 0; do { iptr = ISP_READ(isp, isp->isp_respinrp); junk = ISP_READ(isp, isp->isp_respinrp); } while (junk != iptr && ++i < 1000); if (iptr != junk) { isp_prt(isp, ISP_LOGWARN, "Response Queue Out Pointer Unstable (%x, %x)", iptr, junk); goto out; } } else { iptr = ISP_READ(isp, isp->isp_respinrp); } optr = isp->isp_resodx; if (optr == iptr && sema == 0) { /* * There are a lot of these- reasons unknown- mostly on * faster Alpha machines. * * I tried delaying after writing HCCR_CMD_CLEAR_RISC_INT to * make sure the old interrupt went away (to avoid 'ringing' * effects), but that didn't stop this from occurring. */ if (IS_24XX(isp)) { junk = 0; } else if (IS_23XX(isp)) { ISP_DELAY(100); iptr = ISP_READ(isp, isp->isp_respinrp); junk = ISP_READ(isp, BIU_R2HSTSLO); } else { junk = ISP_READ(isp, BIU_ISR); } if (optr == iptr) { if (IS_23XX(isp) || IS_24XX(isp)) { ; } else { sema = ISP_READ(isp, BIU_SEMA); info = ISP_READ(isp, OUTMAILBOX0); if ((sema & 0x3) && (info & 0x8000)) { goto again; } } isp->isp_intbogus++; isp_prt(isp, ISP_LOGDEBUG1, "bogus intr- isr %x (%x) iptr %x optr %x", isr, junk, iptr, optr); } } isp->isp_residx = iptr; while (optr != iptr) { uint8_t qe[QENTRY_LEN]; ispstatusreq_t *sp = (ispstatusreq_t *) qe; isphdr_t *hp; int buddaboom, scsi_status, completion_status; int req_status_flags, req_state_flags; uint8_t *snsp, *resp; uint32_t rlen, slen, totslen; long resid; uint16_t oop; hp = (isphdr_t *) ISP_QUEUE_ENTRY(isp->isp_result, optr); oop = optr; optr = ISP_NXT_QENTRY(optr, RESULT_QUEUE_LEN(isp)); nlooked++; read_again: buddaboom = req_status_flags = req_state_flags = 0; resid = 0L; /* * Synchronize our view of this response queue entry. 
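 * (MEMORYBARRIER with SYNC_RESULT makes the DMA'd entry visible to
 * the CPU before isp_get_hdr parses its header.)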
*/ MEMORYBARRIER(isp, SYNC_RESULT, oop, QENTRY_LEN, -1); isp_get_hdr(isp, hp, &sp->req_header); etype = sp->req_header.rqs_entry_type; if (IS_24XX(isp) && etype == RQSTYPE_RESPONSE) { isp24xx_statusreq_t *sp2 = (isp24xx_statusreq_t *)qe; isp_get_24xx_response(isp, (isp24xx_statusreq_t *)hp, sp2); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "Response Queue Entry", QENTRY_LEN, sp2); } scsi_status = sp2->req_scsi_status; completion_status = sp2->req_completion_status; if ((scsi_status & 0xff) != 0) req_state_flags = RQSF_GOT_STATUS; else req_state_flags = 0; resid = sp2->req_resid; } else if (etype == RQSTYPE_RESPONSE) { isp_get_response(isp, (ispstatusreq_t *) hp, sp); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "Response Queue Entry", QENTRY_LEN, sp); } scsi_status = sp->req_scsi_status; completion_status = sp->req_completion_status; req_status_flags = sp->req_status_flags; req_state_flags = sp->req_state_flags; resid = sp->req_resid; } else if (etype == RQSTYPE_RIO1) { isp_rio1_t *rio = (isp_rio1_t *) qe; isp_get_rio1(isp, (isp_rio1_t *) hp, rio); if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "Response Queue Entry", QENTRY_LEN, rio); } for (i = 0; i < rio->req_header.rqs_seqno; i++) { isp_fastpost_complete(isp, rio->req_handles[i]); } if (isp->isp_fpcchiwater < rio->req_header.rqs_seqno) { isp->isp_fpcchiwater = rio->req_header.rqs_seqno; } ISP_MEMZERO(hp, QENTRY_LEN); /* PERF */ last_etype = etype; continue; } else if (etype == RQSTYPE_RIO2) { isp_prt(isp, ISP_LOGERR, "dropping RIO2 response"); ISP_MEMZERO(hp, QENTRY_LEN); /* PERF */ last_etype = etype; continue; } else if (etype == RQSTYPE_STATUS_CONT) { isp_get_cont_response(isp, (ispstatus_cont_t *) hp, (ispstatus_cont_t *) sp); if (last_etype == RQSTYPE_RESPONSE && continuations_expected && ndone > 0 && (xs = complist[ndone-1]) != NULL) { ispstatus_cont_t *scp = (ispstatus_cont_t *) sp; XS_SENSE_APPEND(xs, scp->req_sense_data, sizeof (scp->req_sense_data)); isp_prt(isp, ISP_LOGDEBUG0|ISP_LOG_CWARN, "%d more Status Continuations expected", --continuations_expected); } else { isp_prt(isp, ISP_LOG_WARN1, "Ignored Continuation Response"); } ISP_MEMZERO(hp, QENTRY_LEN); /* PERF */ continue; } else { /* * Somebody reachable via isp_handle_other_response * may have updated the response queue pointers for * us, so we reload our goal index. */ int r; uint32_t tsto = oop; r = isp_handle_other_response(isp, etype, hp, &tsto); if (r < 0) { goto read_again; } /* * If somebody updated the output pointer, then reset * optr to be one more than the updated amount. */ while (tsto != oop) { optr = ISP_NXT_QENTRY(tsto, RESULT_QUEUE_LEN(isp)); } if (r > 0) { ISP_MEMZERO(hp, QENTRY_LEN); /* PERF */ last_etype = etype; continue; } /* * After this point, we'll just look at the header as * we don't know how to deal with the rest of the * response. */ /* * It really has to be a bounced request just copied * from the request queue to the response queue. If * not, something bad has happened. 
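 * (That is, the only non-response type we tolerate here is a request
 * entry the firmware bounced back; it is flagged with buddaboom and
 * picked apart below, and anything else is logged and skipped.)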
*/ if (etype != RQSTYPE_REQUEST) { isp_prt(isp, ISP_LOGERR, notresp, etype, oop, optr, nlooked); isp_print_bytes(isp, "Request Queue Entry", QENTRY_LEN, sp); ISP_MEMZERO(hp, QENTRY_LEN); /* PERF */ last_etype = etype; continue; } buddaboom = 1; scsi_status = sp->req_scsi_status; completion_status = sp->req_completion_status; req_status_flags = sp->req_status_flags; req_state_flags = sp->req_state_flags; resid = sp->req_resid; } if (sp->req_header.rqs_flags & RQSFLAG_MASK) { if (sp->req_header.rqs_flags & RQSFLAG_CONTINUATION) { isp_print_bytes(isp, "unexpected continuation segment", QENTRY_LEN, sp); last_etype = etype; continue; } if (sp->req_header.rqs_flags & RQSFLAG_FULL) { isp_prt(isp, ISP_LOG_WARN1, "internal queues full"); /* * We'll synthesize a QUEUE FULL message below. */ } if (sp->req_header.rqs_flags & RQSFLAG_BADHEADER) { isp_print_bytes(isp, "bad header flag", QENTRY_LEN, sp); buddaboom++; } if (sp->req_header.rqs_flags & RQSFLAG_BADPACKET) { isp_print_bytes(isp, "bad request packet", QENTRY_LEN, sp); buddaboom++; } if (sp->req_header.rqs_flags & RQSFLAG_BADCOUNT) { isp_print_bytes(isp, "invalid entry count", QENTRY_LEN, sp); buddaboom++; } if (sp->req_header.rqs_flags & RQSFLAG_BADORDER) { isp_print_bytes(isp, "invalid IOCB ordering", QENTRY_LEN, sp); last_etype = etype; continue; } } if (!ISP_VALID_HANDLE(isp, sp->req_handle)) { isp_prt(isp, ISP_LOGERR, "bad request handle 0x%x (iocb type 0x%x)", sp->req_handle, etype); ISP_MEMZERO(hp, QENTRY_LEN); /* PERF */ last_etype = etype; continue; } xs = isp_find_xs(isp, sp->req_handle); if (xs == NULL) { uint8_t ts = completion_status & 0xff; /* * Only whine if this isn't the expected fallout of * aborting the command or resetting the target. */ if (etype != RQSTYPE_RESPONSE) { isp_prt(isp, ISP_LOGERR, "cannot find handle 0x%x (type 0x%x)", sp->req_handle, etype); } else if (ts != RQCS_ABORTED && ts != RQCS_RESET_OCCURRED) { isp_prt(isp, ISP_LOGERR, "cannot find handle 0x%x (status 0x%x)", sp->req_handle, ts); } ISP_MEMZERO(hp, QENTRY_LEN); /* PERF */ last_etype = etype; continue; } if (req_status_flags & RQSTF_BUS_RESET) { isp_prt(isp, ISP_LOG_WARN1, "%d.%d.%jx bus was reset", XS_CHANNEL(xs), XS_TGT(xs), (uintmax_t)XS_LUN(xs)); XS_SETERR(xs, HBA_BUSRESET); ISP_SET_SENDMARKER(isp, XS_CHANNEL(xs), 1); } if (buddaboom) { isp_prt(isp, ISP_LOG_WARN1, "%d.%d.%jx buddaboom", XS_CHANNEL(xs), XS_TGT(xs), (uintmax_t)XS_LUN(xs)); XS_SETERR(xs, HBA_BOTCH); } resp = NULL; rlen = 0; snsp = NULL; totslen = slen = 0; if (IS_24XX(isp) && (scsi_status & (RQCS_RV|RQCS_SV)) != 0) { resp = ((isp24xx_statusreq_t *)sp)->req_rsp_sense; rlen = ((isp24xx_statusreq_t *)sp)->req_response_len; } else if (IS_FC(isp) && (scsi_status & RQCS_RV) != 0) { resp = sp->req_response; rlen = sp->req_response_len; } if (IS_FC(isp) && (scsi_status & RQCS_SV) != 0) { /* * Fibre Channel F/W doesn't say we got status * if there's Sense Data instead. I guess they * think it goes w/o saying. 
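 * (So we synthesize RQSF_GOT_STATUS|RQSF_GOT_SENSE ourselves before
 * locating and copying the sense bytes below.)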
*/ req_state_flags |= RQSF_GOT_STATUS|RQSF_GOT_SENSE; if (IS_24XX(isp)) { snsp = ((isp24xx_statusreq_t *)sp)->req_rsp_sense; snsp += rlen; totslen = ((isp24xx_statusreq_t *)sp)->req_sense_len; slen = (sizeof (((isp24xx_statusreq_t *)sp)->req_rsp_sense)) - rlen; if (totslen < slen) slen = totslen; } else { snsp = sp->req_sense_data; totslen = sp->req_sense_len; slen = sizeof (sp->req_sense_data); if (totslen < slen) slen = totslen; } } else if (IS_SCSI(isp) && (req_state_flags & RQSF_GOT_SENSE)) { snsp = sp->req_sense_data; totslen = sp->req_sense_len; slen = sizeof (sp->req_sense_data); if (totslen < slen) slen = totslen; } if (req_state_flags & RQSF_GOT_STATUS) { *XS_STSP(xs) = scsi_status & 0xff; } switch (etype) { case RQSTYPE_RESPONSE: if (resp && rlen >= 4 && resp[FCP_RSPNS_CODE_OFFSET] != 0) { const char *ptr; char lb[64]; const char *rnames[10] = { "Task Management function complete", "FCP_DATA length different than FCP_BURST_LEN", "FCP_CMND fields invalid", "FCP_DATA parameter mismatch with FCP_DATA_RO", "Task Management function rejected", "Task Management function failed", NULL, NULL, "Task Management function succeeded", "Task Management function incorrect logical unit number", }; uint8_t code = resp[FCP_RSPNS_CODE_OFFSET]; if (code >= 10 || rnames[code] == NULL) { ISP_SNPRINTF(lb, sizeof(lb), "Unknown FCP Response Code 0x%x", code); ptr = lb; } else { ptr = rnames[code]; } isp_xs_prt(isp, xs, ISP_LOGWARN, "FCP RESPONSE, LENGTH %u: %s CDB0=0x%02x", rlen, ptr, XS_CDBP(xs)[0] & 0xff); if (code != 0 && code != 8) XS_SETERR(xs, HBA_BOTCH); } if (IS_24XX(isp)) { isp_parse_status_24xx(isp, (isp24xx_statusreq_t *)sp, xs, &resid); } else { isp_parse_status(isp, (void *)sp, xs, &resid); } if ((XS_NOERR(xs) || XS_ERR(xs) == HBA_NOERROR) && (*XS_STSP(xs) == SCSI_BUSY)) { XS_SETERR(xs, HBA_TGTBSY); } if (IS_SCSI(isp)) { XS_SET_RESID(xs, resid); /* * A new synchronous rate was negotiated for * this target. Mark state such that we'll go * look up that which has changed later. */ if (req_status_flags & RQSTF_NEGOTIATION) { int t = XS_TGT(xs); sdparam *sdp = SDPARAM(isp, XS_CHANNEL(xs)); sdp->isp_devparam[t].dev_refresh = 1; sdp->update = 1; } } else { if (req_status_flags & RQSF_XFER_COMPLETE) { XS_SET_RESID(xs, 0); } else if (scsi_status & RQCS_RESID) { XS_SET_RESID(xs, resid); } else { XS_SET_RESID(xs, 0); } } if (snsp && slen) { if (totslen > slen) { continuations_expected += ((totslen - slen + QENTRY_LEN - 5) / (QENTRY_LEN - 4)); if (ndone > (MAX_REQUESTQ_COMPLETIONS - continuations_expected - 1)) { /* we'll lose some stats, but that's a small price to pay */ for (i = 0; i < ndone; i++) { if (complist[i]) { isp->isp_rsltccmplt++; isp_done(complist[i]); } } ndone = 0; } isp_prt(isp, ISP_LOGDEBUG0|ISP_LOG_CWARN, "Expecting %d more Status Continuations for total sense length of %u", continuations_expected, totslen); } XS_SAVE_SENSE(xs, snsp, totslen, slen); } else if ((req_status_flags & RQSF_GOT_STATUS) && (scsi_status & 0xff) == SCSI_CHECK && IS_FC(isp)) { isp_prt(isp, ISP_LOGWARN, "CHECK CONDITION w/o sense data for CDB=0x%x", XS_CDBP(xs)[0] & 0xff); isp_print_bytes(isp, "CC with no Sense", QENTRY_LEN, qe); } isp_prt(isp, ISP_LOGDEBUG2, "asked for %ld got raw resid %ld settled for %ld", (long) XS_XFRLEN(xs), resid, (long) XS_GET_RESID(xs)); break; case RQSTYPE_REQUEST: case RQSTYPE_A64: case RQSTYPE_T2RQS: case RQSTYPE_T3RQS: case RQSTYPE_T7RQS: if (!IS_24XX(isp) && (sp->req_header.rqs_flags & RQSFLAG_FULL)) { /* * Force Queue Full status. 
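 * (RQSFLAG_FULL means the firmware's internal queues overflowed; a
 * synthetic SCSI_QFULL status lets the upper layers back off and
 * requeue rather than fail the command.)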
*/ *XS_STSP(xs) = SCSI_QFULL; XS_SETERR(xs, HBA_NOERROR); } else if (XS_NOERR(xs)) { isp_prt(isp, ISP_LOG_WARN1, "%d.%d.%jx badness at %s:%u", XS_CHANNEL(xs), XS_TGT(xs), (uintmax_t)XS_LUN(xs), __func__, __LINE__); XS_SETERR(xs, HBA_BOTCH); } XS_SET_RESID(xs, XS_XFRLEN(xs)); break; default: isp_print_bytes(isp, "Unhandled Response Type", QENTRY_LEN, qe); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_BOTCH); } break; } /* * Free any DMA resources. As a side effect, this may * also do any cache flushing necessary for data coherence. */ if (XS_XFRLEN(xs)) { ISP_DMAFREE(isp, xs, sp->req_handle); } isp_destroy_handle(isp, sp->req_handle); if (isp->isp_nactive > 0) { isp->isp_nactive--; } complist[ndone++] = xs; /* defer completion call until later */ ISP_MEMZERO(hp, QENTRY_LEN); /* PERF */ last_etype = etype; if (ndone == MAX_REQUESTQ_COMPLETIONS) { break; } } /* * If we looked at any commands, then it's valid to find out * what the outpointer is. It also is a trigger to update the * ISP's notion of what we've seen so far. */ if (nlooked) { ISP_WRITE(isp, isp->isp_respoutrp, optr); isp->isp_resodx = optr; if (isp->isp_rscchiwater < ndone) isp->isp_rscchiwater = ndone; } out: if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_CLEAR_RISC_INT); } else { ISP_WRITE(isp, HCCR, HCCR_CMD_CLEAR_RISC_INT); ISP_WRITE(isp, BIU_SEMA, 0); } for (i = 0; i < ndone; i++) { xs = complist[i]; if (xs) { if (((isp->isp_dblev & (ISP_LOGDEBUG1|ISP_LOGDEBUG2|ISP_LOGDEBUG3))) || ((isp->isp_dblev & (ISP_LOGDEBUG0|ISP_LOG_CWARN) && ((!XS_NOERR(xs)) || (*XS_STSP(xs) != SCSI_GOOD))))) { isp_prt_endcmd(isp, xs); } isp->isp_rsltccmplt++; isp_done(xs); } } } /* * Support routines. */ void isp_prt_endcmd(ispsoftc_t *isp, XS_T *xs) { char cdbstr[16 * 5 + 1]; int i, lim; lim = XS_CDBLEN(xs) > 16? 16 : XS_CDBLEN(xs); ISP_SNPRINTF(cdbstr, sizeof (cdbstr), "0x%02x ", XS_CDBP(xs)[0]); for (i = 1; i < lim; i++) { ISP_SNPRINTF(cdbstr, sizeof (cdbstr), "%s0x%02x ", cdbstr, XS_CDBP(xs)[i]); } if (XS_SENSE_VALID(xs)) { isp_xs_prt(isp, xs, ISP_LOGALL, "FIN dl%d resid %ld CDB=%s SenseLength=%u/%u KEY/ASC/ASCQ=0x%02x/0x%02x/0x%02x", XS_XFRLEN(xs), (long) XS_GET_RESID(xs), cdbstr, XS_CUR_SNSLEN(xs), XS_TOT_SNSLEN(xs), XS_SNSKEY(xs), XS_SNSASC(xs), XS_SNSASCQ(xs)); } else { isp_xs_prt(isp, xs, ISP_LOGALL, "FIN dl%d resid %ld CDB=%s STS 0x%x XS_ERR=0x%x", XS_XFRLEN(xs), (long) XS_GET_RESID(xs), cdbstr, *XS_STSP(xs), XS_ERR(xs)); } } /* * Parse an ASYNC mailbox complete * * Return non-zero if the event has been acknowledged. */ static int isp_parse_async(ispsoftc_t *isp, uint16_t mbox) { int acked = 0; uint32_t h1 = 0, h2 = 0; uint16_t chan = 0; /* * Pick up the channel, but not if this is a ASYNC_RIO32_2, * where Mailboxes 6/7 have the second handle. */ if (mbox != ASYNC_RIO32_2) { if (IS_DUALBUS(isp)) { chan = ISP_READ(isp, OUTMAILBOX6); } } isp_prt(isp, ISP_LOGDEBUG2, "Async Mbox 0x%x", mbox); switch (mbox) { case ASYNC_BUS_RESET: ISP_SET_SENDMARKER(isp, chan, 1); #ifdef ISP_TARGET_MODE if (isp_target_async(isp, chan, mbox)) { acked = 1; } #endif isp_async(isp, ISPASYNC_BUS_RESET, chan); break; case ASYNC_SYSTEM_ERROR: isp->isp_dead = 1; isp->isp_state = ISP_CRASHED; /* * Were we waiting for a mailbox command to complete? * If so, it's dead, so wake up the waiter. 
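 * (We fake a MBOX_HOST_INTERFACE_ERROR completion so the sleeper sees
 * a failed mailbox command instead of hanging forever.)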
*/ if (isp->isp_mboxbsy) { isp->isp_obits = 1; isp->isp_mboxtmp[0] = MBOX_HOST_INTERFACE_ERROR; MBOX_NOTIFY_COMPLETE(isp); } /* * It's up to the handler for isp_async to reinit stuff and * restart the firmware */ isp_async(isp, ISPASYNC_FW_CRASH); acked = 1; break; case ASYNC_RQS_XFER_ERR: isp_prt(isp, ISP_LOGERR, "Request Queue Transfer Error"); break; case ASYNC_RSP_XFER_ERR: isp_prt(isp, ISP_LOGERR, "Response Queue Transfer Error"); break; case ASYNC_QWAKEUP: /* * We've just been notified that the Queue has woken up. * We don't need to be chatty about this- just unlatch things * and move on. */ mbox = ISP_READ(isp, isp->isp_rqstoutrp); break; case ASYNC_TIMEOUT_RESET: isp_prt(isp, ISP_LOGWARN, "timeout initiated SCSI bus reset of chan %d", chan); ISP_SET_SENDMARKER(isp, chan, 1); #ifdef ISP_TARGET_MODE if (isp_target_async(isp, chan, mbox)) { acked = 1; } #endif break; case ASYNC_DEVICE_RESET: isp_prt(isp, ISP_LOGINFO, "device reset on chan %d", chan); ISP_SET_SENDMARKER(isp, chan, 1); #ifdef ISP_TARGET_MODE if (isp_target_async(isp, chan, mbox)) { acked = 1; } #endif break; case ASYNC_EXTMSG_UNDERRUN: isp_prt(isp, ISP_LOGWARN, "extended message underrun"); break; case ASYNC_SCAM_INT: isp_prt(isp, ISP_LOGINFO, "SCAM interrupt"); break; case ASYNC_HUNG_SCSI: isp_prt(isp, ISP_LOGERR, "stalled SCSI Bus after DATA Overrun"); /* XXX: Need to issue SCSI reset at this point */ break; case ASYNC_KILLED_BUS: isp_prt(isp, ISP_LOGERR, "SCSI Bus reset after DATA Overrun"); break; case ASYNC_BUS_TRANSIT: mbox = ISP_READ(isp, OUTMAILBOX2); switch (mbox & SXP_PINS_MODE_MASK) { case SXP_PINS_LVD_MODE: isp_prt(isp, ISP_LOGINFO, "Transition to LVD mode"); SDPARAM(isp, chan)->isp_diffmode = 0; SDPARAM(isp, chan)->isp_ultramode = 0; SDPARAM(isp, chan)->isp_lvdmode = 1; break; case SXP_PINS_HVD_MODE: isp_prt(isp, ISP_LOGINFO, "Transition to Differential mode"); SDPARAM(isp, chan)->isp_diffmode = 1; SDPARAM(isp, chan)->isp_ultramode = 0; SDPARAM(isp, chan)->isp_lvdmode = 0; break; case SXP_PINS_SE_MODE: isp_prt(isp, ISP_LOGINFO, "Transition to Single Ended mode"); SDPARAM(isp, chan)->isp_diffmode = 0; SDPARAM(isp, chan)->isp_ultramode = 1; SDPARAM(isp, chan)->isp_lvdmode = 0; break; default: isp_prt(isp, ISP_LOGWARN, "Transition to Unknown Mode 0x%x", mbox); break; } /* * XXX: Set up to renegotiate again! */ /* Can only be for a 1080... 
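 * (The SE/LVD/HVD transition was decoded from OUTMAILBOX2 above; the
 * marker queued here forces renegotiation on the next command.)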
*/ ISP_SET_SENDMARKER(isp, chan, 1); break; case ASYNC_CMD_CMPLT: case ASYNC_RIO32_1: if (!IS_ULTRA3(isp)) { isp_prt(isp, ISP_LOGERR, "unexpected fast posting completion"); break; } /* FALLTHROUGH */ h1 = (ISP_READ(isp, OUTMAILBOX2) << 16) | ISP_READ(isp, OUTMAILBOX1); break; case ASYNC_RIO32_2: h1 = (ISP_READ(isp, OUTMAILBOX2) << 16) | ISP_READ(isp, OUTMAILBOX1); h2 = (ISP_READ(isp, OUTMAILBOX7) << 16) | ISP_READ(isp, OUTMAILBOX6); break; case ASYNC_RIO16_5: case ASYNC_RIO16_4: case ASYNC_RIO16_3: case ASYNC_RIO16_2: case ASYNC_RIO16_1: isp_prt(isp, ISP_LOGERR, "unexpected 16 bit RIO handle"); break; default: isp_prt(isp, ISP_LOGWARN, "%s: unhandled async code 0x%x", __func__, mbox); break; } if (h1 || h2) { isp_prt(isp, ISP_LOGDEBUG3, "fast post/rio completion of 0x%08x", h1); isp_fastpost_complete(isp, h1); if (h2) { isp_prt(isp, ISP_LOGDEBUG3, "fast post/rio completion of 0x%08x", h2); isp_fastpost_complete(isp, h2); if (isp->isp_fpcchiwater < 2) { isp->isp_fpcchiwater = 2; } } else { if (isp->isp_fpcchiwater < 1) { isp->isp_fpcchiwater = 1; } } } else { isp->isp_intoasync++; } return (acked); } #define GET_24XX_BUS(isp, chan, msg) \ if (IS_24XX(isp)) { \ chan = ISP_READ(isp, OUTMAILBOX3) & 0xff; \ if (chan >= isp->isp_nchan) { \ isp_prt(isp, ISP_LOGERR, "bogus channel %u for %s at line %d", chan, msg, __LINE__); \ break; \ } \ } static int isp_parse_async_fc(ispsoftc_t *isp, uint16_t mbox) { int acked = 0; uint16_t chan; if (IS_DUALBUS(isp)) { chan = ISP_READ(isp, OUTMAILBOX6); } else { chan = 0; } isp_prt(isp, ISP_LOGDEBUG2, "Async Mbox 0x%x", mbox); switch (mbox) { case ASYNC_SYSTEM_ERROR: isp->isp_dead = 1; isp->isp_state = ISP_CRASHED; FCPARAM(isp, chan)->isp_loopstate = LOOP_NIL; FCPARAM(isp, chan)->isp_fwstate = FW_CONFIG_WAIT; /* * Were we waiting for a mailbox command to complete? * If so, it's dead, so wake up the waiter. */ if (isp->isp_mboxbsy) { isp->isp_obits = 1; isp->isp_mboxtmp[0] = MBOX_HOST_INTERFACE_ERROR; MBOX_NOTIFY_COMPLETE(isp); } /* * It's up to the handler for isp_async to reinit stuff and * restart the firmware */ isp_async(isp, ISPASYNC_FW_CRASH); acked = 1; break; case ASYNC_RQS_XFER_ERR: isp_prt(isp, ISP_LOGERR, "Request Queue Transfer Error"); break; case ASYNC_RSP_XFER_ERR: isp_prt(isp, ISP_LOGERR, "Response Queue Transfer Error"); break; case ASYNC_QWAKEUP: #ifdef ISP_TARGET_MODE if (IS_24XX(isp)) { isp_prt(isp, ISP_LOGERR, "ATIO Queue Transfer Error"); break; } #endif isp_prt(isp, ISP_LOGERR, "%s: unexpected ASYNC_QWAKEUP code", __func__); break; case ASYNC_CMD_CMPLT: isp_fastpost_complete(isp, (ISP_READ(isp, OUTMAILBOX2) << 16) | ISP_READ(isp, OUTMAILBOX1)); if (isp->isp_fpcchiwater < 1) { isp->isp_fpcchiwater = 1; } break; case ASYNC_RIOZIO_STALL: break; case ASYNC_CTIO_DONE: #ifdef ISP_TARGET_MODE if (isp_target_async(isp, (ISP_READ(isp, OUTMAILBOX2) << 16) | ISP_READ(isp, OUTMAILBOX1), mbox)) { acked = 1; } else { isp->isp_fphccmplt++; } #else isp_prt(isp, ISP_LOGWARN, "unexpected ASYNC CTIO done"); #endif break; case ASYNC_LIP_ERROR: case ASYNC_LIP_F8: case ASYNC_LIP_OCCURRED: case ASYNC_PTPMODE: /* * These are broadcast events that have to be sent across * all active channels. 
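 * (Per-fabric events, not per-VP: every channel with an active role
 * gets its firmware state reset, its port database marked for rescan,
 * and a marker queued.)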
*/ for (chan = 0; chan < isp->isp_nchan; chan++) { fcparam *fcp = FCPARAM(isp, chan); int topo = fcp->isp_topo; if (fcp->role == ISP_ROLE_NONE) { continue; } fcp->isp_fwstate = FW_CONFIG_WAIT; fcp->isp_loopstate = LOOP_LIP_RCVD; ISP_SET_SENDMARKER(isp, chan, 1); ISP_MARK_PORTDB(isp, chan, 1); isp_async(isp, ISPASYNC_LIP, chan); #ifdef ISP_TARGET_MODE if (isp_target_async(isp, chan, mbox)) { acked = 1; } #endif /* * We've had problems with data corruption occurring on * commands that complete (with no apparent error) after * we receive a LIP. This has been observed mostly on * Local Loop topologies. To be safe, let's just mark * all active initiator commands as dead. */ if (topo == TOPO_NL_PORT || topo == TOPO_FL_PORT) { int i, j; for (i = j = 0; i < isp->isp_maxcmds; i++) { XS_T *xs; isp_hdl_t *hdp; hdp = &isp->isp_xflist[i]; if (ISP_H2HT(hdp->handle) != ISP_HANDLE_INITIATOR) { continue; } xs = hdp->cmd; if (XS_CHANNEL(xs) != chan) { continue; } j++; isp_prt(isp, ISP_LOG_WARN1, "%d.%d.%jx bus reset set at %s:%u", XS_CHANNEL(xs), XS_TGT(xs), (uintmax_t)XS_LUN(xs), __func__, __LINE__); XS_SETERR(xs, HBA_BUSRESET); } if (j) { isp_prt(isp, ISP_LOGERR, lipd, chan, j); } } } break; case ASYNC_LOOP_UP: /* * This is a broadcast event that has to be sent across * all active channels. */ for (chan = 0; chan < isp->isp_nchan; chan++) { fcparam *fcp = FCPARAM(isp, chan); if (fcp->role == ISP_ROLE_NONE) { continue; } ISP_SET_SENDMARKER(isp, chan, 1); fcp->isp_fwstate = FW_CONFIG_WAIT; fcp->isp_loopstate = LOOP_LIP_RCVD; ISP_MARK_PORTDB(isp, chan, 1); isp_async(isp, ISPASYNC_LOOP_UP, chan); #ifdef ISP_TARGET_MODE if (isp_target_async(isp, chan, mbox)) { acked = 1; } #endif } break; case ASYNC_LOOP_DOWN: /* * This is a broadcast event that has to be sent across * all active channels. */ for (chan = 0; chan < isp->isp_nchan; chan++) { fcparam *fcp = FCPARAM(isp, chan); if (fcp->role == ISP_ROLE_NONE) { continue; } ISP_SET_SENDMARKER(isp, chan, 1); fcp->isp_fwstate = FW_CONFIG_WAIT; fcp->isp_loopstate = LOOP_NIL; ISP_MARK_PORTDB(isp, chan, 1); isp_async(isp, ISPASYNC_LOOP_DOWN, chan); #ifdef ISP_TARGET_MODE if (isp_target_async(isp, chan, mbox)) { acked = 1; } #endif } break; case ASYNC_LOOP_RESET: /* * This is a broadcast event that has to be sent across * all active channels. */ for (chan = 0; chan < isp->isp_nchan; chan++) { fcparam *fcp = FCPARAM(isp, chan); if (fcp->role == ISP_ROLE_NONE) { continue; } ISP_SET_SENDMARKER(isp, chan, 1); fcp->isp_fwstate = FW_CONFIG_WAIT; fcp->isp_loopstate = LOOP_NIL; ISP_MARK_PORTDB(isp, chan, 1); isp_async(isp, ISPASYNC_LOOP_RESET, chan); #ifdef ISP_TARGET_MODE if (isp_target_async(isp, chan, mbox)) { acked = 1; } #endif } break; case ASYNC_PDB_CHANGED: { int echan, nphdl, nlstate, reason; if (IS_24XX(isp)) { nphdl = ISP_READ(isp, OUTMAILBOX1); nlstate = ISP_READ(isp, OUTMAILBOX2); reason = ISP_READ(isp, OUTMAILBOX3) >> 8; GET_24XX_BUS(isp, chan, "ASYNC_CHANGE_NOTIFY"); echan = (nphdl == NIL_HANDLE) ?
isp->isp_nchan - 1 : chan; } else { nphdl = NIL_HANDLE; nlstate = reason = 0; chan = echan = 0; } for (; chan <= echan; chan++) { fcparam *fcp = FCPARAM(isp, chan); if (fcp->role == ISP_ROLE_NONE) { continue; } ISP_SET_SENDMARKER(isp, chan, 1); fcp->isp_loopstate = LOOP_PDB_RCVD; ISP_MARK_PORTDB(isp, chan, 1); isp_async(isp, ISPASYNC_CHANGE_NOTIFY, chan, ISPASYNC_CHANGE_PDB, nphdl, nlstate, reason); } break; } case ASYNC_CHANGE_NOTIFY: { int lochan, hichan; if (ISP_FW_NEWER_THAN(isp, 4, 0, 25) && ISP_CAP_MULTI_ID(isp)) { GET_24XX_BUS(isp, chan, "ASYNC_CHANGE_NOTIFY"); lochan = chan; hichan = chan + 1; } else { lochan = 0; hichan = isp->isp_nchan; } for (chan = lochan; chan < hichan; chan++) { fcparam *fcp = FCPARAM(isp, chan); if (fcp->role == ISP_ROLE_NONE) { continue; } if (fcp->isp_topo == TOPO_F_PORT) { fcp->isp_loopstate = LOOP_LSCAN_DONE; } else { fcp->isp_loopstate = LOOP_PDB_RCVD; } ISP_MARK_PORTDB(isp, chan, 1); isp_async(isp, ISPASYNC_CHANGE_NOTIFY, chan, ISPASYNC_CHANGE_SNS); } break; } case ASYNC_CONNMODE: /* * This only applies to 2100 and 2200 cards */ if (!IS_2200(isp) && !IS_2100(isp)) { isp_prt(isp, ISP_LOGWARN, "bad card for ASYNC_CONNMODE event"); break; } chan = 0; mbox = ISP_READ(isp, OUTMAILBOX1); ISP_MARK_PORTDB(isp, chan, 1); switch (mbox) { case ISP_CONN_LOOP: isp_prt(isp, ISP_LOGINFO, "Point-to-Point -> Loop mode"); break; case ISP_CONN_PTP: isp_prt(isp, ISP_LOGINFO, "Loop -> Point-to-Point mode"); break; case ISP_CONN_BADLIP: isp_prt(isp, ISP_LOGWARN, "Point-to-Point -> Loop mode (BAD LIP)"); break; case ISP_CONN_FATAL: isp->isp_dead = 1; isp->isp_state = ISP_CRASHED; isp_prt(isp, ISP_LOGERR, "FATAL CONNECTION ERROR"); isp_async(isp, ISPASYNC_FW_CRASH); return (-1); case ISP_CONN_LOOPBACK: isp_prt(isp, ISP_LOGWARN, "Looped Back in Point-to-Point mode"); break; default: isp_prt(isp, ISP_LOGWARN, "Unknown connection mode (0x%x)", mbox); break; } isp_async(isp, ISPASYNC_CHANGE_NOTIFY, chan, ISPASYNC_CHANGE_OTHER); FCPARAM(isp, chan)->sendmarker = 1; FCPARAM(isp, chan)->isp_fwstate = FW_CONFIG_WAIT; FCPARAM(isp, chan)->isp_loopstate = LOOP_LIP_RCVD; break; case ASYNC_RCV_ERR: if (IS_24XX(isp)) { isp_prt(isp, ISP_LOGWARN, "Receive Error"); } else { isp_prt(isp, ISP_LOGWARN, "unexpected ASYNC_RCV_ERR"); } break; case ASYNC_RJT_SENT: /* same as ASYNC_QFULL_SENT */ if (IS_24XX(isp)) { isp_prt(isp, ISP_LOGTDEBUG0, "LS_RJT sent"); break; } else if (IS_2200(isp)) { isp_prt(isp, ISP_LOGTDEBUG0, "QFULL sent"); break; } /* FALLTHROUGH */ default: isp_prt(isp, ISP_LOGWARN, "Unknown Async Code 0x%x", mbox); break; } if (mbox != ASYNC_CTIO_DONE && mbox != ASYNC_CMD_CMPLT) { isp->isp_intoasync++; } return (acked); } /* * Handle other response entries. A pointer to the request queue output * index is here in case we want to eat several entries at once, although * this is not used currently. */ static int isp_handle_other_response(ispsoftc_t *isp, int type, isphdr_t *hp, uint32_t *optrp) { + isp_ridacq_t rid; + int chan, c; + switch (type) { case RQSTYPE_STATUS_CONT: isp_prt(isp, ISP_LOG_WARN1, "Ignored Continuation Response"); return (1); case RQSTYPE_MARKER: isp_prt(isp, ISP_LOG_WARN1, "Marker Response"); return (1); + case RQSTYPE_RPT_ID_ACQ: + isp_get_ridacq(isp, (isp_ridacq_t *)hp, &rid); + if (rid.ridacq_format == 0) { + for (chan = 0; chan < isp->isp_nchan; chan++) { + fcparam *fcp = FCPARAM(isp, chan); + if (fcp->role == ISP_ROLE_NONE) + continue; + c = (chan == 0) ?
127 : (chan - 1); + if (rid.ridacq_map[c / 16] & (1 << (c % 16))) + isp_async(isp, ISPASYNC_CHANGE_NOTIFY, + chan, ISPASYNC_CHANGE_OTHER); + } + } else { + isp_async(isp, ISPASYNC_CHANGE_NOTIFY, + rid.ridacq_vp_index, ISPASYNC_CHANGE_OTHER); + } + return (1); case RQSTYPE_ATIO: case RQSTYPE_CTIO: case RQSTYPE_ENABLE_LUN: case RQSTYPE_MODIFY_LUN: case RQSTYPE_NOTIFY: case RQSTYPE_NOTIFY_ACK: case RQSTYPE_CTIO1: case RQSTYPE_ATIO2: case RQSTYPE_CTIO2: case RQSTYPE_CTIO3: case RQSTYPE_CTIO7: case RQSTYPE_ABTS_RCVD: case RQSTYPE_ABTS_RSP: isp->isp_rsltccmplt++; /* count as a response completion */ #ifdef ISP_TARGET_MODE if (isp_target_notify(isp, (ispstatusreq_t *) hp, optrp)) { return (1); } #endif - /* FALLTHROUGH */ - case RQSTYPE_RPT_ID_ACQ: - if (IS_24XX(isp)) { - isp_ridacq_t rid; - isp_get_ridacq(isp, (isp_ridacq_t *)hp, &rid); - if (rid.ridacq_format == 0) { - } - return (1); - } /* FALLTHROUGH */ case RQSTYPE_REQUEST: default: ISP_DELAY(100); if (type != isp_get_response_type(isp, hp)) { /* * This is questionable- we're just papering over * something we've seen on SMP linux in target * mode- we don't really know what's happening * here that causes us to think we've gotten * an entry, but that either the entry isn't * filled out yet or our CPU read data is stale. */ isp_prt(isp, ISP_LOGINFO, "unstable type in response queue"); return (-1); } isp_prt(isp, ISP_LOGWARN, "Unhandled Response Type 0x%x", isp_get_response_type(isp, hp)); return (0); } } static void isp_parse_status(ispsoftc_t *isp, ispstatusreq_t *sp, XS_T *xs, long *rp) { switch (sp->req_completion_status & 0xff) { case RQCS_COMPLETE: if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_NOERROR); } return; case RQCS_INCOMPLETE: if ((sp->req_state_flags & RQSF_GOT_TARGET) == 0) { isp_xs_prt(isp, xs, ISP_LOG_WARN1, "Selection Timeout @ %s:%d", __func__, __LINE__); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_SELTIMEOUT); *rp = XS_XFRLEN(xs); } return; } isp_xs_prt(isp, xs, ISP_LOGERR, "Command Incomplete, state 0x%x", sp->req_state_flags); break; case RQCS_DMA_ERROR: isp_xs_prt(isp, xs, ISP_LOGERR, "DMA Error"); *rp = XS_XFRLEN(xs); break; case RQCS_TRANSPORT_ERROR: { char buf[172]; ISP_SNPRINTF(buf, sizeof (buf), "states=>"); if (sp->req_state_flags & RQSF_GOT_BUS) { ISP_SNPRINTF(buf, sizeof (buf), "%s GOT_BUS", buf); } if (sp->req_state_flags & RQSF_GOT_TARGET) { ISP_SNPRINTF(buf, sizeof (buf), "%s GOT_TGT", buf); } if (sp->req_state_flags & RQSF_SENT_CDB) { ISP_SNPRINTF(buf, sizeof (buf), "%s SENT_CDB", buf); } if (sp->req_state_flags & RQSF_XFRD_DATA) { ISP_SNPRINTF(buf, sizeof (buf), "%s XFRD_DATA", buf); } if (sp->req_state_flags & RQSF_GOT_STATUS) { ISP_SNPRINTF(buf, sizeof (buf), "%s GOT_STS", buf); } if (sp->req_state_flags & RQSF_GOT_SENSE) { ISP_SNPRINTF(buf, sizeof (buf), "%s GOT_SNS", buf); } if (sp->req_state_flags & RQSF_XFER_COMPLETE) { ISP_SNPRINTF(buf, sizeof (buf), "%s XFR_CMPLT", buf); } ISP_SNPRINTF(buf, sizeof (buf), "%s\nstatus=>", buf); if (sp->req_status_flags & RQSTF_DISCONNECT) { ISP_SNPRINTF(buf, sizeof (buf), "%s Disconnect", buf); } if (sp->req_status_flags & RQSTF_SYNCHRONOUS) { ISP_SNPRINTF(buf, sizeof (buf), "%s Sync_xfr", buf); } if (sp->req_status_flags & RQSTF_PARITY_ERROR) { ISP_SNPRINTF(buf, sizeof (buf), "%s Parity", buf); } if (sp->req_status_flags & RQSTF_BUS_RESET) { ISP_SNPRINTF(buf, sizeof (buf), "%s Bus_Reset", buf); } if (sp->req_status_flags & RQSTF_DEVICE_RESET) { ISP_SNPRINTF(buf, sizeof (buf), "%s Device_Reset", buf); } if (sp->req_status_flags & RQSTF_ABORTED) { ISP_SNPRINTF(buf, sizeof (buf), "%s 
Aborted", buf); } if (sp->req_status_flags & RQSTF_TIMEOUT) { ISP_SNPRINTF(buf, sizeof (buf), "%s Timeout", buf); } if (sp->req_status_flags & RQSTF_NEGOTIATION) { ISP_SNPRINTF(buf, sizeof (buf), "%s Negotiation", buf); } isp_xs_prt(isp, xs, ISP_LOGERR, "Transport Error: %s", buf); *rp = XS_XFRLEN(xs); break; } case RQCS_RESET_OCCURRED: { int chan; isp_xs_prt(isp, xs, ISP_LOGWARN, "Bus Reset destroyed command"); for (chan = 0; chan < isp->isp_nchan; chan++) { FCPARAM(isp, chan)->sendmarker = 1; } if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_BUSRESET); } *rp = XS_XFRLEN(xs); return; } case RQCS_ABORTED: isp_xs_prt(isp, xs, ISP_LOGERR, "Command Aborted"); ISP_SET_SENDMARKER(isp, XS_CHANNEL(xs), 1); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_ABORTED); } return; case RQCS_TIMEOUT: isp_xs_prt(isp, xs, ISP_LOGWARN, "Command timed out"); /* * XXX: Check to see if we logged out of the device. */ if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_CMDTIMEOUT); } return; case RQCS_DATA_OVERRUN: XS_SET_RESID(xs, sp->req_resid); isp_xs_prt(isp, xs, ISP_LOGERR, "data overrun (%ld)", (long) XS_GET_RESID(xs)); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_DATAOVR); } return; case RQCS_COMMAND_OVERRUN: isp_xs_prt(isp, xs, ISP_LOGERR, "command overrun"); break; case RQCS_STATUS_OVERRUN: isp_xs_prt(isp, xs, ISP_LOGERR, "status overrun"); break; case RQCS_BAD_MESSAGE: isp_xs_prt(isp, xs, ISP_LOGERR, "msg not COMMAND COMPLETE after status"); break; case RQCS_NO_MESSAGE_OUT: isp_xs_prt(isp, xs, ISP_LOGERR, "No MESSAGE OUT phase after selection"); break; case RQCS_EXT_ID_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "EXTENDED IDENTIFY failed"); break; case RQCS_IDE_MSG_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "INITIATOR DETECTED ERROR rejected"); break; case RQCS_ABORT_MSG_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "ABORT OPERATION rejected"); break; case RQCS_REJECT_MSG_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "MESSAGE REJECT rejected"); break; case RQCS_NOP_MSG_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "NOP rejected"); break; case RQCS_PARITY_ERROR_MSG_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "MESSAGE PARITY ERROR rejected"); break; case RQCS_DEVICE_RESET_MSG_FAILED: isp_xs_prt(isp, xs, ISP_LOGWARN, "BUS DEVICE RESET rejected"); break; case RQCS_ID_MSG_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "IDENTIFY rejected"); break; case RQCS_UNEXP_BUS_FREE: isp_xs_prt(isp, xs, ISP_LOGERR, "Unexpected Bus Free"); break; case RQCS_DATA_UNDERRUN: { if (IS_FC(isp)) { int ru_marked = (sp->req_scsi_status & RQCS_RU) != 0; if (!ru_marked || sp->req_resid > XS_XFRLEN(xs)) { isp_xs_prt(isp, xs, ISP_LOGWARN, bun, XS_XFRLEN(xs), sp->req_resid, (ru_marked)? "marked" : "not marked"); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_BOTCH); } return; } } XS_SET_RESID(xs, sp->req_resid); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_NOERROR); } return; } case RQCS_XACT_ERR1: isp_xs_prt(isp, xs, ISP_LOGERR, "HBA attempted queued transaction with disconnect not set"); break; case RQCS_XACT_ERR2: isp_xs_prt(isp, xs, ISP_LOGERR, "HBA attempted queued transaction to target routine %jx", (uintmax_t)XS_LUN(xs)); break; case RQCS_XACT_ERR3: isp_xs_prt(isp, xs, ISP_LOGERR, "HBA attempted queued cmd when queueing disabled"); break; case RQCS_BAD_ENTRY: isp_prt(isp, ISP_LOGERR, "Invalid IOCB entry type detected"); break; case RQCS_QUEUE_FULL: isp_xs_prt(isp, xs, ISP_LOG_WARN1, "internal queues full status 0x%x", *XS_STSP(xs)); /* * If QFULL or some other status byte is set, then this * isn't an error, per se. * * Unfortunately, some QLogic f/w writers have, in * some cases, ommitted to *set* status to QFULL. 
*/ #if 0 if (*XS_STSP(xs) != SCSI_GOOD && XS_NOERR(xs)) { XS_SETERR(xs, HBA_NOERROR); return; } #endif *XS_STSP(xs) = SCSI_QFULL; XS_SETERR(xs, HBA_NOERROR); return; case RQCS_PHASE_SKIPPED: isp_xs_prt(isp, xs, ISP_LOGERR, "SCSI phase skipped"); break; case RQCS_ARQS_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "Auto Request Sense Failed"); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_ARQFAIL); } return; case RQCS_WIDE_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "Wide Negotiation Failed"); if (IS_SCSI(isp)) { sdparam *sdp = SDPARAM(isp, XS_CHANNEL(xs)); sdp->isp_devparam[XS_TGT(xs)].goal_flags &= ~DPARM_WIDE; sdp->isp_devparam[XS_TGT(xs)].dev_update = 1; sdp->update = 1; } if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_NOERROR); } return; case RQCS_SYNCXFER_FAILED: isp_xs_prt(isp, xs, ISP_LOGERR, "SDTR Message Failed"); if (IS_SCSI(isp)) { sdparam *sdp = SDPARAM(isp, XS_CHANNEL(xs)); sdp += XS_CHANNEL(xs); sdp->isp_devparam[XS_TGT(xs)].goal_flags &= ~DPARM_SYNC; sdp->isp_devparam[XS_TGT(xs)].dev_update = 1; sdp->update = 1; } break; case RQCS_LVD_BUSERR: isp_xs_prt(isp, xs, ISP_LOGERR, "Bad LVD condition"); break; case RQCS_PORT_UNAVAILABLE: /* * No such port on the loop. Moral equivalent of SELTIMEO */ case RQCS_PORT_LOGGED_OUT: { const char *reason; uint8_t sts = sp->req_completion_status & 0xff; /* * It was there (maybe)- treat as a selection timeout. */ if (sts == RQCS_PORT_UNAVAILABLE) { reason = "unavailable"; } else { reason = "logout"; } isp_prt(isp, ISP_LOGINFO, "port %s for target %d", reason, XS_TGT(xs)); /* * If we're on a local loop, force a LIP (which is overkill) * to force a re-login of this unit. If we're on fabric, * then we'll have to log in again as a matter of course. */ if (FCPARAM(isp, 0)->isp_topo == TOPO_NL_PORT || FCPARAM(isp, 0)->isp_topo == TOPO_FL_PORT) { mbreg_t mbs; MBSINIT(&mbs, MBOX_INIT_LIP, MBLOGALL, 0); if (ISP_CAP_2KLOGIN(isp)) { mbs.ibits = (1 << 10); } isp_mboxcmd_qnw(isp, &mbs, 1); } if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_SELTIMEOUT); } return; } case RQCS_PORT_CHANGED: isp_prt(isp, ISP_LOGWARN, "port changed for target %d", XS_TGT(xs)); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_SELTIMEOUT); } return; case RQCS_PORT_BUSY: isp_prt(isp, ISP_LOGWARN, "port busy for target %d", XS_TGT(xs)); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_TGTBSY); } return; default: isp_prt(isp, ISP_LOGERR, "Unknown Completion Status 0x%x", sp->req_completion_status); break; } if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_BOTCH); } } static void isp_parse_status_24xx(ispsoftc_t *isp, isp24xx_statusreq_t *sp, XS_T *xs, long *rp) { int ru_marked, sv_marked; int chan = XS_CHANNEL(xs); switch (sp->req_completion_status) { case RQCS_COMPLETE: if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_NOERROR); } return; case RQCS_DMA_ERROR: isp_xs_prt(isp, xs, ISP_LOGERR, "DMA error"); break; case RQCS_TRANSPORT_ERROR: isp_xs_prt(isp, xs, ISP_LOGERR, "Transport Error"); break; case RQCS_RESET_OCCURRED: isp_xs_prt(isp, xs, ISP_LOGWARN, "reset destroyed command"); FCPARAM(isp, chan)->sendmarker = 1; if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_BUSRESET); } return; case RQCS_ABORTED: isp_xs_prt(isp, xs, ISP_LOGERR, "Command Aborted"); FCPARAM(isp, chan)->sendmarker = 1; if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_ABORTED); } return; case RQCS_TIMEOUT: isp_xs_prt(isp, xs, ISP_LOGWARN, "Command Timed Out"); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_CMDTIMEOUT); } return; case RQCS_DATA_OVERRUN: XS_SET_RESID(xs, sp->req_resid); isp_xs_prt(isp, xs, ISP_LOGERR, "Data Overrun"); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_DATAOVR); } return; case RQCS_24XX_DRE: /* data 
reassembly error */ isp_prt(isp, ISP_LOGERR, "Chan %d data reassembly error for target %d", chan, XS_TGT(xs)); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_ABORTED); } *rp = XS_XFRLEN(xs); return; case RQCS_24XX_TABORT: /* aborted by target */ isp_prt(isp, ISP_LOGERR, "Chan %d target %d sent ABTS", chan, XS_TGT(xs)); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_ABORTED); } return; case RQCS_DATA_UNDERRUN: ru_marked = (sp->req_scsi_status & RQCS_RU) != 0; /* * We can get an underrun w/o things being marked * if we got a non-zero status. */ sv_marked = (sp->req_scsi_status & (RQCS_SV|RQCS_RV)) != 0; if ((ru_marked == 0 && sv_marked == 0) || (sp->req_resid > XS_XFRLEN(xs))) { isp_xs_prt(isp, xs, ISP_LOGWARN, bun, XS_XFRLEN(xs), sp->req_resid, (ru_marked)? "marked" : "not marked"); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_BOTCH); } return; } XS_SET_RESID(xs, sp->req_resid); isp_xs_prt(isp, xs, ISP_LOG_WARN1, "Data Underrun (%d) for command 0x%x", sp->req_resid, XS_CDBP(xs)[0] & 0xff); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_NOERROR); } return; case RQCS_PORT_UNAVAILABLE: /* * No such port on the loop. Moral equivalent of SELTIMEO */ case RQCS_PORT_LOGGED_OUT: { const char *reason; uint8_t sts = sp->req_completion_status & 0xff; /* * It was there (maybe)- treat as a selection timeout. */ if (sts == RQCS_PORT_UNAVAILABLE) { reason = "unavailable"; } else { reason = "logout"; } isp_prt(isp, ISP_LOGINFO, "Chan %d port %s for target %d", chan, reason, XS_TGT(xs)); /* * There is no MBOX_INIT_LIP for the 24XX. */ if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_SELTIMEOUT); } return; } case RQCS_PORT_CHANGED: isp_prt(isp, ISP_LOGWARN, "port changed for target %d chan %d", XS_TGT(xs), chan); if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_SELTIMEOUT); } return; case RQCS_24XX_ENOMEM: /* f/w resource unavailable */ isp_prt(isp, ISP_LOGWARN, "f/w resource unavailable for target %d chan %d", XS_TGT(xs), chan); if (XS_NOERR(xs)) { *XS_STSP(xs) = SCSI_BUSY; XS_SETERR(xs, HBA_TGTBSY); } return; case RQCS_24XX_TMO: /* task management overrun */ isp_prt(isp, ISP_LOGWARN, "command for target %d overlapped task management for chan %d", XS_TGT(xs), chan); if (XS_NOERR(xs)) { *XS_STSP(xs) = SCSI_BUSY; XS_SETERR(xs, HBA_TGTBSY); } return; default: isp_prt(isp, ISP_LOGERR, "Unknown Completion Status 0x%x on chan %d", sp->req_completion_status, chan); break; } if (XS_NOERR(xs)) { XS_SETERR(xs, HBA_BOTCH); } } static void isp_fastpost_complete(ispsoftc_t *isp, uint32_t fph) { XS_T *xs; if (fph == 0) { return; } xs = isp_find_xs(isp, fph); if (xs == NULL) { isp_prt(isp, ISP_LOGWARN, "Command for fast post handle 0x%x not found", fph); return; } isp_destroy_handle(isp, fph); /* * Since we don't have a result queue entry item, * we must believe that SCSI status is zero and * that all data transferred. */ XS_SET_RESID(xs, 0); *XS_STSP(xs) = SCSI_GOOD; if (XS_XFRLEN(xs)) { ISP_DMAFREE(isp, xs, fph); } if (isp->isp_nactive) { isp->isp_nactive--; } isp->isp_fphccmplt++; isp_done(xs); } static int isp_mbox_continue(ispsoftc_t *isp) { mbreg_t mbs; uint16_t *ptr; uint32_t offset; switch (isp->isp_lastmbxcmd) { case MBOX_WRITE_RAM_WORD: case MBOX_READ_RAM_WORD: case MBOX_WRITE_RAM_WORD_EXTENDED: case MBOX_READ_RAM_WORD_EXTENDED: break; default: return (1); } if (isp->isp_mboxtmp[0] != MBOX_COMMAND_COMPLETE) { isp->isp_mbxwrk0 = 0; return (-1); } /* * Clear the previous interrupt. 
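* Multi-word RAM reads and writes are issued one mailbox command per
* word, and this function is re-entered from the completion interrupt
* for each word, so the pending RISC interrupt must be acknowledged
* before the next word is started.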
*/ if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_CLEAR_RISC_INT); } else { ISP_WRITE(isp, HCCR, HCCR_CMD_CLEAR_RISC_INT); ISP_WRITE(isp, BIU_SEMA, 0); } /* * Continue with next word. */ ISP_MEMZERO(&mbs, sizeof (mbs)); ptr = isp->isp_mbxworkp; switch (isp->isp_lastmbxcmd) { case MBOX_WRITE_RAM_WORD: mbs.param[1] = isp->isp_mbxwrk1++; mbs.param[2] = *ptr++; break; case MBOX_READ_RAM_WORD: *ptr++ = isp->isp_mboxtmp[2]; mbs.param[1] = isp->isp_mbxwrk1++; break; case MBOX_WRITE_RAM_WORD_EXTENDED: if (IS_24XX(isp)) { uint32_t *lptr = (uint32_t *)ptr; mbs.param[2] = lptr[0]; mbs.param[3] = lptr[0] >> 16; lptr++; ptr = (uint16_t *)lptr; } else { mbs.param[2] = *ptr++; } offset = isp->isp_mbxwrk1; offset |= isp->isp_mbxwrk8 << 16; mbs.param[1] = offset; mbs.param[8] = offset >> 16; offset++; isp->isp_mbxwrk1 = offset; isp->isp_mbxwrk8 = offset >> 16; break; case MBOX_READ_RAM_WORD_EXTENDED: if (IS_24XX(isp)) { uint32_t *lptr = (uint32_t *)ptr; uint32_t val = isp->isp_mboxtmp[2]; val |= (isp->isp_mboxtmp[3]) << 16; *lptr++ = val; ptr = (uint16_t *)lptr; } else { *ptr++ = isp->isp_mboxtmp[2]; } offset = isp->isp_mbxwrk1; offset |= isp->isp_mbxwrk8 << 16; mbs.param[1] = offset; mbs.param[8] = offset >> 16; offset++; isp->isp_mbxwrk1 = offset; isp->isp_mbxwrk8 = offset >> 16; break; } isp->isp_mbxworkp = ptr; isp->isp_mbxwrk0--; mbs.param[0] = isp->isp_lastmbxcmd; mbs.logval = MBLOGALL; isp_mboxcmd_qnw(isp, &mbs, 0); return (0); } #define ISP_SCSI_IBITS(op) (mbpscsi[((op)<<1)]) #define ISP_SCSI_OBITS(op) (mbpscsi[((op)<<1) + 1]) #define ISP_SCSI_OPMAP(in, out) in, out static const uint8_t mbpscsi[] = { ISP_SCSI_OPMAP(0x01, 0x01), /* 0x00: MBOX_NO_OP */ ISP_SCSI_OPMAP(0x1f, 0x01), /* 0x01: MBOX_LOAD_RAM */ ISP_SCSI_OPMAP(0x03, 0x01), /* 0x02: MBOX_EXEC_FIRMWARE */ ISP_SCSI_OPMAP(0x1f, 0x01), /* 0x03: MBOX_DUMP_RAM */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x04: MBOX_WRITE_RAM_WORD */ ISP_SCSI_OPMAP(0x03, 0x07), /* 0x05: MBOX_READ_RAM_WORD */ ISP_SCSI_OPMAP(0x3f, 0x3f), /* 0x06: MBOX_MAILBOX_REG_TEST */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x07: MBOX_VERIFY_CHECKSUM */ ISP_SCSI_OPMAP(0x01, 0x0f), /* 0x08: MBOX_ABOUT_FIRMWARE */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x09: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x0a: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x0b: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x0c: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x0d: */ ISP_SCSI_OPMAP(0x01, 0x05), /* 0x0e: MBOX_CHECK_FIRMWARE */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x0f: */ ISP_SCSI_OPMAP(0x1f, 0x1f), /* 0x10: MBOX_INIT_REQ_QUEUE */ ISP_SCSI_OPMAP(0x3f, 0x3f), /* 0x11: MBOX_INIT_RES_QUEUE */ ISP_SCSI_OPMAP(0x0f, 0x0f), /* 0x12: MBOX_EXECUTE_IOCB */ ISP_SCSI_OPMAP(0x03, 0x03), /* 0x13: MBOX_WAKE_UP */ ISP_SCSI_OPMAP(0x01, 0x3f), /* 0x14: MBOX_STOP_FIRMWARE */ ISP_SCSI_OPMAP(0x0f, 0x0f), /* 0x15: MBOX_ABORT */ ISP_SCSI_OPMAP(0x03, 0x03), /* 0x16: MBOX_ABORT_DEVICE */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x17: MBOX_ABORT_TARGET */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x18: MBOX_BUS_RESET */ ISP_SCSI_OPMAP(0x03, 0x07), /* 0x19: MBOX_STOP_QUEUE */ ISP_SCSI_OPMAP(0x03, 0x07), /* 0x1a: MBOX_START_QUEUE */ ISP_SCSI_OPMAP(0x03, 0x07), /* 0x1b: MBOX_SINGLE_STEP_QUEUE */ ISP_SCSI_OPMAP(0x03, 0x07), /* 0x1c: MBOX_ABORT_QUEUE */ ISP_SCSI_OPMAP(0x03, 0x4f), /* 0x1d: MBOX_GET_DEV_QUEUE_STATUS */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x1e: */ ISP_SCSI_OPMAP(0x01, 0x07), /* 0x1f: MBOX_GET_FIRMWARE_STATUS */ ISP_SCSI_OPMAP(0x01, 0x07), /* 0x20: MBOX_GET_INIT_SCSI_ID */ ISP_SCSI_OPMAP(0x01, 0x07), /* 0x21: MBOX_GET_SELECT_TIMEOUT */ ISP_SCSI_OPMAP(0x01, 0xc7), /* 0x22: MBOX_GET_RETRY_COUNT 
*/ ISP_SCSI_OPMAP(0x01, 0x07), /* 0x23: MBOX_GET_TAG_AGE_LIMIT */ ISP_SCSI_OPMAP(0x01, 0x03), /* 0x24: MBOX_GET_CLOCK_RATE */ ISP_SCSI_OPMAP(0x01, 0x07), /* 0x25: MBOX_GET_ACT_NEG_STATE */ ISP_SCSI_OPMAP(0x01, 0x07), /* 0x26: MBOX_GET_ASYNC_DATA_SETUP_TIME */ ISP_SCSI_OPMAP(0x01, 0x07), /* 0x27: MBOX_GET_PCI_PARAMS */ ISP_SCSI_OPMAP(0x03, 0x4f), /* 0x28: MBOX_GET_TARGET_PARAMS */ ISP_SCSI_OPMAP(0x03, 0x0f), /* 0x29: MBOX_GET_DEV_QUEUE_PARAMS */ ISP_SCSI_OPMAP(0x01, 0x07), /* 0x2a: MBOX_GET_RESET_DELAY_PARAMS */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x2b: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x2c: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x2d: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x2e: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x2f: */ ISP_SCSI_OPMAP(0x03, 0x03), /* 0x30: MBOX_SET_INIT_SCSI_ID */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x31: MBOX_SET_SELECT_TIMEOUT */ ISP_SCSI_OPMAP(0xc7, 0xc7), /* 0x32: MBOX_SET_RETRY_COUNT */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x33: MBOX_SET_TAG_AGE_LIMIT */ ISP_SCSI_OPMAP(0x03, 0x03), /* 0x34: MBOX_SET_CLOCK_RATE */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x35: MBOX_SET_ACT_NEG_STATE */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x36: MBOX_SET_ASYNC_DATA_SETUP_TIME */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x37: MBOX_SET_PCI_CONTROL_PARAMS */ ISP_SCSI_OPMAP(0x4f, 0x4f), /* 0x38: MBOX_SET_TARGET_PARAMS */ ISP_SCSI_OPMAP(0x0f, 0x0f), /* 0x39: MBOX_SET_DEV_QUEUE_PARAMS */ ISP_SCSI_OPMAP(0x07, 0x07), /* 0x3a: MBOX_SET_RESET_DELAY_PARAMS */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x3b: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x3c: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x3d: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x3e: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x3f: */ ISP_SCSI_OPMAP(0x01, 0x03), /* 0x40: MBOX_RETURN_BIOS_BLOCK_ADDR */ ISP_SCSI_OPMAP(0x3f, 0x01), /* 0x41: MBOX_WRITE_FOUR_RAM_WORDS */ ISP_SCSI_OPMAP(0x03, 0x07), /* 0x42: MBOX_EXEC_BIOS_IOCB */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x43: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x44: */ ISP_SCSI_OPMAP(0x03, 0x03), /* 0x45: SET SYSTEM PARAMETER */ ISP_SCSI_OPMAP(0x01, 0x03), /* 0x46: GET SYSTEM PARAMETER */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x47: */ ISP_SCSI_OPMAP(0x01, 0xcf), /* 0x48: GET SCAM CONFIGURATION */ ISP_SCSI_OPMAP(0xcf, 0xcf), /* 0x49: SET SCAM CONFIGURATION */ ISP_SCSI_OPMAP(0x03, 0x03), /* 0x4a: MBOX_SET_FIRMWARE_FEATURES */ ISP_SCSI_OPMAP(0x01, 0x03), /* 0x4b: MBOX_GET_FIRMWARE_FEATURES */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x4c: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x4d: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x4e: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x4f: */ ISP_SCSI_OPMAP(0xdf, 0xdf), /* 0x50: LOAD RAM A64 */ ISP_SCSI_OPMAP(0xdf, 0xdf), /* 0x51: DUMP RAM A64 */ ISP_SCSI_OPMAP(0xdf, 0xff), /* 0x52: INITIALIZE REQUEST QUEUE A64 */ ISP_SCSI_OPMAP(0xef, 0xff), /* 0x53: INITIALIZE RESPONSE QUEUE A64 */ ISP_SCSI_OPMAP(0xcf, 0x01), /* 0x54: EXECUTE COMMAND IOCB A64 */ ISP_SCSI_OPMAP(0x07, 0x01), /* 0x55: ENABLE TARGET MODE */ ISP_SCSI_OPMAP(0x03, 0x0f), /* 0x56: GET TARGET STATUS */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x57: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x58: */ ISP_SCSI_OPMAP(0x00, 0x00), /* 0x59: */ ISP_SCSI_OPMAP(0x03, 0x03), /* 0x5a: SET DATA OVERRUN RECOVERY MODE */ ISP_SCSI_OPMAP(0x01, 0x03), /* 0x5b: GET DATA OVERRUN RECOVERY MODE */ ISP_SCSI_OPMAP(0x0f, 0x0f), /* 0x5c: SET HOST DATA */ ISP_SCSI_OPMAP(0x01, 0x01) /* 0x5d: GET HOST DATA */ }; #define MAX_SCSI_OPCODE 0x5d static const char *scsi_mbcmd_names[] = { "NO-OP", "LOAD RAM", "EXEC FIRMWARE", "DUMP RAM", "WRITE RAM WORD", "READ RAM WORD", "MAILBOX REG TEST", "VERIFY CHECKSUM", "ABOUT FIRMWARE", NULL, NULL, NULL, NULL, NULL, "CHECK FIRMWARE", NULL,
"INIT REQUEST QUEUE", "INIT RESULT QUEUE", "EXECUTE IOCB", "WAKE UP", "STOP FIRMWARE", "ABORT", "ABORT DEVICE", "ABORT TARGET", "BUS RESET", "STOP QUEUE", "START QUEUE", "SINGLE STEP QUEUE", "ABORT QUEUE", "GET DEV QUEUE STATUS", NULL, "GET FIRMWARE STATUS", "GET INIT SCSI ID", "GET SELECT TIMEOUT", "GET RETRY COUNT", "GET TAG AGE LIMIT", "GET CLOCK RATE", "GET ACT NEG STATE", "GET ASYNC DATA SETUP TIME", "GET PCI PARAMS", "GET TARGET PARAMS", "GET DEV QUEUE PARAMS", "GET RESET DELAY PARAMS", NULL, NULL, NULL, NULL, NULL, "SET INIT SCSI ID", "SET SELECT TIMEOUT", "SET RETRY COUNT", "SET TAG AGE LIMIT", "SET CLOCK RATE", "SET ACT NEG STATE", "SET ASYNC DATA SETUP TIME", "SET PCI CONTROL PARAMS", "SET TARGET PARAMS", "SET DEV QUEUE PARAMS", "SET RESET DELAY PARAMS", NULL, NULL, NULL, NULL, NULL, "RETURN BIOS BLOCK ADDR", "WRITE FOUR RAM WORDS", "EXEC BIOS IOCB", NULL, NULL, "SET SYSTEM PARAMETER", "GET SYSTEM PARAMETER", NULL, "GET SCAM CONFIGURATION", "SET SCAM CONFIGURATION", "SET FIRMWARE FEATURES", "GET FIRMWARE FEATURES", NULL, NULL, NULL, NULL, "LOAD RAM A64", "DUMP RAM A64", "INITIALIZE REQUEST QUEUE A64", "INITIALIZE RESPONSE QUEUE A64", "EXECUTE IOCB A64", "ENABLE TARGET MODE", "GET TARGET MODE STATE", NULL, NULL, NULL, "SET DATA OVERRUN RECOVERY MODE", "GET DATA OVERRUN RECOVERY MODE", "SET HOST DATA", "GET NOST DATA", }; #define ISP_FC_IBITS(op) ((mbpfc[((op)<<3) + 0] << 24) | (mbpfc[((op)<<3) + 1] << 16) | (mbpfc[((op)<<3) + 2] << 8) | (mbpfc[((op)<<3) + 3])) #define ISP_FC_OBITS(op) ((mbpfc[((op)<<3) + 4] << 24) | (mbpfc[((op)<<3) + 5] << 16) | (mbpfc[((op)<<3) + 6] << 8) | (mbpfc[((op)<<3) + 7])) #define ISP_FC_OPMAP(in0, out0) 0, 0, 0, in0, 0, 0, 0, out0 #define ISP_FC_OPMAP_HALF(in1, in0, out1, out0) 0, 0, in1, in0, 0, 0, out1, out0 #define ISP_FC_OPMAP_FULL(in3, in2, in1, in0, out3, out2, out1, out0) in3, in2, in1, in0, out3, out2, out1, out0 static const uint32_t mbpfc[] = { ISP_FC_OPMAP(0x01, 0x01), /* 0x00: MBOX_NO_OP */ ISP_FC_OPMAP(0x1f, 0x01), /* 0x01: MBOX_LOAD_RAM */ ISP_FC_OPMAP(0x0f, 0x01), /* 0x02: MBOX_EXEC_FIRMWARE */ ISP_FC_OPMAP(0xdf, 0x01), /* 0x03: MBOX_DUMP_RAM */ ISP_FC_OPMAP(0x07, 0x07), /* 0x04: MBOX_WRITE_RAM_WORD */ ISP_FC_OPMAP(0x03, 0x07), /* 0x05: MBOX_READ_RAM_WORD */ ISP_FC_OPMAP_FULL(0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff), /* 0x06: MBOX_MAILBOX_REG_TEST */ ISP_FC_OPMAP(0x07, 0x07), /* 0x07: MBOX_VERIFY_CHECKSUM */ ISP_FC_OPMAP_FULL(0x0, 0x0, 0x0, 0x01, 0x0, 0x3, 0x80, 0x7f), /* 0x08: MBOX_ABOUT_FIRMWARE */ ISP_FC_OPMAP(0xdf, 0x01), /* 0x09: MBOX_LOAD_RISC_RAM_2100 */ ISP_FC_OPMAP(0xdf, 0x01), /* 0x0a: DUMP RAM */ ISP_FC_OPMAP_HALF(0x1, 0xff, 0x0, 0x01), /* 0x0b: MBOX_LOAD_RISC_RAM */ ISP_FC_OPMAP(0x00, 0x00), /* 0x0c: */ ISP_FC_OPMAP_HALF(0x1, 0x0f, 0x0, 0x01), /* 0x0d: MBOX_WRITE_RAM_WORD_EXTENDED */ ISP_FC_OPMAP(0x01, 0x05), /* 0x0e: MBOX_CHECK_FIRMWARE */ ISP_FC_OPMAP_HALF(0x1, 0x03, 0x0, 0x0d), /* 0x0f: MBOX_READ_RAM_WORD_EXTENDED */ ISP_FC_OPMAP(0x1f, 0x11), /* 0x10: MBOX_INIT_REQ_QUEUE */ ISP_FC_OPMAP(0x2f, 0x21), /* 0x11: MBOX_INIT_RES_QUEUE */ ISP_FC_OPMAP(0x0f, 0x01), /* 0x12: MBOX_EXECUTE_IOCB */ ISP_FC_OPMAP(0x03, 0x03), /* 0x13: MBOX_WAKE_UP */ ISP_FC_OPMAP(0x01, 0xff), /* 0x14: MBOX_STOP_FIRMWARE */ ISP_FC_OPMAP(0x4f, 0x01), /* 0x15: MBOX_ABORT */ ISP_FC_OPMAP(0x07, 0x01), /* 0x16: MBOX_ABORT_DEVICE */ ISP_FC_OPMAP(0x07, 0x01), /* 0x17: MBOX_ABORT_TARGET */ ISP_FC_OPMAP(0x03, 0x03), /* 0x18: MBOX_BUS_RESET */ ISP_FC_OPMAP(0x07, 0x05), /* 0x19: MBOX_STOP_QUEUE */ ISP_FC_OPMAP(0x07, 0x05), /* 0x1a: MBOX_START_QUEUE */ 
ISP_FC_OPMAP(0x07, 0x05), /* 0x1b: MBOX_SINGLE_STEP_QUEUE */ ISP_FC_OPMAP(0x07, 0x05), /* 0x1c: MBOX_ABORT_QUEUE */ ISP_FC_OPMAP(0x07, 0x03), /* 0x1d: MBOX_GET_DEV_QUEUE_STATUS */ ISP_FC_OPMAP(0x00, 0x00), /* 0x1e: */ ISP_FC_OPMAP(0x01, 0x07), /* 0x1f: MBOX_GET_FIRMWARE_STATUS */ ISP_FC_OPMAP_HALF(0x2, 0x01, 0x7e, 0xcf), /* 0x20: MBOX_GET_LOOP_ID */ ISP_FC_OPMAP(0x00, 0x00), /* 0x21: */ ISP_FC_OPMAP(0x01, 0x07), /* 0x22: MBOX_GET_RETRY_COUNT */ ISP_FC_OPMAP(0x00, 0x00), /* 0x23: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x24: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x25: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x26: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x27: */ ISP_FC_OPMAP(0x01, 0x03), /* 0x28: MBOX_GET_FIRMWARE_OPTIONS */ ISP_FC_OPMAP(0x03, 0x07), /* 0x29: MBOX_GET_PORT_QUEUE_PARAMS */ ISP_FC_OPMAP(0x00, 0x00), /* 0x2a: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x2b: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x2c: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x2d: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x2e: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x2f: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x30: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x31: */ ISP_FC_OPMAP(0x07, 0x07), /* 0x32: MBOX_SET_RETRY_COUNT */ ISP_FC_OPMAP(0x00, 0x00), /* 0x33: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x34: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x35: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x36: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x37: */ ISP_FC_OPMAP(0x0f, 0x01), /* 0x38: MBOX_SET_FIRMWARE_OPTIONS */ ISP_FC_OPMAP(0x0f, 0x07), /* 0x39: MBOX_SET_PORT_QUEUE_PARAMS */ ISP_FC_OPMAP(0x00, 0x00), /* 0x3a: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x3b: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x3c: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x3d: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x3e: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x3f: */ ISP_FC_OPMAP(0x03, 0x01), /* 0x40: MBOX_LOOP_PORT_BYPASS */ ISP_FC_OPMAP(0x03, 0x01), /* 0x41: MBOX_LOOP_PORT_ENABLE */ ISP_FC_OPMAP_HALF(0x0, 0x01, 0x3, 0xcf), /* 0x42: MBOX_GET_RESOURCE_COUNT */ ISP_FC_OPMAP(0x01, 0x01), /* 0x43: MBOX_REQUEST_OFFLINE_MODE */ ISP_FC_OPMAP(0x00, 0x00), /* 0x44: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x45: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x46: */ ISP_FC_OPMAP(0xcf, 0x03), /* 0x47: GET PORT_DATABASE ENHANCED */ ISP_FC_OPMAP(0xcf, 0x0f), /* 0x48: MBOX_INIT_FIRMWARE_MULTI_ID */ ISP_FC_OPMAP(0xcd, 0x01), /* 0x49: MBOX_GET_VP_DATABASE */ ISP_FC_OPMAP_HALF(0x2, 0xcd, 0x0, 0x01), /* 0x4a: MBOX_GET_VP_DATABASE_ENTRY */ ISP_FC_OPMAP(0x00, 0x00), /* 0x4b: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x4c: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x4d: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x4e: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x4f: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x50: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x51: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x52: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x53: */ ISP_FC_OPMAP(0xcf, 0x01), /* 0x54: EXECUTE IOCB A64 */ ISP_FC_OPMAP(0x00, 0x00), /* 0x55: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x56: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x57: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x58: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x59: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x5a: */ ISP_FC_OPMAP(0x03, 0x01), /* 0x5b: MBOX_DRIVER_HEARTBEAT */ ISP_FC_OPMAP(0xcf, 0x01), /* 0x5c: MBOX_FW_HEARTBEAT */ ISP_FC_OPMAP(0x07, 0x03), /* 0x5d: MBOX_GET_SET_DATA_RATE */ ISP_FC_OPMAP(0x00, 0x00), /* 0x5e: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x5f: */ ISP_FC_OPMAP(0xcf, 0x0f), /* 0x60: MBOX_INIT_FIRMWARE */ ISP_FC_OPMAP(0x00, 0x00), /* 0x61: */ ISP_FC_OPMAP(0x01, 0x01), /* 0x62: MBOX_INIT_LIP */ ISP_FC_OPMAP(0xcd, 0x03), /* 0x63: MBOX_GET_FC_AL_POSITION_MAP */ ISP_FC_OPMAP(0xcf, 0x01), /* 0x64: MBOX_GET_PORT_DB */ ISP_FC_OPMAP(0x07, 0x01), /* 0x65: MBOX_CLEAR_ACA */ ISP_FC_OPMAP(0x07, 0x01), /* 
0x66: MBOX_TARGET_RESET */ ISP_FC_OPMAP(0x07, 0x01), /* 0x67: MBOX_CLEAR_TASK_SET */ ISP_FC_OPMAP(0x07, 0x01), /* 0x68: MBOX_ABORT_TASK_SET */ ISP_FC_OPMAP(0x01, 0x07), /* 0x69: MBOX_GET_FW_STATE */ ISP_FC_OPMAP_HALF(0x6, 0x03, 0x0, 0xcf), /* 0x6a: MBOX_GET_PORT_NAME */ ISP_FC_OPMAP(0xcf, 0x01), /* 0x6b: MBOX_GET_LINK_STATUS */ ISP_FC_OPMAP(0x0f, 0x01), /* 0x6c: MBOX_INIT_LIP_RESET */ ISP_FC_OPMAP(0x00, 0x00), /* 0x6d: */ ISP_FC_OPMAP(0xcf, 0x03), /* 0x6e: MBOX_SEND_SNS */ ISP_FC_OPMAP(0x0f, 0x07), /* 0x6f: MBOX_FABRIC_LOGIN */ ISP_FC_OPMAP(0x03, 0x01), /* 0x70: MBOX_SEND_CHANGE_REQUEST */ ISP_FC_OPMAP(0x03, 0x03), /* 0x71: MBOX_FABRIC_LOGOUT */ ISP_FC_OPMAP(0x0f, 0x0f), /* 0x72: MBOX_INIT_LIP_LOGIN */ ISP_FC_OPMAP(0x00, 0x00), /* 0x73: */ ISP_FC_OPMAP(0x07, 0x01), /* 0x74: LOGIN LOOP PORT */ ISP_FC_OPMAP_HALF(0x03, 0xcf, 0x00, 0x07), /* 0x75: GET PORT/NODE NAME LIST */ ISP_FC_OPMAP(0x4f, 0x01), /* 0x76: SET VENDOR ID */ ISP_FC_OPMAP(0xcd, 0x01), /* 0x77: INITIALIZE IP MAILBOX */ ISP_FC_OPMAP(0x00, 0x00), /* 0x78: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x79: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x7a: */ ISP_FC_OPMAP(0x00, 0x00), /* 0x7b: */ ISP_FC_OPMAP_HALF(0x03, 0x4f, 0x00, 0x07), /* 0x7c: Get ID List */ ISP_FC_OPMAP(0xcf, 0x01), /* 0x7d: SEND LFA */ ISP_FC_OPMAP(0x0f, 0x01) /* 0x7e: LUN RESET */ }; #define MAX_FC_OPCODE 0x7e /* * Footnotes * * (1): this sets bits 21..16 in mailbox register #8, which we nominally * do not access at this time in the core driver. The caller is * responsible for setting this register first (Gross!). The assumption * is that we won't overflow. */ static const char *fc_mbcmd_names[] = { "NO-OP", "LOAD RAM", "EXEC FIRMWARE", "DUMP RAM", "WRITE RAM WORD", "READ RAM WORD", "MAILBOX REG TEST", "VERIFY CHECKSUM", "ABOUT FIRMWARE", "LOAD RAM (2100)", "DUMP RAM", "LOAD RISC RAM", NULL, "WRITE RAM WORD EXTENDED", "CHECK FIRMWARE", "READ RAM WORD EXTENDED", "INIT REQUEST QUEUE", "INIT RESULT QUEUE", "EXECUTE IOCB", "WAKE UP", "STOP FIRMWARE", "ABORT", "ABORT DEVICE", "ABORT TARGET", "BUS RESET", "STOP QUEUE", "START QUEUE", "SINGLE STEP QUEUE", "ABORT QUEUE", "GET DEV QUEUE STATUS", NULL, "GET FIRMWARE STATUS", "GET LOOP ID", NULL, "GET RETRY COUNT", NULL, NULL, NULL, NULL, NULL, "GET FIRMWARE OPTIONS", "GET PORT QUEUE PARAMS", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "SET RETRY COUNT", NULL, NULL, NULL, NULL, NULL, "SET FIRMWARE OPTIONS", "SET PORT QUEUE PARAMS", NULL, NULL, NULL, NULL, NULL, NULL, "LOOP PORT BYPASS", "LOOP PORT ENABLE", "GET RESOURCE COUNT", "REQUEST NON PARTICIPATING MODE", NULL, NULL, NULL, "GET PORT DATABASE ENHANCED", "INIT FIRMWARE MULTI ID", "GET VP DATABASE", "GET VP DATABASE ENTRY", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "EXECUTE IOCB A64", NULL, NULL, NULL, NULL, NULL, NULL, "DRIVER HEARTBEAT", NULL, "GET/SET DATA RATE", NULL, NULL, "INIT FIRMWARE", NULL, "INIT LIP", "GET FC-AL POSITION MAP", "GET PORT DATABASE", "CLEAR ACA", "TARGET RESET", "CLEAR TASK SET", "ABORT TASK SET", "GET FW STATE", "GET PORT NAME", "GET LINK STATUS", "INIT LIP RESET", NULL, "SEND SNS", "FABRIC LOGIN", "SEND CHANGE REQUEST", "FABRIC LOGOUT", "INIT LIP LOGIN", NULL, "LOGIN LOOP PORT", "GET PORT/NODE NAME LIST", "SET VENDOR ID", "INITIALIZE IP MAILBOX", NULL, NULL, NULL, NULL, "Get ID List", "SEND LFA", "Lun RESET" }; static void isp_mboxcmd_qnw(ispsoftc_t *isp, mbreg_t *mbp, int nodelay) { unsigned int ibits, obits, box, opcode; opcode = mbp->param[0]; if (IS_FC(isp)) { ibits = ISP_FC_IBITS(opcode); obits = ISP_FC_OBITS(opcode); } else { ibits = 
ISP_SCSI_IBITS(opcode); obits = ISP_SCSI_OBITS(opcode); } ibits |= mbp->ibits; obits |= mbp->obits; for (box = 0; box < ISP_NMBOX(isp); box++) { if (ibits & (1 << box)) { ISP_WRITE(isp, MBOX_OFF(box), mbp->param[box]); } if (nodelay == 0) { isp->isp_mboxtmp[box] = mbp->param[box] = 0; } } if (nodelay == 0) { isp->isp_lastmbxcmd = opcode; isp->isp_obits = obits; isp->isp_mboxbsy = 1; } if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_SET_HOST_INT); } else { ISP_WRITE(isp, HCCR, HCCR_CMD_SET_HOST_INT); } /* * Oddly enough, if we're not delaying for an answer, * delay a bit to give the f/w a chance to pick up the * command. */ if (nodelay) { ISP_DELAY(1000); } } static void isp_mboxcmd(ispsoftc_t *isp, mbreg_t *mbp) { const char *cname, *xname, *sname; char tname[16], mname[16]; unsigned int ibits, obits, box, opcode; opcode = mbp->param[0]; if (IS_FC(isp)) { if (opcode > MAX_FC_OPCODE) { mbp->param[0] = MBOX_INVALID_COMMAND; isp_prt(isp, ISP_LOGERR, "Unknown Command 0x%x", opcode); return; } cname = fc_mbcmd_names[opcode]; ibits = ISP_FC_IBITS(opcode); obits = ISP_FC_OBITS(opcode); } else { if (opcode > MAX_SCSI_OPCODE) { mbp->param[0] = MBOX_INVALID_COMMAND; isp_prt(isp, ISP_LOGERR, "Unknown Command 0x%x", opcode); return; } cname = scsi_mbcmd_names[opcode]; ibits = ISP_SCSI_IBITS(opcode); obits = ISP_SCSI_OBITS(opcode); } if (cname == NULL) { cname = tname; ISP_SNPRINTF(tname, sizeof tname, "opcode %x", opcode); } isp_prt(isp, ISP_LOGDEBUG3, "Mailbox Command '%s'", cname); /* * Pick up any additional bits that the caller might have set. */ ibits |= mbp->ibits; obits |= mbp->obits; /* * Mask any bits that the caller wants us to mask */ ibits &= mbp->ibitm; obits &= mbp->obitm; if (ibits == 0 && obits == 0) { mbp->param[0] = MBOX_COMMAND_PARAM_ERROR; isp_prt(isp, ISP_LOGERR, "no parameters for 0x%x", opcode); return; } /* * Get exclusive usage of mailbox registers. */ if (MBOX_ACQUIRE(isp)) { mbp->param[0] = MBOX_REGS_BUSY; goto out; } for (box = 0; box < ISP_NMBOX(isp); box++) { if (ibits & (1 << box)) { isp_prt(isp, ISP_LOGDEBUG3, "IN mbox %d = 0x%04x", box, mbp->param[box]); ISP_WRITE(isp, MBOX_OFF(box), mbp->param[box]); } isp->isp_mboxtmp[box] = mbp->param[box] = 0; } isp->isp_lastmbxcmd = opcode; /* * We assume that we can't overwrite a previous command. */ isp->isp_obits = obits; isp->isp_mboxbsy = 1; /* * Set Host Interrupt condition so that RISC will pick up mailbox regs. */ if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_SET_HOST_INT); } else { ISP_WRITE(isp, HCCR, HCCR_CMD_SET_HOST_INT); } /* * While we haven't finished the command, spin our wheels here. */ MBOX_WAIT_COMPLETE(isp, mbp); /* * Did the command time out? */ if (mbp->param[0] == MBOX_TIMEOUT) { isp->isp_mboxbsy = 0; MBOX_RELEASE(isp); goto out; } /* * Copy back output registers. 
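* Only the mailbox registers named in the obits mask for this opcode
* (plus any extra bits the caller supplied) are copied out; every
* register was zeroed in isp_mboxtmp when the command was started.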
*/ for (box = 0; box < ISP_NMBOX(isp); box++) { if (obits & (1 << box)) { mbp->param[box] = isp->isp_mboxtmp[box]; isp_prt(isp, ISP_LOGDEBUG3, "OUT mbox %d = 0x%04x", box, mbp->param[box]); } } isp->isp_mboxbsy = 0; MBOX_RELEASE(isp); out: if (mbp->logval == 0 || mbp->param[0] == MBOX_COMMAND_COMPLETE) return; if ((mbp->param[0] & 0xbfe0) == 0 && (mbp->logval & MBLOGMASK(mbp->param[0])) == 0) return; xname = NULL; sname = ""; switch (mbp->param[0]) { case MBOX_INVALID_COMMAND: xname = "INVALID COMMAND"; break; case MBOX_HOST_INTERFACE_ERROR: xname = "HOST INTERFACE ERROR"; break; case MBOX_TEST_FAILED: xname = "TEST FAILED"; break; case MBOX_COMMAND_ERROR: xname = "COMMAND ERROR"; ISP_SNPRINTF(mname, sizeof(mname), " subcode 0x%x", mbp->param[1]); sname = mname; break; case MBOX_COMMAND_PARAM_ERROR: xname = "COMMAND PARAMETER ERROR"; break; case MBOX_PORT_ID_USED: xname = "PORT ID ALREADY IN USE"; break; case MBOX_LOOP_ID_USED: xname = "LOOP ID ALREADY IN USE"; break; case MBOX_ALL_IDS_USED: xname = "ALL LOOP IDS IN USE"; break; case MBOX_NOT_LOGGED_IN: xname = "NOT LOGGED IN"; break; case MBOX_LINK_DOWN_ERROR: xname = "LINK DOWN ERROR"; break; case MBOX_LOOPBACK_ERROR: xname = "LOOPBACK ERROR"; break; case MBOX_CHECKSUM_ERROR: xname = "CHECKSUM ERROR"; break; case MBOX_INVALID_PRODUCT_KEY: xname = "INVALID PRODUCT KEY"; break; case MBOX_REGS_BUSY: xname = "REGISTERS BUSY"; break; case MBOX_TIMEOUT: xname = "TIMEOUT"; break; default: ISP_SNPRINTF(mname, sizeof mname, "error 0x%x", mbp->param[0]); xname = mname; break; } if (xname) { isp_prt(isp, ISP_LOGALL, "Mailbox Command '%s' failed (%s%s)", cname, xname, sname); } } static void isp_fw_state(ispsoftc_t *isp, int chan) { if (IS_FC(isp)) { mbreg_t mbs; fcparam *fcp = FCPARAM(isp, chan); MBSINIT(&mbs, MBOX_GET_FW_STATE, MBLOGALL, 0); isp_mboxcmd(isp, &mbs); if (mbs.param[0] == MBOX_COMMAND_COMPLETE) { fcp->isp_fwstate = mbs.param[1]; } } } static void isp_spi_update(ispsoftc_t *isp, int chan) { int tgt; mbreg_t mbs; sdparam *sdp; if (IS_FC(isp)) { /* * There are no 'per-bus' settings for Fibre Channel. */ return; } sdp = SDPARAM(isp, chan); sdp->update = 0; for (tgt = 0; tgt < MAX_TARGETS; tgt++) { uint16_t flags, period, offset; int get; if (sdp->isp_devparam[tgt].dev_enable == 0) { sdp->isp_devparam[tgt].dev_update = 0; sdp->isp_devparam[tgt].dev_refresh = 0; isp_prt(isp, ISP_LOGDEBUG0, "skipping target %d bus %d update", tgt, chan); continue; } /* * If the goal is to update the status of the device, * take what's in goal_flags and try and set the device * toward that. Otherwise, if we're just refreshing the * current device state, get the current parameters. */ MBSINIT(&mbs, 0, MBLOGALL, 0); /* * Refresh overrides set */ if (sdp->isp_devparam[tgt].dev_refresh) { mbs.param[0] = MBOX_GET_TARGET_PARAMS; get = 1; } else if (sdp->isp_devparam[tgt].dev_update) { mbs.param[0] = MBOX_SET_TARGET_PARAMS; /* * Make sure goal_flags has "Renegotiate on Error" * on and "Freeze Queue on Error" off. */ sdp->isp_devparam[tgt].goal_flags |= DPARM_RENEG; sdp->isp_devparam[tgt].goal_flags &= ~DPARM_QFRZ; mbs.param[2] = sdp->isp_devparam[tgt].goal_flags; /* * Insist that PARITY must be enabled * if SYNC or WIDE is enabled. */ if ((mbs.param[2] & (DPARM_SYNC|DPARM_WIDE)) != 0) { mbs.param[2] |= DPARM_PARITY; } if (mbs.param[2] & DPARM_SYNC) { mbs.param[3] = (sdp->isp_devparam[tgt].goal_offset << 8) | (sdp->isp_devparam[tgt].goal_period); } /* * A command completion later that has * RQSTF_NEGOTIATION set can cause * the dev_refresh/announce cycle also. 
* * Note: It is really important to update our current * flags with at least the state of TAG capabilities- * otherwise we might try and send a tagged command * when we have it all turned off. So change it here * to say that current already matches goal. */ sdp->isp_devparam[tgt].actv_flags &= ~DPARM_TQING; sdp->isp_devparam[tgt].actv_flags |= (sdp->isp_devparam[tgt].goal_flags & DPARM_TQING); isp_prt(isp, ISP_LOGDEBUG0, "bus %d set tgt %d flags 0x%x off 0x%x period 0x%x", chan, tgt, mbs.param[2], mbs.param[3] >> 8, mbs.param[3] & 0xff); get = 0; } else { continue; } mbs.param[1] = (chan << 15) | (tgt << 8); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { continue; } if (get == 0) { sdp->sendmarker = 1; sdp->isp_devparam[tgt].dev_update = 0; sdp->isp_devparam[tgt].dev_refresh = 1; } else { sdp->isp_devparam[tgt].dev_refresh = 0; flags = mbs.param[2]; period = mbs.param[3] & 0xff; offset = mbs.param[3] >> 8; sdp->isp_devparam[tgt].actv_flags = flags; sdp->isp_devparam[tgt].actv_period = period; sdp->isp_devparam[tgt].actv_offset = offset; isp_async(isp, ISPASYNC_NEW_TGT_PARAMS, chan, tgt); } } for (tgt = 0; tgt < MAX_TARGETS; tgt++) { if (sdp->isp_devparam[tgt].dev_update || sdp->isp_devparam[tgt].dev_refresh) { sdp->update = 1; break; } } } static void isp_setdfltsdparm(ispsoftc_t *isp) { int tgt; sdparam *sdp, *sdp1; sdp = SDPARAM(isp, 0); sdp->role = GET_DEFAULT_ROLE(isp, 0); if (IS_DUALBUS(isp)) { sdp1 = sdp + 1; sdp1->role = GET_DEFAULT_ROLE(isp, 1); } else { sdp1 = NULL; } /* * Establish some default parameters. */ sdp->isp_cmd_dma_burst_enable = 0; sdp->isp_data_dma_burst_enabl = 1; sdp->isp_fifo_threshold = 0; sdp->isp_initiator_id = DEFAULT_IID(isp, 0); if (isp->isp_type >= ISP_HA_SCSI_1040) { sdp->isp_async_data_setup = 9; } else { sdp->isp_async_data_setup = 6; } sdp->isp_selection_timeout = 250; sdp->isp_max_queue_depth = MAXISPREQUEST(isp); sdp->isp_tag_aging = 8; sdp->isp_bus_reset_delay = 5; /* * Don't retry selection, busy or queue full automatically- reflect * these back to us. */ sdp->isp_retry_count = 0; sdp->isp_retry_delay = 0; for (tgt = 0; tgt < MAX_TARGETS; tgt++) { sdp->isp_devparam[tgt].exc_throttle = ISP_EXEC_THROTTLE; sdp->isp_devparam[tgt].dev_enable = 1; } /* * The trick here is to establish a default for the default (honk!) * state (goal_flags). Then try and get the current status from * the card to fill in the current state. We don't, in fact, set * the default to the SAFE default state- that's not the goal state. */ for (tgt = 0; tgt < MAX_TARGETS; tgt++) { uint8_t off, per; sdp->isp_devparam[tgt].actv_offset = 0; sdp->isp_devparam[tgt].actv_period = 0; sdp->isp_devparam[tgt].actv_flags = 0; sdp->isp_devparam[tgt].goal_flags = sdp->isp_devparam[tgt].nvrm_flags = DPARM_DEFAULT; /* * We default to Wide/Fast for versions less than a 1040 * (unless it's SBus). 
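* The ladder below picks the fastest ISP_xxM_SYNCPARMS offset/period
* pair the chip generation supports: 80M for Ultra3, 40M for Ultra2,
* 20M for a 1240, and 10M (or the 1040 flavor of 20M) otherwise.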
*/ if (IS_ULTRA3(isp)) { off = ISP_80M_SYNCPARMS >> 8; per = ISP_80M_SYNCPARMS & 0xff; } else if (IS_ULTRA2(isp)) { off = ISP_40M_SYNCPARMS >> 8; per = ISP_40M_SYNCPARMS & 0xff; } else if (IS_1240(isp)) { off = ISP_20M_SYNCPARMS >> 8; per = ISP_20M_SYNCPARMS & 0xff; } else if ((isp->isp_bustype == ISP_BT_SBUS && isp->isp_type < ISP_HA_SCSI_1020A) || (isp->isp_bustype == ISP_BT_PCI && isp->isp_type < ISP_HA_SCSI_1040) || (isp->isp_clock && isp->isp_clock < 60) || (sdp->isp_ultramode == 0)) { off = ISP_10M_SYNCPARMS >> 8; per = ISP_10M_SYNCPARMS & 0xff; } else { off = ISP_20M_SYNCPARMS_1040 >> 8; per = ISP_20M_SYNCPARMS_1040 & 0xff; } sdp->isp_devparam[tgt].goal_offset = sdp->isp_devparam[tgt].nvrm_offset = off; sdp->isp_devparam[tgt].goal_period = sdp->isp_devparam[tgt].nvrm_period = per; } /* * If we're a dual bus card, just copy the data over */ if (sdp1) { *sdp1 = *sdp; sdp1->isp_initiator_id = DEFAULT_IID(isp, 1); } /* * If we've not been told to avoid reading NVRAM, try and read it. * If we're successful reading it, we can then return because NVRAM * will tell us what the desired settings are. Otherwise, we establish * some reasonable 'fake' nvram and goal defaults. */ if ((isp->isp_confopts & ISP_CFG_NONVRAM) == 0) { mbreg_t mbs; if (isp_read_nvram(isp, 0) == 0) { if (IS_DUALBUS(isp)) { if (isp_read_nvram(isp, 1) == 0) { return; } } } MBSINIT(&mbs, MBOX_GET_ACT_NEG_STATE, MBLOGNONE, 0); isp_mboxcmd(isp, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { sdp->isp_req_ack_active_neg = 1; sdp->isp_data_line_active_neg = 1; if (sdp1) { sdp1->isp_req_ack_active_neg = 1; sdp1->isp_data_line_active_neg = 1; } } else { sdp->isp_req_ack_active_neg = (mbs.param[1] >> 4) & 0x1; sdp->isp_data_line_active_neg = (mbs.param[1] >> 5) & 0x1; if (sdp1) { sdp1->isp_req_ack_active_neg = (mbs.param[2] >> 4) & 0x1; sdp1->isp_data_line_active_neg = (mbs.param[2] >> 5) & 0x1; } } } } static void isp_setdfltfcparm(ispsoftc_t *isp, int chan) { fcparam *fcp = FCPARAM(isp, chan); /* * Establish some default parameters. */ fcp->role = GET_DEFAULT_ROLE(isp, chan); fcp->isp_maxalloc = ICB_DFLT_ALLOC; fcp->isp_retry_delay = ICB_DFLT_RDELAY; fcp->isp_retry_count = ICB_DFLT_RCOUNT; fcp->isp_loopid = DEFAULT_LOOPID(isp, chan); fcp->isp_wwnn_nvram = DEFAULT_NODEWWN(isp, chan); fcp->isp_wwpn_nvram = DEFAULT_PORTWWN(isp, chan); fcp->isp_fwoptions = 0; fcp->isp_lasthdl = NIL_HANDLE; if (IS_24XX(isp)) { fcp->isp_fwoptions |= ICB2400_OPT1_FAIRNESS; fcp->isp_fwoptions |= ICB2400_OPT1_HARD_ADDRESS; if (isp->isp_confopts & ISP_CFG_FULL_DUPLEX) { fcp->isp_fwoptions |= ICB2400_OPT1_FULL_DUPLEX; } fcp->isp_fwoptions |= ICB2400_OPT1_BOTH_WWNS; } else { fcp->isp_fwoptions |= ICBOPT_FAIRNESS; fcp->isp_fwoptions |= ICBOPT_PDBCHANGE_AE; fcp->isp_fwoptions |= ICBOPT_HARD_ADDRESS; if (isp->isp_confopts & ISP_CFG_FULL_DUPLEX) { fcp->isp_fwoptions |= ICBOPT_FULL_DUPLEX; } /* * Make sure this is turned off now until we get * extended options from NVRAM */ fcp->isp_fwoptions &= ~ICBOPT_EXTENDED; } /* * Now try and read NVRAM unless told to not do so. * This will set fcparam's isp_wwnn_nvram && isp_wwpn_nvram. */ if ((isp->isp_confopts & ISP_CFG_NONVRAM) == 0) { int i, j = 0; /* * Give a couple of tries at reading NVRAM. 
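* Reads of the serial NVRAM can fail transiently, hence the second
* attempt; if both tries fail, ISP_CFG_NONVRAM is set below and we
* run with the defaults established above.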
*/ for (i = 0; i < 2; i++) { j = isp_read_nvram(isp, chan); if (j == 0) { break; } } if (j) { isp->isp_confopts |= ISP_CFG_NONVRAM; } } fcp->isp_wwnn = ACTIVE_NODEWWN(isp, chan); fcp->isp_wwpn = ACTIVE_PORTWWN(isp, chan); isp_prt(isp, ISP_LOGCONFIG, "Chan %d 0x%08x%08x/0x%08x%08x Role %s", chan, (uint32_t) (fcp->isp_wwnn >> 32), (uint32_t) (fcp->isp_wwnn), (uint32_t) (fcp->isp_wwpn >> 32), (uint32_t) (fcp->isp_wwpn), isp_class3_roles[fcp->role]); } /* * Re-initialize the ISP and complete all orphaned commands * with a 'botched' notice. The reset/init routines should * not disturb an already active list of commands. */ int isp_reinit(ispsoftc_t *isp, int do_load_defaults) { int i, res = 0; if (isp->isp_state != ISP_RESETSTATE) isp_reset(isp, do_load_defaults); if (isp->isp_state != ISP_RESETSTATE) { res = EIO; isp_prt(isp, ISP_LOGERR, "%s: cannot reset card", __func__); ISP_DISABLE_INTS(isp); goto cleanup; } isp_init(isp); if (isp->isp_state > ISP_RESETSTATE && isp->isp_state != ISP_RUNSTATE) { res = EIO; isp_prt(isp, ISP_LOGERR, "%s: cannot init card", __func__); ISP_DISABLE_INTS(isp); if (IS_FC(isp)) { /* * If we're in ISP_ROLE_NONE, turn off the lasers. */ if (!IS_24XX(isp)) { ISP_WRITE(isp, BIU2100_CSR, BIU2100_FPM0_REGS); ISP_WRITE(isp, FPM_DIAG_CONFIG, FPM_SOFT_RESET); ISP_WRITE(isp, BIU2100_CSR, BIU2100_FB_REGS); ISP_WRITE(isp, FBM_CMD, FBMCMD_FIFO_RESET_ALL); ISP_WRITE(isp, BIU2100_CSR, BIU2100_RISC_REGS); } } } cleanup: isp->isp_nactive = 0; isp_clear_commands(isp); if (IS_FC(isp)) { for (i = 0; i < isp->isp_nchan; i++) ISP_MARK_PORTDB(isp, i, -1); } return (res); } /* * NVRAM Routines */ static int isp_read_nvram(ispsoftc_t *isp, int bus) { int i, amt, retval; uint8_t csum, minversion; union { uint8_t _x[ISP2400_NVRAM_SIZE]; uint16_t _s[ISP2400_NVRAM_SIZE>>1]; } _n; #define nvram_data _n._x #define nvram_words _n._s if (IS_24XX(isp)) { return (isp_read_nvram_2400(isp, nvram_data)); } else if (IS_FC(isp)) { amt = ISP2100_NVRAM_SIZE; minversion = 1; } else if (IS_ULTRA2(isp)) { amt = ISP1080_NVRAM_SIZE; minversion = 0; } else { amt = ISP_NVRAM_SIZE; minversion = 2; } for (i = 0; i < amt>>1; i++) { isp_rdnvram_word(isp, i, &nvram_words[i]); } if (nvram_data[0] != 'I' || nvram_data[1] != 'S' || nvram_data[2] != 'P') { if (isp->isp_bustype != ISP_BT_SBUS) { isp_prt(isp, ISP_LOGWARN, "invalid NVRAM header"); isp_prt(isp, ISP_LOGDEBUG0, "%x %x %x", nvram_data[0], nvram_data[1], nvram_data[2]); } retval = -1; goto out; } for (csum = 0, i = 0; i < amt; i++) { csum += nvram_data[i]; } if (csum != 0) { isp_prt(isp, ISP_LOGWARN, "invalid NVRAM checksum"); retval = -1; goto out; } if (ISP_NVRAM_VERSION(nvram_data) < minversion) { isp_prt(isp, ISP_LOGWARN, "version %d NVRAM not understood", ISP_NVRAM_VERSION(nvram_data)); retval = -1; goto out; } if (IS_ULTRA3(isp)) { isp_parse_nvram_12160(isp, bus, nvram_data); } else if (IS_1080(isp)) { isp_parse_nvram_1080(isp, bus, nvram_data); } else if (IS_1280(isp) || IS_1240(isp)) { isp_parse_nvram_1080(isp, bus, nvram_data); } else if (IS_SCSI(isp)) { isp_parse_nvram_1020(isp, nvram_data); } else { isp_parse_nvram_2100(isp, nvram_data); } retval = 0; out: return (retval); #undef nvram_data #undef nvram_words } static int isp_read_nvram_2400(ispsoftc_t *isp, uint8_t *nvram_data) { int retval = 0; uint32_t addr, csum, lwrds, *dptr; if (isp->isp_port) { addr = ISP2400_NVRAM_PORT1_ADDR; } else { addr = ISP2400_NVRAM_PORT0_ADDR; } dptr = (uint32_t *) nvram_data; for (lwrds = 0; lwrds < ISP2400_NVRAM_SIZE >> 2; lwrds++) { isp_rd_2400_nvram(isp, addr++, 
dptr++); } if (nvram_data[0] != 'I' || nvram_data[1] != 'S' || nvram_data[2] != 'P') { isp_prt(isp, ISP_LOGWARN, "invalid NVRAM header (%x %x %x)", nvram_data[0], nvram_data[1], nvram_data[2]); retval = -1; goto out; } dptr = (uint32_t *) nvram_data; for (csum = 0, lwrds = 0; lwrds < ISP2400_NVRAM_SIZE >> 2; lwrds++) { uint32_t tmp; ISP_IOXGET_32(isp, &dptr[lwrds], tmp); csum += tmp; } if (csum != 0) { isp_prt(isp, ISP_LOGWARN, "invalid NVRAM checksum"); retval = -1; goto out; } isp_parse_nvram_2400(isp, nvram_data); out: return (retval); } static void isp_rdnvram_word(ispsoftc_t *isp, int wo, uint16_t *rp) { int i, cbits; uint16_t bit, rqst, junk; ISP_WRITE(isp, BIU_NVRAM, BIU_NVRAM_SELECT); ISP_DELAY(10); ISP_WRITE(isp, BIU_NVRAM, BIU_NVRAM_SELECT|BIU_NVRAM_CLOCK); ISP_DELAY(10); if (IS_FC(isp)) { wo &= ((ISP2100_NVRAM_SIZE >> 1) - 1); if (IS_2312(isp) && isp->isp_port) { wo += 128; } rqst = (ISP_NVRAM_READ << 8) | wo; cbits = 10; } else if (IS_ULTRA2(isp)) { wo &= ((ISP1080_NVRAM_SIZE >> 1) - 1); rqst = (ISP_NVRAM_READ << 8) | wo; cbits = 10; } else { wo &= ((ISP_NVRAM_SIZE >> 1) - 1); rqst = (ISP_NVRAM_READ << 6) | wo; cbits = 8; } /* * Clock the word select request out... */ for (i = cbits; i >= 0; i--) { if ((rqst >> i) & 1) { bit = BIU_NVRAM_SELECT | BIU_NVRAM_DATAOUT; } else { bit = BIU_NVRAM_SELECT; } ISP_WRITE(isp, BIU_NVRAM, bit); ISP_DELAY(10); junk = ISP_READ(isp, BIU_NVRAM); /* force PCI flush */ ISP_WRITE(isp, BIU_NVRAM, bit | BIU_NVRAM_CLOCK); ISP_DELAY(10); junk = ISP_READ(isp, BIU_NVRAM); /* force PCI flush */ ISP_WRITE(isp, BIU_NVRAM, bit); ISP_DELAY(10); junk = ISP_READ(isp, BIU_NVRAM); /* force PCI flush */ } /* * Now read the result back in (bits come back in MSB format). */ *rp = 0; for (i = 0; i < 16; i++) { uint16_t rv; *rp <<= 1; ISP_WRITE(isp, BIU_NVRAM, BIU_NVRAM_SELECT|BIU_NVRAM_CLOCK); ISP_DELAY(10); rv = ISP_READ(isp, BIU_NVRAM); if (rv & BIU_NVRAM_DATAIN) { *rp |= 1; } ISP_DELAY(10); ISP_WRITE(isp, BIU_NVRAM, BIU_NVRAM_SELECT); ISP_DELAY(10); junk = ISP_READ(isp, BIU_NVRAM); /* force PCI flush */ } ISP_WRITE(isp, BIU_NVRAM, 0); ISP_DELAY(10); junk = ISP_READ(isp, BIU_NVRAM); /* force PCI flush */ ISP_SWIZZLE_NVRAM_WORD(isp, rp); } static void isp_rd_2400_nvram(ispsoftc_t *isp, uint32_t addr, uint32_t *rp) { int loops = 0; uint32_t base = 0x7ffe0000; uint32_t tmp = 0; if (IS_25XX(isp)) { base = 0x7ff00000 | 0x48000; } ISP_WRITE(isp, BIU2400_FLASH_ADDR, base | addr); for (loops = 0; loops < 5000; loops++) { ISP_DELAY(10); tmp = ISP_READ(isp, BIU2400_FLASH_ADDR); if ((tmp & (1U << 31)) != 0) { break; } } if (tmp & (1U << 31)) { *rp = ISP_READ(isp, BIU2400_FLASH_DATA); ISP_SWIZZLE_NVRAM_LONG(isp, rp); } else { *rp = 0xffffffff; } } static void isp_parse_nvram_1020(ispsoftc_t *isp, uint8_t *nvram_data) { sdparam *sdp = SDPARAM(isp, 0); int tgt; sdp->isp_fifo_threshold = ISP_NVRAM_FIFO_THRESHOLD(nvram_data) | (ISP_NVRAM_FIFO_THRESHOLD_128(nvram_data) << 2); if ((isp->isp_confopts & ISP_CFG_OWNLOOPID) == 0) sdp->isp_initiator_id = ISP_NVRAM_INITIATOR_ID(nvram_data); sdp->isp_bus_reset_delay = ISP_NVRAM_BUS_RESET_DELAY(nvram_data); sdp->isp_retry_count = ISP_NVRAM_BUS_RETRY_COUNT(nvram_data); sdp->isp_retry_delay = ISP_NVRAM_BUS_RETRY_DELAY(nvram_data); sdp->isp_async_data_setup = ISP_NVRAM_ASYNC_DATA_SETUP_TIME(nvram_data); if (isp->isp_type >= ISP_HA_SCSI_1040) { if (sdp->isp_async_data_setup < 9) { sdp->isp_async_data_setup = 9; } } else { if (sdp->isp_async_data_setup != 6) { sdp->isp_async_data_setup = 6; } } sdp->isp_req_ack_active_neg = 
ISP_NVRAM_REQ_ACK_ACTIVE_NEGATION(nvram_data); sdp->isp_data_line_active_neg = ISP_NVRAM_DATA_LINE_ACTIVE_NEGATION(nvram_data); sdp->isp_data_dma_burst_enabl = ISP_NVRAM_DATA_DMA_BURST_ENABLE(nvram_data); sdp->isp_cmd_dma_burst_enable = ISP_NVRAM_CMD_DMA_BURST_ENABLE(nvram_data); sdp->isp_tag_aging = ISP_NVRAM_TAG_AGE_LIMIT(nvram_data); sdp->isp_selection_timeout = ISP_NVRAM_SELECTION_TIMEOUT(nvram_data); sdp->isp_max_queue_depth = ISP_NVRAM_MAX_QUEUE_DEPTH(nvram_data); sdp->isp_fast_mttr = ISP_NVRAM_FAST_MTTR_ENABLE(nvram_data); for (tgt = 0; tgt < MAX_TARGETS; tgt++) { sdp->isp_devparam[tgt].dev_enable = ISP_NVRAM_TGT_DEVICE_ENABLE(nvram_data, tgt); sdp->isp_devparam[tgt].exc_throttle = ISP_NVRAM_TGT_EXEC_THROTTLE(nvram_data, tgt); sdp->isp_devparam[tgt].nvrm_offset = ISP_NVRAM_TGT_SYNC_OFFSET(nvram_data, tgt); sdp->isp_devparam[tgt].nvrm_period = ISP_NVRAM_TGT_SYNC_PERIOD(nvram_data, tgt); /* * We probably shouldn't lie about this, but it * makes it much safer if we limit NVRAM values * to sanity. */ if (isp->isp_type < ISP_HA_SCSI_1040) { /* * If we're not ultra, we can't possibly * be a shorter period than this. */ if (sdp->isp_devparam[tgt].nvrm_period < 0x19) { sdp->isp_devparam[tgt].nvrm_period = 0x19; } if (sdp->isp_devparam[tgt].nvrm_offset > 0xc) { sdp->isp_devparam[tgt].nvrm_offset = 0x0c; } } else { if (sdp->isp_devparam[tgt].nvrm_offset > 0x8) { sdp->isp_devparam[tgt].nvrm_offset = 0x8; } } sdp->isp_devparam[tgt].nvrm_flags = 0; if (ISP_NVRAM_TGT_RENEG(nvram_data, tgt)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_RENEG; sdp->isp_devparam[tgt].nvrm_flags |= DPARM_ARQ; if (ISP_NVRAM_TGT_TQING(nvram_data, tgt)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_TQING; if (ISP_NVRAM_TGT_SYNC(nvram_data, tgt)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_SYNC; if (ISP_NVRAM_TGT_WIDE(nvram_data, tgt)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_WIDE; if (ISP_NVRAM_TGT_PARITY(nvram_data, tgt)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_PARITY; if (ISP_NVRAM_TGT_DISC(nvram_data, tgt)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_DISC; sdp->isp_devparam[tgt].actv_flags = 0; /* we don't know */ sdp->isp_devparam[tgt].goal_offset = sdp->isp_devparam[tgt].nvrm_offset; sdp->isp_devparam[tgt].goal_period = sdp->isp_devparam[tgt].nvrm_period; sdp->isp_devparam[tgt].goal_flags = sdp->isp_devparam[tgt].nvrm_flags; } } static void isp_parse_nvram_1080(ispsoftc_t *isp, int bus, uint8_t *nvram_data) { sdparam *sdp = SDPARAM(isp, bus); int tgt; sdp->isp_fifo_threshold = ISP1080_NVRAM_FIFO_THRESHOLD(nvram_data); if ((isp->isp_confopts & ISP_CFG_OWNLOOPID) == 0) sdp->isp_initiator_id = ISP1080_NVRAM_INITIATOR_ID(nvram_data, bus); sdp->isp_bus_reset_delay = ISP1080_NVRAM_BUS_RESET_DELAY(nvram_data, bus); sdp->isp_retry_count = ISP1080_NVRAM_BUS_RETRY_COUNT(nvram_data, bus); sdp->isp_retry_delay = ISP1080_NVRAM_BUS_RETRY_DELAY(nvram_data, bus); sdp->isp_async_data_setup = ISP1080_NVRAM_ASYNC_DATA_SETUP_TIME(nvram_data, bus); sdp->isp_req_ack_active_neg = ISP1080_NVRAM_REQ_ACK_ACTIVE_NEGATION(nvram_data, bus); sdp->isp_data_line_active_neg = ISP1080_NVRAM_DATA_LINE_ACTIVE_NEGATION(nvram_data, bus); sdp->isp_data_dma_burst_enabl = ISP1080_NVRAM_BURST_ENABLE(nvram_data); sdp->isp_cmd_dma_burst_enable = ISP1080_NVRAM_BURST_ENABLE(nvram_data); sdp->isp_selection_timeout = ISP1080_NVRAM_SELECTION_TIMEOUT(nvram_data, bus); sdp->isp_max_queue_depth = ISP1080_NVRAM_MAX_QUEUE_DEPTH(nvram_data, bus); for (tgt = 0; tgt < MAX_TARGETS; tgt++) { sdp->isp_devparam[tgt].dev_enable = ISP1080_NVRAM_TGT_DEVICE_ENABLE(nvram_data,
tgt, bus); sdp->isp_devparam[tgt].exc_throttle = ISP1080_NVRAM_TGT_EXEC_THROTTLE(nvram_data, tgt, bus); sdp->isp_devparam[tgt].nvrm_offset = ISP1080_NVRAM_TGT_SYNC_OFFSET(nvram_data, tgt, bus); sdp->isp_devparam[tgt].nvrm_period = ISP1080_NVRAM_TGT_SYNC_PERIOD(nvram_data, tgt, bus); sdp->isp_devparam[tgt].nvrm_flags = 0; if (ISP1080_NVRAM_TGT_RENEG(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_RENEG; sdp->isp_devparam[tgt].nvrm_flags |= DPARM_ARQ; if (ISP1080_NVRAM_TGT_TQING(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_TQING; if (ISP1080_NVRAM_TGT_SYNC(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_SYNC; if (ISP1080_NVRAM_TGT_WIDE(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_WIDE; if (ISP1080_NVRAM_TGT_PARITY(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_PARITY; if (ISP1080_NVRAM_TGT_DISC(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_DISC; sdp->isp_devparam[tgt].actv_flags = 0; sdp->isp_devparam[tgt].goal_offset = sdp->isp_devparam[tgt].nvrm_offset; sdp->isp_devparam[tgt].goal_period = sdp->isp_devparam[tgt].nvrm_period; sdp->isp_devparam[tgt].goal_flags = sdp->isp_devparam[tgt].nvrm_flags; } } static void isp_parse_nvram_12160(ispsoftc_t *isp, int bus, uint8_t *nvram_data) { sdparam *sdp = SDPARAM(isp, bus); int tgt; sdp->isp_fifo_threshold = ISP12160_NVRAM_FIFO_THRESHOLD(nvram_data); if ((isp->isp_confopts & ISP_CFG_OWNLOOPID) == 0) sdp->isp_initiator_id = ISP12160_NVRAM_INITIATOR_ID(nvram_data, bus); sdp->isp_bus_reset_delay = ISP12160_NVRAM_BUS_RESET_DELAY(nvram_data, bus); sdp->isp_retry_count = ISP12160_NVRAM_BUS_RETRY_COUNT(nvram_data, bus); sdp->isp_retry_delay = ISP12160_NVRAM_BUS_RETRY_DELAY(nvram_data, bus); sdp->isp_async_data_setup = ISP12160_NVRAM_ASYNC_DATA_SETUP_TIME(nvram_data, bus); sdp->isp_req_ack_active_neg = ISP12160_NVRAM_REQ_ACK_ACTIVE_NEGATION(nvram_data, bus); sdp->isp_data_line_active_neg = ISP12160_NVRAM_DATA_LINE_ACTIVE_NEGATION(nvram_data, bus); sdp->isp_data_dma_burst_enabl = ISP12160_NVRAM_BURST_ENABLE(nvram_data); sdp->isp_cmd_dma_burst_enable = ISP12160_NVRAM_BURST_ENABLE(nvram_data); sdp->isp_selection_timeout = ISP12160_NVRAM_SELECTION_TIMEOUT(nvram_data, bus); sdp->isp_max_queue_depth = ISP12160_NVRAM_MAX_QUEUE_DEPTH(nvram_data, bus); for (tgt = 0; tgt < MAX_TARGETS; tgt++) { sdp->isp_devparam[tgt].dev_enable = ISP12160_NVRAM_TGT_DEVICE_ENABLE(nvram_data, tgt, bus); sdp->isp_devparam[tgt].exc_throttle = ISP12160_NVRAM_TGT_EXEC_THROTTLE(nvram_data, tgt, bus); sdp->isp_devparam[tgt].nvrm_offset = ISP12160_NVRAM_TGT_SYNC_OFFSET(nvram_data, tgt, bus); sdp->isp_devparam[tgt].nvrm_period = ISP12160_NVRAM_TGT_SYNC_PERIOD(nvram_data, tgt, bus); sdp->isp_devparam[tgt].nvrm_flags = 0; if (ISP12160_NVRAM_TGT_RENEG(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_RENEG; sdp->isp_devparam[tgt].nvrm_flags |= DPARM_ARQ; if (ISP12160_NVRAM_TGT_TQING(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_TQING; if (ISP12160_NVRAM_TGT_SYNC(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_SYNC; if (ISP12160_NVRAM_TGT_WIDE(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_WIDE; if (ISP12160_NVRAM_TGT_PARITY(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_PARITY; if (ISP12160_NVRAM_TGT_DISC(nvram_data, tgt, bus)) sdp->isp_devparam[tgt].nvrm_flags |= DPARM_DISC; sdp->isp_devparam[tgt].actv_flags = 0; sdp->isp_devparam[tgt].goal_offset = sdp->isp_devparam[tgt].nvrm_offset; 
sdp->isp_devparam[tgt].goal_period = sdp->isp_devparam[tgt].nvrm_period; sdp->isp_devparam[tgt].goal_flags = sdp->isp_devparam[tgt].nvrm_flags; } } static void isp_parse_nvram_2100(ispsoftc_t *isp, uint8_t *nvram_data) { fcparam *fcp = FCPARAM(isp, 0); uint64_t wwn; /* * There is NVRAM storage for both Port and Node entities- * but the Node entity appears to be unused on all the cards * I can find. However, we should account for this being set * at some point in the future. * * Qlogic WWNs have an NAA of 2, but usually nothing shows up in * bits 48..60. In the case of the 2202, it appears that they do * use bit 48 to distinguish between the two instances on the card. * The 2204, which I've never seen, *probably* extends this method. */ wwn = ISP2100_NVRAM_PORT_NAME(nvram_data); if (wwn) { isp_prt(isp, ISP_LOGCONFIG, "NVRAM Port WWN 0x%08x%08x", (uint32_t) (wwn >> 32), (uint32_t) (wwn)); if ((wwn >> 60) == 0) { wwn |= (((uint64_t) 2)<< 60); } } fcp->isp_wwpn_nvram = wwn; if (IS_2200(isp) || IS_23XX(isp)) { wwn = ISP2100_NVRAM_NODE_NAME(nvram_data); if (wwn) { isp_prt(isp, ISP_LOGCONFIG, "NVRAM Node WWN 0x%08x%08x", (uint32_t) (wwn >> 32), (uint32_t) (wwn)); if ((wwn >> 60) == 0) { wwn |= (((uint64_t) 2)<< 60); } } else { wwn = fcp->isp_wwpn_nvram & ~((uint64_t) 0xfff << 48); } } else { wwn &= ~((uint64_t) 0xfff << 48); } fcp->isp_wwnn_nvram = wwn; fcp->isp_maxalloc = ISP2100_NVRAM_MAXIOCBALLOCATION(nvram_data); if ((isp->isp_confopts & ISP_CFG_OWNFSZ) == 0) { DEFAULT_FRAMESIZE(isp) = ISP2100_NVRAM_MAXFRAMELENGTH(nvram_data); } fcp->isp_retry_delay = ISP2100_NVRAM_RETRY_DELAY(nvram_data); fcp->isp_retry_count = ISP2100_NVRAM_RETRY_COUNT(nvram_data); if ((isp->isp_confopts & ISP_CFG_OWNLOOPID) == 0) { fcp->isp_loopid = ISP2100_NVRAM_HARDLOOPID(nvram_data); } if ((isp->isp_confopts & ISP_CFG_OWNEXCTHROTTLE) == 0) { DEFAULT_EXEC_THROTTLE(isp) = ISP2100_NVRAM_EXECUTION_THROTTLE(nvram_data); } fcp->isp_fwoptions = ISP2100_NVRAM_OPTIONS(nvram_data); isp_prt(isp, ISP_LOGDEBUG0, "NVRAM 0x%08x%08x 0x%08x%08x maxalloc %d maxframelen %d", (uint32_t) (fcp->isp_wwnn_nvram >> 32), (uint32_t) fcp->isp_wwnn_nvram, (uint32_t) (fcp->isp_wwpn_nvram >> 32), (uint32_t) fcp->isp_wwpn_nvram, ISP2100_NVRAM_MAXIOCBALLOCATION(nvram_data), ISP2100_NVRAM_MAXFRAMELENGTH(nvram_data)); isp_prt(isp, ISP_LOGDEBUG0, "execthrottle %d fwoptions 0x%x hardloop %d tov %d", ISP2100_NVRAM_EXECUTION_THROTTLE(nvram_data), ISP2100_NVRAM_OPTIONS(nvram_data), ISP2100_NVRAM_HARDLOOPID(nvram_data), ISP2100_NVRAM_TOV(nvram_data)); fcp->isp_xfwoptions = ISP2100_XFW_OPTIONS(nvram_data); fcp->isp_zfwoptions = ISP2100_ZFW_OPTIONS(nvram_data); isp_prt(isp, ISP_LOGDEBUG0, "xfwoptions 0x%x zfw options 0x%x", ISP2100_XFW_OPTIONS(nvram_data), ISP2100_ZFW_OPTIONS(nvram_data)); } static void isp_parse_nvram_2400(ispsoftc_t *isp, uint8_t *nvram_data) { fcparam *fcp = FCPARAM(isp, 0); uint64_t wwn; isp_prt(isp, ISP_LOGDEBUG0, "NVRAM 0x%08x%08x 0x%08x%08x exchg_cnt %d maxframelen %d", (uint32_t) (ISP2400_NVRAM_NODE_NAME(nvram_data) >> 32), (uint32_t) (ISP2400_NVRAM_NODE_NAME(nvram_data)), (uint32_t) (ISP2400_NVRAM_PORT_NAME(nvram_data) >> 32), (uint32_t) (ISP2400_NVRAM_PORT_NAME(nvram_data)), ISP2400_NVRAM_EXCHANGE_COUNT(nvram_data), ISP2400_NVRAM_MAXFRAMELENGTH(nvram_data)); isp_prt(isp, ISP_LOGDEBUG0, "NVRAM execthr %d loopid %d fwopt1 0x%x fwopt2 0x%x fwopt3 0x%x", ISP2400_NVRAM_EXECUTION_THROTTLE(nvram_data), ISP2400_NVRAM_HARDLOOPID(nvram_data), ISP2400_NVRAM_FIRMWARE_OPTIONS1(nvram_data), ISP2400_NVRAM_FIRMWARE_OPTIONS2(nvram_data), 
ISP2400_NVRAM_FIRMWARE_OPTIONS3(nvram_data)); wwn = ISP2400_NVRAM_PORT_NAME(nvram_data); fcp->isp_wwpn_nvram = wwn; wwn = ISP2400_NVRAM_NODE_NAME(nvram_data); if (wwn) { if ((wwn >> 60) != 2 && (wwn >> 60) != 5) { wwn = 0; } } if (wwn == 0 && (fcp->isp_wwpn_nvram >> 60) == 2) { wwn = fcp->isp_wwpn_nvram; wwn &= ~((uint64_t) 0xfff << 48); } fcp->isp_wwnn_nvram = wwn; if (ISP2400_NVRAM_EXCHANGE_COUNT(nvram_data)) { fcp->isp_maxalloc = ISP2400_NVRAM_EXCHANGE_COUNT(nvram_data); } if ((isp->isp_confopts & ISP_CFG_OWNFSZ) == 0) { DEFAULT_FRAMESIZE(isp) = ISP2400_NVRAM_MAXFRAMELENGTH(nvram_data); } if ((isp->isp_confopts & ISP_CFG_OWNLOOPID) == 0) { fcp->isp_loopid = ISP2400_NVRAM_HARDLOOPID(nvram_data); } if ((isp->isp_confopts & ISP_CFG_OWNEXCTHROTTLE) == 0) { DEFAULT_EXEC_THROTTLE(isp) = ISP2400_NVRAM_EXECUTION_THROTTLE(nvram_data); } fcp->isp_fwoptions = ISP2400_NVRAM_FIRMWARE_OPTIONS1(nvram_data); fcp->isp_xfwoptions = ISP2400_NVRAM_FIRMWARE_OPTIONS2(nvram_data); fcp->isp_zfwoptions = ISP2400_NVRAM_FIRMWARE_OPTIONS3(nvram_data); } Index: projects/powernv/dev/isp/isp_freebsd.c =================================================================== --- projects/powernv/dev/isp/isp_freebsd.c (revision 290990) +++ projects/powernv/dev/isp/isp_freebsd.c (revision 290991) @@ -1,5592 +1,5580 @@ /*- * Copyright (c) 1997-2009 by Matthew Jacob * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Platform (FreeBSD) dependent common attachment code for Qlogic adapters. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #if __FreeBSD_version < 800002 #define THREAD_CREATE kthread_create #else #define THREAD_CREATE kproc_create #endif MODULE_VERSION(isp, 1); MODULE_DEPEND(isp, cam, 1, 1, 1); int isp_announced = 0; int isp_fabric_hysteresis = 5; int isp_loop_down_limit = 60; /* default loop down limit */ int isp_quickboot_time = 7; /* don't wait more than N secs for loop up */ int isp_gone_device_time = 30; /* grace time before reporting device lost */ static const char prom3[] = "Chan %d [%u] PortID 0x%06x Departed because of %s"; static void isp_freeze_loopdown(ispsoftc_t *, int, char *); static d_ioctl_t ispioctl; static void isp_intr_enable(void *); static void isp_cam_async(void *, uint32_t, struct cam_path *, void *); static void isp_poll(struct cam_sim *); static timeout_t isp_watchdog; static timeout_t isp_gdt; static task_fn_t isp_gdt_task; static timeout_t isp_ldt; static task_fn_t isp_ldt_task; static void isp_kthread(void *); static void isp_action(struct cam_sim *, union ccb *); static int isp_timer_count; static void isp_timer(void *); static struct cdevsw isp_cdevsw = { .d_version = D_VERSION, .d_ioctl = ispioctl, .d_name = "isp", }; static int isp_role_sysctl(SYSCTL_HANDLER_ARGS) { ispsoftc_t *isp = (ispsoftc_t *)arg1; int chan = arg2; int error, old, value; value = FCPARAM(isp, chan)->role; error = sysctl_handle_int(oidp, &value, 0, req); if ((error != 0) || (req->newptr == NULL)) return (error); if (value < ISP_ROLE_NONE || value > ISP_ROLE_BOTH) return (EINVAL); ISP_LOCK(isp); old = FCPARAM(isp, chan)->role; /* We don't allow target mode switch from here. */ value = (old & ISP_ROLE_TARGET) | (value & ISP_ROLE_INITIATOR); /* If nothing has changed -- we are done. */ if (value == old) { ISP_UNLOCK(isp); return (0); } /* Actually change the role. */ error = isp_control(isp, ISPCTL_CHANGE_ROLE, chan, value); ISP_UNLOCK(isp); return (error); } static int isp_attach_chan(ispsoftc_t *isp, struct cam_devq *devq, int chan) { struct ccb_setasync csa; struct cam_sim *sim; struct cam_path *path; /* * Construct our SIM entry. 
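 *
 * The calls below are the standard CAM attachment sequence: allocate
 * the SIM with cam_sim_alloc(), register it as a bus with
 * xpt_bus_register(), build a wildcard path for it with
 * xpt_create_path(), and then hang an AC_LOST_DEVICE async callback
 * off that path so we hear about departing devices.  Each step
 * unwinds the earlier ones on failure.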
*/ sim = cam_sim_alloc(isp_action, isp_poll, "isp", isp, device_get_unit(isp->isp_dev), &isp->isp_osinfo.lock, isp->isp_maxcmds, isp->isp_maxcmds, devq); if (sim == NULL) { return (ENOMEM); } ISP_LOCK(isp); if (xpt_bus_register(sim, isp->isp_dev, chan) != CAM_SUCCESS) { ISP_UNLOCK(isp); cam_sim_free(sim, FALSE); return (EIO); } ISP_UNLOCK(isp); if (xpt_create_path(&path, NULL, cam_sim_path(sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { ISP_LOCK(isp); xpt_bus_deregister(cam_sim_path(sim)); ISP_UNLOCK(isp); cam_sim_free(sim, FALSE); return (ENXIO); } xpt_setup_ccb(&csa.ccb_h, path, 5); csa.ccb_h.func_code = XPT_SASYNC_CB; csa.event_enable = AC_LOST_DEVICE; csa.callback = isp_cam_async; csa.callback_arg = sim; ISP_LOCK(isp); xpt_action((union ccb *)&csa); ISP_UNLOCK(isp); if (IS_SCSI(isp)) { struct isp_spi *spi = ISP_SPI_PC(isp, chan); spi->sim = sim; spi->path = path; } else { fcparam *fcp = FCPARAM(isp, chan); struct isp_fc *fc = ISP_FC_PC(isp, chan); struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(isp->isp_osinfo.dev); struct sysctl_oid *tree = device_get_sysctl_tree(isp->isp_osinfo.dev); char name[16]; ISP_LOCK(isp); fc->sim = sim; fc->path = path; fc->isp = isp; fc->ready = 1; callout_init_mtx(&fc->ldt, &isp->isp_osinfo.lock, 0); callout_init_mtx(&fc->gdt, &isp->isp_osinfo.lock, 0); TASK_INIT(&fc->ltask, 1, isp_ldt_task, fc); TASK_INIT(&fc->gtask, 1, isp_gdt_task, fc); /* * We start by being "loop down" if we have an initiator role */ if (fcp->role & ISP_ROLE_INITIATOR) { isp_freeze_loopdown(isp, chan, "isp_attach"); callout_reset(&fc->ldt, isp_quickboot_time * hz, isp_ldt, fc); isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Starting Initial Loop Down Timer @ %lu", (unsigned long) time_uptime); } ISP_UNLOCK(isp); if (THREAD_CREATE(isp_kthread, fc, &fc->kproc, 0, 0, "%s: fc_thrd%d", device_get_nameunit(isp->isp_osinfo.dev), chan)) { xpt_free_path(fc->path); ISP_LOCK(isp); if (callout_active(&fc->ldt)) callout_stop(&fc->ldt); xpt_bus_deregister(cam_sim_path(fc->sim)); ISP_UNLOCK(isp); cam_sim_free(fc->sim, FALSE); return (ENOMEM); } fc->num_threads += 1; if (chan > 0) { snprintf(name, sizeof(name), "chan%d", chan); tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, name, CTLFLAG_RW, 0, "Virtual channel"); } SYSCTL_ADD_QUAD(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "wwnn", CTLFLAG_RD, &fcp->isp_wwnn, "World Wide Node Name"); SYSCTL_ADD_QUAD(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "wwpn", CTLFLAG_RD, &fcp->isp_wwpn, "World Wide Port Name"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "loop_down_limit", CTLFLAG_RW, &fc->loop_down_limit, 0, "Loop Down Limit"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "gone_device_time", CTLFLAG_RW, &fc->gone_device_time, 0, "Gone Device Time"); #if defined(ISP_TARGET_MODE) && defined(DEBUG) SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "inject_lost_data_frame", CTLFLAG_RW, &fc->inject_lost_data_frame, 0, "Cause a Lost Frame on a Read"); #endif SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "role", CTLTYPE_INT | CTLFLAG_RW, isp, chan, isp_role_sysctl, "I", "Current role"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "speed", CTLFLAG_RD, &fcp->isp_gbspeed, 0, "Connection speed in gigabits"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "linkstate", CTLFLAG_RD, &fcp->isp_linkstate, 0, "Link state"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "fwstate", CTLFLAG_RD, &fcp->isp_fwstate, 0, "Firmware state"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "loopstate", 
CTLFLAG_RD, &fcp->isp_loopstate, 0, "Loop state"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "topo", CTLFLAG_RD, &fcp->isp_topo, 0, "Connection topology"); } return (0); } static void isp_detach_chan(ispsoftc_t *isp, int chan) { struct cam_sim *sim; struct cam_path *path; struct ccb_setasync csa; int *num_threads; ISP_GET_PC(isp, chan, sim, sim); ISP_GET_PC(isp, chan, path, path); ISP_GET_PC_ADDR(isp, chan, num_threads, num_threads); xpt_setup_ccb(&csa.ccb_h, path, 5); csa.ccb_h.func_code = XPT_SASYNC_CB; csa.event_enable = 0; csa.callback = isp_cam_async; csa.callback_arg = sim; xpt_action((union ccb *)&csa); xpt_free_path(path); xpt_bus_deregister(cam_sim_path(sim)); cam_sim_free(sim, FALSE); /* Wait for the channel's spawned threads to exit. */ wakeup(isp->isp_osinfo.pc.ptr); while (*num_threads != 0) mtx_sleep(isp, &isp->isp_osinfo.lock, PRIBIO, "isp_reap", 100); } int isp_attach(ispsoftc_t *isp) { const char *nu = device_get_nameunit(isp->isp_osinfo.dev); int du = device_get_unit(isp->isp_dev); int chan; isp->isp_osinfo.ehook.ich_func = isp_intr_enable; isp->isp_osinfo.ehook.ich_arg = isp; /* * Haha. Set this first, because if we're loaded as a module isp_intr_enable * will be called right away, which will clear isp_osinfo.ehook_active, * which would be unwise to then set again later. */ isp->isp_osinfo.ehook_active = 1; if (config_intrhook_establish(&isp->isp_osinfo.ehook) != 0) { isp_prt(isp, ISP_LOGERR, "could not establish interrupt enable hook"); return (-EIO); } /* * Create the device queue for our SIM(s). */ isp->isp_osinfo.devq = cam_simq_alloc(isp->isp_maxcmds); if (isp->isp_osinfo.devq == NULL) { config_intrhook_disestablish(&isp->isp_osinfo.ehook); return (EIO); } for (chan = 0; chan < isp->isp_nchan; chan++) { if (isp_attach_chan(isp, isp->isp_osinfo.devq, chan)) { goto unwind; } } callout_init_mtx(&isp->isp_osinfo.tmo, &isp->isp_osinfo.lock, 0); isp_timer_count = hz >> 2; callout_reset(&isp->isp_osinfo.tmo, isp_timer_count, isp_timer, isp); isp->isp_osinfo.timer_active = 1; isp->isp_osinfo.cdev = make_dev(&isp_cdevsw, du, UID_ROOT, GID_OPERATOR, 0600, "%s", nu); if (isp->isp_osinfo.cdev) { isp->isp_osinfo.cdev->si_drv1 = isp; } return (0); unwind: while (--chan >= 0) { struct cam_sim *sim; struct cam_path *path; ISP_GET_PC(isp, chan, sim, sim); ISP_GET_PC(isp, chan, path, path); xpt_free_path(path); ISP_LOCK(isp); xpt_bus_deregister(cam_sim_path(sim)); ISP_UNLOCK(isp); cam_sim_free(sim, FALSE); } if (isp->isp_osinfo.ehook_active) { config_intrhook_disestablish(&isp->isp_osinfo.ehook); isp->isp_osinfo.ehook_active = 0; } if (isp->isp_osinfo.cdev) { destroy_dev(isp->isp_osinfo.cdev); isp->isp_osinfo.cdev = NULL; } cam_simq_free(isp->isp_osinfo.devq); isp->isp_osinfo.devq = NULL; return (-1); } int isp_detach(ispsoftc_t *isp) { struct cam_sim *sim; int chan; ISP_LOCK(isp); for (chan = isp->isp_nchan - 1; chan >= 0; chan -= 1) { ISP_GET_PC(isp, chan, sim, sim); if (sim->refcount > 2) { ISP_UNLOCK(isp); return (EBUSY); } } /* Tell spawned threads that we're exiting.
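 * Each channel's worker thread (isp_kthread) is expected to notice
 * isp_osinfo.is_exiting and terminate; isp_detach_chan() then sleeps
 * until that channel's num_threads count drains to zero.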
*/ isp->isp_osinfo.is_exiting = 1; if (isp->isp_osinfo.timer_active) { callout_stop(&isp->isp_osinfo.tmo); isp->isp_osinfo.timer_active = 0; } for (chan = isp->isp_nchan - 1; chan >= 0; chan -= 1) isp_detach_chan(isp, chan); ISP_UNLOCK(isp); if (isp->isp_osinfo.cdev) { destroy_dev(isp->isp_osinfo.cdev); isp->isp_osinfo.cdev = NULL; } if (isp->isp_osinfo.ehook_active) { config_intrhook_disestablish(&isp->isp_osinfo.ehook); isp->isp_osinfo.ehook_active = 0; } if (isp->isp_osinfo.devq != NULL) { cam_simq_free(isp->isp_osinfo.devq); isp->isp_osinfo.devq = NULL; } return (0); } static void isp_freeze_loopdown(ispsoftc_t *isp, int chan, char *msg) { if (IS_FC(isp)) { struct isp_fc *fc = ISP_FC_PC(isp, chan); if (fc->simqfrozen == 0) { isp_prt(isp, ISP_LOGDEBUG0, "Chan %d %s -- freeze simq (loopdown)", chan, msg); fc->simqfrozen = SIMQFRZ_LOOPDOWN; #if __FreeBSD_version >= 1000039 xpt_hold_boot(); #endif xpt_freeze_simq(fc->sim, 1); } else { isp_prt(isp, ISP_LOGDEBUG0, "Chan %d %s -- mark frozen (loopdown)", chan, msg); fc->simqfrozen |= SIMQFRZ_LOOPDOWN; } } } static void isp_unfreeze_loopdown(ispsoftc_t *isp, int chan) { if (IS_FC(isp)) { struct isp_fc *fc = ISP_FC_PC(isp, chan); int wasfrozen = fc->simqfrozen & SIMQFRZ_LOOPDOWN; fc->simqfrozen &= ~SIMQFRZ_LOOPDOWN; if (wasfrozen && fc->simqfrozen == 0) { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d releasing simq", __func__, chan); xpt_release_simq(fc->sim, 1); #if __FreeBSD_version >= 1000039 xpt_release_boot(); #endif } } } static int ispioctl(struct cdev *dev, u_long c, caddr_t addr, int flags, struct thread *td) { ispsoftc_t *isp; int nr, chan, retval = ENOTTY; isp = dev->si_drv1; switch (c) { case ISP_SDBLEV: { int olddblev = isp->isp_dblev; isp->isp_dblev = *(int *)addr; *(int *)addr = olddblev; retval = 0; break; } case ISP_GETROLE: chan = *(int *)addr; if (chan < 0 || chan >= isp->isp_nchan) { retval = -ENXIO; break; } if (IS_FC(isp)) { *(int *)addr = FCPARAM(isp, chan)->role; } else { *(int *)addr = SDPARAM(isp, chan)->role; } retval = 0; break; case ISP_SETROLE: nr = *(int *)addr; chan = nr >> 8; if (chan < 0 || chan >= isp->isp_nchan) { retval = -ENXIO; break; } nr &= 0xff; if (nr & ~(ISP_ROLE_INITIATOR|ISP_ROLE_TARGET)) { retval = EINVAL; break; } ISP_LOCK(isp); if (IS_FC(isp)) *(int *)addr = FCPARAM(isp, chan)->role; else *(int *)addr = SDPARAM(isp, chan)->role; retval = isp_control(isp, ISPCTL_CHANGE_ROLE, chan, nr); ISP_UNLOCK(isp); retval = 0; break; case ISP_RESETHBA: ISP_LOCK(isp); isp_reinit(isp, 0); ISP_UNLOCK(isp); retval = 0; break; case ISP_RESCAN: if (IS_FC(isp)) { chan = *(int *)addr; if (chan < 0 || chan >= isp->isp_nchan) { retval = -ENXIO; break; } ISP_LOCK(isp); if (isp_fc_runstate(isp, chan, 5 * 1000000)) { retval = EIO; } else { retval = 0; } ISP_UNLOCK(isp); } break; case ISP_FC_LIP: if (IS_FC(isp)) { chan = *(int *)addr; if (chan < 0 || chan >= isp->isp_nchan) { retval = -ENXIO; break; } ISP_LOCK(isp); if (isp_control(isp, ISPCTL_SEND_LIP, chan)) { retval = EIO; } else { retval = 0; } ISP_UNLOCK(isp); } break; case ISP_FC_GETDINFO: { struct isp_fc_device *ifc = (struct isp_fc_device *) addr; fcportdb_t *lp; if (IS_SCSI(isp)) { break; } if (ifc->loopid >= MAX_FC_TARG) { retval = EINVAL; break; } lp = &FCPARAM(isp, ifc->chan)->portdb[ifc->loopid]; if (lp->state != FC_PORTDB_STATE_NIL) { ifc->role = (lp->prli_word3 & SVC3_ROLE_MASK) >> SVC3_ROLE_SHIFT; ifc->loopid = lp->handle; ifc->portid = lp->portid; ifc->node_wwn = lp->node_wwn; ifc->port_wwn = lp->port_wwn; retval = 0; } else { retval = ENODEV; } 
break; } case ISP_GET_STATS: { isp_stats_t *sp = (isp_stats_t *) addr; ISP_MEMZERO(sp, sizeof (*sp)); sp->isp_stat_version = ISP_STATS_VERSION; sp->isp_type = isp->isp_type; sp->isp_revision = isp->isp_revision; ISP_LOCK(isp); sp->isp_stats[ISP_INTCNT] = isp->isp_intcnt; sp->isp_stats[ISP_INTBOGUS] = isp->isp_intbogus; sp->isp_stats[ISP_INTMBOXC] = isp->isp_intmboxc; sp->isp_stats[ISP_INGOASYNC] = isp->isp_intoasync; sp->isp_stats[ISP_RSLTCCMPLT] = isp->isp_rsltccmplt; sp->isp_stats[ISP_FPHCCMCPLT] = isp->isp_fphccmplt; sp->isp_stats[ISP_RSCCHIWAT] = isp->isp_rscchiwater; sp->isp_stats[ISP_FPCCHIWAT] = isp->isp_fpcchiwater; ISP_UNLOCK(isp); retval = 0; break; } case ISP_CLR_STATS: ISP_LOCK(isp); isp->isp_intcnt = 0; isp->isp_intbogus = 0; isp->isp_intmboxc = 0; isp->isp_intoasync = 0; isp->isp_rsltccmplt = 0; isp->isp_fphccmplt = 0; isp->isp_rscchiwater = 0; isp->isp_fpcchiwater = 0; ISP_UNLOCK(isp); retval = 0; break; case ISP_FC_GETHINFO: { struct isp_hba_device *hba = (struct isp_hba_device *) addr; int chan = hba->fc_channel; if (chan < 0 || chan >= isp->isp_nchan) { retval = ENXIO; break; } hba->fc_fw_major = ISP_FW_MAJORX(isp->isp_fwrev); hba->fc_fw_minor = ISP_FW_MINORX(isp->isp_fwrev); hba->fc_fw_micro = ISP_FW_MICROX(isp->isp_fwrev); hba->fc_nchannels = isp->isp_nchan; if (IS_FC(isp)) { hba->fc_nports = MAX_FC_TARG; hba->fc_speed = FCPARAM(isp, hba->fc_channel)->isp_gbspeed; hba->fc_topology = FCPARAM(isp, chan)->isp_topo + 1; hba->fc_loopid = FCPARAM(isp, chan)->isp_loopid; hba->nvram_node_wwn = FCPARAM(isp, chan)->isp_wwnn_nvram; hba->nvram_port_wwn = FCPARAM(isp, chan)->isp_wwpn_nvram; hba->active_node_wwn = FCPARAM(isp, chan)->isp_wwnn; hba->active_port_wwn = FCPARAM(isp, chan)->isp_wwpn; } else { hba->fc_nports = MAX_TARGETS; hba->fc_speed = 0; hba->fc_topology = 0; hba->nvram_node_wwn = 0ull; hba->nvram_port_wwn = 0ull; hba->active_node_wwn = 0ull; hba->active_port_wwn = 0ull; } retval = 0; break; } case ISP_TSK_MGMT: { int needmarker; struct isp_fc_tsk_mgmt *fct = (struct isp_fc_tsk_mgmt *) addr; uint16_t loopid; mbreg_t mbs; if (IS_SCSI(isp)) { break; } chan = fct->chan; if (chan < 0 || chan >= isp->isp_nchan) { retval = -ENXIO; break; } needmarker = retval = 0; loopid = fct->loopid; ISP_LOCK(isp); if (IS_24XX(isp)) { uint8_t local[QENTRY_LEN]; isp24xx_tmf_t *tmf; isp24xx_statusreq_t *sp; fcparam *fcp = FCPARAM(isp, chan); fcportdb_t *lp; int i; for (i = 0; i < MAX_FC_TARG; i++) { lp = &fcp->portdb[i]; if (lp->handle == loopid) { break; } } if (i == MAX_FC_TARG) { retval = ENXIO; ISP_UNLOCK(isp); break; } /* XXX VALIDATE LP XXX */ tmf = (isp24xx_tmf_t *) local; ISP_MEMZERO(tmf, QENTRY_LEN); tmf->tmf_header.rqs_entry_type = RQSTYPE_TSK_MGMT; tmf->tmf_header.rqs_entry_count = 1; tmf->tmf_nphdl = lp->handle; tmf->tmf_delay = 2; tmf->tmf_timeout = 2; tmf->tmf_tidlo = lp->portid; tmf->tmf_tidhi = lp->portid >> 16; tmf->tmf_vpidx = ISP_GET_VPIDX(isp, chan); tmf->tmf_lun[1] = fct->lun & 0xff; if (fct->lun >= 256) { tmf->tmf_lun[0] = 0x40 | (fct->lun >> 8); } switch (fct->action) { case IPT_CLEAR_ACA: tmf->tmf_flags = ISP24XX_TMF_CLEAR_ACA; break; case IPT_TARGET_RESET: tmf->tmf_flags = ISP24XX_TMF_TARGET_RESET; needmarker = 1; break; case IPT_LUN_RESET: tmf->tmf_flags = ISP24XX_TMF_LUN_RESET; needmarker = 1; break; case IPT_CLEAR_TASK_SET: tmf->tmf_flags = ISP24XX_TMF_CLEAR_TASK_SET; needmarker = 1; break; case IPT_ABORT_TASK_SET: tmf->tmf_flags = ISP24XX_TMF_ABORT_TASK_SET; needmarker = 1; break; default: retval = EINVAL; break; } if (retval) { ISP_UNLOCK(isp); break; } 
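			/*
			 * Execute the task management function by hand: the
			 * TMF IOCB is built in a local buffer, copied into the
			 * channel's DMA scratch area, and handed to the
			 * firmware with the EXEC_COMMAND_IOCB_A64 mailbox
			 * command (the scratch DMA address is split across
			 * mailbox registers 2/3/6/7).  The completion status
			 * IOCB is then read back from the second QENTRY_LEN
			 * slot of the same scratch area.
			 */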
MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 5000000); mbs.param[1] = QENTRY_LEN; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); if (FC_SCRATCH_ACQUIRE(isp, chan)) { ISP_UNLOCK(isp); retval = ENOMEM; break; } isp_put_24xx_tmf(isp, tmf, fcp->isp_scratch); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, QENTRY_LEN, chan); sp = (isp24xx_statusreq_t *) local; sp->req_completion_status = 1; retval = isp_control(isp, ISPCTL_RUN_MBOXCMD, &mbs); MEMORYBARRIER(isp, SYNC_SFORCPU, QENTRY_LEN, QENTRY_LEN, chan); isp_get_24xx_response(isp, &((isp24xx_statusreq_t *)fcp->isp_scratch)[1], sp); FC_SCRATCH_RELEASE(isp, chan); if (retval || sp->req_completion_status != 0) { retval = EIO; } if (retval == 0) { if (needmarker) { fcp->sendmarker = 1; } } } else { MBSINIT(&mbs, 0, MBLOGALL, 0); if (ISP_CAP_2KLOGIN(isp) == 0) { loopid <<= 8; } switch (fct->action) { case IPT_CLEAR_ACA: mbs.param[0] = MBOX_CLEAR_ACA; mbs.param[1] = loopid; mbs.param[2] = fct->lun; break; case IPT_TARGET_RESET: mbs.param[0] = MBOX_TARGET_RESET; mbs.param[1] = loopid; needmarker = 1; break; case IPT_LUN_RESET: mbs.param[0] = MBOX_LUN_RESET; mbs.param[1] = loopid; mbs.param[2] = fct->lun; needmarker = 1; break; case IPT_CLEAR_TASK_SET: mbs.param[0] = MBOX_CLEAR_TASK_SET; mbs.param[1] = loopid; mbs.param[2] = fct->lun; needmarker = 1; break; case IPT_ABORT_TASK_SET: mbs.param[0] = MBOX_ABORT_TASK_SET; mbs.param[1] = loopid; mbs.param[2] = fct->lun; needmarker = 1; break; default: retval = EINVAL; break; } if (retval == 0) { if (needmarker) { FCPARAM(isp, chan)->sendmarker = 1; } retval = isp_control(isp, ISPCTL_RUN_MBOXCMD, &mbs); if (retval) { retval = EIO; } } } ISP_UNLOCK(isp); break; } default: break; } return (retval); } static void isp_intr_enable(void *arg) { int chan; ispsoftc_t *isp = arg; ISP_LOCK(isp); for (chan = 0; chan < isp->isp_nchan; chan++) { if (IS_FC(isp)) { if (FCPARAM(isp, chan)->role != ISP_ROLE_NONE) { ISP_ENABLE_INTS(isp); break; } } else { if (SDPARAM(isp, chan)->role != ISP_ROLE_NONE) { ISP_ENABLE_INTS(isp); break; } } } isp->isp_osinfo.ehook_active = 0; ISP_UNLOCK(isp); /* Release our hook so that the boot can continue.
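 * Note that ehook_active was already cleared above while the lock was
 * held, so isp_detach() will not try to disestablish the hook a
 * second time.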
*/ config_intrhook_disestablish(&isp->isp_osinfo.ehook); } /* * Local Inlines */ static ISP_INLINE int isp_get_pcmd(ispsoftc_t *, union ccb *); static ISP_INLINE void isp_free_pcmd(ispsoftc_t *, union ccb *); static ISP_INLINE int isp_get_pcmd(ispsoftc_t *isp, union ccb *ccb) { ISP_PCMD(ccb) = isp->isp_osinfo.pcmd_free; if (ISP_PCMD(ccb) == NULL) { return (-1); } isp->isp_osinfo.pcmd_free = ((struct isp_pcmd *)ISP_PCMD(ccb))->next; return (0); } static ISP_INLINE void isp_free_pcmd(ispsoftc_t *isp, union ccb *ccb) { if (ISP_PCMD(ccb)) { #ifdef ISP_TARGET_MODE PISP_PCMD(ccb)->datalen = 0; PISP_PCMD(ccb)->totslen = 0; PISP_PCMD(ccb)->cumslen = 0; PISP_PCMD(ccb)->crn = 0; #endif PISP_PCMD(ccb)->next = isp->isp_osinfo.pcmd_free; isp->isp_osinfo.pcmd_free = ISP_PCMD(ccb); ISP_PCMD(ccb) = NULL; } } /* * Put the target mode functions here, because some are inlines */ #ifdef ISP_TARGET_MODE static ISP_INLINE void isp_tmlock(ispsoftc_t *, const char *); static ISP_INLINE void isp_tmunlk(ispsoftc_t *); static ISP_INLINE int is_any_lun_enabled(ispsoftc_t *, int); static ISP_INLINE int is_lun_enabled(ispsoftc_t *, int, lun_id_t); static ISP_INLINE tstate_t *get_lun_statep(ispsoftc_t *, int, lun_id_t); static ISP_INLINE tstate_t *get_lun_statep_from_tag(ispsoftc_t *, int, uint32_t); static ISP_INLINE void rls_lun_statep(ispsoftc_t *, tstate_t *); static ISP_INLINE inot_private_data_t *get_ntp_from_tagdata(ispsoftc_t *, uint32_t, uint32_t, tstate_t **); static ISP_INLINE atio_private_data_t *isp_get_atpd(ispsoftc_t *, tstate_t *, uint32_t); static ISP_INLINE atio_private_data_t *isp_find_atpd(ispsoftc_t *, tstate_t *, uint32_t); static ISP_INLINE void isp_put_atpd(ispsoftc_t *, tstate_t *, atio_private_data_t *); static ISP_INLINE inot_private_data_t *isp_get_ntpd(ispsoftc_t *, tstate_t *); static ISP_INLINE inot_private_data_t *isp_find_ntpd(ispsoftc_t *, tstate_t *, uint32_t, uint32_t); static ISP_INLINE void isp_put_ntpd(ispsoftc_t *, tstate_t *, inot_private_data_t *); static cam_status create_lun_state(ispsoftc_t *, int, struct cam_path *, tstate_t **); static void destroy_lun_state(ispsoftc_t *, tstate_t *); static void isp_enable_lun(ispsoftc_t *, union ccb *); static cam_status isp_enable_deferred_luns(ispsoftc_t *, int); static cam_status isp_enable_deferred(ispsoftc_t *, int, lun_id_t); static void isp_disable_lun(ispsoftc_t *, union ccb *); static int isp_enable_target_mode(ispsoftc_t *, int); static int isp_disable_target_mode(ispsoftc_t *, int); static void isp_ledone(ispsoftc_t *, lun_entry_t *); static timeout_t isp_refire_putback_atio; static timeout_t isp_refire_notify_ack; static void isp_complete_ctio(union ccb *); static void isp_target_putback_atio(union ccb *); enum Start_Ctio_How { FROM_CAM, FROM_TIMER, FROM_SRR, FROM_CTIO_DONE }; static void isp_target_start_ctio(ispsoftc_t *, union ccb *, enum Start_Ctio_How); static void isp_handle_platform_atio(ispsoftc_t *, at_entry_t *); static void isp_handle_platform_atio2(ispsoftc_t *, at2_entry_t *); static void isp_handle_platform_atio7(ispsoftc_t *, at7_entry_t *); static void isp_handle_platform_ctio(ispsoftc_t *, void *); static void isp_handle_platform_notify_scsi(ispsoftc_t *, in_entry_t *); static void isp_handle_platform_notify_fc(ispsoftc_t *, in_fcentry_t *); static void isp_handle_platform_notify_24xx(ispsoftc_t *, in_fcentry_24xx_t *); static int isp_handle_platform_target_notify_ack(ispsoftc_t *, isp_notify_t *); static void isp_handle_platform_target_tmf(ispsoftc_t *, isp_notify_t *); static void 
isp_target_mark_aborted(ispsoftc_t *, union ccb *); static void isp_target_mark_aborted_early(ispsoftc_t *, tstate_t *, uint32_t); static ISP_INLINE void isp_tmlock(ispsoftc_t *isp, const char *msg) { while (isp->isp_osinfo.tmbusy) { isp->isp_osinfo.tmwanted = 1; mtx_sleep(isp, &isp->isp_lock, PRIBIO, msg, 0); } isp->isp_osinfo.tmbusy = 1; } static ISP_INLINE void isp_tmunlk(ispsoftc_t *isp) { isp->isp_osinfo.tmbusy = 0; if (isp->isp_osinfo.tmwanted) { isp->isp_osinfo.tmwanted = 0; wakeup(isp); } } static ISP_INLINE int is_any_lun_enabled(ispsoftc_t *isp, int bus) { struct tslist *lhp; int i; for (i = 0; i < LUN_HASH_SIZE; i++) { ISP_GET_PC_ADDR(isp, bus, lun_hash[i], lhp); if (SLIST_FIRST(lhp)) return (1); } return (0); } static ISP_INLINE int is_lun_enabled(ispsoftc_t *isp, int bus, lun_id_t lun) { tstate_t *tptr; struct tslist *lhp; ISP_GET_PC_ADDR(isp, bus, lun_hash[LUN_HASH_FUNC(lun)], lhp); SLIST_FOREACH(tptr, lhp, next) { if (tptr->ts_lun == lun) { return (1); } } return (0); } static void dump_tstates(ispsoftc_t *isp, int bus) { int i, j; struct tslist *lhp; tstate_t *tptr = NULL; if (bus >= isp->isp_nchan) { return; } for (i = 0; i < LUN_HASH_SIZE; i++) { ISP_GET_PC_ADDR(isp, bus, lun_hash[i], lhp); j = 0; SLIST_FOREACH(tptr, lhp, next) { xpt_print(tptr->owner, "[%d, %d] atio_cnt=%d inot_cnt=%d\n", i, j, tptr->atio_count, tptr->inot_count); j++; } } } static ISP_INLINE tstate_t * get_lun_statep(ispsoftc_t *isp, int bus, lun_id_t lun) { tstate_t *tptr = NULL; struct tslist *lhp; if (bus < isp->isp_nchan) { ISP_GET_PC_ADDR(isp, bus, lun_hash[LUN_HASH_FUNC(lun)], lhp); SLIST_FOREACH(tptr, lhp, next) { if (tptr->ts_lun == lun) { tptr->hold++; return (tptr); } } } return (NULL); } static ISP_INLINE tstate_t * get_lun_statep_from_tag(ispsoftc_t *isp, int bus, uint32_t tagval) { tstate_t *tptr = NULL; atio_private_data_t *atp; struct tslist *lhp; int i; if (bus < isp->isp_nchan && tagval != 0) { for (i = 0; i < LUN_HASH_SIZE; i++) { ISP_GET_PC_ADDR(isp, bus, lun_hash[i], lhp); SLIST_FOREACH(tptr, lhp, next) { atp = isp_find_atpd(isp, tptr, tagval); if (atp) { tptr->hold++; return (tptr); } } } } return (NULL); } static ISP_INLINE inot_private_data_t * get_ntp_from_tagdata(ispsoftc_t *isp, uint32_t tag_id, uint32_t seq_id, tstate_t **rslt) { inot_private_data_t *ntp; tstate_t *tptr; struct tslist *lhp; int bus, i; for (bus = 0; bus < isp->isp_nchan; bus++) { for (i = 0; i < LUN_HASH_SIZE; i++) { ISP_GET_PC_ADDR(isp, bus, lun_hash[i], lhp); SLIST_FOREACH(tptr, lhp, next) { ntp = isp_find_ntpd(isp, tptr, tag_id, seq_id); if (ntp) { *rslt = tptr; tptr->hold++; return (ntp); } } } } return (NULL); } static ISP_INLINE void rls_lun_statep(ispsoftc_t *isp, tstate_t *tptr) { KASSERT((tptr->hold), ("tptr not held")); tptr->hold--; } static void isp_tmcmd_restart(ispsoftc_t *isp) { inot_private_data_t *ntp; inot_private_data_t *restart_queue; tstate_t *tptr; union ccb *ccb; struct tslist *lhp; int bus, i; for (bus = 0; bus < isp->isp_nchan; bus++) { for (i = 0; i < LUN_HASH_SIZE; i++) { ISP_GET_PC_ADDR(isp, bus, lun_hash[i], lhp); SLIST_FOREACH(tptr, lhp, next) { if ((restart_queue = tptr->restart_queue) != NULL) tptr->restart_queue = NULL; while (restart_queue) { ntp = restart_queue; restart_queue = ntp->rd.nt.nt_hba; if (IS_24XX(isp)) { isp_prt(isp, ISP_LOGTDEBUG0, "%s: restarting resrc deprived %x", __func__, ((at7_entry_t *)ntp->rd.data)->at_rxid); isp_handle_platform_atio7(isp, (at7_entry_t *) ntp->rd.data); } else { isp_prt(isp, ISP_LOGTDEBUG0, "%s: restarting resrc deprived %x", __func__, 
((at2_entry_t *)ntp->rd.data)->at_rxid); isp_handle_platform_atio2(isp, (at2_entry_t *) ntp->rd.data); } isp_put_ntpd(isp, tptr, ntp); if (tptr->restart_queue && restart_queue != NULL) { ntp = tptr->restart_queue; tptr->restart_queue = restart_queue; while (restart_queue->rd.nt.nt_hba) { restart_queue = restart_queue->rd.nt.nt_hba; } restart_queue->rd.nt.nt_hba = ntp; break; } } /* * We only need to do this once per tptr */ if (!TAILQ_EMPTY(&tptr->waitq)) { ccb = (union ccb *)TAILQ_LAST(&tptr->waitq, isp_ccbq); TAILQ_REMOVE(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); isp_target_start_ctio(isp, ccb, FROM_TIMER); } } } } } static ISP_INLINE atio_private_data_t * isp_get_atpd(ispsoftc_t *isp, tstate_t *tptr, uint32_t tag) { atio_private_data_t *atp; atp = LIST_FIRST(&tptr->atfree); if (atp) { LIST_REMOVE(atp, next); atp->tag = tag; LIST_INSERT_HEAD(&tptr->atused[ATPDPHASH(tag)], atp, next); } return (atp); } static ISP_INLINE atio_private_data_t * isp_find_atpd(ispsoftc_t *isp, tstate_t *tptr, uint32_t tag) { atio_private_data_t *atp; LIST_FOREACH(atp, &tptr->atused[ATPDPHASH(tag)], next) { if (atp->tag == tag) return (atp); } return (NULL); } static ISP_INLINE void isp_put_atpd(ispsoftc_t *isp, tstate_t *tptr, atio_private_data_t *atp) { if (atp->ests) { isp_put_ecmd(isp, atp->ests); } LIST_REMOVE(atp, next); memset(atp, 0, sizeof (*atp)); LIST_INSERT_HEAD(&tptr->atfree, atp, next); } static void isp_dump_atpd(ispsoftc_t *isp, tstate_t *tptr) { atio_private_data_t *atp; const char *states[8] = { "Free", "ATIO", "CAM", "CTIO", "LAST_CTIO", "PDON", "?6", "7" }; for (atp = tptr->atpool; atp < &tptr->atpool[ATPDPSIZE]; atp++) { xpt_print(tptr->owner, "ATP: [0x%x] origdlen %u bytes_xfrd %u lun %u nphdl 0x%04x s_id 0x%06x d_id 0x%06x oxid 0x%04x state %s\n", atp->tag, atp->orig_datalen, atp->bytes_xfered, atp->lun, atp->nphdl, atp->sid, atp->portid, atp->oxid, states[atp->state & 0x7]); } } static ISP_INLINE inot_private_data_t * isp_get_ntpd(ispsoftc_t *isp, tstate_t *tptr) { inot_private_data_t *ntp; ntp = tptr->ntfree; if (ntp) { tptr->ntfree = ntp->next; } return (ntp); } static ISP_INLINE inot_private_data_t * isp_find_ntpd(ispsoftc_t *isp, tstate_t *tptr, uint32_t tag_id, uint32_t seq_id) { inot_private_data_t *ntp; for (ntp = tptr->ntpool; ntp < &tptr->ntpool[ATPDPSIZE]; ntp++) { if (ntp->rd.tag_id == tag_id && ntp->rd.seq_id == seq_id) { return (ntp); } } return (NULL); } static ISP_INLINE void isp_put_ntpd(ispsoftc_t *isp, tstate_t *tptr, inot_private_data_t *ntp) { ntp->rd.tag_id = ntp->rd.seq_id = 0; ntp->next = tptr->ntfree; tptr->ntfree = ntp; } static cam_status create_lun_state(ispsoftc_t *isp, int bus, struct cam_path *path, tstate_t **rslt) { cam_status status; lun_id_t lun; struct tslist *lhp; tstate_t *tptr; int i; lun = xpt_path_lun_id(path); if (lun != CAM_LUN_WILDCARD) { if (ISP_MAX_LUNS(isp) > 0 && lun >= ISP_MAX_LUNS(isp)) { return (CAM_LUN_INVALID); } } if (is_lun_enabled(isp, bus, lun)) { return (CAM_LUN_ALRDY_ENA); } tptr = malloc(sizeof (tstate_t), M_DEVBUF, M_NOWAIT|M_ZERO); if (tptr == NULL) { return (CAM_RESRC_UNAVAIL); } tptr->ts_lun = lun; status = xpt_create_path(&tptr->owner, NULL, xpt_path_path_id(path), xpt_path_target_id(path), lun); if (status != CAM_REQ_CMP) { free(tptr, M_DEVBUF); return (status); } SLIST_INIT(&tptr->atios); SLIST_INIT(&tptr->inots); TAILQ_INIT(&tptr->waitq); LIST_INIT(&tptr->atfree); for (i = ATPDPSIZE-1; i >= 0; i--) LIST_INSERT_HEAD(&tptr->atfree, &tptr->atpool[i], next); for (i = 0; i < ATPDPHASHSIZE; i++) LIST_INIT(&tptr->atused[i]); 
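	/*
	 * Thread the immediate-notify pool into a singly linked free list.
	 * Since the tstate was allocated M_ZERO, the last element's next
	 * pointer is already NULL, so after this loop the layout is:
	 *
	 *	ntfree -> ntpool[0] -> ... -> ntpool[ATPDPSIZE-1] -> NULL
	 *
	 * isp_get_ntpd() pops entries from the head of this list and
	 * isp_put_ntpd() pushes them back.
	 */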
for (i = 0; i < ATPDPSIZE-1; i++) tptr->ntpool[i].next = &tptr->ntpool[i+1]; tptr->ntfree = tptr->ntpool; tptr->hold = 1; ISP_GET_PC_ADDR(isp, bus, lun_hash[LUN_HASH_FUNC(lun)], lhp); SLIST_INSERT_HEAD(lhp, tptr, next); *rslt = tptr; ISP_PATH_PRT(isp, ISP_LOGTDEBUG0, path, "created tstate\n"); return (CAM_REQ_CMP); } static ISP_INLINE void destroy_lun_state(ispsoftc_t *isp, tstate_t *tptr) { union ccb *ccb; struct tslist *lhp; KASSERT((tptr->hold != 0), ("tptr is not held")); KASSERT((tptr->hold == 1), ("tptr still held (%d)", tptr->hold)); do { ccb = (union ccb *)SLIST_FIRST(&tptr->atios); if (ccb) { SLIST_REMOVE_HEAD(&tptr->atios, sim_links.sle); ccb->ccb_h.status = CAM_REQ_ABORTED; xpt_done(ccb); } } while (ccb); do { ccb = (union ccb *)SLIST_FIRST(&tptr->inots); if (ccb) { SLIST_REMOVE_HEAD(&tptr->inots, sim_links.sle); ccb->ccb_h.status = CAM_REQ_ABORTED; xpt_done(ccb); } } while (ccb); ISP_GET_PC_ADDR(isp, cam_sim_bus(xpt_path_sim(tptr->owner)), lun_hash[LUN_HASH_FUNC(tptr->ts_lun)], lhp); SLIST_REMOVE(lhp, tptr, tstate, next); ISP_PATH_PRT(isp, ISP_LOGTDEBUG0, tptr->owner, "destroyed tstate\n"); xpt_free_path(tptr->owner); free(tptr, M_DEVBUF); } /* * Enable a lun. */ static void isp_enable_lun(ispsoftc_t *isp, union ccb *ccb) { tstate_t *tptr = NULL; int bus, tm_enabled, target_role; target_id_t target; lun_id_t lun; /* * We only support either a wildcard target/lun or a target ID of zero and a non-wildcard lun */ bus = XS_CHANNEL(ccb); target = ccb->ccb_h.target_id; lun = ccb->ccb_h.target_lun; ISP_PATH_PRT(isp, ISP_LOGTDEBUG0|ISP_LOGCONFIG, ccb->ccb_h.path, "enabling lun %jx\n", (uintmax_t)lun); if (target == CAM_TARGET_WILDCARD && lun != CAM_LUN_WILDCARD) { ccb->ccb_h.status = CAM_LUN_INVALID; xpt_done(ccb); return; } if (target != CAM_TARGET_WILDCARD && lun == CAM_LUN_WILDCARD) { ccb->ccb_h.status = CAM_LUN_INVALID; xpt_done(ccb); return; } if (isp->isp_dblev & ISP_LOGTDEBUG0) { xpt_print(ccb->ccb_h.path, "enabling lun 0x%jx on channel %d\n", (uintmax_t)lun, bus); } /* * Wait until we're not busy with the lun enables subsystem */ isp_tmlock(isp, "isp_enable_lun"); /* * This is as good a place as any to check f/w capabilities. */ if (IS_FC(isp)) { if (ISP_CAP_TMODE(isp) == 0) { xpt_print(ccb->ccb_h.path, "firmware does not support target mode\n"); ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; goto done; } /* * We *could* handle non-SCCLUN f/w, but we'd have to * dork with our already fragile enable/disable code. */ if (ISP_CAP_SCCFW(isp) == 0) { xpt_print(ccb->ccb_h.path, "firmware not SCCLUN capable\n"); ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; goto done; } target_role = (FCPARAM(isp, bus)->role & ISP_ROLE_TARGET) != 0; } else { target_role = (SDPARAM(isp, bus)->role & ISP_ROLE_TARGET) != 0; } /* * Create the state pointer. * It should not already exist. */ tptr = get_lun_statep(isp, bus, lun); if (tptr) { ccb->ccb_h.status = CAM_LUN_ALRDY_ENA; goto done; } ccb->ccb_h.status = create_lun_state(isp, bus, ccb->ccb_h.path, &tptr); if (ccb->ccb_h.status != CAM_REQ_CMP) { goto done; } /* * We have a tricky maneuver to perform here. * * If target mode isn't already enabled here, * *and* our current role includes target mode, * we enable target mode here. * */ ISP_GET_PC(isp, bus, tm_enabled, tm_enabled); if (tm_enabled == 0 && target_role != 0) { if (isp_enable_target_mode(isp, bus)) { ccb->ccb_h.status = CAM_REQ_CMP_ERR; destroy_lun_state(isp, tptr); tptr = NULL; goto done; } tm_enabled = 1; } /* * Now check to see whether this bus is in target mode already.
* * If not, a later role change into target mode will finish the job. */ if (tm_enabled == 0) { ISP_SET_PC(isp, bus, tm_enable_defer, 1); ccb->ccb_h.status = CAM_REQ_CMP; xpt_print(ccb->ccb_h.path, "Target Mode not enabled yet- lun enable deferred\n"); goto done1; } /* * Enable the lun. */ ccb->ccb_h.status = isp_enable_deferred(isp, bus, lun); done: if (ccb->ccb_h.status != CAM_REQ_CMP) { if (tptr) { destroy_lun_state(isp, tptr); tptr = NULL; } } else { tptr->enabled = 1; } done1: if (tptr) { rls_lun_statep(isp, tptr); } /* * And we're outta here.... */ isp_tmunlk(isp); xpt_done(ccb); } static cam_status isp_enable_deferred_luns(ispsoftc_t *isp, int bus) { tstate_t *tptr = NULL; struct tslist *lhp; int i, n; ISP_GET_PC(isp, bus, tm_enabled, i); if (i == 1) { return (CAM_REQ_CMP); } ISP_GET_PC(isp, bus, tm_enable_defer, i); if (i == 0) { return (CAM_REQ_CMP); } /* * If this succeeds, it will set tm_enable */ if (isp_enable_target_mode(isp, bus)) { return (CAM_REQ_CMP_ERR); } isp_tmlock(isp, "isp_enable_deferred_luns"); for (n = i = 0; i < LUN_HASH_SIZE; i++) { ISP_GET_PC_ADDR(isp, bus, lun_hash[i], lhp); SLIST_FOREACH(tptr, lhp, next) { tptr->hold++; if (tptr->enabled == 0) { if (isp_enable_deferred(isp, bus, tptr->ts_lun) == CAM_REQ_CMP) { tptr->enabled = 1; n++; } } else { n++; } tptr->hold--; } } isp_tmunlk(isp); if (n == 0) { return (CAM_REQ_CMP_ERR); } ISP_SET_PC(isp, bus, tm_enable_defer, 0); return (CAM_REQ_CMP); } static cam_status isp_enable_deferred(ispsoftc_t *isp, int bus, lun_id_t lun) { cam_status status; int luns_already_enabled; ISP_GET_PC(isp, bus, tm_luns_enabled, luns_already_enabled); isp_prt(isp, ISP_LOGTINFO, "%s: bus %d lun %jx luns_enabled %d", __func__, bus, (uintmax_t)lun, luns_already_enabled); if (IS_23XX(isp) || IS_24XX(isp) || (IS_FC(isp) && luns_already_enabled)) { status = CAM_REQ_CMP; } else { int cmd_cnt, not_cnt; if (IS_23XX(isp)) { cmd_cnt = DFLT_CMND_CNT; not_cnt = DFLT_INOT_CNT; } else { cmd_cnt = 64; not_cnt = 8; } status = CAM_REQ_INPROG; isp->isp_osinfo.rptr = &status; if (isp_lun_cmd(isp, RQSTYPE_ENABLE_LUN, bus, lun == CAM_LUN_WILDCARD? 0 : lun, cmd_cnt, not_cnt)) { status = CAM_RESRC_UNAVAIL; } else { mtx_sleep(&status, &isp->isp_lock, PRIBIO, "isp_enable_deferred", 0); } isp->isp_osinfo.rptr = NULL; } if (status == CAM_REQ_CMP) { ISP_SET_PC(isp, bus, tm_luns_enabled, 1); isp_prt(isp, ISP_LOGCONFIG|ISP_LOGTINFO, "bus %d lun %jx now enabled for target mode", bus, (uintmax_t)lun); } return (status); } static void isp_disable_lun(ispsoftc_t *isp, union ccb *ccb) { tstate_t *tptr = NULL; int bus; cam_status status; target_id_t target; lun_id_t lun; bus = XS_CHANNEL(ccb); target = ccb->ccb_h.target_id; lun = ccb->ccb_h.target_lun; ISP_PATH_PRT(isp, ISP_LOGTDEBUG0|ISP_LOGCONFIG, ccb->ccb_h.path, "disabling lun %jx\n", (uintmax_t)lun); if (target == CAM_TARGET_WILDCARD && lun != CAM_LUN_WILDCARD) { ccb->ccb_h.status = CAM_LUN_INVALID; xpt_done(ccb); return; } if (target != CAM_TARGET_WILDCARD && lun == CAM_LUN_WILDCARD) { ccb->ccb_h.status = CAM_LUN_INVALID; xpt_done(ccb); return; } /* * See if we're busy disabling a lun now. */ isp_tmlock(isp, "isp_disable_lun"); status = CAM_REQ_INPROG; /* * Find the state pointer. */ if ((tptr = get_lun_statep(isp, bus, lun)) == NULL) { status = CAM_PATH_INVALID; goto done; } /* * If we're a 23XX or 24XX card, we're done. */ if (IS_23XX(isp) || IS_24XX(isp)) { status = CAM_REQ_CMP; goto done; } /* * For SCC FW, we only deal with lun zero.
*/ if (IS_FC(isp) && lun > 0) { status = CAM_REQ_CMP; goto done; } isp->isp_osinfo.rptr = &status; if (isp_lun_cmd(isp, RQSTYPE_ENABLE_LUN, bus, lun, 0, 0)) { status = CAM_RESRC_UNAVAIL; } else { mtx_sleep(&status, &isp->isp_lock, PRIBIO, "isp_disable_lun", 0); } isp->isp_osinfo.rptr = NULL; done: if (status == CAM_REQ_CMP) { tptr->enabled = 0; if (is_any_lun_enabled(isp, bus) == 0) { if (isp_disable_target_mode(isp, bus)) { status = CAM_REQ_CMP_ERR; } } } ccb->ccb_h.status = status; if (status == CAM_REQ_CMP) { destroy_lun_state(isp, tptr); xpt_print(ccb->ccb_h.path, "lun now disabled for target mode\n"); } else { if (tptr) rls_lun_statep(isp, tptr); } isp_tmunlk(isp); xpt_done(ccb); } static int isp_enable_target_mode(ispsoftc_t *isp, int bus) { int tm_enabled; ISP_GET_PC(isp, bus, tm_enabled, tm_enabled); if (tm_enabled != 0) { return (0); } if (IS_SCSI(isp)) { mbreg_t mbs; MBSINIT(&mbs, MBOX_ENABLE_TARGET_MODE, MBLOGALL, 0); mbs.param[0] = MBOX_ENABLE_TARGET_MODE; mbs.param[1] = ENABLE_TARGET_FLAG|ENABLE_TQING_FLAG; mbs.param[2] = bus << 7; if (isp_control(isp, ISPCTL_RUN_MBOXCMD, &mbs) < 0 || mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGERR, "Unable to enable Target Role on Bus %d", bus); return (EIO); } } ISP_SET_PC(isp, bus, tm_enabled, 1); isp_prt(isp, ISP_LOGINFO, "Target Role enabled on Bus %d", bus); return (0); } static int isp_disable_target_mode(ispsoftc_t *isp, int bus) { int tm_enabled; ISP_GET_PC(isp, bus, tm_enabled, tm_enabled); if (tm_enabled == 0) { return (0); } if (IS_SCSI(isp)) { mbreg_t mbs; MBSINIT(&mbs, MBOX_ENABLE_TARGET_MODE, MBLOGALL, 0); mbs.param[2] = bus << 7; if (isp_control(isp, ISPCTL_RUN_MBOXCMD, &mbs) < 0 || mbs.param[0] != MBOX_COMMAND_COMPLETE) { isp_prt(isp, ISP_LOGERR, "Unable to disable Target Role on Bus %d", bus); return (EIO); } } ISP_SET_PC(isp, bus, tm_enabled, 0); isp_prt(isp, ISP_LOGINFO, "Target Role disabled on Bus %d", bus); return (0); } static void isp_ledone(ispsoftc_t *isp, lun_entry_t *lep) { uint32_t *rptr; rptr = isp->isp_osinfo.rptr; if (lep->le_status != LUN_OK) { isp_prt(isp, ISP_LOGERR, "ENABLE/MODIFY LUN returned 0x%x", lep->le_status); if (rptr) { *rptr = CAM_REQ_CMP_ERR; wakeup_one(rptr); } } else { if (rptr) { *rptr = CAM_REQ_CMP; wakeup_one(rptr); } } } static void isp_target_start_ctio(ispsoftc_t *isp, union ccb *ccb, enum Start_Ctio_How how) { int fctape, sendstatus, resid; tstate_t *tptr; fcparam *fcp; atio_private_data_t *atp; struct ccb_scsiio *cso; uint32_t dmaresult, handle, xfrlen, sense_length, tmp; uint8_t local[QENTRY_LEN]; tptr = get_lun_statep(isp, XS_CHANNEL(ccb), XS_LUN(ccb)); if (tptr == NULL) { tptr = get_lun_statep(isp, XS_CHANNEL(ccb), CAM_LUN_WILDCARD); if (tptr == NULL) { isp_prt(isp, ISP_LOGERR, "%s: [0x%x] cannot find tstate pointer", __func__, ccb->csio.tag_id); ccb->ccb_h.status = CAM_DEV_NOT_THERE; xpt_done(ccb); return; } } isp_prt(isp, ISP_LOGTDEBUG0, "%s: ENTRY[0x%x] how %u xfrlen %u sendstatus %d sense_len %u", __func__, ccb->csio.tag_id, how, ccb->csio.dxfer_len, (ccb->ccb_h.flags & CAM_SEND_STATUS) != 0, ((ccb->ccb_h.flags & CAM_SEND_SENSE)? 
ccb->csio.sense_len : 0)); switch (how) { case FROM_TIMER: case FROM_CAM: /* * Insert at the tail of the list, if any, waiting CTIO CCBs */ TAILQ_INSERT_TAIL(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); break; case FROM_SRR: case FROM_CTIO_DONE: TAILQ_INSERT_HEAD(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); break; } while (TAILQ_FIRST(&tptr->waitq) != NULL) { ccb = (union ccb *) TAILQ_FIRST(&tptr->waitq); TAILQ_REMOVE(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); cso = &ccb->csio; xfrlen = cso->dxfer_len; if (xfrlen == 0) { if ((ccb->ccb_h.flags & CAM_SEND_STATUS) == 0) { ISP_PATH_PRT(isp, ISP_LOGERR, ccb->ccb_h.path, "a data transfer length of zero but no status to send is wrong\n"); ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); continue; } } atp = isp_find_atpd(isp, tptr, cso->tag_id); if (atp == NULL) { isp_prt(isp, ISP_LOGERR, "%s: [0x%x] cannot find private data adjunct in %s", __func__, cso->tag_id, __func__); isp_dump_atpd(isp, tptr); ccb->ccb_h.status = CAM_REQ_CMP_ERR; xpt_done(ccb); continue; } /* * Is this command a dead duck? */ if (atp->dead) { isp_prt(isp, ISP_LOGERR, "%s: [0x%x] not sending a CTIO for a dead command", __func__, cso->tag_id); ccb->ccb_h.status = CAM_REQ_ABORTED; xpt_done(ccb); continue; } /* * Check to make sure we're still in target mode. */ fcp = FCPARAM(isp, XS_CHANNEL(ccb)); if ((fcp->role & ISP_ROLE_TARGET) == 0) { isp_prt(isp, ISP_LOGERR, "%s: [0x%x] stopping sending a CTIO because we're no longer in target mode", __func__, cso->tag_id); ccb->ccb_h.status = CAM_PROVIDE_FAIL; xpt_done(ccb); continue; } /* * We're only handling ATPD_CCB_OUTSTANDING outstanding CCB at a time (one of which * could be split into two CTIOs to split data and status). */ if (atp->ctcnt >= ATPD_CCB_OUTSTANDING) { isp_prt(isp, ISP_LOGTINFO, "[0x%x] handling only %d CCBs at a time (flags for this ccb: 0x%x)", cso->tag_id, ATPD_CCB_OUTSTANDING, ccb->ccb_h.flags); TAILQ_INSERT_HEAD(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); break; } /* * Does the initiator expect FC-Tape style responses? */ if ((atp->word3 & PRLI_WD3_RETRY) && fcp->fctape_enabled) { fctape = 1; } else { fctape = 0; } /* * If we already did the data xfer portion of a CTIO that sends data * and status, don't do it again and do the status portion now. */ if (atp->sendst) { isp_prt(isp, ISP_LOGTINFO, "[0x%x] now sending synthesized status orig_dl=%u xfered=%u bit=%u", cso->tag_id, atp->orig_datalen, atp->bytes_xfered, atp->bytes_in_transit); xfrlen = 0; /* we already did the data transfer */ atp->sendst = 0; } if (ccb->ccb_h.flags & CAM_SEND_STATUS) { sendstatus = 1; } else { sendstatus = 0; } if (ccb->ccb_h.flags & CAM_SEND_SENSE) { KASSERT((sendstatus != 0), ("how can you have CAM_SEND_SENSE w/o CAM_SEND_STATUS?")); /* * Sense length is not the entire sense data structure size. Periph * drivers don't seem to be setting sense_len to reflect the actual * size. We'll peek inside to get the right amount. 
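 * Whatever we find is clamped just below to what an external FCP
 * response IU can carry (XCMD_SIZE less the fixed response header),
 * since that is the most we can ever ship back to the initiator.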
*/ sense_length = cso->sense_len; /* * This 'cannot' happen */ if (sense_length > (XCMD_SIZE - MIN_FCP_RESPONSE_SIZE)) { sense_length = XCMD_SIZE - MIN_FCP_RESPONSE_SIZE; } } else { sense_length = 0; } memset(local, 0, QENTRY_LEN); /* * Check for overflow */ tmp = atp->bytes_xfered + atp->bytes_in_transit + xfrlen; if (tmp > atp->orig_datalen) { isp_prt(isp, ISP_LOGERR, "%s: [0x%x] data overflow by %u bytes", __func__, cso->tag_id, tmp - atp->orig_datalen); ccb->ccb_h.status = CAM_DATA_RUN_ERR; xpt_done(ccb); continue; } if (IS_24XX(isp)) { ct7_entry_t *cto = (ct7_entry_t *) local; cto->ct_header.rqs_entry_type = RQSTYPE_CTIO7; cto->ct_header.rqs_entry_count = 1; cto->ct_header.rqs_seqno |= ATPD_SEQ_NOTIFY_CAM; ATPD_SET_SEQNO(cto, atp); cto->ct_nphdl = atp->nphdl; cto->ct_rxid = atp->tag; cto->ct_iid_lo = atp->portid; cto->ct_iid_hi = atp->portid >> 16; cto->ct_oxid = atp->oxid; cto->ct_vpidx = ISP_GET_VPIDX(isp, XS_CHANNEL(ccb)); cto->ct_timeout = 120; cto->ct_flags = atp->tattr << CT7_TASK_ATTR_SHIFT; /* * Mode 1, status, no data. Only possible when we are sending status, have * no data to transfer, and any sense data can fit into a ct7_entry_t. * * Mode 2, status, no data. We have to use this in the case that * the sense data won't fit into a ct7_entry_t. * */ if (sendstatus && xfrlen == 0) { cto->ct_flags |= CT7_SENDSTATUS | CT7_NO_DATA; resid = atp->orig_datalen - atp->bytes_xfered - atp->bytes_in_transit; if (sense_length <= MAXRESPLEN_24XX) { if (resid < 0) { cto->ct_resid = -resid; } else if (resid > 0) { cto->ct_resid = resid; } cto->ct_flags |= CT7_FLAG_MODE1; cto->ct_scsi_status = cso->scsi_status; if (resid < 0) { cto->ct_scsi_status |= (FCP_RESID_OVERFLOW << 8); } else if (resid > 0) { cto->ct_scsi_status |= (FCP_RESID_UNDERFLOW << 8); } if (fctape) { cto->ct_flags |= CT7_CONFIRM|CT7_EXPLCT_CONF; } if (sense_length) { cto->ct_scsi_status |= (FCP_SNSLEN_VALID << 8); cto->rsp.m1.ct_resplen = cto->ct_senselen = sense_length; memcpy(cto->rsp.m1.ct_resp, &cso->sense_data, sense_length); } } else { bus_addr_t addr; char buf[XCMD_SIZE]; fcp_rsp_iu_t *rp; if (atp->ests == NULL) { atp->ests = isp_get_ecmd(isp); if (atp->ests == NULL) { TAILQ_INSERT_HEAD(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); break; } } memset(buf, 0, sizeof (buf)); rp = (fcp_rsp_iu_t *)buf; if (fctape) { cto->ct_flags |= CT7_CONFIRM|CT7_EXPLCT_CONF; rp->fcp_rsp_bits |= FCP_CONF_REQ; } cto->ct_flags |= CT7_FLAG_MODE2; rp->fcp_rsp_scsi_status = cso->scsi_status; if (resid < 0) { rp->fcp_rsp_resid = -resid; rp->fcp_rsp_bits |= FCP_RESID_OVERFLOW; } else if (resid > 0) { rp->fcp_rsp_resid = resid; rp->fcp_rsp_bits |= FCP_RESID_UNDERFLOW; } if (sense_length) { rp->fcp_rsp_snslen = sense_length; cto->ct_senselen = sense_length; rp->fcp_rsp_bits |= FCP_SNSLEN_VALID; isp_put_fcp_rsp_iu(isp, rp, atp->ests); memcpy(((fcp_rsp_iu_t *)atp->ests)->fcp_rsp_extra, &cso->sense_data, sense_length); } else { isp_put_fcp_rsp_iu(isp, rp, atp->ests); } if (isp->isp_dblev & ISP_LOGTDEBUG1) { isp_print_bytes(isp, "FCP Response Frame After Swizzling", MIN_FCP_RESPONSE_SIZE + sense_length, atp->ests); } addr = isp->isp_osinfo.ecmd_dma; addr += ((((isp_ecmd_t *)atp->ests) - isp->isp_osinfo.ecmd_base) * XCMD_SIZE); isp_prt(isp, ISP_LOGTDEBUG0, "%s: ests base %p vaddr %p ecmd_dma %jx addr %jx len %u", __func__, isp->isp_osinfo.ecmd_base, atp->ests, (uintmax_t) isp->isp_osinfo.ecmd_dma, (uintmax_t)addr, MIN_FCP_RESPONSE_SIZE + sense_length); cto->rsp.m2.ct_datalen = MIN_FCP_RESPONSE_SIZE + sense_length; cto->rsp.m2.ct_fcp_rsp_iudata.ds_base 
= DMA_LO32(addr); cto->rsp.m2.ct_fcp_rsp_iudata.ds_basehi = DMA_HI32(addr); cto->rsp.m2.ct_fcp_rsp_iudata.ds_count = MIN_FCP_RESPONSE_SIZE + sense_length; } if (sense_length) { isp_prt(isp, ISP_LOGTDEBUG0, "%s: CTIO7[0x%x] seq %u nc %d CDB0=%x sstatus=0x%x flags=0x%x resid=%d slen %u sense: %x %x/%x/%x", __func__, cto->ct_rxid, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), atp->cdb0, cto->ct_scsi_status, cto->ct_flags, cto->ct_resid, sense_length, cso->sense_data.error_code, cso->sense_data.sense_buf[1], cso->sense_data.sense_buf[11], cso->sense_data.sense_buf[12]); } else { isp_prt(isp, ISP_LOGDEBUG0, "%s: CTIO7[0x%x] seq %u nc %d CDB0=%x sstatus=0x%x flags=0x%x resid=%d", __func__, cto->ct_rxid, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), atp->cdb0, cto->ct_scsi_status, cto->ct_flags, cto->ct_resid); } atp->state = ATPD_STATE_LAST_CTIO; } /* * Mode 0 data transfers, *possibly* with status. */ if (xfrlen != 0) { cto->ct_flags |= CT7_FLAG_MODE0; if ((cso->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { cto->ct_flags |= CT7_DATA_IN; } else { cto->ct_flags |= CT7_DATA_OUT; } cto->rsp.m0.reloff = atp->bytes_xfered + atp->bytes_in_transit; cto->rsp.m0.ct_xfrlen = xfrlen; #ifdef DEBUG if (ISP_FC_PC(isp, XS_CHANNEL(ccb))->inject_lost_data_frame && xfrlen > ISP_FC_PC(isp, XS_CHANNEL(ccb))->inject_lost_data_frame) { isp_prt(isp, ISP_LOGWARN, "%s: truncating data frame with xfrlen %d to %d", __func__, xfrlen, xfrlen - (xfrlen >> 2)); ISP_FC_PC(isp, XS_CHANNEL(ccb))->inject_lost_data_frame = 0; cto->rsp.m0.ct_xfrlen -= xfrlen >> 2; } #endif if (sendstatus) { resid = atp->orig_datalen - atp->bytes_xfered - xfrlen; if (cso->scsi_status == SCSI_STATUS_OK && resid == 0 /* && fctape == 0 */) { cto->ct_flags |= CT7_SENDSTATUS; atp->state = ATPD_STATE_LAST_CTIO; if (fctape) { cto->ct_flags |= CT7_CONFIRM|CT7_EXPLCT_CONF; } } else { atp->sendst = 1; /* send status later */ cto->ct_header.rqs_seqno &= ~ATPD_SEQ_NOTIFY_CAM; atp->state = ATPD_STATE_CTIO; } } else { atp->state = ATPD_STATE_CTIO; } isp_prt(isp, ISP_LOGTDEBUG0, "%s: CTIO7[0x%x] seq %u nc %d CDB0=%x sstatus=0x%x flags=0x%x xfrlen=%u off=%u", __func__, cto->ct_rxid, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), atp->cdb0, cto->ct_scsi_status, cto->ct_flags, xfrlen, atp->bytes_xfered); } } else if (IS_FC(isp)) { ct2_entry_t *cto = (ct2_entry_t *) local; if (isp->isp_osinfo.sixtyfourbit) cto->ct_header.rqs_entry_type = RQSTYPE_CTIO3; else cto->ct_header.rqs_entry_type = RQSTYPE_CTIO2; cto->ct_header.rqs_entry_count = 1; cto->ct_header.rqs_seqno |= ATPD_SEQ_NOTIFY_CAM; ATPD_SET_SEQNO(cto, atp); if (ISP_CAP_2KLOGIN(isp)) { ((ct2e_entry_t *)cto)->ct_iid = atp->nphdl; } else { cto->ct_iid = atp->nphdl; if (ISP_CAP_SCCFW(isp) == 0) { cto->ct_lun = ccb->ccb_h.target_lun; } } cto->ct_timeout = 10; cto->ct_rxid = cso->tag_id; /* * Mode 1, status, no data. Only possible when we are sending status, have * no data to transfer, and the sense length can fit into a ct2_entry_t. * * Mode 2, status, no data. We have to use this in the case the response * length won't fit into a ct2_entry_t. * * We'll fill out this structure with information as if this were a * Mode 1. The hardware layer will create the Mode 2 FCP RSP IU as * needed based upon this.
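 *
 * (In short: Mode 1 embeds the status and any short sense data
 * directly in the CTIO, while Mode 2 points the chip at an externally
 * built FCP RSP IU through the ct_fcp_rsp_iudata descriptor when the
 * response is too large to embed.)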
*/ if (sendstatus && xfrlen == 0) { cto->ct_flags |= CT2_SENDSTATUS | CT2_NO_DATA; resid = atp->orig_datalen - atp->bytes_xfered - atp->bytes_in_transit; if (sense_length <= MAXRESPLEN) { if (resid < 0) { cto->ct_resid = -resid; } else if (resid > 0) { cto->ct_resid = resid; } cto->ct_flags |= CT2_FLAG_MODE1; cto->rsp.m1.ct_scsi_status = cso->scsi_status; if (resid < 0) { cto->rsp.m1.ct_scsi_status |= CT2_DATA_OVER; } else if (resid > 0) { cto->rsp.m1.ct_scsi_status |= CT2_DATA_UNDER; } if (fctape) { cto->ct_flags |= CT2_CONFIRM; } if (sense_length) { cto->rsp.m1.ct_scsi_status |= CT2_SNSLEN_VALID; cto->rsp.m1.ct_resplen = cto->rsp.m1.ct_senselen = sense_length; memcpy(cto->rsp.m1.ct_resp, &cso->sense_data, sense_length); } } else { bus_addr_t addr; char buf[XCMD_SIZE]; fcp_rsp_iu_t *rp; if (atp->ests == NULL) { atp->ests = isp_get_ecmd(isp); if (atp->ests == NULL) { TAILQ_INSERT_HEAD(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); break; } } memset(buf, 0, sizeof (buf)); rp = (fcp_rsp_iu_t *)buf; if (fctape) { cto->ct_flags |= CT2_CONFIRM; rp->fcp_rsp_bits |= FCP_CONF_REQ; } cto->ct_flags |= CT2_FLAG_MODE2; rp->fcp_rsp_scsi_status = cso->scsi_status; if (resid < 0) { rp->fcp_rsp_resid = -resid; rp->fcp_rsp_bits |= FCP_RESID_OVERFLOW; } else if (resid > 0) { rp->fcp_rsp_resid = resid; rp->fcp_rsp_bits |= FCP_RESID_UNDERFLOW; } if (sense_length) { rp->fcp_rsp_snslen = sense_length; rp->fcp_rsp_bits |= FCP_SNSLEN_VALID; isp_put_fcp_rsp_iu(isp, rp, atp->ests); memcpy(((fcp_rsp_iu_t *)atp->ests)->fcp_rsp_extra, &cso->sense_data, sense_length); } else { isp_put_fcp_rsp_iu(isp, rp, atp->ests); } if (isp->isp_dblev & ISP_LOGTDEBUG1) { isp_print_bytes(isp, "FCP Response Frame After Swizzling", MIN_FCP_RESPONSE_SIZE + sense_length, atp->ests); } addr = isp->isp_osinfo.ecmd_dma; addr += ((((isp_ecmd_t *)atp->ests) - isp->isp_osinfo.ecmd_base) * XCMD_SIZE); isp_prt(isp, ISP_LOGTDEBUG0, "%s: ests base %p vaddr %p ecmd_dma %jx addr %jx len %u", __func__, isp->isp_osinfo.ecmd_base, atp->ests, (uintmax_t) isp->isp_osinfo.ecmd_dma, (uintmax_t)addr, MIN_FCP_RESPONSE_SIZE + sense_length); cto->rsp.m2.ct_datalen = MIN_FCP_RESPONSE_SIZE + sense_length; if (isp->isp_osinfo.sixtyfourbit) { cto->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base = DMA_LO32(addr); cto->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi = DMA_HI32(addr); cto->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count = MIN_FCP_RESPONSE_SIZE + sense_length; } else { cto->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base = DMA_LO32(addr); cto->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count = MIN_FCP_RESPONSE_SIZE + sense_length; } } if (sense_length) { isp_prt(isp, ISP_LOGTDEBUG0, "%s: CTIO2[0x%x] seq %u nc %d CDB0=%x sstatus=0x%x flags=0x%x resid=%d sense: %x %x/%x/%x", __func__, cto->ct_rxid, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), atp->cdb0, cso->scsi_status, cto->ct_flags, cto->ct_resid, cso->sense_data.error_code, cso->sense_data.sense_buf[1], cso->sense_data.sense_buf[11], cso->sense_data.sense_buf[12]); } else { isp_prt(isp, ISP_LOGTDEBUG0, "%s: CTIO2[0x%x] seq %u nc %d CDB0=%x sstatus=0x%x flags=0x%x resid=%d", __func__, cto->ct_rxid, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), atp->cdb0, cso->scsi_status, cto->ct_flags, cto->ct_resid); } atp->state = ATPD_STATE_LAST_CTIO; } if (xfrlen != 0) { cto->ct_flags |= CT2_FLAG_MODE0; if ((cso->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { cto->ct_flags |= CT2_DATA_IN; } else { cto->ct_flags |= CT2_DATA_OUT; } cto->ct_reloff = atp->bytes_xfered + atp->bytes_in_transit; cto->rsp.m0.ct_xfrlen = xfrlen; if (sendstatus) { resid = atp->orig_datalen - 
atp->bytes_xfered - xfrlen; if (cso->scsi_status == SCSI_STATUS_OK && resid == 0 /*&& fctape == 0*/) { cto->ct_flags |= CT2_SENDSTATUS; atp->state = ATPD_STATE_LAST_CTIO; if (fctape) { cto->ct_flags |= CT2_CONFIRM; } } else { atp->sendst = 1; /* send status later */ cto->ct_header.rqs_seqno &= ~ATPD_SEQ_NOTIFY_CAM; atp->state = ATPD_STATE_CTIO; } } else { atp->state = ATPD_STATE_CTIO; } } isp_prt(isp, ISP_LOGTDEBUG0, "%s: CTIO2[%x] seq %u nc %d CDB0=%x scsi status %x flags %x resid %d xfrlen %u offset %u", __func__, cto->ct_rxid, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), atp->cdb0, cso->scsi_status, cto->ct_flags, cto->ct_resid, cso->dxfer_len, atp->bytes_xfered); } else { ct_entry_t *cto = (ct_entry_t *) local; cto->ct_header.rqs_entry_type = RQSTYPE_CTIO; cto->ct_header.rqs_entry_count = 1; cto->ct_header.rqs_seqno |= ATPD_SEQ_NOTIFY_CAM; ATPD_SET_SEQNO(cto, atp); cto->ct_iid = cso->init_id; cto->ct_iid |= XS_CHANNEL(ccb) << 7; cto->ct_tgt = ccb->ccb_h.target_id; cto->ct_lun = ccb->ccb_h.target_lun; cto->ct_fwhandle = cso->tag_id; if (atp->rxid) { cto->ct_tag_val = atp->rxid; cto->ct_flags |= CT_TQAE; } if (ccb->ccb_h.flags & CAM_DIS_DISCONNECT) { cto->ct_flags |= CT_NODISC; } if (cso->dxfer_len == 0) { cto->ct_flags |= CT_NO_DATA; } else if ((cso->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { cto->ct_flags |= CT_DATA_IN; } else { cto->ct_flags |= CT_DATA_OUT; } if (ccb->ccb_h.flags & CAM_SEND_STATUS) { cto->ct_flags |= CT_SENDSTATUS|CT_CCINCR; cto->ct_scsi_status = cso->scsi_status; cto->ct_resid = atp->orig_datalen - atp->bytes_xfered - atp->bytes_in_transit - xfrlen; isp_prt(isp, ISP_LOGTDEBUG0, "%s: CTIO[%x] seq %u nc %d scsi status %x resid %d tag_id %x", __func__, cto->ct_fwhandle, ATPD_GET_SEQNO(cto), ATPD_GET_NCAM(cto), cso->scsi_status, cso->resid, cso->tag_id); } ccb->ccb_h.flags &= ~CAM_SEND_SENSE; cto->ct_timeout = 10; } if (isp_get_pcmd(isp, ccb)) { ISP_PATH_PRT(isp, ISP_LOGWARN, ccb->ccb_h.path, "out of PCMDs\n"); TAILQ_INSERT_HEAD(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); break; } if (isp_allocate_xs_tgt(isp, ccb, &handle)) { ISP_PATH_PRT(isp, ISP_LOGWARN, ccb->ccb_h.path, "No XFLIST pointers for %s\n", __func__); TAILQ_INSERT_HEAD(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); isp_free_pcmd(isp, ccb); break; } atp->bytes_in_transit += xfrlen; PISP_PCMD(ccb)->datalen = xfrlen; /* * Call the dma setup routines for this entry (and any subsequent * CTIOs) if there's data to move, and then tell the f/w it's got * new things to play with. As with isp_start's usage of DMA setup, * any swizzling is done in the machine dependent layer. Because * of this, we put the request onto the queue area first in native * format. 
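 *
 * In outline, the steps below are (a sketch, same names as the code):
 *
 *	cto->ct_syshandle = handle;	- handle from isp_allocate_xs_tgt()
 *	dmaresult = ISP_DMASETUP(isp, cso, (ispreq_t *) local);
 *	CMD_QUEUED -> mark CCB CAM_REQ_INPROG|CAM_SIM_QUEUED, bump counts
 *	CMD_EAGAIN -> tear down handle and PCMD, requeue on tptr->waitq
 *	otherwise  -> tear down handle and PCMD, fail with CAM_REQ_CMP_ERR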
*/ if (IS_24XX(isp)) { ct7_entry_t *cto = (ct7_entry_t *) local; cto->ct_syshandle = handle; } else if (IS_FC(isp)) { ct2_entry_t *cto = (ct2_entry_t *) local; cto->ct_syshandle = handle; } else { ct_entry_t *cto = (ct_entry_t *) local; cto->ct_syshandle = handle; } dmaresult = ISP_DMASETUP(isp, cso, (ispreq_t *) local); if (dmaresult != CMD_QUEUED) { isp_destroy_tgt_handle(isp, handle); isp_free_pcmd(isp, ccb); if (dmaresult == CMD_EAGAIN) { TAILQ_INSERT_HEAD(&tptr->waitq, &ccb->ccb_h, periph_links.tqe); break; } ccb->ccb_h.status = CAM_REQ_CMP_ERR; xpt_done(ccb); continue; } isp->isp_nactive++; ccb->ccb_h.status = CAM_REQ_INPROG | CAM_SIM_QUEUED; if (xfrlen) { ccb->ccb_h.spriv_field0 = atp->bytes_xfered; } else { ccb->ccb_h.spriv_field0 = ~0; } atp->ctcnt++; atp->seqno++; } rls_lun_statep(isp, tptr); } static void isp_refire_putback_atio(void *arg) { union ccb *ccb = arg; ISP_ASSERT_LOCKED((ispsoftc_t *)XS_ISP(ccb)); isp_target_putback_atio(ccb); } static void isp_refire_notify_ack(void *arg) { isp_tna_t *tp = arg; ispsoftc_t *isp = tp->isp; ISP_ASSERT_LOCKED(isp); if (isp_notify_ack(isp, tp->not)) { callout_schedule(&tp->timer, 5); } else { free(tp, M_DEVBUF); } } static void isp_target_putback_atio(union ccb *ccb) { ispsoftc_t *isp; struct ccb_scsiio *cso; void *qe; isp = XS_ISP(ccb); qe = isp_getrqentry(isp); if (qe == NULL) { xpt_print(ccb->ccb_h.path, "%s: Request Queue Overflow\n", __func__); callout_reset(&PISP_PCMD(ccb)->wdog, 10, isp_refire_putback_atio, ccb); return; } memset(qe, 0, QENTRY_LEN); cso = &ccb->csio; if (IS_FC(isp)) { at2_entry_t local, *at = &local; ISP_MEMZERO(at, sizeof (at2_entry_t)); at->at_header.rqs_entry_type = RQSTYPE_ATIO2; at->at_header.rqs_entry_count = 1; if (ISP_CAP_SCCFW(isp)) { at->at_scclun = (uint16_t) ccb->ccb_h.target_lun; #if __FreeBSD_version < 1000700 if (at->at_scclun >= 256) at->at_scclun |= 0x4000; #endif } else { at->at_lun = (uint8_t) ccb->ccb_h.target_lun; } at->at_status = CT_OK; at->at_rxid = cso->tag_id; at->at_iid = cso->ccb_h.target_id; isp_put_atio2(isp, at, qe); } else { at_entry_t local, *at = &local; ISP_MEMZERO(at, sizeof (at_entry_t)); at->at_header.rqs_entry_type = RQSTYPE_ATIO; at->at_header.rqs_entry_count = 1; at->at_iid = cso->init_id; at->at_iid |= XS_CHANNEL(ccb) << 7; at->at_tgt = cso->ccb_h.target_id; at->at_lun = cso->ccb_h.target_lun; at->at_status = CT_OK; at->at_tag_val = AT_GET_TAG(cso->tag_id); at->at_handle = AT_GET_HANDLE(cso->tag_id); isp_put_atio(isp, at, qe); } ISP_TDQE(isp, "isp_target_putback_atio", isp->isp_reqidx, qe); ISP_SYNC_REQUEST(isp); isp_complete_ctio(ccb); } static void isp_complete_ctio(union ccb *ccb) { if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_INPROG) { ccb->ccb_h.status &= ~CAM_SIM_QUEUED; xpt_done(ccb); } } /* * Handle ATIO stuff that the generic code can't. * This means handling CDBs. */ static void isp_handle_platform_atio(ispsoftc_t *isp, at_entry_t *aep) { tstate_t *tptr; int status, bus; struct ccb_accept_tio *atiop; atio_private_data_t *atp; /* * The firmware status (except for the QLTM_SVALID bit) * indicates why this ATIO was sent to us. * * If QLTM_SVALID is set, the firmware has recommended Sense Data. * * If the DISCONNECTS DISABLED bit is set in the flags field, * we're still connected on the SCSI bus. */ status = aep->at_status; if ((status & ~QLTM_SVALID) == AT_PHASE_ERROR) { /* * Bus Phase Sequence error. We should have sense data * suggested by the f/w. I'm not sure quite yet what * to do about this for CAM. 
*/ isp_prt(isp, ISP_LOGWARN, "PHASE ERROR"); isp_endcmd(isp, aep, SCSI_STATUS_BUSY, 0); return; } if ((status & ~QLTM_SVALID) != AT_CDB) { isp_prt(isp, ISP_LOGWARN, "bad atio (0x%x) leaked to platform", status); isp_endcmd(isp, aep, SCSI_STATUS_BUSY, 0); return; } bus = GET_BUS_VAL(aep->at_iid); tptr = get_lun_statep(isp, bus, aep->at_lun); if (tptr == NULL) { tptr = get_lun_statep(isp, bus, CAM_LUN_WILDCARD); if (tptr == NULL) { /* * Because we can't autofeed sense data back with * a command for parallel SCSI, we can't give back * a CHECK CONDITION. We'll give back a BUSY status * instead. This works out okay because the only * time we should, in fact, get this, is in the * case that somebody configured us without the * blackhole driver, so they get what they deserve. */ isp_endcmd(isp, aep, SCSI_STATUS_BUSY, 0); return; } } atp = isp_get_atpd(isp, tptr, aep->at_handle); atiop = (struct ccb_accept_tio *) SLIST_FIRST(&tptr->atios); if (atiop == NULL || atp == NULL) { /* * Because we can't autofeed sense data back with * a command for parallel SCSI, we can't give back * a CHECK CONDITION. We'll give back a QUEUE FULL status * instead. This works out okay because the only time we * should, in fact, get this, is in the case that we've * run out of ATIOS. */ xpt_print(tptr->owner, "no %s for lun %d from initiator %d\n", (atp == NULL && atiop == NULL)? "ATIOs *or* ATPS" : ((atp == NULL)? "ATPs" : "ATIOs"), aep->at_lun, aep->at_iid); isp_endcmd(isp, aep, SCSI_STATUS_BUSY, 0); if (atp) { isp_put_atpd(isp, tptr, atp); } rls_lun_statep(isp, tptr); return; } atp->rxid = aep->at_tag_val; atp->state = ATPD_STATE_ATIO; SLIST_REMOVE_HEAD(&tptr->atios, sim_links.sle); tptr->atio_count--; ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, atiop->ccb_h.path, "Take FREE ATIO count now %d\n", tptr->atio_count); atiop->ccb_h.target_id = aep->at_tgt; atiop->ccb_h.target_lun = aep->at_lun; if (aep->at_flags & AT_NODISC) { atiop->ccb_h.flags |= CAM_DIS_DISCONNECT; } else { atiop->ccb_h.flags &= ~CAM_DIS_DISCONNECT; } if (status & QLTM_SVALID) { size_t amt = ISP_MIN(QLTM_SENSELEN, sizeof (atiop->sense_data)); atiop->sense_len = amt; ISP_MEMCPY(&atiop->sense_data, aep->at_sense, amt); } else { atiop->sense_len = 0; } atiop->init_id = GET_IID_VAL(aep->at_iid); atiop->cdb_len = aep->at_cdblen; ISP_MEMCPY(atiop->cdb_io.cdb_bytes, aep->at_cdb, aep->at_cdblen); atiop->ccb_h.status = CAM_CDB_RECVD; /* * Construct a tag 'id' based upon tag value (which may be 0..255) * and the handle (which we have to preserve). */ atiop->tag_id = atp->tag; if (aep->at_flags & AT_TQAE) { atiop->tag_action = aep->at_tag_type; atiop->ccb_h.status |= CAM_TAG_ACTION_VALID; } atp->orig_datalen = 0; atp->bytes_xfered = 0; atp->lun = aep->at_lun; atp->nphdl = aep->at_iid; atp->portid = PORT_NONE; atp->oxid = 0; atp->cdb0 = atiop->cdb_io.cdb_bytes[0]; atp->tattr = aep->at_tag_type; atp->state = ATPD_STATE_CAM; isp_prt(isp, ISP_LOGTDEBUG0, "ATIO[0x%x] CDB=0x%x lun %d", aep->at_tag_val, atp->cdb0, atp->lun); rls_lun_statep(isp, tptr); } static void isp_handle_platform_atio2(ispsoftc_t *isp, at2_entry_t *aep) { lun_id_t lun; fcportdb_t *lp; tstate_t *tptr; struct ccb_accept_tio *atiop; uint16_t nphdl; atio_private_data_t *atp; inot_private_data_t *ntp; /* * The firmware status (except for the QLTM_SVALID bit) * indicates why this ATIO was sent to us. * * If QLTM_SVALID is set, the firmware has recommended Sense Data. 
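 *
 * On the parallel SCSI side (isp_handle_platform_atio() above) that
 * suggested sense is copied through to CAM:
 *
 *	amt = ISP_MIN(QLTM_SENSELEN, sizeof (atiop->sense_data));
 *	ISP_MEMCPY(&atiop->sense_data, aep->at_sense, amt);
 *
 * For ATIO2 the bit is only masked off of at_status for the status
 * check below; no sense data is passed up.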
*/ if ((aep->at_status & ~QLTM_SVALID) != AT_CDB) { isp_prt(isp, ISP_LOGWARN, "bogus atio (0x%x) leaked to platform", aep->at_status); isp_endcmd(isp, aep, SCSI_STATUS_BUSY, 0); return; } if (ISP_CAP_SCCFW(isp)) { lun = aep->at_scclun; #if __FreeBSD_version < 1000700 lun &= 0x3fff; #endif } else { lun = aep->at_lun; } if (ISP_CAP_2KLOGIN(isp)) { nphdl = ((at2e_entry_t *)aep)->at_iid; } else { nphdl = aep->at_iid; } tptr = get_lun_statep(isp, 0, lun); if (tptr == NULL) { tptr = get_lun_statep(isp, 0, CAM_LUN_WILDCARD); if (tptr == NULL) { isp_prt(isp, ISP_LOGWARN, "%s: [0x%x] no state pointer for lun %jx or wildcard", __func__, aep->at_rxid, (uintmax_t)lun); if (lun == 0) { isp_endcmd(isp, aep, SCSI_STATUS_BUSY, 0); } else { isp_endcmd(isp, aep, SCSI_STATUS_CHECK_COND | ECMD_SVALID | (0x5 << 12) | (0x25 << 16), 0); } return; } } /* * Start any commands pending resources first. */ if (tptr->restart_queue) { inot_private_data_t *restart_queue = tptr->restart_queue; tptr->restart_queue = NULL; while (restart_queue) { ntp = restart_queue; restart_queue = ntp->rd.nt.nt_hba; isp_prt(isp, ISP_LOGTDEBUG0, "%s: restarting resrc deprived %x", __func__, ((at2_entry_t *)ntp->rd.data)->at_rxid); isp_handle_platform_atio2(isp, (at2_entry_t *) ntp->rd.data); isp_put_ntpd(isp, tptr, ntp); /* * If a recursion caused the restart queue to start to fill again, * stop and splice the new list on top of the old list and restore * it and go to noresrc. */ if (tptr->restart_queue) { ntp = tptr->restart_queue; tptr->restart_queue = restart_queue; while (restart_queue->rd.nt.nt_hba) { restart_queue = restart_queue->rd.nt.nt_hba; } restart_queue->rd.nt.nt_hba = ntp; goto noresrc; } } } atiop = (struct ccb_accept_tio *) SLIST_FIRST(&tptr->atios); if (atiop == NULL) { goto noresrc; } atp = isp_get_atpd(isp, tptr, aep->at_rxid); if (atp == NULL) { goto noresrc; } atp->state = ATPD_STATE_ATIO; SLIST_REMOVE_HEAD(&tptr->atios, sim_links.sle); tptr->atio_count--; isp_prt(isp, ISP_LOGTDEBUG2, "Take FREE ATIO count now %d", tptr->atio_count); atiop->ccb_h.target_id = FCPARAM(isp, 0)->isp_loopid; atiop->ccb_h.target_lun = lun; /* * We don't get 'suggested' sense data as we do with SCSI cards. */ atiop->sense_len = 0; /* * If we're not in the port database, add ourselves. */ if (IS_2100(isp)) atiop->init_id = nphdl; else { if ((isp_find_pdb_by_handle(isp, 0, nphdl, &lp) == 0 || lp->state == FC_PORTDB_STATE_ZOMBIE)) { uint64_t wwpn = (((uint64_t) aep->at_wwpn[0]) << 48) | (((uint64_t) aep->at_wwpn[1]) << 32) | (((uint64_t) aep->at_wwpn[2]) << 16) | (((uint64_t) aep->at_wwpn[3]) << 0); isp_add_wwn_entry(isp, 0, wwpn, INI_NONE, nphdl, PORT_ANY, 0); isp_find_pdb_by_handle(isp, 0, nphdl, &lp); } atiop->init_id = FC_PORTDB_TGT(isp, 0, lp); } atiop->cdb_len = ATIO2_CDBLEN; ISP_MEMCPY(atiop->cdb_io.cdb_bytes, aep->at_cdb, ATIO2_CDBLEN); atiop->ccb_h.status = CAM_CDB_RECVD; atiop->tag_id = atp->tag; switch (aep->at_taskflags & ATIO2_TC_ATTR_MASK) { case ATIO2_TC_ATTR_SIMPLEQ: atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID; atiop->tag_action = MSG_SIMPLE_Q_TAG; break; case ATIO2_TC_ATTR_HEADOFQ: atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID; atiop->tag_action = MSG_HEAD_OF_Q_TAG; break; case ATIO2_TC_ATTR_ORDERED: atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID; atiop->tag_action = MSG_ORDERED_Q_TAG; break; case ATIO2_TC_ATTR_ACAQ: /* ?? 
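 * ACA queueing has no CAM tag action mapping here, so it is handled
 * like an untagged command.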
*/ case ATIO2_TC_ATTR_UNTAGGED: default: atiop->tag_action = 0; break; } atp->orig_datalen = aep->at_datalen; atp->bytes_xfered = 0; atp->lun = lun; atp->nphdl = nphdl; atp->sid = PORT_ANY; atp->oxid = aep->at_oxid; atp->cdb0 = aep->at_cdb[0]; atp->tattr = aep->at_taskflags & ATIO2_TC_ATTR_MASK; atp->state = ATPD_STATE_CAM; xpt_done((union ccb *)atiop); isp_prt(isp, ISP_LOGTDEBUG0, "ATIO2[0x%x] CDB=0x%x lun %jx datalen %u", aep->at_rxid, atp->cdb0, (uintmax_t)lun, atp->orig_datalen); rls_lun_statep(isp, tptr); return; noresrc: ntp = isp_get_ntpd(isp, tptr); if (ntp == NULL) { rls_lun_statep(isp, tptr); isp_endcmd(isp, aep, nphdl, 0, SCSI_STATUS_BUSY, 0); return; } memcpy(ntp->rd.data, aep, QENTRY_LEN); ntp->rd.nt.nt_hba = tptr->restart_queue; tptr->restart_queue = ntp; rls_lun_statep(isp, tptr); } static void isp_handle_platform_atio7(ispsoftc_t *isp, at7_entry_t *aep) { int cdbxlen; lun_id_t lun; uint16_t chan, nphdl = NIL_HANDLE; uint32_t did, sid; fcportdb_t *lp; tstate_t *tptr; struct ccb_accept_tio *atiop; atio_private_data_t *atp = NULL; atio_private_data_t *oatp; inot_private_data_t *ntp; did = (aep->at_hdr.d_id[0] << 16) | (aep->at_hdr.d_id[1] << 8) | aep->at_hdr.d_id[2]; sid = (aep->at_hdr.s_id[0] << 16) | (aep->at_hdr.s_id[1] << 8) | aep->at_hdr.s_id[2]; #if __FreeBSD_version >= 1000700 lun = CAM_EXTLUN_BYTE_SWIZZLE(be64dec(aep->at_cmnd.fcp_cmnd_lun)); #else lun = ((aep->at_cmnd.fcp_cmnd_lun[0] & 0x3f) << 8) | aep->at_cmnd.fcp_cmnd_lun[1]; #endif /* * Find the N-port handle and Virtual Port Index for this command. * * If we can't, we're somewhat in trouble because we can't actually respond w/o that information. * We also, as a matter of course, need to know the WWN of the initiator too. */ if (ISP_CAP_MULTI_ID(isp) && isp->isp_nchan > 1) { /* * Find the right channel based upon D_ID */ isp_find_chan_by_did(isp, did, &chan); if (chan == ISP_NOCHAN) { NANOTIME_T now; /* * If we don't recognize our own D_ID, terminate the exchange unless we're within 2 seconds of startup. * It's a bit tricky here as we need to stash this command *somewhere*. */ GET_NANOTIME(&now); if (NANOTIME_SUB(&isp->isp_init_time, &now) > 2000000000ULL) { isp_prt(isp, ISP_LOGWARN, "%s: [RX_ID 0x%x] D_ID %x not found on any channel- dropping", __func__, aep->at_rxid, did); isp_endcmd(isp, aep, NIL_HANDLE, ISP_NOCHAN, ECMD_TERMINATE, 0); return; } tptr = get_lun_statep(isp, 0, 0); if (tptr == NULL) { tptr = get_lun_statep(isp, 0, CAM_LUN_WILDCARD); if (tptr == NULL) { isp_prt(isp, ISP_LOGWARN, "%s: [RX_ID 0x%x] D_ID %x not found on any channel and no tptr- dropping", __func__, aep->at_rxid, did); isp_endcmd(isp, aep, NIL_HANDLE, ISP_NOCHAN, ECMD_TERMINATE, 0); return; } } isp_prt(isp, ISP_LOGWARN, "%s: [RX_ID 0x%x] D_ID %x not found on any channel- deferring", __func__, aep->at_rxid, did); goto noresrc; } isp_prt(isp, ISP_LOGTDEBUG0, "%s: [RX_ID 0x%x] D_ID 0x%06x found on Chan %d for S_ID 0x%06x", __func__, aep->at_rxid, did, chan, sid); } else { chan = 0; } /* * Find the PDB entry for this initiator */ if (isp_find_pdb_by_sid(isp, chan, sid, &lp) == 0) { /* * If we're not in the port database, terminate the exchange.
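 *
 * (The 24-bit S_ID being looked up was assembled byte-wise from the
 * FC header above; e.g. s_id[] = { 0x01, 0x02, 0x03 } yields
 * sid == 0x010203, which is the key isp_find_pdb_by_sid() matches on.)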
*/ isp_prt(isp, ISP_LOGTINFO, "%s: [RX_ID 0x%x] D_ID 0x%06x found on Chan %d for S_ID 0x%06x wasn't in PDB already", __func__, aep->at_rxid, did, chan, sid); isp_dump_portdb(isp, chan); isp_endcmd(isp, aep, NIL_HANDLE, chan, ECMD_TERMINATE, 0); return; } nphdl = lp->handle; /* * Get the tstate pointer */ tptr = get_lun_statep(isp, chan, lun); if (tptr == NULL) { tptr = get_lun_statep(isp, chan, CAM_LUN_WILDCARD); if (tptr == NULL) { isp_prt(isp, ISP_LOGWARN, "%s: [0x%x] no state pointer for lun %jx or wildcard", __func__, aep->at_rxid, (uintmax_t)lun); if (lun == 0) { isp_endcmd(isp, aep, nphdl, chan, SCSI_STATUS_BUSY, 0); } else { isp_endcmd(isp, aep, nphdl, chan, SCSI_STATUS_CHECK_COND | ECMD_SVALID | (0x5 << 12) | (0x25 << 16), 0); } return; } } /* * Start any commands pending resources first. */ if (tptr->restart_queue) { inot_private_data_t *restart_queue = tptr->restart_queue; tptr->restart_queue = NULL; while (restart_queue) { ntp = restart_queue; restart_queue = ntp->rd.nt.nt_hba; isp_prt(isp, ISP_LOGTDEBUG0, "%s: restarting resrc deprived %x", __func__, ((at7_entry_t *)ntp->rd.data)->at_rxid); isp_handle_platform_atio7(isp, (at7_entry_t *) ntp->rd.data); isp_put_ntpd(isp, tptr, ntp); /* * If a recursion caused the restart queue to start to fill again, * stop and splice the new list on top of the old list and restore * it and go to noresrc. */ if (tptr->restart_queue) { isp_prt(isp, ISP_LOGTDEBUG0, "%s: restart queue refilling", __func__); if (restart_queue) { ntp = tptr->restart_queue; tptr->restart_queue = restart_queue; while (restart_queue->rd.nt.nt_hba) { restart_queue = restart_queue->rd.nt.nt_hba; } restart_queue->rd.nt.nt_hba = ntp; } goto noresrc; } } } /* * If the f/w is out of resources, just send a BUSY status back. */ if (aep->at_rxid == AT7_NORESRC_RXID) { rls_lun_statep(isp, tptr); isp_endcmd(isp, aep, nphdl, chan, SCSI_BUSY, 0); return; } /* * If we're out of resources, just send a BUSY status back.
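 *
 * The deferral used by the noresrc: path below is a simple LIFO of
 * raw queue entries threaded through the notify private data:
 *
 *	memcpy(ntp->rd.data, aep, QENTRY_LEN);	  - stash the raw ATIO
 *	ntp->rd.nt.nt_hba = tptr->restart_queue;  - chain it on
 *	tptr->restart_queue = ntp;
 *
 * Each stashed entry is replayed through isp_handle_platform_atio7()
 * the next time an ATIO finds the queue non-empty (see above).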
*/ atiop = (struct ccb_accept_tio *) SLIST_FIRST(&tptr->atios); if (atiop == NULL) { isp_prt(isp, ISP_LOGTDEBUG0, "[0x%x] out of atios", aep->at_rxid); goto noresrc; } oatp = isp_find_atpd(isp, tptr, aep->at_rxid); if (oatp) { isp_prt(isp, ISP_LOGTDEBUG0, "[0x%x] tag wraparound in isp_handle_platform_atio7 (N-Port Handle 0x%04x S_ID 0x%04x OX_ID 0x%04x) oatp state %d", aep->at_rxid, nphdl, sid, aep->at_hdr.ox_id, oatp->state); /* * It's not a "no resource" condition- but we can treat it like one */ goto noresrc; } atp = isp_get_atpd(isp, tptr, aep->at_rxid); if (atp == NULL) { isp_prt(isp, ISP_LOGTDEBUG0, "[0x%x] out of atps", aep->at_rxid); goto noresrc; } atp->word3 = lp->prli_word3; atp->state = ATPD_STATE_ATIO; SLIST_REMOVE_HEAD(&tptr->atios, sim_links.sle); tptr->atio_count--; ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, atiop->ccb_h.path, "Take FREE ATIO count now %d\n", tptr->atio_count); atiop->init_id = FC_PORTDB_TGT(isp, chan, lp); atiop->ccb_h.target_id = FCPARAM(isp, chan)->isp_loopid; atiop->ccb_h.target_lun = lun; atiop->sense_len = 0; cdbxlen = aep->at_cmnd.fcp_cmnd_alen_datadir >> FCP_CMND_ADDTL_CDBLEN_SHIFT; if (cdbxlen) { isp_prt(isp, ISP_LOGWARN, "additional CDBLEN ignored"); } cdbxlen = sizeof (aep->at_cmnd.cdb_dl.sf.fcp_cmnd_cdb); ISP_MEMCPY(atiop->cdb_io.cdb_bytes, aep->at_cmnd.cdb_dl.sf.fcp_cmnd_cdb, cdbxlen); atiop->cdb_len = cdbxlen; atiop->ccb_h.status = CAM_CDB_RECVD; atiop->tag_id = atp->tag; switch (aep->at_cmnd.fcp_cmnd_task_attribute & FCP_CMND_TASK_ATTR_MASK) { case FCP_CMND_TASK_ATTR_SIMPLE: atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID; atiop->tag_action = MSG_SIMPLE_Q_TAG; break; case FCP_CMND_TASK_ATTR_HEAD: atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID; atiop->tag_action = MSG_HEAD_OF_Q_TAG; break; case FCP_CMND_TASK_ATTR_ORDERED: atiop->ccb_h.flags |= CAM_TAG_ACTION_VALID; atiop->tag_action = MSG_ORDERED_Q_TAG; break; default: /* FALLTHROUGH */ case FCP_CMND_TASK_ATTR_ACA: case FCP_CMND_TASK_ATTR_UNTAGGED: atiop->tag_action = 0; break; } atp->orig_datalen = aep->at_cmnd.cdb_dl.sf.fcp_cmnd_dl; atp->bytes_xfered = 0; atp->lun = lun; atp->nphdl = nphdl; atp->portid = sid; atp->oxid = aep->at_hdr.ox_id; atp->rxid = aep->at_hdr.rx_id; atp->cdb0 = atiop->cdb_io.cdb_bytes[0]; atp->tattr = aep->at_cmnd.fcp_cmnd_task_attribute & FCP_CMND_TASK_ATTR_MASK; atp->state = ATPD_STATE_CAM; isp_prt(isp, ISP_LOGTDEBUG0, "ATIO7[0x%x] CDB=0x%x lun %jx datalen %u", aep->at_rxid, atp->cdb0, (uintmax_t)lun, atp->orig_datalen); xpt_done((union ccb *)atiop); rls_lun_statep(isp, tptr); return; noresrc: if (atp) { isp_put_atpd(isp, tptr, atp); } ntp = isp_get_ntpd(isp, tptr); if (ntp == NULL) { rls_lun_statep(isp, tptr); isp_endcmd(isp, aep, nphdl, chan, SCSI_STATUS_BUSY, 0); return; } memcpy(ntp->rd.data, aep, QENTRY_LEN); ntp->rd.nt.nt_hba = tptr->restart_queue; tptr->restart_queue = ntp; rls_lun_statep(isp, tptr); } /* * Handle starting an SRR (sequence retransmit request) * We get here when we've gotten the immediate notify * and the return of all outstanding CTIOs for this * transaction.
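 *
 * The offset to restart from arrives split across two 16-bit fields
 * of the immediate notify; for example, in_srr_reloff_hi == 0x0001
 * and in_srr_reloff_lo == 0x8000 decode as
 *
 *	srr_off = 0x8000 | (0x0001 << 16);	- i.e. 0x18000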
*/ static void isp_handle_srr_start(ispsoftc_t *isp, tstate_t *tptr, atio_private_data_t *atp) { in_fcentry_24xx_t *inot; uint32_t srr_off, ccb_off, ccb_len, ccb_end; union ccb *ccb; inot = (in_fcentry_24xx_t *)atp->srr; srr_off = inot->in_srr_reloff_lo | (inot->in_srr_reloff_hi << 16); ccb = atp->srr_ccb; atp->srr_ccb = NULL; atp->nsrr++; if (ccb == NULL) { isp_prt(isp, ISP_LOGWARN, "SRR[0x%x] null ccb", atp->tag); goto fail; } ccb_off = ccb->ccb_h.spriv_field0; ccb_len = ccb->csio.dxfer_len; ccb_end = (ccb_off == ~0)? ~0 : ccb_off + ccb_len; switch (inot->in_srr_iu) { case R_CTL_INFO_SOLICITED_DATA: /* * We have to restart a FCP_DATA data out transaction */ atp->sendst = 0; atp->bytes_xfered = srr_off; if (ccb_len == 0) { isp_prt(isp, ISP_LOGWARN, "SRR[0x%x] SRR offset 0x%x but current CCB doesn't transfer data", atp->tag, srr_off); goto mdp; } if (srr_off < ccb_off || srr_off > ccb_end) { isp_prt(isp, ISP_LOGWARN, "SRR[0x%x] SRR offset 0x%x not covered by current CCB data range [0x%x..0x%x]", atp->tag, srr_off, ccb_off, ccb_end); goto mdp; } isp_prt(isp, ISP_LOGWARN, "SRR[0x%x] SRR offset 0x%x covered by current CCB data range [0x%x..0x%x]", atp->tag, srr_off, ccb_off, ccb_end); break; case R_CTL_INFO_COMMAND_STATUS: isp_prt(isp, ISP_LOGTINFO, "SRR[0x%x] Got an FCP RSP SRR- resending status", atp->tag); atp->sendst = 1; /* * We have to restart a FCP_RSP IU transaction */ break; case R_CTL_INFO_DATA_DESCRIPTOR: /* * We have to restart an FCP DATA in transaction */ isp_prt(isp, ISP_LOGWARN, "Got an FCP DATA IN SRR- dropping"); goto fail; default: isp_prt(isp, ISP_LOGWARN, "Got an unknown information (%x) SRR- dropping", inot->in_srr_iu); goto fail; } /* * We can't do anything until this is acked, so we might as well start it now. * We aren't going to do the usual asynchronous ack issuance because we need * to make sure this gets on the wire first.
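 *
 * (isp_notify_ack() here is the synchronous attempt; if it fails we
 * take the fail: path below, which pushes the ack through the
 * asynchronous ISPASYNC_TARGET_NOTIFY_ACK machinery and errors the
 * CCB out with CAM_REQ_CMP_ERR.)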
*/ if (isp_notify_ack(isp, inot)) { isp_prt(isp, ISP_LOGWARN, "could not push positive ack for SRR- you lose"); goto fail; } isp_target_start_ctio(isp, ccb, FROM_SRR); return; fail: inot->in_reserved = 1; isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot); ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= CAM_REQ_CMP_ERR; isp_complete_ctio(ccb); return; mdp: if (isp_notify_ack(isp, inot)) { isp_prt(isp, ISP_LOGWARN, "could not push positive ack for SRR- you lose"); goto fail; } ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status = CAM_MESSAGE_RECV; /* * This is not a strict interpretation of MDP, but it's close */ ccb->csio.msg_ptr = &ccb->csio.sense_data.sense_buf[SSD_FULL_SIZE - 16]; ccb->csio.msg_len = 7; ccb->csio.msg_ptr[0] = MSG_EXTENDED; ccb->csio.msg_ptr[1] = 5; ccb->csio.msg_ptr[2] = 0; /* modify data pointer */ ccb->csio.msg_ptr[3] = srr_off >> 24; ccb->csio.msg_ptr[4] = srr_off >> 16; ccb->csio.msg_ptr[5] = srr_off >> 8; ccb->csio.msg_ptr[6] = srr_off; isp_complete_ctio(ccb); } static void isp_handle_srr_notify(ispsoftc_t *isp, void *inot_raw) { tstate_t *tptr; in_fcentry_24xx_t *inot = inot_raw; atio_private_data_t *atp; uint32_t tag = inot->in_rxid; uint32_t bus = inot->in_vpidx; if (!IS_24XX(isp)) { isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot_raw); return; } tptr = get_lun_statep_from_tag(isp, bus, tag); if (tptr == NULL) { isp_prt(isp, ISP_LOGERR, "%s: cannot find tptr for tag %x in SRR Notify", __func__, tag); isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot); return; } atp = isp_find_atpd(isp, tptr, tag); if (atp == NULL) { rls_lun_statep(isp, tptr); isp_prt(isp, ISP_LOGERR, "%s: cannot find adjunct for %x in SRR Notify", __func__, tag); isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot); return; } atp->srr_notify_rcvd = 1; memcpy(atp->srr, inot, sizeof (atp->srr)); isp_prt(isp, ISP_LOGTINFO /* ISP_LOGTDEBUG0 */, "SRR[0x%x] inot->in_rxid flags 0x%x srr_iu=%x reloff 0x%x", inot->in_rxid, inot->in_flags, inot->in_srr_iu, inot->in_srr_reloff_lo | (inot->in_srr_reloff_hi << 16)); if (atp->srr_ccb) isp_handle_srr_start(isp, tptr, atp); rls_lun_statep(isp, tptr); } static void isp_handle_platform_ctio(ispsoftc_t *isp, void *arg) { union ccb *ccb; int sentstatus = 0, ok = 0, notify_cam = 0, resid = 0, failure = 0; tstate_t *tptr = NULL; atio_private_data_t *atp = NULL; int bus; uint32_t handle, moved_data = 0, data_requested; /* * CTIO handles are 16 bits. * CTIO2 and CTIO7 are 32 bits. */ if (IS_SCSI(isp)) { handle = ((ct_entry_t *)arg)->ct_syshandle; } else { handle = ((ct2_entry_t *)arg)->ct_syshandle; } ccb = isp_find_xs_tgt(isp, handle); if (ccb == NULL) { isp_print_bytes(isp, "null ccb in isp_handle_platform_ctio", QENTRY_LEN, arg); return; } isp_destroy_tgt_handle(isp, handle); data_requested = PISP_PCMD(ccb)->datalen; isp_free_pcmd(isp, ccb); if (isp->isp_nactive) { isp->isp_nactive--; } bus = XS_CHANNEL(ccb); tptr = get_lun_statep(isp, bus, XS_LUN(ccb)); if (tptr == NULL) { tptr = get_lun_statep(isp, bus, CAM_LUN_WILDCARD); } if (tptr == NULL) { isp_prt(isp, ISP_LOGERR, "%s: cannot find tptr for tag %x after I/O", __func__, ccb->csio.tag_id); return; } if (IS_24XX(isp)) { atp = isp_find_atpd(isp, tptr, ((ct7_entry_t *)arg)->ct_rxid); } else if (IS_FC(isp)) { atp = isp_find_atpd(isp, tptr, ((ct2_entry_t *)arg)->ct_rxid); } else { atp = isp_find_atpd(isp, tptr, ((ct_entry_t *)arg)->ct_fwhandle); } if (atp == NULL) { /* * XXX: isp_clear_commands() generates fake CTIO with zero * ct_rxid value, filling only ct_syshandle. 
Workaround * that using tag_id from the CCB, pointed by ct_syshandle. */ atp = isp_find_atpd(isp, tptr, ccb->csio.tag_id); } if (atp == NULL) { rls_lun_statep(isp, tptr); isp_prt(isp, ISP_LOGERR, "%s: cannot find adjunct for %x after I/O", __func__, ccb->csio.tag_id); return; } KASSERT((atp->ctcnt > 0), ("ctio count not greater than zero")); atp->bytes_in_transit -= data_requested; atp->ctcnt -= 1; ccb->ccb_h.status &= ~CAM_STATUS_MASK; if (IS_24XX(isp)) { ct7_entry_t *ct = arg; if (ct->ct_nphdl == CT7_SRR) { atp->srr_ccb = ccb; if (atp->srr_notify_rcvd) isp_handle_srr_start(isp, tptr, atp); rls_lun_statep(isp, tptr); return; } if (ct->ct_nphdl == CT_HBA_RESET) { failure = CAM_UNREC_HBA_ERROR; } else { sentstatus = ct->ct_flags & CT7_SENDSTATUS; ok = (ct->ct_nphdl == CT7_OK); notify_cam = (ct->ct_header.rqs_seqno & ATPD_SEQ_NOTIFY_CAM) != 0; if ((ct->ct_flags & CT7_DATAMASK) != CT7_NO_DATA) { resid = ct->ct_resid; moved_data = data_requested - resid; } } isp_prt(isp, ok? ISP_LOGTDEBUG0 : ISP_LOGWARN, "%s: CTIO7[%x] seq %u nc %d sts 0x%x flg 0x%x sns %d resid %d %s", __func__, ct->ct_rxid, ATPD_GET_SEQNO(ct), notify_cam, ct->ct_nphdl, ct->ct_flags, (ccb->ccb_h.status & CAM_SENT_SENSE) != 0, resid, sentstatus? "FIN" : "MID"); } else if (IS_FC(isp)) { ct2_entry_t *ct = arg; if (ct->ct_status == CT_SRR) { atp->srr_ccb = ccb; if (atp->srr_notify_rcvd) isp_handle_srr_start(isp, tptr, atp); rls_lun_statep(isp, tptr); isp_target_putback_atio(ccb); return; } if (ct->ct_status == CT_HBA_RESET) { failure = CAM_UNREC_HBA_ERROR; } else { sentstatus = ct->ct_flags & CT2_SENDSTATUS; ok = (ct->ct_status & ~QLTM_SVALID) == CT_OK; notify_cam = (ct->ct_header.rqs_seqno & ATPD_SEQ_NOTIFY_CAM) != 0; if ((ct->ct_flags & CT2_DATAMASK) != CT2_NO_DATA) { resid = ct->ct_resid; moved_data = data_requested - resid; } } isp_prt(isp, ok? ISP_LOGTDEBUG0 : ISP_LOGWARN, "%s: CTIO2[%x] seq %u nc %d sts 0x%x flg 0x%x sns %d resid %d %s", __func__, ct->ct_rxid, ATPD_GET_SEQNO(ct), notify_cam, ct->ct_status, ct->ct_flags, (ccb->ccb_h.status & CAM_SENT_SENSE) != 0, resid, sentstatus? "FIN" : "MID"); } else { ct_entry_t *ct = arg; if (ct->ct_status == (CT_HBA_RESET & 0xff)) { failure = CAM_UNREC_HBA_ERROR; } else { sentstatus = ct->ct_flags & CT_SENDSTATUS; ok = (ct->ct_status & ~QLTM_SVALID) == CT_OK; notify_cam = (ct->ct_header.rqs_seqno & ATPD_SEQ_NOTIFY_CAM) != 0; } if ((ct->ct_flags & CT_DATAMASK) != CT_NO_DATA) { resid = ct->ct_resid; moved_data = data_requested - resid; } isp_prt(isp, ISP_LOGTDEBUG0, "%s: CTIO[%x] seq %u nc %d tag %x S_ID 0x%x lun %x sts %x flg %x resid %d %s", __func__, ct->ct_fwhandle, ATPD_GET_SEQNO(ct), notify_cam, ct->ct_tag_val, ct->ct_iid, ct->ct_lun, ct->ct_status, ct->ct_flags, resid, sentstatus? "FIN" : "MID"); } if (ok) { if (moved_data) { atp->bytes_xfered += moved_data; ccb->csio.resid = atp->orig_datalen - atp->bytes_xfered - atp->bytes_in_transit; } if (sentstatus && (ccb->ccb_h.flags & CAM_SEND_SENSE)) { ccb->ccb_h.status |= CAM_SENT_SENSE; } ccb->ccb_h.status |= CAM_REQ_CMP; } else { notify_cam = 1; if (failure == CAM_UNREC_HBA_ERROR) ccb->ccb_h.status |= CAM_UNREC_HBA_ERROR; else ccb->ccb_h.status |= CAM_REQ_CMP_ERR; } atp->state = ATPD_STATE_PDON; rls_lun_statep(isp, tptr); /* * We never *not* notify CAM when there has been any error (ok == 0), * so we never need to do an ATIO putback if we're not notifying CAM. */ isp_prt(isp, ISP_LOGTDEBUG0, "%s CTIO[0x%x] done (ok=%d nc=%d nowsendstatus=%d ccb ss=%d)", (sentstatus)? 
" FINAL " : "MIDTERM ", atp->tag, ok, notify_cam, atp->sendst, (ccb->ccb_h.flags & CAM_SEND_STATUS) != 0); if (notify_cam == 0) { if (atp->sendst) { isp_target_start_ctio(isp, ccb, FROM_CTIO_DONE); } return; } /* * We're telling CAM we're done with this CTIO transaction. * * 24XX cards never need an ATIO put back. * * Other cards need one put back only on error. * In the latter case, a timeout will re-fire * and try again in case we didn't have * queue resources to do so at first. In any case, * once the putback is done we do the completion * call. */ if (ok || IS_24XX(isp)) { isp_complete_ctio(ccb); } else { isp_target_putback_atio(ccb); } } static void isp_handle_platform_notify_scsi(ispsoftc_t *isp, in_entry_t *inot) { isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot); } static void isp_handle_platform_notify_fc(ispsoftc_t *isp, in_fcentry_t *inp) { int needack = 1; switch (inp->in_status) { case IN_PORT_LOGOUT: /* * XXX: Need to delete this initiator's WWN from the database * XXX: Need to send this LOGOUT upstream */ isp_prt(isp, ISP_LOGWARN, "port logout of S_ID 0x%x", inp->in_iid); break; case IN_PORT_CHANGED: isp_prt(isp, ISP_LOGWARN, "port changed for S_ID 0x%x", inp->in_iid); break; case IN_GLOBAL_LOGO: isp_del_all_wwn_entries(isp, 0); isp_prt(isp, ISP_LOGINFO, "all ports logged out"); break; case IN_ABORT_TASK: { tstate_t *tptr; uint16_t lun; uint32_t loopid, sid; uint64_t wwn; atio_private_data_t *atp; fcportdb_t *lp; struct ccb_immediate_notify *inot = NULL; if (ISP_CAP_SCCFW(isp)) { lun = inp->in_scclun; #if __FreeBSD_version < 1000700 lun &= 0x3fff; #endif } else { lun = inp->in_lun; } if (ISP_CAP_2KLOGIN(isp)) { loopid = ((in_fcentry_e_t *)inp)->in_iid; } else { loopid = inp->in_iid; } if (isp_find_pdb_by_handle(isp, 0, loopid, &lp)) { wwn = lp->port_wwn; sid = lp->portid; } else { wwn = INI_ANY; sid = PORT_ANY; } tptr = get_lun_statep(isp, 0, lun); if (tptr == NULL) { tptr = get_lun_statep(isp, 0, CAM_LUN_WILDCARD); if (tptr == NULL) { isp_prt(isp, ISP_LOGWARN, "ABORT TASK for lun %u- but no tstate", lun); return; } } atp = isp_find_atpd(isp, tptr, inp->in_seqid); if (atp) { inot = (struct ccb_immediate_notify *) SLIST_FIRST(&tptr->inots); isp_prt(isp, ISP_LOGTDEBUG0, "ABORT TASK RX_ID %x WWN 0x%016llx state %d", inp->in_seqid, (unsigned long long) wwn, atp->state); if (inot) { tptr->inot_count--; SLIST_REMOVE_HEAD(&tptr->inots, sim_links.sle); ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, inot->ccb_h.path, "%s: Take FREE INOT count now %d\n", __func__, tptr->inot_count); } else { ISP_PATH_PRT(isp, ISP_LOGWARN, tptr->owner, "out of INOT structures\n"); } } else { ISP_PATH_PRT(isp, ISP_LOGWARN, tptr->owner, "abort task RX_ID %x from wwn 0x%016llx, state unknown\n", inp->in_seqid, wwn); } if (inot) { isp_notify_t tmp, *nt = &tmp; ISP_MEMZERO(nt, sizeof (isp_notify_t)); nt->nt_hba = isp; nt->nt_tgt = FCPARAM(isp, 0)->isp_wwpn; nt->nt_wwn = wwn; nt->nt_nphdl = loopid; nt->nt_sid = sid; nt->nt_did = PORT_ANY; nt->nt_lun = lun; nt->nt_need_ack = 1; nt->nt_channel = 0; nt->nt_ncode = NT_ABORT_TASK; nt->nt_lreserved = inot; isp_handle_platform_target_tmf(isp, nt); needack = 0; } rls_lun_statep(isp, tptr); break; } default: break; } if (needack) { isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inp); } } static void isp_handle_platform_notify_24xx(ispsoftc_t *isp, in_fcentry_24xx_t *inot) { uint16_t nphdl; uint16_t prli_options = 0; uint32_t portid; fcportdb_t *lp; char *msg = NULL; uint8_t *ptr = (uint8_t *)inot; uint64_t wwpn = INI_NONE, wwnn = INI_NONE; nphdl = inot->in_nphdl; if (nphdl != 
NIL_HANDLE) { portid = inot->in_portid_hi << 16 | inot->in_portid_lo; } else { portid = PORT_ANY; } switch (inot->in_status) { case IN24XX_ELS_RCVD: { char buf[16]; int chan = ISP_GET_VPIDX(isp, inot->in_vpidx); /* * Note that we're just getting notification that an ELS was received * (possibly with some associated information sent upstream). This is * *not* the same as being given the ELS frame to accept or reject. */ switch (inot->in_status_subcode) { case LOGO: msg = "LOGO"; wwpn = be64dec(&ptr[IN24XX_PLOGI_WWPN_OFF]); isp_del_wwn_entry(isp, chan, wwpn, nphdl, portid); break; case PRLO: msg = "PRLO"; break; case PLOGI: msg = "PLOGI"; wwnn = be64dec(&ptr[IN24XX_PLOGI_WWNN_OFF]); wwpn = be64dec(&ptr[IN24XX_PLOGI_WWPN_OFF]); isp_add_wwn_entry(isp, chan, wwpn, wwnn, nphdl, portid, prli_options); break; case PRLI: msg = "PRLI"; prli_options = inot->in_prli_options; if (inot->in_flags & IN24XX_FLAG_PN_NN_VALID) wwnn = be64dec(&ptr[IN24XX_PRLI_WWNN_OFF]); wwpn = be64dec(&ptr[IN24XX_PRLI_WWPN_OFF]); isp_add_wwn_entry(isp, chan, wwpn, wwnn, nphdl, portid, prli_options); break; case PDISC: msg = "PDISC"; break; case ADISC: msg = "ADISC"; break; default: ISP_SNPRINTF(buf, sizeof (buf), "ELS 0x%x", inot->in_status_subcode); msg = buf; break; } if (inot->in_flags & IN24XX_FLAG_PUREX_IOCB) { isp_prt(isp, ISP_LOGERR, "%s Chan %d ELS N-port handle %x PortID 0x%06x marked as needing a PUREX response", msg, chan, nphdl, portid); break; } isp_prt(isp, ISP_LOGTDEBUG0, "%s Chan %d ELS N-port handle %x PortID 0x%06x RX_ID 0x%x OX_ID 0x%x", msg, chan, nphdl, portid, inot->in_rxid, inot->in_oxid); isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot); break; } case IN24XX_PORT_LOGOUT: msg = "PORT LOGOUT"; if (isp_find_pdb_by_handle(isp, ISP_GET_VPIDX(isp, inot->in_vpidx), nphdl, &lp)) { isp_del_wwn_entry(isp, ISP_GET_VPIDX(isp, inot->in_vpidx), lp->port_wwn, nphdl, lp->portid); } /* FALLTHROUGH */ case IN24XX_PORT_CHANGED: if (msg == NULL) msg = "PORT CHANGED"; /* FALLTHROUGH */ case IN24XX_LIP_RESET: if (msg == NULL) msg = "LIP RESET"; isp_prt(isp, ISP_LOGINFO, "Chan %d %s (sub-status 0x%x) for N-port handle 0x%x", ISP_GET_VPIDX(isp, inot->in_vpidx), msg, inot->in_status_subcode, nphdl); /* * All subcodes here are irrelevant. What is relevant * is that we need to terminate all active commands from * this initiator (known by N-port handle). */ /* XXX IMPLEMENT XXX */ isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot); break; case IN24XX_SRR_RCVD: #ifdef ISP_TARGET_MODE isp_handle_srr_notify(isp, inot); break; #else if (msg == NULL) msg = "SRR RCVD"; /* FALLTHROUGH */ #endif case IN24XX_LINK_RESET: if (msg == NULL) msg = "LINK RESET"; case IN24XX_LINK_FAILED: if (msg == NULL) msg = "LINK FAILED"; default: isp_prt(isp, ISP_LOGWARN, "Chan %d %s", ISP_GET_VPIDX(isp, inot->in_vpidx), msg); isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, inot); break; } } static int isp_handle_platform_target_notify_ack(ispsoftc_t *isp, isp_notify_t *mp) { if (isp->isp_state != ISP_RUNSTATE) { isp_prt(isp, ISP_LOGTINFO, "Notify Code 0x%x (qevalid=%d) acked- h/w not ready (dropping)", mp->nt_ncode, mp->nt_lreserved != NULL); return (0); } /* * This case is for a Task Management Function, which shows up as an ATIO7 entry. 
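 *
 * The FCP task attribute rides in the top nibble of at_ta_len, so the
 * Mode 1 response built below carries it across:
 *
 *	cto->ct_flags = CT7_SENDSTATUS|CT7_NOACK|CT7_NO_DATA|CT7_FLAG_MODE1;
 *	cto->ct_flags |= (aep->at_ta_len >> 12) << CT7_TASK_ATTR_SHIFT;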
*/ if (IS_24XX(isp) && mp->nt_lreserved && ((isphdr_t *)mp->nt_lreserved)->rqs_entry_type == RQSTYPE_ATIO) { ct7_entry_t local, *cto = &local; at7_entry_t *aep = (at7_entry_t *)mp->nt_lreserved; fcportdb_t *lp; uint32_t sid; uint16_t nphdl; sid = (aep->at_hdr.s_id[0] << 16) | (aep->at_hdr.s_id[1] << 8) | aep->at_hdr.s_id[2]; if (isp_find_pdb_by_sid(isp, mp->nt_channel, sid, &lp)) { nphdl = lp->handle; } else { nphdl = NIL_HANDLE; } ISP_MEMZERO(&local, sizeof (local)); cto->ct_header.rqs_entry_type = RQSTYPE_CTIO7; cto->ct_header.rqs_entry_count = 1; cto->ct_nphdl = nphdl; cto->ct_rxid = aep->at_rxid; cto->ct_vpidx = mp->nt_channel; cto->ct_iid_lo = sid; cto->ct_iid_hi = sid >> 16; cto->ct_oxid = aep->at_hdr.ox_id; cto->ct_flags = CT7_SENDSTATUS|CT7_NOACK|CT7_NO_DATA|CT7_FLAG_MODE1; cto->ct_flags |= (aep->at_ta_len >> 12) << CT7_TASK_ATTR_SHIFT; return (isp_target_put_entry(isp, &local)); } /* * This case is for a responding to an ABTS frame */ if (IS_24XX(isp) && mp->nt_lreserved && ((isphdr_t *)mp->nt_lreserved)->rqs_entry_type == RQSTYPE_ABTS_RCVD) { /* * Overload nt_need_ack here to mark whether we've terminated the associated command. */ if (mp->nt_need_ack) { uint8_t storage[QENTRY_LEN]; ct7_entry_t *cto = (ct7_entry_t *) storage; abts_t *abts = (abts_t *)mp->nt_lreserved; ISP_MEMZERO(cto, sizeof (ct7_entry_t)); isp_prt(isp, ISP_LOGTDEBUG0, "%s: [%x] terminating after ABTS received", __func__, abts->abts_rxid_task); cto->ct_header.rqs_entry_type = RQSTYPE_CTIO7; cto->ct_header.rqs_entry_count = 1; cto->ct_nphdl = mp->nt_nphdl; cto->ct_rxid = abts->abts_rxid_task; cto->ct_iid_lo = mp->nt_sid; cto->ct_iid_hi = mp->nt_sid >> 16; cto->ct_oxid = abts->abts_ox_id; cto->ct_vpidx = mp->nt_channel; cto->ct_flags = CT7_NOACK|CT7_TERMINATE; if (isp_target_put_entry(isp, cto)) { return (ENOMEM); } mp->nt_need_ack = 0; } if (isp_acknak_abts(isp, mp->nt_lreserved, 0) == ENOMEM) { return (ENOMEM); } else { return (0); } } /* * Handle logout cases here */ if (mp->nt_ncode == NT_GLOBAL_LOGOUT) { isp_del_all_wwn_entries(isp, mp->nt_channel); } if (mp->nt_ncode == NT_LOGOUT) { if (!IS_2100(isp) && IS_FC(isp)) { isp_del_wwn_entries(isp, mp); } } /* * General purpose acknowledgement */ if (mp->nt_need_ack) { isp_prt(isp, ISP_LOGTINFO, "Notify Code 0x%x (qevalid=%d) being acked", mp->nt_ncode, mp->nt_lreserved != NULL); /* * Don't need to use the guaranteed send because the caller can retry */ return (isp_notify_ack(isp, mp->nt_lreserved)); } return (0); } /* * Handle task management functions. * * We show up here with a notify structure filled out. * * The nt_lreserved tag points to the original queue entry */ static void isp_handle_platform_target_tmf(ispsoftc_t *isp, isp_notify_t *notify) { tstate_t *tptr; fcportdb_t *lp; struct ccb_immediate_notify *inot; inot_private_data_t *ntp = NULL; lun_id_t lun; isp_prt(isp, ISP_LOGTDEBUG0, "%s: code 0x%x sid 0x%x tagval 0x%016llx chan %d lun 0x%x", __func__, notify->nt_ncode, notify->nt_sid, (unsigned long long) notify->nt_tagval, notify->nt_channel, notify->nt_lun); /* * NB: This assignment is necessary because of tricky type conversion. * XXX: This is tricky and I need to check this. If the lun isn't known * XXX: for the task management function, it does not of necessity follow * XXX: that it should go up stream to the wildcard listener. 
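 *
 * Concretely, the conversion below amounts to
 *
 *	lun = (notify->nt_lun == LUN_ANY) ? CAM_LUN_WILDCARD : notify->nt_lun;
 *
 * spelled out as an if/else because of the differing types.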
*/ if (notify->nt_lun == LUN_ANY) { lun = CAM_LUN_WILDCARD; } else { lun = notify->nt_lun; } tptr = get_lun_statep(isp, notify->nt_channel, lun); if (tptr == NULL) { tptr = get_lun_statep(isp, notify->nt_channel, CAM_LUN_WILDCARD); if (tptr == NULL) { isp_prt(isp, ISP_LOGWARN, "%s: no state pointer found for chan %d lun %#jx", __func__, notify->nt_channel, (uintmax_t)lun); goto bad; } } inot = (struct ccb_immediate_notify *) SLIST_FIRST(&tptr->inots); if (inot == NULL) { isp_prt(isp, ISP_LOGWARN, "%s: out of immediate notify structures for chan %d lun %#jx", __func__, notify->nt_channel, (uintmax_t)lun); goto bad; } if (isp_find_pdb_by_sid(isp, notify->nt_channel, notify->nt_sid, &lp) == 0 && isp_find_pdb_by_handle(isp, notify->nt_channel, notify->nt_nphdl, &lp) == 0) { inot->initiator_id = CAM_TARGET_WILDCARD; } else { inot->initiator_id = FC_PORTDB_TGT(isp, notify->nt_channel, lp); } inot->seq_id = notify->nt_tagval; inot->tag_id = notify->nt_tagval >> 32; switch (notify->nt_ncode) { case NT_ABORT_TASK: isp_target_mark_aborted_early(isp, tptr, inot->tag_id); inot->arg = MSG_ABORT_TASK; break; case NT_ABORT_TASK_SET: isp_target_mark_aborted_early(isp, tptr, TAG_ANY); inot->arg = MSG_ABORT_TASK_SET; break; case NT_CLEAR_ACA: inot->arg = MSG_CLEAR_ACA; break; case NT_CLEAR_TASK_SET: inot->arg = MSG_CLEAR_TASK_SET; break; case NT_LUN_RESET: inot->arg = MSG_LOGICAL_UNIT_RESET; break; case NT_TARGET_RESET: inot->arg = MSG_TARGET_RESET; break; case NT_QUERY_TASK_SET: inot->arg = MSG_QUERY_TASK_SET; break; case NT_QUERY_ASYNC_EVENT: inot->arg = MSG_QUERY_ASYNC_EVENT; break; default: isp_prt(isp, ISP_LOGWARN, "%s: unknown TMF code 0x%x for chan %d lun %#jx", __func__, notify->nt_ncode, notify->nt_channel, (uintmax_t)lun); goto bad; } ntp = isp_get_ntpd(isp, tptr); if (ntp == NULL) { isp_prt(isp, ISP_LOGWARN, "%s: out of inotify private structures", __func__); goto bad; } ISP_MEMCPY(&ntp->rd.nt, notify, sizeof (isp_notify_t)); if (notify->nt_lreserved) { ISP_MEMCPY(&ntp->rd.data, notify->nt_lreserved, QENTRY_LEN); ntp->rd.nt.nt_lreserved = &ntp->rd.data; } ntp->rd.seq_id = notify->nt_tagval; ntp->rd.tag_id = notify->nt_tagval >> 32; tptr->inot_count--; SLIST_REMOVE_HEAD(&tptr->inots, sim_links.sle); rls_lun_statep(isp, tptr); ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, inot->ccb_h.path, "%s: Take FREE INOT count now %d\n", __func__, tptr->inot_count); inot->ccb_h.status = CAM_MESSAGE_RECV; xpt_done((union ccb *)inot); return; bad: if (tptr) { rls_lun_statep(isp, tptr); } if (notify->nt_need_ack && notify->nt_lreserved) { if (((isphdr_t *)notify->nt_lreserved)->rqs_entry_type == RQSTYPE_ABTS_RCVD) { if (isp_acknak_abts(isp, notify->nt_lreserved, ENOMEM)) { isp_prt(isp, ISP_LOGWARN, "you lose- unable to send an ACKNAK"); } } else { isp_async(isp, ISPASYNC_TARGET_NOTIFY_ACK, notify->nt_lreserved); } } } /* * Find the associated private data and mark it as dead so * we don't try to work on it any further. 
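 *
 * Marking the adjunct dead is advisory: nothing is torn down here;
 * the flag is only meant to make later work on this tag fail rather
 * than touch a dead command.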
*/ static void isp_target_mark_aborted(ispsoftc_t *isp, union ccb *ccb) { tstate_t *tptr; atio_private_data_t *atp; union ccb *accb = ccb->cab.abort_ccb; tptr = get_lun_statep(isp, XS_CHANNEL(accb), XS_LUN(accb)); if (tptr == NULL) { tptr = get_lun_statep(isp, XS_CHANNEL(accb), CAM_LUN_WILDCARD); if (tptr == NULL) { ccb->ccb_h.status = CAM_REQ_INVALID; return; } } atp = isp_find_atpd(isp, tptr, accb->atio.tag_id); if (atp == NULL) { ccb->ccb_h.status = CAM_REQ_INVALID; } else { atp->dead = 1; ccb->ccb_h.status = CAM_REQ_CMP; } rls_lun_statep(isp, tptr); } static void isp_target_mark_aborted_early(ispsoftc_t *isp, tstate_t *tptr, uint32_t tag_id) { atio_private_data_t *atp; inot_private_data_t *restart_queue = tptr->restart_queue; /* * First, clean any commands pending restart */ tptr->restart_queue = NULL; while (restart_queue) { uint32_t this_tag_id; inot_private_data_t *ntp = restart_queue; restart_queue = ntp->rd.nt.nt_hba; if (IS_24XX(isp)) { this_tag_id = ((at7_entry_t *)ntp->rd.data)->at_rxid; } else { this_tag_id = ((at2_entry_t *)ntp->rd.data)->at_rxid; } if ((uint64_t)tag_id == TAG_ANY || tag_id == this_tag_id) { isp_put_ntpd(isp, tptr, ntp); } else { ntp->rd.nt.nt_hba = tptr->restart_queue; tptr->restart_queue = ntp; } } /* * Now mark other ones dead as well. */ for (atp = tptr->atpool; atp < &tptr->atpool[ATPDPSIZE]; atp++) { if ((uint64_t)tag_id == TAG_ANY || atp->tag == tag_id) { atp->dead = 1; } } } #endif static void isp_cam_async(void *cbarg, uint32_t code, struct cam_path *path, void *arg) { struct cam_sim *sim; int bus, tgt; ispsoftc_t *isp; sim = (struct cam_sim *)cbarg; isp = (ispsoftc_t *) cam_sim_softc(sim); bus = cam_sim_bus(sim); tgt = xpt_path_target_id(path); switch (code) { case AC_LOST_DEVICE: if (IS_SCSI(isp)) { uint16_t oflags, nflags; sdparam *sdp = SDPARAM(isp, bus); if (tgt >= 0) { nflags = sdp->isp_devparam[tgt].nvrm_flags; #ifndef ISP_TARGET_MODE nflags &= DPARM_SAFE_DFLT; if (isp->isp_loaded_fw) { nflags |= DPARM_NARROW | DPARM_ASYNC; } #else nflags = DPARM_DEFAULT; #endif oflags = sdp->isp_devparam[tgt].goal_flags; sdp->isp_devparam[tgt].goal_flags = nflags; sdp->isp_devparam[tgt].dev_update = 1; sdp->update = 1; (void) isp_control(isp, ISPCTL_UPDATE_PARAMS, bus); sdp->isp_devparam[tgt].goal_flags = oflags; } } break; default: isp_prt(isp, ISP_LOGWARN, "isp_cam_async: Code 0x%x", code); break; } } static void isp_poll(struct cam_sim *sim) { ispsoftc_t *isp = cam_sim_softc(sim); uint16_t isr, sema, info; if (ISP_READ_ISR(isp, &isr, &sema, &info)) isp_intr(isp, isr, sema, info); } static void isp_watchdog(void *arg) { struct ccb_scsiio *xs = arg; ispsoftc_t *isp; uint32_t ohandle = ISP_HANDLE_FREE, handle; isp = XS_ISP(xs); handle = isp_find_handle(isp, xs); /* * Hand crank the interrupt code just to be sure the command isn't stuck somewhere. */ if (handle != ISP_HANDLE_FREE) { uint16_t isr, sema, info; if (ISP_READ_ISR(isp, &isr, &sema, &info) != 0) isp_intr(isp, isr, sema, info); ohandle = handle; handle = isp_find_handle(isp, xs); } if (handle != ISP_HANDLE_FREE) { /* * Try and make sure the command is really dead before * we release the handle (and DMA resources) for reuse. * * If we are successful in aborting the command then * we're done here because we'll get the command returned * back separately. */ if (isp_control(isp, ISPCTL_ABORT_CMD, xs) == 0) { return; } /* * Note that after calling the above, the command may in * fact have been completed. 
*/ xs = isp_find_xs(isp, handle); /* * If the command no longer exists, then we won't * be able to find the xs again with this handle. */ if (xs == NULL) { return; } /* * After this point, the command is really dead. */ if (XS_XFRLEN(xs)) { ISP_DMAFREE(isp, xs, handle); } isp_destroy_handle(isp, handle); isp_prt(isp, ISP_LOGERR, "%s: timeout for handle 0x%x", __func__, handle); xs->ccb_h.status &= ~CAM_STATUS_MASK; xs->ccb_h.status |= CAM_CMD_TIMEOUT; isp_prt_endcmd(isp, xs); isp_done(xs); } else { if (ohandle != ISP_HANDLE_FREE) { isp_prt(isp, ISP_LOGWARN, "%s: timeout for handle 0x%x, recovered during interrupt", __func__, ohandle); } else { isp_prt(isp, ISP_LOGWARN, "%s: timeout for handle already free", __func__); } } } static void isp_make_here(ispsoftc_t *isp, fcportdb_t *fcp, int chan, int tgt) { union ccb *ccb; struct isp_fc *fc = ISP_FC_PC(isp, chan); /* * Allocate a CCB, create a wildcard path for this target and schedule a rescan. */ ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { isp_prt(isp, ISP_LOGWARN, "Chan %d unable to alloc CCB for rescan", chan); return; } if (xpt_create_path(&ccb->ccb_h.path, NULL, cam_sim_path(fc->sim), tgt, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { isp_prt(isp, ISP_LOGWARN, "unable to create path for rescan"); xpt_free_ccb(ccb); return; } xpt_rescan(ccb); } static void isp_make_gone(ispsoftc_t *isp, fcportdb_t *fcp, int chan, int tgt) { struct cam_path *tp; struct isp_fc *fc = ISP_FC_PC(isp, chan); if (xpt_create_path(&tp, NULL, cam_sim_path(fc->sim), tgt, CAM_LUN_WILDCARD) == CAM_REQ_CMP) { xpt_async(AC_LOST_DEVICE, tp, NULL); xpt_free_path(tp); } } /* * Gone Device Timer Function- when we have decided that a device has gone * away, we wait a specific period of time prior to telling the OS it has * gone away. * * This timer function fires once a second and then scans the port database * for devices that are marked dead but still have a virtual target assigned. * We decrement a counter for that port database entry, and when it hits zero, * we tell the OS the device has gone away. 
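 *
 * For example, with the timer re-armed every second, a ZOMBIE entry
 * whose gone_timer was left at 30 survives thirty passes of
 * isp_gdt_task() below before the lost-device/AC_CONTRACT work runs.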
*/ static void isp_gdt(void *arg) { struct isp_fc *fc = arg; taskqueue_enqueue(taskqueue_thread, &fc->gtask); } static void isp_gdt_task(void *arg, int pending) { struct isp_fc *fc = arg; ispsoftc_t *isp = fc->isp; int chan = fc - isp->isp_osinfo.pc.fc; fcportdb_t *lp; struct ac_contract ac; struct ac_device_changed *adc; int dbidx, more_to_do = 0; ISP_LOCK(isp); isp_prt(isp, ISP_LOGDEBUG0, "Chan %d GDT timer expired", chan); for (dbidx = 0; dbidx < MAX_FC_TARG; dbidx++) { lp = &FCPARAM(isp, chan)->portdb[dbidx]; if (lp->state != FC_PORTDB_STATE_ZOMBIE) { continue; } if (lp->gone_timer != 0) { lp->gone_timer -= 1; more_to_do++; continue; } isp_prt(isp, ISP_LOGCONFIG, prom3, chan, dbidx, lp->portid, "Gone Device Timeout"); if (lp->is_target) { lp->is_target = 0; isp_make_gone(isp, lp, chan, dbidx); } if (lp->is_initiator) { lp->is_initiator = 0; ac.contract_number = AC_CONTRACT_DEV_CHG; adc = (struct ac_device_changed *) ac.contract_data; adc->wwpn = lp->port_wwn; adc->port = lp->portid; adc->target = dbidx; adc->arrived = 0; xpt_async(AC_CONTRACT, fc->path, &ac); } lp->state = FC_PORTDB_STATE_NIL; } if (fc->ready) { if (more_to_do) { callout_reset(&fc->gdt, hz, isp_gdt, fc); } else { callout_deactivate(&fc->gdt); isp_prt(isp, ISP_LOG_SANCFG, "Chan %d Stopping Gone Device Timer @ %lu", chan, (unsigned long) time_uptime); } } ISP_UNLOCK(isp); } /* * Loop Down Timer Function- when the loop goes down, a timer is started, and * after it expires we come here and take all probational devices that * the OS knows about and tell the OS that they've gone away. * * We don't clear the devices out of our port database because, when the loop * comes back up, we have to do some actual cleanup with the chip at that * point (implicit PLOGO, e.g., to get the chip's port database state right). */ static void isp_ldt(void *arg) { struct isp_fc *fc = arg; taskqueue_enqueue(taskqueue_thread, &fc->ltask); } static void isp_ldt_task(void *arg, int pending) { struct isp_fc *fc = arg; ispsoftc_t *isp = fc->isp; int chan = fc - isp->isp_osinfo.pc.fc; fcportdb_t *lp; struct ac_contract ac; struct ac_device_changed *adc; int dbidx, i; ISP_LOCK(isp); isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Chan %d Loop Down Timer expired @ %lu", chan, (unsigned long) time_uptime); callout_deactivate(&fc->ldt); /* * Notify the OS of all targets that we now consider to have departed. */ for (dbidx = 0; dbidx < MAX_FC_TARG; dbidx++) { lp = &FCPARAM(isp, chan)->portdb[dbidx]; if (lp->state == FC_PORTDB_STATE_NIL) continue; /* * XXX: CLEAN UP AND COMPLETE ANY PENDING COMMANDS FIRST! */ for (i = 0; i < isp->isp_maxcmds; i++) { struct ccb_scsiio *xs; if (!ISP_VALID_HANDLE(isp, isp->isp_xflist[i].handle)) { continue; } if ((xs = isp->isp_xflist[i].cmd) == NULL) { continue; } if (dbidx != XS_TGT(xs)) { continue; } isp_prt(isp, ISP_LOGWARN, "command handle 0x%x for %d.%d.%jx orphaned by loop down timeout", isp->isp_xflist[i].handle, chan, XS_TGT(xs), (uintmax_t)XS_LUN(xs)); } isp_prt(isp, ISP_LOGCONFIG, prom3, chan, dbidx, lp->portid, "Loop Down Timeout"); if (lp->is_target) { lp->is_target = 0; isp_make_gone(isp, lp, chan, dbidx); } if (lp->is_initiator) { lp->is_initiator = 0; ac.contract_number = AC_CONTRACT_DEV_CHG; adc = (struct ac_device_changed *) ac.contract_data; adc->wwpn = lp->port_wwn; adc->port = lp->portid; adc->target = dbidx; adc->arrived = 0; xpt_async(AC_CONTRACT, fc->path, &ac); } } isp_unfreeze_loopdown(isp, chan); /* * The loop down timer has expired. Wake up the kthread * to notice that fact (or make it false).
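 *
 * The kthread then polls with a rough back-off (seconds slept versus
 * accumulated loop down time, in the initiator role case):
 *
 *	down <  10 -> 1		down < 120 -> 20
 *	down <  30 -> 5		otherwise  -> 30
 *	down <  60 -> 10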
*/ fc->loop_dead = 1; fc->loop_down_time = fc->loop_down_limit+1; wakeup(fc); ISP_UNLOCK(isp); } static void isp_kthread(void *arg) { struct isp_fc *fc = arg; ispsoftc_t *isp = fc->isp; int chan = fc - isp->isp_osinfo.pc.fc; int slp = 0; mtx_lock(&isp->isp_osinfo.lock); while (isp->isp_osinfo.is_exiting == 0) { int lb, lim; isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d checking FC state", __func__, chan); lb = isp_fc_runstate(isp, chan, 250000); /* * Our action is different based upon whether we're supporting * Initiator mode or not. If we are, we might freeze the simq * when loop is down and set all sorts of different delays to * check again. * * If not, we simply just wait for loop to come up. */ if (lb && (FCPARAM(isp, chan)->role & ISP_ROLE_INITIATOR)) { /* * Increment loop down time by the last sleep interval */ fc->loop_down_time += slp; if (lb < 0) { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d FC loop not up (down count %d)", __func__, chan, fc->loop_down_time); } else { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d FC got to %d (down count %d)", __func__, chan, lb, fc->loop_down_time); } /* * If we've never seen loop up and we've waited longer * than quickboot time, or we've seen loop up but we've * waited longer than loop_down_limit, give up and go * to sleep until loop comes up. */ if (FCPARAM(isp, chan)->loop_seen_once == 0) { lim = isp_quickboot_time; } else { lim = fc->loop_down_limit; } if (fc->loop_down_time >= lim) { isp_freeze_loopdown(isp, chan, "loop limit hit"); slp = 0; } else if (fc->loop_down_time < 10) { slp = 1; } else if (fc->loop_down_time < 30) { slp = 5; } else if (fc->loop_down_time < 60) { slp = 10; } else if (fc->loop_down_time < 120) { slp = 20; } else { slp = 30; } } else if (lb) { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d FC Loop Down", __func__, chan); fc->loop_down_time += slp; if (fc->loop_down_time > 300) slp = 0; else slp = 60; } else { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d FC state OK", __func__, chan); fc->loop_down_time = 0; slp = 0; } /* * If this is past the first loop up or the loop is dead and if we'd frozen the simq, unfreeze it * now so that CAM can start sending us commands. * * If the FC state isn't okay yet, they'll hit that in isp_start which will freeze the queue again * or kill the commands, as appropriate. */ if (FCPARAM(isp, chan)->loop_seen_once || fc->loop_dead) { isp_unfreeze_loopdown(isp, chan); } isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d sleep time %d", __func__, chan, slp); msleep(fc, &isp->isp_osinfo.lock, PRIBIO, "ispf", slp * hz); /* * If slp is zero, we're waking up for the first time after * things have been okay. In this case, we set a deferral state * for all commands and delay hysteresis seconds before starting * the FC state evaluation. This gives the loop/fabric a chance * to settle. 
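 *
 * e.g. with fc->hysteresis set to 5 the pause below is
 * pause("ispt", 5 * hz), a five second settle delay before the next
 * pass of FC state evaluation.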
*/ if (slp == 0 && fc->hysteresis) { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "%s: Chan %d sleep hysteresis ticks %d", __func__, chan, fc->hysteresis * hz); mtx_unlock(&isp->isp_osinfo.lock); pause("ispt", fc->hysteresis * hz); mtx_lock(&isp->isp_osinfo.lock); } } fc->num_threads -= 1; mtx_unlock(&isp->isp_osinfo.lock); kthread_exit(); } static void isp_action(struct cam_sim *sim, union ccb *ccb) { int bus, tgt, ts, error, lim; ispsoftc_t *isp; struct ccb_trans_settings *cts; CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE, ("isp_action\n")); isp = (ispsoftc_t *)cam_sim_softc(sim); mtx_assert(&isp->isp_lock, MA_OWNED); isp_prt(isp, ISP_LOGDEBUG2, "isp_action code %x", ccb->ccb_h.func_code); ISP_PCMD(ccb) = NULL; switch (ccb->ccb_h.func_code) { case XPT_SCSI_IO: /* Execute the requested I/O operation */ bus = XS_CHANNEL(ccb); /* * Do a couple of preliminary checks... */ if ((ccb->ccb_h.flags & CAM_CDB_POINTER) != 0) { if ((ccb->ccb_h.flags & CAM_CDB_PHYS) != 0) { ccb->ccb_h.status = CAM_REQ_INVALID; isp_done((struct ccb_scsiio *) ccb); break; } } ccb->csio.req_map = NULL; #ifdef DIAGNOSTIC if (ccb->ccb_h.target_id >= ISP_MAX_TARGETS(isp)) { xpt_print(ccb->ccb_h.path, "invalid target\n"); ccb->ccb_h.status = CAM_PATH_INVALID; } else if (ISP_MAX_LUNS(isp) > 0 && ccb->ccb_h.target_lun >= ISP_MAX_LUNS(isp)) { xpt_print(ccb->ccb_h.path, "invalid lun\n"); ccb->ccb_h.status = CAM_PATH_INVALID; } if (ccb->ccb_h.status == CAM_PATH_INVALID) { xpt_done(ccb); break; } #endif ccb->csio.scsi_status = SCSI_STATUS_OK; if (isp_get_pcmd(isp, ccb)) { isp_prt(isp, ISP_LOGWARN, "out of PCMDs"); cam_freeze_devq(ccb->ccb_h.path); cam_release_devq(ccb->ccb_h.path, RELSIM_RELEASE_AFTER_TIMEOUT, 0, 250, 0); ccb->ccb_h.status = CAM_REQUEUE_REQ; xpt_done(ccb); break; } error = isp_start((XS_T *) ccb); switch (error) { case CMD_QUEUED: ccb->ccb_h.status |= CAM_SIM_QUEUED; if (ccb->ccb_h.timeout == CAM_TIME_INFINITY) { break; } ts = ccb->ccb_h.timeout; if (ts == CAM_TIME_DEFAULT) { ts = 60*1000; } ts = isp_mstohz(ts); callout_reset(&PISP_PCMD(ccb)->wdog, ts, isp_watchdog, ccb); break; case CMD_RQLATER: /* * We get this result for FC devices if the loop state isn't ready yet * or if the device in question has gone zombie on us. * * If we've never seen Loop UP at all, we requeue this request and wait * for the initial loop up delay to expire. */ lim = ISP_FC_PC(isp, bus)->loop_down_limit; if (FCPARAM(isp, bus)->loop_seen_once == 0 || ISP_FC_PC(isp, bus)->loop_down_time >= lim) { if (FCPARAM(isp, bus)->loop_seen_once == 0) { isp_prt(isp, ISP_LOGDEBUG0, "%d.%jx loop not seen yet @ %lu", XS_TGT(ccb), (uintmax_t)XS_LUN(ccb), (unsigned long) time_uptime); } else { isp_prt(isp, ISP_LOGDEBUG0, "%d.%jx downtime (%d) > lim (%d)", XS_TGT(ccb), (uintmax_t)XS_LUN(ccb), ISP_FC_PC(isp, bus)->loop_down_time, lim); } ccb->ccb_h.status = CAM_SEL_TIMEOUT; isp_done((struct ccb_scsiio *) ccb); break; } isp_prt(isp, ISP_LOGDEBUG0, "%d.%jx retry later", XS_TGT(ccb), (uintmax_t)XS_LUN(ccb)); cam_freeze_devq(ccb->ccb_h.path); cam_release_devq(ccb->ccb_h.path, RELSIM_RELEASE_AFTER_TIMEOUT, 0, 1000, 0); ccb->ccb_h.status = CAM_REQUEUE_REQ; isp_free_pcmd(isp, ccb); xpt_done(ccb); break; case CMD_EAGAIN: isp_free_pcmd(isp, ccb); cam_freeze_devq(ccb->ccb_h.path); cam_release_devq(ccb->ccb_h.path, RELSIM_RELEASE_AFTER_TIMEOUT, 0, 100, 0); ccb->ccb_h.status = CAM_REQUEUE_REQ; xpt_done(ccb); break; case CMD_COMPLETE: isp_done((struct ccb_scsiio *) ccb); break; default: isp_prt(isp, ISP_LOGERR, "What's this? 
0x%x at %d in file %s", error, __LINE__, __FILE__); ccb->ccb_h.status = CAM_REQUEUE_REQ; isp_free_pcmd(isp, ccb); xpt_done(ccb); } break; #ifdef ISP_TARGET_MODE case XPT_EN_LUN: /* Enable/Disable LUN as a target */ if (ccb->cel.enable) { isp_enable_lun(isp, ccb); } else { isp_disable_lun(isp, ccb); } break; case XPT_IMMED_NOTIFY: case XPT_IMMEDIATE_NOTIFY: /* Add Immediate Notify Resource */ case XPT_ACCEPT_TARGET_IO: /* Add Accept Target IO Resource */ { tstate_t *tptr = get_lun_statep(isp, XS_CHANNEL(ccb), ccb->ccb_h.target_lun); if (tptr == NULL) { tptr = get_lun_statep(isp, XS_CHANNEL(ccb), CAM_LUN_WILDCARD); } if (tptr == NULL) { const char *str; uint32_t tag; if (ccb->ccb_h.func_code == XPT_IMMEDIATE_NOTIFY) { str = "XPT_IMMEDIATE_NOTIFY"; tag = ccb->cin1.seq_id; } else { tag = ccb->atio.tag_id; str = "XPT_ACCEPT_TARGET_IO"; } ISP_PATH_PRT(isp, ISP_LOGWARN, ccb->ccb_h.path, "%s: [0x%x] no state pointer found for %s\n", __func__, tag, str); dump_tstates(isp, XS_CHANNEL(ccb)); ccb->ccb_h.status = CAM_DEV_NOT_THERE; break; } ccb->ccb_h.spriv_field0 = 0; ccb->ccb_h.spriv_ptr1 = isp; if (ccb->ccb_h.func_code == XPT_ACCEPT_TARGET_IO) { if (ccb->atio.tag_id) { atio_private_data_t *atp = isp_find_atpd(isp, tptr, ccb->atio.tag_id); if (atp) { isp_put_atpd(isp, tptr, atp); } } tptr->atio_count++; SLIST_INSERT_HEAD(&tptr->atios, &ccb->ccb_h, sim_links.sle); ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, ccb->ccb_h.path, "Put FREE ATIO (tag id 0x%x), count now %d\n", ccb->atio.tag_id, tptr->atio_count); ccb->atio.tag_id = 0; } else if (ccb->ccb_h.func_code == XPT_IMMEDIATE_NOTIFY) { if (ccb->cin1.tag_id) { inot_private_data_t *ntp = isp_find_ntpd(isp, tptr, ccb->cin1.tag_id, ccb->cin1.seq_id); if (ntp) { isp_put_ntpd(isp, tptr, ntp); } } tptr->inot_count++; SLIST_INSERT_HEAD(&tptr->inots, &ccb->ccb_h, sim_links.sle); ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, ccb->ccb_h.path, "Put FREE INOT, (seq id 0x%x) count now %d\n", ccb->cin1.seq_id, tptr->inot_count); ccb->cin1.seq_id = 0; } else if (ccb->ccb_h.func_code == XPT_IMMED_NOTIFY) { tptr->inot_count++; SLIST_INSERT_HEAD(&tptr->inots, &ccb->ccb_h, sim_links.sle); ISP_PATH_PRT(isp, ISP_LOGTDEBUG2, ccb->ccb_h.path, "Put FREE INOT, (seq id 0x%x) count now %d\n", ccb->cin1.seq_id, tptr->inot_count); ccb->cin1.seq_id = 0; } rls_lun_statep(isp, tptr); ccb->ccb_h.status = CAM_REQ_INPROG; break; } case XPT_NOTIFY_ACK: ccb->ccb_h.status = CAM_REQ_CMP_ERR; break; case XPT_NOTIFY_ACKNOWLEDGE: /* notify ack */ { tstate_t *tptr; inot_private_data_t *ntp; /* * XXX: Because we cannot guarantee that the path information in the notify acknowledge ccb * XXX: matches that for the immediate notify, we have to *search* for the notify structure */ /* * All the relevant path information is in the associated immediate notify */ ISP_PATH_PRT(isp, ISP_LOGTDEBUG0, ccb->ccb_h.path, "%s: [0x%x] NOTIFY ACKNOWLEDGE for 0x%x seen\n", __func__, ccb->cna2.tag_id, ccb->cna2.seq_id); ntp = get_ntp_from_tagdata(isp, ccb->cna2.tag_id, ccb->cna2.seq_id, &tptr); if (ntp == NULL) { ISP_PATH_PRT(isp, ISP_LOGWARN, ccb->ccb_h.path, "%s: [0x%x] XPT_NOTIFY_ACKNOWLEDGE of 0x%x cannot find ntp private data\n", __func__, ccb->cna2.tag_id, ccb->cna2.seq_id); ccb->ccb_h.status = CAM_DEV_NOT_THERE; xpt_done(ccb); break; } if (isp_handle_platform_target_notify_ack(isp, &ntp->rd.nt)) { rls_lun_statep(isp, tptr); cam_freeze_devq(ccb->ccb_h.path); cam_release_devq(ccb->ccb_h.path, RELSIM_RELEASE_AFTER_TIMEOUT, 0, 1000, 0); ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= CAM_REQUEUE_REQ; break; } 
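/*
 * Success path: the notify ack went out, so return the inot private
 * data to its pool, drop the lun state reference, and complete the
 * CCB back to CAM.
 */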
isp_put_ntpd(isp, tptr, ntp); rls_lun_statep(isp, tptr); ccb->ccb_h.status = CAM_REQ_CMP; ISP_PATH_PRT(isp, ISP_LOGTDEBUG0, ccb->ccb_h.path, "%s: [0x%x] calling xpt_done for tag 0x%x\n", __func__, ccb->cna2.tag_id, ccb->cna2.seq_id); xpt_done(ccb); break; } case XPT_CONT_TARGET_IO: isp_target_start_ctio(isp, ccb, FROM_CAM); break; #endif case XPT_RESET_DEV: /* BDR the specified SCSI device */ - { - struct isp_fc *fc; - bus = cam_sim_bus(xpt_path_sim(ccb->ccb_h.path)); tgt = ccb->ccb_h.target_id; tgt |= (bus << 16); - if (IS_FC(isp)) - fc = ISP_FC_PC(isp, bus); - else - fc = NULL; error = isp_control(isp, ISPCTL_RESET_DEV, bus, tgt); if (error) { ccb->ccb_h.status = CAM_REQ_CMP_ERR; } else { /* * If we have a FC device, reset the Command * Reference Number, because the target will expect * that we re-start the CRN at 1 after a reset. */ - if (fc != NULL) - isp_fcp_reset_crn(fc, tgt, /*tgt_set*/ 1); + if (IS_FC(isp)) + isp_fcp_reset_crn(isp, bus, tgt, /*tgt_set*/ 1); ccb->ccb_h.status = CAM_REQ_CMP; } xpt_done(ccb); break; - } case XPT_ABORT: /* Abort the specified CCB */ { union ccb *accb = ccb->cab.abort_ccb; switch (accb->ccb_h.func_code) { #ifdef ISP_TARGET_MODE case XPT_ACCEPT_TARGET_IO: isp_target_mark_aborted(isp, ccb); break; #endif case XPT_SCSI_IO: error = isp_control(isp, ISPCTL_ABORT_CMD, accb); if (error) { ccb->ccb_h.status = CAM_UA_ABORT; } else { ccb->ccb_h.status = CAM_REQ_CMP; } break; default: ccb->ccb_h.status = CAM_REQ_INVALID; break; } /* * This is not a queued CCB, so the caller expects it to be * complete when control is returned. */ break; } #define IS_CURRENT_SETTINGS(c) (c->type == CTS_TYPE_CURRENT_SETTINGS) case XPT_SET_TRAN_SETTINGS: /* Nexus Settings */ cts = &ccb->cts; if (!IS_CURRENT_SETTINGS(cts)) { ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); break; } tgt = cts->ccb_h.target_id; bus = cam_sim_bus(xpt_path_sim(cts->ccb_h.path)); if (IS_SCSI(isp)) { struct ccb_trans_settings_scsi *scsi = &cts->proto_specific.scsi; struct ccb_trans_settings_spi *spi = &cts->xport_specific.spi; sdparam *sdp = SDPARAM(isp, bus); uint16_t *dptr; if (spi->valid == 0 && scsi->valid == 0) { ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; } /* * We always update (internally) from goal_flags * so any request to change settings just gets * vectored to that location. 
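 *
 * (Sketch of the flow: the CTS_*_VALID bits checked below gate which
 * DPARM_* flags in goal_flags get rewritten; a later
 * isp_control(isp, ISPCTL_UPDATE_PARAMS, bus) pushes goal_flags to
 * the chip, after which actv_flags reflects what was negotiated.)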
*/ dptr = &sdp->isp_devparam[tgt].goal_flags; if ((spi->valid & CTS_SPI_VALID_DISC) != 0) { if ((spi->flags & CTS_SPI_FLAGS_DISC_ENB) != 0) *dptr |= DPARM_DISC; else *dptr &= ~DPARM_DISC; } if ((scsi->valid & CTS_SCSI_VALID_TQ) != 0) { if ((scsi->flags & CTS_SCSI_FLAGS_TAG_ENB) != 0) *dptr |= DPARM_TQING; else *dptr &= ~DPARM_TQING; } if ((spi->valid & CTS_SPI_VALID_BUS_WIDTH) != 0) { if (spi->bus_width == MSG_EXT_WDTR_BUS_16_BIT) *dptr |= DPARM_WIDE; else *dptr &= ~DPARM_WIDE; } /* * XXX: FIX ME */ if ((spi->valid & CTS_SPI_VALID_SYNC_OFFSET) && (spi->valid & CTS_SPI_VALID_SYNC_RATE) && (spi->sync_period && spi->sync_offset)) { *dptr |= DPARM_SYNC; /* * XXX: CHECK FOR LEGALITY */ sdp->isp_devparam[tgt].goal_period = spi->sync_period; sdp->isp_devparam[tgt].goal_offset = spi->sync_offset; } else { *dptr &= ~DPARM_SYNC; } isp_prt(isp, ISP_LOGDEBUG0, "SET (%d.%d.%jx) to flags %x off %x per %x", bus, tgt, (uintmax_t)cts->ccb_h.target_lun, sdp->isp_devparam[tgt].goal_flags, sdp->isp_devparam[tgt].goal_offset, sdp->isp_devparam[tgt].goal_period); sdp->isp_devparam[tgt].dev_update = 1; sdp->update = 1; } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; case XPT_GET_TRAN_SETTINGS: cts = &ccb->cts; tgt = cts->ccb_h.target_id; bus = cam_sim_bus(xpt_path_sim(cts->ccb_h.path)); if (IS_FC(isp)) { fcparam *fcp = FCPARAM(isp, bus); struct ccb_trans_settings_scsi *scsi = &cts->proto_specific.scsi; struct ccb_trans_settings_fc *fc = &cts->xport_specific.fc; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_2; cts->transport = XPORT_FC; cts->transport_version = 0; scsi->valid = CTS_SCSI_VALID_TQ; scsi->flags = CTS_SCSI_FLAGS_TAG_ENB; fc->valid = CTS_FC_VALID_SPEED; fc->bitrate = 100000; fc->bitrate *= fcp->isp_gbspeed; if (tgt < MAX_FC_TARG) { fcportdb_t *lp = &fcp->portdb[tgt]; fc->wwnn = lp->node_wwn; fc->wwpn = lp->port_wwn; fc->port = lp->portid; fc->valid |= CTS_FC_VALID_WWNN | CTS_FC_VALID_WWPN | CTS_FC_VALID_PORT; } } else { struct ccb_trans_settings_scsi *scsi = &cts->proto_specific.scsi; struct ccb_trans_settings_spi *spi = &cts->xport_specific.spi; sdparam *sdp = SDPARAM(isp, bus); uint16_t dval, pval, oval; if (IS_CURRENT_SETTINGS(cts)) { sdp->isp_devparam[tgt].dev_refresh = 1; sdp->update = 1; (void) isp_control(isp, ISPCTL_UPDATE_PARAMS, bus); dval = sdp->isp_devparam[tgt].actv_flags; oval = sdp->isp_devparam[tgt].actv_offset; pval = sdp->isp_devparam[tgt].actv_period; } else { dval = sdp->isp_devparam[tgt].nvrm_flags; oval = sdp->isp_devparam[tgt].nvrm_offset; pval = sdp->isp_devparam[tgt].nvrm_period; } cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_2; cts->transport = XPORT_SPI; cts->transport_version = 2; spi->valid = 0; scsi->valid = 0; spi->flags = 0; scsi->flags = 0; if (dval & DPARM_DISC) { spi->flags |= CTS_SPI_FLAGS_DISC_ENB; } if ((dval & DPARM_SYNC) && oval && pval) { spi->sync_offset = oval; spi->sync_period = pval; } else { spi->sync_offset = 0; spi->sync_period = 0; } spi->valid |= CTS_SPI_VALID_SYNC_OFFSET; spi->valid |= CTS_SPI_VALID_SYNC_RATE; spi->valid |= CTS_SPI_VALID_BUS_WIDTH; if (dval & DPARM_WIDE) { spi->bus_width = MSG_EXT_WDTR_BUS_16_BIT; } else { spi->bus_width = MSG_EXT_WDTR_BUS_8_BIT; } if (cts->ccb_h.target_lun != CAM_LUN_WILDCARD) { scsi->valid = CTS_SCSI_VALID_TQ; if (dval & DPARM_TQING) { scsi->flags |= CTS_SCSI_FLAGS_TAG_ENB; } spi->valid |= CTS_SPI_VALID_DISC; } isp_prt(isp, ISP_LOGDEBUG0, "GET %s (%d.%d.%jx) to flags %x off %x per %x", IS_CURRENT_SETTINGS(cts)? 
"ACTIVE" : "NVRAM", bus, tgt, (uintmax_t)cts->ccb_h.target_lun, dval, oval, pval); } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; case XPT_CALC_GEOMETRY: cam_calc_geometry(&ccb->ccg, 1); xpt_done(ccb); break; case XPT_RESET_BUS: /* Reset the specified bus */ bus = cam_sim_bus(sim); error = isp_control(isp, ISPCTL_RESET_BUS, bus); if (error) { ccb->ccb_h.status = CAM_REQ_CMP_ERR; xpt_done(ccb); break; } if (bootverbose) { xpt_print(ccb->ccb_h.path, "reset bus on channel %d\n", bus); } if (IS_FC(isp)) { xpt_async(AC_BUS_RESET, ISP_FC_PC(isp, bus)->path, 0); } else { xpt_async(AC_BUS_RESET, ISP_SPI_PC(isp, bus)->path, 0); } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; case XPT_TERM_IO: /* Terminate the I/O process */ ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); break; case XPT_SET_SIM_KNOB: /* Set SIM knobs */ { struct ccb_sim_knob *kp = &ccb->knob; fcparam *fcp; if (!IS_FC(isp)) { ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); break; } bus = cam_sim_bus(xpt_path_sim(kp->ccb_h.path)); fcp = FCPARAM(isp, bus); if (kp->xport_specific.fc.valid & KNOB_VALID_ADDRESS) { fcp->isp_wwnn = ISP_FC_PC(isp, bus)->def_wwnn = kp->xport_specific.fc.wwnn; fcp->isp_wwpn = ISP_FC_PC(isp, bus)->def_wwpn = kp->xport_specific.fc.wwpn; isp_prt(isp, ISP_LOGALL, "Setting Channel %d wwns to 0x%jx 0x%jx", bus, fcp->isp_wwnn, fcp->isp_wwpn); } ccb->ccb_h.status = CAM_REQ_CMP; if (kp->xport_specific.fc.valid & KNOB_VALID_ROLE) { int rchange = 0; int newrole = 0; switch (kp->xport_specific.fc.role) { case KNOB_ROLE_NONE: if (fcp->role != ISP_ROLE_NONE) { rchange = 1; newrole = ISP_ROLE_NONE; } break; case KNOB_ROLE_TARGET: if (fcp->role != ISP_ROLE_TARGET) { rchange = 1; newrole = ISP_ROLE_TARGET; } break; case KNOB_ROLE_INITIATOR: if (fcp->role != ISP_ROLE_INITIATOR) { rchange = 1; newrole = ISP_ROLE_INITIATOR; } break; case KNOB_ROLE_BOTH: if (fcp->role != ISP_ROLE_BOTH) { rchange = 1; newrole = ISP_ROLE_BOTH; } break; } if (rchange) { ISP_PATH_PRT(isp, ISP_LOGCONFIG, ccb->ccb_h.path, "changing role on from %d to %d\n", fcp->role, newrole); #ifdef ISP_TARGET_MODE ISP_SET_PC(isp, bus, tm_enabled, 0); ISP_SET_PC(isp, bus, tm_luns_enabled, 0); #endif if (isp_control(isp, ISPCTL_CHANGE_ROLE, bus, newrole) != 0) { ccb->ccb_h.status = CAM_REQ_CMP_ERR; xpt_done(ccb); break; } #ifdef ISP_TARGET_MODE if (newrole == ISP_ROLE_TARGET || newrole == ISP_ROLE_BOTH) { /* * Give the new role a chance to complain and settle */ msleep(isp, &isp->isp_lock, PRIBIO, "taking a breather", 2); ccb->ccb_h.status = isp_enable_deferred_luns(isp, bus); } #endif } } xpt_done(ccb); break; } case XPT_GET_SIM_KNOB: /* Get SIM knobs */ { struct ccb_sim_knob *kp = &ccb->knob; if (IS_FC(isp)) { fcparam *fcp; bus = cam_sim_bus(xpt_path_sim(kp->ccb_h.path)); fcp = FCPARAM(isp, bus); kp->xport_specific.fc.wwnn = fcp->isp_wwnn; kp->xport_specific.fc.wwpn = fcp->isp_wwpn; switch (fcp->role) { case ISP_ROLE_NONE: kp->xport_specific.fc.role = KNOB_ROLE_NONE; break; case ISP_ROLE_TARGET: kp->xport_specific.fc.role = KNOB_ROLE_TARGET; break; case ISP_ROLE_INITIATOR: kp->xport_specific.fc.role = KNOB_ROLE_INITIATOR; break; case ISP_ROLE_BOTH: kp->xport_specific.fc.role = KNOB_ROLE_BOTH; break; } kp->xport_specific.fc.valid = KNOB_VALID_ADDRESS | KNOB_VALID_ROLE; ccb->ccb_h.status = CAM_REQ_CMP; } else { ccb->ccb_h.status = CAM_REQ_INVALID; } xpt_done(ccb); break; } case XPT_PATH_INQ: /* Path routing inquiry */ { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; #ifdef ISP_TARGET_MODE cpi->target_sprt = PIT_PROCESSOR | 
PIT_DISCONNECT | PIT_TERM_IO; #else cpi->target_sprt = 0; #endif cpi->hba_eng_cnt = 0; cpi->max_target = ISP_MAX_TARGETS(isp) - 1; cpi->max_lun = ISP_MAX_LUNS(isp) == 0 ? 255 : ISP_MAX_LUNS(isp) - 1; cpi->bus_id = cam_sim_bus(sim); if (isp->isp_osinfo.sixtyfourbit) cpi->maxio = (ISP_NSEG64_MAX - 1) * PAGE_SIZE; else cpi->maxio = (ISP_NSEG_MAX - 1) * PAGE_SIZE; bus = cam_sim_bus(xpt_path_sim(cpi->ccb_h.path)); if (IS_FC(isp)) { fcparam *fcp = FCPARAM(isp, bus); cpi->hba_misc = PIM_NOBUSRESET | PIM_UNMAPPED; #if __FreeBSD_version >= 1000700 cpi->hba_misc |= PIM_EXTLUNS; #endif #if __FreeBSD_version >= 1000039 cpi->hba_misc |= PIM_NOSCAN; #endif /* * Because our loop ID can shift from time to time, * make our initiator ID out of range of our bus. */ cpi->initiator_id = cpi->max_target + 1; /* * Set base transfer capabilities for Fibre Channel, for this HBA. */ if (IS_25XX(isp)) { cpi->base_transfer_speed = 8000000; } else if (IS_24XX(isp)) { cpi->base_transfer_speed = 4000000; } else if (IS_23XX(isp)) { cpi->base_transfer_speed = 2000000; } else { cpi->base_transfer_speed = 1000000; } cpi->hba_inquiry = PI_TAG_ABLE; cpi->transport = XPORT_FC; cpi->transport_version = 0; cpi->xport_specific.fc.wwnn = fcp->isp_wwnn; cpi->xport_specific.fc.wwpn = fcp->isp_wwpn; cpi->xport_specific.fc.port = fcp->isp_portid; cpi->xport_specific.fc.bitrate = fcp->isp_gbspeed * 1000; } else { sdparam *sdp = SDPARAM(isp, bus); cpi->hba_inquiry = PI_SDTR_ABLE|PI_TAG_ABLE|PI_WIDE_16; cpi->hba_misc = PIM_UNMAPPED; cpi->initiator_id = sdp->isp_initiator_id; cpi->base_transfer_speed = 3300; cpi->transport = XPORT_SPI; cpi->transport_version = 2; } cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_2; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, "Qlogic", HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; } default: ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); break; } } #define ISPDDB (CAM_DEBUG_INFO|CAM_DEBUG_TRACE|CAM_DEBUG_CDB) void isp_done(XS_T *sccb) { ispsoftc_t *isp = XS_ISP(sccb); uint32_t status; if (XS_NOERR(sccb)) XS_SETERR(sccb, CAM_REQ_CMP); if ((sccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP && (sccb->scsi_status != SCSI_STATUS_OK)) { sccb->ccb_h.status &= ~CAM_STATUS_MASK; if ((sccb->scsi_status == SCSI_STATUS_CHECK_COND) && (sccb->ccb_h.status & CAM_AUTOSNS_VALID) == 0) { sccb->ccb_h.status |= CAM_AUTOSENSE_FAIL; } else { sccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; } } sccb->ccb_h.status &= ~CAM_SIM_QUEUED; status = sccb->ccb_h.status & CAM_STATUS_MASK; if (status != CAM_REQ_CMP) { if (status != CAM_SEL_TIMEOUT) isp_prt(isp, ISP_LOGDEBUG0, "target %d lun %jx CAM status 0x%x SCSI status 0x%x", XS_TGT(sccb), (uintmax_t)XS_LUN(sccb), sccb->ccb_h.status, sccb->scsi_status); else if ((IS_FC(isp)) && (XS_TGT(sccb) < MAX_FC_TARG)) { fcparam *fcp; fcp = FCPARAM(isp, XS_CHANNEL(sccb)); fcp->portdb[XS_TGT(sccb)].is_target = 0; } if ((sccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { sccb->ccb_h.status |= CAM_DEV_QFRZN; xpt_freeze_devq(sccb->ccb_h.path, 1); } } if ((CAM_DEBUGGED(sccb->ccb_h.path, ISPDDB)) && (sccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { xpt_print(sccb->ccb_h.path, "cam completion status 0x%x\n", sccb->ccb_h.status); } if (ISP_PCMD(sccb)) { if (callout_active(&PISP_PCMD(sccb)->wdog)) callout_stop(&PISP_PCMD(sccb)->wdog); isp_free_pcmd(isp, (union ccb *) sccb); } xpt_done((union ccb *) sccb); } void isp_async(ispsoftc_t *isp, ispasync_t cmd, 
...) { int bus; static const char prom[] = "Chan %d [%d] WWPN 0x%16jx PortID 0x%06x handle 0x%x %s %s"; char buf[64]; char *msg = NULL; target_id_t tgt; fcportdb_t *lp; struct isp_fc *fc; struct cam_path *tmppath; struct ac_contract ac; struct ac_device_changed *adc; va_list ap; switch (cmd) { case ISPASYNC_NEW_TGT_PARAMS: { struct ccb_trans_settings_scsi *scsi; struct ccb_trans_settings_spi *spi; int flags, tgt; sdparam *sdp; struct ccb_trans_settings cts; memset(&cts, 0, sizeof (struct ccb_trans_settings)); va_start(ap, cmd); bus = va_arg(ap, int); tgt = va_arg(ap, int); va_end(ap); sdp = SDPARAM(isp, bus); if (xpt_create_path(&tmppath, NULL, cam_sim_path(ISP_SPI_PC(isp, bus)->sim), tgt, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { isp_prt(isp, ISP_LOGWARN, "isp_async cannot make temp path for %d.%d", tgt, bus); break; } flags = sdp->isp_devparam[tgt].actv_flags; cts.type = CTS_TYPE_CURRENT_SETTINGS; cts.protocol = PROTO_SCSI; cts.transport = XPORT_SPI; scsi = &cts.proto_specific.scsi; spi = &cts.xport_specific.spi; if (flags & DPARM_TQING) { scsi->valid |= CTS_SCSI_VALID_TQ; scsi->flags |= CTS_SCSI_FLAGS_TAG_ENB; } if (flags & DPARM_DISC) { spi->valid |= CTS_SPI_VALID_DISC; spi->flags |= CTS_SPI_FLAGS_DISC_ENB; } spi->flags |= CTS_SPI_VALID_BUS_WIDTH; if (flags & DPARM_WIDE) { spi->bus_width = MSG_EXT_WDTR_BUS_16_BIT; } else { spi->bus_width = MSG_EXT_WDTR_BUS_8_BIT; } if (flags & DPARM_SYNC) { spi->valid |= CTS_SPI_VALID_SYNC_RATE; spi->valid |= CTS_SPI_VALID_SYNC_OFFSET; spi->sync_period = sdp->isp_devparam[tgt].actv_period; spi->sync_offset = sdp->isp_devparam[tgt].actv_offset; } isp_prt(isp, ISP_LOGDEBUG2, "NEW_TGT_PARAMS bus %d tgt %d period %x offset %x flags %x", bus, tgt, sdp->isp_devparam[tgt].actv_period, sdp->isp_devparam[tgt].actv_offset, flags); xpt_setup_ccb(&cts.ccb_h, tmppath, 1); xpt_async(AC_TRANSFER_NEG, tmppath, &cts); xpt_free_path(tmppath); break; } case ISPASYNC_BUS_RESET: { va_start(ap, cmd); bus = va_arg(ap, int); va_end(ap); isp_prt(isp, ISP_LOGINFO, "SCSI bus reset on bus %d detected", bus); if (IS_FC(isp)) { xpt_async(AC_BUS_RESET, ISP_FC_PC(isp, bus)->path, NULL); } else { xpt_async(AC_BUS_RESET, ISP_SPI_PC(isp, bus)->path, NULL); } break; } case ISPASYNC_LIP: if (msg == NULL) msg = "LIP Received"; /* FALLTHROUGH */ case ISPASYNC_LOOP_RESET: if (msg == NULL) msg = "LOOP Reset"; /* FALLTHROUGH */ case ISPASYNC_LOOP_DOWN: { if (msg == NULL) msg = "LOOP Down"; va_start(ap, cmd); bus = va_arg(ap, int); va_end(ap); FCPARAM(isp, bus)->isp_linkstate = 0; fc = ISP_FC_PC(isp, bus); if (cmd == ISPASYNC_LOOP_DOWN && fc->ready) { /* * We don't do any simq freezing if we are only in target mode */ if (FCPARAM(isp, bus)->role & ISP_ROLE_INITIATOR) { if (fc->path) { isp_freeze_loopdown(isp, bus, msg); } } if (!callout_active(&fc->ldt)) { callout_reset(&fc->ldt, fc->loop_down_limit * hz, isp_ldt, fc); isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Starting Loop Down Timer @ %lu", (unsigned long) time_uptime); } } - isp_fcp_reset_crn(fc, /*tgt*/0, /*tgt_set*/ 0); + isp_fcp_reset_crn(isp, bus, /*tgt*/0, /*tgt_set*/ 0); isp_prt(isp, ISP_LOGINFO, "Chan %d: %s", bus, msg); break; } case ISPASYNC_LOOP_UP: va_start(ap, cmd); bus = va_arg(ap, int); va_end(ap); fc = ISP_FC_PC(isp, bus); /* * Now we just note that Loop has come up. We don't * actually do anything because we're waiting for a * Change Notify before activating the FC cleanup * thread to look at the state of the loop again. 
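 *
 * (Sequencing sketch: the ISPASYNC_CHANGE_NOTIFY handler below
 * cancels the loop down timer and wakeup()s the kthread, which then
 * calls isp_fc_runstate() to rescan the fabric.)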
*/ FCPARAM(isp, bus)->isp_linkstate = 1; fc->loop_dead = 0; fc->loop_down_time = 0; isp_prt(isp, ISP_LOGINFO, "Chan %d Loop UP", bus); break; case ISPASYNC_DEV_ARRIVED: va_start(ap, cmd); bus = va_arg(ap, int); lp = va_arg(ap, fcportdb_t *); va_end(ap); fc = ISP_FC_PC(isp, bus); tgt = FC_PORTDB_TGT(isp, bus, lp); isp_gen_role_str(buf, sizeof (buf), lp->prli_word3); isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->portid, lp->handle, buf, "arrived"); if ((FCPARAM(isp, bus)->role & ISP_ROLE_INITIATOR) && (lp->prli_word3 & PRLI_WD3_TARGET_FUNCTION)) { lp->is_target = 1; - isp_fcp_reset_crn(fc, tgt, /*tgt_set*/ 1); + isp_fcp_reset_crn(isp, bus, tgt, /*tgt_set*/ 1); isp_make_here(isp, lp, bus, tgt); } if ((FCPARAM(isp, bus)->role & ISP_ROLE_TARGET) && (lp->prli_word3 & PRLI_WD3_INITIATOR_FUNCTION)) { lp->is_initiator = 1; ac.contract_number = AC_CONTRACT_DEV_CHG; adc = (struct ac_device_changed *) ac.contract_data; adc->wwpn = lp->port_wwn; adc->port = lp->portid; adc->target = tgt; adc->arrived = 1; xpt_async(AC_CONTRACT, fc->path, &ac); } break; case ISPASYNC_DEV_CHANGED: va_start(ap, cmd); bus = va_arg(ap, int); lp = va_arg(ap, fcportdb_t *); va_end(ap); fc = ISP_FC_PC(isp, bus); tgt = FC_PORTDB_TGT(isp, bus, lp); isp_gen_role_str(buf, sizeof (buf), lp->new_prli_word3); isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->new_portid, lp->handle, buf, "changed"); changed: if (lp->is_target != ((FCPARAM(isp, bus)->role & ISP_ROLE_INITIATOR) && (lp->new_prli_word3 & PRLI_WD3_TARGET_FUNCTION))) { lp->is_target = !lp->is_target; if (lp->is_target) { - isp_fcp_reset_crn(fc, tgt, /*tgt_set*/ 1); + isp_fcp_reset_crn(isp, bus, tgt, /*tgt_set*/ 1); isp_make_here(isp, lp, bus, tgt); } else { isp_make_gone(isp, lp, bus, tgt); - isp_fcp_reset_crn(fc, tgt, /*tgt_set*/ 1); + isp_fcp_reset_crn(isp, bus, tgt, /*tgt_set*/ 1); } } if (lp->is_initiator != ((FCPARAM(isp, bus)->role & ISP_ROLE_TARGET) && (lp->new_prli_word3 & PRLI_WD3_INITIATOR_FUNCTION))) { lp->is_initiator = !lp->is_initiator; ac.contract_number = AC_CONTRACT_DEV_CHG; adc = (struct ac_device_changed *) ac.contract_data; adc->wwpn = lp->port_wwn; adc->port = lp->portid; adc->target = tgt; adc->arrived = lp->is_initiator; xpt_async(AC_CONTRACT, fc->path, &ac); } break; case ISPASYNC_DEV_STAYED: va_start(ap, cmd); bus = va_arg(ap, int); lp = va_arg(ap, fcportdb_t *); va_end(ap); fc = ISP_FC_PC(isp, bus); tgt = FC_PORTDB_TGT(isp, bus, lp); isp_gen_role_str(buf, sizeof (buf), lp->prli_word3); isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->portid, lp->handle, buf, "stayed"); goto changed; case ISPASYNC_DEV_GONE: va_start(ap, cmd); bus = va_arg(ap, int); lp = va_arg(ap, fcportdb_t *); va_end(ap); fc = ISP_FC_PC(isp, bus); tgt = FC_PORTDB_TGT(isp, bus, lp); /* * If this has a virtual target or initiator set the isp_gdt * timer running on it to delay its departure. 
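 *
 * (Illustrative timing: isp_gdt rearms once per second and
 * isp_gdt_task() decrements gone_timer on each pass, so a
 * gone_device_time of, say, 30 keeps the zombie entry around for
 * roughly 30 seconds before it is finally declared gone.)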
*/ isp_gen_role_str(buf, sizeof (buf), lp->prli_word3); if (lp->is_target || lp->is_initiator) { lp->state = FC_PORTDB_STATE_ZOMBIE; lp->gone_timer = fc->gone_device_time; isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->portid, lp->handle, buf, "gone zombie"); if (fc->ready && !callout_active(&fc->gdt)) { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Chan %d Starting Gone Device Timer with %u seconds time now %lu", bus, lp->gone_timer, (unsigned long)time_uptime); callout_reset(&fc->gdt, hz, isp_gdt, fc); } break; } isp_prt(isp, ISP_LOGCONFIG, prom, bus, tgt, lp->port_wwn, lp->portid, lp->handle, buf, "gone"); break; case ISPASYNC_CHANGE_NOTIFY: { char *msg; int evt, nphdl, nlstate, reason; va_start(ap, cmd); bus = va_arg(ap, int); evt = va_arg(ap, int); if (IS_24XX(isp) && evt == ISPASYNC_CHANGE_PDB) { nphdl = va_arg(ap, int); nlstate = va_arg(ap, int); reason = va_arg(ap, int); } else { nphdl = NIL_HANDLE; nlstate = reason = 0; } va_end(ap); fc = ISP_FC_PC(isp, bus); if (evt == ISPASYNC_CHANGE_PDB) { msg = "Port Database Changed"; } else if (evt == ISPASYNC_CHANGE_SNS) { msg = "Name Server Database Changed"; } else { msg = "Other Change Notify"; } /* * If the loop down timer is running, cancel it. */ if (fc->ready && callout_active(&fc->ldt)) { isp_prt(isp, ISP_LOG_SANCFG|ISP_LOGDEBUG0, "Stopping Loop Down Timer @ %lu", (unsigned long) time_uptime); callout_stop(&fc->ldt); } isp_prt(isp, ISP_LOGINFO, "Chan %d %s", bus, msg); if (FCPARAM(isp, bus)->role & ISP_ROLE_INITIATOR) { isp_freeze_loopdown(isp, bus, msg); } wakeup(fc); break; } #ifdef ISP_TARGET_MODE case ISPASYNC_TARGET_NOTIFY: { isp_notify_t *notify; va_start(ap, cmd); notify = va_arg(ap, isp_notify_t *); va_end(ap); switch (notify->nt_ncode) { case NT_ABORT_TASK: case NT_ABORT_TASK_SET: case NT_CLEAR_ACA: case NT_CLEAR_TASK_SET: case NT_LUN_RESET: case NT_TARGET_RESET: case NT_QUERY_TASK_SET: case NT_QUERY_ASYNC_EVENT: /* * These are task management functions. */ isp_handle_platform_target_tmf(isp, notify); break; case NT_BUS_RESET: case NT_LIP_RESET: case NT_LINK_UP: case NT_LINK_DOWN: case NT_HBA_RESET: /* * No action need be taken here. 
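 * (These link-level events are already surfaced through the
 * ISPASYNC_LOOP_* and ISPASYNC_BUS_RESET paths above, so the
 * target-mode notify handler can simply ignore them.)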
*/ break; case NT_GLOBAL_LOGOUT: case NT_LOGOUT: /* * This is device arrival/departure notification */ isp_handle_platform_target_notify_ack(isp, notify); break; default: isp_prt(isp, ISP_LOGALL, "target notify code 0x%x", notify->nt_ncode); isp_handle_platform_target_notify_ack(isp, notify); break; } break; } case ISPASYNC_TARGET_NOTIFY_ACK: { void *inot; va_start(ap, cmd); inot = va_arg(ap, void *); va_end(ap); if (isp_notify_ack(isp, inot)) { isp_tna_t *tp = malloc(sizeof (*tp), M_DEVBUF, M_NOWAIT); if (tp) { tp->isp = isp; if (inot) { memcpy(tp->data, inot, sizeof (tp->data)); tp->not = tp->data; } else { tp->not = NULL; } callout_init_mtx(&tp->timer, &isp->isp_lock, 0); callout_reset(&tp->timer, 5, isp_refire_notify_ack, tp); } else { isp_prt(isp, ISP_LOGERR, "you lose- cannot allocate a notify refire"); } } break; } case ISPASYNC_TARGET_ACTION: { isphdr_t *hp; va_start(ap, cmd); hp = va_arg(ap, isphdr_t *); va_end(ap); switch (hp->rqs_entry_type) { default: isp_prt(isp, ISP_LOGWARN, "%s: unhandled target action 0x%x", __func__, hp->rqs_entry_type); break; case RQSTYPE_NOTIFY: if (IS_SCSI(isp)) { isp_handle_platform_notify_scsi(isp, (in_entry_t *) hp); } else if (IS_24XX(isp)) { isp_handle_platform_notify_24xx(isp, (in_fcentry_24xx_t *) hp); } else { isp_handle_platform_notify_fc(isp, (in_fcentry_t *) hp); } break; case RQSTYPE_ATIO: if (IS_24XX(isp)) { isp_handle_platform_atio7(isp, (at7_entry_t *) hp); } else { isp_handle_platform_atio(isp, (at_entry_t *) hp); } break; case RQSTYPE_ATIO2: isp_handle_platform_atio2(isp, (at2_entry_t *) hp); break; case RQSTYPE_CTIO7: case RQSTYPE_CTIO3: case RQSTYPE_CTIO2: case RQSTYPE_CTIO: isp_handle_platform_ctio(isp, hp); break; case RQSTYPE_ABTS_RCVD: { abts_t *abts = (abts_t *)hp; isp_notify_t notify, *nt = ¬ify; tstate_t *tptr; fcportdb_t *lp; uint16_t chan; uint32_t sid, did; did = (abts->abts_did_hi << 16) | abts->abts_did_lo; sid = (abts->abts_sid_hi << 16) | abts->abts_sid_lo; ISP_MEMZERO(nt, sizeof (isp_notify_t)); nt->nt_hba = isp; nt->nt_did = did; nt->nt_nphdl = abts->abts_nphdl; nt->nt_sid = sid; isp_find_chan_by_did(isp, did, &chan); if (chan == ISP_NOCHAN) { nt->nt_tgt = TGT_ANY; } else { nt->nt_tgt = FCPARAM(isp, chan)->isp_wwpn; if (isp_find_pdb_by_handle(isp, chan, abts->abts_nphdl, &lp)) { nt->nt_wwn = lp->port_wwn; } else { nt->nt_wwn = INI_ANY; } } /* * Try hard to find the lun for this command. 
*/ tptr = get_lun_statep_from_tag(isp, chan, abts->abts_rxid_task); if (tptr) { nt->nt_lun = tptr->ts_lun; rls_lun_statep(isp, tptr); } else { nt->nt_lun = LUN_ANY; } nt->nt_need_ack = 1; nt->nt_tagval = abts->abts_rxid_task; nt->nt_tagval |= (((uint64_t) abts->abts_rxid_abts) << 32); if (abts->abts_rxid_task == ISP24XX_NO_TASK) { isp_prt(isp, ISP_LOGTINFO, "[0x%x] ABTS from N-Port handle 0x%x Port 0x%06x has no task id (rx_id 0x%04x ox_id 0x%04x)", abts->abts_rxid_abts, abts->abts_nphdl, sid, abts->abts_rx_id, abts->abts_ox_id); } else { isp_prt(isp, ISP_LOGTINFO, "[0x%x] ABTS from N-Port handle 0x%x Port 0x%06x for task 0x%x (rx_id 0x%04x ox_id 0x%04x)", abts->abts_rxid_abts, abts->abts_nphdl, sid, abts->abts_rxid_task, abts->abts_rx_id, abts->abts_ox_id); } nt->nt_channel = chan; nt->nt_ncode = NT_ABORT_TASK; nt->nt_lreserved = hp; isp_handle_platform_target_tmf(isp, nt); break; } case RQSTYPE_ENABLE_LUN: case RQSTYPE_MODIFY_LUN: isp_ledone(isp, (lun_entry_t *) hp); break; } break; } #endif case ISPASYNC_FW_CRASH: { uint16_t mbox1, mbox6; mbox1 = ISP_READ(isp, OUTMAILBOX1); if (IS_DUALBUS(isp)) { mbox6 = ISP_READ(isp, OUTMAILBOX6); } else { mbox6 = 0; } isp_prt(isp, ISP_LOGERR, "Internal Firmware Error on bus %d @ RISC Address 0x%x", mbox6, mbox1); mbox1 = isp->isp_osinfo.mbox_sleep_ok; isp->isp_osinfo.mbox_sleep_ok = 0; isp_reinit(isp, 1); isp->isp_osinfo.mbox_sleep_ok = mbox1; isp_async(isp, ISPASYNC_FW_RESTARTED, NULL); break; } default: isp_prt(isp, ISP_LOGERR, "unknown isp_async event %d", cmd); break; } } /* * Locks are held before coming here. */ void isp_uninit(ispsoftc_t *isp) { if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_RESET); } else { ISP_WRITE(isp, HCCR, HCCR_CMD_RESET); } ISP_DISABLE_INTS(isp); } /* * When we want to get the 'default' WWNs (when lacking NVRAM), we pick them * up from our platform default (defww{p|n}n) and morph them based upon * channel. * * When we want to get the 'active' WWNs, we get NVRAM WWNs and then morph them * based upon channel. */ uint64_t isp_default_wwn(ispsoftc_t * isp, int chan, int isactive, int iswwnn) { uint64_t seed; struct isp_fc *fc = ISP_FC_PC(isp, chan); /* * If we're asking for a active WWN, the default overrides get * returned, otherwise the NVRAM value is picked. * * If we're asking for a default WWN, we just pick the default override. */ if (isactive) { seed = iswwnn ? fc->def_wwnn : fc->def_wwpn; if (seed) { return (seed); } seed = iswwnn ? FCPARAM(isp, chan)->isp_wwnn_nvram : FCPARAM(isp, chan)->isp_wwpn_nvram; if (seed) { return (seed); } return (0x400000007F000009ull); } seed = iswwnn ? fc->def_wwnn : fc->def_wwpn; /* * For channel zero just return what we have. For either ACTIVE or * DEFAULT cases, we depend on default override of NVRAM values for * channel zero. */ if (chan == 0) { return (seed); } /* * For other channels, we are doing one of three things: * * 1. If what we have now is non-zero, return it. Otherwise we morph * values from channel 0. 2. If we're here for a WWPN we synthesize * it if Channel 0's wwpn has a type 2 NAA. 3. If we're here for a * WWNN we synthesize it if Channel 0's wwnn has a type 2 NAA. */ if (seed) { return (seed); } seed = iswwnn ? ISP_FC_PC(isp, 0)->def_wwnn : ISP_FC_PC(isp, 0)->def_wwpn; if (seed == 0) seed = iswwnn ? 
FCPARAM(isp, 0)->isp_wwnn_nvram : FCPARAM(isp, 0)->isp_wwpn_nvram; if (((seed >> 60) & 0xf) == 2) { /* * The type 2 NAA fields for QLogic cards appear to be laid out * thusly: * * bits 63..60 NAA == 2 bits 59..57 unused/zero bit 56 * port (1) or node (0) WWN distinguishor bit 48 * physical port on dual-port chips (23XX/24XX) * * This is somewhat nutty, particularly since bit 48 is * irrelevant as they assign separate serial numbers to * different physical ports anyway. * * We'll stick our channel number plus one first into bits * 57..59 and thence into bits 52..55 which allows for 8 bits * of channel which is comfortably more than our maximum * (126) now. */ seed &= ~0x0FF0000000000000ULL; if (iswwnn == 0) { seed |= ((uint64_t) (chan + 1) & 0xf) << 56; seed |= ((uint64_t) ((chan + 1) >> 4) & 0xf) << 52; } } else { seed = 0; } return (seed); } void isp_prt(ispsoftc_t *isp, int level, const char *fmt, ...) { int loc; char lbuf[200]; va_list ap; if (level != ISP_LOGALL && (level & isp->isp_dblev) == 0) { return; } snprintf(lbuf, sizeof (lbuf), "%s: ", device_get_nameunit(isp->isp_dev)); loc = strlen(lbuf); va_start(ap, fmt); vsnprintf(&lbuf[loc], sizeof (lbuf) - loc - 1, fmt, ap); va_end(ap); printf("%s\n", lbuf); } void isp_xs_prt(ispsoftc_t *isp, XS_T *xs, int level, const char *fmt, ...) { va_list ap; if (level != ISP_LOGALL && (level & isp->isp_dblev) == 0) { return; } xpt_print_path(xs->ccb_h.path); va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("\n"); } uint64_t isp_nanotime_sub(struct timespec *b, struct timespec *a) { uint64_t elapsed; struct timespec x = *b; timespecsub(&x, a); elapsed = GET_NANOSEC(&x); if (elapsed == 0) elapsed++; return (elapsed); } int isp_mbox_acquire(ispsoftc_t *isp) { if (isp->isp_osinfo.mboxbsy) { return (1); } else { isp->isp_osinfo.mboxcmd_done = 0; isp->isp_osinfo.mboxbsy = 1; return (0); } } void isp_mbox_wait_complete(ispsoftc_t *isp, mbreg_t *mbp) { unsigned int usecs = mbp->timeout; unsigned int max, olim, ilim; if (usecs == 0) { usecs = MBCMD_DEFAULT_TIMEOUT; } max = isp->isp_mbxwrk0 + 1; if (isp->isp_osinfo.mbox_sleep_ok) { unsigned int ms = (usecs + 999) / 1000; isp->isp_osinfo.mbox_sleep_ok = 0; isp->isp_osinfo.mbox_sleeping = 1; for (olim = 0; olim < max; olim++) { msleep(&isp->isp_mbxworkp, &isp->isp_osinfo.lock, PRIBIO, "ispmbx_sleep", isp_mstohz(ms)); if (isp->isp_osinfo.mboxcmd_done) { break; } } isp->isp_osinfo.mbox_sleep_ok = 1; isp->isp_osinfo.mbox_sleeping = 0; } else { for (olim = 0; olim < max; olim++) { for (ilim = 0; ilim < usecs; ilim += 100) { uint16_t isr, sema, info; if (isp->isp_osinfo.mboxcmd_done) { break; } if (ISP_READ_ISR(isp, &isr, &sema, &info)) { isp_intr(isp, isr, sema, info); if (isp->isp_osinfo.mboxcmd_done) { break; } } ISP_DELAY(100); } if (isp->isp_osinfo.mboxcmd_done) { break; } } } if (isp->isp_osinfo.mboxcmd_done == 0) { isp_prt(isp, ISP_LOGWARN, "%s Mailbox Command (0x%x) Timeout (%uus) (started @ %s:%d)", isp->isp_osinfo.mbox_sleep_ok? 
"Interrupting" : "Polled", isp->isp_lastmbxcmd, usecs, mbp->func, mbp->lineno); mbp->param[0] = MBOX_TIMEOUT; isp->isp_osinfo.mboxcmd_done = 1; } } void isp_mbox_notify_done(ispsoftc_t *isp) { if (isp->isp_osinfo.mbox_sleeping) { wakeup(&isp->isp_mbxworkp); } isp->isp_osinfo.mboxcmd_done = 1; } void isp_mbox_release(ispsoftc_t *isp) { isp->isp_osinfo.mboxbsy = 0; } int isp_fc_scratch_acquire(ispsoftc_t *isp, int chan) { int ret = 0; if (isp->isp_osinfo.pc.fc[chan].fcbsy) { ret = -1; } else { isp->isp_osinfo.pc.fc[chan].fcbsy = 1; } return (ret); } int isp_mstohz(int ms) { int hz; struct timeval t; t.tv_sec = ms / 1000; t.tv_usec = (ms % 1000) * 1000; hz = tvtohz(&t); if (hz < 0) { hz = 0x7fffffff; } if (hz == 0) { hz = 1; } return (hz); } void isp_platform_intr(void *arg) { ispsoftc_t *isp = arg; uint16_t isr, sema, info; ISP_LOCK(isp); isp->isp_intcnt++; if (ISP_READ_ISR(isp, &isr, &sema, &info)) isp_intr(isp, isr, sema, info); else isp->isp_intbogus++; ISP_UNLOCK(isp); } void isp_common_dmateardown(ispsoftc_t *isp, struct ccb_scsiio *csio, uint32_t hdl) { if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { bus_dmamap_sync(isp->isp_osinfo.dmat, PISP_PCMD(csio)->dmap, BUS_DMASYNC_POSTREAD); } else { bus_dmamap_sync(isp->isp_osinfo.dmat, PISP_PCMD(csio)->dmap, BUS_DMASYNC_POSTWRITE); } bus_dmamap_unload(isp->isp_osinfo.dmat, PISP_PCMD(csio)->dmap); } /* * Reset the command reference number for all LUNs on a specific target * (needed when a target arrives again) or for all targets on a port * (needed for events like a LIP). */ void -isp_fcp_reset_crn(struct isp_fc *fc, uint32_t tgt, int tgt_set) +isp_fcp_reset_crn(ispsoftc_t *isp, int chan, uint32_t tgt, int tgt_set) { - int i; + struct isp_fc *fc = ISP_FC_PC(isp, chan); struct isp_nexus *nxp; + int i; if (tgt_set == 0) - isp_prt(fc->isp, ISP_LOG_SANCFG, "resetting CRN on all targets"); + isp_prt(isp, ISP_LOGDEBUG0, + "Chan %d resetting CRN on all targets", chan); else - isp_prt(fc->isp, ISP_LOG_SANCFG, "resetting CRN target %u", tgt); + isp_prt(isp, ISP_LOGDEBUG0, + "Chan %d resetting CRN on target %u", chan, tgt); for (i = 0; i < NEXUS_HASH_WIDTH; i++) { - nxp = fc->nexus_hash[i]; - while (nxp) { - if ((tgt_set != 0) && (tgt == nxp->tgt)) + for (nxp = fc->nexus_hash[i]; nxp != NULL; nxp = nxp->next) { + if (tgt_set == 0 || tgt == nxp->tgt) nxp->crnseed = 0; - - nxp = nxp->next; } } } int isp_fcp_next_crn(ispsoftc_t *isp, uint8_t *crnp, XS_T *cmd) { lun_id_t lun; uint32_t chan, tgt; struct isp_fc *fc; struct isp_nexus *nxp; int idx; if (isp->isp_type < ISP_HA_FC_2300) return (0); chan = XS_CHANNEL(cmd); tgt = XS_TGT(cmd); lun = XS_LUN(cmd); fc = &isp->isp_osinfo.pc.fc[chan]; idx = NEXUS_HASH(tgt, lun); nxp = fc->nexus_hash[idx]; while (nxp) { if (nxp->tgt == tgt && nxp->lun == lun) break; nxp = nxp->next; } if (nxp == NULL) { nxp = fc->nexus_free_list; if (nxp == NULL) { nxp = malloc(sizeof (struct isp_nexus), M_DEVBUF, M_ZERO|M_NOWAIT); if (nxp == NULL) { return (-1); } } else { fc->nexus_free_list = nxp->next; } nxp->tgt = tgt; nxp->lun = lun; nxp->next = fc->nexus_hash[idx]; fc->nexus_hash[idx] = nxp; } - if (nxp) { - if (nxp->crnseed == 0) - nxp->crnseed = 1; - if (cmd) - PISP_PCMD(cmd)->crn = nxp->crnseed; - *crnp = nxp->crnseed++; - return (0); - } - return (-1); + if (nxp->crnseed == 0) + nxp->crnseed = 1; + PISP_PCMD(cmd)->crn = nxp->crnseed; + *crnp = nxp->crnseed++; + return (0); } /* * We enter with the lock held */ void isp_timer(void *arg) { ispsoftc_t *isp = arg; #ifdef ISP_TARGET_MODE isp_tmcmd_restart(isp); #endif 
callout_reset(&isp->isp_osinfo.tmo, isp_timer_count, isp_timer, isp); } isp_ecmd_t * isp_get_ecmd(ispsoftc_t *isp) { isp_ecmd_t *ecmd = isp->isp_osinfo.ecmd_free; if (ecmd) { isp->isp_osinfo.ecmd_free = ecmd->next; } return (ecmd); } void isp_put_ecmd(ispsoftc_t *isp, isp_ecmd_t *ecmd) { ecmd->next = isp->isp_osinfo.ecmd_free; isp->isp_osinfo.ecmd_free = ecmd; } Index: projects/powernv/dev/isp/isp_freebsd.h =================================================================== --- projects/powernv/dev/isp/isp_freebsd.h (revision 290990) +++ projects/powernv/dev/isp/isp_freebsd.h (revision 290991) @@ -1,779 +1,779 @@ /* $FreeBSD$ */ /*- * Qlogic ISP SCSI Host Adapter FreeBSD Wrapper Definitions * * Copyright (c) 1997-2008 by Matthew Jacob * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _ISP_FREEBSD_H #define _ISP_FREEBSD_H #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_ddb.h" #include "opt_isp.h" #define ISP_PLATFORM_VERSION_MAJOR 7 #define ISP_PLATFORM_VERSION_MINOR 10 /* * Efficiency- get rid of SBus code && tests unless we need them. 
*/ #ifdef __sparc64__ #define ISP_SBUS_SUPPORTED 1 #else #define ISP_SBUS_SUPPORTED 0 #endif #define ISP_IFLAGS INTR_TYPE_CAM | INTR_ENTROPY | INTR_MPSAFE #define N_XCMDS 64 #define XCMD_SIZE 512 struct ispsoftc; typedef union isp_ecmd { union isp_ecmd * next; uint8_t data[XCMD_SIZE]; } isp_ecmd_t; isp_ecmd_t * isp_get_ecmd(struct ispsoftc *); void isp_put_ecmd(struct ispsoftc *, isp_ecmd_t *); #ifdef ISP_TARGET_MODE /* Not quite right, but there was no bump for this change */ #if __FreeBSD_version < 225469 #define SDFIXED(x) (&x) #else #define SDFIXED(x) ((struct scsi_sense_data_fixed *)(&x)) #endif #define ISP_TARGET_FUNCTIONS 1 #define ATPDPSIZE 4096 #define ATPDPHASHSIZE 32 #define ATPDPHASH(x) ((((x) >> 24) ^ ((x) >> 16) ^ ((x) >> 8) ^ (x)) & \ ((ATPDPHASHSIZE) - 1)) #include typedef struct atio_private_data { LIST_ENTRY(atio_private_data) next; uint32_t orig_datalen; uint32_t bytes_xfered; uint32_t bytes_in_transit; uint32_t tag; /* typically f/w RX_ID */ uint32_t lun; uint32_t nphdl; uint32_t sid; uint32_t portid; uint16_t rxid; /* wire rxid */ uint16_t oxid; /* wire oxid */ uint16_t word3; /* PRLI word3 params */ uint16_t ctcnt; /* number of CTIOs currently active */ uint8_t seqno; /* CTIO sequence number */ uint32_t srr_notify_rcvd : 1, cdb0 : 8, sendst : 1, dead : 1, tattr : 3, state : 3; void * ests; /* * The current SRR notify copy */ uint8_t srr[64]; /* sb QENTRY_LEN, but order of definitions is wrong */ void * srr_ccb; uint32_t nsrr; } atio_private_data_t; #define ATPD_STATE_FREE 0 #define ATPD_STATE_ATIO 1 #define ATPD_STATE_CAM 2 #define ATPD_STATE_CTIO 3 #define ATPD_STATE_LAST_CTIO 4 #define ATPD_STATE_PDON 5 #define ATPD_CCB_OUTSTANDING 16 #define ATPD_SEQ_MASK 0x7f #define ATPD_SEQ_NOTIFY_CAM 0x80 #define ATPD_SET_SEQNO(hdrp, atp) ((isphdr_t *)hdrp)->rqs_seqno &= ~ATPD_SEQ_MASK, ((isphdr_t *)hdrp)->rqs_seqno |= (atp)->seqno #define ATPD_GET_SEQNO(hdrp) (((isphdr_t *)hdrp)->rqs_seqno & ATPD_SEQ_MASK) #define ATPD_GET_NCAM(hdrp) ((((isphdr_t *)hdrp)->rqs_seqno & ATPD_SEQ_NOTIFY_CAM) != 0) typedef union inot_private_data inot_private_data_t; union inot_private_data { inot_private_data_t *next; struct { isp_notify_t nt; /* must be first! */ uint8_t data[64]; /* sb QENTRY_LEN, but order of definitions is wrong */ uint32_t tag_id, seq_id; } rd; }; typedef struct isp_timed_notify_ack { void *isp; void *not; uint8_t data[64]; /* sb QENTRY_LEN, but order of definitions is wrong */ struct callout timer; } isp_tna_t; TAILQ_HEAD(isp_ccbq, ccb_hdr); typedef struct tstate { SLIST_ENTRY(tstate) next; lun_id_t ts_lun; struct cam_path *owner; struct isp_ccbq waitq; /* waiting CCBs */ struct ccb_hdr_slist atios; struct ccb_hdr_slist inots; uint32_t hold; uint32_t enabled : 1, atio_count : 15, inot_count : 15; inot_private_data_t * restart_queue; inot_private_data_t * ntfree; inot_private_data_t ntpool[ATPDPSIZE]; LIST_HEAD(, atio_private_data) atfree; LIST_HEAD(, atio_private_data) atused[ATPDPHASHSIZE]; atio_private_data_t atpool[ATPDPSIZE]; } tstate_t; #define LUN_HASH_SIZE 32 #define LUN_HASH_FUNC(lun) ((lun) & (LUN_HASH_SIZE - 1)) #endif /* * Per command info. 
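 * One isp_pcmd is attached to each active CCB through ISP_PCMD();
 * entries are recycled off the pcmd_free list by
 * isp_get_pcmd()/isp_free_pcmd().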
*/ struct isp_pcmd { struct isp_pcmd * next; bus_dmamap_t dmap; /* dma map for this command */ struct ispsoftc * isp; /* containing isp */ struct callout wdog; /* watchdog timer */ uint32_t datalen; /* data length for this command (target mode only) */ uint8_t totslen; /* sense length on status response */ uint8_t cumslen; /* sense length on status response */ uint8_t crn; /* command reference number */ }; #define ISP_PCMD(ccb) (ccb)->ccb_h.spriv_ptr1 #define PISP_PCMD(ccb) ((struct isp_pcmd *)ISP_PCMD(ccb)) /* * Per nexus info. */ struct isp_nexus { uint64_t lun; /* LUN for target */ uint32_t tgt; /* TGT for target */ uint8_t crnseed; /* next command reference number */ struct isp_nexus *next; }; #define NEXUS_HASH_WIDTH 32 #define INITIAL_NEXUS_COUNT MAX_FC_TARG #define NEXUS_HASH(tgt, lun) ((tgt + lun) % NEXUS_HASH_WIDTH) /* * Per channel information */ SLIST_HEAD(tslist, tstate); struct isp_fc { struct cam_sim *sim; struct cam_path *path; struct ispsoftc *isp; struct proc *kproc; bus_dma_tag_t tdmat; bus_dmamap_t tdmap; uint64_t def_wwpn; uint64_t def_wwnn; uint32_t loop_down_time; uint32_t loop_down_limit; uint32_t gone_device_time; /* * Per target/lun info- just to keep a per-ITL nexus crn count */ struct isp_nexus *nexus_hash[NEXUS_HASH_WIDTH]; struct isp_nexus *nexus_free_list; uint32_t #ifdef ISP_TARGET_MODE tm_luns_enabled : 1, tm_enable_defer : 1, tm_enabled : 1, #endif simqfrozen : 3, default_id : 8, hysteresis : 8, def_role : 2, /* default role */ gdt_running : 1, loop_dead : 1, fcbsy : 1, ready : 1; struct callout ldt; /* loop down timer */ struct callout gdt; /* gone device timer */ struct task ltask; struct task gtask; #ifdef ISP_TARGET_MODE struct tslist lun_hash[LUN_HASH_SIZE]; #if defined(DEBUG) unsigned int inject_lost_data_frame; #endif #endif int num_threads; }; struct isp_spi { struct cam_sim *sim; struct cam_path *path; uint32_t #ifdef ISP_TARGET_MODE tm_luns_enabled : 1, tm_enable_defer : 1, tm_enabled : 1, #endif simqfrozen : 3, def_role : 2, iid : 4; #ifdef ISP_TARGET_MODE struct tslist lun_hash[LUN_HASH_SIZE]; #endif int num_threads; }; struct isposinfo { /* * Linkage, locking, and identity */ struct mtx lock; device_t dev; struct cdev * cdev; struct intr_config_hook ehook; struct cam_devq * devq; /* * Firmware pointer */ const struct firmware * fw; /* * DMA related stuff */ bus_space_tag_t bus_tag; bus_dma_tag_t dmat; bus_space_handle_t bus_handle; bus_dma_tag_t cdmat; bus_dmamap_t cdmap; /* * Command and transaction related stuff */ struct isp_pcmd * pcmd_pool; struct isp_pcmd * pcmd_free; uint32_t #ifdef ISP_TARGET_MODE tmwanted : 1, tmbusy : 1, #else : 2, #endif sixtyfourbit : 1, /* sixtyfour bit platform */ timer_active : 1, autoconf : 1, ehook_active : 1, disabled : 1, mbox_sleeping : 1, mbox_sleep_ok : 1, mboxcmd_done : 1, mboxbsy : 1; struct callout tmo; /* general timer */ /* * misc- needs to be sorted better XXXXXX */ int framesize; int exec_throttle; int cont_max; #ifdef ISP_TARGET_MODE cam_status * rptr; #endif bus_addr_t ecmd_dma; isp_ecmd_t * ecmd_base; isp_ecmd_t * ecmd_free; /* * Per-type private storage... 
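 * pc.fc and pc.spi point at per-channel arrays, so the ISP_FC_PC()
 * and ISP_SPI_PC() accessors below simply index by channel number.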
*/ union { struct isp_fc *fc; struct isp_spi *spi; void *ptr; } pc; int is_exiting; }; #define ISP_FC_PC(isp, chan) (&(isp)->isp_osinfo.pc.fc[(chan)]) #define ISP_SPI_PC(isp, chan) (&(isp)->isp_osinfo.pc.spi[(chan)]) #define ISP_GET_PC(isp, chan, tag, rslt) \ if (IS_SCSI(isp)) { \ rslt = ISP_SPI_PC(isp, chan)-> tag; \ } else { \ rslt = ISP_FC_PC(isp, chan)-> tag; \ } #define ISP_GET_PC_ADDR(isp, chan, tag, rp) \ if (IS_SCSI(isp)) { \ rp = &ISP_SPI_PC(isp, chan)-> tag; \ } else { \ rp = &ISP_FC_PC(isp, chan)-> tag; \ } #define ISP_SET_PC(isp, chan, tag, val) \ if (IS_SCSI(isp)) { \ ISP_SPI_PC(isp, chan)-> tag = val; \ } else { \ ISP_FC_PC(isp, chan)-> tag = val; \ } #define FCP_NEXT_CRN isp_fcp_next_crn #define isp_lock isp_osinfo.lock #define isp_bus_tag isp_osinfo.bus_tag #define isp_bus_handle isp_osinfo.bus_handle /* * Locking macros... */ #define ISP_LOCK(isp) mtx_lock(&(isp)->isp_osinfo.lock) #define ISP_UNLOCK(isp) mtx_unlock(&(isp)->isp_osinfo.lock) #define ISP_ASSERT_LOCKED(isp) mtx_assert(&(isp)->isp_osinfo.lock, MA_OWNED) /* * Required Macros/Defines */ #define ISP_FC_SCRLEN 0x1000 #define ISP_MEMZERO(a, b) memset(a, 0, b) #define ISP_MEMCPY memcpy #define ISP_SNPRINTF snprintf #define ISP_DELAY(x) DELAY(x) #if __FreeBSD_version < 1000029 #define ISP_SLEEP(isp, x) msleep(&(isp)->isp_osinfo.is_exiting, \ &(isp)->isp_osinfo.lock, 0, "isp_sleep", ((x) + tick - 1) / tick) #else #define ISP_SLEEP(isp, x) msleep_sbt(&(isp)->isp_osinfo.is_exiting, \ &(isp)->isp_osinfo.lock, 0, "isp_sleep", (x) * SBT_1US, 0, 0) #endif #define ISP_MIN imin #ifndef DIAGNOSTIC #define ISP_INLINE __inline #else #define ISP_INLINE #endif #define NANOTIME_T struct timespec #define GET_NANOTIME nanotime #define GET_NANOSEC(x) ((x)->tv_sec * 1000000000 + (x)->tv_nsec) #define NANOTIME_SUB isp_nanotime_sub #define MAXISPREQUEST(isp) ((IS_FC(isp) || IS_ULTRA2(isp))? 
1024 : 256) #define MEMORYBARRIER(isp, type, offset, size, chan) \ switch (type) { \ case SYNC_SFORDEV: \ { \ struct isp_fc *fc = ISP_FC_PC(isp, chan); \ bus_dmamap_sync(fc->tdmat, fc->tdmap, \ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); \ break; \ } \ case SYNC_REQUEST: \ bus_dmamap_sync(isp->isp_osinfo.cdmat, \ isp->isp_osinfo.cdmap, \ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); \ break; \ case SYNC_SFORCPU: \ { \ struct isp_fc *fc = ISP_FC_PC(isp, chan); \ bus_dmamap_sync(fc->tdmat, fc->tdmap, \ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); \ break; \ } \ case SYNC_RESULT: \ bus_dmamap_sync(isp->isp_osinfo.cdmat, \ isp->isp_osinfo.cdmap, \ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); \ break; \ case SYNC_REG: \ bus_space_barrier(isp->isp_osinfo.bus_tag, \ isp->isp_osinfo.bus_handle, offset, size, \ BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); \ break; \ default: \ break; \ } #define MEMORYBARRIERW(isp, type, offset, size, chan) \ switch (type) { \ case SYNC_SFORDEV: \ { \ struct isp_fc *fc = ISP_FC_PC(isp, chan); \ bus_dmamap_sync(fc->tdmat, fc->tdmap, \ BUS_DMASYNC_PREWRITE); \ break; \ } \ case SYNC_REQUEST: \ bus_dmamap_sync(isp->isp_osinfo.cdmat, \ isp->isp_osinfo.cdmap, BUS_DMASYNC_PREWRITE); \ break; \ case SYNC_SFORCPU: \ { \ struct isp_fc *fc = ISP_FC_PC(isp, chan); \ bus_dmamap_sync(fc->tdmat, fc->tdmap, \ BUS_DMASYNC_POSTWRITE); \ break; \ } \ case SYNC_RESULT: \ bus_dmamap_sync(isp->isp_osinfo.cdmat, \ isp->isp_osinfo.cdmap, BUS_DMASYNC_POSTWRITE); \ break; \ case SYNC_REG: \ bus_space_barrier(isp->isp_osinfo.bus_tag, \ isp->isp_osinfo.bus_handle, offset, size, \ BUS_SPACE_BARRIER_WRITE); \ break; \ default: \ break; \ } #define MBOX_ACQUIRE isp_mbox_acquire #define MBOX_WAIT_COMPLETE isp_mbox_wait_complete #define MBOX_NOTIFY_COMPLETE isp_mbox_notify_done #define MBOX_RELEASE isp_mbox_release #define FC_SCRATCH_ACQUIRE isp_fc_scratch_acquire #define FC_SCRATCH_RELEASE(isp, chan) isp->isp_osinfo.pc.fc[chan].fcbsy = 0 #ifndef SCSI_GOOD #define SCSI_GOOD SCSI_STATUS_OK #endif #ifndef SCSI_CHECK #define SCSI_CHECK SCSI_STATUS_CHECK_COND #endif #ifndef SCSI_BUSY #define SCSI_BUSY SCSI_STATUS_BUSY #endif #ifndef SCSI_QFULL #define SCSI_QFULL SCSI_STATUS_QUEUE_FULL #endif #define XS_T struct ccb_scsiio #define XS_DMA_ADDR_T bus_addr_t #define XS_GET_DMA64_SEG(a, b, c) \ { \ ispds64_t *d = a; \ bus_dma_segment_t *e = b; \ uint32_t f = c; \ e += f; \ d->ds_base = DMA_LO32(e->ds_addr); \ d->ds_basehi = DMA_HI32(e->ds_addr); \ d->ds_count = e->ds_len; \ } #define XS_GET_DMA_SEG(a, b, c) \ { \ ispds_t *d = a; \ bus_dma_segment_t *e = b; \ uint32_t f = c; \ e += f; \ d->ds_base = DMA_LO32(e->ds_addr); \ d->ds_count = e->ds_len; \ } #define XS_ISP(ccb) cam_sim_softc(xpt_path_sim((ccb)->ccb_h.path)) #define XS_CHANNEL(ccb) cam_sim_bus(xpt_path_sim((ccb)->ccb_h.path)) #define XS_TGT(ccb) (ccb)->ccb_h.target_id #define XS_LUN(ccb) (ccb)->ccb_h.target_lun #define XS_CDBP(ccb) \ (((ccb)->ccb_h.flags & CAM_CDB_POINTER)? 
\ (ccb)->cdb_io.cdb_ptr : (ccb)->cdb_io.cdb_bytes) #define XS_CDBLEN(ccb) (ccb)->cdb_len #define XS_XFRLEN(ccb) (ccb)->dxfer_len #define XS_TIME(ccb) (ccb)->ccb_h.timeout #define XS_GET_RESID(ccb) (ccb)->resid #define XS_SET_RESID(ccb, r) (ccb)->resid = r #define XS_STSP(ccb) (&(ccb)->scsi_status) #define XS_SNSP(ccb) (&(ccb)->sense_data) #define XS_TOT_SNSLEN(ccb) ccb->sense_len #define XS_CUR_SNSLEN(ccb) (ccb->sense_len - ccb->sense_resid) #define XS_SNSKEY(ccb) (scsi_get_sense_key(&(ccb)->sense_data, \ ccb->sense_len - ccb->sense_resid, 1)) #define XS_SNSASC(ccb) (scsi_get_asc(&(ccb)->sense_data, \ ccb->sense_len - ccb->sense_resid, 1)) #define XS_SNSASCQ(ccb) (scsi_get_ascq(&(ccb)->sense_data, \ ccb->sense_len - ccb->sense_resid, 1)) #define XS_TAG_P(ccb) \ (((ccb)->ccb_h.flags & CAM_TAG_ACTION_VALID) && \ (ccb)->tag_action != CAM_TAG_ACTION_NONE) #define XS_TAG_TYPE(ccb) \ ((ccb->tag_action == MSG_SIMPLE_Q_TAG)? REQFLAG_STAG : \ ((ccb->tag_action == MSG_HEAD_OF_Q_TAG)? REQFLAG_HTAG : REQFLAG_OTAG)) #define XS_SETERR(ccb, v) (ccb)->ccb_h.status &= ~CAM_STATUS_MASK, \ (ccb)->ccb_h.status |= v # define HBA_NOERROR CAM_REQ_INPROG # define HBA_BOTCH CAM_UNREC_HBA_ERROR # define HBA_CMDTIMEOUT CAM_CMD_TIMEOUT # define HBA_SELTIMEOUT CAM_SEL_TIMEOUT # define HBA_TGTBSY CAM_SCSI_STATUS_ERROR # define HBA_BUSRESET CAM_SCSI_BUS_RESET # define HBA_ABORTED CAM_REQ_ABORTED # define HBA_DATAOVR CAM_DATA_RUN_ERR # define HBA_ARQFAIL CAM_AUTOSENSE_FAIL #define XS_ERR(ccb) ((ccb)->ccb_h.status & CAM_STATUS_MASK) #define XS_NOERR(ccb) (((ccb)->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_INPROG) #define XS_INITERR(ccb) XS_SETERR(ccb, CAM_REQ_INPROG), ccb->sense_resid = ccb->sense_len #define XS_SAVE_SENSE(xs, sense_ptr, totslen, slen) do { \ uint32_t tlen = slen; \ if (tlen > (xs)->sense_len) \ tlen = (xs)->sense_len; \ PISP_PCMD(xs)->totslen = imin((xs)->sense_len, totslen); \ PISP_PCMD(xs)->cumslen = tlen; \ memcpy(&(xs)->sense_data, sense_ptr, tlen); \ (xs)->sense_resid = (xs)->sense_len - tlen; \ (xs)->ccb_h.status |= CAM_AUTOSNS_VALID; \ } while (0) #define XS_SENSE_APPEND(xs, xsnsp, xsnsl) do { \ uint32_t off = PISP_PCMD(xs)->cumslen; \ uint8_t *ptr = &((uint8_t *)(&(xs)->sense_data))[off]; \ uint32_t amt = imin(xsnsl, PISP_PCMD(xs)->totslen - off); \ if (amt) { \ memcpy(ptr, xsnsp, amt); \ (xs)->sense_resid -= amt; \ PISP_PCMD(xs)->cumslen += amt; \ } \ } while (0) #define XS_SENSE_VALID(xs) (((xs)->ccb_h.status & CAM_AUTOSNS_VALID) != 0) #define DEFAULT_FRAMESIZE(isp) isp->isp_osinfo.framesize #define DEFAULT_EXEC_THROTTLE(isp) isp->isp_osinfo.exec_throttle #define GET_DEFAULT_ROLE(isp, chan) \ (IS_FC(isp)? ISP_FC_PC(isp, chan)->def_role : ISP_SPI_PC(isp, chan)->def_role) #define SET_DEFAULT_ROLE(isp, chan, val) \ if (IS_FC(isp)) { \ ISP_FC_PC(isp, chan)->def_role = val; \ } else { \ ISP_SPI_PC(isp, chan)->def_role = val; \ } #define DEFAULT_IID(isp, chan) isp->isp_osinfo.pc.spi[chan].iid #define DEFAULT_LOOPID(x, chan) isp->isp_osinfo.pc.fc[chan].default_id #define DEFAULT_NODEWWN(isp, chan) isp_default_wwn(isp, chan, 0, 1) #define DEFAULT_PORTWWN(isp, chan) isp_default_wwn(isp, chan, 0, 0) #define ACTIVE_NODEWWN(isp, chan) isp_default_wwn(isp, chan, 1, 1) #define ACTIVE_PORTWWN(isp, chan) isp_default_wwn(isp, chan, 1, 0) #if BYTE_ORDER == BIG_ENDIAN #ifdef ISP_SBUS_SUPPORTED #define ISP_IOXPUT_8(isp, s, d) *(d) = s #define ISP_IOXPUT_16(isp, s, d) \ *(d) = (isp->isp_bustype == ISP_BT_SBUS)? s : bswap16(s) #define ISP_IOXPUT_32(isp, s, d) \ *(d) = (isp->isp_bustype == ISP_BT_SBUS)? 
s : bswap32(s) #define ISP_IOXGET_8(isp, s, d) d = (*((uint8_t *)s)) #define ISP_IOXGET_16(isp, s, d) \ d = (isp->isp_bustype == ISP_BT_SBUS)? \ *((uint16_t *)s) : bswap16(*((uint16_t *)s)) #define ISP_IOXGET_32(isp, s, d) \ d = (isp->isp_bustype == ISP_BT_SBUS)? \ *((uint32_t *)s) : bswap32(*((uint32_t *)s)) #else /* ISP_SBUS_SUPPORTED */ #define ISP_IOXPUT_8(isp, s, d) *(d) = s #define ISP_IOXPUT_16(isp, s, d) *(d) = bswap16(s) #define ISP_IOXPUT_32(isp, s, d) *(d) = bswap32(s) #define ISP_IOXGET_8(isp, s, d) d = (*((uint8_t *)s)) #define ISP_IOXGET_16(isp, s, d) d = bswap16(*((uint16_t *)s)) #define ISP_IOXGET_32(isp, s, d) d = bswap32(*((uint32_t *)s)) #endif #define ISP_SWIZZLE_NVRAM_WORD(isp, rp) *rp = bswap16(*rp) #define ISP_SWIZZLE_NVRAM_LONG(isp, rp) *rp = bswap32(*rp) #define ISP_IOZGET_8(isp, s, d) d = (*((uint8_t *)s)) #define ISP_IOZGET_16(isp, s, d) d = (*((uint16_t *)s)) #define ISP_IOZGET_32(isp, s, d) d = (*((uint32_t *)s)) #define ISP_IOZPUT_8(isp, s, d) *(d) = s #define ISP_IOZPUT_16(isp, s, d) *(d) = s #define ISP_IOZPUT_32(isp, s, d) *(d) = s #else #define ISP_IOXPUT_8(isp, s, d) *(d) = s #define ISP_IOXPUT_16(isp, s, d) *(d) = s #define ISP_IOXPUT_32(isp, s, d) *(d) = s #define ISP_IOXGET_8(isp, s, d) d = *(s) #define ISP_IOXGET_16(isp, s, d) d = *(s) #define ISP_IOXGET_32(isp, s, d) d = *(s) #define ISP_SWIZZLE_NVRAM_WORD(isp, rp) #define ISP_SWIZZLE_NVRAM_LONG(isp, rp) #define ISP_IOZPUT_8(isp, s, d) *(d) = s #define ISP_IOZPUT_16(isp, s, d) *(d) = bswap16(s) #define ISP_IOZPUT_32(isp, s, d) *(d) = bswap32(s) #define ISP_IOZGET_8(isp, s, d) d = (*((uint8_t *)(s))) #define ISP_IOZGET_16(isp, s, d) d = bswap16(*((uint16_t *)(s))) #define ISP_IOZGET_32(isp, s, d) d = bswap32(*((uint32_t *)(s))) #endif #define ISP_SWAP16(isp, s) bswap16(s) #define ISP_SWAP32(isp, s) bswap32(s) /* * Includes of common header files */ #include #include #include /* * isp_osinfo definitions && shorthand */ #define SIMQFRZ_RESOURCE 0x1 #define SIMQFRZ_LOOPDOWN 0x2 #define SIMQFRZ_TIMED 0x4 #define isp_dev isp_osinfo.dev /* * prototypes for isp_pci && isp_freebsd to share */ extern int isp_attach(ispsoftc_t *); extern int isp_detach(ispsoftc_t *); extern void isp_uninit(ispsoftc_t *); extern uint64_t isp_default_wwn(ispsoftc_t *, int, int, int); /* * driver global data */ extern int isp_announced; extern int isp_fabric_hysteresis; extern int isp_loop_down_limit; extern int isp_gone_device_time; extern int isp_quickboot_time; /* * Platform private flags */ /* * Platform Library Functions */ void isp_prt(ispsoftc_t *, int level, const char *, ...) __printflike(3, 4); void isp_xs_prt(ispsoftc_t *, XS_T *, int level, const char *, ...)
__printflike(4, 5); uint64_t isp_nanotime_sub(struct timespec *, struct timespec *); int isp_mbox_acquire(ispsoftc_t *); void isp_mbox_wait_complete(ispsoftc_t *, mbreg_t *); void isp_mbox_notify_done(ispsoftc_t *); void isp_mbox_release(ispsoftc_t *); int isp_fc_scratch_acquire(ispsoftc_t *, int); int isp_mstohz(int); void isp_platform_intr(void *); void isp_common_dmateardown(ispsoftc_t *, struct ccb_scsiio *, uint32_t); -void isp_fcp_reset_crn(struct isp_fc *, uint32_t, int); +void isp_fcp_reset_crn(ispsoftc_t *, int, uint32_t, int); int isp_fcp_next_crn(ispsoftc_t *, uint8_t *, XS_T *); /* * Platform Version specific defines */ #define BUS_DMA_ROOTARG(x) bus_get_dma_tag(x) #define isp_dma_tag_create(a, b, c, d, e, f, g, h, i, j, k, z) \ bus_dma_tag_create(a, b, c, d, e, f, g, h, i, j, k, \ busdma_lock_mutex, &isp->isp_osinfo.lock, z) #define isp_setup_intr bus_setup_intr #define isp_sim_alloc(a, b, c, d, e, f, g, h) \ cam_sim_alloc(a, b, c, d, e, &(d)->isp_osinfo.lock, f, g, h) #define ISP_PATH_PRT(i, l, p, ...) \ if ((l) == ISP_LOGALL || ((l)& (i)->isp_dblev) != 0) { \ xpt_print(p, __VA_ARGS__); \ } /* * Platform specific inline functions */ /* * ISP General Library functions */ #include #endif /* _ISP_FREEBSD_H */ Index: projects/powernv/dev/isp/isp_library.c =================================================================== --- projects/powernv/dev/isp/isp_library.c (revision 290990) +++ projects/powernv/dev/isp/isp_library.c (revision 290991) @@ -1,4016 +1,4010 @@ /*- * Copyright (c) 1997-2009 by Matthew Jacob * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Qlogic Host Adapter Internal Library Functions */ #ifdef __NetBSD__ #include __KERNEL_RCSID(0, "$NetBSD$"); #include #endif #ifdef __FreeBSD__ #include __FBSDID("$FreeBSD$"); #include #endif #ifdef __OpenBSD__ #include #endif #ifdef __linux__ #include "isp_linux.h" #endif #ifdef __svr4__ #include "isp_solaris.h" #endif const char *isp_class3_roles[4] = { "None", "Target", "Initiator", "Target/Initiator" }; /* * Command shipping- finish off first queue entry and do dma mapping and additional segments as needed. * * Called with the first queue entry at least partially filled out. 
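 *
 * As a sketch only (a hypothetical caller; the local names here are not
 * part of this API), a 2400-series command could be shipped roughly as:
 *
 *	ispreqt7_t local;
 *
 *	ISP_MEMZERO(&local, QENTRY_LEN);
 *	local.req_header.rqs_entry_type = RQSTYPE_T7RQS;
 *	(fill in req_handle, req_nphdl, req_lun, req_cdb, req_time, ...)
 *	switch (isp_send_cmd(isp, &local, segp, nseg, totalcnt,
 *	    ISP_FROM_DEVICE, NULL)) {
 *	case CMD_QUEUED:	(IOCB and any continuations are queued)
 *	case CMD_EAGAIN:	(no request queue space; retry later)
 *	case CMD_COMPLETE:	(unrecognized entry type; fail the command)
 *	}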
*/ int isp_send_cmd(ispsoftc_t *isp, void *fqe, void *segp, uint32_t nsegs, uint32_t totalcnt, isp_ddir_t ddir, ispds64_t *ecmd) { uint8_t storage[QENTRY_LEN]; uint8_t type, nqe; uint32_t seg, curseg, seglim, nxt, nxtnxt, ddf; ispds_t *dsp = NULL; ispds64_t *dsp64 = NULL; void *qe0, *qe1; qe0 = isp_getrqentry(isp); if (qe0 == NULL) { return (CMD_EAGAIN); } nxt = ISP_NXT_QENTRY(isp->isp_reqidx, RQUEST_QUEUE_LEN(isp)); type = ((isphdr_t *)fqe)->rqs_entry_type; nqe = 1; /* * If we have no data to transmit, just copy the first IOCB and start it up. */ if (ddir == ISP_NOXFR) { if (type == RQSTYPE_T2RQS || type == RQSTYPE_T3RQS) { ddf = CT2_NO_DATA; } else { ddf = 0; } goto copy_and_sync; } /* * First figure out how many pieces of data to transfer and what kind and how many we can put into the first queue entry. */ switch (type) { case RQSTYPE_REQUEST: ddf = (ddir == ISP_TO_DEVICE)? REQFLAG_DATA_OUT : REQFLAG_DATA_IN; dsp = ((ispreq_t *)fqe)->req_dataseg; seglim = ISP_RQDSEG; break; case RQSTYPE_CMDONLY: ddf = (ddir == ISP_TO_DEVICE)? REQFLAG_DATA_OUT : REQFLAG_DATA_IN; seglim = 0; break; case RQSTYPE_T2RQS: ddf = (ddir == ISP_TO_DEVICE)? REQFLAG_DATA_OUT : REQFLAG_DATA_IN; dsp = ((ispreqt2_t *)fqe)->req_dataseg; seglim = ISP_RQDSEG_T2; break; case RQSTYPE_A64: ddf = (ddir == ISP_TO_DEVICE)? REQFLAG_DATA_OUT : REQFLAG_DATA_IN; dsp64 = ((ispreqt3_t *)fqe)->req_dataseg; seglim = ISP_RQDSEG_T3; break; case RQSTYPE_T3RQS: ddf = (ddir == ISP_TO_DEVICE)? REQFLAG_DATA_OUT : REQFLAG_DATA_IN; dsp64 = ((ispreqt3_t *)fqe)->req_dataseg; seglim = ISP_RQDSEG_T3; break; case RQSTYPE_T7RQS: ddf = (ddir == ISP_TO_DEVICE)? FCP_CMND_DATA_WRITE : FCP_CMND_DATA_READ; dsp64 = &((ispreqt7_t *)fqe)->req_dataseg; seglim = 1; break; default: return (CMD_COMPLETE); } if (seglim > nsegs) { seglim = nsegs; } for (seg = curseg = 0; curseg < seglim; curseg++) { if (dsp64) { XS_GET_DMA64_SEG(dsp64++, segp, seg++); } else { XS_GET_DMA_SEG(dsp++, segp, seg++); } } /* * Second, start building additional continuation segments as needed. */ while (seg < nsegs) { nxtnxt = ISP_NXT_QENTRY(nxt, RQUEST_QUEUE_LEN(isp)); if (nxtnxt == isp->isp_reqodx) { isp->isp_reqodx = ISP_READ(isp, isp->isp_rqstoutrp); if (nxtnxt == isp->isp_reqodx) return (CMD_EAGAIN); } ISP_MEMZERO(storage, QENTRY_LEN); qe1 = ISP_QUEUE_ENTRY(isp->isp_rquest, nxt); nxt = nxtnxt; if (dsp64) { ispcontreq64_t *crq = (ispcontreq64_t *) storage; seglim = ISP_CDSEG64; crq->req_header.rqs_entry_type = RQSTYPE_A64_CONT; crq->req_header.rqs_entry_count = 1; dsp64 = crq->req_dataseg; } else { ispcontreq_t *crq = (ispcontreq_t *) storage; seglim = ISP_CDSEG; crq->req_header.rqs_entry_type = RQSTYPE_DATASEG; crq->req_header.rqs_entry_count = 1; dsp = crq->req_dataseg; } if (seg + seglim > nsegs) { seglim = nsegs - seg; } for (curseg = 0; curseg < seglim; curseg++) { if (dsp64) { XS_GET_DMA64_SEG(dsp64++, segp, seg++); } else { XS_GET_DMA_SEG(dsp++, segp, seg++); } } if (dsp64) { isp_put_cont64_req(isp, (ispcontreq64_t *)storage, qe1); } else { isp_put_cont_req(isp, (ispcontreq_t *)storage, qe1); } if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "additional queue entry", QENTRY_LEN, storage); } nqe++; } copy_and_sync: ((isphdr_t *)fqe)->rqs_entry_count = nqe; switch (type) { case RQSTYPE_REQUEST: ((ispreq_t *)fqe)->req_flags |= ddf; /* * This is historical and not clear whether really needed. 
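 * (With ddir == ISP_NOXFR a command can arrive here with nsegs still zero;
 * the statement below forces the outgoing req_seg_count to at least one
 * before the IOCB is copied out.)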
*/ if (nsegs == 0) { nsegs = 1; } ((ispreq_t *)fqe)->req_seg_count = nsegs; isp_put_request(isp, fqe, qe0); break; case RQSTYPE_CMDONLY: ((ispreq_t *)fqe)->req_flags |= ddf; /* * This is historical and not clear whether really needed. */ if (nsegs == 0) { nsegs = 1; } ((ispextreq_t *)fqe)->req_seg_count = nsegs; isp_put_extended_request(isp, fqe, qe0); break; case RQSTYPE_T2RQS: ((ispreqt2_t *)fqe)->req_flags |= ddf; ((ispreqt2_t *)fqe)->req_seg_count = nsegs; ((ispreqt2_t *)fqe)->req_totalcnt = totalcnt; if (ISP_CAP_2KLOGIN(isp)) { isp_put_request_t2e(isp, fqe, qe0); } else { isp_put_request_t2(isp, fqe, qe0); } break; case RQSTYPE_A64: case RQSTYPE_T3RQS: ((ispreqt3_t *)fqe)->req_flags |= ddf; ((ispreqt3_t *)fqe)->req_seg_count = nsegs; ((ispreqt3_t *)fqe)->req_totalcnt = totalcnt; if (ISP_CAP_2KLOGIN(isp)) { isp_put_request_t3e(isp, fqe, qe0); } else { isp_put_request_t3(isp, fqe, qe0); } break; case RQSTYPE_T7RQS: ((ispreqt7_t *)fqe)->req_alen_datadir = ddf; ((ispreqt7_t *)fqe)->req_seg_count = nsegs; ((ispreqt7_t *)fqe)->req_dl = totalcnt; isp_put_request_t7(isp, fqe, qe0); break; default: return (CMD_COMPLETE); } if (isp->isp_dblev & ISP_LOGDEBUG1) { isp_print_bytes(isp, "first queue entry", QENTRY_LEN, fqe); } ISP_ADD_REQUEST(isp, nxt); return (CMD_QUEUED); } int isp_allocate_xs(ispsoftc_t *isp, XS_T *xs, uint32_t *handlep) { isp_hdl_t *hdp; hdp = isp->isp_xffree; if (hdp == NULL) { return (-1); } isp->isp_xffree = hdp->cmd; hdp->cmd = xs; hdp->handle = (hdp - isp->isp_xflist); hdp->handle |= (ISP_HANDLE_INITIATOR << ISP_HANDLE_USAGE_SHIFT); hdp->handle |= (isp->isp_seqno++ << ISP_HANDLE_SEQ_SHIFT); *handlep = hdp->handle; return (0); } XS_T * isp_find_xs(ispsoftc_t *isp, uint32_t handle) { if (!ISP_VALID_INI_HANDLE(isp, handle)) { isp_prt(isp, ISP_LOGERR, "%s: bad handle 0x%x", __func__, handle); return (NULL); } return (isp->isp_xflist[(handle & ISP_HANDLE_CMD_MASK)].cmd); } uint32_t isp_find_handle(ispsoftc_t *isp, XS_T *xs) { uint32_t i, foundhdl = ISP_HANDLE_FREE; if (xs != NULL) { for (i = 0; i < isp->isp_maxcmds; i++) { if (isp->isp_xflist[i].cmd != xs) { continue; } foundhdl = isp->isp_xflist[i].handle; break; } } return (foundhdl); } uint32_t isp_handle_index(ispsoftc_t *isp, uint32_t handle) { if (!ISP_VALID_HANDLE(isp, handle)) { isp_prt(isp, ISP_LOGERR, "%s: bad handle 0x%x", __func__, handle); return (ISP_BAD_HANDLE_INDEX); } else { return (handle & ISP_HANDLE_CMD_MASK); } } void isp_destroy_handle(ispsoftc_t *isp, uint32_t handle) { if (!ISP_VALID_INI_HANDLE(isp, handle)) { isp_prt(isp, ISP_LOGERR, "%s: bad handle 0x%x", __func__, handle); } else { isp->isp_xflist[(handle & ISP_HANDLE_CMD_MASK)].handle = ISP_HANDLE_FREE; isp->isp_xflist[(handle & ISP_HANDLE_CMD_MASK)].cmd = isp->isp_xffree; isp->isp_xffree = &isp->isp_xflist[(handle & ISP_HANDLE_CMD_MASK)]; } } /* * Make sure we have space to put something on the request queue. * Return a pointer to that entry if we do. A side effect of this * function is to update the output index. The input index * stays the same. 
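 *
 * Typical usage, as a sketch (locking and error details elided):
 *
 *	void *qe = isp_getrqentry(isp);
 *	if (qe == NULL)
 *		return (CMD_EAGAIN);	(request queue is full)
 *	(build or copy the IOCB at qe)
 *	ISP_ADD_REQUEST(isp,
 *	    ISP_NXT_QENTRY(isp->isp_reqidx, RQUEST_QUEUE_LEN(isp)));
 *
 * where ISP_ADD_REQUEST advances the input index that this function
 * deliberately leaves alone.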
*/ void * isp_getrqentry(ispsoftc_t *isp) { uint32_t next; next = ISP_NXT_QENTRY(isp->isp_reqidx, RQUEST_QUEUE_LEN(isp)); if (next == isp->isp_reqodx) { isp->isp_reqodx = ISP_READ(isp, isp->isp_rqstoutrp); if (next == isp->isp_reqodx) return (NULL); } return (ISP_QUEUE_ENTRY(isp->isp_rquest, isp->isp_reqidx)); } #define TBA (4 * (((QENTRY_LEN >> 2) * 3) + 1) + 1) void isp_print_qentry(ispsoftc_t *isp, const char *msg, int idx, void *arg) { char buf[TBA]; int amt, i, j; uint8_t *ptr = arg; isp_prt(isp, ISP_LOGALL, "%s index %d=>", msg, idx); for (buf[0] = 0, amt = i = 0; i < 4; i++) { buf[0] = 0; ISP_SNPRINTF(buf, TBA, " "); for (j = 0; j < (QENTRY_LEN >> 2); j++) { ISP_SNPRINTF(buf, TBA, "%s %02x", buf, ptr[amt++] & 0xff); } isp_prt(isp, ISP_LOGALL, "%s", buf); } } void isp_print_bytes(ispsoftc_t *isp, const char *msg, int amt, void *arg) { char buf[128]; uint8_t *ptr = arg; int off; if (msg) isp_prt(isp, ISP_LOGALL, "%s:", msg); off = 0; buf[0] = 0; while (off < amt) { int j, to; to = off; for (j = 0; j < 16; j++) { ISP_SNPRINTF(buf, 128, "%s %02x", buf, ptr[off++] & 0xff); if (off == amt) { break; } } isp_prt(isp, ISP_LOGALL, "0x%08x:%s", to, buf); buf[0] = 0; } } /* * Do the common path to try and ensure that link is up, we've scanned * the fabric (if we're on a fabric), and that we've synchronized this * all with our own database and done the appropriate logins. * * We repeatedly check for firmware state and loop state after each * action because things may have changed while we were doing this. * Any failure or change of state causes us to return a nonzero value. * * We assume we enter here with any locks held. */ int isp_fc_runstate(ispsoftc_t *isp, int chan, int tval) { fcparam *fcp; fcp = FCPARAM(isp, chan); if (fcp->role == ISP_ROLE_NONE) { return (0); } if (fcp->isp_fwstate < FW_READY || fcp->isp_loopstate < LOOP_PDB_RCVD) { if (isp_control(isp, ISPCTL_FCLINK_TEST, chan, tval) != 0) { isp_prt(isp, ISP_LOG_SANCFG, "isp_fc_runstate: linktest failed for channel %d", chan); return (-1); } if (fcp->isp_fwstate != FW_READY || fcp->isp_loopstate < LOOP_PDB_RCVD) { isp_prt(isp, ISP_LOG_SANCFG, "isp_fc_runstate: f/w not ready for channel %d", chan); return (-1); } } if (isp_control(isp, ISPCTL_SCAN_LOOP, chan) != 0) { isp_prt(isp, ISP_LOG_SANCFG, "isp_fc_runstate: scan loop fails on channel %d", chan); return (LOOP_PDB_RCVD); } if (isp_control(isp, ISPCTL_SCAN_FABRIC, chan) != 0) { isp_prt(isp, ISP_LOG_SANCFG, "isp_fc_runstate: scan fabric fails on channel %d", chan); return (LOOP_LSCAN_DONE); } if (isp_control(isp, ISPCTL_PDB_SYNC, chan) != 0) { isp_prt(isp, ISP_LOG_SANCFG, "isp_fc_runstate: pdb_sync fails on channel %d", chan); return (LOOP_FSCAN_DONE); } if (fcp->isp_fwstate != FW_READY || fcp->isp_loopstate != LOOP_READY) { isp_prt(isp, ISP_LOG_SANCFG, "isp_fc_runstate: f/w not ready again on channel %d", chan); return (-1); } return (0); } /* * Fibre Channel Support routines */ void isp_dump_portdb(ispsoftc_t *isp, int chan) { fcparam *fcp = FCPARAM(isp, chan); int i; for (i = 0; i < MAX_FC_TARG; i++) { char buf1[64], buf2[64]; const char *dbs[8] = { "NIL ", "PROB", "DEAD", "CHGD", "NEW ", "PVLD", "ZOMB", "VLD " }; fcportdb_t *lp = &fcp->portdb[i]; if (lp->state == FC_PORTDB_STATE_NIL) { continue; } isp_gen_role_str(buf1, sizeof (buf1), lp->prli_word3); isp_gen_role_str(buf2, sizeof (buf2), lp->new_prli_word3); isp_prt(isp, ISP_LOGALL, "Chan %d [%d]: hdl 0x%x %s al%d %s 0x%06x =>%s 0x%06x; WWNN 0x%08x%08x WWPN 0x%08x%08x", chan, i, lp->handle, dbs[lp->state], lp->autologin, buf1, 
lp->portid, buf2, lp->new_portid, (uint32_t) (lp->node_wwn >> 32), (uint32_t) (lp->node_wwn), (uint32_t) (lp->port_wwn >> 32), (uint32_t) (lp->port_wwn)); } } void isp_gen_role_str(char *buf, size_t len, uint16_t p3) { int nd = 0; buf[0] = '('; buf[1] = 0; if (p3 & PRLI_WD3_ENHANCED_DISCOVERY) { nd++; strlcat(buf, "EDisc", len); } if (p3 & PRLI_WD3_REC_SUPPORT) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "REC", len); } if (p3 & PRLI_WD3_TASK_RETRY_IDENTIFICATION_REQUESTED) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "RetryID", len); } if (p3 & PRLI_WD3_RETRY) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "Retry", len); } if (p3 & PRLI_WD3_CONFIRMED_COMPLETION_ALLOWED) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "CNFRM", len); } if (p3 & PRLI_WD3_DATA_OVERLAY_ALLOWED) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "DOver", len); } if (p3 & PRLI_WD3_INITIATOR_FUNCTION) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "INI", len); } if (p3 & PRLI_WD3_TARGET_FUNCTION) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "TGT", len); } if (p3 & PRLI_WD3_READ_FCP_XFER_RDY_DISABLED) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "RdXfrDis", len); } if (p3 & PRLI_WD3_WRITE_FCP_XFER_RDY_DISABLED) { if (nd++) { strlcat(buf, ",", len); } strlcat(buf, "XfrDis", len); } strlcat(buf, ")", len); } const char * isp_fc_fw_statename(int state) { switch (state) { case FW_CONFIG_WAIT: return "Config Wait"; case FW_WAIT_AL_PA: return "Waiting for AL_PA"; case FW_WAIT_LOGIN: return "Wait Login"; case FW_READY: return "Ready"; case FW_LOSS_OF_SYNC: return "Loss Of Sync"; case FW_ERROR: return "Error"; case FW_REINIT: return "Re-Init"; case FW_NON_PART: return "Nonparticipating"; default: return "?????"; } } const char * isp_fc_loop_statename(int state) { switch (state) { case LOOP_NIL: return "NIL"; case LOOP_LIP_RCVD: return "LIP Received"; case LOOP_PDB_RCVD: return "PDB Received"; case LOOP_SCANNING_LOOP: return "Scanning"; case LOOP_LSCAN_DONE: return "Loop Scan Done"; case LOOP_SCANNING_FABRIC: return "Scanning Fabric"; case LOOP_FSCAN_DONE: return "Fabric Scan Done"; case LOOP_SYNCING_PDB: return "Syncing PDB"; case LOOP_READY: return "Ready"; default: return "?????"; } } const char * isp_fc_toponame(fcparam *fcp) { if (fcp->isp_fwstate != FW_READY) { return "Unavailable"; } switch (fcp->isp_topo) { case TOPO_NL_PORT: return "Private Loop"; case TOPO_FL_PORT: return "FL Port"; case TOPO_N_PORT: return "N-Port to N-Port"; case TOPO_F_PORT: return "F Port"; case TOPO_PTP_STUB: return "F Port (no FLOGI_ACC response)"; default: return "?????"; } } static int isp_fc_enable_vp(ispsoftc_t *isp, int chan) { fcparam *fcp = FCPARAM(isp, chan); mbreg_t mbs; vp_modify_t *vp; uint8_t qe[QENTRY_LEN], *scp; ISP_MEMZERO(qe, QENTRY_LEN); if (FC_SCRATCH_ACQUIRE(isp, chan)) { return (EBUSY); } scp = fcp->isp_scratch; /* * Build a VP MODIFY command in memory */ vp = (vp_modify_t *) qe; vp->vp_mod_hdr.rqs_entry_type = RQSTYPE_VP_MODIFY; vp->vp_mod_hdr.rqs_entry_count = 1; vp->vp_mod_cnt = 1; vp->vp_mod_idx0 = chan; vp->vp_mod_cmd = VP_MODIFY_ENA; - vp->vp_mod_ports[0].options = ICB2400_VPOPT_ENABLED; + vp->vp_mod_ports[0].options = ICB2400_VPOPT_ENABLED | + ICB2400_VPOPT_ENA_SNSLOGIN; if (fcp->role & ISP_ROLE_INITIATOR) { vp->vp_mod_ports[0].options |= ICB2400_VPOPT_INI_ENABLE; } if ((fcp->role & ISP_ROLE_TARGET) == 0) { vp->vp_mod_ports[0].options |= ICB2400_VPOPT_TGT_DISABLE; } if (fcp->isp_loopid < LOCAL_LOOP_LIM) { vp->vp_mod_ports[0].loopid = fcp->isp_loopid; if 
(isp->isp_confopts & ISP_CFG_OWNLOOPID) vp->vp_mod_ports[0].options |= ICB2400_VPOPT_HARD_ADDRESS; else vp->vp_mod_ports[0].options |= ICB2400_VPOPT_PREV_ADDRESS; } MAKE_NODE_NAME_FROM_WWN(vp->vp_mod_ports[0].wwpn, fcp->isp_wwpn); MAKE_NODE_NAME_FROM_WWN(vp->vp_mod_ports[0].wwnn, fcp->isp_wwnn); isp_put_vp_modify(isp, vp, (vp_modify_t *) scp); /* * Build an EXEC IOCB A64 command that points to the VP MODIFY command */ MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 0); mbs.param[1] = QENTRY_LEN; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, 2 * QENTRY_LEN, chan); isp_control(isp, ISPCTL_RUN_MBOXCMD, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { FC_SCRATCH_RELEASE(isp, chan); return (EIO); } MEMORYBARRIER(isp, SYNC_SFORCPU, QENTRY_LEN, QENTRY_LEN, chan); isp_get_vp_modify(isp, (vp_modify_t *)&scp[QENTRY_LEN], vp); FC_SCRATCH_RELEASE(isp, chan); if (vp->vp_mod_status != VP_STS_OK) { isp_prt(isp, ISP_LOGERR, "%s: VP_MODIFY of Chan %d failed with status %d", __func__, chan, vp->vp_mod_status); return (EIO); } return (0); } static int isp_fc_disable_vp(ispsoftc_t *isp, int chan) { fcparam *fcp = FCPARAM(isp, chan); mbreg_t mbs; vp_ctrl_info_t *vp; uint8_t qe[QENTRY_LEN], *scp; ISP_MEMZERO(qe, QENTRY_LEN); if (FC_SCRATCH_ACQUIRE(isp, chan)) { return (EBUSY); } scp = fcp->isp_scratch; /* * Build a VP CTRL command in memory */ vp = (vp_ctrl_info_t *) qe; vp->vp_ctrl_hdr.rqs_entry_type = RQSTYPE_VP_CTRL; vp->vp_ctrl_hdr.rqs_entry_count = 1; if (ISP_CAP_VP0(isp)) { vp->vp_ctrl_status = 1; } else { vp->vp_ctrl_status = 0; chan--; /* VP0 cannot be controlled in this case. */ } vp->vp_ctrl_command = VP_CTRL_CMD_DISABLE_VP_LOGO_ALL; vp->vp_ctrl_vp_count = 1; vp->vp_ctrl_idmap[chan / 16] |= (1 << chan % 16); isp_put_vp_ctrl_info(isp, vp, (vp_ctrl_info_t *) scp); /* * Build an EXEC IOCB A64 command that points to the VP CTRL command */ MBSINIT(&mbs, MBOX_EXEC_COMMAND_IOCB_A64, MBLOGALL, 0); mbs.param[1] = QENTRY_LEN; mbs.param[2] = DMA_WD1(fcp->isp_scdma); mbs.param[3] = DMA_WD0(fcp->isp_scdma); mbs.param[6] = DMA_WD3(fcp->isp_scdma); mbs.param[7] = DMA_WD2(fcp->isp_scdma); MEMORYBARRIER(isp, SYNC_SFORDEV, 0, 2 * QENTRY_LEN, chan); isp_control(isp, ISPCTL_RUN_MBOXCMD, &mbs); if (mbs.param[0] != MBOX_COMMAND_COMPLETE) { FC_SCRATCH_RELEASE(isp, chan); return (EIO); } MEMORYBARRIER(isp, SYNC_SFORCPU, QENTRY_LEN, QENTRY_LEN, chan); isp_get_vp_ctrl_info(isp, (vp_ctrl_info_t *)&scp[QENTRY_LEN], vp); FC_SCRATCH_RELEASE(isp, chan); if (vp->vp_ctrl_status != 0) { isp_prt(isp, ISP_LOGERR, "%s: VP_CTRL of Chan %d failed with status %d %d", __func__, chan, vp->vp_ctrl_status, vp->vp_ctrl_index_fail); return (EIO); } return (0); } /* * Change Roles */ int isp_fc_change_role(ispsoftc_t *isp, int chan, int new_role) { fcparam *fcp = FCPARAM(isp, chan); int i, was, res = 0; if (chan >= isp->isp_nchan) { isp_prt(isp, ISP_LOGWARN, "%s: bad channel %d", __func__, chan); return (ENXIO); } if (fcp->role == new_role) return (0); for (was = 0, i = 0; i < isp->isp_nchan; i++) { if (FCPARAM(isp, i)->role != ISP_ROLE_NONE) was++; } if (was == 0 || (was == 1 && fcp->role != ISP_ROLE_NONE)) { fcp->role = new_role; return (isp_reinit(isp, 0)); } if (fcp->role != ISP_ROLE_NONE) res = isp_fc_disable_vp(isp, chan); fcp->role = new_role; if (fcp->role != ISP_ROLE_NONE) res = isp_fc_enable_vp(isp, chan); return (res); } void isp_clear_commands(ispsoftc_t *isp) { uint32_t tmp;
isp_hdl_t *hdp; #ifdef ISP_TARGET_MODE isp_notify_t notify; #endif for (tmp = 0; isp->isp_xflist && tmp < isp->isp_maxcmds; tmp++) { XS_T *xs; hdp = &isp->isp_xflist[tmp]; if (hdp->handle == ISP_HANDLE_FREE) { continue; } xs = hdp->cmd; if (XS_XFRLEN(xs)) { ISP_DMAFREE(isp, xs, hdp->handle); XS_SET_RESID(xs, XS_XFRLEN(xs)); } else { XS_SET_RESID(xs, 0); } hdp->handle = 0; hdp->cmd = NULL; XS_SETERR(xs, HBA_BUSRESET); isp_done(xs); } #ifdef ISP_TARGET_MODE for (tmp = 0; isp->isp_tgtlist && tmp < isp->isp_maxcmds; tmp++) { uint8_t local[QENTRY_LEN]; hdp = &isp->isp_tgtlist[tmp]; if (hdp->handle == ISP_HANDLE_FREE) { continue; } ISP_DMAFREE(isp, hdp->cmd, hdp->handle); ISP_MEMZERO(local, QENTRY_LEN); if (IS_24XX(isp)) { ct7_entry_t *ctio = (ct7_entry_t *) local; ctio->ct_syshandle = hdp->handle; ctio->ct_nphdl = CT_HBA_RESET; ctio->ct_header.rqs_entry_type = RQSTYPE_CTIO7; } else if (IS_FC(isp)) { ct2_entry_t *ctio = (ct2_entry_t *) local; ctio->ct_syshandle = hdp->handle; ctio->ct_status = CT_HBA_RESET; ctio->ct_header.rqs_entry_type = RQSTYPE_CTIO2; } else { ct_entry_t *ctio = (ct_entry_t *) local; ctio->ct_syshandle = hdp->handle; ctio->ct_status = CT_HBA_RESET & 0xff; ctio->ct_header.rqs_entry_type = RQSTYPE_CTIO; } isp_async(isp, ISPASYNC_TARGET_ACTION, local); } for (tmp = 0; tmp < isp->isp_nchan; tmp++) { ISP_MEMZERO(&notify, sizeof (isp_notify_t)); notify.nt_ncode = NT_HBA_RESET; notify.nt_hba = isp; notify.nt_wwn = INI_ANY; notify.nt_nphdl = NIL_HANDLE; notify.nt_sid = PORT_ANY; notify.nt_did = PORT_ANY; notify.nt_tgt = TGT_ANY; notify.nt_channel = tmp; notify.nt_lun = LUN_ANY; notify.nt_tagval = TAG_ANY; isp_async(isp, ISPASYNC_TARGET_NOTIFY, &notify); } #endif } void isp_shutdown(ispsoftc_t *isp) { if (IS_FC(isp)) { if (IS_24XX(isp)) { ISP_WRITE(isp, BIU2400_ICR, 0); ISP_WRITE(isp, BIU2400_HCCR, HCCR_2400_CMD_PAUSE); } else { ISP_WRITE(isp, BIU_ICR, 0); ISP_WRITE(isp, HCCR, HCCR_CMD_PAUSE); ISP_WRITE(isp, BIU2100_CSR, BIU2100_FPM0_REGS); ISP_WRITE(isp, FPM_DIAG_CONFIG, FPM_SOFT_RESET); ISP_WRITE(isp, BIU2100_CSR, BIU2100_FB_REGS); ISP_WRITE(isp, FBM_CMD, FBMCMD_FIFO_RESET_ALL); ISP_WRITE(isp, BIU2100_CSR, BIU2100_RISC_REGS); } } else { ISP_WRITE(isp, BIU_ICR, 0); ISP_WRITE(isp, HCCR, HCCR_CMD_PAUSE); } } /* * Functions to move stuff to a form that the QLogic RISC engine understands * and functions to move stuff back to a form the processor understands. * * Each platform is required to provide the 8, 16 and 32 bit * swizzle and unswizzle macros (ISP_IOX{PUT|GET}_{8,16,32}) * * The assumption is that swizzling and unswizzling is mostly done 'in place' * (with a few exceptions for efficiency).
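 *
 * For example, with the little-endian FreeBSD definitions shown earlier in
 * this change, the 16-bit pair effectively reduces to
 *
 *	ISP_IOXPUT_16(isp, s, d)	->	*(d) = s
 *	ISP_IOZPUT_16(isp, s, d)	->	*(d) = bswap16(s)
 *
 * i.e. the IOX accessors pass data through untouched and the IOZ accessors
 * (used by the 24xx paths, e.g. isp_get_cont_response below) byte-swap,
 * with the two roles reversed on a big-endian host.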
*/ #define ISP_IS_SBUS(isp) (ISP_SBUS_SUPPORTED && (isp)->isp_bustype == ISP_BT_SBUS) #define ASIZE(x) (sizeof (x) / sizeof (x[0])) /* * Swizzle/Copy Functions */ void isp_put_hdr(ispsoftc_t *isp, isphdr_t *hpsrc, isphdr_t *hpdst) { if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, hpsrc->rqs_entry_type, &hpdst->rqs_entry_count); ISP_IOXPUT_8(isp, hpsrc->rqs_entry_count, &hpdst->rqs_entry_type); ISP_IOXPUT_8(isp, hpsrc->rqs_seqno, &hpdst->rqs_flags); ISP_IOXPUT_8(isp, hpsrc->rqs_flags, &hpdst->rqs_seqno); } else { ISP_IOXPUT_8(isp, hpsrc->rqs_entry_type, &hpdst->rqs_entry_type); ISP_IOXPUT_8(isp, hpsrc->rqs_entry_count, &hpdst->rqs_entry_count); ISP_IOXPUT_8(isp, hpsrc->rqs_seqno, &hpdst->rqs_seqno); ISP_IOXPUT_8(isp, hpsrc->rqs_flags, &hpdst->rqs_flags); } } void isp_get_hdr(ispsoftc_t *isp, isphdr_t *hpsrc, isphdr_t *hpdst) { if (ISP_IS_SBUS(isp)) { ISP_IOXGET_8(isp, &hpsrc->rqs_entry_type, hpdst->rqs_entry_count); ISP_IOXGET_8(isp, &hpsrc->rqs_entry_count, hpdst->rqs_entry_type); ISP_IOXGET_8(isp, &hpsrc->rqs_seqno, hpdst->rqs_flags); ISP_IOXGET_8(isp, &hpsrc->rqs_flags, hpdst->rqs_seqno); } else { ISP_IOXGET_8(isp, &hpsrc->rqs_entry_type, hpdst->rqs_entry_type); ISP_IOXGET_8(isp, &hpsrc->rqs_entry_count, hpdst->rqs_entry_count); ISP_IOXGET_8(isp, &hpsrc->rqs_seqno, hpdst->rqs_seqno); ISP_IOXGET_8(isp, &hpsrc->rqs_flags, hpdst->rqs_flags); } } int isp_get_response_type(ispsoftc_t *isp, isphdr_t *hp) { uint8_t type; if (ISP_IS_SBUS(isp)) { ISP_IOXGET_8(isp, &hp->rqs_entry_count, type); } else { ISP_IOXGET_8(isp, &hp->rqs_entry_type, type); } return ((int)type); } void isp_put_request(ispsoftc_t *isp, ispreq_t *rqsrc, ispreq_t *rqdst) { int i; isp_put_hdr(isp, &rqsrc->req_header, &rqdst->req_header); ISP_IOXPUT_32(isp, rqsrc->req_handle, &rqdst->req_handle); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, rqsrc->req_lun_trn, &rqdst->req_target); ISP_IOXPUT_8(isp, rqsrc->req_target, &rqdst->req_lun_trn); } else { ISP_IOXPUT_8(isp, rqsrc->req_lun_trn, &rqdst->req_lun_trn); ISP_IOXPUT_8(isp, rqsrc->req_target, &rqdst->req_target); } ISP_IOXPUT_16(isp, rqsrc->req_cdblen, &rqdst->req_cdblen); ISP_IOXPUT_16(isp, rqsrc->req_flags, &rqdst->req_flags); ISP_IOXPUT_16(isp, rqsrc->req_time, &rqdst->req_time); ISP_IOXPUT_16(isp, rqsrc->req_seg_count, &rqdst->req_seg_count); for (i = 0; i < ASIZE(rqsrc->req_cdb); i++) { ISP_IOXPUT_8(isp, rqsrc->req_cdb[i], &rqdst->req_cdb[i]); } for (i = 0; i < ISP_RQDSEG; i++) { ISP_IOXPUT_32(isp, rqsrc->req_dataseg[i].ds_base, &rqdst->req_dataseg[i].ds_base); ISP_IOXPUT_32(isp, rqsrc->req_dataseg[i].ds_count, &rqdst->req_dataseg[i].ds_count); } } void isp_put_marker(ispsoftc_t *isp, isp_marker_t *src, isp_marker_t *dst) { int i; isp_put_hdr(isp, &src->mrk_header, &dst->mrk_header); ISP_IOXPUT_32(isp, src->mrk_handle, &dst->mrk_handle); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->mrk_reserved0, &dst->mrk_target); ISP_IOXPUT_8(isp, src->mrk_target, &dst->mrk_reserved0); } else { ISP_IOXPUT_8(isp, src->mrk_reserved0, &dst->mrk_reserved0); ISP_IOXPUT_8(isp, src->mrk_target, &dst->mrk_target); } ISP_IOXPUT_16(isp, src->mrk_modifier, &dst->mrk_modifier); ISP_IOXPUT_16(isp, src->mrk_flags, &dst->mrk_flags); ISP_IOXPUT_16(isp, src->mrk_lun, &dst->mrk_lun); for (i = 0; i < ASIZE(src->mrk_reserved1); i++) { ISP_IOXPUT_8(isp, src->mrk_reserved1[i], &dst->mrk_reserved1[i]); } } void isp_put_marker_24xx(ispsoftc_t *isp, isp_marker_24xx_t *src, isp_marker_24xx_t *dst) { int i; isp_put_hdr(isp, &src->mrk_header, &dst->mrk_header); ISP_IOXPUT_32(isp, src->mrk_handle, &dst->mrk_handle); 
ISP_IOXPUT_16(isp, src->mrk_nphdl, &dst->mrk_nphdl); ISP_IOXPUT_8(isp, src->mrk_modifier, &dst->mrk_modifier); ISP_IOXPUT_8(isp, src->mrk_reserved0, &dst->mrk_reserved0); ISP_IOXPUT_8(isp, src->mrk_reserved1, &dst->mrk_reserved1); ISP_IOXPUT_8(isp, src->mrk_vphdl, &dst->mrk_vphdl); ISP_IOXPUT_8(isp, src->mrk_reserved2, &dst->mrk_reserved2); for (i = 0; i < ASIZE(src->mrk_lun); i++) { ISP_IOXPUT_8(isp, src->mrk_lun[i], &dst->mrk_lun[i]); } for (i = 0; i < ASIZE(src->mrk_reserved3); i++) { ISP_IOXPUT_8(isp, src->mrk_reserved3[i], &dst->mrk_reserved3[i]); } } void isp_put_request_t2(ispsoftc_t *isp, ispreqt2_t *src, ispreqt2_t *dst) { int i; isp_put_hdr(isp, &src->req_header, &dst->req_header); ISP_IOXPUT_32(isp, src->req_handle, &dst->req_handle); ISP_IOXPUT_8(isp, src->req_lun_trn, &dst->req_lun_trn); ISP_IOXPUT_8(isp, src->req_target, &dst->req_target); ISP_IOXPUT_16(isp, src->req_scclun, &dst->req_scclun); ISP_IOXPUT_16(isp, src->req_flags, &dst->req_flags); ISP_IOXPUT_16(isp, src->req_reserved, &dst->req_reserved); ISP_IOXPUT_16(isp, src->req_time, &dst->req_time); ISP_IOXPUT_16(isp, src->req_seg_count, &dst->req_seg_count); for (i = 0; i < ASIZE(src->req_cdb); i++) { ISP_IOXPUT_8(isp, src->req_cdb[i], &dst->req_cdb[i]); } ISP_IOXPUT_32(isp, src->req_totalcnt, &dst->req_totalcnt); for (i = 0; i < ISP_RQDSEG_T2; i++) { ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_base, &dst->req_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_count, &dst->req_dataseg[i].ds_count); } } void isp_put_request_t2e(ispsoftc_t *isp, ispreqt2e_t *src, ispreqt2e_t *dst) { int i; isp_put_hdr(isp, &src->req_header, &dst->req_header); ISP_IOXPUT_32(isp, src->req_handle, &dst->req_handle); ISP_IOXPUT_16(isp, src->req_target, &dst->req_target); ISP_IOXPUT_16(isp, src->req_scclun, &dst->req_scclun); ISP_IOXPUT_16(isp, src->req_flags, &dst->req_flags); ISP_IOXPUT_16(isp, src->req_reserved, &dst->req_reserved); ISP_IOXPUT_16(isp, src->req_time, &dst->req_time); ISP_IOXPUT_16(isp, src->req_seg_count, &dst->req_seg_count); for (i = 0; i < ASIZE(src->req_cdb); i++) { ISP_IOXPUT_8(isp, src->req_cdb[i], &dst->req_cdb[i]); } ISP_IOXPUT_32(isp, src->req_totalcnt, &dst->req_totalcnt); for (i = 0; i < ISP_RQDSEG_T2; i++) { ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_base, &dst->req_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_count, &dst->req_dataseg[i].ds_count); } } void isp_put_request_t3(ispsoftc_t *isp, ispreqt3_t *src, ispreqt3_t *dst) { int i; isp_put_hdr(isp, &src->req_header, &dst->req_header); ISP_IOXPUT_32(isp, src->req_handle, &dst->req_handle); ISP_IOXPUT_8(isp, src->req_lun_trn, &dst->req_lun_trn); ISP_IOXPUT_8(isp, src->req_target, &dst->req_target); ISP_IOXPUT_16(isp, src->req_scclun, &dst->req_scclun); ISP_IOXPUT_16(isp, src->req_flags, &dst->req_flags); ISP_IOXPUT_8(isp, src->req_crn, &dst->req_crn); ISP_IOXPUT_8(isp, src->req_reserved, &dst->req_reserved); ISP_IOXPUT_16(isp, src->req_time, &dst->req_time); ISP_IOXPUT_16(isp, src->req_seg_count, &dst->req_seg_count); for (i = 0; i < ASIZE(src->req_cdb); i++) { ISP_IOXPUT_8(isp, src->req_cdb[i], &dst->req_cdb[i]); } ISP_IOXPUT_32(isp, src->req_totalcnt, &dst->req_totalcnt); for (i = 0; i < ISP_RQDSEG_T3; i++) { ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_base, &dst->req_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_basehi, &dst->req_dataseg[i].ds_basehi); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_count, &dst->req_dataseg[i].ds_count); } } void isp_put_request_t3e(ispsoftc_t *isp, ispreqt3e_t *src, ispreqt3e_t *dst) { int 
i; isp_put_hdr(isp, &src->req_header, &dst->req_header); ISP_IOXPUT_32(isp, src->req_handle, &dst->req_handle); ISP_IOXPUT_16(isp, src->req_target, &dst->req_target); ISP_IOXPUT_16(isp, src->req_scclun, &dst->req_scclun); ISP_IOXPUT_16(isp, src->req_flags, &dst->req_flags); ISP_IOXPUT_8(isp, src->req_crn, &dst->req_crn); ISP_IOXPUT_8(isp, src->req_reserved, &dst->req_reserved); ISP_IOXPUT_16(isp, src->req_time, &dst->req_time); ISP_IOXPUT_16(isp, src->req_seg_count, &dst->req_seg_count); for (i = 0; i < ASIZE(src->req_cdb); i++) { ISP_IOXPUT_8(isp, src->req_cdb[i], &dst->req_cdb[i]); } ISP_IOXPUT_32(isp, src->req_totalcnt, &dst->req_totalcnt); for (i = 0; i < ISP_RQDSEG_T3; i++) { ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_base, &dst->req_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_basehi, &dst->req_dataseg[i].ds_basehi); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_count, &dst->req_dataseg[i].ds_count); } } void isp_put_extended_request(ispsoftc_t *isp, ispextreq_t *src, ispextreq_t *dst) { int i; isp_put_hdr(isp, &src->req_header, &dst->req_header); ISP_IOXPUT_32(isp, src->req_handle, &dst->req_handle); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->req_lun_trn, &dst->req_target); ISP_IOXPUT_8(isp, src->req_target, &dst->req_lun_trn); } else { ISP_IOXPUT_8(isp, src->req_lun_trn, &dst->req_lun_trn); ISP_IOXPUT_8(isp, src->req_target, &dst->req_target); } ISP_IOXPUT_16(isp, src->req_cdblen, &dst->req_cdblen); ISP_IOXPUT_16(isp, src->req_flags, &dst->req_flags); ISP_IOXPUT_16(isp, src->req_time, &dst->req_time); ISP_IOXPUT_16(isp, src->req_seg_count, &dst->req_seg_count); for (i = 0; i < ASIZE(src->req_cdb); i++) { ISP_IOXPUT_8(isp, src->req_cdb[i], &dst->req_cdb[i]); } } void isp_put_request_t7(ispsoftc_t *isp, ispreqt7_t *src, ispreqt7_t *dst) { int i; uint32_t *a, *b; isp_put_hdr(isp, &src->req_header, &dst->req_header); ISP_IOXPUT_32(isp, src->req_handle, &dst->req_handle); ISP_IOXPUT_16(isp, src->req_nphdl, &dst->req_nphdl); ISP_IOXPUT_16(isp, src->req_time, &dst->req_time); ISP_IOXPUT_16(isp, src->req_seg_count, &dst->req_seg_count); ISP_IOXPUT_16(isp, src->req_reserved, &dst->req_reserved); a = (uint32_t *) src->req_lun; b = (uint32_t *) dst->req_lun; for (i = 0; i < (ASIZE(src->req_lun) >> 2); i++ ) { *b++ = ISP_SWAP32(isp, *a++); } ISP_IOXPUT_8(isp, src->req_alen_datadir, &dst->req_alen_datadir); ISP_IOXPUT_8(isp, src->req_task_management, &dst->req_task_management); ISP_IOXPUT_8(isp, src->req_task_attribute, &dst->req_task_attribute); ISP_IOXPUT_8(isp, src->req_crn, &dst->req_crn); a = (uint32_t *) src->req_cdb; b = (uint32_t *) dst->req_cdb; for (i = 0; i < (ASIZE(src->req_cdb) >> 2); i++) { *b++ = ISP_SWAP32(isp, *a++); } ISP_IOXPUT_32(isp, src->req_dl, &dst->req_dl); ISP_IOXPUT_16(isp, src->req_tidlo, &dst->req_tidlo); ISP_IOXPUT_8(isp, src->req_tidhi, &dst->req_tidhi); ISP_IOXPUT_8(isp, src->req_vpidx, &dst->req_vpidx); ISP_IOXPUT_32(isp, src->req_dataseg.ds_base, &dst->req_dataseg.ds_base); ISP_IOXPUT_32(isp, src->req_dataseg.ds_basehi, &dst->req_dataseg.ds_basehi); ISP_IOXPUT_32(isp, src->req_dataseg.ds_count, &dst->req_dataseg.ds_count); } void isp_put_24xx_tmf(ispsoftc_t *isp, isp24xx_tmf_t *src, isp24xx_tmf_t *dst) { int i; uint32_t *a, *b; isp_put_hdr(isp, &src->tmf_header, &dst->tmf_header); ISP_IOXPUT_32(isp, src->tmf_handle, &dst->tmf_handle); ISP_IOXPUT_16(isp, src->tmf_nphdl, &dst->tmf_nphdl); ISP_IOXPUT_16(isp, src->tmf_delay, &dst->tmf_delay); ISP_IOXPUT_16(isp, src->tmf_timeout, &dst->tmf_timeout); for (i = 0; i < ASIZE(src->tmf_reserved0); i++) { 
ISP_IOXPUT_8(isp, src->tmf_reserved0[i], &dst->tmf_reserved0[i]); } a = (uint32_t *) src->tmf_lun; b = (uint32_t *) dst->tmf_lun; for (i = 0; i < (ASIZE(src->tmf_lun) >> 2); i++ ) { *b++ = ISP_SWAP32(isp, *a++); } ISP_IOXPUT_32(isp, src->tmf_flags, &dst->tmf_flags); for (i = 0; i < ASIZE(src->tmf_reserved1); i++) { ISP_IOXPUT_8(isp, src->tmf_reserved1[i], &dst->tmf_reserved1[i]); } ISP_IOXPUT_16(isp, src->tmf_tidlo, &dst->tmf_tidlo); ISP_IOXPUT_8(isp, src->tmf_tidhi, &dst->tmf_tidhi); ISP_IOXPUT_8(isp, src->tmf_vpidx, &dst->tmf_vpidx); for (i = 0; i < ASIZE(src->tmf_reserved2); i++) { ISP_IOXPUT_8(isp, src->tmf_reserved2[i], &dst->tmf_reserved2[i]); } } void isp_put_24xx_abrt(ispsoftc_t *isp, isp24xx_abrt_t *src, isp24xx_abrt_t *dst) { int i; isp_put_hdr(isp, &src->abrt_header, &dst->abrt_header); ISP_IOXPUT_32(isp, src->abrt_handle, &dst->abrt_handle); ISP_IOXPUT_16(isp, src->abrt_nphdl, &dst->abrt_nphdl); ISP_IOXPUT_16(isp, src->abrt_options, &dst->abrt_options); ISP_IOXPUT_32(isp, src->abrt_cmd_handle, &dst->abrt_cmd_handle); ISP_IOXPUT_16(isp, src->abrt_queue_number, &dst->abrt_queue_number); for (i = 0; i < ASIZE(src->abrt_reserved); i++) { ISP_IOXPUT_8(isp, src->abrt_reserved[i], &dst->abrt_reserved[i]); } ISP_IOXPUT_16(isp, src->abrt_tidlo, &dst->abrt_tidlo); ISP_IOXPUT_8(isp, src->abrt_tidhi, &dst->abrt_tidhi); ISP_IOXPUT_8(isp, src->abrt_vpidx, &dst->abrt_vpidx); for (i = 0; i < ASIZE(src->abrt_reserved1); i++) { ISP_IOXPUT_8(isp, src->abrt_reserved1[i], &dst->abrt_reserved1[i]); } } void isp_put_cont_req(ispsoftc_t *isp, ispcontreq_t *src, ispcontreq_t *dst) { int i; isp_put_hdr(isp, &src->req_header, &dst->req_header); for (i = 0; i < ISP_CDSEG; i++) { ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_base, &dst->req_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_count, &dst->req_dataseg[i].ds_count); } } void isp_put_cont64_req(ispsoftc_t *isp, ispcontreq64_t *src, ispcontreq64_t *dst) { int i; isp_put_hdr(isp, &src->req_header, &dst->req_header); for (i = 0; i < ISP_CDSEG64; i++) { ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_base, &dst->req_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_basehi, &dst->req_dataseg[i].ds_basehi); ISP_IOXPUT_32(isp, src->req_dataseg[i].ds_count, &dst->req_dataseg[i].ds_count); } } void isp_get_response(ispsoftc_t *isp, ispstatusreq_t *src, ispstatusreq_t *dst) { int i; isp_get_hdr(isp, &src->req_header, &dst->req_header); ISP_IOXGET_32(isp, &src->req_handle, dst->req_handle); ISP_IOXGET_16(isp, &src->req_scsi_status, dst->req_scsi_status); ISP_IOXGET_16(isp, &src->req_completion_status, dst->req_completion_status); ISP_IOXGET_16(isp, &src->req_state_flags, dst->req_state_flags); ISP_IOXGET_16(isp, &src->req_status_flags, dst->req_status_flags); ISP_IOXGET_16(isp, &src->req_time, dst->req_time); ISP_IOXGET_16(isp, &src->req_sense_len, dst->req_sense_len); ISP_IOXGET_32(isp, &src->req_resid, dst->req_resid); for (i = 0; i < sizeof (src->req_response); i++) { ISP_IOXGET_8(isp, &src->req_response[i], dst->req_response[i]); } for (i = 0; i < sizeof (src->req_sense_data); i++) { ISP_IOXGET_8(isp, &src->req_sense_data[i], dst->req_sense_data[i]); } } void isp_get_cont_response(ispsoftc_t *isp, ispstatus_cont_t *src, ispstatus_cont_t *dst) { int i; isp_get_hdr(isp, &src->req_header, &dst->req_header); if (IS_24XX(isp)) { uint32_t *a, *b; a = (uint32_t *) src->req_sense_data; b = (uint32_t *) dst->req_sense_data; for (i = 0; i < (sizeof (src->req_sense_data) / sizeof (uint32_t)); i++) { ISP_IOZGET_32(isp, a++, *b++); } } else { for (i 
= 0; i < sizeof (src->req_sense_data); i++) { ISP_IOXGET_8(isp, &src->req_sense_data[i], dst->req_sense_data[i]); } } } void isp_get_24xx_response(ispsoftc_t *isp, isp24xx_statusreq_t *src, isp24xx_statusreq_t *dst) { int i; uint32_t *s, *d; isp_get_hdr(isp, &src->req_header, &dst->req_header); ISP_IOXGET_32(isp, &src->req_handle, dst->req_handle); ISP_IOXGET_16(isp, &src->req_completion_status, dst->req_completion_status); ISP_IOXGET_16(isp, &src->req_oxid, dst->req_oxid); ISP_IOXGET_32(isp, &src->req_resid, dst->req_resid); ISP_IOXGET_16(isp, &src->req_reserved0, dst->req_reserved0); ISP_IOXGET_16(isp, &src->req_state_flags, dst->req_state_flags); ISP_IOXGET_16(isp, &src->req_retry_delay, dst->req_retry_delay); ISP_IOXGET_16(isp, &src->req_scsi_status, dst->req_scsi_status); ISP_IOXGET_32(isp, &src->req_fcp_residual, dst->req_fcp_residual); ISP_IOXGET_32(isp, &src->req_sense_len, dst->req_sense_len); ISP_IOXGET_32(isp, &src->req_response_len, dst->req_response_len); s = (uint32_t *)src->req_rsp_sense; d = (uint32_t *)dst->req_rsp_sense; for (i = 0; i < (ASIZE(src->req_rsp_sense) >> 2); i++) { d[i] = ISP_SWAP32(isp, s[i]); } } void isp_get_24xx_abrt(ispsoftc_t *isp, isp24xx_abrt_t *src, isp24xx_abrt_t *dst) { int i; isp_get_hdr(isp, &src->abrt_header, &dst->abrt_header); ISP_IOXGET_32(isp, &src->abrt_handle, dst->abrt_handle); ISP_IOXGET_16(isp, &src->abrt_nphdl, dst->abrt_nphdl); ISP_IOXGET_16(isp, &src->abrt_options, dst->abrt_options); ISP_IOXGET_32(isp, &src->abrt_cmd_handle, dst->abrt_cmd_handle); ISP_IOXGET_16(isp, &src->abrt_queue_number, dst->abrt_queue_number); for (i = 0; i < ASIZE(src->abrt_reserved); i++) { ISP_IOXGET_8(isp, &src->abrt_reserved[i], dst->abrt_reserved[i]); } ISP_IOXGET_16(isp, &src->abrt_tidlo, dst->abrt_tidlo); ISP_IOXGET_8(isp, &src->abrt_tidhi, dst->abrt_tidhi); ISP_IOXGET_8(isp, &src->abrt_vpidx, dst->abrt_vpidx); for (i = 0; i < ASIZE(src->abrt_reserved1); i++) { ISP_IOXGET_8(isp, &src->abrt_reserved1[i], dst->abrt_reserved1[i]); } } void isp_get_rio1(ispsoftc_t *isp, isp_rio1_t *r1src, isp_rio1_t *r1dst) { const int lim = sizeof (r1dst->req_handles) / sizeof (r1dst->req_handles[0]); int i; isp_get_hdr(isp, &r1src->req_header, &r1dst->req_header); if (r1dst->req_header.rqs_seqno > lim) { r1dst->req_header.rqs_seqno = lim; } for (i = 0; i < r1dst->req_header.rqs_seqno; i++) { ISP_IOXGET_32(isp, &r1src->req_handles[i], r1dst->req_handles[i]); } while (i < lim) { r1dst->req_handles[i++] = 0; } } void isp_get_rio2(ispsoftc_t *isp, isp_rio2_t *r2src, isp_rio2_t *r2dst) { const int lim = sizeof (r2dst->req_handles) / sizeof (r2dst->req_handles[0]); int i; isp_get_hdr(isp, &r2src->req_header, &r2dst->req_header); if (r2dst->req_header.rqs_seqno > lim) { r2dst->req_header.rqs_seqno = lim; } for (i = 0; i < r2dst->req_header.rqs_seqno; i++) { ISP_IOXGET_16(isp, &r2src->req_handles[i], r2dst->req_handles[i]); } while (i < lim) { r2dst->req_handles[i++] = 0; } } void isp_put_icb(ispsoftc_t *isp, isp_icb_t *src, isp_icb_t *dst) { int i; if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->icb_version, &dst->icb_reserved0); ISP_IOXPUT_8(isp, src->icb_reserved0, &dst->icb_version); } else { ISP_IOXPUT_8(isp, src->icb_version, &dst->icb_version); ISP_IOXPUT_8(isp, src->icb_reserved0, &dst->icb_reserved0); } ISP_IOXPUT_16(isp, src->icb_fwoptions, &dst->icb_fwoptions); ISP_IOXPUT_16(isp, src->icb_maxfrmlen, &dst->icb_maxfrmlen); ISP_IOXPUT_16(isp, src->icb_maxalloc, &dst->icb_maxalloc); ISP_IOXPUT_16(isp, src->icb_execthrottle, &dst->icb_execthrottle); if (ISP_IS_SBUS(isp)) { 
ISP_IOXPUT_8(isp, src->icb_retry_count, &dst->icb_retry_delay); ISP_IOXPUT_8(isp, src->icb_retry_delay, &dst->icb_retry_count); } else { ISP_IOXPUT_8(isp, src->icb_retry_count, &dst->icb_retry_count); ISP_IOXPUT_8(isp, src->icb_retry_delay, &dst->icb_retry_delay); } for (i = 0; i < 8; i++) { ISP_IOXPUT_8(isp, src->icb_portname[i], &dst->icb_portname[i]); } ISP_IOXPUT_16(isp, src->icb_hardaddr, &dst->icb_hardaddr); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->icb_iqdevtype, &dst->icb_logintime); ISP_IOXPUT_8(isp, src->icb_logintime, &dst->icb_iqdevtype); } else { ISP_IOXPUT_8(isp, src->icb_iqdevtype, &dst->icb_iqdevtype); ISP_IOXPUT_8(isp, src->icb_logintime, &dst->icb_logintime); } for (i = 0; i < 8; i++) { ISP_IOXPUT_8(isp, src->icb_nodename[i], &dst->icb_nodename[i]); } ISP_IOXPUT_16(isp, src->icb_rqstout, &dst->icb_rqstout); ISP_IOXPUT_16(isp, src->icb_rspnsin, &dst->icb_rspnsin); ISP_IOXPUT_16(isp, src->icb_rqstqlen, &dst->icb_rqstqlen); ISP_IOXPUT_16(isp, src->icb_rsltqlen, &dst->icb_rsltqlen); for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->icb_rqstaddr[i], &dst->icb_rqstaddr[i]); } for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->icb_respaddr[i], &dst->icb_respaddr[i]); } ISP_IOXPUT_16(isp, src->icb_lunenables, &dst->icb_lunenables); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->icb_ccnt, &dst->icb_icnt); ISP_IOXPUT_8(isp, src->icb_icnt, &dst->icb_ccnt); } else { ISP_IOXPUT_8(isp, src->icb_ccnt, &dst->icb_ccnt); ISP_IOXPUT_8(isp, src->icb_icnt, &dst->icb_icnt); } ISP_IOXPUT_16(isp, src->icb_lunetimeout, &dst->icb_lunetimeout); ISP_IOXPUT_16(isp, src->icb_reserved1, &dst->icb_reserved1); ISP_IOXPUT_16(isp, src->icb_xfwoptions, &dst->icb_xfwoptions); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->icb_racctimer, &dst->icb_idelaytimer); ISP_IOXPUT_8(isp, src->icb_idelaytimer, &dst->icb_racctimer); } else { ISP_IOXPUT_8(isp, src->icb_racctimer, &dst->icb_racctimer); ISP_IOXPUT_8(isp, src->icb_idelaytimer, &dst->icb_idelaytimer); } ISP_IOXPUT_16(isp, src->icb_zfwoptions, &dst->icb_zfwoptions); } void isp_put_icb_2400(ispsoftc_t *isp, isp_icb_2400_t *src, isp_icb_2400_t *dst) { int i; ISP_IOXPUT_16(isp, src->icb_version, &dst->icb_version); ISP_IOXPUT_16(isp, src->icb_reserved0, &dst->icb_reserved0); ISP_IOXPUT_16(isp, src->icb_maxfrmlen, &dst->icb_maxfrmlen); ISP_IOXPUT_16(isp, src->icb_execthrottle, &dst->icb_execthrottle); ISP_IOXPUT_16(isp, src->icb_xchgcnt, &dst->icb_xchgcnt); ISP_IOXPUT_16(isp, src->icb_hardaddr, &dst->icb_hardaddr); for (i = 0; i < 8; i++) { ISP_IOXPUT_8(isp, src->icb_portname[i], &dst->icb_portname[i]); } for (i = 0; i < 8; i++) { ISP_IOXPUT_8(isp, src->icb_nodename[i], &dst->icb_nodename[i]); } ISP_IOXPUT_16(isp, src->icb_rspnsin, &dst->icb_rspnsin); ISP_IOXPUT_16(isp, src->icb_rqstout, &dst->icb_rqstout); ISP_IOXPUT_16(isp, src->icb_retry_count, &dst->icb_retry_count); ISP_IOXPUT_16(isp, src->icb_priout, &dst->icb_priout); ISP_IOXPUT_16(isp, src->icb_rsltqlen, &dst->icb_rsltqlen); ISP_IOXPUT_16(isp, src->icb_rqstqlen, &dst->icb_rqstqlen); ISP_IOXPUT_16(isp, src->icb_ldn_nols, &dst->icb_ldn_nols); ISP_IOXPUT_16(isp, src->icb_prqstqlen, &dst->icb_prqstqlen); for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->icb_rqstaddr[i], &dst->icb_rqstaddr[i]); } for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->icb_respaddr[i], &dst->icb_respaddr[i]); } for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->icb_priaddr[i], &dst->icb_priaddr[i]); } for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->icb_reserved1[i], &dst->icb_reserved1[i]); } ISP_IOXPUT_16(isp, src->icb_atio_in, 
&dst->icb_atio_in); ISP_IOXPUT_16(isp, src->icb_atioqlen, &dst->icb_atioqlen); for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->icb_atioqaddr[i], &dst->icb_atioqaddr[i]); } ISP_IOXPUT_16(isp, src->icb_idelaytimer, &dst->icb_idelaytimer); ISP_IOXPUT_16(isp, src->icb_logintime, &dst->icb_logintime); ISP_IOXPUT_32(isp, src->icb_fwoptions1, &dst->icb_fwoptions1); ISP_IOXPUT_32(isp, src->icb_fwoptions2, &dst->icb_fwoptions2); ISP_IOXPUT_32(isp, src->icb_fwoptions3, &dst->icb_fwoptions3); for (i = 0; i < 12; i++) { ISP_IOXPUT_16(isp, src->icb_reserved2[i], &dst->icb_reserved2[i]); } } void isp_put_icb_2400_vpinfo(ispsoftc_t *isp, isp_icb_2400_vpinfo_t *src, isp_icb_2400_vpinfo_t *dst) { ISP_IOXPUT_16(isp, src->vp_count, &dst->vp_count); ISP_IOXPUT_16(isp, src->vp_global_options, &dst->vp_global_options); } void isp_put_vp_port_info(ispsoftc_t *isp, vp_port_info_t *src, vp_port_info_t *dst) { int i; ISP_IOXPUT_16(isp, src->vp_port_status, &dst->vp_port_status); ISP_IOXPUT_8(isp, src->vp_port_options, &dst->vp_port_options); ISP_IOXPUT_8(isp, src->vp_port_loopid, &dst->vp_port_loopid); for (i = 0; i < 8; i++) { ISP_IOXPUT_8(isp, src->vp_port_portname[i], &dst->vp_port_portname[i]); } for (i = 0; i < 8; i++) { ISP_IOXPUT_8(isp, src->vp_port_nodename[i], &dst->vp_port_nodename[i]); } /* we never *put* portid_lo/portid_hi */ } void isp_get_vp_port_info(ispsoftc_t *isp, vp_port_info_t *src, vp_port_info_t *dst) { int i; ISP_IOXGET_16(isp, &src->vp_port_status, dst->vp_port_status); ISP_IOXGET_8(isp, &src->vp_port_options, dst->vp_port_options); ISP_IOXGET_8(isp, &src->vp_port_loopid, dst->vp_port_loopid); for (i = 0; i < ASIZE(src->vp_port_portname); i++) { ISP_IOXGET_8(isp, &src->vp_port_portname[i], dst->vp_port_portname[i]); } for (i = 0; i < ASIZE(src->vp_port_nodename); i++) { ISP_IOXGET_8(isp, &src->vp_port_nodename[i], dst->vp_port_nodename[i]); } ISP_IOXGET_16(isp, &src->vp_port_portid_lo, dst->vp_port_portid_lo); ISP_IOXGET_16(isp, &src->vp_port_portid_hi, dst->vp_port_portid_hi); } void isp_put_vp_ctrl_info(ispsoftc_t *isp, vp_ctrl_info_t *src, vp_ctrl_info_t *dst) { int i; isp_put_hdr(isp, &src->vp_ctrl_hdr, &dst->vp_ctrl_hdr); ISP_IOXPUT_32(isp, src->vp_ctrl_handle, &dst->vp_ctrl_handle); ISP_IOXPUT_16(isp, src->vp_ctrl_index_fail, &dst->vp_ctrl_index_fail); ISP_IOXPUT_16(isp, src->vp_ctrl_status, &dst->vp_ctrl_status); ISP_IOXPUT_16(isp, src->vp_ctrl_command, &dst->vp_ctrl_command); ISP_IOXPUT_16(isp, src->vp_ctrl_vp_count, &dst->vp_ctrl_vp_count); for (i = 0; i < ASIZE(src->vp_ctrl_idmap); i++) { ISP_IOXPUT_16(isp, src->vp_ctrl_idmap[i], &dst->vp_ctrl_idmap[i]); } for (i = 0; i < ASIZE(src->vp_ctrl_reserved); i++) { ISP_IOXPUT_16(isp, src->vp_ctrl_reserved[i], &dst->vp_ctrl_reserved[i]); } ISP_IOXPUT_16(isp, src->vp_ctrl_fcf_index, &dst->vp_ctrl_fcf_index); } void isp_get_vp_ctrl_info(ispsoftc_t *isp, vp_ctrl_info_t *src, vp_ctrl_info_t *dst) { int i; isp_get_hdr(isp, &src->vp_ctrl_hdr, &dst->vp_ctrl_hdr); ISP_IOXGET_32(isp, &src->vp_ctrl_handle, dst->vp_ctrl_handle); ISP_IOXGET_16(isp, &src->vp_ctrl_index_fail, dst->vp_ctrl_index_fail); ISP_IOXGET_16(isp, &src->vp_ctrl_status, dst->vp_ctrl_status); ISP_IOXGET_16(isp, &src->vp_ctrl_command, dst->vp_ctrl_command); ISP_IOXGET_16(isp, &src->vp_ctrl_vp_count, dst->vp_ctrl_vp_count); for (i = 0; i < ASIZE(src->vp_ctrl_idmap); i++) { ISP_IOXGET_16(isp, &src->vp_ctrl_idmap[i], dst->vp_ctrl_idmap[i]); } for (i = 0; i < ASIZE(src->vp_ctrl_reserved); i++) { ISP_IOXGET_16(isp, &src->vp_ctrl_reserved[i], dst->vp_ctrl_reserved[i]); } 
ISP_IOXGET_16(isp, &src->vp_ctrl_fcf_index, dst->vp_ctrl_fcf_index); } void isp_put_vp_modify(ispsoftc_t *isp, vp_modify_t *src, vp_modify_t *dst) { int i, j; isp_put_hdr(isp, &src->vp_mod_hdr, &dst->vp_mod_hdr); ISP_IOXPUT_32(isp, src->vp_mod_hdl, &dst->vp_mod_hdl); ISP_IOXPUT_16(isp, src->vp_mod_reserved0, &dst->vp_mod_reserved0); ISP_IOXPUT_16(isp, src->vp_mod_status, &dst->vp_mod_status); ISP_IOXPUT_8(isp, src->vp_mod_cmd, &dst->vp_mod_cmd); ISP_IOXPUT_8(isp, src->vp_mod_cnt, &dst->vp_mod_cnt); ISP_IOXPUT_8(isp, src->vp_mod_idx0, &dst->vp_mod_idx0); ISP_IOXPUT_8(isp, src->vp_mod_idx1, &dst->vp_mod_idx1); for (i = 0; i < ASIZE(src->vp_mod_ports); i++) { ISP_IOXPUT_8(isp, src->vp_mod_ports[i].options, &dst->vp_mod_ports[i].options); ISP_IOXPUT_8(isp, src->vp_mod_ports[i].loopid, &dst->vp_mod_ports[i].loopid); ISP_IOXPUT_16(isp, src->vp_mod_ports[i].reserved1, &dst->vp_mod_ports[i].reserved1); for (j = 0; j < ASIZE(src->vp_mod_ports[i].wwpn); j++) { ISP_IOXPUT_8(isp, src->vp_mod_ports[i].wwpn[j], &dst->vp_mod_ports[i].wwpn[j]); } for (j = 0; j < ASIZE(src->vp_mod_ports[i].wwnn); j++) { ISP_IOXPUT_8(isp, src->vp_mod_ports[i].wwnn[j], &dst->vp_mod_ports[i].wwnn[j]); } } for (i = 0; i < ASIZE(src->vp_mod_reserved2); i++) { ISP_IOXPUT_8(isp, src->vp_mod_reserved2[i], &dst->vp_mod_reserved2[i]); } } void isp_get_vp_modify(ispsoftc_t *isp, vp_modify_t *src, vp_modify_t *dst) { int i, j; isp_get_hdr(isp, &src->vp_mod_hdr, &dst->vp_mod_hdr); ISP_IOXGET_32(isp, &src->vp_mod_hdl, dst->vp_mod_hdl); ISP_IOXGET_16(isp, &src->vp_mod_reserved0, dst->vp_mod_reserved0); ISP_IOXGET_16(isp, &src->vp_mod_status, dst->vp_mod_status); ISP_IOXGET_8(isp, &src->vp_mod_cmd, dst->vp_mod_cmd); ISP_IOXGET_8(isp, &src->vp_mod_cnt, dst->vp_mod_cnt); ISP_IOXGET_8(isp, &src->vp_mod_idx0, dst->vp_mod_idx0); ISP_IOXGET_8(isp, &src->vp_mod_idx1, dst->vp_mod_idx1); for (i = 0; i < ASIZE(src->vp_mod_ports); i++) { ISP_IOXGET_8(isp, &src->vp_mod_ports[i].options, dst->vp_mod_ports[i].options); ISP_IOXGET_8(isp, &src->vp_mod_ports[i].loopid, dst->vp_mod_ports[i].loopid); ISP_IOXGET_16(isp, &src->vp_mod_ports[i].reserved1, dst->vp_mod_ports[i].reserved1); for (j = 0; j < ASIZE(src->vp_mod_ports[i].wwpn); j++) { ISP_IOXGET_8(isp, &src->vp_mod_ports[i].wwpn[j], dst->vp_mod_ports[i].wwpn[j]); } for (j = 0; j < ASIZE(src->vp_mod_ports[i].wwnn); j++) { ISP_IOXGET_8(isp, &src->vp_mod_ports[i].wwnn[j], dst->vp_mod_ports[i].wwnn[j]); } } for (i = 0; i < ASIZE(src->vp_mod_reserved2); i++) { ISP_IOXGET_8(isp, &src->vp_mod_reserved2[i], dst->vp_mod_reserved2[i]); } } void isp_get_pdb_21xx(ispsoftc_t *isp, isp_pdb_21xx_t *src, isp_pdb_21xx_t *dst) { int i; ISP_IOXGET_16(isp, &src->pdb_options, dst->pdb_options); ISP_IOXGET_8(isp, &src->pdb_mstate, dst->pdb_mstate); ISP_IOXGET_8(isp, &src->pdb_sstate, dst->pdb_sstate); for (i = 0; i < 4; i++) { ISP_IOXGET_8(isp, &src->pdb_hardaddr_bits[i], dst->pdb_hardaddr_bits[i]); } for (i = 0; i < 4; i++) { ISP_IOXGET_8(isp, &src->pdb_portid_bits[i], dst->pdb_portid_bits[i]); } for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->pdb_nodename[i], dst->pdb_nodename[i]); } for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->pdb_portname[i], dst->pdb_portname[i]); } ISP_IOXGET_16(isp, &src->pdb_execthrottle, dst->pdb_execthrottle); ISP_IOXGET_16(isp, &src->pdb_exec_count, dst->pdb_exec_count); ISP_IOXGET_8(isp, &src->pdb_retry_count, dst->pdb_retry_count); ISP_IOXGET_8(isp, &src->pdb_retry_delay, dst->pdb_retry_delay); ISP_IOXGET_16(isp, &src->pdb_resalloc, dst->pdb_resalloc); ISP_IOXGET_16(isp, 
&src->pdb_curalloc, dst->pdb_curalloc); ISP_IOXGET_16(isp, &src->pdb_qhead, dst->pdb_qhead); ISP_IOXGET_16(isp, &src->pdb_qtail, dst->pdb_qtail); ISP_IOXGET_16(isp, &src->pdb_tl_next, dst->pdb_tl_next); ISP_IOXGET_16(isp, &src->pdb_tl_last, dst->pdb_tl_last); ISP_IOXGET_16(isp, &src->pdb_features, dst->pdb_features); ISP_IOXGET_16(isp, &src->pdb_pconcurrnt, dst->pdb_pconcurrnt); ISP_IOXGET_16(isp, &src->pdb_roi, dst->pdb_roi); ISP_IOXGET_8(isp, &src->pdb_target, dst->pdb_target); ISP_IOXGET_8(isp, &src->pdb_initiator, dst->pdb_initiator); ISP_IOXGET_16(isp, &src->pdb_rdsiz, dst->pdb_rdsiz); ISP_IOXGET_16(isp, &src->pdb_ncseq, dst->pdb_ncseq); ISP_IOXGET_16(isp, &src->pdb_noseq, dst->pdb_noseq); ISP_IOXGET_16(isp, &src->pdb_labrtflg, dst->pdb_labrtflg); ISP_IOXGET_16(isp, &src->pdb_lstopflg, dst->pdb_lstopflg); ISP_IOXGET_16(isp, &src->pdb_sqhead, dst->pdb_sqhead); ISP_IOXGET_16(isp, &src->pdb_sqtail, dst->pdb_sqtail); ISP_IOXGET_16(isp, &src->pdb_ptimer, dst->pdb_ptimer); ISP_IOXGET_16(isp, &src->pdb_nxt_seqid, dst->pdb_nxt_seqid); ISP_IOXGET_16(isp, &src->pdb_fcount, dst->pdb_fcount); ISP_IOXGET_16(isp, &src->pdb_prli_len, dst->pdb_prli_len); ISP_IOXGET_16(isp, &src->pdb_prli_svc0, dst->pdb_prli_svc0); ISP_IOXGET_16(isp, &src->pdb_prli_svc3, dst->pdb_prli_svc3); ISP_IOXGET_16(isp, &src->pdb_loopid, dst->pdb_loopid); ISP_IOXGET_16(isp, &src->pdb_il_ptr, dst->pdb_il_ptr); ISP_IOXGET_16(isp, &src->pdb_sl_ptr, dst->pdb_sl_ptr); } void isp_get_pdb_24xx(ispsoftc_t *isp, isp_pdb_24xx_t *src, isp_pdb_24xx_t *dst) { int i; ISP_IOXGET_16(isp, &src->pdb_flags, dst->pdb_flags); ISP_IOXGET_8(isp, &src->pdb_curstate, dst->pdb_curstate); ISP_IOXGET_8(isp, &src->pdb_laststate, dst->pdb_laststate); for (i = 0; i < 4; i++) { ISP_IOXGET_8(isp, &src->pdb_hardaddr_bits[i], dst->pdb_hardaddr_bits[i]); } for (i = 0; i < 4; i++) { ISP_IOXGET_8(isp, &src->pdb_portid_bits[i], dst->pdb_portid_bits[i]); } ISP_IOXGET_16(isp, &src->pdb_retry_timer, dst->pdb_retry_timer); ISP_IOXGET_16(isp, &src->pdb_handle, dst->pdb_handle); ISP_IOXGET_16(isp, &src->pdb_rcv_dsize, dst->pdb_rcv_dsize); ISP_IOXGET_16(isp, &src->pdb_reserved0, dst->pdb_reserved0); ISP_IOXGET_16(isp, &src->pdb_prli_svc0, dst->pdb_prli_svc0); ISP_IOXGET_16(isp, &src->pdb_prli_svc3, dst->pdb_prli_svc3); for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->pdb_nodename[i], dst->pdb_nodename[i]); } for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->pdb_portname[i], dst->pdb_portname[i]); } for (i = 0; i < 24; i++) { ISP_IOXGET_8(isp, &src->pdb_reserved1[i], dst->pdb_reserved1[i]); } } void isp_get_pnhle_21xx(ispsoftc_t *isp, isp_pnhle_21xx_t *src, isp_pnhle_21xx_t *dst) { ISP_IOXGET_16(isp, &src->pnhle_port_id_lo, dst->pnhle_port_id_lo); ISP_IOXGET_16(isp, &src->pnhle_port_id_hi_handle, dst->pnhle_port_id_hi_handle); } void isp_get_pnhle_23xx(ispsoftc_t *isp, isp_pnhle_23xx_t *src, isp_pnhle_23xx_t *dst) { ISP_IOXGET_16(isp, &src->pnhle_port_id_lo, dst->pnhle_port_id_lo); ISP_IOXGET_16(isp, &src->pnhle_port_id_hi, dst->pnhle_port_id_hi); ISP_IOXGET_16(isp, &src->pnhle_handle, dst->pnhle_handle); } void isp_get_pnhle_24xx(ispsoftc_t *isp, isp_pnhle_24xx_t *src, isp_pnhle_24xx_t *dst) { ISP_IOXGET_16(isp, &src->pnhle_port_id_lo, dst->pnhle_port_id_lo); ISP_IOXGET_16(isp, &src->pnhle_port_id_hi, dst->pnhle_port_id_hi); ISP_IOXGET_16(isp, &src->pnhle_handle, dst->pnhle_handle); ISP_IOXGET_16(isp, &src->pnhle_reserved, dst->pnhle_reserved); } void isp_get_pnnle(ispsoftc_t *isp, isp_pnnle_t *src, isp_pnnle_t *dst) { int i; for (i = 0; i < 8; i++) ISP_IOXGET_8(isp, 
&src->pnnle_name[i], dst->pnnle_name[i]); ISP_IOXGET_16(isp, &src->pnnle_handle, dst->pnnle_handle); ISP_IOXGET_16(isp, &src->pnnle_reserved, dst->pnnle_reserved); } /* * PLOGI/LOGO IOCB canonicalization */ void isp_get_plogx(ispsoftc_t *isp, isp_plogx_t *src, isp_plogx_t *dst) { int i; isp_get_hdr(isp, &src->plogx_header, &dst->plogx_header); ISP_IOXGET_32(isp, &src->plogx_handle, dst->plogx_handle); ISP_IOXGET_16(isp, &src->plogx_status, dst->plogx_status); ISP_IOXGET_16(isp, &src->plogx_nphdl, dst->plogx_nphdl); ISP_IOXGET_16(isp, &src->plogx_flags, dst->plogx_flags); ISP_IOXGET_16(isp, &src->plogx_vphdl, dst->plogx_vphdl); ISP_IOXGET_16(isp, &src->plogx_portlo, dst->plogx_portlo); ISP_IOXGET_16(isp, &src->plogx_rspsz_porthi, dst->plogx_rspsz_porthi); for (i = 0; i < 11; i++) { ISP_IOXGET_16(isp, &src->plogx_ioparm[i].lo16, dst->plogx_ioparm[i].lo16); ISP_IOXGET_16(isp, &src->plogx_ioparm[i].hi16, dst->plogx_ioparm[i].hi16); } } void isp_put_plogx(ispsoftc_t *isp, isp_plogx_t *src, isp_plogx_t *dst) { int i; isp_put_hdr(isp, &src->plogx_header, &dst->plogx_header); ISP_IOXPUT_32(isp, src->plogx_handle, &dst->plogx_handle); ISP_IOXPUT_16(isp, src->plogx_status, &dst->plogx_status); ISP_IOXPUT_16(isp, src->plogx_nphdl, &dst->plogx_nphdl); ISP_IOXPUT_16(isp, src->plogx_flags, &dst->plogx_flags); ISP_IOXPUT_16(isp, src->plogx_vphdl, &dst->plogx_vphdl); ISP_IOXPUT_16(isp, src->plogx_portlo, &dst->plogx_portlo); ISP_IOXPUT_16(isp, src->plogx_rspsz_porthi, &dst->plogx_rspsz_porthi); for (i = 0; i < 11; i++) { ISP_IOXPUT_16(isp, src->plogx_ioparm[i].lo16, &dst->plogx_ioparm[i].lo16); ISP_IOXPUT_16(isp, src->plogx_ioparm[i].hi16, &dst->plogx_ioparm[i].hi16); } } /* * Report ID canonicalization */ void isp_get_ridacq(ispsoftc_t *isp, isp_ridacq_t *src, isp_ridacq_t *dst) { int i; isp_get_hdr(isp, &src->ridacq_hdr, &dst->ridacq_hdr); ISP_IOXGET_32(isp, &src->ridacq_handle, dst->ridacq_handle); + ISP_IOXGET_8(isp, &src->ridacq_vp_acquired, dst->ridacq_vp_acquired); + ISP_IOXGET_8(isp, &src->ridacq_vp_setup, dst->ridacq_vp_setup); + ISP_IOXGET_8(isp, &src->ridacq_vp_index, dst->ridacq_vp_index); + ISP_IOXGET_8(isp, &src->ridacq_vp_status, dst->ridacq_vp_status); ISP_IOXGET_16(isp, &src->ridacq_vp_port_lo, dst->ridacq_vp_port_lo); ISP_IOXGET_8(isp, &src->ridacq_vp_port_hi, dst->ridacq_vp_port_hi); ISP_IOXGET_8(isp, &src->ridacq_format, dst->ridacq_format); for (i = 0; i < sizeof (src->ridacq_map) / sizeof (src->ridacq_map[0]); i++) { ISP_IOXGET_16(isp, &src->ridacq_map[i], dst->ridacq_map[i]); } for (i = 0; i < sizeof (src->ridacq_reserved1) / sizeof (src->ridacq_reserved1[0]); i++) { ISP_IOXGET_16(isp, &src->ridacq_reserved1[i], dst->ridacq_reserved1[i]); - } - if (dst->ridacq_format == 0) { - ISP_IOXGET_8(isp, &src->un.type0.ridacq_vp_acquired, dst->un.type0.ridacq_vp_acquired); - ISP_IOXGET_8(isp, &src->un.type0.ridacq_vp_setup, dst->un.type0.ridacq_vp_setup); - ISP_IOXGET_16(isp, &src->un.type0.ridacq_reserved0, dst->un.type0.ridacq_reserved0); - } else if (dst->ridacq_format == 1) { - ISP_IOXGET_16(isp, &src->un.type1.ridacq_vp_count, dst->un.type1.ridacq_vp_count); - ISP_IOXGET_8(isp, &src->un.type1.ridacq_vp_index, dst->un.type1.ridacq_vp_index); - ISP_IOXGET_8(isp, &src->un.type1.ridacq_vp_status, dst->un.type1.ridacq_vp_status); - } else { - ISP_MEMZERO(&dst->un, sizeof (dst->un)); } } /* * CT Passthru canonicalization */ void isp_get_ct_pt(ispsoftc_t *isp, isp_ct_pt_t *src, isp_ct_pt_t *dst) { int i; isp_get_hdr(isp, &src->ctp_header, &dst->ctp_header); ISP_IOXGET_32(isp, &src->ctp_handle, 
dst->ctp_handle); ISP_IOXGET_16(isp, &src->ctp_status, dst->ctp_status); ISP_IOXGET_16(isp, &src->ctp_nphdl, dst->ctp_nphdl); ISP_IOXGET_16(isp, &src->ctp_cmd_cnt, dst->ctp_cmd_cnt); ISP_IOXGET_8(isp, &src->ctp_vpidx, dst->ctp_vpidx); ISP_IOXGET_8(isp, &src->ctp_reserved0, dst->ctp_reserved0); ISP_IOXGET_16(isp, &src->ctp_time, dst->ctp_time); ISP_IOXGET_16(isp, &src->ctp_reserved1, dst->ctp_reserved1); ISP_IOXGET_16(isp, &src->ctp_rsp_cnt, dst->ctp_rsp_cnt); for (i = 0; i < 5; i++) { ISP_IOXGET_16(isp, &src->ctp_reserved2[i], dst->ctp_reserved2[i]); } ISP_IOXGET_32(isp, &src->ctp_rsp_bcnt, dst->ctp_rsp_bcnt); ISP_IOXGET_32(isp, &src->ctp_cmd_bcnt, dst->ctp_cmd_bcnt); for (i = 0; i < 2; i++) { ISP_IOXGET_32(isp, &src->ctp_dataseg[i].ds_base, dst->ctp_dataseg[i].ds_base); ISP_IOXGET_32(isp, &src->ctp_dataseg[i].ds_basehi, dst->ctp_dataseg[i].ds_basehi); ISP_IOXGET_32(isp, &src->ctp_dataseg[i].ds_count, dst->ctp_dataseg[i].ds_count); } } void isp_get_ms(ispsoftc_t *isp, isp_ms_t *src, isp_ms_t *dst) { int i; isp_get_hdr(isp, &src->ms_header, &dst->ms_header); ISP_IOXGET_32(isp, &src->ms_handle, dst->ms_handle); ISP_IOXGET_16(isp, &src->ms_nphdl, dst->ms_nphdl); ISP_IOXGET_16(isp, &src->ms_status, dst->ms_status); ISP_IOXGET_16(isp, &src->ms_flags, dst->ms_flags); ISP_IOXGET_16(isp, &src->ms_reserved1, dst->ms_reserved1); ISP_IOXGET_16(isp, &src->ms_time, dst->ms_time); ISP_IOXGET_16(isp, &src->ms_cmd_cnt, dst->ms_cmd_cnt); ISP_IOXGET_16(isp, &src->ms_tot_cnt, dst->ms_tot_cnt); ISP_IOXGET_8(isp, &src->ms_type, dst->ms_type); ISP_IOXGET_8(isp, &src->ms_r_ctl, dst->ms_r_ctl); ISP_IOXGET_16(isp, &src->ms_rxid, dst->ms_rxid); ISP_IOXGET_16(isp, &src->ms_reserved2, dst->ms_reserved2); ISP_IOXGET_32(isp, &src->ms_rsp_bcnt, dst->ms_rsp_bcnt); ISP_IOXGET_32(isp, &src->ms_cmd_bcnt, dst->ms_cmd_bcnt); for (i = 0; i < 2; i++) { ISP_IOXGET_32(isp, &src->ms_dataseg[i].ds_base, dst->ms_dataseg[i].ds_base); ISP_IOXGET_32(isp, &src->ms_dataseg[i].ds_basehi, dst->ms_dataseg[i].ds_basehi); ISP_IOXGET_32(isp, &src->ms_dataseg[i].ds_count, dst->ms_dataseg[i].ds_count); } } void isp_put_ct_pt(ispsoftc_t *isp, isp_ct_pt_t *src, isp_ct_pt_t *dst) { int i; isp_put_hdr(isp, &src->ctp_header, &dst->ctp_header); ISP_IOXPUT_32(isp, src->ctp_handle, &dst->ctp_handle); ISP_IOXPUT_16(isp, src->ctp_status, &dst->ctp_status); ISP_IOXPUT_16(isp, src->ctp_nphdl, &dst->ctp_nphdl); ISP_IOXPUT_16(isp, src->ctp_cmd_cnt, &dst->ctp_cmd_cnt); ISP_IOXPUT_8(isp, src->ctp_vpidx, &dst->ctp_vpidx); ISP_IOXPUT_8(isp, src->ctp_reserved0, &dst->ctp_reserved0); ISP_IOXPUT_16(isp, src->ctp_time, &dst->ctp_time); ISP_IOXPUT_16(isp, src->ctp_reserved1, &dst->ctp_reserved1); ISP_IOXPUT_16(isp, src->ctp_rsp_cnt, &dst->ctp_rsp_cnt); for (i = 0; i < 5; i++) { ISP_IOXPUT_16(isp, src->ctp_reserved2[i], &dst->ctp_reserved2[i]); } ISP_IOXPUT_32(isp, src->ctp_rsp_bcnt, &dst->ctp_rsp_bcnt); ISP_IOXPUT_32(isp, src->ctp_cmd_bcnt, &dst->ctp_cmd_bcnt); for (i = 0; i < 2; i++) { ISP_IOXPUT_32(isp, src->ctp_dataseg[i].ds_base, &dst->ctp_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->ctp_dataseg[i].ds_basehi, &dst->ctp_dataseg[i].ds_basehi); ISP_IOXPUT_32(isp, src->ctp_dataseg[i].ds_count, &dst->ctp_dataseg[i].ds_count); } } void isp_put_ms(ispsoftc_t *isp, isp_ms_t *src, isp_ms_t *dst) { int i; isp_put_hdr(isp, &src->ms_header, &dst->ms_header); ISP_IOXPUT_32(isp, src->ms_handle, &dst->ms_handle); ISP_IOXPUT_16(isp, src->ms_nphdl, &dst->ms_nphdl); ISP_IOXPUT_16(isp, src->ms_status, &dst->ms_status); ISP_IOXPUT_16(isp, src->ms_flags, &dst->ms_flags); 
ISP_IOXPUT_16(isp, src->ms_reserved1, &dst->ms_reserved1); ISP_IOXPUT_16(isp, src->ms_time, &dst->ms_time); ISP_IOXPUT_16(isp, src->ms_cmd_cnt, &dst->ms_cmd_cnt); ISP_IOXPUT_16(isp, src->ms_tot_cnt, &dst->ms_tot_cnt); ISP_IOXPUT_8(isp, src->ms_type, &dst->ms_type); ISP_IOXPUT_8(isp, src->ms_r_ctl, &dst->ms_r_ctl); ISP_IOXPUT_16(isp, src->ms_rxid, &dst->ms_rxid); ISP_IOXPUT_16(isp, src->ms_reserved2, &dst->ms_reserved2); ISP_IOXPUT_32(isp, src->ms_rsp_bcnt, &dst->ms_rsp_bcnt); ISP_IOXPUT_32(isp, src->ms_cmd_bcnt, &dst->ms_cmd_bcnt); for (i = 0; i < 2; i++) { ISP_IOXPUT_32(isp, src->ms_dataseg[i].ds_base, &dst->ms_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->ms_dataseg[i].ds_basehi, &dst->ms_dataseg[i].ds_basehi); ISP_IOXPUT_32(isp, src->ms_dataseg[i].ds_count, &dst->ms_dataseg[i].ds_count); } } /* * Generic SNS request - not particularly useful since the per-command data * isn't always 16 bit words. */ void isp_put_sns_request(ispsoftc_t *isp, sns_screq_t *src, sns_screq_t *dst) { int i, nw = (int) src->snscb_sblen; ISP_IOXPUT_16(isp, src->snscb_rblen, &dst->snscb_rblen); for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->snscb_addr[i], &dst->snscb_addr[i]); } ISP_IOXPUT_16(isp, src->snscb_sblen, &dst->snscb_sblen); for (i = 0; i < nw; i++) { ISP_IOXPUT_16(isp, src->snscb_data[i], &dst->snscb_data[i]); } } void isp_put_gid_ft_request(ispsoftc_t *isp, sns_gid_ft_req_t *src, sns_gid_ft_req_t *dst) { ISP_IOXPUT_16(isp, src->snscb_rblen, &dst->snscb_rblen); ISP_IOXPUT_16(isp, src->snscb_reserved0, &dst->snscb_reserved0); ISP_IOXPUT_16(isp, src->snscb_addr[0], &dst->snscb_addr[0]); ISP_IOXPUT_16(isp, src->snscb_addr[1], &dst->snscb_addr[1]); ISP_IOXPUT_16(isp, src->snscb_addr[2], &dst->snscb_addr[2]); ISP_IOXPUT_16(isp, src->snscb_addr[3], &dst->snscb_addr[3]); ISP_IOXPUT_16(isp, src->snscb_sblen, &dst->snscb_sblen); ISP_IOXPUT_16(isp, src->snscb_reserved1, &dst->snscb_reserved1); ISP_IOXPUT_16(isp, src->snscb_cmd, &dst->snscb_cmd); ISP_IOXPUT_16(isp, src->snscb_mword_div_2, &dst->snscb_mword_div_2); ISP_IOXPUT_32(isp, src->snscb_reserved3, &dst->snscb_reserved3); ISP_IOXPUT_32(isp, src->snscb_fc4_type, &dst->snscb_fc4_type); } void isp_put_gxn_id_request(ispsoftc_t *isp, sns_gxn_id_req_t *src, sns_gxn_id_req_t *dst) { ISP_IOXPUT_16(isp, src->snscb_rblen, &dst->snscb_rblen); ISP_IOXPUT_16(isp, src->snscb_reserved0, &dst->snscb_reserved0); ISP_IOXPUT_16(isp, src->snscb_addr[0], &dst->snscb_addr[0]); ISP_IOXPUT_16(isp, src->snscb_addr[1], &dst->snscb_addr[1]); ISP_IOXPUT_16(isp, src->snscb_addr[2], &dst->snscb_addr[2]); ISP_IOXPUT_16(isp, src->snscb_addr[3], &dst->snscb_addr[3]); ISP_IOXPUT_16(isp, src->snscb_sblen, &dst->snscb_sblen); ISP_IOXPUT_16(isp, src->snscb_reserved1, &dst->snscb_reserved1); ISP_IOXPUT_16(isp, src->snscb_cmd, &dst->snscb_cmd); ISP_IOXPUT_16(isp, src->snscb_reserved2, &dst->snscb_reserved2); ISP_IOXPUT_32(isp, src->snscb_reserved3, &dst->snscb_reserved3); ISP_IOXPUT_32(isp, src->snscb_portid, &dst->snscb_portid); } /* * Generic SNS response - not particularly useful since the per-command data * isn't always 16 bit words. 
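 * (The payload is really a byte stream: swapping on 16 bit boundaries is
 * only correct for fields that happen to be 16 bit words, which is why
 * most callers use the command-specific routines nearby instead.)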
*/ void isp_get_sns_response(ispsoftc_t *isp, sns_scrsp_t *src, sns_scrsp_t *dst, int nwords) { int i; isp_get_ct_hdr(isp, &src->snscb_cthdr, &dst->snscb_cthdr); ISP_IOXGET_8(isp, &src->snscb_port_type, dst->snscb_port_type); for (i = 0; i < 3; i++) { ISP_IOXGET_8(isp, &src->snscb_port_id[i], dst->snscb_port_id[i]); } for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->snscb_portname[i], dst->snscb_portname[i]); } for (i = 0; i < nwords; i++) { ISP_IOXGET_16(isp, &src->snscb_data[i], dst->snscb_data[i]); } } void isp_get_gid_ft_response(ispsoftc_t *isp, sns_gid_ft_rsp_t *src, sns_gid_ft_rsp_t *dst, int nwords) { int i; isp_get_ct_hdr(isp, &src->snscb_cthdr, &dst->snscb_cthdr); for (i = 0; i < nwords; i++) { int j; ISP_IOXGET_8(isp, &src->snscb_ports[i].control, dst->snscb_ports[i].control); for (j = 0; j < 3; j++) { ISP_IOXGET_8(isp, &src->snscb_ports[i].portid[j], dst->snscb_ports[i].portid[j]); } if (dst->snscb_ports[i].control & 0x80) { break; } } } void isp_get_gxn_id_response(ispsoftc_t *isp, sns_gxn_id_rsp_t *src, sns_gxn_id_rsp_t *dst) { int i; isp_get_ct_hdr(isp, &src->snscb_cthdr, &dst->snscb_cthdr); for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->snscb_wwn[i], dst->snscb_wwn[i]); } } void isp_get_gff_id_response(ispsoftc_t *isp, sns_gff_id_rsp_t *src, sns_gff_id_rsp_t *dst) { int i; isp_get_ct_hdr(isp, &src->snscb_cthdr, &dst->snscb_cthdr); for (i = 0; i < 32; i++) { ISP_IOXGET_32(isp, &src->snscb_fc4_features[i], dst->snscb_fc4_features[i]); } } void isp_get_ga_nxt_response(ispsoftc_t *isp, sns_ga_nxt_rsp_t *src, sns_ga_nxt_rsp_t *dst) { int i; isp_get_ct_hdr(isp, &src->snscb_cthdr, &dst->snscb_cthdr); ISP_IOXGET_8(isp, &src->snscb_port_type, dst->snscb_port_type); for (i = 0; i < 3; i++) { ISP_IOXGET_8(isp, &src->snscb_port_id[i], dst->snscb_port_id[i]); } for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->snscb_portname[i], dst->snscb_portname[i]); } ISP_IOXGET_8(isp, &src->snscb_pnlen, dst->snscb_pnlen); for (i = 0; i < 255; i++) { ISP_IOXGET_8(isp, &src->snscb_pname[i], dst->snscb_pname[i]); } for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->snscb_nodename[i], dst->snscb_nodename[i]); } ISP_IOXGET_8(isp, &src->snscb_nnlen, dst->snscb_nnlen); for (i = 0; i < 255; i++) { ISP_IOXGET_8(isp, &src->snscb_nname[i], dst->snscb_nname[i]); } for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->snscb_ipassoc[i], dst->snscb_ipassoc[i]); } for (i = 0; i < 16; i++) { ISP_IOXGET_8(isp, &src->snscb_ipaddr[i], dst->snscb_ipaddr[i]); } for (i = 0; i < 4; i++) { ISP_IOXGET_8(isp, &src->snscb_svc_class[i], dst->snscb_svc_class[i]); } for (i = 0; i < 32; i++) { ISP_IOXGET_8(isp, &src->snscb_fc4_types[i], dst->snscb_fc4_types[i]); } for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->snscb_fpname[i], dst->snscb_fpname[i]); } ISP_IOXGET_8(isp, &src->snscb_reserved, dst->snscb_reserved); for (i = 0; i < 3; i++) { ISP_IOXGET_8(isp, &src->snscb_hardaddr[i], dst->snscb_hardaddr[i]); } } void isp_get_els(ispsoftc_t *isp, els_t *src, els_t *dst) { int i; isp_get_hdr(isp, &src->els_hdr, &dst->els_hdr); ISP_IOXGET_32(isp, &src->els_handle, dst->els_handle); ISP_IOXGET_16(isp, &src->els_status, dst->els_status); ISP_IOXGET_16(isp, &src->els_nphdl, dst->els_nphdl); ISP_IOXGET_16(isp, &src->els_xmit_dsd_count, dst->els_xmit_dsd_count); ISP_IOXGET_8(isp, &src->els_vphdl, dst->els_vphdl); ISP_IOXGET_8(isp, &src->els_sof, dst->els_sof); ISP_IOXGET_32(isp, &src->els_rxid, dst->els_rxid); ISP_IOXGET_16(isp, &src->els_recv_dsd_count, dst->els_recv_dsd_count); ISP_IOXGET_8(isp, &src->els_opcode, dst->els_opcode); 
    ISP_IOXGET_8(isp, &src->els_reserved1, dst->els_reserved1);
    ISP_IOXGET_8(isp, &src->els_did_lo, dst->els_did_lo);
    ISP_IOXGET_8(isp, &src->els_did_mid, dst->els_did_mid);
    ISP_IOXGET_8(isp, &src->els_did_hi, dst->els_did_hi);
    ISP_IOXGET_8(isp, &src->els_reserved2, dst->els_reserved2);
    ISP_IOXGET_16(isp, &src->els_reserved3, dst->els_reserved3);
    ISP_IOXGET_16(isp, &src->els_ctl_flags, dst->els_ctl_flags);
    ISP_IOXGET_32(isp, &src->els_bytecnt, dst->els_bytecnt);
    ISP_IOXGET_32(isp, &src->els_subcode1, dst->els_subcode1);
    ISP_IOXGET_32(isp, &src->els_subcode2, dst->els_subcode2);
    for (i = 0; i < 20; i++) {
        ISP_IOXGET_8(isp, &src->els_reserved4[i], dst->els_reserved4[i]);
    }
}

void
isp_put_els(ispsoftc_t *isp, els_t *src, els_t *dst)
{
    isp_put_hdr(isp, &src->els_hdr, &dst->els_hdr);
    ISP_IOXPUT_32(isp, src->els_handle, &dst->els_handle);
    ISP_IOXPUT_16(isp, src->els_status, &dst->els_status);
    ISP_IOXPUT_16(isp, src->els_nphdl, &dst->els_nphdl);
    ISP_IOXPUT_16(isp, src->els_xmit_dsd_count, &dst->els_xmit_dsd_count);
    ISP_IOXPUT_8(isp, src->els_vphdl, &dst->els_vphdl);
    ISP_IOXPUT_8(isp, src->els_sof, &dst->els_sof);
    ISP_IOXPUT_32(isp, src->els_rxid, &dst->els_rxid);
    ISP_IOXPUT_16(isp, src->els_recv_dsd_count, &dst->els_recv_dsd_count);
    ISP_IOXPUT_8(isp, src->els_opcode, &dst->els_opcode);
    ISP_IOXPUT_8(isp, src->els_reserved1, &dst->els_reserved1);
    ISP_IOXPUT_8(isp, src->els_did_lo, &dst->els_did_lo);
    ISP_IOXPUT_8(isp, src->els_did_mid, &dst->els_did_mid);
    ISP_IOXPUT_8(isp, src->els_did_hi, &dst->els_did_hi);
    ISP_IOXPUT_8(isp, src->els_reserved2, &dst->els_reserved2);
    ISP_IOXPUT_16(isp, src->els_reserved3, &dst->els_reserved3);
    ISP_IOXPUT_16(isp, src->els_ctl_flags, &dst->els_ctl_flags);
    ISP_IOXPUT_32(isp, src->els_recv_bytecnt, &dst->els_recv_bytecnt);
    ISP_IOXPUT_32(isp, src->els_xmit_bytecnt, &dst->els_xmit_bytecnt);
    ISP_IOXPUT_32(isp, src->els_xmit_dsd_length, &dst->els_xmit_dsd_length);
    ISP_IOXPUT_16(isp, src->els_xmit_dsd_a1500, &dst->els_xmit_dsd_a1500);
    ISP_IOXPUT_16(isp, src->els_xmit_dsd_a3116, &dst->els_xmit_dsd_a3116);
    ISP_IOXPUT_16(isp, src->els_xmit_dsd_a4732, &dst->els_xmit_dsd_a4732);
    ISP_IOXPUT_16(isp, src->els_xmit_dsd_a6348, &dst->els_xmit_dsd_a6348);
    ISP_IOXPUT_32(isp, src->els_recv_dsd_length, &dst->els_recv_dsd_length);
    ISP_IOXPUT_16(isp, src->els_recv_dsd_a1500, &dst->els_recv_dsd_a1500);
    ISP_IOXPUT_16(isp, src->els_recv_dsd_a3116, &dst->els_recv_dsd_a3116);
    ISP_IOXPUT_16(isp, src->els_recv_dsd_a4732, &dst->els_recv_dsd_a4732);
    ISP_IOXPUT_16(isp, src->els_recv_dsd_a6348, &dst->els_recv_dsd_a6348);
}

/*
 * FC Structure Canonicalization
 */
void
isp_get_fc_hdr(ispsoftc_t *isp, fc_hdr_t *src, fc_hdr_t *dst)
{
    ISP_IOZGET_8(isp, &src->r_ctl, dst->r_ctl);
    ISP_IOZGET_8(isp, &src->d_id[0], dst->d_id[0]);
    ISP_IOZGET_8(isp, &src->d_id[1], dst->d_id[1]);
    ISP_IOZGET_8(isp, &src->d_id[2], dst->d_id[2]);
    ISP_IOZGET_8(isp, &src->cs_ctl, dst->cs_ctl);
    ISP_IOZGET_8(isp, &src->s_id[0], dst->s_id[0]);
    ISP_IOZGET_8(isp, &src->s_id[1], dst->s_id[1]);
    ISP_IOZGET_8(isp, &src->s_id[2], dst->s_id[2]);
    ISP_IOZGET_8(isp, &src->type, dst->type);
    ISP_IOZGET_8(isp, &src->f_ctl[0], dst->f_ctl[0]);
    ISP_IOZGET_8(isp, &src->f_ctl[1], dst->f_ctl[1]);
    ISP_IOZGET_8(isp, &src->f_ctl[2], dst->f_ctl[2]);
    ISP_IOZGET_8(isp, &src->seq_id, dst->seq_id);
    ISP_IOZGET_8(isp, &src->df_ctl, dst->df_ctl);
    ISP_IOZGET_16(isp, &src->seq_cnt, dst->seq_cnt);
    ISP_IOZGET_16(isp, &src->ox_id, dst->ox_id);
    ISP_IOZGET_16(isp, &src->rx_id, dst->rx_id);
    ISP_IOZGET_32(isp, &src->parameter, dst->parameter);
}

void
isp_put_fc_hdr(ispsoftc_t *isp,
fc_hdr_t *src, fc_hdr_t *dst) { ISP_IOZPUT_8(isp, src->r_ctl, &dst->r_ctl); ISP_IOZPUT_8(isp, src->d_id[0], &dst->d_id[0]); ISP_IOZPUT_8(isp, src->d_id[1], &dst->d_id[1]); ISP_IOZPUT_8(isp, src->d_id[2], &dst->d_id[2]); ISP_IOZPUT_8(isp, src->cs_ctl, &dst->cs_ctl); ISP_IOZPUT_8(isp, src->s_id[0], &dst->s_id[0]); ISP_IOZPUT_8(isp, src->s_id[1], &dst->s_id[1]); ISP_IOZPUT_8(isp, src->s_id[2], &dst->s_id[2]); ISP_IOZPUT_8(isp, src->type, &dst->type); ISP_IOZPUT_8(isp, src->f_ctl[0], &dst->f_ctl[0]); ISP_IOZPUT_8(isp, src->f_ctl[1], &dst->f_ctl[1]); ISP_IOZPUT_8(isp, src->f_ctl[2], &dst->f_ctl[2]); ISP_IOZPUT_8(isp, src->seq_id, &dst->seq_id); ISP_IOZPUT_8(isp, src->df_ctl, &dst->df_ctl); ISP_IOZPUT_16(isp, src->seq_cnt, &dst->seq_cnt); ISP_IOZPUT_16(isp, src->ox_id, &dst->ox_id); ISP_IOZPUT_16(isp, src->rx_id, &dst->rx_id); ISP_IOZPUT_32(isp, src->parameter, &dst->parameter); } void isp_get_fcp_cmnd_iu(ispsoftc_t *isp, fcp_cmnd_iu_t *src, fcp_cmnd_iu_t *dst) { int i; for (i = 0; i < 8; i++) { ISP_IOZGET_8(isp, &src->fcp_cmnd_lun[i], dst->fcp_cmnd_lun[i]); } ISP_IOZGET_8(isp, &src->fcp_cmnd_crn, dst->fcp_cmnd_crn); ISP_IOZGET_8(isp, &src->fcp_cmnd_task_attribute, dst->fcp_cmnd_task_attribute); ISP_IOZGET_8(isp, &src->fcp_cmnd_task_management, dst->fcp_cmnd_task_management); ISP_IOZGET_8(isp, &src->fcp_cmnd_alen_datadir, dst->fcp_cmnd_alen_datadir); for (i = 0; i < 16; i++) { ISP_IOZGET_8(isp, &src->cdb_dl.sf.fcp_cmnd_cdb[i], dst->cdb_dl.sf.fcp_cmnd_cdb[i]); } ISP_IOZGET_32(isp, &src->cdb_dl.sf.fcp_cmnd_dl, dst->cdb_dl.sf.fcp_cmnd_dl); } void isp_put_rft_id(ispsoftc_t *isp, rft_id_t *src, rft_id_t *dst) { int i; isp_put_ct_hdr(isp, &src->rftid_hdr, &dst->rftid_hdr); ISP_IOZPUT_8(isp, src->rftid_reserved, &dst->rftid_reserved); for (i = 0; i < 3; i++) { ISP_IOZPUT_8(isp, src->rftid_portid[i], &dst->rftid_portid[i]); } for (i = 0; i < 8; i++) { ISP_IOZPUT_32(isp, src->rftid_fc4types[i], &dst->rftid_fc4types[i]); } } void isp_get_ct_hdr(ispsoftc_t *isp, ct_hdr_t *src, ct_hdr_t *dst) { ISP_IOZGET_8(isp, &src->ct_revision, dst->ct_revision); ISP_IOZGET_8(isp, &src->ct_in_id[0], dst->ct_in_id[0]); ISP_IOZGET_8(isp, &src->ct_in_id[1], dst->ct_in_id[1]); ISP_IOZGET_8(isp, &src->ct_in_id[2], dst->ct_in_id[2]); ISP_IOZGET_8(isp, &src->ct_fcs_type, dst->ct_fcs_type); ISP_IOZGET_8(isp, &src->ct_fcs_subtype, dst->ct_fcs_subtype); ISP_IOZGET_8(isp, &src->ct_options, dst->ct_options); ISP_IOZGET_8(isp, &src->ct_reserved0, dst->ct_reserved0); ISP_IOZGET_16(isp, &src->ct_cmd_resp, dst->ct_cmd_resp); ISP_IOZGET_16(isp, &src->ct_bcnt_resid, dst->ct_bcnt_resid); ISP_IOZGET_8(isp, &src->ct_reserved1, dst->ct_reserved1); ISP_IOZGET_8(isp, &src->ct_reason, dst->ct_reason); ISP_IOZGET_8(isp, &src->ct_explanation, dst->ct_explanation); ISP_IOZGET_8(isp, &src->ct_vunique, dst->ct_vunique); } void isp_put_ct_hdr(ispsoftc_t *isp, ct_hdr_t *src, ct_hdr_t *dst) { ISP_IOZPUT_8(isp, src->ct_revision, &dst->ct_revision); ISP_IOZPUT_8(isp, src->ct_in_id[0], &dst->ct_in_id[0]); ISP_IOZPUT_8(isp, src->ct_in_id[1], &dst->ct_in_id[1]); ISP_IOZPUT_8(isp, src->ct_in_id[2], &dst->ct_in_id[2]); ISP_IOZPUT_8(isp, src->ct_fcs_type, &dst->ct_fcs_type); ISP_IOZPUT_8(isp, src->ct_fcs_subtype, &dst->ct_fcs_subtype); ISP_IOZPUT_8(isp, src->ct_options, &dst->ct_options); ISP_IOZPUT_8(isp, src->ct_reserved0, &dst->ct_reserved0); ISP_IOZPUT_16(isp, src->ct_cmd_resp, &dst->ct_cmd_resp); ISP_IOZPUT_16(isp, src->ct_bcnt_resid, &dst->ct_bcnt_resid); ISP_IOZPUT_8(isp, src->ct_reserved1, &dst->ct_reserved1); ISP_IOZPUT_8(isp, src->ct_reason, 
&dst->ct_reason); ISP_IOZPUT_8(isp, src->ct_explanation, &dst->ct_explanation); ISP_IOZPUT_8(isp, src->ct_vunique, &dst->ct_vunique); } void isp_put_fcp_rsp_iu(ispsoftc_t *isp, fcp_rsp_iu_t *src, fcp_rsp_iu_t *dst) { int i; for (i = 0; i < ((sizeof (src->fcp_rsp_reserved))/(sizeof (src->fcp_rsp_reserved[0]))); i++) { ISP_IOZPUT_8(isp, src->fcp_rsp_reserved[i], &dst->fcp_rsp_reserved[i]); } ISP_IOZPUT_16(isp, src->fcp_rsp_status_qualifier, &dst->fcp_rsp_status_qualifier); ISP_IOZPUT_8(isp, src->fcp_rsp_bits, &dst->fcp_rsp_bits); ISP_IOZPUT_8(isp, src->fcp_rsp_scsi_status, &dst->fcp_rsp_scsi_status); ISP_IOZPUT_32(isp, src->fcp_rsp_resid, &dst->fcp_rsp_resid); ISP_IOZPUT_32(isp, src->fcp_rsp_snslen, &dst->fcp_rsp_snslen); ISP_IOZPUT_32(isp, src->fcp_rsp_rsplen, &dst->fcp_rsp_rsplen); } #ifdef ISP_TARGET_MODE /* * Command shipping- finish off first queue entry and do dma mapping and * additional segments as needed. * * Called with the first queue entry mostly filled out. * Our job here is to finish that and add additional data * segments if needed. * * We used to do synthetic entries to split data and status * at this level, but that started getting too tricky. */ int isp_send_tgt_cmd(ispsoftc_t *isp, void *fqe, void *segp, uint32_t nsegs, uint32_t totalcnt, isp_ddir_t ddir, void *snsptr, uint32_t snslen) { uint8_t storage[QENTRY_LEN]; uint8_t type, nqe; uint32_t seg, curseg, seglim, nxt, nxtnxt; ispds_t *dsp = NULL; ispds64_t *dsp64 = NULL; void *qe0, *qe1; qe0 = isp_getrqentry(isp); if (qe0 == NULL) { return (CMD_EAGAIN); } nxt = ISP_NXT_QENTRY(isp->isp_reqidx, RQUEST_QUEUE_LEN(isp)); type = ((isphdr_t *)fqe)->rqs_entry_type; nqe = 1; seglim = 0; /* * If we have data to transmit, figure out how many segments can fit into the first entry. */ if (ddir != ISP_NOXFR) { /* * First, figure out how many pieces of data to transfer and what kind and how many we can put into the first queue entry. */ switch (type) { case RQSTYPE_CTIO: dsp = ((ct_entry_t *)fqe)->ct_dataseg; seglim = ISP_RQDSEG; break; case RQSTYPE_CTIO2: dsp = ((ct2_entry_t *)fqe)->rsp.m0.u.ct_dataseg; seglim = ISP_RQDSEG_T2; break; case RQSTYPE_CTIO3: dsp64 = ((ct2_entry_t *)fqe)->rsp.m0.u.ct_dataseg64; seglim = ISP_RQDSEG_T3; break; case RQSTYPE_CTIO7: dsp64 = &((ct7_entry_t *)fqe)->rsp.m0.ds; seglim = 1; break; default: return (CMD_COMPLETE); } } /* * First, fill out any of the data transfer stuff that fits * in the first queue entry. */ if (seglim > nsegs) { seglim = nsegs; } for (seg = curseg = 0; curseg < seglim; curseg++) { if (dsp64) { XS_GET_DMA64_SEG(dsp64++, segp, seg++); } else { XS_GET_DMA_SEG(dsp++, segp, seg++); } } /* * Second, start building additional continuation segments as needed. 
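 * Each continuation entry holds up to ISP_CDSEG (or ISP_CDSEG64 when
 * 64 bit DMA descriptors are in use) data segments. If advancing the
 * request ring index (ISP_NXT_QENTRY) would run into the chip's output
 * index even after re-reading it, the ring is full and we give up
 * with CMD_EAGAIN.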
 */
    while (seg < nsegs) {
        nxtnxt = ISP_NXT_QENTRY(nxt, RQUEST_QUEUE_LEN(isp));
        if (nxtnxt == isp->isp_reqodx) {
            isp->isp_reqodx = ISP_READ(isp, isp->isp_rqstoutrp);
            if (nxtnxt == isp->isp_reqodx)
                return (CMD_EAGAIN);
        }
        ISP_MEMZERO(storage, QENTRY_LEN);
        qe1 = ISP_QUEUE_ENTRY(isp->isp_rquest, nxt);
        nxt = nxtnxt;
        if (dsp64) {
            ispcontreq64_t *crq = (ispcontreq64_t *) storage;
            seglim = ISP_CDSEG64;
            crq->req_header.rqs_entry_type = RQSTYPE_A64_CONT;
            crq->req_header.rqs_entry_count = 1;
            dsp64 = crq->req_dataseg;
        } else {
            ispcontreq_t *crq = (ispcontreq_t *) storage;
            seglim = ISP_CDSEG;
            crq->req_header.rqs_entry_type = RQSTYPE_DATASEG;
            crq->req_header.rqs_entry_count = 1;
            dsp = crq->req_dataseg;
        }
        if (seg + seglim > nsegs) {
            seglim = nsegs - seg;
        }
        for (curseg = 0; curseg < seglim; curseg++) {
            if (dsp64) {
                XS_GET_DMA64_SEG(dsp64++, segp, seg++);
            } else {
                XS_GET_DMA_SEG(dsp++, segp, seg++);
            }
        }
        if (dsp64) {
            isp_put_cont64_req(isp, (ispcontreq64_t *)storage, qe1);
        } else {
            isp_put_cont_req(isp, (ispcontreq_t *)storage, qe1);
        }
        if (isp->isp_dblev & ISP_LOGTDEBUG1) {
            isp_print_bytes(isp, "additional queue entry", QENTRY_LEN, storage);
        }
        nqe++;
    }

    /*
     * Third, now patch up the first queue entry with the number of segments
     * we actually are going to be transmitting. At the same time, handle
     * any mode 2 requests.
     */
    ((isphdr_t *)fqe)->rqs_entry_count = nqe;
    switch (type) {
    case RQSTYPE_CTIO:
        ((ct_entry_t *)fqe)->ct_seg_count = nsegs;
        isp_put_ctio(isp, fqe, qe0);
        break;
    case RQSTYPE_CTIO2:
    case RQSTYPE_CTIO3:
        if (((ct2_entry_t *)fqe)->ct_flags & CT2_FLAG_MODE2) {
            ((ct2_entry_t *)fqe)->ct_seg_count = 1;
        } else {
            ((ct2_entry_t *)fqe)->ct_seg_count = nsegs;
        }
        if (ISP_CAP_2KLOGIN(isp)) {
            isp_put_ctio2e(isp, fqe, qe0);
        } else {
            isp_put_ctio2(isp, fqe, qe0);
        }
        break;
    case RQSTYPE_CTIO7:
        if (((ct7_entry_t *)fqe)->ct_flags & CT7_FLAG_MODE2) {
            ((ct7_entry_t *)fqe)->ct_seg_count = 1;
        } else {
            ((ct7_entry_t *)fqe)->ct_seg_count = nsegs;
        }
        isp_put_ctio7(isp, fqe, qe0);
        break;
    default:
        return (CMD_COMPLETE);
    }
    if (isp->isp_dblev & ISP_LOGTDEBUG1) {
        isp_print_bytes(isp, "first queue entry", QENTRY_LEN, fqe);
    }
    ISP_ADD_REQUEST(isp, nxt);
    return (CMD_QUEUED);
}

int
isp_allocate_xs_tgt(ispsoftc_t *isp, void *xs, uint32_t *handlep)
{
    isp_hdl_t *hdp;

    hdp = isp->isp_tgtfree;
    if (hdp == NULL) {
        return (-1);
    }
    isp->isp_tgtfree = hdp->cmd;
    hdp->cmd = xs;
    hdp->handle = (hdp - isp->isp_tgtlist);
    hdp->handle |= (ISP_HANDLE_TARGET << ISP_HANDLE_USAGE_SHIFT);
    /*
     * Target handles for SCSI cards are only 16 bits, so
     * sequence number protection will be omitted.
     */
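    /*
     * For FC cards, a rolling sequence number is folded into the upper
     * bits of the handle instead, so a stale, recycled handle can be
     * told apart from a live one.
     */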
*/ if (IS_FC(isp)) { hdp->handle |= (isp->isp_seqno++ << ISP_HANDLE_SEQ_SHIFT); } *handlep = hdp->handle; return (0); } void * isp_find_xs_tgt(ispsoftc_t *isp, uint32_t handle) { if (!ISP_VALID_TGT_HANDLE(isp, handle)) { isp_prt(isp, ISP_LOGERR, "%s: bad handle 0x%x", __func__, handle); return (NULL); } return (isp->isp_tgtlist[(handle & ISP_HANDLE_CMD_MASK)].cmd); } uint32_t isp_find_tgt_handle(ispsoftc_t *isp, void *xs) { uint32_t i, foundhdl = ISP_HANDLE_FREE; if (xs != NULL) { for (i = 0; i < isp->isp_maxcmds; i++) { if (isp->isp_tgtlist[i].cmd != xs) { continue; } foundhdl = isp->isp_tgtlist[i].handle; break; } } return (foundhdl); } void isp_destroy_tgt_handle(ispsoftc_t *isp, uint32_t handle) { if (!ISP_VALID_TGT_HANDLE(isp, handle)) { isp_prt(isp, ISP_LOGERR, "%s: bad handle 0x%x", __func__, handle); } else { isp->isp_tgtlist[(handle & ISP_HANDLE_CMD_MASK)].handle = ISP_HANDLE_FREE; isp->isp_tgtlist[(handle & ISP_HANDLE_CMD_MASK)].cmd = isp->isp_tgtfree; isp->isp_tgtfree = &isp->isp_tgtlist[(handle & ISP_HANDLE_CMD_MASK)]; } } #endif /* * Find port database entries */ int isp_find_pdb_by_wwn(ispsoftc_t *isp, int chan, uint64_t wwn, fcportdb_t **lptr) { fcparam *fcp; int i; if (chan >= isp->isp_nchan) return (0); fcp = FCPARAM(isp, chan); for (i = 0; i < MAX_FC_TARG; i++) { fcportdb_t *lp = &fcp->portdb[i]; if (lp->state == FC_PORTDB_STATE_NIL) continue; if (lp->port_wwn == wwn) { *lptr = lp; return (1); } } return (0); } #ifdef ISP_TARGET_MODE int isp_find_pdb_by_handle(ispsoftc_t *isp, int chan, uint32_t handle, fcportdb_t **lptr) { fcparam *fcp; int i; if (chan >= isp->isp_nchan) return (0); fcp = FCPARAM(isp, chan); for (i = 0; i < MAX_FC_TARG; i++) { fcportdb_t *lp = &fcp->portdb[i]; if (lp->state == FC_PORTDB_STATE_NIL) continue; if (lp->handle == handle) { *lptr = lp; return (1); } } return (0); } int isp_find_pdb_by_sid(ispsoftc_t *isp, int chan, uint32_t sid, fcportdb_t **lptr) { fcparam *fcp; int i; if (chan >= isp->isp_nchan) return (0); fcp = FCPARAM(isp, chan); for (i = 0; i < MAX_FC_TARG; i++) { fcportdb_t *lp = &fcp->portdb[i]; if (lp->state == FC_PORTDB_STATE_NIL) continue; if (lp->portid == sid) { *lptr = lp; return (1); } } return (0); } void isp_find_chan_by_did(ispsoftc_t *isp, uint32_t did, uint16_t *cp) { uint16_t chan; *cp = ISP_NOCHAN; for (chan = 0; chan < isp->isp_nchan; chan++) { fcparam *fcp = FCPARAM(isp, chan); if ((fcp->role & ISP_ROLE_TARGET) == 0 || fcp->isp_fwstate != FW_READY || fcp->isp_loopstate < LOOP_PDB_RCVD) { continue; } if (fcp->isp_portid == did) { *cp = chan; break; } } } /* * Add an initiator device to the port database */ void isp_add_wwn_entry(ispsoftc_t *isp, int chan, uint64_t wwpn, uint64_t wwnn, uint16_t nphdl, uint32_t s_id, uint16_t prli_params) { char buf[64]; fcparam *fcp; fcportdb_t *lp; int i, change; fcp = FCPARAM(isp, chan); if (nphdl >= MAX_NPORT_HANDLE) { isp_prt(isp, ISP_LOGTINFO|ISP_LOGWARN, "Chan %d WWPN 0x%016llx " "PortID 0x%06x handle 0x%x -- bad handle", chan, (unsigned long long) wwpn, s_id, nphdl); return; } /* * If valid record for requested handle already exists, update it * with new parameters. Some cases of update can be suspicious, * so log them verbosely and dump the whole port database. 
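 * (Suspicious here means things like an existing login silently changing
 * its PortID or WWPN rather than simply logging in again.)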
 */
    if ((VALID_INI(wwpn) && isp_find_pdb_by_wwn(isp, chan, wwpn, &lp)) ||
        (s_id != PORT_NONE && isp_find_pdb_by_sid(isp, chan, s_id, &lp))) {
        change = 0;
        lp->new_portid = lp->portid;
        lp->new_prli_word3 = lp->prli_word3;
        if (s_id != PORT_NONE && lp->portid != s_id) {
            if (lp->portid == PORT_NONE) {
                isp_prt(isp, ISP_LOGTINFO,
                    "Chan %d WWPN 0x%016llx handle 0x%x "
                    "gets PortID 0x%06x",
                    chan, (unsigned long long) lp->port_wwn,
                    nphdl, s_id);
            } else {
                isp_prt(isp, ISP_LOGTINFO|ISP_LOGWARN,
                    "Chan %d WWPN 0x%016llx handle 0x%x "
                    "changes PortID 0x%06x to 0x%06x",
                    chan, (unsigned long long) lp->port_wwn,
                    nphdl, lp->portid, s_id);
                if (isp->isp_dblev & (ISP_LOGTINFO|ISP_LOGWARN))
                    isp_dump_portdb(isp, chan);
            }
            lp->new_portid = s_id;
            change++;
        }
        if (VALID_INI(wwpn) && lp->port_wwn != wwpn) {
            if (!VALID_INI(lp->port_wwn)) {
                isp_prt(isp, ISP_LOGTINFO,
                    "Chan %d PortID 0x%06x handle 0x%x "
                    "gets WWPN 0x%016llx",
                    chan, lp->portid, nphdl,
                    (unsigned long long) wwpn);
            } else if (lp->port_wwn != wwpn) {
                isp_prt(isp, ISP_LOGTINFO|ISP_LOGWARN,
                    "Chan %d PortID 0x%06x handle 0x%x "
                    "changes WWPN 0x%016llx to 0x%016llx",
                    chan, lp->portid, nphdl,
                    (unsigned long long) lp->port_wwn,
                    (unsigned long long) wwpn);
                if (isp->isp_dblev & (ISP_LOGTINFO|ISP_LOGWARN))
                    isp_dump_portdb(isp, chan);
            }
            lp->port_wwn = wwpn;
            change++;
        }
        if (VALID_INI(wwnn) && lp->node_wwn != wwnn) {
            if (!VALID_INI(lp->node_wwn)) {
                isp_prt(isp, ISP_LOGTINFO,
                    "Chan %d PortID 0x%06x handle 0x%x "
                    "gets WWNN 0x%016llx",
                    chan, lp->portid, nphdl,
                    (unsigned long long) wwnn);
            } else if (lp->node_wwn != wwnn) {
                isp_prt(isp, ISP_LOGTINFO,
                    "Chan %d PortID 0x%06x handle 0x%x "
                    "changes WWNN 0x%016llx to 0x%016llx",
                    chan, lp->portid, nphdl,
                    (unsigned long long) lp->node_wwn,
                    (unsigned long long) wwnn);
            }
            lp->node_wwn = wwnn;
            change++;
        }
        if (prli_params != 0 && lp->prli_word3 != prli_params) {
            isp_gen_role_str(buf, sizeof (buf), prli_params);
            isp_prt(isp, ISP_LOGTINFO|ISP_LOGCONFIG,
                "Chan %d WWPN 0x%016llx PortID 0x%06x "
                "handle 0x%x changes PRLI Word 3 %s",
                chan, (unsigned long long) lp->port_wwn,
                lp->portid, lp->handle, buf);
            lp->new_prli_word3 = prli_params;
            change++;
        }
        if (lp->handle != nphdl) {
            isp_prt(isp, ISP_LOGTINFO|ISP_LOGCONFIG,
                "Chan %d WWPN 0x%016llx PortID 0x%06x "
                "changes handle 0x%x to 0x%x",
                chan, (unsigned long long) lp->port_wwn,
                lp->portid, lp->handle, nphdl);
            lp->handle = nphdl;
            change++;
        }
        lp->state = FC_PORTDB_STATE_VALID;
        if (change) {
            isp_async(isp, ISPASYNC_DEV_CHANGED, chan, lp);
            lp->portid = lp->new_portid;
            lp->prli_word3 = lp->new_prli_word3;
            lp->new_prli_word3 = 0;
            lp->new_portid = 0;
        } else {
            isp_prt(isp, ISP_LOGTINFO,
                "Chan %d WWPN 0x%016llx PortID 0x%06x "
                "handle 0x%x reentered",
                chan, (unsigned long long) lp->port_wwn,
                lp->portid, lp->handle);
            isp_async(isp, ISPASYNC_DEV_STAYED, chan, lp);
        }
        return;
    }

    /* Search for room to insert new record. */
    for (i = 0; i < MAX_FC_TARG; i++) {
        if (fcp->portdb[i].state == FC_PORTDB_STATE_NIL)
            break;
    }
    if (i >= MAX_FC_TARG) {
        isp_prt(isp, ISP_LOGTINFO|ISP_LOGWARN,
            "Chan %d WWPN 0x%016llx PortID 0x%06x handle 0x%x "
            "-- no room in port database",
            chan, (unsigned long long) wwpn, s_id, nphdl);
        if (isp->isp_dblev & (ISP_LOGTINFO|ISP_LOGWARN))
            isp_dump_portdb(isp, chan);
        return;
    }

    /* Insert new record and mark it valid. */
    lp = &fcp->portdb[i];
    ISP_MEMZERO(lp, sizeof (fcportdb_t));
    lp->handle = nphdl;
    lp->portid = s_id;
    lp->port_wwn = wwpn;
    lp->node_wwn = wwnn;
    lp->prli_word3 = (prli_params != 0) ?
        prli_params : PRLI_WD3_INITIATOR_FUNCTION;
    lp->state = FC_PORTDB_STATE_VALID;

    isp_gen_role_str(buf, sizeof (buf), lp->prli_word3);
    isp_prt(isp, ISP_LOGTINFO, "Chan %d WWPN 0x%016llx "
        "PortID 0x%06x handle 0x%x vtgt %d %s added", chan,
        (unsigned long long) wwpn, s_id, nphdl, i, buf);

    /* Notify above levels about new port arrival. */
    isp_async(isp, ISPASYNC_DEV_ARRIVED, chan, lp);
}

/*
 * Remove a target device from the port database
 */
void
isp_del_wwn_entry(ispsoftc_t *isp, int chan, uint64_t wwpn, uint16_t nphdl, uint32_t s_id)
{
    fcparam *fcp;
    fcportdb_t *lp;

    if (nphdl >= MAX_NPORT_HANDLE) {
        isp_prt(isp, ISP_LOGWARN, "Chan %d WWPN 0x%016llx PortID 0x%06x bad handle 0x%x",
            chan, (unsigned long long) wwpn, s_id, nphdl);
        return;
    }

    fcp = FCPARAM(isp, chan);
    if (isp_find_pdb_by_handle(isp, chan, nphdl, &lp) == 0) {
        isp_prt(isp, ISP_LOGWARN, "Chan %d WWPN 0x%016llx PortID 0x%06x handle 0x%x cannot be found to be deleted",
            chan, (unsigned long long) wwpn, s_id, nphdl);
        isp_dump_portdb(isp, chan);
        return;
    }
    isp_prt(isp, ISP_LOGTINFO, "Chan %d WWPN 0x%016llx PortID 0x%06x handle 0x%x vtgt %d deleted",
        chan, (unsigned long long) lp->port_wwn, lp->portid, nphdl, FC_PORTDB_TGT(isp, chan, lp));
    lp->state = FC_PORTDB_STATE_NIL;

    /* Notify above levels about gone port. */
    isp_async(isp, ISPASYNC_DEV_GONE, chan, lp);
}

void
isp_del_all_wwn_entries(ispsoftc_t *isp, int chan)
{
    fcparam *fcp;
    int i;

    if (!IS_FC(isp)) {
        return;
    }

    /*
     * Handle iterations over all channels via recursion
     */
    if (chan == ISP_NOCHAN) {
        for (chan = 0; chan < isp->isp_nchan; chan++) {
            isp_del_all_wwn_entries(isp, chan);
        }
        return;
    }

    if (chan >= isp->isp_nchan) {
        return;
    }

    fcp = FCPARAM(isp, chan);
    if (fcp == NULL) {
        return;
    }
    for (i = 0; i < MAX_FC_TARG; i++) {
        fcportdb_t *lp = &fcp->portdb[i];

        if (lp->state != FC_PORTDB_STATE_NIL)
            isp_del_wwn_entry(isp, chan, lp->port_wwn, lp->handle, lp->portid);
    }
}

void
isp_del_wwn_entries(ispsoftc_t *isp, isp_notify_t *mp)
{
    fcportdb_t *lp;

    /*
     * Handle iterations over all channels via recursion
     */
    if (mp->nt_channel == ISP_NOCHAN) {
        for (mp->nt_channel = 0; mp->nt_channel < isp->isp_nchan; mp->nt_channel++) {
            isp_del_wwn_entries(isp, mp);
        }
        mp->nt_channel = ISP_NOCHAN;
        return;
    }

    /*
     * We have an entry which is only partially identified.
     *
     * It's only known by WWN, N-Port handle, or Port ID.
     * We need to find the actual entry so we can delete it.
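 * Try the most specific key first: the N-Port handle, then the WWPN,
 * and finally the Port ID.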
*/ if (mp->nt_nphdl != NIL_HANDLE) { if (isp_find_pdb_by_handle(isp, mp->nt_channel, mp->nt_nphdl, &lp)) { isp_del_wwn_entry(isp, mp->nt_channel, lp->port_wwn, lp->handle, lp->portid); return; } } if (mp->nt_wwn != INI_ANY) { if (isp_find_pdb_by_wwn(isp, mp->nt_channel, mp->nt_wwn, &lp)) { isp_del_wwn_entry(isp, mp->nt_channel, lp->port_wwn, lp->handle, lp->portid); return; } } if (mp->nt_sid != PORT_ANY && mp->nt_sid != PORT_NONE) { if (isp_find_pdb_by_sid(isp, mp->nt_channel, mp->nt_sid, &lp)) { isp_del_wwn_entry(isp, mp->nt_channel, lp->port_wwn, lp->handle, lp->portid); return; } } isp_prt(isp, ISP_LOGWARN, "Chan %d unable to find entry to delete WWPN 0x%016jx PortID 0x%06x handle 0x%x", mp->nt_channel, mp->nt_wwn, mp->nt_sid, mp->nt_nphdl); } void isp_put_atio(ispsoftc_t *isp, at_entry_t *src, at_entry_t *dst) { int i; isp_put_hdr(isp, &src->at_header, &dst->at_header); ISP_IOXPUT_16(isp, src->at_reserved, &dst->at_reserved); ISP_IOXPUT_16(isp, src->at_handle, &dst->at_handle); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->at_lun, &dst->at_iid); ISP_IOXPUT_8(isp, src->at_iid, &dst->at_lun); ISP_IOXPUT_8(isp, src->at_cdblen, &dst->at_tgt); ISP_IOXPUT_8(isp, src->at_tgt, &dst->at_cdblen); ISP_IOXPUT_8(isp, src->at_status, &dst->at_scsi_status); ISP_IOXPUT_8(isp, src->at_scsi_status, &dst->at_status); ISP_IOXPUT_8(isp, src->at_tag_val, &dst->at_tag_type); ISP_IOXPUT_8(isp, src->at_tag_type, &dst->at_tag_val); } else { ISP_IOXPUT_8(isp, src->at_lun, &dst->at_lun); ISP_IOXPUT_8(isp, src->at_iid, &dst->at_iid); ISP_IOXPUT_8(isp, src->at_cdblen, &dst->at_cdblen); ISP_IOXPUT_8(isp, src->at_tgt, &dst->at_tgt); ISP_IOXPUT_8(isp, src->at_status, &dst->at_status); ISP_IOXPUT_8(isp, src->at_scsi_status, &dst->at_scsi_status); ISP_IOXPUT_8(isp, src->at_tag_val, &dst->at_tag_val); ISP_IOXPUT_8(isp, src->at_tag_type, &dst->at_tag_type); } ISP_IOXPUT_32(isp, src->at_flags, &dst->at_flags); for (i = 0; i < ATIO_CDBLEN; i++) { ISP_IOXPUT_8(isp, src->at_cdb[i], &dst->at_cdb[i]); } for (i = 0; i < QLTM_SENSELEN; i++) { ISP_IOXPUT_8(isp, src->at_sense[i], &dst->at_sense[i]); } } void isp_get_atio(ispsoftc_t *isp, at_entry_t *src, at_entry_t *dst) { int i; isp_get_hdr(isp, &src->at_header, &dst->at_header); ISP_IOXGET_16(isp, &src->at_reserved, dst->at_reserved); ISP_IOXGET_16(isp, &src->at_handle, dst->at_handle); if (ISP_IS_SBUS(isp)) { ISP_IOXGET_8(isp, &src->at_lun, dst->at_iid); ISP_IOXGET_8(isp, &src->at_iid, dst->at_lun); ISP_IOXGET_8(isp, &src->at_cdblen, dst->at_tgt); ISP_IOXGET_8(isp, &src->at_tgt, dst->at_cdblen); ISP_IOXGET_8(isp, &src->at_status, dst->at_scsi_status); ISP_IOXGET_8(isp, &src->at_scsi_status, dst->at_status); ISP_IOXGET_8(isp, &src->at_tag_val, dst->at_tag_type); ISP_IOXGET_8(isp, &src->at_tag_type, dst->at_tag_val); } else { ISP_IOXGET_8(isp, &src->at_lun, dst->at_lun); ISP_IOXGET_8(isp, &src->at_iid, dst->at_iid); ISP_IOXGET_8(isp, &src->at_cdblen, dst->at_cdblen); ISP_IOXGET_8(isp, &src->at_tgt, dst->at_tgt); ISP_IOXGET_8(isp, &src->at_status, dst->at_status); ISP_IOXGET_8(isp, &src->at_scsi_status, dst->at_scsi_status); ISP_IOXGET_8(isp, &src->at_tag_val, dst->at_tag_val); ISP_IOXGET_8(isp, &src->at_tag_type, dst->at_tag_type); } ISP_IOXGET_32(isp, &src->at_flags, dst->at_flags); for (i = 0; i < ATIO_CDBLEN; i++) { ISP_IOXGET_8(isp, &src->at_cdb[i], dst->at_cdb[i]); } for (i = 0; i < QLTM_SENSELEN; i++) { ISP_IOXGET_8(isp, &src->at_sense[i], dst->at_sense[i]); } } void isp_put_atio2(ispsoftc_t *isp, at2_entry_t *src, at2_entry_t *dst) { int i; isp_put_hdr(isp, 
&src->at_header, &dst->at_header); ISP_IOXPUT_32(isp, src->at_reserved, &dst->at_reserved); ISP_IOXPUT_8(isp, src->at_lun, &dst->at_lun); ISP_IOXPUT_8(isp, src->at_iid, &dst->at_iid); ISP_IOXPUT_16(isp, src->at_rxid, &dst->at_rxid); ISP_IOXPUT_16(isp, src->at_flags, &dst->at_flags); ISP_IOXPUT_16(isp, src->at_status, &dst->at_status); ISP_IOXPUT_8(isp, src->at_crn, &dst->at_crn); ISP_IOXPUT_8(isp, src->at_taskcodes, &dst->at_taskcodes); ISP_IOXPUT_8(isp, src->at_taskflags, &dst->at_taskflags); ISP_IOXPUT_8(isp, src->at_execodes, &dst->at_execodes); for (i = 0; i < ATIO2_CDBLEN; i++) { ISP_IOXPUT_8(isp, src->at_cdb[i], &dst->at_cdb[i]); } ISP_IOXPUT_32(isp, src->at_datalen, &dst->at_datalen); ISP_IOXPUT_16(isp, src->at_scclun, &dst->at_scclun); for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->at_wwpn[i], &dst->at_wwpn[i]); } for (i = 0; i < 6; i++) { ISP_IOXPUT_16(isp, src->at_reserved2[i], &dst->at_reserved2[i]); } ISP_IOXPUT_16(isp, src->at_oxid, &dst->at_oxid); } void isp_put_atio2e(ispsoftc_t *isp, at2e_entry_t *src, at2e_entry_t *dst) { int i; isp_put_hdr(isp, &src->at_header, &dst->at_header); ISP_IOXPUT_32(isp, src->at_reserved, &dst->at_reserved); ISP_IOXPUT_16(isp, src->at_iid, &dst->at_iid); ISP_IOXPUT_16(isp, src->at_rxid, &dst->at_rxid); ISP_IOXPUT_16(isp, src->at_flags, &dst->at_flags); ISP_IOXPUT_16(isp, src->at_status, &dst->at_status); ISP_IOXPUT_8(isp, src->at_crn, &dst->at_crn); ISP_IOXPUT_8(isp, src->at_taskcodes, &dst->at_taskcodes); ISP_IOXPUT_8(isp, src->at_taskflags, &dst->at_taskflags); ISP_IOXPUT_8(isp, src->at_execodes, &dst->at_execodes); for (i = 0; i < ATIO2_CDBLEN; i++) { ISP_IOXPUT_8(isp, src->at_cdb[i], &dst->at_cdb[i]); } ISP_IOXPUT_32(isp, src->at_datalen, &dst->at_datalen); ISP_IOXPUT_16(isp, src->at_scclun, &dst->at_scclun); for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->at_wwpn[i], &dst->at_wwpn[i]); } for (i = 0; i < 6; i++) { ISP_IOXPUT_16(isp, src->at_reserved2[i], &dst->at_reserved2[i]); } ISP_IOXPUT_16(isp, src->at_oxid, &dst->at_oxid); } void isp_get_atio2(ispsoftc_t *isp, at2_entry_t *src, at2_entry_t *dst) { int i; isp_get_hdr(isp, &src->at_header, &dst->at_header); ISP_IOXGET_32(isp, &src->at_reserved, dst->at_reserved); ISP_IOXGET_8(isp, &src->at_lun, dst->at_lun); ISP_IOXGET_8(isp, &src->at_iid, dst->at_iid); ISP_IOXGET_16(isp, &src->at_rxid, dst->at_rxid); ISP_IOXGET_16(isp, &src->at_flags, dst->at_flags); ISP_IOXGET_16(isp, &src->at_status, dst->at_status); ISP_IOXGET_8(isp, &src->at_crn, dst->at_crn); ISP_IOXGET_8(isp, &src->at_taskcodes, dst->at_taskcodes); ISP_IOXGET_8(isp, &src->at_taskflags, dst->at_taskflags); ISP_IOXGET_8(isp, &src->at_execodes, dst->at_execodes); for (i = 0; i < ATIO2_CDBLEN; i++) { ISP_IOXGET_8(isp, &src->at_cdb[i], dst->at_cdb[i]); } ISP_IOXGET_32(isp, &src->at_datalen, dst->at_datalen); ISP_IOXGET_16(isp, &src->at_scclun, dst->at_scclun); for (i = 0; i < 4; i++) { ISP_IOXGET_16(isp, &src->at_wwpn[i], dst->at_wwpn[i]); } for (i = 0; i < 6; i++) { ISP_IOXGET_16(isp, &src->at_reserved2[i], dst->at_reserved2[i]); } ISP_IOXGET_16(isp, &src->at_oxid, dst->at_oxid); } void isp_get_atio2e(ispsoftc_t *isp, at2e_entry_t *src, at2e_entry_t *dst) { int i; isp_get_hdr(isp, &src->at_header, &dst->at_header); ISP_IOXGET_32(isp, &src->at_reserved, dst->at_reserved); ISP_IOXGET_16(isp, &src->at_iid, dst->at_iid); ISP_IOXGET_16(isp, &src->at_rxid, dst->at_rxid); ISP_IOXGET_16(isp, &src->at_flags, dst->at_flags); ISP_IOXGET_16(isp, &src->at_status, dst->at_status); ISP_IOXGET_8(isp, &src->at_crn, dst->at_crn); ISP_IOXGET_8(isp, 
&src->at_taskcodes, dst->at_taskcodes); ISP_IOXGET_8(isp, &src->at_taskflags, dst->at_taskflags); ISP_IOXGET_8(isp, &src->at_execodes, dst->at_execodes); for (i = 0; i < ATIO2_CDBLEN; i++) { ISP_IOXGET_8(isp, &src->at_cdb[i], dst->at_cdb[i]); } ISP_IOXGET_32(isp, &src->at_datalen, dst->at_datalen); ISP_IOXGET_16(isp, &src->at_scclun, dst->at_scclun); for (i = 0; i < 4; i++) { ISP_IOXGET_16(isp, &src->at_wwpn[i], dst->at_wwpn[i]); } for (i = 0; i < 6; i++) { ISP_IOXGET_16(isp, &src->at_reserved2[i], dst->at_reserved2[i]); } ISP_IOXGET_16(isp, &src->at_oxid, dst->at_oxid); } void isp_get_atio7(ispsoftc_t *isp, at7_entry_t *src, at7_entry_t *dst) { ISP_IOXGET_8(isp, &src->at_type, dst->at_type); ISP_IOXGET_8(isp, &src->at_count, dst->at_count); ISP_IOXGET_16(isp, &src->at_ta_len, dst->at_ta_len); ISP_IOXGET_32(isp, &src->at_rxid, dst->at_rxid); isp_get_fc_hdr(isp, &src->at_hdr, &dst->at_hdr); isp_get_fcp_cmnd_iu(isp, &src->at_cmnd, &dst->at_cmnd); } void isp_put_ctio(ispsoftc_t *isp, ct_entry_t *src, ct_entry_t *dst) { int i; isp_put_hdr(isp, &src->ct_header, &dst->ct_header); ISP_IOXPUT_16(isp, src->ct_syshandle, &dst->ct_syshandle); ISP_IOXPUT_16(isp, src->ct_fwhandle, &dst->ct_fwhandle); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->ct_iid, &dst->ct_lun); ISP_IOXPUT_8(isp, src->ct_lun, &dst->ct_iid); ISP_IOXPUT_8(isp, src->ct_tgt, &dst->ct_reserved2); ISP_IOXPUT_8(isp, src->ct_reserved2, &dst->ct_tgt); ISP_IOXPUT_8(isp, src->ct_status, &dst->ct_scsi_status); ISP_IOXPUT_8(isp, src->ct_scsi_status, &dst->ct_status); ISP_IOXPUT_8(isp, src->ct_tag_type, &dst->ct_tag_val); ISP_IOXPUT_8(isp, src->ct_tag_val, &dst->ct_tag_type); } else { ISP_IOXPUT_8(isp, src->ct_iid, &dst->ct_iid); ISP_IOXPUT_8(isp, src->ct_lun, &dst->ct_lun); ISP_IOXPUT_8(isp, src->ct_tgt, &dst->ct_tgt); ISP_IOXPUT_8(isp, src->ct_reserved2, &dst->ct_reserved2); ISP_IOXPUT_8(isp, src->ct_scsi_status, &dst->ct_scsi_status); ISP_IOXPUT_8(isp, src->ct_status, &dst->ct_status); ISP_IOXPUT_8(isp, src->ct_tag_type, &dst->ct_tag_type); ISP_IOXPUT_8(isp, src->ct_tag_val, &dst->ct_tag_val); } ISP_IOXPUT_32(isp, src->ct_flags, &dst->ct_flags); ISP_IOXPUT_32(isp, src->ct_xfrlen, &dst->ct_xfrlen); ISP_IOXPUT_32(isp, src->ct_resid, &dst->ct_resid); ISP_IOXPUT_16(isp, src->ct_timeout, &dst->ct_timeout); ISP_IOXPUT_16(isp, src->ct_seg_count, &dst->ct_seg_count); for (i = 0; i < ISP_RQDSEG; i++) { ISP_IOXPUT_32(isp, src->ct_dataseg[i].ds_base, &dst->ct_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->ct_dataseg[i].ds_count, &dst->ct_dataseg[i].ds_count); } } void isp_get_ctio(ispsoftc_t *isp, ct_entry_t *src, ct_entry_t *dst) { int i; isp_get_hdr(isp, &src->ct_header, &dst->ct_header); ISP_IOXGET_16(isp, &src->ct_syshandle, dst->ct_syshandle); ISP_IOXGET_16(isp, &src->ct_fwhandle, dst->ct_fwhandle); if (ISP_IS_SBUS(isp)) { ISP_IOXGET_8(isp, &src->ct_lun, dst->ct_iid); ISP_IOXGET_8(isp, &src->ct_iid, dst->ct_lun); ISP_IOXGET_8(isp, &src->ct_reserved2, dst->ct_tgt); ISP_IOXGET_8(isp, &src->ct_tgt, dst->ct_reserved2); ISP_IOXGET_8(isp, &src->ct_status, dst->ct_scsi_status); ISP_IOXGET_8(isp, &src->ct_scsi_status, dst->ct_status); ISP_IOXGET_8(isp, &src->ct_tag_val, dst->ct_tag_type); ISP_IOXGET_8(isp, &src->ct_tag_type, dst->ct_tag_val); } else { ISP_IOXGET_8(isp, &src->ct_lun, dst->ct_lun); ISP_IOXGET_8(isp, &src->ct_iid, dst->ct_iid); ISP_IOXGET_8(isp, &src->ct_reserved2, dst->ct_reserved2); ISP_IOXGET_8(isp, &src->ct_tgt, dst->ct_tgt); ISP_IOXGET_8(isp, &src->ct_status, dst->ct_status); ISP_IOXGET_8(isp, &src->ct_scsi_status, dst->ct_scsi_status); 
ISP_IOXGET_8(isp, &src->ct_tag_val, dst->ct_tag_val); ISP_IOXGET_8(isp, &src->ct_tag_type, dst->ct_tag_type); } ISP_IOXGET_32(isp, &src->ct_flags, dst->ct_flags); ISP_IOXGET_32(isp, &src->ct_xfrlen, dst->ct_xfrlen); ISP_IOXGET_32(isp, &src->ct_resid, dst->ct_resid); ISP_IOXGET_16(isp, &src->ct_timeout, dst->ct_timeout); ISP_IOXGET_16(isp, &src->ct_seg_count, dst->ct_seg_count); for (i = 0; i < ISP_RQDSEG; i++) { ISP_IOXGET_32(isp, &src->ct_dataseg[i].ds_base, dst->ct_dataseg[i].ds_base); ISP_IOXGET_32(isp, &src->ct_dataseg[i].ds_count, dst->ct_dataseg[i].ds_count); } } void isp_put_ctio2(ispsoftc_t *isp, ct2_entry_t *src, ct2_entry_t *dst) { int i; isp_put_hdr(isp, &src->ct_header, &dst->ct_header); ISP_IOXPUT_32(isp, src->ct_syshandle, &dst->ct_syshandle); ISP_IOXPUT_8(isp, src->ct_lun, &dst->ct_lun); ISP_IOXPUT_8(isp, src->ct_iid, &dst->ct_iid); ISP_IOXPUT_16(isp, src->ct_rxid, &dst->ct_rxid); ISP_IOXPUT_16(isp, src->ct_flags, &dst->ct_flags); ISP_IOXPUT_16(isp, src->ct_timeout, &dst->ct_timeout); ISP_IOXPUT_16(isp, src->ct_seg_count, &dst->ct_seg_count); ISP_IOXPUT_32(isp, src->ct_resid, &dst->ct_resid); ISP_IOXPUT_32(isp, src->ct_reloff, &dst->ct_reloff); if ((src->ct_flags & CT2_FLAG_MMASK) == CT2_FLAG_MODE0) { ISP_IOXPUT_32(isp, src->rsp.m0._reserved, &dst->rsp.m0._reserved); ISP_IOXPUT_16(isp, src->rsp.m0._reserved2, &dst->rsp.m0._reserved2); ISP_IOXPUT_16(isp, src->rsp.m0.ct_scsi_status, &dst->rsp.m0.ct_scsi_status); ISP_IOXPUT_32(isp, src->rsp.m0.ct_xfrlen, &dst->rsp.m0.ct_xfrlen); if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO2) { for (i = 0; i < ISP_RQDSEG_T2; i++) { ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg[i].ds_base, &dst->rsp.m0.u.ct_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg[i].ds_count, &dst->rsp.m0.u.ct_dataseg[i].ds_count); } } else if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO3) { for (i = 0; i < ISP_RQDSEG_T3; i++) { ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg64[i].ds_base, &dst->rsp.m0.u.ct_dataseg64[i].ds_base); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg64[i].ds_basehi, &dst->rsp.m0.u.ct_dataseg64[i].ds_basehi); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg64[i].ds_count, &dst->rsp.m0.u.ct_dataseg64[i].ds_count); } } else if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO4) { ISP_IOXPUT_16(isp, src->rsp.m0.u.ct_dslist.ds_type, &dst->rsp.m0.u.ct_dslist.ds_type); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dslist.ds_segment, &dst->rsp.m0.u.ct_dslist.ds_segment); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dslist.ds_base, &dst->rsp.m0.u.ct_dslist.ds_base); } } else if ((src->ct_flags & CT2_FLAG_MMASK) == CT2_FLAG_MODE1) { ISP_IOXPUT_16(isp, src->rsp.m1._reserved, &dst->rsp.m1._reserved); ISP_IOXPUT_16(isp, src->rsp.m1._reserved2, &dst->rsp.m1._reserved2); ISP_IOXPUT_16(isp, src->rsp.m1.ct_senselen, &dst->rsp.m1.ct_senselen); ISP_IOXPUT_16(isp, src->rsp.m1.ct_scsi_status, &dst->rsp.m1.ct_scsi_status); ISP_IOXPUT_16(isp, src->rsp.m1.ct_resplen, &dst->rsp.m1.ct_resplen); for (i = 0; i < MAXRESPLEN; i++) { ISP_IOXPUT_8(isp, src->rsp.m1.ct_resp[i], &dst->rsp.m1.ct_resp[i]); } } else { ISP_IOXPUT_32(isp, src->rsp.m2._reserved, &dst->rsp.m2._reserved); ISP_IOXPUT_16(isp, src->rsp.m2._reserved2, &dst->rsp.m2._reserved2); ISP_IOXPUT_16(isp, src->rsp.m2._reserved3, &dst->rsp.m2._reserved3); ISP_IOXPUT_32(isp, src->rsp.m2.ct_datalen, &dst->rsp.m2.ct_datalen); if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO2) { ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base, &dst->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base); ISP_IOXPUT_32(isp, 
src->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count, &dst->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count); } else { ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base, &dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base); ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi, &dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi); ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count, &dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count); } } } void isp_put_ctio2e(ispsoftc_t *isp, ct2e_entry_t *src, ct2e_entry_t *dst) { int i; isp_put_hdr(isp, &src->ct_header, &dst->ct_header); ISP_IOXPUT_32(isp, src->ct_syshandle, &dst->ct_syshandle); ISP_IOXPUT_16(isp, src->ct_iid, &dst->ct_iid); ISP_IOXPUT_16(isp, src->ct_rxid, &dst->ct_rxid); ISP_IOXPUT_16(isp, src->ct_flags, &dst->ct_flags); ISP_IOXPUT_16(isp, src->ct_timeout, &dst->ct_timeout); ISP_IOXPUT_16(isp, src->ct_seg_count, &dst->ct_seg_count); ISP_IOXPUT_32(isp, src->ct_resid, &dst->ct_resid); ISP_IOXPUT_32(isp, src->ct_reloff, &dst->ct_reloff); if ((src->ct_flags & CT2_FLAG_MMASK) == CT2_FLAG_MODE0) { ISP_IOXPUT_32(isp, src->rsp.m0._reserved, &dst->rsp.m0._reserved); ISP_IOXPUT_16(isp, src->rsp.m0._reserved2, &dst->rsp.m0._reserved2); ISP_IOXPUT_16(isp, src->rsp.m0.ct_scsi_status, &dst->rsp.m0.ct_scsi_status); ISP_IOXPUT_32(isp, src->rsp.m0.ct_xfrlen, &dst->rsp.m0.ct_xfrlen); if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO2) { for (i = 0; i < ISP_RQDSEG_T2; i++) { ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg[i].ds_base, &dst->rsp.m0.u.ct_dataseg[i].ds_base); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg[i].ds_count, &dst->rsp.m0.u.ct_dataseg[i].ds_count); } } else if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO3) { for (i = 0; i < ISP_RQDSEG_T3; i++) { ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg64[i].ds_base, &dst->rsp.m0.u.ct_dataseg64[i].ds_base); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg64[i].ds_basehi, &dst->rsp.m0.u.ct_dataseg64[i].ds_basehi); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dataseg64[i].ds_count, &dst->rsp.m0.u.ct_dataseg64[i].ds_count); } } else if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO4) { ISP_IOXPUT_16(isp, src->rsp.m0.u.ct_dslist.ds_type, &dst->rsp.m0.u.ct_dslist.ds_type); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dslist.ds_segment, &dst->rsp.m0.u.ct_dslist.ds_segment); ISP_IOXPUT_32(isp, src->rsp.m0.u.ct_dslist.ds_base, &dst->rsp.m0.u.ct_dslist.ds_base); } } else if ((src->ct_flags & CT2_FLAG_MMASK) == CT2_FLAG_MODE1) { ISP_IOXPUT_16(isp, src->rsp.m1._reserved, &dst->rsp.m1._reserved); ISP_IOXPUT_16(isp, src->rsp.m1._reserved2, &dst->rsp.m1._reserved2); ISP_IOXPUT_16(isp, src->rsp.m1.ct_senselen, &dst->rsp.m1.ct_senselen); ISP_IOXPUT_16(isp, src->rsp.m1.ct_scsi_status, &dst->rsp.m1.ct_scsi_status); ISP_IOXPUT_16(isp, src->rsp.m1.ct_resplen, &dst->rsp.m1.ct_resplen); for (i = 0; i < MAXRESPLEN; i++) { ISP_IOXPUT_8(isp, src->rsp.m1.ct_resp[i], &dst->rsp.m1.ct_resp[i]); } } else { ISP_IOXPUT_32(isp, src->rsp.m2._reserved, &dst->rsp.m2._reserved); ISP_IOXPUT_16(isp, src->rsp.m2._reserved2, &dst->rsp.m2._reserved2); ISP_IOXPUT_16(isp, src->rsp.m2._reserved3, &dst->rsp.m2._reserved3); ISP_IOXPUT_32(isp, src->rsp.m2.ct_datalen, &dst->rsp.m2.ct_datalen); if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO2) { ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base, &dst->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base); ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count, &dst->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count); } else { ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base, 
&dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base); ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi, &dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi); ISP_IOXPUT_32(isp, src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count, &dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count); } } } void isp_put_ctio7(ispsoftc_t *isp, ct7_entry_t *src, ct7_entry_t *dst) { int i; isp_put_hdr(isp, &src->ct_header, &dst->ct_header); ISP_IOXPUT_32(isp, src->ct_syshandle, &dst->ct_syshandle); ISP_IOXPUT_16(isp, src->ct_nphdl, &dst->ct_nphdl); ISP_IOXPUT_16(isp, src->ct_timeout, &dst->ct_timeout); ISP_IOXPUT_16(isp, src->ct_seg_count, &dst->ct_seg_count); ISP_IOXPUT_8(isp, src->ct_vpidx, &dst->ct_vpidx); ISP_IOXPUT_8(isp, src->ct_xflags, &dst->ct_xflags); ISP_IOXPUT_16(isp, src->ct_iid_lo, &dst->ct_iid_lo); ISP_IOXPUT_8(isp, src->ct_iid_hi, &dst->ct_iid_hi); ISP_IOXPUT_8(isp, src->ct_reserved, &dst->ct_reserved); ISP_IOXPUT_32(isp, src->ct_rxid, &dst->ct_rxid); ISP_IOXPUT_16(isp, src->ct_senselen, &dst->ct_senselen); ISP_IOXPUT_16(isp, src->ct_flags, &dst->ct_flags); ISP_IOXPUT_32(isp, src->ct_resid, &dst->ct_resid); ISP_IOXPUT_16(isp, src->ct_oxid, &dst->ct_oxid); ISP_IOXPUT_16(isp, src->ct_scsi_status, &dst->ct_scsi_status); if ((dst->ct_flags & CT7_FLAG_MMASK) == CT7_FLAG_MODE0) { ISP_IOXPUT_32(isp, src->rsp.m0.reloff, &dst->rsp.m0.reloff); ISP_IOXPUT_32(isp, src->rsp.m0.reserved0, &dst->rsp.m0.reserved0); ISP_IOXPUT_32(isp, src->rsp.m0.ct_xfrlen, &dst->rsp.m0.ct_xfrlen); ISP_IOXPUT_32(isp, src->rsp.m0.reserved1, &dst->rsp.m0.reserved1); ISP_IOXPUT_32(isp, src->rsp.m0.ds.ds_base, &dst->rsp.m0.ds.ds_base); ISP_IOXPUT_32(isp, src->rsp.m0.ds.ds_basehi, &dst->rsp.m0.ds.ds_basehi); ISP_IOXPUT_32(isp, src->rsp.m0.ds.ds_count, &dst->rsp.m0.ds.ds_count); } else if ((dst->ct_flags & CT7_FLAG_MMASK) == CT7_FLAG_MODE1) { uint32_t *a, *b; ISP_IOXPUT_16(isp, src->rsp.m1.ct_resplen, &dst->rsp.m1.ct_resplen); ISP_IOXPUT_16(isp, src->rsp.m1.reserved, &dst->rsp.m1.reserved); a = (uint32_t *) src->rsp.m1.ct_resp; b = (uint32_t *) dst->rsp.m1.ct_resp; for (i = 0; i < (ASIZE(src->rsp.m1.ct_resp) >> 2); i++) { *b++ = ISP_SWAP32(isp, *a++); } } else { ISP_IOXPUT_32(isp, src->rsp.m2.reserved0, &dst->rsp.m2.reserved0); ISP_IOXPUT_32(isp, src->rsp.m2.reserved1, &dst->rsp.m2.reserved1); ISP_IOXPUT_32(isp, src->rsp.m2.ct_datalen, &dst->rsp.m2.ct_datalen); ISP_IOXPUT_32(isp, src->rsp.m2.reserved2, &dst->rsp.m2.reserved2); ISP_IOXPUT_32(isp, src->rsp.m2.ct_fcp_rsp_iudata.ds_base, &dst->rsp.m2.ct_fcp_rsp_iudata.ds_base); ISP_IOXPUT_32(isp, src->rsp.m2.ct_fcp_rsp_iudata.ds_basehi, &dst->rsp.m2.ct_fcp_rsp_iudata.ds_basehi); ISP_IOXPUT_32(isp, src->rsp.m2.ct_fcp_rsp_iudata.ds_count, &dst->rsp.m2.ct_fcp_rsp_iudata.ds_count); } } void isp_get_ctio2(ispsoftc_t *isp, ct2_entry_t *src, ct2_entry_t *dst) { int i; isp_get_hdr(isp, &src->ct_header, &dst->ct_header); ISP_IOXGET_32(isp, &src->ct_syshandle, dst->ct_syshandle); ISP_IOXGET_8(isp, &src->ct_lun, dst->ct_lun); ISP_IOXGET_8(isp, &src->ct_iid, dst->ct_iid); ISP_IOXGET_16(isp, &src->ct_rxid, dst->ct_rxid); ISP_IOXGET_16(isp, &src->ct_flags, dst->ct_flags); ISP_IOXGET_16(isp, &src->ct_status, dst->ct_status); ISP_IOXGET_16(isp, &src->ct_timeout, dst->ct_timeout); ISP_IOXGET_16(isp, &src->ct_seg_count, dst->ct_seg_count); ISP_IOXGET_32(isp, &src->ct_reloff, dst->ct_reloff); ISP_IOXGET_32(isp, &src->ct_resid, dst->ct_resid); if ((dst->ct_flags & CT2_FLAG_MMASK) == CT2_FLAG_MODE0) { ISP_IOXGET_32(isp, &src->rsp.m0._reserved, dst->rsp.m0._reserved); ISP_IOXGET_16(isp, &src->rsp.m0._reserved2, 
dst->rsp.m0._reserved2); ISP_IOXGET_16(isp, &src->rsp.m0.ct_scsi_status, dst->rsp.m0.ct_scsi_status); ISP_IOXGET_32(isp, &src->rsp.m0.ct_xfrlen, dst->rsp.m0.ct_xfrlen); if (dst->ct_header.rqs_entry_type == RQSTYPE_CTIO2) { for (i = 0; i < ISP_RQDSEG_T2; i++) { ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg[i].ds_base, dst->rsp.m0.u.ct_dataseg[i].ds_base); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg[i].ds_count, dst->rsp.m0.u.ct_dataseg[i].ds_count); } } else if (dst->ct_header.rqs_entry_type == RQSTYPE_CTIO3) { for (i = 0; i < ISP_RQDSEG_T3; i++) { ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg64[i].ds_base, dst->rsp.m0.u.ct_dataseg64[i].ds_base); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg64[i].ds_basehi, dst->rsp.m0.u.ct_dataseg64[i].ds_basehi); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg64[i].ds_count, dst->rsp.m0.u.ct_dataseg64[i].ds_count); } } else if (dst->ct_header.rqs_entry_type == RQSTYPE_CTIO4) { ISP_IOXGET_16(isp, &src->rsp.m0.u.ct_dslist.ds_type, dst->rsp.m0.u.ct_dslist.ds_type); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dslist.ds_segment, dst->rsp.m0.u.ct_dslist.ds_segment); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dslist.ds_base, dst->rsp.m0.u.ct_dslist.ds_base); } } else if ((dst->ct_flags & CT2_FLAG_MMASK) == CT2_FLAG_MODE1) { ISP_IOXGET_16(isp, &src->rsp.m1._reserved, dst->rsp.m1._reserved); ISP_IOXGET_16(isp, &src->rsp.m1._reserved2, dst->rsp.m1._reserved2); ISP_IOXGET_16(isp, &src->rsp.m1.ct_senselen, dst->rsp.m1.ct_senselen); ISP_IOXGET_16(isp, &src->rsp.m1.ct_scsi_status, dst->rsp.m1.ct_scsi_status); ISP_IOXGET_16(isp, &src->rsp.m1.ct_resplen, dst->rsp.m1.ct_resplen); for (i = 0; i < MAXRESPLEN; i++) { ISP_IOXGET_8(isp, &src->rsp.m1.ct_resp[i], dst->rsp.m1.ct_resp[i]); } } else { ISP_IOXGET_32(isp, &src->rsp.m2._reserved, dst->rsp.m2._reserved); ISP_IOXGET_16(isp, &src->rsp.m2._reserved2, dst->rsp.m2._reserved2); ISP_IOXGET_16(isp, &src->rsp.m2._reserved3, dst->rsp.m2._reserved3); ISP_IOXGET_32(isp, &src->rsp.m2.ct_datalen, dst->rsp.m2.ct_datalen); if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO2) { ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base, dst->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base); ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count, dst->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count); } else { ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base, dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base); ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi, dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi); ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count, dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count); } } } void isp_get_ctio2e(ispsoftc_t *isp, ct2e_entry_t *src, ct2e_entry_t *dst) { int i; isp_get_hdr(isp, &src->ct_header, &dst->ct_header); ISP_IOXGET_32(isp, &src->ct_syshandle, dst->ct_syshandle); ISP_IOXGET_16(isp, &src->ct_iid, dst->ct_iid); ISP_IOXGET_16(isp, &src->ct_rxid, dst->ct_rxid); ISP_IOXGET_16(isp, &src->ct_flags, dst->ct_flags); ISP_IOXGET_16(isp, &src->ct_status, dst->ct_status); ISP_IOXGET_16(isp, &src->ct_timeout, dst->ct_timeout); ISP_IOXGET_16(isp, &src->ct_seg_count, dst->ct_seg_count); ISP_IOXGET_32(isp, &src->ct_reloff, dst->ct_reloff); ISP_IOXGET_32(isp, &src->ct_resid, dst->ct_resid); if ((dst->ct_flags & CT2_FLAG_MMASK) == CT2_FLAG_MODE0) { ISP_IOXGET_32(isp, &src->rsp.m0._reserved, dst->rsp.m0._reserved); ISP_IOXGET_16(isp, &src->rsp.m0._reserved2, dst->rsp.m0._reserved2); ISP_IOXGET_16(isp, &src->rsp.m0.ct_scsi_status, dst->rsp.m0.ct_scsi_status); ISP_IOXGET_32(isp, &src->rsp.m0.ct_xfrlen, 
dst->rsp.m0.ct_xfrlen); if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO2) { for (i = 0; i < ISP_RQDSEG_T2; i++) { ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg[i].ds_base, dst->rsp.m0.u.ct_dataseg[i].ds_base); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg[i].ds_count, dst->rsp.m0.u.ct_dataseg[i].ds_count); } } else if (dst->ct_header.rqs_entry_type == RQSTYPE_CTIO3) { for (i = 0; i < ISP_RQDSEG_T3; i++) { ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg64[i].ds_base, dst->rsp.m0.u.ct_dataseg64[i].ds_base); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg64[i].ds_basehi, dst->rsp.m0.u.ct_dataseg64[i].ds_basehi); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dataseg64[i].ds_count, dst->rsp.m0.u.ct_dataseg64[i].ds_count); } } else if (dst->ct_header.rqs_entry_type == RQSTYPE_CTIO4) { ISP_IOXGET_16(isp, &src->rsp.m0.u.ct_dslist.ds_type, dst->rsp.m0.u.ct_dslist.ds_type); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dslist.ds_segment, dst->rsp.m0.u.ct_dslist.ds_segment); ISP_IOXGET_32(isp, &src->rsp.m0.u.ct_dslist.ds_base, dst->rsp.m0.u.ct_dslist.ds_base); } } else if ((dst->ct_flags & CT2_FLAG_MMASK) == CT2_FLAG_MODE1) { ISP_IOXGET_16(isp, &src->rsp.m1._reserved, dst->rsp.m1._reserved); ISP_IOXGET_16(isp, &src->rsp.m1._reserved2, dst->rsp.m1._reserved2); ISP_IOXGET_16(isp, &src->rsp.m1.ct_senselen, dst->rsp.m1.ct_senselen); ISP_IOXGET_16(isp, &src->rsp.m1.ct_scsi_status, dst->rsp.m1.ct_scsi_status); ISP_IOXGET_16(isp, &src->rsp.m1.ct_resplen, dst->rsp.m1.ct_resplen); for (i = 0; i < MAXRESPLEN; i++) { ISP_IOXGET_8(isp, &src->rsp.m1.ct_resp[i], dst->rsp.m1.ct_resp[i]); } } else { ISP_IOXGET_32(isp, &src->rsp.m2._reserved, dst->rsp.m2._reserved); ISP_IOXGET_16(isp, &src->rsp.m2._reserved2, dst->rsp.m2._reserved2); ISP_IOXGET_16(isp, &src->rsp.m2._reserved3, dst->rsp.m2._reserved3); ISP_IOXGET_32(isp, &src->rsp.m2.ct_datalen, dst->rsp.m2.ct_datalen); if (src->ct_header.rqs_entry_type == RQSTYPE_CTIO2) { ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base, dst->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_base); ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count, dst->rsp.m2.u.ct_fcp_rsp_iudata_32.ds_count); } else { ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base, dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_base); ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi, dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_basehi); ISP_IOXGET_32(isp, &src->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count, dst->rsp.m2.u.ct_fcp_rsp_iudata_64.ds_count); } } } void isp_get_ctio7(ispsoftc_t *isp, ct7_entry_t *src, ct7_entry_t *dst) { int i; isp_get_hdr(isp, &src->ct_header, &dst->ct_header); ISP_IOXGET_32(isp, &src->ct_syshandle, dst->ct_syshandle); ISP_IOXGET_16(isp, &src->ct_nphdl, dst->ct_nphdl); ISP_IOXGET_16(isp, &src->ct_timeout, dst->ct_timeout); ISP_IOXGET_16(isp, &src->ct_seg_count, dst->ct_seg_count); ISP_IOXGET_8(isp, &src->ct_vpidx, dst->ct_vpidx); ISP_IOXGET_8(isp, &src->ct_xflags, dst->ct_xflags); ISP_IOXGET_16(isp, &src->ct_iid_lo, dst->ct_iid_lo); ISP_IOXGET_8(isp, &src->ct_iid_hi, dst->ct_iid_hi); ISP_IOXGET_8(isp, &src->ct_reserved, dst->ct_reserved); ISP_IOXGET_32(isp, &src->ct_rxid, dst->ct_rxid); ISP_IOXGET_16(isp, &src->ct_senselen, dst->ct_senselen); ISP_IOXGET_16(isp, &src->ct_flags, dst->ct_flags); ISP_IOXGET_32(isp, &src->ct_resid, dst->ct_resid); ISP_IOXGET_16(isp, &src->ct_oxid, dst->ct_oxid); ISP_IOXGET_16(isp, &src->ct_scsi_status, dst->ct_scsi_status); if ((dst->ct_flags & CT7_FLAG_MMASK) == CT7_FLAG_MODE0) { ISP_IOXGET_32(isp, &src->rsp.m0.reloff, dst->rsp.m0.reloff); ISP_IOXGET_32(isp, 
&src->rsp.m0.reserved0, dst->rsp.m0.reserved0); ISP_IOXGET_32(isp, &src->rsp.m0.ct_xfrlen, dst->rsp.m0.ct_xfrlen); ISP_IOXGET_32(isp, &src->rsp.m0.reserved1, dst->rsp.m0.reserved1); ISP_IOXGET_32(isp, &src->rsp.m0.ds.ds_base, dst->rsp.m0.ds.ds_base); ISP_IOXGET_32(isp, &src->rsp.m0.ds.ds_basehi, dst->rsp.m0.ds.ds_basehi); ISP_IOXGET_32(isp, &src->rsp.m0.ds.ds_count, dst->rsp.m0.ds.ds_count); } else if ((dst->ct_flags & CT7_FLAG_MMASK) == CT7_FLAG_MODE1) { uint32_t *a, *b; ISP_IOXGET_16(isp, &src->rsp.m1.ct_resplen, dst->rsp.m1.ct_resplen); ISP_IOXGET_16(isp, &src->rsp.m1.reserved, dst->rsp.m1.reserved); a = (uint32_t *) src->rsp.m1.ct_resp; b = (uint32_t *) dst->rsp.m1.ct_resp; /* Swizzle the response buffer as 32-bit words, mirroring isp_put_ctio7; the extra byte-wise pass over ct_resp was redundant (this loop rewrites every byte) and has been dropped. */ for (i = 0; i < (ASIZE(src->rsp.m1.ct_resp) >> 2); i++) { *b++ = ISP_SWAP32(isp, *a++); } } else { ISP_IOXGET_32(isp, &src->rsp.m2.reserved0, dst->rsp.m2.reserved0); ISP_IOXGET_32(isp, &src->rsp.m2.ct_datalen, dst->rsp.m2.ct_datalen); ISP_IOXGET_32(isp, &src->rsp.m2.reserved1, dst->rsp.m2.reserved1); ISP_IOXGET_32(isp, &src->rsp.m2.ct_fcp_rsp_iudata.ds_base, dst->rsp.m2.ct_fcp_rsp_iudata.ds_base); ISP_IOXGET_32(isp, &src->rsp.m2.ct_fcp_rsp_iudata.ds_basehi, dst->rsp.m2.ct_fcp_rsp_iudata.ds_basehi); ISP_IOXGET_32(isp, &src->rsp.m2.ct_fcp_rsp_iudata.ds_count, dst->rsp.m2.ct_fcp_rsp_iudata.ds_count); } } void isp_put_enable_lun(ispsoftc_t *isp, lun_entry_t *lesrc, lun_entry_t *ledst) { int i; isp_put_hdr(isp, &lesrc->le_header, &ledst->le_header); ISP_IOXPUT_32(isp, lesrc->le_reserved, &ledst->le_reserved); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, lesrc->le_lun, &ledst->le_rsvd); ISP_IOXPUT_8(isp, lesrc->le_rsvd, &ledst->le_lun); ISP_IOXPUT_8(isp, lesrc->le_ops, &ledst->le_tgt); ISP_IOXPUT_8(isp, lesrc->le_tgt, &ledst->le_ops); ISP_IOXPUT_8(isp, lesrc->le_status, &ledst->le_reserved2); ISP_IOXPUT_8(isp, lesrc->le_reserved2, &ledst->le_status); ISP_IOXPUT_8(isp, lesrc->le_cmd_count, &ledst->le_in_count); ISP_IOXPUT_8(isp, lesrc->le_in_count, &ledst->le_cmd_count); ISP_IOXPUT_8(isp, lesrc->le_cdb6len, &ledst->le_cdb7len); ISP_IOXPUT_8(isp, lesrc->le_cdb7len, &ledst->le_cdb6len); } else { ISP_IOXPUT_8(isp, lesrc->le_lun, &ledst->le_lun); ISP_IOXPUT_8(isp, lesrc->le_rsvd, &ledst->le_rsvd); ISP_IOXPUT_8(isp, lesrc->le_ops, &ledst->le_ops); ISP_IOXPUT_8(isp, lesrc->le_tgt, &ledst->le_tgt); ISP_IOXPUT_8(isp, lesrc->le_status, &ledst->le_status); ISP_IOXPUT_8(isp, lesrc->le_reserved2, &ledst->le_reserved2); ISP_IOXPUT_8(isp, lesrc->le_cmd_count, &ledst->le_cmd_count); ISP_IOXPUT_8(isp, lesrc->le_in_count, &ledst->le_in_count); ISP_IOXPUT_8(isp, lesrc->le_cdb6len, &ledst->le_cdb6len); ISP_IOXPUT_8(isp, lesrc->le_cdb7len, &ledst->le_cdb7len); } ISP_IOXPUT_32(isp, lesrc->le_flags, &ledst->le_flags); ISP_IOXPUT_16(isp, lesrc->le_timeout, &ledst->le_timeout); for (i = 0; i < 20; i++) { ISP_IOXPUT_8(isp, lesrc->le_reserved3[i], &ledst->le_reserved3[i]); } } void isp_get_enable_lun(ispsoftc_t *isp, lun_entry_t *lesrc, lun_entry_t *ledst) { int i; isp_get_hdr(isp, &lesrc->le_header, &ledst->le_header); ISP_IOXGET_32(isp, &lesrc->le_reserved, ledst->le_reserved); if (ISP_IS_SBUS(isp)) { ISP_IOXGET_8(isp, &lesrc->le_lun, ledst->le_rsvd); ISP_IOXGET_8(isp, &lesrc->le_rsvd, ledst->le_lun); ISP_IOXGET_8(isp, &lesrc->le_ops, ledst->le_tgt); ISP_IOXGET_8(isp, &lesrc->le_tgt, ledst->le_ops); ISP_IOXGET_8(isp, &lesrc->le_status, ledst->le_reserved2); ISP_IOXGET_8(isp, &lesrc->le_reserved2, ledst->le_status); ISP_IOXGET_8(isp,
&lesrc->le_cmd_count, ledst->le_in_count); ISP_IOXGET_8(isp, &lesrc->le_in_count, ledst->le_cmd_count); ISP_IOXGET_8(isp, &lesrc->le_cdb6len, ledst->le_cdb7len); ISP_IOXGET_8(isp, &lesrc->le_cdb7len, ledst->le_cdb6len); } else { ISP_IOXGET_8(isp, &lesrc->le_lun, ledst->le_lun); ISP_IOXGET_8(isp, &lesrc->le_rsvd, ledst->le_rsvd); ISP_IOXGET_8(isp, &lesrc->le_ops, ledst->le_ops); ISP_IOXGET_8(isp, &lesrc->le_tgt, ledst->le_tgt); ISP_IOXGET_8(isp, &lesrc->le_status, ledst->le_status); ISP_IOXGET_8(isp, &lesrc->le_reserved2, ledst->le_reserved2); ISP_IOXGET_8(isp, &lesrc->le_cmd_count, ledst->le_cmd_count); ISP_IOXGET_8(isp, &lesrc->le_in_count, ledst->le_in_count); ISP_IOXGET_8(isp, &lesrc->le_cdb6len, ledst->le_cdb6len); ISP_IOXGET_8(isp, &lesrc->le_cdb7len, ledst->le_cdb7len); } ISP_IOXGET_32(isp, &lesrc->le_flags, ledst->le_flags); ISP_IOXGET_16(isp, &lesrc->le_timeout, ledst->le_timeout); for (i = 0; i < 20; i++) { ISP_IOXGET_8(isp, &lesrc->le_reserved3[i], ledst->le_reserved3[i]); } } void isp_put_notify(ispsoftc_t *isp, in_entry_t *src, in_entry_t *dst) { int i; isp_put_hdr(isp, &src->in_header, &dst->in_header); ISP_IOXPUT_32(isp, src->in_reserved, &dst->in_reserved); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->in_lun, &dst->in_iid); ISP_IOXPUT_8(isp, src->in_iid, &dst->in_lun); ISP_IOXPUT_8(isp, src->in_reserved2, &dst->in_tgt); ISP_IOXPUT_8(isp, src->in_tgt, &dst->in_reserved2); ISP_IOXPUT_8(isp, src->in_status, &dst->in_rsvd2); ISP_IOXPUT_8(isp, src->in_rsvd2, &dst->in_status); ISP_IOXPUT_8(isp, src->in_tag_val, &dst->in_tag_type); ISP_IOXPUT_8(isp, src->in_tag_type, &dst->in_tag_val); } else { ISP_IOXPUT_8(isp, src->in_lun, &dst->in_lun); ISP_IOXPUT_8(isp, src->in_iid, &dst->in_iid); ISP_IOXPUT_8(isp, src->in_reserved2, &dst->in_reserved2); ISP_IOXPUT_8(isp, src->in_tgt, &dst->in_tgt); ISP_IOXPUT_8(isp, src->in_status, &dst->in_status); ISP_IOXPUT_8(isp, src->in_rsvd2, &dst->in_rsvd2); ISP_IOXPUT_8(isp, src->in_tag_val, &dst->in_tag_val); ISP_IOXPUT_8(isp, src->in_tag_type, &dst->in_tag_type); } ISP_IOXPUT_32(isp, src->in_flags, &dst->in_flags); ISP_IOXPUT_16(isp, src->in_seqid, &dst->in_seqid); for (i = 0; i < IN_MSGLEN; i++) { ISP_IOXPUT_8(isp, src->in_msg[i], &dst->in_msg[i]); } for (i = 0; i < IN_RSVDLEN; i++) { ISP_IOXPUT_8(isp, src->in_reserved3[i], &dst->in_reserved3[i]); } for (i = 0; i < QLTM_SENSELEN; i++) { ISP_IOXPUT_8(isp, src->in_sense[i], &dst->in_sense[i]); } } void isp_get_notify(ispsoftc_t *isp, in_entry_t *src, in_entry_t *dst) { int i; isp_get_hdr(isp, &src->in_header, &dst->in_header); ISP_IOXGET_32(isp, &src->in_reserved, dst->in_reserved); if (ISP_IS_SBUS(isp)) { ISP_IOXGET_8(isp, &src->in_lun, dst->in_iid); ISP_IOXGET_8(isp, &src->in_iid, dst->in_lun); ISP_IOXGET_8(isp, &src->in_reserved2, dst->in_tgt); ISP_IOXGET_8(isp, &src->in_tgt, dst->in_reserved2); ISP_IOXGET_8(isp, &src->in_status, dst->in_rsvd2); ISP_IOXGET_8(isp, &src->in_rsvd2, dst->in_status); ISP_IOXGET_8(isp, &src->in_tag_val, dst->in_tag_type); ISP_IOXGET_8(isp, &src->in_tag_type, dst->in_tag_val); } else { ISP_IOXGET_8(isp, &src->in_lun, dst->in_lun); ISP_IOXGET_8(isp, &src->in_iid, dst->in_iid); ISP_IOXGET_8(isp, &src->in_reserved2, dst->in_reserved2); ISP_IOXGET_8(isp, &src->in_tgt, dst->in_tgt); ISP_IOXGET_8(isp, &src->in_status, dst->in_status); ISP_IOXGET_8(isp, &src->in_rsvd2, dst->in_rsvd2); ISP_IOXGET_8(isp, &src->in_tag_val, dst->in_tag_val); ISP_IOXGET_8(isp, &src->in_tag_type, dst->in_tag_type); } ISP_IOXGET_32(isp, &src->in_flags, dst->in_flags); ISP_IOXGET_16(isp, 
&src->in_seqid, dst->in_seqid); for (i = 0; i < IN_MSGLEN; i++) { ISP_IOXGET_8(isp, &src->in_msg[i], dst->in_msg[i]); } for (i = 0; i < IN_RSVDLEN; i++) { ISP_IOXGET_8(isp, &src->in_reserved3[i], dst->in_reserved3[i]); } for (i = 0; i < QLTM_SENSELEN; i++) { ISP_IOXGET_8(isp, &src->in_sense[i], dst->in_sense[i]); } } void isp_put_notify_fc(ispsoftc_t *isp, in_fcentry_t *src, in_fcentry_t *dst) { isp_put_hdr(isp, &src->in_header, &dst->in_header); ISP_IOXPUT_32(isp, src->in_reserved, &dst->in_reserved); ISP_IOXPUT_8(isp, src->in_lun, &dst->in_lun); ISP_IOXPUT_8(isp, src->in_iid, &dst->in_iid); ISP_IOXPUT_16(isp, src->in_scclun, &dst->in_scclun); ISP_IOXPUT_32(isp, src->in_reserved2, &dst->in_reserved2); ISP_IOXPUT_16(isp, src->in_status, &dst->in_status); ISP_IOXPUT_16(isp, src->in_task_flags, &dst->in_task_flags); ISP_IOXPUT_16(isp, src->in_seqid, &dst->in_seqid); } void isp_put_notify_fc_e(ispsoftc_t *isp, in_fcentry_e_t *src, in_fcentry_e_t *dst) { isp_put_hdr(isp, &src->in_header, &dst->in_header); ISP_IOXPUT_32(isp, src->in_reserved, &dst->in_reserved); ISP_IOXPUT_16(isp, src->in_iid, &dst->in_iid); ISP_IOXPUT_16(isp, src->in_scclun, &dst->in_scclun); ISP_IOXPUT_32(isp, src->in_reserved2, &dst->in_reserved2); ISP_IOXPUT_16(isp, src->in_status, &dst->in_status); ISP_IOXPUT_16(isp, src->in_task_flags, &dst->in_task_flags); ISP_IOXPUT_16(isp, src->in_seqid, &dst->in_seqid); } void isp_put_notify_24xx(ispsoftc_t *isp, in_fcentry_24xx_t *src, in_fcentry_24xx_t *dst) { int i; isp_put_hdr(isp, &src->in_header, &dst->in_header); ISP_IOXPUT_32(isp, src->in_reserved, &dst->in_reserved); ISP_IOXPUT_16(isp, src->in_nphdl, &dst->in_nphdl); ISP_IOXPUT_16(isp, src->in_reserved1, &dst->in_reserved1); ISP_IOXPUT_16(isp, src->in_flags, &dst->in_flags); ISP_IOXPUT_16(isp, src->in_srr_rxid, &dst->in_srr_rxid); ISP_IOXPUT_16(isp, src->in_status, &dst->in_status); ISP_IOXPUT_8(isp, src->in_status_subcode, &dst->in_status_subcode); ISP_IOXPUT_8(isp, src->in_fwhandle, &dst->in_fwhandle); ISP_IOXPUT_32(isp, src->in_rxid, &dst->in_rxid); ISP_IOXPUT_16(isp, src->in_srr_reloff_hi, &dst->in_srr_reloff_hi); ISP_IOXPUT_16(isp, src->in_srr_reloff_lo, &dst->in_srr_reloff_lo); ISP_IOXPUT_16(isp, src->in_srr_iu, &dst->in_srr_iu); ISP_IOXPUT_16(isp, src->in_srr_oxid, &dst->in_srr_oxid); ISP_IOXPUT_16(isp, src->in_nport_id_hi, &dst->in_nport_id_hi); ISP_IOXPUT_8(isp, src->in_nport_id_lo, &dst->in_nport_id_lo); ISP_IOXPUT_8(isp, src->in_reserved3, &dst->in_reserved3); ISP_IOXPUT_16(isp, src->in_np_handle, &dst->in_np_handle); for (i = 0; i < ASIZE(src->in_reserved4); i++) { ISP_IOXPUT_8(isp, src->in_reserved4[i], &dst->in_reserved4[i]); } ISP_IOXPUT_8(isp, src->in_reserved5, &dst->in_reserved5); ISP_IOXPUT_8(isp, src->in_vpidx, &dst->in_vpidx); ISP_IOXPUT_32(isp, src->in_reserved6, &dst->in_reserved6); ISP_IOXPUT_16(isp, src->in_portid_lo, &dst->in_portid_lo); ISP_IOXPUT_8(isp, src->in_portid_hi, &dst->in_portid_hi); ISP_IOXPUT_8(isp, src->in_reserved7, &dst->in_reserved7); ISP_IOXPUT_16(isp, src->in_reserved8, &dst->in_reserved8); ISP_IOXPUT_16(isp, src->in_oxid, &dst->in_oxid); } void isp_get_notify_fc(ispsoftc_t *isp, in_fcentry_t *src, in_fcentry_t *dst) { isp_get_hdr(isp, &src->in_header, &dst->in_header); ISP_IOXGET_32(isp, &src->in_reserved, dst->in_reserved); ISP_IOXGET_8(isp, &src->in_lun, dst->in_lun); ISP_IOXGET_8(isp, &src->in_iid, dst->in_iid); ISP_IOXGET_16(isp, &src->in_scclun, dst->in_scclun); ISP_IOXGET_32(isp, &src->in_reserved2, dst->in_reserved2); ISP_IOXGET_16(isp, &src->in_status, dst->in_status); 
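	/*
	 * Editor's note (a hedged sketch, not driver code): the
	 * ISP_IS_SBUS() branches in the routines above store adjacent
	 * byte-wide fields crossed (lun/iid, tgt/reserved2, ...) because
	 * SBus adapters present each 16-bit word of a queue entry
	 * byte-swapped relative to the PCI layout.  The idea in
	 * isolation, with hypothetical names:
	 *
	 *	static void
	 *	put_byte_pair(uint8_t lo, uint8_t hi, uint8_t *dlo,
	 *	    uint8_t *dhi, int sbus)
	 *	{
	 *		if (sbus) {
	 *			*dlo = hi;	// crossed within the word
	 *			*dhi = lo;
	 *		} else {
	 *			*dlo = lo;	// straight through on PCI
	 *			*dhi = hi;
	 *		}
	 *	}
	 */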
ISP_IOXGET_16(isp, &src->in_task_flags, dst->in_task_flags); ISP_IOXGET_16(isp, &src->in_seqid, dst->in_seqid); } void isp_get_notify_fc_e(ispsoftc_t *isp, in_fcentry_e_t *src, in_fcentry_e_t *dst) { isp_get_hdr(isp, &src->in_header, &dst->in_header); ISP_IOXGET_32(isp, &src->in_reserved, dst->in_reserved); ISP_IOXGET_16(isp, &src->in_iid, dst->in_iid); ISP_IOXGET_16(isp, &src->in_scclun, dst->in_scclun); ISP_IOXGET_32(isp, &src->in_reserved2, dst->in_reserved2); ISP_IOXGET_16(isp, &src->in_status, dst->in_status); ISP_IOXGET_16(isp, &src->in_task_flags, dst->in_task_flags); ISP_IOXGET_16(isp, &src->in_seqid, dst->in_seqid); } void isp_get_notify_24xx(ispsoftc_t *isp, in_fcentry_24xx_t *src, in_fcentry_24xx_t *dst) { int i; isp_get_hdr(isp, &src->in_header, &dst->in_header); ISP_IOXGET_32(isp, &src->in_reserved, dst->in_reserved); ISP_IOXGET_16(isp, &src->in_nphdl, dst->in_nphdl); ISP_IOXGET_16(isp, &src->in_reserved1, dst->in_reserved1); ISP_IOXGET_16(isp, &src->in_flags, dst->in_flags); ISP_IOXGET_16(isp, &src->in_srr_rxid, dst->in_srr_rxid); ISP_IOXGET_16(isp, &src->in_status, dst->in_status); ISP_IOXGET_8(isp, &src->in_status_subcode, dst->in_status_subcode); ISP_IOXGET_8(isp, &src->in_fwhandle, dst->in_fwhandle); ISP_IOXGET_32(isp, &src->in_rxid, dst->in_rxid); ISP_IOXGET_16(isp, &src->in_srr_reloff_hi, dst->in_srr_reloff_hi); ISP_IOXGET_16(isp, &src->in_srr_reloff_lo, dst->in_srr_reloff_lo); ISP_IOXGET_16(isp, &src->in_srr_iu, dst->in_srr_iu); ISP_IOXGET_16(isp, &src->in_srr_oxid, dst->in_srr_oxid); ISP_IOXGET_16(isp, &src->in_nport_id_hi, dst->in_nport_id_hi); ISP_IOXGET_8(isp, &src->in_nport_id_lo, dst->in_nport_id_lo); ISP_IOXGET_8(isp, &src->in_reserved3, dst->in_reserved3); ISP_IOXGET_16(isp, &src->in_np_handle, dst->in_np_handle); for (i = 0; i < ASIZE(src->in_reserved4); i++) { ISP_IOXGET_8(isp, &src->in_reserved4[i], dst->in_reserved4[i]); } ISP_IOXGET_8(isp, &src->in_reserved5, dst->in_reserved5); ISP_IOXGET_8(isp, &src->in_vpidx, dst->in_vpidx); ISP_IOXGET_32(isp, &src->in_reserved6, dst->in_reserved6); ISP_IOXGET_16(isp, &src->in_portid_lo, dst->in_portid_lo); ISP_IOXGET_8(isp, &src->in_portid_hi, dst->in_portid_hi); ISP_IOXGET_8(isp, &src->in_reserved7, dst->in_reserved7); ISP_IOXGET_16(isp, &src->in_reserved8, dst->in_reserved8); ISP_IOXGET_16(isp, &src->in_oxid, dst->in_oxid); } void isp_put_notify_ack(ispsoftc_t *isp, na_entry_t *src, na_entry_t *dst) { int i; isp_put_hdr(isp, &src->na_header, &dst->na_header); ISP_IOXPUT_32(isp, src->na_reserved, &dst->na_reserved); if (ISP_IS_SBUS(isp)) { ISP_IOXPUT_8(isp, src->na_lun, &dst->na_iid); ISP_IOXPUT_8(isp, src->na_iid, &dst->na_lun); ISP_IOXPUT_8(isp, src->na_status, &dst->na_event); ISP_IOXPUT_8(isp, src->na_event, &dst->na_status); } else { ISP_IOXPUT_8(isp, src->na_lun, &dst->na_lun); ISP_IOXPUT_8(isp, src->na_iid, &dst->na_iid); ISP_IOXPUT_8(isp, src->na_status, &dst->na_status); ISP_IOXPUT_8(isp, src->na_event, &dst->na_event); } ISP_IOXPUT_32(isp, src->na_flags, &dst->na_flags); for (i = 0; i < NA_RSVDLEN; i++) { ISP_IOXPUT_16(isp, src->na_reserved3[i], &dst->na_reserved3[i]); } } void isp_get_notify_ack(ispsoftc_t *isp, na_entry_t *src, na_entry_t *dst) { int i; isp_get_hdr(isp, &src->na_header, &dst->na_header); ISP_IOXGET_32(isp, &src->na_reserved, dst->na_reserved); if (ISP_IS_SBUS(isp)) { ISP_IOXGET_8(isp, &src->na_lun, dst->na_iid); ISP_IOXGET_8(isp, &src->na_iid, dst->na_lun); ISP_IOXGET_8(isp, &src->na_status, dst->na_event); ISP_IOXGET_8(isp, &src->na_event, dst->na_status); } else { ISP_IOXGET_8(isp, 
&src->na_lun, dst->na_lun); ISP_IOXGET_8(isp, &src->na_iid, dst->na_iid); ISP_IOXGET_8(isp, &src->na_status, dst->na_status); ISP_IOXGET_8(isp, &src->na_event, dst->na_event); } ISP_IOXGET_32(isp, &src->na_flags, dst->na_flags); for (i = 0; i < NA_RSVDLEN; i++) { ISP_IOXGET_16(isp, &src->na_reserved3[i], dst->na_reserved3[i]); } } void isp_put_notify_ack_fc(ispsoftc_t *isp, na_fcentry_t *src, na_fcentry_t *dst) { int i; isp_put_hdr(isp, &src->na_header, &dst->na_header); ISP_IOXPUT_32(isp, src->na_reserved, &dst->na_reserved); ISP_IOXPUT_8(isp, src->na_reserved1, &dst->na_reserved1); ISP_IOXPUT_8(isp, src->na_iid, &dst->na_iid); ISP_IOXPUT_16(isp, src->na_response, &dst->na_response); ISP_IOXPUT_16(isp, src->na_flags, &dst->na_flags); ISP_IOXPUT_16(isp, src->na_reserved2, &dst->na_reserved2); ISP_IOXPUT_16(isp, src->na_status, &dst->na_status); ISP_IOXPUT_16(isp, src->na_task_flags, &dst->na_task_flags); ISP_IOXPUT_16(isp, src->na_seqid, &dst->na_seqid); for (i = 0; i < NA2_RSVDLEN; i++) { ISP_IOXPUT_16(isp, src->na_reserved3[i], &dst->na_reserved3[i]); } } void isp_put_notify_ack_fc_e(ispsoftc_t *isp, na_fcentry_e_t *src, na_fcentry_e_t *dst) { int i; isp_put_hdr(isp, &src->na_header, &dst->na_header); ISP_IOXPUT_32(isp, src->na_reserved, &dst->na_reserved); ISP_IOXPUT_16(isp, src->na_iid, &dst->na_iid); ISP_IOXPUT_16(isp, src->na_response, &dst->na_response); ISP_IOXPUT_16(isp, src->na_flags, &dst->na_flags); ISP_IOXPUT_16(isp, src->na_reserved2, &dst->na_reserved2); ISP_IOXPUT_16(isp, src->na_status, &dst->na_status); ISP_IOXPUT_16(isp, src->na_task_flags, &dst->na_task_flags); ISP_IOXPUT_16(isp, src->na_seqid, &dst->na_seqid); for (i = 0; i < NA2_RSVDLEN; i++) { ISP_IOXPUT_16(isp, src->na_reserved3[i], &dst->na_reserved3[i]); } } void isp_put_notify_24xx_ack(ispsoftc_t *isp, na_fcentry_24xx_t *src, na_fcentry_24xx_t *dst) { int i; isp_put_hdr(isp, &src->na_header, &dst->na_header); ISP_IOXPUT_32(isp, src->na_handle, &dst->na_handle); ISP_IOXPUT_16(isp, src->na_nphdl, &dst->na_nphdl); ISP_IOXPUT_16(isp, src->na_reserved1, &dst->na_reserved1); ISP_IOXPUT_16(isp, src->na_flags, &dst->na_flags); ISP_IOXPUT_16(isp, src->na_srr_rxid, &dst->na_srr_rxid); ISP_IOXPUT_16(isp, src->na_status, &dst->na_status); ISP_IOXPUT_8(isp, src->na_status_subcode, &dst->na_status_subcode); ISP_IOXPUT_8(isp, src->na_fwhandle, &dst->na_fwhandle); ISP_IOXPUT_32(isp, src->na_rxid, &dst->na_rxid); ISP_IOXPUT_16(isp, src->na_srr_reloff_hi, &dst->na_srr_reloff_hi); ISP_IOXPUT_16(isp, src->na_srr_reloff_lo, &dst->na_srr_reloff_lo); ISP_IOXPUT_16(isp, src->na_srr_iu, &dst->na_srr_iu); ISP_IOXPUT_16(isp, src->na_srr_flags, &dst->na_srr_flags); for (i = 0; i < 18; i++) { ISP_IOXPUT_8(isp, src->na_reserved3[i], &dst->na_reserved3[i]); } ISP_IOXPUT_8(isp, src->na_reserved4, &dst->na_reserved4); ISP_IOXPUT_8(isp, src->na_vpidx, &dst->na_vpidx); ISP_IOXPUT_8(isp, src->na_srr_reject_vunique, &dst->na_srr_reject_vunique); ISP_IOXPUT_8(isp, src->na_srr_reject_explanation, &dst->na_srr_reject_explanation); ISP_IOXPUT_8(isp, src->na_srr_reject_code, &dst->na_srr_reject_code); ISP_IOXPUT_8(isp, src->na_reserved5, &dst->na_reserved5); for (i = 0; i < 6; i++) { ISP_IOXPUT_8(isp, src->na_reserved6[i], &dst->na_reserved6[i]); } ISP_IOXPUT_16(isp, src->na_oxid, &dst->na_oxid); } void isp_get_notify_ack_fc(ispsoftc_t *isp, na_fcentry_t *src, na_fcentry_t *dst) { int i; isp_get_hdr(isp, &src->na_header, &dst->na_header); ISP_IOXGET_32(isp, &src->na_reserved, dst->na_reserved); ISP_IOXGET_8(isp, &src->na_reserved1, dst->na_reserved1); 
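	/*
	 * A note on the accessor macros (illustrative assumption; the
	 * real definitions are platform-specific and live outside this
	 * file): ISP_IOXPUT_n/ISP_IOXGET_n reduce to plain assignments
	 * on hosts whose byte order matches the RISC's, and to byte
	 * swaps otherwise.  One plausible shape, using bswap16/bswap32
	 * from <sys/endian.h>:
	 *
	 *	#if _BYTE_ORDER == _BIG_ENDIAN
	 *	#define ISP_SWAP16(isp, x)	bswap16(x)
	 *	#define ISP_SWAP32(isp, x)	bswap32(x)
	 *	#else
	 *	#define ISP_SWAP16(isp, x)	(x)
	 *	#define ISP_SWAP32(isp, x)	(x)
	 *	#endif
	 *	#define ISP_IOXPUT_16(isp, s, d)	(*(d) = ISP_SWAP16(isp, (s)))
	 *	#define ISP_IOXGET_16(isp, s, d)	((d) = ISP_SWAP16(isp, *(s)))
	 */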
ISP_IOXGET_8(isp, &src->na_iid, dst->na_iid); ISP_IOXGET_16(isp, &src->na_response, dst->na_response); ISP_IOXGET_16(isp, &src->na_flags, dst->na_flags); ISP_IOXGET_16(isp, &src->na_reserved2, dst->na_reserved2); ISP_IOXGET_16(isp, &src->na_status, dst->na_status); ISP_IOXGET_16(isp, &src->na_task_flags, dst->na_task_flags); ISP_IOXGET_16(isp, &src->na_seqid, dst->na_seqid); for (i = 0; i < NA2_RSVDLEN; i++) { ISP_IOXGET_16(isp, &src->na_reserved3[i], dst->na_reserved3[i]); } } void isp_get_notify_ack_fc_e(ispsoftc_t *isp, na_fcentry_e_t *src, na_fcentry_e_t *dst) { int i; isp_get_hdr(isp, &src->na_header, &dst->na_header); ISP_IOXGET_32(isp, &src->na_reserved, dst->na_reserved); ISP_IOXGET_16(isp, &src->na_iid, dst->na_iid); ISP_IOXGET_16(isp, &src->na_response, dst->na_response); ISP_IOXGET_16(isp, &src->na_flags, dst->na_flags); ISP_IOXGET_16(isp, &src->na_reserved2, dst->na_reserved2); ISP_IOXGET_16(isp, &src->na_status, dst->na_status); ISP_IOXGET_16(isp, &src->na_task_flags, dst->na_task_flags); ISP_IOXGET_16(isp, &src->na_seqid, dst->na_seqid); for (i = 0; i < NA2_RSVDLEN; i++) { ISP_IOXGET_16(isp, &src->na_reserved3[i], dst->na_reserved3[i]); } } void isp_get_notify_ack_24xx(ispsoftc_t *isp, na_fcentry_24xx_t *src, na_fcentry_24xx_t *dst) { int i; isp_get_hdr(isp, &src->na_header, &dst->na_header); ISP_IOXGET_32(isp, &src->na_handle, dst->na_handle); ISP_IOXGET_16(isp, &src->na_nphdl, dst->na_nphdl); ISP_IOXGET_16(isp, &src->na_reserved1, dst->na_reserved1); ISP_IOXGET_16(isp, &src->na_flags, dst->na_flags); ISP_IOXGET_16(isp, &src->na_srr_rxid, dst->na_srr_rxid); ISP_IOXGET_16(isp, &src->na_status, dst->na_status); ISP_IOXGET_8(isp, &src->na_status_subcode, dst->na_status_subcode); ISP_IOXGET_8(isp, &src->na_fwhandle, dst->na_fwhandle); ISP_IOXGET_32(isp, &src->na_rxid, dst->na_rxid); ISP_IOXGET_16(isp, &src->na_srr_reloff_hi, dst->na_srr_reloff_hi); ISP_IOXGET_16(isp, &src->na_srr_reloff_lo, dst->na_srr_reloff_lo); ISP_IOXGET_16(isp, &src->na_srr_iu, dst->na_srr_iu); ISP_IOXGET_16(isp, &src->na_srr_flags, dst->na_srr_flags); for (i = 0; i < 18; i++) { ISP_IOXGET_8(isp, &src->na_reserved3[i], dst->na_reserved3[i]); } ISP_IOXGET_8(isp, &src->na_reserved4, dst->na_reserved4); ISP_IOXGET_8(isp, &src->na_vpidx, dst->na_vpidx); ISP_IOXGET_8(isp, &src->na_srr_reject_vunique, dst->na_srr_reject_vunique); ISP_IOXGET_8(isp, &src->na_srr_reject_explanation, dst->na_srr_reject_explanation); ISP_IOXGET_8(isp, &src->na_srr_reject_code, dst->na_srr_reject_code); ISP_IOXGET_8(isp, &src->na_reserved5, dst->na_reserved5); for (i = 0; i < 6; i++) { ISP_IOXGET_8(isp, &src->na_reserved6[i], dst->na_reserved6[i]); } ISP_IOXGET_16(isp, &src->na_oxid, dst->na_oxid); } void isp_get_abts(ispsoftc_t *isp, abts_t *src, abts_t *dst) { int i; isp_get_hdr(isp, &src->abts_header, &dst->abts_header); for (i = 0; i < 6; i++) { ISP_IOXGET_8(isp, &src->abts_reserved0[i], dst->abts_reserved0[i]); } ISP_IOXGET_16(isp, &src->abts_nphdl, dst->abts_nphdl); ISP_IOXGET_16(isp, &src->abts_reserved1, dst->abts_reserved1); ISP_IOXGET_16(isp, &src->abts_sof, dst->abts_sof); ISP_IOXGET_32(isp, &src->abts_rxid_abts, dst->abts_rxid_abts); ISP_IOXGET_16(isp, &src->abts_did_lo, dst->abts_did_lo); ISP_IOXGET_8(isp, &src->abts_did_hi, dst->abts_did_hi); ISP_IOXGET_8(isp, &src->abts_r_ctl, dst->abts_r_ctl); ISP_IOXGET_16(isp, &src->abts_sid_lo, dst->abts_sid_lo); ISP_IOXGET_8(isp, &src->abts_sid_hi, dst->abts_sid_hi); ISP_IOXGET_8(isp, &src->abts_cs_ctl, dst->abts_cs_ctl); ISP_IOXGET_16(isp, &src->abts_fs_ctl, dst->abts_fs_ctl); 
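	/*
	 * The did_lo/did_hi and sid_lo/sid_hi pairs handled here split a
	 * 24-bit Fibre Channel port id across a 16-bit and an 8-bit
	 * field.  A small helper (hypothetical, for illustration only)
	 * reassembles one:
	 *
	 *	static uint32_t
	 *	fc_portid(uint16_t lo, uint8_t hi)
	 *	{
	 *		return (((uint32_t)hi << 16) | lo);
	 *	}
	 */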
ISP_IOXGET_8(isp, &src->abts_f_ctl, dst->abts_f_ctl); ISP_IOXGET_8(isp, &src->abts_type, dst->abts_type); ISP_IOXGET_16(isp, &src->abts_seq_cnt, dst->abts_seq_cnt); ISP_IOXGET_8(isp, &src->abts_df_ctl, dst->abts_df_ctl); ISP_IOXGET_8(isp, &src->abts_seq_id, dst->abts_seq_id); ISP_IOXGET_16(isp, &src->abts_rx_id, dst->abts_rx_id); ISP_IOXGET_16(isp, &src->abts_ox_id, dst->abts_ox_id); ISP_IOXGET_32(isp, &src->abts_param, dst->abts_param); for (i = 0; i < 16; i++) { ISP_IOXGET_8(isp, &src->abts_reserved2[i], dst->abts_reserved2[i]); } ISP_IOXGET_32(isp, &src->abts_rxid_task, dst->abts_rxid_task); } void isp_put_abts_rsp(ispsoftc_t *isp, abts_rsp_t *src, abts_rsp_t *dst) { int i; isp_put_hdr(isp, &src->abts_rsp_header, &dst->abts_rsp_header); ISP_IOXPUT_32(isp, src->abts_rsp_handle, &dst->abts_rsp_handle); ISP_IOXPUT_16(isp, src->abts_rsp_status, &dst->abts_rsp_status); ISP_IOXPUT_16(isp, src->abts_rsp_nphdl, &dst->abts_rsp_nphdl); ISP_IOXPUT_16(isp, src->abts_rsp_ctl_flags, &dst->abts_rsp_ctl_flags); ISP_IOXPUT_16(isp, src->abts_rsp_sof, &dst->abts_rsp_sof); ISP_IOXPUT_32(isp, src->abts_rsp_rxid_abts, &dst->abts_rsp_rxid_abts); ISP_IOXPUT_16(isp, src->abts_rsp_did_lo, &dst->abts_rsp_did_lo); ISP_IOXPUT_8(isp, src->abts_rsp_did_hi, &dst->abts_rsp_did_hi); ISP_IOXPUT_8(isp, src->abts_rsp_r_ctl, &dst->abts_rsp_r_ctl); ISP_IOXPUT_16(isp, src->abts_rsp_sid_lo, &dst->abts_rsp_sid_lo); ISP_IOXPUT_8(isp, src->abts_rsp_sid_hi, &dst->abts_rsp_sid_hi); ISP_IOXPUT_8(isp, src->abts_rsp_cs_ctl, &dst->abts_rsp_cs_ctl); ISP_IOXPUT_16(isp, src->abts_rsp_f_ctl_lo, &dst->abts_rsp_f_ctl_lo); ISP_IOXPUT_8(isp, src->abts_rsp_f_ctl_hi, &dst->abts_rsp_f_ctl_hi); ISP_IOXPUT_8(isp, src->abts_rsp_type, &dst->abts_rsp_type); ISP_IOXPUT_16(isp, src->abts_rsp_seq_cnt, &dst->abts_rsp_seq_cnt); ISP_IOXPUT_8(isp, src->abts_rsp_df_ctl, &dst->abts_rsp_df_ctl); ISP_IOXPUT_8(isp, src->abts_rsp_seq_id, &dst->abts_rsp_seq_id); ISP_IOXPUT_16(isp, src->abts_rsp_rx_id, &dst->abts_rsp_rx_id); ISP_IOXPUT_16(isp, src->abts_rsp_ox_id, &dst->abts_rsp_ox_id); ISP_IOXPUT_32(isp, src->abts_rsp_param, &dst->abts_rsp_param); if (src->abts_rsp_r_ctl == BA_ACC) { ISP_IOXPUT_16(isp, src->abts_rsp_payload.ba_acc.reserved, &dst->abts_rsp_payload.ba_acc.reserved); ISP_IOXPUT_8(isp, src->abts_rsp_payload.ba_acc.last_seq_id, &dst->abts_rsp_payload.ba_acc.last_seq_id); ISP_IOXPUT_8(isp, src->abts_rsp_payload.ba_acc.seq_id_valid, &dst->abts_rsp_payload.ba_acc.seq_id_valid); ISP_IOXPUT_16(isp, src->abts_rsp_payload.ba_acc.aborted_rx_id, &dst->abts_rsp_payload.ba_acc.aborted_rx_id); ISP_IOXPUT_16(isp, src->abts_rsp_payload.ba_acc.aborted_ox_id, &dst->abts_rsp_payload.ba_acc.aborted_ox_id); ISP_IOXPUT_16(isp, src->abts_rsp_payload.ba_acc.high_seq_cnt, &dst->abts_rsp_payload.ba_acc.high_seq_cnt); ISP_IOXPUT_16(isp, src->abts_rsp_payload.ba_acc.low_seq_cnt, &dst->abts_rsp_payload.ba_acc.low_seq_cnt); for (i = 0; i < 4; i++) { ISP_IOXPUT_16(isp, src->abts_rsp_payload.ba_acc.reserved2[i], &dst->abts_rsp_payload.ba_acc.reserved2[i]); } } else if (src->abts_rsp_r_ctl == BA_RJT) { ISP_IOXPUT_8(isp, src->abts_rsp_payload.ba_rjt.vendor_unique, &dst->abts_rsp_payload.ba_rjt.vendor_unique); ISP_IOXPUT_8(isp, src->abts_rsp_payload.ba_rjt.explanation, &dst->abts_rsp_payload.ba_rjt.explanation); ISP_IOXPUT_8(isp, src->abts_rsp_payload.ba_rjt.reason, &dst->abts_rsp_payload.ba_rjt.reason); ISP_IOXPUT_8(isp, src->abts_rsp_payload.ba_rjt.reserved, &dst->abts_rsp_payload.ba_rjt.reserved); for (i = 0; i < 12; i++) { ISP_IOXPUT_16(isp, 
src->abts_rsp_payload.ba_rjt.reserved2[i], &dst->abts_rsp_payload.ba_rjt.reserved2[i]); } } else { for (i = 0; i < 16; i++) { ISP_IOXPUT_8(isp, src->abts_rsp_payload.reserved[i], &dst->abts_rsp_payload.reserved[i]); } } ISP_IOXPUT_32(isp, src->abts_rsp_rxid_task, &dst->abts_rsp_rxid_task); } void isp_get_abts_rsp(ispsoftc_t *isp, abts_rsp_t *src, abts_rsp_t *dst) { int i; isp_get_hdr(isp, &src->abts_rsp_header, &dst->abts_rsp_header); ISP_IOXGET_32(isp, &src->abts_rsp_handle, dst->abts_rsp_handle); ISP_IOXGET_16(isp, &src->abts_rsp_status, dst->abts_rsp_status); ISP_IOXGET_16(isp, &src->abts_rsp_nphdl, dst->abts_rsp_nphdl); ISP_IOXGET_16(isp, &src->abts_rsp_ctl_flags, dst->abts_rsp_ctl_flags); ISP_IOXGET_16(isp, &src->abts_rsp_sof, dst->abts_rsp_sof); ISP_IOXGET_32(isp, &src->abts_rsp_rxid_abts, dst->abts_rsp_rxid_abts); ISP_IOXGET_16(isp, &src->abts_rsp_did_lo, dst->abts_rsp_did_lo); ISP_IOXGET_8(isp, &src->abts_rsp_did_hi, dst->abts_rsp_did_hi); ISP_IOXGET_8(isp, &src->abts_rsp_r_ctl, dst->abts_rsp_r_ctl); ISP_IOXGET_16(isp, &src->abts_rsp_sid_lo, dst->abts_rsp_sid_lo); ISP_IOXGET_8(isp, &src->abts_rsp_sid_hi, dst->abts_rsp_sid_hi); ISP_IOXGET_8(isp, &src->abts_rsp_cs_ctl, dst->abts_rsp_cs_ctl); ISP_IOXGET_16(isp, &src->abts_rsp_f_ctl_lo, dst->abts_rsp_f_ctl_lo); ISP_IOXGET_8(isp, &src->abts_rsp_f_ctl_hi, dst->abts_rsp_f_ctl_hi); ISP_IOXGET_8(isp, &src->abts_rsp_type, dst->abts_rsp_type); ISP_IOXGET_16(isp, &src->abts_rsp_seq_cnt, dst->abts_rsp_seq_cnt); ISP_IOXGET_8(isp, &src->abts_rsp_df_ctl, dst->abts_rsp_df_ctl); ISP_IOXGET_8(isp, &src->abts_rsp_seq_id, dst->abts_rsp_seq_id); ISP_IOXGET_16(isp, &src->abts_rsp_rx_id, dst->abts_rsp_rx_id); ISP_IOXGET_16(isp, &src->abts_rsp_ox_id, dst->abts_rsp_ox_id); ISP_IOXGET_32(isp, &src->abts_rsp_param, dst->abts_rsp_param); for (i = 0; i < 8; i++) { ISP_IOXGET_8(isp, &src->abts_rsp_payload.rsp.reserved[i], dst->abts_rsp_payload.rsp.reserved[i]); } ISP_IOXGET_32(isp, &src->abts_rsp_payload.rsp.subcode1, dst->abts_rsp_payload.rsp.subcode1); ISP_IOXGET_32(isp, &src->abts_rsp_payload.rsp.subcode2, dst->abts_rsp_payload.rsp.subcode2); ISP_IOXGET_32(isp, &src->abts_rsp_rxid_task, dst->abts_rsp_rxid_task); } #endif /* ISP_TARGET_MODE */ /* * vim:ts=8:sw=8 */ Index: projects/powernv/dev/isp/ispmbox.h =================================================================== --- projects/powernv/dev/isp/ispmbox.h (revision 290990) +++ projects/powernv/dev/isp/ispmbox.h (revision 290991) @@ -1,2648 +1,2643 @@ /* $FreeBSD$ */ /*- * Copyright (c) 1997-2009 by Matthew Jacob * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Mailbox and Queue Entry Definitions for Qlogic ISP SCSI adapters. */ #ifndef _ISPMBOX_H #define _ISPMBOX_H /* * Mailbox Command Opcodes */ #define MBOX_NO_OP 0x0000 #define MBOX_LOAD_RAM 0x0001 #define MBOX_EXEC_FIRMWARE 0x0002 #define MBOX_DUMP_RAM 0x0003 #define MBOX_WRITE_RAM_WORD 0x0004 #define MBOX_READ_RAM_WORD 0x0005 #define MBOX_MAILBOX_REG_TEST 0x0006 #define MBOX_VERIFY_CHECKSUM 0x0007 #define MBOX_ABOUT_FIRMWARE 0x0008 #define MBOX_LOAD_RISC_RAM_2100 0x0009 /* a */ #define MBOX_LOAD_RISC_RAM 0x000b /* c */ #define MBOX_WRITE_RAM_WORD_EXTENDED 0x000d #define MBOX_CHECK_FIRMWARE 0x000e #define MBOX_READ_RAM_WORD_EXTENDED 0x000f #define MBOX_INIT_REQ_QUEUE 0x0010 #define MBOX_INIT_RES_QUEUE 0x0011 #define MBOX_EXECUTE_IOCB 0x0012 #define MBOX_WAKE_UP 0x0013 #define MBOX_STOP_FIRMWARE 0x0014 #define MBOX_ABORT 0x0015 #define MBOX_ABORT_DEVICE 0x0016 #define MBOX_ABORT_TARGET 0x0017 #define MBOX_BUS_RESET 0x0018 #define MBOX_STOP_QUEUE 0x0019 #define MBOX_START_QUEUE 0x001a #define MBOX_SINGLE_STEP_QUEUE 0x001b #define MBOX_ABORT_QUEUE 0x001c #define MBOX_GET_DEV_QUEUE_STATUS 0x001d /* 1e */ #define MBOX_GET_FIRMWARE_STATUS 0x001f #define MBOX_GET_INIT_SCSI_ID 0x0020 #define MBOX_GET_SELECT_TIMEOUT 0x0021 #define MBOX_GET_RETRY_COUNT 0x0022 #define MBOX_GET_TAG_AGE_LIMIT 0x0023 #define MBOX_GET_CLOCK_RATE 0x0024 #define MBOX_GET_ACT_NEG_STATE 0x0025 #define MBOX_GET_ASYNC_DATA_SETUP_TIME 0x0026 #define MBOX_GET_SBUS_PARAMS 0x0027 #define MBOX_GET_PCI_PARAMS MBOX_GET_SBUS_PARAMS #define MBOX_GET_TARGET_PARAMS 0x0028 #define MBOX_GET_DEV_QUEUE_PARAMS 0x0029 #define MBOX_GET_RESET_DELAY_PARAMS 0x002a /* 2b */ /* 2c */ /* 2d */ /* 2e */ /* 2f */ #define MBOX_SET_INIT_SCSI_ID 0x0030 #define MBOX_SET_SELECT_TIMEOUT 0x0031 #define MBOX_SET_RETRY_COUNT 0x0032 #define MBOX_SET_TAG_AGE_LIMIT 0x0033 #define MBOX_SET_CLOCK_RATE 0x0034 #define MBOX_SET_ACT_NEG_STATE 0x0035 #define MBOX_SET_ASYNC_DATA_SETUP_TIME 0x0036 #define MBOX_SET_SBUS_CONTROL_PARAMS 0x0037 #define MBOX_SET_PCI_PARAMETERS 0x0037 #define MBOX_SET_TARGET_PARAMS 0x0038 #define MBOX_SET_DEV_QUEUE_PARAMS 0x0039 #define MBOX_SET_RESET_DELAY_PARAMS 0x003a /* 3b */ /* 3c */ /* 3d */ /* 3e */ /* 3f */ #define MBOX_RETURN_BIOS_BLOCK_ADDR 0x0040 #define MBOX_WRITE_FOUR_RAM_WORDS 0x0041 #define MBOX_EXEC_BIOS_IOCB 0x0042 #define MBOX_SET_FW_FEATURES 0x004a #define MBOX_GET_FW_FEATURES 0x004b #define FW_FEATURE_FAST_POST 0x1 #define FW_FEATURE_LVD_NOTIFY 0x2 #define FW_FEATURE_RIO_32BIT 0x4 #define FW_FEATURE_RIO_16BIT 0x8 #define MBOX_INIT_REQ_QUEUE_A64 0x0052 #define MBOX_INIT_RES_QUEUE_A64 0x0053 #define MBOX_ENABLE_TARGET_MODE 0x0055 #define ENABLE_TARGET_FLAG 0x8000 #define ENABLE_TQING_FLAG 0x0004 #define ENABLE_MANDATORY_DISC 0x0002 #define MBOX_GET_TARGET_STATUS 0x0056 /* These are for the ISP2X00 FC cards */ #define MBOX_GET_LOOP_ID 0x0020 /* for 24XX cards, outgoing mailbox 7 has these values for F or FL topologies */ #define ISP24XX_INORDER 0x0100 #define ISP24XX_NPIV_SAN 0x0400 #define ISP24XX_VSAN_SAN
0x1000 #define ISP24XX_FC_SP_SAN 0x2000 #define MBOX_GET_FIRMWARE_OPTIONS 0x0028 #define MBOX_SET_FIRMWARE_OPTIONS 0x0038 #define MBOX_GET_RESOURCE_COUNT 0x0042 #define MBOX_REQUEST_OFFLINE_MODE 0x0043 #define MBOX_ENHANCED_GET_PDB 0x0047 #define MBOX_INIT_FIRMWARE_MULTI_ID 0x0048 /* 2400 only */ #define MBOX_GET_VP_DATABASE 0x0049 /* 2400 only */ #define MBOX_GET_VP_DATABASE_ENTRY 0x004a /* 2400 only */ #define MBOX_EXEC_COMMAND_IOCB_A64 0x0054 #define MBOX_INIT_FIRMWARE 0x0060 #define MBOX_GET_INIT_CONTROL_BLOCK 0x0061 #define MBOX_INIT_LIP 0x0062 #define MBOX_GET_FC_AL_POSITION_MAP 0x0063 #define MBOX_GET_PORT_DB 0x0064 #define MBOX_CLEAR_ACA 0x0065 #define MBOX_TARGET_RESET 0x0066 #define MBOX_CLEAR_TASK_SET 0x0067 #define MBOX_ABORT_TASK_SET 0x0068 #define MBOX_GET_FW_STATE 0x0069 #define MBOX_GET_PORT_NAME 0x006A #define MBOX_GET_LINK_STATUS 0x006B #define MBOX_INIT_LIP_RESET 0x006C #define MBOX_SEND_SNS 0x006E #define MBOX_FABRIC_LOGIN 0x006F #define MBOX_SEND_CHANGE_REQUEST 0x0070 #define MBOX_FABRIC_LOGOUT 0x0071 #define MBOX_INIT_LIP_LOGIN 0x0072 #define MBOX_GET_PORT_NODE_NAME_LIST 0x0075 #define MBOX_GET_ID_LIST 0x007C #define MBOX_LUN_RESET 0x007E #define MBOX_DRIVER_HEARTBEAT 0x005B #define MBOX_FW_HEARTBEAT 0x005C #define MBOX_GET_SET_DATA_RATE 0x005D /* 24XX/23XX only */ #define MBGSD_GET_RATE 0 #define MBGSD_SET_RATE 1 #define MBGSD_SET_RATE_NOW 2 /* 24XX only */ #define MBGSD_ONEGB 0 #define MBGSD_TWOGB 1 #define MBGSD_AUTO 2 #define MBGSD_FOURGB 3 /* 24XX only */ #define MBGSD_EIGHTGB 4 /* 25XX only */ #define ISP2100_SET_PCI_PARAM 0x00ff #define MBOX_BUSY 0x04 /* * Mailbox Command Complete Status Codes */ #define MBOX_COMMAND_COMPLETE 0x4000 #define MBOX_INVALID_COMMAND 0x4001 #define MBOX_HOST_INTERFACE_ERROR 0x4002 #define MBOX_TEST_FAILED 0x4003 #define MBOX_COMMAND_ERROR 0x4005 #define MBOX_COMMAND_PARAM_ERROR 0x4006 #define MBOX_PORT_ID_USED 0x4007 #define MBOX_LOOP_ID_USED 0x4008 #define MBOX_ALL_IDS_USED 0x4009 #define MBOX_NOT_LOGGED_IN 0x400A #define MBOX_LINK_DOWN_ERROR 0x400B #define MBOX_LOOPBACK_ERROR 0x400C #define MBOX_CHECKSUM_ERROR 0x4010 #define MBOX_INVALID_PRODUCT_KEY 0x4020 /* pseudo mailbox completion codes */ #define MBOX_REGS_BUSY 0x6000 /* registers in use */ #define MBOX_TIMEOUT 0x6001 /* command timed out */ #define MBLOGALL 0xffffffff #define MBLOGNONE 0x00000000 #define MBLOGMASK(x) (1 << (((x) - 1) & 0x1f)) /* * Asynchronous event status codes */ #define ASYNC_BUS_RESET 0x8001 #define ASYNC_SYSTEM_ERROR 0x8002 #define ASYNC_RQS_XFER_ERR 0x8003 #define ASYNC_RSP_XFER_ERR 0x8004 #define ASYNC_QWAKEUP 0x8005 #define ASYNC_TIMEOUT_RESET 0x8006 #define ASYNC_DEVICE_RESET 0x8007 #define ASYNC_EXTMSG_UNDERRUN 0x800A #define ASYNC_SCAM_INT 0x800B #define ASYNC_HUNG_SCSI 0x800C #define ASYNC_KILLED_BUS 0x800D #define ASYNC_BUS_TRANSIT 0x800E /* LVD -> HVD, eg. 
*/ #define ASYNC_LIP_OCCURRED 0x8010 #define ASYNC_LOOP_UP 0x8011 #define ASYNC_LOOP_DOWN 0x8012 #define ASYNC_LOOP_RESET 0x8013 #define ASYNC_PDB_CHANGED 0x8014 #define ASYNC_CHANGE_NOTIFY 0x8015 #define ASYNC_LIP_F8 0x8016 #define ASYNC_LIP_ERROR 0x8017 #define ASYNC_SECURITY_UPDATE 0x801B #define ASYNC_CMD_CMPLT 0x8020 #define ASYNC_CTIO_DONE 0x8021 #define ASYNC_RIO32_1 0x8021 #define ASYNC_RIO32_2 0x8022 #define ASYNC_IP_XMIT_DONE 0x8022 #define ASYNC_IP_RECV_DONE 0x8023 #define ASYNC_IP_BROADCAST 0x8024 #define ASYNC_IP_RCVQ_LOW 0x8025 #define ASYNC_IP_RCVQ_EMPTY 0x8026 #define ASYNC_IP_RECV_DONE_ALIGNED 0x8027 #define ASYNC_PTPMODE 0x8030 #define ASYNC_RIO16_1 0x8031 #define ASYNC_RIO16_2 0x8032 #define ASYNC_RIO16_3 0x8033 #define ASYNC_RIO16_4 0x8034 #define ASYNC_RIO16_5 0x8035 #define ASYNC_CONNMODE 0x8036 #define ISP_CONN_LOOP 1 #define ISP_CONN_PTP 2 #define ISP_CONN_BADLIP 3 #define ISP_CONN_FATAL 4 #define ISP_CONN_LOOPBACK 5 #define ASYNC_RIOZIO_STALL 0x8040 /* there's a RIO/ZIO entry that hasn't been serviced */ #define ASYNC_RIO32_2_2200 0x8042 /* same as ASYNC_RIO32_2, but for 2100/2200 */ #define ASYNC_RCV_ERR 0x8048 /* * Firmware Options. There are a lot of them. * * IFCOPTN - ISP Fibre Channel Option Word N */ #define IFCOPT1_EQFQASYNC (1 << 13) /* enable QFULL notification */ #define IFCOPT1_EAABSRCVD (1 << 12) #define IFCOPT1_RJTASYNC (1 << 11) /* enable 8018 notification */ #define IFCOPT1_ENAPURE (1 << 10) #define IFCOPT1_ENA8017 (1 << 7) #define IFCOPT1_DISGPIO67 (1 << 6) #define IFCOPT1_LIPLOSSIMM (1 << 5) #define IFCOPT1_DISF7SWTCH (1 << 4) #define IFCOPT1_CTIO_RETRY (1 << 3) #define IFCOPT1_LIPASYNC (1 << 1) #define IFCOPT1_LIPF8 (1 << 0) #define IFCOPT2_LOOPBACK (1 << 1) #define IFCOPT2_ATIO3_ONLY (1 << 0) #define IFCOPT3_NOPRLI (1 << 4) /* disable automatic sending of PRLI on local loops */ #define IFCOPT3_RNDASYNC (1 << 1) /* * 2.01.31 2200 Only. Need Bit 13 in Mailbox 1 for Set Firmware Options * mailbox command to enable this. 
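 *
 * With the driver's mailbox helpers, that enable step would look
 * roughly like the sketch below (hedged: treat the bit 13 mask as
 * illustrative, since option bit assignments are firmware specific):
 *
 *	mbreg_t mbs;
 *	MBSINIT(&mbs, MBOX_SET_FIRMWARE_OPTIONS, MBLOGALL, 0);
 *	mbs.param[1] = (1 << 13);
 *	isp_mboxcmd(isp, &mbs);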
*/ #define ASYNC_QFULL_SENT 0x8049 /* * Needs to be enabled */ #define ASYNC_AUTO_PLOGI_RJT 0x8018 /* * 24XX only */ #define ASYNC_RJT_SENT 0x8049 /* * All IOCB Queue entries are this size */ #define QENTRY_LEN 64 /* * Command Structure Definitions */ typedef struct { uint32_t ds_base; uint32_t ds_count; } ispds_t; typedef struct { uint32_t ds_base; uint32_t ds_basehi; uint32_t ds_count; } ispds64_t; #define DSTYPE_32BIT 0 #define DSTYPE_64BIT 1 typedef struct { uint16_t ds_type; /* 0-> ispds_t, 1-> ispds64_t */ uint32_t ds_segment; /* unused */ uint32_t ds_base; /* 32 bit address of DSD list */ } ispdslist_t; typedef struct { uint8_t rqs_entry_type; uint8_t rqs_entry_count; uint8_t rqs_seqno; uint8_t rqs_flags; } isphdr_t; /* RQS Flag definitions */ #define RQSFLAG_CONTINUATION 0x01 #define RQSFLAG_FULL 0x02 #define RQSFLAG_BADHEADER 0x04 #define RQSFLAG_BADPACKET 0x08 #define RQSFLAG_BADCOUNT 0x10 #define RQSFLAG_BADORDER 0x20 #define RQSFLAG_MASK 0x3f /* RQS entry_type definitions */ #define RQSTYPE_REQUEST 0x01 #define RQSTYPE_DATASEG 0x02 #define RQSTYPE_RESPONSE 0x03 #define RQSTYPE_MARKER 0x04 #define RQSTYPE_CMDONLY 0x05 #define RQSTYPE_ATIO 0x06 /* Target Mode */ #define RQSTYPE_CTIO 0x07 /* Target Mode */ #define RQSTYPE_SCAM 0x08 #define RQSTYPE_A64 0x09 #define RQSTYPE_A64_CONT 0x0a #define RQSTYPE_ENABLE_LUN 0x0b /* Target Mode */ #define RQSTYPE_MODIFY_LUN 0x0c /* Target Mode */ #define RQSTYPE_NOTIFY 0x0d /* Target Mode */ #define RQSTYPE_NOTIFY_ACK 0x0e /* Target Mode */ #define RQSTYPE_CTIO1 0x0f /* Target Mode */ #define RQSTYPE_STATUS_CONT 0x10 #define RQSTYPE_T2RQS 0x11 #define RQSTYPE_CTIO7 0x12 #define RQSTYPE_IP_XMIT 0x13 #define RQSTYPE_TSK_MGMT 0x14 #define RQSTYPE_T4RQS 0x15 #define RQSTYPE_ATIO2 0x16 /* Target Mode */ #define RQSTYPE_CTIO2 0x17 /* Target Mode */ #define RQSTYPE_T7RQS 0x18 #define RQSTYPE_T3RQS 0x19 #define RQSTYPE_IP_XMIT_64 0x1b #define RQSTYPE_CTIO4 0x1e /* Target Mode */ #define RQSTYPE_CTIO3 0x1f /* Target Mode */ #define RQSTYPE_RIO1 0x21 #define RQSTYPE_RIO2 0x22 #define RQSTYPE_IP_RECV 0x23 #define RQSTYPE_IP_RECV_CONT 0x24 #define RQSTYPE_CT_PASSTHRU 0x29 #define RQSTYPE_MS_PASSTHRU 0x29 #define RQSTYPE_VP_CTRL 0x30 /* 24XX only */ #define RQSTYPE_VP_MODIFY 0x31 /* 24XX only */ #define RQSTYPE_RPT_ID_ACQ 0x32 /* 24XX only */ #define RQSTYPE_ABORT_IO 0x33 #define RQSTYPE_T6RQS 0x48 #define RQSTYPE_LOGIN 0x52 #define RQSTYPE_ABTS_RCVD 0x54 /* 24XX only */ #define RQSTYPE_ABTS_RSP 0x55 /* 24XX only */ #define ISP_RQDSEG 4 typedef struct { isphdr_t req_header; uint32_t req_handle; uint8_t req_lun_trn; uint8_t req_target; uint16_t req_cdblen; uint16_t req_flags; uint16_t req_reserved; uint16_t req_time; uint16_t req_seg_count; uint8_t req_cdb[12]; ispds_t req_dataseg[ISP_RQDSEG]; } ispreq_t; #define ISP_RQDSEG_A64 2 typedef struct { isphdr_t mrk_header; uint32_t mrk_handle; uint8_t mrk_reserved0; uint8_t mrk_target; uint16_t mrk_modifier; uint16_t mrk_flags; uint16_t mrk_lun; uint8_t mrk_reserved1[48]; } isp_marker_t; typedef struct { isphdr_t mrk_header; uint32_t mrk_handle; uint16_t mrk_nphdl; uint8_t mrk_modifier; uint8_t mrk_reserved0; uint8_t mrk_reserved1; uint8_t mrk_vphdl; uint16_t mrk_reserved2; uint8_t mrk_lun[8]; uint8_t mrk_reserved3[40]; } isp_marker_24xx_t; #define SYNC_DEVICE 0 #define SYNC_TARGET 1 #define SYNC_ALL 2 #define SYNC_LIP 3 #define ISP_RQDSEG_T2 3 typedef struct { isphdr_t req_header; uint32_t req_handle; uint8_t req_lun_trn; uint8_t req_target; uint16_t req_scclun; uint16_t req_flags; uint8_t req_crn; uint8_t 
req_reserved; uint16_t req_time; uint16_t req_seg_count; uint8_t req_cdb[16]; uint32_t req_totalcnt; ispds_t req_dataseg[ISP_RQDSEG_T2]; } ispreqt2_t; typedef struct { isphdr_t req_header; uint32_t req_handle; uint16_t req_target; uint16_t req_scclun; uint16_t req_flags; uint16_t req_reserved; uint16_t req_time; uint16_t req_seg_count; uint8_t req_cdb[16]; uint32_t req_totalcnt; ispds_t req_dataseg[ISP_RQDSEG_T2]; } ispreqt2e_t; #define ISP_RQDSEG_T3 2 typedef struct { isphdr_t req_header; uint32_t req_handle; uint8_t req_lun_trn; uint8_t req_target; uint16_t req_scclun; uint16_t req_flags; uint8_t req_crn; uint8_t req_reserved; uint16_t req_time; uint16_t req_seg_count; uint8_t req_cdb[16]; uint32_t req_totalcnt; ispds64_t req_dataseg[ISP_RQDSEG_T3]; } ispreqt3_t; #define ispreq64_t ispreqt3_t /* same as.... */ typedef struct { isphdr_t req_header; uint32_t req_handle; uint16_t req_target; uint16_t req_scclun; uint16_t req_flags; uint8_t req_crn; uint8_t req_reserved; uint16_t req_time; uint16_t req_seg_count; uint8_t req_cdb[16]; uint32_t req_totalcnt; ispds64_t req_dataseg[ISP_RQDSEG_T3]; } ispreqt3e_t; /* req_flag values */ #define REQFLAG_NODISCON 0x0001 #define REQFLAG_HTAG 0x0002 #define REQFLAG_OTAG 0x0004 #define REQFLAG_STAG 0x0008 #define REQFLAG_TARGET_RTN 0x0010 #define REQFLAG_NODATA 0x0000 #define REQFLAG_DATA_IN 0x0020 #define REQFLAG_DATA_OUT 0x0040 #define REQFLAG_DATA_UNKNOWN 0x0060 #define REQFLAG_DISARQ 0x0100 #define REQFLAG_FRC_ASYNC 0x0200 #define REQFLAG_FRC_SYNC 0x0400 #define REQFLAG_FRC_WIDE 0x0800 #define REQFLAG_NOPARITY 0x1000 #define REQFLAG_STOPQ 0x2000 #define REQFLAG_XTRASNS 0x4000 #define REQFLAG_PRIORITY 0x8000 typedef struct { isphdr_t req_header; uint32_t req_handle; uint8_t req_lun_trn; uint8_t req_target; uint16_t req_cdblen; uint16_t req_flags; uint16_t req_reserved; uint16_t req_time; uint16_t req_seg_count; uint8_t req_cdb[44]; } ispextreq_t; /* * ISP24XX structures */ typedef struct { isphdr_t req_header; uint32_t req_handle; uint16_t req_nphdl; uint16_t req_time; uint16_t req_seg_count; uint16_t req_reserved; uint8_t req_lun[8]; uint8_t req_alen_datadir; uint8_t req_task_management; uint8_t req_task_attribute; uint8_t req_crn; uint8_t req_cdb[16]; uint32_t req_dl; uint16_t req_tidlo; uint8_t req_tidhi; uint8_t req_vpidx; ispds64_t req_dataseg; } ispreqt7_t; /* Task Management Request Function */ typedef struct { isphdr_t tmf_header; uint32_t tmf_handle; uint16_t tmf_nphdl; uint8_t tmf_reserved0[2]; uint16_t tmf_delay; uint16_t tmf_timeout; uint8_t tmf_lun[8]; uint32_t tmf_flags; uint8_t tmf_reserved1[20]; uint16_t tmf_tidlo; uint8_t tmf_tidhi; uint8_t tmf_vpidx; uint8_t tmf_reserved2[12]; } isp24xx_tmf_t; #define ISP24XX_TMF_NOSEND 0x80000000 #define ISP24XX_TMF_LUN_RESET 0x00000010 #define ISP24XX_TMF_ABORT_TASK_SET 0x00000008 #define ISP24XX_TMF_CLEAR_TASK_SET 0x00000004 #define ISP24XX_TMF_TARGET_RESET 0x00000002 #define ISP24XX_TMF_CLEAR_ACA 0x00000001 /* I/O Abort Structure */ typedef struct { isphdr_t abrt_header; uint32_t abrt_handle; uint16_t abrt_nphdl; uint16_t abrt_options; uint32_t abrt_cmd_handle; uint16_t abrt_queue_number; uint8_t abrt_reserved[30]; uint16_t abrt_tidlo; uint8_t abrt_tidhi; uint8_t abrt_vpidx; uint8_t abrt_reserved1[12]; } isp24xx_abrt_t; #define ISP24XX_ABRT_NOSEND 0x01 /* don't actually send ABTS */ #define ISP24XX_ABRT_OKAY 0x00 /* in nphdl on return */ #define ISP24XX_ABRT_ENXIO 0x31 /* in nphdl on return */ #define ISP_CDSEG 7 typedef struct { isphdr_t req_header; uint32_t req_reserved; ispds_t 
req_dataseg[ISP_CDSEG]; } ispcontreq_t; #define ISP_CDSEG64 5 typedef struct { isphdr_t req_header; ispds64_t req_dataseg[ISP_CDSEG64]; } ispcontreq64_t; typedef struct { isphdr_t req_header; uint32_t req_handle; uint16_t req_scsi_status; uint16_t req_completion_status; uint16_t req_state_flags; uint16_t req_status_flags; uint16_t req_time; #define req_response_len req_time /* FC only */ uint16_t req_sense_len; uint32_t req_resid; uint8_t req_response[8]; /* FC only */ uint8_t req_sense_data[32]; } ispstatusreq_t; /* * Status Continuation */ typedef struct { isphdr_t req_header; uint8_t req_sense_data[60]; } ispstatus_cont_t; /* * 24XX Type 0 status */ typedef struct { isphdr_t req_header; uint32_t req_handle; uint16_t req_completion_status; uint16_t req_oxid; uint32_t req_resid; uint16_t req_reserved0; uint16_t req_state_flags; uint16_t req_retry_delay; /* aka Status Qualifier */ uint16_t req_scsi_status; uint32_t req_fcp_residual; uint32_t req_sense_len; uint32_t req_response_len; uint8_t req_rsp_sense[28]; } isp24xx_statusreq_t; /* * For Qlogic 2X00, the high order byte of SCSI status has * additional meaning. */ #define RQCS_CR 0x1000 /* Confirmation Request */ #define RQCS_RU 0x0800 /* Residual Under */ #define RQCS_RO 0x0400 /* Residual Over */ #define RQCS_RESID (RQCS_RU|RQCS_RO) #define RQCS_SV 0x0200 /* Sense Length Valid */ #define RQCS_RV 0x0100 /* FCP Response Length Valid */ /* * CT Passthru IOCB */ typedef struct { isphdr_t ctp_header; uint32_t ctp_handle; uint16_t ctp_status; uint16_t ctp_nphdl; /* n-port handle */ uint16_t ctp_cmd_cnt; /* Command DSD count */ uint8_t ctp_vpidx; uint8_t ctp_reserved0; uint16_t ctp_time; uint16_t ctp_reserved1; uint16_t ctp_rsp_cnt; /* Response DSD count */ uint16_t ctp_reserved2[5]; uint32_t ctp_rsp_bcnt; /* Response byte count */ uint32_t ctp_cmd_bcnt; /* Command byte count */ ispds64_t ctp_dataseg[2]; } isp_ct_pt_t; /* * MS Passthru IOCB */ typedef struct { isphdr_t ms_header; uint32_t ms_handle; uint16_t ms_nphdl; /* handle in high byte for !2k f/w */ uint16_t ms_status; uint16_t ms_flags; uint16_t ms_reserved1; /* low 8 bits */ uint16_t ms_time; uint16_t ms_cmd_cnt; /* Command DSD count */ uint16_t ms_tot_cnt; /* Total DSD Count */ uint8_t ms_type; /* MS type */ uint8_t ms_r_ctl; /* R_CTL */ uint16_t ms_rxid; /* RX_ID */ uint16_t ms_reserved2; uint32_t ms_handle2; uint32_t ms_rsp_bcnt; /* Response byte count */ uint32_t ms_cmd_bcnt; /* Command byte count */ ispds64_t ms_dataseg[2]; } isp_ms_t; /* * Completion Status Codes. 
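 *
 * These codes come back in the req_completion_status field of the
 * status IOCB.  A hedged consumer-side sketch (hypothetical handler
 * and helpers, not part of this header):
 *
 *	switch (sp->req_completion_status) {
 *	case RQCS_COMPLETE:
 *		break;			(done; now inspect SCSI status)
 *	case RQCS_DATA_UNDERRUN:
 *		handle_resid(sp->req_resid);
 *		break;
 *	case RQCS_TIMEOUT:
 *		retry_or_fail(xs);
 *		break;
 *	}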
*/ #define RQCS_COMPLETE 0x0000 #define RQCS_DMA_ERROR 0x0002 #define RQCS_RESET_OCCURRED 0x0004 #define RQCS_ABORTED 0x0005 #define RQCS_TIMEOUT 0x0006 #define RQCS_DATA_OVERRUN 0x0007 #define RQCS_DATA_UNDERRUN 0x0015 #define RQCS_QUEUE_FULL 0x001C /* 1X00 Only Completion Codes */ #define RQCS_INCOMPLETE 0x0001 #define RQCS_TRANSPORT_ERROR 0x0003 #define RQCS_COMMAND_OVERRUN 0x0008 #define RQCS_STATUS_OVERRUN 0x0009 #define RQCS_BAD_MESSAGE 0x000a #define RQCS_NO_MESSAGE_OUT 0x000b #define RQCS_EXT_ID_FAILED 0x000c #define RQCS_IDE_MSG_FAILED 0x000d #define RQCS_ABORT_MSG_FAILED 0x000e #define RQCS_REJECT_MSG_FAILED 0x000f #define RQCS_NOP_MSG_FAILED 0x0010 #define RQCS_PARITY_ERROR_MSG_FAILED 0x0011 #define RQCS_DEVICE_RESET_MSG_FAILED 0x0012 #define RQCS_ID_MSG_FAILED 0x0013 #define RQCS_UNEXP_BUS_FREE 0x0014 #define RQCS_XACT_ERR1 0x0018 #define RQCS_XACT_ERR2 0x0019 #define RQCS_XACT_ERR3 0x001A #define RQCS_BAD_ENTRY 0x001B #define RQCS_PHASE_SKIPPED 0x001D #define RQCS_ARQS_FAILED 0x001E #define RQCS_WIDE_FAILED 0x001F #define RQCS_SYNCXFER_FAILED 0x0020 #define RQCS_LVD_BUSERR 0x0021 /* 2X00 Only Completion Codes */ #define RQCS_PORT_UNAVAILABLE 0x0028 #define RQCS_PORT_LOGGED_OUT 0x0029 #define RQCS_PORT_CHANGED 0x002A #define RQCS_PORT_BUSY 0x002B /* 24XX Only Completion Codes */ #define RQCS_24XX_DRE 0x0011 /* data reassembly error */ #define RQCS_24XX_TABORT 0x0013 /* aborted by target */ #define RQCS_24XX_ENOMEM 0x002C /* f/w resource unavailable */ #define RQCS_24XX_TMO 0x0030 /* task management overrun */ /* * 1X00 specific State Flags */ #define RQSF_GOT_BUS 0x0100 #define RQSF_GOT_TARGET 0x0200 #define RQSF_SENT_CDB 0x0400 #define RQSF_XFRD_DATA 0x0800 #define RQSF_GOT_STATUS 0x1000 #define RQSF_GOT_SENSE 0x2000 #define RQSF_XFER_COMPLETE 0x4000 /* * 2X00 specific State Flags * (same as 1X00 except RQSF_GOT_BUS/RQSF_GOT_TARGET are not available) */ #define RQSF_DATA_IN 0x0020 #define RQSF_DATA_OUT 0x0040 #define RQSF_STAG 0x0008 #define RQSF_OTAG 0x0004 #define RQSF_HTAG 0x0002 /* * 1X00 Status Flags */ #define RQSTF_DISCONNECT 0x0001 #define RQSTF_SYNCHRONOUS 0x0002 #define RQSTF_PARITY_ERROR 0x0004 #define RQSTF_BUS_RESET 0x0008 #define RQSTF_DEVICE_RESET 0x0010 #define RQSTF_ABORTED 0x0020 #define RQSTF_TIMEOUT 0x0040 #define RQSTF_NEGOTIATION 0x0080 /* * 2X00 specific state flags */ /* RQSF_SENT_CDB */ /* RQSF_XFRD_DATA */ /* RQSF_GOT_STATUS */ /* RQSF_XFER_COMPLETE */ /* * 2X00 specific status flags */ /* RQSTF_ABORTED */ /* RQSTF_TIMEOUT */ #define RQSTF_DMA_ERROR 0x0080 #define RQSTF_LOGOUT 0x2000 /* * Miscellaneous */ #ifndef ISP_EXEC_THROTTLE #define ISP_EXEC_THROTTLE 16 #endif /* * About Firmware returns an 'attribute' word in mailbox 6. * These attributes are for 2200 and 2300. */ #define ISP_FW_ATTR_TMODE 0x0001 #define ISP_FW_ATTR_SCCLUN 0x0002 #define ISP_FW_ATTR_FABRIC 0x0004 #define ISP_FW_ATTR_CLASS2 0x0008 #define ISP_FW_ATTR_FCTAPE 0x0010 #define ISP_FW_ATTR_IP 0x0020 #define ISP_FW_ATTR_VI 0x0040 #define ISP_FW_ATTR_VI_SOLARIS 0x0080 #define ISP_FW_ATTR_2KLOGINS 0x0100 /* just a guess... 
*/ /* and these are for the 2400 */ #define ISP2400_FW_ATTR_CLASS2 0x0001 #define ISP2400_FW_ATTR_IP 0x0002 #define ISP2400_FW_ATTR_MULTIID 0x0004 #define ISP2400_FW_ATTR_SB2 0x0008 #define ISP2400_FW_ATTR_T10CRC 0x0010 #define ISP2400_FW_ATTR_VI 0x0020 #define ISP2400_FW_ATTR_MQ 0x0040 #define ISP2400_FW_ATTR_MSIX 0x0080 #define ISP2400_FW_ATTR_FCOE 0x0800 #define ISP2400_FW_ATTR_VP0 0x1000 #define ISP2400_FW_ATTR_EXPFW 0x2000 #define ISP2400_FW_ATTR_HOTFW 0x4000 #define ISP2400_FW_ATTR_EXTNDED 0x8000 #define ISP2400_FW_ATTR_EXTVP 0x00010000 #define ISP2400_FW_ATTR_VN2VN 0x00040000 #define ISP2400_FW_ATTR_EXMOFF 0x00080000 #define ISP2400_FW_ATTR_NPMOFF 0x00100000 #define ISP2400_FW_ATTR_DIFCHOP 0x00400000 #define ISP2400_FW_ATTR_SRIOV 0x02000000 #define ISP2400_FW_ATTR_ASICTMP 0x0200000000 #define ISP2400_FW_ATTR_ATIOMQ 0x0400000000 /* * These are either manifestly true or are dependent on f/w attributes */ #define ISP_CAP_TMODE(isp) \ (IS_24XX(isp)? 1 : (isp->isp_fwattr & ISP_FW_ATTR_TMODE)) #define ISP_CAP_SCCFW(isp) \ (IS_24XX(isp)? 1 : (isp->isp_fwattr & ISP_FW_ATTR_SCCLUN)) #define ISP_CAP_2KLOGIN(isp) \ (IS_24XX(isp)? 1 : (isp->isp_fwattr & ISP_FW_ATTR_2KLOGINS)) /* * This is only true for 24XX cards with this f/w attribute */ #define ISP_CAP_MULTI_ID(isp) \ (IS_24XX(isp)? (isp->isp_fwattr & ISP2400_FW_ATTR_MULTIID) : 0) #define ISP_GET_VPIDX(isp, tag) \ (ISP_CAP_MULTI_ID(isp) ? tag : 0) #define ISP_CAP_VP0(isp) \ (IS_24XX(isp)? (isp->isp_fwattr & ISP2400_FW_ATTR_VP0) : 0) /* * This is true manifestly or is dependent on a f/w attribute * but may or may not actually be *enabled*. In any case, it * is enabled on a per-channel basis. */ #define ISP_CAP_FCTAPE(isp) \ (IS_24XX(isp)? 1 : (isp->isp_fwattr & ISP_FW_ATTR_FCTAPE)) #define ISP_FCTAPE_ENABLED(isp, chan) \ (IS_24XX(isp)? (FCPARAM(isp, chan)->isp_xfwoptions & ICB2400_OPT2_FCTAPE) != 0 : (FCPARAM(isp, chan)->isp_xfwoptions & ICBXOPT_FCTAPE) != 0) /* * Reduced Interrupt Operation Response Queue Entries */ typedef struct { isphdr_t req_header; uint32_t req_handles[15]; } isp_rio1_t; typedef struct { isphdr_t req_header; uint16_t req_handles[30]; } isp_rio2_t; /* * FC (ISP2100/ISP2200/ISP2300/ISP2400) specific data structures */ /* * Initialization Control Block * * Version One (prime) format. 
*/ typedef struct { uint8_t icb_version; uint8_t icb_reserved0; uint16_t icb_fwoptions; uint16_t icb_maxfrmlen; uint16_t icb_maxalloc; uint16_t icb_execthrottle; uint8_t icb_retry_count; uint8_t icb_retry_delay; uint8_t icb_portname[8]; uint16_t icb_hardaddr; uint8_t icb_iqdevtype; uint8_t icb_logintime; uint8_t icb_nodename[8]; uint16_t icb_rqstout; uint16_t icb_rspnsin; uint16_t icb_rqstqlen; uint16_t icb_rsltqlen; uint16_t icb_rqstaddr[4]; uint16_t icb_respaddr[4]; uint16_t icb_lunenables; uint8_t icb_ccnt; uint8_t icb_icnt; uint16_t icb_lunetimeout; uint16_t icb_reserved1; uint16_t icb_xfwoptions; uint8_t icb_racctimer; uint8_t icb_idelaytimer; uint16_t icb_zfwoptions; uint16_t icb_reserved2[13]; } isp_icb_t; #define ICB_VERSION1 1 #define ICBOPT_EXTENDED 0x8000 #define ICBOPT_BOTH_WWNS 0x4000 #define ICBOPT_FULL_LOGIN 0x2000 #define ICBOPT_STOP_ON_QFULL 0x1000 /* 2200/2100 only */ #define ICBOPT_PREV_ADDRESS 0x0800 #define ICBOPT_SRCHDOWN 0x0400 #define ICBOPT_NOLIP 0x0200 #define ICBOPT_PDBCHANGE_AE 0x0100 #define ICBOPT_TGT_TYPE 0x0080 #define ICBOPT_INI_ADISC 0x0040 #define ICBOPT_INI_DISABLE 0x0020 #define ICBOPT_TGT_ENABLE 0x0010 #define ICBOPT_FAST_POST 0x0008 #define ICBOPT_FULL_DUPLEX 0x0004 #define ICBOPT_FAIRNESS 0x0002 #define ICBOPT_HARD_ADDRESS 0x0001 #define ICBXOPT_NO_LOGOUT 0x8000 /* no logout on link failure */ #define ICBXOPT_FCTAPE_CCQ 0x4000 /* FC-Tape Command Queueing */ #define ICBXOPT_FCTAPE_CONFIRM 0x2000 #define ICBXOPT_FCTAPE 0x1000 #define ICBXOPT_CLASS2_ACK0 0x0200 #define ICBXOPT_CLASS2 0x0100 #define ICBXOPT_NO_PLAY 0x0080 /* don't play if can't get hard addr */ #define ICBXOPT_TOPO_MASK 0x0070 #define ICBXOPT_LOOP_ONLY 0x0000 #define ICBXOPT_PTP_ONLY 0x0010 #define ICBXOPT_LOOP_2_PTP 0x0020 #define ICBXOPT_PTP_2_LOOP 0x0030 /* * The lower 4 bits of the xfwoptions field are the OPERATION MODE bits. 
* RIO is not defined for the 23XX cards (just 2200) */ #define ICBXOPT_RIO_OFF 0 #define ICBXOPT_RIO_16BIT 1 #define ICBXOPT_RIO_32BIT 2 #define ICBXOPT_RIO_16BIT_IOCB 3 #define ICBXOPT_RIO_32BIT_IOCB 4 #define ICBXOPT_ZIO 5 #define ICBXOPT_TIMER_MASK 0x7 #define ICBZOPT_RATE_MASK 0xC000 #define ICBZOPT_RATE_ONEGB 0x0000 #define ICBZOPT_RATE_AUTO 0x8000 #define ICBZOPT_RATE_TWOGB 0x4000 #define ICBZOPT_50_OHM 0x2000 #define ICBZOPT_ENA_OOF 0x0040 /* out of order frame handling */ #define ICBZOPT_RSPSZ_MASK 0x0030 #define ICBZOPT_RSPSZ_24 0x0000 #define ICBZOPT_RSPSZ_12 0x0010 #define ICBZOPT_RSPSZ_24A 0x0020 #define ICBZOPT_RSPSZ_32 0x0030 #define ICBZOPT_SOFTID 0x0002 #define ICBZOPT_ENA_RDXFR_RDY 0x0001 /* 2400 F/W options */ #define ICB2400_OPT1_BOTH_WWNS 0x00004000 #define ICB2400_OPT1_FULL_LOGIN 0x00002000 #define ICB2400_OPT1_PREV_ADDRESS 0x00000800 #define ICB2400_OPT1_SRCHDOWN 0x00000400 #define ICB2400_OPT1_NOLIP 0x00000200 #define ICB2400_OPT1_INI_DISABLE 0x00000020 #define ICB2400_OPT1_TGT_ENABLE 0x00000010 #define ICB2400_OPT1_FULL_DUPLEX 0x00000004 #define ICB2400_OPT1_FAIRNESS 0x00000002 #define ICB2400_OPT1_HARD_ADDRESS 0x00000001 #define ICB2400_OPT2_ENA_ATIOMQ 0x08000000 #define ICB2400_OPT2_ENA_IHA 0x04000000 #define ICB2400_OPT2_QOS 0x02000000 #define ICB2400_OPT2_IOCBS 0x01000000 #define ICB2400_OPT2_ENA_IHR 0x00400000 #define ICB2400_OPT2_ENA_VMS 0x00200000 #define ICB2400_OPT2_ENA_TA 0x00100000 #define ICB2400_OPT2_TPRLIC 0x00004000 #define ICB2400_OPT2_FCTAPE 0x00001000 #define ICB2400_OPT2_FCSP 0x00000800 #define ICB2400_OPT2_CLASS2_ACK0 0x00000200 #define ICB2400_OPT2_CLASS2 0x00000100 #define ICB2400_OPT2_NO_PLAY 0x00000080 #define ICB2400_OPT2_TOPO_MASK 0x00000070 #define ICB2400_OPT2_LOOP_ONLY 0x00000000 #define ICB2400_OPT2_PTP_ONLY 0x00000010 #define ICB2400_OPT2_LOOP_2_PTP 0x00000020 #define ICB2400_OPT2_TIMER_MASK 0x0000000f #define ICB2400_OPT2_ZIO 0x00000005 #define ICB2400_OPT2_ZIO1 0x00000006 #define ICB2400_OPT3_NO_CTXDIS 0x40000000 #define ICB2400_OPT3_ENA_ETH_RESP 0x08000000 #define ICB2400_OPT3_ENA_ETH_ATIO 0x04000000 #define ICB2400_OPT3_ENA_MFCF 0x00020000 #define ICB2400_OPT3_SKIP_FOURGB 0x00010000 #define ICB2400_OPT3_RATE_MASK 0x0000E000 #define ICB2400_OPT3_RATE_ONEGB 0x00000000 #define ICB2400_OPT3_RATE_TWOGB 0x00002000 #define ICB2400_OPT3_RATE_AUTO 0x00004000 #define ICB2400_OPT3_RATE_FOURGB 0x00006000 #define ICB2400_OPT3_RATE_EIGHTGB 0x00008000 #define ICB2400_OPT3_RATE_SIXTEENGB 0x0000A000 #define ICB2400_OPT3_ENA_OOF_XFRDY 0x00000200 #define ICB2400_OPT3_NO_N2N_LOGI 0x00000100 #define ICB2400_OPT3_NO_LOCAL_PLOGI 0x00000080 #define ICB2400_OPT3_ENA_OOF 0x00000040 /* note that a response size flag of zero is reserved! */ #define ICB2400_OPT3_RSPSZ_MASK 0x00000030 #define ICB2400_OPT3_RSPSZ_12 0x00000010 #define ICB2400_OPT3_RSPSZ_24 0x00000020 #define ICB2400_OPT3_RSPSZ_32 0x00000030 #define ICB2400_OPT3_SOFTID 0x00000002 #define ICB_MIN_FRMLEN 256 #define ICB_MAX_FRMLEN 2112 #define ICB_DFLT_FRMLEN 1024 #define ICB_DFLT_ALLOC 256 #define ICB_DFLT_THROTTLE 16 #define ICB_DFLT_RDELAY 5 #define ICB_DFLT_RCOUNT 3 #define ICB_LOGIN_TOV 30 #define ICB_LUN_ENABLE_TOV 15 /* * And somebody at QLogic had a great idea that you could just change * the structure *and* keep the version number the same as the other cards. 
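 *
 * (The practical consequence is that code cannot key on icb_version to
 * tell the two layouts apart. A hedged sketch of the usual guard, using
 * the IS_24XX() predicate referenced earlier in this header:
 *
 *	if (IS_24XX(isp))
 *		build an isp_icb_2400_t (below);
 *	else
 *		build an isp_icb_t (above);
 * )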
*/ typedef struct { uint16_t icb_version; uint16_t icb_reserved0; uint16_t icb_maxfrmlen; uint16_t icb_execthrottle; uint16_t icb_xchgcnt; uint16_t icb_hardaddr; uint8_t icb_portname[8]; uint8_t icb_nodename[8]; uint16_t icb_rspnsin; uint16_t icb_rqstout; uint16_t icb_retry_count; uint16_t icb_priout; uint16_t icb_rsltqlen; uint16_t icb_rqstqlen; uint16_t icb_ldn_nols; uint16_t icb_prqstqlen; uint16_t icb_rqstaddr[4]; uint16_t icb_respaddr[4]; uint16_t icb_priaddr[4]; uint16_t icb_msixresp; uint16_t icb_msixatio; uint16_t icb_reserved1[2]; uint16_t icb_atio_in; uint16_t icb_atioqlen; uint16_t icb_atioqaddr[4]; uint16_t icb_idelaytimer; uint16_t icb_logintime; uint32_t icb_fwoptions1; uint32_t icb_fwoptions2; uint32_t icb_fwoptions3; uint16_t icb_qos; uint16_t icb_reserved2[3]; uint16_t icb_enodemac[3]; uint16_t icb_disctime; uint16_t icb_reserved3[4]; } isp_icb_2400_t; #define RQRSP_ADDR0015 0 #define RQRSP_ADDR1631 1 #define RQRSP_ADDR3247 2 #define RQRSP_ADDR4863 3 #define ICB_NNM0 7 #define ICB_NNM1 6 #define ICB_NNM2 5 #define ICB_NNM3 4 #define ICB_NNM4 3 #define ICB_NNM5 2 #define ICB_NNM6 1 #define ICB_NNM7 0 #define MAKE_NODE_NAME_FROM_WWN(array, wwn) \ array[ICB_NNM0] = (uint8_t) ((wwn >> 0) & 0xff), \ array[ICB_NNM1] = (uint8_t) ((wwn >> 8) & 0xff), \ array[ICB_NNM2] = (uint8_t) ((wwn >> 16) & 0xff), \ array[ICB_NNM3] = (uint8_t) ((wwn >> 24) & 0xff), \ array[ICB_NNM4] = (uint8_t) ((wwn >> 32) & 0xff), \ array[ICB_NNM5] = (uint8_t) ((wwn >> 40) & 0xff), \ array[ICB_NNM6] = (uint8_t) ((wwn >> 48) & 0xff), \ array[ICB_NNM7] = (uint8_t) ((wwn >> 56) & 0xff) #define MAKE_WWN_FROM_NODE_NAME(wwn, array) \ wwn = ((uint64_t) array[ICB_NNM0]) | \ ((uint64_t) array[ICB_NNM1] << 8) | \ ((uint64_t) array[ICB_NNM2] << 16) | \ ((uint64_t) array[ICB_NNM3] << 24) | \ ((uint64_t) array[ICB_NNM4] << 32) | \ ((uint64_t) array[ICB_NNM5] << 40) | \ ((uint64_t) array[ICB_NNM6] << 48) | \ ((uint64_t) array[ICB_NNM7] << 56) /* * For MULTI_ID firmware, this describes a * virtual port entity for getting status. */ typedef struct { uint16_t vp_port_status; uint8_t vp_port_options; uint8_t vp_port_loopid; uint8_t vp_port_portname[8]; uint8_t vp_port_nodename[8]; uint16_t vp_port_portid_lo; /* not present when trailing icb */ uint16_t vp_port_portid_hi; /* not present when trailing icb */ } vp_port_info_t; #define ICB2400_VPOPT_ENA_SNSLOGIN 0x00000040 /* Enable SNS Login and SCR for Virtual Ports */ #define ICB2400_VPOPT_TGT_DISABLE 0x00000020 /* Target Mode Disabled */ #define ICB2400_VPOPT_INI_ENABLE 0x00000010 /* Initiator Mode Enabled */ #define ICB2400_VPOPT_ENABLED 0x00000008 /* VP Enabled */ #define ICB2400_VPOPT_NOPLAY 0x00000004 /* ID Not Acquired */ #define ICB2400_VPOPT_PREV_ADDRESS 0x00000002 /* Previously Assigned ID */ #define ICB2400_VPOPT_HARD_ADDRESS 0x00000001 /* Hard Assigned ID */ #define ICB2400_VPOPT_WRITE_SIZE 20 /* * For MULTI_ID firmware, we append this structure * to the isp_icb_2400_t above, followed by a list * of structures that are *most* of the vp_port_info_t.
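 *
 * A worked example of the layout macros just below: with ICB2400_VPINFO_OFF
 * at 0x80, a 4-byte isp_icb_2400_vpinfo_t, and ICB2400_VPOPT_WRITE_SIZE (20)
 * bytes per entry, the entry for channel 2 starts at
 * ICB2400_VPINFO_PORT_OFF(2) = 0x80 + 4 + 2 * 20 = 0xac.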
*/ typedef struct { uint16_t vp_count; uint16_t vp_global_options; } isp_icb_2400_vpinfo_t; #define ICB2400_VPINFO_OFF 0x80 /* offset from start of ICB */ #define ICB2400_VPINFO_PORT_OFF(chan) \ (ICB2400_VPINFO_OFF + \ sizeof (isp_icb_2400_vpinfo_t) + (chan * ICB2400_VPOPT_WRITE_SIZE)) #define ICB2400_VPGOPT_FCA 0x01 /* Assume Clean Address bit in FLOGI ACC set (works only in static configurations) */ #define ICB2400_VPGOPT_MID_DISABLE 0x02 /* when set, connection mode2 will work with NPIV-capable switches */ #define ICB2400_VPGOPT_VP0_DECOUPLE 0x04 /* Allow VP0 decoupling if firmware supports it */ #define ICB2400_VPGOPT_SUSP_FDISK 0x10 /* Suspend FDISC for Enabled VPs */ #define ICB2400_VPGOPT_GEN_RIDA 0x20 /* Generate RIDA if FLOGI Fails */ typedef struct { isphdr_t vp_ctrl_hdr; uint32_t vp_ctrl_handle; uint16_t vp_ctrl_index_fail; uint16_t vp_ctrl_status; uint16_t vp_ctrl_command; uint16_t vp_ctrl_vp_count; uint16_t vp_ctrl_idmap[16]; uint16_t vp_ctrl_reserved[7]; uint16_t vp_ctrl_fcf_index; } vp_ctrl_info_t; #define VP_CTRL_CMD_ENABLE_VP 0x00 #define VP_CTRL_CMD_DISABLE_VP 0x08 #define VP_CTRL_CMD_DISABLE_VP_REINIT_LINK 0x09 #define VP_CTRL_CMD_DISABLE_VP_LOGO 0x0A #define VP_CTRL_CMD_DISABLE_VP_LOGO_ALL 0x0B /* * We can use this structure for modifying either one or two VP ports after initialization */ typedef struct { isphdr_t vp_mod_hdr; uint32_t vp_mod_hdl; uint16_t vp_mod_reserved0; uint16_t vp_mod_status; uint8_t vp_mod_cmd; uint8_t vp_mod_cnt; uint8_t vp_mod_idx0; uint8_t vp_mod_idx1; struct { uint8_t options; uint8_t loopid; uint16_t reserved1; uint8_t wwpn[8]; uint8_t wwnn[8]; } vp_mod_ports[2]; uint8_t vp_mod_reserved2[8]; } vp_modify_t; #define VP_STS_OK 0x00 #define VP_STS_ERR 0x01 #define VP_CNT_ERR 0x02 #define VP_GEN_ERR 0x03 #define VP_IDX_ERR 0x04 #define VP_STS_BSY 0x05 #define VP_MODIFY 0x00 #define VP_MODIFY_ENA 0x01 #define VP_MODIFY_OPT 0x02 #define VP_RESUME 0x03 /* * Port Data Base Element */ typedef struct { uint16_t pdb_options; uint8_t pdb_mstate; uint8_t pdb_sstate; uint8_t pdb_hardaddr_bits[4]; uint8_t pdb_portid_bits[4]; uint8_t pdb_nodename[8]; uint8_t pdb_portname[8]; uint16_t pdb_execthrottle; uint16_t pdb_exec_count; uint8_t pdb_retry_count; uint8_t pdb_retry_delay; uint16_t pdb_resalloc; uint16_t pdb_curalloc; uint16_t pdb_qhead; uint16_t pdb_qtail; uint16_t pdb_tl_next; uint16_t pdb_tl_last; uint16_t pdb_features; /* PLOGI, Common Service */ uint16_t pdb_pconcurrnt; /* PLOGI, Common Service */ uint16_t pdb_roi; /* PLOGI, Common Service */ uint8_t pdb_target; uint8_t pdb_initiator; /* PLOGI, Class 3 Control Flags */ uint16_t pdb_rdsiz; /* PLOGI, Class 3 */ uint16_t pdb_ncseq; /* PLOGI, Class 3 */ uint16_t pdb_noseq; /* PLOGI, Class 3 */ uint16_t pdb_labrtflg; uint16_t pdb_lstopflg; uint16_t pdb_sqhead; uint16_t pdb_sqtail; uint16_t pdb_ptimer; uint16_t pdb_nxt_seqid; uint16_t pdb_fcount; uint16_t pdb_prli_len; uint16_t pdb_prli_svc0; uint16_t pdb_prli_svc3; uint16_t pdb_loopid; uint16_t pdb_il_ptr; uint16_t pdb_sl_ptr; } isp_pdb_21xx_t; #define PDB_OPTIONS_XMITTING (1<<11) #define PDB_OPTIONS_LNKXMIT (1<<10) #define PDB_OPTIONS_ABORTED (1<<9) #define PDB_OPTIONS_ADISC (1<<1) #define PDB_STATE_DISCOVERY 0 #define PDB_STATE_WDISC_ACK 1 #define PDB_STATE_PLOGI 2 #define PDB_STATE_PLOGI_ACK 3 #define PDB_STATE_PRLI 4 #define PDB_STATE_PRLI_ACK 5 #define PDB_STATE_LOGGED_IN 6 #define PDB_STATE_PORT_UNAVAIL 7 #define PDB_STATE_PRLO 8 #define PDB_STATE_PRLO_ACK 9 #define PDB_STATE_PLOGO 10 #define PDB_STATE_PLOG_ACK 11 #define SVC3_ROLE_MASK 0x30 #define
SVC3_ROLE_SHIFT 4 #define BITS2WORD(x) ((x)[0] << 16 | (x)[3] << 8 | (x)[2]) #define BITS2WORD_24XX(x) ((x)[0] << 16 | (x)[1] << 8 | (x)[2]) /* * Port Data Base Element- 24XX cards */ typedef struct { uint16_t pdb_flags; uint8_t pdb_curstate; uint8_t pdb_laststate; uint8_t pdb_hardaddr_bits[4]; uint8_t pdb_portid_bits[4]; #define pdb_nxt_seqid_2400 pdb_portid_bits[3] uint16_t pdb_retry_timer; uint16_t pdb_handle; uint16_t pdb_rcv_dsize; uint16_t pdb_reserved0; uint16_t pdb_prli_svc0; uint16_t pdb_prli_svc3; uint8_t pdb_portname[8]; uint8_t pdb_nodename[8]; uint8_t pdb_reserved1[24]; } isp_pdb_24xx_t; #define PDB2400_TID_SUPPORTED 0x4000 #define PDB2400_FC_TAPE 0x0080 #define PDB2400_CLASS2_ACK0 0x0040 #define PDB2400_FCP_CONF 0x0020 #define PDB2400_CLASS2 0x0010 #define PDB2400_ADDR_VALID 0x0002 #define PDB2400_STATE_PLOGI_PEND 0x03 #define PDB2400_STATE_PLOGI_DONE 0x04 #define PDB2400_STATE_PRLI_PEND 0x05 #define PDB2400_STATE_LOGGED_IN 0x06 #define PDB2400_STATE_PORT_UNAVAIL 0x07 #define PDB2400_STATE_PRLO_PEND 0x09 #define PDB2400_STATE_LOGO_PEND 0x0B /* * Common elements from the above two structures that are actually useful to us. */ typedef struct { uint16_t handle; uint16_t prli_word3; uint32_t : 8, portid : 24; uint8_t portname[8]; uint8_t nodename[8]; } isp_pdb_t; /* * Port/Node Name List Element */ typedef struct { uint8_t pnnle_name[8]; uint16_t pnnle_handle; uint16_t pnnle_reserved; } isp_pnnle_t; #define PNNL_OPTIONS_NODE_NAMES (1<<0) #define PNNL_OPTIONS_PORT_DATA (1<<2) #define PNNL_OPTIONS_INITIATORS (1<<3) /* * Port and N-Port Handle List Element */ typedef struct { uint16_t pnhle_port_id_lo; uint16_t pnhle_port_id_hi_handle; } isp_pnhle_21xx_t; typedef struct { uint16_t pnhle_port_id_lo; uint16_t pnhle_port_id_hi; uint16_t pnhle_handle; } isp_pnhle_23xx_t; typedef struct { uint16_t pnhle_port_id_lo; uint16_t pnhle_port_id_hi; uint16_t pnhle_handle; uint16_t pnhle_reserved; } isp_pnhle_24xx_t; /* * Port Database Changed Async Event information for 24XX cards */ #define PDB24XX_AE_OK 0x00 #define PDB24XX_AE_IMPL_LOGO_1 0x01 #define PDB24XX_AE_IMPL_LOGO_2 0x02 #define PDB24XX_AE_IMPL_LOGO_3 0x03 #define PDB24XX_AE_PLOGI_RCVD 0x04 #define PDB24XX_AE_PLOGI_RJT 0x05 #define PDB24XX_AE_PRLI_RCVD 0x06 #define PDB24XX_AE_PRLI_RJT 0x07 #define PDB24XX_AE_TPRLO 0x08 #define PDB24XX_AE_TPRLO_RJT 0x09 #define PDB24XX_AE_PRLO_RCVD 0x0a #define PDB24XX_AE_LOGO_RCVD 0x0b #define PDB24XX_AE_TOPO_CHG 0x0c #define PDB24XX_AE_NPORT_CHG 0x0d #define PDB24XX_AE_FLOGI_RJT 0x0e #define PDB24XX_AE_BAD_FANN 0x0f #define PDB24XX_AE_FLOGI_TIMO 0x10 #define PDB24XX_AE_ABX_LOGO 0x11 #define PDB24XX_AE_PLOGI_DONE 0x12 #define PDB24XX_AE_PRLI_DONJE 0x13 #define PDB24XX_AE_OPN_1 0x14 #define PDB24XX_AE_OPN_2 0x15 #define PDB24XX_AE_TXERR 0x16 #define PDB24XX_AE_FORCED_LOGO 0x17 #define PDB24XX_AE_DISC_TIMO 0x18 /* * Genericized Port Login/Logout software structure */ typedef struct { uint16_t handle; uint16_t channel; uint32_t flags : 8, portid : 24; } isp_plcmd_t; /* the flags to use are those for PLOGX_FLG_* below */ /* * ISP24XX- Login/Logout Port IOCB */ typedef struct { isphdr_t plogx_header; uint32_t plogx_handle; uint16_t plogx_status; uint16_t plogx_nphdl; uint16_t plogx_flags; uint16_t plogx_vphdl; /* low 8 bits */ uint16_t plogx_portlo; /* low 16 bits */ uint16_t plogx_rspsz_porthi; struct { uint16_t lo16; uint16_t hi16; } plogx_ioparm[11]; } isp_plogx_t; #define PLOGX_STATUS_OK 0x00 #define PLOGX_STATUS_UNAVAIL 0x28 #define PLOGX_STATUS_LOGOUT 0x29 #define PLOGX_STATUS_IOCBERR 0x31 #define 
PLOGX_IOCBERR_NOLINK 0x01 #define PLOGX_IOCBERR_NOIOCB 0x02 #define PLOGX_IOCBERR_NOXGHG 0x03 #define PLOGX_IOCBERR_FAILED 0x04 /* further info in IOPARM 1 */ #define PLOGX_IOCBERR_NOFABRIC 0x05 #define PLOGX_IOCBERR_NOTREADY 0x07 #define PLOGX_IOCBERR_NOLOGIN 0x09 /* further info in IOPARM 1 */ #define PLOGX_IOCBERR_NOPCB 0x0a #define PLOGX_IOCBERR_REJECT 0x18 /* further info in IOPARM 1 */ #define PLOGX_IOCBERR_EINVAL 0x19 /* further info in IOPARM 1 */ #define PLOGX_IOCBERR_PORTUSED 0x1a /* further info in IOPARM 1 */ #define PLOGX_IOCBERR_HNDLUSED 0x1b /* further info in IOPARM 1 */ #define PLOGX_IOCBERR_NOHANDLE 0x1c #define PLOGX_IOCBERR_NOFLOGI 0x1f /* further info in IOPARM 1 */ #define PLOGX_FLG_CMD_MASK 0xf #define PLOGX_FLG_CMD_PLOGI 0 #define PLOGX_FLG_CMD_PRLI 1 #define PLOGX_FLG_CMD_PDISC 2 #define PLOGX_FLG_CMD_LOGO 8 #define PLOGX_FLG_CMD_PRLO 9 #define PLOGX_FLG_CMD_TPRLO 10 #define PLOGX_FLG_COND_PLOGI 0x10 /* if with PLOGI */ #define PLOGX_FLG_IMPLICIT 0x10 /* if with LOGO, PRLO, TPRLO */ #define PLOGX_FLG_SKIP_PRLI 0x20 /* if with PLOGI */ #define PLOGX_FLG_IMPLICIT_LOGO_ALL 0x20 /* if with LOGO */ #define PLOGX_FLG_EXPLICIT_LOGO 0x40 /* if with LOGO */ #define PLOGX_FLG_COMMON_FEATURES 0x80 /* if with PLOGI */ #define PLOGX_FLG_FREE_NPHDL 0x80 /* if with LOGO */ #define PLOGX_FLG_CLASS2 0x100 /* if with PLOGI */ #define PLOGX_FLG_FCP2_OVERRIDE 0x200 /* if with PRLOG, PRLI */ /* * Report ID Acquisition (24XX multi-id firmware) */ typedef struct { isphdr_t ridacq_hdr; uint32_t ridacq_handle; - union { - struct { - uint8_t ridacq_vp_acquired; - uint8_t ridacq_vp_setup; - uint16_t ridacq_reserved0; - } type0; /* type 0 */ - struct { - uint16_t ridacq_vp_count; - uint8_t ridacq_vp_index; - uint8_t ridacq_vp_status; - } type1; /* type 1 */ - } un; + uint8_t ridacq_vp_acquired; + uint8_t ridacq_vp_setup; + uint8_t ridacq_vp_index; + uint8_t ridacq_vp_status; uint16_t ridacq_vp_port_lo; uint8_t ridacq_vp_port_hi; uint8_t ridacq_format; /* 0 or 1 */ uint16_t ridacq_map[8]; uint8_t ridacq_reserved1[32]; } isp_ridacq_t; #define RIDACQ_STS_COMPLETE 0 #define RIDACQ_STS_UNACQUIRED 1 -#define RIDACQ_STS_CHANGED 20 - +#define RIDACQ_STS_CHANGED 2 +#define RIDACQ_STS_SNS_TIMEOUT 3 +#define RIDACQ_STS_SNS_REJECTED 4 +#define RIDACQ_STS_SCR_TIMEOUT 5 +#define RIDACQ_STS_SCR_REJECTED 6 /* * Simple Name Server Data Structures */ #define SNS_GA_NXT 0x100 #define SNS_GPN_ID 0x112 #define SNS_GNN_ID 0x113 #define SNS_GFF_ID 0x11F #define SNS_GID_FT 0x171 #define SNS_RFT_ID 0x217 typedef struct { uint16_t snscb_rblen; /* response buffer length (words) */ uint16_t snscb_reserved0; uint16_t snscb_addr[4]; /* response buffer address */ uint16_t snscb_sblen; /* subcommand buffer length (words) */ uint16_t snscb_reserved1; uint16_t snscb_data[]; /* variable data */ } sns_screq_t; /* Subcommand Request Structure */ typedef struct { uint16_t snscb_rblen; /* response buffer length (words) */ uint16_t snscb_reserved0; uint16_t snscb_addr[4]; /* response buffer address */ uint16_t snscb_sblen; /* subcommand buffer length (words) */ uint16_t snscb_reserved1; uint16_t snscb_cmd; uint16_t snscb_reserved2; uint32_t snscb_reserved3; uint32_t snscb_port; } sns_ga_nxt_req_t; #define SNS_GA_NXT_REQ_SIZE (sizeof (sns_ga_nxt_req_t)) typedef struct { uint16_t snscb_rblen; /* response buffer length (words) */ uint16_t snscb_reserved0; uint16_t snscb_addr[4]; /* response buffer address */ uint16_t snscb_sblen; /* subcommand buffer length (words) */ uint16_t snscb_reserved1; uint16_t snscb_cmd; uint16_t
snscb_reserved2; uint32_t snscb_reserved3; uint32_t snscb_portid; } sns_gxn_id_req_t; #define SNS_GXN_ID_REQ_SIZE (sizeof (sns_gxn_id_req_t)) typedef struct { uint16_t snscb_rblen; /* response buffer length (words) */ uint16_t snscb_reserved0; uint16_t snscb_addr[4]; /* response buffer address */ uint16_t snscb_sblen; /* subcommand buffer length (words) */ uint16_t snscb_reserved1; uint16_t snscb_cmd; uint16_t snscb_mword_div_2; uint32_t snscb_reserved3; uint32_t snscb_fc4_type; } sns_gid_ft_req_t; #define SNS_GID_FT_REQ_SIZE (sizeof (sns_gid_ft_req_t)) typedef struct { uint16_t snscb_rblen; /* response buffer length (words) */ uint16_t snscb_reserved0; uint16_t snscb_addr[4]; /* response buffer address */ uint16_t snscb_sblen; /* subcommand buffer length (words) */ uint16_t snscb_reserved1; uint16_t snscb_cmd; uint16_t snscb_reserved2; uint32_t snscb_reserved3; uint32_t snscb_port; uint32_t snscb_fc4_types[8]; } sns_rft_id_req_t; #define SNS_RFT_ID_REQ_SIZE (sizeof (sns_rft_id_req_t)) typedef struct { ct_hdr_t snscb_cthdr; uint8_t snscb_port_type; uint8_t snscb_port_id[3]; uint8_t snscb_portname[8]; uint16_t snscb_data[]; /* variable data */ } sns_scrsp_t; /* Subcommand Response Structure */ typedef struct { ct_hdr_t snscb_cthdr; uint8_t snscb_port_type; uint8_t snscb_port_id[3]; uint8_t snscb_portname[8]; uint8_t snscb_pnlen; /* symbolic port name length */ uint8_t snscb_pname[255]; /* symbolic port name */ uint8_t snscb_nodename[8]; uint8_t snscb_nnlen; /* symbolic node name length */ uint8_t snscb_nname[255]; /* symbolic node name */ uint8_t snscb_ipassoc[8]; uint8_t snscb_ipaddr[16]; uint8_t snscb_svc_class[4]; uint8_t snscb_fc4_types[32]; uint8_t snscb_fpname[8]; uint8_t snscb_reserved; uint8_t snscb_hardaddr[3]; } sns_ga_nxt_rsp_t; /* Subcommand Response Structure */ #define SNS_GA_NXT_RESP_SIZE (sizeof (sns_ga_nxt_rsp_t)) typedef struct { ct_hdr_t snscb_cthdr; uint8_t snscb_wwn[8]; } sns_gxn_id_rsp_t; #define SNS_GXN_ID_RESP_SIZE (sizeof (sns_gxn_id_rsp_t)) typedef struct { ct_hdr_t snscb_cthdr; uint32_t snscb_fc4_features[32]; } sns_gff_id_rsp_t; #define SNS_GFF_ID_RESP_SIZE (sizeof (sns_gff_id_rsp_t)) typedef struct { ct_hdr_t snscb_cthdr; struct { uint8_t control; uint8_t portid[3]; } snscb_ports[1]; } sns_gid_ft_rsp_t; #define SNS_GID_FT_RESP_SIZE(x) ((sizeof (sns_gid_ft_rsp_t)) + ((x - 1) << 2)) #define SNS_RFT_ID_RESP_SIZE (sizeof (ct_hdr_t)) /* * Other Misc Structures */ /* ELS Pass Through */ typedef struct { isphdr_t els_hdr; uint32_t els_handle; uint16_t els_status; uint16_t els_nphdl; uint16_t els_xmit_dsd_count; /* outgoing only */ uint8_t els_vphdl; uint8_t els_sof; uint32_t els_rxid; uint16_t els_recv_dsd_count; /* outgoing only */ uint8_t els_opcode; uint8_t els_reserved1; uint8_t els_did_lo; uint8_t els_did_mid; uint8_t els_did_hi; uint8_t els_reserved2; uint16_t els_reserved3; uint16_t els_ctl_flags; union { struct { uint32_t _els_bytecnt; uint32_t _els_subcode1; uint32_t _els_subcode2; uint8_t _els_reserved4[20]; } in; struct { uint32_t _els_recv_bytecnt; uint32_t _els_xmit_bytecnt; uint32_t _els_xmit_dsd_length; uint16_t _els_xmit_dsd_a1500; uint16_t _els_xmit_dsd_a3116; uint16_t _els_xmit_dsd_a4732; uint16_t _els_xmit_dsd_a6348; uint32_t _els_recv_dsd_length; uint16_t _els_recv_dsd_a1500; uint16_t _els_recv_dsd_a3116; uint16_t _els_recv_dsd_a4732; uint16_t _els_recv_dsd_a6348; } out; } inout; #define els_bytecnt inout.in._els_bytecnt #define els_subcode1 inout.in._els_subcode1 #define els_subcode2 inout.in._els_subcode2 #define els_reserved4 
inout.in._els_reserved4 #define els_recv_bytecnt inout.out._els_recv_bytecnt #define els_xmit_bytecnt inout.out._els_xmit_bytecnt #define els_xmit_dsd_length inout.out._els_xmit_dsd_length #define els_xmit_dsd_a1500 inout.out._els_xmit_dsd_a1500 #define els_xmit_dsd_a3116 inout.out._els_xmit_dsd_a3116 #define els_xmit_dsd_a4732 inout.out._els_xmit_dsd_a4732 #define els_xmit_dsd_a6348 inout.out._els_xmit_dsd_a6348 #define els_recv_dsd_length inout.out._els_recv_dsd_length #define els_recv_dsd_a1500 inout.out._els_recv_dsd_a1500 #define els_recv_dsd_a3116 inout.out._els_recv_dsd_a3116 #define els_recv_dsd_a4732 inout.out._els_recv_dsd_a4732 #define els_recv_dsd_a6348 inout.out._els_recv_dsd_a6348 } els_t; /* * A handy package structure for running FC-SCSI commands internally */ typedef struct { uint16_t handle; uint16_t lun; uint32_t channel : 8, portid : 24; uint32_t timeout; union { struct { uint32_t data_length; uint32_t no_wait : 1, do_read : 1; uint8_t cdb[16]; void *data_ptr; } beg; struct { uint32_t data_residual; uint8_t status; uint8_t pad; uint16_t sense_length; uint8_t sense_data[32]; } end; } fcd; } isp_xcmd_t; /* * Target Mode related definitions */ #define QLTM_SENSELEN 18 /* non-FC cards only */ #define QLTM_SVALID 0x80 /* * Structure for Enable Lun and Modify Lun queue entries */ typedef struct { isphdr_t le_header; uint32_t le_reserved; uint8_t le_lun; uint8_t le_rsvd; uint8_t le_ops; /* Modify LUN only */ uint8_t le_tgt; /* Not for FC */ uint32_t le_flags; /* Not for FC */ uint8_t le_status; uint8_t le_reserved2; uint8_t le_cmd_count; uint8_t le_in_count; uint8_t le_cdb6len; /* Not for FC */ uint8_t le_cdb7len; /* Not for FC */ uint16_t le_timeout; uint16_t le_reserved3[20]; } lun_entry_t; /* * le_flags values */ #define LUN_TQAE 0x00000002 /* bit1 Tagged Queue Action Enable */ #define LUN_DSSM 0x01000000 /* bit24 Disable Sending SDP Message */ #define LUN_DISAD 0x02000000 /* bit25 Disable autodisconnect */ #define LUN_DM 0x40000000 /* bit30 Disconnects Mandatory */ /* * le_ops values */ #define LUN_CCINCR 0x01 /* increment command count */ #define LUN_CCDECR 0x02 /* decrement command count */ #define LUN_ININCR 0x40 /* increment immed. notify count */ #define LUN_INDECR 0x80 /* decrement immed. 
notify count */ /* * le_status values */ #define LUN_OK 0x01 /* we be rockin' */ #define LUN_ERR 0x04 /* request completed with error */ #define LUN_INVAL 0x06 /* invalid request */ #define LUN_NOCAP 0x16 /* can't provide requested capability */ #define LUN_ENABLED 0x3E /* LUN already enabled */ /* * Immediate Notify Entry structure */ #define IN_MSGLEN 8 /* 8 bytes */ #define IN_RSVDLEN 8 /* 8 words */ typedef struct { isphdr_t in_header; uint32_t in_reserved; uint8_t in_lun; /* lun */ uint8_t in_iid; /* initiator */ uint8_t in_reserved2; uint8_t in_tgt; /* target */ uint32_t in_flags; uint8_t in_status; uint8_t in_rsvd2; uint8_t in_tag_val; /* tag value */ uint8_t in_tag_type; /* tag type */ uint16_t in_seqid; /* sequence id */ uint8_t in_msg[IN_MSGLEN]; /* SCSI message bytes */ uint16_t in_reserved3[IN_RSVDLEN]; uint8_t in_sense[QLTM_SENSELEN];/* suggested sense data */ } in_entry_t; typedef struct { isphdr_t in_header; uint32_t in_reserved; uint8_t in_lun; /* lun */ uint8_t in_iid; /* initiator */ uint16_t in_scclun; uint32_t in_reserved2; uint16_t in_status; uint16_t in_task_flags; uint16_t in_seqid; /* sequence id */ } in_fcentry_t; typedef struct { isphdr_t in_header; uint32_t in_reserved; uint16_t in_iid; /* initiator */ uint16_t in_scclun; uint32_t in_reserved2; uint16_t in_status; uint16_t in_task_flags; uint16_t in_seqid; /* sequence id */ } in_fcentry_e_t; /* * Values for the in_status field */ #define IN_REJECT 0x0D /* Message Reject message received */ #define IN_RESET 0x0E /* Bus Reset occurred */ #define IN_NO_RCAP 0x16 /* requested capability not available */ #define IN_IDE_RECEIVED 0x33 /* Initiator Detected Error msg received */ #define IN_RSRC_UNAVAIL 0x34 /* resource unavailable */ #define IN_MSG_RECEIVED 0x36 /* SCSI message received */ #define IN_ABORT_TASK 0x20 /* task named in RX_ID is being aborted (FC) */ #define IN_PORT_LOGOUT 0x29 /* port has logged out (FC) */ #define IN_PORT_CHANGED 0x2A /* port changed */ #define IN_GLOBAL_LOGO 0x2E /* all ports logged out */ #define IN_NO_NEXUS 0x3B /* Nexus not established */ #define IN_SRR_RCVD 0x45 /* SRR received */ /* * Values for the in_task_flags field- should only get one at a time! */ #define TASK_FLAGS_RESERVED_MASK (0xe700) #define TASK_FLAGS_CLEAR_ACA (1<<14) #define TASK_FLAGS_TARGET_RESET (1<<13) #define TASK_FLAGS_LUN_RESET (1<<12) #define TASK_FLAGS_CLEAR_TASK_SET (1<<10) #define TASK_FLAGS_ABORT_TASK_SET (1<<9) /* * ISP24XX Immediate Notify */ typedef struct { isphdr_t in_header; uint32_t in_reserved; uint16_t in_nphdl; uint16_t in_reserved1; uint16_t in_flags; uint16_t in_srr_rxid; uint16_t in_status; uint8_t in_status_subcode; uint8_t in_fwhandle; uint32_t in_rxid; uint16_t in_srr_reloff_lo; uint16_t in_srr_reloff_hi; uint16_t in_srr_iu; uint16_t in_srr_oxid; /* * If bit 2 is set in in_flags, the N-Port and * handle tags are valid. If the received ELS is * a LOGO, then these tags contain the N Port ID * from the LOGO payload. If the received ELS * request is TPRLO, these tags contain the * Third Party Originator N Port ID. 
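	 *
	 * (Illustrative check, using the flag names defined just below:
	 * "bit 2" is IN24XX_FLAG_NPHDL_VALID, so a consumer would test
	 *	(inp->in_flags & IN24XX_FLAG_NPHDL_VALID) != 0
	 * before trusting in_np_handle and the N Port ID bytes here.)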
*/ uint16_t in_nport_id_hi; #define in_prli_options in_nport_id_hi uint8_t in_nport_id_lo; uint8_t in_reserved3; uint16_t in_np_handle; uint8_t in_reserved4[12]; uint8_t in_reserved5; uint8_t in_vpidx; uint32_t in_reserved6; uint16_t in_portid_lo; uint8_t in_portid_hi; uint8_t in_reserved7; uint16_t in_reserved8; uint16_t in_oxid; } in_fcentry_24xx_t; #define IN24XX_FLAG_PUREX_IOCB 0x1 #define IN24XX_FLAG_GLOBAL_LOGOUT 0x2 #define IN24XX_FLAG_NPHDL_VALID 0x4 #define IN24XX_FLAG_N2N_PRLI 0x8 #define IN24XX_FLAG_PN_NN_VALID 0x10 #define IN24XX_LIP_RESET 0x0E #define IN24XX_LINK_RESET 0x0F #define IN24XX_PORT_LOGOUT 0x29 #define IN24XX_PORT_CHANGED 0x2A #define IN24XX_LINK_FAILED 0x2E #define IN24XX_SRR_RCVD 0x45 #define IN24XX_ELS_RCVD 0x46 /* * login-affecting ELS received - check * subcode for specific opcode */ /* * For f/w > 4.0.25, these offsets in the Immediate Notify contain * the WWNN/WWPN if the ELS is PLOGI, PDISC or ADISC. The WWN is in * Big Endian format. */ #define IN24XX_PRLI_WWNN_OFF 0x18 #define IN24XX_PRLI_WWPN_OFF 0x28 #define IN24XX_PLOGI_WWNN_OFF 0x20 #define IN24XX_PLOGI_WWPN_OFF 0x28 /* * For f/w > 4.0.25, this offset in the Immediate Notify contains * the WWPN if the ELS is LOGO. The WWN is in Big Endian format. */ #define IN24XX_LOGO_WWPN_OFF 0x28 /* * Immediate Notify Status Subcodes for IN24XX_PORT_LOGOUT */ #define IN24XX_PORT_LOGOUT_PDISC_TMO 0x00 #define IN24XX_PORT_LOGOUT_UXPR_DISC 0x01 #define IN24XX_PORT_LOGOUT_OWN_OPN 0x02 #define IN24XX_PORT_LOGOUT_OWN_OPN_SFT 0x03 #define IN24XX_PORT_LOGOUT_ABTS_TMO 0x04 #define IN24XX_PORT_LOGOUT_DISC_RJT 0x05 #define IN24XX_PORT_LOGOUT_LOGIN_NEEDED 0x06 #define IN24XX_PORT_LOGOUT_BAD_DISC 0x07 #define IN24XX_PORT_LOGOUT_LOST_ALPA 0x08 #define IN24XX_PORT_LOGOUT_XMIT_FAILURE 0x09 /* * Immediate Notify Status Subcodes for IN24XX_PORT_CHANGED */ #define IN24XX_PORT_CHANGED_BADFAN 0x00 #define IN24XX_PORT_CHANGED_TOPO_CHANGE 0x01 #define IN24XX_PORT_CHANGED_FLOGI_ACC 0x02 #define IN24XX_PORT_CHANGED_FLOGI_RJT 0x03 #define IN24XX_PORT_CHANGED_TIMEOUT 0x04 #define IN24XX_PORT_CHANGED_PORT_CHANGE 0x05 /* * Notify Acknowledge Entry structure */ #define NA_RSVDLEN 22 typedef struct { isphdr_t na_header; uint32_t na_reserved; uint8_t na_lun; /* lun */ uint8_t na_iid; /* initiator */ uint8_t na_reserved2; uint8_t na_tgt; /* target */ uint32_t na_flags; uint8_t na_status; uint8_t na_event; uint16_t na_seqid; /* sequence id */ uint16_t na_reserved3[NA_RSVDLEN]; } na_entry_t; /* * Values for the na_event field */ #define NA_RST_CLRD 0x80 /* Clear an async event notification */ #define NA_OK 0x01 /* Notify Acknowledge Succeeded */ #define NA_INVALID 0x06 /* Invalid Notify Acknowledge */ #define NA2_RSVDLEN 21 typedef struct { isphdr_t na_header; uint32_t na_reserved; uint8_t na_reserved1; uint8_t na_iid; /* initiator loop id */ uint16_t na_response; uint16_t na_flags; uint16_t na_reserved2; uint16_t na_status; uint16_t na_task_flags; uint16_t na_seqid; /* sequence id */ uint16_t na_reserved3[NA2_RSVDLEN]; } na_fcentry_t; typedef struct { isphdr_t na_header; uint32_t na_reserved; uint16_t na_iid; /* initiator loop id */ uint16_t na_response; /* response code */ uint16_t na_flags; uint16_t na_reserved2; uint16_t na_status; uint16_t na_task_flags; uint16_t na_seqid; /* sequence id */ uint16_t na_reserved3[NA2_RSVDLEN]; } na_fcentry_e_t; #define NAFC_RCOUNT 0x80 /* increment resource count */ #define NAFC_RST_CLRD 0x20 /* Clear LIP Reset */ #define NAFC_TVALID 0x10 /* task management response code is valid */ /* * ISP24XX Notify Acknowledge */
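/*
 * (Illustrative sketch, not the driver's code: an acknowledge is typically
 * built by echoing the identifying fields of the immediate notify it
 * answers, e.g.:
 *
 *	na->na_nphdl = inp->in_nphdl;
 *	na->na_status = inp->in_status;
 *	na->na_status_subcode = inp->in_status_subcode;
 *	na->na_rxid = inp->in_rxid;
 *	na->na_oxid = inp->in_oxid;
 * )
 */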
typedef struct { isphdr_t na_header; uint32_t na_handle; uint16_t na_nphdl; uint16_t na_reserved1; uint16_t na_flags; uint16_t na_srr_rxid; uint16_t na_status; uint8_t na_status_subcode; uint8_t na_fwhandle; uint32_t na_rxid; uint16_t na_srr_reloff_lo; uint16_t na_srr_reloff_hi; uint16_t na_srr_iu; uint16_t na_srr_flags; uint8_t na_reserved3[18]; uint8_t na_reserved4; uint8_t na_vpidx; uint8_t na_srr_reject_vunique; uint8_t na_srr_reject_explanation; uint8_t na_srr_reject_code; uint8_t na_reserved5; uint8_t na_reserved6[6]; uint16_t na_oxid; } na_fcentry_24xx_t; /* * Accept Target I/O Entry structure */ #define ATIO_CDBLEN 26 typedef struct { isphdr_t at_header; uint16_t at_reserved; uint16_t at_handle; uint8_t at_lun; /* lun */ uint8_t at_iid; /* initiator */ uint8_t at_cdblen; /* cdb length */ uint8_t at_tgt; /* target */ uint32_t at_flags; uint8_t at_status; /* firmware status */ uint8_t at_scsi_status; /* scsi status */ uint8_t at_tag_val; /* tag value */ uint8_t at_tag_type; /* tag type */ uint8_t at_cdb[ATIO_CDBLEN]; /* received CDB */ uint8_t at_sense[QLTM_SENSELEN];/* suggested sense data */ } at_entry_t; /* * at_flags values */ #define AT_NODISC 0x00008000 /* disconnect disabled */ #define AT_TQAE 0x00000002 /* Tagged Queue Action enabled */ /* * at_status values */ #define AT_PATH_INVALID 0x07 /* ATIO sent to firmware for disabled lun */ #define AT_RESET 0x0E /* SCSI Bus Reset Occurred */ #define AT_PHASE_ERROR 0x14 /* Bus phase sequence error */ #define AT_NOCAP 0x16 /* Requested capability not available */ #define AT_BDR_MSG 0x17 /* Bus Device Reset msg received */ #define AT_CDB 0x3D /* CDB received */ /* * Macros to create and fetch and test concatenated handle and tag value macros * (SPI only) */ #define AT_MAKE_TAGID(tid, aep) \ tid = aep->at_handle; \ if (aep->at_flags & AT_TQAE) { \ tid |= (aep->at_tag_val << 16); \ tid |= (1 << 24); \ } #define CT_MAKE_TAGID(tid, ct) \ tid = ct->ct_fwhandle; \ if (ct->ct_flags & CT_TQAE) { \ tid |= (ct->ct_tag_val << 16); \ tid |= (1 << 24); \ } #define AT_HAS_TAG(val) ((val) & (1 << 24)) #define AT_GET_TAG(val) (((val) >> 16) & 0xff) #define AT_GET_HANDLE(val) ((val) & 0xffff) #define IN_MAKE_TAGID(tid, inp) \ tid = inp->in_seqid; \ tid |= (inp->in_tag_val << 16); \ tid |= (1 << 24) /* * Accept Target I/O Entry structure, Type 2 */ #define ATIO2_CDBLEN 16 typedef struct { isphdr_t at_header; uint32_t at_reserved; uint8_t at_lun; /* lun or reserved */ uint8_t at_iid; /* initiator */ uint16_t at_rxid; /* response ID */ uint16_t at_flags; uint16_t at_status; /* firmware status */ uint8_t at_crn; /* command reference number */ uint8_t at_taskcodes; uint8_t at_taskflags; uint8_t at_execodes; uint8_t at_cdb[ATIO2_CDBLEN]; /* received CDB */ uint32_t at_datalen; /* allocated data len */ uint16_t at_scclun; /* SCC Lun or reserved */ uint16_t at_wwpn[4]; /* WWPN of initiator */ uint16_t at_reserved2[6]; uint16_t at_oxid; } at2_entry_t; typedef struct { isphdr_t at_header; uint32_t at_reserved; uint16_t at_iid; /* initiator */ uint16_t at_rxid; /* response ID */ uint16_t at_flags; uint16_t at_status; /* firmware status */ uint8_t at_crn; /* command reference number */ uint8_t at_taskcodes; uint8_t at_taskflags; uint8_t at_execodes; uint8_t at_cdb[ATIO2_CDBLEN]; /* received CDB */ uint32_t at_datalen; /* allocated data len */ uint16_t at_scclun; /* SCC Lun or reserved */ uint16_t at_wwpn[4]; /* WWPN of initiator */ uint16_t at_reserved2[6]; uint16_t at_oxid; } at2e_entry_t; #define ATIO2_WWPN_OFFSET 0x2A #define ATIO2_OXID_OFFSET 0x3E #define 
ATIO2_TC_ATTR_MASK 0x7 #define ATIO2_TC_ATTR_SIMPLEQ 0 #define ATIO2_TC_ATTR_HEADOFQ 1 #define ATIO2_TC_ATTR_ORDERED 2 #define ATIO2_TC_ATTR_ACAQ 4 #define ATIO2_TC_ATTR_UNTAGGED 5 #define ATIO2_EX_WRITE 0x1 #define ATIO2_EX_READ 0x2 /* * Macros to create and fetch and test concatenated handle and tag value macros */ #define AT2_MAKE_TAGID(tid, bus, inst, aep) \ tid = aep->at_rxid; \ tid |= (((uint64_t)inst) << 32); \ tid |= (((uint64_t)bus) << 48) #define CT2_MAKE_TAGID(tid, bus, inst, ct) \ tid = ct->ct_rxid; \ tid |= (((uint64_t)inst) << 32); \ tid |= (((uint64_t)(bus & 0xff)) << 48) #define AT2_HAS_TAG(val) 1 #define AT2_GET_TAG(val) ((val) & 0xffffffff) #define AT2_GET_INST(val) (((val) >> 32) & 0xffff) #define AT2_GET_HANDLE AT2_GET_TAG #define AT2_GET_BUS(val) (((val) >> 48) & 0xff) #define FC_HAS_TAG AT2_HAS_TAG #define FC_GET_TAG AT2_GET_TAG #define FC_GET_INST AT2_GET_INST #define FC_GET_HANDLE AT2_GET_HANDLE #define IN_FC_MAKE_TAGID(tid, bus, inst, seqid) \ tid = seqid; \ tid |= (((uint64_t)inst) << 32); \ tid |= (((uint64_t)(bus & 0xff)) << 48) #define FC_TAG_INSERT_INST(tid, inst) \ tid &= ~0x0000ffff00000000ull; \ tid |= (((uint64_t)inst) << 32) /* * 24XX ATIO Definition * * This is *quite* different from other entry types. * First of all, it has its own queue it comes in on. * * Secondly, it doesn't have a normal header. * * Thirdly, it's just a passthru of the FCP CMND IU * which is recorded in big endian mode. */ typedef struct { uint8_t at_type; uint8_t at_count; /* * Task attribute in high four bits, * the rest is the FCP CMND IU Length. * NB: the command can extend past the * length for a single queue entry. */ uint16_t at_ta_len; uint32_t at_rxid; fc_hdr_t at_hdr; fcp_cmnd_iu_t at_cmnd; } at7_entry_t; #define AT7_NORESRC_RXID 0xffffffff /* * Continue Target I/O Entry structure * Request from driver. The response from the * ISP firmware is the same except that the last 18 * bytes are overwritten by suggested sense data if * the 'autosense valid' bit is set in the status byte. */ typedef struct { isphdr_t ct_header; uint16_t ct_syshandle; uint16_t ct_fwhandle; /* required by f/w */ uint8_t ct_lun; /* lun */ uint8_t ct_iid; /* initiator id */ uint8_t ct_reserved2; uint8_t ct_tgt; /* our target id */ uint32_t ct_flags; uint8_t ct_status; /* isp status */ uint8_t ct_scsi_status; /* scsi status */ uint8_t ct_tag_val; /* tag value */ uint8_t ct_tag_type; /* tag type */ uint32_t ct_xfrlen; /* transfer length */ uint32_t ct_resid; /* residual length */ uint16_t ct_timeout; uint16_t ct_seg_count; ispds_t ct_dataseg[ISP_RQDSEG]; } ct_entry_t; /* * For some of the dual port SCSI adapters, port (bus #) is reported * in the MSbit of ct_iid. Bit fields are a bit too awkward here. * * Note that this does not apply to FC adapters at all which can and * do report IIDs between 0x81 && 0xfe (or 0x7ff) which represent devices * that have logged in across a SCSI fabric. 
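 *
 * A worked example with the macros just below: for x = 0x85,
 * GET_BUS_VAL(x) = (0x85 >> 7) & 0x1 = 1 and
 * GET_IID_VAL(x) = 0x85 & 0x3f = 0x05, i.e. bus 1, initiator id 5.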
*/ #define GET_IID_VAL(x) (x & 0x3f) #define GET_BUS_VAL(x) ((x >> 7) & 0x1) #define SET_IID_VAL(y, x) y = ((y & ~0x3f) | (x & 0x3f)) #define SET_BUS_VAL(y, x) y = ((y & 0x3f) | ((x & 0x1) << 7)) /* * ct_flags values */ #define CT_TQAE 0x00000002 /* bit 1, Tagged Queue Action enable */ #define CT_DATA_IN 0x00000040 /* bits 6&7, Data direction - *to* initiator */ #define CT_DATA_OUT 0x00000080 /* bits 6&7, Data direction - *from* initiator */ #define CT_NO_DATA 0x000000C0 /* bits 6&7, Data direction */ #define CT_CCINCR 0x00000100 /* bit 8, autoincrement atio count */ #define CT_DATAMASK 0x000000C0 /* bits 6&7, Data direction */ #define CT_INISYNCWIDE 0x00004000 /* bit 14, Do Sync/Wide Negotiation */ #define CT_NODISC 0x00008000 /* bit 15, Disconnects disabled */ #define CT_DSDP 0x01000000 /* bit 24, Disable Save Data Pointers */ #define CT_SENDRDP 0x04000000 /* bit 26, Send Restore Pointers msg */ #define CT_SENDSTATUS 0x80000000 /* bit 31, Send SCSI status byte */ /* * ct_status values * - set by the firmware when it returns the CTIO */ #define CT_OK 0x01 /* completed without error */ #define CT_ABORTED 0x02 /* aborted by host */ #define CT_ERR 0x04 /* see sense data for error */ #define CT_INVAL 0x06 /* request for disabled lun */ #define CT_NOPATH 0x07 /* invalid ITL nexus */ #define CT_INVRXID 0x08 /* (FC only) Invalid RX_ID */ #define CT_DATA_OVER 0x09 /* (FC only) Data Overrun */ #define CT_RSELTMO 0x0A /* reselection timeout after 2 tries */ #define CT_TIMEOUT 0x0B /* timed out */ #define CT_RESET 0x0E /* SCSI Bus Reset occurred */ #define CT_PARITY 0x0F /* Uncorrectable Parity Error */ #define CT_BUS_ERROR 0x10 /* (FC Only) DMA PCI Error */ #define CT_PANIC 0x13 /* Unrecoverable Error */ #define CT_PHASE_ERROR 0x14 /* Bus phase sequence error */ #define CT_DATA_UNDER 0x15 /* (FC only) Data Underrun */ #define CT_BDR_MSG 0x17 /* Bus Device Reset msg received */ #define CT_TERMINATED 0x19 /* due to Terminate Transfer mbox cmd */ #define CT_PORTUNAVAIL 0x28 /* port not available */ #define CT_LOGOUT 0x29 /* port logout */ #define CT_PORTCHANGED 0x2A /* port changed */ #define CT_IDE 0x33 /* Initiator Detected Error */ #define CT_NOACK 0x35 /* Outstanding Immed. Notify. entry */ #define CT_SRR 0x45 /* SRR Received */ #define CT_LUN_RESET 0x48 /* Lun Reset Received */ #define CT_HBA_RESET 0xffff /* pseudo error - command destroyed by HBA reset*/ /* * When the firmware returns a CTIO entry, it may overwrite the last * part of the structure with sense data. This starts at offset 0x2E * into the entry, which is in the middle of ct_dataseg[1]. Rather * than define a new struct for this, I'm just using the sense data * offset. */ #define CTIO_SENSE_OFFSET 0x2E /* * Entry length in u_longs. All entries are the same size so * any one will do as the numerator. */ #define UINT32_ENTRY_SIZE (sizeof(at_entry_t)/sizeof(uint32_t)) /* * QLA2100 CTIO (type 2) entry */ #define MAXRESPLEN 26 typedef struct { isphdr_t ct_header; uint32_t ct_syshandle; uint8_t ct_lun; /* lun */ uint8_t ct_iid; /* initiator id */ uint16_t ct_rxid; /* response ID */ uint16_t ct_flags; uint16_t ct_status; /* isp status */ uint16_t ct_timeout; uint16_t ct_seg_count; uint32_t ct_reloff; /* relative offset */ uint32_t ct_resid; /* residual length */ union { /* * The three different modes that the target driver * can set the CTIO{2,3,4} up as. * * The first is for sending FCP_DATA_IUs as well as * (optionally) sending a terminal SCSI status FCP_RSP_IU. * * The second is for sending SCSI sense data in an FCP_RSP_IU. 
* Note that no FCP_DATA_IUs will be sent. * * The third is for sending FCP_RSP_IUs as built specifically * in system memory as located by the isp_dataseg. */ struct { uint32_t _reserved; uint16_t _reserved2; uint16_t ct_scsi_status; uint32_t ct_xfrlen; union { ispds_t ct_dataseg[ISP_RQDSEG_T2]; ispds64_t ct_dataseg64[ISP_RQDSEG_T3]; ispdslist_t ct_dslist; } u; } m0; struct { uint16_t _reserved; uint16_t _reserved2; uint16_t ct_senselen; uint16_t ct_scsi_status; uint16_t ct_resplen; uint8_t ct_resp[MAXRESPLEN]; } m1; struct { uint32_t _reserved; uint16_t _reserved2; uint16_t _reserved3; uint32_t ct_datalen; union { ispds_t ct_fcp_rsp_iudata_32; ispds64_t ct_fcp_rsp_iudata_64; } u; } m2; } rsp; } ct2_entry_t; typedef struct { isphdr_t ct_header; uint32_t ct_syshandle; uint16_t ct_iid; /* initiator id */ uint16_t ct_rxid; /* response ID */ uint16_t ct_flags; uint16_t ct_status; /* isp status */ uint16_t ct_timeout; uint16_t ct_seg_count; uint32_t ct_reloff; /* relative offset */ uint32_t ct_resid; /* residual length */ union { struct { uint32_t _reserved; uint16_t _reserved2; uint16_t ct_scsi_status; uint32_t ct_xfrlen; union { ispds_t ct_dataseg[ISP_RQDSEG_T2]; ispds64_t ct_dataseg64[ISP_RQDSEG_T3]; ispdslist_t ct_dslist; } u; } m0; struct { uint16_t _reserved; uint16_t _reserved2; uint16_t ct_senselen; uint16_t ct_scsi_status; uint16_t ct_resplen; uint8_t ct_resp[MAXRESPLEN]; } m1; struct { uint32_t _reserved; uint16_t _reserved2; uint16_t _reserved3; uint32_t ct_datalen; union { ispds_t ct_fcp_rsp_iudata_32; ispds64_t ct_fcp_rsp_iudata_64; } u; } m2; } rsp; } ct2e_entry_t; /* * ct_flags values for CTIO2 */ #define CT2_FLAG_MODE0 0x0000 #define CT2_FLAG_MODE1 0x0001 #define CT2_FLAG_MODE2 0x0002 #define CT2_FLAG_MMASK 0x0003 #define CT2_DATA_IN 0x0040 /* *to* initiator */ #define CT2_DATA_OUT 0x0080 /* *from* initiator */ #define CT2_NO_DATA 0x00C0 #define CT2_DATAMASK 0x00C0 #define CT2_CCINCR 0x0100 #define CT2_FASTPOST 0x0200 #define CT2_CONFIRM 0x2000 #define CT2_TERMINATE 0x4000 #define CT2_SENDSTATUS 0x8000 /* * ct_status values are (mostly) the same as that for ct_entry. */ /* * ct_scsi_status values- the low 8 bits are the normal SCSI status * we know and love. The upper 8 bits are validity markers for FCP_RSP_IU * fields. 
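 *
 * For example (illustrative only; 0x02 is the SCSI CHECK CONDITION status
 * and the mode-1 field names come from the structures above):
 *
 *	cto->rsp.m1.ct_scsi_status = 0x02 | CT2_SNSLEN_VALID;
 *	cto->rsp.m1.ct_senselen = 18;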
#define CT2_RSPLEN_VALID 0x0100 #define CT2_SNSLEN_VALID 0x0200 #define CT2_DATA_OVER 0x0400 #define CT2_DATA_UNDER 0x0800 /* * ISP24XX CTIO */ #define MAXRESPLEN_24XX 24 typedef struct { isphdr_t ct_header; uint32_t ct_syshandle; uint16_t ct_nphdl; /* status on returned CTIOs */ uint16_t ct_timeout; uint16_t ct_seg_count; uint8_t ct_vpidx; uint8_t ct_xflags; uint16_t ct_iid_lo; /* low 16 bits of portid */ uint8_t ct_iid_hi; /* hi 8 bits of portid */ uint8_t ct_reserved; uint32_t ct_rxid; uint16_t ct_senselen; /* mode 1 only */ uint16_t ct_flags; uint32_t ct_resid; /* residual length */ uint16_t ct_oxid; uint16_t ct_scsi_status; /* modes 0 && 1 only */ union { struct { uint32_t reloff; uint32_t reserved0; uint32_t ct_xfrlen; uint32_t reserved1; ispds64_t ds; } m0; struct { uint16_t ct_resplen; uint16_t reserved; uint8_t ct_resp[MAXRESPLEN_24XX]; } m1; struct { uint32_t reserved0; uint32_t reserved1; uint32_t ct_datalen; uint32_t reserved2; ispds64_t ct_fcp_rsp_iudata; } m2; } rsp; } ct7_entry_t; /* * ct_flags values for CTIO7 */ #define CT7_NO_DATA 0x0000 #define CT7_DATA_OUT 0x0001 /* *from* initiator */ #define CT7_DATA_IN 0x0002 /* *to* initiator */ #define CT7_DATAMASK 0x3 #define CT7_DSD_ENABLE 0x0004 #define CT7_CONF_STSFD 0x0010 #define CT7_EXPLCT_CONF 0x0020 #define CT7_FLAG_MODE0 0x0000 #define CT7_FLAG_MODE1 0x0040 #define CT7_FLAG_MODE2 0x0080 #define CT7_FLAG_MMASK 0x00C0 #define CT7_NOACK 0x0100 #define CT7_TASK_ATTR_SHIFT 9 #define CT7_CONFIRM 0x2000 #define CT7_TERMINATE 0x4000 #define CT7_SENDSTATUS 0x8000 /* * Type 7 CTIO status codes */ #define CT7_OK 0x01 /* completed without error */ #define CT7_ABORTED 0x02 /* aborted by host */ #define CT7_ERR 0x04 /* see sense data for error */ #define CT7_INVAL 0x06 /* request for disabled lun */ #define CT7_INVRXID 0x08 /* Invalid RX_ID */ #define CT7_DATA_OVER 0x09 /* Data Overrun */ #define CT7_TIMEOUT 0x0B /* timed out */ #define CT7_RESET 0x0E /* LIP Reset Received */ #define CT7_BUS_ERROR 0x10 /* DMA PCI Error */ #define CT7_REASSY_ERR 0x11 /* DMA reassembly error */ #define CT7_DATA_UNDER 0x15 /* Data Underrun */ #define CT7_PORTUNAVAIL 0x28 /* port not available */ #define CT7_LOGOUT 0x29 /* port logout */ #define CT7_PORTCHANGED 0x2A /* port changed */ #define CT7_SRR 0x45 /* SRR Received */ /* * Other 24XX related target IOCBs */ /* * ABTS Received */ typedef struct { isphdr_t abts_header; uint8_t abts_reserved0[6]; uint16_t abts_nphdl; uint16_t abts_reserved1; uint16_t abts_sof; uint32_t abts_rxid_abts; uint16_t abts_did_lo; uint8_t abts_did_hi; uint8_t abts_r_ctl; uint16_t abts_sid_lo; uint8_t abts_sid_hi; uint8_t abts_cs_ctl; uint16_t abts_fs_ctl; uint8_t abts_f_ctl; uint8_t abts_type; uint16_t abts_seq_cnt; uint8_t abts_df_ctl; uint8_t abts_seq_id; uint16_t abts_rx_id; uint16_t abts_ox_id; uint32_t abts_param; uint8_t abts_reserved2[16]; uint32_t abts_rxid_task; } abts_t; typedef struct { isphdr_t abts_rsp_header; uint32_t abts_rsp_handle; uint16_t abts_rsp_status; uint16_t abts_rsp_nphdl; uint16_t abts_rsp_ctl_flags; uint16_t abts_rsp_sof; uint32_t abts_rsp_rxid_abts; uint16_t abts_rsp_did_lo; uint8_t abts_rsp_did_hi; uint8_t abts_rsp_r_ctl; uint16_t abts_rsp_sid_lo; uint8_t abts_rsp_sid_hi; uint8_t abts_rsp_cs_ctl; uint16_t abts_rsp_f_ctl_lo; uint8_t abts_rsp_f_ctl_hi; uint8_t abts_rsp_type; uint16_t abts_rsp_seq_cnt; uint8_t abts_rsp_df_ctl; uint8_t abts_rsp_seq_id; uint16_t abts_rsp_rx_id; uint16_t abts_rsp_ox_id; uint32_t abts_rsp_param; union { struct { uint16_t reserved; uint8_t last_seq_id; uint8_t
seq_id_valid; uint16_t aborted_rx_id; uint16_t aborted_ox_id; uint16_t high_seq_cnt; uint16_t low_seq_cnt; uint8_t reserved2[4]; } ba_acc; struct { uint8_t vendor_unique; uint8_t explanation; uint8_t reason; uint8_t reserved; uint8_t reserved2[12]; } ba_rjt; struct { uint8_t reserved[8]; uint32_t subcode1; uint32_t subcode2; } rsp; uint8_t reserved[16]; } abts_rsp_payload; uint32_t abts_rsp_rxid_task; } abts_rsp_t; /* terminate this ABTS exchange */ #define ISP24XX_ABTS_RSP_TERMINATE 0x01 #define ISP24XX_ABTS_RSP_COMPLETE 0x00 #define ISP24XX_ABTS_RSP_RESET 0x04 #define ISP24XX_ABTS_RSP_ABORTED 0x05 #define ISP24XX_ABTS_RSP_TIMEOUT 0x06 #define ISP24XX_ABTS_RSP_INVXID 0x08 #define ISP24XX_ABTS_RSP_LOGOUT 0x29 #define ISP24XX_ABTS_RSP_SUBCODE 0x31 #define ISP24XX_NO_TASK 0xffffffff /* * Miscellaneous * * These are the limits of the number of dma segments we * can deal with based not on the size of the segment counter * (which is 16 bits), but on the size of the number of * queue entries field (which is 8 bits). We assume no * segments in the first queue entry, so we can either * have 7 dma segments per continuation entry or 5 * (for 64 bit dma).. multiplying out by 254.... */ #define ISP_NSEG_MAX 1778 #define ISP_NSEG64_MAX 1270 #endif /* _ISPMBOX_H */ Index: projects/powernv/dev/re/if_re.c =================================================================== --- projects/powernv/dev/re/if_re.c (revision 290990) +++ projects/powernv/dev/re/if_re.c (revision 290991) @@ -1,4068 +1,4074 @@ /*- * Copyright (c) 1997, 1998-2003 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * RealTek 8139C+/8169/8169S/8110S/8168/8111/8101E PCI NIC driver * * Written by Bill Paul * Senior Networking Software Engineer * Wind River Systems */ /* * This driver is designed to support RealTek's next generation of * 10/100 and 10/100/1000 PCI ethernet controllers. 
There are currently * seven devices in this family: the RTL8139C+, the RTL8169, the RTL8169S, * RTL8110S, the RTL8168, the RTL8111 and the RTL8101E. * * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible * with the older 8139 family, however it also supports a special * C+ mode of operation that provides several new performance enhancing * features. These include: * * o Descriptor based DMA mechanism. Each descriptor represents * a single packet fragment. Data buffers may be aligned on * any byte boundary. * * o 64-bit DMA * * o TCP/IP checksum offload for both RX and TX * * o High and normal priority transmit DMA rings * * o VLAN tag insertion and extraction * * o TCP large send (segmentation offload) * * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+ * programming API is fairly straightforward. The RX filtering, EEPROM * access and PHY access is the same as it is on the older 8139 series * chips. * * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the * same programming API and feature set as the 8139C+ with the following * differences and additions: * * o 1000Mbps mode * * o Jumbo frames * * o GMII and TBI ports/registers for interfacing with copper * or fiber PHYs * * o RX and TX DMA rings can have up to 1024 descriptors * (the 8139C+ allows a maximum of 64) * * o Slight differences in register layout from the 8139C+ * * The TX start and timer interrupt registers are at different locations * on the 8169 than they are on the 8139C+. Also, the status word in the * RX descriptor has a slightly different bit layout. The 8169 does not * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska' * copper gigE PHY. * * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs * (the 'S' stands for 'single-chip'). These devices have the same * programming API as the older 8169, but also have some vendor-specific * registers for the on-board PHY. The 8110S is a LAN-on-motherboard * part designed to be pin-compatible with the RealTek 8100 10/100 chip. * * This driver takes advantage of the RX and TX checksum offload and * VLAN tag insertion/extraction features. It also implements TX * interrupt moderation using the timer interrupt registers, which * significantly reduces TX interrupt load. There is also support * for jumbo frames, however the 8169/8169S/8110S can not transmit * jumbo frames larger than 7440, so the max MTU possible with this * driver is 7422 bytes. */ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_DEPEND(re, pci, 1, 1, 1); MODULE_DEPEND(re, ether, 1, 1, 1); MODULE_DEPEND(re, miibus, 1, 1, 1); /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" /* Tunables. */ static int intr_filter = 0; TUNABLE_INT("hw.re.intr_filter", &intr_filter); static int msi_disable = 0; TUNABLE_INT("hw.re.msi_disable", &msi_disable); static int msix_disable = 0; TUNABLE_INT("hw.re.msix_disable", &msix_disable); static int prefer_iomap = 0; TUNABLE_INT("hw.re.prefer_iomap", &prefer_iomap); #define RE_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP) /* * Various supported device vendors/types and their names. 
*/ static const struct rl_type re_devs[] = { { DLINK_VENDORID, DLINK_DEVICEID_528T, 0, "D-Link DGE-528(T) Gigabit Ethernet Adapter" }, { DLINK_VENDORID, DLINK_DEVICEID_530T_REVC, 0, "D-Link DGE-530(T) Gigabit Ethernet Adapter" }, { RT_VENDORID, RT_DEVICEID_8139, 0, "RealTek 8139C+ 10/100BaseTX" }, { RT_VENDORID, RT_DEVICEID_8101E, 0, "RealTek 810xE PCIe 10/100baseTX" }, { RT_VENDORID, RT_DEVICEID_8168, 0, "RealTek 8168/8111 B/C/CP/D/DP/E/F/G PCIe Gigabit Ethernet" }, { RT_VENDORID, RT_DEVICEID_8169, 0, "RealTek 8169/8169S/8169SB(L)/8110S/8110SB(L) Gigabit Ethernet" }, { RT_VENDORID, RT_DEVICEID_8169SC, 0, "RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" }, { COREGA_VENDORID, COREGA_DEVICEID_CGLAPCIGT, 0, "Corega CG-LAPCIGT (RTL8169S) Gigabit Ethernet" }, { LINKSYS_VENDORID, LINKSYS_DEVICEID_EG1032, 0, "Linksys EG1032 (RTL8169S) Gigabit Ethernet" }, { USR_VENDORID, USR_DEVICEID_997902, 0, "US Robotics 997902 (RTL8169S) Gigabit Ethernet" } }; static const struct rl_hwrev re_hwrevs[] = { { RL_HWREV_8139, RL_8139, "", RL_MTU }, { RL_HWREV_8139A, RL_8139, "A", RL_MTU }, { RL_HWREV_8139AG, RL_8139, "A-G", RL_MTU }, { RL_HWREV_8139B, RL_8139, "B", RL_MTU }, { RL_HWREV_8130, RL_8139, "8130", RL_MTU }, { RL_HWREV_8139C, RL_8139, "C", RL_MTU }, { RL_HWREV_8139D, RL_8139, "8139D/8100B/8100C", RL_MTU }, { RL_HWREV_8139CPLUS, RL_8139CPLUS, "C+", RL_MTU }, { RL_HWREV_8168B_SPIN1, RL_8169, "8168", RL_JUMBO_MTU }, { RL_HWREV_8169, RL_8169, "8169", RL_JUMBO_MTU }, { RL_HWREV_8169S, RL_8169, "8169S", RL_JUMBO_MTU }, { RL_HWREV_8110S, RL_8169, "8110S", RL_JUMBO_MTU }, { RL_HWREV_8169_8110SB, RL_8169, "8169SB/8110SB", RL_JUMBO_MTU }, { RL_HWREV_8169_8110SC, RL_8169, "8169SC/8110SC", RL_JUMBO_MTU }, { RL_HWREV_8169_8110SBL, RL_8169, "8169SBL/8110SBL", RL_JUMBO_MTU }, { RL_HWREV_8169_8110SCE, RL_8169, "8169SC/8110SC", RL_JUMBO_MTU }, { RL_HWREV_8100, RL_8139, "8100", RL_MTU }, { RL_HWREV_8101, RL_8139, "8101", RL_MTU }, { RL_HWREV_8100E, RL_8169, "8100E", RL_MTU }, { RL_HWREV_8101E, RL_8169, "8101E", RL_MTU }, { RL_HWREV_8102E, RL_8169, "8102E", RL_MTU }, { RL_HWREV_8102EL, RL_8169, "8102EL", RL_MTU }, { RL_HWREV_8102EL_SPIN1, RL_8169, "8102EL", RL_MTU }, { RL_HWREV_8103E, RL_8169, "8103E", RL_MTU }, { RL_HWREV_8401E, RL_8169, "8401E", RL_MTU }, { RL_HWREV_8402, RL_8169, "8402", RL_MTU }, { RL_HWREV_8105E, RL_8169, "8105E", RL_MTU }, { RL_HWREV_8105E_SPIN1, RL_8169, "8105E", RL_MTU }, { RL_HWREV_8106E, RL_8169, "8106E", RL_MTU }, { RL_HWREV_8168B_SPIN2, RL_8169, "8168", RL_JUMBO_MTU }, { RL_HWREV_8168B_SPIN3, RL_8169, "8168", RL_JUMBO_MTU }, { RL_HWREV_8168C, RL_8169, "8168C/8111C", RL_JUMBO_MTU_6K }, { RL_HWREV_8168C_SPIN2, RL_8169, "8168C/8111C", RL_JUMBO_MTU_6K }, { RL_HWREV_8168CP, RL_8169, "8168CP/8111CP", RL_JUMBO_MTU_6K }, { RL_HWREV_8168D, RL_8169, "8168D/8111D", RL_JUMBO_MTU_9K }, { RL_HWREV_8168DP, RL_8169, "8168DP/8111DP", RL_JUMBO_MTU_9K }, { RL_HWREV_8168E, RL_8169, "8168E/8111E", RL_JUMBO_MTU_9K}, { RL_HWREV_8168E_VL, RL_8169, "8168E/8111E-VL", RL_JUMBO_MTU_6K}, { RL_HWREV_8168EP, RL_8169, "8168EP/8111EP", RL_JUMBO_MTU_9K}, { RL_HWREV_8168F, RL_8169, "8168F/8111F", RL_JUMBO_MTU_9K}, { RL_HWREV_8168G, RL_8169, "8168G/8111G", RL_JUMBO_MTU_9K}, { RL_HWREV_8168GU, RL_8169, "8168GU/8111GU", RL_JUMBO_MTU_9K}, { RL_HWREV_8168H, RL_8169, "8168H/8111H", RL_JUMBO_MTU_9K}, { RL_HWREV_8411, RL_8169, "8411", RL_JUMBO_MTU_9K}, { RL_HWREV_8411B, RL_8169, "8411B", RL_JUMBO_MTU_9K}, { 0, 0, NULL, 0 } }; static int re_probe (device_t); static int re_attach (device_t); static int re_detach (device_t); static int 
re_encap (struct rl_softc *, struct mbuf **); static void re_dma_map_addr (void *, bus_dma_segment_t *, int, int); static int re_allocmem (device_t, struct rl_softc *); static __inline void re_discard_rxbuf (struct rl_softc *, int); static int re_newbuf (struct rl_softc *, int); static int re_jumbo_newbuf (struct rl_softc *, int); static int re_rx_list_init (struct rl_softc *); static int re_jrx_list_init (struct rl_softc *); static int re_tx_list_init (struct rl_softc *); #ifdef RE_FIXUP_RX static __inline void re_fixup_rx (struct mbuf *); #endif static int re_rxeof (struct rl_softc *, int *); static void re_txeof (struct rl_softc *); #ifdef DEVICE_POLLING static int re_poll (struct ifnet *, enum poll_cmd, int); static int re_poll_locked (struct ifnet *, enum poll_cmd, int); #endif static int re_intr (void *); static void re_intr_msi (void *); static void re_tick (void *); static void re_int_task (void *, int); static void re_start (struct ifnet *); static void re_start_locked (struct ifnet *); static int re_ioctl (struct ifnet *, u_long, caddr_t); static void re_init (void *); static void re_init_locked (struct rl_softc *); static void re_stop (struct rl_softc *); static void re_watchdog (struct rl_softc *); static int re_suspend (device_t); static int re_resume (device_t); static int re_shutdown (device_t); static int re_ifmedia_upd (struct ifnet *); static void re_ifmedia_sts (struct ifnet *, struct ifmediareq *); static void re_eeprom_putbyte (struct rl_softc *, int); static void re_eeprom_getword (struct rl_softc *, int, u_int16_t *); static void re_read_eeprom (struct rl_softc *, caddr_t, int, int); static int re_gmii_readreg (device_t, int, int); static int re_gmii_writereg (device_t, int, int, int); static int re_miibus_readreg (device_t, int, int); static int re_miibus_writereg (device_t, int, int, int); static void re_miibus_statchg (device_t); static void re_set_jumbo (struct rl_softc *, int); static void re_set_rxmode (struct rl_softc *); static void re_reset (struct rl_softc *); static void re_setwol (struct rl_softc *); static void re_clrwol (struct rl_softc *); static void re_set_linkspeed (struct rl_softc *); #ifdef DEV_NETMAP /* see ixgbe.c for details */ #include MODULE_DEPEND(re, netmap, 1, 1, 1); #endif /* !DEV_NETMAP */ #ifdef RE_DIAG static int re_diag (struct rl_softc *); #endif static void re_add_sysctls (struct rl_softc *); static int re_sysctl_stats (SYSCTL_HANDLER_ARGS); static int sysctl_int_range (SYSCTL_HANDLER_ARGS, int, int); static int sysctl_hw_re_int_mod (SYSCTL_HANDLER_ARGS); static device_method_t re_methods[] = { /* Device interface */ DEVMETHOD(device_probe, re_probe), DEVMETHOD(device_attach, re_attach), DEVMETHOD(device_detach, re_detach), DEVMETHOD(device_suspend, re_suspend), DEVMETHOD(device_resume, re_resume), DEVMETHOD(device_shutdown, re_shutdown), /* MII interface */ DEVMETHOD(miibus_readreg, re_miibus_readreg), DEVMETHOD(miibus_writereg, re_miibus_writereg), DEVMETHOD(miibus_statchg, re_miibus_statchg), DEVMETHOD_END }; static driver_t re_driver = { "re", re_methods, sizeof(struct rl_softc) }; static devclass_t re_devclass; DRIVER_MODULE(re, pci, re_driver, re_devclass, 0, 0); DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0); #define EE_SET(x) \ CSR_WRITE_1(sc, RL_EECMD, \ CSR_READ_1(sc, RL_EECMD) | x) #define EE_CLR(x) \ CSR_WRITE_1(sc, RL_EECMD, \ CSR_READ_1(sc, RL_EECMD) & ~x) /* * Send a read command and address to the EEPROM, check for ACK. 
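 *
 * (Sketch of the wire format, assuming a 93C46-style part where
 * sc->rl_eewidth is 6: the read opcode is placed above the address bits,
 * d = addr | (RL_9346_READ << 6), and the loop clocks out bits 9..0 of d
 * MSB-first on RL_EE_DATAIN.)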
*/ static void re_eeprom_putbyte(struct rl_softc *sc, int addr) { int d, i; d = addr | (RL_9346_READ << sc->rl_eewidth); /* * Feed in each bit and strobe the clock. */ for (i = 1 << (sc->rl_eewidth + 3); i; i >>= 1) { if (d & i) { EE_SET(RL_EE_DATAIN); } else { EE_CLR(RL_EE_DATAIN); } DELAY(100); EE_SET(RL_EE_CLK); DELAY(150); EE_CLR(RL_EE_CLK); DELAY(100); } } /* * Read a word of data stored in the EEPROM at address 'addr.' */ static void re_eeprom_getword(struct rl_softc *sc, int addr, u_int16_t *dest) { int i; u_int16_t word = 0; /* * Send address of word we want to read. */ re_eeprom_putbyte(sc, addr); /* * Start reading bits from EEPROM. */ for (i = 0x8000; i; i >>= 1) { EE_SET(RL_EE_CLK); DELAY(100); if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT) word |= i; EE_CLR(RL_EE_CLK); DELAY(100); } *dest = word; } /* * Read a sequence of words from the EEPROM. */ static void re_read_eeprom(struct rl_softc *sc, caddr_t dest, int off, int cnt) { int i; u_int16_t word = 0, *ptr; CSR_SETBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM); DELAY(100); for (i = 0; i < cnt; i++) { CSR_SETBIT_1(sc, RL_EECMD, RL_EE_SEL); re_eeprom_getword(sc, off + i, &word); CSR_CLRBIT_1(sc, RL_EECMD, RL_EE_SEL); ptr = (u_int16_t *)(dest + (i * 2)); *ptr = word; } CSR_CLRBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM); } static int re_gmii_readreg(device_t dev, int phy, int reg) { struct rl_softc *sc; u_int32_t rval; int i; sc = device_get_softc(dev); /* Let the rgephy driver read the GMEDIASTAT register */ if (reg == RL_GMEDIASTAT) { rval = CSR_READ_1(sc, RL_GMEDIASTAT); return (rval); } CSR_WRITE_4(sc, RL_PHYAR, reg << 16); for (i = 0; i < RL_PHY_TIMEOUT; i++) { rval = CSR_READ_4(sc, RL_PHYAR); if (rval & RL_PHYAR_BUSY) break; DELAY(25); } if (i == RL_PHY_TIMEOUT) { device_printf(sc->rl_dev, "PHY read failed\n"); return (0); } /* * Controller requires a 20us delay to process next MDIO request. */ DELAY(20); return (rval & RL_PHYAR_PHYDATA); } static int re_gmii_writereg(device_t dev, int phy, int reg, int data) { struct rl_softc *sc; u_int32_t rval; int i; sc = device_get_softc(dev); CSR_WRITE_4(sc, RL_PHYAR, (reg << 16) | (data & RL_PHYAR_PHYDATA) | RL_PHYAR_BUSY); for (i = 0; i < RL_PHY_TIMEOUT; i++) { rval = CSR_READ_4(sc, RL_PHYAR); if (!(rval & RL_PHYAR_BUSY)) break; DELAY(25); } if (i == RL_PHY_TIMEOUT) { device_printf(sc->rl_dev, "PHY write failed\n"); return (0); } /* * Controller requires a 20us delay to process next MDIO request. */ DELAY(20); return (0); } static int re_miibus_readreg(device_t dev, int phy, int reg) { struct rl_softc *sc; u_int16_t rval = 0; u_int16_t re8139_reg = 0; sc = device_get_softc(dev); if (sc->rl_type == RL_8169) { rval = re_gmii_readreg(dev, phy, reg); return (rval); } switch (reg) { case MII_BMCR: re8139_reg = RL_BMCR; break; case MII_BMSR: re8139_reg = RL_BMSR; break; case MII_ANAR: re8139_reg = RL_ANAR; break; case MII_ANER: re8139_reg = RL_ANER; break; case MII_ANLPAR: re8139_reg = RL_LPAR; break; case MII_PHYIDR1: case MII_PHYIDR2: return (0); /* * Allow the rlphy driver to read the media status * register. If we have a link partner which does not * support NWAY, this is the register which will tell * us the results of parallel detection. */ case RL_MEDIASTAT: rval = CSR_READ_1(sc, RL_MEDIASTAT); return (rval); default: device_printf(sc->rl_dev, "bad phy register\n"); return (0); } rval = CSR_READ_2(sc, re8139_reg); if (sc->rl_type == RL_8139CPLUS && re8139_reg == RL_BMCR) { /* 8139C+ has different bit layout. 
*/ rval &= ~(BMCR_LOOP | BMCR_ISO); } return (rval); } static int re_miibus_writereg(device_t dev, int phy, int reg, int data) { struct rl_softc *sc; u_int16_t re8139_reg = 0; int rval = 0; sc = device_get_softc(dev); if (sc->rl_type == RL_8169) { rval = re_gmii_writereg(dev, phy, reg, data); return (rval); } switch (reg) { case MII_BMCR: re8139_reg = RL_BMCR; if (sc->rl_type == RL_8139CPLUS) { /* 8139C+ has different bit layout. */ data &= ~(BMCR_LOOP | BMCR_ISO); } break; case MII_BMSR: re8139_reg = RL_BMSR; break; case MII_ANAR: re8139_reg = RL_ANAR; break; case MII_ANER: re8139_reg = RL_ANER; break; case MII_ANLPAR: re8139_reg = RL_LPAR; break; case MII_PHYIDR1: case MII_PHYIDR2: return (0); break; default: device_printf(sc->rl_dev, "bad phy register\n"); return (0); } CSR_WRITE_2(sc, re8139_reg, data); return (0); } static void re_miibus_statchg(device_t dev) { struct rl_softc *sc; struct ifnet *ifp; struct mii_data *mii; sc = device_get_softc(dev); mii = device_get_softc(sc->rl_miibus); ifp = sc->rl_ifp; if (mii == NULL || ifp == NULL || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; sc->rl_flags &= ~RL_FLAG_LINK; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: sc->rl_flags |= RL_FLAG_LINK; break; case IFM_1000_T: if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0) break; sc->rl_flags |= RL_FLAG_LINK; break; default: break; } } /* * RealTek controllers do not provide any interface to the RX/TX * MACs for resolved speed, duplex and flow-control parameters. */ } /* * Set the RX configuration and 64-bit multicast hash filter. */ static void re_set_rxmode(struct rl_softc *sc) { struct ifnet *ifp; struct ifmultiaddr *ifma; uint32_t hashes[2] = { 0, 0 }; uint32_t h, rxfilt; RL_LOCK_ASSERT(sc); ifp = sc->rl_ifp; rxfilt = RL_RXCFG_CONFIG | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_BROAD; if ((sc->rl_flags & RL_FLAG_EARLYOFF) != 0) rxfilt |= RL_RXCFG_EARLYOFF; else if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) rxfilt |= RL_RXCFG_EARLYOFFV2; if (ifp->if_flags & (IFF_ALLMULTI | IFF_PROMISC)) { if (ifp->if_flags & IFF_PROMISC) rxfilt |= RL_RXCFG_RX_ALLPHYS; /* * Unlike other hardwares, we have to explicitly set * RL_RXCFG_RX_MULTI to receive multicast frames in * promiscuous mode. */ rxfilt |= RL_RXCFG_RX_MULTI; hashes[0] = hashes[1] = 0xffffffff; goto done; } if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; h = ether_crc32_be(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), ETHER_ADDR_LEN) >> 26; if (h < 32) hashes[0] |= (1 << h); else hashes[1] |= (1 << (h - 32)); } if_maddr_runlock(ifp); if (hashes[0] != 0 || hashes[1] != 0) { /* * For some unfathomable reason, RealTek decided to * reverse the order of the multicast hash registers * in the PCI Express parts. This means we have to * write the hash pattern in reverse order for those * devices. */ if ((sc->rl_flags & RL_FLAG_PCIE) != 0) { h = bswap32(hashes[0]); hashes[0] = bswap32(hashes[1]); hashes[1] = h; } rxfilt |= RL_RXCFG_RX_MULTI; } if (sc->rl_hwrev->rl_rev == RL_HWREV_8168F) { /* Disable multicast filtering due to silicon bug. 
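 *
 * For context on the filter being bypassed here (restating the
 * logic above, nothing new): each multicast address hashes to
 * h = ether_crc32_be(addr, ETHER_ADDR_LEN) >> 26, a value in
 * 0..63; h < 32 sets bit h of RL_MAR0, otherwise bit h - 32 of
 * RL_MAR4. Writing all-ones to both registers, as below and in the
 * promiscuous/allmulti path, therefore accepts every multicast
 * frame.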
*/ hashes[0] = 0xffffffff; hashes[1] = 0xffffffff; } done: CSR_WRITE_4(sc, RL_MAR0, hashes[0]); CSR_WRITE_4(sc, RL_MAR4, hashes[1]); CSR_WRITE_4(sc, RL_RXCFG, rxfilt); } static void re_reset(struct rl_softc *sc) { int i; RL_LOCK_ASSERT(sc); CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET); for (i = 0; i < RL_TIMEOUT; i++) { DELAY(10); if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET)) break; } if (i == RL_TIMEOUT) device_printf(sc->rl_dev, "reset never completed!\n"); if ((sc->rl_flags & RL_FLAG_MACRESET) != 0) CSR_WRITE_1(sc, 0x82, 1); if (sc->rl_hwrev->rl_rev == RL_HWREV_8169S) re_gmii_writereg(sc->rl_dev, 1, 0x0b, 0); } #ifdef RE_DIAG /* * The following routine is designed to test for a defect on some * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64# * lines connected to the bus, however for a 32-bit only card, they * should be pulled high. The result of this defect is that the * NIC will not work right if you plug it into a 64-bit slot: DMA * operations will be done with 64-bit transfers, which will fail * because the 64-bit data lines aren't connected. * * There's no way to work around this (short of talking a soldering * iron to the board), however we can detect it. The method we use * here is to put the NIC into digital loopback mode, set the receiver * to promiscuous mode, and then try to send a frame. We then compare * the frame data we sent to what was received. If the data matches, * then the NIC is working correctly, otherwise we know the user has * a defective NIC which has been mistakenly plugged into a 64-bit PCI * slot. In the latter case, there's no way the NIC can work correctly, * so we print out a message on the console and abort the device attach. */ static int re_diag(struct rl_softc *sc) { struct ifnet *ifp = sc->rl_ifp; struct mbuf *m0; struct ether_header *eh; struct rl_desc *cur_rx; u_int16_t status; u_int32_t rxstat; int total_len, i, error = 0, phyaddr; u_int8_t dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' }; u_int8_t src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' }; /* Allocate a single mbuf */ MGETHDR(m0, M_NOWAIT, MT_DATA); if (m0 == NULL) return (ENOBUFS); RL_LOCK(sc); /* * Initialize the NIC in test mode. This sets the chip up * so that it can send and receive frames, but performs the * following special functions: * - Puts receiver in promiscuous mode * - Enables digital loopback mode * - Leaves interrupts turned off */ ifp->if_flags |= IFF_PROMISC; sc->rl_testmode = 1; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); sc->rl_flags |= RL_FLAG_LINK; if (sc->rl_type == RL_8169) phyaddr = 1; else phyaddr = 0; re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_RESET); for (i = 0; i < RL_TIMEOUT; i++) { status = re_miibus_readreg(sc->rl_dev, phyaddr, MII_BMCR); if (!(status & BMCR_RESET)) break; } re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_LOOP); CSR_WRITE_2(sc, RL_ISR, RL_INTRS); DELAY(100000); /* Put some data in the mbuf */ eh = mtod(m0, struct ether_header *); bcopy ((char *)&dst, eh->ether_dhost, ETHER_ADDR_LEN); bcopy ((char *)&src, eh->ether_shost, ETHER_ADDR_LEN); eh->ether_type = htons(ETHERTYPE_IP); m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN; /* * Queue the packet, start transmission. * Note: IF_HANDOFF() ultimately calls re_start() for us. 
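 *
 * The ISR store that follows acknowledges stale status first: the
 * interrupt status register is write-one-to-clear, so writing
 * 0xFFFF wipes every pending bit and the completion poll further
 * down sees only events raised by this one test frame.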
*/ CSR_WRITE_2(sc, RL_ISR, 0xFFFF); RL_UNLOCK(sc); /* XXX: re_diag must not be called when in ALTQ mode */ IF_HANDOFF(&ifp->if_snd, m0, ifp); RL_LOCK(sc); m0 = NULL; /* Wait for it to propagate through the chip */ DELAY(100000); for (i = 0; i < RL_TIMEOUT; i++) { status = CSR_READ_2(sc, RL_ISR); CSR_WRITE_2(sc, RL_ISR, status); if ((status & (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) == (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) break; DELAY(10); } if (i == RL_TIMEOUT) { device_printf(sc->rl_dev, "diagnostic failed, failed to receive packet in" " loopback mode\n"); error = EIO; goto done; } /* * The packet should have been dumped into the first * entry in the RX DMA ring. Grab it from there. */ bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_POSTREAD); bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_desc[0].rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_desc[0].rx_dmamap); m0 = sc->rl_ldata.rl_rx_desc[0].rx_m; sc->rl_ldata.rl_rx_desc[0].rx_m = NULL; eh = mtod(m0, struct ether_header *); cur_rx = &sc->rl_ldata.rl_rx_list[0]; total_len = RL_RXBYTES(cur_rx); rxstat = le32toh(cur_rx->rl_cmdstat); if (total_len != ETHER_MIN_LEN) { device_printf(sc->rl_dev, "diagnostic failed, received short packet\n"); error = EIO; goto done; } /* Test that the received packet data matches what we sent. */ if (bcmp((char *)&eh->ether_dhost, (char *)&dst, ETHER_ADDR_LEN) || bcmp((char *)&eh->ether_shost, (char *)&src, ETHER_ADDR_LEN) || ntohs(eh->ether_type) != ETHERTYPE_IP) { device_printf(sc->rl_dev, "WARNING, DMA FAILURE!\n"); device_printf(sc->rl_dev, "expected TX data: %6D/%6D/0x%x\n", dst, ":", src, ":", ETHERTYPE_IP); device_printf(sc->rl_dev, "received RX data: %6D/%6D/0x%x\n", eh->ether_dhost, ":", eh->ether_shost, ":", ntohs(eh->ether_type)); device_printf(sc->rl_dev, "You may have a defective 32-bit " "NIC plugged into a 64-bit PCI slot.\n"); device_printf(sc->rl_dev, "Please re-install the NIC in a " "32-bit slot for proper operation.\n"); device_printf(sc->rl_dev, "Read the re(4) man page for more " "details.\n"); error = EIO; } done: /* Turn interface off, release resources */ sc->rl_testmode = 0; sc->rl_flags &= ~RL_FLAG_LINK; ifp->if_flags &= ~IFF_PROMISC; re_stop(sc); if (m0 != NULL) m_freem(m0); RL_UNLOCK(sc); return (error); } #endif /* * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device * IDs against our list and return a device name if we find a match. */ static int re_probe(device_t dev) { const struct rl_type *t; uint16_t devid, vendor; uint16_t revid, sdevid; int i; vendor = pci_get_vendor(dev); devid = pci_get_device(dev); revid = pci_get_revid(dev); sdevid = pci_get_subdevice(dev); if (vendor == LINKSYS_VENDORID && devid == LINKSYS_DEVICEID_EG1032) { if (sdevid != LINKSYS_SUBDEVICE_EG1032_REV3) { /* * Only attach to rev. 3 of the Linksys EG1032 adapter. * Rev. 2 is supported by sk(4). */ return (ENXIO); } } if (vendor == RT_VENDORID && devid == RT_DEVICEID_8139) { if (revid != 0x20) { /* 8139, let rl(4) take care of this device. */ return (ENXIO); } } t = re_devs; for (i = 0; i < sizeof(re_devs) / sizeof(re_devs[0]); i++, t++) { if (vendor == t->rl_vid && devid == t->rl_did) { device_set_desc(dev, t->rl_name); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } /* * Map a single buffer address. 
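 *
 * Usage sketch (variable names are illustrative; this mirrors the
 * loads performed later in this file):
 *
 *	bus_addr_t paddr = 0;
 *	error = bus_dmamap_load(tag, map, vaddr, size,
 *	    re_dma_map_addr, &paddr, BUS_DMA_NOWAIT);
 *
 * The single-segment KASSERT below holds because every tag paired
 * with this callback is created with nsegments == 1.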
*/ static void re_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *addr; if (error) return; KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg)); addr = arg; *addr = segs->ds_addr; } static int re_allocmem(device_t dev, struct rl_softc *sc) { bus_addr_t lowaddr; bus_size_t rx_list_size, tx_list_size; int error; int i; rx_list_size = sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc); tx_list_size = sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc); /* * Allocate the parent bus DMA tag appropriate for PCI. * In order to use DAC, RL_CPLUSCMD_PCI_DAC bit of RL_CPLUS_CMD * register should be set. However some RealTek chips are known * to be buggy on DAC handling, therefore disable DAC by limiting * DMA address space to 32bit. PCIe variants of RealTek chips * may not have the limitation. */ lowaddr = BUS_SPACE_MAXADDR; if ((sc->rl_flags & RL_FLAG_PCIE) == 0) lowaddr = BUS_SPACE_MAXADDR_32BIT; error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->rl_parent_tag); if (error) { device_printf(dev, "could not allocate parent DMA tag\n"); return (error); } /* * Allocate map for TX mbufs. */ error = bus_dma_tag_create(sc->rl_parent_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES * RL_NTXSEGS, RL_NTXSEGS, 4096, 0, NULL, NULL, &sc->rl_ldata.rl_tx_mtag); if (error) { device_printf(dev, "could not allocate TX DMA tag\n"); return (error); } /* * Allocate map for RX mbufs. */ if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MJUM9BYTES, 1, MJUM9BYTES, 0, NULL, NULL, &sc->rl_ldata.rl_jrx_mtag); if (error) { device_printf(dev, "could not allocate jumbo RX DMA tag\n"); return (error); } } error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rl_ldata.rl_rx_mtag); if (error) { device_printf(dev, "could not allocate RX DMA tag\n"); return (error); } /* * Allocate map for TX descriptor list. */ error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, tx_list_size, 1, tx_list_size, 0, NULL, NULL, &sc->rl_ldata.rl_tx_list_tag); if (error) { device_printf(dev, "could not allocate TX DMA ring tag\n"); return (error); } /* Allocate DMA'able memory for the TX ring */ error = bus_dmamem_alloc(sc->rl_ldata.rl_tx_list_tag, (void **)&sc->rl_ldata.rl_tx_list, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->rl_ldata.rl_tx_list_map); if (error) { device_printf(dev, "could not allocate TX DMA ring\n"); return (error); } /* Load the map for the TX ring. */ sc->rl_ldata.rl_tx_list_addr = 0; error = bus_dmamap_load(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, sc->rl_ldata.rl_tx_list, tx_list_size, re_dma_map_addr, &sc->rl_ldata.rl_tx_list_addr, BUS_DMA_NOWAIT); if (error != 0 || sc->rl_ldata.rl_tx_list_addr == 0) { device_printf(dev, "could not load TX DMA ring\n"); return (ENOMEM); } /* Create DMA maps for TX buffers */ for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) { error = bus_dmamap_create(sc->rl_ldata.rl_tx_mtag, 0, &sc->rl_ldata.rl_tx_desc[i].tx_dmamap); if (error) { device_printf(dev, "could not create DMA map for TX\n"); return (error); } } /* * Allocate map for RX descriptor list. 
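 *
 * Same recipe as the TX ring above: an RL_RING_ALIGN-aligned,
 * single-segment tag, a coherent zeroed allocation, and a load
 * through re_dma_map_addr so the ring's bus address can later be
 * programmed into the chip.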
*/ error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, rx_list_size, 1, rx_list_size, 0, NULL, NULL, &sc->rl_ldata.rl_rx_list_tag); if (error) { device_printf(dev, "could not create RX DMA ring tag\n"); return (error); } /* Allocate DMA'able memory for the RX ring */ error = bus_dmamem_alloc(sc->rl_ldata.rl_rx_list_tag, (void **)&sc->rl_ldata.rl_rx_list, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->rl_ldata.rl_rx_list_map); if (error) { device_printf(dev, "could not allocate RX DMA ring\n"); return (error); } /* Load the map for the RX ring. */ sc->rl_ldata.rl_rx_list_addr = 0; error = bus_dmamap_load(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, sc->rl_ldata.rl_rx_list, rx_list_size, re_dma_map_addr, &sc->rl_ldata.rl_rx_list_addr, BUS_DMA_NOWAIT); if (error != 0 || sc->rl_ldata.rl_rx_list_addr == 0) { device_printf(dev, "could not load RX DMA ring\n"); return (ENOMEM); } /* Create DMA maps for RX buffers */ if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { error = bus_dmamap_create(sc->rl_ldata.rl_jrx_mtag, 0, &sc->rl_ldata.rl_jrx_sparemap); if (error) { device_printf(dev, "could not create spare DMA map for jumbo RX\n"); return (error); } for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { error = bus_dmamap_create(sc->rl_ldata.rl_jrx_mtag, 0, &sc->rl_ldata.rl_jrx_desc[i].rx_dmamap); if (error) { device_printf(dev, "could not create DMA map for jumbo RX\n"); return (error); } } } error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0, &sc->rl_ldata.rl_rx_sparemap); if (error) { device_printf(dev, "could not create spare DMA map for RX\n"); return (error); } for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0, &sc->rl_ldata.rl_rx_desc[i].rx_dmamap); if (error) { device_printf(dev, "could not create DMA map for RX\n"); return (error); } } /* Create DMA map for statistics. */ error = bus_dma_tag_create(sc->rl_parent_tag, RL_DUMP_ALIGN, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(struct rl_stats), 1, sizeof(struct rl_stats), 0, NULL, NULL, &sc->rl_ldata.rl_stag); if (error) { device_printf(dev, "could not create statistics DMA tag\n"); return (error); } /* Allocate DMA'able memory for statistics. */ error = bus_dmamem_alloc(sc->rl_ldata.rl_stag, (void **)&sc->rl_ldata.rl_stats, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->rl_ldata.rl_smap); if (error) { device_printf(dev, "could not allocate statistics DMA memory\n"); return (error); } /* Load the map for statistics. */ sc->rl_ldata.rl_stats_addr = 0; error = bus_dmamap_load(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap, sc->rl_ldata.rl_stats, sizeof(struct rl_stats), re_dma_map_addr, &sc->rl_ldata.rl_stats_addr, BUS_DMA_NOWAIT); if (error != 0 || sc->rl_ldata.rl_stats_addr == 0) { device_printf(dev, "could not load statistics DMA memory\n"); return (ENOMEM); } return (0); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static int re_attach(device_t dev) { u_char eaddr[ETHER_ADDR_LEN]; u_int16_t as[ETHER_ADDR_LEN / 2]; struct rl_softc *sc; struct ifnet *ifp; const struct rl_hwrev *hw_rev; int capmask, error = 0, hwrev, i, msic, msixc, phy, reg, rid; u_int32_t cap, ctl; u_int16_t devid, re_did = 0; uint8_t cfg; sc = device_get_softc(dev); sc->rl_dev = dev; mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->rl_stat_callout, &sc->rl_mtx, 0); /* * Map control/status registers. 
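 *
 * The BAR layout varies across chip generations, hence the probing
 * below: memory space is preferred (BAR 1 by default, BAR 2 on
 * RTL8168/8101E), the RTL8169SC is forced to I/O space, and I/O
 * BAR 0 serves as the fallback when the memory mapping cannot be
 * allocated.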
*/ pci_enable_busmaster(dev); devid = pci_get_device(dev); /* * Prefer memory space register mapping over IO space. * Because RTL8169SC does not seem to work when memory mapping * is used always activate io mapping. */ if (devid == RT_DEVICEID_8169SC) prefer_iomap = 1; if (prefer_iomap == 0) { sc->rl_res_id = PCIR_BAR(1); sc->rl_res_type = SYS_RES_MEMORY; /* RTL8168/8101E seems to use different BARs. */ if (devid == RT_DEVICEID_8168 || devid == RT_DEVICEID_8101E) sc->rl_res_id = PCIR_BAR(2); } else { sc->rl_res_id = PCIR_BAR(0); sc->rl_res_type = SYS_RES_IOPORT; } sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type, &sc->rl_res_id, RF_ACTIVE); if (sc->rl_res == NULL && prefer_iomap == 0) { sc->rl_res_id = PCIR_BAR(0); sc->rl_res_type = SYS_RES_IOPORT; sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type, &sc->rl_res_id, RF_ACTIVE); } if (sc->rl_res == NULL) { device_printf(dev, "couldn't map ports/memory\n"); error = ENXIO; goto fail; } sc->rl_btag = rman_get_bustag(sc->rl_res); sc->rl_bhandle = rman_get_bushandle(sc->rl_res); msic = pci_msi_count(dev); msixc = pci_msix_count(dev); if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { sc->rl_flags |= RL_FLAG_PCIE; sc->rl_expcap = reg; } if (bootverbose) { device_printf(dev, "MSI count : %d\n", msic); device_printf(dev, "MSI-X count : %d\n", msixc); } if (msix_disable > 0) msixc = 0; if (msi_disable > 0) msic = 0; /* Prefer MSI-X to MSI. */ if (msixc > 0) { msixc = RL_MSI_MESSAGES; rid = PCIR_BAR(4); sc->rl_res_pba = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->rl_res_pba == NULL) { device_printf(sc->rl_dev, "could not allocate MSI-X PBA resource\n"); } if (sc->rl_res_pba != NULL && pci_alloc_msix(dev, &msixc) == 0) { if (msixc == RL_MSI_MESSAGES) { device_printf(dev, "Using %d MSI-X message\n", msixc); sc->rl_flags |= RL_FLAG_MSIX; } else pci_release_msi(dev); } if ((sc->rl_flags & RL_FLAG_MSIX) == 0) { if (sc->rl_res_pba != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->rl_res_pba); sc->rl_res_pba = NULL; msixc = 0; } } /* Prefer MSI to INTx. */ if (msixc == 0 && msic > 0) { msic = RL_MSI_MESSAGES; if (pci_alloc_msi(dev, &msic) == 0) { if (msic == RL_MSI_MESSAGES) { device_printf(dev, "Using %d MSI message\n", msic); sc->rl_flags |= RL_FLAG_MSI; /* Explicitly set MSI enable bit. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); cfg = CSR_READ_1(sc, RL_CFG2); cfg |= RL_CFG2_MSI; CSR_WRITE_1(sc, RL_CFG2, cfg); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); } else pci_release_msi(dev); } if ((sc->rl_flags & RL_FLAG_MSI) == 0) msic = 0; } /* Allocate interrupt */ if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0) { rid = 0; sc->rl_irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->rl_irq[0] == NULL) { device_printf(dev, "couldn't allocate IRQ resources\n"); error = ENXIO; goto fail; } } else { for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) { sc->rl_irq[i] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->rl_irq[i] == NULL) { device_printf(dev, "couldn't allocate IRQ resources for " "message %d\n", rid); error = ENXIO; goto fail; } } } if ((sc->rl_flags & RL_FLAG_MSI) == 0) { CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); cfg = CSR_READ_1(sc, RL_CFG2); if ((cfg & RL_CFG2_MSI) != 0) { device_printf(dev, "turning off MSI enable bit.\n"); cfg &= ~RL_CFG2_MSI; CSR_WRITE_1(sc, RL_CFG2, cfg); } CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); } /* Disable ASPM L0S/L1. 
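 *
 * ASPM link-state transitions are reported to stall some of these
 * MACs; when the Express capability advertises ASPM support, the
 * ASPMC field of the link control register is simply cleared.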
*/ if (sc->rl_expcap != 0) { cap = pci_read_config(dev, sc->rl_expcap + PCIER_LINK_CAP, 2); if ((cap & PCIEM_LINK_CAP_ASPM) != 0) { ctl = pci_read_config(dev, sc->rl_expcap + PCIER_LINK_CTL, 2); if ((ctl & PCIEM_LINK_CTL_ASPMC) != 0) { ctl &= ~PCIEM_LINK_CTL_ASPMC; pci_write_config(dev, sc->rl_expcap + PCIER_LINK_CTL, ctl, 2); device_printf(dev, "ASPM disabled\n"); } } else device_printf(dev, "no ASPM capability\n"); } hw_rev = re_hwrevs; hwrev = CSR_READ_4(sc, RL_TXCFG); switch (hwrev & 0x70000000) { case 0x00000000: case 0x10000000: device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0xfc800000); hwrev &= (RL_TXCFG_HWREV | 0x80000000); break; default: device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0x7c800000); sc->rl_macrev = hwrev & 0x00700000; hwrev &= RL_TXCFG_HWREV; break; } device_printf(dev, "MAC rev. 0x%08x\n", sc->rl_macrev); while (hw_rev->rl_desc != NULL) { if (hw_rev->rl_rev == hwrev) { sc->rl_type = hw_rev->rl_type; sc->rl_hwrev = hw_rev; break; } hw_rev++; } if (hw_rev->rl_desc == NULL) { device_printf(dev, "Unknown H/W revision: 0x%08x\n", hwrev); error = ENXIO; goto fail; } switch (hw_rev->rl_rev) { case RL_HWREV_8139CPLUS: sc->rl_flags |= RL_FLAG_FASTETHER | RL_FLAG_AUTOPAD; break; case RL_HWREV_8100E: case RL_HWREV_8101E: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_FASTETHER; break; case RL_HWREV_8102E: case RL_HWREV_8102EL: case RL_HWREV_8102EL_SPIN1: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD; break; case RL_HWREV_8103E: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_MACSLEEP; break; case RL_HWREV_8401E: case RL_HWREV_8105E: case RL_HWREV_8105E_SPIN1: case RL_HWREV_8106E: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD; break; case RL_HWREV_8402: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_CMDSTOP_WAIT_TXQ; break; case RL_HWREV_8168B_SPIN1: case RL_HWREV_8168B_SPIN2: sc->rl_flags |= RL_FLAG_WOLRXENB; /* FALLTHROUGH */ case RL_HWREV_8168B_SPIN3: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_MACSTAT; break; case RL_HWREV_8168C_SPIN2: sc->rl_flags |= RL_FLAG_MACSLEEP; /* FALLTHROUGH */ case RL_HWREV_8168C: if (sc->rl_macrev == 0x00200000) sc->rl_flags |= RL_FLAG_MACSLEEP; /* FALLTHROUGH */ case RL_HWREV_8168CP: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168D: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168DP: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WAIT_TXPOLL | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168E: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168E_VL: case RL_HWREV_8168F: sc->rl_flags |= RL_FLAG_EARLYOFF; /* FALLTHROUGH */ case RL_HWREV_8411: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | 
RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_CMDSTOP_WAIT_TXQ | RL_FLAG_WOL_MANLINK; break; case RL_HWREV_8168EP: case RL_HWREV_8168G: case RL_HWREV_8411B: sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2 | RL_FLAG_CMDSTOP_WAIT_TXQ | RL_FLAG_WOL_MANLINK | RL_FLAG_8168G_PLUS; break; case RL_HWREV_8168GU: case RL_HWREV_8168H: if (pci_get_device(dev) == RT_DEVICEID_8101E) { /* RTL8106E(US), RTL8107E */ sc->rl_flags |= RL_FLAG_FASTETHER; } else sc->rl_flags |= RL_FLAG_JUMBOV2 | RL_FLAG_WOL_MANLINK; sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_CMDSTOP_WAIT_TXQ | RL_FLAG_8168G_PLUS; break; case RL_HWREV_8169_8110SB: case RL_HWREV_8169_8110SBL: case RL_HWREV_8169_8110SC: case RL_HWREV_8169_8110SCE: sc->rl_flags |= RL_FLAG_PHYWAKE; /* FALLTHROUGH */ case RL_HWREV_8169: case RL_HWREV_8169S: case RL_HWREV_8110S: sc->rl_flags |= RL_FLAG_MACRESET; break; default: break; } if (sc->rl_hwrev->rl_rev == RL_HWREV_8139CPLUS) { sc->rl_cfg0 = RL_8139_CFG0; sc->rl_cfg1 = RL_8139_CFG1; sc->rl_cfg2 = 0; sc->rl_cfg3 = RL_8139_CFG3; sc->rl_cfg4 = RL_8139_CFG4; sc->rl_cfg5 = RL_8139_CFG5; } else { sc->rl_cfg0 = RL_CFG0; sc->rl_cfg1 = RL_CFG1; sc->rl_cfg2 = RL_CFG2; sc->rl_cfg3 = RL_CFG3; sc->rl_cfg4 = RL_CFG4; sc->rl_cfg5 = RL_CFG5; } /* Reset the adapter. */ RL_LOCK(sc); re_reset(sc); RL_UNLOCK(sc); /* Enable PME. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); cfg = CSR_READ_1(sc, sc->rl_cfg1); cfg |= RL_CFG1_PME; CSR_WRITE_1(sc, sc->rl_cfg1, cfg); cfg = CSR_READ_1(sc, sc->rl_cfg5); cfg &= RL_CFG5_PME_STS; CSR_WRITE_1(sc, sc->rl_cfg5, cfg); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); if ((sc->rl_flags & RL_FLAG_PAR) != 0) { /* * XXX Should have a better way to extract station * address from EEPROM. */ for (i = 0; i < ETHER_ADDR_LEN; i++) eaddr[i] = CSR_READ_1(sc, RL_IDR0 + i); } else { sc->rl_eewidth = RL_9356_ADDR_LEN; re_read_eeprom(sc, (caddr_t)&re_did, 0, 1); if (re_did != 0x8129) sc->rl_eewidth = RL_9346_ADDR_LEN; /* * Get station address from the EEPROM. */ re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3); for (i = 0; i < ETHER_ADDR_LEN / 2; i++) as[i] = le16toh(as[i]); bcopy(as, eaddr, ETHER_ADDR_LEN); } if (sc->rl_type == RL_8169) { /* Set RX length mask and number of descriptors. */ sc->rl_rxlenmask = RL_RDESC_STAT_GFRAGLEN; sc->rl_txstart = RL_GTXSTART; sc->rl_ldata.rl_tx_desc_cnt = RL_8169_TX_DESC_CNT; sc->rl_ldata.rl_rx_desc_cnt = RL_8169_RX_DESC_CNT; } else { /* Set RX length mask and number of descriptors. */ sc->rl_rxlenmask = RL_RDESC_STAT_FRAGLEN; sc->rl_txstart = RL_TXSTART; sc->rl_ldata.rl_tx_desc_cnt = RL_8139_TX_DESC_CNT; sc->rl_ldata.rl_rx_desc_cnt = RL_8139_RX_DESC_CNT; } error = re_allocmem(dev, sc); if (error) goto fail; re_add_sysctls(sc); ifp = sc->rl_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } /* Take controller out of deep sleep mode. */ if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) { if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80) CSR_WRITE_1(sc, RL_GPIO, CSR_READ_1(sc, RL_GPIO) | 0x01); else CSR_WRITE_1(sc, RL_GPIO, CSR_READ_1(sc, RL_GPIO) & ~0x01); } /* Take PHY out of power down mode. 
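 *
 * On RL_FLAG_PHYWAKE_PM parts the PHY sits behind a power bit in
 * RL_PMCH; the 8401E additionally has an (apparently undocumented)
 * bit at register 0xD1 that is cleared here as well.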
*/ if ((sc->rl_flags & RL_FLAG_PHYWAKE_PM) != 0) { CSR_WRITE_1(sc, RL_PMCH, CSR_READ_1(sc, RL_PMCH) | 0x80); if (hw_rev->rl_rev == RL_HWREV_8401E) CSR_WRITE_1(sc, 0xD1, CSR_READ_1(sc, 0xD1) & ~0x08); } if ((sc->rl_flags & RL_FLAG_PHYWAKE) != 0) { re_gmii_writereg(dev, 1, 0x1f, 0); re_gmii_writereg(dev, 1, 0x0e, 0); } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = re_ioctl; ifp->if_start = re_start; /* * RTL8168/8111C generates wrong IP checksummed frame if the * packet has IP options so disable TX checksum offloading. */ if (sc->rl_hwrev->rl_rev == RL_HWREV_8168C || sc->rl_hwrev->rl_rev == RL_HWREV_8168C_SPIN2 || sc->rl_hwrev->rl_rev == RL_HWREV_8168CP) { ifp->if_hwassist = 0; ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TSO4; } else { ifp->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP; ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_TSO4; } ifp->if_hwassist |= CSUM_TSO; ifp->if_capenable = ifp->if_capabilities; ifp->if_init = re_init; IFQ_SET_MAXLEN(&ifp->if_snd, RL_IFQ_MAXLEN); ifp->if_snd.ifq_drv_maxlen = RL_IFQ_MAXLEN; IFQ_SET_READY(&ifp->if_snd); TASK_INIT(&sc->rl_inttask, 0, re_int_task, sc); #define RE_PHYAD_INTERNAL 0 /* Do MII setup. */ phy = RE_PHYAD_INTERNAL; if (sc->rl_type == RL_8169) phy = 1; capmask = BMSR_DEFCAPMASK; if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0) capmask &= ~BMSR_EXTSTAT; error = mii_attach(dev, &sc->rl_miibus, ifp, re_ifmedia_upd, re_ifmedia_sts, capmask, phy, MII_OFFSET_ANY, MIIF_DOPAUSE); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); goto fail; } /* * Call MI attach routine. */ ether_ifattach(ifp, eaddr); /* VLAN capability setup */ ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; if (ifp->if_capabilities & IFCAP_HWCSUM) ifp->if_capabilities |= IFCAP_VLAN_HWCSUM; /* Enable WOL if PM is supported. */ if (pci_find_cap(sc->rl_dev, PCIY_PMG, ®) == 0) ifp->if_capabilities |= IFCAP_WOL; ifp->if_capenable = ifp->if_capabilities; ifp->if_capenable &= ~(IFCAP_WOL_UCAST | IFCAP_WOL_MCAST); /* * Don't enable TSO by default. It is known to generate * corrupted TCP segments(bad TCP options) under certain * circumstances. */ ifp->if_hwassist &= ~CSUM_TSO; ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO); #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif /* * Tell the upper layer(s) we support long frames. * Must appear after the call to ether_ifattach() because * ether_ifattach() sets ifi_hdrlen to the default value. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); #ifdef DEV_NETMAP re_netmap_attach(sc); #endif /* DEV_NETMAP */ + #ifdef RE_DIAG /* * Perform hardware diagnostic on the original RTL8169. * Some 32-bit cards were incorrectly wired and would * malfunction if plugged into a 64-bit slot. 
*/ - if (hwrev == RL_HWREV_8169) { error = re_diag(sc); if (error) { device_printf(dev, "attach aborted due to hardware diag failure\n"); ether_ifdetach(ifp); goto fail; } } #endif #ifdef RE_TX_MODERATION intr_filter = 1; #endif /* Hook interrupt last to avoid having to lock softc */ if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) != 0 && intr_filter == 0) { error = bus_setup_intr(dev, sc->rl_irq[0], INTR_TYPE_NET | INTR_MPSAFE, NULL, re_intr_msi, sc, &sc->rl_intrhand[0]); } else { error = bus_setup_intr(dev, sc->rl_irq[0], INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc, &sc->rl_intrhand[0]); } if (error) { device_printf(dev, "couldn't set up irq\n"); ether_ifdetach(ifp); } fail: if (error) re_detach(dev); return (error); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. */ static int re_detach(device_t dev) { struct rl_softc *sc; struct ifnet *ifp; int i, rid; sc = device_get_softc(dev); ifp = sc->rl_ifp; KASSERT(mtx_initialized(&sc->rl_mtx), ("re mutex not initialized")); /* These should only be active if attach succeeded */ if (device_is_attached(dev)) { #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(ifp); #endif RL_LOCK(sc); #if 0 sc->suspended = 1; #endif re_stop(sc); RL_UNLOCK(sc); callout_drain(&sc->rl_stat_callout); taskqueue_drain(taskqueue_fast, &sc->rl_inttask); /* * Force off the IFF_UP flag here, in case someone * still had a BPF descriptor attached to this * interface. If they do, ether_ifdetach() will cause * the BPF code to try and clear the promisc mode * flag, which will bubble down to re_ioctl(), * which will try to call re_init() again. This will * turn the NIC back on and restart the MII ticker, * which will panic the system when the kernel tries * to invoke the re_tick() function that isn't there * anymore. */ ifp->if_flags &= ~IFF_UP; ether_ifdetach(ifp); } if (sc->rl_miibus) device_delete_child(dev, sc->rl_miibus); bus_generic_detach(dev); /* * The rest is resource deallocation, so we should already be * stopped here. 
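 *
 * The teardown below mirrors attach in reverse: interrupt handler,
 * ifnet, IRQ and MSI resources, the register BAR, then each DMA
 * object as an unload/free/destroy triple, and finally the parent
 * tag and the mutex.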
*/ if (sc->rl_intrhand[0] != NULL) { bus_teardown_intr(dev, sc->rl_irq[0], sc->rl_intrhand[0]); sc->rl_intrhand[0] = NULL; } if (ifp != NULL) { #ifdef DEV_NETMAP netmap_detach(ifp); #endif /* DEV_NETMAP */ if_free(ifp); } if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0) rid = 0; else rid = 1; if (sc->rl_irq[0] != NULL) { bus_release_resource(dev, SYS_RES_IRQ, rid, sc->rl_irq[0]); sc->rl_irq[0] = NULL; } if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) != 0) pci_release_msi(dev); if (sc->rl_res_pba) { rid = PCIR_BAR(4); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->rl_res_pba); } if (sc->rl_res) bus_release_resource(dev, sc->rl_res_type, sc->rl_res_id, sc->rl_res); /* Unload and free the RX DMA ring memory and map */ if (sc->rl_ldata.rl_rx_list_tag) { if (sc->rl_ldata.rl_rx_list_addr) bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map); if (sc->rl_ldata.rl_rx_list) bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list, sc->rl_ldata.rl_rx_list_map); bus_dma_tag_destroy(sc->rl_ldata.rl_rx_list_tag); } /* Unload and free the TX DMA ring memory and map */ if (sc->rl_ldata.rl_tx_list_tag) { if (sc->rl_ldata.rl_tx_list_addr) bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map); if (sc->rl_ldata.rl_tx_list) bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list, sc->rl_ldata.rl_tx_list_map); bus_dma_tag_destroy(sc->rl_ldata.rl_tx_list_tag); } /* Destroy all the RX and TX buffer maps */ if (sc->rl_ldata.rl_tx_mtag) { for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) { if (sc->rl_ldata.rl_tx_desc[i].tx_dmamap) bus_dmamap_destroy(sc->rl_ldata.rl_tx_mtag, sc->rl_ldata.rl_tx_desc[i].tx_dmamap); } bus_dma_tag_destroy(sc->rl_ldata.rl_tx_mtag); } if (sc->rl_ldata.rl_rx_mtag) { for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { if (sc->rl_ldata.rl_rx_desc[i].rx_dmamap) bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_desc[i].rx_dmamap); } if (sc->rl_ldata.rl_rx_sparemap) bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_sparemap); bus_dma_tag_destroy(sc->rl_ldata.rl_rx_mtag); } if (sc->rl_ldata.rl_jrx_mtag) { for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { if (sc->rl_ldata.rl_jrx_desc[i].rx_dmamap) bus_dmamap_destroy(sc->rl_ldata.rl_jrx_mtag, sc->rl_ldata.rl_jrx_desc[i].rx_dmamap); } if (sc->rl_ldata.rl_jrx_sparemap) bus_dmamap_destroy(sc->rl_ldata.rl_jrx_mtag, sc->rl_ldata.rl_jrx_sparemap); bus_dma_tag_destroy(sc->rl_ldata.rl_jrx_mtag); } /* Unload and free the stats buffer and map */ if (sc->rl_ldata.rl_stag) { if (sc->rl_ldata.rl_stats_addr) bus_dmamap_unload(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap); if (sc->rl_ldata.rl_stats) bus_dmamem_free(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_stats, sc->rl_ldata.rl_smap); bus_dma_tag_destroy(sc->rl_ldata.rl_stag); } if (sc->rl_parent_tag) bus_dma_tag_destroy(sc->rl_parent_tag); mtx_destroy(&sc->rl_mtx); return (0); } static __inline void re_discard_rxbuf(struct rl_softc *sc, int idx) { struct rl_desc *desc; struct rl_rxdesc *rxd; uint32_t cmdstat; if (sc->rl_ifp->if_mtu > RL_MTU && (sc->rl_flags & RL_FLAG_JUMBOV2) != 0) rxd = &sc->rl_ldata.rl_jrx_desc[idx]; else rxd = &sc->rl_ldata.rl_rx_desc[idx]; desc = &sc->rl_ldata.rl_rx_list[idx]; desc->rl_vlanctl = 0; cmdstat = rxd->rx_size; if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1) cmdstat |= RL_RDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN); } static int re_newbuf(struct rl_softc *sc, int idx) { struct mbuf *m; struct rl_rxdesc *rxd; bus_dma_segment_t segs[1]; bus_dmamap_t map; 
struct rl_desc *desc; uint32_t cmdstat; int error, nsegs; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MCLBYTES; #ifdef RE_FIXUP_RX /* * This is part of an evil trick to deal with non-x86 platforms. * The RealTek chip requires RX buffers to be aligned on 64-bit * boundaries, but that will hose non-x86 machines. To get around * this, we leave some empty space at the start of each buffer * and for non-x86 hosts, we copy the buffer back six bytes * to achieve word alignment. This is slightly more efficient * than allocating a new buffer, copying the contents, and * discarding the old buffer. */ m_adj(m, RE_ETHER_ALIGN); #endif error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs)); rxd = &sc->rl_ldata.rl_rx_desc[idx]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap); } rxd->rx_m = m; map = rxd->rx_dmamap; rxd->rx_dmamap = sc->rl_ldata.rl_rx_sparemap; rxd->rx_size = segs[0].ds_len; sc->rl_ldata.rl_rx_sparemap = map; bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); desc = &sc->rl_ldata.rl_rx_list[idx]; desc->rl_vlanctl = 0; desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr)); desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr)); cmdstat = segs[0].ds_len; if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1) cmdstat |= RL_RDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN); return (0); } static int re_jumbo_newbuf(struct rl_softc *sc, int idx) { struct mbuf *m; struct rl_rxdesc *rxd; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct rl_desc *desc; uint32_t cmdstat; int error, nsegs; m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MJUM9BYTES; #ifdef RE_FIXUP_RX m_adj(m, RE_ETHER_ALIGN); #endif error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_jrx_mtag, sc->rl_ldata.rl_jrx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs)); rxd = &sc->rl_ldata.rl_jrx_desc[idx]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap); } rxd->rx_m = m; map = rxd->rx_dmamap; rxd->rx_dmamap = sc->rl_ldata.rl_jrx_sparemap; rxd->rx_size = segs[0].ds_len; sc->rl_ldata.rl_jrx_sparemap = map; bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); desc = &sc->rl_ldata.rl_rx_list[idx]; desc->rl_vlanctl = 0; desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr)); desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr)); cmdstat = segs[0].ds_len; if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1) cmdstat |= RL_RDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN); return (0); } #ifdef RE_FIXUP_RX static __inline void re_fixup_rx(struct mbuf *m) { int i; uint16_t *src, *dst; src = mtod(m, uint16_t *); dst = src - (RE_ETHER_ALIGN - ETHER_ALIGN) / sizeof *src; for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++) *dst++ = *src++; m->m_data -= RE_ETHER_ALIGN - ETHER_ALIGN; } #endif static int re_tx_list_init(struct rl_softc *sc) { struct rl_desc *desc; int i; RL_LOCK_ASSERT(sc); 
bzero(sc->rl_ldata.rl_tx_list, sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc)); for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) sc->rl_ldata.rl_tx_desc[i].tx_m = NULL; #ifdef DEV_NETMAP re_netmap_tx_init(sc); #endif /* DEV_NETMAP */ /* Set EOR. */ desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1]; desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR); bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); sc->rl_ldata.rl_tx_prodidx = 0; sc->rl_ldata.rl_tx_considx = 0; sc->rl_ldata.rl_tx_free = sc->rl_ldata.rl_tx_desc_cnt; return (0); } static int re_rx_list_init(struct rl_softc *sc) { int error, i; bzero(sc->rl_ldata.rl_rx_list, sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc)); for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { sc->rl_ldata.rl_rx_desc[i].rx_m = NULL; if ((error = re_newbuf(sc, i)) != 0) return (error); } #ifdef DEV_NETMAP re_netmap_rx_init(sc); #endif /* DEV_NETMAP */ /* Flush the RX descriptors */ bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); sc->rl_ldata.rl_rx_prodidx = 0; sc->rl_head = sc->rl_tail = NULL; sc->rl_int_rx_act = 0; return (0); } static int re_jrx_list_init(struct rl_softc *sc) { int error, i; bzero(sc->rl_ldata.rl_rx_list, sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc)); for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { sc->rl_ldata.rl_jrx_desc[i].rx_m = NULL; if ((error = re_jumbo_newbuf(sc, i)) != 0) return (error); } bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD); sc->rl_ldata.rl_rx_prodidx = 0; sc->rl_head = sc->rl_tail = NULL; sc->rl_int_rx_act = 0; return (0); } /* * RX handler for C+ and 8169. For the gigE chips, we support * the reception of jumbo frames that have been fragmented * across multiple 2K mbuf cluster buffers. */ static int re_rxeof(struct rl_softc *sc, int *rx_npktsp) { struct mbuf *m; struct ifnet *ifp; int i, rxerr, total_len; struct rl_desc *cur_rx; u_int32_t rxstat, rxvlan; int jumbo, maxpkt = 16, rx_npkts = 0; RL_LOCK_ASSERT(sc); ifp = sc->rl_ifp; #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, 0, &rx_npkts)) return 0; #endif /* DEV_NETMAP */ if (ifp->if_mtu > RL_MTU && (sc->rl_flags & RL_FLAG_JUMBOV2) != 0) jumbo = 1; else jumbo = 0; /* Invalidate the descriptor memory */ bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (i = sc->rl_ldata.rl_rx_prodidx; maxpkt > 0; i = RL_RX_DESC_NXT(sc, i)) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; cur_rx = &sc->rl_ldata.rl_rx_list[i]; rxstat = le32toh(cur_rx->rl_cmdstat); if ((rxstat & RL_RDESC_STAT_OWN) != 0) break; total_len = rxstat & sc->rl_rxlenmask; rxvlan = le32toh(cur_rx->rl_vlanctl); if (jumbo != 0) m = sc->rl_ldata.rl_jrx_desc[i].rx_m; else m = sc->rl_ldata.rl_rx_desc[i].rx_m; if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 && (rxstat & (RL_RDESC_STAT_SOF | RL_RDESC_STAT_EOF)) != (RL_RDESC_STAT_SOF | RL_RDESC_STAT_EOF)) { /* * RTL8168C or later controllers do not * support multi-fragment packet. */ re_discard_rxbuf(sc, i); continue; } else if ((rxstat & RL_RDESC_STAT_EOF) == 0) { if (re_newbuf(sc, i) != 0) { /* * If this is part of a multi-fragment packet, * discard all the pieces. 
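 *
 * sc->rl_head and sc->rl_tail carry the partially reassembled
 * fragment chain across ring slots, so an allocation failure
 * mid-frame must free the whole chain; the descriptor itself is
 * recycled via re_discard_rxbuf() so the ring stays populated.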
*/ if (sc->rl_head != NULL) { m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } re_discard_rxbuf(sc, i); continue; } m->m_len = RE_RX_DESC_BUFLEN; if (sc->rl_head == NULL) sc->rl_head = sc->rl_tail = m; else { m->m_flags &= ~M_PKTHDR; sc->rl_tail->m_next = m; sc->rl_tail = m; } continue; } /* * NOTE: for the 8139C+, the frame length field * is always 12 bits in size, but for the gigE chips, * it is 13 bits (since the max RX frame length is 16K). * Unfortunately, all 32 bits in the status word * were already used, so to make room for the extra * length bit, RealTek took out the 'frame alignment * error' bit and shifted the other status bits * over one slot. The OWN, EOR, FS and LS bits are * still in the same places. We have already extracted * the frame length and checked the OWN bit, so rather * than using an alternate bit mapping, we shift the * status bits one space to the right so we can evaluate * them using the 8169 status as though it was in the * same format as that of the 8139C+. */ if (sc->rl_type == RL_8169) rxstat >>= 1; /* * if total_len > 2^13-1, both _RXERRSUM and _GIANT will be * set, but if CRC is clear, it will still be a valid frame. */ if ((rxstat & RL_RDESC_STAT_RXERRSUM) != 0) { rxerr = 1; if ((sc->rl_flags & RL_FLAG_JUMBOV2) == 0 && total_len > 8191 && (rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT) rxerr = 0; if (rxerr != 0) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); /* * If this is part of a multi-fragment packet, * discard all the pieces. */ if (sc->rl_head != NULL) { m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } re_discard_rxbuf(sc, i); continue; } } /* * If allocating a replacement mbuf fails, * reload the current one. */ if (jumbo != 0) rxerr = re_jumbo_newbuf(sc, i); else rxerr = re_newbuf(sc, i); if (rxerr != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if (sc->rl_head != NULL) { m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } re_discard_rxbuf(sc, i); continue; } if (sc->rl_head != NULL) { if (jumbo != 0) m->m_len = total_len; else { m->m_len = total_len % RE_RX_DESC_BUFLEN; if (m->m_len == 0) m->m_len = RE_RX_DESC_BUFLEN; } /* * Special case: if there's 4 bytes or less * in this buffer, the mbuf can be discarded: * the last 4 bytes is the CRC, which we don't * care about anyway. 
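 *
 * Worked example: if the final fragment of a chained frame holds
 * just one byte, that byte belongs to the 4-byte CRC; the mbuf is
 * freed and the remaining 4 - 1 = 3 CRC bytes are trimmed from the
 * previous tail, so exactly ETHER_CRC_LEN bytes are removed either
 * way.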
*/ if (m->m_len <= ETHER_CRC_LEN) { sc->rl_tail->m_len -= (ETHER_CRC_LEN - m->m_len); m_freem(m); } else { m->m_len -= ETHER_CRC_LEN; m->m_flags &= ~M_PKTHDR; sc->rl_tail->m_next = m; } m = sc->rl_head; sc->rl_head = sc->rl_tail = NULL; m->m_pkthdr.len = total_len - ETHER_CRC_LEN; } else m->m_pkthdr.len = m->m_len = (total_len - ETHER_CRC_LEN); #ifdef RE_FIXUP_RX re_fixup_rx(m); #endif if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); m->m_pkthdr.rcvif = ifp; /* Do RX checksumming if enabled */ if (ifp->if_capenable & IFCAP_RXCSUM) { if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) { /* Check IP header checksum */ if (rxstat & RL_RDESC_STAT_PROTOID) m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (!(rxstat & RL_RDESC_STAT_IPSUMBAD)) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; /* Check TCP/UDP checksum */ if ((RL_TCPPKT(rxstat) && !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) || (RL_UDPPKT(rxstat) && !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } } else { /* * RTL8168C/RTL816CP/RTL8111C/RTL8111CP */ if ((rxstat & RL_RDESC_STAT_PROTOID) && (rxvlan & RL_RDESC_IPV4)) m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (!(rxstat & RL_RDESC_STAT_IPSUMBAD) && (rxvlan & RL_RDESC_IPV4)) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; if (((rxstat & RL_RDESC_STAT_TCP) && !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) || ((rxstat & RL_RDESC_STAT_UDP) && !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } } } maxpkt--; if (rxvlan & RL_RDESC_VLANCTL_TAG) { m->m_pkthdr.ether_vtag = bswap16((rxvlan & RL_RDESC_VLANCTL_DATA)); m->m_flags |= M_VLANTAG; } RL_UNLOCK(sc); (*ifp->if_input)(ifp, m); RL_LOCK(sc); rx_npkts++; } /* Flush the RX DMA ring */ bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag, sc->rl_ldata.rl_rx_list_map, BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); sc->rl_ldata.rl_rx_prodidx = i; if (rx_npktsp != NULL) *rx_npktsp = rx_npkts; if (maxpkt) return (EAGAIN); return (0); } static void re_txeof(struct rl_softc *sc) { struct ifnet *ifp; struct rl_txdesc *txd; u_int32_t txstat; int cons; cons = sc->rl_ldata.rl_tx_considx; if (cons == sc->rl_ldata.rl_tx_prodidx) return; ifp = sc->rl_ifp; #ifdef DEV_NETMAP if (netmap_tx_irq(ifp, 0)) return; #endif /* DEV_NETMAP */ /* Invalidate the TX descriptor list */ bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (; cons != sc->rl_ldata.rl_tx_prodidx; cons = RL_TX_DESC_NXT(sc, cons)) { txstat = le32toh(sc->rl_ldata.rl_tx_list[cons].rl_cmdstat); if (txstat & RL_TDESC_STAT_OWN) break; /* * We only stash mbufs in the last descriptor * in a fragment chain, which also happens to * be the only place where the TX status bits * are valid. 
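 *
 * Hence the RL_TDESC_CMD_EOF test below: only the last descriptor
 * of a chain owns an mbuf to free and carries meaningful error and
 * collision status; intermediate descriptors merely return their
 * slot to rl_tx_free.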
*/ if (txstat & RL_TDESC_CMD_EOF) { txd = &sc->rl_ldata.rl_tx_desc[cons]; bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap); KASSERT(txd->tx_m != NULL, ("%s: freeing NULL mbufs!", __func__)); m_freem(txd->tx_m); txd->tx_m = NULL; if (txstat & (RL_TDESC_STAT_EXCESSCOL| RL_TDESC_STAT_COLCNT)) if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1); if (txstat & RL_TDESC_STAT_TXERRSUM) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); else if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); } sc->rl_ldata.rl_tx_free++; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } sc->rl_ldata.rl_tx_considx = cons; /* No changes made to the TX ring, so no flush needed */ if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) { #ifdef RE_TX_MODERATION /* * If not all descriptors have been reaped yet, reload * the timer so that we will eventually get another * interrupt that will cause us to re-enter this routine. * This is done in case the transmitter has gone idle. */ CSR_WRITE_4(sc, RL_TIMERCNT, 1); #endif } else sc->rl_watchdog_timer = 0; } static void re_tick(void *xsc) { struct rl_softc *sc; struct mii_data *mii; sc = xsc; RL_LOCK_ASSERT(sc); mii = device_get_softc(sc->rl_miibus); mii_tick(mii); if ((sc->rl_flags & RL_FLAG_LINK) == 0) re_miibus_statchg(sc->rl_dev); /* * Reclaim transmitted frames here. Technically it is not * necessary to do here but it ensures periodic reclamation * regardless of Tx completion interrupt which seems to be * lost on PCIe based controllers under certain situations. */ re_txeof(sc); re_watchdog(sc); callout_reset(&sc->rl_stat_callout, hz, re_tick, sc); } #ifdef DEVICE_POLLING static int re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct rl_softc *sc = ifp->if_softc; int rx_npkts = 0; RL_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rx_npkts = re_poll_locked(ifp, cmd, count); RL_UNLOCK(sc); return (rx_npkts); } static int re_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct rl_softc *sc = ifp->if_softc; int rx_npkts; RL_LOCK_ASSERT(sc); sc->rxcycles = count; re_rxeof(sc, &rx_npkts); re_txeof(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */ u_int16_t status; status = CSR_READ_2(sc, RL_ISR); if (status == 0xffff) return (rx_npkts); if (status) CSR_WRITE_2(sc, RL_ISR, status); if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) && (sc->rl_flags & RL_FLAG_PCIE)) CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); /* * XXX check behaviour on receiver stalls. 
*/ if (status & RL_ISR_SYSTEM_ERR) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } } return (rx_npkts); } #endif /* DEVICE_POLLING */ static int re_intr(void *arg) { struct rl_softc *sc; uint16_t status; sc = arg; status = CSR_READ_2(sc, RL_ISR); if (status == 0xFFFF || (status & RL_INTRS_CPLUS) == 0) return (FILTER_STRAY); CSR_WRITE_2(sc, RL_IMR, 0); taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask); return (FILTER_HANDLED); } static void re_int_task(void *arg, int npending) { struct rl_softc *sc; struct ifnet *ifp; u_int16_t status; int rval = 0; sc = arg; ifp = sc->rl_ifp; RL_LOCK(sc); status = CSR_READ_2(sc, RL_ISR); CSR_WRITE_2(sc, RL_ISR, status); if (sc->suspended || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { RL_UNLOCK(sc); return; } #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) { RL_UNLOCK(sc); return; } #endif if (status & (RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_FIFO_OFLOW)) rval = re_rxeof(sc, NULL); /* * Some chips will ignore a second TX request issued * while an existing transmission is in progress. If * the transmitter goes idle but there are still * packets waiting to be sent, we need to restart the * channel here to flush them out. This only seems to * be required with the PCIe devices. */ if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) && (sc->rl_flags & RL_FLAG_PCIE)) CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); if (status & ( #ifdef RE_TX_MODERATION RL_ISR_TIMEOUT_EXPIRED| #else RL_ISR_TX_OK| #endif RL_ISR_TX_ERR|RL_ISR_TX_DESC_UNAVAIL)) re_txeof(sc); if (status & RL_ISR_SYSTEM_ERR) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); RL_UNLOCK(sc); if ((CSR_READ_2(sc, RL_ISR) & RL_INTRS_CPLUS) || rval) { taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask); return; } CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS); } static void re_intr_msi(void *xsc) { struct rl_softc *sc; struct ifnet *ifp; uint16_t intrs, status; sc = xsc; RL_LOCK(sc); ifp = sc->rl_ifp; #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) { RL_UNLOCK(sc); return; } #endif /* Disable interrupts. */ CSR_WRITE_2(sc, RL_IMR, 0); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { RL_UNLOCK(sc); return; } intrs = RL_INTRS_CPLUS; status = CSR_READ_2(sc, RL_ISR); CSR_WRITE_2(sc, RL_ISR, status); if (sc->rl_int_rx_act > 0) { intrs &= ~(RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN); status &= ~(RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN); } if (status & (RL_ISR_TIMEOUT_EXPIRED | RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN)) { re_rxeof(sc, NULL); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if (sc->rl_int_rx_mod != 0 && (status & (RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN)) != 0) { /* Rearm one-shot timer. */ CSR_WRITE_4(sc, RL_TIMERCNT, 1); intrs &= ~(RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN); sc->rl_int_rx_act = 1; } else { intrs |= RL_ISR_RX_OK | RL_ISR_RX_ERR | RL_ISR_FIFO_OFLOW | RL_ISR_RX_OVERRUN; sc->rl_int_rx_act = 0; } } } /* * Some chips will ignore a second TX request issued * while an existing transmission is in progress. If * the transmitter goes idle but there are still * packets waiting to be sent, we need to restart the * channel here to flush them out. This only seems to * be required with the PCIe devices. 
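 *
 * RL_TXSTART_START is the same doorbell re_start_locked() rings
 * when queueing new work, so poking it again after a TX-OK or
 * descriptor-unavailable event restarts an idle queue and appears
 * harmless when the queue is already running.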
*/ if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) && (sc->rl_flags & RL_FLAG_PCIE)) CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); if (status & (RL_ISR_TX_OK | RL_ISR_TX_ERR | RL_ISR_TX_DESC_UNAVAIL)) re_txeof(sc); if (status & RL_ISR_SYSTEM_ERR) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); CSR_WRITE_2(sc, RL_IMR, intrs); } RL_UNLOCK(sc); } static int re_encap(struct rl_softc *sc, struct mbuf **m_head) { struct rl_txdesc *txd, *txd_last; bus_dma_segment_t segs[RL_NTXSEGS]; bus_dmamap_t map; struct mbuf *m_new; struct rl_desc *desc; int nsegs, prod; int i, error, ei, si; int padlen; uint32_t cmdstat, csum_flags, vlanctl; RL_LOCK_ASSERT(sc); M_ASSERTPKTHDR((*m_head)); /* * With some of the RealTek chips, using the checksum offload * support in conjunction with the autopadding feature results * in the transmission of corrupt frames. For example, if we * need to send a really small IP fragment that's less than 60 * bytes in size, and IP header checksumming is enabled, the * resulting ethernet frame that appears on the wire will * have garbled payload. To work around this, if TX IP checksum * offload is enabled, we always manually pad short frames out * to the minimum ethernet frame size. */ if ((sc->rl_flags & RL_FLAG_AUTOPAD) == 0 && (*m_head)->m_pkthdr.len < RL_IP4CSUMTX_PADLEN && ((*m_head)->m_pkthdr.csum_flags & CSUM_IP) != 0) { padlen = RL_MIN_FRAMELEN - (*m_head)->m_pkthdr.len; if (M_WRITABLE(*m_head) == 0) { /* Get a writable copy. */ m_new = m_dup(*m_head, M_NOWAIT); m_freem(*m_head); if (m_new == NULL) { *m_head = NULL; return (ENOBUFS); } *m_head = m_new; } if ((*m_head)->m_next != NULL || M_TRAILINGSPACE(*m_head) < padlen) { m_new = m_defrag(*m_head, M_NOWAIT); if (m_new == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } } else m_new = *m_head; /* * Manually pad short frames, and zero the pad space * to avoid leaking data. */ bzero(mtod(m_new, char *) + m_new->m_pkthdr.len, padlen); m_new->m_pkthdr.len += padlen; m_new->m_len = m_new->m_pkthdr.len; *m_head = m_new; } prod = sc->rl_ldata.rl_tx_prodidx; txd = &sc->rl_ldata.rl_tx_desc[prod]; error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { m_new = m_collapse(*m_head, M_NOWAIT, RL_NTXSEGS); if (m_new == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } *m_head = m_new; error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(*m_head); *m_head = NULL; return (error); } } else if (error != 0) return (error); if (nsegs == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } /* Check for number of available descriptors. */ if (sc->rl_ldata.rl_tx_free - nsegs <= 1) { bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap); return (ENOBUFS); } bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, BUS_DMASYNC_PREWRITE); /* * Set up checksum offload. Note: checksum offload bits must * appear in all descriptors of a multi-descriptor transmit * attempt. This is according to testing done with an 8169 * chip. This is a requirement. 
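 * For example, with illustrative segment lengths, a three-segment
 * frame ends up with csum_flags replicated in every rl_cmdstat:
 *
 *	desc[0]: SOF | len0 | csum_flags  (OWN set last, handing off the chain)
 *	desc[1]: OWN | len1 | csum_flags
 *	desc[2]: OWN | EOF | len2 | csum_flags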
*/ vlanctl = 0; csum_flags = 0; if (((*m_head)->m_pkthdr.csum_flags & CSUM_TSO) != 0) { if ((sc->rl_flags & RL_FLAG_DESCV2) != 0) { csum_flags |= RL_TDESC_CMD_LGSEND; vlanctl |= ((uint32_t)(*m_head)->m_pkthdr.tso_segsz << RL_TDESC_CMD_MSSVALV2_SHIFT); } else { csum_flags |= RL_TDESC_CMD_LGSEND | ((uint32_t)(*m_head)->m_pkthdr.tso_segsz << RL_TDESC_CMD_MSSVAL_SHIFT); } } else { /* * Unconditionally enable IP checksum if TCP or UDP * checksum is required. Otherwise, the TCP/UDP checksum * has no effect. */ if (((*m_head)->m_pkthdr.csum_flags & RE_CSUM_FEATURES) != 0) { if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) { csum_flags |= RL_TDESC_CMD_IPCSUM; if (((*m_head)->m_pkthdr.csum_flags & CSUM_TCP) != 0) csum_flags |= RL_TDESC_CMD_TCPCSUM; if (((*m_head)->m_pkthdr.csum_flags & CSUM_UDP) != 0) csum_flags |= RL_TDESC_CMD_UDPCSUM; } else { vlanctl |= RL_TDESC_CMD_IPCSUMV2; if (((*m_head)->m_pkthdr.csum_flags & CSUM_TCP) != 0) vlanctl |= RL_TDESC_CMD_TCPCSUMV2; if (((*m_head)->m_pkthdr.csum_flags & CSUM_UDP) != 0) vlanctl |= RL_TDESC_CMD_UDPCSUMV2; } } } /* * Set up hardware VLAN tagging. Note: vlan tag info must * appear in all descriptors of a multi-descriptor * transmission attempt. */ if ((*m_head)->m_flags & M_VLANTAG) vlanctl |= bswap16((*m_head)->m_pkthdr.ether_vtag) | RL_TDESC_VLANCTL_TAG; si = prod; for (i = 0; i < nsegs; i++, prod = RL_TX_DESC_NXT(sc, prod)) { desc = &sc->rl_ldata.rl_tx_list[prod]; desc->rl_vlanctl = htole32(vlanctl); desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[i].ds_addr)); desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[i].ds_addr)); cmdstat = segs[i].ds_len; if (i != 0) cmdstat |= RL_TDESC_CMD_OWN; if (prod == sc->rl_ldata.rl_tx_desc_cnt - 1) cmdstat |= RL_TDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmdstat | csum_flags); sc->rl_ldata.rl_tx_free--; } /* Update producer index. */ sc->rl_ldata.rl_tx_prodidx = prod; /* Set EOF on the last descriptor. */ ei = RL_TX_DESC_PRV(sc, prod); desc = &sc->rl_ldata.rl_tx_list[ei]; desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF); desc = &sc->rl_ldata.rl_tx_list[si]; /* Set SOF and transfer ownership of packet to the chip. */ desc->rl_cmdstat |= htole32(RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF); /* * Ensure that the map for this transmission * is placed at the array index of the last descriptor * in this chain. (Swap last and first dmamaps.) */ txd_last = &sc->rl_ldata.rl_tx_desc[ei]; map = txd->tx_dmamap; txd->tx_dmamap = txd_last->tx_dmamap; txd_last->tx_dmamap = map; txd_last->tx_m = *m_head; return (0); } static void re_start(struct ifnet *ifp) { struct rl_softc *sc; sc = ifp->if_softc; RL_LOCK(sc); re_start_locked(ifp); RL_UNLOCK(sc); } /* * Main transmit routine for C+ and gigE NICs. */ static void re_start_locked(struct ifnet *ifp) { struct rl_softc *sc; struct mbuf *m_head; int queued; sc = ifp->if_softc; #ifdef DEV_NETMAP /* XXX is this necessary?
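 * (When netmap owns the ring, the branch below just kicks the chip
 * and returns, bypassing the normal dequeue loop.)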
*/ if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_kring *kring = &NA(ifp)->tx_rings[0]; if (sc->rl_ldata.rl_tx_prodidx != kring->nr_hwcur) { /* kick the tx unit */ CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); #ifdef RE_TX_MODERATION CSR_WRITE_4(sc, RL_TIMERCNT, 1); #endif sc->rl_watchdog_timer = 5; } return; } #endif /* DEV_NETMAP */ + if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0) return; for (queued = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->rl_ldata.rl_tx_free > 1;) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (re_encap(sc, &m_head) != 0) { if (m_head == NULL) break; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } /* * If there's a BPF listener, bounce a copy of this frame * to him. */ ETHER_BPF_MTAP(ifp, m_head); queued++; } if (queued == 0) { #ifdef RE_TX_MODERATION if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) CSR_WRITE_4(sc, RL_TIMERCNT, 1); #endif return; } /* Flush the TX descriptors */ bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, sc->rl_ldata.rl_tx_list_map, BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START); #ifdef RE_TX_MODERATION /* * Use the countdown timer for interrupt moderation. * 'TX done' interrupts are disabled. Instead, we reset the * countdown timer, which will begin counting until it hits * the value in the TIMERINT register, and then trigger an * interrupt. Each time we write to the TIMERCNT register, * the timer count is reset to 0. */ CSR_WRITE_4(sc, RL_TIMERCNT, 1); #endif /* * Set a timeout in case the chip goes out to lunch. */ sc->rl_watchdog_timer = 5; } static void re_set_jumbo(struct rl_softc *sc, int jumbo) { if (sc->rl_hwrev->rl_rev == RL_HWREV_8168E_VL) { pci_set_max_read_req(sc->rl_dev, 4096); return; } CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG); if (jumbo != 0) { CSR_WRITE_1(sc, sc->rl_cfg3, CSR_READ_1(sc, sc->rl_cfg3) | RL_CFG3_JUMBO_EN0); switch (sc->rl_hwrev->rl_rev) { case RL_HWREV_8168DP: break; case RL_HWREV_8168E: CSR_WRITE_1(sc, sc->rl_cfg4, CSR_READ_1(sc, sc->rl_cfg4) | 0x01); break; default: CSR_WRITE_1(sc, sc->rl_cfg4, CSR_READ_1(sc, sc->rl_cfg4) | RL_CFG4_JUMBO_EN1); } } else { CSR_WRITE_1(sc, sc->rl_cfg3, CSR_READ_1(sc, sc->rl_cfg3) & ~RL_CFG3_JUMBO_EN0); switch (sc->rl_hwrev->rl_rev) { case RL_HWREV_8168DP: break; case RL_HWREV_8168E: CSR_WRITE_1(sc, sc->rl_cfg4, CSR_READ_1(sc, sc->rl_cfg4) & ~0x01); break; default: CSR_WRITE_1(sc, sc->rl_cfg4, CSR_READ_1(sc, sc->rl_cfg4) & ~RL_CFG4_JUMBO_EN1); } } CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); switch (sc->rl_hwrev->rl_rev) { case RL_HWREV_8168DP: pci_set_max_read_req(sc->rl_dev, 4096); break; default: if (jumbo != 0) pci_set_max_read_req(sc->rl_dev, 512); else pci_set_max_read_req(sc->rl_dev, 4096); } } static void re_init(void *xsc) { struct rl_softc *sc = xsc; RL_LOCK(sc); re_init_locked(sc); RL_UNLOCK(sc); } static void re_init_locked(struct rl_softc *sc) { struct ifnet *ifp = sc->rl_ifp; struct mii_data *mii; uint32_t reg; uint16_t cfg; union { uint32_t align_dummy; u_char eaddr[ETHER_ADDR_LEN]; } eaddr; RL_LOCK_ASSERT(sc); mii = device_get_softc(sc->rl_miibus); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) return; /* * Cancel pending I/O and free all RX/TX buffers. */ re_stop(sc); /* Put controller into known state. */ re_reset(sc); /* * For C+ mode, initialize the RX descriptors and mbufs. 
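 * Jumbo-capable chips (RL_FLAG_JUMBOV2) use the separate jumbo RX
 * ring, set up by re_jrx_list_init(), whenever the MTU exceeds RL_MTU.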
*/ if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { if (ifp->if_mtu > RL_MTU) { if (re_jrx_list_init(sc) != 0) { device_printf(sc->rl_dev, "no memory for jumbo RX buffers\n"); re_stop(sc); return; } /* Disable checksum offloading for jumbo frames. */ ifp->if_capenable &= ~(IFCAP_HWCSUM | IFCAP_TSO4); ifp->if_hwassist &= ~(RE_CSUM_FEATURES | CSUM_TSO); } else { if (re_rx_list_init(sc) != 0) { device_printf(sc->rl_dev, "no memory for RX buffers\n"); re_stop(sc); return; } } re_set_jumbo(sc, ifp->if_mtu > RL_MTU); } else { if (re_rx_list_init(sc) != 0) { device_printf(sc->rl_dev, "no memory for RX buffers\n"); re_stop(sc); return; } if ((sc->rl_flags & RL_FLAG_PCIE) != 0 && pci_get_device(sc->rl_dev) != RT_DEVICEID_8101E) { if (ifp->if_mtu > RL_MTU) pci_set_max_read_req(sc->rl_dev, 512); else pci_set_max_read_req(sc->rl_dev, 4096); } } re_tx_list_init(sc); /* * Enable C+ RX and TX mode, as well as VLAN stripping and * RX checksum offload. We must configure the C+ register * before all others. */ cfg = RL_CPLUSCMD_PCI_MRW; if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) cfg |= RL_CPLUSCMD_RXCSUM_ENB; if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) cfg |= RL_CPLUSCMD_VLANSTRIP; if ((sc->rl_flags & RL_FLAG_MACSTAT) != 0) { cfg |= RL_CPLUSCMD_MACSTAT_DIS; /* XXX magic. */ cfg |= 0x0001; } else cfg |= RL_CPLUSCMD_RXENB | RL_CPLUSCMD_TXENB; CSR_WRITE_2(sc, RL_CPLUS_CMD, cfg); if (sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SC || sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SCE) { reg = 0x000fff00; if ((CSR_READ_1(sc, sc->rl_cfg2) & RL_CFG2_PCI66MHZ) != 0) reg |= 0x000000ff; if (sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SCE) reg |= 0x00f00000; CSR_WRITE_4(sc, 0x7c, reg); /* Disable interrupt mitigation. */ CSR_WRITE_2(sc, 0xe2, 0); } /* * Disable TSO if interface MTU size is greater than MSS * allowed in controller. */ if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; } /* * Init our MAC address. Even though the chipset * documentation doesn't mention it, we need to enter "Config * register write enable" mode to modify the ID registers. */ /* Copy MAC address on stack to align. */ bcopy(IF_LLADDR(ifp), eaddr.eaddr, ETHER_ADDR_LEN); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG); CSR_WRITE_4(sc, RL_IDR0, htole32(*(u_int32_t *)(&eaddr.eaddr[0]))); CSR_WRITE_4(sc, RL_IDR4, htole32(*(u_int32_t *)(&eaddr.eaddr[4]))); CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); /* * Load the addresses of the RX and TX lists into the chip. */ CSR_WRITE_4(sc, RL_RXLIST_ADDR_HI, RL_ADDR_HI(sc->rl_ldata.rl_rx_list_addr)); CSR_WRITE_4(sc, RL_RXLIST_ADDR_LO, RL_ADDR_LO(sc->rl_ldata.rl_rx_list_addr)); CSR_WRITE_4(sc, RL_TXLIST_ADDR_HI, RL_ADDR_HI(sc->rl_ldata.rl_tx_list_addr)); CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO, RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr)); if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) { /* Disable RXDV gate. */ CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) & ~0x00080000); } /* * Enable transmit and receive for pre-RTL8168G controllers. * RX/TX MACs should be enabled before RX/TX configuration. */ if ((sc->rl_flags & RL_FLAG_8168G_PLUS) == 0) CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB | RL_CMD_RX_ENB); /* * Set the initial TX configuration. 
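 * In test mode the transmitter is put into loopback (RL_LOOPTEST_ON
 * or RL_LOOPTEST_ON_CPLUS), so test frames never reach the wire.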
*/ if (sc->rl_testmode) { if (sc->rl_type == RL_8169) CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG|RL_LOOPTEST_ON); else CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG|RL_LOOPTEST_ON_CPLUS); } else CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG); CSR_WRITE_1(sc, RL_EARLY_TX_THRESH, 16); /* * Set the initial RX configuration. */ re_set_rxmode(sc); /* Configure interrupt moderation. */ if (sc->rl_type == RL_8169) { /* Magic from vendor. */ CSR_WRITE_2(sc, RL_INTRMOD, 0x5100); } /* * Enable transmit and receive for RTL8168G and later controllers. * RX/TX MACs should be enabled after RX/TX configuration. */ if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB | RL_CMD_RX_ENB); #ifdef DEVICE_POLLING /* * Disable interrupts if we are polling. */ if (ifp->if_capenable & IFCAP_POLLING) CSR_WRITE_2(sc, RL_IMR, 0); else /* otherwise ... */ #endif /* * Enable interrupts. */ if (sc->rl_testmode) CSR_WRITE_2(sc, RL_IMR, 0); else CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS); CSR_WRITE_2(sc, RL_ISR, RL_INTRS_CPLUS); /* Set initial TX threshold */ sc->rl_txthresh = RL_TX_THRESH_INIT; /* Start RX/TX process. */ CSR_WRITE_4(sc, RL_MISSEDPKT, 0); /* * Initialize the timer interrupt register so that * a timer interrupt will be generated once the timer * reaches a certain number of ticks. The timer is * reloaded on each transmit. */ #ifdef RE_TX_MODERATION /* * Use the timer interrupt register for TX interrupt * moderation, which dramatically improves the TX frame rate. */ if (sc->rl_type == RL_8169) CSR_WRITE_4(sc, RL_TIMERINT_8169, 0x800); else CSR_WRITE_4(sc, RL_TIMERINT, 0x400); #else /* * Use the timer interrupt register for RX interrupt * moderation. */ if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) != 0 && intr_filter == 0) { if (sc->rl_type == RL_8169) CSR_WRITE_4(sc, RL_TIMERINT_8169, RL_USECS(sc->rl_int_rx_mod)); } else { if (sc->rl_type == RL_8169) CSR_WRITE_4(sc, RL_TIMERINT_8169, RL_USECS(0)); } #endif /* * For 8169 gigE NICs, set the max allowed RX packet * size so we can receive jumbo frames. */ if (sc->rl_type == RL_8169) { if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { /* * For controllers that use the new jumbo frame scheme, * set the maximum jumbo frame size according to the * controller revision. */ if (ifp->if_mtu > RL_MTU) CSR_WRITE_2(sc, RL_MAXRXPKTLEN, sc->rl_hwrev->rl_max_mtu + ETHER_VLAN_ENCAP_LEN + ETHER_HDR_LEN + ETHER_CRC_LEN); else CSR_WRITE_2(sc, RL_MAXRXPKTLEN, RE_RX_DESC_BUFLEN); } else if ((sc->rl_flags & RL_FLAG_PCIE) != 0 && sc->rl_hwrev->rl_max_mtu == RL_MTU) { /* RTL810x has no jumbo frame support. */ CSR_WRITE_2(sc, RL_MAXRXPKTLEN, RE_RX_DESC_BUFLEN); } else CSR_WRITE_2(sc, RL_MAXRXPKTLEN, 16383); } if (sc->rl_testmode) return; CSR_WRITE_1(sc, sc->rl_cfg1, CSR_READ_1(sc, sc->rl_cfg1) | RL_CFG1_DRVLOAD); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->rl_flags &= ~RL_FLAG_LINK; mii_mediachg(mii); sc->rl_watchdog_timer = 0; callout_reset(&sc->rl_stat_callout, hz, re_tick, sc); } /* * Set media options. */ static int re_ifmedia_upd(struct ifnet *ifp) { struct rl_softc *sc; struct mii_data *mii; int error; sc = ifp->if_softc; mii = device_get_softc(sc->rl_miibus); RL_LOCK(sc); error = mii_mediachg(mii); RL_UNLOCK(sc); return (error); } /* * Report current media status.
*/ static void re_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct rl_softc *sc; struct mii_data *mii; sc = ifp->if_softc; mii = device_get_softc(sc->rl_miibus); RL_LOCK(sc); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; RL_UNLOCK(sc); } static int re_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct rl_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; struct mii_data *mii; int error = 0; switch (command) { case SIOCSIFMTU: if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > sc->rl_hwrev->rl_max_mtu || ((sc->rl_flags & RL_FLAG_FASTETHER) != 0 && ifr->ifr_mtu > RL_MTU)) { error = EINVAL; break; } RL_LOCK(sc); if (ifp->if_mtu != ifr->ifr_mtu) { ifp->if_mtu = ifr->ifr_mtu; if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 && (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) { ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO); ifp->if_hwassist &= ~CSUM_TSO; } VLAN_CAPABILITIES(ifp); } RL_UNLOCK(sc); break; case SIOCSIFFLAGS: RL_LOCK(sc); if ((ifp->if_flags & IFF_UP) != 0) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if (((ifp->if_flags ^ sc->rl_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) != 0) re_set_rxmode(sc); } else re_init_locked(sc); } else { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) re_stop(sc); } sc->rl_if_flags = ifp->if_flags; RL_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: RL_LOCK(sc); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) re_set_rxmode(sc); RL_UNLOCK(sc); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: mii = device_get_softc(sc->rl_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; case SIOCSIFCAP: { int mask, reinit; mask = ifr->ifr_reqcap ^ ifp->if_capenable; reinit = 0; #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(re_poll, ifp); if (error) return (error); RL_LOCK(sc); /* Disable interrupts */ CSR_WRITE_2(sc, RL_IMR, 0x0000); ifp->if_capenable |= IFCAP_POLLING; RL_UNLOCK(sc); } else { error = ether_poll_deregister(ifp); /* Enable interrupts. */ RL_LOCK(sc); CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS); ifp->if_capenable &= ~IFCAP_POLLING; RL_UNLOCK(sc); } } #endif /* DEVICE_POLLING */ RL_LOCK(sc); if ((mask & IFCAP_TXCSUM) != 0 && (ifp->if_capabilities & IFCAP_TXCSUM) != 0) { ifp->if_capenable ^= IFCAP_TXCSUM; if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) ifp->if_hwassist |= RE_CSUM_FEATURES; else ifp->if_hwassist &= ~RE_CSUM_FEATURES; reinit = 1; } if ((mask & IFCAP_RXCSUM) != 0 && (ifp->if_capabilities & IFCAP_RXCSUM) != 0) { ifp->if_capenable ^= IFCAP_RXCSUM; reinit = 1; } if ((mask & IFCAP_TSO4) != 0 && (ifp->if_capabilities & IFCAP_TSO4) != 0) { ifp->if_capenable ^= IFCAP_TSO4; if ((IFCAP_TSO4 & ifp->if_capenable) != 0) ifp->if_hwassist |= CSUM_TSO; else ifp->if_hwassist &= ~CSUM_TSO; if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; } } if ((mask & IFCAP_VLAN_HWTSO) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTSO) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if ((mask & IFCAP_VLAN_HWTAGGING) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; /* TSO over VLAN requires VLAN hardware tagging. 
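 * Clearing IFCAP_VLAN_HWTAGGING therefore also clears
 * IFCAP_VLAN_HWTSO below.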
*/ if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; reinit = 1; } if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 && (mask & (IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_VLAN_HWTSO)) != 0) reinit = 1; if ((mask & IFCAP_WOL) != 0 && (ifp->if_capabilities & IFCAP_WOL) != 0) { if ((mask & IFCAP_WOL_UCAST) != 0) ifp->if_capenable ^= IFCAP_WOL_UCAST; if ((mask & IFCAP_WOL_MCAST) != 0) ifp->if_capenable ^= IFCAP_WOL_MCAST; if ((mask & IFCAP_WOL_MAGIC) != 0) ifp->if_capenable ^= IFCAP_WOL_MAGIC; } if (reinit && ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); } RL_UNLOCK(sc); VLAN_CAPABILITIES(ifp); } break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static void re_watchdog(struct rl_softc *sc) { struct ifnet *ifp; RL_LOCK_ASSERT(sc); if (sc->rl_watchdog_timer == 0 || --sc->rl_watchdog_timer != 0) return; ifp = sc->rl_ifp; re_txeof(sc); if (sc->rl_ldata.rl_tx_free == sc->rl_ldata.rl_tx_desc_cnt) { if_printf(ifp, "watchdog timeout (missed Tx interrupts) " "-- recovering\n"); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); return; } if_printf(ifp, "watchdog timeout\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); re_rxeof(sc, NULL); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; re_init_locked(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) re_start_locked(ifp); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void re_stop(struct rl_softc *sc) { int i; struct ifnet *ifp; struct rl_txdesc *txd; struct rl_rxdesc *rxd; RL_LOCK_ASSERT(sc); ifp = sc->rl_ifp; sc->rl_watchdog_timer = 0; callout_stop(&sc->rl_stat_callout); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); /* * Disable accepting frames to put RX MAC into idle state. * Otherwise it's possible to get frames while stop command * execution is in progress and controller can DMA the frame * to already freed RX buffer during that period. */ CSR_WRITE_4(sc, RL_RXCFG, CSR_READ_4(sc, RL_RXCFG) & ~(RL_RXCFG_RX_ALLPHYS | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_MULTI | RL_RXCFG_RX_BROAD)); if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) { /* Enable RXDV gate. */ CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) | 0x00080000); } if ((sc->rl_flags & RL_FLAG_WAIT_TXPOLL) != 0) { for (i = RL_TIMEOUT; i > 0; i--) { if ((CSR_READ_1(sc, sc->rl_txstart) & RL_TXSTART_START) == 0) break; DELAY(20); } if (i == 0) device_printf(sc->rl_dev, "stopping TX poll timed out!\n"); CSR_WRITE_1(sc, RL_COMMAND, 0x00); } else if ((sc->rl_flags & RL_FLAG_CMDSTOP) != 0) { CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_STOPREQ | RL_CMD_TX_ENB | RL_CMD_RX_ENB); if ((sc->rl_flags & RL_FLAG_CMDSTOP_WAIT_TXQ) != 0) { for (i = RL_TIMEOUT; i > 0; i--) { if ((CSR_READ_4(sc, RL_TXCFG) & RL_TXCFG_QUEUE_EMPTY) != 0) break; DELAY(100); } if (i == 0) device_printf(sc->rl_dev, "stopping TXQ timed out!\n"); } } else CSR_WRITE_1(sc, RL_COMMAND, 0x00); DELAY(1000); CSR_WRITE_2(sc, RL_IMR, 0x0000); CSR_WRITE_2(sc, RL_ISR, 0xFFFF); if (sc->rl_head != NULL) { m_freem(sc->rl_head); sc->rl_head = sc->rl_tail = NULL; } /* Free the TX list buffers. */ for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) { txd = &sc->rl_ldata.rl_tx_desc[i]; if (txd->tx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap); m_freem(txd->tx_m); txd->tx_m = NULL; } } /* Free the RX list buffers. 
*/ for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { rxd = &sc->rl_ldata.rl_rx_desc[i]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap); m_freem(rxd->rx_m); rxd->rx_m = NULL; } } if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) { for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { rxd = &sc->rl_ldata.rl_jrx_desc[i]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap); m_freem(rxd->rx_m); rxd->rx_m = NULL; } } } } /* * Device suspend routine. Stop the interface and save some PCI * settings in case the BIOS doesn't restore them properly on * resume. */ static int re_suspend(device_t dev) { struct rl_softc *sc; sc = device_get_softc(dev); RL_LOCK(sc); re_stop(sc); re_setwol(sc); sc->suspended = 1; RL_UNLOCK(sc); return (0); } /* * Device resume routine. Restore some PCI settings in case the BIOS * doesn't, re-enable busmastering, and restart the interface if * appropriate. */ static int re_resume(device_t dev) { struct rl_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); RL_LOCK(sc); ifp = sc->rl_ifp; /* Take controller out of sleep mode. */ if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) { if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80) CSR_WRITE_1(sc, RL_GPIO, CSR_READ_1(sc, RL_GPIO) | 0x01); } /* * Clear WOL matching such that normal Rx filtering * wouldn't interfere with WOL patterns. */ re_clrwol(sc); /* reinitialize interface if necessary */ if (ifp->if_flags & IFF_UP) re_init_locked(sc); sc->suspended = 0; RL_UNLOCK(sc); return (0); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static int re_shutdown(device_t dev) { struct rl_softc *sc; sc = device_get_softc(dev); RL_LOCK(sc); re_stop(sc); /* * Mark interface as down since otherwise we will panic if * interrupt comes in later on, which can happen in some * cases. */ sc->rl_ifp->if_flags &= ~IFF_UP; re_setwol(sc); RL_UNLOCK(sc); return (0); } static void re_set_linkspeed(struct rl_softc *sc) { struct mii_softc *miisc; struct mii_data *mii; int aneg, i, phyno; RL_LOCK_ASSERT(sc); mii = device_get_softc(sc->rl_miibus); mii_pollstat(mii); aneg = 0; if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: return; case IFM_1000_T: aneg++; break; default: break; } } miisc = LIST_FIRST(&mii->mii_phys); phyno = miisc->mii_phy; LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); re_miibus_writereg(sc->rl_dev, phyno, MII_100T2CR, 0); re_miibus_writereg(sc->rl_dev, phyno, MII_ANAR, ANAR_TX_FD | ANAR_TX | ANAR_10_FD | ANAR_10 | ANAR_CSMA); re_miibus_writereg(sc->rl_dev, phyno, MII_BMCR, BMCR_AUTOEN | BMCR_STARTNEG); DELAY(1000); if (aneg != 0) { /* * Poll link state until re(4) gets a 10/100Mbps link. */ for (i = 0; i < MII_ANEGTICKS_GIGE; i++) { mii_pollstat(mii); if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: return; default: break; } } RL_UNLOCK(sc); pause("relnk", hz); RL_LOCK(sc); } if (i == MII_ANEGTICKS_GIGE) device_printf(sc->rl_dev, "establishing a link failed, WOL may not work!\n"); } /* * No link, force MAC to have 100Mbps, full-duplex link.
* MAC does not require reprogramming on resolved speed/duplex, * so this is just for completeness. */ mii->mii_media_status = IFM_AVALID | IFM_ACTIVE; mii->mii_media_active = IFM_ETHER | IFM_100_TX | IFM_FDX; } static void re_setwol(struct rl_softc *sc) { struct ifnet *ifp; int pmc; uint16_t pmstat; uint8_t v; RL_LOCK_ASSERT(sc); if (pci_find_cap(sc->rl_dev, PCIY_PMG, &pmc) != 0) return; ifp = sc->rl_ifp; /* Put controller into sleep mode. */ if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) { if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80) CSR_WRITE_1(sc, RL_GPIO, CSR_READ_1(sc, RL_GPIO) & ~0x01); } if ((ifp->if_capenable & IFCAP_WOL) != 0) { + if ((sc->rl_flags & RL_FLAG_8168G_PLUS) != 0) { + /* Disable RXDV gate. */ + CSR_WRITE_4(sc, RL_MISC, CSR_READ_4(sc, RL_MISC) & + ~0x00080000); + } re_set_rxmode(sc); if ((sc->rl_flags & RL_FLAG_WOL_MANLINK) != 0) re_set_linkspeed(sc); if ((sc->rl_flags & RL_FLAG_WOLRXENB) != 0) CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RX_ENB); } /* Enable config register write. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); /* Enable PME. */ v = CSR_READ_1(sc, sc->rl_cfg1); v &= ~RL_CFG1_PME; if ((ifp->if_capenable & IFCAP_WOL) != 0) v |= RL_CFG1_PME; CSR_WRITE_1(sc, sc->rl_cfg1, v); v = CSR_READ_1(sc, sc->rl_cfg3); v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC); if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0) v |= RL_CFG3_WOL_MAGIC; CSR_WRITE_1(sc, sc->rl_cfg3, v); v = CSR_READ_1(sc, sc->rl_cfg5); v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST | RL_CFG5_WOL_LANWAKE); if ((ifp->if_capenable & IFCAP_WOL_UCAST) != 0) v |= RL_CFG5_WOL_UCAST; if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0) v |= RL_CFG5_WOL_MCAST | RL_CFG5_WOL_BCAST; if ((ifp->if_capenable & IFCAP_WOL) != 0) v |= RL_CFG5_WOL_LANWAKE; CSR_WRITE_1(sc, sc->rl_cfg5, v); /* Config register write done. */ CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); if ((ifp->if_capenable & IFCAP_WOL) == 0 && (sc->rl_flags & RL_FLAG_PHYWAKE_PM) != 0) CSR_WRITE_1(sc, RL_PMCH, CSR_READ_1(sc, RL_PMCH) & ~0x80); /* * It seems that hardware resets its link speed to 100Mbps in * power down mode so switching to 100Mbps in driver is not * needed. */ /* Request PME if WOL is requested. */ pmstat = pci_read_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, 2); pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if ((ifp->if_capenable & IFCAP_WOL) != 0) pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, pmstat, 2); } static void re_clrwol(struct rl_softc *sc) { int pmc; uint8_t v; RL_LOCK_ASSERT(sc); if (pci_find_cap(sc->rl_dev, PCIY_PMG, &pmc) != 0) return; /* Enable config register write. */ CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE); v = CSR_READ_1(sc, sc->rl_cfg3); v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC); CSR_WRITE_1(sc, sc->rl_cfg3, v); /* Config register write done. 
*/ CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF); v = CSR_READ_1(sc, sc->rl_cfg5); v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST); v &= ~RL_CFG5_WOL_LANWAKE; CSR_WRITE_1(sc, sc->rl_cfg5, v); } static void re_add_sysctls(struct rl_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; int error; ctx = device_get_sysctl_ctx(sc->rl_dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->rl_dev)); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "stats", CTLTYPE_INT | CTLFLAG_RW, sc, 0, re_sysctl_stats, "I", "Statistics Information"); if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0) return; SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "int_rx_mod", CTLTYPE_INT | CTLFLAG_RW, &sc->rl_int_rx_mod, 0, sysctl_hw_re_int_mod, "I", "re RX interrupt moderation"); /* Pull in device tunables. */ sc->rl_int_rx_mod = RL_TIMER_DEFAULT; error = resource_int_value(device_get_name(sc->rl_dev), device_get_unit(sc->rl_dev), "int_rx_mod", &sc->rl_int_rx_mod); if (error == 0) { if (sc->rl_int_rx_mod < RL_TIMER_MIN || sc->rl_int_rx_mod > RL_TIMER_MAX) { device_printf(sc->rl_dev, "int_rx_mod value out of " "range; using default: %d\n", RL_TIMER_DEFAULT); sc->rl_int_rx_mod = RL_TIMER_DEFAULT; } } } static int re_sysctl_stats(SYSCTL_HANDLER_ARGS) { struct rl_softc *sc; struct rl_stats *stats; int error, i, result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || req->newptr == NULL) return (error); if (result == 1) { sc = (struct rl_softc *)arg1; RL_LOCK(sc); if ((sc->rl_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { RL_UNLOCK(sc); goto done; } bus_dmamap_sync(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap, BUS_DMASYNC_PREREAD); CSR_WRITE_4(sc, RL_DUMPSTATS_HI, RL_ADDR_HI(sc->rl_ldata.rl_stats_addr)); CSR_WRITE_4(sc, RL_DUMPSTATS_LO, RL_ADDR_LO(sc->rl_ldata.rl_stats_addr)); CSR_WRITE_4(sc, RL_DUMPSTATS_LO, RL_ADDR_LO(sc->rl_ldata.rl_stats_addr | RL_DUMPSTATS_START)); for (i = RL_TIMEOUT; i > 0; i--) { if ((CSR_READ_4(sc, RL_DUMPSTATS_LO) & RL_DUMPSTATS_START) == 0) break; DELAY(1000); } bus_dmamap_sync(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap, BUS_DMASYNC_POSTREAD); RL_UNLOCK(sc); if (i == 0) { device_printf(sc->rl_dev, "DUMP statistics request timed out\n"); return (ETIMEDOUT); } done: stats = sc->rl_ldata.rl_stats; printf("%s statistics:\n", device_get_nameunit(sc->rl_dev)); printf("Tx frames : %ju\n", (uintmax_t)le64toh(stats->rl_tx_pkts)); printf("Rx frames : %ju\n", (uintmax_t)le64toh(stats->rl_rx_pkts)); printf("Tx errors : %ju\n", (uintmax_t)le64toh(stats->rl_tx_errs)); printf("Rx errors : %u\n", le32toh(stats->rl_rx_errs)); printf("Rx missed frames : %u\n", (uint32_t)le16toh(stats->rl_missed_pkts)); printf("Rx frame alignment errs : %u\n", (uint32_t)le16toh(stats->rl_rx_framealign_errs)); printf("Tx single collisions : %u\n", le32toh(stats->rl_tx_onecoll)); printf("Tx multiple collisions : %u\n", le32toh(stats->rl_tx_multicolls)); printf("Rx unicast frames : %ju\n", (uintmax_t)le64toh(stats->rl_rx_ucasts)); printf("Rx broadcast frames : %ju\n", (uintmax_t)le64toh(stats->rl_rx_bcasts)); printf("Rx multicast frames : %u\n", le32toh(stats->rl_rx_mcasts)); printf("Tx aborts : %u\n", (uint32_t)le16toh(stats->rl_tx_aborts)); printf("Tx underruns : %u\n", (uint32_t)le16toh(stats->rl_rx_underruns)); } return (error); } static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int low, int high) { int error, value; if (arg1 == NULL) return (EINVAL); value = *(int *)arg1; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); if 
(value < low || value > high) return (EINVAL); *(int *)arg1 = value; return (0); } static int sysctl_hw_re_int_mod(SYSCTL_HANDLER_ARGS) { return (sysctl_int_range(oidp, arg1, arg2, req, RL_TIMER_MIN, RL_TIMER_MAX)); } Index: projects/powernv/dev/vmware/vmxnet3/if_vmx.c =================================================================== --- projects/powernv/dev/vmware/vmxnet3/if_vmx.c (revision 290990) +++ projects/powernv/dev/vmware/vmxnet3/if_vmx.c (revision 290991) @@ -1,3915 +1,3922 @@ /*- * Copyright (c) 2013 Tsubai Masanari * Copyright (c) 2013 Bryan Venteicher * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $ */ /* Driver for VMware vmxnet3 virtual ethernet devices. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "if_vmxreg.h" #include "if_vmxvar.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef VMXNET3_FAILPOINTS #include static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0, "vmxnet3 fail points"); #define VMXNET3_FP _debug_fail_point_vmxnet3 #endif static int vmxnet3_probe(device_t); static int vmxnet3_attach(device_t); static int vmxnet3_detach(device_t); static int vmxnet3_shutdown(device_t); static int vmxnet3_alloc_resources(struct vmxnet3_softc *); static void vmxnet3_free_resources(struct vmxnet3_softc *); static int vmxnet3_check_version(struct vmxnet3_softc *); static void vmxnet3_initial_config(struct vmxnet3_softc *); static void vmxnet3_check_multiqueue(struct vmxnet3_softc *); static int vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *); static int vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *); static int vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *); static int vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int, struct vmxnet3_interrupt *); static int vmxnet3_alloc_intr_resources(struct vmxnet3_softc *); static int vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *); static int vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *); static int vmxnet3_setup_interrupts(struct vmxnet3_softc *); static int vmxnet3_alloc_interrupts(struct vmxnet3_softc *); static void vmxnet3_free_interrupt(struct vmxnet3_softc *, struct vmxnet3_interrupt *); static void vmxnet3_free_interrupts(struct vmxnet3_softc *); #ifndef VMXNET3_LEGACY_TX static int vmxnet3_alloc_taskqueue(struct vmxnet3_softc *); static void vmxnet3_start_taskqueue(struct vmxnet3_softc *); static void vmxnet3_drain_taskqueue(struct vmxnet3_softc *); static void vmxnet3_free_taskqueue(struct vmxnet3_softc *); 
#endif static int vmxnet3_init_rxq(struct vmxnet3_softc *, int); static int vmxnet3_init_txq(struct vmxnet3_softc *, int); static int vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *); static void vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *); static void vmxnet3_destroy_txq(struct vmxnet3_txqueue *); static void vmxnet3_free_rxtx_queues(struct vmxnet3_softc *); static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *); static void vmxnet3_free_shared_data(struct vmxnet3_softc *); static int vmxnet3_alloc_txq_data(struct vmxnet3_softc *); static void vmxnet3_free_txq_data(struct vmxnet3_softc *); static int vmxnet3_alloc_rxq_data(struct vmxnet3_softc *); static void vmxnet3_free_rxq_data(struct vmxnet3_softc *); static int vmxnet3_alloc_queue_data(struct vmxnet3_softc *); static void vmxnet3_free_queue_data(struct vmxnet3_softc *); static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *); static void vmxnet3_init_shared_data(struct vmxnet3_softc *); static void vmxnet3_reinit_interface(struct vmxnet3_softc *); static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *); static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *); static int vmxnet3_alloc_data(struct vmxnet3_softc *); static void vmxnet3_free_data(struct vmxnet3_softc *); static int vmxnet3_setup_interface(struct vmxnet3_softc *); static void vmxnet3_evintr(struct vmxnet3_softc *); static void vmxnet3_txq_eof(struct vmxnet3_txqueue *); static void vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *); static int vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *); static void vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *, struct vmxnet3_rxring *, int); static void vmxnet3_rxq_eof(struct vmxnet3_rxqueue *); static void vmxnet3_legacy_intr(void *); static void vmxnet3_txq_intr(void *); static void vmxnet3_rxq_intr(void *); static void vmxnet3_event_intr(void *); static void vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *); static void vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *); static void vmxnet3_stop(struct vmxnet3_softc *); static void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *); static int vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *); static int vmxnet3_reinit_queues(struct vmxnet3_softc *); static int vmxnet3_enable_device(struct vmxnet3_softc *); static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *); static int vmxnet3_reinit(struct vmxnet3_softc *); static void vmxnet3_init_locked(struct vmxnet3_softc *); static void vmxnet3_init(void *); static int vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *,struct mbuf *, int *, int *, int *); static int vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **, bus_dmamap_t, bus_dma_segment_t [], int *); static void vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t); static int vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **); #ifdef VMXNET3_LEGACY_TX static void vmxnet3_start_locked(struct ifnet *); static void vmxnet3_start(struct ifnet *); #else static int vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *, struct mbuf *); static int vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *); static void vmxnet3_txq_tq_deferred(void *, int); #endif static void vmxnet3_txq_start(struct vmxnet3_txqueue *); static void vmxnet3_tx_start_all(struct vmxnet3_softc *); static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int, uint16_t); static void vmxnet3_register_vlan(void *, struct ifnet *, uint16_t); static void 
vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t); static void vmxnet3_set_rxfilter(struct vmxnet3_softc *); static int vmxnet3_change_mtu(struct vmxnet3_softc *, int); static int vmxnet3_ioctl(struct ifnet *, u_long, caddr_t); static uint64_t vmxnet3_get_counter(struct ifnet *, ift_counter); #ifndef VMXNET3_LEGACY_TX static void vmxnet3_qflush(struct ifnet *); #endif static int vmxnet3_watchdog(struct vmxnet3_txqueue *); static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *); static void vmxnet3_tick(void *); static void vmxnet3_link_status(struct vmxnet3_softc *); static void vmxnet3_media_status(struct ifnet *, struct ifmediareq *); static int vmxnet3_media_change(struct ifnet *); static void vmxnet3_set_lladdr(struct vmxnet3_softc *); static void vmxnet3_get_lladdr(struct vmxnet3_softc *); static void vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *, struct sysctl_ctx_list *, struct sysctl_oid_list *); static void vmxnet3_setup_sysctl(struct vmxnet3_softc *); static void vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t, uint32_t); static uint32_t vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t); static void vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t, uint32_t); static void vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t); static uint32_t vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t); static void vmxnet3_enable_intr(struct vmxnet3_softc *, int); static void vmxnet3_disable_intr(struct vmxnet3_softc *, int); static void vmxnet3_enable_all_intrs(struct vmxnet3_softc *); static void vmxnet3_disable_all_intrs(struct vmxnet3_softc *); static int vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t, bus_size_t, struct vmxnet3_dma_alloc *); static void vmxnet3_dma_free(struct vmxnet3_softc *, struct vmxnet3_dma_alloc *); static int vmxnet3_tunable_int(struct vmxnet3_softc *, const char *, int); typedef enum { VMXNET3_BARRIER_RD, VMXNET3_BARRIER_WR, VMXNET3_BARRIER_RDWR, } vmxnet3_barrier_t; static void vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t); /* Tunables. */ static int vmxnet3_mq_disable = 0; TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable); static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES; TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue); static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES; TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue); static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC; TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc); static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC; TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc); static device_method_t vmxnet3_methods[] = { /* Device interface. 
*/ DEVMETHOD(device_probe, vmxnet3_probe), DEVMETHOD(device_attach, vmxnet3_attach), DEVMETHOD(device_detach, vmxnet3_detach), DEVMETHOD(device_shutdown, vmxnet3_shutdown), DEVMETHOD_END }; static driver_t vmxnet3_driver = { "vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc) }; static devclass_t vmxnet3_devclass; DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0); MODULE_DEPEND(vmx, pci, 1, 1, 1); MODULE_DEPEND(vmx, ether, 1, 1, 1); #define VMXNET3_VMWARE_VENDOR_ID 0x15AD #define VMXNET3_VMWARE_DEVICE_ID 0x07B0 static int vmxnet3_probe(device_t dev) { if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID && pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) { device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter"); return (BUS_PROBE_DEFAULT); } return (ENXIO); } static int vmxnet3_attach(device_t dev) { struct vmxnet3_softc *sc; int error; sc = device_get_softc(dev); sc->vmx_dev = dev; pci_enable_busmaster(dev); VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev)); callout_init_mtx(&sc->vmx_tick, &sc->vmx_mtx, 0); vmxnet3_initial_config(sc); error = vmxnet3_alloc_resources(sc); if (error) goto fail; error = vmxnet3_check_version(sc); if (error) goto fail; error = vmxnet3_alloc_rxtx_queues(sc); if (error) goto fail; #ifndef VMXNET3_LEGACY_TX error = vmxnet3_alloc_taskqueue(sc); if (error) goto fail; #endif error = vmxnet3_alloc_interrupts(sc); if (error) goto fail; vmxnet3_check_multiqueue(sc); error = vmxnet3_alloc_data(sc); if (error) goto fail; error = vmxnet3_setup_interface(sc); if (error) goto fail; error = vmxnet3_setup_interrupts(sc); if (error) { ether_ifdetach(sc->vmx_ifp); device_printf(dev, "could not set up interrupt\n"); goto fail; } vmxnet3_setup_sysctl(sc); #ifndef VMXNET3_LEGACY_TX vmxnet3_start_taskqueue(sc); #endif fail: if (error) vmxnet3_detach(dev); return (error); } static int vmxnet3_detach(device_t dev) { struct vmxnet3_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->vmx_ifp; if (device_is_attached(dev)) { VMXNET3_CORE_LOCK(sc); vmxnet3_stop(sc); VMXNET3_CORE_UNLOCK(sc); callout_drain(&sc->vmx_tick); #ifndef VMXNET3_LEGACY_TX vmxnet3_drain_taskqueue(sc); #endif ether_ifdetach(ifp); } if (sc->vmx_vlan_attach != NULL) { EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach); sc->vmx_vlan_attach = NULL; } if (sc->vmx_vlan_detach != NULL) { EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach); sc->vmx_vlan_detach = NULL; } #ifndef VMXNET3_LEGACY_TX vmxnet3_free_taskqueue(sc); #endif vmxnet3_free_interrupts(sc); if (ifp != NULL) { if_free(ifp); sc->vmx_ifp = NULL; } ifmedia_removeall(&sc->vmx_media); vmxnet3_free_data(sc); vmxnet3_free_resources(sc); vmxnet3_free_rxtx_queues(sc); VMXNET3_CORE_LOCK_DESTROY(sc); return (0); } static int vmxnet3_shutdown(device_t dev) { return (0); } static int vmxnet3_alloc_resources(struct vmxnet3_softc *sc) { device_t dev; int rid; dev = sc->vmx_dev; rid = PCIR_BAR(0); sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->vmx_res0 == NULL) { device_printf(dev, "could not map BAR0 memory\n"); return (ENXIO); } sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0); sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0); rid = PCIR_BAR(1); sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->vmx_res1 == NULL) { device_printf(dev, "could not map BAR1 memory\n"); return (ENXIO); } sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1); sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1); if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) { rid = PCIR_BAR(2); sc->vmx_msix_res = 
bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); } if (sc->vmx_msix_res == NULL) sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX; return (0); } static void vmxnet3_free_resources(struct vmxnet3_softc *sc) { device_t dev; int rid; dev = sc->vmx_dev; if (sc->vmx_res0 != NULL) { rid = PCIR_BAR(0); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0); sc->vmx_res0 = NULL; } if (sc->vmx_res1 != NULL) { rid = PCIR_BAR(1); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1); sc->vmx_res1 = NULL; } if (sc->vmx_msix_res != NULL) { rid = PCIR_BAR(2); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_msix_res); sc->vmx_msix_res = NULL; } } static int vmxnet3_check_version(struct vmxnet3_softc *sc) { device_t dev; uint32_t version; dev = sc->vmx_dev; version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS); if ((version & 0x01) == 0) { device_printf(dev, "unsupported hardware version %#x\n", version); return (ENOTSUP); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1); version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS); if ((version & 0x01) == 0) { device_printf(dev, "unsupported UPT version %#x\n", version); return (ENOTSUP); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1); return (0); } +static int +trunc_powerof2(int val) +{ + + return (1U << (fls(val) - 1)); +} + static void vmxnet3_initial_config(struct vmxnet3_softc *sc) { int nqueue, ndesc; nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue); if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1) nqueue = VMXNET3_DEF_TX_QUEUES; if (nqueue > mp_ncpus) nqueue = mp_ncpus; - sc->vmx_max_ntxqueues = nqueue; + sc->vmx_max_ntxqueues = trunc_powerof2(nqueue); nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue); if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1) nqueue = VMXNET3_DEF_RX_QUEUES; if (nqueue > mp_ncpus) nqueue = mp_ncpus; - sc->vmx_max_nrxqueues = nqueue; + sc->vmx_max_nrxqueues = trunc_powerof2(nqueue); if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) { sc->vmx_max_nrxqueues = 1; sc->vmx_max_ntxqueues = 1; } ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc); if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC) ndesc = VMXNET3_DEF_TX_NDESC; if (ndesc & VMXNET3_MASK_TX_NDESC) ndesc &= ~VMXNET3_MASK_TX_NDESC; sc->vmx_ntxdescs = ndesc; ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc); if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC) ndesc = VMXNET3_DEF_RX_NDESC; if (ndesc & VMXNET3_MASK_RX_NDESC) ndesc &= ~VMXNET3_MASK_RX_NDESC; sc->vmx_nrxdescs = ndesc; sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS; } static void vmxnet3_check_multiqueue(struct vmxnet3_softc *sc) { if (sc->vmx_intr_type != VMXNET3_IT_MSIX) goto out; /* BMV: Just use the maximum configured for now. */ sc->vmx_nrxqueues = sc->vmx_max_nrxqueues; sc->vmx_ntxqueues = sc->vmx_max_ntxqueues; if (sc->vmx_nrxqueues > 1) sc->vmx_flags |= VMXNET3_FLAG_RSS; return; out: sc->vmx_ntxqueues = 1; sc->vmx_nrxqueues = 1; } static int vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc) { device_t dev; int nmsix, cnt, required; dev = sc->vmx_dev; if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) return (1); /* Allocate an additional vector for the events interrupt. */ required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1; nmsix = pci_msix_count(dev); if (nmsix < required) return (1); cnt = required; if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) { sc->vmx_nintrs = required; return (0); } else pci_release_msi(dev); /* BMV TODO Fallback to sharing MSIX vectors if possible. 
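 * For example, 4 TX and 4 RX queues currently need 4 + 4 + 1 = 9
 * vectors; sharing one vector per TX/RX queue pair would cut that
 * to 4 + 1 = 5.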
*/ return (1); } static int vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc) { device_t dev; int nmsi, cnt, required; dev = sc->vmx_dev; required = 1; nmsi = pci_msi_count(dev); if (nmsi < required) return (1); cnt = required; if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) { sc->vmx_nintrs = 1; return (0); } else pci_release_msi(dev); return (1); } static int vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc) { sc->vmx_nintrs = 1; return (0); } static int vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags, struct vmxnet3_interrupt *intr) { struct resource *irq; irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid, flags); if (irq == NULL) return (ENXIO); intr->vmxi_irq = irq; intr->vmxi_rid = rid; return (0); } static int vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc) { int i, rid, flags, error; rid = 0; flags = RF_ACTIVE; if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) flags |= RF_SHAREABLE; else rid = 1; for (i = 0; i < sc->vmx_nintrs; i++, rid++) { error = vmxnet3_alloc_interrupt(sc, rid, flags, &sc->vmx_intrs[i]); if (error) return (error); } return (0); } static int vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_txqueue *txq; struct vmxnet3_rxqueue *rxq; struct vmxnet3_interrupt *intr; enum intr_type type; int i, error; dev = sc->vmx_dev; intr = &sc->vmx_intrs[0]; type = INTR_TYPE_NET | INTR_MPSAFE; for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) { txq = &sc->vmx_txq[i]; error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL, vmxnet3_txq_intr, txq, &intr->vmxi_handler); if (error) return (error); bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, "tq%d", i); txq->vxtxq_intr_idx = intr->vmxi_rid - 1; } for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) { rxq = &sc->vmx_rxq[i]; error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL, vmxnet3_rxq_intr, rxq, &intr->vmxi_handler); if (error) return (error); bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, "rq%d", i); rxq->vxrxq_intr_idx = intr->vmxi_rid - 1; } error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL, vmxnet3_event_intr, sc, &intr->vmxi_handler); if (error) return (error); bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, "event"); sc->vmx_event_intr_idx = intr->vmxi_rid - 1; return (0); } static int vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc) { struct vmxnet3_interrupt *intr; int i, error; intr = &sc->vmx_intrs[0]; error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, vmxnet3_legacy_intr, sc, &intr->vmxi_handler); for (i = 0; i < sc->vmx_ntxqueues; i++) sc->vmx_txq[i].vxtxq_intr_idx = 0; for (i = 0; i < sc->vmx_nrxqueues; i++) sc->vmx_rxq[i].vxrxq_intr_idx = 0; sc->vmx_event_intr_idx = 0; return (error); } static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc) { struct vmxnet3_txqueue *txq; struct vmxnet3_txq_shared *txs; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxq_shared *rxs; int i; sc->vmx_ds->evintr = sc->vmx_event_intr_idx; for (i = 0; i < sc->vmx_ntxqueues; i++) { txq = &sc->vmx_txq[i]; txs = txq->vxtxq_ts; txs->intr_idx = txq->vxtxq_intr_idx; } for (i = 0; i < sc->vmx_nrxqueues; i++) { rxq = &sc->vmx_rxq[i]; rxs = rxq->vxrxq_rs; rxs->intr_idx = rxq->vxrxq_intr_idx; } } static int vmxnet3_setup_interrupts(struct vmxnet3_softc *sc) { int error; error = vmxnet3_alloc_intr_resources(sc); if (error) return (error); switch (sc->vmx_intr_type) { case VMXNET3_IT_MSIX: error = vmxnet3_setup_msix_interrupts(sc); break; case VMXNET3_IT_MSI: case 
VMXNET3_IT_LEGACY: error = vmxnet3_setup_legacy_interrupt(sc); break; default: panic("%s: invalid interrupt type %d", __func__, sc->vmx_intr_type); } if (error == 0) vmxnet3_set_interrupt_idx(sc); return (error); } static int vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc) { device_t dev; uint32_t config; int error; dev = sc->vmx_dev; config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG); sc->vmx_intr_type = config & 0x03; sc->vmx_intr_mask_mode = (config >> 2) & 0x03; switch (sc->vmx_intr_type) { case VMXNET3_IT_AUTO: sc->vmx_intr_type = VMXNET3_IT_MSIX; /* FALLTHROUGH */ case VMXNET3_IT_MSIX: error = vmxnet3_alloc_msix_interrupts(sc); if (error == 0) break; sc->vmx_intr_type = VMXNET3_IT_MSI; /* FALLTHROUGH */ case VMXNET3_IT_MSI: error = vmxnet3_alloc_msi_interrupts(sc); if (error == 0) break; sc->vmx_intr_type = VMXNET3_IT_LEGACY; /* FALLTHROUGH */ case VMXNET3_IT_LEGACY: error = vmxnet3_alloc_legacy_interrupts(sc); if (error == 0) break; /* FALLTHROUGH */ default: sc->vmx_intr_type = -1; device_printf(dev, "cannot allocate any interrupt resources\n"); return (ENXIO); } return (error); } static void vmxnet3_free_interrupt(struct vmxnet3_softc *sc, struct vmxnet3_interrupt *intr) { device_t dev; dev = sc->vmx_dev; if (intr->vmxi_handler != NULL) { bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler); intr->vmxi_handler = NULL; } if (intr->vmxi_irq != NULL) { bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid, intr->vmxi_irq); intr->vmxi_irq = NULL; intr->vmxi_rid = -1; } } static void vmxnet3_free_interrupts(struct vmxnet3_softc *sc) { int i; for (i = 0; i < sc->vmx_nintrs; i++) vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]); if (sc->vmx_intr_type == VMXNET3_IT_MSI || sc->vmx_intr_type == VMXNET3_IT_MSIX) pci_release_msi(sc->vmx_dev); } #ifndef VMXNET3_LEGACY_TX static int vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc) { device_t dev; dev = sc->vmx_dev; sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT, taskqueue_thread_enqueue, &sc->vmx_tq); if (sc->vmx_tq == NULL) return (ENOMEM); return (0); } static void vmxnet3_start_taskqueue(struct vmxnet3_softc *sc) { device_t dev; int nthreads, error; dev = sc->vmx_dev; /* * The taskqueue is typically not frequently used, so a dedicated * thread for each queue is unnecessary. */ nthreads = MAX(1, sc->vmx_ntxqueues / 2); /* * Most drivers just ignore the return value - it only fails * with ENOMEM so an error is not likely. It is hard for us * to recover from an error here. 
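 * If thread creation does fail we only log it below and carry on;
 * deferred tasks would then simply never run.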
*/ error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET, "%s taskq", device_get_nameunit(dev)); if (error) device_printf(dev, "failed to start taskqueue: %d\n", error); } static void vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc) { struct vmxnet3_txqueue *txq; int i; if (sc->vmx_tq != NULL) { for (i = 0; i < sc->vmx_max_ntxqueues; i++) { txq = &sc->vmx_txq[i]; taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask); } } } static void vmxnet3_free_taskqueue(struct vmxnet3_softc *sc) { if (sc->vmx_tq != NULL) { taskqueue_free(sc->vmx_tq); sc->vmx_tq = NULL; } } #endif static int vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q) { struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxring *rxr; int i; rxq = &sc->vmx_rxq[q]; snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d", device_get_nameunit(sc->vmx_dev), q); mtx_init(&rxq->vxrxq_mtx, rxq->vxrxq_name, NULL, MTX_DEF); rxq->vxrxq_sc = sc; rxq->vxrxq_id = q; for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_rid = i; rxr->vxrxr_ndesc = sc->vmx_nrxdescs; rxr->vxrxr_rxbuf = malloc(rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_NOWAIT | M_ZERO); if (rxr->vxrxr_rxbuf == NULL) return (ENOMEM); rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs; } return (0); } static int vmxnet3_init_txq(struct vmxnet3_softc *sc, int q) { struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; txq = &sc->vmx_txq[q]; txr = &txq->vxtxq_cmd_ring; snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d", device_get_nameunit(sc->vmx_dev), q); mtx_init(&txq->vxtxq_mtx, txq->vxtxq_name, NULL, MTX_DEF); txq->vxtxq_sc = sc; txq->vxtxq_id = q; txr->vxtxr_ndesc = sc->vmx_ntxdescs; txr->vxtxr_txbuf = malloc(txr->vxtxr_ndesc * sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_NOWAIT | M_ZERO); if (txr->vxtxr_txbuf == NULL) return (ENOMEM); txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs; #ifndef VMXNET3_LEGACY_TX TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq); txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF, M_NOWAIT, &txq->vxtxq_mtx); if (txq->vxtxq_br == NULL) return (ENOMEM); #endif return (0); } static int vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc) { int i, error; /* * Only attempt to create multiple queues if MSIX is available. MSIX is * disabled by default because it's apparently broken for devices passed * through by at least ESXi 5.1. The hw.pci.honor_msi_blacklist tunable * must be set to zero for MSIX. This check prevents us from allocating * queue structures that we will not use.
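 * With MSI or a legacy interrupt everything is serviced by a single
 * handler anyway, so extra queue structures would only waste memory.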
*/ if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) { sc->vmx_max_nrxqueues = 1; sc->vmx_max_ntxqueues = 1; } sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) * sc->vmx_max_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO); sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) * sc->vmx_max_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL) return (ENOMEM); for (i = 0; i < sc->vmx_max_nrxqueues; i++) { error = vmxnet3_init_rxq(sc, i); if (error) return (error); } for (i = 0; i < sc->vmx_max_ntxqueues; i++) { error = vmxnet3_init_txq(sc, i); if (error) return (error); } return (0); } static void vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq) { struct vmxnet3_rxring *rxr; int i; rxq->vxrxq_sc = NULL; rxq->vxrxq_id = -1; for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; if (rxr->vxrxr_rxbuf != NULL) { free(rxr->vxrxr_rxbuf, M_DEVBUF); rxr->vxrxr_rxbuf = NULL; } } if (mtx_initialized(&rxq->vxrxq_mtx) != 0) mtx_destroy(&rxq->vxrxq_mtx); } static void vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq) { struct vmxnet3_txring *txr; txr = &txq->vxtxq_cmd_ring; txq->vxtxq_sc = NULL; txq->vxtxq_id = -1; #ifndef VMXNET3_LEGACY_TX if (txq->vxtxq_br != NULL) { buf_ring_free(txq->vxtxq_br, M_DEVBUF); txq->vxtxq_br = NULL; } #endif if (txr->vxtxr_txbuf != NULL) { free(txr->vxtxr_txbuf, M_DEVBUF); txr->vxtxr_txbuf = NULL; } if (mtx_initialized(&txq->vxtxq_mtx) != 0) mtx_destroy(&txq->vxtxq_mtx); } static void vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc) { int i; if (sc->vmx_rxq != NULL) { for (i = 0; i < sc->vmx_max_nrxqueues; i++) vmxnet3_destroy_rxq(&sc->vmx_rxq[i]); free(sc->vmx_rxq, M_DEVBUF); sc->vmx_rxq = NULL; } if (sc->vmx_txq != NULL) { for (i = 0; i < sc->vmx_max_ntxqueues; i++) vmxnet3_destroy_txq(&sc->vmx_txq[i]); free(sc->vmx_txq, M_DEVBUF); sc->vmx_txq = NULL; } } static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc) { device_t dev; uint8_t *kva; size_t size; int i, error; dev = sc->vmx_dev; size = sizeof(struct vmxnet3_driver_shared); error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma); if (error) { device_printf(dev, "cannot alloc shared memory\n"); return (error); } sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr; size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) + sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared); error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma); if (error) { device_printf(dev, "cannot alloc queue shared memory\n"); return (error); } sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr; kva = sc->vmx_qs; for (i = 0; i < sc->vmx_ntxqueues; i++) { sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva; kva += sizeof(struct vmxnet3_txq_shared); } for (i = 0; i < sc->vmx_nrxqueues; i++) { sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva; kva += sizeof(struct vmxnet3_rxq_shared); } if (sc->vmx_flags & VMXNET3_FLAG_RSS) { size = sizeof(struct vmxnet3_rss_shared); error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma); if (error) { device_printf(dev, "cannot alloc rss shared memory\n"); return (error); } sc->vmx_rss = (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr; } return (0); } static void vmxnet3_free_shared_data(struct vmxnet3_softc *sc) { if (sc->vmx_rss != NULL) { vmxnet3_dma_free(sc, &sc->vmx_rss_dma); sc->vmx_rss = NULL; } if (sc->vmx_qs != NULL) { vmxnet3_dma_free(sc, &sc->vmx_qs_dma); sc->vmx_qs = NULL; } if (sc->vmx_ds != NULL) { vmxnet3_dma_free(sc, &sc->vmx_ds_dma); sc->vmx_ds = NULL; } } static int 
vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; size_t descsz, compsz; int i, q, error; dev = sc->vmx_dev; for (q = 0; q < sc->vmx_ntxqueues; q++) { txq = &sc->vmx_txq[q]; txr = &txq->vxtxq_cmd_ring; txc = &txq->vxtxq_comp_ring; descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc); compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc); error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ VMXNET3_TX_MAXSIZE, /* maxsize */ VMXNET3_TX_MAXSEGS, /* nsegments */ VMXNET3_TX_MAXSEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &txr->vxtxr_txtag); if (error) { device_printf(dev, "unable to create Tx buffer tag for queue %d\n", q); return (error); } error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma); if (error) { device_printf(dev, "cannot alloc Tx descriptors for " "queue %d error %d\n", q, error); return (error); } txr->vxtxr_txd = (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr; error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma); if (error) { device_printf(dev, "cannot alloc Tx comp descriptors " "for queue %d error %d\n", q, error); return (error); } txc->vxcr_u.txcd = (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr; for (i = 0; i < txr->vxtxr_ndesc; i++) { error = bus_dmamap_create(txr->vxtxr_txtag, 0, &txr->vxtxr_txbuf[i].vtxb_dmamap); if (error) { device_printf(dev, "unable to create Tx buf " "dmamap for queue %d idx %d\n", q, i); return (error); } } } return (0); } static void vmxnet3_free_txq_data(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; struct vmxnet3_txbuf *txb; int i, q; dev = sc->vmx_dev; for (q = 0; q < sc->vmx_ntxqueues; q++) { txq = &sc->vmx_txq[q]; txr = &txq->vxtxq_cmd_ring; txc = &txq->vxtxq_comp_ring; for (i = 0; i < txr->vxtxr_ndesc; i++) { txb = &txr->vxtxr_txbuf[i]; if (txb->vtxb_dmamap != NULL) { bus_dmamap_destroy(txr->vxtxr_txtag, txb->vtxb_dmamap); txb->vtxb_dmamap = NULL; } } if (txc->vxcr_u.txcd != NULL) { vmxnet3_dma_free(sc, &txc->vxcr_dma); txc->vxcr_u.txcd = NULL; } if (txr->vxtxr_txd != NULL) { vmxnet3_dma_free(sc, &txr->vxtxr_dma); txr->vxtxr_txd = NULL; } if (txr->vxtxr_txtag != NULL) { bus_dma_tag_destroy(txr->vxtxr_txtag); txr->vxtxr_txtag = NULL; } } } static int vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; int descsz, compsz; int i, j, q, error; dev = sc->vmx_dev; for (q = 0; q < sc->vmx_nrxqueues; q++) { rxq = &sc->vmx_rxq[q]; rxc = &rxq->vxrxq_comp_ring; compsz = 0; for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; descsz = rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc); compsz += rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxcompdesc); error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUMPAGESIZE, /* maxsize */ 1, /* nsegments */ MJUMPAGESIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &rxr->vxrxr_rxtag); if (error) { device_printf(dev, "unable to create Rx buffer tag for " "queue %d\n", q); return (error); } error = vmxnet3_dma_malloc(sc, descsz, 512, &rxr->vxrxr_dma); if (error) { 
device_printf(dev, "cannot allocate Rx " "descriptors for queue %d/%d error %d\n", i, q, error); return (error); } rxr->vxrxr_rxd = (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr; } error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma); if (error) { device_printf(dev, "cannot alloc Rx comp descriptors " "for queue %d error %d\n", q, error); return (error); } rxc->vxcr_u.rxcd = (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr; for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; error = bus_dmamap_create(rxr->vxrxr_rxtag, 0, &rxr->vxrxr_spare_dmap); if (error) { device_printf(dev, "unable to create spare " "dmamap for queue %d/%d error %d\n", q, i, error); return (error); } for (j = 0; j < rxr->vxrxr_ndesc; j++) { error = bus_dmamap_create(rxr->vxrxr_rxtag, 0, &rxr->vxrxr_rxbuf[j].vrxb_dmamap); if (error) { device_printf(dev, "unable to create " "dmamap for queue %d/%d slot %d " "error %d\n", q, i, j, error); return (error); } } } } return (0); } static void vmxnet3_free_rxq_data(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxbuf *rxb; int i, j, q; dev = sc->vmx_dev; for (q = 0; q < sc->vmx_nrxqueues; q++) { rxq = &sc->vmx_rxq[q]; rxc = &rxq->vxrxq_comp_ring; for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; if (rxr->vxrxr_spare_dmap != NULL) { bus_dmamap_destroy(rxr->vxrxr_rxtag, rxr->vxrxr_spare_dmap); rxr->vxrxr_spare_dmap = NULL; } for (j = 0; j < rxr->vxrxr_ndesc; j++) { rxb = &rxr->vxrxr_rxbuf[j]; if (rxb->vrxb_dmamap != NULL) { bus_dmamap_destroy(rxr->vxrxr_rxtag, rxb->vrxb_dmamap); rxb->vrxb_dmamap = NULL; } } } if (rxc->vxcr_u.rxcd != NULL) { vmxnet3_dma_free(sc, &rxc->vxcr_dma); rxc->vxcr_u.rxcd = NULL; } for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; if (rxr->vxrxr_rxd != NULL) { vmxnet3_dma_free(sc, &rxr->vxrxr_dma); rxr->vxrxr_rxd = NULL; } if (rxr->vxrxr_rxtag != NULL) { bus_dma_tag_destroy(rxr->vxrxr_rxtag); rxr->vxrxr_rxtag = NULL; } } } } static int vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc) { int error; error = vmxnet3_alloc_txq_data(sc); if (error) return (error); error = vmxnet3_alloc_rxq_data(sc); if (error) return (error); return (0); } static void vmxnet3_free_queue_data(struct vmxnet3_softc *sc) { if (sc->vmx_rxq != NULL) vmxnet3_free_rxq_data(sc); if (sc->vmx_txq != NULL) vmxnet3_free_txq_data(sc); } static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc) { int error; error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma); if (error) device_printf(sc->vmx_dev, "unable to alloc multicast table\n"); else sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr; return (error); } static void vmxnet3_free_mcast_table(struct vmxnet3_softc *sc) { if (sc->vmx_mcast != NULL) { vmxnet3_dma_free(sc, &sc->vmx_mcast_dma); sc->vmx_mcast = NULL; } } static void vmxnet3_init_shared_data(struct vmxnet3_softc *sc) { struct vmxnet3_driver_shared *ds; struct vmxnet3_txqueue *txq; struct vmxnet3_txq_shared *txs; struct vmxnet3_rxqueue *rxq; struct vmxnet3_rxq_shared *rxs; int i; ds = sc->vmx_ds; /* * Initialize fields of the shared data that remains the same across * reinits. Note the shared data is zero'd when allocated. 
*/ ds->magic = VMXNET3_REV1_MAGIC; /* DriverInfo */ ds->version = VMXNET3_DRIVER_VERSION; ds->guest = VMXNET3_GOS_FREEBSD | #ifdef __LP64__ VMXNET3_GOS_64BIT; #else VMXNET3_GOS_32BIT; #endif ds->vmxnet3_revision = 1; ds->upt_version = 1; /* Misc. conf */ ds->driver_data = vtophys(sc); ds->driver_data_len = sizeof(struct vmxnet3_softc); ds->queue_shared = sc->vmx_qs_dma.dma_paddr; ds->queue_shared_len = sc->vmx_qs_dma.dma_size; ds->nrxsg_max = sc->vmx_max_rxsegs; /* RSS conf */ if (sc->vmx_flags & VMXNET3_FLAG_RSS) { ds->rss.version = 1; ds->rss.paddr = sc->vmx_rss_dma.dma_paddr; ds->rss.len = sc->vmx_rss_dma.dma_size; } /* Interrupt control. */ ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO; ds->nintr = sc->vmx_nintrs; ds->evintr = sc->vmx_event_intr_idx; ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL; for (i = 0; i < sc->vmx_nintrs; i++) ds->modlevel[i] = UPT1_IMOD_ADAPTIVE; /* Receive filter. */ ds->mcast_table = sc->vmx_mcast_dma.dma_paddr; ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size; /* Tx queues */ for (i = 0; i < sc->vmx_ntxqueues; i++) { txq = &sc->vmx_txq[i]; txs = txq->vxtxq_ts; txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr; txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc; txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr; txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc; txs->driver_data = vtophys(txq); txs->driver_data_len = sizeof(struct vmxnet3_txqueue); } /* Rx queues */ for (i = 0; i < sc->vmx_nrxqueues; i++) { rxq = &sc->vmx_rxq[i]; rxs = rxq->vxrxq_rs; rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr; rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc; rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr; rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc; rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr; rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc; rxs->driver_data = vtophys(rxq); rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue); } } static void vmxnet3_reinit_interface(struct vmxnet3_softc *sc) { struct ifnet *ifp; ifp = sc->vmx_ifp; /* Use the current MAC address. */ bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN); vmxnet3_set_lladdr(sc); ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD; if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; } static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc) { /* * Use the same key as the Linux driver until FreeBSD can do * RSS (presumably Toeplitz) in software. 
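 *
 * For reference, Toeplitz walks the input bits MSB-first and XORs a
 * sliding 32-bit window of the key into the hash whenever the input
 * bit is set.  A self-contained sketch (example_toeplitz is
 * illustrative; it needs a key of at least datalen + 4 bytes, which
 * the 40-byte key below provides for the usual hash inputs):
 *
 *	static uint32_t
 *	example_toeplitz(const uint8_t *key, const uint8_t *data,
 *	    int datalen)
 *	{
 *		uint32_t hash = 0, window;
 *		int i, b;
 *
 *		window = (key[0] << 24) | (key[1] << 16) |
 *		    (key[2] << 8) | key[3];
 *		for (i = 0; i < datalen; i++) {
 *			for (b = 7; b >= 0; b--) {
 *				if (data[i] & (1 << b))
 *					hash ^= window;
 *				window <<= 1;
 *				if (key[i + 4] & (1 << b))
 *					window |= 1;
 *			}
 *		}
 *		return (hash);
 *	}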
*/ static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = { 0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac, 0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28, 0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70, 0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3, 0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9, }; struct vmxnet3_driver_shared *ds; struct vmxnet3_rss_shared *rss; int i; ds = sc->vmx_ds; rss = sc->vmx_rss; rss->hash_type = UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 | UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6; rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ; rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE; rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE; memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE); for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) rss->ind_table[i] = i % sc->vmx_nrxqueues; } static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc) { struct ifnet *ifp; struct vmxnet3_driver_shared *ds; ifp = sc->vmx_ifp; ds = sc->vmx_ds; ds->mtu = ifp->if_mtu; ds->ntxqueue = sc->vmx_ntxqueues; ds->nrxqueue = sc->vmx_nrxqueues; ds->upt_features = 0; if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) ds->upt_features |= UPT1_F_CSUM; if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) ds->upt_features |= UPT1_F_VLAN; if (ifp->if_capenable & IFCAP_LRO) ds->upt_features |= UPT1_F_LRO; if (sc->vmx_flags & VMXNET3_FLAG_RSS) { ds->upt_features |= UPT1_F_RSS; vmxnet3_reinit_rss_shared_data(sc); } vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr); vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH, (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32); } static int vmxnet3_alloc_data(struct vmxnet3_softc *sc) { int error; error = vmxnet3_alloc_shared_data(sc); if (error) return (error); error = vmxnet3_alloc_queue_data(sc); if (error) return (error); error = vmxnet3_alloc_mcast_table(sc); if (error) return (error); vmxnet3_init_shared_data(sc); return (0); } static void vmxnet3_free_data(struct vmxnet3_softc *sc) { vmxnet3_free_mcast_table(sc); vmxnet3_free_queue_data(sc); vmxnet3_free_shared_data(sc); } static int vmxnet3_setup_interface(struct vmxnet3_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vmx_dev; ifp = sc->vmx_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "cannot allocate ifnet structure\n"); return (ENOSPC); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); #if __FreeBSD_version < 1000025 ifp->if_baudrate = 1000000000; #elif __FreeBSD_version < 1100011 if_initbaudrate(ifp, IF_Gbps(10)); #else ifp->if_baudrate = IF_Gbps(10); #endif ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = vmxnet3_init; ifp->if_ioctl = vmxnet3_ioctl; ifp->if_get_counter = vmxnet3_get_counter; ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = VMXNET3_TX_MAXSEGS; ifp->if_hw_tsomaxsegsize = VMXNET3_TX_MAXSEGSIZE; #ifdef VMXNET3_LEGACY_TX ifp->if_start = vmxnet3_start; ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1; IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1); IFQ_SET_READY(&ifp->if_snd); #else ifp->if_transmit = vmxnet3_txq_mq_start; ifp->if_qflush = vmxnet3_qflush; #endif vmxnet3_get_lladdr(sc); ether_ifattach(ifp, sc->vmx_lladdr); ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM; ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6; ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6; ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; ifp->if_capenable = ifp->if_capabilities; /* These capabilities are 
not enabled by default. */ ifp->if_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER; sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST); sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config, vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change, vmxnet3_media_status); ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO); return (0); } static void vmxnet3_evintr(struct vmxnet3_softc *sc) { device_t dev; struct ifnet *ifp; struct vmxnet3_txq_shared *ts; struct vmxnet3_rxq_shared *rs; uint32_t event; int reset; dev = sc->vmx_dev; ifp = sc->vmx_ifp; reset = 0; VMXNET3_CORE_LOCK(sc); /* Clear events. */ event = sc->vmx_ds->event; vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event); if (event & VMXNET3_EVENT_LINK) { vmxnet3_link_status(sc); if (sc->vmx_link_active != 0) vmxnet3_tx_start_all(sc); } if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) { reset = 1; vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS); ts = sc->vmx_txq[0].vxtxq_ts; if (ts->stopped != 0) device_printf(dev, "Tx queue error %#x\n", ts->error); rs = sc->vmx_rxq[0].vxrxq_rs; if (rs->stopped != 0) device_printf(dev, "Rx queue error %#x\n", rs->error); device_printf(dev, "Rx/Tx queue error event ... resetting\n"); } if (event & VMXNET3_EVENT_DIC) device_printf(dev, "device implementation change event\n"); if (event & VMXNET3_EVENT_DEBUG) device_printf(dev, "debug event\n"); if (reset != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vmxnet3_init_locked(sc); } VMXNET3_CORE_UNLOCK(sc); } static void vmxnet3_txq_eof(struct vmxnet3_txqueue *txq) { struct vmxnet3_softc *sc; struct ifnet *ifp; struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; struct vmxnet3_txcompdesc *txcd; struct vmxnet3_txbuf *txb; struct mbuf *m; u_int sop; sc = txq->vxtxq_sc; ifp = sc->vmx_ifp; txr = &txq->vxtxq_cmd_ring; txc = &txq->vxtxq_comp_ring; VMXNET3_TXQ_LOCK_ASSERT(txq); for (;;) { txcd = &txc->vxcr_u.txcd[txc->vxcr_next]; if (txcd->gen != txc->vxcr_gen) break; vmxnet3_barrier(sc, VMXNET3_BARRIER_RD); if (++txc->vxcr_next == txc->vxcr_ndesc) { txc->vxcr_next = 0; txc->vxcr_gen ^= 1; } sop = txr->vxtxr_next; txb = &txr->vxtxr_txbuf[sop]; if ((m = txb->vtxb_m) != NULL) { bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap); txq->vxtxq_stats.vmtxs_opackets++; txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len; if (m->m_flags & M_MCAST) txq->vxtxq_stats.vmtxs_omcasts++; m_freem(m); txb->vtxb_m = NULL; } txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc; } if (txr->vxtxr_head == txr->vxtxr_next) txq->vxtxq_watchdog = 0; } static int vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr) { struct ifnet *ifp; struct mbuf *m; struct vmxnet3_rxdesc *rxd; struct vmxnet3_rxbuf *rxb; bus_dma_tag_t tag; bus_dmamap_t dmap; bus_dma_segment_t segs[1]; int idx, clsize, btype, flags, nsegs, error; ifp = sc->vmx_ifp; tag = rxr->vxrxr_rxtag; dmap = rxr->vxrxr_spare_dmap; idx = rxr->vxrxr_fill; rxd = &rxr->vxrxr_rxd[idx]; rxb = &rxr->vxrxr_rxbuf[idx]; #ifdef VMXNET3_FAILPOINTS KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS); if (rxr->vxrxr_rid != 0) KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS); #endif if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) { flags = M_PKTHDR; clsize = MCLBYTES; btype = VMXNET3_BTYPE_HEAD; } else { #if __FreeBSD_version < 902001 /* * These 
mbufs will never be used for the start of a frame. * Roughly prior to branching releng/9.2, the load_mbuf_sg() * required the mbuf to always be a packet header. Avoid * unnecessary mbuf initialization in newer versions where * that is not the case. */ flags = M_PKTHDR; #else flags = 0; #endif clsize = MJUMPAGESIZE; btype = VMXNET3_BTYPE_BODY; } m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize); if (m == NULL) { sc->vmx_stats.vmst_mgetcl_failed++; return (ENOBUFS); } if (btype == VMXNET3_BTYPE_HEAD) { m->m_len = m->m_pkthdr.len = clsize; m_adj(m, ETHER_ALIGN); } else m->m_len = clsize; error = bus_dmamap_load_mbuf_sg(tag, dmap, m, &segs[0], &nsegs, BUS_DMA_NOWAIT); if (error) { m_freem(m); sc->vmx_stats.vmst_mbuf_load_failed++; return (error); } KASSERT(nsegs == 1, ("%s: mbuf %p with too many segments %d", __func__, m, nsegs)); #if __FreeBSD_version < 902001 if (btype == VMXNET3_BTYPE_BODY) m->m_flags &= ~M_PKTHDR; #endif if (rxb->vrxb_m != NULL) { bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(tag, rxb->vrxb_dmamap); } rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap; rxb->vrxb_dmamap = dmap; rxb->vrxb_m = m; rxd->addr = segs[0].ds_addr; rxd->len = segs[0].ds_len; rxd->btype = btype; rxd->gen = rxr->vxrxr_gen; vmxnet3_rxr_increment_fill(rxr); return (0); } static void vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq, struct vmxnet3_rxring *rxr, int idx) { struct vmxnet3_rxdesc *rxd; rxd = &rxr->vxrxr_rxd[idx]; rxd->gen = rxr->vxrxr_gen; vmxnet3_rxr_increment_fill(rxr); } static void vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq) { struct vmxnet3_softc *sc; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxcompdesc *rxcd; int idx, eof; sc = rxq->vxrxq_sc; rxc = &rxq->vxrxq_comp_ring; do { rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next]; if (rxcd->gen != rxc->vxcr_gen) break; /* Not expected. 
*/ vmxnet3_barrier(sc, VMXNET3_BARRIER_RD); if (++rxc->vxcr_next == rxc->vxcr_ndesc) { rxc->vxcr_next = 0; rxc->vxcr_gen ^= 1; } idx = rxcd->rxd_idx; eof = rxcd->eop; if (rxcd->qid < sc->vmx_nrxqueues) rxr = &rxq->vxrxq_cmd_ring[0]; else rxr = &rxq->vxrxq_cmd_ring[1]; vmxnet3_rxq_eof_discard(rxq, rxr, idx); } while (!eof); } static void vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m) { if (rxcd->ipv4) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (rxcd->ipcsum_ok) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; } if (!rxcd->fragment) { if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; } } } static void vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq, struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m) { struct vmxnet3_softc *sc; struct ifnet *ifp; sc = rxq->vxrxq_sc; ifp = sc->vmx_ifp; if (rxcd->error) { rxq->vxrxq_stats.vmrxs_ierrors++; m_freem(m); return; } #ifdef notyet switch (rxcd->rss_type) { case VMXNET3_RCD_RSS_TYPE_IPV4: m->m_pkthdr.flowid = rxcd->rss_hash; M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4); break; case VMXNET3_RCD_RSS_TYPE_TCPIPV4: m->m_pkthdr.flowid = rxcd->rss_hash; M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4); break; case VMXNET3_RCD_RSS_TYPE_IPV6: m->m_pkthdr.flowid = rxcd->rss_hash; M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6); break; case VMXNET3_RCD_RSS_TYPE_TCPIPV6: m->m_pkthdr.flowid = rxcd->rss_hash; M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6); break; default: /* VMXNET3_RCD_RSS_TYPE_NONE */ m->m_pkthdr.flowid = rxq->vxrxq_id; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); break; } #else m->m_pkthdr.flowid = rxq->vxrxq_id; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); #endif if (!rxcd->no_csum) vmxnet3_rx_csum(rxcd, m); if (rxcd->vlan) { m->m_flags |= M_VLANTAG; m->m_pkthdr.ether_vtag = rxcd->vtag; } rxq->vxrxq_stats.vmrxs_ipackets++; rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len; VMXNET3_RXQ_UNLOCK(rxq); (*ifp->if_input)(ifp, m); VMXNET3_RXQ_LOCK(rxq); } static void vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq) { struct vmxnet3_softc *sc; struct ifnet *ifp; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; struct vmxnet3_rxdesc *rxd; struct vmxnet3_rxcompdesc *rxcd; struct mbuf *m, *m_head, *m_tail; int idx, length; sc = rxq->vxrxq_sc; ifp = sc->vmx_ifp; rxc = &rxq->vxrxq_comp_ring; VMXNET3_RXQ_LOCK_ASSERT(rxq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; m_head = rxq->vxrxq_mhead; rxq->vxrxq_mhead = NULL; m_tail = rxq->vxrxq_mtail; rxq->vxrxq_mtail = NULL; MPASS(m_head == NULL || m_tail != NULL); for (;;) { rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next]; if (rxcd->gen != rxc->vxcr_gen) { rxq->vxrxq_mhead = m_head; rxq->vxrxq_mtail = m_tail; break; } vmxnet3_barrier(sc, VMXNET3_BARRIER_RD); if (++rxc->vxcr_next == rxc->vxcr_ndesc) { rxc->vxcr_next = 0; rxc->vxcr_gen ^= 1; } idx = rxcd->rxd_idx; length = rxcd->len; if (rxcd->qid < sc->vmx_nrxqueues) rxr = &rxq->vxrxq_cmd_ring[0]; else rxr = &rxq->vxrxq_cmd_ring[1]; rxd = &rxr->vxrxr_rxd[idx]; m = rxr->vxrxr_rxbuf[idx].vrxb_m; KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf", __func__, rxcd->qid, idx)); /* * The host may skip descriptors. We detect this when this * descriptor does not match the previous fill index. Catch * up with the host now. 
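 *
 * vmxnet3_rxr_increment_fill() (defined in the driver's header, not
 * shown in this file) advances the fill index and flips the ring's
 * generation bit on wrap-around; it is presumably equivalent to:
 *
 *	static __inline void
 *	example_rxr_increment_fill(struct vmxnet3_rxring *rxr)
 *	{
 *		if (++rxr->vxrxr_fill == rxr->vxrxr_ndesc) {
 *			rxr->vxrxr_fill = 0;
 *			rxr->vxrxr_gen ^= 1;
 *		}
 *	}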
*/ if (__predict_false(rxr->vxrxr_fill != idx)) { while (rxr->vxrxr_fill != idx) { rxr->vxrxr_rxd[rxr->vxrxr_fill].gen = rxr->vxrxr_gen; vmxnet3_rxr_increment_fill(rxr); } } if (rxcd->sop) { KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD, ("%s: start of frame w/o head buffer", __func__)); KASSERT(rxr == &rxq->vxrxq_cmd_ring[0], ("%s: start of frame not in ring 0", __func__)); KASSERT((idx % sc->vmx_rx_max_chain) == 0, ("%s: start of frame at unexpected index %d (%d)", __func__, idx, sc->vmx_rx_max_chain)); KASSERT(m_head == NULL, ("%s: duplicate start of frame?", __func__)); if (length == 0) { /* Just ignore this descriptor. */ vmxnet3_rxq_eof_discard(rxq, rxr, idx); goto nextp; } if (vmxnet3_newbuf(sc, rxr) != 0) { rxq->vxrxq_stats.vmrxs_iqdrops++; vmxnet3_rxq_eof_discard(rxq, rxr, idx); if (!rxcd->eop) vmxnet3_rxq_discard_chain(rxq); goto nextp; } m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = length; m->m_pkthdr.csum_flags = 0; m_head = m_tail = m; } else { KASSERT(rxd->btype == VMXNET3_BTYPE_BODY, ("%s: non start of frame w/o body buffer", __func__)); KASSERT(m_head != NULL, ("%s: frame not started?", __func__)); if (vmxnet3_newbuf(sc, rxr) != 0) { rxq->vxrxq_stats.vmrxs_iqdrops++; vmxnet3_rxq_eof_discard(rxq, rxr, idx); if (!rxcd->eop) vmxnet3_rxq_discard_chain(rxq); m_freem(m_head); m_head = m_tail = NULL; goto nextp; } m->m_len = length; m_head->m_pkthdr.len += length; m_tail->m_next = m; m_tail = m; } if (rxcd->eop) { vmxnet3_rxq_input(rxq, rxcd, m_head); m_head = m_tail = NULL; /* Must recheck after dropping the Rx lock. */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } nextp: if (__predict_false(rxq->vxrxq_rs->update_rxhead)) { int qid = rxcd->qid; bus_size_t r; idx = (idx + 1) % rxr->vxrxr_ndesc; if (qid >= sc->vmx_nrxqueues) { qid -= sc->vmx_nrxqueues; r = VMXNET3_BAR0_RXH2(qid); } else r = VMXNET3_BAR0_RXH1(qid); vmxnet3_write_bar0(sc, r, idx); } } } static void vmxnet3_legacy_intr(void *xsc) { struct vmxnet3_softc *sc; struct vmxnet3_rxqueue *rxq; struct vmxnet3_txqueue *txq; sc = xsc; rxq = &sc->vmx_rxq[0]; txq = &sc->vmx_txq[0]; if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) { if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0) return; } if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_all_intrs(sc); if (sc->vmx_ds->event != 0) vmxnet3_evintr(sc); VMXNET3_RXQ_LOCK(rxq); vmxnet3_rxq_eof(rxq); VMXNET3_RXQ_UNLOCK(rxq); VMXNET3_TXQ_LOCK(txq); vmxnet3_txq_eof(txq); vmxnet3_txq_start(txq); VMXNET3_TXQ_UNLOCK(txq); vmxnet3_enable_all_intrs(sc); } static void vmxnet3_txq_intr(void *xtxq) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; txq = xtxq; sc = txq->vxtxq_sc; if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx); VMXNET3_TXQ_LOCK(txq); vmxnet3_txq_eof(txq); vmxnet3_txq_start(txq); VMXNET3_TXQ_UNLOCK(txq); vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx); } static void vmxnet3_rxq_intr(void *xrxq) { struct vmxnet3_softc *sc; struct vmxnet3_rxqueue *rxq; rxq = xrxq; sc = rxq->vxrxq_sc; if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx); VMXNET3_RXQ_LOCK(rxq); vmxnet3_rxq_eof(rxq); VMXNET3_RXQ_UNLOCK(rxq); vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx); } static void vmxnet3_event_intr(void *xsc) { struct vmxnet3_softc *sc; sc = xsc; if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE) vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx); if (sc->vmx_ds->event != 0) vmxnet3_evintr(sc); vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx); } static void vmxnet3_txstop(struct
vmxnet3_softc *sc, struct vmxnet3_txqueue *txq) { struct vmxnet3_txring *txr; struct vmxnet3_txbuf *txb; int i; txr = &txq->vxtxq_cmd_ring; for (i = 0; i < txr->vxtxr_ndesc; i++) { txb = &txr->vxtxr_txbuf[i]; if (txb->vtxb_m == NULL) continue; bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap); m_freem(txb->vtxb_m); txb->vtxb_m = NULL; } } static void vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq) { struct vmxnet3_rxring *rxr; struct vmxnet3_rxbuf *rxb; int i, j; if (rxq->vxrxq_mhead != NULL) { m_freem(rxq->vxrxq_mhead); rxq->vxrxq_mhead = NULL; rxq->vxrxq_mtail = NULL; } for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; for (j = 0; j < rxr->vxrxr_ndesc; j++) { rxb = &rxr->vxrxr_rxbuf[j]; if (rxb->vrxb_m == NULL) continue; bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap); m_freem(rxb->vrxb_m); rxb->vrxb_m = NULL; } } } static void vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc) { struct vmxnet3_rxqueue *rxq; struct vmxnet3_txqueue *txq; int i; for (i = 0; i < sc->vmx_nrxqueues; i++) { rxq = &sc->vmx_rxq[i]; VMXNET3_RXQ_LOCK(rxq); VMXNET3_RXQ_UNLOCK(rxq); } for (i = 0; i < sc->vmx_ntxqueues; i++) { txq = &sc->vmx_txq[i]; VMXNET3_TXQ_LOCK(txq); VMXNET3_TXQ_UNLOCK(txq); } } static void vmxnet3_stop(struct vmxnet3_softc *sc) { struct ifnet *ifp; int q; ifp = sc->vmx_ifp; VMXNET3_CORE_LOCK_ASSERT(sc); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; sc->vmx_link_active = 0; callout_stop(&sc->vmx_tick); /* Disable interrupts. */ vmxnet3_disable_all_intrs(sc); vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE); vmxnet3_stop_rendezvous(sc); for (q = 0; q < sc->vmx_ntxqueues; q++) vmxnet3_txstop(sc, &sc->vmx_txq[q]); for (q = 0; q < sc->vmx_nrxqueues; q++) vmxnet3_rxstop(sc, &sc->vmx_rxq[q]); vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET); } static void vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq) { struct vmxnet3_txring *txr; struct vmxnet3_comp_ring *txc; txr = &txq->vxtxq_cmd_ring; txr->vxtxr_head = 0; txr->vxtxr_next = 0; txr->vxtxr_gen = VMXNET3_INIT_GEN; bzero(txr->vxtxr_txd, txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc)); txc = &txq->vxtxq_comp_ring; txc->vxcr_next = 0; txc->vxcr_gen = VMXNET3_INIT_GEN; bzero(txc->vxcr_u.txcd, txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc)); } static int vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq) { struct ifnet *ifp; struct vmxnet3_rxring *rxr; struct vmxnet3_comp_ring *rxc; int i, populate, idx, frame_size, error; ifp = sc->vmx_ifp; frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) + ifp->if_mtu; /* * If the MTU causes us to exceed what a regular sized cluster can * handle, we allocate a second MJUMPAGESIZE cluster after it in * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters. * * Keep rx_max_chain a divisor of the maximum Rx ring size to make * our life easier. We do not support changing the ring size after * the attach. */ if (frame_size <= MCLBYTES) sc->vmx_rx_max_chain = 1; else sc->vmx_rx_max_chain = 2; /* * Only populate ring 1 if the configuration will take advantage * of it. That is either when LRO is enabled or the frame size * exceeds what ring 0 can contain. 
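 *
 * A worked example of both rules (numbers only, not driver code):
 *
 *	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) + MTU
 *	           = 2 + 18 + 1500 = 1520 for the default MTU, which fits
 *	             one MCLBYTES (2048) head buffer, so ring 1 stays
 *	             empty unless LRO is on;
 *	           = 2 + 18 + 9000 = 9020 for a jumbo MTU, which exceeds
 *	             MCLBYTES + MJUMPAGESIZE (2048 + 4096 = 6144), so
 *	             ring 1 must be populated as well.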
*/ if ((ifp->if_capenable & IFCAP_LRO) == 0 && frame_size <= MCLBYTES + MJUMPAGESIZE) populate = 1; else populate = VMXNET3_RXRINGS_PERQ; for (i = 0; i < populate; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_fill = 0; rxr->vxrxr_gen = VMXNET3_INIT_GEN; bzero(rxr->vxrxr_rxd, rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc)); for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) { error = vmxnet3_newbuf(sc, rxr); if (error) return (error); } } for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) { rxr = &rxq->vxrxq_cmd_ring[i]; rxr->vxrxr_fill = 0; rxr->vxrxr_gen = 0; bzero(rxr->vxrxr_rxd, rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc)); } rxc = &rxq->vxrxq_comp_ring; rxc->vxcr_next = 0; rxc->vxcr_gen = VMXNET3_INIT_GEN; bzero(rxc->vxcr_u.rxcd, rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc)); return (0); } static int vmxnet3_reinit_queues(struct vmxnet3_softc *sc) { device_t dev; int q, error; dev = sc->vmx_dev; for (q = 0; q < sc->vmx_ntxqueues; q++) vmxnet3_txinit(sc, &sc->vmx_txq[q]); for (q = 0; q < sc->vmx_nrxqueues; q++) { error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]); if (error) { device_printf(dev, "cannot populate Rx queue %d\n", q); return (error); } } return (0); } static int vmxnet3_enable_device(struct vmxnet3_softc *sc) { int q; if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) { device_printf(sc->vmx_dev, "device enable command failed!\n"); return (1); } /* Reset the Rx queue heads. */ for (q = 0; q < sc->vmx_nrxqueues; q++) { vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0); vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0); } return (0); } static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc) { struct ifnet *ifp; ifp = sc->vmx_ifp; vmxnet3_set_rxfilter(sc); if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter, sizeof(sc->vmx_ds->vlan_filter)); else bzero(sc->vmx_ds->vlan_filter, sizeof(sc->vmx_ds->vlan_filter)); vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER); } static int vmxnet3_reinit(struct vmxnet3_softc *sc) { vmxnet3_reinit_interface(sc); vmxnet3_reinit_shared_data(sc); if (vmxnet3_reinit_queues(sc) != 0) return (ENXIO); if (vmxnet3_enable_device(sc) != 0) return (ENXIO); vmxnet3_reinit_rxfilters(sc); return (0); } static void vmxnet3_init_locked(struct vmxnet3_softc *sc) { struct ifnet *ifp; ifp = sc->vmx_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; vmxnet3_stop(sc); if (vmxnet3_reinit(sc) != 0) { vmxnet3_stop(sc); return; } ifp->if_drv_flags |= IFF_DRV_RUNNING; vmxnet3_link_status(sc); vmxnet3_enable_all_intrs(sc); callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc); } static void vmxnet3_init(void *xsc) { struct vmxnet3_softc *sc; sc = xsc; VMXNET3_CORE_LOCK(sc); vmxnet3_init_locked(sc); VMXNET3_CORE_UNLOCK(sc); } /* * BMV: Much of this can go away once we finally have offsets in * the mbuf packet header. Bug andre@. */ static int vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m, int *etype, int *proto, int *start) { struct ether_vlan_header *evh; int offset; #if defined(INET) struct ip *ip = NULL; struct ip iphdr; #endif #if defined(INET6) struct ip6_hdr *ip6 = NULL; struct ip6_hdr ip6hdr; #endif evh = mtod(m, struct ether_vlan_header *); if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { /* BMV: We should handle nested VLAN tags too. 
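 *
 * The nested-tag handling this TODO asks for could look roughly like
 * the sketch below (example_skip_vlan_tags is made up and assumes the
 * headers sit contiguously in the first mbuf):
 *
 *	static int
 *	example_skip_vlan_tags(struct mbuf *m, int *etype)
 *	{
 *		struct ether_vlan_header *evh;
 *		int offset;
 *
 *		evh = mtod(m, struct ether_vlan_header *);
 *		*etype = ntohs(evh->evl_encap_proto);
 *		offset = sizeof(struct ether_header);
 *		while (*etype == ETHERTYPE_VLAN) {
 *			*etype = ntohs(*(uint16_t *)(mtod(m, caddr_t) +
 *			    offset + 2));
 *			offset += ETHER_VLAN_ENCAP_LEN;
 *		}
 *		return (offset);	// start of the L3 header
 *	}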
*/ *etype = ntohs(evh->evl_proto); offset = sizeof(struct ether_vlan_header); } else { *etype = ntohs(evh->evl_encap_proto); offset = sizeof(struct ether_header); } switch (*etype) { #if defined(INET) case ETHERTYPE_IP: if (__predict_false(m->m_len < offset + sizeof(struct ip))) { m_copydata(m, offset, sizeof(struct ip), (caddr_t) &iphdr); ip = &iphdr; } else ip = mtodo(m, offset); *proto = ip->ip_p; *start = offset + (ip->ip_hl << 2); break; #endif #if defined(INET6) case ETHERTYPE_IPV6: if (__predict_false(m->m_len < offset + sizeof(struct ip6_hdr))) { m_copydata(m, offset, sizeof(struct ip6_hdr), (caddr_t) &ip6hdr); ip6 = &ip6hdr; } else ip6 = mtodo(m, offset); *proto = -1; *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); /* Assert the network stack sent us a valid packet. */ KASSERT(*start > offset, ("%s: mbuf %p start %d offset %d proto %d", __func__, m, *start, offset, *proto)); break; #endif default: return (EINVAL); } if (m->m_pkthdr.csum_flags & CSUM_TSO) { struct tcphdr *tcp, tcphdr; uint16_t sum; if (__predict_false(*proto != IPPROTO_TCP)) { /* Likely failed to correctly parse the mbuf. */ return (EINVAL); } txq->vxtxq_stats.vmtxs_tso++; switch (*etype) { #if defined(INET) case ETHERTYPE_IP: sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); break; #endif #if defined(INET6) case ETHERTYPE_IPV6: sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); break; #endif default: sum = 0; break; } if (m->m_len < *start + sizeof(struct tcphdr)) { m_copyback(m, *start + offsetof(struct tcphdr, th_sum), sizeof(uint16_t), (caddr_t) &sum); m_copydata(m, *start, sizeof(struct tcphdr), (caddr_t) &tcphdr); tcp = &tcphdr; } else { tcp = mtodo(m, *start); tcp->th_sum = sum; } /* * For TSO, the size of the protocol header is also * included in the descriptor header size. 
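 *
 * A worked example of that rule: for an untagged IPv4 TSO frame with
 * no IP or TCP options, *start is 14 (Ethernet) + 20 (IPv4) = 34 at
 * this point, and adding the TCP header below yields the value the
 * device expects:
 *
 *	hlen = 34 + (tcp->th_off << 2) = 34 + 20 = 54 bytes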
*/ *start += (tcp->th_off << 2); } else txq->vxtxq_stats.vmtxs_csum++; return (0); } static int vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0, bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs) { struct vmxnet3_txring *txr; struct mbuf *m; bus_dma_tag_t tag; int error; txr = &txq->vxtxq_cmd_ring; m = *m0; tag = txr->vxtxr_txtag; error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0); if (error == 0 || error != EFBIG) return (error); m = m_defrag(m, M_NOWAIT); if (m != NULL) { *m0 = m; error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0); } else error = ENOBUFS; if (error) { m_freem(*m0); *m0 = NULL; txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++; } else txq->vxtxq_sc->vmx_stats.vmst_defragged++; return (error); } static void vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap) { struct vmxnet3_txring *txr; txr = &txq->vxtxq_cmd_ring; bus_dmamap_unload(txr->vxtxr_txtag, dmap); } static int vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0) { struct vmxnet3_softc *sc; struct vmxnet3_txring *txr; struct vmxnet3_txdesc *txd, *sop; struct mbuf *m; bus_dmamap_t dmap; bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS]; int i, gen, nsegs, etype, proto, start, error; sc = txq->vxtxq_sc; start = 0; txd = NULL; txr = &txq->vxtxq_cmd_ring; dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap; error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs); if (error) return (error); m = *m0; M_ASSERTPKTHDR(m); KASSERT(nsegs <= VMXNET3_TX_MAXSEGS, ("%s: mbuf %p with too many segments %d", __func__, m, nsegs)); if (VMXNET3_TXRING_AVAIL(txr) < nsegs) { txq->vxtxq_stats.vmtxs_full++; vmxnet3_txq_unload_mbuf(txq, dmap); return (ENOSPC); } else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) { error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start); if (error) { txq->vxtxq_stats.vmtxs_offload_failed++; vmxnet3_txq_unload_mbuf(txq, dmap); m_freem(m); *m0 = NULL; return (error); } } txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m; sop = &txr->vxtxr_txd[txr->vxtxr_head]; gen = txr->vxtxr_gen ^ 1; /* Owned by cpu (yet) */ for (i = 0; i < nsegs; i++) { txd = &txr->vxtxr_txd[txr->vxtxr_head]; txd->addr = segs[i].ds_addr; txd->len = segs[i].ds_len; txd->gen = gen; txd->dtype = 0; txd->offload_mode = VMXNET3_OM_NONE; txd->offload_pos = 0; txd->hlen = 0; txd->eop = 0; txd->compreq = 0; txd->vtag_mode = 0; txd->vtag = 0; if (++txr->vxtxr_head == txr->vxtxr_ndesc) { txr->vxtxr_head = 0; txr->vxtxr_gen ^= 1; } gen = txr->vxtxr_gen; } txd->eop = 1; txd->compreq = 1; if (m->m_flags & M_VLANTAG) { sop->vtag_mode = 1; sop->vtag = m->m_pkthdr.ether_vtag; } if (m->m_pkthdr.csum_flags & CSUM_TSO) { sop->offload_mode = VMXNET3_OM_TSO; sop->hlen = start; sop->offload_pos = m->m_pkthdr.tso_segsz; } else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD | VMXNET3_CSUM_OFFLOAD_IPV6)) { sop->offload_mode = VMXNET3_OM_CSUM; sop->hlen = start; sop->offload_pos = start + m->m_pkthdr.csum_data; } /* Finally, change the ownership. 
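 *
 * Reduced to its essentials, the hand-off below is (illustrative
 * helper, not driver code): write every field of the start-of-packet
 * descriptor first, order the stores with a write barrier, and only
 * then flip the generation bit that passes ownership to the device.
 *
 *	static void
 *	example_publish(struct vmxnet3_softc *sc,
 *	    struct vmxnet3_txdesc *sop)
 *	{
 *		vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
 *		sop->gen ^= 1;		// device now owns the descriptor
 *	}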
*/ vmxnet3_barrier(sc, VMXNET3_BARRIER_WR); sop->gen ^= 1; txq->vxtxq_ts->npending += nsegs; if (txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) { txq->vxtxq_ts->npending = 0; vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), txr->vxtxr_head); } return (0); } #ifdef VMXNET3_LEGACY_TX static void vmxnet3_start_locked(struct ifnet *ifp) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; struct vmxnet3_txring *txr; struct mbuf *m_head; int tx, avail; sc = ifp->if_softc; txq = &sc->vmx_txq[0]; txr = &txq->vxtxq_cmd_ring; tx = 0; VMXNET3_TXQ_LOCK_ASSERT(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->vmx_link_active == 0) return; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2) break; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* Assume worst case if this mbuf is the head of a chain. */ if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) { IFQ_DRV_PREPEND(&ifp->if_snd, m_head); break; } if (vmxnet3_txq_encap(txq, &m_head) != 0) { if (m_head != NULL) IFQ_DRV_PREPEND(&ifp->if_snd, m_head); break; } tx++; ETHER_BPF_MTAP(ifp, m_head); } if (tx > 0) txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT; } static void vmxnet3_start(struct ifnet *ifp) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; sc = ifp->if_softc; txq = &sc->vmx_txq[0]; VMXNET3_TXQ_LOCK(txq); vmxnet3_start_locked(ifp); VMXNET3_TXQ_UNLOCK(txq); } #else /* !VMXNET3_LEGACY_TX */ static int vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m) { struct vmxnet3_softc *sc; struct vmxnet3_txring *txr; struct buf_ring *br; struct ifnet *ifp; int tx, avail, error; sc = txq->vxtxq_sc; br = txq->vxtxq_br; ifp = sc->vmx_ifp; txr = &txq->vxtxq_cmd_ring; tx = 0; error = 0; VMXNET3_TXQ_LOCK_ASSERT(txq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->vmx_link_active == 0) { if (m != NULL) error = drbr_enqueue(ifp, br, m); return (error); } if (m != NULL) { error = drbr_enqueue(ifp, br, m); if (error) return (error); } while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) { m = drbr_peek(ifp, br); if (m == NULL) break; /* Assume worst case if this mbuf is the head of a chain.
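 *
 * "Worst case" means the full VMXNET3_TX_MAXSEGS descriptors: a
 * multi-buffer chain may map to up to that many DMA segments, and the
 * exact count is unknown until bus_dmamap_load_mbuf_sg() runs, so the
 * ring must have room for the maximum before the mbuf is dequeued.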
*/ if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) { drbr_putback(ifp, br, m); break; } if (vmxnet3_txq_encap(txq, &m) != 0) { if (m != NULL) drbr_putback(ifp, br, m); else drbr_advance(ifp, br); break; } drbr_advance(ifp, br); tx++; ETHER_BPF_MTAP(ifp, m); } if (tx > 0) txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT; return (0); } static int vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; int i, ntxq, error; sc = ifp->if_softc; ntxq = sc->vmx_ntxqueues; /* check if flowid is set */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % ntxq; else i = curcpu % ntxq; txq = &sc->vmx_txq[i]; if (VMXNET3_TXQ_TRYLOCK(txq) != 0) { error = vmxnet3_txq_mq_start_locked(txq, m); VMXNET3_TXQ_UNLOCK(txq); } else { error = drbr_enqueue(ifp, txq->vxtxq_br, m); taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask); } return (error); } static void vmxnet3_txq_tq_deferred(void *xtxq, int pending) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; txq = xtxq; sc = txq->vxtxq_sc; VMXNET3_TXQ_LOCK(txq); if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br)) vmxnet3_txq_mq_start_locked(txq, NULL); VMXNET3_TXQ_UNLOCK(txq); } #endif /* VMXNET3_LEGACY_TX */ static void vmxnet3_txq_start(struct vmxnet3_txqueue *txq) { struct vmxnet3_softc *sc; struct ifnet *ifp; sc = txq->vxtxq_sc; ifp = sc->vmx_ifp; #ifdef VMXNET3_LEGACY_TX if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) vmxnet3_start_locked(ifp); #else if (!drbr_empty(ifp, txq->vxtxq_br)) vmxnet3_txq_mq_start_locked(txq, NULL); #endif } static void vmxnet3_tx_start_all(struct vmxnet3_softc *sc) { struct vmxnet3_txqueue *txq; int i; VMXNET3_CORE_LOCK_ASSERT(sc); for (i = 0; i < sc->vmx_ntxqueues; i++) { txq = &sc->vmx_txq[i]; VMXNET3_TXQ_LOCK(txq); vmxnet3_txq_start(txq); VMXNET3_TXQ_UNLOCK(txq); } } static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag) { struct ifnet *ifp; int idx, bit; ifp = sc->vmx_ifp; idx = (tag >> 5) & 0x7F; bit = tag & 0x1F; if (tag == 0 || tag > 4095) return; VMXNET3_CORE_LOCK(sc); /* Update our private VLAN bitvector. 
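 *
 * The bitvector covers all 4096 VLAN IDs as 128 32-bit words, so the
 * arithmetic above is a word/bit split (illustrative helper):
 *
 *	static void
 *	example_vlan_bit(uint16_t tag, int *idx, int *bit)
 *	{
 *		*idx = (tag >> 5) & 0x7F;	// tag / 32: which word
 *		*bit = tag & 0x1F;		// tag % 32: which bit
 *	}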
*/ if (add) sc->vmx_vlan_filter[idx] |= (1 << bit); else sc->vmx_vlan_filter[idx] &= ~(1 << bit); if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { if (add) sc->vmx_ds->vlan_filter[idx] |= (1 << bit); else sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit); vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER); } VMXNET3_CORE_UNLOCK(sc); } static void vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag) { if (ifp->if_softc == arg) vmxnet3_update_vlan_filter(arg, 1, tag); } static void vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag) { if (ifp->if_softc == arg) vmxnet3_update_vlan_filter(arg, 0, tag); } static void vmxnet3_set_rxfilter(struct vmxnet3_softc *sc) { struct ifnet *ifp; struct vmxnet3_driver_shared *ds; struct ifmultiaddr *ifma; u_int mode; ifp = sc->vmx_ifp; ds = sc->vmx_ds; mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST; if (ifp->if_flags & IFF_PROMISC) mode |= VMXNET3_RXMODE_PROMISC; if (ifp->if_flags & IFF_ALLMULTI) mode |= VMXNET3_RXMODE_ALLMULTI; else { int cnt = 0, overflow = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; else if (cnt == VMXNET3_MULTICAST_MAX) { overflow = 1; break; } bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &sc->vmx_mcast[cnt*ETHER_ADDR_LEN], ETHER_ADDR_LEN); cnt++; } if_maddr_runlock(ifp); if (overflow != 0) { cnt = 0; mode |= VMXNET3_RXMODE_ALLMULTI; } else if (cnt > 0) mode |= VMXNET3_RXMODE_MCAST; ds->mcast_tablelen = cnt * ETHER_ADDR_LEN; } ds->rxmode = mode; vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER); vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE); } static int vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu) { struct ifnet *ifp; ifp = sc->vmx_ifp; if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU) return (EINVAL); ifp->if_mtu = mtu; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vmxnet3_init_locked(sc); } return (0); } static int vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct vmxnet3_softc *sc; struct ifreq *ifr; int reinit, mask, error; sc = ifp->if_softc; ifr = (struct ifreq *) data; error = 0; switch (cmd) { case SIOCSIFMTU: if (ifp->if_mtu != ifr->ifr_mtu) { VMXNET3_CORE_LOCK(sc); error = vmxnet3_change_mtu(sc, ifr->ifr_mtu); VMXNET3_CORE_UNLOCK(sc); } break; case SIOCSIFFLAGS: VMXNET3_CORE_LOCK(sc); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ sc->vmx_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { vmxnet3_set_rxfilter(sc); } } else vmxnet3_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) vmxnet3_stop(sc); } sc->vmx_if_flags = ifp->if_flags; VMXNET3_CORE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: VMXNET3_CORE_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) vmxnet3_set_rxfilter(sc); VMXNET3_CORE_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd); break; case SIOCSIFCAP: VMXNET3_CORE_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) ifp->if_capenable ^= IFCAP_TXCSUM; if (mask & IFCAP_TXCSUM_IPV6) ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) ifp->if_capenable ^= IFCAP_TSO6; if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER)) { /* Changing these features requires us to reinit. 
*/ reinit = 1; if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; } else reinit = 0; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vmxnet3_init_locked(sc); } VMXNET3_CORE_UNLOCK(sc); VLAN_CAPABILITIES(ifp); break; default: error = ether_ioctl(ifp, cmd, data); break; } VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc); return (error); } #ifndef VMXNET3_LEGACY_TX static void vmxnet3_qflush(struct ifnet *ifp) { struct vmxnet3_softc *sc; struct vmxnet3_txqueue *txq; struct mbuf *m; int i; sc = ifp->if_softc; for (i = 0; i < sc->vmx_ntxqueues; i++) { txq = &sc->vmx_txq[i]; VMXNET3_TXQ_LOCK(txq); while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL) m_freem(m); VMXNET3_TXQ_UNLOCK(txq); } if_qflush(ifp); } #endif static int vmxnet3_watchdog(struct vmxnet3_txqueue *txq) { struct vmxnet3_softc *sc; sc = txq->vxtxq_sc; VMXNET3_TXQ_LOCK(txq); if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) { VMXNET3_TXQ_UNLOCK(txq); return (0); } VMXNET3_TXQ_UNLOCK(txq); if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n", txq->vxtxq_id); return (1); } static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc) { vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS); } static uint64_t vmxnet3_get_counter(struct ifnet *ifp, ift_counter cnt) { struct vmxnet3_softc *sc; uint64_t rv; sc = if_getsoftc(ifp); rv = 0; /* * With the exception of if_ierrors, these ifnet statistics are * only updated in the driver, so just set them to our accumulated * values. if_ierrors is updated in ether_input() for malformed * frames that we should have already discarded. */ switch (cnt) { case IFCOUNTER_IPACKETS: for (int i = 0; i < sc->vmx_nrxqueues; i++) rv += sc->vmx_rxq[i].vxrxq_stats.vmrxs_ipackets; return (rv); case IFCOUNTER_IQDROPS: for (int i = 0; i < sc->vmx_nrxqueues; i++) rv += sc->vmx_rxq[i].vxrxq_stats.vmrxs_iqdrops; return (rv); case IFCOUNTER_IERRORS: for (int i = 0; i < sc->vmx_nrxqueues; i++) rv += sc->vmx_rxq[i].vxrxq_stats.vmrxs_ierrors; return (rv); case IFCOUNTER_OPACKETS: for (int i = 0; i < sc->vmx_ntxqueues; i++) rv += sc->vmx_txq[i].vxtxq_stats.vmtxs_opackets; return (rv); #ifndef VMXNET3_LEGACY_TX case IFCOUNTER_OBYTES: for (int i = 0; i < sc->vmx_ntxqueues; i++) rv += sc->vmx_txq[i].vxtxq_stats.vmtxs_obytes; return (rv); case IFCOUNTER_OMCASTS: for (int i = 0; i < sc->vmx_ntxqueues; i++) rv += sc->vmx_txq[i].vxtxq_stats.vmtxs_omcasts; return (rv); #endif default: return (if_get_counter_default(ifp, cnt)); } } static void vmxnet3_tick(void *xsc) { struct vmxnet3_softc *sc; struct ifnet *ifp; int i, timedout; sc = xsc; ifp = sc->vmx_ifp; timedout = 0; VMXNET3_CORE_LOCK_ASSERT(sc); vmxnet3_refresh_host_stats(sc); for (i = 0; i < sc->vmx_ntxqueues; i++) timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]); if (timedout != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vmxnet3_init_locked(sc); } else callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc); } static int vmxnet3_link_is_up(struct vmxnet3_softc *sc) { uint32_t status; /* Also update the link speed while here. 
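 *
 * The GET_LINK result packs both answers into one 32-bit word, as the
 * code below assumes: bit 0 is the link-up flag and the upper 16 bits
 * carry the reported speed.  Sketch (example_decode_link is
 * illustrative):
 *
 *	static int
 *	example_decode_link(uint32_t status, uint32_t *speed)
 *	{
 *		*speed = status >> 16;
 *		return ((status & 0x1) != 0);
 *	}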
*/ status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK); sc->vmx_link_speed = status >> 16; return !!(status & 0x1); } static void vmxnet3_link_status(struct vmxnet3_softc *sc) { struct ifnet *ifp; int link; ifp = sc->vmx_ifp; link = vmxnet3_link_is_up(sc); if (link != 0 && sc->vmx_link_active == 0) { sc->vmx_link_active = 1; if_link_state_change(ifp, LINK_STATE_UP); } else if (link == 0 && sc->vmx_link_active != 0) { sc->vmx_link_active = 0; if_link_state_change(ifp, LINK_STATE_DOWN); } } static void vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct vmxnet3_softc *sc; sc = ifp->if_softc; ifmr->ifm_active = IFM_ETHER | IFM_AUTO; ifmr->ifm_status = IFM_AVALID; VMXNET3_CORE_LOCK(sc); if (vmxnet3_link_is_up(sc) != 0) ifmr->ifm_status |= IFM_ACTIVE; else ifmr->ifm_status |= IFM_NONE; VMXNET3_CORE_UNLOCK(sc); } static int vmxnet3_media_change(struct ifnet *ifp) { /* Ignore. */ return (0); } static void vmxnet3_set_lladdr(struct vmxnet3_softc *sc) { uint32_t ml, mh; ml = sc->vmx_lladdr[0]; ml |= sc->vmx_lladdr[1] << 8; ml |= sc->vmx_lladdr[2] << 16; ml |= sc->vmx_lladdr[3] << 24; vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml); mh = sc->vmx_lladdr[4]; mh |= sc->vmx_lladdr[5] << 8; vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh); } static void vmxnet3_get_lladdr(struct vmxnet3_softc *sc) { uint32_t ml, mh; ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL); mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH); sc->vmx_lladdr[0] = ml; sc->vmx_lladdr[1] = ml >> 8; sc->vmx_lladdr[2] = ml >> 16; sc->vmx_lladdr[3] = ml >> 24; sc->vmx_lladdr[4] = mh; sc->vmx_lladdr[5] = mh >> 8; } static void vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { struct sysctl_oid *node, *txsnode; struct sysctl_oid_list *list, *txslist; struct vmxnet3_txq_stats *stats; struct UPT1_TxStats *txstats; char namebuf[16]; stats = &txq->vxtxq_stats; txstats = &txq->vxtxq_ts->stats; snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Transmit Queue"); txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD, &stats->vmtxs_opackets, "Transmit packets"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD, &stats->vmtxs_obytes, "Transmit bytes"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD, &stats->vmtxs_omcasts, "Transmit multicasts"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, &stats->vmtxs_csum, "Transmit checksum offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, &stats->vmtxs_tso, "Transmit TCP segmentation offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD, &stats->vmtxs_full, "Transmit ring full"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD, &stats->vmtxs_offload_failed, "Transmit checksum offload failed"); /* * Add statistics reported by the host. These are updated once * per second. 
*/ txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD, NULL, "Host Statistics"); txslist = SYSCTL_CHILDREN(txsnode); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD, &txstats->TSO_packets, "TSO packets"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD, &txstats->TSO_bytes, "TSO bytes"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD, &txstats->ucast_packets, "Unicast packets"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD, &txstats->ucast_bytes, "Unicast bytes"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD, &txstats->mcast_packets, "Multicast packets"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD, &txstats->mcast_bytes, "Multicast bytes"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD, &txstats->error, "Errors"); SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD, &txstats->discard, "Discards"); } static void vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { struct sysctl_oid *node, *rxsnode; struct sysctl_oid_list *list, *rxslist; struct vmxnet3_rxq_stats *stats; struct UPT1_RxStats *rxstats; char namebuf[16]; stats = &rxq->vxrxq_stats; rxstats = &rxq->vxrxq_rs->stats; snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Receive Queue"); rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD, &stats->vmrxs_ipackets, "Receive packets"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD, &stats->vmrxs_ibytes, "Receive bytes"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD, &stats->vmrxs_iqdrops, "Receive drops"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD, &stats->vmrxs_ierrors, "Receive errors"); /* * Add statistics reported by the host. These are updated once * per second. 
*/ rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD, NULL, "Host Statistics"); rxslist = SYSCTL_CHILDREN(rxsnode); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD, &rxstats->LRO_packets, "LRO packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD, &rxstats->LRO_bytes, "LRO bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD, &rxstats->ucast_packets, "Unicast packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD, &rxstats->ucast_bytes, "Unicast bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD, &rxstats->mcast_packets, "Multicast packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD, &rxstats->mcast_bytes, "Multicast bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD, &rxstats->bcast_packets, "Broadcast packets"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD, &rxstats->bcast_bytes, "Broadcast bytes"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD, &rxstats->nobuffer, "No buffer"); SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD, &rxstats->error, "Errors"); } static void vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { struct sysctl_oid *node; struct sysctl_oid_list *list; int i; for (i = 0; i < sc->vmx_ntxqueues; i++) { struct vmxnet3_txqueue *txq = &sc->vmx_txq[i]; node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO, "debug", CTLFLAG_RD, NULL, ""); list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_head, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_next, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, ""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD, &txq->vxtxq_cmd_ring.vxtxr_gen, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD, &txq->vxtxq_comp_ring.vxcr_next, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD, &txq->vxtxq_comp_ring.vxcr_ndesc, 0,""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD, &txq->vxtxq_comp_ring.vxcr_gen, 0, ""); } for (i = 0; i < sc->vmx_nrxqueues; i++) { struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i]; node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO, "debug", CTLFLAG_RD, NULL, ""); list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, ""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, ""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD, &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_next, 0, ""); SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,""); SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD, &rxq->vxrxq_comp_ring.vxcr_gen, 0, ""); } } static void vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc, struct 
sysctl_ctx_list *ctx, struct sysctl_oid_list *child) { int i; for (i = 0; i < sc->vmx_ntxqueues; i++) vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child); for (i = 0; i < sc->vmx_nrxqueues; i++) vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child); vmxnet3_setup_debug_sysctl(sc, ctx, child); } static void vmxnet3_setup_sysctl(struct vmxnet3_softc *sc) { device_t dev; struct vmxnet3_statistics *stats; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; dev = sc->vmx_dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD, &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD, &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD, &sc->vmx_ntxqueues, 0, "Number of Tx queues"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD, &sc->vmx_nrxqueues, 0, "Number of Rx queues"); stats = &sc->vmx_stats; SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD, &stats->vmst_defragged, 0, "Tx mbuf chains defragged"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD, &stats->vmst_defrag_failed, 0, "Tx mbuf dropped because defrag failed"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD, &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD, &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed"); vmxnet3_setup_queue_sysctl(sc, ctx, child); } static void vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v) { bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v); } static uint32_t vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r) { return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r)); } static void vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v) { bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v); } static void vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd) { vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd); } static uint32_t vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd) { vmxnet3_write_cmd(sc, cmd); bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD)); } static void vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq) { vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0); } static void vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq) { vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1); } static void vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc) { int i; sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL; for (i = 0; i < sc->vmx_nintrs; i++) vmxnet3_enable_intr(sc, i); } static void vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc) { int i; sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL; for (i = 0; i < sc->vmx_nintrs; i++) vmxnet3_disable_intr(sc, i); } static void vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *baddr = arg; if (error == 0) *baddr = segs->ds_addr; } static int vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align, struct vmxnet3_dma_alloc *dma) { device_t dev; int error; dev = sc->vmx_dev; bzero(dma, sizeof(struct vmxnet3_dma_alloc)); error = bus_dma_tag_create(bus_get_dma_tag(dev), align, 0, /* alignment, bounds */ 
BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &dma->dma_tag); if (error) { device_printf(dev, "bus_dma_tag_create failed: %d\n", error); goto fail; } error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr, BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map); if (error) { device_printf(dev, "bus_dmamem_alloc failed: %d\n", error); goto fail; } error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "bus_dmamap_load failed: %d\n", error); goto fail; } dma->dma_size = size; fail: if (error) vmxnet3_dma_free(sc, dma); return (error); } static void vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma) { if (dma->dma_tag != NULL) { if (dma->dma_paddr != 0) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); } if (dma->dma_vaddr != NULL) { bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); } bus_dma_tag_destroy(dma->dma_tag); } bzero(dma, sizeof(struct vmxnet3_dma_alloc)); } static int vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def) { char path[64]; snprintf(path, sizeof(path), "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob); TUNABLE_INT_FETCH(path, &def); return (def); } /* * Since this is a purely paravirtualized device, we do not have * to worry about DMA coherency. But at times, we must make sure * both the compiler and CPU do not reorder memory operations. */ static inline void vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type) { switch (type) { case VMXNET3_BARRIER_RD: rmb(); break; case VMXNET3_BARRIER_WR: wmb(); break; case VMXNET3_BARRIER_RDWR: mb(); break; default: panic("%s: bad barrier type %d", __func__, type); } } Index: projects/powernv/dev/wpi/if_wpi.c =================================================================== --- projects/powernv/dev/wpi/if_wpi.c (revision 290990) +++ projects/powernv/dev/wpi/if_wpi.c (revision 290991) @@ -1,5644 +1,5644 @@ /*- * Copyright (c) 2006,2007 * Damien Bergamini * Benjamin Close * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include __FBSDID("$FreeBSD$"); /* * Driver for Intel PRO/Wireless 3945ABG 802.11 network adapters. * * The 3945ABG network adapter doesn't use traditional hardware as * many other adaptors do. Instead at run time the eeprom is set into a known * state and told to load boot firmware. The boot firmware loads an init and a * main binary firmware image into SRAM on the card via DMA. * Once the firmware is loaded, the driver/hw then * communicate by way of circular dma rings via the SRAM to the firmware. 
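 *
 * As a rough sketch of that producer/consumer handshake (illustrative code
 * only, not taken from this driver -- every name below is invented):
 *
 *	#define EX_RING_COUNT	32
 *
 *	struct ex_ring {
 *		uint32_t	cur;	// last slot the driver processed
 *	};
 *
 *	static void
 *	ex_ring_drain(struct ex_ring *r, uint32_t hw)
 *	{
 *		// "hw" is the index most recently written by the firmware.
 *		while (r->cur != hw) {
 *			r->cur = (r->cur + 1) % EX_RING_COUNT;
 *			// consume descriptor r->cur and refill its buffer,
 *			// then tell the firmware how far we got.
 *		}
 *	}
 *
 * The details for this device are as follows.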
 *
 * There are six memory rings: one command ring, one rx data ring and four
 * tx data rings.  The four tx data rings allow for QoS prioritization.
 *
 * The rx data ring consists of 32 dma buffers.  Two registers are used to
 * indicate where in the ring the driver and the firmware are up to.  The
 * driver sets the initial read index (reg1) and the initial write index
 * (reg2); the firmware updates the read index (reg1) on rx of a packet and
 * fires an interrupt.  The driver then processes the buffers starting at
 * reg1, indicating to the firmware which buffers have been accessed by
 * updating reg2, and at the same time allocates new memory for each
 * processed buffer.
 *
 * A similar thing happens with the tx rings.  The difference is that the
 * firmware stops processing buffers once the queue is full, and resumes
 * only once confirmation of a successful transmission (tx_done) has
 * occurred.
 *
 * The command ring operates in the same manner as the tx queues.
 *
 * All communication direct to the card (i.e. eeprom) is classed as Stage1
 * communication.
 *
 * All communication via the firmware to the card is classed as Stage2.
 * The firmware consists of 2 parts: a bootstrap firmware and a runtime
 * firmware.  The bootstrap firmware and runtime firmware are loaded from
 * host memory via dma to the card and then told to execute.  From this
 * point on the majority of communication between the driver and the card
 * goes via the firmware.
 */
#include "opt_wlan.h" #include "opt_wpi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include
struct wpi_ident { uint16_t vendor; uint16_t device; uint16_t subdevice; const char *name; };
static const struct wpi_ident wpi_ident_table[] = { /* The below entries support ABG regardless of the subid */ { 0x8086, 0x4222, 0x0, "Intel(R) PRO/Wireless 3945ABG" }, { 0x8086, 0x4227, 0x0, "Intel(R) PRO/Wireless 3945ABG" }, /* The below entries only support BG */ { 0x8086, 0x4222, 0x1005, "Intel(R) PRO/Wireless 3945BG" }, { 0x8086, 0x4222, 0x1034, "Intel(R) PRO/Wireless 3945BG" }, { 0x8086, 0x4227, 0x1014, "Intel(R) PRO/Wireless 3945BG" }, { 0x8086, 0x4222, 0x1044, "Intel(R) PRO/Wireless 3945BG" }, { 0, 0, 0, NULL } };
static int wpi_probe(device_t); static int wpi_attach(device_t); static void wpi_radiotap_attach(struct wpi_softc *); static void wpi_sysctlattach(struct wpi_softc *); static void wpi_init_beacon(struct wpi_vap *); static struct ieee80211vap *wpi_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void wpi_vap_delete(struct ieee80211vap *); static int wpi_detach(device_t); static int wpi_shutdown(device_t); static int wpi_suspend(device_t); static int wpi_resume(device_t); static int wpi_nic_lock(struct wpi_softc *); static int wpi_read_prom_data(struct wpi_softc *, uint32_t, void *, int); static void wpi_dma_map_addr(void *, bus_dma_segment_t *, int, int); static int wpi_dma_contig_alloc(struct wpi_softc *, struct wpi_dma_info *, void **, bus_size_t, bus_size_t); static void wpi_dma_contig_free(struct wpi_dma_info *); static int wpi_alloc_shared(struct wpi_softc *); static void wpi_free_shared(struct wpi_softc *); static int wpi_alloc_fwmem(struct wpi_softc *); static void wpi_free_fwmem(struct
wpi_softc *); static int wpi_alloc_rx_ring(struct wpi_softc *); static void wpi_update_rx_ring(struct wpi_softc *); static void wpi_update_rx_ring_ps(struct wpi_softc *); static void wpi_reset_rx_ring(struct wpi_softc *); static void wpi_free_rx_ring(struct wpi_softc *); static int wpi_alloc_tx_ring(struct wpi_softc *, struct wpi_tx_ring *, uint8_t); static void wpi_update_tx_ring(struct wpi_softc *, struct wpi_tx_ring *); static void wpi_update_tx_ring_ps(struct wpi_softc *, struct wpi_tx_ring *); static void wpi_reset_tx_ring(struct wpi_softc *, struct wpi_tx_ring *); static void wpi_free_tx_ring(struct wpi_softc *, struct wpi_tx_ring *); static int wpi_read_eeprom(struct wpi_softc *, uint8_t macaddr[IEEE80211_ADDR_LEN]); static uint32_t wpi_eeprom_channel_flags(struct wpi_eeprom_chan *); static void wpi_read_eeprom_band(struct wpi_softc *, uint8_t); static int wpi_read_eeprom_channels(struct wpi_softc *, uint8_t); static struct wpi_eeprom_chan *wpi_find_eeprom_channel(struct wpi_softc *, struct ieee80211_channel *); static int wpi_setregdomain(struct ieee80211com *, struct ieee80211_regdomain *, int, struct ieee80211_channel[]); static int wpi_read_eeprom_group(struct wpi_softc *, uint8_t); static struct ieee80211_node *wpi_node_alloc(struct ieee80211vap *, const uint8_t mac[IEEE80211_ADDR_LEN]); static void wpi_node_free(struct ieee80211_node *); static void wpi_ibss_recv_mgmt(struct ieee80211_node *, struct mbuf *, int, const struct ieee80211_rx_stats *, int, int); static void wpi_restore_node(void *, struct ieee80211_node *); static void wpi_restore_node_table(struct wpi_softc *, struct wpi_vap *); static int wpi_newstate(struct ieee80211vap *, enum ieee80211_state, int); static void wpi_calib_timeout(void *); static void wpi_rx_done(struct wpi_softc *, struct wpi_rx_desc *, struct wpi_rx_data *); static void wpi_rx_statistics(struct wpi_softc *, struct wpi_rx_desc *, struct wpi_rx_data *); static void wpi_tx_done(struct wpi_softc *, struct wpi_rx_desc *); static void wpi_cmd_done(struct wpi_softc *, struct wpi_rx_desc *); static void wpi_notif_intr(struct wpi_softc *); static void wpi_wakeup_intr(struct wpi_softc *); #ifdef WPI_DEBUG static void wpi_debug_registers(struct wpi_softc *); #endif static void wpi_fatal_intr(struct wpi_softc *); static void wpi_intr(void *); static void wpi_free_txfrags(struct wpi_softc *, uint16_t); static int wpi_cmd2(struct wpi_softc *, struct wpi_buf *); static int wpi_tx_data(struct wpi_softc *, struct mbuf *, struct ieee80211_node *); static int wpi_tx_data_raw(struct wpi_softc *, struct mbuf *, struct ieee80211_node *, const struct ieee80211_bpf_params *); static int wpi_raw_xmit(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static int wpi_transmit(struct ieee80211com *, struct mbuf *); static void wpi_watchdog_rfkill(void *); static void wpi_scan_timeout(void *); static void wpi_tx_timeout(void *); static void wpi_parent(struct ieee80211com *); static int wpi_cmd(struct wpi_softc *, uint8_t, const void *, uint16_t, int); static int wpi_mrr_setup(struct wpi_softc *); static int wpi_add_node(struct wpi_softc *, struct ieee80211_node *); static int wpi_add_broadcast_node(struct wpi_softc *, int); static int wpi_add_ibss_node(struct wpi_softc *, struct ieee80211_node *); static void wpi_del_node(struct wpi_softc *, struct ieee80211_node *); static int wpi_updateedca(struct ieee80211com *); static void wpi_set_promisc(struct wpi_softc *); static void wpi_update_promisc(struct ieee80211com *); static void 
wpi_update_mcast(struct ieee80211com *); static void wpi_set_led(struct wpi_softc *, uint8_t, uint8_t, uint8_t); static int wpi_set_timing(struct wpi_softc *, struct ieee80211_node *); static void wpi_power_calibration(struct wpi_softc *); static int wpi_set_txpower(struct wpi_softc *, int); static int wpi_get_power_index(struct wpi_softc *, struct wpi_power_group *, uint8_t, int, int); static int wpi_set_pslevel(struct wpi_softc *, uint8_t, int, int); static int wpi_send_btcoex(struct wpi_softc *); static int wpi_send_rxon(struct wpi_softc *, int, int); static int wpi_config(struct wpi_softc *); static uint16_t wpi_get_active_dwell_time(struct wpi_softc *, struct ieee80211_channel *, uint8_t); static uint16_t wpi_limit_dwell(struct wpi_softc *, uint16_t); static uint16_t wpi_get_passive_dwell_time(struct wpi_softc *, struct ieee80211_channel *); static uint32_t wpi_get_scan_pause_time(uint32_t, uint16_t); static int wpi_scan(struct wpi_softc *, struct ieee80211_channel *); static int wpi_auth(struct wpi_softc *, struct ieee80211vap *); static int wpi_config_beacon(struct wpi_vap *); static int wpi_setup_beacon(struct wpi_softc *, struct ieee80211_node *); static void wpi_update_beacon(struct ieee80211vap *, int); static void wpi_newassoc(struct ieee80211_node *, int); static int wpi_run(struct wpi_softc *, struct ieee80211vap *); static int wpi_load_key(struct ieee80211_node *, const struct ieee80211_key *); static void wpi_load_key_cb(void *, struct ieee80211_node *); static int wpi_set_global_keys(struct ieee80211_node *); static int wpi_del_key(struct ieee80211_node *, const struct ieee80211_key *); static void wpi_del_key_cb(void *, struct ieee80211_node *); static int wpi_process_key(struct ieee80211vap *, const struct ieee80211_key *, int); static int wpi_key_set(struct ieee80211vap *, const struct ieee80211_key *); static int wpi_key_delete(struct ieee80211vap *, const struct ieee80211_key *); static int wpi_post_alive(struct wpi_softc *); static int wpi_load_bootcode(struct wpi_softc *, const uint8_t *, uint32_t); static int wpi_load_firmware(struct wpi_softc *); static int wpi_read_firmware(struct wpi_softc *); static void wpi_unload_firmware(struct wpi_softc *); static int wpi_clock_wait(struct wpi_softc *); static int wpi_apm_init(struct wpi_softc *); static void wpi_apm_stop_master(struct wpi_softc *); static void wpi_apm_stop(struct wpi_softc *); static void wpi_nic_config(struct wpi_softc *); static int wpi_hw_init(struct wpi_softc *); static void wpi_hw_stop(struct wpi_softc *); static void wpi_radio_on(void *, int); static void wpi_radio_off(void *, int); static int wpi_init(struct wpi_softc *); static void wpi_stop_locked(struct wpi_softc *); static void wpi_stop(struct wpi_softc *); static void wpi_scan_start(struct ieee80211com *); static void wpi_scan_end(struct ieee80211com *); static void wpi_set_channel(struct ieee80211com *); static void wpi_scan_curchan(struct ieee80211_scan_state *, unsigned long); static void wpi_scan_mindwell(struct ieee80211_scan_state *); static void wpi_hw_reset(void *, int); static device_method_t wpi_methods[] = { /* Device interface */ DEVMETHOD(device_probe, wpi_probe), DEVMETHOD(device_attach, wpi_attach), DEVMETHOD(device_detach, wpi_detach), DEVMETHOD(device_shutdown, wpi_shutdown), DEVMETHOD(device_suspend, wpi_suspend), DEVMETHOD(device_resume, wpi_resume), DEVMETHOD_END }; static driver_t wpi_driver = { "wpi", wpi_methods, sizeof (struct wpi_softc) }; static devclass_t wpi_devclass; DRIVER_MODULE(wpi, pci, wpi_driver, wpi_devclass, 
NULL, NULL); MODULE_VERSION(wpi, 1); MODULE_DEPEND(wpi, pci, 1, 1, 1); MODULE_DEPEND(wpi, wlan, 1, 1, 1); MODULE_DEPEND(wpi, firmware, 1, 1, 1);
static int wpi_probe(device_t dev) { const struct wpi_ident *ident; for (ident = wpi_ident_table; ident->name != NULL; ident++) { if (pci_get_vendor(dev) == ident->vendor && pci_get_device(dev) == ident->device) { device_set_desc(dev, ident->name); return (BUS_PROBE_DEFAULT); } } return ENXIO; }
static int wpi_attach(device_t dev) { struct wpi_softc *sc = (struct wpi_softc *)device_get_softc(dev); struct ieee80211com *ic; uint8_t i; int error, rid; #ifdef WPI_DEBUG int supportsa = 1; const struct wpi_ident *ident; #endif sc->sc_dev = dev; #ifdef WPI_DEBUG error = resource_int_value(device_get_name(sc->sc_dev), device_get_unit(sc->sc_dev), "debug", &(sc->sc_debug)); if (error != 0) sc->sc_debug = 0; #else sc->sc_debug = 0; #endif DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__);
/* * Get the offset of the PCI Express Capability Structure in PCI * Configuration Space. */ error = pci_find_cap(dev, PCIY_EXPRESS, &sc->sc_cap_off); if (error != 0) { device_printf(dev, "PCIe capability structure not found!\n"); return error; }
/* * Some cards only support 802.11b/g, not a; check to see if * this is one such card. A 0x0 in the subdevice table indicates * the entire subdevice range is to be ignored. */ #ifdef WPI_DEBUG for (ident = wpi_ident_table; ident->name != NULL; ident++) { if (ident->subdevice && pci_get_subdevice(dev) == ident->subdevice) { supportsa = 0; break; } } #endif
/* Clear device-specific "PCI retry timeout" register (41h). */ pci_write_config(dev, 0x41, 0, 1); /* Enable bus-mastering. */ pci_enable_busmaster(dev); rid = PCIR_BAR(0); sc->mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem == NULL) { device_printf(dev, "can't map mem space\n"); return ENOMEM; } sc->sc_st = rman_get_bustag(sc->mem); sc->sc_sh = rman_get_bushandle(sc->mem); rid = 1; if (pci_alloc_msi(dev, &rid) == 0) rid = 1; else rid = 0;
/* Install interrupt handler. */ sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | (rid != 0 ? 0 : RF_SHAREABLE)); if (sc->irq == NULL) { device_printf(dev, "can't map interrupt\n"); error = ENOMEM; goto fail; } WPI_LOCK_INIT(sc); WPI_TX_LOCK_INIT(sc); WPI_RXON_LOCK_INIT(sc); WPI_NT_LOCK_INIT(sc); WPI_TXQ_LOCK_INIT(sc); WPI_TXQ_STATE_LOCK_INIT(sc);
/* Allocate DMA memory for firmware transfers. */ if ((error = wpi_alloc_fwmem(sc)) != 0) { device_printf(dev, "could not allocate memory for firmware, error %d\n", error); goto fail; } /* Allocate shared page. */ if ((error = wpi_alloc_shared(sc)) != 0) { device_printf(dev, "could not allocate shared page\n"); goto fail; } /* Allocate TX rings - 4 for QoS purposes, 1 for commands. */ for (i = 0; i < WPI_DRV_NTXQUEUES; i++) { if ((error = wpi_alloc_tx_ring(sc, &sc->txq[i], i)) != 0) { device_printf(dev, "could not allocate TX ring %d, error %d\n", i, error); goto fail; } } /* Allocate RX ring. */ if ((error = wpi_alloc_rx_ring(sc)) != 0) { device_printf(dev, "could not allocate RX ring, error %d\n", error); goto fail; }
/* Clear pending interrupts. */ WPI_WRITE(sc, WPI_INT, 0xffffffff); ic = &sc->sc_ic; ic->ic_softc = sc; ic->ic_name = device_get_nameunit(dev); ic->ic_phytype = IEEE80211_T_OFDM; /* not only, but not used */ ic->ic_opmode = IEEE80211_M_STA; /* default to BSS mode */ /* Set device capabilities.
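 *
 * A minimal sketch of how such a capability bit is consumed later on
 * (illustrative, not from this file):
 *
 *	if (ic->ic_caps & IEEE80211_C_WME) {
 *		// the WME/802.11e machinery may be engaged
 *	}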
*/ ic->ic_caps = IEEE80211_C_STA /* station mode supported */ | IEEE80211_C_IBSS /* IBSS mode supported */ | IEEE80211_C_HOSTAP /* Host access point mode */ | IEEE80211_C_MONITOR /* monitor mode supported */ | IEEE80211_C_AHDEMO /* adhoc demo mode */ | IEEE80211_C_BGSCAN /* capable of bg scanning */ | IEEE80211_C_TXFRAG /* handle tx frags */ | IEEE80211_C_TXPMGT /* tx power management */ | IEEE80211_C_SHSLOT /* short slot time supported */ | IEEE80211_C_WPA /* 802.11i */ | IEEE80211_C_SHPREAMBLE /* short preamble supported */ | IEEE80211_C_WME /* 802.11e */ | IEEE80211_C_PMGT /* Station-side power mgmt */ ; ic->ic_cryptocaps = IEEE80211_CRYPTO_AES_CCM; /* * Read in the eeprom and also setup the channels for * net80211. We don't set the rates as net80211 does this for us */ if ((error = wpi_read_eeprom(sc, ic->ic_macaddr)) != 0) { device_printf(dev, "could not read EEPROM, error %d\n", error); goto fail; } #ifdef WPI_DEBUG if (bootverbose) { device_printf(sc->sc_dev, "Regulatory Domain: %.4s\n", sc->domain); device_printf(sc->sc_dev, "Hardware Type: %c\n", sc->type > 1 ? 'B': '?'); device_printf(sc->sc_dev, "Hardware Revision: %c\n", ((sc->rev & 0xf0) == 0xd0) ? 'D': '?'); device_printf(sc->sc_dev, "SKU %s support 802.11a\n", supportsa ? "does" : "does not"); /* XXX hw_config uses the PCIDEV for the Hardware rev. Must check what sc->rev really represents - benjsc 20070615 */ } #endif ieee80211_ifattach(ic); ic->ic_vap_create = wpi_vap_create; ic->ic_vap_delete = wpi_vap_delete; ic->ic_parent = wpi_parent; ic->ic_raw_xmit = wpi_raw_xmit; ic->ic_transmit = wpi_transmit; ic->ic_node_alloc = wpi_node_alloc; sc->sc_node_free = ic->ic_node_free; ic->ic_node_free = wpi_node_free; ic->ic_wme.wme_update = wpi_updateedca; ic->ic_update_promisc = wpi_update_promisc; ic->ic_update_mcast = wpi_update_mcast; ic->ic_newassoc = wpi_newassoc; ic->ic_scan_start = wpi_scan_start; ic->ic_scan_end = wpi_scan_end; ic->ic_set_channel = wpi_set_channel; ic->ic_scan_curchan = wpi_scan_curchan; ic->ic_scan_mindwell = wpi_scan_mindwell; ic->ic_setregdomain = wpi_setregdomain; sc->sc_update_rx_ring = wpi_update_rx_ring; sc->sc_update_tx_ring = wpi_update_tx_ring; wpi_radiotap_attach(sc); callout_init_mtx(&sc->calib_to, &sc->rxon_mtx, 0); callout_init_mtx(&sc->scan_timeout, &sc->rxon_mtx, 0); callout_init_mtx(&sc->tx_timeout, &sc->txq_state_mtx, 0); callout_init_mtx(&sc->watchdog_rfkill, &sc->sc_mtx, 0); TASK_INIT(&sc->sc_reinittask, 0, wpi_hw_reset, sc); TASK_INIT(&sc->sc_radiooff_task, 0, wpi_radio_off, sc); TASK_INIT(&sc->sc_radioon_task, 0, wpi_radio_on, sc); sc->sc_tq = taskqueue_create("wpi_taskq", M_WAITOK, taskqueue_thread_enqueue, &sc->sc_tq); error = taskqueue_start_threads(&sc->sc_tq, 1, 0, "wpi_taskq"); if (error != 0) { device_printf(dev, "can't start threads, error %d\n", error); goto fail; } wpi_sysctlattach(sc); /* * Hook our interrupt after all initialization is complete. */ error = bus_setup_intr(dev, sc->irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, wpi_intr, sc, &sc->sc_ih); if (error != 0) { device_printf(dev, "can't establish interrupt, error %d\n", error); goto fail; } if (bootverbose) ieee80211_announce(ic); #ifdef WPI_DEBUG if (sc->sc_debug & WPI_DEBUG_HW) ieee80211_announce_channels(ic); #endif DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; fail: wpi_detach(dev); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); return error; } /* * Attach the interface to 802.11 radiotap. 
*/ static void wpi_radiotap_attach(struct wpi_softc *sc) { struct wpi_rx_radiotap_header *rxtap = &sc->sc_rxtap; struct wpi_tx_radiotap_header *txtap = &sc->sc_txtap; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); ieee80211_radiotap_attach(&sc->sc_ic, &txtap->wt_ihdr, sizeof(*txtap), WPI_TX_RADIOTAP_PRESENT, &rxtap->wr_ihdr, sizeof(*rxtap), WPI_RX_RADIOTAP_PRESENT); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); } static void wpi_sysctlattach(struct wpi_softc *sc) { #ifdef WPI_DEBUG struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev); struct sysctl_oid *tree = device_get_sysctl_tree(sc->sc_dev); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "debug", CTLFLAG_RW, &sc->sc_debug, sc->sc_debug, "control debugging printfs"); #endif } static void wpi_init_beacon(struct wpi_vap *wvp) { struct wpi_buf *bcn = &wvp->wv_bcbuf; struct wpi_cmd_beacon *cmd = (struct wpi_cmd_beacon *)&bcn->data; cmd->id = WPI_ID_BROADCAST; cmd->ofdm_mask = 0xff; cmd->cck_mask = 0x0f; cmd->lifetime = htole32(WPI_LIFETIME_INFINITE); /* * XXX WPI_TX_AUTO_SEQ seems to be ignored - workaround this issue * XXX by using WPI_TX_NEED_ACK instead (with some side effects). */ cmd->flags = htole32(WPI_TX_NEED_ACK | WPI_TX_INSERT_TSTAMP); bcn->code = WPI_CMD_SET_BEACON; bcn->ac = WPI_CMD_QUEUE_NUM; bcn->size = sizeof(struct wpi_cmd_beacon); } static struct ieee80211vap * wpi_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct wpi_vap *wvp; struct ieee80211vap *vap; if (!TAILQ_EMPTY(&ic->ic_vaps)) /* only one at a time */ return NULL; wvp = malloc(sizeof(struct wpi_vap), M_80211_VAP, M_WAITOK | M_ZERO); vap = &wvp->wv_vap; ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); if (opmode == IEEE80211_M_IBSS || opmode == IEEE80211_M_HOSTAP) { WPI_VAP_LOCK_INIT(wvp); wpi_init_beacon(wvp); } /* Override with driver methods. */ vap->iv_key_set = wpi_key_set; vap->iv_key_delete = wpi_key_delete; if (opmode == IEEE80211_M_IBSS) { wvp->wv_recv_mgmt = vap->iv_recv_mgmt; vap->iv_recv_mgmt = wpi_ibss_recv_mgmt; } wvp->wv_newstate = vap->iv_newstate; vap->iv_newstate = wpi_newstate; vap->iv_update_beacon = wpi_update_beacon; vap->iv_max_aid = WPI_ID_IBSS_MAX - WPI_ID_IBSS_MIN + 1; ieee80211_ratectl_init(vap); /* Complete setup. */ ieee80211_vap_attach(vap, ieee80211_media_change, ieee80211_media_status, mac); ic->ic_opmode = opmode; return vap; } static void wpi_vap_delete(struct ieee80211vap *vap) { struct wpi_vap *wvp = WPI_VAP(vap); struct wpi_buf *bcn = &wvp->wv_bcbuf; enum ieee80211_opmode opmode = vap->iv_opmode; ieee80211_ratectl_deinit(vap); ieee80211_vap_detach(vap); if (opmode == IEEE80211_M_IBSS || opmode == IEEE80211_M_HOSTAP) { if (bcn->m != NULL) m_freem(bcn->m); WPI_VAP_LOCK_DESTROY(wvp); } free(wvp, M_80211_VAP); } static int wpi_detach(device_t dev) { struct wpi_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; uint8_t qid; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); if (ic->ic_vap_create == wpi_vap_create) { ieee80211_draintask(ic, &sc->sc_radioon_task); wpi_stop(sc); if (sc->sc_tq != NULL) { taskqueue_drain_all(sc->sc_tq); taskqueue_free(sc->sc_tq); } callout_drain(&sc->watchdog_rfkill); callout_drain(&sc->tx_timeout); callout_drain(&sc->scan_timeout); callout_drain(&sc->calib_to); ieee80211_ifdetach(ic); } /* Uninstall interrupt handler. 
*/ if (sc->irq != NULL) { bus_teardown_intr(dev, sc->irq, sc->sc_ih); bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq), sc->irq); pci_release_msi(dev); } if (sc->txq[0].data_dmat) { /* Free DMA resources. */ for (qid = 0; qid < WPI_DRV_NTXQUEUES; qid++) wpi_free_tx_ring(sc, &sc->txq[qid]); wpi_free_rx_ring(sc); wpi_free_shared(sc); } if (sc->fw_dma.tag) wpi_free_fwmem(sc); if (sc->mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->mem), sc->mem); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); WPI_TXQ_STATE_LOCK_DESTROY(sc); WPI_TXQ_LOCK_DESTROY(sc); WPI_NT_LOCK_DESTROY(sc); WPI_RXON_LOCK_DESTROY(sc); WPI_TX_LOCK_DESTROY(sc); WPI_LOCK_DESTROY(sc); return 0; } static int wpi_shutdown(device_t dev) { struct wpi_softc *sc = device_get_softc(dev); wpi_stop(sc); return 0; } static int wpi_suspend(device_t dev) { struct wpi_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; ieee80211_suspend_all(ic); return 0; } static int wpi_resume(device_t dev) { struct wpi_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; /* Clear device-specific "PCI retry timeout" register (41h). */ pci_write_config(dev, 0x41, 0, 1); ieee80211_resume_all(ic); return 0; } /* * Grab exclusive access to NIC memory. */ static int wpi_nic_lock(struct wpi_softc *sc) { int ntries; /* Request exclusive access to NIC. */ WPI_SETBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_MAC_ACCESS_REQ); /* Spin until we actually get the lock. */ for (ntries = 0; ntries < 1000; ntries++) { if ((WPI_READ(sc, WPI_GP_CNTRL) & (WPI_GP_CNTRL_MAC_ACCESS_ENA | WPI_GP_CNTRL_SLEEP)) == WPI_GP_CNTRL_MAC_ACCESS_ENA) return 0; DELAY(10); } device_printf(sc->sc_dev, "could not lock memory\n"); return ETIMEDOUT; } /* * Release lock on NIC memory. 
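 *
 * wpi_nic_lock() and wpi_nic_unlock() always travel in pairs around direct
 * NIC memory access; the canonical caller shape (used by e.g.
 * wpi_read_prom_data() below) is:
 *
 *	if ((error = wpi_nic_lock(sc)) != 0)
 *		return error;
 *	// ... touch NIC memory, e.g. via wpi_prph_read()/wpi_prph_write() ...
 *	wpi_nic_unlock(sc);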
*/ static __inline void wpi_nic_unlock(struct wpi_softc *sc) { WPI_CLRBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_MAC_ACCESS_REQ); } static __inline uint32_t wpi_prph_read(struct wpi_softc *sc, uint32_t addr) { WPI_WRITE(sc, WPI_PRPH_RADDR, WPI_PRPH_DWORD | addr); WPI_BARRIER_READ_WRITE(sc); return WPI_READ(sc, WPI_PRPH_RDATA); } static __inline void wpi_prph_write(struct wpi_softc *sc, uint32_t addr, uint32_t data) { WPI_WRITE(sc, WPI_PRPH_WADDR, WPI_PRPH_DWORD | addr); WPI_BARRIER_WRITE(sc); WPI_WRITE(sc, WPI_PRPH_WDATA, data); } static __inline void wpi_prph_setbits(struct wpi_softc *sc, uint32_t addr, uint32_t mask) { wpi_prph_write(sc, addr, wpi_prph_read(sc, addr) | mask); } static __inline void wpi_prph_clrbits(struct wpi_softc *sc, uint32_t addr, uint32_t mask) { wpi_prph_write(sc, addr, wpi_prph_read(sc, addr) & ~mask); } static __inline void wpi_prph_write_region_4(struct wpi_softc *sc, uint32_t addr, const uint32_t *data, uint32_t count) { for (; count != 0; count--, data++, addr += 4) wpi_prph_write(sc, addr, *data); } static __inline uint32_t wpi_mem_read(struct wpi_softc *sc, uint32_t addr) { WPI_WRITE(sc, WPI_MEM_RADDR, addr); WPI_BARRIER_READ_WRITE(sc); return WPI_READ(sc, WPI_MEM_RDATA); } static __inline void wpi_mem_read_region_4(struct wpi_softc *sc, uint32_t addr, uint32_t *data, int count) { for (; count > 0; count--, addr += 4) *data++ = wpi_mem_read(sc, addr); } static int wpi_read_prom_data(struct wpi_softc *sc, uint32_t addr, void *data, int count) { uint8_t *out = data; uint32_t val; int error, ntries; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); if ((error = wpi_nic_lock(sc)) != 0) return error; for (; count > 0; count -= 2, addr++) { WPI_WRITE(sc, WPI_EEPROM, addr << 2); for (ntries = 0; ntries < 10; ntries++) { val = WPI_READ(sc, WPI_EEPROM); if (val & WPI_EEPROM_READ_VALID) break; DELAY(5); } if (ntries == 10) { device_printf(sc->sc_dev, "timeout reading ROM at 0x%x\n", addr); return ETIMEDOUT; } *out++= val >> 16; if (count > 1) *out ++= val >> 24; } wpi_nic_unlock(sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; } static void wpi_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { if (error != 0) return; KASSERT(nsegs == 1, ("too many DMA segments, %d should be 1", nsegs)); *(bus_addr_t *)arg = segs[0].ds_addr; } /* * Allocates a contiguous block of dma memory of the requested size and * alignment. 
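 *
 * Callers that only need the bus address may pass a NULL "kvap" (as
 * wpi_alloc_fwmem() does); callers that also want the kernel virtual
 * address pass a pointer to receive it, as in this call from
 * wpi_alloc_shared() below:
 *
 *	return wpi_dma_contig_alloc(sc, &sc->shared_dma,
 *	    (void **)&sc->shared, sizeof (struct wpi_shared), 4096);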
*/ static int wpi_dma_contig_alloc(struct wpi_softc *sc, struct wpi_dma_info *dma, void **kvap, bus_size_t size, bus_size_t alignment) { int error; dma->tag = NULL; dma->size = size; error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), alignment, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size, 0, NULL, NULL, &dma->tag); if (error != 0) goto fail; error = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr, BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, &dma->map); if (error != 0) goto fail; error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size, wpi_dma_map_addr, &dma->paddr, BUS_DMA_NOWAIT); if (error != 0) goto fail; bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); if (kvap != NULL) *kvap = dma->vaddr; return 0; fail: wpi_dma_contig_free(dma); return error; } static void wpi_dma_contig_free(struct wpi_dma_info *dma) { if (dma->vaddr != NULL) { bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->tag, dma->map); bus_dmamem_free(dma->tag, dma->vaddr, dma->map); dma->vaddr = NULL; } if (dma->tag != NULL) { bus_dma_tag_destroy(dma->tag); dma->tag = NULL; } } /* * Allocate a shared page between host and NIC. */ static int wpi_alloc_shared(struct wpi_softc *sc) { /* Shared buffer must be aligned on a 4KB boundary. */ return wpi_dma_contig_alloc(sc, &sc->shared_dma, (void **)&sc->shared, sizeof (struct wpi_shared), 4096); } static void wpi_free_shared(struct wpi_softc *sc) { wpi_dma_contig_free(&sc->shared_dma); } /* * Allocate DMA-safe memory for firmware transfer. */ static int wpi_alloc_fwmem(struct wpi_softc *sc) { /* Must be aligned on a 16-byte boundary. */ return wpi_dma_contig_alloc(sc, &sc->fw_dma, NULL, WPI_FW_TEXT_MAXSZ + WPI_FW_DATA_MAXSZ, 16); } static void wpi_free_fwmem(struct wpi_softc *sc) { wpi_dma_contig_free(&sc->fw_dma); } static int wpi_alloc_rx_ring(struct wpi_softc *sc) { struct wpi_rx_ring *ring = &sc->rxq; bus_size_t size; int i, error; ring->cur = 0; ring->update = 0; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); /* Allocate RX descriptors (16KB aligned.) */ size = WPI_RX_RING_COUNT * sizeof (uint32_t); error = wpi_dma_contig_alloc(sc, &ring->desc_dma, (void **)&ring->desc, size, WPI_RING_DMA_ALIGN); if (error != 0) { device_printf(sc->sc_dev, "%s: could not allocate RX ring DMA memory, error %d\n", __func__, error); goto fail; } /* Create RX buffer DMA tag. */ error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE, 0, NULL, NULL, &ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create RX buf DMA tag, error %d\n", __func__, error); goto fail; } /* * Allocate and map RX buffers. */ for (i = 0; i < WPI_RX_RING_COUNT; i++) { struct wpi_rx_data *data = &ring->data[i]; bus_addr_t paddr; error = bus_dmamap_create(ring->data_dmat, 0, &data->map); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create RX buf DMA map, error %d\n", __func__, error); goto fail; } data->m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); if (data->m == NULL) { device_printf(sc->sc_dev, "%s: could not allocate RX mbuf\n", __func__); error = ENOBUFS; goto fail; } error = bus_dmamap_load(ring->data_dmat, data->map, mtod(data->m, void *), MJUMPAGESIZE, wpi_dma_map_addr, &paddr, BUS_DMA_NOWAIT); if (error != 0 && error != EFBIG) { device_printf(sc->sc_dev, "%s: can't map mbuf (error %d)\n", __func__, error); goto fail; } /* Set physical address of RX buffer. 
*/ ring->desc[i] = htole32(paddr); } bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; fail: wpi_free_rx_ring(sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); return error; } static void wpi_update_rx_ring(struct wpi_softc *sc) { WPI_WRITE(sc, WPI_FH_RX_WPTR, sc->rxq.cur & ~7); } static void wpi_update_rx_ring_ps(struct wpi_softc *sc) { struct wpi_rx_ring *ring = &sc->rxq; if (ring->update != 0) { /* Wait for INT_WAKEUP event. */ return; } WPI_TXQ_LOCK(sc); WPI_SETBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_MAC_ACCESS_REQ); if (WPI_READ(sc, WPI_GP_CNTRL) & WPI_GP_CNTRL_SLEEP) { DPRINTF(sc, WPI_DEBUG_PWRSAVE, "%s: wakeup request\n", __func__); ring->update = 1; } else { wpi_update_rx_ring(sc); WPI_CLRBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_MAC_ACCESS_REQ); } WPI_TXQ_UNLOCK(sc); } static void wpi_reset_rx_ring(struct wpi_softc *sc) { struct wpi_rx_ring *ring = &sc->rxq; int ntries; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if (wpi_nic_lock(sc) == 0) { WPI_WRITE(sc, WPI_FH_RX_CONFIG, 0); for (ntries = 0; ntries < 1000; ntries++) { if (WPI_READ(sc, WPI_FH_RX_STATUS) & WPI_FH_RX_STATUS_IDLE) break; DELAY(10); } wpi_nic_unlock(sc); } ring->cur = 0; ring->update = 0; } static void wpi_free_rx_ring(struct wpi_softc *sc) { struct wpi_rx_ring *ring = &sc->rxq; int i; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); wpi_dma_contig_free(&ring->desc_dma); for (i = 0; i < WPI_RX_RING_COUNT; i++) { struct wpi_rx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } if (data->map != NULL) bus_dmamap_destroy(ring->data_dmat, data->map); } if (ring->data_dmat != NULL) { bus_dma_tag_destroy(ring->data_dmat); ring->data_dmat = NULL; } } static int wpi_alloc_tx_ring(struct wpi_softc *sc, struct wpi_tx_ring *ring, uint8_t qid) { bus_addr_t paddr; bus_size_t size; int i, error; ring->qid = qid; ring->queued = 0; ring->cur = 0; ring->pending = 0; ring->update = 0; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); /* Allocate TX descriptors (16KB aligned.) */ size = WPI_TX_RING_COUNT * sizeof (struct wpi_tx_desc); error = wpi_dma_contig_alloc(sc, &ring->desc_dma, (void **)&ring->desc, size, WPI_RING_DMA_ALIGN); if (error != 0) { device_printf(sc->sc_dev, "%s: could not allocate TX ring DMA memory, error %d\n", __func__, error); goto fail; } /* Update shared area with ring physical address. 
*/ sc->shared->txbase[qid] = htole32(ring->desc_dma.paddr); bus_dmamap_sync(sc->shared_dma.tag, sc->shared_dma.map, BUS_DMASYNC_PREWRITE); size = WPI_TX_RING_COUNT * sizeof (struct wpi_tx_cmd); error = wpi_dma_contig_alloc(sc, &ring->cmd_dma, (void **)&ring->cmd, size, 4); if (error != 0) { device_printf(sc->sc_dev, "%s: could not allocate TX cmd DMA memory, error %d\n", __func__, error); goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, WPI_MAX_SCATTER - 1, MCLBYTES, 0, NULL, NULL, &ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create TX buf DMA tag, error %d\n", __func__, error); goto fail; } paddr = ring->cmd_dma.paddr; for (i = 0; i < WPI_TX_RING_COUNT; i++) { struct wpi_tx_data *data = &ring->data[i]; data->cmd_paddr = paddr; paddr += sizeof (struct wpi_tx_cmd); error = bus_dmamap_create(ring->data_dmat, 0, &data->map); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create TX buf DMA map, error %d\n", __func__, error); goto fail; } } DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; fail: wpi_free_tx_ring(sc, ring); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); return error; } static void wpi_update_tx_ring(struct wpi_softc *sc, struct wpi_tx_ring *ring) { WPI_WRITE(sc, WPI_HBUS_TARG_WRPTR, ring->qid << 8 | ring->cur); } static void wpi_update_tx_ring_ps(struct wpi_softc *sc, struct wpi_tx_ring *ring) { if (ring->update != 0) { /* Wait for INT_WAKEUP event. */ return; } WPI_SETBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_MAC_ACCESS_REQ); if (WPI_READ(sc, WPI_GP_CNTRL) & WPI_GP_CNTRL_SLEEP) { DPRINTF(sc, WPI_DEBUG_PWRSAVE, "%s (%d): requesting wakeup\n", __func__, ring->qid); ring->update = 1; } else { wpi_update_tx_ring(sc, ring); WPI_CLRBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_MAC_ACCESS_REQ); } } static void wpi_reset_tx_ring(struct wpi_softc *sc, struct wpi_tx_ring *ring) { int i; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); for (i = 0; i < WPI_TX_RING_COUNT; i++) { struct wpi_tx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } if (data->ni != NULL) { ieee80211_free_node(data->ni); data->ni = NULL; } } /* Clear TX descriptors. */ memset(ring->desc, 0, ring->desc_dma.size); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); ring->queued = 0; ring->cur = 0; ring->pending = 0; ring->update = 0; } static void wpi_free_tx_ring(struct wpi_softc *sc, struct wpi_tx_ring *ring) { int i; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); wpi_dma_contig_free(&ring->desc_dma); wpi_dma_contig_free(&ring->cmd_dma); for (i = 0; i < WPI_TX_RING_COUNT; i++) { struct wpi_tx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); } if (data->map != NULL) bus_dmamap_destroy(ring->data_dmat, data->map); } if (ring->data_dmat != NULL) { bus_dma_tag_destroy(ring->data_dmat); ring->data_dmat = NULL; } } /* * Extract various information from EEPROM. 
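 *
 * Each field is pulled in with wpi_read_prom_data(); the function-local
 * WPI_CHK() macro defined at the top of the function bails out to the
 * "fail" label on the first error, so the individual reads take the shape
 * (taken from this function):
 *
 *	WPI_CHK(wpi_read_prom_data(sc, WPI_EEPROM_MAC, macaddr,
 *	    IEEE80211_ADDR_LEN));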
*/ static int wpi_read_eeprom(struct wpi_softc *sc, uint8_t macaddr[IEEE80211_ADDR_LEN]) { #define WPI_CHK(res) do { \ if ((error = res) != 0) \ goto fail; \ } while (0) uint8_t i; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); /* Adapter has to be powered on for EEPROM access to work. */ if ((error = wpi_apm_init(sc)) != 0) { device_printf(sc->sc_dev, "%s: could not power ON adapter, error %d\n", __func__, error); return error; } if ((WPI_READ(sc, WPI_EEPROM_GP) & 0x6) == 0) { device_printf(sc->sc_dev, "bad EEPROM signature\n"); error = EIO; goto fail; } /* Clear HW ownership of EEPROM. */ WPI_CLRBITS(sc, WPI_EEPROM_GP, WPI_EEPROM_GP_IF_OWNER); /* Read the hardware capabilities, revision and SKU type. */ WPI_CHK(wpi_read_prom_data(sc, WPI_EEPROM_SKU_CAP, &sc->cap, sizeof(sc->cap))); WPI_CHK(wpi_read_prom_data(sc, WPI_EEPROM_REVISION, &sc->rev, sizeof(sc->rev))); WPI_CHK(wpi_read_prom_data(sc, WPI_EEPROM_TYPE, &sc->type, sizeof(sc->type))); sc->rev = le16toh(sc->rev); DPRINTF(sc, WPI_DEBUG_EEPROM, "cap=%x rev=%x type=%x\n", sc->cap, sc->rev, sc->type); /* Read the regulatory domain (4 ASCII characters.) */ WPI_CHK(wpi_read_prom_data(sc, WPI_EEPROM_DOMAIN, sc->domain, sizeof(sc->domain))); /* Read MAC address. */ WPI_CHK(wpi_read_prom_data(sc, WPI_EEPROM_MAC, macaddr, IEEE80211_ADDR_LEN)); /* Read the list of authorized channels. */ for (i = 0; i < WPI_CHAN_BANDS_COUNT; i++) WPI_CHK(wpi_read_eeprom_channels(sc, i)); /* Read the list of TX power groups. */ for (i = 0; i < WPI_POWER_GROUPS_COUNT; i++) WPI_CHK(wpi_read_eeprom_group(sc, i)); fail: wpi_apm_stop(sc); /* Power OFF adapter. */ DPRINTF(sc, WPI_DEBUG_TRACE, error ? TRACE_STR_END_ERR : TRACE_STR_END, __func__); return error; #undef WPI_CHK } /* * Translate EEPROM flags to net80211. */ static uint32_t wpi_eeprom_channel_flags(struct wpi_eeprom_chan *channel) { uint32_t nflags; nflags = 0; if ((channel->flags & WPI_EEPROM_CHAN_ACTIVE) == 0) nflags |= IEEE80211_CHAN_PASSIVE; if ((channel->flags & WPI_EEPROM_CHAN_IBSS) == 0) nflags |= IEEE80211_CHAN_NOADHOC; if (channel->flags & WPI_EEPROM_CHAN_RADAR) { nflags |= IEEE80211_CHAN_DFS; /* XXX apparently IBSS may still be marked */ nflags |= IEEE80211_CHAN_NOADHOC; } /* XXX HOSTAP uses WPI_MODE_IBSS */ if (nflags & IEEE80211_CHAN_NOADHOC) nflags |= IEEE80211_CHAN_NOHOSTAP; return nflags; } static void wpi_read_eeprom_band(struct wpi_softc *sc, uint8_t n) { struct ieee80211com *ic = &sc->sc_ic; struct wpi_eeprom_chan *channels = sc->eeprom_channels[n]; const struct wpi_chan_band *band = &wpi_bands[n]; struct ieee80211_channel *c; uint32_t nflags; uint8_t chan, i; for (i = 0; i < band->nchan; i++) { if (!(channels[i].flags & WPI_EEPROM_CHAN_VALID)) { DPRINTF(sc, WPI_DEBUG_EEPROM, "Channel Not Valid: %d, band %d\n", band->chan[i],n); continue; } chan = band->chan[i]; nflags = wpi_eeprom_channel_flags(&channels[i]); c = &ic->ic_channels[ic->ic_nchans++]; c->ic_ieee = chan; c->ic_maxregpower = channels[i].maxpwr; c->ic_maxpower = 2*c->ic_maxregpower; if (n == 0) { /* 2GHz band */ c->ic_freq = ieee80211_ieee2mhz(chan, IEEE80211_CHAN_G); /* G =>'s B is supported */ c->ic_flags = IEEE80211_CHAN_B | nflags; c = &ic->ic_channels[ic->ic_nchans++]; c[0] = c[-1]; c->ic_flags = IEEE80211_CHAN_G | nflags; } else { /* 5GHz band */ c->ic_freq = ieee80211_ieee2mhz(chan, IEEE80211_CHAN_A); c->ic_flags = IEEE80211_CHAN_A | nflags; } /* Save maximum allowed TX power for this channel. 
*/ sc->maxpwr[chan] = channels[i].maxpwr; DPRINTF(sc, WPI_DEBUG_EEPROM, "adding chan %d (%dMHz) flags=0x%x maxpwr=%d passive=%d," " offset %d\n", chan, c->ic_freq, channels[i].flags, sc->maxpwr[chan], IEEE80211_IS_CHAN_PASSIVE(c), ic->ic_nchans); } } /** * Read the eeprom to find out what channels are valid for the given * band and update net80211 with what we find. */ static int wpi_read_eeprom_channels(struct wpi_softc *sc, uint8_t n) { struct ieee80211com *ic = &sc->sc_ic; const struct wpi_chan_band *band = &wpi_bands[n]; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); error = wpi_read_prom_data(sc, band->addr, &sc->eeprom_channels[n], band->nchan * sizeof (struct wpi_eeprom_chan)); if (error != 0) { DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); return error; } wpi_read_eeprom_band(sc, n); ieee80211_sort_channels(ic->ic_channels, ic->ic_nchans); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; } static struct wpi_eeprom_chan * wpi_find_eeprom_channel(struct wpi_softc *sc, struct ieee80211_channel *c) { int i, j; for (j = 0; j < WPI_CHAN_BANDS_COUNT; j++) for (i = 0; i < wpi_bands[j].nchan; i++) if (wpi_bands[j].chan[i] == c->ic_ieee) return &sc->eeprom_channels[j][i]; return NULL; } /* * Enforce flags read from EEPROM. */ static int wpi_setregdomain(struct ieee80211com *ic, struct ieee80211_regdomain *rd, int nchan, struct ieee80211_channel chans[]) { struct wpi_softc *sc = ic->ic_softc; int i; for (i = 0; i < nchan; i++) { struct ieee80211_channel *c = &chans[i]; struct wpi_eeprom_chan *channel; channel = wpi_find_eeprom_channel(sc, c); if (channel == NULL) { ic_printf(ic, "%s: invalid channel %u freq %u/0x%x\n", __func__, c->ic_ieee, c->ic_freq, c->ic_flags); return EINVAL; } c->ic_flags |= wpi_eeprom_channel_flags(channel); } return 0; } static int wpi_read_eeprom_group(struct wpi_softc *sc, uint8_t n) { struct wpi_power_group *group = &sc->groups[n]; struct wpi_eeprom_group rgroup; int i, error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); if ((error = wpi_read_prom_data(sc, WPI_EEPROM_POWER_GRP + n * 32, &rgroup, sizeof rgroup)) != 0) { DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); return error; } /* Save TX power group information. */ group->chan = rgroup.chan; group->maxpwr = rgroup.maxpwr; /* Retrieve temperature at which the samples were taken. 
*/ group->temp = (int16_t)le16toh(rgroup.temp); DPRINTF(sc, WPI_DEBUG_EEPROM, "power group %d: chan=%d maxpwr=%d temp=%d\n", n, group->chan, group->maxpwr, group->temp); for (i = 0; i < WPI_SAMPLES_COUNT; i++) { group->samples[i].index = rgroup.samples[i].index; group->samples[i].power = rgroup.samples[i].power; DPRINTF(sc, WPI_DEBUG_EEPROM, "\tsample %d: index=%d power=%d\n", i, group->samples[i].index, group->samples[i].power); } DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; } static __inline uint8_t wpi_add_node_entry_adhoc(struct wpi_softc *sc) { uint8_t newid = WPI_ID_IBSS_MIN; for (; newid <= WPI_ID_IBSS_MAX; newid++) { if ((sc->nodesmsk & (1 << newid)) == 0) { sc->nodesmsk |= 1 << newid; return newid; } } return WPI_ID_UNDEFINED; } static __inline uint8_t wpi_add_node_entry_sta(struct wpi_softc *sc) { sc->nodesmsk |= 1 << WPI_ID_BSS; return WPI_ID_BSS; } static __inline int wpi_check_node_entry(struct wpi_softc *sc, uint8_t id) { if (id == WPI_ID_UNDEFINED) return 0; return (sc->nodesmsk >> id) & 1; } static __inline void wpi_clear_node_table(struct wpi_softc *sc) { sc->nodesmsk = 0; } static __inline void wpi_del_node_entry(struct wpi_softc *sc, uint8_t id) { sc->nodesmsk &= ~(1 << id); } static struct ieee80211_node * wpi_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { struct wpi_node *wn; wn = malloc(sizeof (struct wpi_node), M_80211_NODE, M_NOWAIT | M_ZERO); if (wn == NULL) return NULL; wn->id = WPI_ID_UNDEFINED; return &wn->ni; } static void wpi_node_free(struct ieee80211_node *ni) { struct wpi_softc *sc = ni->ni_ic->ic_softc; struct wpi_node *wn = WPI_NODE(ni); if (wn->id != WPI_ID_UNDEFINED) { WPI_NT_LOCK(sc); if (wpi_check_node_entry(sc, wn->id)) { wpi_del_node_entry(sc, wn->id); wpi_del_node(sc, ni); } WPI_NT_UNLOCK(sc); } sc->sc_node_free(ni); } static __inline int wpi_check_bss_filter(struct wpi_softc *sc) { return (sc->rxon.filter & htole32(WPI_FILTER_BSS)) != 0; } static void wpi_ibss_recv_mgmt(struct ieee80211_node *ni, struct mbuf *m, int subtype, const struct ieee80211_rx_stats *rxs, int rssi, int nf) { struct ieee80211vap *vap = ni->ni_vap; struct wpi_softc *sc = vap->iv_ic->ic_softc; struct wpi_vap *wvp = WPI_VAP(vap); uint64_t ni_tstamp, rx_tstamp; wvp->wv_recv_mgmt(ni, m, subtype, rxs, rssi, nf); if (vap->iv_state == IEEE80211_S_RUN && (subtype == IEEE80211_FC0_SUBTYPE_BEACON || subtype == IEEE80211_FC0_SUBTYPE_PROBE_RESP)) { ni_tstamp = le64toh(ni->ni_tstamp.tsf); rx_tstamp = le64toh(sc->rx_tstamp); if (ni_tstamp >= rx_tstamp) { DPRINTF(sc, WPI_DEBUG_STATE, "ibss merge, tsf %ju tstamp %ju\n", (uintmax_t)rx_tstamp, (uintmax_t)ni_tstamp); (void) ieee80211_ibss_merge(ni); } } } static void wpi_restore_node(void *arg, struct ieee80211_node *ni) { struct wpi_softc *sc = arg; struct wpi_node *wn = WPI_NODE(ni); int error; WPI_NT_LOCK(sc); if (wn->id != WPI_ID_UNDEFINED) { wn->id = WPI_ID_UNDEFINED; if ((error = wpi_add_ibss_node(sc, ni)) != 0) { device_printf(sc->sc_dev, "%s: could not add IBSS node, error %d\n", __func__, error); } } WPI_NT_UNLOCK(sc); } static void wpi_restore_node_table(struct wpi_softc *sc, struct wpi_vap *wvp) { struct ieee80211com *ic = &sc->sc_ic; /* Set group keys once. 
*/ WPI_NT_LOCK(sc); wvp->wv_gtk = 0; WPI_NT_UNLOCK(sc); ieee80211_iterate_nodes(&ic->ic_sta, wpi_restore_node, sc); ieee80211_crypto_reload_keys(ic); }
/** * Called by net80211 whenever there is a change to the 802.11 state machine. */ static int wpi_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct wpi_vap *wvp = WPI_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct wpi_softc *sc = ic->ic_softc; int error = 0; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); WPI_TXQ_LOCK(sc); if (nstate > IEEE80211_S_INIT && sc->sc_running == 0) { DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); WPI_TXQ_UNLOCK(sc); return ENXIO; } WPI_TXQ_UNLOCK(sc); DPRINTF(sc, WPI_DEBUG_STATE, "%s: %s -> %s\n", __func__, ieee80211_state_name[vap->iv_state], ieee80211_state_name[nstate]);
if (vap->iv_state == IEEE80211_S_RUN && nstate < IEEE80211_S_RUN) { if ((error = wpi_set_pslevel(sc, 0, 0, 1)) != 0) { device_printf(sc->sc_dev, "%s: could not set power saving level\n", __func__); return error; } wpi_set_led(sc, WPI_LED_LINK, 1, 0); }
switch (nstate) { case IEEE80211_S_SCAN: WPI_RXON_LOCK(sc); if (wpi_check_bss_filter(sc) != 0) { sc->rxon.filter &= ~htole32(WPI_FILTER_BSS); if ((error = wpi_send_rxon(sc, 0, 1)) != 0) { device_printf(sc->sc_dev, "%s: could not send RXON\n", __func__); } } WPI_RXON_UNLOCK(sc); break;
case IEEE80211_S_ASSOC: if (vap->iv_state != IEEE80211_S_RUN) break; /* FALLTHROUGH */ case IEEE80211_S_AUTH: /* * NB: do not optimize AUTH -> AUTH state transition - * this will break powersave with non-QoS AP! */ /* * The node must be registered in the firmware before auth. * Also the associd must be cleared on RUN -> ASSOC * transitions. */ if ((error = wpi_auth(sc, vap)) != 0) { device_printf(sc->sc_dev, "%s: could not move to AUTH state, error %d\n", __func__, error); } break;
case IEEE80211_S_RUN: /* * RUN -> RUN transition: * STA mode: Just restart the timers. * IBSS mode: Process IBSS merge. */ if (vap->iv_state == IEEE80211_S_RUN) { if (vap->iv_opmode != IEEE80211_M_IBSS) { WPI_RXON_LOCK(sc); wpi_calib_timeout(sc); WPI_RXON_UNLOCK(sc); break; } else { /* * Drop the BSS_FILTER bit * (there is no other way to change the bssid). */ WPI_RXON_LOCK(sc); sc->rxon.filter &= ~htole32(WPI_FILTER_BSS); if ((error = wpi_send_rxon(sc, 0, 1)) != 0) { device_printf(sc->sc_dev, "%s: could not send RXON\n", __func__); } WPI_RXON_UNLOCK(sc); /* Restore everything that was lost. */ wpi_restore_node_table(sc, wvp); /* XXX set conditionally? */ wpi_updateedca(ic); } }
/* * !RUN -> RUN requires setting the association id * which is done with a firmware cmd. We also defer * starting the timers until that work is done.
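 *
 * Whatever the transition, the handler finishes by chaining to the
 * net80211 handler that was saved in wpi_vap_create(), i.e. (see the end
 * of this function):
 *
 *	return wvp->wv_newstate(vap, nstate, arg);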
*/ if ((error = wpi_run(sc, vap)) != 0) { device_printf(sc->sc_dev, "%s: could not move to RUN state\n", __func__); } break; default: break; } if (error != 0) { DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); return error; } DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return wvp->wv_newstate(vap, nstate, arg); } static void wpi_calib_timeout(void *arg) { struct wpi_softc *sc = arg; if (wpi_check_bss_filter(sc) == 0) return; wpi_power_calibration(sc); callout_reset(&sc->calib_to, 60*hz, wpi_calib_timeout, sc); } static __inline uint8_t rate2plcp(const uint8_t rate) { switch (rate) { case 12: return 0xd; case 18: return 0xf; case 24: return 0x5; case 36: return 0x7; case 48: return 0x9; case 72: return 0xb; case 96: return 0x1; case 108: return 0x3; case 2: return 10; case 4: return 20; case 11: return 55; case 22: return 110; default: return 0; } } static __inline uint8_t plcp2rate(const uint8_t plcp) { switch (plcp) { case 0xd: return 12; case 0xf: return 18; case 0x5: return 24; case 0x7: return 36; case 0x9: return 48; case 0xb: return 72; case 0x1: return 96; case 0x3: return 108; case 10: return 2; case 20: return 4; case 55: return 11; case 110: return 22; default: return 0; } } /* Quickly determine if a given rate is CCK or OFDM. */ #define WPI_RATE_IS_OFDM(rate) ((rate) >= 12 && (rate) != 22) static void wpi_rx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc, struct wpi_rx_data *data) { struct ieee80211com *ic = &sc->sc_ic; struct wpi_rx_ring *ring = &sc->rxq; struct wpi_rx_stat *stat; struct wpi_rx_head *head; struct wpi_rx_tail *tail; struct ieee80211_frame *wh; struct ieee80211_node *ni; struct mbuf *m, *m1; bus_addr_t paddr; uint32_t flags; uint16_t len; int error; stat = (struct wpi_rx_stat *)(desc + 1); if (__predict_false(stat->len > WPI_STAT_MAXLEN)) { device_printf(sc->sc_dev, "invalid RX statistic header\n"); goto fail1; } bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); head = (struct wpi_rx_head *)((caddr_t)(stat + 1) + stat->len); len = le16toh(head->len); tail = (struct wpi_rx_tail *)((caddr_t)(head + 1) + len); flags = le32toh(tail->flags); DPRINTF(sc, WPI_DEBUG_RECV, "%s: idx %d len %d stat len %u rssi %d" " rate %x chan %d tstamp %ju\n", __func__, ring->cur, le32toh(desc->len), len, (int8_t)stat->rssi, head->plcp, head->chan, (uintmax_t)le64toh(tail->tstamp)); /* Discard frames with a bad FCS early. */ if ((flags & WPI_RX_NOERROR) != WPI_RX_NOERROR) { DPRINTF(sc, WPI_DEBUG_RECV, "%s: RX flags error %x\n", __func__, flags); goto fail1; } /* Discard frames that are too short. */ if (len < sizeof (struct ieee80211_frame_ack)) { DPRINTF(sc, WPI_DEBUG_RECV, "%s: frame too short: %d\n", __func__, len); goto fail1; } m1 = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); if (__predict_false(m1 == NULL)) { DPRINTF(sc, WPI_DEBUG_ANY, "%s: no mbuf to restock ring\n", __func__); goto fail1; } bus_dmamap_unload(ring->data_dmat, data->map); error = bus_dmamap_load(ring->data_dmat, data->map, mtod(m1, void *), MJUMPAGESIZE, wpi_dma_map_addr, &paddr, BUS_DMA_NOWAIT); if (__predict_false(error != 0 && error != EFBIG)) { device_printf(sc->sc_dev, "%s: bus_dmamap_load failed, error %d\n", __func__, error); m_freem(m1); /* Try to reload the old mbuf. */ error = bus_dmamap_load(ring->data_dmat, data->map, mtod(data->m, void *), MJUMPAGESIZE, wpi_dma_map_addr, &paddr, BUS_DMA_NOWAIT); if (error != 0 && error != EFBIG) { panic("%s: could not load old RX mbuf", __func__); } /* Physical address may have changed. 
*/ ring->desc[ring->cur] = htole32(paddr); bus_dmamap_sync(ring->data_dmat, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); goto fail1; } m = data->m; data->m = m1; /* Update RX descriptor. */ ring->desc[ring->cur] = htole32(paddr); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); /* Finalize mbuf. */ m->m_data = (caddr_t)(head + 1); m->m_pkthdr.len = m->m_len = len; /* Grab a reference to the source node. */ wh = mtod(m, struct ieee80211_frame *); if ((wh->i_fc[1] & IEEE80211_FC1_PROTECTED) && (flags & WPI_RX_CIPHER_MASK) == WPI_RX_CIPHER_CCMP) { /* Check whether decryption was successful or not. */ if ((flags & WPI_RX_DECRYPT_MASK) != WPI_RX_DECRYPT_OK) { DPRINTF(sc, WPI_DEBUG_RECV, "CCMP decryption failed 0x%x\n", flags); goto fail2; } m->m_flags |= M_WEP; } if (len >= sizeof(struct ieee80211_frame_min)) ni = ieee80211_find_rxnode(ic, (struct ieee80211_frame_min *)wh); else ni = NULL; sc->rx_tstamp = tail->tstamp; if (ieee80211_radiotap_active(ic)) { struct wpi_rx_radiotap_header *tap = &sc->sc_rxtap; tap->wr_flags = 0; if (head->flags & htole16(WPI_STAT_FLAG_SHPREAMBLE)) tap->wr_flags |= IEEE80211_RADIOTAP_F_SHORTPRE; tap->wr_dbm_antsignal = (int8_t)(stat->rssi + WPI_RSSI_OFFSET); tap->wr_dbm_antnoise = WPI_RSSI_OFFSET; tap->wr_tsft = tail->tstamp; tap->wr_antenna = (le16toh(head->flags) >> 4) & 0xf; tap->wr_rate = plcp2rate(head->plcp); } WPI_UNLOCK(sc); /* Send the frame to the 802.11 layer. */ if (ni != NULL) { (void)ieee80211_input(ni, m, stat->rssi, WPI_RSSI_OFFSET); /* Node is no longer needed. */ ieee80211_free_node(ni); } else (void)ieee80211_input_all(ic, m, stat->rssi, WPI_RSSI_OFFSET); WPI_LOCK(sc); return; fail2: m_freem(m); fail1: counter_u64_add(ic->ic_ierrors, 1); } static void wpi_rx_statistics(struct wpi_softc *sc, struct wpi_rx_desc *desc, struct wpi_rx_data *data) { /* Ignore */ } static void wpi_tx_done(struct wpi_softc *sc, struct wpi_rx_desc *desc) { struct wpi_tx_ring *ring = &sc->txq[desc->qid & 0x3]; struct wpi_tx_data *data = &ring->data[desc->idx]; struct wpi_tx_stat *stat = (struct wpi_tx_stat *)(desc + 1); struct mbuf *m; struct ieee80211_node *ni; struct ieee80211vap *vap; struct ieee80211com *ic; uint32_t status = le32toh(stat->status); int ackfailcnt = stat->ackfailcnt / WPI_NTRIES_DEFAULT; KASSERT(data->ni != NULL, ("no node")); KASSERT(data->m != NULL, ("no mbuf")); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); DPRINTF(sc, WPI_DEBUG_XMIT, "%s: " "qid %d idx %d retries %d btkillcnt %d rate %x duration %d " "status %x\n", __func__, desc->qid, desc->idx, stat->ackfailcnt, stat->btkillcnt, stat->rate, le32toh(stat->duration), status); /* Unmap and free mbuf. */ bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m = data->m, data->m = NULL; ni = data->ni, data->ni = NULL; vap = ni->ni_vap; ic = vap->iv_ic; /* * Update rate control statistics for the node. */ if (status & WPI_TX_STATUS_FAIL) { ieee80211_ratectl_tx_complete(vap, ni, IEEE80211_RATECTL_TX_FAILURE, &ackfailcnt, NULL); } else ieee80211_ratectl_tx_complete(vap, ni, IEEE80211_RATECTL_TX_SUCCESS, &ackfailcnt, NULL); ieee80211_tx_complete(ni, m, (status & WPI_TX_STATUS_FAIL) != 0); WPI_TXQ_STATE_LOCK(sc); if (--ring->queued > 0) callout_reset(&sc->tx_timeout, 5*hz, wpi_tx_timeout, sc); else callout_stop(&sc->tx_timeout); WPI_TXQ_STATE_UNLOCK(sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); } /* * Process a "command done" firmware notification. 
This is where we wakeup * processes waiting for a synchronous command completion. */ static void wpi_cmd_done(struct wpi_softc *sc, struct wpi_rx_desc *desc) { struct wpi_tx_ring *ring = &sc->txq[WPI_CMD_QUEUE_NUM]; struct wpi_tx_data *data; struct wpi_tx_cmd *cmd; DPRINTF(sc, WPI_DEBUG_CMD, "cmd notification qid %x idx %d flags %x " "type %s len %d\n", desc->qid, desc->idx, desc->flags, wpi_cmd_str(desc->type), le32toh(desc->len)); if ((desc->qid & WPI_RX_DESC_QID_MSK) != WPI_CMD_QUEUE_NUM) return; /* Not a command ack. */ KASSERT(ring->queued == 0, ("ring->queued must be 0")); data = &ring->data[desc->idx]; cmd = &ring->cmd[desc->idx]; /* If the command was mapped in an mbuf, free it. */ if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } wakeup(cmd); if (desc->type == WPI_CMD_SET_POWER_MODE) { struct wpi_pmgt_cmd *pcmd = (struct wpi_pmgt_cmd *)cmd->data; bus_dmamap_sync(ring->data_dmat, ring->cmd_dma.map, BUS_DMASYNC_POSTREAD); WPI_TXQ_LOCK(sc); if (le16toh(pcmd->flags) & WPI_PS_ALLOW_SLEEP) { sc->sc_update_rx_ring = wpi_update_rx_ring_ps; sc->sc_update_tx_ring = wpi_update_tx_ring_ps; } else { sc->sc_update_rx_ring = wpi_update_rx_ring; sc->sc_update_tx_ring = wpi_update_tx_ring; } WPI_TXQ_UNLOCK(sc); } } static void wpi_notif_intr(struct wpi_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); uint32_t hw; bus_dmamap_sync(sc->shared_dma.tag, sc->shared_dma.map, BUS_DMASYNC_POSTREAD); hw = le32toh(sc->shared->next) & 0xfff; hw = (hw == 0) ? WPI_RX_RING_COUNT - 1 : hw - 1; while (sc->rxq.cur != hw) { sc->rxq.cur = (sc->rxq.cur + 1) % WPI_RX_RING_COUNT; struct wpi_rx_data *data = &sc->rxq.data[sc->rxq.cur]; struct wpi_rx_desc *desc; bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); desc = mtod(data->m, struct wpi_rx_desc *); DPRINTF(sc, WPI_DEBUG_NOTIFY, "%s: cur=%d; qid %x idx %d flags %x type %d(%s) len %d\n", __func__, sc->rxq.cur, desc->qid, desc->idx, desc->flags, desc->type, wpi_cmd_str(desc->type), le32toh(desc->len)); if (!(desc->qid & WPI_UNSOLICITED_RX_NOTIF)) { /* Reply to a command. */ wpi_cmd_done(sc, desc); } switch (desc->type) { case WPI_RX_DONE: /* An 802.11 frame has been received. */ wpi_rx_done(sc, desc, data); if (__predict_false(sc->sc_running == 0)) { /* wpi_stop() was called. */ return; } break; case WPI_TX_DONE: /* An 802.11 frame has been transmitted. 
*/ wpi_tx_done(sc, desc); break; case WPI_RX_STATISTICS: case WPI_BEACON_STATISTICS: wpi_rx_statistics(sc, desc, data); break; case WPI_BEACON_MISSED: { struct wpi_beacon_missed *miss = (struct wpi_beacon_missed *)(desc + 1); uint32_t expected, misses, received, threshold; bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); misses = le32toh(miss->consecutive); expected = le32toh(miss->expected); received = le32toh(miss->received); threshold = MAX(2, vap->iv_bmissthreshold); DPRINTF(sc, WPI_DEBUG_BMISS, "%s: beacons missed %u(%u) (received %u/%u)\n", __func__, misses, le32toh(miss->total), received, expected); if (misses >= threshold || (received == 0 && expected >= threshold)) { WPI_RXON_LOCK(sc); if (callout_pending(&sc->scan_timeout)) { wpi_cmd(sc, WPI_CMD_SCAN_ABORT, NULL, 0, 1); } WPI_RXON_UNLOCK(sc); if (vap->iv_state == IEEE80211_S_RUN && (ic->ic_flags & IEEE80211_F_SCAN) == 0) ieee80211_beacon_miss(ic); } break; } #ifdef WPI_DEBUG case WPI_BEACON_SENT: { struct wpi_tx_stat *stat = (struct wpi_tx_stat *)(desc + 1); uint64_t *tsf = (uint64_t *)(stat + 1); uint32_t *mode = (uint32_t *)(tsf + 1); bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); DPRINTF(sc, WPI_DEBUG_BEACON, "beacon sent: rts %u, ack %u, btkill %u, rate %u, " "duration %u, status %x, tsf %ju, mode %x\n", stat->rtsfailcnt, stat->ackfailcnt, stat->btkillcnt, stat->rate, le32toh(stat->duration), le32toh(stat->status), le64toh(*tsf), le32toh(*mode)); break; } #endif case WPI_UC_READY: { struct wpi_ucode_info *uc = (struct wpi_ucode_info *)(desc + 1); /* The microcontroller is ready. */ bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); DPRINTF(sc, WPI_DEBUG_RESET, "microcode alive notification version=%d.%d " "subtype=%x alive=%x\n", uc->major, uc->minor, uc->subtype, le32toh(uc->valid)); if (le32toh(uc->valid) != 1) { device_printf(sc->sc_dev, "microcontroller initialization failed\n"); wpi_stop_locked(sc); return; } /* Save the address of the error log in SRAM. */ sc->errptr = le32toh(uc->errptr); break; } case WPI_STATE_CHANGED: { bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); uint32_t *status = (uint32_t *)(desc + 1); DPRINTF(sc, WPI_DEBUG_STATE, "state changed to %x\n", le32toh(*status)); if (le32toh(*status) & 1) { WPI_NT_LOCK(sc); wpi_clear_node_table(sc); WPI_NT_UNLOCK(sc); taskqueue_enqueue(sc->sc_tq, &sc->sc_radiooff_task); return; } break; } #ifdef WPI_DEBUG case WPI_START_SCAN: { bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); struct wpi_start_scan *scan = (struct wpi_start_scan *)(desc + 1); DPRINTF(sc, WPI_DEBUG_SCAN, "%s: scanning channel %d status %x\n", __func__, scan->chan, le32toh(scan->status)); break; } #endif case WPI_STOP_SCAN: { bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); struct wpi_stop_scan *scan = (struct wpi_stop_scan *)(desc + 1); DPRINTF(sc, WPI_DEBUG_SCAN, "scan finished nchan=%d status=%d chan=%d\n", scan->nchan, scan->status, scan->chan); WPI_RXON_LOCK(sc); callout_stop(&sc->scan_timeout); WPI_RXON_UNLOCK(sc); if (scan->status == WPI_SCAN_ABORTED) ieee80211_cancel_scan(vap); else ieee80211_scan_next(vap); break; } } if (sc->rxq.cur % 8 == 0) { /* Tell the firmware what we have processed. */ sc->sc_update_rx_ring(sc); } } } /* * Process an INT_WAKEUP interrupt raised when the microcontroller wakes up * from power-down sleep mode. 
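* While the NIC sleeps, ring pointer updates are deferred through the * rxq.update/ring->update flags; this handler flushes them and then * releases the MAC access request bit in WPI_GP_CNTRL.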
*/ static void wpi_wakeup_intr(struct wpi_softc *sc) { int qid; DPRINTF(sc, WPI_DEBUG_PWRSAVE, "%s: ucode wakeup from power-down sleep\n", __func__); /* Wakeup RX and TX rings. */ if (sc->rxq.update) { sc->rxq.update = 0; wpi_update_rx_ring(sc); } WPI_TXQ_LOCK(sc); for (qid = 0; qid < WPI_DRV_NTXQUEUES; qid++) { struct wpi_tx_ring *ring = &sc->txq[qid]; if (ring->update) { ring->update = 0; wpi_update_tx_ring(sc, ring); } } WPI_CLRBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_MAC_ACCESS_REQ); WPI_TXQ_UNLOCK(sc); } /* * This function prints firmware registers */ #ifdef WPI_DEBUG static void wpi_debug_registers(struct wpi_softc *sc) { size_t i; static const uint32_t csr_tbl[] = { WPI_HW_IF_CONFIG, WPI_INT, WPI_INT_MASK, WPI_FH_INT, WPI_GPIO_IN, WPI_RESET, WPI_GP_CNTRL, WPI_EEPROM, WPI_EEPROM_GP, WPI_GIO, WPI_UCODE_GP1, WPI_UCODE_GP2, WPI_GIO_CHICKEN, WPI_ANA_PLL, WPI_DBG_HPET_MEM, }; static const uint32_t prph_tbl[] = { WPI_APMG_CLK_CTRL, WPI_APMG_PS, WPI_APMG_PCI_STT, WPI_APMG_RFKILL, }; DPRINTF(sc, WPI_DEBUG_REGISTER,"%s","\n"); for (i = 0; i < nitems(csr_tbl); i++) { DPRINTF(sc, WPI_DEBUG_REGISTER, " %-18s: 0x%08x ", wpi_get_csr_string(csr_tbl[i]), WPI_READ(sc, csr_tbl[i])); if ((i + 1) % 2 == 0) DPRINTF(sc, WPI_DEBUG_REGISTER, "\n"); } DPRINTF(sc, WPI_DEBUG_REGISTER, "\n\n"); if (wpi_nic_lock(sc) == 0) { for (i = 0; i < nitems(prph_tbl); i++) { DPRINTF(sc, WPI_DEBUG_REGISTER, " %-18s: 0x%08x ", wpi_get_prph_string(prph_tbl[i]), wpi_prph_read(sc, prph_tbl[i])); if ((i + 1) % 2 == 0) DPRINTF(sc, WPI_DEBUG_REGISTER, "\n"); } DPRINTF(sc, WPI_DEBUG_REGISTER, "\n"); wpi_nic_unlock(sc); } else { DPRINTF(sc, WPI_DEBUG_REGISTER, "Cannot access internal registers.\n"); } } #endif /* * Dump the error log of the firmware when a firmware panic occurs. Although * we can't debug the firmware because it is neither open source nor free, it * can help us to identify certain classes of problems. */ static void wpi_fatal_intr(struct wpi_softc *sc) { struct wpi_fw_dump dump; uint32_t i, offset, count; /* Check that the error log address is valid. */ if (sc->errptr < WPI_FW_DATA_BASE || sc->errptr + sizeof (dump) > WPI_FW_DATA_BASE + WPI_FW_DATA_MAXSZ) { printf("%s: bad firmware error log address 0x%08x\n", __func__, sc->errptr); return; } if (wpi_nic_lock(sc) != 0) { printf("%s: could not read firmware error log\n", __func__); return; } /* Read number of entries in the log. */ count = wpi_mem_read(sc, sc->errptr); if (count == 0 || count * sizeof (dump) > WPI_FW_DATA_MAXSZ) { printf("%s: invalid count field (count = %u)\n", __func__, count); wpi_nic_unlock(sc); return; } /* Skip "count" field. */ offset = sc->errptr + sizeof (uint32_t); printf("firmware error log (count = %u):\n", count); for (i = 0; i < count; i++) { wpi_mem_read_region_4(sc, offset, (uint32_t *)&dump, sizeof (dump) / sizeof (uint32_t)); printf(" error type = \"%s\" (0x%08X)\n", (dump.desc < nitems(wpi_fw_errmsg)) ? wpi_fw_errmsg[dump.desc] : "UNKNOWN", dump.desc); printf(" error data = 0x%08X\n", dump.data); printf(" branch link = 0x%08X%08X\n", dump.blink[0], dump.blink[1]); printf(" interrupt link = 0x%08X%08X\n", dump.ilink[0], dump.ilink[1]); printf(" time = %u\n", dump.time); offset += sizeof (dump); } wpi_nic_unlock(sc); /* Dump driver status (TX and RX rings) while we're here. 
*/ printf("driver status:\n"); WPI_TXQ_LOCK(sc); for (i = 0; i < WPI_DRV_NTXQUEUES; i++) { struct wpi_tx_ring *ring = &sc->txq[i]; printf(" tx ring %2d: qid=%-2d cur=%-3d queued=%-3d\n", i, ring->qid, ring->cur, ring->queued); } WPI_TXQ_UNLOCK(sc); printf(" rx ring: cur=%d\n", sc->rxq.cur); } static void wpi_intr(void *arg) { struct wpi_softc *sc = arg; uint32_t r1, r2; WPI_LOCK(sc); /* Disable interrupts. */ WPI_WRITE(sc, WPI_INT_MASK, 0); r1 = WPI_READ(sc, WPI_INT); if (__predict_false(r1 == 0xffffffff || (r1 & 0xfffffff0) == 0xa5a5a5a0)) goto end; /* Hardware gone! */ r2 = WPI_READ(sc, WPI_FH_INT); DPRINTF(sc, WPI_DEBUG_INTR, "%s: reg1=0x%08x reg2=0x%08x\n", __func__, r1, r2); if (r1 == 0 && r2 == 0) goto done; /* Interrupt not for us. */ /* Acknowledge interrupts. */ WPI_WRITE(sc, WPI_INT, r1); WPI_WRITE(sc, WPI_FH_INT, r2); if (__predict_false(r1 & (WPI_INT_SW_ERR | WPI_INT_HW_ERR))) { device_printf(sc->sc_dev, "fatal firmware error\n"); #ifdef WPI_DEBUG wpi_debug_registers(sc); #endif wpi_fatal_intr(sc); DPRINTF(sc, WPI_DEBUG_HW, "(%s)\n", (r1 & WPI_INT_SW_ERR) ? "(Software Error)" : "(Hardware Error)"); taskqueue_enqueue(sc->sc_tq, &sc->sc_reinittask); goto end; } if ((r1 & (WPI_INT_FH_RX | WPI_INT_SW_RX)) || (r2 & WPI_FH_INT_RX)) wpi_notif_intr(sc); if (r1 & WPI_INT_ALIVE) wakeup(sc); /* Firmware is alive. */ if (r1 & WPI_INT_WAKEUP) wpi_wakeup_intr(sc); done: /* Re-enable interrupts. */ if (__predict_true(sc->sc_running)) WPI_WRITE(sc, WPI_INT_MASK, WPI_INT_MASK_DEF); end: WPI_UNLOCK(sc); } static void wpi_free_txfrags(struct wpi_softc *sc, uint16_t ac) { struct wpi_tx_ring *ring; struct wpi_tx_data *data; uint8_t cur; WPI_TXQ_LOCK(sc); ring = &sc->txq[ac]; while (ring->pending != 0) { ring->pending--; cur = (ring->cur + ring->pending) % WPI_TX_RING_COUNT; data = &ring->data[cur]; bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; ieee80211_node_decref(data->ni); data->ni = NULL; } WPI_TXQ_UNLOCK(sc); } static int wpi_cmd2(struct wpi_softc *sc, struct wpi_buf *buf) { struct ieee80211_frame *wh; struct wpi_tx_cmd *cmd; struct wpi_tx_data *data; struct wpi_tx_desc *desc; struct wpi_tx_ring *ring; struct mbuf *m1; bus_dma_segment_t *seg, segs[WPI_MAX_SCATTER]; uint8_t cur, pad; uint16_t hdrlen; int error, i, nsegs, totlen, frag; WPI_TXQ_LOCK(sc); KASSERT(buf->size <= sizeof(buf->data), ("buffer overflow")); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); if (__predict_false(sc->sc_running == 0)) { /* wpi_stop() was called */ error = ENETDOWN; goto end; } wh = mtod(buf->m, struct ieee80211_frame *); hdrlen = ieee80211_anyhdrsize(wh); totlen = buf->m->m_pkthdr.len; frag = ((buf->m->m_flags & (M_FRAG | M_LASTFRAG)) == M_FRAG); if (__predict_false(totlen < sizeof(struct ieee80211_frame_min))) { error = EINVAL; goto end; } if (hdrlen & 3) { /* First segment length must be a multiple of 4. */ pad = 4 - (hdrlen & 3); } else pad = 0; ring = &sc->txq[buf->ac]; cur = (ring->cur + ring->pending) % WPI_TX_RING_COUNT; desc = &ring->desc[cur]; data = &ring->data[cur]; /* Prepare TX firmware command. */ cmd = &ring->cmd[cur]; cmd->code = buf->code; cmd->flags = 0; cmd->qid = ring->qid; cmd->idx = cur; memcpy(cmd->data, buf->data, buf->size); /* Save and trim IEEE802.11 header. 
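* The saved header travels in the first DMA segment together with the * firmware TX command (see the segs[0] setup below); only the payload * remains in the mbuf.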
*/ memcpy((uint8_t *)(cmd->data + buf->size), wh, hdrlen); m_adj(buf->m, hdrlen); error = bus_dmamap_load_mbuf_sg(ring->data_dmat, data->map, buf->m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0 && error != EFBIG) { device_printf(sc->sc_dev, "%s: can't map mbuf (error %d)\n", __func__, error); goto end; } if (error != 0) { /* Too many DMA segments, linearize mbuf. */ m1 = m_collapse(buf->m, M_NOWAIT, WPI_MAX_SCATTER - 1); if (m1 == NULL) { device_printf(sc->sc_dev, "%s: could not defrag mbuf\n", __func__); error = ENOBUFS; goto end; } buf->m = m1; error = bus_dmamap_load_mbuf_sg(ring->data_dmat, data->map, buf->m, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(error != 0)) { /* XXX fix this (applicable to the iwn(4) too) */ /* * NB: Do not return error; * original mbuf does not exist anymore. */ device_printf(sc->sc_dev, "%s: can't map mbuf (error %d)\n", __func__, error); if (ring->qid < WPI_CMD_QUEUE_NUM) { if_inc_counter(buf->ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); if (!frag) ieee80211_free_node(buf->ni); } m_freem(buf->m); error = 0; goto end; } } KASSERT(nsegs < WPI_MAX_SCATTER, ("too many DMA segments, nsegs (%d) should be less than %d", nsegs, WPI_MAX_SCATTER)); data->m = buf->m; data->ni = buf->ni; DPRINTF(sc, WPI_DEBUG_XMIT, "%s: qid %d idx %d len %d nsegs %d\n", __func__, ring->qid, cur, totlen, nsegs); /* Fill TX descriptor. */ desc->nsegs = WPI_PAD32(totlen + pad) << 4 | (1 + nsegs); /* First DMA segment is used by the TX command. */ desc->segs[0].addr = htole32(data->cmd_paddr); desc->segs[0].len = htole32(4 + buf->size + hdrlen + pad); /* Other DMA segments are for data payload. */ seg = &segs[0]; for (i = 1; i <= nsegs; i++) { desc->segs[i].addr = htole32(seg->ds_addr); desc->segs[i].len = htole32(seg->ds_len); seg++; } bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->data_dmat, ring->cmd_dma.map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); ring->pending += 1; if (!frag) { if (ring->qid < WPI_CMD_QUEUE_NUM) { WPI_TXQ_STATE_LOCK(sc); ring->queued += ring->pending; callout_reset(&sc->tx_timeout, 5*hz, wpi_tx_timeout, sc); WPI_TXQ_STATE_UNLOCK(sc); } /* Kick TX ring. */ ring->cur = (ring->cur + ring->pending) % WPI_TX_RING_COUNT; ring->pending = 0; sc->sc_update_tx_ring(sc, ring); } else ieee80211_node_incref(data->ni); end: DPRINTF(sc, WPI_DEBUG_TRACE, error ? TRACE_STR_END_ERR : TRACE_STR_END, __func__); WPI_TXQ_UNLOCK(sc); return (error); } /* * Construct the data packet for a transmit buffer. */ static int wpi_tx_data(struct wpi_softc *sc, struct mbuf *m, struct ieee80211_node *ni) { const struct ieee80211_txparam *tp; struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct wpi_node *wn = WPI_NODE(ni); struct ieee80211_channel *chan; struct ieee80211_frame *wh; struct ieee80211_key *k = NULL; struct wpi_buf tx_data; struct wpi_cmd_data *tx = (struct wpi_cmd_data *)&tx_data.data; uint32_t flags; uint16_t ac, qos; uint8_t tid, type, rate; int swcrypt, ismcast, totlen; wh = mtod(m, struct ieee80211_frame *); type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1); swcrypt = 1; /* Select EDCA Access Category and TX ring for this frame. */ if (IEEE80211_QOS_HAS_SEQ(wh)) { qos = ((const struct ieee80211_qosframe *)wh)->i_qos[0]; tid = qos & IEEE80211_QOS_TID; } else { qos = 0; tid = 0; } ac = M_WME_GETAC(m); chan = (ni->ni_chan != IEEE80211_CHAN_ANYC) ? 
ni->ni_chan : ic->ic_curchan; tp = &vap->iv_txparms[ieee80211_chan2mode(chan)]; /* Choose a TX rate index. */ if (type == IEEE80211_FC0_TYPE_MGT) rate = tp->mgmtrate; else if (ismcast) rate = tp->mcastrate; else if (tp->ucastrate != IEEE80211_FIXED_RATE_NONE) rate = tp->ucastrate; else if (m->m_flags & M_EAPOL) rate = tp->mgmtrate; else { /* XXX pass pktlen */ (void) ieee80211_ratectl_rate(ni, NULL, 0); rate = ni->ni_txrate; } /* Encrypt the frame if need be. */ if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { /* Retrieve key for TX. */ k = ieee80211_crypto_encap(ni, m); if (k == NULL) return (ENOBUFS); swcrypt = k->wk_flags & IEEE80211_KEY_SWCRYPT; /* 802.11 header may have moved. */ wh = mtod(m, struct ieee80211_frame *); } totlen = m->m_pkthdr.len; if (ieee80211_radiotap_active_vap(vap)) { struct wpi_tx_radiotap_header *tap = &sc->sc_txtap; tap->wt_flags = 0; tap->wt_rate = rate; if (k != NULL) tap->wt_flags |= IEEE80211_RADIOTAP_F_WEP; if (wh->i_fc[1] & IEEE80211_FC1_MORE_FRAG) tap->wt_flags |= IEEE80211_RADIOTAP_F_FRAG; ieee80211_radiotap_tx(vap, m); } flags = 0; if (!ismcast) { /* Unicast frame, check if an ACK is expected. */ if (!qos || (qos & IEEE80211_QOS_ACKPOLICY) != IEEE80211_QOS_ACKPOLICY_NOACK) flags |= WPI_TX_NEED_ACK; } if (!IEEE80211_QOS_HAS_SEQ(wh)) flags |= WPI_TX_AUTO_SEQ; if (wh->i_fc[1] & IEEE80211_FC1_MORE_FRAG) flags |= WPI_TX_MORE_FRAG; /* Check if frame must be protected using RTS/CTS or CTS-to-self. */ if (!ismcast) { /* NB: Group frames are sent using CCK in 802.11b/g. */ if (totlen + IEEE80211_CRC_LEN > vap->iv_rtsthreshold) { flags |= WPI_TX_NEED_RTS; } else if ((ic->ic_flags & IEEE80211_F_USEPROT) && WPI_RATE_IS_OFDM(rate)) { if (ic->ic_protmode == IEEE80211_PROT_CTSONLY) flags |= WPI_TX_NEED_CTS; else if (ic->ic_protmode == IEEE80211_PROT_RTSCTS) flags |= WPI_TX_NEED_RTS; } if (flags & (WPI_TX_NEED_RTS | WPI_TX_NEED_CTS)) flags |= WPI_TX_FULL_TXOP; } memset(tx, 0, sizeof (struct wpi_cmd_data)); if (type == IEEE80211_FC0_TYPE_MGT) { uint8_t subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK; /* Tell HW to set timestamp in probe responses. 
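* NB: (re)association requests additionally get a timeout of 3 below * instead of the usual 2 (units as defined by the firmware interface).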
*/ if (subtype == IEEE80211_FC0_SUBTYPE_PROBE_RESP) flags |= WPI_TX_INSERT_TSTAMP; if (subtype == IEEE80211_FC0_SUBTYPE_ASSOC_REQ || subtype == IEEE80211_FC0_SUBTYPE_REASSOC_REQ) tx->timeout = htole16(3); else tx->timeout = htole16(2); } if (ismcast || type != IEEE80211_FC0_TYPE_DATA) tx->id = WPI_ID_BROADCAST; else { if (wn->id == WPI_ID_UNDEFINED) { device_printf(sc->sc_dev, "%s: undefined node id\n", __func__); return (EINVAL); } tx->id = wn->id; } if (!swcrypt) { switch (k->wk_cipher->ic_cipher) { case IEEE80211_CIPHER_AES_CCM: tx->security = WPI_CIPHER_CCMP; break; default: break; } memcpy(tx->key, k->wk_key, k->wk_keylen); } if (wh->i_fc[1] & IEEE80211_FC1_MORE_FRAG) { struct mbuf *next = m->m_nextpkt; tx->lnext = htole16(next->m_pkthdr.len); tx->fnext = htole32(tx->security | (flags & WPI_TX_NEED_ACK) | WPI_NEXT_STA_ID(tx->id)); } tx->len = htole16(totlen); tx->flags = htole32(flags); tx->plcp = rate2plcp(rate); tx->tid = tid; tx->lifetime = htole32(WPI_LIFETIME_INFINITE); tx->ofdm_mask = 0xff; tx->cck_mask = 0x0f; tx->rts_ntries = 7; tx->data_ntries = tp->maxretry; tx_data.ni = ni; tx_data.m = m; tx_data.size = sizeof(struct wpi_cmd_data); tx_data.code = WPI_CMD_TX_DATA; tx_data.ac = ac; return wpi_cmd2(sc, &tx_data); } static int wpi_tx_data_raw(struct wpi_softc *sc, struct mbuf *m, struct ieee80211_node *ni, const struct ieee80211_bpf_params *params) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_key *k = NULL; struct ieee80211_frame *wh; struct wpi_buf tx_data; struct wpi_cmd_data *tx = (struct wpi_cmd_data *)&tx_data.data; uint32_t flags; uint8_t ac, type, rate; int swcrypt, totlen; wh = mtod(m, struct ieee80211_frame *); type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; swcrypt = 1; ac = params->ibp_pri & 3; /* Choose a TX rate index. */ rate = params->ibp_rate0; flags = 0; if (!IEEE80211_QOS_HAS_SEQ(wh)) flags |= WPI_TX_AUTO_SEQ; if ((params->ibp_flags & IEEE80211_BPF_NOACK) == 0) flags |= WPI_TX_NEED_ACK; if (params->ibp_flags & IEEE80211_BPF_RTS) flags |= WPI_TX_NEED_RTS; if (params->ibp_flags & IEEE80211_BPF_CTS) flags |= WPI_TX_NEED_CTS; if (flags & (WPI_TX_NEED_RTS | WPI_TX_NEED_CTS)) flags |= WPI_TX_FULL_TXOP; /* Encrypt the frame if need be. */ if (params->ibp_flags & IEEE80211_BPF_CRYPTO) { /* Retrieve key for TX. */ k = ieee80211_crypto_encap(ni, m); if (k == NULL) return (ENOBUFS); swcrypt = k->wk_flags & IEEE80211_KEY_SWCRYPT; /* 802.11 header may have moved. */ wh = mtod(m, struct ieee80211_frame *); } totlen = m->m_pkthdr.len; if (ieee80211_radiotap_active_vap(vap)) { struct wpi_tx_radiotap_header *tap = &sc->sc_txtap; tap->wt_flags = 0; tap->wt_rate = rate; if (params->ibp_flags & IEEE80211_BPF_CRYPTO) tap->wt_flags |= IEEE80211_RADIOTAP_F_WEP; ieee80211_radiotap_tx(vap, m); } memset(tx, 0, sizeof (struct wpi_cmd_data)); if (type == IEEE80211_FC0_TYPE_MGT) { uint8_t subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK; /* Tell HW to set timestamp in probe responses. 
*/ if (subtype == IEEE80211_FC0_SUBTYPE_PROBE_RESP) flags |= WPI_TX_INSERT_TSTAMP; if (subtype == IEEE80211_FC0_SUBTYPE_ASSOC_REQ || subtype == IEEE80211_FC0_SUBTYPE_REASSOC_REQ) tx->timeout = htole16(3); else tx->timeout = htole16(2); } if (!swcrypt) { switch (k->wk_cipher->ic_cipher) { case IEEE80211_CIPHER_AES_CCM: tx->security = WPI_CIPHER_CCMP; break; default: break; } memcpy(tx->key, k->wk_key, k->wk_keylen); } tx->len = htole16(totlen); tx->flags = htole32(flags); tx->plcp = rate2plcp(rate); tx->id = WPI_ID_BROADCAST; tx->lifetime = htole32(WPI_LIFETIME_INFINITE); tx->rts_ntries = params->ibp_try1; tx->data_ntries = params->ibp_try0; tx_data.ni = ni; tx_data.m = m; tx_data.size = sizeof(struct wpi_cmd_data); tx_data.code = WPI_CMD_TX_DATA; tx_data.ac = ac; return wpi_cmd2(sc, &tx_data); } static __inline int wpi_tx_ring_free_space(struct wpi_softc *sc, uint16_t ac) { struct wpi_tx_ring *ring = &sc->txq[ac]; int retval; WPI_TXQ_STATE_LOCK(sc); retval = WPI_TX_RING_HIMARK - ring->queued; WPI_TXQ_STATE_UNLOCK(sc); return retval; } static int wpi_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { struct ieee80211com *ic = ni->ni_ic; struct wpi_softc *sc = ic->ic_softc; uint16_t ac; int error = 0; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); ac = M_WME_GETAC(m); WPI_TX_LOCK(sc); /* NB: no fragments here */ if (sc->sc_running == 0 || wpi_tx_ring_free_space(sc, ac) < 1) { error = sc->sc_running ? ENOBUFS : ENETDOWN; goto unlock; } if (params == NULL) { /* * Legacy path; interpret frame contents to decide * precisely how to send the frame. */ error = wpi_tx_data(sc, m, ni); } else { /* * Caller supplied explicit parameters to use in * sending the frame. */ error = wpi_tx_data_raw(sc, m, ni, params); } unlock: WPI_TX_UNLOCK(sc); if (error != 0) { m_freem(m); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); return error; } DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; } static int wpi_transmit(struct ieee80211com *ic, struct mbuf *m) { struct wpi_softc *sc = ic->ic_softc; struct ieee80211_node *ni; struct mbuf *mnext; uint16_t ac; int error, nmbufs; WPI_TX_LOCK(sc); DPRINTF(sc, WPI_DEBUG_XMIT, "%s: called\n", __func__); /* Check if interface is up & running. */ if (__predict_false(sc->sc_running == 0)) { error = ENXIO; goto unlock; } nmbufs = 1; for (mnext = m->m_nextpkt; mnext != NULL; mnext = mnext->m_nextpkt) nmbufs++; /* Check for available space. */ ac = M_WME_GETAC(m); if (wpi_tx_ring_free_space(sc, ac) < nmbufs) { error = ENOBUFS; goto unlock; } error = 0; ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; do { mnext = m->m_nextpkt; if (wpi_tx_data(sc, m, ni) != 0) { if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, nmbufs); wpi_free_txfrags(sc, ac); ieee80211_free_mbuf(m); ieee80211_free_node(ni); break; } } while((m = mnext) != NULL); DPRINTF(sc, WPI_DEBUG_XMIT, "%s: done\n", __func__); unlock: WPI_TX_UNLOCK(sc); return (error); } static void wpi_watchdog_rfkill(void *arg) { struct wpi_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; DPRINTF(sc, WPI_DEBUG_WATCHDOG, "RFkill Watchdog: tick\n"); /* No need to lock firmware memory. */ if ((wpi_prph_read(sc, WPI_APMG_RFKILL) & 0x1) == 0) { /* Radio kill switch is still off. 
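* Poll again in about a second (hz ticks) until the switch is flipped * back on; the radio-on task then brings the device back up.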
*/ callout_reset(&sc->watchdog_rfkill, hz, wpi_watchdog_rfkill, sc); } else ieee80211_runtask(ic, &sc->sc_radioon_task); } static void wpi_scan_timeout(void *arg) { struct wpi_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; ic_printf(ic, "scan timeout\n"); taskqueue_enqueue(sc->sc_tq, &sc->sc_reinittask); } static void wpi_tx_timeout(void *arg) { struct wpi_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; ic_printf(ic, "device timeout\n"); taskqueue_enqueue(sc->sc_tq, &sc->sc_reinittask); } static void wpi_parent(struct ieee80211com *ic) { struct wpi_softc *sc = ic->ic_softc; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (ic->ic_nrunning > 0) { if (wpi_init(sc) == 0) { ieee80211_notify_radio(ic, 1); ieee80211_start_all(ic); } else { ieee80211_notify_radio(ic, 0); ieee80211_stop(vap); } } else wpi_stop(sc); } /* * Send a command to the firmware. */ static int wpi_cmd(struct wpi_softc *sc, uint8_t code, const void *buf, uint16_t size, int async) { struct wpi_tx_ring *ring = &sc->txq[WPI_CMD_QUEUE_NUM]; struct wpi_tx_desc *desc; struct wpi_tx_data *data; struct wpi_tx_cmd *cmd; struct mbuf *m; bus_addr_t paddr; uint16_t totlen; int error; WPI_TXQ_LOCK(sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); if (__predict_false(sc->sc_running == 0)) { /* wpi_stop() was called */ if (code == WPI_CMD_SCAN) error = ENETDOWN; else error = 0; goto fail; } if (async == 0) WPI_LOCK_ASSERT(sc); DPRINTF(sc, WPI_DEBUG_CMD, "%s: cmd %s size %u async %d\n", __func__, wpi_cmd_str(code), size, async); desc = &ring->desc[ring->cur]; data = &ring->data[ring->cur]; totlen = 4 + size; if (size > sizeof cmd->data) { /* Command is too large to fit in a descriptor. */ if (totlen > MCLBYTES) { error = EINVAL; goto fail; } m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); if (m == NULL) { error = ENOMEM; goto fail; } cmd = mtod(m, struct wpi_tx_cmd *); error = bus_dmamap_load(ring->data_dmat, data->map, cmd, totlen, wpi_dma_map_addr, &paddr, BUS_DMA_NOWAIT); if (error != 0) { m_freem(m); goto fail; } data->m = m; } else { cmd = &ring->cmd[ring->cur]; paddr = data->cmd_paddr; } cmd->code = code; cmd->flags = 0; cmd->qid = ring->qid; cmd->idx = ring->cur; memcpy(cmd->data, buf, size); desc->nsegs = 1 + (WPI_PAD32(size) << 4); desc->segs[0].addr = htole32(paddr); desc->segs[0].len = htole32(totlen); if (size > sizeof cmd->data) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_PREWRITE); } else { bus_dmamap_sync(ring->data_dmat, ring->cmd_dma.map, BUS_DMASYNC_PREWRITE); } bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); /* Kick command ring. */ ring->cur = (ring->cur + 1) % WPI_TX_RING_COUNT; sc->sc_update_tx_ring(sc, ring); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); WPI_TXQ_UNLOCK(sc); return async ? 0 : mtx_sleep(cmd, &sc->sc_mtx, PCATCH, "wpicmd", hz); fail: DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); WPI_TXQ_UNLOCK(sc); return error; } /* * Configure HW multi-rate retries. */ static int wpi_mrr_setup(struct wpi_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct wpi_mrr_setup mrr; uint8_t i; int error; /* CCK rates (not used with 802.11a). */ for (i = WPI_RIDX_CCK1; i <= WPI_RIDX_CCK11; i++) { mrr.rates[i].flags = 0; mrr.rates[i].plcp = wpi_ridx_to_plcp[i]; /* Fallback to the immediate lower CCK rate (if any.) */ mrr.rates[i].next = (i == WPI_RIDX_CCK1) ? WPI_RIDX_CCK1 : i - 1; /* Try twice at this rate before falling back to "next". 
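* Each CCK rate thus falls back to the next lower one; CCK1, having no * lower rate, is chained to itself.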
*/ mrr.rates[i].ntries = WPI_NTRIES_DEFAULT; } /* OFDM rates (not used with 802.11b). */ for (i = WPI_RIDX_OFDM6; i <= WPI_RIDX_OFDM54; i++) { mrr.rates[i].flags = 0; mrr.rates[i].plcp = wpi_ridx_to_plcp[i]; /* Fallback to the immediate lower rate (if any.) */ /* We allow fallback from OFDM/6 to CCK/2 in 11b/g mode. */ mrr.rates[i].next = (i == WPI_RIDX_OFDM6) ? ((ic->ic_curmode == IEEE80211_MODE_11A) ? WPI_RIDX_OFDM6 : WPI_RIDX_CCK2) : i - 1; /* Try twice at this rate before falling back to "next". */ mrr.rates[i].ntries = WPI_NTRIES_DEFAULT; } /* Setup MRR for control frames. */ mrr.which = htole32(WPI_MRR_CTL); error = wpi_cmd(sc, WPI_CMD_MRR_SETUP, &mrr, sizeof mrr, 0); if (error != 0) { device_printf(sc->sc_dev, "could not setup MRR for control frames\n"); return error; } /* Setup MRR for data frames. */ mrr.which = htole32(WPI_MRR_DATA); error = wpi_cmd(sc, WPI_CMD_MRR_SETUP, &mrr, sizeof mrr, 0); if (error != 0) { device_printf(sc->sc_dev, "could not setup MRR for data frames\n"); return error; } return 0; } static int wpi_add_node(struct wpi_softc *sc, struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct wpi_vap *wvp = WPI_VAP(ni->ni_vap); struct wpi_node *wn = WPI_NODE(ni); struct wpi_node_info node; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if (wn->id == WPI_ID_UNDEFINED) return EINVAL; memset(&node, 0, sizeof node); IEEE80211_ADDR_COPY(node.macaddr, ni->ni_macaddr); node.id = wn->id; node.plcp = (ic->ic_curmode == IEEE80211_MODE_11A) ? wpi_ridx_to_plcp[WPI_RIDX_OFDM6] : wpi_ridx_to_plcp[WPI_RIDX_CCK1]; node.action = htole32(WPI_ACTION_SET_RATE); node.antenna = WPI_ANTENNA_BOTH; DPRINTF(sc, WPI_DEBUG_NODE, "%s: adding node %d (%s)\n", __func__, wn->id, ether_sprintf(ni->ni_macaddr)); error = wpi_cmd(sc, WPI_CMD_ADD_NODE, &node, sizeof node, 1); if (error != 0) { device_printf(sc->sc_dev, "%s: wpi_cmd() call failed with error code %d\n", __func__, error); return error; } if (wvp->wv_gtk != 0) { error = wpi_set_global_keys(ni); if (error != 0) { device_printf(sc->sc_dev, "%s: error while setting global keys\n", __func__); return ENXIO; } } return 0; } /* * Broadcast node is used to send group-addressed and management frames. */ static int wpi_add_broadcast_node(struct wpi_softc *sc, int async) { struct ieee80211com *ic = &sc->sc_ic; struct wpi_node_info node; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); memset(&node, 0, sizeof node); IEEE80211_ADDR_COPY(node.macaddr, ieee80211broadcastaddr); node.id = WPI_ID_BROADCAST; node.plcp = (ic->ic_curmode == IEEE80211_MODE_11A) ? 
wpi_ridx_to_plcp[WPI_RIDX_OFDM6] : wpi_ridx_to_plcp[WPI_RIDX_CCK1]; node.action = htole32(WPI_ACTION_SET_RATE); node.antenna = WPI_ANTENNA_BOTH; DPRINTF(sc, WPI_DEBUG_NODE, "%s: adding broadcast node\n", __func__); return wpi_cmd(sc, WPI_CMD_ADD_NODE, &node, sizeof node, async); } static int wpi_add_sta_node(struct wpi_softc *sc, struct ieee80211_node *ni) { struct wpi_node *wn = WPI_NODE(ni); int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); wn->id = wpi_add_node_entry_sta(sc); if ((error = wpi_add_node(sc, ni)) != 0) { wpi_del_node_entry(sc, wn->id); wn->id = WPI_ID_UNDEFINED; return error; } return 0; } static int wpi_add_ibss_node(struct wpi_softc *sc, struct ieee80211_node *ni) { struct wpi_node *wn = WPI_NODE(ni); int error; KASSERT(wn->id == WPI_ID_UNDEFINED, ("the node %d was added before", wn->id)); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if ((wn->id = wpi_add_node_entry_adhoc(sc)) == WPI_ID_UNDEFINED) { device_printf(sc->sc_dev, "%s: h/w table is full\n", __func__); return ENOMEM; } if ((error = wpi_add_node(sc, ni)) != 0) { wpi_del_node_entry(sc, wn->id); wn->id = WPI_ID_UNDEFINED; return error; } return 0; } static void wpi_del_node(struct wpi_softc *sc, struct ieee80211_node *ni) { struct wpi_node *wn = WPI_NODE(ni); struct wpi_cmd_del_node node; int error; KASSERT(wn->id != WPI_ID_UNDEFINED, ("undefined node id passed")); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); memset(&node, 0, sizeof node); IEEE80211_ADDR_COPY(node.macaddr, ni->ni_macaddr); node.count = 1; DPRINTF(sc, WPI_DEBUG_NODE, "%s: deleting node %d (%s)\n", __func__, wn->id, ether_sprintf(ni->ni_macaddr)); error = wpi_cmd(sc, WPI_CMD_DEL_NODE, &node, sizeof node, 1); if (error != 0) { device_printf(sc->sc_dev, "%s: could not delete node %u, error %d\n", __func__, wn->id, error); } } static int wpi_updateedca(struct ieee80211com *ic) { #define WPI_EXP2(x) ((1 << (x)) - 1) /* CWmin = 2^ECWmin - 1 */ struct wpi_softc *sc = ic->ic_softc; struct wpi_edca_params cmd; int aci, error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); memset(&cmd, 0, sizeof cmd); cmd.flags = htole32(WPI_EDCA_UPDATE); for (aci = 0; aci < WME_NUM_AC; aci++) { const struct wmeParams *ac = &ic->ic_wme.wme_chanParams.cap_wmeParams[aci]; cmd.ac[aci].aifsn = ac->wmep_aifsn; cmd.ac[aci].cwmin = htole16(WPI_EXP2(ac->wmep_logcwmin)); cmd.ac[aci].cwmax = htole16(WPI_EXP2(ac->wmep_logcwmax)); cmd.ac[aci].txoplimit = htole16(IEEE80211_TXOP_TO_US(ac->wmep_txopLimit)); DPRINTF(sc, WPI_DEBUG_EDCA, "setting WME for queue %d aifsn=%d cwmin=%d cwmax=%d " "txoplimit=%d\n", aci, cmd.ac[aci].aifsn, cmd.ac[aci].cwmin, cmd.ac[aci].cwmax, cmd.ac[aci].txoplimit); } error = wpi_cmd(sc, WPI_CMD_EDCA_PARAMS, &cmd, sizeof cmd, 1); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return error; #undef WPI_EXP2 } static void wpi_set_promisc(struct wpi_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); uint32_t promisc_filter; promisc_filter = WPI_FILTER_CTL; if (vap != NULL && vap->iv_opmode != IEEE80211_M_HOSTAP) promisc_filter |= WPI_FILTER_PROMISC; if (ic->ic_promisc > 0) sc->rxon.filter |= htole32(promisc_filter); else sc->rxon.filter &= ~htole32(promisc_filter); } static void wpi_update_promisc(struct ieee80211com *ic) { struct wpi_softc *sc = ic->ic_softc; WPI_RXON_LOCK(sc); wpi_set_promisc(sc); if (wpi_send_rxon(sc, 1, 1) != 0) { device_printf(sc->sc_dev, "%s: could not send RXON\n", __func__); } WPI_RXON_UNLOCK(sc); } static void 
wpi_update_mcast(struct ieee80211com *ic) { /* Ignore */ } static void wpi_set_led(struct wpi_softc *sc, uint8_t which, uint8_t off, uint8_t on) { struct wpi_cmd_led led; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); led.which = which; led.unit = htole32(100000); /* on/off in unit of 100ms */ led.off = off; led.on = on; (void)wpi_cmd(sc, WPI_CMD_SET_LED, &led, sizeof led, 1); } static int wpi_set_timing(struct wpi_softc *sc, struct ieee80211_node *ni) { struct wpi_cmd_timing cmd; uint64_t val, mod; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); memset(&cmd, 0, sizeof cmd); memcpy(&cmd.tstamp, ni->ni_tstamp.data, sizeof (uint64_t)); cmd.bintval = htole16(ni->ni_intval); cmd.lintval = htole16(10); /* Compute remaining time until next beacon. */ val = (uint64_t)ni->ni_intval * IEEE80211_DUR_TU; mod = le64toh(cmd.tstamp) % val; cmd.binitval = htole32((uint32_t)(val - mod)); DPRINTF(sc, WPI_DEBUG_RESET, "timing bintval=%u tstamp=%ju, init=%u\n", ni->ni_intval, le64toh(cmd.tstamp), (uint32_t)(val - mod)); return wpi_cmd(sc, WPI_CMD_TIMING, &cmd, sizeof cmd, 1); } /* * This function is called periodically (every 60 seconds) to adjust output * power to temperature changes. */ static void wpi_power_calibration(struct wpi_softc *sc) { int temp; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); /* Update sensor data. */ temp = (int)WPI_READ(sc, WPI_UCODE_GP2); DPRINTF(sc, WPI_DEBUG_TEMP, "Temp in calibration is: %d\n", temp); /* Sanity-check read value. */ if (temp < -260 || temp > 25) { /* This can't be correct, ignore. */ DPRINTF(sc, WPI_DEBUG_TEMP, "out-of-range temperature reported: %d\n", temp); return; } DPRINTF(sc, WPI_DEBUG_TEMP, "temperature %d->%d\n", sc->temp, temp); /* Adjust Tx power if need be. */ if (abs(temp - sc->temp) <= 6) return; sc->temp = temp; if (wpi_set_txpower(sc, 1) != 0) { /* just warn, too bad for the automatic calibration... */ device_printf(sc->sc_dev,"could not adjust Tx power\n"); } } /* * Set TX power for current channel. */ static int wpi_set_txpower(struct wpi_softc *sc, int async) { struct wpi_power_group *group; struct wpi_cmd_txpower cmd; uint8_t chan; int idx, is_chan_5ghz, i; /* Retrieve current channel from last RXON. */ chan = sc->rxon.chan; is_chan_5ghz = (sc->rxon.flags & htole32(WPI_RXON_24GHZ)) == 0; /* Find the TX power group to which this channel belongs. */ if (is_chan_5ghz) { for (group = &sc->groups[1]; group < &sc->groups[4]; group++) if (chan <= group->chan) break; } else group = &sc->groups[0]; memset(&cmd, 0, sizeof cmd); cmd.band = is_chan_5ghz ? WPI_BAND_5GHZ : WPI_BAND_2GHZ; cmd.chan = htole16(chan); /* Set TX power for all OFDM and CCK rates. */ for (i = 0; i <= WPI_RIDX_MAX ; i++) { /* Retrieve TX power for this channel/rate. */ idx = wpi_get_power_index(sc, group, chan, is_chan_5ghz, i); cmd.rates[i].plcp = wpi_ridx_to_plcp[i]; if (is_chan_5ghz) { cmd.rates[i].rf_gain = wpi_rf_gain_5ghz[idx]; cmd.rates[i].dsp_gain = wpi_dsp_gain_5ghz[idx]; } else { cmd.rates[i].rf_gain = wpi_rf_gain_2ghz[idx]; cmd.rates[i].dsp_gain = wpi_dsp_gain_2ghz[idx]; } DPRINTF(sc, WPI_DEBUG_TEMP, "chan %d/ridx %d: power index %d\n", chan, i, idx); } return wpi_cmd(sc, WPI_CMD_TXPOWER, &cmd, sizeof cmd, async); } /* * Determine Tx power index for a given channel/rate combination. * This takes into account the regulatory information from EEPROM and the * current temperature. 
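* Worked example (hypothetical sample values): with samples * (power 10, index 20) and (power 14, index 40), a target power of 11 * interpolates to index 20 + round(1 * 20 / 4) = 25. Higher indices * mean lower output power: the CCK adjustment adds 10 steps for -5 dB, * i.e. roughly 0.5 dB per step, and the temperature term below shifts * the index by 11/100 of a step per degree of deviation.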
*/ static int wpi_get_power_index(struct wpi_softc *sc, struct wpi_power_group *group, uint8_t chan, int is_chan_5ghz, int ridx) { /* Fixed-point arithmetic division using a n-bit fractional part. */ #define fdivround(a, b, n) \ ((((1 << n) * (a)) / (b) + (1 << n) / 2) / (1 << n)) /* Linear interpolation. */ #define interpolate(x, x1, y1, x2, y2, n) \ ((y1) + fdivround(((x) - (x1)) * ((y2) - (y1)), (x2) - (x1), n)) struct wpi_power_sample *sample; int pwr, idx; /* Default TX power is group maximum TX power minus 3dB. */ pwr = group->maxpwr / 2; /* Decrease TX power for highest OFDM rates to reduce distortion. */ switch (ridx) { case WPI_RIDX_OFDM36: pwr -= is_chan_5ghz ? 5 : 0; break; case WPI_RIDX_OFDM48: pwr -= is_chan_5ghz ? 10 : 7; break; case WPI_RIDX_OFDM54: pwr -= is_chan_5ghz ? 12 : 9; break; } /* Never exceed the channel maximum allowed TX power. */ pwr = min(pwr, sc->maxpwr[chan]); /* Retrieve TX power index into gain tables from samples. */ for (sample = group->samples; sample < &group->samples[3]; sample++) if (pwr > sample[1].power) break; /* Fixed-point linear interpolation using a 19-bit fractional part. */ idx = interpolate(pwr, sample[0].power, sample[0].index, sample[1].power, sample[1].index, 19); /*- * Adjust power index based on current temperature: * - if cooler than factory-calibrated: decrease output power * - if warmer than factory-calibrated: increase output power */ idx -= (sc->temp - group->temp) * 11 / 100; /* Decrease TX power for CCK rates (-5dB). */ if (ridx >= WPI_RIDX_CCK1) idx += 10; /* Make sure idx stays in a valid range. */ if (idx < 0) return 0; if (idx > WPI_MAX_PWR_INDEX) return WPI_MAX_PWR_INDEX; return idx; #undef interpolate #undef fdivround } /* * Set STA mode power saving level (between 0 and 5). * Level 0 is CAM (Continuously Aware Mode), 5 is for maximum power saving. */ static int wpi_set_pslevel(struct wpi_softc *sc, uint8_t dtim, int level, int async) { struct wpi_pmgt_cmd cmd; const struct wpi_pmgt *pmgt; uint32_t max, reg; uint8_t skip_dtim; int i; DPRINTF(sc, WPI_DEBUG_PWRSAVE, "%s: dtim=%d, level=%d, async=%d\n", __func__, dtim, level, async); /* Select which PS parameters to use. */ if (dtim <= 10) pmgt = &wpi_pmgt[0][level]; else pmgt = &wpi_pmgt[1][level]; memset(&cmd, 0, sizeof cmd); if (level != 0) /* not CAM */ cmd.flags |= htole16(WPI_PS_ALLOW_SLEEP); /* Retrieve PCIe Active State Power Management (ASPM). */ - reg = pci_read_config(sc->sc_dev, sc->sc_cap_off + 0x10, 1); - if (!(reg & 0x1)) /* L0s Entry disabled. */ + reg = pci_read_config(sc->sc_dev, sc->sc_cap_off + PCIER_LINK_CTL, 1); + if (!(reg & PCIEM_LINK_CTL_ASPMC_L0S)) /* L0s Entry disabled. 
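* PCIER_LINK_CTL is the PCIe Link Control register (offset 0x10 in the * PCIe capability) and PCIEM_LINK_CTL_ASPMC_L0S (0x1) its ASPM L0s * entry enable bit.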
*/ cmd.flags |= htole16(WPI_PS_PCI_PMGT); cmd.rxtimeout = htole32(pmgt->rxtimeout * IEEE80211_DUR_TU); cmd.txtimeout = htole32(pmgt->txtimeout * IEEE80211_DUR_TU); if (dtim == 0) { dtim = 1; skip_dtim = 0; } else skip_dtim = pmgt->skip_dtim; if (skip_dtim != 0) { cmd.flags |= htole16(WPI_PS_SLEEP_OVER_DTIM); max = pmgt->intval[4]; if (max == (uint32_t)-1) max = dtim * (skip_dtim + 1); else if (max > dtim) max = (max / dtim) * dtim; } else max = dtim; for (i = 0; i < 5; i++) cmd.intval[i] = htole32(MIN(max, pmgt->intval[i])); return wpi_cmd(sc, WPI_CMD_SET_POWER_MODE, &cmd, sizeof cmd, async); } static int wpi_send_btcoex(struct wpi_softc *sc) { struct wpi_bluetooth cmd; memset(&cmd, 0, sizeof cmd); cmd.flags = WPI_BT_COEX_MODE_4WIRE; cmd.lead_time = WPI_BT_LEAD_TIME_DEF; cmd.max_kill = WPI_BT_MAX_KILL_DEF; DPRINTF(sc, WPI_DEBUG_RESET, "%s: configuring bluetooth coexistence\n", __func__); return wpi_cmd(sc, WPI_CMD_BT_COEX, &cmd, sizeof(cmd), 0); } static int wpi_send_rxon(struct wpi_softc *sc, int assoc, int async) { int error; if (async) WPI_RXON_LOCK_ASSERT(sc); if (assoc && wpi_check_bss_filter(sc) != 0) { struct wpi_assoc rxon_assoc; rxon_assoc.flags = sc->rxon.flags; rxon_assoc.filter = sc->rxon.filter; rxon_assoc.ofdm_mask = sc->rxon.ofdm_mask; rxon_assoc.cck_mask = sc->rxon.cck_mask; rxon_assoc.reserved = 0; error = wpi_cmd(sc, WPI_CMD_RXON_ASSOC, &rxon_assoc, sizeof (struct wpi_assoc), async); if (error != 0) { device_printf(sc->sc_dev, "RXON_ASSOC command failed, error %d\n", error); return error; } } else { if (async) { WPI_NT_LOCK(sc); error = wpi_cmd(sc, WPI_CMD_RXON, &sc->rxon, sizeof (struct wpi_rxon), async); if (error == 0) wpi_clear_node_table(sc); WPI_NT_UNLOCK(sc); } else { error = wpi_cmd(sc, WPI_CMD_RXON, &sc->rxon, sizeof (struct wpi_rxon), async); if (error == 0) wpi_clear_node_table(sc); } if (error != 0) { device_printf(sc->sc_dev, "RXON command failed, error %d\n", error); return error; } /* Add broadcast node. */ error = wpi_add_broadcast_node(sc, async); if (error != 0) { device_printf(sc->sc_dev, "could not add broadcast node, error %d\n", error); return error; } } /* Configuration has changed, set Tx power accordingly. */ if ((error = wpi_set_txpower(sc, async)) != 0) { device_printf(sc->sc_dev, "%s: could not set TX power, error %d\n", __func__, error); return error; } return 0; } /** * Configure the card to listen to a particular channel; this transitions the * card into being able to receive frames from remote devices. */ static int wpi_config(struct wpi_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct ieee80211_channel *c = ic->ic_curchan; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); /* Set power saving level to CAM during initialization. */ if ((error = wpi_set_pslevel(sc, 0, 0, 0)) != 0) { device_printf(sc->sc_dev, "%s: could not set power saving level\n", __func__); return error; } /* Configure bluetooth coexistence. */ if ((error = wpi_send_btcoex(sc)) != 0) { device_printf(sc->sc_dev, "could not configure bluetooth coexistence\n"); return error; } /* Configure adapter. */ memset(&sc->rxon, 0, sizeof (struct wpi_rxon)); IEEE80211_ADDR_COPY(sc->rxon.myaddr, vap->iv_myaddr); /* Set default channel.
*/ sc->rxon.chan = ieee80211_chan2ieee(ic, c); sc->rxon.flags = htole32(WPI_RXON_TSF | WPI_RXON_CTS_TO_SELF); if (IEEE80211_IS_CHAN_2GHZ(c)) sc->rxon.flags |= htole32(WPI_RXON_AUTO | WPI_RXON_24GHZ); sc->rxon.filter = WPI_FILTER_MULTICAST; switch (ic->ic_opmode) { case IEEE80211_M_STA: sc->rxon.mode = WPI_MODE_STA; break; case IEEE80211_M_IBSS: sc->rxon.mode = WPI_MODE_IBSS; sc->rxon.filter |= WPI_FILTER_BEACON; break; case IEEE80211_M_HOSTAP: /* XXX workaround for beaconing */ sc->rxon.mode = WPI_MODE_IBSS; sc->rxon.filter |= WPI_FILTER_ASSOC | WPI_FILTER_PROMISC; break; case IEEE80211_M_AHDEMO: sc->rxon.mode = WPI_MODE_HOSTAP; break; case IEEE80211_M_MONITOR: sc->rxon.mode = WPI_MODE_MONITOR; break; default: device_printf(sc->sc_dev, "unknown opmode %d\n", ic->ic_opmode); return EINVAL; } sc->rxon.filter = htole32(sc->rxon.filter); wpi_set_promisc(sc); sc->rxon.cck_mask = 0x0f; /* not yet negotiated */ sc->rxon.ofdm_mask = 0xff; /* not yet negotiated */ if ((error = wpi_send_rxon(sc, 0, 0)) != 0) { device_printf(sc->sc_dev, "%s: could not send RXON\n", __func__); return error; } /* Set up rate scaling. */ if ((error = wpi_mrr_setup(sc)) != 0) { device_printf(sc->sc_dev, "could not setup MRR, error %d\n", error); return error; } DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; } static uint16_t wpi_get_active_dwell_time(struct wpi_softc *sc, struct ieee80211_channel *c, uint8_t n_probes) { /* No channel? Default to 2GHz settings. */ if (c == NULL || IEEE80211_IS_CHAN_2GHZ(c)) { return (WPI_ACTIVE_DWELL_TIME_2GHZ + WPI_ACTIVE_DWELL_FACTOR_2GHZ * (n_probes + 1)); } /* 5GHz dwell time. */ return (WPI_ACTIVE_DWELL_TIME_5GHZ + WPI_ACTIVE_DWELL_FACTOR_5GHZ * (n_probes + 1)); } /* * Limit the total dwell time. * * Returns the dwell time in milliseconds. */ static uint16_t wpi_limit_dwell(struct wpi_softc *sc, uint16_t dwell_time) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); uint16_t bintval = 0; /* bintval is in TU (1.024 ms) */ if (vap != NULL) bintval = vap->iv_bss->ni_intval; /* * If it's non-zero, we should calculate the minimum of * it and the DWELL_BASE. * * XXX Yes, the math should take into account that bintval * is 1.024 ms, not 1 ms. */ if (bintval > 0) { DPRINTF(sc, WPI_DEBUG_SCAN, "%s: bintval=%d\n", __func__, bintval); return (MIN(dwell_time, bintval - WPI_CHANNEL_TUNE_TIME * 2)); } /* No association context? Default. */ return dwell_time; } static uint16_t wpi_get_passive_dwell_time(struct wpi_softc *sc, struct ieee80211_channel *c) { uint16_t passive; if (c == NULL || IEEE80211_IS_CHAN_2GHZ(c)) passive = WPI_PASSIVE_DWELL_BASE + WPI_PASSIVE_DWELL_TIME_2GHZ; else passive = WPI_PASSIVE_DWELL_BASE + WPI_PASSIVE_DWELL_TIME_5GHZ; /* Clamp to the beacon interval if we're associated. */ return (wpi_limit_dwell(sc, passive)); } static uint32_t wpi_get_scan_pause_time(uint32_t time, uint16_t bintval) { uint32_t mod = (time % bintval) * IEEE80211_DUR_TU; uint32_t nbeacons = time / bintval; if (mod > WPI_PAUSE_MAX_TIME) mod = WPI_PAUSE_MAX_TIME; return WPI_PAUSE_SCAN(nbeacons, mod); } /* * Send a scan request to the firmware.
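* The buffer built below is laid out as: scan header, a TX command for * the probe request, WPI_SCAN_MAX_ESSIDS ESSID entries, the probe * request frame itself, and one wpi_scan_chan entry per channel.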
*/ static int wpi_scan(struct wpi_softc *sc, struct ieee80211_channel *c) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_scan_state *ss = ic->ic_scan; struct ieee80211vap *vap = ss->ss_vap; struct wpi_scan_hdr *hdr; struct wpi_cmd_data *tx; struct wpi_scan_essid *essids; struct wpi_scan_chan *chan; struct ieee80211_frame *wh; struct ieee80211_rateset *rs; uint16_t bintval, buflen, dwell_active, dwell_passive; uint8_t *buf, *frm, i, nssid; int bgscan, error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); /* * We are absolutely not allowed to send a scan command when another * scan command is pending. */ if (callout_pending(&sc->scan_timeout)) { device_printf(sc->sc_dev, "%s: called whilst scanning!\n", __func__); error = EAGAIN; goto fail; } bgscan = wpi_check_bss_filter(sc); bintval = vap->iv_bss->ni_intval; if (bgscan != 0 && bintval < WPI_QUIET_TIME_DEFAULT + WPI_CHANNEL_TUNE_TIME * 2) { error = EOPNOTSUPP; goto fail; } buf = malloc(WPI_SCAN_MAXSZ, M_DEVBUF, M_NOWAIT | M_ZERO); if (buf == NULL) { device_printf(sc->sc_dev, "%s: could not allocate buffer for scan command\n", __func__); error = ENOMEM; goto fail; } hdr = (struct wpi_scan_hdr *)buf; /* * Move to the next channel if no packets are received within 10 msecs * after sending the probe request. */ hdr->quiet_time = htole16(WPI_QUIET_TIME_DEFAULT); hdr->quiet_threshold = htole16(1); if (bgscan != 0) { /* * Max needs to be greater than active and passive and quiet! * It's also in microseconds! */ hdr->max_svc = htole32(250 * IEEE80211_DUR_TU); hdr->pause_svc = htole32(wpi_get_scan_pause_time(100, bintval)); } hdr->filter = htole32(WPI_FILTER_MULTICAST | WPI_FILTER_BEACON); tx = (struct wpi_cmd_data *)(hdr + 1); tx->flags = htole32(WPI_TX_AUTO_SEQ); tx->id = WPI_ID_BROADCAST; tx->lifetime = htole32(WPI_LIFETIME_INFINITE); if (IEEE80211_IS_CHAN_5GHZ(c)) { /* Send probe requests at 6Mbps. */ tx->plcp = wpi_ridx_to_plcp[WPI_RIDX_OFDM6]; rs = &ic->ic_sup_rates[IEEE80211_MODE_11A]; } else { hdr->flags = htole32(WPI_RXON_24GHZ | WPI_RXON_AUTO); /* Send probe requests at 1Mbps. */ tx->plcp = wpi_ridx_to_plcp[WPI_RIDX_CCK1]; rs = &ic->ic_sup_rates[IEEE80211_MODE_11G]; } essids = (struct wpi_scan_essid *)(tx + 1); nssid = MIN(ss->ss_nssid, WPI_SCAN_MAX_ESSIDS); for (i = 0; i < nssid; i++) { essids[i].id = IEEE80211_ELEMID_SSID; essids[i].len = MIN(ss->ss_ssid[i].len, IEEE80211_NWID_LEN); memcpy(essids[i].data, ss->ss_ssid[i].ssid, essids[i].len); #ifdef WPI_DEBUG if (sc->sc_debug & WPI_DEBUG_SCAN) { printf("Scanning Essid: "); ieee80211_print_essid(essids[i].data, essids[i].len); printf("\n"); } #endif } /* * Build a probe request frame. Most of the following code is a * copy & paste of what is done in net80211. */ wh = (struct ieee80211_frame *)(essids + WPI_SCAN_MAX_ESSIDS); wh->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_MGT | IEEE80211_FC0_SUBTYPE_PROBE_REQ; wh->i_fc[1] = IEEE80211_FC1_DIR_NODS; IEEE80211_ADDR_COPY(wh->i_addr1, ieee80211broadcastaddr); IEEE80211_ADDR_COPY(wh->i_addr2, vap->iv_myaddr); IEEE80211_ADDR_COPY(wh->i_addr3, ieee80211broadcastaddr); frm = (uint8_t *)(wh + 1); frm = ieee80211_add_ssid(frm, NULL, 0); frm = ieee80211_add_rates(frm, rs); if (rs->rs_nrates > IEEE80211_RATE_SIZE) frm = ieee80211_add_xrates(frm, rs); /* Set length of probe request. */ tx->len = htole16(frm - (uint8_t *)wh); /* * Construct information about the channel that we * want to scan. 
The firmware expects this to be directly * after the scan probe request */ chan = (struct wpi_scan_chan *)frm; chan->chan = ieee80211_chan2ieee(ic, c); chan->flags = 0; if (nssid) { hdr->crc_threshold = WPI_SCAN_CRC_TH_DEFAULT; chan->flags |= WPI_CHAN_NPBREQS(nssid); } else hdr->crc_threshold = WPI_SCAN_CRC_TH_NEVER; if (!IEEE80211_IS_CHAN_PASSIVE(c)) chan->flags |= WPI_CHAN_ACTIVE; /* * Calculate the active/passive dwell times. */ dwell_active = wpi_get_active_dwell_time(sc, c, nssid); dwell_passive = wpi_get_passive_dwell_time(sc, c); /* Make sure they're valid. */ if (dwell_active > dwell_passive) dwell_active = dwell_passive; chan->active = htole16(dwell_active); chan->passive = htole16(dwell_passive); chan->dsp_gain = 0x6e; /* Default level */ if (IEEE80211_IS_CHAN_5GHZ(c)) chan->rf_gain = 0x3b; else chan->rf_gain = 0x28; DPRINTF(sc, WPI_DEBUG_SCAN, "Scanning %u Passive: %d\n", chan->chan, IEEE80211_IS_CHAN_PASSIVE(c)); hdr->nchan++; if (hdr->nchan == 1 && sc->rxon.chan == chan->chan) { /* XXX Force probe request transmission. */ memcpy(chan + 1, chan, sizeof (struct wpi_scan_chan)); chan++; /* Reduce unnecessary delay. */ chan->flags = 0; chan->passive = chan->active = hdr->quiet_time; hdr->nchan++; } chan++; buflen = (uint8_t *)chan - buf; hdr->len = htole16(buflen); DPRINTF(sc, WPI_DEBUG_CMD, "sending scan command nchan=%d\n", hdr->nchan); error = wpi_cmd(sc, WPI_CMD_SCAN, buf, buflen, 1); free(buf, M_DEVBUF); if (error != 0) goto fail; callout_reset(&sc->scan_timeout, 5*hz, wpi_scan_timeout, sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; fail: DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); return error; } static int wpi_auth(struct wpi_softc *sc, struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_node *ni = vap->iv_bss; struct ieee80211_channel *c = ni->ni_chan; int error; WPI_RXON_LOCK(sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); /* Update adapter configuration. */ sc->rxon.associd = 0; sc->rxon.filter &= ~htole32(WPI_FILTER_BSS); IEEE80211_ADDR_COPY(sc->rxon.bssid, ni->ni_bssid); sc->rxon.chan = ieee80211_chan2ieee(ic, c); sc->rxon.flags = htole32(WPI_RXON_TSF | WPI_RXON_CTS_TO_SELF); if (IEEE80211_IS_CHAN_2GHZ(c)) sc->rxon.flags |= htole32(WPI_RXON_AUTO | WPI_RXON_24GHZ); if (ic->ic_flags & IEEE80211_F_SHSLOT) sc->rxon.flags |= htole32(WPI_RXON_SHSLOT); if (ic->ic_flags & IEEE80211_F_SHPREAMBLE) sc->rxon.flags |= htole32(WPI_RXON_SHPREAMBLE); if (IEEE80211_IS_CHAN_A(c)) { sc->rxon.cck_mask = 0; sc->rxon.ofdm_mask = 0x15; } else if (IEEE80211_IS_CHAN_B(c)) { sc->rxon.cck_mask = 0x03; sc->rxon.ofdm_mask = 0; } else { /* Assume 802.11b/g. 
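* cck_mask 0x0f enables all four CCK rates; ofdm_mask 0x15 has bits 0, * 2 and 4 set, which in WPI_RIDX_OFDM* order presumably selects the * mandatory 6, 12 and 24 Mb/s rates.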
*/ sc->rxon.cck_mask = 0x0f; sc->rxon.ofdm_mask = 0x15; } DPRINTF(sc, WPI_DEBUG_STATE, "rxon chan %d flags %x cck %x ofdm %x\n", sc->rxon.chan, sc->rxon.flags, sc->rxon.cck_mask, sc->rxon.ofdm_mask); if ((error = wpi_send_rxon(sc, 0, 1)) != 0) { device_printf(sc->sc_dev, "%s: could not send RXON\n", __func__); } DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); WPI_RXON_UNLOCK(sc); return error; } static int wpi_config_beacon(struct wpi_vap *wvp) { struct ieee80211vap *vap = &wvp->wv_vap; struct ieee80211com *ic = vap->iv_ic; struct ieee80211_beacon_offsets *bo = &vap->iv_bcn_off; struct wpi_buf *bcn = &wvp->wv_bcbuf; struct wpi_softc *sc = ic->ic_softc; struct wpi_cmd_beacon *cmd = (struct wpi_cmd_beacon *)&bcn->data; struct ieee80211_tim_ie *tie; struct mbuf *m; uint8_t *ptr; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); WPI_VAP_LOCK_ASSERT(wvp); cmd->len = htole16(bcn->m->m_pkthdr.len); cmd->plcp = (ic->ic_curmode == IEEE80211_MODE_11A) ? wpi_ridx_to_plcp[WPI_RIDX_OFDM6] : wpi_ridx_to_plcp[WPI_RIDX_CCK1]; /* XXX seems to be unused */ if (*(bo->bo_tim) == IEEE80211_ELEMID_TIM) { tie = (struct ieee80211_tim_ie *) bo->bo_tim; ptr = mtod(bcn->m, uint8_t *); cmd->tim = htole16(bo->bo_tim - ptr); cmd->timsz = tie->tim_len; } /* Necessary for recursion in ieee80211_beacon_update(). */ m = bcn->m; bcn->m = m_dup(m, M_NOWAIT); if (bcn->m == NULL) { device_printf(sc->sc_dev, "%s: could not copy beacon frame\n", __func__); error = ENOMEM; goto end; } if ((error = wpi_cmd2(sc, bcn)) != 0) { device_printf(sc->sc_dev, "%s: could not update beacon frame, error %d", __func__, error); m_freem(bcn->m); } /* Restore mbuf. */ end: bcn->m = m; return error; } static int wpi_setup_beacon(struct wpi_softc *sc, struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct wpi_vap *wvp = WPI_VAP(vap); struct wpi_buf *bcn = &wvp->wv_bcbuf; struct mbuf *m; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if (ni->ni_chan == IEEE80211_CHAN_ANYC) return EINVAL; m = ieee80211_beacon_alloc(ni); if (m == NULL) { device_printf(sc->sc_dev, "%s: could not allocate beacon frame\n", __func__); return ENOMEM; } WPI_VAP_LOCK(wvp); if (bcn->m != NULL) m_freem(bcn->m); bcn->m = m; error = wpi_config_beacon(wvp); WPI_VAP_UNLOCK(wvp); return error; } static void wpi_update_beacon(struct ieee80211vap *vap, int item) { struct wpi_softc *sc = vap->iv_ic->ic_softc; struct wpi_vap *wvp = WPI_VAP(vap); struct wpi_buf *bcn = &wvp->wv_bcbuf; struct ieee80211_beacon_offsets *bo = &vap->iv_bcn_off; struct ieee80211_node *ni = vap->iv_bss; int mcast = 0; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); WPI_VAP_LOCK(wvp); if (bcn->m == NULL) { bcn->m = ieee80211_beacon_alloc(ni); if (bcn->m == NULL) { device_printf(sc->sc_dev, "%s: could not allocate beacon frame\n", __func__); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); WPI_VAP_UNLOCK(wvp); return; } } WPI_VAP_UNLOCK(wvp); if (item == IEEE80211_BEACON_TIM) mcast = 1; /* TODO */ setbit(bo->bo_flags, item); ieee80211_beacon_update(ni, bcn->m, mcast); WPI_VAP_LOCK(wvp); wpi_config_beacon(wvp); WPI_VAP_UNLOCK(wvp); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); } static void wpi_newassoc(struct ieee80211_node *ni, int isnew) { struct ieee80211vap *vap = ni->ni_vap; struct wpi_softc *sc = ni->ni_ic->ic_softc; struct wpi_node *wn = WPI_NODE(ni); int error; WPI_NT_LOCK(sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if (vap->iv_opmode != IEEE80211_M_STA && wn->id == WPI_ID_UNDEFINED) { if 
((error = wpi_add_ibss_node(sc, ni)) != 0) { device_printf(sc->sc_dev, "%s: could not add IBSS node, error %d\n", __func__, error); } } WPI_NT_UNLOCK(sc); } static int wpi_run(struct wpi_softc *sc, struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_node *ni = vap->iv_bss; struct ieee80211_channel *c = ni->ni_chan; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); if (vap->iv_opmode == IEEE80211_M_MONITOR) { /* Link LED blinks while monitoring. */ wpi_set_led(sc, WPI_LED_LINK, 5, 5); return 0; } /* XXX kernel panic workaround */ if (c == IEEE80211_CHAN_ANYC) { device_printf(sc->sc_dev, "%s: incomplete configuration\n", __func__); return EINVAL; } if ((error = wpi_set_timing(sc, ni)) != 0) { device_printf(sc->sc_dev, "%s: could not set timing, error %d\n", __func__, error); return error; } /* Update adapter configuration. */ WPI_RXON_LOCK(sc); IEEE80211_ADDR_COPY(sc->rxon.bssid, ni->ni_bssid); sc->rxon.associd = htole16(IEEE80211_NODE_AID(ni)); sc->rxon.chan = ieee80211_chan2ieee(ic, c); sc->rxon.flags = htole32(WPI_RXON_TSF | WPI_RXON_CTS_TO_SELF); if (IEEE80211_IS_CHAN_2GHZ(c)) sc->rxon.flags |= htole32(WPI_RXON_AUTO | WPI_RXON_24GHZ); if (ic->ic_flags & IEEE80211_F_SHSLOT) sc->rxon.flags |= htole32(WPI_RXON_SHSLOT); if (ic->ic_flags & IEEE80211_F_SHPREAMBLE) sc->rxon.flags |= htole32(WPI_RXON_SHPREAMBLE); if (IEEE80211_IS_CHAN_A(c)) { sc->rxon.cck_mask = 0; sc->rxon.ofdm_mask = 0x15; } else if (IEEE80211_IS_CHAN_B(c)) { sc->rxon.cck_mask = 0x03; sc->rxon.ofdm_mask = 0; } else { /* Assume 802.11b/g. */ sc->rxon.cck_mask = 0x0f; sc->rxon.ofdm_mask = 0x15; } sc->rxon.filter |= htole32(WPI_FILTER_BSS); DPRINTF(sc, WPI_DEBUG_STATE, "rxon chan %d flags %x\n", sc->rxon.chan, sc->rxon.flags); if ((error = wpi_send_rxon(sc, 0, 1)) != 0) { device_printf(sc->sc_dev, "%s: could not send RXON\n", __func__); return error; } /* Start periodic calibration timer. */ callout_reset(&sc->calib_to, 60*hz, wpi_calib_timeout, sc); WPI_RXON_UNLOCK(sc); if (vap->iv_opmode == IEEE80211_M_IBSS || vap->iv_opmode == IEEE80211_M_HOSTAP) { if ((error = wpi_setup_beacon(sc, ni)) != 0) { device_printf(sc->sc_dev, "%s: could not setup beacon, error %d\n", __func__, error); return error; } } if (vap->iv_opmode == IEEE80211_M_STA) { /* Add BSS node. */ WPI_NT_LOCK(sc); error = wpi_add_sta_node(sc, ni); WPI_NT_UNLOCK(sc); if (error != 0) { device_printf(sc->sc_dev, "%s: could not add BSS node, error %d\n", __func__, error); return error; } } /* Link LED always on while associated. */ wpi_set_led(sc, WPI_LED_LINK, 0, 1); /* Enable power-saving mode if requested by user. 
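*/
/*
 * Editorial note (hedged, by analogy with the similar iwn(4) driver): the
 * arguments to wpi_set_pslevel() below appear to be (dtim hint, power-save
 * level, async flag), with level 0 meaning "constantly awake" and higher
 * levels trading latency for power.  IBSS mode is excluded, presumably
 * because an IBSS station must stay awake to take its turn beaconing.
 */
/*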
*/ if ((vap->iv_flags & IEEE80211_F_PMGTON) && vap->iv_opmode != IEEE80211_M_IBSS) (void)wpi_set_pslevel(sc, 0, 3, 1); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); return 0; } static int wpi_load_key(struct ieee80211_node *ni, const struct ieee80211_key *k) { const struct ieee80211_cipher *cip = k->wk_cipher; struct ieee80211vap *vap = ni->ni_vap; struct wpi_softc *sc = ni->ni_ic->ic_softc; struct wpi_node *wn = WPI_NODE(ni); struct wpi_node_info node; uint16_t kflags; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if (wpi_check_node_entry(sc, wn->id) == 0) { device_printf(sc->sc_dev, "%s: node does not exist\n", __func__); return 0; } switch (cip->ic_cipher) { case IEEE80211_CIPHER_AES_CCM: kflags = WPI_KFLAG_CCMP; break; default: device_printf(sc->sc_dev, "%s: unknown cipher %d\n", __func__, cip->ic_cipher); return 0; } kflags |= WPI_KFLAG_KID(k->wk_keyix); if (k->wk_flags & IEEE80211_KEY_GROUP) kflags |= WPI_KFLAG_MULTICAST; memset(&node, 0, sizeof node); node.id = wn->id; node.control = WPI_NODE_UPDATE; node.flags = WPI_FLAG_KEY_SET; node.kflags = htole16(kflags); memcpy(node.key, k->wk_key, k->wk_keylen); again: DPRINTF(sc, WPI_DEBUG_KEY, "%s: setting %s key id %d for node %d (%s)\n", __func__, (kflags & WPI_KFLAG_MULTICAST) ? "group" : "ucast", k->wk_keyix, node.id, ether_sprintf(ni->ni_macaddr)); error = wpi_cmd(sc, WPI_CMD_ADD_NODE, &node, sizeof node, 1); if (error != 0) { device_printf(sc->sc_dev, "can't update node info, error %d\n", error); return !error; } if (!(kflags & WPI_KFLAG_MULTICAST) && &vap->iv_nw_keys[0] <= k && k < &vap->iv_nw_keys[IEEE80211_WEP_NKID]) { kflags |= WPI_KFLAG_MULTICAST; node.kflags = htole16(kflags); goto again; } return 1; } static void wpi_load_key_cb(void *arg, struct ieee80211_node *ni) { const struct ieee80211_key *k = arg; struct ieee80211vap *vap = ni->ni_vap; struct wpi_softc *sc = ni->ni_ic->ic_softc; struct wpi_node *wn = WPI_NODE(ni); int error; if (vap->iv_bss == ni && wn->id == WPI_ID_UNDEFINED) return; WPI_NT_LOCK(sc); error = wpi_load_key(ni, k); WPI_NT_UNLOCK(sc); if (error == 0) { device_printf(sc->sc_dev, "%s: error while setting key\n", __func__); } } static int wpi_set_global_keys(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_key *wk = &vap->iv_nw_keys[0]; int error = 1; for (; wk < &vap->iv_nw_keys[IEEE80211_WEP_NKID] && error; wk++) if (wk->wk_keyix != IEEE80211_KEYIX_NONE) error = wpi_load_key(ni, wk); return !error; } static int wpi_del_key(struct ieee80211_node *ni, const struct ieee80211_key *k) { struct ieee80211vap *vap = ni->ni_vap; struct wpi_softc *sc = ni->ni_ic->ic_softc; struct wpi_node *wn = WPI_NODE(ni); struct wpi_node_info node; uint16_t kflags; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if (wpi_check_node_entry(sc, wn->id) == 0) { DPRINTF(sc, WPI_DEBUG_KEY, "%s: node was removed\n", __func__); return 1; /* Nothing to do. */ } kflags = WPI_KFLAG_KID(k->wk_keyix); if (k->wk_flags & IEEE80211_KEY_GROUP) kflags |= WPI_KFLAG_MULTICAST; memset(&node, 0, sizeof node); node.id = wn->id; node.control = WPI_NODE_UPDATE; node.flags = WPI_FLAG_KEY_SET; node.kflags = htole16(kflags); again: DPRINTF(sc, WPI_DEBUG_KEY, "%s: deleting %s key %d for node %d (%s)\n", __func__, (kflags & WPI_KFLAG_MULTICAST) ? 
"group" : "ucast", k->wk_keyix, node.id, ether_sprintf(ni->ni_macaddr)); error = wpi_cmd(sc, WPI_CMD_ADD_NODE, &node, sizeof node, 1); if (error != 0) { device_printf(sc->sc_dev, "can't update node info, error %d\n", error); return !error; } if (!(kflags & WPI_KFLAG_MULTICAST) && &vap->iv_nw_keys[0] <= k && k < &vap->iv_nw_keys[IEEE80211_WEP_NKID]) { kflags |= WPI_KFLAG_MULTICAST; node.kflags = htole16(kflags); goto again; } return 1; } static void wpi_del_key_cb(void *arg, struct ieee80211_node *ni) { const struct ieee80211_key *k = arg; struct ieee80211vap *vap = ni->ni_vap; struct wpi_softc *sc = ni->ni_ic->ic_softc; struct wpi_node *wn = WPI_NODE(ni); int error; if (vap->iv_bss == ni && wn->id == WPI_ID_UNDEFINED) return; WPI_NT_LOCK(sc); error = wpi_del_key(ni, k); WPI_NT_UNLOCK(sc); if (error == 0) { device_printf(sc->sc_dev, "%s: error while deleting key\n", __func__); } } static int wpi_process_key(struct ieee80211vap *vap, const struct ieee80211_key *k, int set) { struct ieee80211com *ic = vap->iv_ic; struct wpi_softc *sc = ic->ic_softc; struct wpi_vap *wvp = WPI_VAP(vap); struct ieee80211_node *ni; int error, ni_ref = 0; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if (k->wk_flags & IEEE80211_KEY_SWCRYPT) { /* Not for us. */ return 1; } if (!(k->wk_flags & IEEE80211_KEY_RECV)) { /* XMIT keys are handled in wpi_tx_data(). */ return 1; } /* Handle group keys. */ if (&vap->iv_nw_keys[0] <= k && k < &vap->iv_nw_keys[IEEE80211_WEP_NKID]) { WPI_NT_LOCK(sc); if (set) wvp->wv_gtk |= WPI_VAP_KEY(k->wk_keyix); else wvp->wv_gtk &= ~WPI_VAP_KEY(k->wk_keyix); WPI_NT_UNLOCK(sc); if (vap->iv_state == IEEE80211_S_RUN) { ieee80211_iterate_nodes(&ic->ic_sta, set ? wpi_load_key_cb : wpi_del_key_cb, __DECONST(void *, k)); } return 1; } switch (vap->iv_opmode) { case IEEE80211_M_STA: ni = vap->iv_bss; break; case IEEE80211_M_IBSS: case IEEE80211_M_AHDEMO: case IEEE80211_M_HOSTAP: ni = ieee80211_find_vap_node(&ic->ic_sta, vap, k->wk_macaddr); if (ni == NULL) return 0; /* should not happen */ ni_ref = 1; break; default: device_printf(sc->sc_dev, "%s: unknown opmode %d\n", __func__, vap->iv_opmode); return 0; } WPI_NT_LOCK(sc); if (set) error = wpi_load_key(ni, k); else error = wpi_del_key(ni, k); WPI_NT_UNLOCK(sc); if (ni_ref) ieee80211_node_decref(ni); return error; } static int wpi_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k) { return wpi_process_key(vap, k, 1); } static int wpi_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k) { return wpi_process_key(vap, k, 0); } /* * This function is called after the runtime firmware notifies us of its * readiness (called in a process context). */ static int wpi_post_alive(struct wpi_softc *sc) { int ntries, error; /* Check (again) that the radio is not disabled. */ if ((error = wpi_nic_lock(sc)) != 0) return error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); /* NB: Runtime firmware must be up and running. */ if (!(wpi_prph_read(sc, WPI_APMG_RFKILL) & 1)) { device_printf(sc->sc_dev, "RF switch: radio disabled (%s)\n", __func__); wpi_nic_unlock(sc); return EPERM; /* :-) */ } wpi_nic_unlock(sc); /* Wait for thermal sensor to calibrate. 
*/ for (ntries = 0; ntries < 1000; ntries++) { if ((sc->temp = (int)WPI_READ(sc, WPI_UCODE_GP2)) != 0) break; DELAY(10); } if (ntries == 1000) { device_printf(sc->sc_dev, "timeout waiting for thermal sensor calibration\n"); return ETIMEDOUT; } DPRINTF(sc, WPI_DEBUG_TEMP, "temperature %d\n", sc->temp); return 0; } /* * The firmware boot code is small and is intended to be copied directly into * the NIC internal memory (no DMA transfer). */ static int wpi_load_bootcode(struct wpi_softc *sc, const uint8_t *ucode, uint32_t size) { int error, ntries; DPRINTF(sc, WPI_DEBUG_HW, "Loading microcode size 0x%x\n", size); size /= sizeof (uint32_t); if ((error = wpi_nic_lock(sc)) != 0) return error; /* Copy microcode image into NIC memory. */ wpi_prph_write_region_4(sc, WPI_BSM_SRAM_BASE, (const uint32_t *)ucode, size); wpi_prph_write(sc, WPI_BSM_WR_MEM_SRC, 0); wpi_prph_write(sc, WPI_BSM_WR_MEM_DST, WPI_FW_TEXT_BASE); wpi_prph_write(sc, WPI_BSM_WR_DWCOUNT, size); /* Start boot load now. */ wpi_prph_write(sc, WPI_BSM_WR_CTRL, WPI_BSM_WR_CTRL_START); /* Wait for transfer to complete. */ for (ntries = 0; ntries < 1000; ntries++) { uint32_t status = WPI_READ(sc, WPI_FH_TX_STATUS); DPRINTF(sc, WPI_DEBUG_HW, "firmware status=0x%x, val=0x%x, result=0x%x\n", status, WPI_FH_TX_STATUS_IDLE(6), status & WPI_FH_TX_STATUS_IDLE(6)); if (status & WPI_FH_TX_STATUS_IDLE(6)) { DPRINTF(sc, WPI_DEBUG_HW, "Status Match! - ntries = %d\n", ntries); break; } DELAY(10); } if (ntries == 1000) { device_printf(sc->sc_dev, "%s: could not load boot firmware\n", __func__); wpi_nic_unlock(sc); return ETIMEDOUT; } /* Enable boot after power up. */ wpi_prph_write(sc, WPI_BSM_WR_CTRL, WPI_BSM_WR_CTRL_START_EN); wpi_nic_unlock(sc); return 0; } static int wpi_load_firmware(struct wpi_softc *sc) { struct wpi_fw_info *fw = &sc->fw; struct wpi_dma_info *dma = &sc->fw_dma; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); /* Copy initialization sections into pre-allocated DMA-safe memory. */ memcpy(dma->vaddr, fw->init.data, fw->init.datasz); bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); memcpy(dma->vaddr + WPI_FW_DATA_MAXSZ, fw->init.text, fw->init.textsz); bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); /* Tell adapter where to find initialization sections. */ if ((error = wpi_nic_lock(sc)) != 0) return error; wpi_prph_write(sc, WPI_BSM_DRAM_DATA_ADDR, dma->paddr); wpi_prph_write(sc, WPI_BSM_DRAM_DATA_SIZE, fw->init.datasz); wpi_prph_write(sc, WPI_BSM_DRAM_TEXT_ADDR, dma->paddr + WPI_FW_DATA_MAXSZ); wpi_prph_write(sc, WPI_BSM_DRAM_TEXT_SIZE, fw->init.textsz); wpi_nic_unlock(sc); /* Load firmware boot code. */ error = wpi_load_bootcode(sc, fw->boot.text, fw->boot.textsz); if (error != 0) { device_printf(sc->sc_dev, "%s: could not load boot firmware\n", __func__); return error; } /* Now press "execute". */ WPI_WRITE(sc, WPI_RESET, 0); /* Wait at most one second for first alive notification. */ if ((error = mtx_sleep(sc, &sc->sc_mtx, PCATCH, "wpiinit", hz)) != 0) { device_printf(sc->sc_dev, "%s: timeout waiting for adapter to initialize, error %d\n", __func__, error); return error; } /* Copy runtime sections into pre-allocated DMA-safe memory. */ memcpy(dma->vaddr, fw->main.data, fw->main.datasz); bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); memcpy(dma->vaddr + WPI_FW_DATA_MAXSZ, fw->main.text, fw->main.textsz); bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); /* Tell adapter where to find runtime sections. 
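*/
/*
 * Editorial sketch of the DMA buffer layout used here (inferred from the
 * memcpy() offsets above, not stated by the commit itself):
 *
 *	dma->vaddr                 dma->vaddr + WPI_FW_DATA_MAXSZ
 *	+--------------------------+--------------------------+
 *	| .data section            | .text section            |
 *	+--------------------------+--------------------------+
 *
 * The BSM_DRAM_{DATA,TEXT}_{ADDR,SIZE} registers then point the Bootstrap
 * State Machine at the two halves.  For the runtime image below,
 * WPI_FW_UPDATED OR'd into the text size appears to mark it as the
 * updated (post-init) image.
 */
/*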
*/ if ((error = wpi_nic_lock(sc)) != 0) return error; wpi_prph_write(sc, WPI_BSM_DRAM_DATA_ADDR, dma->paddr); wpi_prph_write(sc, WPI_BSM_DRAM_DATA_SIZE, fw->main.datasz); wpi_prph_write(sc, WPI_BSM_DRAM_TEXT_ADDR, dma->paddr + WPI_FW_DATA_MAXSZ); wpi_prph_write(sc, WPI_BSM_DRAM_TEXT_SIZE, WPI_FW_UPDATED | fw->main.textsz); wpi_nic_unlock(sc); return 0; } static int wpi_read_firmware(struct wpi_softc *sc) { const struct firmware *fp; struct wpi_fw_info *fw = &sc->fw; const struct wpi_firmware_hdr *hdr; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); DPRINTF(sc, WPI_DEBUG_FIRMWARE, "Attempting Loading Firmware from %s module\n", WPI_FW_NAME); WPI_UNLOCK(sc); fp = firmware_get(WPI_FW_NAME); WPI_LOCK(sc); if (fp == NULL) { device_printf(sc->sc_dev, "could not load firmware image '%s'\n", WPI_FW_NAME); return EINVAL; } sc->fw_fp = fp; if (fp->datasize < sizeof (struct wpi_firmware_hdr)) { device_printf(sc->sc_dev, "firmware file too short: %zu bytes\n", fp->datasize); error = EINVAL; goto fail; } fw->size = fp->datasize; fw->data = (const uint8_t *)fp->data; /* Extract firmware header information. */ hdr = (const struct wpi_firmware_hdr *)fw->data; /* | RUNTIME FIRMWARE | INIT FIRMWARE | BOOT FW | |HDR|<--TEXT-->|<--DATA-->|<--TEXT-->|<--DATA-->|<--TEXT-->| */ fw->main.textsz = le32toh(hdr->rtextsz); fw->main.datasz = le32toh(hdr->rdatasz); fw->init.textsz = le32toh(hdr->itextsz); fw->init.datasz = le32toh(hdr->idatasz); fw->boot.textsz = le32toh(hdr->btextsz); fw->boot.datasz = 0; /* Sanity-check firmware header. */ if (fw->main.textsz > WPI_FW_TEXT_MAXSZ || fw->main.datasz > WPI_FW_DATA_MAXSZ || fw->init.textsz > WPI_FW_TEXT_MAXSZ || fw->init.datasz > WPI_FW_DATA_MAXSZ || fw->boot.textsz > WPI_FW_BOOT_TEXT_MAXSZ || (fw->boot.textsz & 3) != 0) { device_printf(sc->sc_dev, "invalid firmware header\n"); error = EINVAL; goto fail; } /* Check that all firmware sections fit. */ if (fw->size < sizeof (*hdr) + fw->main.textsz + fw->main.datasz + fw->init.textsz + fw->init.datasz + fw->boot.textsz) { device_printf(sc->sc_dev, "firmware file too short: %zu bytes\n", fw->size); error = EINVAL; goto fail; } /* Get pointers to firmware sections. */ fw->main.text = (const uint8_t *)(hdr + 1); fw->main.data = fw->main.text + fw->main.textsz; fw->init.text = fw->main.data + fw->main.datasz; fw->init.data = fw->init.text + fw->init.textsz; fw->boot.text = fw->init.data + fw->init.datasz; DPRINTF(sc, WPI_DEBUG_FIRMWARE, "Firmware Version: Major %d, Minor %d, Driver %d, \n" "runtime (text: %u, data: %u) init (text: %u, data %u) " "boot (text %u)\n", hdr->major, hdr->minor, le32toh(hdr->driver), fw->main.textsz, fw->main.datasz, fw->init.textsz, fw->init.datasz, fw->boot.textsz); DPRINTF(sc, WPI_DEBUG_FIRMWARE, "fw->main.text %p\n", fw->main.text); DPRINTF(sc, WPI_DEBUG_FIRMWARE, "fw->main.data %p\n", fw->main.data); DPRINTF(sc, WPI_DEBUG_FIRMWARE, "fw->init.text %p\n", fw->init.text); DPRINTF(sc, WPI_DEBUG_FIRMWARE, "fw->init.data %p\n", fw->init.data); DPRINTF(sc, WPI_DEBUG_FIRMWARE, "fw->boot.text %p\n", fw->boot.text); return 0; fail: wpi_unload_firmware(sc); return error; } /** * Free the referenced firmware image */ static void wpi_unload_firmware(struct wpi_softc *sc) { if (sc->fw_fp != NULL) { firmware_put(sc->fw_fp, FIRMWARE_UNLOAD); sc->fw_fp = NULL; } } static int wpi_clock_wait(struct wpi_softc *sc) { int ntries; /* Set "initialization complete" bit. */ WPI_SETBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_INIT_DONE); /* Wait for clock stabilization. 
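*/
/*
 * Editorial note: 2500 iterations of DELAY(100) give the MAC clock
 * roughly 2500 * 100 us = 250 ms to assert WPI_GP_CNTRL_MAC_CLOCK_READY
 * after the "initialization complete" bit is set above.
 */
/*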
*/ for (ntries = 0; ntries < 2500; ntries++) { if (WPI_READ(sc, WPI_GP_CNTRL) & WPI_GP_CNTRL_MAC_CLOCK_READY) return 0; DELAY(100); } device_printf(sc->sc_dev, "%s: timeout waiting for clock stabilization\n", __func__); return ETIMEDOUT; } static int wpi_apm_init(struct wpi_softc *sc) { uint32_t reg; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); /* Disable L0s exit timer (NMI bug workaround). */ WPI_SETBITS(sc, WPI_GIO_CHICKEN, WPI_GIO_CHICKEN_DIS_L0S_TIMER); /* Don't wait for ICH L0s (ICH bug workaround). */ WPI_SETBITS(sc, WPI_GIO_CHICKEN, WPI_GIO_CHICKEN_L1A_NO_L0S_RX); /* Set FH wait threshold to max (HW bug under stress workaround). */ WPI_SETBITS(sc, WPI_DBG_HPET_MEM, 0xffff0000); /* Retrieve PCIe Active State Power Management (ASPM). */ - reg = pci_read_config(sc->sc_dev, sc->sc_cap_off + 0x10, 1); + reg = pci_read_config(sc->sc_dev, sc->sc_cap_off + PCIER_LINK_CTL, 1); /* Workaround for HW instability in PCIe L0->L0s->L1 transition. */ - if (reg & 0x02) /* L1 Entry enabled. */ + if (reg & PCIEM_LINK_CTL_ASPMC_L1) /* L1 Entry enabled. */ WPI_SETBITS(sc, WPI_GIO, WPI_GIO_L0S_ENA); else WPI_CLRBITS(sc, WPI_GIO, WPI_GIO_L0S_ENA); WPI_SETBITS(sc, WPI_ANA_PLL, WPI_ANA_PLL_INIT); /* Wait for clock stabilization before accessing prph. */ if ((error = wpi_clock_wait(sc)) != 0) return error; if ((error = wpi_nic_lock(sc)) != 0) return error; /* Cleanup. */ wpi_prph_write(sc, WPI_APMG_CLK_DIS, 0x00000400); wpi_prph_clrbits(sc, WPI_APMG_PS, 0x00000200); /* Enable DMA and BSM (Bootstrap State Machine). */ wpi_prph_write(sc, WPI_APMG_CLK_EN, WPI_APMG_CLK_CTRL_DMA_CLK_RQT | WPI_APMG_CLK_CTRL_BSM_CLK_RQT); DELAY(20); /* Disable L1-Active. */ wpi_prph_setbits(sc, WPI_APMG_PCI_STT, WPI_APMG_PCI_STT_L1A_DIS); wpi_nic_unlock(sc); return 0; } static void wpi_apm_stop_master(struct wpi_softc *sc) { int ntries; /* Stop busmaster DMA activity. */ WPI_SETBITS(sc, WPI_RESET, WPI_RESET_STOP_MASTER); if ((WPI_READ(sc, WPI_GP_CNTRL) & WPI_GP_CNTRL_PS_MASK) == WPI_GP_CNTRL_MAC_PS) return; /* Already asleep. */ for (ntries = 0; ntries < 100; ntries++) { if (WPI_READ(sc, WPI_RESET) & WPI_RESET_MASTER_DISABLED) return; DELAY(10); } device_printf(sc->sc_dev, "%s: timeout waiting for master\n", __func__); } static void wpi_apm_stop(struct wpi_softc *sc) { wpi_apm_stop_master(sc); /* Reset the entire device. */ WPI_SETBITS(sc, WPI_RESET, WPI_RESET_SW); DELAY(10); /* Clear "initialization complete" bit. */ WPI_CLRBITS(sc, WPI_GP_CNTRL, WPI_GP_CNTRL_INIT_DONE); } static void wpi_nic_config(struct wpi_softc *sc) { uint32_t rev; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); /* voodoo from the Linux "driver".. */ rev = pci_read_config(sc->sc_dev, PCIR_REVID, 1); if ((rev & 0xc0) == 0x40) WPI_SETBITS(sc, WPI_HW_IF_CONFIG, WPI_HW_IF_CONFIG_ALM_MB); else if (!(rev & 0x80)) WPI_SETBITS(sc, WPI_HW_IF_CONFIG, WPI_HW_IF_CONFIG_ALM_MM); if (sc->cap == 0x80) WPI_SETBITS(sc, WPI_HW_IF_CONFIG, WPI_HW_IF_CONFIG_SKU_MRC); if ((sc->rev & 0xf0) == 0xd0) WPI_SETBITS(sc, WPI_HW_IF_CONFIG, WPI_HW_IF_CONFIG_REV_D); else WPI_CLRBITS(sc, WPI_HW_IF_CONFIG, WPI_HW_IF_CONFIG_REV_D); if (sc->type > 1) WPI_SETBITS(sc, WPI_HW_IF_CONFIG, WPI_HW_IF_CONFIG_TYPE_B); } static int wpi_hw_init(struct wpi_softc *sc) { uint8_t chnl; int ntries, error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); /* Clear pending interrupts. 
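*/
/*
 * Editorial note: in wpi_apm_init() above, this revision replaces two
 * magic numbers with their <dev/pci/pcireg.h> names: the config-space
 * offset 0x10 becomes PCIER_LINK_CTL (the PCIe Link Control register,
 * relative to the PCIe capability at sc_cap_off) and the bit 0x02
 * becomes PCIEM_LINK_CTL_ASPMC_L1 (ASPM L1 entry enabled).  The write
 * of 0xffffffff to WPI_INT below acknowledges (write-one-to-clear,
 * apparently) every pending interrupt cause.
 */
/*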
*/ WPI_WRITE(sc, WPI_INT, 0xffffffff); if ((error = wpi_apm_init(sc)) != 0) { device_printf(sc->sc_dev, "%s: could not power ON adapter, error %d\n", __func__, error); return error; } /* Select VMAIN power source. */ if ((error = wpi_nic_lock(sc)) != 0) return error; wpi_prph_clrbits(sc, WPI_APMG_PS, WPI_APMG_PS_PWR_SRC_MASK); wpi_nic_unlock(sc); /* Spin until VMAIN gets selected. */ for (ntries = 0; ntries < 5000; ntries++) { if (WPI_READ(sc, WPI_GPIO_IN) & WPI_GPIO_IN_VMAIN) break; DELAY(10); } if (ntries == 5000) { device_printf(sc->sc_dev, "timeout selecting power source\n"); return ETIMEDOUT; } /* Perform adapter initialization. */ wpi_nic_config(sc); /* Initialize RX ring. */ if ((error = wpi_nic_lock(sc)) != 0) return error; /* Set physical address of RX ring. */ WPI_WRITE(sc, WPI_FH_RX_BASE, sc->rxq.desc_dma.paddr); /* Set physical address of RX read pointer. */ WPI_WRITE(sc, WPI_FH_RX_RPTR_ADDR, sc->shared_dma.paddr + offsetof(struct wpi_shared, next)); WPI_WRITE(sc, WPI_FH_RX_WPTR, 0); /* Enable RX. */ WPI_WRITE(sc, WPI_FH_RX_CONFIG, WPI_FH_RX_CONFIG_DMA_ENA | WPI_FH_RX_CONFIG_RDRBD_ENA | WPI_FH_RX_CONFIG_WRSTATUS_ENA | WPI_FH_RX_CONFIG_MAXFRAG | WPI_FH_RX_CONFIG_NRBD(WPI_RX_RING_COUNT_LOG) | WPI_FH_RX_CONFIG_IRQ_DST_HOST | WPI_FH_RX_CONFIG_IRQ_TIMEOUT(1)); (void)WPI_READ(sc, WPI_FH_RSSR_TBL); /* barrier */ wpi_nic_unlock(sc); WPI_WRITE(sc, WPI_FH_RX_WPTR, (WPI_RX_RING_COUNT - 1) & ~7); /* Initialize TX rings. */ if ((error = wpi_nic_lock(sc)) != 0) return error; wpi_prph_write(sc, WPI_ALM_SCHED_MODE, 2); /* bypass mode */ wpi_prph_write(sc, WPI_ALM_SCHED_ARASTAT, 1); /* enable RA0 */ /* Enable all 6 TX rings. */ wpi_prph_write(sc, WPI_ALM_SCHED_TXFACT, 0x3f); wpi_prph_write(sc, WPI_ALM_SCHED_SBYPASS_MODE1, 0x10000); wpi_prph_write(sc, WPI_ALM_SCHED_SBYPASS_MODE2, 0x30002); wpi_prph_write(sc, WPI_ALM_SCHED_TXF4MF, 4); wpi_prph_write(sc, WPI_ALM_SCHED_TXF5MF, 5); /* Set physical address of TX rings. */ WPI_WRITE(sc, WPI_FH_TX_BASE, sc->shared_dma.paddr); WPI_WRITE(sc, WPI_FH_MSG_CONFIG, 0xffff05a5); /* Enable all DMA channels. */ for (chnl = 0; chnl < WPI_NDMACHNLS; chnl++) { WPI_WRITE(sc, WPI_FH_CBBC_CTRL(chnl), 0); WPI_WRITE(sc, WPI_FH_CBBC_BASE(chnl), 0); WPI_WRITE(sc, WPI_FH_TX_CONFIG(chnl), 0x80200008); } wpi_nic_unlock(sc); (void)WPI_READ(sc, WPI_FH_TX_BASE); /* barrier */ /* Clear "radio off" and "commands blocked" bits. */ WPI_WRITE(sc, WPI_UCODE_GP1_CLR, WPI_UCODE_GP1_RFKILL); WPI_WRITE(sc, WPI_UCODE_GP1_CLR, WPI_UCODE_GP1_CMD_BLOCKED); /* Clear pending interrupts. */ WPI_WRITE(sc, WPI_INT, 0xffffffff); /* Enable interrupts. */ WPI_WRITE(sc, WPI_INT_MASK, WPI_INT_MASK_DEF); /* _Really_ make sure "radio off" bit is cleared! */ WPI_WRITE(sc, WPI_UCODE_GP1_CLR, WPI_UCODE_GP1_RFKILL); WPI_WRITE(sc, WPI_UCODE_GP1_CLR, WPI_UCODE_GP1_RFKILL); if ((error = wpi_load_firmware(sc)) != 0) { device_printf(sc->sc_dev, "%s: could not load firmware, error %d\n", __func__, error); return error; } /* Wait at most one second for firmware alive notification. */ if ((error = mtx_sleep(sc, &sc->sc_mtx, PCATCH, "wpiinit", hz)) != 0) { device_printf(sc->sc_dev, "%s: timeout waiting for adapter to initialize, error %d\n", __func__, error); return error; } DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); /* Do post-firmware initialization. 
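*/
/*
 * Editorial note: "(WPI_RX_RING_COUNT - 1) & ~7" above rounds the initial
 * RX write pointer down to a multiple of 8; the Flow Handler apparently
 * requires WPI_FH_RX_WPTR to be 8-entry aligned.
 */
/*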
*/ return wpi_post_alive(sc); } static void wpi_hw_stop(struct wpi_softc *sc) { uint8_t chnl, qid; int ntries; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); if (WPI_READ(sc, WPI_UCODE_GP1) & WPI_UCODE_GP1_MAC_SLEEP) wpi_nic_lock(sc); WPI_WRITE(sc, WPI_RESET, WPI_RESET_NEVO); /* Disable interrupts. */ WPI_WRITE(sc, WPI_INT_MASK, 0); WPI_WRITE(sc, WPI_INT, 0xffffffff); WPI_WRITE(sc, WPI_FH_INT, 0xffffffff); /* Make sure we no longer hold the NIC lock. */ wpi_nic_unlock(sc); if (wpi_nic_lock(sc) == 0) { /* Stop TX scheduler. */ wpi_prph_write(sc, WPI_ALM_SCHED_MODE, 0); wpi_prph_write(sc, WPI_ALM_SCHED_TXFACT, 0); /* Stop all DMA channels. */ for (chnl = 0; chnl < WPI_NDMACHNLS; chnl++) { WPI_WRITE(sc, WPI_FH_TX_CONFIG(chnl), 0); for (ntries = 0; ntries < 200; ntries++) { if (WPI_READ(sc, WPI_FH_TX_STATUS) & WPI_FH_TX_STATUS_IDLE(chnl)) break; DELAY(10); } } wpi_nic_unlock(sc); } /* Stop RX ring. */ wpi_reset_rx_ring(sc); /* Reset all TX rings. */ for (qid = 0; qid < WPI_DRV_NTXQUEUES; qid++) wpi_reset_tx_ring(sc, &sc->txq[qid]); if (wpi_nic_lock(sc) == 0) { wpi_prph_write(sc, WPI_APMG_CLK_DIS, WPI_APMG_CLK_CTRL_DMA_CLK_RQT); wpi_nic_unlock(sc); } DELAY(5); /* Power OFF adapter. */ wpi_apm_stop(sc); } static void wpi_radio_on(void *arg0, int pending) { struct wpi_softc *sc = arg0; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); device_printf(sc->sc_dev, "RF switch: radio enabled\n"); WPI_LOCK(sc); callout_stop(&sc->watchdog_rfkill); WPI_UNLOCK(sc); if (vap != NULL) ieee80211_init(vap); } static void wpi_radio_off(void *arg0, int pending) { struct wpi_softc *sc = arg0; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); device_printf(sc->sc_dev, "RF switch: radio disabled\n"); ieee80211_notify_radio(ic, 0); wpi_stop(sc); if (vap != NULL) ieee80211_stop(vap); WPI_LOCK(sc); callout_reset(&sc->watchdog_rfkill, hz, wpi_watchdog_rfkill, sc); WPI_UNLOCK(sc); } static int wpi_init(struct wpi_softc *sc) { int error = 0; WPI_LOCK(sc); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_BEGIN, __func__); if (sc->sc_running != 0) goto end; /* Check that the radio is not disabled by hardware switch. */ if (!(WPI_READ(sc, WPI_GP_CNTRL) & WPI_GP_CNTRL_RFKILL)) { device_printf(sc->sc_dev, "RF switch: radio disabled (%s)\n", __func__); callout_reset(&sc->watchdog_rfkill, hz, wpi_watchdog_rfkill, sc); error = EINPROGRESS; goto end; } /* Read firmware images from the filesystem. */ if ((error = wpi_read_firmware(sc)) != 0) { device_printf(sc->sc_dev, "%s: could not read firmware, error %d\n", __func__, error); goto end; } sc->sc_running = 1; /* Initialize hardware and upload firmware. */ error = wpi_hw_init(sc); wpi_unload_firmware(sc); if (error != 0) { device_printf(sc->sc_dev, "%s: could not initialize hardware, error %d\n", __func__, error); goto fail; } /* Configure adapter now that it is ready. 
*/ if ((error = wpi_config(sc)) != 0) { device_printf(sc->sc_dev, "%s: could not configure device, error %d\n", __func__, error); goto fail; } DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END, __func__); WPI_UNLOCK(sc); return 0; fail: wpi_stop_locked(sc); end: DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_END_ERR, __func__); WPI_UNLOCK(sc); return error; } static void wpi_stop_locked(struct wpi_softc *sc) { WPI_LOCK_ASSERT(sc); if (sc->sc_running == 0) return; WPI_TX_LOCK(sc); WPI_TXQ_LOCK(sc); sc->sc_running = 0; WPI_TXQ_UNLOCK(sc); WPI_TX_UNLOCK(sc); WPI_TXQ_STATE_LOCK(sc); callout_stop(&sc->tx_timeout); WPI_TXQ_STATE_UNLOCK(sc); WPI_RXON_LOCK(sc); callout_stop(&sc->scan_timeout); callout_stop(&sc->calib_to); WPI_RXON_UNLOCK(sc); /* Power OFF hardware. */ wpi_hw_stop(sc); } static void wpi_stop(struct wpi_softc *sc) { WPI_LOCK(sc); wpi_stop_locked(sc); WPI_UNLOCK(sc); } /* * Callback from net80211 to start a scan. */ static void wpi_scan_start(struct ieee80211com *ic) { struct wpi_softc *sc = ic->ic_softc; wpi_set_led(sc, WPI_LED_LINK, 20, 2); } /* * Callback from net80211 to terminate a scan. */ static void wpi_scan_end(struct ieee80211com *ic) { struct wpi_softc *sc = ic->ic_softc; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap->iv_state == IEEE80211_S_RUN) wpi_set_led(sc, WPI_LED_LINK, 0, 1); } /** * Called by the net80211 framework to indicate to the driver * that the channel should be changed. */ static void wpi_set_channel(struct ieee80211com *ic) { const struct ieee80211_channel *c = ic->ic_curchan; struct wpi_softc *sc = ic->ic_softc; int error; DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); WPI_LOCK(sc); sc->sc_rxtap.wr_chan_freq = htole16(c->ic_freq); sc->sc_rxtap.wr_chan_flags = htole16(c->ic_flags); WPI_UNLOCK(sc); WPI_TX_LOCK(sc); sc->sc_txtap.wt_chan_freq = htole16(c->ic_freq); sc->sc_txtap.wt_chan_flags = htole16(c->ic_flags); WPI_TX_UNLOCK(sc); /* * Only need to set the channel in Monitor mode. AP scanning and auth * are already taken care of by their respective firmware commands. */ if (ic->ic_opmode == IEEE80211_M_MONITOR) { WPI_RXON_LOCK(sc); sc->rxon.chan = ieee80211_chan2ieee(ic, c); if (IEEE80211_IS_CHAN_2GHZ(c)) { sc->rxon.flags |= htole32(WPI_RXON_AUTO | WPI_RXON_24GHZ); } else { sc->rxon.flags &= ~htole32(WPI_RXON_AUTO | WPI_RXON_24GHZ); } if ((error = wpi_send_rxon(sc, 0, 1)) != 0) device_printf(sc->sc_dev, "%s: error %d setting channel\n", __func__, error); WPI_RXON_UNLOCK(sc); } } /** * Called by net80211 to indicate that we need to scan the current * channel. The channel has previously been set via the wpi_set_channel * callback. */ static void wpi_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell) { struct ieee80211vap *vap = ss->ss_vap; struct ieee80211com *ic = vap->iv_ic; struct wpi_softc *sc = ic->ic_softc; int error; WPI_RXON_LOCK(sc); error = wpi_scan(sc, ic->ic_curchan); WPI_RXON_UNLOCK(sc); if (error != 0) ieee80211_cancel_scan(vap); } /** * Called by the net80211 framework to indicate that * the minimum dwell time has been met and the scan may be terminated. * We don't actually terminate the scan as the firmware will notify * us when it's finished and we have no way to interrupt it.
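*/
/*
 * Editorial sketch (hypothetical, not in the tree): the spin-until-bit
 * pattern used by wpi_clock_wait(), wpi_load_bootcode() and
 * wpi_post_alive() could be factored into one helper along these lines.
 * It is placed here only for illustration.
 */
static int
wpi_poll_bit(struct wpi_softc *sc, uint32_t reg, uint32_t mask, int ntries,
    int delay_us)
{
	/* Poll "reg" until any bit in "mask" is set or the budget runs out. */
	while (ntries-- > 0) {
		if (WPI_READ(sc, reg) & mask)
			return (0);
		DELAY(delay_us);
	}
	return (ETIMEDOUT);
}
/*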
*/ static void wpi_scan_mindwell(struct ieee80211_scan_state *ss) { /* NB: don't try to abort scan; wait for firmware to finish */ } static void wpi_hw_reset(void *arg, int pending) { struct wpi_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); DPRINTF(sc, WPI_DEBUG_TRACE, TRACE_STR_DOING, __func__); ieee80211_notify_radio(ic, 0); if (vap != NULL && (ic->ic_flags & IEEE80211_F_SCAN)) ieee80211_cancel_scan(vap); wpi_stop(sc); if (vap != NULL) { ieee80211_stop(vap); ieee80211_init(vap); } } Index: projects/powernv/fs/nfsclient/nfs_clvfsops.c =================================================================== --- projects/powernv/fs/nfsclient/nfs_clvfsops.c (revision 290990) +++ projects/powernv/fs/nfsclient/nfs_clvfsops.c (revision 290991) @@ -1,1884 +1,1886 @@ /*- * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from nfs_vfsops.c 8.12 (Berkeley) 5/20/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_bootp.h" #include "opt_nfsroot.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(nfscl, "NFSv4 client"); extern int nfscl_ticks; extern struct timeval nfsboottime; extern struct nfsstats newnfsstats; extern int nfsrv_useacl; extern int nfscl_debuglevel; extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON]; extern struct mtx ncl_iod_mutex; NFSCLSTATEMUTEX; MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header"); MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct"); SYSCTL_DECL(_vfs_nfs); static int nfs_ip_paranoia = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW, &nfs_ip_paranoia, 0, ""); static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY; SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY, downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, ""); /* how long between console messages "nfs server foo not responding" */ static int nfs_tprintf_delay = NFS_TPRINTF_DELAY; SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY, downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, ""); #ifdef NFS_DEBUG int nfs_debug; SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, "Toggle debug flag"); #endif static int nfs_mountroot(struct mount *); static void nfs_sec_name(char *, int *); static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, const char *, struct ucred *, struct thread *); static int mountnfs(struct nfs_args *, struct mount *, struct sockaddr *, char *, u_char *, int, u_char *, int, u_char *, int, struct vnode **, struct ucred *, struct thread *, int, int, int); static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *, struct sockaddr_storage *, int *, off_t *, struct timeval *); static vfs_mount_t nfs_mount; static vfs_cmount_t nfs_cmount; static vfs_unmount_t nfs_unmount; static vfs_root_t nfs_root; static vfs_statfs_t nfs_statfs; static vfs_sync_t nfs_sync; static vfs_sysctl_t nfs_sysctl; static vfs_purge_t nfs_purge; /* * nfs vfs operations. */ static struct vfsops nfs_vfsops = { .vfs_init = ncl_init, .vfs_mount = nfs_mount, .vfs_cmount = nfs_cmount, .vfs_root = nfs_root, .vfs_statfs = nfs_statfs, .vfs_sync = nfs_sync, .vfs_uninit = ncl_uninit, .vfs_unmount = nfs_unmount, .vfs_sysctl = nfs_sysctl, .vfs_purge = nfs_purge, }; VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfs, 1); MODULE_DEPEND(nfs, nfscommon, 1, 1, 1); MODULE_DEPEND(nfs, krpc, 1, 1, 1); MODULE_DEPEND(nfs, nfssvc, 1, 1, 1); MODULE_DEPEND(nfs, nfslock, 1, 1, 1); /* * This structure is now defined in sys/nfs/nfs_diskless.c so that it * can be shared by both NFS clients. It is declared here so that it * will be defined for kernels built without NFS_ROOT, although it * isn't used in that case. 
*/ #if !defined(NFS_ROOT) struct nfs_diskless nfs_diskless = { { { 0 } } }; struct nfsv3_diskless nfsv3_diskless = { { { 0 } } }; int nfs_diskless_valid = 0; #endif SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD, &nfs_diskless_valid, 0, "Has the diskless struct been filled correctly"); SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD, nfsv3_diskless.root_hostnam, 0, "Path to nfs root"); SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD, &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr), "%Ssockaddr_in", "Diskless root nfs address"); void newnfsargs_ntoh(struct nfs_args *); static int nfs_mountdiskless(char *, struct sockaddr_in *, struct nfs_args *, struct thread *, struct vnode **, struct mount *); static void nfs_convert_diskless(void); static void nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs); int newnfs_iosize(struct nfsmount *nmp) { int iosize, maxio; /* First, set the upper limit for iosize */ if (nmp->nm_flag & NFSMNT_NFSV4) { maxio = NFS_MAXBSIZE; } else if (nmp->nm_flag & NFSMNT_NFSV3) { if (nmp->nm_sotype == SOCK_DGRAM) maxio = NFS_MAXDGRAMDATA; else maxio = NFS_MAXBSIZE; } else { maxio = NFS_V2MAXDATA; } if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0) nmp->nm_rsize = maxio; if (nmp->nm_rsize > NFS_MAXBSIZE) nmp->nm_rsize = NFS_MAXBSIZE; if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0) nmp->nm_readdirsize = maxio; if (nmp->nm_readdirsize > nmp->nm_rsize) nmp->nm_readdirsize = nmp->nm_rsize; if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0) nmp->nm_wsize = maxio; if (nmp->nm_wsize > NFS_MAXBSIZE) nmp->nm_wsize = NFS_MAXBSIZE; /* * Calculate the size used for io buffers. Use the larger * of the two sizes to minimise nfs requests but make sure * that it is at least one VM page to avoid wasting buffer - * space. + * space. It must also be at least NFS_DIRBLKSIZ, since + * that is the buffer size used for directories. 
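*/
/*
 * Editorial worked example for the clamp added above (not from the
 * commit): with rsize = wsize = 4096 on a 4 KB-page machine the old code
 * chose f_iosize = 4096, smaller than an NFS_DIRBLKSIZ-sized directory
 * buffer (commonly 8192, i.e. 16 * DIRBLKSIZ); the new imax() raises it
 * to 8192, so f_iosize is never smaller than a directory buffer.
 */
/*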
*/ iosize = imax(nmp->nm_rsize, nmp->nm_wsize); iosize = imax(iosize, PAGE_SIZE); + iosize = imax(iosize, NFS_DIRBLKSIZ); nmp->nm_mountp->mnt_stat.f_iosize = iosize; return (iosize); } static void nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs) { args->version = NFS_ARGSVERSION; args->addr = oargs->addr; args->addrlen = oargs->addrlen; args->sotype = oargs->sotype; args->proto = oargs->proto; args->fh = oargs->fh; args->fhsize = oargs->fhsize; args->flags = oargs->flags; args->wsize = oargs->wsize; args->rsize = oargs->rsize; args->readdirsize = oargs->readdirsize; args->timeo = oargs->timeo; args->retrans = oargs->retrans; args->readahead = oargs->readahead; args->hostname = oargs->hostname; } static void nfs_convert_diskless(void) { bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif, sizeof(struct ifaliasreq)); bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway, sizeof(struct sockaddr_in)); nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args); if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) { nfsv3_diskless.root_fhsize = NFSX_MYFH; bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH); } else { nfsv3_diskless.root_fhsize = NFSX_V2FH; bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH); } bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr, sizeof(struct sockaddr_in)); bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN); nfsv3_diskless.root_time = nfs_diskless.root_time; bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam, MAXHOSTNAMELEN); nfs_diskless_valid = 3; } /* * nfs statfs call */ static int nfs_statfs(struct mount *mp, struct statfs *sbp) { struct vnode *vp; struct thread *td; struct nfsmount *nmp = VFSTONFS(mp); struct nfsvattr nfsva; struct nfsfsinfo fs; struct nfsstatfs sb; int error = 0, attrflag, gotfsinfo = 0, ret; struct nfsnode *np; td = curthread; error = vfs_busy(mp, MBF_NOWAIT); if (error) return (error); error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); if (error) { vfs_unbusy(mp); return (error); } vp = NFSTOV(np); mtx_lock(&nmp->nm_mtx); if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) { mtx_unlock(&nmp->nm_mtx); error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva, &attrflag, NULL); if (!error) gotfsinfo = 1; } else mtx_unlock(&nmp->nm_mtx); if (!error) error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva, &attrflag, NULL); if (error != 0) NFSCL_DEBUG(2, "statfs=%d\n", error); if (attrflag == 0) { ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, td->td_ucred, td, &nfsva, NULL, NULL); if (ret) { /* * Just set default values to get things going. 
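*/
/*
 * Editorial note: these synthetic attributes (a mode-0777 directory, the
 * traditional root fileid 2, a 512 KB size) exist only so the mount can
 * proceed when even the fallback getattr RPC fails; they are presumably
 * replaced in the attribute cache once a real RPC succeeds.
 */
/*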
*/ NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr)); nfsva.na_vattr.va_type = VDIR; nfsva.na_vattr.va_mode = 0777; nfsva.na_vattr.va_nlink = 100; nfsva.na_vattr.va_uid = (uid_t)0; nfsva.na_vattr.va_gid = (gid_t)0; nfsva.na_vattr.va_fileid = 2; nfsva.na_vattr.va_gen = 1; nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; nfsva.na_vattr.va_size = 512 * 1024; } } (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (!error) { mtx_lock(&nmp->nm_mtx); if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4)) nfscl_loadfsinfo(nmp, &fs); nfscl_loadsbinfo(nmp, &sb, sbp); sbp->f_iosize = newnfs_iosize(nmp); mtx_unlock(&nmp->nm_mtx); if (sbp != &mp->mnt_stat) { bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN); } else if (NFS_ISV4(vp)) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); } vput(vp); vfs_unbusy(mp); return (error); } /* * nfs version 3 fsinfo rpc call */ int ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred, struct thread *td) { struct nfsfsinfo fs; struct nfsvattr nfsva; int error, attrflag; error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL); if (!error) { if (attrflag) (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); mtx_lock(&nmp->nm_mtx); nfscl_loadfsinfo(nmp, &fs); mtx_unlock(&nmp->nm_mtx); } return (error); } /* * Mount a remote root fs via. nfs. This depends on the info in the * nfs_diskless structure that has been filled in properly by some primary * bootstrap. * It goes something like this: * - do enough of "ifconfig" by calling ifioctl() so that the system * can talk to the server * - If nfs_diskless.mygateway is filled in, use that address as * a default gateway. * - build the rootfs mount point and call mountnfs() to do the rest. * * It is assumed to be safe to read, modify, and write the nfsv3_diskless * structure, as well as other global NFS client variables here, as * nfs_mountroot() will be called once in the boot before any other NFS * client activity occurs. */ static int nfs_mountroot(struct mount *mp) { struct thread *td = curthread; struct nfsv3_diskless *nd = &nfsv3_diskless; struct socket *so; struct vnode *vp; struct ifreq ir; int error; u_long l; char buf[128]; char *cp; #if defined(BOOTP_NFSROOT) && defined(BOOTP) bootpc_init(); /* use bootp to get nfs_diskless filled in */ #elif defined(NFS_ROOT) nfs_setup_diskless(); #endif if (nfs_diskless_valid == 0) return (-1); if (nfs_diskless_valid == 1) nfs_convert_diskless(); /* * XXX splnet, so networks will receive... */ splnet(); /* * Do enough of ifconfig(8) so that the critical net interface can * talk to the server. */ error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0, td->td_ucred, td); if (error) panic("nfs_mountroot: socreate(%04x): %d", nd->myif.ifra_addr.sa_family, error); #if 0 /* XXX Bad idea */ /* * We might not have been told the right interface, so we pass * over the first ten interfaces of the same kind, until we get * one of them configured. 
*/ for (i = strlen(nd->myif.ifra_name) - 1; nd->myif.ifra_name[i] >= '0' && nd->myif.ifra_name[i] <= '9'; nd->myif.ifra_name[i] ++) { error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); if(!error) break; } #endif error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); if (error) panic("nfs_mountroot: SIOCAIFADDR: %d", error); if ((cp = kern_getenv("boot.netif.mtu")) != NULL) { ir.ifr_mtu = strtol(cp, NULL, 10); bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ); freeenv(cp); error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td); if (error) printf("nfs_mountroot: SIOCSIFMTU: %d", error); } soclose(so); /* * If the gateway field is filled in, set it as the default route. * Note that pxeboot will set a default route of 0 if the route * is not set by the DHCP server. Check also for a value of 0 * to avoid panicking inappropriately in that situation. */ if (nd->mygateway.sin_len != 0 && nd->mygateway.sin_addr.s_addr != 0) { struct sockaddr_in mask, sin; bzero((caddr_t)&mask, sizeof(mask)); sin = mask; sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); /* XXX MRT use table 0 for this sort of thing */ CURVNET_SET(TD_TO_VNET(td)); error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin, (struct sockaddr *)&nd->mygateway, (struct sockaddr *)&mask, RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB); CURVNET_RESTORE(); if (error) panic("nfs_mountroot: RTM_ADD: %d", error); } /* * Create the rootfs mount point. */ nd->root_args.fh = nd->root_fh; nd->root_args.fhsize = nd->root_fhsize; l = ntohl(nd->root_saddr.sin_addr.s_addr); snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", (l >> 24) & 0xff, (l >> 16) & 0xff, (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam); printf("NFS ROOT: %s\n", buf); nd->root_args.hostname = buf; if ((error = nfs_mountdiskless(buf, &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) { return (error); } /* * This is not really an nfs issue, but it is much easier to * set hostname here and then let the "/etc/rc.xxx" files * mount the right /var based upon its preset value. */ mtx_lock(&prison0.pr_mtx); strlcpy(prison0.pr_hostname, nd->my_hostnam, sizeof(prison0.pr_hostname)); mtx_unlock(&prison0.pr_mtx); inittodr(ntohl(nd->root_time)); return (0); } /* * Internal version of mount system call for diskless setup. */ static int nfs_mountdiskless(char *path, struct sockaddr_in *sin, struct nfs_args *args, struct thread *td, struct vnode **vpp, struct mount *mp) { struct sockaddr *nam; int dirlen, error; char *dirpath; /* * Find the directory path in "path", which also has the server's * name/ip address in it. */ dirpath = strchr(path, ':'); if (dirpath != NULL) dirlen = strlen(++dirpath); else dirlen = 0; nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK); if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen, NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO, NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) { printf("nfs_mountroot: mount %s on /: %d\n", path, error); return (error); } return (0); } static void nfs_sec_name(char *sec, int *flagsp) { if (!strcmp(sec, "krb5")) *flagsp |= NFSMNT_KERB; else if (!strcmp(sec, "krb5i")) *flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY); else if (!strcmp(sec, "krb5p")) *flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY); } static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, const char *hostname, struct ucred *cred, struct thread *td) { int s; int adjsock; char *p; s = splnet(); /* * Set read-only flag if requested; otherwise, clear it if this is * an update. 
If this is not an update, then either the read-only * flag is already clear, or this is a root mount and it was set * intentionally at some previous point. */ if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) { MNT_ILOCK(mp); mp->mnt_flag |= MNT_RDONLY; MNT_IUNLOCK(mp); } else if (mp->mnt_flag & MNT_UPDATE) { MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_RDONLY; MNT_IUNLOCK(mp); } /* * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes * no sense in that context. Also, set up appropriate retransmit * and soft timeout behavior. */ if (argp->sotype == SOCK_STREAM) { nmp->nm_flag &= ~NFSMNT_NOCONN; nmp->nm_timeo = NFS_MAXTIMEO; if ((argp->flags & NFSMNT_NFSV4) != 0) nmp->nm_retry = INT_MAX; else nmp->nm_retry = NFS_RETRANS_TCP; } /* Also clear RDIRPLUS if NFSv2, it crashes some servers */ if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) { argp->flags &= ~NFSMNT_RDIRPLUS; nmp->nm_flag &= ~NFSMNT_RDIRPLUS; } /* Re-bind if rsrvd port requested and wasn't on one */ adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT) && (argp->flags & NFSMNT_RESVPORT); /* Also re-bind if we're switching to/from a connected UDP socket */ adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) != (argp->flags & NFSMNT_NOCONN)); /* Update flags atomically. Don't change the lock bits. */ nmp->nm_flag = argp->flags | nmp->nm_flag; splx(s); if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; if (nmp->nm_timeo < NFS_MINTIMEO) nmp->nm_timeo = NFS_MINTIMEO; else if (nmp->nm_timeo > NFS_MAXTIMEO) nmp->nm_timeo = NFS_MAXTIMEO; } if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { nmp->nm_retry = argp->retrans; if (nmp->nm_retry > NFS_MAXREXMIT) nmp->nm_retry = NFS_MAXREXMIT; } if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { nmp->nm_wsize = argp->wsize; /* * Clip at the power of 2 below the size. There is an * issue (not isolated) that causes intermittent page * faults if this is not done. */ if (nmp->nm_wsize > NFS_FABLKSIZE) nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1); else nmp->nm_wsize = NFS_FABLKSIZE; } if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { nmp->nm_rsize = argp->rsize; /* * Clip at the power of 2 below the size. There is an * issue (not isolated) that causes intermittent page * faults if this is not done. 
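*/
/*
 * Editorial worked example: fls() returns the index of the most
 * significant set bit (fls(1) == 1), so "1 << (fls(x) - 1)" rounds x
 * down to a power of 2.  E.g. rsize = 50000 -> fls() = 16 ->
 * nm_rsize = 1 << 15 = 32768.
 */
/*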
*/ if (nmp->nm_rsize > NFS_FABLKSIZE) nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1); else nmp->nm_rsize = NFS_FABLKSIZE; } if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) { nmp->nm_readdirsize = argp->readdirsize; } if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0) nmp->nm_acregmin = argp->acregmin; else nmp->nm_acregmin = NFS_MINATTRTIMO; if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0) nmp->nm_acregmax = argp->acregmax; else nmp->nm_acregmax = NFS_MAXATTRTIMO; if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0) nmp->nm_acdirmin = argp->acdirmin; else nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0) nmp->nm_acdirmax = argp->acdirmax; else nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; if (nmp->nm_acdirmin > nmp->nm_acdirmax) nmp->nm_acdirmin = nmp->nm_acdirmax; if (nmp->nm_acregmin > nmp->nm_acregmax) nmp->nm_acregmin = nmp->nm_acregmax; if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) { if (argp->readahead <= NFS_MAXRAHEAD) nmp->nm_readahead = argp->readahead; else nmp->nm_readahead = NFS_MAXRAHEAD; } if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) { if (argp->wcommitsize < nmp->nm_wsize) nmp->nm_wcommitsize = nmp->nm_wsize; else nmp->nm_wcommitsize = argp->wcommitsize; } adjsock |= ((nmp->nm_sotype != argp->sotype) || (nmp->nm_soproto != argp->proto)); if (nmp->nm_client != NULL && adjsock) { int haslock = 0, error = 0; if (nmp->nm_sotype == SOCK_STREAM) { error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock); if (!error) haslock = 1; } if (!error) { newnfs_disconnect(&nmp->nm_sockreq); if (haslock) newnfs_sndunlock(&nmp->nm_sockreq.nr_lock); nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; if (nmp->nm_sotype == SOCK_DGRAM) while (newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)) { printf("newnfs_args: retrying connect\n"); (void) nfs_catnap(PSOCK, 0, "nfscon"); } } } else { nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; } if (hostname != NULL) { strlcpy(nmp->nm_hostname, hostname, sizeof(nmp->nm_hostname)); p = strchr(nmp->nm_hostname, ':'); if (p != NULL) *p = '\0'; } } static const char *nfs_opts[] = { "from", "nfs_args", "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union", "noclusterr", "noclusterw", "multilabel", "acls", "force", "update", "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus", "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize", "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax", "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh", "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath", "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr", "pnfs", "wcommitsize", NULL }; /* * VFS Operations. * * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the sockargs() call because sockargs() allocates an mbuf and * an error after that means that I have to release the mbuf. 
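*/
/*
 * Editorial sketch (hypothetical helper, not in the tree): nfs_mount()
 * below repeats the same vfs_getopt()/sscanf() validation for more than
 * a dozen integer options; it could be factored roughly as follows.
 * The name and the minval convention are invented for illustration.
 */
static int
nfs_parse_int_opt(struct mount *mp, const char *name, int *valp, int minval)
{
	char *opt;

	if (vfs_getopt(mp->mnt_optnew, name, (void **)&opt, NULL) != 0)
		return (ENOENT);	/* Option not supplied; not an error. */
	if (opt == NULL || sscanf(opt, "%d", valp) != 1 || *valp < minval) {
		vfs_mount_error(mp, "illegal %s: %s", name,
		    opt == NULL ? "(null)" : opt);
		return (EINVAL);
	}
	return (0);
}
/*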
*/ /* ARGSUSED */ static int nfs_mount(struct mount *mp) { struct nfs_args args = { .version = NFS_ARGSVERSION, .addr = NULL, .addrlen = sizeof (struct sockaddr_in), .sotype = SOCK_STREAM, .proto = 0, .fh = NULL, .fhsize = 0, .flags = NFSMNT_RESVPORT, .wsize = NFS_WSIZE, .rsize = NFS_RSIZE, .readdirsize = NFS_READDIRSIZE, .timeo = 10, .retrans = NFS_RETRANS, .readahead = NFS_DEFRAHEAD, .wcommitsize = 0, /* was: NQ_DEFLEASE */ .hostname = NULL, .acregmin = NFS_MINATTRTIMO, .acregmax = NFS_MAXATTRTIMO, .acdirmin = NFS_MINDIRATTRTIMO, .acdirmax = NFS_MAXDIRATTRTIMO, }; int error = 0, ret, len; struct sockaddr *nam = NULL; struct vnode *vp; struct thread *td; char hst[MNAMELEN]; u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100]; char *cp, *opt, *name, *secname; int nametimeo = NFS_DEFAULT_NAMETIMEO; int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO; int minvers = 0; int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen; size_t hstlen; has_nfs_args_opt = 0; if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) { error = EINVAL; goto out; } td = curthread; if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) { error = nfs_mountroot(mp); goto out; } nfscl_init(); /* * The old mount_nfs program passed the struct nfs_args * from userspace to kernel. The new mount_nfs program * passes string options via nmount() from userspace to kernel * and we populate the struct nfs_args in the kernel. */ if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) { error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof(args)); if (error != 0) goto out; if (args.version != NFS_ARGSVERSION) { error = EPROGMISMATCH; goto out; } has_nfs_args_opt = 1; } /* Handle the new style options. */ if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) { args.acdirmin = args.acdirmax = args.acregmin = args.acregmax = 0; args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX | NFSMNT_ACREGMIN | NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0) args.flags |= NFSMNT_NOCONN; if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0) args.flags &= ~NFSMNT_NOCONN; if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0) args.flags |= NFSMNT_NOLOCKD; if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0) args.flags &= ~NFSMNT_NOLOCKD; if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0) args.flags |= NFSMNT_INT; if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0) args.flags |= NFSMNT_RDIRPLUS; if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0) args.flags |= NFSMNT_RESVPORT; if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0) args.flags &= ~NFSMNT_RESVPORT; if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0) args.flags |= NFSMNT_SOFT; if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0) args.flags &= ~NFSMNT_SOFT; if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0) args.sotype = SOCK_DGRAM; if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0) args.sotype = SOCK_DGRAM; if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0) args.sotype = SOCK_STREAM; if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0) args.flags |= NFSMNT_NFSV3; if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) { args.flags |= NFSMNT_NFSV4; args.sotype = SOCK_STREAM; } if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0) args.flags |= NFSMNT_ALLGSSNAME; if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0) args.flags |= NFSMNT_NOCTO; if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0) args.flags |= NFSMNT_NONCONTIGWR; if 
(vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0) args.flags |= NFSMNT_PNFS; if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal readdirsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.readdirsize); if (ret != 1 || args.readdirsize <= 0) { vfs_mount_error(mp, "illegal readdirsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_READDIRSIZE; } if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal readahead"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.readahead); if (ret != 1 || args.readahead <= 0) { vfs_mount_error(mp, "illegal readahead: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_READAHEAD; } if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal wsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.wsize); if (ret != 1 || args.wsize <= 0) { vfs_mount_error(mp, "illegal wsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_WSIZE; } if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal rsize"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.rsize); if (ret != 1 || args.rsize <= 0) { vfs_mount_error(mp, "illegal wsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_RSIZE; } if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal retrans"); error = EINVAL; goto out; } ret = sscanf(opt, "%d", &args.retrans); if (ret != 1 || args.retrans <= 0) { vfs_mount_error(mp, "illegal retrans: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_RETRANS; } if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmin); if (ret != 1 || args.acregmin < 0) { vfs_mount_error(mp, "illegal actimeo: %s", opt); error = EINVAL; goto out; } args.acdirmin = args.acdirmax = args.acregmax = args.acregmin; args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX | NFSMNT_ACREGMIN | NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmin); if (ret != 1 || args.acregmin < 0) { vfs_mount_error(mp, "illegal acregmin: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACREGMIN; } if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acregmax); if (ret != 1 || args.acregmax < 0) { vfs_mount_error(mp, "illegal acregmax: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACREGMAX; } if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acdirmin); if (ret != 1 || args.acdirmin < 0) { vfs_mount_error(mp, "illegal acdirmin: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACDIRMIN; } if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.acdirmax); if (ret != 1 || args.acdirmax < 0) { vfs_mount_error(mp, "illegal acdirmax: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_ACDIRMAX; } if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.wcommitsize); if (ret != 1 || args.wcommitsize < 0) { vfs_mount_error(mp, "illegal wcommitsize: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_WCOMMITSIZE; } if (vfs_getopt(mp->mnt_optnew, "timeo", (void 
**)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.timeo); if (ret != 1 || args.timeo <= 0) { vfs_mount_error(mp, "illegal timeo: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_TIMEO; } if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &args.timeo); if (ret != 1 || args.timeo <= 0) { vfs_mount_error(mp, "illegal timeout: %s", opt); error = EINVAL; goto out; } args.flags |= NFSMNT_TIMEO; } if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &nametimeo); if (ret != 1 || nametimeo < 0) { vfs_mount_error(mp, "illegal nametimeo: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &negnametimeo); if (ret != 1 || negnametimeo < 0) { vfs_mount_error(mp, "illegal negnametimeo: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) == 0) { ret = sscanf(opt, "%d", &minvers); if (ret != 1 || minvers < 0 || minvers > 1 || (args.flags & NFSMNT_NFSV4) == 0) { vfs_mount_error(mp, "illegal minorversion: %s", opt); error = EINVAL; goto out; } } if (vfs_getopt(mp->mnt_optnew, "sec", (void **) &secname, NULL) == 0) nfs_sec_name(secname, &args.flags); if (mp->mnt_flag & MNT_UPDATE) { struct nfsmount *nmp = VFSTONFS(mp); if (nmp == NULL) { error = EIO; goto out; } /* * If a change from TCP->UDP is done and there are thread(s) * that have I/O RPC(s) in progress with a transfer size * greater than NFS_MAXDGRAMDATA, those thread(s) will be * hung, retrying the RPC(s) forever. Usually these threads * will be seen doing an uninterruptible sleep on wait channel * "nfsreq". */ if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM) tprintf(td->td_proc, LOG_WARNING, "Warning: mount -u that changes TCP->UDP can result in hung threads\n"); /* * When doing an update, we can't change version, * security, switch lockd strategies or change cookie * translation */ args.flags = (args.flags & ~(NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) | (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4 | NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)); nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td); goto out; } /* * Make the nfs_ip_paranoia sysctl serve as the default connection * or no-connection mode for those protocols that support * no-connection mode (the flag will be cleared later for protocols * that do not support no-connection mode). This will allow a client * to receive replies from a different IP than the request was * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid), * not 0. */ if (nfs_ip_paranoia == 0) args.flags |= NFSMNT_NOCONN; if (has_nfs_args_opt != 0) { /* * In the 'nfs_args' case, the pointers in the args * structure are in userland - we copy them in here. 
*/ if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) { vfs_mount_error(mp, "Bad file handle"); error = EINVAL; goto out; } error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); if (error != 0) goto out; error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen); if (error != 0) goto out; bzero(&hst[hstlen], MNAMELEN - hstlen); args.hostname = hst; /* sockargs() call must be after above copyin() calls */ error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen); if (error != 0) goto out; } else { if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh, &args.fhsize) == 0) { if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) { vfs_mount_error(mp, "Bad file handle"); error = EINVAL; goto out; } bcopy(args.fh, nfh, args.fhsize); } else { args.fhsize = 0; } (void) vfs_getopt(mp->mnt_optnew, "hostname", (void **)&args.hostname, &len); if (args.hostname == NULL) { vfs_mount_error(mp, "Invalid hostname"); error = EINVAL; goto out; } bcopy(args.hostname, hst, MNAMELEN); hst[MNAMELEN - 1] = '\0'; } if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0) strlcpy(srvkrbname, name, sizeof (srvkrbname)); else { snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst); cp = strchr(srvkrbname, ':'); if (cp != NULL) *cp = '\0'; } srvkrbnamelen = strlen(srvkrbname); if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0) strlcpy(krbname, name, sizeof (krbname)); else krbname[0] = '\0'; krbnamelen = strlen(krbname); if (vfs_getopt(mp->mnt_optnew, "dirpath", (void **)&name, NULL) == 0) strlcpy(dirpath, name, sizeof (dirpath)); else dirpath[0] = '\0'; dirlen = strlen(dirpath); if (has_nfs_args_opt == 0) { if (vfs_getopt(mp->mnt_optnew, "addr", (void **)&args.addr, &args.addrlen) == 0) { if (args.addrlen > SOCK_MAXADDRLEN) { error = ENAMETOOLONG; goto out; } nam = malloc(args.addrlen, M_SONAME, M_WAITOK); bcopy(args.addr, nam, args.addrlen); nam->sa_len = args.addrlen; } else { vfs_mount_error(mp, "No server address"); error = EINVAL; goto out; } } args.fh = nfh; error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath, dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td, nametimeo, negnametimeo, minvers); out: if (!error) { MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF | MNTK_USES_BCACHE; MNT_IUNLOCK(mp); } return (error); } /* * VFS Operations. * * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the sockargs() call because sockargs() allocates an mbuf and * an error after that means that I have to release the mbuf. 
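 *
 * (Aside: the string options parsed earlier ("rsize", "wsize", "timeo",
 * and friends) arrive from userland as name/value iovec pairs via
 * nmount(2). A minimal, hypothetical userland sketch of that calling
 * convention; the mount point, server path and rsize below are made up,
 * and a real NFS mount also needs an "addr" sockaddr option that
 * mount_nfs(8) normally builds:
 *
 *	#include <sys/uio.h>
 *	#include <sys/mount.h>
 *	#include <string.h>
 *	#include <err.h>
 *
 *	static void
 *	set_iov(struct iovec *iov, const char *str)
 *	{
 *		iov->iov_base = __DECONST(char *, str);
 *		iov->iov_len = strlen(str) + 1;
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		struct iovec iov[8];
 *
 *		set_iov(&iov[0], "fstype");   set_iov(&iov[1], "nfs");
 *		set_iov(&iov[2], "fspath");   set_iov(&iov[3], "/mnt");
 *		set_iov(&iov[4], "hostname"); set_iov(&iov[5], "server:/export");
 *		set_iov(&iov[6], "rsize");    set_iov(&iov[7], "32768");
 *		if (nmount(iov, 8, 0) == -1)
 *			err(1, "nmount");
 *		return (0);
 *	}
 *
 * End of aside.)
 *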
*/ /* ARGSUSED */ static int nfs_cmount(struct mntarg *ma, void *data, uint64_t flags) { int error; struct nfs_args args; error = copyin(data, &args, sizeof (struct nfs_args)); if (error) return error; ma = mount_arg(ma, "nfs_args", &args, sizeof args); error = kernel_mount(ma, flags); return (error); } /* * Common code for mount and mountroot */ static int mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen, u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp, struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo, int minvers) { struct nfsmount *nmp; struct nfsnode *np; int error, trycnt, ret; struct nfsvattr nfsva; struct nfsclclient *clp; struct nfsclds *dsp, *tdsp; uint32_t lease; static u_int64_t clval = 0; NFSCL_DEBUG(3, "in mnt\n"); clp = NULL; if (mp->mnt_flag & MNT_UPDATE) { nmp = VFSTONFS(mp); printf("%s: MNT_UPDATE is no longer handled here\n", __func__); FREE(nam, M_SONAME); return (0); } else { MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) + krbnamelen + dirlen + srvkrbnamelen + 2, M_NEWNFSMNT, M_WAITOK | M_ZERO); TAILQ_INIT(&nmp->nm_bufq); if (clval == 0) clval = (u_int64_t)nfsboottime.tv_sec; nmp->nm_clval = clval++; nmp->nm_krbnamelen = krbnamelen; nmp->nm_dirpathlen = dirlen; nmp->nm_srvkrbnamelen = srvkrbnamelen; if (td->td_ucred->cr_uid != (uid_t)0) { /* * nm_uid is used to get KerberosV credentials for * the nfsv4 state handling operations if there is * no host based principal set. Use the uid of * this user if not root, since they are doing the * mount. I don't think setting this for root will * work, since root normally does not have user * credentials in a credentials cache. */ nmp->nm_uid = td->td_ucred->cr_uid; } else { /* * Just set to -1, so it won't be used. */ nmp->nm_uid = (uid_t)-1; } /* Copy and null terminate all the names */ if (nmp->nm_krbnamelen > 0) { bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen); nmp->nm_name[nmp->nm_krbnamelen] = '\0'; } if (nmp->nm_dirpathlen > 0) { bcopy(dirpath, NFSMNT_DIRPATH(nmp), nmp->nm_dirpathlen); nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen + 1] = '\0'; } if (nmp->nm_srvkrbnamelen > 0) { bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp), nmp->nm_srvkrbnamelen); nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen + nmp->nm_srvkrbnamelen + 2] = '\0'; } nmp->nm_sockreq.nr_cred = crhold(cred); mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF); mp->mnt_data = nmp; nmp->nm_getinfo = nfs_getnlminfo; nmp->nm_vinvalbuf = ncl_vinvalbuf; } vfs_getnewfsid(mp); nmp->nm_mountp = mp; mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK); /* * Since nfs_decode_args() might optionally set them, these * need to be set to defaults before the call, so that the * optional settings aren't overwritten. */ nmp->nm_nametimeo = nametimeo; nmp->nm_negnametimeo = negnametimeo; nmp->nm_timeo = NFS_TIMEO; nmp->nm_retry = NFS_RETRANS; nmp->nm_readahead = NFS_DEFRAHEAD; /* This is empirical approximation of sqrt(hibufspace) * 256. */ nmp->nm_wcommitsize = NFS_MAXBSIZE / 256; while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace) nmp->nm_wcommitsize *= 2; nmp->nm_wcommitsize *= 256; if ((argp->flags & NFSMNT_NFSV4) != 0) nmp->nm_minorvers = minvers; else nmp->nm_minorvers = 0; nfs_decode_args(mp, nmp, argp, hst, cred, td); /* * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too * high, depending on whether we end up with negative offsets in * the client or server somewhere. 
2GB-1 may be safer. * * For V3, ncl_fsinfo will adjust this as necessary. Assume maximum * that we can handle until we find out otherwise. */ if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) nmp->nm_maxfilesize = 0xffffffffLL; else nmp->nm_maxfilesize = OFF_MAX; if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) { nmp->nm_wsize = NFS_WSIZE; nmp->nm_rsize = NFS_RSIZE; nmp->nm_readdirsize = NFS_READDIRSIZE; } nmp->nm_numgrps = NFS_MAXGRPS; nmp->nm_tprintf_delay = nfs_tprintf_delay; if (nmp->nm_tprintf_delay < 0) nmp->nm_tprintf_delay = 0; nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay; if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; nmp->nm_fhsize = argp->fhsize; if (nmp->nm_fhsize > 0) bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); nmp->nm_nam = nam; /* Set up the sockets and per-host congestion */ nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; nmp->nm_sockreq.nr_prog = NFS_PROG; if ((argp->flags & NFSMNT_NFSV4)) nmp->nm_sockreq.nr_vers = NFS_VER4; else if ((argp->flags & NFSMNT_NFSV3)) nmp->nm_sockreq.nr_vers = NFS_VER3; else nmp->nm_sockreq.nr_vers = NFS_VER2; if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0))) goto bad; /* For NFSv4.1, get the clientid now. */ if (nmp->nm_minorvers > 0) { NFSCL_DEBUG(3, "at getcl\n"); error = nfscl_getcl(mp, cred, td, 0, &clp); NFSCL_DEBUG(3, "aft getcl=%d\n", error); if (error != 0) goto bad; } if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) && nmp->nm_dirpathlen > 0) { NFSCL_DEBUG(3, "in dirp\n"); /* * If the fhsize on the mount point == 0 for V4, the mount * path needs to be looked up. */ trycnt = 3; do { error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, td); NFSCL_DEBUG(3, "aft dirp=%d\n", error); if (error) (void) nfs_catnap(PZERO, error, "nfsgetdirp"); } while (error && --trycnt > 0); if (error) { error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); goto bad; } } /* * A reference count is needed on the nfsnode representing the * remote root. If this object is not persistent, then backward * traversals of the mount point (i.e. "..") will not work if * the nfsnode gets flushed out of the cache. Ufs does not have * this problem, because one can identify root inodes by their * number == ROOTINO (2). */ if (nmp->nm_fhsize > 0) { /* * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set * non-zero for the root vnode. f_iosize will be set correctly * by nfs_statfs() before any I/O occurs. */ mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ; error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); if (error) goto bad; *vpp = NFSTOV(np); /* * Get file attributes and transfer parameters for the * mountpoint. This has the side effect of filling in * (*vpp)->v_type with the correct value. */ ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, cred, td, &nfsva, NULL, &lease); if (ret) { /* * Just set default values to get things going. 
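 *
 * (Aside: the nm_wcommitsize default set earlier in mountnfs() uses a
 * doubling loop to approximate sqrt(hibufspace) * 256. A standalone
 * sketch of that arithmetic, assuming NFS_MAXBSIZE == 65536 and a
 * made-up 64MB hibufspace:
 *
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		long hibufspace = 64L * 1024 * 1024;	// hypothetical
 *		long wcommitsize = 65536 / 256;		// NFS_MAXBSIZE / 256
 *
 *		// Double until wcommitsize is roughly sqrt(hibufspace)...
 *		while (wcommitsize * wcommitsize < hibufspace)
 *			wcommitsize *= 2;
 *		// ...then scale by 256, as the mount code does.
 *		wcommitsize *= 256;
 *		printf("%ld\n", wcommitsize);	// prints 2097152 (2MB)
 *		return (0);
 *	}
 *
 * End of aside.)
 *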
*/ NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr)); nfsva.na_vattr.va_type = VDIR; nfsva.na_vattr.va_mode = 0777; nfsva.na_vattr.va_nlink = 100; nfsva.na_vattr.va_uid = (uid_t)0; nfsva.na_vattr.va_gid = (gid_t)0; nfsva.na_vattr.va_fileid = 2; nfsva.na_vattr.va_gen = 1; nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; nfsva.na_vattr.va_size = 512 * 1024; lease = 60; } (void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1); if (nmp->nm_minorvers > 0) { NFSCL_DEBUG(3, "lease=%d\n", (int)lease); NFSLOCKCLSTATE(); clp->nfsc_renew = NFSCL_RENEW(lease); clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clp->nfsc_clientidrev++; if (clp->nfsc_clientidrev == 0) clp->nfsc_clientidrev++; NFSUNLOCKCLSTATE(); /* * Mount will succeed, so the renew thread can be * started now. */ nfscl_start_renewthread(clp); nfscl_clientrelease(clp); } if (argp->flags & NFSMNT_NFSV3) ncl_fsinfo(nmp, *vpp, cred, td); /* Mark if the mount point supports NFSv4 ACLs. */ if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 && ret == 0 && NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) { MNT_ILOCK(mp); mp->mnt_flag |= MNT_NFS4ACLS; MNT_IUNLOCK(mp); } /* * Lose the lock but keep the ref. */ NFSVOPUNLOCK(*vpp, 0); return (0); } error = EIO; bad: if (clp != NULL) nfscl_clientrelease(clp); newnfs_disconnect(&nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); if (nmp->nm_sockreq.nr_auth != NULL) AUTH_DESTROY(nmp->nm_sockreq.nr_auth); mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); if (nmp->nm_clp != NULL) { NFSLOCKCLSTATE(); LIST_REMOVE(nmp->nm_clp, nfsc_list); NFSUNLOCKCLSTATE(); free(nmp->nm_clp, M_NFSCLCLIENT); } TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) nfscl_freenfsclds(dsp); FREE(nmp, M_NEWNFSMNT); FREE(nam, M_SONAME); return (error); } /* * unmount system call */ static int nfs_unmount(struct mount *mp, int mntflags) { struct thread *td; struct nfsmount *nmp; int error, flags = 0, i, trycnt = 0; struct nfsclds *dsp, *tdsp; td = curthread; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; nmp = VFSTONFS(mp); /* * Goes something like this.. * - Call vflush() to clear out vnodes for this filesystem * - Close the socket * - Free up the data structures */ /* In the forced case, cancel any outstanding requests. */ if (mntflags & MNT_FORCE) { error = newnfs_nmcancelreqs(nmp); if (error) goto out; /* For a forced close, get rid of the renew thread now */ nfscl_umount(nmp, td); } /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */ do { error = vflush(mp, 1, flags, td); if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30) (void) nfs_catnap(PSOCK, error, "newndm"); } while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30); if (error) goto out; /* * We are now committed to the unmount. */ if ((mntflags & MNT_FORCE) == 0) nfscl_umount(nmp, td); /* Make sure no nfsiods are assigned to this mount. 
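 *
 * (Aside: the forced-unmount path above retries vflush() up to 30 times,
 * napping via nfs_catnap() between attempts rather than spinning. A
 * generic standalone sketch of that bounded retry shape, with stand-ins
 * for the flush and the nap:
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <errno.h>
 *
 *	// Stand-in for vflush(): pretend it succeeds on the fifth try.
 *	static int
 *	try_flush(int attempt)
 *	{
 *		return (attempt < 5 ? EBUSY : 0);
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		int error, trycnt = 0;
 *
 *		do {
 *			error = try_flush(trycnt);
 *			if (error != 0 && ++trycnt < 30)
 *				usleep(100000);	// stand-in for nfs_catnap()
 *		} while (error != 0 && trycnt < 30);
 *		printf("%s after %d tries\n",
 *		    error ? "gave up" : "flushed", trycnt);
 *		return (0);
 *	}
 *
 * End of aside.)
 *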
*/ mtx_lock(&ncl_iod_mutex); for (i = 0; i < NFS_MAXASYNCDAEMON; i++) if (ncl_iodmount[i] == nmp) { ncl_iodwant[i] = NFSIOD_AVAILABLE; ncl_iodmount[i] = NULL; } mtx_unlock(&ncl_iod_mutex); newnfs_disconnect(&nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); FREE(nmp->nm_nam, M_SONAME); if (nmp->nm_sockreq.nr_auth != NULL) AUTH_DESTROY(nmp->nm_sockreq.nr_auth); mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) nfscl_freenfsclds(dsp); FREE(nmp, M_NEWNFSMNT); out: return (error); } /* * Return root of a filesystem */ static int nfs_root(struct mount *mp, int flags, struct vnode **vpp) { struct vnode *vp; struct nfsmount *nmp; struct nfsnode *np; int error; nmp = VFSTONFS(mp); error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags); if (error) return error; vp = NFSTOV(np); /* * Get transfer parameters and attributes for root vnode once. */ mtx_lock(&nmp->nm_mtx); if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) { mtx_unlock(&nmp->nm_mtx); ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread); } else mtx_unlock(&nmp->nm_mtx); if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_vflag |= VV_ROOT; *vpp = vp; return (0); } /* * Flush out the buffer cache */ /* ARGSUSED */ static int nfs_sync(struct mount *mp, int waitfor) { struct vnode *vp, *mvp; struct thread *td; int error, allerror = 0; td = curthread; MNT_ILOCK(mp); /* * If a forced dismount is in progress, return from here so that * the umount(2) syscall doesn't get stuck in VFS_SYNC() before * calling VFS_UNMOUNT(). */ if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { MNT_IUNLOCK(mp); return (EBADF); } MNT_IUNLOCK(mp); /* * Force stale buffer cache information to be flushed. */ loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* XXX Racy bv_cnt check. */ if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 || waitfor == MNT_LAZY) { VI_UNLOCK(vp); continue; } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } error = VOP_FSYNC(vp, waitfor, td); if (error) allerror = error; NFSVOPUNLOCK(vp, 0); vrele(vp); } return (allerror); } static int nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req) { struct nfsmount *nmp = VFSTONFS(mp); struct vfsquery vq; int error; bzero(&vq, sizeof(vq)); switch (op) { #if 0 case VFS_CTL_NOLOCKS: val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 
1 : 0; if (req->oldptr != NULL) { error = SYSCTL_OUT(req, &val, sizeof(val)); if (error) return (error); } if (req->newptr != NULL) { error = SYSCTL_IN(req, &val, sizeof(val)); if (error) return (error); if (val) nmp->nm_flag |= NFSMNT_NOLOCKS; else nmp->nm_flag &= ~NFSMNT_NOLOCKS; } break; #endif case VFS_CTL_QUERY: mtx_lock(&nmp->nm_mtx); if (nmp->nm_state & NFSSTA_TIMEO) vq.vq_flags |= VQ_NOTRESP; mtx_unlock(&nmp->nm_mtx); #if 0 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) vq.vq_flags |= VQ_NOTRESPLOCK; #endif error = SYSCTL_OUT(req, &vq, sizeof(vq)); break; case VFS_CTL_TIMEO: if (req->oldptr != NULL) { error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); } if (req->newptr != NULL) { error = vfs_suser(mp, req->td); if (error) return (error); error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; } break; default: return (ENOTSUP); } return (0); } /* * Purge any RPCs in progress, so that they will all return errors. * This allows dounmount() to continue as far as VFS_UNMOUNT() for a * forced dismount. */ static void nfs_purge(struct mount *mp) { struct nfsmount *nmp = VFSTONFS(mp); newnfs_nmcancelreqs(nmp); } /* * Extract the information needed by the nlm from the nfs vnode. */ static void nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp, struct sockaddr_storage *sp, int *is_v3p, off_t *sizep, struct timeval *timeop) { struct nfsmount *nmp; struct nfsnode *np = VTONFS(vp); nmp = VFSTONFS(vp->v_mount); if (fhlenp != NULL) *fhlenp = (size_t)np->n_fhp->nfh_len; if (fhp != NULL) bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len); if (sp != NULL) bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp))); if (is_v3p != NULL) *is_v3p = NFS_ISV3(vp); if (sizep != NULL) *sizep = np->n_size; if (timeop != NULL) { timeop->tv_sec = nmp->nm_timeo / NFS_HZ; timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ); } } /* * This function prints out an option name, based on the conditional * argument. */ static __inline void nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf, size_t *blen) { int len; if (testval != 0 && *blen > strlen(opt)) { len = snprintf(*buf, *blen, "%s", opt); if (len != strlen(opt)) printf("EEK!!\n"); *buf += len; *blen -= len; } } /* * This function prints out an option's integer value. */ static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf, size_t *blen) { int len; if (*blen > strlen(opt) + 1) { /* Could result in truncated output string. */ len = snprintf(*buf, *blen, "%s=%d", opt, optval); if (len < *blen) { *buf += len; *blen -= len; } } } /* * Load the option flags and values into the buffer. 
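 *
 * (Aside: nfscl_printopt() and nfscl_printoptval() above append into a
 * caller-supplied buffer, advancing the cursor and shrinking the
 * remaining length as they go. A minimal standalone sketch of the same
 * append pattern, with illustrative option names:
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	static void
 *	append_opt(int testval, const char *opt, char **buf, size_t *blen)
 *	{
 *		int len;
 *
 *		// Append only when the flag is set and the option fits.
 *		if (testval != 0 && *blen > strlen(opt)) {
 *			len = snprintf(*buf, *blen, "%s", opt);
 *			*buf += len;
 *			*blen -= len;
 *		}
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		char options[64] = "", *buf = options;
 *		size_t blen = sizeof(options);
 *
 *		append_opt(1, "nfsv3", &buf, &blen);
 *		append_opt(1, ",tcp", &buf, &blen);
 *		append_opt(0, ",udp", &buf, &blen);	// skipped
 *		printf("%s\n", options);		// prints "nfsv3,tcp"
 *		return (0);
 *	}
 *
 * End of aside.)
 *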
*/ void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen) { char *buf; size_t blen; buf = buffer; blen = buflen; nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf, &blen); if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) { nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs", &buf, &blen); } nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0, "nfsv2", &buf, &blen); nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen); nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0, ",noncontigwr", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) == 0, ",lockd", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) == NFSMNT_NOLOCKD, ",nolockd", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i", &buf, &blen); nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY | NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen); nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen); } Index: projects/powernv/kern/kern_dump.c =================================================================== --- projects/powernv/kern/kern_dump.c (revision 290990) +++ projects/powernv/kern/kern_dump.c (revision 290991) @@ -1,393 +1,399 @@ /*- * Copyright (c) 2002 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_watchdog.h" #include #include #include #include #include #include #include #ifdef SW_WATCHDOG #include #endif #include #include #include #include #include #include #include CTASSERT(sizeof(struct kerneldumpheader) == 512); /* * Don't touch the first SIZEOF_METADATA bytes on the dump device. This * is to protect us from metadata and to protect metadata from us. */ #define SIZEOF_METADATA (64*1024) #define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) #define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1)) off_t dumplo; /* Handle buffered writes. */ static char buffer[DEV_BSIZE]; static size_t fragsz; struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS]; +#if !defined(__powerpc__) && !defined(__sparc__) void dumpsys_gen_pa_init(void) { -#if !defined(__sparc__) && !defined(__powerpc__) int n, idx; bzero(dump_map, sizeof(dump_map)); for (n = 0; n < sizeof(dump_map) / sizeof(dump_map[0]); n++) { idx = n * 2; if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0) break; dump_map[n].pa_start = dump_avail[idx]; dump_map[n].pa_size = dump_avail[idx + 1] - dump_avail[idx]; } -#endif } +#endif struct dump_pa * dumpsys_gen_pa_next(struct dump_pa *mdp) { if (mdp == NULL) return (&dump_map[0]); mdp++; if (mdp->pa_size == 0) mdp = NULL; return (mdp); } void dumpsys_gen_wbinv_all(void) { + } void dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused, size_t chunk __unused, void *va __unused) { + } +#if !defined(__sparc__) int dumpsys_gen_write_aux_headers(struct dumperinfo *di) { return (0); } +#endif int dumpsys_buf_write(struct dumperinfo *di, char *ptr, size_t sz) { size_t len; int error; while (sz) { len = DEV_BSIZE - fragsz; if (len > sz) len = sz; bcopy(ptr, buffer + fragsz, len); fragsz += len; ptr += len; sz -= len; if (fragsz == DEV_BSIZE) { error = dump_write(di, buffer, 0, dumplo, DEV_BSIZE); if (error) return (error); dumplo += DEV_BSIZE; fragsz = 0; } } return (0); } int dumpsys_buf_flush(struct dumperinfo *di) { int error; if (fragsz == 0) return (0); error = dump_write(di, buffer, 0, dumplo, DEV_BSIZE); dumplo += DEV_BSIZE; fragsz = 0; return (error); } CTASSERT(PAGE_SHIFT < 20); #define PG2MB(pgs) ((pgs + (1 << (20 - PAGE_SHIFT)) - 1) >> (20 - PAGE_SHIFT)) int dumpsys_cb_dumpdata(struct dump_pa *mdp, int seqnr, void *arg) { struct dumperinfo *di = (struct dumperinfo*)arg; vm_paddr_t pa; void *va; uint64_t pgs; size_t counter, sz, chunk; int c, error; u_int maxdumppgs; error = 0; /* catch case 
in which chunk size is 0 */ counter = 0; /* Update twiddle every 16MB */ va = 0; pgs = mdp->pa_size / PAGE_SIZE; pa = mdp->pa_start; maxdumppgs = min(di->maxiosize / PAGE_SIZE, MAXDUMPPGS); if (maxdumppgs == 0) /* seatbelt */ maxdumppgs = 1; printf(" chunk %d: %juMB (%ju pages)", seqnr, (uintmax_t)PG2MB(pgs), (uintmax_t)pgs); dumpsys_wbinv_all(); while (pgs) { chunk = pgs; if (chunk > maxdumppgs) chunk = maxdumppgs; sz = chunk << PAGE_SHIFT; counter += sz; if (counter >> 24) { printf(" %ju", (uintmax_t)PG2MB(pgs)); counter &= (1 << 24) - 1; } dumpsys_map_chunk(pa, chunk, &va); #ifdef SW_WATCHDOG wdog_kern_pat(WD_LASTVAL); #endif error = dump_write(di, va, 0, dumplo, sz); dumpsys_unmap_chunk(pa, chunk, va); if (error) break; dumplo += sz; pgs -= chunk; pa += sz; /* Check for user abort. */ c = cncheckc(); if (c == 0x03) return (ECANCELED); if (c != -1) printf(" (CTRL-C to abort) "); } printf(" ... %s\n", (error) ? "fail" : "ok"); return (error); } int dumpsys_foreach_chunk(dumpsys_callback_t cb, void *arg) { struct dump_pa *mdp; int error, seqnr; seqnr = 0; mdp = dumpsys_pa_next(NULL); while (mdp != NULL) { error = (*cb)(mdp, seqnr++, arg); if (error) return (-error); mdp = dumpsys_pa_next(mdp); } return (seqnr); } +#if !defined(__sparc__) static off_t fileofs; static int cb_dumphdr(struct dump_pa *mdp, int seqnr, void *arg) { struct dumperinfo *di = (struct dumperinfo*)arg; Elf_Phdr phdr; uint64_t size; int error; size = mdp->pa_size; bzero(&phdr, sizeof(phdr)); phdr.p_type = PT_LOAD; phdr.p_flags = PF_R; /* XXX */ phdr.p_offset = fileofs; #ifdef __powerpc__ phdr.p_vaddr = (do_minidump? mdp->pa_start : ~0L); phdr.p_paddr = (do_minidump? ~0L : mdp->pa_start); #else phdr.p_vaddr = mdp->pa_start; phdr.p_paddr = mdp->pa_start; #endif phdr.p_filesz = size; phdr.p_memsz = size; phdr.p_align = PAGE_SIZE; error = dumpsys_buf_write(di, (char*)&phdr, sizeof(phdr)); fileofs += phdr.p_filesz; return (error); } static int cb_size(struct dump_pa *mdp, int seqnr, void *arg) { uint64_t *sz; sz = (uint64_t *)arg; *sz += (uint64_t)mdp->pa_size; return (0); } int dumpsys_generic(struct dumperinfo *di) { static struct kerneldumpheader kdh; Elf_Ehdr ehdr; uint64_t dumpsize; off_t hdrgap; size_t hdrsz; int error; #ifndef __powerpc__ if (do_minidump) return (minidumpsys(di)); #endif bzero(&ehdr, sizeof(ehdr)); ehdr.e_ident[EI_MAG0] = ELFMAG0; ehdr.e_ident[EI_MAG1] = ELFMAG1; ehdr.e_ident[EI_MAG2] = ELFMAG2; ehdr.e_ident[EI_MAG3] = ELFMAG3; ehdr.e_ident[EI_CLASS] = ELF_CLASS; #if BYTE_ORDER == LITTLE_ENDIAN ehdr.e_ident[EI_DATA] = ELFDATA2LSB; #else ehdr.e_ident[EI_DATA] = ELFDATA2MSB; #endif ehdr.e_ident[EI_VERSION] = EV_CURRENT; ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE; /* XXX big picture? */ ehdr.e_type = ET_CORE; ehdr.e_machine = EM_VALUE; ehdr.e_phoff = sizeof(ehdr); ehdr.e_flags = 0; ehdr.e_ehsize = sizeof(ehdr); ehdr.e_phentsize = sizeof(Elf_Phdr); ehdr.e_shentsize = sizeof(Elf_Shdr); dumpsys_pa_init(); /* Calculate dump size. */ dumpsize = 0L; ehdr.e_phnum = dumpsys_foreach_chunk(cb_size, &dumpsize) + DUMPSYS_NUM_AUX_HDRS; hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize; fileofs = MD_ALIGN(hdrsz); dumpsize += fileofs; hdrgap = fileofs - DEV_ALIGN(hdrsz); /* Determine dump offset on device. 
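 *
 * (A worked sketch of the placement math below, with made-up sizes: the
 * leader header, the dump data and the trailer header are packed against
 * the end of the device, and SIZEOF_METADATA bytes are kept clear:
 *
 *	#include <stdio.h>
 *	#include <stdint.h>
 *
 *	int
 *	main(void)
 *	{
 *		int64_t mediaoffset = 0;		// hypothetical
 *		int64_t mediasize = 8LL << 30;		// 8GB dump device
 *		int64_t dumpsize = 2LL << 30;		// 2GB dump
 *		int64_t kdhsize = 512;	// sizeof(struct kerneldumpheader)
 *		int64_t metadata = 64 * 1024;		// SIZEOF_METADATA
 *		int64_t dumplo;
 *
 *		// Refuse when the dump plus both headers would reach
 *		// the protected metadata at the front of the device.
 *		if (mediasize < metadata + dumpsize + kdhsize * 2)
 *			return (1);			// ENOSPC
 *		dumplo = mediaoffset + mediasize - dumpsize - kdhsize * 2;
 *		printf("dump starts at byte %jd\n", (intmax_t)dumplo);
 *		return (0);
 *	}
 *
 * End of aside.)
 *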
*/ if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { error = ENOSPC; goto fail; } dumplo = di->mediaoffset + di->mediasize - dumpsize; dumplo -= sizeof(kdh) * 2; mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_ARCH_VERSION, dumpsize, di->blocksize); printf("Dumping %ju MB (%d chunks)\n", (uintmax_t)dumpsize >> 20, ehdr.e_phnum - DUMPSYS_NUM_AUX_HDRS); /* Dump leader */ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); if (error) goto fail; dumplo += sizeof(kdh); /* Dump ELF header */ error = dumpsys_buf_write(di, (char*)&ehdr, sizeof(ehdr)); if (error) goto fail; /* Dump program headers */ error = dumpsys_foreach_chunk(cb_dumphdr, di); if (error < 0) goto fail; error = dumpsys_write_aux_headers(di); if (error < 0) goto fail; dumpsys_buf_flush(di); /* * All headers are written using blocked I/O, so we know the * current offset is (still) block aligned. Skip the alignment * in the file to have the segment contents aligned at page * boundary. We cannot use MD_ALIGN on dumplo, because we don't * care and may very well be unaligned within the dump device. */ dumplo += hdrgap; /* Dump memory chunks (updates dumplo) */ error = dumpsys_foreach_chunk(dumpsys_cb_dumpdata, di); if (error < 0) goto fail; /* Dump trailer */ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); if (error) goto fail; /* Signal completion, signoff and exit stage left. */ dump_write(di, NULL, 0, 0, 0); printf("\nDump complete\n"); return (0); fail: if (error < 0) error = -error; if (error == ECANCELED) printf("\nDump aborted\n"); else if (error == ENOSPC) printf("\nDump failed. Partition too small.\n"); else printf("\n** DUMP FAILED (ERROR %d) **\n", error); return (error); } +#endif Index: projects/powernv/kern/kern_jail.c =================================================================== --- projects/powernv/kern/kern_jail.c (revision 290990) +++ projects/powernv/kern/kern_jail.c (revision 290991) @@ -1,4769 +1,4774 @@ /*- * Copyright (c) 1999 Poul-Henning Kamp. * Copyright (c) 2008 Bjoern A. Zeeb. * Copyright (c) 2009 James Gritton. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif /* DDB */ #include #define DEFAULT_HOSTUUID "00000000-0000-0000-0000-000000000000" MALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); static MALLOC_DEFINE(M_PRISON_RACCT, "prison_racct", "Prison racct structures"); /* Keep struct prison prison0 and some code in kern_jail_set() readable. */ #ifdef INET #ifdef INET6 #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL|PR_IP6_SADDRSEL #else #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL #endif #else /* !INET */ #ifdef INET6 #define _PR_IP_SADDRSEL PR_IP6_SADDRSEL #else #define _PR_IP_SADDRSEL 0 #endif #endif /* prison0 describes what is "real" about the system. */ struct prison prison0 = { .pr_id = 0, .pr_name = "0", .pr_ref = 1, .pr_uref = 1, .pr_path = "/", .pr_securelevel = -1, .pr_devfs_rsnum = 0, .pr_childmax = JAIL_MAX, .pr_hostuuid = DEFAULT_HOSTUUID, .pr_children = LIST_HEAD_INITIALIZER(prison0.pr_children), #ifdef VIMAGE .pr_flags = PR_HOST|PR_VNET|_PR_IP_SADDRSEL, #else .pr_flags = PR_HOST|_PR_IP_SADDRSEL, #endif .pr_allow = PR_ALLOW_ALL, }; MTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF); /* allprison, allprison_racct and lastprid are protected by allprison_lock. */ struct sx allprison_lock; SX_SYSINIT(allprison_lock, &allprison_lock, "allprison"); struct prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison); LIST_HEAD(, prison_racct) allprison_racct; int lastprid = 0; static int do_jail_attach(struct thread *td, struct prison *pr); static void prison_complete(void *context, int pending); static void prison_deref(struct prison *pr, int flags); static char *prison_path(struct prison *pr1, struct prison *pr2); static void prison_remove_one(struct prison *pr); #ifdef RACCT static void prison_racct_attach(struct prison *pr); static void prison_racct_modify(struct prison *pr); static void prison_racct_detach(struct prison *pr); #endif #ifdef INET static int _prison_check_ip4(const struct prison *, const struct in_addr *); static int prison_restrict_ip4(struct prison *pr, struct in_addr *newip4); #endif #ifdef INET6 static int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6); static int prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6); #endif /* Flags for prison_deref */ #define PD_DEREF 0x01 #define PD_DEUREF 0x02 #define PD_LOCKED 0x04 #define PD_LIST_SLOCKED 0x08 #define PD_LIST_XLOCKED 0x10 /* * Parameter names corresponding to PR_* flag values. Size values are for kvm * as we cannot figure out the size of a sparse array, or an array without a * terminating entry. 
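 *
 * (A standalone sketch of the sparse designated-initializer layout used
 * for pr_flag_names below, and the NULL-skipping walk that pairs with
 * the exported sizeof; the names shown are the real flag names, the
 * driver around them is illustrative:
 *
 *	#include <stdio.h>
 *
 *	static const char *flag_names[] = {
 *		[0] = "persist",
 *		[7] = "ip4.saddrsel",	// slots 1..6 stay NULL
 *	};
 *
 *	int
 *	main(void)
 *	{
 *		size_t fi;
 *
 *		for (fi = 0;
 *		    fi < sizeof(flag_names) / sizeof(flag_names[0]); fi++) {
 *			if (flag_names[fi] == NULL)	// hole in the array
 *				continue;
 *			printf("bit %zu -> %s\n", fi, flag_names[fi]);
 *		}
 *		return (0);
 *	}
 *
 * End of aside.)
 *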
*/ static char *pr_flag_names[] = { [0] = "persist", #ifdef INET [7] = "ip4.saddrsel", #endif #ifdef INET6 [8] = "ip6.saddrsel", #endif }; const size_t pr_flag_names_size = sizeof(pr_flag_names); static char *pr_flag_nonames[] = { [0] = "nopersist", #ifdef INET [7] = "ip4.nosaddrsel", #endif #ifdef INET6 [8] = "ip6.nosaddrsel", #endif }; const size_t pr_flag_nonames_size = sizeof(pr_flag_nonames); struct jailsys_flags { const char *name; unsigned disable; unsigned new; } pr_flag_jailsys[] = { { "host", 0, PR_HOST }, #ifdef VIMAGE { "vnet", 0, PR_VNET }, #endif #ifdef INET { "ip4", PR_IP4_USER, PR_IP4_USER }, #endif #ifdef INET6 { "ip6", PR_IP6_USER, PR_IP6_USER }, #endif }; const size_t pr_flag_jailsys_size = sizeof(pr_flag_jailsys); static char *pr_allow_names[] = { "allow.set_hostname", "allow.sysvipc", "allow.raw_sockets", "allow.chflags", "allow.mount", "allow.quotas", "allow.socket_af", "allow.mount.devfs", "allow.mount.nullfs", "allow.mount.zfs", "allow.mount.procfs", "allow.mount.tmpfs", "allow.mount.fdescfs", "allow.mount.linprocfs", "allow.mount.linsysfs", }; const size_t pr_allow_names_size = sizeof(pr_allow_names); static char *pr_allow_nonames[] = { "allow.noset_hostname", "allow.nosysvipc", "allow.noraw_sockets", "allow.nochflags", "allow.nomount", "allow.noquotas", "allow.nosocket_af", "allow.mount.nodevfs", "allow.mount.nonullfs", "allow.mount.nozfs", "allow.mount.noprocfs", "allow.mount.notmpfs", "allow.mount.nofdescfs", "allow.mount.nolinprocfs", "allow.mount.nolinsysfs", }; const size_t pr_allow_nonames_size = sizeof(pr_allow_nonames); #define JAIL_DEFAULT_ALLOW PR_ALLOW_SET_HOSTNAME #define JAIL_DEFAULT_ENFORCE_STATFS 2 #define JAIL_DEFAULT_DEVFS_RSNUM 0 static unsigned jail_default_allow = JAIL_DEFAULT_ALLOW; static int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS; static int jail_default_devfs_rsnum = JAIL_DEFAULT_DEVFS_RSNUM; #if defined(INET) || defined(INET6) static unsigned jail_max_af_ips = 255; #endif /* * Initialize the parts of prison0 that can't be static-initialized with * constants. This is called from proc0_init() after creating thread0 cpuset. */ void prison0_init(void) { prison0.pr_cpuset = cpuset_ref(thread0.td_cpuset); prison0.pr_osreldate = osreldate; strlcpy(prison0.pr_osrelease, osrelease, sizeof(prison0.pr_osrelease)); } #ifdef INET static int qcmp_v4(const void *ip1, const void *ip2) { in_addr_t iaa, iab; /* * We need to compare in HBO here to get the list sorted as expected * by the result of the code. Sorting NBO addresses gives you * interesting results. If you do not understand, do not try. */ iaa = ntohl(((const struct in_addr *)ip1)->s_addr); iab = ntohl(((const struct in_addr *)ip2)->s_addr); /* * Do not simply return the difference of the two numbers, the int is * not wide enough. 
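 *
 * (A sketch of the overflow being avoided, with arbitrary addresses: a
 * comparator that returns the plain difference misorders values whose
 * difference does not fit in an int:
 *
 *	#include <stdio.h>
 *	#include <stdint.h>
 *
 *	// Broken: the 32-bit difference is truncated into an int.
 *	static int
 *	qcmp_broken(uint32_t a, uint32_t b)
 *	{
 *		return ((int)(a - b));
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		uint32_t a = 0x90000000;	// 144.0.0.0 in host order
 *		uint32_t b = 0x00000001;	// 0.0.0.1 in host order
 *
 *		// a > b, yet a - b == 0x8fffffff, negative as an int.
 *		printf("broken comparator says %d\n", qcmp_broken(a, b));
 *		return (0);
 *	}
 *
 * End of aside.)
 *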
*/ if (iaa > iab) return (1); else if (iaa < iab) return (-1); else return (0); } #endif #ifdef INET6 static int qcmp_v6(const void *ip1, const void *ip2) { const struct in6_addr *ia6a, *ia6b; int i, rc; ia6a = (const struct in6_addr *)ip1; ia6b = (const struct in6_addr *)ip2; rc = 0; for (i = 0; rc == 0 && i < sizeof(struct in6_addr); i++) { if (ia6a->s6_addr[i] > ia6b->s6_addr[i]) rc = 1; else if (ia6a->s6_addr[i] < ia6b->s6_addr[i]) rc = -1; } return (rc); } #endif /* * struct jail_args { * struct jail *jail; * }; */ int sys_jail(struct thread *td, struct jail_args *uap) { uint32_t version; int error; struct jail j; error = copyin(uap->jail, &version, sizeof(uint32_t)); if (error) return (error); switch (version) { case 0: { struct jail_v0 j0; /* FreeBSD single IPv4 jails. */ bzero(&j, sizeof(struct jail)); error = copyin(uap->jail, &j0, sizeof(struct jail_v0)); if (error) return (error); j.version = j0.version; j.path = j0.path; j.hostname = j0.hostname; j.ip4s = htonl(j0.ip_number); /* jail_v0 is host order */ break; } case 1: /* * Version 1 was used by multi-IPv4 jail implementations * that never made it into the official kernel. */ return (EINVAL); case 2: /* JAIL_API_VERSION */ /* FreeBSD multi-IPv4/IPv6,noIP jails. */ error = copyin(uap->jail, &j, sizeof(struct jail)); if (error) return (error); break; default: /* Sci-Fi jails are not supported, sorry. */ return (EINVAL); } return (kern_jail(td, &j)); } int kern_jail(struct thread *td, struct jail *j) { struct iovec optiov[2 * (4 + sizeof(pr_allow_names) / sizeof(pr_allow_names[0]) #ifdef INET + 1 #endif #ifdef INET6 + 1 #endif )]; struct uio opt; char *u_path, *u_hostname, *u_name; #ifdef INET uint32_t ip4s; struct in_addr *u_ip4; #endif #ifdef INET6 struct in6_addr *u_ip6; #endif size_t tmplen; int error, enforce_statfs, fi; bzero(&optiov, sizeof(optiov)); opt.uio_iov = optiov; opt.uio_iovcnt = 0; opt.uio_offset = -1; opt.uio_resid = -1; opt.uio_segflg = UIO_SYSSPACE; opt.uio_rw = UIO_READ; opt.uio_td = td; /* Set permissions for top-level jails from sysctls. */ if (!jailed(td->td_ucred)) { for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); fi++) { optiov[opt.uio_iovcnt].iov_base = (jail_default_allow & (1 << fi)) ? pr_allow_names[fi] : pr_allow_nonames[fi]; optiov[opt.uio_iovcnt].iov_len = strlen(optiov[opt.uio_iovcnt].iov_base) + 1; opt.uio_iovcnt += 2; } optiov[opt.uio_iovcnt].iov_base = "enforce_statfs"; optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs"); opt.uio_iovcnt++; enforce_statfs = jail_default_enforce_statfs; optiov[opt.uio_iovcnt].iov_base = &enforce_statfs; optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs); opt.uio_iovcnt++; } tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN; #ifdef INET ip4s = (j->version == 0) ? 
1 : j->ip4s; if (ip4s > jail_max_af_ips) return (EINVAL); tmplen += ip4s * sizeof(struct in_addr); #else if (j->ip4s > 0) return (EINVAL); #endif #ifdef INET6 if (j->ip6s > jail_max_af_ips) return (EINVAL); tmplen += j->ip6s * sizeof(struct in6_addr); #else if (j->ip6s > 0) return (EINVAL); #endif u_path = malloc(tmplen, M_TEMP, M_WAITOK); u_hostname = u_path + MAXPATHLEN; u_name = u_hostname + MAXHOSTNAMELEN; #ifdef INET u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN); #endif #ifdef INET6 #ifdef INET u_ip6 = (struct in6_addr *)(u_ip4 + ip4s); #else u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN); #endif #endif optiov[opt.uio_iovcnt].iov_base = "path"; optiov[opt.uio_iovcnt].iov_len = sizeof("path"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_path; error = copyinstr(j->path, u_path, MAXPATHLEN, &optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = "host.hostname"; optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_hostname; error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN, &optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } opt.uio_iovcnt++; if (j->jailname != NULL) { optiov[opt.uio_iovcnt].iov_base = "name"; optiov[opt.uio_iovcnt].iov_len = sizeof("name"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_name; error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN, &optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } opt.uio_iovcnt++; } #ifdef INET optiov[opt.uio_iovcnt].iov_base = "ip4.addr"; optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_ip4; optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr); if (j->version == 0) u_ip4->s_addr = j->ip4s; else { error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } } opt.uio_iovcnt++; #endif #ifdef INET6 optiov[opt.uio_iovcnt].iov_base = "ip6.addr"; optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_ip6; optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr); error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } opt.uio_iovcnt++; #endif KASSERT(opt.uio_iovcnt <= sizeof(optiov) / sizeof(optiov[0]), ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt)); error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH); free(u_path, M_TEMP); return (error); } /* * struct jail_set_args { * struct iovec *iovp; * unsigned int iovcnt; * int flags; * }; */ int sys_jail_set(struct thread *td, struct jail_set_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. 
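 *
 * (Aside: the name/value pairs come from a userland jail_set(2) call. A
 * hypothetical sketch creating a persistent jail named "example"; note
 * the even iovec count checked below, and that a boolean parameter such
 * as "persist" carries a NULL value:
 *
 *	#include <sys/param.h>
 *	#include <sys/uio.h>
 *	#include <sys/jail.h>
 *	#include <err.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct iovec iov[6];
 *		int jid;
 *
 *		iov[0].iov_base = "name";    iov[0].iov_len = sizeof("name");
 *		iov[1].iov_base = "example"; iov[1].iov_len = sizeof("example");
 *		iov[2].iov_base = "path";    iov[2].iov_len = sizeof("path");
 *		iov[3].iov_base = "/";       iov[3].iov_len = sizeof("/");
 *		iov[4].iov_base = "persist"; iov[4].iov_len = sizeof("persist");
 *		iov[5].iov_base = NULL;      iov[5].iov_len = 0;
 *
 *		jid = jail_set(iov, 6, JAIL_CREATE);
 *		if (jid == -1)
 *			err(1, "jail_set");
 *		return (0);
 *	}
 *
 * End of aside.)
 *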
*/ if (uap->iovcnt & 1) return (EINVAL); error = copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_set(td, auio, uap->flags); free(auio, M_IOV); return (error); } int kern_jail_set(struct thread *td, struct uio *optuio, int flags) { struct nameidata nd; #ifdef INET struct in_addr *ip4; #endif #ifdef INET6 struct in6_addr *ip6; #endif struct vfsopt *opt; struct vfsoptlist *opts; struct prison *pr, *deadpr, *mypr, *ppr, *tpr; struct vnode *root; char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid; char *g_path, *osrelstr; #if defined(INET) || defined(INET6) struct prison *tppr; void *op; #endif unsigned long hid; size_t namelen, onamelen; int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos; int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel; int fi, jid, jsys, len, level; int childmax, osreldt, rsnum, slevel; int fullpath_disabled; #if defined(INET) || defined(INET6) int ii, ij; #endif #ifdef INET int ip4s, redo_ip4; #endif #ifdef INET6 int ip6s, redo_ip6; #endif uint64_t pr_allow, ch_allow, pr_flags, ch_flags; unsigned tallow; char numbuf[12]; error = priv_check(td, PRIV_JAIL_SET); if (!error && (flags & JAIL_ATTACH)) error = priv_check(td, PRIV_JAIL_ATTACH); if (error) return (error); mypr = ppr = td->td_ucred->cr_prison; if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) return (EPERM); if (flags & ~JAIL_SET_MASK) return (EINVAL); /* * Check all the parameters before committing to anything. Not all * errors can be caught early, but we may as well try. Also, this * takes care of some expensive stuff (path lookup) before getting * the allprison lock. * * XXX Jails are not filesystems, and jail parameters are not mount * options. But it makes more sense to re-use the vfsopt code * than duplicate it under a different name. 
*/ error = vfs_buildopts(optuio, &opts); if (error) return (error); #ifdef INET ip4 = NULL; #endif #ifdef INET6 ip6 = NULL; #endif g_path = NULL; error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); if (error == ENOENT) jid = 0; else if (error != 0) goto done_free; error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel)); if (error == ENOENT) gotslevel = 0; else if (error != 0) goto done_free; else gotslevel = 1; error = vfs_copyopt(opts, "children.max", &childmax, sizeof(childmax)); if (error == ENOENT) gotchildmax = 0; else if (error != 0) goto done_free; else gotchildmax = 1; error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce)); if (error == ENOENT) gotenforce = 0; else if (error != 0) goto done_free; else if (enforce < 0 || enforce > 2) { error = EINVAL; goto done_free; } else gotenforce = 1; error = vfs_copyopt(opts, "devfs_ruleset", &rsnum, sizeof(rsnum)); if (error == ENOENT) gotrsnum = 0; else if (error != 0) goto done_free; else gotrsnum = 1; pr_flags = ch_flags = 0; for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); fi++) { if (pr_flag_names[fi] == NULL) continue; vfs_flagopt(opts, pr_flag_names[fi], &pr_flags, 1 << fi); vfs_flagopt(opts, pr_flag_nonames[fi], &ch_flags, 1 << fi); } ch_flags |= pr_flags; for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); fi++) { error = vfs_copyopt(opts, pr_flag_jailsys[fi].name, &jsys, sizeof(jsys)); if (error == ENOENT) continue; if (error != 0) goto done_free; switch (jsys) { case JAIL_SYS_DISABLE: if (!pr_flag_jailsys[fi].disable) { error = EINVAL; goto done_free; } pr_flags |= pr_flag_jailsys[fi].disable; break; case JAIL_SYS_NEW: pr_flags |= pr_flag_jailsys[fi].new; break; case JAIL_SYS_INHERIT: break; default: error = EINVAL; goto done_free; } ch_flags |= pr_flag_jailsys[fi].new | pr_flag_jailsys[fi].disable; } if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE && !(pr_flags & PR_PERSIST)) { error = EINVAL; vfs_opterror(opts, "new jail must persist or attach"); goto done_errmsg; } #ifdef VIMAGE if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) { error = EINVAL; vfs_opterror(opts, "vnet cannot be changed after creation"); goto done_errmsg; } #endif #ifdef INET if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP4_USER)) { error = EINVAL; vfs_opterror(opts, "ip4 cannot be changed after creation"); goto done_errmsg; } #endif #ifdef INET6 if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP6_USER)) { error = EINVAL; vfs_opterror(opts, "ip6 cannot be changed after creation"); goto done_errmsg; } #endif pr_allow = ch_allow = 0; for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); fi++) { vfs_flagopt(opts, pr_allow_names[fi], &pr_allow, 1 << fi); vfs_flagopt(opts, pr_allow_nonames[fi], &ch_allow, 1 << fi); } ch_allow |= pr_allow; error = vfs_getopt(opts, "name", (void **)&name, &len); if (error == ENOENT) name = NULL; else if (error != 0) goto done_free; else { if (len == 0 || name[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len > MAXHOSTNAMELEN) { error = ENAMETOOLONG; goto done_free; } } error = vfs_getopt(opts, "host.hostname", (void **)&host, &len); if (error == ENOENT) host = NULL; else if (error != 0) goto done_free; else { ch_flags |= PR_HOST; pr_flags |= PR_HOST; if (len == 0 || host[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len > MAXHOSTNAMELEN) { error = ENAMETOOLONG; goto done_free; } } error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len); if (error == ENOENT) domain = NULL; else if (error != 0) goto 
done_free; else { ch_flags |= PR_HOST; pr_flags |= PR_HOST; if (len == 0 || domain[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len > MAXHOSTNAMELEN) { error = ENAMETOOLONG; goto done_free; } } error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len); if (error == ENOENT) uuid = NULL; else if (error != 0) goto done_free; else { ch_flags |= PR_HOST; pr_flags |= PR_HOST; if (len == 0 || uuid[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len > HOSTUUIDLEN) { error = ENAMETOOLONG; goto done_free; } } #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { uint32_t hid32; error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32)); hid = hid32; } else #endif error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid)); if (error == ENOENT) gothid = 0; else if (error != 0) goto done_free; else { gothid = 1; ch_flags |= PR_HOST; pr_flags |= PR_HOST; } #ifdef INET error = vfs_getopt(opts, "ip4.addr", &op, &ip4s); if (error == ENOENT) ip4s = 0; else if (error != 0) goto done_free; else if (ip4s & (sizeof(*ip4) - 1)) { error = EINVAL; goto done_free; } else { ch_flags |= PR_IP4_USER; pr_flags |= PR_IP4_USER; if (ip4s > 0) { ip4s /= sizeof(*ip4); if (ip4s > jail_max_af_ips) { error = EINVAL; vfs_opterror(opts, "too many IPv4 addresses"); goto done_errmsg; } ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); bcopy(op, ip4, ip4s * sizeof(*ip4)); /* * IP addresses are all sorted but ip[0] to preserve * the primary IP address as given from userland. * This special IP is used for unbound outgoing * connections as well for "loopback" traffic in case * source address selection cannot find any more fitting * address to connect from. */ if (ip4s > 1) qsort(ip4 + 1, ip4s - 1, sizeof(*ip4), qcmp_v4); /* * Check for duplicate addresses and do some simple * zero and broadcast checks. If users give other bogus * addresses it is their problem. * * We do not have to care about byte order for these * checks so we will do them in NBO. 
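 *
 * (A standalone sketch of the sort-all-but-the-first trick used above,
 * keeping element 0, the primary address, in place while ordering the
 * rest; the addresses are arbitrary:
 *
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <stdint.h>
 *
 *	static int
 *	qcmp(const void *p1, const void *p2)
 *	{
 *		uint32_t a = *(const uint32_t *)p1;
 *		uint32_t b = *(const uint32_t *)p2;
 *
 *		return (a > b ? 1 : (a < b ? -1 : 0));
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		// Element 0 is the primary address and must stay first.
 *		uint32_t ip[] =
 *		    { 0x0a000005, 0x0a000009, 0x0a000001, 0x0a000003 };
 *		size_t i, n = sizeof(ip) / sizeof(ip[0]);
 *
 *		qsort(ip + 1, n - 1, sizeof(ip[0]), qcmp);
 *		for (i = 0; i < n; i++)
 *			printf("0x%08x\n", ip[i]);	// 05, 01, 03, 09
 *		return (0);
 *	}
 *
 * End of aside.)
 *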
*/ for (ii = 0; ii < ip4s; ii++) { if (ip4[ii].s_addr == INADDR_ANY || ip4[ii].s_addr == INADDR_BROADCAST) { error = EINVAL; goto done_free; } if ((ii+1) < ip4s && (ip4[0].s_addr == ip4[ii+1].s_addr || ip4[ii].s_addr == ip4[ii+1].s_addr)) { error = EINVAL; goto done_free; } } } } #endif #ifdef INET6 error = vfs_getopt(opts, "ip6.addr", &op, &ip6s); if (error == ENOENT) ip6s = 0; else if (error != 0) goto done_free; else if (ip6s & (sizeof(*ip6) - 1)) { error = EINVAL; goto done_free; } else { ch_flags |= PR_IP6_USER; pr_flags |= PR_IP6_USER; if (ip6s > 0) { ip6s /= sizeof(*ip6); if (ip6s > jail_max_af_ips) { error = EINVAL; vfs_opterror(opts, "too many IPv6 addresses"); goto done_errmsg; } ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); bcopy(op, ip6, ip6s * sizeof(*ip6)); if (ip6s > 1) qsort(ip6 + 1, ip6s - 1, sizeof(*ip6), qcmp_v6); for (ii = 0; ii < ip6s; ii++) { if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) { error = EINVAL; goto done_free; } if ((ii+1) < ip6s && (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) || IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1]))) { error = EINVAL; goto done_free; } } } } #endif #if defined(VIMAGE) && (defined(INET) || defined(INET6)) if ((ch_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { error = EINVAL; vfs_opterror(opts, "vnet jails cannot have IP address restrictions"); goto done_errmsg; } #endif fullpath_disabled = 0; root = NULL; error = vfs_getopt(opts, "path", (void **)&path, &len); if (error == ENOENT) path = NULL; else if (error != 0) goto done_free; else { if (flags & JAIL_UPDATE) { error = EINVAL; vfs_opterror(opts, "path cannot be changed after creation"); goto done_errmsg; } if (len == 0 || path[len - 1] != '\0') { error = EINVAL; goto done_free; } NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, td); error = namei(&nd); if (error) goto done_free; root = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); g_path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); strlcpy(g_path, path, MAXPATHLEN); error = vn_path_to_global_path(td, root, g_path, MAXPATHLEN); if (error == 0) path = g_path; else if (error == ENODEV) { /* proceed if sysctl debug.disablefullpath == 1 */ fullpath_disabled = 1; if (len < 2 || (len == 2 && path[0] == '/')) path = NULL; } else { /* exit on other errors */ goto done_free; } if (root->v_type != VDIR) { error = ENOTDIR; vput(root); goto done_free; } VOP_UNLOCK(root, 0); if (fullpath_disabled) { /* Leave room for a real-root full pathname. */ if (len + (path[0] == '/' && strcmp(mypr->pr_path, "/") ? strlen(mypr->pr_path) : 0) > MAXPATHLEN) { error = ENAMETOOLONG; goto done_free; } } } error = vfs_getopt(opts, "osrelease", (void **)&osrelstr, &len); if (error == ENOENT) osrelstr = NULL; else if (error != 0) goto done_free; else { if (flags & JAIL_UPDATE) { error = EINVAL; vfs_opterror(opts, "osrelease cannot be changed after creation"); goto done_errmsg; } if (len == 0 || len >= OSRELEASELEN) { error = EINVAL; vfs_opterror(opts, "osrelease string must be 1-%d bytes long", OSRELEASELEN - 1); goto done_errmsg; } } error = vfs_copyopt(opts, "osreldate", &osreldt, sizeof(osreldt)); if (error == ENOENT) osreldt = 0; else if (error != 0) goto done_free; else { if (flags & JAIL_UPDATE) { error = EINVAL; vfs_opterror(opts, "osreldate cannot be changed after creation"); goto done_errmsg; } if (osreldt == 0) { error = EINVAL; vfs_opterror(opts, "osreldate cannot be 0"); goto done_errmsg; } } /* * Grab the allprison lock before letting modules check their * parameters. 
Once we have it, do not let go so we'll have a * consistent view of the OSD list. */ sx_xlock(&allprison_lock); error = osd_jail_call(NULL, PR_METHOD_CHECK, opts); if (error) goto done_unlock_list; /* By now, all parameters should have been noted. */ TAILQ_FOREACH(opt, opts, link) { if (!opt->seen && strcmp(opt->name, "errmsg")) { error = EINVAL; vfs_opterror(opts, "unknown parameter: %s", opt->name); goto done_unlock_list; } } /* * See if we are creating a new record or updating an existing one. * This abuses the file error codes ENOENT and EEXIST. */ cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); if (!cuflags) { error = EINVAL; vfs_opterror(opts, "no valid operation (create or update)"); goto done_unlock_list; } pr = NULL; namelc = NULL; if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) { namelc = strrchr(name, '.'); jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10); if (*p != '\0') jid = 0; } if (jid != 0) { /* * See if a requested jid already exists. There is an * information leak here if the jid exists but is not within * the caller's jail hierarchy. Jail creators will get EEXIST * even though they cannot see the jail, and CREATE | UPDATE * will return ENOENT which is not normally a valid error. */ if (jid < 0) { error = EINVAL; vfs_opterror(opts, "negative jid"); goto done_unlock_list; } pr = prison_find(jid); if (pr != NULL) { ppr = pr->pr_parent; /* Create: jid must not exist. */ if (cuflags == JAIL_CREATE) { mtx_unlock(&pr->pr_mtx); error = EEXIST; vfs_opterror(opts, "jail %d already exists", jid); goto done_unlock_list; } if (!prison_ischild(mypr, pr)) { mtx_unlock(&pr->pr_mtx); pr = NULL; } else if (pr->pr_uref == 0) { if (!(flags & JAIL_DYING)) { mtx_unlock(&pr->pr_mtx); error = ENOENT; vfs_opterror(opts, "jail %d is dying", jid); goto done_unlock_list; } else if ((flags & JAIL_ATTACH) || (pr_flags & PR_PERSIST)) { /* * A dying jail might be resurrected * (via attach or persist), but first * it must determine if another jail * has claimed its name. Accomplish * this by implicitly re-setting the * name. */ if (name == NULL) name = prison_name(mypr, pr); } } } if (pr == NULL) { /* Update: jid must exist. */ if (cuflags == JAIL_UPDATE) { error = ENOENT; vfs_opterror(opts, "jail %d not found", jid); goto done_unlock_list; } } } /* * If the caller provided a name, look for a jail by that name. * This has different semantics for creates and updates keyed by jid * (where the name must not already exist in a different jail), * and updates keyed by the name itself (where the name must exist * because that is the jail being updated). */ if (name != NULL) { namelc = strrchr(name, '.'); if (namelc == NULL) namelc = name; else { /* * This is a hierarchical name. Split it into the * parent and child names, and make sure the parent * exists or matches an already found jail. */ *namelc = '\0'; if (pr != NULL) { if (strncmp(name, ppr->pr_name, namelc - name) || ppr->pr_name[namelc - name] != '\0') { mtx_unlock(&pr->pr_mtx); error = EINVAL; vfs_opterror(opts, "cannot change jail's parent"); goto done_unlock_list; } } else { ppr = prison_find_name(mypr, name); if (ppr == NULL) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", name); goto done_unlock_list; } mtx_unlock(&ppr->pr_mtx); } name = ++namelc; } if (name[0] != '\0') { namelen = (ppr == &prison0) ? 
0 : strlen(ppr->pr_name) + 1; name_again: deadpr = NULL; FOREACH_PRISON_CHILD(ppr, tpr) { if (tpr != pr && tpr->pr_ref > 0 && !strcmp(tpr->pr_name + namelen, name)) { if (pr == NULL && cuflags != JAIL_CREATE) { mtx_lock(&tpr->pr_mtx); if (tpr->pr_ref > 0) { /* * Use this jail * for updates. */ if (tpr->pr_uref > 0) { pr = tpr; break; } deadpr = tpr; } mtx_unlock(&tpr->pr_mtx); } else if (tpr->pr_uref > 0) { /* * Create, or update(jid): * name must not exist in an * active sibling jail. */ error = EEXIST; if (pr != NULL) mtx_unlock(&pr->pr_mtx); vfs_opterror(opts, "jail \"%s\" already exists", name); goto done_unlock_list; } } } /* If no active jail is found, use a dying one. */ if (deadpr != NULL && pr == NULL) { if (flags & JAIL_DYING) { mtx_lock(&deadpr->pr_mtx); if (deadpr->pr_ref == 0) { mtx_unlock(&deadpr->pr_mtx); goto name_again; } pr = deadpr; } else if (cuflags == JAIL_UPDATE) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" is dying", name); goto done_unlock_list; } } /* Update: name must exist if no jid. */ else if (cuflags == JAIL_UPDATE && pr == NULL) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", name); goto done_unlock_list; } } } /* Update: must provide a jid or name. */ else if (cuflags == JAIL_UPDATE && pr == NULL) { error = ENOENT; vfs_opterror(opts, "update specified no jail"); goto done_unlock_list; } /* If there's no prison to update, create a new one and link it in. */ if (pr == NULL) { for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent) if (tpr->pr_childcount >= tpr->pr_childmax) { error = EPERM; vfs_opterror(opts, "prison limit exceeded"); goto done_unlock_list; } created = 1; mtx_lock(&ppr->pr_mtx); if (ppr->pr_ref == 0 || (ppr->pr_flags & PR_REMOVE)) { mtx_unlock(&ppr->pr_mtx); error = ENOENT; vfs_opterror(opts, "parent jail went away!"); goto done_unlock_list; } ppr->pr_ref++; ppr->pr_uref++; mtx_unlock(&ppr->pr_mtx); pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); if (jid == 0) { /* Find the next free jid. */ jid = lastprid + 1; findnext: if (jid == JAIL_MAX) jid = 1; TAILQ_FOREACH(tpr, &allprison, pr_list) { if (tpr->pr_id < jid) continue; if (tpr->pr_id > jid || tpr->pr_ref == 0) { TAILQ_INSERT_BEFORE(tpr, pr, pr_list); break; } if (jid == lastprid) { error = EAGAIN; vfs_opterror(opts, "no available jail IDs"); free(pr, M_PRISON); prison_deref(ppr, PD_DEREF | PD_DEUREF | PD_LIST_XLOCKED); goto done_releroot; } jid++; goto findnext; } lastprid = jid; } else { /* * The jail already has a jid (that did not yet exist), * so just find where to insert it. */ TAILQ_FOREACH(tpr, &allprison, pr_list) if (tpr->pr_id >= jid) { TAILQ_INSERT_BEFORE(tpr, pr, pr_list); break; } } if (tpr == NULL) TAILQ_INSERT_TAIL(&allprison, pr, pr_list); LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling); for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) tpr->pr_childcount++; pr->pr_parent = ppr; pr->pr_id = jid; /* Set some default values, and inherit some from the parent. 
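 * (They are filled in just below, after this aside.)
 */

/*
 * Standalone sketch (separate from this file) of the "findnext" jid
 * allocation above: candidate ids are probed from a hint upward,
 * wrapping at the maximum, until an unused one is found.  The kernel
 * folds the probe into a single walk of the id-sorted prison list;
 * this simplified version just rescans per candidate.
 */
static int
next_free_id(const int *ids, int n, int hint, int id_max)
{
	int cand, i, wrapped;

	wrapped = 0;
	for (cand = hint + 1;; cand++) {
		if (cand >= id_max) {
			cand = 1;
			if (wrapped++)
				return (-1);	/* every id is in use */
		}
		for (i = 0; i < n && ids[i] != cand; i++)
			;
		if (i == n)
			return (cand);
	}
}

/*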
*/ if (name == NULL) name = ""; if (path == NULL) { path = "/"; root = mypr->pr_root; vref(root); } strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN); pr->pr_flags |= PR_HOST; #if defined(INET) || defined(INET6) #ifdef VIMAGE if (!(pr_flags & PR_VNET)) #endif { #ifdef INET if (!(ch_flags & PR_IP4_USER)) pr->pr_flags |= PR_IP4 | PR_IP4_USER; else if (!(pr_flags & PR_IP4_USER)) { pr->pr_flags |= ppr->pr_flags & PR_IP4; if (ppr->pr_ip4 != NULL) { pr->pr_ip4s = ppr->pr_ip4s; pr->pr_ip4 = malloc(pr->pr_ip4s * sizeof(struct in_addr), M_PRISON, M_WAITOK); bcopy(ppr->pr_ip4, pr->pr_ip4, pr->pr_ip4s * sizeof(*pr->pr_ip4)); } } #endif #ifdef INET6 if (!(ch_flags & PR_IP6_USER)) pr->pr_flags |= PR_IP6 | PR_IP6_USER; else if (!(pr_flags & PR_IP6_USER)) { pr->pr_flags |= ppr->pr_flags & PR_IP6; if (ppr->pr_ip6 != NULL) { pr->pr_ip6s = ppr->pr_ip6s; pr->pr_ip6 = malloc(pr->pr_ip6s * sizeof(struct in6_addr), M_PRISON, M_WAITOK); bcopy(ppr->pr_ip6, pr->pr_ip6, pr->pr_ip6s * sizeof(*pr->pr_ip6)); } } #endif } #endif /* Source address selection is always on by default. */ pr->pr_flags |= _PR_IP_SADDRSEL; pr->pr_securelevel = ppr->pr_securelevel; pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow; pr->pr_enforce_statfs = jail_default_enforce_statfs; pr->pr_devfs_rsnum = ppr->pr_devfs_rsnum; pr->pr_osreldate = osreldt ? osreldt : ppr->pr_osreldate; if (osrelstr == NULL) strcpy(pr->pr_osrelease, ppr->pr_osrelease); else strcpy(pr->pr_osrelease, osrelstr); LIST_INIT(&pr->pr_children); mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); #ifdef VIMAGE /* Allocate a new vnet if specified. */ pr->pr_vnet = (pr_flags & PR_VNET) ? vnet_alloc() : ppr->pr_vnet; #endif /* * Allocate a dedicated cpuset for each jail. * Unlike other initial settings, this may return an error. */ error = cpuset_create_root(ppr, &pr->pr_cpuset); if (error) { prison_deref(pr, PD_LIST_XLOCKED); goto done_releroot; } mtx_lock(&pr->pr_mtx); /* * New prisons do not yet have a reference, because we do not * want others to see the incomplete prison once the * allprison_lock is downgraded. */ } else { created = 0; /* * Grab a reference for existing prisons, to ensure they * continue to exist for the duration of the call. */ pr->pr_ref++; #if defined(VIMAGE) && (defined(INET) || defined(INET6)) if ((pr->pr_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { error = EINVAL; vfs_opterror(opts, "vnet jails cannot have IP address restrictions"); goto done_deref_locked; } #endif #ifdef INET if (PR_IP4_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { error = EINVAL; vfs_opterror(opts, "ip4 cannot be changed after creation"); goto done_deref_locked; } #endif #ifdef INET6 if (PR_IP6_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { error = EINVAL; vfs_opterror(opts, "ip6 cannot be changed after creation"); goto done_deref_locked; } #endif } /* Do final error checking before setting anything.
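 * (Those checks follow after this aside.)
 */

/*
 * The ip4/ip6 immutability tests in the update branch above rely on a
 * small bit trick: ch_flags has a bit set for every flag the caller
 * mentioned, pr_flags carries the requested values, and pr->pr_flags
 * the current ones, so FLAG & ch_flags & (pr_flags ^ pr->pr_flags) is
 * nonzero exactly when the caller asked to change FLAG.  A runnable
 * standalone illustration with a made-up flag:
 */
#include <assert.h>

#define FLAG_X	0x01

static int
tries_to_change(unsigned ch, unsigned req, unsigned cur)
{
	return ((FLAG_X & ch & (req ^ cur)) != 0);
}

int
main(void)
{
	assert(!tries_to_change(0, 0, FLAG_X));		  /* not mentioned */
	assert(!tries_to_change(FLAG_X, FLAG_X, FLAG_X)); /* same value */
	assert(tries_to_change(FLAG_X, 0, FLAG_X));	  /* tries to clear */
	return (0);
}

/*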
*/ if (gotslevel) { if (slevel < ppr->pr_securelevel) { error = EPERM; goto done_deref_locked; } } if (gotchildmax) { if (childmax >= ppr->pr_childmax) { error = EPERM; goto done_deref_locked; } } if (gotenforce) { if (enforce < ppr->pr_enforce_statfs) { error = EPERM; goto done_deref_locked; } } if (gotrsnum) { /* * devfs_rsnum is a uint16_t */ if (rsnum < 0 || rsnum > 65535) { error = EINVAL; goto done_deref_locked; } /* * Nested jails always inherit parent's devfs ruleset */ if (jailed(td->td_ucred)) { if (rsnum > 0 && rsnum != ppr->pr_devfs_rsnum) { error = EPERM; goto done_deref_locked; } else rsnum = ppr->pr_devfs_rsnum; } } #ifdef INET if (ip4s > 0) { if (ppr->pr_flags & PR_IP4) { /* * Make sure the new set of IP addresses is a * subset of the parent's list. Don't worry * about the parent being unlocked, as any * setting is done with allprison_lock held. */ for (ij = 0; ij < ppr->pr_ip4s; ij++) if (ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) break; if (ij == ppr->pr_ip4s) { error = EPERM; goto done_deref_locked; } if (ip4s > 1) { for (ii = ij = 1; ii < ip4s; ii++) { if (ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) continue; for (; ij < ppr->pr_ip4s; ij++) if (ip4[ii].s_addr == ppr->pr_ip4[ij].s_addr) break; if (ij == ppr->pr_ip4s) break; } if (ij == ppr->pr_ip4s) { error = EPERM; goto done_deref_locked; } } } /* * Check for conflicting IP addresses. We permit them * if there is no more than one IP on each jail. If * there is a duplicate on a jail with more than one * IP stop checking and return error. */ tppr = ppr; #ifdef VIMAGE for (; tppr != &prison0; tppr = tppr->pr_parent) if (tppr->pr_flags & PR_VNET) break; #endif FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { if (tpr == pr || #ifdef VIMAGE (tpr != tppr && (tpr->pr_flags & PR_VNET)) || #endif tpr->pr_uref == 0) { descend = 0; continue; } if (!(tpr->pr_flags & PR_IP4_USER)) continue; descend = 0; if (tpr->pr_ip4 == NULL || (ip4s == 1 && tpr->pr_ip4s == 1)) continue; for (ii = 0; ii < ip4s; ii++) { if (_prison_check_ip4(tpr, &ip4[ii]) == 0) { error = EADDRINUSE; vfs_opterror(opts, "IPv4 addresses clash"); goto done_deref_locked; } } } } #endif #ifdef INET6 if (ip6s > 0) { if (ppr->pr_flags & PR_IP6) { /* * Make sure the new set of IP addresses is a * subset of the parent's list. */ for (ij = 0; ij < ppr->pr_ip6s; ij++) if (IN6_ARE_ADDR_EQUAL(&ip6[0], &ppr->pr_ip6[ij])) break; if (ij == ppr->pr_ip6s) { error = EPERM; goto done_deref_locked; } if (ip6s > 1) { for (ii = ij = 1; ii < ip6s; ii++) { if (IN6_ARE_ADDR_EQUAL(&ip6[ii], &ppr->pr_ip6[0])) continue; for (; ij < ppr->pr_ip6s; ij++) if (IN6_ARE_ADDR_EQUAL( &ip6[ii], &ppr->pr_ip6[ij])) break; if (ij == ppr->pr_ip6s) break; } if (ij == ppr->pr_ip6s) { error = EPERM; goto done_deref_locked; } } } /* Check for conflicting IP addresses. */ tppr = ppr; #ifdef VIMAGE for (; tppr != &prison0; tppr = tppr->pr_parent) if (tppr->pr_flags & PR_VNET) break; #endif FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { if (tpr == pr || #ifdef VIMAGE (tpr != tppr && (tpr->pr_flags & PR_VNET)) || #endif tpr->pr_uref == 0) { descend = 0; continue; } if (!(tpr->pr_flags & PR_IP6_USER)) continue; descend = 0; if (tpr->pr_ip6 == NULL || (ip6s == 1 && tpr->pr_ip6s == 1)) continue; for (ii = 0; ii < ip6s; ii++) { if (_prison_check_ip6(tpr, &ip6[ii]) == 0) { error = EADDRINUSE; vfs_opterror(opts, "IPv6 addresses clash"); goto done_deref_locked; } } } } #endif onamelen = namelen = 0; if (name != NULL) { /* Give a default name of the jid. 
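 * (That happens just below, after this aside.)
 */

/*
 * Standalone sketch of the parent-subset test above: aside from the
 * pinned primary entry, which the kernel special-cases, both address
 * lists are sorted, so a single two-pointer pass decides whether every
 * child address also appears in the parent's list.
 */
#include <stdint.h>

/* a[] and b[] sorted ascending; returns 1 if every a[i] occurs in b[]. */
static int
sorted_subset(const uint32_t *a, int na, const uint32_t *b, int nb)
{
	int i, j;

	for (i = j = 0; i < na; i++) {
		while (j < nb && b[j] < a[i])
			j++;
		if (j == nb || b[j] != a[i])
			return (0);
	}
	return (1);
}

/*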
*/ if (name[0] == '\0') snprintf(name = numbuf, sizeof(numbuf), "%d", jid); else if (*namelc == '0' || (strtoul(namelc, &p, 10) != jid && *p == '\0')) { error = EINVAL; vfs_opterror(opts, "name cannot be numeric (unless it is the jid)"); goto done_deref_locked; } /* * Make sure the name isn't too long for the prison or its * children. */ onamelen = strlen(pr->pr_name); namelen = strlen(name); if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) { error = ENAMETOOLONG; goto done_deref_locked; } FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { if (strlen(tpr->pr_name) + (namelen - onamelen) >= sizeof(pr->pr_name)) { error = ENAMETOOLONG; goto done_deref_locked; } } } if (pr_allow & ~ppr->pr_allow) { error = EPERM; goto done_deref_locked; } /* Set the parameters of the prison. */ #ifdef INET redo_ip4 = 0; if (pr_flags & PR_IP4_USER) { pr->pr_flags |= PR_IP4; free(pr->pr_ip4, M_PRISON); pr->pr_ip4s = ip4s; pr->pr_ip4 = ip4; ip4 = NULL; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { #ifdef VIMAGE if (tpr->pr_flags & PR_VNET) { descend = 0; continue; } #endif if (prison_restrict_ip4(tpr, NULL)) { redo_ip4 = 1; descend = 0; } } } #endif #ifdef INET6 redo_ip6 = 0; if (pr_flags & PR_IP6_USER) { pr->pr_flags |= PR_IP6; free(pr->pr_ip6, M_PRISON); pr->pr_ip6s = ip6s; pr->pr_ip6 = ip6; ip6 = NULL; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { #ifdef VIMAGE if (tpr->pr_flags & PR_VNET) { descend = 0; continue; } #endif if (prison_restrict_ip6(tpr, NULL)) { redo_ip6 = 1; descend = 0; } } } #endif if (gotslevel) { pr->pr_securelevel = slevel; /* Set all child jails to be at least this level. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) if (tpr->pr_securelevel < slevel) tpr->pr_securelevel = slevel; } if (gotchildmax) { pr->pr_childmax = childmax; /* Set all child jails to under this limit. */ FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(pr, tpr, descend, level) if (tpr->pr_childmax > childmax - level) tpr->pr_childmax = childmax > level ? childmax - level : 0; } if (gotenforce) { pr->pr_enforce_statfs = enforce; /* Pass this restriction on to the children. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) if (tpr->pr_enforce_statfs < enforce) tpr->pr_enforce_statfs = enforce; } if (gotrsnum) { pr->pr_devfs_rsnum = rsnum; /* Pass this restriction on to the children. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) tpr->pr_devfs_rsnum = rsnum; } if (name != NULL) { if (ppr == &prison0) strlcpy(pr->pr_name, name, sizeof(pr->pr_name)); else snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s", ppr->pr_name, name); /* Change this component of child names. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen, strlen(tpr->pr_name + onamelen) + 1); bcopy(pr->pr_name, tpr->pr_name, namelen); } } if (path != NULL) { /* Try to keep a real-rooted full pathname. */ if (fullpath_disabled && path[0] == '/' && strcmp(mypr->pr_path, "/")) snprintf(pr->pr_path, sizeof(pr->pr_path), "%s%s", mypr->pr_path, path); else strlcpy(pr->pr_path, path, sizeof(pr->pr_path)); pr->pr_root = root; } if (PR_HOST & ch_flags & ~pr_flags) { if (pr->pr_flags & PR_HOST) { /* * Copy the parent's host info. As with pr_ip4 above, * the lack of a lock on the parent is not a problem; * it is always set with allprison_lock at least * shared, and is held exclusively here. 
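 */

/*
 * Standalone userland sketch of the descendant rename above: every
 * child's pr_name begins with its ancestor's full name, so renaming the
 * ancestor means shifting the tail over and splicing the new prefix in.
 * Userland needs memmove() for the overlapping copy; the kernel's
 * bcopy() already handles overlap.  The caller guarantees the buffer is
 * large enough, as the ENAMETOOLONG checks above do.
 */
#include <stdio.h>
#include <string.h>

static void
splice_prefix(char *name, size_t oldlen, const char *newpfx)
{
	size_t newlen = strlen(newpfx);

	memmove(name + newlen, name + oldlen, strlen(name + oldlen) + 1);
	memcpy(name, newpfx, newlen);
}

int
main(void)
{
	char buf[64] = "foo.old.child";

	splice_prefix(buf, strlen("foo.old"), "foo.newname");
	printf("%s\n", buf);		/* prints "foo.newname.child" */
	return (0);
}

/*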
*/ strlcpy(pr->pr_hostname, pr->pr_parent->pr_hostname, sizeof(pr->pr_hostname)); strlcpy(pr->pr_domainname, pr->pr_parent->pr_domainname, sizeof(pr->pr_domainname)); strlcpy(pr->pr_hostuuid, pr->pr_parent->pr_hostuuid, sizeof(pr->pr_hostuuid)); pr->pr_hostid = pr->pr_parent->pr_hostid; } } else if (host != NULL || domain != NULL || uuid != NULL || gothid) { /* Set this prison, and any descendants without PR_HOST. */ if (host != NULL) strlcpy(pr->pr_hostname, host, sizeof(pr->pr_hostname)); if (domain != NULL) strlcpy(pr->pr_domainname, domain, sizeof(pr->pr_domainname)); if (uuid != NULL) strlcpy(pr->pr_hostuuid, uuid, sizeof(pr->pr_hostuuid)); if (gothid) pr->pr_hostid = hid; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { if (tpr->pr_flags & PR_HOST) descend = 0; else { if (host != NULL) strlcpy(tpr->pr_hostname, pr->pr_hostname, sizeof(tpr->pr_hostname)); if (domain != NULL) strlcpy(tpr->pr_domainname, pr->pr_domainname, sizeof(tpr->pr_domainname)); if (uuid != NULL) strlcpy(tpr->pr_hostuuid, pr->pr_hostuuid, sizeof(tpr->pr_hostuuid)); if (gothid) tpr->pr_hostid = hid; } } } if ((tallow = ch_allow & ~pr_allow)) { /* Clear allow bits in all children. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) tpr->pr_allow &= ~tallow; } pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow; /* * Persistent prisons get an extra reference, and prisons losing their * persist flag lose that reference. Only do this for existing prisons * for now, so new ones will remain unseen until after the module * handlers have completed. */ if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) { if (pr_flags & PR_PERSIST) { pr->pr_ref++; pr->pr_uref++; } else { pr->pr_ref--; pr->pr_uref--; } } pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; mtx_unlock(&pr->pr_mtx); #ifdef RACCT if (racct_enable && created) prison_racct_attach(pr); #endif /* Locks may have prevented a complete restriction of child IP * addresses. If so, allocate some more memory and try again. */ #ifdef INET while (redo_ip4) { ip4s = pr->pr_ip4s; ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); mtx_lock(&pr->pr_mtx); redo_ip4 = 0; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { #ifdef VIMAGE if (tpr->pr_flags & PR_VNET) { descend = 0; continue; } #endif if (prison_restrict_ip4(tpr, ip4)) { if (ip4 != NULL) ip4 = NULL; else redo_ip4 = 1; } } mtx_unlock(&pr->pr_mtx); } #endif #ifdef INET6 while (redo_ip6) { ip6s = pr->pr_ip6s; ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); mtx_lock(&pr->pr_mtx); redo_ip6 = 0; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { #ifdef VIMAGE if (tpr->pr_flags & PR_VNET) { descend = 0; continue; } #endif if (prison_restrict_ip6(tpr, ip6)) { if (ip6 != NULL) ip6 = NULL; else redo_ip6 = 1; } } mtx_unlock(&pr->pr_mtx); } #endif /* Let the modules do their work. */ sx_downgrade(&allprison_lock); if (created) { error = osd_jail_call(pr, PR_METHOD_CREATE, opts); if (error) { prison_deref(pr, PD_LIST_SLOCKED); goto done_errmsg; } } error = osd_jail_call(pr, PR_METHOD_SET, opts); if (error) { prison_deref(pr, created ? PD_LIST_SLOCKED : PD_DEREF | PD_LIST_SLOCKED); goto done_errmsg; } /* Attach this process to the prison if requested. 
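 * (That attach follows after this aside.)
 */

/*
 * The redo_ip4/redo_ip6 loops above follow a common kernel shape: work
 * under a mutex may need memory, but a sleeping allocation is forbidden
 * while the lock is held, so a failed in-line non-sleeping attempt only
 * sets a "redo" flag, and the buffer is allocated with the lock dropped
 * before retrying.  A userland stand-in for the shape (names
 * illustrative; malloc() plays the role of the sleeping M_WAITOK
 * allocation, and error handling is elided):
 */
#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

static void
update_with_retry(size_t need, void (*consume)(void *))
{
	void *buf = NULL;
	int redo;

	do {
		redo = 0;
		pthread_mutex_lock(&m);
		if (buf == NULL)
			redo = 1;	/* must not allocate while locked */
		else
			consume(buf);	/* consume() takes ownership */
		pthread_mutex_unlock(&m);
		if (redo)
			buf = malloc(need);	/* may block; lock dropped */
	} while (redo);
}

/*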
*/ if (flags & JAIL_ATTACH) { mtx_lock(&pr->pr_mtx); error = do_jail_attach(td, pr); if (error) { vfs_opterror(opts, "attach failed"); if (!created) prison_deref(pr, PD_DEREF); goto done_errmsg; } } #ifdef RACCT if (racct_enable && !created) { if (!(flags & JAIL_ATTACH)) sx_sunlock(&allprison_lock); prison_racct_modify(pr); if (!(flags & JAIL_ATTACH)) sx_slock(&allprison_lock); } #endif td->td_retval[0] = pr->pr_id; /* * Now that it is all there, drop the temporary reference from existing * prisons. Or add a reference to newly created persistent prisons * (which was not done earlier so that the prison would not be publicly * visible). */ if (!created) { prison_deref(pr, (flags & JAIL_ATTACH) ? PD_DEREF : PD_DEREF | PD_LIST_SLOCKED); } else { if (pr_flags & PR_PERSIST) { mtx_lock(&pr->pr_mtx); pr->pr_ref++; pr->pr_uref++; mtx_unlock(&pr->pr_mtx); } if (!(flags & JAIL_ATTACH)) sx_sunlock(&allprison_lock); } goto done_errmsg; done_deref_locked: prison_deref(pr, created ? PD_LOCKED | PD_LIST_XLOCKED : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); goto done_releroot; done_unlock_list: sx_xunlock(&allprison_lock); done_releroot: if (root != NULL) vrele(root); done_errmsg: if (error) { vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); if (errmsg_len > 0) { errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1; if (errmsg_pos > 0) { if (optuio->uio_segflg == UIO_SYSSPACE) bcopy(errmsg, optuio->uio_iov[errmsg_pos].iov_base, errmsg_len); else copyout(errmsg, optuio->uio_iov[errmsg_pos].iov_base, errmsg_len); } } } done_free: #ifdef INET free(ip4, M_PRISON); #endif #ifdef INET6 free(ip6, M_PRISON); #endif if (g_path != NULL) free(g_path, M_TEMP); vfs_freeopts(opts); return (error); } /* * struct jail_get_args { * struct iovec *iovp; * unsigned int iovcnt; * int flags; * }; */ int sys_jail_get(struct thread *td, struct jail_get_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_get(td, auio, uap->flags); if (error == 0) error = copyout(auio->uio_iov, uap->iovp, uap->iovcnt * sizeof (struct iovec)); free(auio, M_IOV); return (error); } int kern_jail_get(struct thread *td, struct uio *optuio, int flags) { struct prison *pr, *mypr; struct vfsopt *opt; struct vfsoptlist *opts; char *errmsg, *name; int error, errmsg_len, errmsg_pos, fi, i, jid, len, locked, pos; if (flags & ~JAIL_GET_MASK) return (EINVAL); /* Get the parameter list. */ error = vfs_buildopts(optuio, &opts); if (error) return (error); errmsg_pos = vfs_getopt_pos(opts, "errmsg"); mypr = td->td_ucred->cr_prison; /* * Find the prison specified by one of: lastjid, jid, name. 
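 */

/*
 * A userland sketch of how "lastjid" is meant to be driven: feeding the
 * previously returned jid back in walks every jail visible to the
 * caller, one jail_get(2) call each.  (Error handling trimmed; see
 * jail(8) and libjail for the full treatment.)
 */
#include <sys/param.h>
#include <sys/uio.h>
#include <sys/jail.h>
#include <stdio.h>

int
main(void)
{
	char name[MAXHOSTNAMELEN];
	struct iovec iov[4];
	int jid, lastjid = 0;

	for (;;) {
		iov[0].iov_base = "lastjid";
		iov[0].iov_len = sizeof("lastjid");
		iov[1].iov_base = &lastjid;
		iov[1].iov_len = sizeof(lastjid);
		iov[2].iov_base = "name";
		iov[2].iov_len = sizeof("name");
		iov[3].iov_base = name;
		iov[3].iov_len = sizeof(name);
		jid = jail_get(iov, 4, 0);
		if (jid < 0)
			break;		/* ENOENT: no jail after lastjid */
		printf("%d\t%s\n", jid, name);
		lastjid = jid;
	}
	return (0);
}

/*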
*/ sx_slock(&allprison_lock); error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); if (error == 0) { TAILQ_FOREACH(pr, &allprison, pr_list) { if (pr->pr_id > jid && prison_ischild(mypr, pr)) { mtx_lock(&pr->pr_mtx); if (pr->pr_ref > 0 && (pr->pr_uref > 0 || (flags & JAIL_DYING))) break; mtx_unlock(&pr->pr_mtx); } } if (pr != NULL) goto found_prison; error = ENOENT; vfs_opterror(opts, "no jail after %d", jid); goto done_unlock_list; } else if (error != ENOENT) goto done_unlock_list; error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); if (error == 0) { if (jid != 0) { pr = prison_find_child(mypr, jid); if (pr != NULL) { if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { mtx_unlock(&pr->pr_mtx); error = ENOENT; vfs_opterror(opts, "jail %d is dying", jid); goto done_unlock_list; } goto found_prison; } error = ENOENT; vfs_opterror(opts, "jail %d not found", jid); goto done_unlock_list; } } else if (error != ENOENT) goto done_unlock_list; error = vfs_getopt(opts, "name", (void **)&name, &len); if (error == 0) { if (len == 0 || name[len - 1] != '\0') { error = EINVAL; goto done_unlock_list; } pr = prison_find_name(mypr, name); if (pr != NULL) { if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { mtx_unlock(&pr->pr_mtx); error = ENOENT; vfs_opterror(opts, "jail \"%s\" is dying", name); goto done_unlock_list; } goto found_prison; } error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", name); goto done_unlock_list; } else if (error != ENOENT) goto done_unlock_list; vfs_opterror(opts, "no jail specified"); error = ENOENT; goto done_unlock_list; found_prison: /* Get the parameters of the prison. */ pr->pr_ref++; locked = PD_LOCKED; td->td_retval[0] = pr->pr_id; error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id)); if (error != 0 && error != ENOENT) goto done_deref; i = (pr->pr_parent == mypr) ? 
0 : pr->pr_parent->pr_id; error = vfs_setopt(opts, "parent", &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopts(opts, "name", prison_name(mypr, pr)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id, sizeof(pr->pr_cpuset->cs_id)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopts(opts, "path", prison_path(mypr, pr)); if (error != 0 && error != ENOENT) goto done_deref; #ifdef INET error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4, pr->pr_ip4s * sizeof(*pr->pr_ip4)); if (error != 0 && error != ENOENT) goto done_deref; #endif #ifdef INET6 error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6, pr->pr_ip6s * sizeof(*pr->pr_ip6)); if (error != 0 && error != ENOENT) goto done_deref; #endif error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel, sizeof(pr->pr_securelevel)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopt(opts, "children.cur", &pr->pr_childcount, sizeof(pr->pr_childcount)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopt(opts, "children.max", &pr->pr_childmax, sizeof(pr->pr_childmax)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopts(opts, "host.hostname", pr->pr_hostname); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopts(opts, "host.domainname", pr->pr_domainname); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopts(opts, "host.hostuuid", pr->pr_hostuuid); if (error != 0 && error != ENOENT) goto done_deref; #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { uint32_t hid32 = pr->pr_hostid; error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32)); } else #endif error = vfs_setopt(opts, "host.hostid", &pr->pr_hostid, sizeof(pr->pr_hostid)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs, sizeof(pr->pr_enforce_statfs)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopt(opts, "devfs_ruleset", &pr->pr_devfs_rsnum, sizeof(pr->pr_devfs_rsnum)); if (error != 0 && error != ENOENT) goto done_deref; for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); fi++) { if (pr_flag_names[fi] == NULL) continue; i = (pr->pr_flags & (1 << fi)) ? 1 : 0; error = vfs_setopt(opts, pr_flag_names[fi], &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; i = !i; error = vfs_setopt(opts, pr_flag_nonames[fi], &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; } for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); fi++) { i = pr->pr_flags & (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new); i = pr_flag_jailsys[fi].disable && (i == pr_flag_jailsys[fi].disable) ? JAIL_SYS_DISABLE : (i == pr_flag_jailsys[fi].new) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; error = vfs_setopt(opts, pr_flag_jailsys[fi].name, &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; } for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); fi++) { if (pr_allow_names[fi] == NULL) continue; i = (pr->pr_allow & (1 << fi)) ? 
1 : 0; error = vfs_setopt(opts, pr_allow_names[fi], &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; i = !i; error = vfs_setopt(opts, pr_allow_nonames[fi], &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; } i = (pr->pr_uref == 0); error = vfs_setopt(opts, "dying", &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; i = !i; error = vfs_setopt(opts, "nodying", &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopt(opts, "osreldate", &pr->pr_osreldate, sizeof(pr->pr_osreldate)); if (error != 0 && error != ENOENT) goto done_deref; error = vfs_setopts(opts, "osrelease", pr->pr_osrelease); if (error != 0 && error != ENOENT) goto done_deref; /* Get the module parameters. */ mtx_unlock(&pr->pr_mtx); locked = 0; error = osd_jail_call(pr, PR_METHOD_GET, opts); if (error) goto done_deref; prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED); /* By now, all parameters should have been noted. */ TAILQ_FOREACH(opt, opts, link) { if (!opt->seen && strcmp(opt->name, "errmsg")) { error = EINVAL; vfs_opterror(opts, "unknown parameter: %s", opt->name); goto done_errmsg; } } /* Write the fetched parameters back to userspace. */ error = 0; TAILQ_FOREACH(opt, opts, link) { if (opt->pos >= 0 && opt->pos != errmsg_pos) { pos = 2 * opt->pos + 1; optuio->uio_iov[pos].iov_len = opt->len; if (opt->value != NULL) { if (optuio->uio_segflg == UIO_SYSSPACE) { bcopy(opt->value, optuio->uio_iov[pos].iov_base, opt->len); } else { error = copyout(opt->value, optuio->uio_iov[pos].iov_base, opt->len); if (error) break; } } } } goto done_errmsg; done_deref: prison_deref(pr, locked | PD_DEREF | PD_LIST_SLOCKED); goto done_errmsg; done_unlock_list: sx_sunlock(&allprison_lock); done_errmsg: if (error && errmsg_pos >= 0) { vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); errmsg_pos = 2 * errmsg_pos + 1; if (errmsg_len > 0) { if (optuio->uio_segflg == UIO_SYSSPACE) bcopy(errmsg, optuio->uio_iov[errmsg_pos].iov_base, errmsg_len); else copyout(errmsg, optuio->uio_iov[errmsg_pos].iov_base, errmsg_len); } } vfs_freeopts(opts); return (error); } /* * struct jail_remove_args { * int jid; * }; */ int sys_jail_remove(struct thread *td, struct jail_remove_args *uap) { struct prison *pr, *cpr, *lpr, *tpr; int descend, error; error = priv_check(td, PRIV_JAIL_REMOVE); if (error) return (error); sx_xlock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); if (pr == NULL) { sx_xunlock(&allprison_lock); return (EINVAL); } /* Remove all descendants of this prison, then remove this prison. */ pr->pr_ref++; pr->pr_flags |= PR_REMOVE; if (!LIST_EMPTY(&pr->pr_children)) { mtx_unlock(&pr->pr_mtx); lpr = NULL; FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { mtx_lock(&cpr->pr_mtx); if (cpr->pr_ref > 0) { tpr = cpr; cpr->pr_ref++; cpr->pr_flags |= PR_REMOVE; } else { /* Already removed - do not do it again. */ tpr = NULL; } mtx_unlock(&cpr->pr_mtx); if (lpr != NULL) { mtx_lock(&lpr->pr_mtx); prison_remove_one(lpr); sx_xlock(&allprison_lock); } lpr = tpr; } if (lpr != NULL) { mtx_lock(&lpr->pr_mtx); prison_remove_one(lpr); sx_xlock(&allprison_lock); } mtx_lock(&pr->pr_mtx); } prison_remove_one(pr); return (0); } static void prison_remove_one(struct prison *pr) { struct proc *p; int deuref; /* If the prison was persistent, it is not anymore. */ deuref = 0; if (pr->pr_flags & PR_PERSIST) { pr->pr_ref--; deuref = PD_DEUREF; pr->pr_flags &= ~PR_PERSIST; } /* * jail_remove added a reference. If that's the only one, remove * the prison now. 
*/ KASSERT(pr->pr_ref > 0, ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id)); if (pr->pr_ref == 1) { prison_deref(pr, deuref | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); return; } mtx_unlock(&pr->pr_mtx); sx_xunlock(&allprison_lock); /* * Kill all processes unfortunate enough to be attached to this prison. */ sx_slock(&allproc_lock); LIST_FOREACH(p, &allproc, p_list) { PROC_LOCK(p); if (p->p_state != PRS_NEW && p->p_ucred && p->p_ucred->cr_prison == pr) kern_psignal(p, SIGKILL); PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); /* Remove the temporary reference added by jail_remove. */ prison_deref(pr, deuref | PD_DEREF); } /* * struct jail_attach_args { * int jid; * }; */ int sys_jail_attach(struct thread *td, struct jail_attach_args *uap) { struct prison *pr; int error; error = priv_check(td, PRIV_JAIL_ATTACH); if (error) return (error); sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); if (pr == NULL) { sx_sunlock(&allprison_lock); return (EINVAL); } /* * Do not allow a process to attach to a prison that is not * considered to be "alive". */ if (pr->pr_uref == 0) { mtx_unlock(&pr->pr_mtx); sx_sunlock(&allprison_lock); return (EINVAL); } return (do_jail_attach(td, pr)); } static int do_jail_attach(struct thread *td, struct prison *pr) { struct prison *ppr; struct proc *p; struct ucred *newcred, *oldcred; int error; /* * XXX: Note that there is a slight race here if two threads * in the same privileged process attempt to attach to two * different jails at the same time. It is important for * user processes not to do this, or they might end up with * a process root from one prison, but attached to the jail * of another. */ pr->pr_ref++; pr->pr_uref++; mtx_unlock(&pr->pr_mtx); /* Let modules do whatever they need to prepare for attaching. */ error = osd_jail_call(pr, PR_METHOD_ATTACH, td); if (error) { prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED); return (error); } sx_sunlock(&allprison_lock); /* * Reparent the newly attached process to this jail. */ ppr = td->td_ucred->cr_prison; p = td->td_proc; error = cpuset_setproc_update_set(p, pr->pr_cpuset); if (error) goto e_revert_osd; vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); if ((error = change_dir(pr->pr_root, td)) != 0) goto e_unlock; #ifdef MAC if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) goto e_unlock; #endif VOP_UNLOCK(pr->pr_root, 0); if ((error = pwd_chroot(td, pr->pr_root))) goto e_revert_osd; newcred = crget(); PROC_LOCK(p); oldcred = p->p_ucred; setsugid(p); crcopy(newcred, oldcred); newcred->cr_prison = pr; proc_set_cred(p, newcred); PROC_UNLOCK(p); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); #endif crfree(oldcred); prison_deref(ppr, PD_DEREF | PD_DEUREF); return (0); e_unlock: VOP_UNLOCK(pr->pr_root, 0); e_revert_osd: /* Tell modules this thread is still in its old jail after all. */ (void)osd_jail_call(ppr, PR_METHOD_ATTACH, td); prison_deref(pr, PD_DEREF | PD_DEUREF); return (error); } /* * Returns a locked prison instance, or NULL on failure. */ struct prison * prison_find(int prid) { struct prison *pr; sx_assert(&allprison_lock, SX_LOCKED); TAILQ_FOREACH(pr, &allprison, pr_list) { if (pr->pr_id == prid) { mtx_lock(&pr->pr_mtx); if (pr->pr_ref > 0) return (pr); mtx_unlock(&pr->pr_mtx); } } return (NULL); } /* * Find a prison that is a descendant of mypr. Returns a locked prison or NULL. 
*/ struct prison * prison_find_child(struct prison *mypr, int prid) { struct prison *pr; int descend; sx_assert(&allprison_lock, SX_LOCKED); FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { if (pr->pr_id == prid) { mtx_lock(&pr->pr_mtx); if (pr->pr_ref > 0) return (pr); mtx_unlock(&pr->pr_mtx); } } return (NULL); } /* * Look for the name relative to mypr. Returns a locked prison or NULL. */ struct prison * prison_find_name(struct prison *mypr, const char *name) { struct prison *pr, *deadpr; size_t mylen; int descend; sx_assert(&allprison_lock, SX_LOCKED); mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1; again: deadpr = NULL; FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { if (!strcmp(pr->pr_name + mylen, name)) { mtx_lock(&pr->pr_mtx); if (pr->pr_ref > 0) { if (pr->pr_uref > 0) return (pr); deadpr = pr; } mtx_unlock(&pr->pr_mtx); } } /* There was no valid prison - perhaps there was a dying one. */ if (deadpr != NULL) { mtx_lock(&deadpr->pr_mtx); if (deadpr->pr_ref == 0) { mtx_unlock(&deadpr->pr_mtx); goto again; } } return (deadpr); } /* * See if a prison has the specific flag set. */ int prison_flag(struct ucred *cred, unsigned flag) { /* This is an atomic read, so no locking is necessary. */ return (cred->cr_prison->pr_flags & flag); } int prison_allow(struct ucred *cred, unsigned flag) { /* This is an atomic read, so no locking is necessary. */ return (cred->cr_prison->pr_allow & flag); } /* * Remove a prison reference. If that was the last reference, remove the * prison itself - but not in this context in case there are locks held. */ void prison_free_locked(struct prison *pr) { mtx_assert(&pr->pr_mtx, MA_OWNED); pr->pr_ref--; if (pr->pr_ref == 0) { mtx_unlock(&pr->pr_mtx); TASK_INIT(&pr->pr_task, 0, prison_complete, pr); taskqueue_enqueue(taskqueue_thread, &pr->pr_task); return; } mtx_unlock(&pr->pr_mtx); } void prison_free(struct prison *pr) { mtx_lock(&pr->pr_mtx); prison_free_locked(pr); } static void prison_complete(void *context, int pending) { prison_deref((struct prison *)context, 0); } /* * Remove a prison reference (usually). This internal version assumes no * mutexes are held, except perhaps the prison itself. If there are no more * references, release and delist the prison. On completion, the prison lock * and the allprison lock are both unlocked. */ static void prison_deref(struct prison *pr, int flags) { struct prison *ppr, *tpr; if (!(flags & PD_LOCKED)) mtx_lock(&pr->pr_mtx); for (;;) { if (flags & PD_DEUREF) { pr->pr_uref--; KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0")); } if (flags & PD_DEREF) pr->pr_ref--; /* If the prison still has references, nothing else to do. 
*/ if (pr->pr_ref > 0) { mtx_unlock(&pr->pr_mtx); if (flags & PD_LIST_SLOCKED) sx_sunlock(&allprison_lock); else if (flags & PD_LIST_XLOCKED) sx_xunlock(&allprison_lock); return; } mtx_unlock(&pr->pr_mtx); if (flags & PD_LIST_SLOCKED) { if (!sx_try_upgrade(&allprison_lock)) { sx_sunlock(&allprison_lock); sx_xlock(&allprison_lock); } } else if (!(flags & PD_LIST_XLOCKED)) sx_xlock(&allprison_lock); TAILQ_REMOVE(&allprison, pr, pr_list); LIST_REMOVE(pr, pr_sibling); ppr = pr->pr_parent; for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) tpr->pr_childcount--; sx_xunlock(&allprison_lock); #ifdef VIMAGE if (pr->pr_vnet != ppr->pr_vnet) vnet_destroy(pr->pr_vnet); #endif if (pr->pr_root != NULL) vrele(pr->pr_root); mtx_destroy(&pr->pr_mtx); #ifdef INET free(pr->pr_ip4, M_PRISON); #endif #ifdef INET6 free(pr->pr_ip6, M_PRISON); #endif if (pr->pr_cpuset != NULL) cpuset_rel(pr->pr_cpuset); osd_jail_exit(pr); #ifdef RACCT if (racct_enable) prison_racct_detach(pr); #endif free(pr, M_PRISON); /* Removing a prison frees a reference on its parent. */ pr = ppr; mtx_lock(&pr->pr_mtx); flags = PD_DEREF | PD_DEUREF; } } void prison_hold_locked(struct prison *pr) { mtx_assert(&pr->pr_mtx, MA_OWNED); KASSERT(pr->pr_ref > 0, ("Trying to hold dead prison (jid=%d).", pr->pr_id)); pr->pr_ref++; } void prison_hold(struct prison *pr) { mtx_lock(&pr->pr_mtx); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); } void prison_proc_hold(struct prison *pr) { mtx_lock(&pr->pr_mtx); KASSERT(pr->pr_uref > 0, ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id)); pr->pr_uref++; mtx_unlock(&pr->pr_mtx); } void prison_proc_free(struct prison *pr) { mtx_lock(&pr->pr_mtx); KASSERT(pr->pr_uref > 0, ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id)); prison_deref(pr, PD_DEUREF | PD_LOCKED); } #ifdef INET /* * Restrict a prison's IP address list with its parent's, possibly replacing * it. Return true if the replacement buffer was used (or would have been). */ static int prison_restrict_ip4(struct prison *pr, struct in_addr *newip4) { int ii, ij, used; struct prison *ppr; ppr = pr->pr_parent; if (!(pr->pr_flags & PR_IP4_USER)) { /* This has no user settings, so just copy the parent's list. */ if (pr->pr_ip4s < ppr->pr_ip4s) { /* * There's no room for the parent's list. Use the * new list buffer, which is assumed to be big enough * (if it was passed). If there's no buffer, try to * allocate one. */ used = 1; if (newip4 == NULL) { newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4), M_PRISON, M_NOWAIT); if (newip4 != NULL) used = 0; } if (newip4 != NULL) { bcopy(ppr->pr_ip4, newip4, ppr->pr_ip4s * sizeof(*newip4)); free(pr->pr_ip4, M_PRISON); pr->pr_ip4 = newip4; pr->pr_ip4s = ppr->pr_ip4s; } return (used); } pr->pr_ip4s = ppr->pr_ip4s; if (pr->pr_ip4s > 0) bcopy(ppr->pr_ip4, pr->pr_ip4, pr->pr_ip4s * sizeof(*newip4)); else if (pr->pr_ip4 != NULL) { free(pr->pr_ip4, M_PRISON); pr->pr_ip4 = NULL; } } else if (pr->pr_ip4s > 0) { /* Remove addresses that aren't in the parent. */ for (ij = 0; ij < ppr->pr_ip4s; ij++) if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) break; if (ij < ppr->pr_ip4s) ii = 1; else { bcopy(pr->pr_ip4 + 1, pr->pr_ip4, --pr->pr_ip4s * sizeof(*pr->pr_ip4)); ii = 0; } for (ij = 1; ii < pr->pr_ip4s; ) { if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) { ii++; continue; } switch (ij >= ppr->pr_ip4s ? 
-1 : qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) { case -1: bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii, (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4)); break; case 0: ii++; ij++; break; case 1: ij++; break; } } if (pr->pr_ip4s == 0) { free(pr->pr_ip4, M_PRISON); pr->pr_ip4 = NULL; } } return (0); } /* * Pass back primary IPv4 address of this jail. * * If not restricted return success but do not alter the address. Caller has * to make sure to initialize it correctly (e.g. INADDR_ANY). * * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. * Address returned in NBO. */ int prison_get_ip4(struct ucred *cred, struct in_addr *ia) { struct prison *pr; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); pr = cred->cr_prison; if (!(pr->pr_flags & PR_IP4)) return (0); mtx_lock(&pr->pr_mtx); if (!(pr->pr_flags & PR_IP4)) { mtx_unlock(&pr->pr_mtx); return (0); } if (pr->pr_ip4 == NULL) { mtx_unlock(&pr->pr_mtx); return (EAFNOSUPPORT); } ia->s_addr = pr->pr_ip4[0].s_addr; mtx_unlock(&pr->pr_mtx); return (0); } /* * Return 1 if we should do proper source address selection or are not jailed. * We will return 0 if we should bypass source address selection in favour * of the primary jail IPv4 address. Only in this case *ia will be updated and * returned in NBO. * Return EAFNOSUPPORT, in case this jail does not allow IPv4. */ int prison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia) { struct prison *pr; struct in_addr lia; int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); if (!jailed(cred)) return (1); pr = cred->cr_prison; if (pr->pr_flags & PR_IP4_SADDRSEL) return (1); lia.s_addr = INADDR_ANY; error = prison_get_ip4(cred, &lia); if (error) return (error); if (lia.s_addr == INADDR_ANY) return (1); ia->s_addr = lia.s_addr; return (0); } /* * Return true if pr1 and pr2 have the same IPv4 address restrictions. */ int prison_equal_ip4(struct prison *pr1, struct prison *pr2) { if (pr1 == pr2) return (1); /* * No need to lock since the PR_IP4_USER flag can't be altered for * existing prisons. */ while (pr1 != &prison0 && #ifdef VIMAGE !(pr1->pr_flags & PR_VNET) && #endif !(pr1->pr_flags & PR_IP4_USER)) pr1 = pr1->pr_parent; while (pr2 != &prison0 && #ifdef VIMAGE !(pr2->pr_flags & PR_VNET) && #endif !(pr2->pr_flags & PR_IP4_USER)) pr2 = pr2->pr_parent; return (pr1 == pr2); } /* * Make sure our (source) address is set to something meaningful to this * jail. * * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail * doesn't allow IPv4. Address passed in in NBO and returned in NBO. */ int prison_local_ip4(struct ucred *cred, struct in_addr *ia) { struct prison *pr; struct in_addr ia0; int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); pr = cred->cr_prison; if (!(pr->pr_flags & PR_IP4)) return (0); mtx_lock(&pr->pr_mtx); if (!(pr->pr_flags & PR_IP4)) { mtx_unlock(&pr->pr_mtx); return (0); } if (pr->pr_ip4 == NULL) { mtx_unlock(&pr->pr_mtx); return (EAFNOSUPPORT); } ia0.s_addr = ntohl(ia->s_addr); if (ia0.s_addr == INADDR_LOOPBACK) { ia->s_addr = pr->pr_ip4[0].s_addr; mtx_unlock(&pr->pr_mtx); return (0); } if (ia0.s_addr == INADDR_ANY) { /* * In case there is only 1 IPv4 address, bind directly. 
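 * (That single-address case is handled just below.)
 */

/*
 * Standalone sketch of the rewriting rules prison_local_ip4() applies
 * (byte-order conversions omitted): loopback maps to the jail's primary
 * address, INADDR_ANY stays wild unless the jail has exactly one
 * address, and anything else must pass the membership check.
 */
#include <sys/types.h>
#include <netinet/in.h>
#include <stdint.h>

/* ip[0] is the jail's primary address; addr in host byte order. */
static uint32_t
jail_localize_v4(const uint32_t *ip, int n, uint32_t addr)
{
	if (addr == INADDR_LOOPBACK)
		return (ip[0]);
	if (addr == INADDR_ANY && n == 1)
		return (ip[0]);
	return (addr);		/* caller still validates membership */
}

/*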
*/ if (pr->pr_ip4s == 1) ia->s_addr = pr->pr_ip4[0].s_addr; mtx_unlock(&pr->pr_mtx); return (0); } error = _prison_check_ip4(pr, ia); mtx_unlock(&pr->pr_mtx); return (error); } /* * Rewrite destination address in case we will connect to loopback address. * * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. * Address passed in in NBO and returned in NBO. */ int prison_remote_ip4(struct ucred *cred, struct in_addr *ia) { struct prison *pr; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); pr = cred->cr_prison; if (!(pr->pr_flags & PR_IP4)) return (0); mtx_lock(&pr->pr_mtx); if (!(pr->pr_flags & PR_IP4)) { mtx_unlock(&pr->pr_mtx); return (0); } if (pr->pr_ip4 == NULL) { mtx_unlock(&pr->pr_mtx); return (EAFNOSUPPORT); } if (ntohl(ia->s_addr) == INADDR_LOOPBACK) { ia->s_addr = pr->pr_ip4[0].s_addr; mtx_unlock(&pr->pr_mtx); return (0); } /* * Return success because nothing had to be changed. */ mtx_unlock(&pr->pr_mtx); return (0); } /* * Check if given address belongs to the jail referenced by cred/prison. * * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail * doesn't allow IPv4. Address passed in in NBO. */ static int _prison_check_ip4(const struct prison *pr, const struct in_addr *ia) { int i, a, z, d; /* * Check the primary IP. */ if (pr->pr_ip4[0].s_addr == ia->s_addr) return (0); /* * All the other IPs are sorted so we can do a binary search. */ a = 0; z = pr->pr_ip4s - 2; while (a <= z) { i = (a + z) / 2; d = qcmp_v4(&pr->pr_ip4[i+1], ia); if (d > 0) z = i - 1; else if (d < 0) a = i + 1; else return (0); } return (EADDRNOTAVAIL); } int prison_check_ip4(const struct ucred *cred, const struct in_addr *ia) { struct prison *pr; int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); pr = cred->cr_prison; if (!(pr->pr_flags & PR_IP4)) return (0); mtx_lock(&pr->pr_mtx); if (!(pr->pr_flags & PR_IP4)) { mtx_unlock(&pr->pr_mtx); return (0); } if (pr->pr_ip4 == NULL) { mtx_unlock(&pr->pr_mtx); return (EAFNOSUPPORT); } error = _prison_check_ip4(pr, ia); mtx_unlock(&pr->pr_mtx); return (error); } #endif #ifdef INET6 static int prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6) { int ii, ij, used; struct prison *ppr; ppr = pr->pr_parent; if (!(pr->pr_flags & PR_IP6_USER)) { /* This has no user settings, so just copy the parent's list. */ if (pr->pr_ip6s < ppr->pr_ip6s) { /* * There's no room for the parent's list. Use the * new list buffer, which is assumed to be big enough * (if it was passed). If there's no buffer, try to * allocate one. */ used = 1; if (newip6 == NULL) { newip6 = malloc(ppr->pr_ip6s * sizeof(*newip6), M_PRISON, M_NOWAIT); if (newip6 != NULL) used = 0; } if (newip6 != NULL) { bcopy(ppr->pr_ip6, newip6, ppr->pr_ip6s * sizeof(*newip6)); free(pr->pr_ip6, M_PRISON); pr->pr_ip6 = newip6; pr->pr_ip6s = ppr->pr_ip6s; } return (used); } pr->pr_ip6s = ppr->pr_ip6s; if (pr->pr_ip6s > 0) bcopy(ppr->pr_ip6, pr->pr_ip6, pr->pr_ip6s * sizeof(*newip6)); else if (pr->pr_ip6 != NULL) { free(pr->pr_ip6, M_PRISON); pr->pr_ip6 = NULL; } } else if (pr->pr_ip6s > 0) { /* Remove addresses that aren't in the parent. 
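 * (The IPv6 variant of that pruning continues below.)
 */

/*
 * Standalone sketch of the membership test in _prison_check_ip4()
 * above, on plain integers: slot 0 holds the pinned primary address and
 * is tested first; the remaining entries are kept sorted, so a binary
 * search over ip[1..n-1] settles the rest.  Assumes n >= 1.
 */
#include <stdint.h>

/* ip[1..n-1] sorted ascending; returns 1 if x is in the list. */
static int
addr_is_member(const uint32_t *ip, int n, uint32_t x)
{
	int a, i, z;

	if (ip[0] == x)
		return (1);
	a = 0;
	z = n - 2;
	while (a <= z) {
		i = (a + z) / 2;
		if (ip[i + 1] > x)
			z = i - 1;
		else if (ip[i + 1] < x)
			a = i + 1;
		else
			return (1);
	}
	return (0);
}

/*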
*/ for (ij = 0; ij < ppr->pr_ip6s; ij++) if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], &ppr->pr_ip6[ij])) break; if (ij < ppr->pr_ip6s) ii = 1; else { bcopy(pr->pr_ip6 + 1, pr->pr_ip6, --pr->pr_ip6s * sizeof(*pr->pr_ip6)); ii = 0; } for (ij = 1; ii < pr->pr_ip6s; ) { if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[ii], &ppr->pr_ip6[0])) { ii++; continue; } switch (ij >= ppr->pr_ip6s ? -1 : qcmp_v6(&pr->pr_ip6[ii], &ppr->pr_ip6[ij])) { case -1: bcopy(pr->pr_ip6 + ii + 1, pr->pr_ip6 + ii, (--pr->pr_ip6s - ii) * sizeof(*pr->pr_ip6)); break; case 0: ii++; ij++; break; case 1: ij++; break; } } if (pr->pr_ip6s == 0) { free(pr->pr_ip6, M_PRISON); pr->pr_ip6 = NULL; } } return 0; } /* * Pass back primary IPv6 address for this jail. * * If not restricted return success but do not alter the address. Caller has * to make sure to initialize it correctly (e.g. IN6ADDR_ANY_INIT). * * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. */ int prison_get_ip6(struct ucred *cred, struct in6_addr *ia6) { struct prison *pr; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); pr = cred->cr_prison; if (!(pr->pr_flags & PR_IP6)) return (0); mtx_lock(&pr->pr_mtx); if (!(pr->pr_flags & PR_IP6)) { mtx_unlock(&pr->pr_mtx); return (0); } if (pr->pr_ip6 == NULL) { mtx_unlock(&pr->pr_mtx); return (EAFNOSUPPORT); } bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); mtx_unlock(&pr->pr_mtx); return (0); } /* * Return 1 if we should do proper source address selection or are not jailed. * We will return 0 if we should bypass source address selection in favour * of the primary jail IPv6 address. Only in this case *ia will be updated and * returned in NBO. * Return EAFNOSUPPORT, in case this jail does not allow IPv6. */ int prison_saddrsel_ip6(struct ucred *cred, struct in6_addr *ia6) { struct prison *pr; struct in6_addr lia6; int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); if (!jailed(cred)) return (1); pr = cred->cr_prison; if (pr->pr_flags & PR_IP6_SADDRSEL) return (1); lia6 = in6addr_any; error = prison_get_ip6(cred, &lia6); if (error) return (error); if (IN6_IS_ADDR_UNSPECIFIED(&lia6)) return (1); bcopy(&lia6, ia6, sizeof(struct in6_addr)); return (0); } /* * Return true if pr1 and pr2 have the same IPv6 address restrictions. */ int prison_equal_ip6(struct prison *pr1, struct prison *pr2) { if (pr1 == pr2) return (1); while (pr1 != &prison0 && #ifdef VIMAGE !(pr1->pr_flags & PR_VNET) && #endif !(pr1->pr_flags & PR_IP6_USER)) pr1 = pr1->pr_parent; while (pr2 != &prison0 && #ifdef VIMAGE !(pr2->pr_flags & PR_VNET) && #endif !(pr2->pr_flags & PR_IP6_USER)) pr2 = pr2->pr_parent; return (pr1 == pr2); } /* * Make sure our (source) address is set to something meaningful to this jail. * * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0) * when needed while binding. * * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail * doesn't allow IPv6. 
*/ int prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only) { struct prison *pr; int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); pr = cred->cr_prison; if (!(pr->pr_flags & PR_IP6)) return (0); mtx_lock(&pr->pr_mtx); if (!(pr->pr_flags & PR_IP6)) { mtx_unlock(&pr->pr_mtx); return (0); } if (pr->pr_ip6 == NULL) { mtx_unlock(&pr->pr_mtx); return (EAFNOSUPPORT); } if (IN6_IS_ADDR_LOOPBACK(ia6)) { bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); mtx_unlock(&pr->pr_mtx); return (0); } if (IN6_IS_ADDR_UNSPECIFIED(ia6)) { /* * In case there is only 1 IPv6 address, and v6only is true, * then bind directly. */ if (v6only != 0 && pr->pr_ip6s == 1) bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); mtx_unlock(&pr->pr_mtx); return (0); } error = _prison_check_ip6(pr, ia6); mtx_unlock(&pr->pr_mtx); return (error); } /* * Rewrite destination address in case we will connect to loopback address. * * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. */ int prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6) { struct prison *pr; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); pr = cred->cr_prison; if (!(pr->pr_flags & PR_IP6)) return (0); mtx_lock(&pr->pr_mtx); if (!(pr->pr_flags & PR_IP6)) { mtx_unlock(&pr->pr_mtx); return (0); } if (pr->pr_ip6 == NULL) { mtx_unlock(&pr->pr_mtx); return (EAFNOSUPPORT); } if (IN6_IS_ADDR_LOOPBACK(ia6)) { bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); mtx_unlock(&pr->pr_mtx); return (0); } /* * Return success because nothing had to be changed. */ mtx_unlock(&pr->pr_mtx); return (0); } /* * Check if given address belongs to the jail referenced by cred/prison. * * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail * doesn't allow IPv6. */ static int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6) { int i, a, z, d; /* * Check the primary IP. */ if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6)) return (0); /* * All the other IPs are sorted so we can do a binary search. */ a = 0; z = pr->pr_ip6s - 2; while (a <= z) { i = (a + z) / 2; d = qcmp_v6(&pr->pr_ip6[i+1], ia6); if (d > 0) z = i - 1; else if (d < 0) a = i + 1; else return (0); } return (EADDRNOTAVAIL); } int prison_check_ip6(struct ucred *cred, struct in6_addr *ia6) { struct prison *pr; int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); pr = cred->cr_prison; if (!(pr->pr_flags & PR_IP6)) return (0); mtx_lock(&pr->pr_mtx); if (!(pr->pr_flags & PR_IP6)) { mtx_unlock(&pr->pr_mtx); return (0); } if (pr->pr_ip6 == NULL) { mtx_unlock(&pr->pr_mtx); return (EAFNOSUPPORT); } error = _prison_check_ip6(pr, ia6); mtx_unlock(&pr->pr_mtx); return (error); } #endif /* * Check if a jail supports the given address family. * * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT * if not. */ int prison_check_af(struct ucred *cred, int af) { struct prison *pr; int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); pr = cred->cr_prison; #ifdef VIMAGE /* Prisons with their own network stack are not limited. 
*/ if (prison_owns_vnet(cred)) return (0); #endif error = 0; switch (af) { #ifdef INET case AF_INET: if (pr->pr_flags & PR_IP4) { mtx_lock(&pr->pr_mtx); if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL) error = EAFNOSUPPORT; mtx_unlock(&pr->pr_mtx); } break; #endif #ifdef INET6 case AF_INET6: if (pr->pr_flags & PR_IP6) { mtx_lock(&pr->pr_mtx); if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL) error = EAFNOSUPPORT; mtx_unlock(&pr->pr_mtx); } break; #endif case AF_LOCAL: case AF_ROUTE: break; default: if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF)) error = EAFNOSUPPORT; } return (error); } /* * Check if given address belongs to the jail referenced by cred (wrapper to * prison_check_ip[46]). * * Returns 0 if jail doesn't restrict the address family or if address belongs * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if * the jail doesn't allow the address family. IPv4 Address passed in in NBO. */ int prison_if(struct ucred *cred, struct sockaddr *sa) { #ifdef INET struct sockaddr_in *sai; #endif #ifdef INET6 struct sockaddr_in6 *sai6; #endif int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(sa != NULL, ("%s: sa is NULL", __func__)); #ifdef VIMAGE if (prison_owns_vnet(cred)) return (0); #endif error = 0; switch (sa->sa_family) { #ifdef INET case AF_INET: sai = (struct sockaddr_in *)sa; error = prison_check_ip4(cred, &sai->sin_addr); break; #endif #ifdef INET6 case AF_INET6: sai6 = (struct sockaddr_in6 *)sa; error = prison_check_ip6(cred, &sai6->sin6_addr); break; #endif default: if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF)) error = EAFNOSUPPORT; } return (error); } /* * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. */ int prison_check(struct ucred *cred1, struct ucred *cred2) { return ((cred1->cr_prison == cred2->cr_prison || prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH); } /* * Return 1 if p2 is a child of p1, otherwise 0. */ int prison_ischild(struct prison *pr1, struct prison *pr2) { for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent) if (pr1 == pr2) return (1); return (0); } /* * Return 1 if the passed credential is in a jail, otherwise 0. */ int jailed(struct ucred *cred) { return (cred->cr_prison != &prison0); } /* * Return 1 if the passed credential is in a jail and that jail does not * have its own virtual network stack, otherwise 0. */ int jailed_without_vnet(struct ucred *cred) { if (!jailed(cred)) return (0); #ifdef VIMAGE if (prison_owns_vnet(cred)) return (0); #endif return (1); } /* * Return the correct hostname (domainname, et al) for the passed credential. */ void getcredhostname(struct ucred *cred, char *buf, size_t size) { struct prison *pr; /* * A NULL credential can be used to shortcut to the physical * system's hostname. */ pr = (cred != NULL) ? 
cred->cr_prison : &prison0; mtx_lock(&pr->pr_mtx); strlcpy(buf, pr->pr_hostname, size); mtx_unlock(&pr->pr_mtx); } void getcreddomainname(struct ucred *cred, char *buf, size_t size) { mtx_lock(&cred->cr_prison->pr_mtx); strlcpy(buf, cred->cr_prison->pr_domainname, size); mtx_unlock(&cred->cr_prison->pr_mtx); } void getcredhostuuid(struct ucred *cred, char *buf, size_t size) { mtx_lock(&cred->cr_prison->pr_mtx); strlcpy(buf, cred->cr_prison->pr_hostuuid, size); mtx_unlock(&cred->cr_prison->pr_mtx); } void getcredhostid(struct ucred *cred, unsigned long *hostid) { mtx_lock(&cred->cr_prison->pr_mtx); *hostid = cred->cr_prison->pr_hostid; mtx_unlock(&cred->cr_prison->pr_mtx); } #ifdef VIMAGE /* * Determine whether the prison represented by cred owns * its vnet rather than having it inherited. * * Returns 1 in case the prison owns the vnet, 0 otherwise. */ int prison_owns_vnet(struct ucred *cred) { /* * vnets cannot be added/removed after jail creation, * so no need to lock here. */ return (cred->cr_prison->pr_flags & PR_VNET ? 1 : 0); } #endif /* * Determine whether the subject represented by cred can "see" * status of a mount point. * Returns: 0 for permitted, ENOENT otherwise. * XXX: This function should be called cr_canseemount() and should be * placed in kern_prot.c. */ int prison_canseemount(struct ucred *cred, struct mount *mp) { struct prison *pr; struct statfs *sp; size_t len; pr = cred->cr_prison; if (pr->pr_enforce_statfs == 0) return (0); if (pr->pr_root->v_mount == mp) return (0); if (pr->pr_enforce_statfs == 2) return (ENOENT); /* * If jail's chroot directory is set to "/" we should be able to see * all mount-points from inside a jail. * This is an ugly check, but this is the only situation when jail's * directory ends with '/'. */ if (strcmp(pr->pr_path, "/") == 0) return (0); len = strlen(pr->pr_path); sp = &mp->mnt_stat; if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) return (ENOENT); /* * Be sure that we don't have a situation where jail's root directory * is "/some/path" and mount point is "/some/pathpath". */ if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') return (ENOENT); return (0); } void prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) { char jpath[MAXPATHLEN]; struct prison *pr; size_t len; pr = cred->cr_prison; if (pr->pr_enforce_statfs == 0) return; if (prison_canseemount(cred, mp) != 0) { bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); strlcpy(sp->f_mntonname, "[restricted]", sizeof(sp->f_mntonname)); return; } if (pr->pr_root->v_mount == mp) { /* * Clear current buffer data, so we are sure nothing from * the valid path is left there. */ bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); *sp->f_mntonname = '/'; return; } /* * If jail's chroot directory is set to "/" we should be able to see * all mount-points from inside a jail. */ if (strcmp(pr->pr_path, "/") == 0) return; len = strlen(pr->pr_path); strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); /* * Clear current buffer data, so we are sure nothing from * the valid path is left there. */ bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); if (*jpath == '\0') { /* Should never happen. */ *sp->f_mntonname = '/'; } else { strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); } } /* * Check whether permission for a specific privilege is granted within jail. We * have a specific list of accepted privileges; the rest are denied.
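 * (prison_priv_check() follows after this aside.)
 */

/*
 * Standalone sketch of the containment test in prison_canseemount()
 * above: the jail's path must be a prefix of the mount point *and* end
 * on a component boundary, so that "/some/path" matches "/some/path/x"
 * but not "/some/pathpath".
 */
#include <string.h>

static int
path_contains(const char *root, const char *path)
{
	size_t len;

	if (strcmp(root, "/") == 0)	/* "/" contains everything */
		return (1);
	len = strlen(root);
	if (strncmp(root, path, len) != 0)
		return (0);
	return (path[len] == '\0' || path[len] == '/');
}

/*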
*/ int prison_priv_check(struct ucred *cred, int priv) { if (!jailed(cred)) return (0); #ifdef VIMAGE /* * Privileges specific to prisons with a virtual network stack. * There might be a duplicate entry here in case the privilege * is only granted conditionally in the legacy jail case. */ switch (priv) { #ifdef notyet /* * NFS-specific privileges. */ case PRIV_NFS_DAEMON: case PRIV_NFS_LOCKD: #endif /* * Network stack privileges. */ case PRIV_NET_BRIDGE: case PRIV_NET_GRE: case PRIV_NET_BPF: case PRIV_NET_RAW: /* Dup, cond. in legacy jail case. */ case PRIV_NET_ROUTE: case PRIV_NET_TAP: case PRIV_NET_SETIFMTU: case PRIV_NET_SETIFFLAGS: case PRIV_NET_SETIFCAP: case PRIV_NET_SETIFDESCR: case PRIV_NET_SETIFNAME : case PRIV_NET_SETIFMETRIC: case PRIV_NET_SETIFPHYS: case PRIV_NET_SETIFMAC: case PRIV_NET_ADDMULTI: case PRIV_NET_DELMULTI: case PRIV_NET_HWIOCTL: case PRIV_NET_SETLLADDR: case PRIV_NET_ADDIFGROUP: case PRIV_NET_DELIFGROUP: case PRIV_NET_IFCREATE: case PRIV_NET_IFDESTROY: case PRIV_NET_ADDIFADDR: case PRIV_NET_DELIFADDR: case PRIV_NET_LAGG: case PRIV_NET_GIF: case PRIV_NET_SETIFVNET: case PRIV_NET_SETIFFIB: /* * 802.11-related privileges. */ case PRIV_NET80211_GETKEY: #ifdef notyet case PRIV_NET80211_MANAGE: /* XXX-BZ discuss with sam@ */ #endif #ifdef notyet /* * ATM privileges. */ case PRIV_NETATM_CFG: case PRIV_NETATM_ADD: case PRIV_NETATM_DEL: case PRIV_NETATM_SET: /* * Bluetooth privileges. */ case PRIV_NETBLUETOOTH_RAW: #endif /* * Netgraph and netgraph module privileges. */ case PRIV_NETGRAPH_CONTROL: #ifdef notyet case PRIV_NETGRAPH_TTY: #endif /* * IPv4 and IPv6 privileges. */ case PRIV_NETINET_IPFW: case PRIV_NETINET_DIVERT: case PRIV_NETINET_PF: case PRIV_NETINET_DUMMYNET: case PRIV_NETINET_CARP: case PRIV_NETINET_MROUTE: case PRIV_NETINET_RAW: case PRIV_NETINET_ADDRCTRL6: case PRIV_NETINET_ND6: case PRIV_NETINET_SCOPE6: case PRIV_NETINET_ALIFETIME6: case PRIV_NETINET_IPSEC: case PRIV_NETINET_BINDANY: #ifdef notyet /* * NCP privileges. */ case PRIV_NETNCP: /* * SMB privileges. */ case PRIV_NETSMB: #endif /* * No default: or deny here. * In case of no permit fall through to next switch(). */ if (cred->cr_prison->pr_flags & PR_VNET) return (0); } #endif /* VIMAGE */ switch (priv) { /* * Allow ktrace privileges for root in jail. */ case PRIV_KTRACE: #if 0 /* * Allow jailed processes to configure audit identity and * submit audit records (login, etc). In the future we may * want to further refine the relationship between audit and * jail. */ case PRIV_AUDIT_GETAUDIT: case PRIV_AUDIT_SETAUDIT: case PRIV_AUDIT_SUBMIT: #endif /* * Allow jailed processes to manipulate process UNIX * credentials in any way they see fit. */ case PRIV_CRED_SETUID: case PRIV_CRED_SETEUID: case PRIV_CRED_SETGID: case PRIV_CRED_SETEGID: case PRIV_CRED_SETGROUPS: case PRIV_CRED_SETREUID: case PRIV_CRED_SETREGID: case PRIV_CRED_SETRESUID: case PRIV_CRED_SETRESGID: /* * Jail implements visibility constraints already, so allow * jailed root to override uid/gid-based constraints. */ case PRIV_SEEOTHERGIDS: case PRIV_SEEOTHERUIDS: /* * Jail implements inter-process debugging limits already, so * allow jailed root various debugging privileges. */ case PRIV_DEBUG_DIFFCRED: case PRIV_DEBUG_SUGID: case PRIV_DEBUG_UNPRIV: /* * Allow jail to set various resource limits and login * properties, and for now, exceed process resource limits. */ case PRIV_PROC_LIMIT: case PRIV_PROC_SETLOGIN: case PRIV_PROC_SETRLIMIT: /* * System V and POSIX IPC privileges are granted in jail. 
*/ case PRIV_IPC_READ: case PRIV_IPC_WRITE: case PRIV_IPC_ADMIN: case PRIV_IPC_MSGSIZE: case PRIV_MQ_ADMIN: /* * Jail operations within a jail work on child jails. */ case PRIV_JAIL_ATTACH: case PRIV_JAIL_SET: case PRIV_JAIL_REMOVE: /* * Jail implements its own inter-process limits, so allow * root processes in jail to change scheduling on other * processes in the same jail. Likewise for signalling. */ case PRIV_SCHED_DIFFCRED: case PRIV_SCHED_CPUSET: case PRIV_SIGNAL_DIFFCRED: case PRIV_SIGNAL_SUGID: /* * Allow jailed processes to write to sysctls marked as jail * writable. */ case PRIV_SYSCTL_WRITEJAIL: /* * Allow root in jail to manage a variety of quota * properties. These should likely be conditional on a * configuration option. */ case PRIV_VFS_GETQUOTA: case PRIV_VFS_SETQUOTA: /* * Since Jail relies on chroot() to implement file system * protections, grant many VFS privileges to root in jail. * Be careful to exclude mount-related and NFS-related * privileges. */ case PRIV_VFS_READ: case PRIV_VFS_WRITE: case PRIV_VFS_ADMIN: case PRIV_VFS_EXEC: case PRIV_VFS_LOOKUP: case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ case PRIV_VFS_CHFLAGS_DEV: case PRIV_VFS_CHOWN: case PRIV_VFS_CHROOT: case PRIV_VFS_RETAINSUGID: case PRIV_VFS_FCHROOT: case PRIV_VFS_LINK: case PRIV_VFS_SETGID: case PRIV_VFS_STAT: case PRIV_VFS_STICKYFILE: /* * As in the non-jail case, non-root users are expected to be * able to read kernel/physical memory (provided /dev/[k]mem * exists in the jail and they have permission to access it). */ case PRIV_KMEM_READ: return (0); /* * Depending on the global setting, allow privilege of * setting system flags. */ case PRIV_VFS_SYSFLAGS: if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS) return (0); else return (EPERM); /* * Depending on the global setting, allow privilege of * mounting/unmounting file systems. */ case PRIV_VFS_MOUNT: case PRIV_VFS_UNMOUNT: case PRIV_VFS_MOUNT_NONUSER: case PRIV_VFS_MOUNT_OWNER: if (cred->cr_prison->pr_allow & PR_ALLOW_MOUNT && cred->cr_prison->pr_enforce_statfs < 2) return (0); else return (EPERM); /* * Allow jailed root to bind reserved ports and reuse in-use * ports. */ case PRIV_NETINET_RESERVEDPORT: case PRIV_NETINET_REUSEPORT: return (0); /* * Allow jailed root to set certain IPv4/6 (option) headers. */ case PRIV_NETINET_SETHDROPTS: return (0); /* * Conditionally allow creating raw sockets in jail. */ case PRIV_NETINET_RAW: if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS) return (0); else return (EPERM); /* * Since jail implements its own visibility limits on netstat * sysctls, allow getcred. This allows identd to work in * jail. */ case PRIV_NETINET_GETCRED: return (0); /* * Allow jailed root to set loginclass. */ case PRIV_PROC_SETLOGINCLASS: return (0); default: /* * In all remaining cases, deny the privilege request. This * includes almost all network privileges and many system * configuration privileges. */ return (EPERM); } } /* * Return the part of pr2's name that is relative to pr1, or the whole name * if it does not directly follow. */ char * prison_name(struct prison *pr1, struct prison *pr2) { char *name; /* Jails see themselves as "0" (if they see themselves at all). */ if (pr1 == pr2) return "0"; name = pr2->pr_name; if (prison_ischild(pr1, pr2)) { /* * pr1 isn't locked (and allprison_lock may not be either) * so its length can't be counted on. But the number of dots * can be counted on - and counted.
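 *
 * For example (illustrative names): if pr1 is the jail "foo" and pr2
 * is its grandchild "foo.bar.baz", the loop below skips one dot for
 * each ancestor of pr1 below prison0 and returns "bar.baz".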
*/ for (; pr1 != &prison0; pr1 = pr1->pr_parent) name = strchr(name, '.') + 1; } return (name); } /* * Return the part of pr2's path that is relative to pr1, or the whole path * if it does not directly follow. */ static char * prison_path(struct prison *pr1, struct prison *pr2) { char *path1, *path2; int len1; path1 = pr1->pr_path; path2 = pr2->pr_path; if (!strcmp(path1, "/")) return (path2); len1 = strlen(path1); if (strncmp(path1, path2, len1)) return (path2); if (path2[len1] == '\0') return "/"; if (path2[len1] == '/') return (path2 + len1); return (path2); } /* * Jail-related sysctls. */ static SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, "Jails"); static int sysctl_jail_list(SYSCTL_HANDLER_ARGS) { struct xprison *xp; struct prison *pr, *cpr; #ifdef INET struct in_addr *ip4 = NULL; int ip4s = 0; #endif #ifdef INET6 struct in6_addr *ip6 = NULL; int ip6s = 0; #endif int descend, error; xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK); pr = req->td->td_ucred->cr_prison; error = 0; sx_slock(&allprison_lock); FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { #if defined(INET) || defined(INET6) again: #endif mtx_lock(&cpr->pr_mtx); #ifdef INET if (cpr->pr_ip4s > 0) { if (ip4s < cpr->pr_ip4s) { ip4s = cpr->pr_ip4s; mtx_unlock(&cpr->pr_mtx); ip4 = realloc(ip4, ip4s * sizeof(struct in_addr), M_TEMP, M_WAITOK); goto again; } bcopy(cpr->pr_ip4, ip4, cpr->pr_ip4s * sizeof(struct in_addr)); } #endif #ifdef INET6 if (cpr->pr_ip6s > 0) { if (ip6s < cpr->pr_ip6s) { ip6s = cpr->pr_ip6s; mtx_unlock(&cpr->pr_mtx); ip6 = realloc(ip6, ip6s * sizeof(struct in6_addr), M_TEMP, M_WAITOK); goto again; } bcopy(cpr->pr_ip6, ip6, cpr->pr_ip6s * sizeof(struct in6_addr)); } #endif if (cpr->pr_ref == 0) { mtx_unlock(&cpr->pr_mtx); continue; } bzero(xp, sizeof(*xp)); xp->pr_version = XPRISON_VERSION; xp->pr_id = cpr->pr_id; xp->pr_state = cpr->pr_uref > 0 ? 
PRISON_STATE_ALIVE : PRISON_STATE_DYING; strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path)); strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host)); strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name)); #ifdef INET xp->pr_ip4s = cpr->pr_ip4s; #endif #ifdef INET6 xp->pr_ip6s = cpr->pr_ip6s; #endif mtx_unlock(&cpr->pr_mtx); error = SYSCTL_OUT(req, xp, sizeof(*xp)); if (error) break; #ifdef INET if (xp->pr_ip4s > 0) { error = SYSCTL_OUT(req, ip4, xp->pr_ip4s * sizeof(struct in_addr)); if (error) break; } #endif #ifdef INET6 if (xp->pr_ip6s > 0) { error = SYSCTL_OUT(req, ip6, xp->pr_ip6s * sizeof(struct in6_addr)); if (error) break; } #endif } sx_sunlock(&allprison_lock); free(xp, M_TEMP); #ifdef INET free(ip4, M_TEMP); #endif #ifdef INET6 free(ip6, M_TEMP); #endif return (error); } SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_list, "S", "List of active jails"); static int sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) { int error, injail; injail = jailed(req->td->td_ucred); error = SYSCTL_OUT(req, &injail, sizeof(injail)); return (error); } SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_jailed, "I", "Process in jail?"); static int sysctl_jail_vnet(SYSCTL_HANDLER_ARGS) { int error, havevnet; #ifdef VIMAGE struct ucred *cred = req->td->td_ucred; havevnet = jailed(cred) && prison_owns_vnet(cred); #else havevnet = 0; #endif error = SYSCTL_OUT(req, &havevnet, sizeof(havevnet)); return (error); } SYSCTL_PROC(_security_jail, OID_AUTO, vnet, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_vnet, "I", "Jail owns VNET?"); #if defined(INET) || defined(INET6) SYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW, &jail_max_af_ips, 0, "Number of IP addresses a jail may have at most per address family"); #endif /* * Default parameters for jail(2) compatibility. For historical reasons, * the sysctl names have varying similarity to the parameter names. Prisons * just see their own parameters, and can't change them. */ static int sysctl_jail_default_allow(SYSCTL_HANDLER_ARGS) { struct prison *pr; int allow, error, i; pr = req->td->td_ucred->cr_prison; allow = (pr == &prison0) ? jail_default_allow : pr->pr_allow; /* Get the current flag value, and convert it to a boolean. */ i = (allow & arg2) ? 1 : 0; if (arg1 != NULL) i = !i; error = sysctl_handle_int(oidp, &i, 0, req); if (error || !req->newptr) return (error); i = i ? arg2 : 0; if (arg1 != NULL) i ^= arg2; /* * The sysctls don't have CTLFLAGS_PRISON, so assume prison0 * for writing.
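 *
 * Illustrative administrator usage: setting
 * "sysctl security.jail.mount_allowed=1" flips PR_ALLOW_MOUNT in
 * jail_default_allow, changing the default only for jails created
 * afterwards; existing prisons keep their own pr_allow copies.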
*/ mtx_lock(&prison0.pr_mtx); jail_default_allow = (jail_default_allow & ~arg2) | i; mtx_unlock(&prison0.pr_mtx); return (0); } SYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I", "Processes in jail can set their hostnames"); SYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I", "Processes in jail are limited to creating UNIX/IP/route sockets only"); SYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I", "Processes in jail can use System V IPC primitives"); SYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I", "Prison root can create raw sockets"); SYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I", "Processes in jail can alter system file flags"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I", "Processes in jail can mount/unmount jail-friendly file systems"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_devfs_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT_DEVFS, sysctl_jail_default_allow, "I", "Processes in jail can mount the devfs file system"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_fdescfs_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT_FDESCFS, sysctl_jail_default_allow, "I", "Processes in jail can mount the fdescfs file system"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_nullfs_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT_NULLFS, sysctl_jail_default_allow, "I", "Processes in jail can mount the nullfs file system"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_procfs_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT_PROCFS, sysctl_jail_default_allow, "I", "Processes in jail can mount the procfs file system"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_linprocfs_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT_LINPROCFS, sysctl_jail_default_allow, "I", "Processes in jail can mount the linprocfs file system"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_linsysfs_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT_LINSYSFS, sysctl_jail_default_allow, "I", "Processes in jail can mount the linsysfs file system"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_tmpfs_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT_TMPFS, sysctl_jail_default_allow, "I", "Processes in jail can mount the tmpfs file system"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_zfs_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT_ZFS, sysctl_jail_default_allow, "I", "Processes in jail can mount the zfs file system"); static int sysctl_jail_default_level(SYSCTL_HANDLER_ARGS) { struct prison *pr; int level, error; pr = req->td->td_ucred->cr_prison; level = (pr == &prison0) ? 
*(int *)arg1 : *(int *)((char *)pr + arg2); error = sysctl_handle_int(oidp, &level, 0, req); if (error || !req->newptr) return (error); *(int *)arg1 = level; return (0); } SYSCTL_PROC(_security_jail, OID_AUTO, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &jail_default_enforce_statfs, offsetof(struct prison, pr_enforce_statfs), sysctl_jail_default_level, "I", "Processes in jail cannot see all mounted file systems"); SYSCTL_PROC(_security_jail, OID_AUTO, devfs_ruleset, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &jail_default_devfs_rsnum, offsetof(struct prison, pr_devfs_rsnum), sysctl_jail_default_level, "I", "Ruleset for the devfs filesystem in jail"); /* * Nodes to describe jail parameters. Maximum length of string parameters * is returned in the string itself, and the other parameters exist merely * to make themselves and their types known. */ SYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW, 0, "Jail parameters"); int sysctl_jail_param(SYSCTL_HANDLER_ARGS) { int i; long l; size_t s; char numbuf[12]; switch (oidp->oid_kind & CTLTYPE) { case CTLTYPE_LONG: case CTLTYPE_ULONG: l = 0; #ifdef SCTL_MASK32 if (!(req->flags & SCTL_MASK32)) #endif return (SYSCTL_OUT(req, &l, sizeof(l))); case CTLTYPE_INT: case CTLTYPE_UINT: i = 0; return (SYSCTL_OUT(req, &i, sizeof(i))); case CTLTYPE_STRING: snprintf(numbuf, sizeof(numbuf), "%jd", (intmax_t)arg2); return (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req)); case CTLTYPE_STRUCT: s = (size_t)arg2; return (SYSCTL_OUT(req, &s, sizeof(s))); } return (0); } /* * CTLFLAG_RDTUN in the following indicates jail parameters that can be set at * jail creation time but cannot be changed in an existing jail. */ SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID"); SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID"); SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name"); SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path"); SYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail secure level"); SYSCTL_JAIL_PARAM(, osreldate, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail value for kern.osreldate and uname -K"); SYSCTL_JAIL_PARAM_STRING(, osrelease, CTLFLAG_RDTUN, OSRELEASELEN, "Jail value for kern.osrelease and uname -r"); SYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail cannot see all mounted file systems"); SYSCTL_JAIL_PARAM(, devfs_ruleset, CTLTYPE_INT | CTLFLAG_RW, "I", "Ruleset for in-jail devfs mounts"); SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail persistence"); #ifdef VIMAGE SYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN, "E,jailsys", "Virtual network stack"); #endif SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD, "B", "Jail is in the process of shutting down"); SYSCTL_JAIL_PARAM_NODE(children, "Number of child jails"); SYSCTL_JAIL_PARAM(_children, cur, CTLTYPE_INT | CTLFLAG_RD, "I", "Current number of child jails"); SYSCTL_JAIL_PARAM(_children, max, CTLTYPE_INT | CTLFLAG_RW, "I", "Maximum number of child jails"); SYSCTL_JAIL_PARAM_SYS_NODE(host, CTLFLAG_RW, "Jail host info"); SYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail hostname"); SYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail NIS domainname"); SYSCTL_JAIL_PARAM_STRING(_host, hostuuid, CTLFLAG_RW, HOSTUUIDLEN, "Jail host UUID"); SYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW, "LU", "Jail host ID"); SYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset"); 
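/*
 * A self-contained userland sketch (editorial illustration, not part of
 * this file) of how the parameter nodes declared here can be probed:
 * reading an integer parameter returns a zero of the appropriate width,
 * and reading a string parameter returns its maximum length as a numeric
 * string, as implemented by sysctl_jail_param() above.
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		char buf[16];
 *		size_t len = sizeof(buf);
 *
 *		if (sysctlbyname("security.jail.param.name", buf, &len,
 *		    NULL, 0) == 0)
 *			printf("jail name length limit: %s\n", buf);
 *		return (0);
 *	}
 */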
SYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID"); #ifdef INET SYSCTL_JAIL_PARAM_SYS_NODE(ip4, CTLFLAG_RDTUN, "Jail IPv4 address virtualization"); SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr), "S,in_addr,a", "Jail IPv4 addresses"); SYSCTL_JAIL_PARAM(_ip4, saddrsel, CTLTYPE_INT | CTLFLAG_RW, "B", "Do (not) use IPv4 source address selection rather than the " "primary jail IPv4 address."); #endif #ifdef INET6 SYSCTL_JAIL_PARAM_SYS_NODE(ip6, CTLFLAG_RDTUN, "Jail IPv6 address virtualization"); SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr), "S,in6_addr,a", "Jail IPv6 addresses"); SYSCTL_JAIL_PARAM(_ip6, saddrsel, CTLTYPE_INT | CTLFLAG_RW, "B", "Do (not) use IPv6 source address selection rather than the " "primary jail IPv6 address."); #endif SYSCTL_JAIL_PARAM_NODE(allow, "Jail permission flags"); SYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may set hostname"); SYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may use SYSV IPC"); SYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may create raw sockets"); SYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may alter system file flags"); SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may set file quotas"); SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route"); SYSCTL_JAIL_PARAM_SUBNODE(allow, mount, "Jail mount/unmount permission flags"); SYSCTL_JAIL_PARAM(_allow_mount, , CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount/unmount jail-friendly file systems in general"); SYSCTL_JAIL_PARAM(_allow_mount, devfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the devfs file system"); SYSCTL_JAIL_PARAM(_allow_mount, fdescfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the fdescfs file system"); SYSCTL_JAIL_PARAM(_allow_mount, nullfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the nullfs file system"); SYSCTL_JAIL_PARAM(_allow_mount, procfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the procfs file system"); SYSCTL_JAIL_PARAM(_allow_mount, linprocfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the linprocfs file system"); SYSCTL_JAIL_PARAM(_allow_mount, linsysfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the linsysfs file system"); SYSCTL_JAIL_PARAM(_allow_mount, tmpfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the tmpfs file system"); SYSCTL_JAIL_PARAM(_allow_mount, zfs, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount the zfs file system"); #ifdef RACCT void prison_racct_foreach(void (*callback)(struct racct *racct, - void *arg2, void *arg3), void *arg2, void *arg3) + void *arg2, void *arg3), void (*pre)(void), void (*post)(void), + void *arg2, void *arg3) { struct prison_racct *prr; ASSERT_RACCT_ENABLED(); sx_slock(&allprison_lock); + if (pre != NULL) + (pre)(); LIST_FOREACH(prr, &allprison_racct, prr_next) (callback)(prr->prr_racct, arg2, arg3); + if (post != NULL) + (post)(); sx_sunlock(&allprison_lock); } static struct prison_racct * prison_racct_find_locked(const char *name) { struct prison_racct *prr; ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_XLOCKED); if (name[0] == '\0' || strlen(name) >= MAXHOSTNAMELEN) return (NULL); LIST_FOREACH(prr, &allprison_racct, prr_next) { if (strcmp(name, prr->prr_name) != 0) continue; /* Found prison_racct with a matching name? */ prison_racct_hold(prr); return (prr); } /* Add new prison_racct. 
*/ prr = malloc(sizeof(*prr), M_PRISON_RACCT, M_ZERO | M_WAITOK); racct_create(&prr->prr_racct); strcpy(prr->prr_name, name); refcount_init(&prr->prr_refcount, 1); LIST_INSERT_HEAD(&allprison_racct, prr, prr_next); return (prr); } struct prison_racct * prison_racct_find(const char *name) { struct prison_racct *prr; ASSERT_RACCT_ENABLED(); sx_xlock(&allprison_lock); prr = prison_racct_find_locked(name); sx_xunlock(&allprison_lock); return (prr); } void prison_racct_hold(struct prison_racct *prr) { ASSERT_RACCT_ENABLED(); refcount_acquire(&prr->prr_refcount); } static void prison_racct_free_locked(struct prison_racct *prr) { ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_XLOCKED); if (refcount_release(&prr->prr_refcount)) { racct_destroy(&prr->prr_racct); LIST_REMOVE(prr, prr_next); free(prr, M_PRISON_RACCT); } } void prison_racct_free(struct prison_racct *prr) { int old; ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_UNLOCKED); old = prr->prr_refcount; if (old > 1 && atomic_cmpset_int(&prr->prr_refcount, old, old - 1)) return; sx_xlock(&allprison_lock); prison_racct_free_locked(prr); sx_xunlock(&allprison_lock); } static void prison_racct_attach(struct prison *pr) { struct prison_racct *prr; ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_XLOCKED); prr = prison_racct_find_locked(pr->pr_name); KASSERT(prr != NULL, ("cannot find prison_racct")); pr->pr_prison_racct = prr; } /* * Handle jail renaming. From the racct point of view, renaming means * moving from one prison_racct to another. */ static void prison_racct_modify(struct prison *pr) { struct proc *p; struct ucred *cred; struct prison_racct *oldprr; ASSERT_RACCT_ENABLED(); sx_slock(&allproc_lock); sx_xlock(&allprison_lock); if (strcmp(pr->pr_name, pr->pr_prison_racct->prr_name) == 0) { sx_xunlock(&allprison_lock); sx_sunlock(&allproc_lock); return; } oldprr = pr->pr_prison_racct; pr->pr_prison_racct = NULL; prison_racct_attach(pr); /* * Move resource utilisation records. */ racct_move(pr->pr_prison_racct->prr_racct, oldprr->prr_racct); /* * Force rctl to reattach rules to processes. */ FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); cred = crhold(p->p_ucred); PROC_UNLOCK(p); racct_proc_ucred_changed(p, cred, cred); crfree(cred); } sx_sunlock(&allproc_lock); prison_racct_free_locked(oldprr); sx_xunlock(&allprison_lock); } static void prison_racct_detach(struct prison *pr) { ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_UNLOCKED); if (pr->pr_prison_racct == NULL) return; prison_racct_free(pr->pr_prison_racct); pr->pr_prison_racct = NULL; } #endif /* RACCT */ #ifdef DDB static void db_show_prison(struct prison *pr) { int fi; #if defined(INET) || defined(INET6) int ii; #endif unsigned jsf; #ifdef INET6 char ip6buf[INET6_ADDRSTRLEN]; #endif db_printf("prison %p:\n", pr); db_printf(" jid = %d\n", pr->pr_id); db_printf(" name = %s\n", pr->pr_name); db_printf(" parent = %p\n", pr->pr_parent); db_printf(" ref = %d\n", pr->pr_ref); db_printf(" uref = %d\n", pr->pr_uref); db_printf(" path = %s\n", pr->pr_path); db_printf(" cpuset = %d\n", pr->pr_cpuset ? 
pr->pr_cpuset->cs_id : -1); #ifdef VIMAGE db_printf(" vnet = %p\n", pr->pr_vnet); #endif db_printf(" root = %p\n", pr->pr_root); db_printf(" securelevel = %d\n", pr->pr_securelevel); db_printf(" devfs_rsnum = %d\n", pr->pr_devfs_rsnum); db_printf(" children.max = %d\n", pr->pr_childmax); db_printf(" children.cur = %d\n", pr->pr_childcount); db_printf(" child = %p\n", LIST_FIRST(&pr->pr_children)); db_printf(" sibling = %p\n", LIST_NEXT(pr, pr_sibling)); db_printf(" flags = 0x%x", pr->pr_flags); for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); fi++) if (pr_flag_names[fi] != NULL && (pr->pr_flags & (1 << fi))) db_printf(" %s", pr_flag_names[fi]); for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); fi++) { jsf = pr->pr_flags & (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new); db_printf(" %-16s= %s\n", pr_flag_jailsys[fi].name, pr_flag_jailsys[fi].disable && (jsf == pr_flag_jailsys[fi].disable) ? "disable" : (jsf == pr_flag_jailsys[fi].new) ? "new" : "inherit"); } db_printf(" allow = 0x%x", pr->pr_allow); for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); fi++) if (pr_allow_names[fi] != NULL && (pr->pr_allow & (1 << fi))) db_printf(" %s", pr_allow_names[fi]); db_printf("\n"); db_printf(" enforce_statfs = %d\n", pr->pr_enforce_statfs); db_printf(" host.hostname = %s\n", pr->pr_hostname); db_printf(" host.domainname = %s\n", pr->pr_domainname); db_printf(" host.hostuuid = %s\n", pr->pr_hostuuid); db_printf(" host.hostid = %lu\n", pr->pr_hostid); #ifdef INET db_printf(" ip4s = %d\n", pr->pr_ip4s); for (ii = 0; ii < pr->pr_ip4s; ii++) db_printf(" %s %s\n", ii == 0 ? "ip4.addr =" : " ", inet_ntoa(pr->pr_ip4[ii])); #endif #ifdef INET6 db_printf(" ip6s = %d\n", pr->pr_ip6s); for (ii = 0; ii < pr->pr_ip6s; ii++) db_printf(" %s %s\n", ii == 0 ? "ip6.addr =" : " ", ip6_sprintf(ip6buf, &pr->pr_ip6[ii])); #endif } DB_SHOW_COMMAND(prison, db_show_prison_command) { struct prison *pr; if (!have_addr) { /* * Show all prisons in the list, and prison0 which is not * listed. */ db_show_prison(&prison0); if (!db_pager_quit) { TAILQ_FOREACH(pr, &allprison, pr_list) { db_show_prison(pr); if (db_pager_quit) break; } } return; } if (addr == 0) pr = &prison0; else { /* Look for a prison with the ID and with references. */ TAILQ_FOREACH(pr, &allprison, pr_list) if (pr->pr_id == addr && pr->pr_ref > 0) break; if (pr == NULL) /* Look again, without requiring a reference. */ TAILQ_FOREACH(pr, &allprison, pr_list) if (pr->pr_id == addr) break; if (pr == NULL) /* Assume address points to a valid prison. */ pr = (struct prison *)addr; } db_show_prison(pr); } #endif /* DDB */ Index: projects/powernv/kern/kern_loginclass.c =================================================================== --- projects/powernv/kern/kern_loginclass.c (revision 290990) +++ projects/powernv/kern/kern_loginclass.c (revision 290991) @@ -1,245 +1,250 @@ /*- * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Processes may set login class name using setloginclass(2). This * is usually done through call to setusercontext(3), by programs * such as login(1), based on information from master.passwd(5). Kernel * uses this information to enforce per-class resource limits. Current * login class can be determined using id(1). Login class is inherited * from the parent process during fork(2). If not set, it defaults * to "default". * * Code in this file implements setloginclass(2) and getloginclass(2) * system calls, and maintains class name storage and retrieval. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_LOGINCLASS, "loginclass", "loginclass structures"); LIST_HEAD(, loginclass) loginclasses; /* * Lock protecting loginclasses list. */ static struct rwlock loginclasses_lock; RW_SYSINIT(loginclasses_init, &loginclasses_lock, "loginclasses lock"); void loginclass_hold(struct loginclass *lc) { refcount_acquire(&lc->lc_refcount); } void loginclass_free(struct loginclass *lc) { int old; old = lc->lc_refcount; if (old > 1 && atomic_cmpset_int(&lc->lc_refcount, old, old - 1)) return; rw_wlock(&loginclasses_lock); if (!refcount_release(&lc->lc_refcount)) { rw_wunlock(&loginclasses_lock); return; } racct_destroy(&lc->lc_racct); LIST_REMOVE(lc, lc_next); rw_wunlock(&loginclasses_lock); free(lc, M_LOGINCLASS); } /* * Look up a loginclass struct for the parameter name. * loginclasses_lock must be locked. * Increase refcount on loginclass struct returned. */ static struct loginclass * loginclass_lookup(const char *name) { struct loginclass *lc; rw_assert(&loginclasses_lock, RA_LOCKED); LIST_FOREACH(lc, &loginclasses, lc_next) if (strcmp(name, lc->lc_name) == 0) { loginclass_hold(lc); break; } return (lc); } /* * Return loginclass structure with a corresponding name. Not * performance critical, as it's used mainly by setloginclass(2), * which happens once per login session. Caller has to use * loginclass_free() on the returned value when it's no longer * needed. 
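 *
 * Illustrative caller pattern (the class name is an arbitrary example;
 * sys_setloginclass() below is the real consumer):
 *
 *	lc = loginclass_find("daemon");
 *	if (lc == NULL)
 *		return (EINVAL);
 *	(... use lc, e.g. hang it off a new credential ...)
 *	loginclass_free(lc);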
*/ struct loginclass * loginclass_find(const char *name) { struct loginclass *lc, *new_lc; if (name[0] == '\0' || strlen(name) >= MAXLOGNAME) return (NULL); rw_rlock(&loginclasses_lock); lc = loginclass_lookup(name); rw_runlock(&loginclasses_lock); if (lc != NULL) return (lc); new_lc = malloc(sizeof(*new_lc), M_LOGINCLASS, M_ZERO | M_WAITOK); racct_create(&new_lc->lc_racct); refcount_init(&new_lc->lc_refcount, 1); strcpy(new_lc->lc_name, name); rw_wlock(&loginclasses_lock); /* * There's a chance someone created our loginclass while we * were in malloc and not holding the lock, so we have to * make sure we don't insert a duplicate loginclass. */ if ((lc = loginclass_lookup(name)) == NULL) { LIST_INSERT_HEAD(&loginclasses, new_lc, lc_next); rw_wunlock(&loginclasses_lock); lc = new_lc; } else { rw_wunlock(&loginclasses_lock); racct_destroy(&new_lc->lc_racct); free(new_lc, M_LOGINCLASS); } return (lc); } /* * Get login class name. */ #ifndef _SYS_SYSPROTO_H_ struct getloginclass_args { char *namebuf; size_t namelen; }; #endif /* ARGSUSED */ int sys_getloginclass(struct thread *td, struct getloginclass_args *uap) { struct loginclass *lc; size_t lcnamelen; lc = td->td_ucred->cr_loginclass; lcnamelen = strlen(lc->lc_name) + 1; if (lcnamelen > uap->namelen) return (ERANGE); return (copyout(lc->lc_name, uap->namebuf, lcnamelen)); } /* * Set login class name. */ #ifndef _SYS_SYSPROTO_H_ struct setloginclass_args { const char *namebuf; }; #endif /* ARGSUSED */ int sys_setloginclass(struct thread *td, struct setloginclass_args *uap) { struct proc *p = td->td_proc; int error; char lcname[MAXLOGNAME]; struct loginclass *newlc; struct ucred *newcred, *oldcred; error = priv_check(td, PRIV_PROC_SETLOGINCLASS); if (error != 0) return (error); error = copyinstr(uap->namebuf, lcname, sizeof(lcname), NULL); if (error != 0) return (error); newlc = loginclass_find(lcname); if (newlc == NULL) return (EINVAL); newcred = crget(); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); newcred->cr_loginclass = newlc; proc_set_cred(p, newcred); PROC_UNLOCK(p); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); #endif loginclass_free(oldcred->cr_loginclass); crfree(oldcred); return (0); } void loginclass_racct_foreach(void (*callback)(struct racct *racct, - void *arg2, void *arg3), void *arg2, void *arg3) + void *arg2, void *arg3), void (*pre)(void), void (*post)(void), + void *arg2, void *arg3) { struct loginclass *lc; rw_rlock(&loginclasses_lock); + if (pre != NULL) + (pre)(); LIST_FOREACH(lc, &loginclasses, lc_next) (callback)(lc->lc_racct, arg2, arg3); + if (post != NULL) + (post)(); rw_runlock(&loginclasses_lock); } Index: projects/powernv/kern/kern_racct.c =================================================================== --- projects/powernv/kern/kern_racct.c (revision 290990) +++ projects/powernv/kern/kern_racct.c (revision 290991) @@ -1,1300 +1,1316 @@ /*- * Copyright (c) 2010 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RCTL #include #endif #ifdef RACCT FEATURE(racct, "Resource Accounting"); /* * Do not block processes that have their %cpu usage <= pcpu_threshold. */ static int pcpu_threshold = 1; #ifdef RACCT_DEFAULT_TO_DISABLED int racct_enable = 0; #else int racct_enable = 1; #endif SYSCTL_NODE(_kern, OID_AUTO, racct, CTLFLAG_RW, 0, "Resource Accounting"); SYSCTL_UINT(_kern_racct, OID_AUTO, enable, CTLFLAG_RDTUN, &racct_enable, 0, "Enable RACCT/RCTL"); SYSCTL_UINT(_kern_racct, OID_AUTO, pcpu_threshold, CTLFLAG_RW, &pcpu_threshold, 0, "Processes with higher %cpu usage than this value can be throttled."); /* * How many seconds it takes to use the scheduler %cpu calculations. When a * process starts, we compute its %cpu usage by dividing its runtime by the * process wall clock time. After RACCT_PCPU_SECS pass, we use the value * provided by the scheduler. 
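 *
 * Worked example (illustrative numbers): a process that has been alive
 * for 2 wall-clock seconds and has consumed 1 second of cpu time gets
 * an estimate of (1000000 * 1000000 * 100) / 2000000 = 50000000,
 * i.e. 50% in the RACCT_IN_MILLIONS representation, until the
 * scheduler's own figure takes over after RACCT_PCPU_SECS seconds.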
*/ #define RACCT_PCPU_SECS 3 static struct mtx racct_lock; MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF); static uma_zone_t racct_zone; static void racct_sub_racct(struct racct *dest, const struct racct *src); static void racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount); static void racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount); SDT_PROVIDER_DEFINE(racct); SDT_PROBE_DEFINE3(racct, kernel, rusage, add, "struct proc *", "int", "uint64_t"); SDT_PROBE_DEFINE3(racct, kernel, rusage, add__failure, "struct proc *", "int", "uint64_t"); SDT_PROBE_DEFINE3(racct, kernel, rusage, add__cred, "struct ucred *", "int", "uint64_t"); SDT_PROBE_DEFINE3(racct, kernel, rusage, add__force, "struct proc *", "int", "uint64_t"); SDT_PROBE_DEFINE3(racct, kernel, rusage, set, "struct proc *", "int", "uint64_t"); SDT_PROBE_DEFINE3(racct, kernel, rusage, set__failure, "struct proc *", "int", "uint64_t"); SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, "struct proc *", "int", "uint64_t"); SDT_PROBE_DEFINE3(racct, kernel, rusage, sub__cred, "struct ucred *", "int", "uint64_t"); SDT_PROBE_DEFINE1(racct, kernel, racct, create, "struct racct *"); SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, "struct racct *"); SDT_PROBE_DEFINE2(racct, kernel, racct, join, "struct racct *", "struct racct *"); SDT_PROBE_DEFINE2(racct, kernel, racct, join__failure, "struct racct *", "struct racct *"); SDT_PROBE_DEFINE2(racct, kernel, racct, leave, "struct racct *", "struct racct *"); int racct_types[] = { [RACCT_CPU] = RACCT_IN_MILLIONS, [RACCT_DATA] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, [RACCT_STACK] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, [RACCT_CORE] = RACCT_DENIABLE, [RACCT_RSS] = RACCT_RECLAIMABLE, [RACCT_MEMLOCK] = RACCT_RECLAIMABLE | RACCT_DENIABLE, [RACCT_NPROC] = RACCT_RECLAIMABLE | RACCT_DENIABLE, [RACCT_NOFILE] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, [RACCT_VMEM] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, [RACCT_NPTS] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_SWAP] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_NTHR] = RACCT_RECLAIMABLE | RACCT_DENIABLE, [RACCT_MSGQQUEUED] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_MSGQSIZE] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_NMSGQ] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_NSEM] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_NSEMOP] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE, [RACCT_NSHM] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_SHMSIZE] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY, [RACCT_WALLCLOCK] = RACCT_IN_MILLIONS, [RACCT_PCTCPU] = RACCT_DECAYING | RACCT_DENIABLE | RACCT_IN_MILLIONS }; static const fixpt_t RACCT_DECAY_FACTOR = 0.3 * FSCALE; #ifdef SCHED_4BSD /* * Contains intermediate values for %cpu calculations to avoid using floating * point in the kernel. * ccpu_exp[k] = FSCALE * (ccpu/FSCALE)^k = FSCALE * exp(-k/20) * It is needed only for the 4BSD scheduler, because in ULE the ccpu equals * zero, so the calculations are more straightforward.
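 *
 * The table below can be regenerated with a standalone program along
 * these lines (illustrative sketch, not part of the build):
 *
 *	#include <math.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int k;
 *
 *		for (k = 0; k <= 110; k++)
 *			printf("\t[%d] = FSCALE * %.20f,\n",
 *			    k, exp(-k / 20.0));
 *		return (0);
 *	}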
*/ fixpt_t ccpu_exp[] = { [0] = FSCALE * 1, [1] = FSCALE * 0.95122942450071400909, [2] = FSCALE * 0.90483741803595957316, [3] = FSCALE * 0.86070797642505780722, [4] = FSCALE * 0.81873075307798185866, [5] = FSCALE * 0.77880078307140486824, [6] = FSCALE * 0.74081822068171786606, [7] = FSCALE * 0.70468808971871343435, [8] = FSCALE * 0.67032004603563930074, [9] = FSCALE * 0.63762815162177329314, [10] = FSCALE * 0.60653065971263342360, [11] = FSCALE * 0.57694981038048669531, [12] = FSCALE * 0.54881163609402643262, [13] = FSCALE * 0.52204577676101604789, [14] = FSCALE * 0.49658530379140951470, [15] = FSCALE * 0.47236655274101470713, [16] = FSCALE * 0.44932896411722159143, [17] = FSCALE * 0.42741493194872666992, [18] = FSCALE * 0.40656965974059911188, [19] = FSCALE * 0.38674102345450120691, [20] = FSCALE * 0.36787944117144232159, [21] = FSCALE * 0.34993774911115535467, [22] = FSCALE * 0.33287108369807955328, [23] = FSCALE * 0.31663676937905321821, [24] = FSCALE * 0.30119421191220209664, [25] = FSCALE * 0.28650479686019010032, [26] = FSCALE * 0.27253179303401260312, [27] = FSCALE * 0.25924026064589150757, [28] = FSCALE * 0.24659696394160647693, [29] = FSCALE * 0.23457028809379765313, [30] = FSCALE * 0.22313016014842982893, [31] = FSCALE * 0.21224797382674305771, [32] = FSCALE * 0.20189651799465540848, [33] = FSCALE * 0.19204990862075411423, [34] = FSCALE * 0.18268352405273465022, [35] = FSCALE * 0.17377394345044512668, [36] = FSCALE * 0.16529888822158653829, [37] = FSCALE * 0.15723716631362761621, [38] = FSCALE * 0.14956861922263505264, [39] = FSCALE * 0.14227407158651357185, [40] = FSCALE * 0.13533528323661269189, [41] = FSCALE * 0.12873490358780421886, [42] = FSCALE * 0.12245642825298191021, [43] = FSCALE * 0.11648415777349695786, [44] = FSCALE * 0.11080315836233388333, [45] = FSCALE * 0.10539922456186433678, [46] = FSCALE * 0.10025884372280373372, [47] = FSCALE * 0.09536916221554961888, [48] = FSCALE * 0.09071795328941250337, [49] = FSCALE * 0.08629358649937051097, [50] = FSCALE * 0.08208499862389879516, [51] = FSCALE * 0.07808166600115315231, [52] = FSCALE * 0.07427357821433388042, [53] = FSCALE * 0.07065121306042958674, [54] = FSCALE * 0.06720551273974976512, [55] = FSCALE * 0.06392786120670757270, [56] = FSCALE * 0.06081006262521796499, [57] = FSCALE * 0.05784432087483846296, [58] = FSCALE * 0.05502322005640722902, [59] = FSCALE * 0.05233970594843239308, [60] = FSCALE * 0.04978706836786394297, [61] = FSCALE * 0.04735892439114092119, [62] = FSCALE * 0.04504920239355780606, [63] = FSCALE * 0.04285212686704017991, [64] = FSCALE * 0.04076220397836621516, [65] = FSCALE * 0.03877420783172200988, [66] = FSCALE * 0.03688316740124000544, [67] = FSCALE * 0.03508435410084502588, [68] = FSCALE * 0.03337326996032607948, [69] = FSCALE * 0.03174563637806794323, [70] = FSCALE * 0.03019738342231850073, [71] = FSCALE * 0.02872463965423942912, [72] = FSCALE * 0.02732372244729256080, [73] = FSCALE * 0.02599112877875534358, [74] = FSCALE * 0.02472352647033939120, [75] = FSCALE * 0.02351774585600910823, [76] = FSCALE * 0.02237077185616559577, [77] = FSCALE * 0.02127973643837716938, [78] = FSCALE * 0.02024191144580438847, [79] = FSCALE * 0.01925470177538692429, [80] = FSCALE * 0.01831563888873418029, [81] = FSCALE * 0.01742237463949351138, [82] = FSCALE * 0.01657267540176124754, [83] = FSCALE * 0.01576441648485449082, [84] = FSCALE * 0.01499557682047770621, [85] = FSCALE * 0.01426423390899925527, [86] = FSCALE * 0.01356855901220093175, [87] = FSCALE * 0.01290681258047986886, [88] = FSCALE * 0.01227733990306844117, 
[89] = FSCALE * 0.01167856697039544521, [90] = FSCALE * 0.01110899653824230649, [91] = FSCALE * 0.01056720438385265337, [92] = FSCALE * 0.01005183574463358164, [93] = FSCALE * 0.00956160193054350793, [94] = FSCALE * 0.00909527710169581709, [95] = FSCALE * 0.00865169520312063417, [96] = FSCALE * 0.00822974704902002884, [97] = FSCALE * 0.00782837754922577143, [98] = FSCALE * 0.00744658307092434051, [99] = FSCALE * 0.00708340892905212004, [100] = FSCALE * 0.00673794699908546709, [101] = FSCALE * 0.00640933344625638184, [102] = FSCALE * 0.00609674656551563610, [103] = FSCALE * 0.00579940472684214321, [104] = FSCALE * 0.00551656442076077241, [105] = FSCALE * 0.00524751839918138427, [106] = FSCALE * 0.00499159390691021621, [107] = FSCALE * 0.00474815099941147558, [108] = FSCALE * 0.00451658094261266798, [109] = FSCALE * 0.00429630469075234057, [110] = FSCALE * 0.00408677143846406699, }; #endif #define CCPU_EXP_MAX 110 /* * This function is analogous to the getpcpu() function in the ps(1) command. * They should both calculate in the same way so that the racct %cpu * calculations are consistent with the values shown by the ps(1) tool. * The calculations are more complex in the 4BSD scheduler because of the value * of the ccpu variable. In ULE it is defined to be zero, which saves us some * work. */ static uint64_t racct_getpcpu(struct proc *p, u_int pcpu) { u_int swtime; #ifdef SCHED_4BSD fixpt_t pctcpu, pctcpu_next; #endif #ifdef SMP struct pcpu *pc; int found; #endif fixpt_t p_pctcpu; struct thread *td; ASSERT_RACCT_ENABLED(); /* * If the process is swapped out, we count its %cpu usage as zero. * This behaviour is consistent with the userland ps(1) tool. */ if ((p->p_flag & P_INMEM) == 0) return (0); swtime = (ticks - p->p_swtick) / hz; /* * For short-lived processes, sched_pctcpu() returns small * values even for cpu-intensive processes. Therefore we use * our own estimate in this case. */ if (swtime < RACCT_PCPU_SECS) return (pcpu); p_pctcpu = 0; FOREACH_THREAD_IN_PROC(p, td) { if (td == PCPU_GET(idlethread)) continue; #ifdef SMP found = 0; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (td == pc->pc_idlethread) { found = 1; break; } } if (found) continue; #endif thread_lock(td); #ifdef SCHED_4BSD pctcpu = sched_pctcpu(td); /* Count also the yet unfinished second. */ pctcpu_next = (pctcpu * ccpu_exp[1]) >> FSHIFT; pctcpu_next += sched_pctcpu_delta(td); p_pctcpu += max(pctcpu, pctcpu_next); #else /* * In ULE the %cpu statistics are updated on every * sched_pctcpu() call. So special calculations to * account for the latest (unfinished) second are * not needed. */ p_pctcpu += sched_pctcpu(td); #endif thread_unlock(td); } #ifdef SCHED_4BSD if (swtime <= CCPU_EXP_MAX) return ((100 * (uint64_t)p_pctcpu * 1000000) / (FSCALE - ccpu_exp[swtime])); #endif return ((100 * (uint64_t)p_pctcpu * 1000000) / FSCALE); } static void racct_add_racct(struct racct *dest, const struct racct *src) { int i; ASSERT_RACCT_ENABLED(); mtx_assert(&racct_lock, MA_OWNED); /* * Update resource usage in dest. */ for (i = 0; i <= RACCT_MAX; i++) { KASSERT(dest->r_resources[i] >= 0, ("%s: resource %d propagation meltdown: dest < 0", __func__, i)); KASSERT(src->r_resources[i] >= 0, ("%s: resource %d propagation meltdown: src < 0", __func__, i)); dest->r_resources[i] += src->r_resources[i]; } } static void racct_sub_racct(struct racct *dest, const struct racct *src) { int i; ASSERT_RACCT_ENABLED(); mtx_assert(&racct_lock, MA_OWNED); /* * Update resource usage in dest.
*/ for (i = 0; i <= RACCT_MAX; i++) { if (!RACCT_IS_SLOPPY(i) && !RACCT_IS_DECAYING(i)) { KASSERT(dest->r_resources[i] >= 0, ("%s: resource %d propagation meltdown: dest < 0", __func__, i)); KASSERT(src->r_resources[i] >= 0, ("%s: resource %d propagation meltdown: src < 0", __func__, i)); KASSERT(src->r_resources[i] <= dest->r_resources[i], ("%s: resource %d propagation meltdown: src > dest", __func__, i)); } if (RACCT_CAN_DROP(i)) { dest->r_resources[i] -= src->r_resources[i]; if (dest->r_resources[i] < 0) { KASSERT(RACCT_IS_SLOPPY(i) || RACCT_IS_DECAYING(i), ("%s: resource %d usage < 0", __func__, i)); dest->r_resources[i] = 0; } } } } void racct_create(struct racct **racctp) { if (!racct_enable) return; SDT_PROBE1(racct, kernel, racct, create, racctp); KASSERT(*racctp == NULL, ("racct already allocated")); *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO); } static void racct_destroy_locked(struct racct **racctp) { int i; struct racct *racct; ASSERT_RACCT_ENABLED(); SDT_PROBE1(racct, kernel, racct, destroy, racctp); mtx_assert(&racct_lock, MA_OWNED); KASSERT(racctp != NULL, ("NULL racctp")); KASSERT(*racctp != NULL, ("NULL racct")); racct = *racctp; for (i = 0; i <= RACCT_MAX; i++) { if (RACCT_IS_SLOPPY(i)) continue; if (!RACCT_IS_RECLAIMABLE(i)) continue; KASSERT(racct->r_resources[i] == 0, ("destroying non-empty racct: " "%ju allocated for resource %d\n", racct->r_resources[i], i)); } uma_zfree(racct_zone, racct); *racctp = NULL; } void racct_destroy(struct racct **racct) { if (!racct_enable) return; mtx_lock(&racct_lock); racct_destroy_locked(racct); mtx_unlock(&racct_lock); } /* * Increase consumption of 'resource' by 'amount' for 'racct' * and all its parents. Unlike in other cases, 'amount' here * may be less than zero. */ static void racct_adjust_resource(struct racct *racct, int resource, uint64_t amount) { ASSERT_RACCT_ENABLED(); mtx_assert(&racct_lock, MA_OWNED); KASSERT(racct != NULL, ("NULL racct")); racct->r_resources[resource] += amount; if (racct->r_resources[resource] < 0) { KASSERT(RACCT_IS_SLOPPY(resource) || RACCT_IS_DECAYING(resource), ("%s: resource %d usage < 0", __func__, resource)); racct->r_resources[resource] = 0; } /* * There are some cases where the racct %cpu resource would grow * beyond 100% per core. For example in racct_proc_exit() we add * the process %cpu usage to the ucred racct containers. If too * many processes terminated in a short time span, the ucred %cpu * resource could grow too much. Also, the 4BSD scheduler sometimes * returns more than 100% cpu usage for a thread. So we set a sane * boundary here to 100% * the maximum number of CPUs. */ if ((resource == RACCT_PCTCPU) && (racct->r_resources[RACCT_PCTCPU] > 100 * 1000000 * (int64_t)MAXCPU)) racct->r_resources[RACCT_PCTCPU] = 100 * 1000000 * (int64_t)MAXCPU; } static int racct_add_locked(struct proc *p, int resource, uint64_t amount) { #ifdef RCTL int error; #endif ASSERT_RACCT_ENABLED(); SDT_PROBE3(racct, kernel, rusage, add, p, resource, amount); /* * We need proc lock to dereference p->p_ucred. */ PROC_LOCK_ASSERT(p, MA_OWNED); #ifdef RCTL error = rctl_enforce(p, resource, amount); if (error && RACCT_IS_DENIABLE(resource)) { SDT_PROBE3(racct, kernel, rusage, add__failure, p, resource, amount); return (error); } #endif racct_adjust_resource(p->p_racct, resource, amount); racct_add_cred_locked(p->p_ucred, resource, amount); return (0); } /* * Increase allocation of 'resource' by 'amount' for process 'p'. * Return 0 if it's below limits, or an errno value if it's not.
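 *
 * Illustrative caller pattern (patterned after the file descriptor
 * code; the error mapping is an example only):
 *
 *	PROC_LOCK(p);
 *	error = racct_add(p, RACCT_NOFILE, 1);
 *	PROC_UNLOCK(p);
 *	if (error != 0)
 *		return (EMFILE);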
*/ int racct_add(struct proc *p, int resource, uint64_t amount) { int error; if (!racct_enable) return (0); mtx_lock(&racct_lock); error = racct_add_locked(p, resource, amount); mtx_unlock(&racct_lock); return (error); } static void racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount) { struct prison *pr; ASSERT_RACCT_ENABLED(); SDT_PROBE3(racct, kernel, rusage, add__cred, cred, resource, amount); racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, amount); for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource, amount); racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, amount); } /* * Increase allocation of 'resource' by 'amount' for credential 'cred'. * Doesn't check for limits and never fails. * * XXX: Shouldn't this ever return an error? */ void racct_add_cred(struct ucred *cred, int resource, uint64_t amount) { if (!racct_enable) return; mtx_lock(&racct_lock); racct_add_cred_locked(cred, resource, amount); mtx_unlock(&racct_lock); } /* * Increase allocation of 'resource' by 'amount' for process 'p'. * Doesn't check for limits and never fails. */ void racct_add_force(struct proc *p, int resource, uint64_t amount) { if (!racct_enable) return; SDT_PROBE3(racct, kernel, rusage, add__force, p, resource, amount); /* * We need proc lock to dereference p->p_ucred. */ PROC_LOCK_ASSERT(p, MA_OWNED); mtx_lock(&racct_lock); racct_adjust_resource(p->p_racct, resource, amount); mtx_unlock(&racct_lock); racct_add_cred(p->p_ucred, resource, amount); } static int racct_set_locked(struct proc *p, int resource, uint64_t amount) { int64_t old_amount, decayed_amount; int64_t diff_proc, diff_cred; #ifdef RCTL int error; #endif ASSERT_RACCT_ENABLED(); SDT_PROBE3(racct, kernel, rusage, set, p, resource, amount); /* * We need proc lock to dereference p->p_ucred. */ PROC_LOCK_ASSERT(p, MA_OWNED); old_amount = p->p_racct->r_resources[resource]; /* * The diffs may be negative. */ diff_proc = amount - old_amount; if (RACCT_IS_DECAYING(resource)) { /* * Resources in per-credential racct containers may decay. * If this is the case, we need to calculate the difference * between the new amount and the proportional value of the * old amount that has decayed in the ucred racct containers. */ decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE; diff_cred = amount - decayed_amount; } else diff_cred = diff_proc; #ifdef notyet KASSERT(diff_proc >= 0 || RACCT_CAN_DROP(resource), ("%s: usage of non-droppable resource %d dropping", __func__, resource)); #endif #ifdef RCTL if (diff_proc > 0) { error = rctl_enforce(p, resource, diff_proc); if (error && RACCT_IS_DENIABLE(resource)) { SDT_PROBE3(racct, kernel, rusage, set__failure, p, resource, amount); return (error); } } #endif racct_adjust_resource(p->p_racct, resource, diff_proc); if (diff_cred > 0) racct_add_cred_locked(p->p_ucred, resource, diff_cred); else if (diff_cred < 0) racct_sub_cred_locked(p->p_ucred, resource, -diff_cred); return (0); } /* * Set allocation of 'resource' to 'amount' for process 'p'. * Return 0 if it's below limits, or errno, if it's not. * * Note that decreasing the allocation always returns 0, * even if it's above the limit. 
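 *
 * Illustrative caller, patterned after the VM code (the 'map' variable
 * is an example placeholder; the process lock must be held around the
 * call, as asserted in racct_set_locked()):
 *
 *	PROC_LOCK(p);
 *	error = racct_set(p, RACCT_VMEM, map->size);
 *	PROC_UNLOCK(p);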
*/ int racct_set(struct proc *p, int resource, uint64_t amount) { int error; if (!racct_enable) return (0); mtx_lock(&racct_lock); error = racct_set_locked(p, resource, amount); mtx_unlock(&racct_lock); return (error); } static void racct_set_force_locked(struct proc *p, int resource, uint64_t amount) { int64_t old_amount, decayed_amount; int64_t diff_proc, diff_cred; ASSERT_RACCT_ENABLED(); SDT_PROBE3(racct, kernel, rusage, set, p, resource, amount); /* * We need proc lock to dereference p->p_ucred. */ PROC_LOCK_ASSERT(p, MA_OWNED); old_amount = p->p_racct->r_resources[resource]; /* * The diffs may be negative. */ diff_proc = amount - old_amount; if (RACCT_IS_DECAYING(resource)) { /* * Resources in per-credential racct containers may decay. * If this is the case, we need to calculate the difference * between the new amount and the proportional value of the * old amount that has decayed in the ucred racct containers. */ decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE; diff_cred = amount - decayed_amount; } else diff_cred = diff_proc; racct_adjust_resource(p->p_racct, resource, diff_proc); if (diff_cred > 0) racct_add_cred_locked(p->p_ucred, resource, diff_cred); else if (diff_cred < 0) racct_sub_cred_locked(p->p_ucred, resource, -diff_cred); } void racct_set_force(struct proc *p, int resource, uint64_t amount) { if (!racct_enable) return; mtx_lock(&racct_lock); racct_set_force_locked(p, resource, amount); mtx_unlock(&racct_lock); } /* * Returns amount of 'resource' the process 'p' can keep allocated. * Allocating more than that would be denied, unless the resource * is marked undeniable. Amount of already allocated resource does * not matter. */ uint64_t racct_get_limit(struct proc *p, int resource) { if (!racct_enable) return (UINT64_MAX); #ifdef RCTL return (rctl_get_limit(p, resource)); #else return (UINT64_MAX); #endif } /* * Returns amount of 'resource' the process 'p' can keep allocated. * Allocating more than that would be denied, unless the resource * is marked undeniable. Amount of already allocated resource does * matter. */ uint64_t racct_get_available(struct proc *p, int resource) { if (!racct_enable) return (UINT64_MAX); #ifdef RCTL return (rctl_get_available(p, resource)); #else return (UINT64_MAX); #endif } /* * Returns amount of the %cpu resource that process 'p' can add to its %cpu * utilization. Adding more than that would lead to the process being * throttled. */ static int64_t racct_pcpu_available(struct proc *p) { ASSERT_RACCT_ENABLED(); #ifdef RCTL return (rctl_pcpu_available(p)); #else return (INT64_MAX); #endif } /* * Decrease allocation of 'resource' by 'amount' for process 'p'. */ void racct_sub(struct proc *p, int resource, uint64_t amount) { if (!racct_enable) return; SDT_PROBE3(racct, kernel, rusage, sub, p, resource, amount); /* * We need proc lock to dereference p->p_ucred. 
*/ PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(RACCT_CAN_DROP(resource), ("%s: called for non-droppable resource %d", __func__, resource)); mtx_lock(&racct_lock); KASSERT(amount <= p->p_racct->r_resources[resource], ("%s: freeing %ju of resource %d, which is more " "than allocated %jd for %s (pid %d)", __func__, amount, resource, (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid)); racct_adjust_resource(p->p_racct, resource, -amount); racct_sub_cred_locked(p->p_ucred, resource, amount); mtx_unlock(&racct_lock); } static void racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount) { struct prison *pr; ASSERT_RACCT_ENABLED(); SDT_PROBE3(racct, kernel, rusage, sub__cred, cred, resource, amount); #ifdef notyet KASSERT(RACCT_CAN_DROP(resource), ("%s: called for resource %d which can not drop", __func__, resource)); #endif racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, -amount); for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource, -amount); racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, -amount); } /* * Decrease allocation of 'resource' by 'amount' for credential 'cred'. */ void racct_sub_cred(struct ucred *cred, int resource, uint64_t amount) { if (!racct_enable) return; mtx_lock(&racct_lock); racct_sub_cred_locked(cred, resource, amount); mtx_unlock(&racct_lock); } /* * Inherit resource usage information from the parent process. */ int racct_proc_fork(struct proc *parent, struct proc *child) { int i, error = 0; if (!racct_enable) return (0); /* * Create racct for the child process. */ racct_create(&child->p_racct); PROC_LOCK(parent); PROC_LOCK(child); mtx_lock(&racct_lock); #ifdef RCTL error = rctl_proc_fork(parent, child); if (error != 0) goto out; #endif /* Init process cpu time. */ child->p_prev_runtime = 0; child->p_throttled = 0; /* * Inherit resource usage. */ for (i = 0; i <= RACCT_MAX; i++) { if (parent->p_racct->r_resources[i] == 0 || !RACCT_IS_INHERITABLE(i)) continue; error = racct_set_locked(child, i, parent->p_racct->r_resources[i]); if (error != 0) goto out; } error = racct_add_locked(child, RACCT_NPROC, 1); error += racct_add_locked(child, RACCT_NTHR, 1); out: mtx_unlock(&racct_lock); PROC_UNLOCK(child); PROC_UNLOCK(parent); if (error != 0) racct_proc_exit(child); return (error); } /* * Called at the end of fork1(), to handle rules that require the process * to be fully initialized. */ void racct_proc_fork_done(struct proc *child) { #ifdef RCTL if (!racct_enable) return; PROC_LOCK(child); mtx_lock(&racct_lock); rctl_enforce(child, RACCT_NPROC, 0); rctl_enforce(child, RACCT_NTHR, 0); mtx_unlock(&racct_lock); PROC_UNLOCK(child); #endif } void racct_proc_exit(struct proc *p) { int i; uint64_t runtime; struct timeval wallclock; uint64_t pct_estimate, pct; if (!racct_enable) return; PROC_LOCK(p); /* * We don't need to calculate rux, proc_reap() has already done this. 
*/ runtime = cputick2usec(p->p_rux.rux_runtime); #ifdef notyet KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime")); #else if (runtime < p->p_prev_runtime) runtime = p->p_prev_runtime; #endif microuptime(&wallclock); timevalsub(&wallclock, &p->p_stats->p_start); if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { pct_estimate = (1000000 * runtime * 100) / ((uint64_t)wallclock.tv_sec * 1000000 + wallclock.tv_usec); } else pct_estimate = 0; pct = racct_getpcpu(p, pct_estimate); mtx_lock(&racct_lock); racct_set_locked(p, RACCT_CPU, runtime); racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct); for (i = 0; i <= RACCT_MAX; i++) { if (p->p_racct->r_resources[i] == 0) continue; if (!RACCT_IS_RECLAIMABLE(i)) continue; racct_set_locked(p, i, 0); } mtx_unlock(&racct_lock); PROC_UNLOCK(p); #ifdef RCTL rctl_racct_release(p->p_racct); #endif racct_destroy(&p->p_racct); } /* * Called after credentials change, to move resource utilisation * between raccts. */ void racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred, struct ucred *newcred) { struct uidinfo *olduip, *newuip; struct loginclass *oldlc, *newlc; struct prison *oldpr, *newpr, *pr; if (!racct_enable) return; PROC_LOCK_ASSERT(p, MA_NOTOWNED); newuip = newcred->cr_ruidinfo; olduip = oldcred->cr_ruidinfo; newlc = newcred->cr_loginclass; oldlc = oldcred->cr_loginclass; newpr = newcred->cr_prison; oldpr = oldcred->cr_prison; mtx_lock(&racct_lock); if (newuip != olduip) { racct_sub_racct(olduip->ui_racct, p->p_racct); racct_add_racct(newuip->ui_racct, p->p_racct); } if (newlc != oldlc) { racct_sub_racct(oldlc->lc_racct, p->p_racct); racct_add_racct(newlc->lc_racct, p->p_racct); } if (newpr != oldpr) { for (pr = oldpr; pr != NULL; pr = pr->pr_parent) racct_sub_racct(pr->pr_prison_racct->prr_racct, p->p_racct); for (pr = newpr; pr != NULL; pr = pr->pr_parent) racct_add_racct(pr->pr_prison_racct->prr_racct, p->p_racct); } mtx_unlock(&racct_lock); #ifdef RCTL rctl_proc_ucred_changed(p, newcred); #endif } void racct_move(struct racct *dest, struct racct *src) { ASSERT_RACCT_ENABLED(); mtx_lock(&racct_lock); racct_add_racct(dest, src); racct_sub_racct(src, src); mtx_unlock(&racct_lock); } static void racct_proc_throttle(struct proc *p) { struct thread *td; #ifdef SMP int cpuid; #endif ASSERT_RACCT_ENABLED(); PROC_LOCK_ASSERT(p, MA_OWNED); /* * Do not block kernel processes. Also do not block processes with * low %cpu utilization to improve interactivity. */ if (((p->p_flag & (P_SYSTEM | P_KTHREAD)) != 0) || (p->p_racct->r_resources[RACCT_PCTCPU] <= pcpu_threshold)) return; p->p_throttled = 1; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); switch (td->td_state) { case TDS_RUNQ: /* * If the thread is on the scheduler run-queue, we can * not just remove it from there. So we set the flag * TDF_NEEDRESCHED for the thread, so that once it is * running, it is taken off the cpu as soon as possible. */ td->td_flags |= TDF_NEEDRESCHED; break; case TDS_RUNNING: /* * If the thread is running, we request a context * switch for it by setting the TDF_NEEDRESCHED flag. 
*/ td->td_flags |= TDF_NEEDRESCHED; #ifdef SMP cpuid = td->td_oncpu; if ((cpuid != NOCPU) && (td != curthread)) ipi_cpu(cpuid, IPI_AST); #endif break; default: break; } thread_unlock(td); } } static void racct_proc_wakeup(struct proc *p) { ASSERT_RACCT_ENABLED(); PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_throttled) { p->p_throttled = 0; wakeup(p->p_racct); } } static void racct_decay_resource(struct racct *racct, void * res, void* dummy) { int resource; int64_t r_old, r_new; ASSERT_RACCT_ENABLED(); + mtx_assert(&racct_lock, MA_OWNED); resource = *(int *)res; r_old = racct->r_resources[resource]; /* If there is nothing to decay, just exit. */ if (r_old <= 0) return; - mtx_lock(&racct_lock); r_new = r_old * RACCT_DECAY_FACTOR / FSCALE; racct->r_resources[resource] = r_new; +} + +static void +racct_decay_pre(void) +{ + + mtx_lock(&racct_lock); +} + +static void +racct_decay_post(void) +{ + mtx_unlock(&racct_lock); } static void racct_decay(int resource) { ASSERT_RACCT_ENABLED(); - ui_racct_foreach(racct_decay_resource, &resource, NULL); - loginclass_racct_foreach(racct_decay_resource, &resource, NULL); - prison_racct_foreach(racct_decay_resource, &resource, NULL); + ui_racct_foreach(racct_decay_resource, racct_decay_pre, + racct_decay_post, &resource, NULL); + loginclass_racct_foreach(racct_decay_resource, racct_decay_pre, + racct_decay_post, &resource, NULL); + prison_racct_foreach(racct_decay_resource, racct_decay_pre, + racct_decay_post, &resource, NULL); } static void racctd(void) { struct thread *td; struct proc *p; struct timeval wallclock; uint64_t runtime; uint64_t pct, pct_estimate; ASSERT_RACCT_ENABLED(); for (;;) { racct_decay(RACCT_PCTCPU); sx_slock(&allproc_lock); LIST_FOREACH(p, &zombproc, p_list) { PROC_LOCK(p); racct_set(p, RACCT_PCTCPU, 0); PROC_UNLOCK(p); } FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state != PRS_NORMAL) { PROC_UNLOCK(p); continue; } microuptime(&wallclock); timevalsub(&wallclock, &p->p_stats->p_start); PROC_STATLOCK(p); FOREACH_THREAD_IN_PROC(p, td) ruxagg(p, td); runtime = cputick2usec(p->p_rux.rux_runtime); PROC_STATUNLOCK(p); #ifdef notyet KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime")); #else if (runtime < p->p_prev_runtime) runtime = p->p_prev_runtime; #endif p->p_prev_runtime = runtime; if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { pct_estimate = (1000000 * runtime * 100) / ((uint64_t)wallclock.tv_sec * 1000000 + wallclock.tv_usec); } else pct_estimate = 0; pct = racct_getpcpu(p, pct_estimate); mtx_lock(&racct_lock); racct_set_force_locked(p, RACCT_PCTCPU, pct); racct_set_locked(p, RACCT_CPU, runtime); racct_set_locked(p, RACCT_WALLCLOCK, (uint64_t)wallclock.tv_sec * 1000000 + wallclock.tv_usec); mtx_unlock(&racct_lock); PROC_UNLOCK(p); } /* * To ensure that processes are throttled in a fair way, we need * to iterate over all processes again and check the limits * for %cpu resource only after ucred racct containers have been * properly filled. 
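/*
 * Illustrative sketch (userland): racct_decay_resource() above is a
 * fixed-point multiplication by a factor below one. The kernel's
 * RACCT_DECAY_FACTOR and FSCALE come from this file and sys/param.h;
 * the values below are assumed purely for demonstration.
 */
#include <stdint.h>
#include <stdio.h>

#define FSCALE		2048			/* assumed fixed-point base */
#define DECAY_FACTOR	(3 * FSCALE / 10)	/* assumed ~0.3 factor */

int
main(void)
{
	int64_t r = 50000000;	/* 50% of a cpu, in millionths */
	int pass;

	/* Each racctd pass shrinks the accounted %cpu geometrically. */
	for (pass = 1; pass <= 4; pass++) {
		r = r * DECAY_FACTOR / FSCALE;
		printf("pass %d: %jd\n", pass, (intmax_t)r);
	}
	return (0);
}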
*/ FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state != PRS_NORMAL) { PROC_UNLOCK(p); continue; } if (racct_pcpu_available(p) <= 0) racct_proc_throttle(p); else if (p->p_throttled) racct_proc_wakeup(p); PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); pause("-", hz); } } static struct kproc_desc racctd_kp = { "racctd", racctd, NULL }; static void racctd_init(void) { if (!racct_enable) return; kproc_start(&racctd_kp); } SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, racctd_init, NULL); static void racct_init(void) { if (!racct_enable) return; racct_zone = uma_zcreate("racct", sizeof(struct racct), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); /* * XXX: Move this somewhere. */ prison0.pr_prison_racct = prison_racct_find("0"); } SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL); #endif /* !RACCT */ Index: projects/powernv/kern/kern_rctl.c =================================================================== --- projects/powernv/kern/kern_rctl.c (revision 290990) +++ projects/powernv/kern/kern_rctl.c (revision 290991) @@ -1,1951 +1,1972 @@ /*- * Copyright (c) 2010 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RCTL #ifndef RACCT #error "The RCTL option requires the RACCT option" #endif FEATURE(rctl, "Resource Limits"); #define HRF_DEFAULT 0 #define HRF_DONT_INHERIT 1 #define HRF_DONT_ACCUMULATE 2 #define RCTL_MAX_INBUFSIZE 4 * 1024 #define RCTL_MAX_OUTBUFSIZE 16 * 1024 * 1024 #define RCTL_LOG_BUFSIZE 128 #define RCTL_PCPU_SHIFT (10 * 1000000) unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE; SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW, 0, "Resource Limits"); SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN, &rctl_maxbufsize, 0, "Maximum output buffer size"); /* * 'rctl_rule_link' connects a rule with every racct it's related to. 
* For example, rule 'user:X:openfiles:deny=N/process' is linked * with uidinfo for user X, and to each process of that user. */ struct rctl_rule_link { LIST_ENTRY(rctl_rule_link) rrl_next; struct rctl_rule *rrl_rule; int rrl_exceeded; }; struct dict { const char *d_name; int d_value; }; static struct dict subjectnames[] = { { "process", RCTL_SUBJECT_TYPE_PROCESS }, { "user", RCTL_SUBJECT_TYPE_USER }, { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS }, { "jail", RCTL_SUBJECT_TYPE_JAIL }, { NULL, -1 }}; static struct dict resourcenames[] = { { "cputime", RACCT_CPU }, { "datasize", RACCT_DATA }, { "stacksize", RACCT_STACK }, { "coredumpsize", RACCT_CORE }, { "memoryuse", RACCT_RSS }, { "memorylocked", RACCT_MEMLOCK }, { "maxproc", RACCT_NPROC }, { "openfiles", RACCT_NOFILE }, { "vmemoryuse", RACCT_VMEM }, { "pseudoterminals", RACCT_NPTS }, { "swapuse", RACCT_SWAP }, { "nthr", RACCT_NTHR }, { "msgqqueued", RACCT_MSGQQUEUED }, { "msgqsize", RACCT_MSGQSIZE }, { "nmsgq", RACCT_NMSGQ }, { "nsem", RACCT_NSEM }, { "nsemop", RACCT_NSEMOP }, { "nshm", RACCT_NSHM }, { "shmsize", RACCT_SHMSIZE }, { "wallclock", RACCT_WALLCLOCK }, { "pcpu", RACCT_PCTCPU }, { NULL, -1 }}; static struct dict actionnames[] = { { "sighup", RCTL_ACTION_SIGHUP }, { "sigint", RCTL_ACTION_SIGINT }, { "sigquit", RCTL_ACTION_SIGQUIT }, { "sigill", RCTL_ACTION_SIGILL }, { "sigtrap", RCTL_ACTION_SIGTRAP }, { "sigabrt", RCTL_ACTION_SIGABRT }, { "sigemt", RCTL_ACTION_SIGEMT }, { "sigfpe", RCTL_ACTION_SIGFPE }, { "sigkill", RCTL_ACTION_SIGKILL }, { "sigbus", RCTL_ACTION_SIGBUS }, { "sigsegv", RCTL_ACTION_SIGSEGV }, { "sigsys", RCTL_ACTION_SIGSYS }, { "sigpipe", RCTL_ACTION_SIGPIPE }, { "sigalrm", RCTL_ACTION_SIGALRM }, { "sigterm", RCTL_ACTION_SIGTERM }, { "sigurg", RCTL_ACTION_SIGURG }, { "sigstop", RCTL_ACTION_SIGSTOP }, { "sigtstp", RCTL_ACTION_SIGTSTP }, { "sigchld", RCTL_ACTION_SIGCHLD }, { "sigttin", RCTL_ACTION_SIGTTIN }, { "sigttou", RCTL_ACTION_SIGTTOU }, { "sigio", RCTL_ACTION_SIGIO }, { "sigxcpu", RCTL_ACTION_SIGXCPU }, { "sigxfsz", RCTL_ACTION_SIGXFSZ }, { "sigvtalrm", RCTL_ACTION_SIGVTALRM }, { "sigprof", RCTL_ACTION_SIGPROF }, { "sigwinch", RCTL_ACTION_SIGWINCH }, { "siginfo", RCTL_ACTION_SIGINFO }, { "sigusr1", RCTL_ACTION_SIGUSR1 }, { "sigusr2", RCTL_ACTION_SIGUSR2 }, { "sigthr", RCTL_ACTION_SIGTHR }, { "deny", RCTL_ACTION_DENY }, { "log", RCTL_ACTION_LOG }, { "devctl", RCTL_ACTION_DEVCTL }, { NULL, -1 }}; static void rctl_init(void); SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL); static uma_zone_t rctl_rule_link_zone; static uma_zone_t rctl_rule_zone; static struct rwlock rctl_lock; RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock"); static int rctl_rule_fully_specified(const struct rctl_rule *rule); static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule); static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits"); static const char * rctl_subject_type_name(int subject) { int i; for (i = 0; subjectnames[i].d_name != NULL; i++) { if (subjectnames[i].d_value == subject) return (subjectnames[i].d_name); } panic("rctl_subject_type_name: unknown subject type %d", subject); } static const char * rctl_action_name(int action) { int i; for (i = 0; actionnames[i].d_name != NULL; i++) { if (actionnames[i].d_value == action) return (actionnames[i].d_name); } panic("rctl_action_name: unknown action %d", action); } const char * rctl_resource_name(int resource) { int i; for (i = 0; resourcenames[i].d_name != NULL; i++) { if (resourcenames[i].d_value == resource) return (resourcenames[i].d_name); } 
panic("rctl_resource_name: unknown resource %d", resource); } /* * Return the amount of resource that can be allocated by 'p' before * hitting 'rule'. */ static int64_t rctl_available_resource(const struct proc *p, const struct rctl_rule *rule) { int resource; int64_t available = INT64_MAX; struct ucred *cred = p->p_ucred; ASSERT_RACCT_ENABLED(); rw_assert(&rctl_lock, RA_LOCKED); resource = rule->rr_resource; switch (rule->rr_per) { case RCTL_SUBJECT_TYPE_PROCESS: available = rule->rr_amount - p->p_racct->r_resources[resource]; break; case RCTL_SUBJECT_TYPE_USER: available = rule->rr_amount - cred->cr_ruidinfo->ui_racct->r_resources[resource]; break; case RCTL_SUBJECT_TYPE_LOGINCLASS: available = rule->rr_amount - cred->cr_loginclass->lc_racct->r_resources[resource]; break; case RCTL_SUBJECT_TYPE_JAIL: available = rule->rr_amount - cred->cr_prison->pr_prison_racct->prr_racct-> r_resources[resource]; break; default: panic("rctl_compute_available: unknown per %d", rule->rr_per); } return (available); } /* * Return non-zero if allocating 'amount' by proc 'p' would exceed * resource limit specified by 'rule'. */ static int rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule, int64_t amount) { int64_t available; ASSERT_RACCT_ENABLED(); rw_assert(&rctl_lock, RA_LOCKED); available = rctl_available_resource(p, rule); if (available >= amount) return (0); return (1); } /* * Special version of rctl_available() function for the %cpu resource. * We slightly cheat here and return less than we normally would. */ int64_t rctl_pcpu_available(const struct proc *p) { struct rctl_rule *rule; struct rctl_rule_link *link; int64_t available, minavailable, limit; ASSERT_RACCT_ENABLED(); minavailable = INT64_MAX; limit = 0; rw_rlock(&rctl_lock); LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { rule = link->rrl_rule; if (rule->rr_resource != RACCT_PCTCPU) continue; if (rule->rr_action != RCTL_ACTION_DENY) continue; available = rctl_available_resource(p, rule); if (available < minavailable) { minavailable = available; limit = rule->rr_amount; } } rw_runlock(&rctl_lock); /* * Return slightly less than actual value of the available * %cpu resource. This makes %cpu throttling more agressive * and lets us act sooner than the limits are already exceeded. */ if (limit != 0) { if (limit > 2 * RCTL_PCPU_SHIFT) minavailable -= RCTL_PCPU_SHIFT; else minavailable -= (limit / 2); } return (minavailable); } /* * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition * to what it keeps allocated now. Returns non-zero if the allocation should * be denied, 0 otherwise. */ int rctl_enforce(struct proc *p, int resource, uint64_t amount) { struct rctl_rule *rule; struct rctl_rule_link *link; struct sbuf sb; int should_deny = 0; char *buf; static int curtime = 0; static struct timeval lasttime; ASSERT_RACCT_ENABLED(); rw_rlock(&rctl_lock); /* * There may be more than one matching rule; go through all of them. * Denial should be done last, after logging and sending signals. */ LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { rule = link->rrl_rule; if (rule->rr_resource != resource) continue; if (!rctl_would_exceed(p, rule, amount)) { link->rrl_exceeded = 0; continue; } switch (rule->rr_action) { case RCTL_ACTION_DENY: should_deny = 1; continue; case RCTL_ACTION_LOG: /* * If rrl_exceeded != 0, it means we've already * logged a warning for this process. 
*/ if (link->rrl_exceeded != 0) continue; /* * If the process state is not fully initialized yet, * we can't access most of the required fields, e.g. * p->p_comm. This happens when called from fork1(). * Ignore this rule for now; it will be processed just * after fork, when called from racct_proc_fork_done(). */ if (p->p_state != PRS_NORMAL) continue; if (!ppsratecheck(&lasttime, &curtime, 10)) continue; buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); if (buf == NULL) { printf("rctl_enforce: out of memory\n"); continue; } sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); rctl_rule_to_sbuf(&sb, rule); sbuf_finish(&sb); printf("rctl: rule \"%s\" matched by pid %d " "(%s), uid %d, jail %s\n", sbuf_data(&sb), p->p_pid, p->p_comm, p->p_ucred->cr_uid, p->p_ucred->cr_prison->pr_prison_racct->prr_name); sbuf_delete(&sb); free(buf, M_RCTL); link->rrl_exceeded = 1; continue; case RCTL_ACTION_DEVCTL: if (link->rrl_exceeded != 0) continue; if (p->p_state != PRS_NORMAL) continue; buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT); if (buf == NULL) { printf("rctl_enforce: out of memory\n"); continue; } sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN); sbuf_printf(&sb, "rule="); rctl_rule_to_sbuf(&sb, rule); sbuf_printf(&sb, " pid=%d ruid=%d jail=%s", p->p_pid, p->p_ucred->cr_ruid, p->p_ucred->cr_prison->pr_prison_racct->prr_name); sbuf_finish(&sb); devctl_notify_f("RCTL", "rule", "matched", sbuf_data(&sb), M_NOWAIT); sbuf_delete(&sb); free(buf, M_RCTL); link->rrl_exceeded = 1; continue; default: if (link->rrl_exceeded != 0) continue; if (p->p_state != PRS_NORMAL) continue; KASSERT(rule->rr_action > 0 && rule->rr_action <= RCTL_ACTION_SIGNAL_MAX, ("rctl_enforce: unknown action %d", rule->rr_action)); /* * We're using the fact that RCTL_ACTION_SIG* values * are equal to their counterparts from sys/signal.h. */ kern_psignal(p, rule->rr_action); link->rrl_exceeded = 1; continue; } } rw_runlock(&rctl_lock); if (should_deny) { /* * Return fake error code; the caller should change it * into one proper for the situation - EFSIZ, ENOMEM etc. */ return (EDOOFUS); } return (0); } uint64_t rctl_get_limit(struct proc *p, int resource) { struct rctl_rule *rule; struct rctl_rule_link *link; uint64_t amount = UINT64_MAX; ASSERT_RACCT_ENABLED(); rw_rlock(&rctl_lock); /* * There may be more than one matching rule; go through all of them. * Denial should be done last, after logging and sending signals. */ LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { rule = link->rrl_rule; if (rule->rr_resource != resource) continue; if (rule->rr_action != RCTL_ACTION_DENY) continue; if (rule->rr_amount < amount) amount = rule->rr_amount; } rw_runlock(&rctl_lock); return (amount); } uint64_t rctl_get_available(struct proc *p, int resource) { struct rctl_rule *rule; struct rctl_rule_link *link; int64_t available, minavailable, allocated; minavailable = INT64_MAX; ASSERT_RACCT_ENABLED(); rw_rlock(&rctl_lock); /* * There may be more than one matching rule; go through all of them. * Denial should be done last, after logging and sending signals. */ LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { rule = link->rrl_rule; if (rule->rr_resource != resource) continue; if (rule->rr_action != RCTL_ACTION_DENY) continue; available = rctl_available_resource(p, rule); if (available < minavailable) minavailable = available; } rw_runlock(&rctl_lock); /* * XXX: Think about this _hard_. 
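/*
 * Illustrative sketch (FreeBSD userland, hypothetical caller): the
 * EDOOFUS returned by rctl_enforce() above is only a placeholder; each
 * caller is expected to rewrite it into an errno that fits its context,
 * e.g. ENOMEM for a denied memory allocation.
 */
#include <errno.h>
#include <stdio.h>

static int
fake_enforce(int would_exceed)
{
	return (would_exceed ? EDOOFUS : 0);	/* placeholder error code */
}

int
main(void)
{
	int error;

	error = fake_enforce(1);
	if (error != 0)
		error = ENOMEM;		/* translate for this call site */
	printf("error = %d\n", error);
	return (0);
}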
*/ allocated = p->p_racct->r_resources[resource]; if (minavailable < INT64_MAX - allocated) minavailable += allocated; if (minavailable < 0) minavailable = 0; return (minavailable); } static int rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter) { ASSERT_RACCT_ENABLED(); if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) { if (rule->rr_subject_type != filter->rr_subject_type) return (0); switch (filter->rr_subject_type) { case RCTL_SUBJECT_TYPE_PROCESS: if (filter->rr_subject.rs_proc != NULL && rule->rr_subject.rs_proc != filter->rr_subject.rs_proc) return (0); break; case RCTL_SUBJECT_TYPE_USER: if (filter->rr_subject.rs_uip != NULL && rule->rr_subject.rs_uip != filter->rr_subject.rs_uip) return (0); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: if (filter->rr_subject.rs_loginclass != NULL && rule->rr_subject.rs_loginclass != filter->rr_subject.rs_loginclass) return (0); break; case RCTL_SUBJECT_TYPE_JAIL: if (filter->rr_subject.rs_prison_racct != NULL && rule->rr_subject.rs_prison_racct != filter->rr_subject.rs_prison_racct) return (0); break; default: panic("rctl_rule_matches: unknown subject type %d", filter->rr_subject_type); } } if (filter->rr_resource != RACCT_UNDEFINED) { if (rule->rr_resource != filter->rr_resource) return (0); } if (filter->rr_action != RCTL_ACTION_UNDEFINED) { if (rule->rr_action != filter->rr_action) return (0); } if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) { if (rule->rr_amount != filter->rr_amount) return (0); } if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) { if (rule->rr_per != filter->rr_per) return (0); } return (1); } static int str2value(const char *str, int *value, struct dict *table) { int i; if (value == NULL) return (EINVAL); for (i = 0; table[i].d_name != NULL; i++) { if (strcasecmp(table[i].d_name, str) == 0) { *value = table[i].d_value; return (0); } } return (EINVAL); } static int str2id(const char *str, id_t *value) { char *end; if (str == NULL) return (EINVAL); *value = strtoul(str, &end, 10); if ((size_t)(end - str) != strlen(str)) return (EINVAL); return (0); } static int str2int64(const char *str, int64_t *value) { char *end; if (str == NULL) return (EINVAL); *value = strtoul(str, &end, 10); if ((size_t)(end - str) != strlen(str)) return (EINVAL); return (0); } /* * Connect the rule to the racct, increasing refcount for the rule. */ static void rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule) { struct rctl_rule_link *link; ASSERT_RACCT_ENABLED(); KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); rctl_rule_acquire(rule); link = uma_zalloc(rctl_rule_link_zone, M_WAITOK); link->rrl_rule = rule; link->rrl_exceeded = 0; rw_wlock(&rctl_lock); LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); rw_wunlock(&rctl_lock); } static int rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule) { struct rctl_rule_link *link; ASSERT_RACCT_ENABLED(); KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); rw_assert(&rctl_lock, RA_WLOCKED); link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT); if (link == NULL) return (ENOMEM); rctl_rule_acquire(rule); link->rrl_rule = rule; link->rrl_exceeded = 0; LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next); return (0); } /* * Remove limits for a rules matching the filter and release * the refcounts for the rules, possibly freeing them. Returns * the number of limit structures removed. 
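/*
 * Illustrative sketch (userland): rctl_rule_matches() above treats every
 * UNDEFINED field in the filter as a wildcard; only fields that are set
 * must compare equal. A reduced model with two fields:
 */
#include <stdio.h>

#define UNDEFINED	(-1)

struct mini_rule {
	int resource;
	int action;
};

static int
matches(const struct mini_rule *rule, const struct mini_rule *filter)
{
	if (filter->resource != UNDEFINED && rule->resource != filter->resource)
		return (0);
	if (filter->action != UNDEFINED && rule->action != filter->action)
		return (0);
	return (1);
}

int
main(void)
{
	struct mini_rule rule = { 5, 2 };
	struct mini_rule any = { UNDEFINED, UNDEFINED };
	struct mini_rule wrong = { 5, 3 };

	printf("%d %d\n", matches(&rule, &any), matches(&rule, &wrong));
	return (0);
}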
*/ static int rctl_racct_remove_rules(struct racct *racct, const struct rctl_rule *filter) { int removed = 0; struct rctl_rule_link *link, *linktmp; ASSERT_RACCT_ENABLED(); rw_assert(&rctl_lock, RA_WLOCKED); LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) { if (!rctl_rule_matches(link->rrl_rule, filter)) continue; LIST_REMOVE(link, rrl_next); rctl_rule_release(link->rrl_rule); uma_zfree(rctl_rule_link_zone, link); removed++; } return (removed); } static void rctl_rule_acquire_subject(struct rctl_rule *rule) { ASSERT_RACCT_ENABLED(); switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_UNDEFINED: case RCTL_SUBJECT_TYPE_PROCESS: break; case RCTL_SUBJECT_TYPE_JAIL: if (rule->rr_subject.rs_prison_racct != NULL) prison_racct_hold(rule->rr_subject.rs_prison_racct); break; case RCTL_SUBJECT_TYPE_USER: if (rule->rr_subject.rs_uip != NULL) uihold(rule->rr_subject.rs_uip); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: if (rule->rr_subject.rs_loginclass != NULL) loginclass_hold(rule->rr_subject.rs_loginclass); break; default: panic("rctl_rule_acquire_subject: unknown subject type %d", rule->rr_subject_type); } } static void rctl_rule_release_subject(struct rctl_rule *rule) { ASSERT_RACCT_ENABLED(); switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_UNDEFINED: case RCTL_SUBJECT_TYPE_PROCESS: break; case RCTL_SUBJECT_TYPE_JAIL: if (rule->rr_subject.rs_prison_racct != NULL) prison_racct_free(rule->rr_subject.rs_prison_racct); break; case RCTL_SUBJECT_TYPE_USER: if (rule->rr_subject.rs_uip != NULL) uifree(rule->rr_subject.rs_uip); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: if (rule->rr_subject.rs_loginclass != NULL) loginclass_free(rule->rr_subject.rs_loginclass); break; default: panic("rctl_rule_release_subject: unknown subject type %d", rule->rr_subject_type); } } struct rctl_rule * rctl_rule_alloc(int flags) { struct rctl_rule *rule; ASSERT_RACCT_ENABLED(); rule = uma_zalloc(rctl_rule_zone, flags); if (rule == NULL) return (NULL); rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; rule->rr_subject.rs_proc = NULL; rule->rr_subject.rs_uip = NULL; rule->rr_subject.rs_loginclass = NULL; rule->rr_subject.rs_prison_racct = NULL; rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; rule->rr_resource = RACCT_UNDEFINED; rule->rr_action = RCTL_ACTION_UNDEFINED; rule->rr_amount = RCTL_AMOUNT_UNDEFINED; refcount_init(&rule->rr_refcount, 1); return (rule); } struct rctl_rule * rctl_rule_duplicate(const struct rctl_rule *rule, int flags) { struct rctl_rule *copy; ASSERT_RACCT_ENABLED(); copy = uma_zalloc(rctl_rule_zone, flags); if (copy == NULL) return (NULL); copy->rr_subject_type = rule->rr_subject_type; copy->rr_subject.rs_proc = rule->rr_subject.rs_proc; copy->rr_subject.rs_uip = rule->rr_subject.rs_uip; copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass; copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct; copy->rr_per = rule->rr_per; copy->rr_resource = rule->rr_resource; copy->rr_action = rule->rr_action; copy->rr_amount = rule->rr_amount; refcount_init(&copy->rr_refcount, 1); rctl_rule_acquire_subject(copy); return (copy); } void rctl_rule_acquire(struct rctl_rule *rule) { ASSERT_RACCT_ENABLED(); KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); refcount_acquire(&rule->rr_refcount); } static void rctl_rule_free(void *context, int pending) { struct rctl_rule *rule; rule = (struct rctl_rule *)context; ASSERT_RACCT_ENABLED(); KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0")); /* * We don't need locking here; rule is guaranteed to be inaccessible.
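/*
 * Illustrative sketch (userland, single-threaded model; the kernel
 * version uses atomics): the refcounting used here frees a rule only
 * when the final reference goes away; refcount_release() reports true
 * exactly once, for the last drop.
 */
#include <stdio.h>

static int refcount = 2;

static int
release(void)
{
	return (--refcount == 0);	/* non-zero only for the last drop */
}

int
main(void)
{
	printf("first release frees: %d\n", release());	/* 0 */
	printf("last release frees: %d\n", release());	/* 1 */
	return (0);
}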
*/ rctl_rule_release_subject(rule); uma_zfree(rctl_rule_zone, rule); } void rctl_rule_release(struct rctl_rule *rule) { ASSERT_RACCT_ENABLED(); KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0")); if (refcount_release(&rule->rr_refcount)) { /* * rctl_rule_release() is often called when iterating * over all the uidinfo structures in the system, * holding uihashtbl_lock. Since rctl_rule_free() * might end up calling uifree(), this would lead * to lock recursion. Use taskqueue to avoid this. */ TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule); taskqueue_enqueue(taskqueue_thread, &rule->rr_task); } } static int rctl_rule_fully_specified(const struct rctl_rule *rule) { ASSERT_RACCT_ENABLED(); switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_UNDEFINED: return (0); case RCTL_SUBJECT_TYPE_PROCESS: if (rule->rr_subject.rs_proc == NULL) return (0); break; case RCTL_SUBJECT_TYPE_USER: if (rule->rr_subject.rs_uip == NULL) return (0); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: if (rule->rr_subject.rs_loginclass == NULL) return (0); break; case RCTL_SUBJECT_TYPE_JAIL: if (rule->rr_subject.rs_prison_racct == NULL) return (0); break; default: panic("rctl_rule_fully_specified: unknown subject type %d", rule->rr_subject_type); } if (rule->rr_resource == RACCT_UNDEFINED) return (0); if (rule->rr_action == RCTL_ACTION_UNDEFINED) return (0); if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED) return (0); if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED) return (0); return (1); } static int rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep) { int error = 0; char *subjectstr, *subject_idstr, *resourcestr, *actionstr, *amountstr, *perstr; struct rctl_rule *rule; id_t id; ASSERT_RACCT_ENABLED(); rule = rctl_rule_alloc(M_WAITOK); subjectstr = strsep(&rulestr, ":"); subject_idstr = strsep(&rulestr, ":"); resourcestr = strsep(&rulestr, ":"); actionstr = strsep(&rulestr, "=/"); amountstr = strsep(&rulestr, "/"); perstr = rulestr; if (subjectstr == NULL || subjectstr[0] == '\0') rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED; else { error = str2value(subjectstr, &rule->rr_subject_type, subjectnames); if (error != 0) goto out; } if (subject_idstr == NULL || subject_idstr[0] == '\0') { rule->rr_subject.rs_proc = NULL; rule->rr_subject.rs_uip = NULL; rule->rr_subject.rs_loginclass = NULL; rule->rr_subject.rs_prison_racct = NULL; } else { switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_UNDEFINED: error = EINVAL; goto out; case RCTL_SUBJECT_TYPE_PROCESS: error = str2id(subject_idstr, &id); if (error != 0) goto out; sx_assert(&allproc_lock, SA_LOCKED); rule->rr_subject.rs_proc = pfind(id); if (rule->rr_subject.rs_proc == NULL) { error = ESRCH; goto out; } PROC_UNLOCK(rule->rr_subject.rs_proc); break; case RCTL_SUBJECT_TYPE_USER: error = str2id(subject_idstr, &id); if (error != 0) goto out; rule->rr_subject.rs_uip = uifind(id); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: rule->rr_subject.rs_loginclass = loginclass_find(subject_idstr); if (rule->rr_subject.rs_loginclass == NULL) { error = ENAMETOOLONG; goto out; } break; case RCTL_SUBJECT_TYPE_JAIL: rule->rr_subject.rs_prison_racct = prison_racct_find(subject_idstr); if (rule->rr_subject.rs_prison_racct == NULL) { error = ENAMETOOLONG; goto out; } break; default: panic("rctl_string_to_rule: unknown subject type %d", rule->rr_subject_type); } } if (resourcestr == NULL || resourcestr[0] == '\0') rule->rr_resource = RACCT_UNDEFINED; else { error = str2value(resourcestr, &rule->rr_resource, resourcenames); if (error != 0) goto out; } if (actionstr == 
NULL || actionstr[0] == '\0') rule->rr_action = RCTL_ACTION_UNDEFINED; else { error = str2value(actionstr, &rule->rr_action, actionnames); if (error != 0) goto out; } if (amountstr == NULL || amountstr[0] == '\0') rule->rr_amount = RCTL_AMOUNT_UNDEFINED; else { error = str2int64(amountstr, &rule->rr_amount); if (error != 0) goto out; if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) rule->rr_amount *= 1000000; } if (perstr == NULL || perstr[0] == '\0') rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED; else { error = str2value(perstr, &rule->rr_per, subjectnames); if (error != 0) goto out; } out: if (error == 0) *rulep = rule; else rctl_rule_release(rule); return (error); } /* * Link a rule with all the subjects it applies to. */ int rctl_rule_add(struct rctl_rule *rule) { struct proc *p; struct ucred *cred; struct uidinfo *uip; struct prison *pr; struct prison_racct *prr; struct loginclass *lc; struct rctl_rule *rule2; int match; ASSERT_RACCT_ENABLED(); KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified")); /* * Some rules just don't make sense. Note that the one below * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU, * for example, is not deniable in the racct sense, but the * limit is enforced in a different way, so "deny" rules for %CPU * do make sense. */ if (rule->rr_action == RCTL_ACTION_DENY && (rule->rr_resource == RACCT_CPU || rule->rr_resource == RACCT_WALLCLOCK)) return (EOPNOTSUPP); if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS && RACCT_IS_SLOPPY(rule->rr_resource)) return (EOPNOTSUPP); /* * Make sure there are no duplicated rules. Also, for the "deny" * rules, remove ones differing only by "amount". */ if (rule->rr_action == RCTL_ACTION_DENY) { rule2 = rctl_rule_duplicate(rule, M_WAITOK); rule2->rr_amount = RCTL_AMOUNT_UNDEFINED; rctl_rule_remove(rule2); rctl_rule_release(rule2); } else rctl_rule_remove(rule); switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_PROCESS: p = rule->rr_subject.rs_proc; KASSERT(p != NULL, ("rctl_rule_add: NULL proc")); rctl_racct_add_rule(p->p_racct, rule); /* * In case of per-process rule, we don't have anything more * to do. */ return (0); case RCTL_SUBJECT_TYPE_USER: uip = rule->rr_subject.rs_uip; KASSERT(uip != NULL, ("rctl_rule_add: NULL uip")); rctl_racct_add_rule(uip->ui_racct, rule); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: lc = rule->rr_subject.rs_loginclass; KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass")); rctl_racct_add_rule(lc->lc_racct, rule); break; case RCTL_SUBJECT_TYPE_JAIL: prr = rule->rr_subject.rs_prison_racct; KASSERT(prr != NULL, ("rctl_rule_add: NULL pr")); rctl_racct_add_rule(prr->prr_racct, rule); break; default: panic("rctl_rule_add: unknown subject type %d", rule->rr_subject_type); } /* * Now go through all the processes and add the new rule to the ones * it applies to. 
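/*
 * Illustrative sketch (userland): the strsep() sequence in
 * rctl_string_to_rule() above splits "subject:id:resource:action=amount/per"
 * into six fields; the "=/" separator set lets the action be terminated by
 * either '=' or '/'. The example input follows the
 * 'user:X:openfiles:deny=N/process' form mentioned earlier.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
	char buf[] = "user:1001:openfiles:deny=128/process";
	char *rulestr = buf;
	char *subject, *id, *resource, *action, *amount, *per;

	subject = strsep(&rulestr, ":");
	id = strsep(&rulestr, ":");
	resource = strsep(&rulestr, ":");
	action = strsep(&rulestr, "=/");
	amount = strsep(&rulestr, "/");
	per = rulestr;

	printf("%s | %s | %s | %s | %s | %s\n",
	    subject, id, resource, action, amount, per);
	return (0);
}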
*/ sx_assert(&allproc_lock, SA_LOCKED); FOREACH_PROC_IN_SYSTEM(p) { cred = p->p_ucred; switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_USER: if (cred->cr_uidinfo == rule->rr_subject.rs_uip || cred->cr_ruidinfo == rule->rr_subject.rs_uip) break; continue; case RCTL_SUBJECT_TYPE_LOGINCLASS: if (cred->cr_loginclass == rule->rr_subject.rs_loginclass) break; continue; case RCTL_SUBJECT_TYPE_JAIL: match = 0; for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) { if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) { match = 1; break; } } if (match) break; continue; default: panic("rctl_rule_add: unknown subject type %d", rule->rr_subject_type); } rctl_racct_add_rule(p->p_racct, rule); } return (0); } static void +rctl_rule_pre_callback(void) +{ + + rw_wlock(&rctl_lock); +} + +static void +rctl_rule_post_callback(void) +{ + + rw_wunlock(&rctl_lock); +} + +static void rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3) { struct rctl_rule *filter = (struct rctl_rule *)arg2; int found = 0; ASSERT_RACCT_ENABLED(); + rw_assert(&rctl_lock, RA_WLOCKED); - rw_wlock(&rctl_lock); found += rctl_racct_remove_rules(racct, filter); - rw_wunlock(&rctl_lock); *((int *)arg3) += found; } /* * Remove all rules that match the filter. */ int rctl_rule_remove(struct rctl_rule *filter) { int found = 0; struct proc *p; ASSERT_RACCT_ENABLED(); if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS && filter->rr_subject.rs_proc != NULL) { p = filter->rr_subject.rs_proc; rw_wlock(&rctl_lock); found = rctl_racct_remove_rules(p->p_racct, filter); rw_wunlock(&rctl_lock); if (found) return (0); return (ESRCH); } - loginclass_racct_foreach(rctl_rule_remove_callback, filter, - (void *)&found); - ui_racct_foreach(rctl_rule_remove_callback, filter, - (void *)&found); - prison_racct_foreach(rctl_rule_remove_callback, filter, - (void *)&found); + loginclass_racct_foreach(rctl_rule_remove_callback, + rctl_rule_pre_callback, rctl_rule_post_callback, + filter, (void *)&found); + ui_racct_foreach(rctl_rule_remove_callback, + rctl_rule_pre_callback, rctl_rule_post_callback, + filter, (void *)&found); + prison_racct_foreach(rctl_rule_remove_callback, + rctl_rule_pre_callback, rctl_rule_post_callback, + filter, (void *)&found); sx_assert(&allproc_lock, SA_LOCKED); rw_wlock(&rctl_lock); FOREACH_PROC_IN_SYSTEM(p) { found += rctl_racct_remove_rules(p->p_racct, filter); } rw_wunlock(&rctl_lock); if (found) return (0); return (ESRCH); } /* * Appends a rule to the sbuf. 
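/*
 * Illustrative sketch (userland, pthreads standing in for the kernel
 * rwlock): the pre/post callbacks added above let each *_racct_foreach()
 * helper take the lock once around the whole iteration, instead of the
 * per-racct callback locking and unlocking on every item.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void
pre(void)
{
	pthread_mutex_lock(&lock);
}

static void
post(void)
{
	pthread_mutex_unlock(&lock);
}

static void
foreach(void (*cb)(int), void (*precb)(void), void (*postcb)(void),
    const int *items, int n)
{
	int i;

	precb();		/* one acquisition for the whole walk */
	for (i = 0; i < n; i++)
		cb(items[i]);
	postcb();
}

static void
visit(int item)
{
	printf("visiting %d\n", item);	/* runs with the lock held */
}

int
main(void)
{
	const int items[] = { 1, 2, 3 };

	foreach(visit, pre, post, items, 3);
	return (0);
}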
*/ static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule) { int64_t amount; ASSERT_RACCT_ENABLED(); sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type)); switch (rule->rr_subject_type) { case RCTL_SUBJECT_TYPE_PROCESS: if (rule->rr_subject.rs_proc == NULL) sbuf_printf(sb, ":"); else sbuf_printf(sb, "%d:", rule->rr_subject.rs_proc->p_pid); break; case RCTL_SUBJECT_TYPE_USER: if (rule->rr_subject.rs_uip == NULL) sbuf_printf(sb, ":"); else sbuf_printf(sb, "%d:", rule->rr_subject.rs_uip->ui_uid); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: if (rule->rr_subject.rs_loginclass == NULL) sbuf_printf(sb, ":"); else sbuf_printf(sb, "%s:", rule->rr_subject.rs_loginclass->lc_name); break; case RCTL_SUBJECT_TYPE_JAIL: if (rule->rr_subject.rs_prison_racct == NULL) sbuf_printf(sb, ":"); else sbuf_printf(sb, "%s:", rule->rr_subject.rs_prison_racct->prr_name); break; default: panic("rctl_rule_to_sbuf: unknown subject type %d", rule->rr_subject_type); } amount = rule->rr_amount; if (amount != RCTL_AMOUNT_UNDEFINED && RACCT_IS_IN_MILLIONS(rule->rr_resource)) amount /= 1000000; sbuf_printf(sb, "%s:%s=%jd", rctl_resource_name(rule->rr_resource), rctl_action_name(rule->rr_action), amount); if (rule->rr_per != rule->rr_subject_type) sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per)); } /* * Routine used by RCTL syscalls to read in input string. */ static int rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen) { int error; char *str; ASSERT_RACCT_ENABLED(); if (inbuflen <= 0) return (EINVAL); if (inbuflen > RCTL_MAX_INBUFSIZE) return (E2BIG); str = malloc(inbuflen + 1, M_RCTL, M_WAITOK); error = copyinstr(inbufp, str, inbuflen, NULL); if (error != 0) { free(str, M_RCTL); return (error); } *inputstr = str; return (0); } /* * Routine used by RCTL syscalls to write out output string. 
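/*
 * Illustrative sketch (userland): rctl_rule_to_sbuf() above renders a
 * rule as "subject:id:resource:action=amount", appending "/per" only
 * when 'per' differs from the subject type; %cpu-style amounts are
 * divided back down from millionths first. A snprintf model of the
 * same layout:
 */
#include <stdio.h>

int
main(void)
{
	char out[64];
	long long amount = 128;
	const char *per = "process";	/* differs from subject "user" */

	snprintf(out, sizeof(out), "%s:%s:%s:%s=%lld/%s",
	    "user", "1001", "openfiles", "deny", amount, per);
	printf("%s\n", out);	/* user:1001:openfiles:deny=128/process */
	return (0);
}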
*/ static int rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen) { int error; ASSERT_RACCT_ENABLED(); if (outputsbuf == NULL) return (0); sbuf_finish(outputsbuf); if (outbuflen < sbuf_len(outputsbuf) + 1) { sbuf_delete(outputsbuf); return (ERANGE); } error = copyout(sbuf_data(outputsbuf), outbufp, sbuf_len(outputsbuf) + 1); sbuf_delete(outputsbuf); return (error); } static struct sbuf * rctl_racct_to_sbuf(struct racct *racct, int sloppy) { int i; int64_t amount; struct sbuf *sb; ASSERT_RACCT_ENABLED(); sb = sbuf_new_auto(); for (i = 0; i <= RACCT_MAX; i++) { if (sloppy == 0 && RACCT_IS_SLOPPY(i)) continue; amount = racct->r_resources[i]; if (RACCT_IS_IN_MILLIONS(i)) amount /= 1000000; sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount); } sbuf_setpos(sb, sbuf_len(sb) - 1); return (sb); } int sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) { int error; char *inputstr; struct rctl_rule *filter; struct sbuf *outputsbuf = NULL; struct proc *p; struct uidinfo *uip; struct loginclass *lc; struct prison_racct *prr; if (!racct_enable) return (ENOSYS); error = priv_check(td, PRIV_RCTL_GET_RACCT); if (error != 0) return (error); error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); if (error != 0) return (error); sx_slock(&allproc_lock); error = rctl_string_to_rule(inputstr, &filter); free(inputstr, M_RCTL); if (error != 0) { sx_sunlock(&allproc_lock); return (error); } switch (filter->rr_subject_type) { case RCTL_SUBJECT_TYPE_PROCESS: p = filter->rr_subject.rs_proc; if (p == NULL) { error = EINVAL; goto out; } outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0); break; case RCTL_SUBJECT_TYPE_USER: uip = filter->rr_subject.rs_uip; if (uip == NULL) { error = EINVAL; goto out; } outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1); break; case RCTL_SUBJECT_TYPE_LOGINCLASS: lc = filter->rr_subject.rs_loginclass; if (lc == NULL) { error = EINVAL; goto out; } outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1); break; case RCTL_SUBJECT_TYPE_JAIL: prr = filter->rr_subject.rs_prison_racct; if (prr == NULL) { error = EINVAL; goto out; } outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1); break; default: error = EINVAL; } out: rctl_rule_release(filter); sx_sunlock(&allproc_lock); if (error != 0) return (error); error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen); return (error); } static void rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3) { struct rctl_rule *filter = (struct rctl_rule *)arg2; struct rctl_rule_link *link; struct sbuf *sb = (struct sbuf *)arg3; ASSERT_RACCT_ENABLED(); + rw_assert(&rctl_lock, RA_LOCKED); - rw_rlock(&rctl_lock); LIST_FOREACH(link, &racct->r_rule_links, rrl_next) { if (!rctl_rule_matches(link->rrl_rule, filter)) continue; rctl_rule_to_sbuf(sb, link->rrl_rule); sbuf_printf(sb, ","); } - rw_runlock(&rctl_lock); } int sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) { int error; size_t bufsize; char *inputstr, *buf; struct sbuf *sb; struct rctl_rule *filter; struct rctl_rule_link *link; struct proc *p; if (!racct_enable) return (ENOSYS); error = priv_check(td, PRIV_RCTL_GET_RULES); if (error != 0) return (error); error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); if (error != 0) return (error); sx_slock(&allproc_lock); error = rctl_string_to_rule(inputstr, &filter); free(inputstr, M_RCTL); if (error != 0) { sx_sunlock(&allproc_lock); return (error); } bufsize = uap->outbuflen; if (bufsize > rctl_maxbufsize) { sx_sunlock(&allproc_lock); return (E2BIG); } buf = 
malloc(bufsize, M_RCTL, M_WAITOK); sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); KASSERT(sb != NULL, ("sbuf_new failed")); FOREACH_PROC_IN_SYSTEM(p) { rw_rlock(&rctl_lock); LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { /* * Non-process rules will be added to the buffer later. * Adding them here would result in duplicated output. */ if (link->rrl_rule->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) continue; if (!rctl_rule_matches(link->rrl_rule, filter)) continue; rctl_rule_to_sbuf(sb, link->rrl_rule); sbuf_printf(sb, ","); } rw_runlock(&rctl_lock); } - loginclass_racct_foreach(rctl_get_rules_callback, filter, sb); - ui_racct_foreach(rctl_get_rules_callback, filter, sb); - prison_racct_foreach(rctl_get_rules_callback, filter, sb); + loginclass_racct_foreach(rctl_get_rules_callback, + rctl_rule_pre_callback, rctl_rule_post_callback, + filter, sb); + ui_racct_foreach(rctl_get_rules_callback, + rctl_rule_pre_callback, rctl_rule_post_callback, + filter, sb); + prison_racct_foreach(rctl_get_rules_callback, + rctl_rule_pre_callback, rctl_rule_post_callback, + filter, sb); if (sbuf_error(sb) == ENOMEM) { error = ERANGE; goto out; } /* * Remove trailing ",". */ if (sbuf_len(sb) > 0) sbuf_setpos(sb, sbuf_len(sb) - 1); error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); out: rctl_rule_release(filter); sx_sunlock(&allproc_lock); free(buf, M_RCTL); return (error); } int sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) { int error; size_t bufsize; char *inputstr, *buf; struct sbuf *sb; struct rctl_rule *filter; struct rctl_rule_link *link; if (!racct_enable) return (ENOSYS); error = priv_check(td, PRIV_RCTL_GET_LIMITS); if (error != 0) return (error); error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); if (error != 0) return (error); sx_slock(&allproc_lock); error = rctl_string_to_rule(inputstr, &filter); free(inputstr, M_RCTL); if (error != 0) { sx_sunlock(&allproc_lock); return (error); } if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) { rctl_rule_release(filter); sx_sunlock(&allproc_lock); return (EINVAL); } if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) { rctl_rule_release(filter); sx_sunlock(&allproc_lock); return (EOPNOTSUPP); } if (filter->rr_subject.rs_proc == NULL) { rctl_rule_release(filter); sx_sunlock(&allproc_lock); return (EINVAL); } bufsize = uap->outbuflen; if (bufsize > rctl_maxbufsize) { rctl_rule_release(filter); sx_sunlock(&allproc_lock); return (E2BIG); } buf = malloc(bufsize, M_RCTL, M_WAITOK); sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); KASSERT(sb != NULL, ("sbuf_new failed")); rw_rlock(&rctl_lock); LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links, rrl_next) { rctl_rule_to_sbuf(sb, link->rrl_rule); sbuf_printf(sb, ","); } rw_runlock(&rctl_lock); if (sbuf_error(sb) == ENOMEM) { error = ERANGE; goto out; } /* * Remove trailing ",". 
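/*
 * Illustrative sketch (userland): both rctl_racct_to_sbuf() and the rule
 * listing above append a ',' after every entry and then rewind the sbuf
 * by one byte to drop the trailing separator. The same trick with a
 * plain buffer:
 */
#include <stdio.h>

int
main(void)
{
	char buf[64];
	const char *names[] = { "cputime", "maxproc", "openfiles" };
	int amounts[] = { 12, 3, 128 };
	size_t len = 0;
	int i;

	for (i = 0; i < 3; i++)
		len += snprintf(buf + len, sizeof(buf) - len, "%s=%d,",
		    names[i], amounts[i]);
	if (len > 0)
		buf[len - 1] = '\0';	/* drop the trailing ',' */
	printf("%s\n", buf);
	return (0);
}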
*/ if (sbuf_len(sb) > 0) sbuf_setpos(sb, sbuf_len(sb) - 1); error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen); out: rctl_rule_release(filter); sx_sunlock(&allproc_lock); free(buf, M_RCTL); return (error); } int sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) { int error; struct rctl_rule *rule; char *inputstr; if (!racct_enable) return (ENOSYS); error = priv_check(td, PRIV_RCTL_ADD_RULE); if (error != 0) return (error); error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); if (error != 0) return (error); sx_slock(&allproc_lock); error = rctl_string_to_rule(inputstr, &rule); free(inputstr, M_RCTL); if (error != 0) { sx_sunlock(&allproc_lock); return (error); } /* * The 'per' part of a rule is optional. */ if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED && rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) rule->rr_per = rule->rr_subject_type; if (!rctl_rule_fully_specified(rule)) { error = EINVAL; goto out; } error = rctl_rule_add(rule); out: rctl_rule_release(rule); sx_sunlock(&allproc_lock); return (error); } int sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) { int error; struct rctl_rule *filter; char *inputstr; if (!racct_enable) return (ENOSYS); error = priv_check(td, PRIV_RCTL_REMOVE_RULE); if (error != 0) return (error); error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen); if (error != 0) return (error); sx_slock(&allproc_lock); error = rctl_string_to_rule(inputstr, &filter); free(inputstr, M_RCTL); if (error != 0) { sx_sunlock(&allproc_lock); return (error); } error = rctl_rule_remove(filter); rctl_rule_release(filter); sx_sunlock(&allproc_lock); return (error); } /* * Update RCTL rule list after credential change. */ void rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred) { int rulecnt, i; struct rctl_rule_link *link, *newlink; struct uidinfo *newuip; struct loginclass *newlc; struct prison_racct *newprr; LIST_HEAD(, rctl_rule_link) newrules; ASSERT_RACCT_ENABLED(); newuip = newcred->cr_ruidinfo; newlc = newcred->cr_loginclass; newprr = newcred->cr_prison->pr_prison_racct; LIST_INIT(&newrules); again: /* * First, count the rules that apply to the process with new * credentials. */ rulecnt = 0; rw_rlock(&rctl_lock); LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { if (link->rrl_rule->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS) rulecnt++; } LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) rulecnt++; LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) rulecnt++; LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) rulecnt++; rw_runlock(&rctl_lock); /* * Create temporary list. We've dropped the rctl_lock in order * to use M_WAITOK. */ for (i = 0; i < rulecnt; i++) { newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK); newlink->rrl_rule = NULL; LIST_INSERT_HEAD(&newrules, newlink, rrl_next); } newlink = LIST_FIRST(&newrules); /* * Assign rules to the newly allocated list entries. 
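/*
 * Illustrative sketch (userland): the 'again:' loop above counts rules
 * under the lock, drops the lock to allocate (M_WAITOK may sleep), then
 * rechecks the count under the write lock and retries if the lists
 * changed in the meantime. The shape of that count/allocate/revalidate
 * pattern:
 */
#include <stdio.h>
#include <stdlib.h>

static int rule_count = 4;	/* stands in for the locked rule lists */

int
main(void)
{
	int *slots, count;

	for (;;) {
		count = rule_count;	/* counted under the (read) lock */
		/* Lock dropped here: the allocation may sleep. */
		slots = calloc(count, sizeof(*slots));
		if (slots == NULL)
			abort();
		if (count == rule_count)	/* recheck under write lock */
			break;
		free(slots);			/* stale count: retry */
	}
	printf("prepared %d slots\n", count);
	free(slots);
	return (0);
}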
*/ rw_wlock(&rctl_lock); LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { if (link->rrl_rule->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS) { if (newlink == NULL) goto goaround; rctl_rule_acquire(link->rrl_rule); newlink->rrl_rule = link->rrl_rule; newlink = LIST_NEXT(newlink, rrl_next); rulecnt--; } } LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) { if (newlink == NULL) goto goaround; rctl_rule_acquire(link->rrl_rule); newlink->rrl_rule = link->rrl_rule; newlink = LIST_NEXT(newlink, rrl_next); rulecnt--; } LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) { if (newlink == NULL) goto goaround; rctl_rule_acquire(link->rrl_rule); newlink->rrl_rule = link->rrl_rule; newlink = LIST_NEXT(newlink, rrl_next); rulecnt--; } LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) { if (newlink == NULL) goto goaround; rctl_rule_acquire(link->rrl_rule); newlink->rrl_rule = link->rrl_rule; newlink = LIST_NEXT(newlink, rrl_next); rulecnt--; } if (rulecnt == 0) { /* * Free the old rule list. */ while (!LIST_EMPTY(&p->p_racct->r_rule_links)) { link = LIST_FIRST(&p->p_racct->r_rule_links); LIST_REMOVE(link, rrl_next); rctl_rule_release(link->rrl_rule); uma_zfree(rctl_rule_link_zone, link); } /* * Replace lists and we're done. * * XXX: Is there any way to switch list heads instead * of iterating here? */ while (!LIST_EMPTY(&newrules)) { newlink = LIST_FIRST(&newrules); LIST_REMOVE(newlink, rrl_next); LIST_INSERT_HEAD(&p->p_racct->r_rule_links, newlink, rrl_next); } rw_wunlock(&rctl_lock); return; } goaround: rw_wunlock(&rctl_lock); /* * Rule list changed while we were not holding the rctl_lock. * Free the new list and try again. */ while (!LIST_EMPTY(&newrules)) { newlink = LIST_FIRST(&newrules); LIST_REMOVE(newlink, rrl_next); if (newlink->rrl_rule != NULL) rctl_rule_release(newlink->rrl_rule); uma_zfree(rctl_rule_link_zone, newlink); } goto again; } /* * Assign RCTL rules to the newly created process. */ int rctl_proc_fork(struct proc *parent, struct proc *child) { int error; struct rctl_rule_link *link; struct rctl_rule *rule; LIST_INIT(&child->p_racct->r_rule_links); ASSERT_RACCT_ENABLED(); KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent)); rw_wlock(&rctl_lock); /* * Go through limits applicable to the parent and assign them * to the child. Rules with 'process' subject have to be duplicated * in order to make their rr_subject point to the new process. */ LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) { if (link->rrl_rule->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS) { rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT); if (rule == NULL) goto fail; KASSERT(rule->rr_subject.rs_proc == parent, ("rule->rr_subject.rs_proc != parent")); rule->rr_subject.rs_proc = child; error = rctl_racct_add_rule_locked(child->p_racct, rule); rctl_rule_release(rule); if (error != 0) goto fail; } else { error = rctl_racct_add_rule_locked(child->p_racct, link->rrl_rule); if (error != 0) goto fail; } } rw_wunlock(&rctl_lock); return (0); fail: while (!LIST_EMPTY(&child->p_racct->r_rule_links)) { link = LIST_FIRST(&child->p_racct->r_rule_links); LIST_REMOVE(link, rrl_next); rctl_rule_release(link->rrl_rule); uma_zfree(rctl_rule_link_zone, link); } rw_wunlock(&rctl_lock); return (EAGAIN); } /* * Release rules attached to the racct. 
*/ void rctl_racct_release(struct racct *racct) { struct rctl_rule_link *link; ASSERT_RACCT_ENABLED(); rw_wlock(&rctl_lock); while (!LIST_EMPTY(&racct->r_rule_links)) { link = LIST_FIRST(&racct->r_rule_links); LIST_REMOVE(link, rrl_next); rctl_rule_release(link->rrl_rule); uma_zfree(rctl_rule_link_zone, link); } rw_wunlock(&rctl_lock); } static void rctl_init(void) { if (!racct_enable) return; rctl_rule_link_zone = uma_zcreate("rctl_rule_link", sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); } #else /* !RCTL */ int sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap) { return (ENOSYS); } int sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) { return (ENOSYS); } int sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap) { return (ENOSYS); } int sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap) { return (ENOSYS); } int sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap) { return (ENOSYS); } #endif /* !RCTL */ Index: projects/powernv/kern/kern_resource.c =================================================================== --- projects/powernv/kern/kern_resource.c (revision 290990) +++ projects/powernv/kern/kern_resource.c (revision 290991) @@ -1,1438 +1,1443 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures"); static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures"); #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) static struct rwlock uihashtbl_lock; static LIST_HEAD(uihashhead, uidinfo) *uihashtbl; static u_long uihash; /* size of hash table - 1 */ static void calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, struct timeval *sp); static int donice(struct thread *td, struct proc *chgp, int n); static struct uidinfo *uilookup(uid_t uid); static void ruxagg_locked(struct rusage_ext *rux, struct thread *td); static __inline int lim_shared(struct plimit *limp); /* * Resource controls and accounting. */ #ifndef _SYS_SYSPROTO_H_ struct getpriority_args { int which; int who; }; #endif int sys_getpriority(struct thread *td, register struct getpriority_args *uap) { struct proc *p; struct pgrp *pg; int error, low; error = 0; low = PRIO_MAX + 1; switch (uap->which) { case PRIO_PROCESS: if (uap->who == 0) low = td->td_proc->p_nice; else { p = pfind(uap->who); if (p == NULL) break; if (p_cansee(td, p) == 0) low = p->p_nice; PROC_UNLOCK(p); } break; case PRIO_PGRP: sx_slock(&proctree_lock); if (uap->who == 0) { pg = td->td_proc->p_pgrp; PGRP_LOCK(pg); } else { pg = pgfind(uap->who); if (pg == NULL) { sx_sunlock(&proctree_lock); break; } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p_cansee(td, p) == 0) { if (p->p_nice < low) low = p->p_nice; } PROC_UNLOCK(p); } PGRP_UNLOCK(pg); break; case PRIO_USER: if (uap->who == 0) uap->who = td->td_ucred->cr_uid; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p_cansee(td, p) == 0 && p->p_ucred->cr_uid == uap->who) { if (p->p_nice < low) low = p->p_nice; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); break; default: error = EINVAL; break; } if (low == PRIO_MAX + 1 && error == 0) error = ESRCH; td->td_retval[0] = low; return (error); } #ifndef _SYS_SYSPROTO_H_ struct setpriority_args { int which; int who; int prio; }; #endif int sys_setpriority(struct thread *td, struct setpriority_args *uap) { struct proc *curp, *p; struct pgrp *pg; int found = 0, error = 0; curp = td->td_proc; switch (uap->which) { case PRIO_PROCESS: if (uap->who == 0) { PROC_LOCK(curp); error = donice(td, curp, uap->prio); PROC_UNLOCK(curp); } else { p = pfind(uap->who); if (p == NULL) break; error = p_cansee(td, p); if (error == 0) error = donice(td, p, uap->prio); PROC_UNLOCK(p); } found++; break; case PRIO_PGRP: sx_slock(&proctree_lock); if (uap->who == 0) { pg = curp->p_pgrp; PGRP_LOCK(pg); } else { pg = pgfind(uap->who); if (pg == NULL) { sx_sunlock(&proctree_lock); break; } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p_cansee(td, p) == 0) { error = donice(td, p, uap->prio); found++; } PROC_UNLOCK(p); } PGRP_UNLOCK(pg); break; case PRIO_USER: if (uap->who == 0) uap->who = td->td_ucred->cr_uid; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p->p_ucred->cr_uid == uap->who && p_cansee(td, p) == 0) { error = donice(td, p, uap->prio); 
found++; } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); break; default: error = EINVAL; break; } if (found == 0 && error == 0) error = ESRCH; return (error); } /* * Set "nice" for a (whole) process. */ static int donice(struct thread *td, struct proc *p, int n) { int error; PROC_LOCK_ASSERT(p, MA_OWNED); if ((error = p_cansched(td, p))) return (error); if (n > PRIO_MAX) n = PRIO_MAX; if (n < PRIO_MIN) n = PRIO_MIN; if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0) return (EACCES); sched_nice(p, n); return (0); } static int unprivileged_idprio; SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_idprio, CTLFLAG_RW, &unprivileged_idprio, 0, "Allow non-root users to set an idle priority"); /* * Set realtime priority for LWP. */ #ifndef _SYS_SYSPROTO_H_ struct rtprio_thread_args { int function; lwpid_t lwpid; struct rtprio *rtp; }; #endif int sys_rtprio_thread(struct thread *td, struct rtprio_thread_args *uap) { struct proc *p; struct rtprio rtp; struct thread *td1; int cierror, error; /* Perform copyin before acquiring locks if needed. */ if (uap->function == RTP_SET) cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); else cierror = 0; if (uap->lwpid == 0 || uap->lwpid == td->td_tid) { p = td->td_proc; td1 = td; PROC_LOCK(p); } else { /* Only look up thread in current process */ td1 = tdfind(uap->lwpid, curproc->p_pid); if (td1 == NULL) return (ESRCH); p = td1->td_proc; } switch (uap->function) { case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; pri_to_rtp(td1, &rtp); PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: if ((error = p_cansched(td, p)) || (error = cierror)) break; /* Disallow setting rtprio in most cases if not superuser. */ /* * Realtime priority has to be restricted for reasons which * should be obvious. However, for idleprio processes, there is * a potential for system deadlock if an idleprio process gains * a lock on a resource that other processes need (and the * idleprio process can't run due to a CPU-bound normal * process). Fix me! XXX * * This problem is not only related to idleprio process. * A user level program can obtain a file lock and hold it * indefinitely. Additionally, without idleprio processes it is * still conceivable that a program with low priority will never * get to run. In short, allowing this feature might make it * easier to lock a resource indefinitely, but it is not the * only thing that makes it possible. */ if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME || (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE && unprivileged_idprio == 0)) { error = priv_check(td, PRIV_SCHED_RTPRIO); if (error) break; } error = rtp_to_pri(&rtp, td1); break; default: error = EINVAL; break; } PROC_UNLOCK(p); return (error); } /* * Set realtime priority. */ #ifndef _SYS_SYSPROTO_H_ struct rtprio_args { int function; pid_t pid; struct rtprio *rtp; }; #endif int sys_rtprio(struct thread *td, register struct rtprio_args *uap) { struct proc *p; struct thread *tdp; struct rtprio rtp; int cierror, error; /* Perform copyin before acquiring locks if needed. */ if (uap->function == RTP_SET) cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); else cierror = 0; if (uap->pid == 0) { p = td->td_proc; PROC_LOCK(p); } else { p = pfind(uap->pid); if (p == NULL) return (ESRCH); } switch (uap->function) { case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; /* * Return OUR priority if no pid specified, * or if one is, report the highest priority * in the process. 
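/*
 * Illustrative sketch (userland): donice() above clamps the requested
 * nice value into [PRIO_MIN, PRIO_MAX] before applying it; lowering nice
 * additionally requires privilege. The clamping by itself:
 */
#include <stdio.h>
#include <sys/resource.h>	/* PRIO_MIN, PRIO_MAX */

static int
clamp_nice(int n)
{
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	return (n);
}

int
main(void)
{
	printf("%d %d %d\n", clamp_nice(100), clamp_nice(-100),
	    clamp_nice(5));	/* 20 -20 5 */
	return (0);
}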
There isn't much more you can do as * there is only room to return a single priority. * Note: specifying our own pid is not the same * as leaving it zero. */ if (uap->pid == 0) { pri_to_rtp(td, &rtp); } else { struct rtprio rtp2; rtp.type = RTP_PRIO_IDLE; rtp.prio = RTP_PRIO_MAX; FOREACH_THREAD_IN_PROC(p, tdp) { pri_to_rtp(tdp, &rtp2); if (rtp2.type < rtp.type || (rtp2.type == rtp.type && rtp2.prio < rtp.prio)) { rtp.type = rtp2.type; rtp.prio = rtp2.prio; } } } PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: if ((error = p_cansched(td, p)) || (error = cierror)) break; /* * Disallow setting rtprio in most cases if not superuser. * See the comment in sys_rtprio_thread about idprio * threads holding a lock. */ if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME || (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE && !unprivileged_idprio)) { error = priv_check(td, PRIV_SCHED_RTPRIO); if (error) break; } /* * If we are setting our own priority, set just our * thread but if we are doing another process, * do all the threads on that process. If we * specify our own pid we do the latter. */ if (uap->pid == 0) { error = rtp_to_pri(&rtp, td); } else { FOREACH_THREAD_IN_PROC(p, td) { if ((error = rtp_to_pri(&rtp, td)) != 0) break; } } break; default: error = EINVAL; break; } PROC_UNLOCK(p); return (error); } int rtp_to_pri(struct rtprio *rtp, struct thread *td) { u_char newpri, oldclass, oldpri; switch (RTP_PRIO_BASE(rtp->type)) { case RTP_PRIO_REALTIME: if (rtp->prio > RTP_PRIO_MAX) return (EINVAL); newpri = PRI_MIN_REALTIME + rtp->prio; break; case RTP_PRIO_NORMAL: if (rtp->prio > (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE)) return (EINVAL); newpri = PRI_MIN_TIMESHARE + rtp->prio; break; case RTP_PRIO_IDLE: if (rtp->prio > RTP_PRIO_MAX) return (EINVAL); newpri = PRI_MIN_IDLE + rtp->prio; break; default: return (EINVAL); } thread_lock(td); oldclass = td->td_pri_class; sched_class(td, rtp->type); /* XXX fix */ oldpri = td->td_user_pri; sched_user_prio(td, newpri); if (td->td_user_pri != oldpri && (oldclass != RTP_PRIO_NORMAL || td->td_pri_class != RTP_PRIO_NORMAL)) sched_prio(td, td->td_user_pri); if (TD_ON_UPILOCK(td) && oldpri != newpri) { critical_enter(); thread_unlock(td); umtx_pi_adjust(td, oldpri); critical_exit(); } else thread_unlock(td); return (0); } void pri_to_rtp(struct thread *td, struct rtprio *rtp) { thread_lock(td); switch (PRI_BASE(td->td_pri_class)) { case PRI_REALTIME: rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME; break; case PRI_TIMESHARE: rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE; break; case PRI_IDLE: rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE; break; default: break; } rtp->type = td->td_pri_class; thread_unlock(td); } #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct osetrlimit_args { u_int which; struct orlimit *rlp; }; #endif int osetrlimit(struct thread *td, register struct osetrlimit_args *uap) { struct orlimit olim; struct rlimit lim; int error; if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit)))) return (error); lim.rlim_cur = olim.rlim_cur; lim.rlim_max = olim.rlim_max; error = kern_setrlimit(td, uap->which, &lim); return (error); } #ifndef _SYS_SYSPROTO_H_ struct ogetrlimit_args { u_int which; struct orlimit *rlp; }; #endif int ogetrlimit(struct thread *td, register struct ogetrlimit_args *uap) { struct orlimit olim; struct rlimit rl; int error; if (uap->which >= RLIM_NLIMITS) return (EINVAL); lim_rlimit(td, uap->which, &rl); /* * XXX would be more correct to convert only RLIM_INFINITY to the * old 
RLIM_INFINITY and fail with EOVERFLOW for other larger * values. Most 64->32 and 32->16 conversions, including not * unimportant ones of uids are even more broken than what we * do here (they blindly truncate). We don't do this correctly * here since we have little experience with EOVERFLOW yet. * Elsewhere, getuid() can't fail... */ olim.rlim_cur = rl.rlim_cur > 0x7fffffff ? 0x7fffffff : rl.rlim_cur; olim.rlim_max = rl.rlim_max > 0x7fffffff ? 0x7fffffff : rl.rlim_max; error = copyout(&olim, uap->rlp, sizeof(olim)); return (error); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct __setrlimit_args { u_int which; struct rlimit *rlp; }; #endif int sys_setrlimit(struct thread *td, register struct __setrlimit_args *uap) { struct rlimit alim; int error; if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit)))) return (error); error = kern_setrlimit(td, uap->which, &alim); return (error); } static void lim_cb(void *arg) { struct rlimit rlim; struct thread *td; struct proc *p; p = arg; PROC_LOCK_ASSERT(p, MA_OWNED); /* * Check if the process exceeds its cpu resource allocation. If * it reaches the max, arrange to kill the process in ast(). */ if (p->p_cpulimit == RLIM_INFINITY) return; PROC_STATLOCK(p); FOREACH_THREAD_IN_PROC(p, td) { ruxagg(p, td); } PROC_STATUNLOCK(p); if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) { lim_rlimit_proc(p, RLIMIT_CPU, &rlim); if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) { killproc(p, "exceeded maximum CPU limit"); } else { if (p->p_cpulimit < rlim.rlim_max) p->p_cpulimit += 5; kern_psignal(p, SIGXCPU); } } if ((p->p_flag & P_WEXIT) == 0) callout_reset_sbt(&p->p_limco, SBT_1S, 0, lim_cb, p, C_PREL(1)); } int kern_setrlimit(struct thread *td, u_int which, struct rlimit *limp) { return (kern_proc_setrlimit(td, td->td_proc, which, limp)); } int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which, struct rlimit *limp) { struct plimit *newlim, *oldlim; register struct rlimit *alimp; struct rlimit oldssiz; int error; if (which >= RLIM_NLIMITS) return (EINVAL); /* * Preserve historical bugs by treating negative limits as unsigned. 
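	 * For example, a limit passed in as -1 is taken by the checks
	 * just below as RLIM_INFINITY instead of failing with EINVAL.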
*/ if (limp->rlim_cur < 0) limp->rlim_cur = RLIM_INFINITY; if (limp->rlim_max < 0) limp->rlim_max = RLIM_INFINITY; oldssiz.rlim_cur = 0; newlim = lim_alloc(); PROC_LOCK(p); oldlim = p->p_limit; alimp = &oldlim->pl_rlimit[which]; if (limp->rlim_cur > alimp->rlim_max || limp->rlim_max > alimp->rlim_max) if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) { PROC_UNLOCK(p); lim_free(newlim); return (error); } if (limp->rlim_cur > limp->rlim_max) limp->rlim_cur = limp->rlim_max; lim_copy(newlim, oldlim); alimp = &newlim->pl_rlimit[which]; switch (which) { case RLIMIT_CPU: if (limp->rlim_cur != RLIM_INFINITY && p->p_cpulimit == RLIM_INFINITY) callout_reset_sbt(&p->p_limco, SBT_1S, 0, lim_cb, p, C_PREL(1)); p->p_cpulimit = limp->rlim_cur; break; case RLIMIT_DATA: if (limp->rlim_cur > maxdsiz) limp->rlim_cur = maxdsiz; if (limp->rlim_max > maxdsiz) limp->rlim_max = maxdsiz; break; case RLIMIT_STACK: if (limp->rlim_cur > maxssiz) limp->rlim_cur = maxssiz; if (limp->rlim_max > maxssiz) limp->rlim_max = maxssiz; oldssiz = *alimp; if (p->p_sysent->sv_fixlimit != NULL) p->p_sysent->sv_fixlimit(&oldssiz, RLIMIT_STACK); break; case RLIMIT_NOFILE: if (limp->rlim_cur > maxfilesperproc) limp->rlim_cur = maxfilesperproc; if (limp->rlim_max > maxfilesperproc) limp->rlim_max = maxfilesperproc; break; case RLIMIT_NPROC: if (limp->rlim_cur > maxprocperuid) limp->rlim_cur = maxprocperuid; if (limp->rlim_max > maxprocperuid) limp->rlim_max = maxprocperuid; if (limp->rlim_cur < 1) limp->rlim_cur = 1; if (limp->rlim_max < 1) limp->rlim_max = 1; break; } if (p->p_sysent->sv_fixlimit != NULL) p->p_sysent->sv_fixlimit(limp, which); *alimp = *limp; p->p_limit = newlim; PROC_UPDATE_COW(p); PROC_UNLOCK(p); lim_free(oldlim); if (which == RLIMIT_STACK && /* * Skip calls from exec_new_vmspace(), done when stack is * not mapped yet. */ (td != curthread || (p->p_flag & P_INEXEC) == 0)) { /* * Stack is allocated to the max at exec time with only * "rlim_cur" bytes accessible. If stack limit is going * up make more accessible, if going down make inaccessible. */ if (limp->rlim_cur != oldssiz.rlim_cur) { vm_offset_t addr; vm_size_t size; vm_prot_t prot; if (limp->rlim_cur > oldssiz.rlim_cur) { prot = p->p_sysent->sv_stackprot; size = limp->rlim_cur - oldssiz.rlim_cur; addr = p->p_sysent->sv_usrstack - limp->rlim_cur; } else { prot = VM_PROT_NONE; size = oldssiz.rlim_cur - limp->rlim_cur; addr = p->p_sysent->sv_usrstack - oldssiz.rlim_cur; } addr = trunc_page(addr); size = round_page(size); (void)vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot, FALSE); } } return (0); } #ifndef _SYS_SYSPROTO_H_ struct __getrlimit_args { u_int which; struct rlimit *rlp; }; #endif /* ARGSUSED */ int sys_getrlimit(struct thread *td, register struct __getrlimit_args *uap) { struct rlimit rlim; int error; if (uap->which >= RLIM_NLIMITS) return (EINVAL); lim_rlimit(td, uap->which, &rlim); error = copyout(&rlim, uap->rlp, sizeof(struct rlimit)); return (error); } /* * Transform the running time and tick information for children of proc p * into user and system time usage. */ void calccru(struct proc *p, struct timeval *up, struct timeval *sp) { PROC_LOCK_ASSERT(p, MA_OWNED); calcru1(p, &p->p_crux, up, sp); } /* * Transform the running time and tick information in proc p into user * and system time usage. If appropriate, include the current time slice * on this CPU. 
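 * The current slice is folded in by sampling cpu_ticks() and charging the
 * delta since the last context switch to the thread, exactly as a context
 * switch would, before calcru1() converts the accumulated totals.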
*/ void calcru(struct proc *p, struct timeval *up, struct timeval *sp) { struct thread *td; uint64_t runtime, u; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_STATLOCK_ASSERT(p, MA_OWNED); /* * If we are getting stats for the current process, then add in the * stats that this thread has accumulated in its current time slice. * We reset the thread and CPU state as if we had performed a context * switch right here. */ td = curthread; if (td->td_proc == p) { u = cpu_ticks(); runtime = u - PCPU_GET(switchtime); td->td_runtime += runtime; td->td_incruntime += runtime; PCPU_SET(switchtime, u); } /* Make sure the per-thread stats are current. */ FOREACH_THREAD_IN_PROC(p, td) { if (td->td_incruntime == 0) continue; ruxagg(p, td); } calcru1(p, &p->p_rux, up, sp); } /* Collect resource usage for a single thread. */ void rufetchtd(struct thread *td, struct rusage *ru) { struct proc *p; uint64_t runtime, u; p = td->td_proc; PROC_STATLOCK_ASSERT(p, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); /* * If we are getting stats for the current thread, then add in the * stats that this thread has accumulated in its current time slice. * We reset the thread and CPU state as if we had performed a context * switch right here. */ if (td == curthread) { u = cpu_ticks(); runtime = u - PCPU_GET(switchtime); td->td_runtime += runtime; td->td_incruntime += runtime; PCPU_SET(switchtime, u); } ruxagg(p, td); *ru = td->td_ru; calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime); } static void calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, struct timeval *sp) { /* {user, system, interrupt, total} {ticks, usec}: */ uint64_t ut, uu, st, su, it, tt, tu; ut = ruxp->rux_uticks; st = ruxp->rux_sticks; it = ruxp->rux_iticks; tt = ut + st + it; if (tt == 0) { /* Avoid divide by zero */ st = 1; tt = 1; } tu = cputick2usec(ruxp->rux_runtime); if ((int64_t)tu < 0) { /* XXX: this should be an assert /phk */ printf("calcru: negative runtime of %jd usec for pid %d (%s)\n", (intmax_t)tu, p->p_pid, p->p_comm); tu = ruxp->rux_tu; } if (tu >= ruxp->rux_tu) { /* * The normal case, time increased. * Enforce monotonicity of bucketed numbers. */ uu = (tu * ut) / tt; if (uu < ruxp->rux_uu) uu = ruxp->rux_uu; su = (tu * st) / tt; if (su < ruxp->rux_su) su = ruxp->rux_su; } else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) { /* * When we calibrate the cputicker, it is not uncommon to * see the presumably fixed frequency increase slightly over * time as a result of thermal stabilization and NTP * discipline (of the reference clock). We therefore ignore * a bit of backwards slop because we expect to catch up * shortly. We use a 3 microsecond limit to catch low * counts and a 1% limit for high counts. */ uu = ruxp->rux_uu; su = ruxp->rux_su; tu = ruxp->rux_tu; } else { /* tu < ruxp->rux_tu */ /* * What happened here was likely that a laptop, which ran at * a reduced clock frequency at boot, kicked into high gear. * The wisdom of spamming this message in that case is * dubious, but it might also be indicative of something * serious, so lets keep it and hope laptops can be made * more truthful about their CPU speed via ACPI. 
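	 * As in the normal case, the recomputation below apportions the
	 * total tu across the buckets by tick share: e.g. tt = 4 ticks of
	 * which ut = 3 were user ticks splits tu = 1000 usec into
	 * uu = 750 usec of user time and su = 250 usec of system time.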
*/ printf("calcru: runtime went backwards from %ju usec " "to %ju usec for pid %d (%s)\n", (uintmax_t)ruxp->rux_tu, (uintmax_t)tu, p->p_pid, p->p_comm); uu = (tu * ut) / tt; su = (tu * st) / tt; } ruxp->rux_uu = uu; ruxp->rux_su = su; ruxp->rux_tu = tu; up->tv_sec = uu / 1000000; up->tv_usec = uu % 1000000; sp->tv_sec = su / 1000000; sp->tv_usec = su % 1000000; } #ifndef _SYS_SYSPROTO_H_ struct getrusage_args { int who; struct rusage *rusage; }; #endif int sys_getrusage(register struct thread *td, register struct getrusage_args *uap) { struct rusage ru; int error; error = kern_getrusage(td, uap->who, &ru); if (error == 0) error = copyout(&ru, uap->rusage, sizeof(struct rusage)); return (error); } int kern_getrusage(struct thread *td, int who, struct rusage *rup) { struct proc *p; int error; error = 0; p = td->td_proc; PROC_LOCK(p); switch (who) { case RUSAGE_SELF: rufetchcalc(p, rup, &rup->ru_utime, &rup->ru_stime); break; case RUSAGE_CHILDREN: *rup = p->p_stats->p_cru; calccru(p, &rup->ru_utime, &rup->ru_stime); break; case RUSAGE_THREAD: PROC_STATLOCK(p); thread_lock(td); rufetchtd(td, rup); thread_unlock(td); PROC_STATUNLOCK(p); break; default: error = EINVAL; } PROC_UNLOCK(p); return (error); } void rucollect(struct rusage *ru, struct rusage *ru2) { long *ip, *ip2; int i; if (ru->ru_maxrss < ru2->ru_maxrss) ru->ru_maxrss = ru2->ru_maxrss; ip = &ru->ru_first; ip2 = &ru2->ru_first; for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) *ip++ += *ip2++; } void ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2, struct rusage_ext *rux2) { rux->rux_runtime += rux2->rux_runtime; rux->rux_uticks += rux2->rux_uticks; rux->rux_sticks += rux2->rux_sticks; rux->rux_iticks += rux2->rux_iticks; rux->rux_uu += rux2->rux_uu; rux->rux_su += rux2->rux_su; rux->rux_tu += rux2->rux_tu; rucollect(ru, ru2); } /* * Aggregate tick counts into the proc's rusage_ext. */ static void ruxagg_locked(struct rusage_ext *rux, struct thread *td) { THREAD_LOCK_ASSERT(td, MA_OWNED); PROC_STATLOCK_ASSERT(td->td_proc, MA_OWNED); rux->rux_runtime += td->td_incruntime; rux->rux_uticks += td->td_uticks; rux->rux_sticks += td->td_sticks; rux->rux_iticks += td->td_iticks; } void ruxagg(struct proc *p, struct thread *td) { thread_lock(td); ruxagg_locked(&p->p_rux, td); ruxagg_locked(&td->td_rux, td); td->td_incruntime = 0; td->td_uticks = 0; td->td_iticks = 0; td->td_sticks = 0; thread_unlock(td); } /* * Update the rusage_ext structure and fetch a valid aggregate rusage * for proc p if storage for one is supplied. */ void rufetch(struct proc *p, struct rusage *ru) { struct thread *td; PROC_STATLOCK_ASSERT(p, MA_OWNED); *ru = p->p_ru; if (p->p_numthreads > 0) { FOREACH_THREAD_IN_PROC(p, td) { ruxagg(p, td); rucollect(ru, &td->td_ru); } } } /* * Atomically perform a rufetch and a calcru together. * Consumers, can safely assume the calcru is executed only once * rufetch is completed. */ void rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up, struct timeval *sp) { PROC_STATLOCK(p); rufetch(p, ru); calcru(p, up, sp); PROC_STATUNLOCK(p); } /* * Allocate a new resource limits structure and initialize its * reference count and mutex pointer. 
*/ struct plimit * lim_alloc() { struct plimit *limp; limp = malloc(sizeof(struct plimit), M_PLIMIT, M_WAITOK); refcount_init(&limp->pl_refcnt, 1); return (limp); } struct plimit * lim_hold(struct plimit *limp) { refcount_acquire(&limp->pl_refcnt); return (limp); } static __inline int lim_shared(struct plimit *limp) { return (limp->pl_refcnt > 1); } void lim_fork(struct proc *p1, struct proc *p2) { PROC_LOCK_ASSERT(p1, MA_OWNED); PROC_LOCK_ASSERT(p2, MA_OWNED); p2->p_limit = lim_hold(p1->p_limit); callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0); if (p1->p_cpulimit != RLIM_INFINITY) callout_reset_sbt(&p2->p_limco, SBT_1S, 0, lim_cb, p2, C_PREL(1)); } void lim_free(struct plimit *limp) { if (refcount_release(&limp->pl_refcnt)) free((void *)limp, M_PLIMIT); } /* * Make a copy of the plimit structure. * We share these structures copy-on-write after fork. */ void lim_copy(struct plimit *dst, struct plimit *src) { KASSERT(!lim_shared(dst), ("lim_copy to shared limit")); bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit)); } /* * Return the hard limit for a particular system resource. The * which parameter specifies the index into the rlimit array. */ rlim_t lim_max(struct thread *td, int which) { struct rlimit rl; lim_rlimit(td, which, &rl); return (rl.rlim_max); } rlim_t lim_max_proc(struct proc *p, int which) { struct rlimit rl; lim_rlimit_proc(p, which, &rl); return (rl.rlim_max); } /* * Return the current (soft) limit for a particular system resource. * The which parameter which specifies the index into the rlimit array */ rlim_t lim_cur(struct thread *td, int which) { struct rlimit rl; lim_rlimit(td, which, &rl); return (rl.rlim_cur); } rlim_t lim_cur_proc(struct proc *p, int which) { struct rlimit rl; lim_rlimit_proc(p, which, &rl); return (rl.rlim_cur); } /* * Return a copy of the entire rlimit structure for the system limit * specified by 'which' in the rlimit structure pointed to by 'rlp'. */ void lim_rlimit(struct thread *td, int which, struct rlimit *rlp) { struct proc *p = td->td_proc; MPASS(td == curthread); KASSERT(which >= 0 && which < RLIM_NLIMITS, ("request for invalid resource limit")); *rlp = td->td_limit->pl_rlimit[which]; if (p->p_sysent->sv_fixlimit != NULL) p->p_sysent->sv_fixlimit(rlp, which); } void lim_rlimit_proc(struct proc *p, int which, struct rlimit *rlp) { PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(which >= 0 && which < RLIM_NLIMITS, ("request for invalid resource limit")); *rlp = p->p_limit->pl_rlimit[which]; if (p->p_sysent->sv_fixlimit != NULL) p->p_sysent->sv_fixlimit(rlp, which); } void uihashinit() { uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash); rw_init(&uihashtbl_lock, "uidinfo hash"); } /* * Look up a uidinfo struct for the parameter uid. * uihashtbl_lock must be locked. * Increase refcount on uidinfo struct returned. */ static struct uidinfo * uilookup(uid_t uid) { struct uihashhead *uipp; struct uidinfo *uip; rw_assert(&uihashtbl_lock, RA_LOCKED); uipp = UIHASH(uid); LIST_FOREACH(uip, uipp, ui_hash) if (uip->ui_uid == uid) { uihold(uip); break; } return (uip); } /* * Find or allocate a struct uidinfo for a particular uid. * Returns with uidinfo struct referenced. * uifree() should be called on a struct uidinfo when released. 
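 * An illustrative (not verbatim) caller pairs the two around a per-uid
 * accounting update:
 *
 *	uip = uifind(uid);
 *	(void)chgproccnt(uip, 1, lim);
 *	...
 *	uifree(uip);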
*/ struct uidinfo * uifind(uid_t uid) { struct uidinfo *new_uip, *uip; rw_rlock(&uihashtbl_lock); uip = uilookup(uid); rw_runlock(&uihashtbl_lock); if (uip != NULL) return (uip); new_uip = malloc(sizeof(*new_uip), M_UIDINFO, M_WAITOK | M_ZERO); racct_create(&new_uip->ui_racct); refcount_init(&new_uip->ui_ref, 1); new_uip->ui_uid = uid; mtx_init(&new_uip->ui_vmsize_mtx, "ui_vmsize", NULL, MTX_DEF); rw_wlock(&uihashtbl_lock); /* * There's a chance someone created our uidinfo while we * were in malloc and not holding the lock, so we have to * make sure we don't insert a duplicate uidinfo. */ if ((uip = uilookup(uid)) == NULL) { LIST_INSERT_HEAD(UIHASH(uid), new_uip, ui_hash); rw_wunlock(&uihashtbl_lock); uip = new_uip; } else { rw_wunlock(&uihashtbl_lock); racct_destroy(&new_uip->ui_racct); mtx_destroy(&new_uip->ui_vmsize_mtx); free(new_uip, M_UIDINFO); } return (uip); } /* * Place another refcount on a uidinfo struct. */ void uihold(struct uidinfo *uip) { refcount_acquire(&uip->ui_ref); } /*- * Since uidinfo structs have a long lifetime, we use an * opportunistic refcounting scheme to avoid locking the lookup hash * for each release. * * If the refcount hits 0, we need to free the structure, * which means we need to lock the hash. * Optimal case: * After locking the struct and lowering the refcount, if we find * that we don't need to free, simply unlock and return. * Suboptimal case: * If refcount lowering results in need to free, bump the count * back up, lose the lock and acquire the locks in the proper * order to try again. */ void uifree(struct uidinfo *uip) { int old; /* Prepare for optimal case. */ old = uip->ui_ref; if (old > 1 && atomic_cmpset_int(&uip->ui_ref, old, old - 1)) return; /* Prepare for suboptimal case. */ rw_wlock(&uihashtbl_lock); if (refcount_release(&uip->ui_ref) == 0) { rw_wunlock(&uihashtbl_lock); return; } racct_destroy(&uip->ui_racct); LIST_REMOVE(uip, ui_hash); rw_wunlock(&uihashtbl_lock); if (uip->ui_sbsize != 0) printf("freeing uidinfo: uid = %d, sbsize = %ld\n", uip->ui_uid, uip->ui_sbsize); if (uip->ui_proccnt != 0) printf("freeing uidinfo: uid = %d, proccnt = %ld\n", uip->ui_uid, uip->ui_proccnt); if (uip->ui_vmsize != 0) printf("freeing uidinfo: uid = %d, swapuse = %lld\n", uip->ui_uid, (unsigned long long)uip->ui_vmsize); mtx_destroy(&uip->ui_vmsize_mtx); free(uip, M_UIDINFO); } #ifdef RACCT void ui_racct_foreach(void (*callback)(struct racct *racct, - void *arg2, void *arg3), void *arg2, void *arg3) + void *arg2, void *arg3), void (*pre)(void), void (*post)(void), + void *arg2, void *arg3) { struct uidinfo *uip; struct uihashhead *uih; rw_rlock(&uihashtbl_lock); + if (pre != NULL) + (pre)(); for (uih = &uihashtbl[uihash]; uih >= uihashtbl; uih--) { LIST_FOREACH(uip, uih, ui_hash) { (callback)(uip->ui_racct, arg2, arg3); } } + if (post != NULL) + (post)(); rw_runlock(&uihashtbl_lock); } #endif static inline int chglimit(struct uidinfo *uip, long *limit, int diff, rlim_t max, const char *name) { /* Don't allow them to exceed max, but allow subtraction. */ if (diff > 0 && max != 0) { if (atomic_fetchadd_long(limit, (long)diff) + diff > max) { atomic_subtract_long(limit, (long)diff); return (0); } } else { atomic_add_long(limit, (long)diff); if (*limit < 0) printf("negative %s for uid = %d\n", name, uip->ui_uid); } return (1); } /* * Change the count associated with number of processes * a given user is using. 
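 * It returns 1 on success and 0 when the increment would push the count
 * past 'max'; decrements always succeed.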
When 'max' is 0, don't enforce a limit */ int chgproccnt(struct uidinfo *uip, int diff, rlim_t max) { return (chglimit(uip, &uip->ui_proccnt, diff, max, "proccnt")); } /* * Change the total socket buffer size a user has used. */ int chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, rlim_t max) { int diff, rv; diff = to - *hiwat; if (diff > 0 && max == 0) { rv = 0; } else { rv = chglimit(uip, &uip->ui_sbsize, diff, max, "sbsize"); if (rv != 0) *hiwat = to; } return (rv); } /* * Change the count associated with number of pseudo-terminals * a given user is using. When 'max' is 0, don't enforce a limit */ int chgptscnt(struct uidinfo *uip, int diff, rlim_t max) { return (chglimit(uip, &uip->ui_ptscnt, diff, max, "ptscnt")); } int chgkqcnt(struct uidinfo *uip, int diff, rlim_t max) { return (chglimit(uip, &uip->ui_kqcnt, diff, max, "kqcnt")); } Index: projects/powernv/mips/atheros/apb.c =================================================================== --- projects/powernv/mips/atheros/apb.c (revision 290990) +++ projects/powernv/mips/atheros/apb.c (revision 290991) @@ -1,541 +1,543 @@ /*- * Copyright (c) 2009, Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define APB_INTR_PMC 5 #undef APB_DEBUG #ifdef APB_DEBUG #define dprintf printf #else #define dprintf(x, arg...) 
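/*
 * With APB_DEBUG undefined, dprintf() expands to nothing and the debug
 * calls compile away.
 */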
#endif /* APB_DEBUG */ #define DEVTOAPB(dev) ((struct apb_ivar *) device_get_ivars(dev)) static int apb_activate_resource(device_t, device_t, int, int, struct resource *); static device_t apb_add_child(device_t, u_int, const char *, int); static struct resource * apb_alloc_resource(device_t, device_t, int, int *, u_long, u_long, u_long, u_int); static int apb_attach(device_t); static int apb_deactivate_resource(device_t, device_t, int, int, struct resource *); static struct resource_list * apb_get_resource_list(device_t, device_t); static void apb_hinted_child(device_t, const char *, int); static int apb_filter(void *); static int apb_probe(device_t); static int apb_release_resource(device_t, device_t, int, int, struct resource *); static int apb_setup_intr(device_t, device_t, struct resource *, int, driver_filter_t *, driver_intr_t *, void *, void **); static int apb_teardown_intr(device_t, device_t, struct resource *, void *); static void apb_mask_irq(void *source) { unsigned int irq = (unsigned int)source; uint32_t reg; reg = ATH_READ_REG(AR71XX_MISC_INTR_MASK); ATH_WRITE_REG(AR71XX_MISC_INTR_MASK, reg & ~(1 << irq)); } static void apb_unmask_irq(void *source) { uint32_t reg; unsigned int irq = (unsigned int)source; reg = ATH_READ_REG(AR71XX_MISC_INTR_MASK); ATH_WRITE_REG(AR71XX_MISC_INTR_MASK, reg | (1 << irq)); } static int apb_probe(device_t dev) { return (BUS_PROBE_NOWILDCARD); } static int apb_attach(device_t dev) { struct apb_softc *sc = device_get_softc(dev); int rid = 0; device_set_desc(dev, "APB Bus bridge"); sc->apb_mem_rman.rm_type = RMAN_ARRAY; sc->apb_mem_rman.rm_descr = "APB memory window"; if (rman_init(&sc->apb_mem_rman) != 0 || rman_manage_region(&sc->apb_mem_rman, AR71XX_APB_BASE, AR71XX_APB_BASE + AR71XX_APB_SIZE - 1) != 0) panic("apb_attach: failed to set up memory rman"); sc->apb_irq_rman.rm_type = RMAN_ARRAY; sc->apb_irq_rman.rm_descr = "APB IRQ"; if (rman_init(&sc->apb_irq_rman) != 0 || rman_manage_region(&sc->apb_irq_rman, APB_IRQ_BASE, APB_IRQ_END) != 0) panic("apb_attach: failed to set up IRQ rman"); if ((sc->sc_misc_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) { device_printf(dev, "unable to allocate IRQ resource\n"); return (ENXIO); } if ((bus_setup_intr(dev, sc->sc_misc_irq, INTR_TYPE_MISC, apb_filter, NULL, sc, &sc->sc_misc_ih))) { device_printf(dev, "WARNING: unable to register interrupt handler\n"); return (ENXIO); } bus_generic_probe(dev); bus_enumerate_hinted_children(dev); bus_generic_attach(dev); /* * Unmask performance counter IRQ */ apb_unmask_irq((void*)APB_INTR_PMC); sc->sc_intr_counter[APB_INTR_PMC] = mips_intrcnt_create("apb irq5: pmc"); return (0); } static struct resource * apb_alloc_resource(device_t bus, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct apb_softc *sc = device_get_softc(bus); struct apb_ivar *ivar = device_get_ivars(child); struct resource *rv; struct resource_list_entry *rle; struct rman *rm; int isdefault, needactivate, passthrough; isdefault = (start == 0UL && end == ~0UL); needactivate = flags & RF_ACTIVE; /* * Pass memory requests to nexus device */ passthrough = (device_get_parent(child) != bus); rle = NULL; dprintf("%s: entry (%p, %p, %d, %d, %p, %p, %ld, %d)\n", __func__, bus, child, type, *rid, (void *)(intptr_t)start, (void *)(intptr_t)end, count, flags); if (passthrough) return (BUS_ALLOC_RESOURCE(device_get_parent(bus), child, type, rid, start, end, count, flags)); /* * If this is an allocation of the "default" range for a 
given RID, * and we know what the resources for this device are (ie. they aren't * maintained by a child bus), then work out the start/end values. */ if (isdefault) { rle = resource_list_find(&ivar->resources, type, *rid); if (rle == NULL) { return (NULL); } if (rle->res != NULL) { panic("%s: resource entry is busy", __func__); } start = rle->start; end = rle->end; count = rle->count; dprintf("%s: default resource (%p, %p, %ld)\n", __func__, (void *)(intptr_t)start, (void *)(intptr_t)end, count); } switch (type) { case SYS_RES_IRQ: rm = &sc->apb_irq_rman; break; case SYS_RES_MEMORY: rm = &sc->apb_mem_rman; break; default: printf("%s: unknown resource type %d\n", __func__, type); return (0); } rv = rman_reserve_resource(rm, start, end, count, flags, child); if (rv == 0) { printf("%s: could not reserve resource\n", __func__); return (0); } rman_set_rid(rv, *rid); if (needactivate) { if (bus_activate_resource(child, type, *rid, rv)) { printf("%s: could not activate resource\n", __func__); rman_release_resource(rv); return (0); } } return (rv); } static int apb_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { /* XXX: should we mask/unmask IRQ here? */ return (BUS_ACTIVATE_RESOURCE(device_get_parent(bus), child, type, rid, r)); } static int apb_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { /* XXX: should we mask/unmask IRQ here? */ return (BUS_DEACTIVATE_RESOURCE(device_get_parent(bus), child, type, rid, r)); } static int apb_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { struct resource_list *rl; struct resource_list_entry *rle; rl = apb_get_resource_list(dev, child); if (rl == NULL) return (EINVAL); rle = resource_list_find(rl, type, rid); if (rle == NULL) return (EINVAL); rman_release_resource(r); rle->res = NULL; return (0); } static int apb_setup_intr(device_t bus, device_t child, struct resource *ires, int flags, driver_filter_t *filt, driver_intr_t *handler, void *arg, void **cookiep) { struct apb_softc *sc = device_get_softc(bus); struct intr_event *event; int irq, error; irq = rman_get_start(ires); if (irq > APB_IRQ_END) panic("%s: bad irq %d", __func__, irq); event = sc->sc_eventstab[irq]; if (event == NULL) { error = intr_event_create(&event, (void *)irq, 0, irq, apb_mask_irq, apb_unmask_irq, NULL, NULL, "apb intr%d:", irq); if (error == 0) { sc->sc_eventstab[irq] = event; sc->sc_intr_counter[irq] = mips_intrcnt_create(event->ie_name); } else return (error); } intr_event_add_handler(event, device_get_nameunit(child), filt, handler, arg, intr_priority(flags), flags, cookiep); mips_intrcnt_setname(sc->sc_intr_counter[irq], event->ie_fullname); apb_unmask_irq((void*)irq); return (0); } static int apb_teardown_intr(device_t dev, device_t child, struct resource *ires, void *cookie) { struct apb_softc *sc = device_get_softc(dev); int irq, result; irq = rman_get_start(ires); if (irq > APB_IRQ_END) panic("%s: bad irq %d", __func__, irq); if (sc->sc_eventstab[irq] == NULL) panic("Trying to teardown unoccupied IRQ"); apb_mask_irq((void*)irq); result = intr_event_remove_handler(cookie); if (!result) sc->sc_eventstab[irq] = NULL; return (result); } static int apb_filter(void *arg) { struct apb_softc *sc = arg; struct intr_event *event; uint32_t reg, irq; struct thread *td; struct trapframe *tf; reg = ATH_READ_REG(AR71XX_MISC_INTR_STATUS); for (irq = 0; irq < APB_NIRQS; irq++) { if (reg & (1 << irq)) { switch (ar71xx_soc) { case AR71XX_SOC_AR7240: case AR71XX_SOC_AR7241: 
case AR71XX_SOC_AR7242: case AR71XX_SOC_AR9330: case AR71XX_SOC_AR9331: case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: /* ACK/clear the given interrupt */ ATH_WRITE_REG(AR71XX_MISC_INTR_STATUS, (1 << irq)); break; default: /* fallthrough */ break; } event = sc->sc_eventstab[irq]; if (!event || TAILQ_EMPTY(&event->ie_handlers)) { if (irq == APB_INTR_PMC) { td = PCPU_GET(curthread); tf = td->td_intr_frame; if (pmc_intr) (*pmc_intr)(PCPU_GET(cpuid), tf); mips_intrcnt_inc(sc->sc_intr_counter[irq]); continue; } /* Ignore timer interrupts */ if (irq != 0 && irq != 8 && irq != 9 && irq != 10) printf("Stray APB IRQ %d\n", irq); continue; } intr_event_handle(event, PCPU_GET(curthread)->td_intr_frame); mips_intrcnt_inc(sc->sc_intr_counter[irq]); } } return (FILTER_HANDLED); } static void apb_hinted_child(device_t bus, const char *dname, int dunit) { device_t child; long maddr; int msize; int irq; int result; int mem_hints_count; child = BUS_ADD_CHILD(bus, 0, dname, dunit); /* * Set hard-wired resources for hinted child using * specific RIDs. */ mem_hints_count = 0; if (resource_long_value(dname, dunit, "maddr", &maddr) == 0) mem_hints_count++; if (resource_int_value(dname, dunit, "msize", &msize) == 0) mem_hints_count++; /* check if all info for mem resource has been provided */ if ((mem_hints_count > 0) && (mem_hints_count < 2)) { printf("Either maddr or msize hint is missing for %s%d\n", dname, dunit); } else if (mem_hints_count) { result = bus_set_resource(child, SYS_RES_MEMORY, 0, maddr, msize); if (result != 0) device_printf(bus, "warning: bus_set_resource() failed\n"); } if (resource_int_value(dname, dunit, "irq", &irq) == 0) { result = bus_set_resource(child, SYS_RES_IRQ, 0, irq, 1); if (result != 0) device_printf(bus, "warning: bus_set_resource() failed\n"); } } static device_t apb_add_child(device_t bus, u_int order, const char *name, int unit) { device_t child; struct apb_ivar *ivar; ivar = malloc(sizeof(struct apb_ivar), M_DEVBUF, M_WAITOK | M_ZERO); if (ivar == NULL) { printf("Failed to allocate ivar\n"); return (0); } resource_list_init(&ivar->resources); child = device_add_child_ordered(bus, order, name, unit); if (child == NULL) { printf("Can't add child %s%d ordered\n", name, unit); return (0); } device_set_ivars(child, ivar); return (child); } /* * Helper routine for bus_generic_rl_get_resource/bus_generic_rl_set_resource * Provides pointer to resource_list for these routines */ static struct resource_list * apb_get_resource_list(device_t dev, device_t child) { struct apb_ivar *ivar; ivar = device_get_ivars(child); return (&(ivar->resources)); } static int apb_print_all_resources(device_t dev) { struct apb_ivar *ndev = DEVTOAPB(dev); struct resource_list *rl = &ndev->resources; int retval = 0; if (STAILQ_FIRST(rl)) retval += printf(" at"); retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx"); retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld"); return (retval); } static int apb_print_child(device_t bus, device_t child) { int retval = 0; retval += bus_print_child_header(bus, child); retval += apb_print_all_resources(child); if (device_get_flags(child)) retval += printf(" flags %#x", device_get_flags(child)); retval += printf(" on %s\n", device_get_nameunit(bus)); return (retval); } static device_method_t apb_methods[] = { DEVMETHOD(bus_activate_resource, apb_activate_resource), DEVMETHOD(bus_add_child, apb_add_child), 
DEVMETHOD(bus_alloc_resource, apb_alloc_resource), DEVMETHOD(bus_deactivate_resource, apb_deactivate_resource), DEVMETHOD(bus_get_resource_list, apb_get_resource_list), DEVMETHOD(bus_hinted_child, apb_hinted_child), DEVMETHOD(bus_release_resource, apb_release_resource), DEVMETHOD(bus_setup_intr, apb_setup_intr), DEVMETHOD(bus_teardown_intr, apb_teardown_intr), DEVMETHOD(device_attach, apb_attach), DEVMETHOD(device_probe, apb_probe), DEVMETHOD(bus_get_resource, bus_generic_rl_get_resource), DEVMETHOD(bus_set_resource, bus_generic_rl_set_resource), DEVMETHOD(bus_print_child, apb_print_child), DEVMETHOD_END }; static driver_t apb_driver = { "apb", apb_methods, sizeof(struct apb_softc), }; static devclass_t apb_devclass; DRIVER_MODULE(apb, nexus, apb_driver, apb_devclass, 0, 0); Index: projects/powernv/mips/atheros/ar71xx_ehci.c =================================================================== --- projects/powernv/mips/atheros/ar71xx_ehci.c (revision 290990) +++ projects/powernv/mips/atheros/ar71xx_ehci.c (revision 290991) @@ -1,276 +1,278 @@ /*- * Copyright (c) 2008 Sam Leffler. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * AR71XX attachment driver for the USB Enhanced Host Controller. */ #include __FBSDID("$FreeBSD$"); #include "opt_bus.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for stuff in ar71xx_cpudef.h */ #include #include #define EHCI_HC_DEVSTR "AR71XX Integrated USB 2.0 controller" struct ar71xx_ehci_softc { ehci_softc_t base; /* storage for EHCI code */ }; static device_attach_t ar71xx_ehci_attach; static device_detach_t ar71xx_ehci_detach; bs_r_1_proto(reversed); bs_w_1_proto(reversed); static int ar71xx_ehci_probe(device_t self) { device_set_desc(self, EHCI_HC_DEVSTR); return (BUS_PROBE_NOWILDCARD); } static void ar71xx_ehci_intr(void *arg) { /* XXX TODO: should really see if this was our interrupt.. 
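 * (e.g. by reading EHCI_USBSTS and masking it against the interrupts we
 * enabled, before paying for the DDR flush)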
*/ ar71xx_device_flush_ddr(AR71XX_CPU_DDR_FLUSH_USB); ehci_interrupt(arg); } static int ar71xx_ehci_attach(device_t self) { struct ar71xx_ehci_softc *isc = device_get_softc(self); ehci_softc_t *sc = &isc->base; int err; int rid; /* initialise some bus fields */ sc->sc_bus.parent = self; sc->sc_bus.devices = sc->sc_devices; sc->sc_bus.devices_max = EHCI_MAX_DEVICES; sc->sc_bus.dma_bits = 32; /* get all DMA memory */ if (usb_bus_mem_alloc_all(&sc->sc_bus, USB_GET_DMA_TAG(self), &ehci_iterate_hw_softc)) { return (ENOMEM); } sc->sc_bus.usbrev = USB_REV_2_0; /* NB: hints fix the memory location and irq */ rid = 0; sc->sc_io_res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!sc->sc_io_res) { device_printf(self, "Could not map memory\n"); goto error; } /* * Craft special resource for bus space ops that handle * byte-alignment of non-word addresses. */ sc->sc_io_tag = ar71xx_bus_space_reversed; sc->sc_io_hdl = rman_get_bushandle(sc->sc_io_res); sc->sc_io_size = rman_get_size(sc->sc_io_res); rid = 0; sc->sc_irq_res = bus_alloc_resource_any(self, SYS_RES_IRQ, &rid, RF_ACTIVE | RF_SHAREABLE); if (sc->sc_irq_res == NULL) { device_printf(self, "Could not allocate irq\n"); goto error; } sc->sc_bus.bdev = device_add_child(self, "usbus", -1); if (!sc->sc_bus.bdev) { device_printf(self, "Could not add USB device\n"); goto error; } device_set_ivars(sc->sc_bus.bdev, &sc->sc_bus); device_set_desc(sc->sc_bus.bdev, EHCI_HC_DEVSTR); sprintf(sc->sc_vendor, "Atheros"); err = bus_setup_intr(self, sc->sc_irq_res, INTR_TYPE_BIO | INTR_MPSAFE, NULL, ar71xx_ehci_intr, sc, &sc->sc_intr_hdl); if (err) { device_printf(self, "Could not setup irq, %d\n", err); sc->sc_intr_hdl = NULL; goto error; } /* * Arrange to force Host mode, select big-endian byte alignment, * and arrange to not terminate reset operations (the adapter * will ignore it if we do but might as well save a reg write). * Also, the controller has an embedded Transaction Translator * which means port speed must be read from the Port Status * register following a port enable. */ sc->sc_flags = EHCI_SCFLG_SETMODE; switch (ar71xx_soc) { case AR71XX_SOC_AR7241: case AR71XX_SOC_AR7242: case AR71XX_SOC_AR9130: case AR71XX_SOC_AR9132: case AR71XX_SOC_AR9330: case AR71XX_SOC_AR9331: case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: sc->sc_flags |= EHCI_SCFLG_TT | EHCI_SCFLG_NORESTERM; break; default: /* fallthrough */ break; } /* * ehci_reset() needs the correct offset to access the host controller * registers. The AR724x/AR913x offsets aren't 0. */ sc->sc_offs = EHCI_CAPLENGTH(EREAD4(sc, EHCI_CAPLEN_HCIVERSION)); (void) ehci_reset(sc); err = ehci_init(sc); if (!err) { err = device_probe_and_attach(sc->sc_bus.bdev); } if (err) { device_printf(self, "USB init failed err=%d\n", err); goto error; } return (0); error: ar71xx_ehci_detach(self); return (ENXIO); } static int ar71xx_ehci_detach(device_t self) { struct ar71xx_ehci_softc *isc = device_get_softc(self); ehci_softc_t *sc = &isc->base; device_t bdev; int err; if (sc->sc_bus.bdev) { bdev = sc->sc_bus.bdev; device_detach(bdev); device_delete_child(self, bdev); } /* during module unload there are lots of children leftover */ device_delete_children(self); if (sc->sc_irq_res && sc->sc_intr_hdl) { /* * only call ehci_detach() after ehci_init() */ ehci_detach(sc); err = bus_teardown_intr(self, sc->sc_irq_res, sc->sc_intr_hdl); if (err) /* XXX or should we panic? 
*/ device_printf(self, "Could not tear down irq, %d\n", err); sc->sc_intr_hdl = NULL; } if (sc->sc_irq_res) { bus_release_resource(self, SYS_RES_IRQ, 0, sc->sc_irq_res); sc->sc_irq_res = NULL; } if (sc->sc_io_res) { bus_release_resource(self, SYS_RES_MEMORY, 0, sc->sc_io_res); sc->sc_io_res = NULL; } usb_bus_mem_free_all(&sc->sc_bus, &ehci_iterate_hw_softc); return (0); } static device_method_t ehci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ar71xx_ehci_probe), DEVMETHOD(device_attach, ar71xx_ehci_attach), DEVMETHOD(device_detach, ar71xx_ehci_detach), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD_END }; static driver_t ehci_driver = { .name = "ehci", .methods = ehci_methods, .size = sizeof(struct ar71xx_ehci_softc), }; static devclass_t ehci_devclass; DRIVER_MODULE(ehci, nexus, ehci_driver, ehci_devclass, 0, 0); DRIVER_MODULE(ehci, apb, ehci_driver, ehci_devclass, 0, 0); MODULE_DEPEND(ehci, usb, 1, 1, 1); Index: projects/powernv/mips/atheros/ar71xx_gpio.c =================================================================== --- projects/powernv/mips/atheros/ar71xx_gpio.c (revision 290990) +++ projects/powernv/mips/atheros/ar71xx_gpio.c (revision 290991) @@ -1,572 +1,589 @@ /*- * Copyright (c) 2009, Oleksandr Tymoshenko * Copyright (c) 2009, Luiz Otavio O Souza. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ /* * GPIO driver for AR71xx */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include "gpio_if.h" #define DEFAULT_CAPS (GPIO_PIN_INPUT | GPIO_PIN_OUTPUT) /* * Helpers */ static void ar71xx_gpio_function_enable(struct ar71xx_gpio_softc *sc, uint32_t mask); static void ar71xx_gpio_function_disable(struct ar71xx_gpio_softc *sc, uint32_t mask); static void ar71xx_gpio_pin_configure(struct ar71xx_gpio_softc *sc, struct gpio_pin *pin, uint32_t flags); /* * Driver stuff */ static int ar71xx_gpio_probe(device_t dev); static int ar71xx_gpio_attach(device_t dev); static int ar71xx_gpio_detach(device_t dev); static int ar71xx_gpio_filter(void *arg); static void ar71xx_gpio_intr(void *arg); /* * GPIO interface */ static device_t ar71xx_gpio_get_bus(device_t); static int ar71xx_gpio_pin_max(device_t dev, int *maxpin); static int ar71xx_gpio_pin_getcaps(device_t dev, uint32_t pin, uint32_t *caps); static int ar71xx_gpio_pin_getflags(device_t dev, uint32_t pin, uint32_t *flags); static int ar71xx_gpio_pin_getname(device_t dev, uint32_t pin, char *name); static int ar71xx_gpio_pin_setflags(device_t dev, uint32_t pin, uint32_t flags); static int ar71xx_gpio_pin_set(device_t dev, uint32_t pin, unsigned int value); static int ar71xx_gpio_pin_get(device_t dev, uint32_t pin, unsigned int *val); static int ar71xx_gpio_pin_toggle(device_t dev, uint32_t pin); /* * Enable/disable the GPIO function control space. * * This is primarily for the AR71xx, which has SPI CS1/CS2, UART, SLIC, I2S * as GPIO pin options. */ static void ar71xx_gpio_function_enable(struct ar71xx_gpio_softc *sc, uint32_t mask) { + + /* + * XXX TODO: refactor this out into a per-chipset method. + */ if (ar71xx_soc == AR71XX_SOC_AR9341 || ar71xx_soc == AR71XX_SOC_AR9342 || ar71xx_soc == AR71XX_SOC_AR9344 || + ar71xx_soc == AR71XX_SOC_QCA9533 || + ar71xx_soc == AR71XX_SOC_QCA9533_V2 || ar71xx_soc == AR71XX_SOC_QCA9556 || ar71xx_soc == AR71XX_SOC_QCA9558) GPIO_SET_BITS(sc, AR934X_GPIO_REG_FUNC, mask); else GPIO_SET_BITS(sc, AR71XX_GPIO_FUNCTION, mask); } static void ar71xx_gpio_function_disable(struct ar71xx_gpio_softc *sc, uint32_t mask) { + + /* + * XXX TODO: refactor this out into a per-chipset method. 
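+	 * One option (suggestion only) would be to hang this off the
+	 * ar71xx_cpu_ops dispatch table that ar71xx_setup.c already
+	 * selects per SoC.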
+ */ if (ar71xx_soc == AR71XX_SOC_AR9341 || ar71xx_soc == AR71XX_SOC_AR9342 || ar71xx_soc == AR71XX_SOC_AR9344 || + ar71xx_soc == AR71XX_SOC_QCA9533 || + ar71xx_soc == AR71XX_SOC_QCA9533_V2 || ar71xx_soc == AR71XX_SOC_QCA9556 || ar71xx_soc == AR71XX_SOC_QCA9558) GPIO_CLEAR_BITS(sc, AR934X_GPIO_REG_FUNC, mask); else GPIO_CLEAR_BITS(sc, AR71XX_GPIO_FUNCTION, mask); } static void ar71xx_gpio_pin_configure(struct ar71xx_gpio_softc *sc, struct gpio_pin *pin, unsigned int flags) { uint32_t mask; mask = 1 << pin->gp_pin; /* * Manage input/output */ if (flags & (GPIO_PIN_INPUT|GPIO_PIN_OUTPUT)) { pin->gp_flags &= ~(GPIO_PIN_INPUT|GPIO_PIN_OUTPUT); if (flags & GPIO_PIN_OUTPUT) { pin->gp_flags |= GPIO_PIN_OUTPUT; GPIO_SET_BITS(sc, AR71XX_GPIO_OE, mask); } else { pin->gp_flags |= GPIO_PIN_INPUT; GPIO_CLEAR_BITS(sc, AR71XX_GPIO_OE, mask); } } } static device_t ar71xx_gpio_get_bus(device_t dev) { struct ar71xx_gpio_softc *sc; sc = device_get_softc(dev); return (sc->busdev); } static int ar71xx_gpio_pin_max(device_t dev, int *maxpin) { switch (ar71xx_soc) { case AR71XX_SOC_AR9130: case AR71XX_SOC_AR9132: *maxpin = AR91XX_GPIO_PINS - 1; break; case AR71XX_SOC_AR7240: case AR71XX_SOC_AR7241: case AR71XX_SOC_AR7242: *maxpin = AR724X_GPIO_PINS - 1; break; case AR71XX_SOC_AR9330: case AR71XX_SOC_AR9331: *maxpin = AR933X_GPIO_COUNT - 1; break; case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: *maxpin = AR934X_GPIO_COUNT - 1; + break; + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: + *maxpin = QCA953X_GPIO_COUNT - 1; break; case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: *maxpin = QCA955X_GPIO_COUNT - 1; break; default: *maxpin = AR71XX_GPIO_PINS - 1; } return (0); } static int ar71xx_gpio_pin_getcaps(device_t dev, uint32_t pin, uint32_t *caps) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); GPIO_LOCK(sc); *caps = sc->gpio_pins[i].gp_caps; GPIO_UNLOCK(sc); return (0); } static int ar71xx_gpio_pin_getflags(device_t dev, uint32_t pin, uint32_t *flags) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); GPIO_LOCK(sc); *flags = sc->gpio_pins[i].gp_flags; GPIO_UNLOCK(sc); return (0); } static int ar71xx_gpio_pin_getname(device_t dev, uint32_t pin, char *name) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); GPIO_LOCK(sc); memcpy(name, sc->gpio_pins[i].gp_name, GPIOMAXNAME); GPIO_UNLOCK(sc); return (0); } static int ar71xx_gpio_pin_setflags(device_t dev, uint32_t pin, uint32_t flags) { int i; struct ar71xx_gpio_softc *sc = device_get_softc(dev); for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); ar71xx_gpio_pin_configure(sc, &sc->gpio_pins[i], flags); return (0); } static int ar71xx_gpio_pin_set(device_t dev, uint32_t pin, unsigned int value) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); if (value) GPIO_WRITE(sc, AR71XX_GPIO_SET, (1 << pin)); else GPIO_WRITE(sc, AR71XX_GPIO_CLEAR, (1 << pin)); return (0); } static int ar71xx_gpio_pin_get(device_t dev, uint32_t 
pin, unsigned int *val) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); *val = (GPIO_READ(sc, AR71XX_GPIO_IN) & (1 << pin)) ? 1 : 0; return (0); } static int ar71xx_gpio_pin_toggle(device_t dev, uint32_t pin) { int res, i; struct ar71xx_gpio_softc *sc = device_get_softc(dev); for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); res = (GPIO_READ(sc, AR71XX_GPIO_IN) & (1 << pin)) ? 1 : 0; if (res) GPIO_WRITE(sc, AR71XX_GPIO_CLEAR, (1 << pin)); else GPIO_WRITE(sc, AR71XX_GPIO_SET, (1 << pin)); return (0); } static int ar71xx_gpio_filter(void *arg) { /* TODO: something useful */ return (FILTER_STRAY); } static void ar71xx_gpio_intr(void *arg) { struct ar71xx_gpio_softc *sc = arg; GPIO_LOCK(sc); /* TODO: something useful */ GPIO_UNLOCK(sc); } static int ar71xx_gpio_probe(device_t dev) { device_set_desc(dev, "Atheros AR71XX GPIO driver"); return (0); } static int ar71xx_gpio_attach(device_t dev) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i, j, maxpin; int mask, pinon; uint32_t oe; KASSERT((device_get_unit(dev) == 0), ("ar71xx_gpio: Only one gpio module supported")); mtx_init(&sc->gpio_mtx, device_get_nameunit(dev), NULL, MTX_DEF); /* Map control/status registers. */ sc->gpio_mem_rid = 0; sc->gpio_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->gpio_mem_rid, RF_ACTIVE); if (sc->gpio_mem_res == NULL) { device_printf(dev, "couldn't map memory\n"); ar71xx_gpio_detach(dev); return (ENXIO); } if ((sc->gpio_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->gpio_irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) { device_printf(dev, "unable to allocate IRQ resource\n"); ar71xx_gpio_detach(dev); return (ENXIO); } if ((bus_setup_intr(dev, sc->gpio_irq_res, INTR_TYPE_MISC, ar71xx_gpio_filter, ar71xx_gpio_intr, sc, &sc->gpio_ih))) { device_printf(dev, "WARNING: unable to register interrupt handler\n"); ar71xx_gpio_detach(dev); return (ENXIO); } sc->dev = dev; /* Enable function bits that are required */ if (resource_int_value(device_get_name(dev), device_get_unit(dev), "function_set", &mask) == 0) { device_printf(dev, "function_set: 0x%x\n", mask); ar71xx_gpio_function_enable(sc, mask); } /* Disable function bits that are required */ if (resource_int_value(device_get_name(dev), device_get_unit(dev), "function_clear", &mask) == 0) { device_printf(dev, "function_clear: 0x%x\n", mask); ar71xx_gpio_function_disable(sc, mask); } /* Disable interrupts for all pins. */ GPIO_WRITE(sc, AR71XX_GPIO_INT_MASK, 0); /* Initialise all pins specified in the mask, up to the pin count */ (void) ar71xx_gpio_pin_max(dev, &maxpin); if (resource_int_value(device_get_name(dev), device_get_unit(dev), "pinmask", &mask) != 0) mask = 0; if (resource_int_value(device_get_name(dev), device_get_unit(dev), "pinon", &pinon) != 0) pinon = 0; device_printf(dev, "gpio pinmask=0x%x\n", mask); for (j = 0; j <= maxpin; j++) { if ((mask & (1 << j)) == 0) continue; sc->gpio_npins++; } /* Iniatilize the GPIO pins, keep the loader settings. 
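	 * The AR71XX_GPIO_OE read just below picks up whatever direction
	 * bits the boot loader left programmed.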
*/ oe = GPIO_READ(sc, AR71XX_GPIO_OE); sc->gpio_pins = malloc(sizeof(*sc->gpio_pins) * sc->gpio_npins, M_DEVBUF, M_WAITOK | M_ZERO); for (i = 0, j = 0; j <= maxpin; j++) { if ((mask & (1 << j)) == 0) continue; snprintf(sc->gpio_pins[i].gp_name, GPIOMAXNAME, "pin %d", j); sc->gpio_pins[i].gp_pin = j; sc->gpio_pins[i].gp_caps = DEFAULT_CAPS; if (oe & (1 << j)) sc->gpio_pins[i].gp_flags = GPIO_PIN_OUTPUT; else sc->gpio_pins[i].gp_flags = GPIO_PIN_INPUT; i++; } /* Turn on the hinted pins. */ for (i = 0; i < sc->gpio_npins; i++) { j = sc->gpio_pins[i].gp_pin; if ((pinon & (1 << j)) != 0) { ar71xx_gpio_pin_setflags(dev, j, GPIO_PIN_OUTPUT); ar71xx_gpio_pin_set(dev, j, 1); } } /* * Search through the function hints, in case there's some * overrides such as LNA control. * * hint.gpio.X.func..gpiofunc= * hint.gpio.X.func..gpiomode=1 (for output, default low) */ for (i = 0; i <= maxpin; i++) { char buf[32]; int gpiofunc, gpiomode; snprintf(buf, 32, "func.%d.gpiofunc", i); if (resource_int_value(device_get_name(dev), device_get_unit(dev), buf, &gpiofunc) != 0) continue; /* Get the mode too */ snprintf(buf, 32, "func.%d.gpiomode", i); if (resource_int_value(device_get_name(dev), device_get_unit(dev), buf, &gpiomode) != 0) continue; /* We only handle mode=1 for now */ if (gpiomode != 1) continue; device_printf(dev, "%s: GPIO %d: func=%d, mode=%d\n", __func__, i, gpiofunc, gpiomode); /* Set output (bit == 0) */ oe = GPIO_READ(sc, AR71XX_GPIO_OE); oe &= ~ (1 << i); GPIO_WRITE(sc, AR71XX_GPIO_OE, oe); /* Set pin value = 0, so it stays low by default */ oe = GPIO_READ(sc, AR71XX_GPIO_OUT); oe &= ~ (1 << i); GPIO_WRITE(sc, AR71XX_GPIO_OUT, oe); /* Finally: Set the output config */ ar71xx_gpio_ouput_configure(i, gpiofunc); } sc->busdev = gpiobus_attach_bus(dev); if (sc->busdev == NULL) { ar71xx_gpio_detach(dev); return (ENXIO); } return (0); } static int ar71xx_gpio_detach(device_t dev) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); KASSERT(mtx_initialized(&sc->gpio_mtx), ("gpio mutex not initialized")); gpiobus_detach_bus(dev); if (sc->gpio_ih) bus_teardown_intr(dev, sc->gpio_irq_res, sc->gpio_ih); if (sc->gpio_irq_res) bus_release_resource(dev, SYS_RES_IRQ, sc->gpio_irq_rid, sc->gpio_irq_res); if (sc->gpio_mem_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->gpio_mem_rid, sc->gpio_mem_res); if (sc->gpio_pins) free(sc->gpio_pins, M_DEVBUF); mtx_destroy(&sc->gpio_mtx); return(0); } static device_method_t ar71xx_gpio_methods[] = { DEVMETHOD(device_probe, ar71xx_gpio_probe), DEVMETHOD(device_attach, ar71xx_gpio_attach), DEVMETHOD(device_detach, ar71xx_gpio_detach), /* GPIO protocol */ DEVMETHOD(gpio_get_bus, ar71xx_gpio_get_bus), DEVMETHOD(gpio_pin_max, ar71xx_gpio_pin_max), DEVMETHOD(gpio_pin_getname, ar71xx_gpio_pin_getname), DEVMETHOD(gpio_pin_getflags, ar71xx_gpio_pin_getflags), DEVMETHOD(gpio_pin_getcaps, ar71xx_gpio_pin_getcaps), DEVMETHOD(gpio_pin_setflags, ar71xx_gpio_pin_setflags), DEVMETHOD(gpio_pin_get, ar71xx_gpio_pin_get), DEVMETHOD(gpio_pin_set, ar71xx_gpio_pin_set), DEVMETHOD(gpio_pin_toggle, ar71xx_gpio_pin_toggle), {0, 0}, }; static driver_t ar71xx_gpio_driver = { "gpio", ar71xx_gpio_methods, sizeof(struct ar71xx_gpio_softc), }; static devclass_t ar71xx_gpio_devclass; DRIVER_MODULE(ar71xx_gpio, apb, ar71xx_gpio_driver, ar71xx_gpio_devclass, 0, 0); Index: projects/powernv/mips/atheros/ar71xx_setup.c =================================================================== --- projects/powernv/mips/atheros/ar71xx_setup.c (revision 290990) +++ projects/powernv/mips/atheros/ar71xx_setup.c 
(revision 290991) @@ -1,217 +1,235 @@ /*- * Copyright (c) 2010 Adrian Chadd * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include +#include #include #define AR71XX_SYS_TYPE_LEN 128 static char ar71xx_sys_type[AR71XX_SYS_TYPE_LEN]; enum ar71xx_soc_type ar71xx_soc; struct ar71xx_cpu_def * ar71xx_cpu_ops = NULL; void ar71xx_detect_sys_type(void) { char *chip = "????"; uint32_t id; uint32_t major; uint32_t minor; uint32_t rev = 0; id = ATH_READ_REG(AR71XX_RST_RESET_REG_REV_ID); major = id & REV_ID_MAJOR_MASK; switch (major) { case REV_ID_MAJOR_AR71XX: minor = id & AR71XX_REV_ID_MINOR_MASK; rev = id >> AR71XX_REV_ID_REVISION_SHIFT; rev &= AR71XX_REV_ID_REVISION_MASK; ar71xx_cpu_ops = &ar71xx_chip_def; switch (minor) { case AR71XX_REV_ID_MINOR_AR7130: ar71xx_soc = AR71XX_SOC_AR7130; chip = "7130"; break; case AR71XX_REV_ID_MINOR_AR7141: ar71xx_soc = AR71XX_SOC_AR7141; chip = "7141"; break; case AR71XX_REV_ID_MINOR_AR7161: ar71xx_soc = AR71XX_SOC_AR7161; chip = "7161"; break; } break; case REV_ID_MAJOR_AR7240: ar71xx_soc = AR71XX_SOC_AR7240; chip = "7240"; ar71xx_cpu_ops = &ar724x_chip_def; rev = (id & AR724X_REV_ID_REVISION_MASK); break; case REV_ID_MAJOR_AR7241: ar71xx_soc = AR71XX_SOC_AR7241; chip = "7241"; ar71xx_cpu_ops = &ar724x_chip_def; rev = (id & AR724X_REV_ID_REVISION_MASK); break; case REV_ID_MAJOR_AR7242: ar71xx_soc = AR71XX_SOC_AR7242; chip = "7242"; ar71xx_cpu_ops = &ar724x_chip_def; rev = (id & AR724X_REV_ID_REVISION_MASK); break; case REV_ID_MAJOR_AR913X: minor = id & AR91XX_REV_ID_MINOR_MASK; rev = id >> AR91XX_REV_ID_REVISION_SHIFT; rev &= AR91XX_REV_ID_REVISION_MASK; ar71xx_cpu_ops = &ar91xx_chip_def; switch (minor) { case AR91XX_REV_ID_MINOR_AR9130: ar71xx_soc = AR71XX_SOC_AR9130; chip = "9130"; break; case AR91XX_REV_ID_MINOR_AR9132: ar71xx_soc = AR71XX_SOC_AR9132; chip = "9132"; break; } break; case REV_ID_MAJOR_AR9330: minor = 0; rev = (id & AR933X_REV_ID_REVISION_MASK); chip = "9330"; ar71xx_cpu_ops = &ar933x_chip_def; ar71xx_soc = AR71XX_SOC_AR9330; 
break; case REV_ID_MAJOR_AR9331: minor = 1; rev = (id & AR933X_REV_ID_REVISION_MASK); chip = "9331"; ar71xx_soc = AR71XX_SOC_AR9331; ar71xx_cpu_ops = &ar933x_chip_def; break; case REV_ID_MAJOR_AR9341: minor = 0; rev = (id & AR934X_REV_ID_REVISION_MASK); chip = "9341"; ar71xx_soc = AR71XX_SOC_AR9341; ar71xx_cpu_ops = &ar934x_chip_def; break; case REV_ID_MAJOR_AR9342: minor = 0; rev = (id & AR934X_REV_ID_REVISION_MASK); chip = "9342"; ar71xx_soc = AR71XX_SOC_AR9342; ar71xx_cpu_ops = &ar934x_chip_def; break; case REV_ID_MAJOR_AR9344: minor = 0; rev = (id & AR934X_REV_ID_REVISION_MASK); chip = "9344"; ar71xx_soc = AR71XX_SOC_AR9344; ar71xx_cpu_ops = &ar934x_chip_def; + break; + + case REV_ID_MAJOR_QCA9533: + minor = 0; + rev = (id & QCA953X_REV_ID_REVISION_MASK); + chip = "9533"; + ar71xx_soc = AR71XX_SOC_QCA9533; + ar71xx_cpu_ops = &qca953x_chip_def; + break; + + case REV_ID_MAJOR_QCA9533_V2: + minor = 0; + rev = (id & QCA953X_REV_ID_REVISION_MASK); + chip = "9533v2"; + ar71xx_soc = AR71XX_SOC_QCA9533_V2; + ar71xx_cpu_ops = &qca953x_chip_def; break; case REV_ID_MAJOR_QCA9556: minor = 0; rev = (id & QCA955X_REV_ID_REVISION_MASK); chip = "9556"; ar71xx_soc = AR71XX_SOC_QCA9556; ar71xx_cpu_ops = &qca955x_chip_def; break; case REV_ID_MAJOR_QCA9558: minor = 0; rev = (id & QCA955X_REV_ID_REVISION_MASK); chip = "9558"; ar71xx_soc = AR71XX_SOC_QCA9558; ar71xx_cpu_ops = &qca955x_chip_def; break; default: panic("ar71xx: unknown chip id:0x%08x\n", id); } sprintf(ar71xx_sys_type, "Atheros AR%s rev %u", chip, rev); } const char * ar71xx_get_system_type(void) { return ar71xx_sys_type; } Index: projects/powernv/mips/atheros/ar71xx_setup.h =================================================================== --- projects/powernv/mips/atheros/ar71xx_setup.h (revision 290990) +++ projects/powernv/mips/atheros/ar71xx_setup.h (revision 290991) @@ -1,55 +1,57 @@ /*- * Copyright (c) 2010 Adrian Chadd * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ /* $FreeBSD$ */ #ifndef __AR71XX_SETUP_H__ #define __AR71XX_SETUP_H__ enum ar71xx_soc_type { AR71XX_SOC_UNKNOWN, AR71XX_SOC_AR7130, AR71XX_SOC_AR7141, AR71XX_SOC_AR7161, AR71XX_SOC_AR7240, AR71XX_SOC_AR7241, AR71XX_SOC_AR7242, AR71XX_SOC_AR9130, AR71XX_SOC_AR9132, AR71XX_SOC_AR9330, AR71XX_SOC_AR9331, AR71XX_SOC_AR9341, AR71XX_SOC_AR9342, AR71XX_SOC_AR9344, AR71XX_SOC_QCA9556, AR71XX_SOC_QCA9558, + AR71XX_SOC_QCA9533, + AR71XX_SOC_QCA9533_V2, }; extern enum ar71xx_soc_type ar71xx_soc; extern void ar71xx_detect_sys_type(void); extern const char *ar71xx_get_system_type(void); #endif Index: projects/powernv/mips/atheros/files.ar71xx =================================================================== --- projects/powernv/mips/atheros/files.ar71xx (revision 290990) +++ projects/powernv/mips/atheros/files.ar71xx (revision 290991) @@ -1,33 +1,34 @@ # $FreeBSD$ mips/atheros/apb.c optional ar71xx_apb mips/atheros/ar71xx_gpio.c optional gpio mips/atheros/ar71xx_machdep.c standard mips/atheros/ar71xx_ehci.c optional ehci mips/atheros/ar71xx_ohci.c optional ohci mips/atheros/ar71xx_pci.c optional ar71xx_pci pci mips/atheros/ar724x_pci.c optional ar724x_pci pci mips/atheros/ar71xx_pci_bus_space.c optional pci mips/atheros/ar71xx_spi.c optional ar71xx_spi mips/atheros/ar71xx_macaddr.c standard mips/atheros/pcf2123_rtc.c optional pcf2123_rtc ar71xx_spi mips/atheros/ar71xx_wdog.c optional ar71xx_wdog mips/atheros/if_arge.c optional arge mips/atheros/uart_bus_ar71xx.c optional uart_ar71xx mips/atheros/uart_cpu_ar71xx.c optional uart_ar71xx mips/atheros/uart_bus_ar933x.c optional uart_ar933x mips/atheros/uart_cpu_ar933x.c optional uart_ar933x mips/atheros/uart_dev_ar933x.c optional uart_ar933x mips/atheros/ar71xx_bus_space_reversed.c standard mips/mips/intr_machdep.c standard mips/mips/tick.c standard mips/atheros/ar71xx_setup.c standard mips/atheros/ar71xx_chip.c standard mips/atheros/ar724x_chip.c standard mips/atheros/ar91xx_chip.c standard mips/atheros/ar933x_chip.c standard mips/atheros/ar934x_chip.c standard +mips/atheros/qca953x_chip.c standard mips/atheros/qca955x_chip.c standard mips/atheros/ar71xx_fixup.c optional ar71xx_ath_eeprom mips/atheros/qca955x_apb.c optional qca955x_apb mips/atheros/qca955x_pci.c optional qca955x_pci pci Index: projects/powernv/mips/atheros/if_arge.c =================================================================== --- projects/powernv/mips/atheros/if_arge.c (revision 290990) +++ projects/powernv/mips/atheros/if_arge.c (revision 290991) @@ -1,2699 +1,2720 @@ /*- * Copyright (c) 2009, Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * AR71XX gigabit ethernet driver */ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include "opt_arge.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_arge.h" #if defined(ARGE_MDIO) #include #include #include "mdio_if.h" #endif MODULE_DEPEND(arge, ether, 1, 1, 1); MODULE_DEPEND(arge, miibus, 1, 1, 1); MODULE_VERSION(arge, 1); #include "miibus_if.h" #include #include #include /* XXX tsk! */ +#include /* XXX tsk! */ #include /* XXX tsk! */ #include #include #include #include typedef enum { ARGE_DBG_MII = 0x00000001, ARGE_DBG_INTR = 0x00000002, ARGE_DBG_TX = 0x00000004, ARGE_DBG_RX = 0x00000008, ARGE_DBG_ERR = 0x00000010, ARGE_DBG_RESET = 0x00000020, ARGE_DBG_PLL = 0x00000040, } arge_debug_flags; static const char * arge_miicfg_str[] = { "NONE", "GMII", "MII", "RGMII", "RMII", "SGMII" }; #ifdef ARGE_DEBUG #define ARGEDEBUG(_sc, _m, ...) \ do { \ if ((_m) & (_sc)->arge_debug) \ device_printf((_sc)->arge_dev, __VA_ARGS__); \ } while (0) #else #define ARGEDEBUG(_sc, _m, ...) 
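/*
 * A typical (hypothetical) call site looks like:
 *
 *	ARGEDEBUG(sc, ARGE_DBG_INTR, "%s: status=0x%08x\n", __func__, status);
 *
 * which prints only when the kernel is built with ARGE_DEBUG and the
 * ARGE_DBG_INTR bit is set in the per-device 'debug' sysctl; in the
 * !ARGE_DEBUG case above it expands to nothing.
 */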
#endif static int arge_attach(device_t); static int arge_detach(device_t); static void arge_flush_ddr(struct arge_softc *); static int arge_ifmedia_upd(struct ifnet *); static void arge_ifmedia_sts(struct ifnet *, struct ifmediareq *); static int arge_ioctl(struct ifnet *, u_long, caddr_t); static void arge_init(void *); static void arge_init_locked(struct arge_softc *); static void arge_link_task(void *, int); static void arge_update_link_locked(struct arge_softc *sc); static void arge_set_pll(struct arge_softc *, int, int); static int arge_miibus_readreg(device_t, int, int); static void arge_miibus_statchg(device_t); static int arge_miibus_writereg(device_t, int, int, int); static int arge_probe(device_t); static void arge_reset_dma(struct arge_softc *); static int arge_resume(device_t); static int arge_rx_ring_init(struct arge_softc *); static void arge_rx_ring_free(struct arge_softc *sc); static int arge_tx_ring_init(struct arge_softc *); static void arge_tx_ring_free(struct arge_softc *); #ifdef DEVICE_POLLING static int arge_poll(struct ifnet *, enum poll_cmd, int); #endif static int arge_shutdown(device_t); static void arge_start(struct ifnet *); static void arge_start_locked(struct ifnet *); static void arge_stop(struct arge_softc *); static int arge_suspend(device_t); static int arge_rx_locked(struct arge_softc *); static void arge_tx_locked(struct arge_softc *); static void arge_intr(void *); static int arge_intr_filter(void *); static void arge_tick(void *); static void arge_hinted_child(device_t bus, const char *dname, int dunit); /* * ifmedia callbacks for multiPHY MAC */ void arge_multiphy_mediastatus(struct ifnet *, struct ifmediareq *); int arge_multiphy_mediachange(struct ifnet *); static void arge_dmamap_cb(void *, bus_dma_segment_t *, int, int); static int arge_dma_alloc(struct arge_softc *); static void arge_dma_free(struct arge_softc *); static int arge_newbuf(struct arge_softc *, int); static __inline void arge_fixup_rx(struct mbuf *); static device_method_t arge_methods[] = { /* Device interface */ DEVMETHOD(device_probe, arge_probe), DEVMETHOD(device_attach, arge_attach), DEVMETHOD(device_detach, arge_detach), DEVMETHOD(device_suspend, arge_suspend), DEVMETHOD(device_resume, arge_resume), DEVMETHOD(device_shutdown, arge_shutdown), /* MII interface */ DEVMETHOD(miibus_readreg, arge_miibus_readreg), DEVMETHOD(miibus_writereg, arge_miibus_writereg), DEVMETHOD(miibus_statchg, arge_miibus_statchg), /* bus interface */ DEVMETHOD(bus_add_child, device_add_child_ordered), DEVMETHOD(bus_hinted_child, arge_hinted_child), DEVMETHOD_END }; static driver_t arge_driver = { "arge", arge_methods, sizeof(struct arge_softc) }; static devclass_t arge_devclass; DRIVER_MODULE(arge, nexus, arge_driver, arge_devclass, 0, 0); DRIVER_MODULE(miibus, arge, miibus_driver, miibus_devclass, 0, 0); #if defined(ARGE_MDIO) static int argemdio_probe(device_t); static int argemdio_attach(device_t); static int argemdio_detach(device_t); /* * Declare an additional, separate driver for accessing the MDIO bus. 
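 * A sketch of the resulting device tree, per the DRIVER_MODULE lines
 * below (the PHY drivers themselves hang off the mdio bus):
 *
 *	nexus -> argemdio -> mdio
 *	arge  -> miiproxy (relays MII requests over to the mdio bus)
 *
 * Both paths end up in the same arge_miibus_readreg()/arge_miibus_writereg()
 * register accessors.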
*/ static device_method_t argemdio_methods[] = { /* Device interface */ DEVMETHOD(device_probe, argemdio_probe), DEVMETHOD(device_attach, argemdio_attach), DEVMETHOD(device_detach, argemdio_detach), /* bus interface */ DEVMETHOD(bus_add_child, device_add_child_ordered), /* MDIO access */ DEVMETHOD(mdio_readreg, arge_miibus_readreg), DEVMETHOD(mdio_writereg, arge_miibus_writereg), }; DEFINE_CLASS_0(argemdio, argemdio_driver, argemdio_methods, sizeof(struct arge_softc)); static devclass_t argemdio_devclass; DRIVER_MODULE(miiproxy, arge, miiproxy_driver, miiproxy_devclass, 0, 0); DRIVER_MODULE(argemdio, nexus, argemdio_driver, argemdio_devclass, 0, 0); DRIVER_MODULE(mdio, argemdio, mdio_driver, mdio_devclass, 0, 0); #endif static struct mtx miibus_mtx; MTX_SYSINIT(miibus_mtx, &miibus_mtx, "arge mii lock", MTX_DEF); /* * Flushes all * * XXX this needs to be done at interrupt time! Grr! */ static void arge_flush_ddr(struct arge_softc *sc) { switch (sc->arge_mac_unit) { case 0: ar71xx_device_flush_ddr(AR71XX_CPU_DDR_FLUSH_GE0); break; case 1: ar71xx_device_flush_ddr(AR71XX_CPU_DDR_FLUSH_GE1); break; default: device_printf(sc->arge_dev, "%s: unknown unit (%d)\n", __func__, sc->arge_mac_unit); break; } } static int arge_probe(device_t dev) { device_set_desc(dev, "Atheros AR71xx built-in ethernet interface"); return (BUS_PROBE_NOWILDCARD); } #ifdef ARGE_DEBUG static void arge_attach_intr_sysctl(device_t dev, struct sysctl_oid_list *parent) { struct arge_softc *sc = device_get_softc(dev); struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); char sn[8]; int i; tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "intr", CTLFLAG_RD, NULL, "Interrupt statistics"); child = SYSCTL_CHILDREN(tree); for (i = 0; i < 32; i++) { snprintf(sn, sizeof(sn), "%d", i); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, sn, CTLFLAG_RD, &sc->intr_stats.count[i], 0, ""); } } #endif static void arge_attach_sysctl(device_t dev) { struct arge_softc *sc = device_get_softc(dev); struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); #ifdef ARGE_DEBUG SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "debug", CTLFLAG_RW, &sc->arge_debug, 0, "arge interface debugging flags"); arge_attach_intr_sysctl(dev, SYSCTL_CHILDREN(tree)); #endif SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_pkts_aligned", CTLFLAG_RW, &sc->stats.tx_pkts_aligned, 0, "number of TX aligned packets"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_pkts_unaligned", CTLFLAG_RW, &sc->stats.tx_pkts_unaligned, 0, "number of TX unaligned packets"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_pkts_unaligned_start", CTLFLAG_RW, &sc->stats.tx_pkts_unaligned_start, 0, "number of TX unaligned packets (start)"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_pkts_unaligned_len", CTLFLAG_RW, &sc->stats.tx_pkts_unaligned_len, 0, "number of TX unaligned packets (len)"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_pkts_nosegs", CTLFLAG_RW, &sc->stats.tx_pkts_nosegs, 0, "number of TX packets fail with no ring slots avail"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "intr_stray_filter", CTLFLAG_RW, &sc->stats.intr_stray, 0, "number of stray interrupts (filter)"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "intr_stray_intr", CTLFLAG_RW, &sc->stats.intr_stray2, 0, "number of stray interrupts (intr)"); SYSCTL_ADD_UINT(ctx, 
SYSCTL_CHILDREN(tree), OID_AUTO, "intr_ok", CTLFLAG_RW, &sc->stats.intr_ok, 0, "number of OK interrupts"); #ifdef ARGE_DEBUG SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_prod", CTLFLAG_RW, &sc->arge_cdata.arge_tx_prod, 0, ""); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_cons", CTLFLAG_RW, &sc->arge_cdata.arge_tx_cons, 0, ""); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_cnt", CTLFLAG_RW, &sc->arge_cdata.arge_tx_cnt, 0, ""); #endif } static void arge_reset_mac(struct arge_softc *sc) { uint32_t reg; uint32_t reset_reg; ARGEDEBUG(sc, ARGE_DBG_RESET, "%s called\n", __func__); /* Step 1. Soft-reset MAC */ ARGE_SET_BITS(sc, AR71XX_MAC_CFG1, MAC_CFG1_SOFT_RESET); DELAY(20); /* Step 2. Punt the MAC core from the central reset register */ /* * XXX TODO: migrate this (and other) chip specific stuff into * a chipdef method. */ if (sc->arge_mac_unit == 0) { reset_reg = RST_RESET_GE0_MAC; } else { reset_reg = RST_RESET_GE1_MAC; } /* * AR934x (and later) also needs the MDIO block reset. * XXX should methodize this! */ if (ar71xx_soc == AR71XX_SOC_AR9341 || ar71xx_soc == AR71XX_SOC_AR9342 || ar71xx_soc == AR71XX_SOC_AR9344) { if (sc->arge_mac_unit == 0) { reset_reg |= AR934X_RESET_GE0_MDIO; } else { reset_reg |= AR934X_RESET_GE1_MDIO; } } if (ar71xx_soc == AR71XX_SOC_QCA9556 || ar71xx_soc == AR71XX_SOC_QCA9558) { if (sc->arge_mac_unit == 0) { reset_reg |= QCA955X_RESET_GE0_MDIO; } else { reset_reg |= QCA955X_RESET_GE1_MDIO; } } + + if (ar71xx_soc == AR71XX_SOC_QCA9533 || + ar71xx_soc == AR71XX_SOC_QCA9533_V2) { + if (sc->arge_mac_unit == 0) { + reset_reg |= QCA953X_RESET_GE0_MDIO; + } else { + reset_reg |= QCA953X_RESET_GE1_MDIO; + } + } + ar71xx_device_stop(reset_reg); DELAY(100); ar71xx_device_start(reset_reg); /* Step 3. Reconfigure MAC block */ ARGE_WRITE(sc, AR71XX_MAC_CFG1, MAC_CFG1_SYNC_RX | MAC_CFG1_RX_ENABLE | MAC_CFG1_SYNC_TX | MAC_CFG1_TX_ENABLE); reg = ARGE_READ(sc, AR71XX_MAC_CFG2); reg |= MAC_CFG2_ENABLE_PADCRC | MAC_CFG2_LENGTH_FIELD ; ARGE_WRITE(sc, AR71XX_MAC_CFG2, reg); ARGE_WRITE(sc, AR71XX_MAC_MAX_FRAME_LEN, 1536); } /* * These values map to the divisor values programmed into * AR71XX_MAC_MII_CFG. * * The index of each value corresponds to the divisor section * value in AR71XX_MAC_MII_CFG (ie, table[0] means '0' in * AR71XX_MAC_MII_CFG, table[1] means '1', etc.) */ static const uint32_t ar71xx_mdio_div_table[] = { 4, 4, 6, 8, 10, 14, 20, 28, }; static const uint32_t ar7240_mdio_div_table[] = { 2, 2, 4, 6, 8, 12, 18, 26, 32, 40, 48, 56, 62, 70, 78, 96, }; static const uint32_t ar933x_mdio_div_table[] = { 4, 4, 6, 8, 10, 14, 20, 28, 34, 42, 50, 58, 66, 74, 82, 98, }; /* * Lookup the divisor to use based on the given frequency. * * Returns the divisor to use, or -ve on error. */ static int arge_mdio_get_divider(struct arge_softc *sc, unsigned long mdio_clock) { unsigned long ref_clock, t; const uint32_t *table; int ndivs; int i; /* * This is the base MDIO frequency on the SoC. * The dividers .. well, divide. Duh. */ ref_clock = ar71xx_mdio_freq(); /* * If either clock is undefined, just tell the * caller to fall through to the defaults. */ if (ref_clock == 0 || mdio_clock == 0) return (-EINVAL); /* * Pick the correct table! 
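 * Worked example with illustrative clocks: for ref_clock = 100 MHz and a
 * target mdio_clock of 2 MHz on an AR933x-family SoC, the walk below hits
 * 100 MHz / 50 = 2 MHz at ar933x_mdio_div_table[10], so index 10 is
 * returned.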
*/ switch (ar71xx_soc) { case AR71XX_SOC_AR9330: case AR71XX_SOC_AR9331: case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: table = ar933x_mdio_div_table; ndivs = nitems(ar933x_mdio_div_table); break; case AR71XX_SOC_AR7240: case AR71XX_SOC_AR7241: case AR71XX_SOC_AR7242: table = ar7240_mdio_div_table; ndivs = nitems(ar7240_mdio_div_table); break; default: table = ar71xx_mdio_div_table; ndivs = nitems(ar71xx_mdio_div_table); } /* * Now, walk through the list and find the first divisor * that falls under the target MDIO frequency. * * The divisors go up, but the corresponding frequencies * are actually decreasing. */ for (i = 0; i < ndivs; i++) { t = ref_clock / table[i]; if (t <= mdio_clock) { return (i); } } ARGEDEBUG(sc, ARGE_DBG_RESET, "No divider found; MDIO=%lu Hz; target=%lu Hz\n", ref_clock, mdio_clock); return (-ENOENT); } /* * Fetch the MDIO bus clock rate. * * For now, the default is DIV_28 for everything * bar AR934x, which will be DIV_58. * * It will definitely need updating to take into account * the MDIO bus core clock rate and the target clock * rate for the chip. */ static uint32_t arge_fetch_mdiobus_clock_rate(struct arge_softc *sc) { int mdio_freq, div; /* * Is the MDIO frequency defined? If so, find a divisor that * makes reasonable sense. Don't overshoot the frequency. */ if (resource_int_value(device_get_name(sc->arge_dev), device_get_unit(sc->arge_dev), "mdio_freq", &mdio_freq) == 0) { sc->arge_mdiofreq = mdio_freq; div = arge_mdio_get_divider(sc, sc->arge_mdiofreq); if (bootverbose) device_printf(sc->arge_dev, "%s: mdio ref freq=%llu Hz, target freq=%llu Hz," " divisor index=%d\n", __func__, (unsigned long long) ar71xx_mdio_freq(), (unsigned long long) mdio_freq, div); if (div >= 0) return (div); } /* * Default value(s). * * XXX obviously these need .. fixing. * * From Linux/OpenWRT: * * + 7240? DIV_6 * + Builtin-switch port and not 934x? DIV_10 * + Not built-in switch port and 934x? DIV_58 * + .. else DIV_28. */ switch (ar71xx_soc) { case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: return (MAC_MII_CFG_CLOCK_DIV_58); break; default: return (MAC_MII_CFG_CLOCK_DIV_28); } } static void arge_reset_miibus(struct arge_softc *sc) { uint32_t mdio_div; mdio_div = arge_fetch_mdiobus_clock_rate(sc); /* * XXX AR934x and later; should we be also resetting the * MDIO block(s) using the reset register block? 
*/ /* Reset MII bus; program in the default divisor */ ARGE_WRITE(sc, AR71XX_MAC_MII_CFG, MAC_MII_CFG_RESET | mdio_div); DELAY(100); ARGE_WRITE(sc, AR71XX_MAC_MII_CFG, mdio_div); DELAY(100); } static void arge_fetch_pll_config(struct arge_softc *sc) { long int val; if (resource_long_value(device_get_name(sc->arge_dev), device_get_unit(sc->arge_dev), "pll_10", &val) == 0) { sc->arge_pllcfg.pll_10 = val; device_printf(sc->arge_dev, "%s: pll_10 = 0x%x\n", __func__, (int) val); } if (resource_long_value(device_get_name(sc->arge_dev), device_get_unit(sc->arge_dev), "pll_100", &val) == 0) { sc->arge_pllcfg.pll_100 = val; device_printf(sc->arge_dev, "%s: pll_100 = 0x%x\n", __func__, (int) val); } if (resource_long_value(device_get_name(sc->arge_dev), device_get_unit(sc->arge_dev), "pll_1000", &val) == 0) { sc->arge_pllcfg.pll_1000 = val; device_printf(sc->arge_dev, "%s: pll_1000 = 0x%x\n", __func__, (int) val); } } static int arge_attach(device_t dev) { struct ifnet *ifp; struct arge_softc *sc; int error = 0, rid, i; uint32_t hint; long eeprom_mac_addr = 0; int miicfg = 0; int readascii = 0; int local_mac = 0; uint8_t local_macaddr[ETHER_ADDR_LEN]; char * local_macstr; char devid_str[32]; int count; sc = device_get_softc(dev); sc->arge_dev = dev; sc->arge_mac_unit = device_get_unit(dev); /* * See if there's a "board" MAC address hint available for * this particular device. * * This is in the environment - it'd be nice to use the resource_*() * routines, but at the moment the system is booting, the resource hints * are set to the 'static' map so they're not pulling from kenv. */ snprintf(devid_str, 32, "hint.%s.%d.macaddr", device_get_name(dev), device_get_unit(dev)); if ((local_macstr = kern_getenv(devid_str)) != NULL) { uint32_t tmpmac[ETHER_ADDR_LEN]; /* Have a MAC address; should use it */ device_printf(dev, "Overriding MAC address from environment: '%s'\n", local_macstr); /* Extract out the MAC address */ /* XXX this should all be a generic method */ count = sscanf(local_macstr, "%x%*c%x%*c%x%*c%x%*c%x%*c%x", &tmpmac[0], &tmpmac[1], &tmpmac[2], &tmpmac[3], &tmpmac[4], &tmpmac[5]); if (count == 6) { /* Valid! */ local_mac = 1; for (i = 0; i < ETHER_ADDR_LEN; i++) local_macaddr[i] = tmpmac[i]; } /* Done! */ freeenv(local_macstr); local_macstr = NULL; } /* * Hardware workarounds. */ switch (ar71xx_soc) { case AR71XX_SOC_AR9330: case AR71XX_SOC_AR9331: case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: /* Arbitrary alignment */ sc->arge_hw_flags |= ARGE_HW_FLG_TX_DESC_ALIGN_1BYTE; sc->arge_hw_flags |= ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE; break; default: sc->arge_hw_flags |= ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE; sc->arge_hw_flags |= ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE; break; } /* * Some units (e.g. the TP-Link WR-1043ND) do not have a convenient * EEPROM location to read the ethernet MAC address from. * OpenWRT simply snaffles it from a fixed location. * * Since multiple units seem to use this feature, include * a method of setting the MAC address based on a flash location * in CPU address space. * * Some vendors have decided to store the mac address as a literal * string of 18 characters in xx:xx:xx:xx:xx:xx format instead of * an array of numbers.
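 * For example (hypothetical flash contents), the text "00:0c:42:01:02:03"
 * is decoded below three characters at a time, i.e. strtol(&mac[0], NULL,
 * 16), strtol(&mac[3], NULL, 16) and so on, one byte per iteration.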
Expose a hint to turn on this conversion * feature via strtol() */ if (local_mac == 0 && resource_long_value(device_get_name(dev), device_get_unit(dev), "eeprommac", &eeprom_mac_addr) == 0) { local_mac = 1; int i; const char *mac = (const char *) MIPS_PHYS_TO_KSEG1(eeprom_mac_addr); device_printf(dev, "Overriding MAC from EEPROM\n"); if (resource_int_value(device_get_name(dev), device_get_unit(dev), "readascii", &readascii) == 0) { device_printf(dev, "Vendor stores MAC in ASCII format\n"); for (i = 0; i < 6; i++) { local_macaddr[i] = strtol(&(mac[i*3]), NULL, 16); } } else { for (i = 0; i < 6; i++) { local_macaddr[i] = mac[i]; } } } KASSERT(((sc->arge_mac_unit == 0) || (sc->arge_mac_unit == 1)), ("if_arge: Only MAC0 and MAC1 supported")); /* * Fetch the PLL configuration. */ arge_fetch_pll_config(sc); /* * Get the MII configuration, if applicable. */ if (resource_int_value(device_get_name(dev), device_get_unit(dev), "miimode", &miicfg) == 0) { /* XXX bounds check? */ device_printf(dev, "%s: overriding MII mode to '%s'\n", __func__, arge_miicfg_str[miicfg]); sc->arge_miicfg = miicfg; } /* * Get which PHY of 5 available we should use for this unit */ if (resource_int_value(device_get_name(dev), device_get_unit(dev), "phymask", &sc->arge_phymask) != 0) { /* * Use port 4 (WAN) for GE0. For any other port use * its PHY the same as its unit number */ if (sc->arge_mac_unit == 0) sc->arge_phymask = (1 << 4); else /* Use all phys up to 4 */ sc->arge_phymask = (1 << 4) - 1; device_printf(dev, "No PHY specified, using mask %d\n", sc->arge_phymask); } /* * Get default/hard-coded media & duplex mode. */ if (resource_int_value(device_get_name(dev), device_get_unit(dev), "media", &hint) != 0) hint = 0; if (hint == 1000) sc->arge_media_type = IFM_1000_T; else if (hint == 100) sc->arge_media_type = IFM_100_TX; else if (hint == 10) sc->arge_media_type = IFM_10_T; else sc->arge_media_type = 0; if (resource_int_value(device_get_name(dev), device_get_unit(dev), "fduplex", &hint) != 0) hint = 1; if (hint) sc->arge_duplex_mode = IFM_FDX; else sc->arge_duplex_mode = 0; mtx_init(&sc->arge_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->arge_stat_callout, &sc->arge_mtx, 0); TASK_INIT(&sc->arge_link_task, 0, arge_link_task, sc); /* Map control/status registers. */ sc->arge_rid = 0; sc->arge_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->arge_rid, RF_ACTIVE | RF_SHAREABLE); if (sc->arge_res == NULL) { device_printf(dev, "couldn't map memory\n"); error = ENXIO; goto fail; } /* Allocate interrupts */ rid = 0; sc->arge_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->arge_irq == NULL) { device_printf(dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } /* Allocate ifnet structure. */ ifp = sc->arge_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "couldn't allocate ifnet structure\n"); error = ENOSPC; goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = arge_ioctl; ifp->if_start = arge_start; ifp->if_init = arge_init; sc->arge_if_flags = ifp->if_flags; /* XXX: add real size */ IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_snd.ifq_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); /* Tell the upper layer(s) we support long frames. 
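 * (IFCAP_VLAN_MTU advertises that full-sized frames plus the 4-byte
 * 802.1Q tag are accepted, so vlan(4) interfaces stacked on top do not
 * have to shrink their MTU.)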
*/ ifp->if_capabilities |= IFCAP_VLAN_MTU; ifp->if_capenable = ifp->if_capabilities; #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif /* If there's a local mac defined, copy that in */ if (local_mac == 1) { (void) ar71xx_mac_addr_init(sc->arge_eaddr, local_macaddr, 0, 0); } else { /* * No MAC address configured. Generate a random one. */ if (bootverbose) device_printf(dev, "Generating random ethernet address.\n"); (void) ar71xx_mac_addr_random_init(sc->arge_eaddr); } if (arge_dma_alloc(sc) != 0) { error = ENXIO; goto fail; } /* * Don't do this for the MDIO bus case - it's already done * as part of the MDIO bus attachment. */ #if !defined(ARGE_MDIO) /* Initialize the MAC block */ arge_reset_mac(sc); arge_reset_miibus(sc); #endif /* Configure MII mode, just for convenience */ if (sc->arge_miicfg != 0) ar71xx_device_set_mii_if(sc->arge_mac_unit, sc->arge_miicfg); /* * Program the Ethernet station address registers from sc->arge_eaddr. */ ARGE_WRITE(sc, AR71XX_MAC_STA_ADDR1, (sc->arge_eaddr[2] << 24) | (sc->arge_eaddr[3] << 16) | (sc->arge_eaddr[4] << 8) | sc->arge_eaddr[5]); ARGE_WRITE(sc, AR71XX_MAC_STA_ADDR2, (sc->arge_eaddr[0] << 8) | sc->arge_eaddr[1]); ARGE_WRITE(sc, AR71XX_MAC_FIFO_CFG0, FIFO_CFG0_ALL << FIFO_CFG0_ENABLE_SHIFT); /* * SoC specific bits. */ switch (ar71xx_soc) { case AR71XX_SOC_AR7240: case AR71XX_SOC_AR7241: case AR71XX_SOC_AR7242: case AR71XX_SOC_AR9330: case AR71XX_SOC_AR9331: case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: ARGE_WRITE(sc, AR71XX_MAC_FIFO_CFG1, 0x0010ffff); ARGE_WRITE(sc, AR71XX_MAC_FIFO_CFG2, 0x015500aa); break; /* AR71xx, AR913x */ default: ARGE_WRITE(sc, AR71XX_MAC_FIFO_CFG1, 0x0fff0000); ARGE_WRITE(sc, AR71XX_MAC_FIFO_CFG2, 0x00001fff); } ARGE_WRITE(sc, AR71XX_MAC_FIFO_RX_FILTMATCH, FIFO_RX_FILTMATCH_DEFAULT); ARGE_WRITE(sc, AR71XX_MAC_FIFO_RX_FILTMASK, FIFO_RX_FILTMASK_DEFAULT); #if defined(ARGE_MDIO) sc->arge_miiproxy = mii_attach_proxy(sc->arge_dev); #endif device_printf(sc->arge_dev, "finishing attachment, phymask %04x" ", proxy %s \n", sc->arge_phymask, sc->arge_miiproxy == NULL ? "null" : "set"); for (i = 0; i < ARGE_NPHY; i++) { if (((1 << i) & sc->arge_phymask) != 0) { error = mii_attach(sc->arge_miiproxy != NULL ? sc->arge_miiproxy : sc->arge_dev, &sc->arge_miibus, sc->arge_ifp, arge_ifmedia_upd, arge_ifmedia_sts, BMSR_DEFCAPMASK, i, MII_OFFSET_ANY, 0); if (error != 0) { device_printf(sc->arge_dev, "unable to attach" " PHY %d: %d\n", i, error); goto fail; } } } if (sc->arge_miibus == NULL) { /* no PHY, so use hard-coded values */ ifmedia_init(&sc->arge_ifmedia, 0, arge_multiphy_mediachange, arge_multiphy_mediastatus); ifmedia_add(&sc->arge_ifmedia, IFM_ETHER | sc->arge_media_type | sc->arge_duplex_mode, 0, NULL); ifmedia_set(&sc->arge_ifmedia, IFM_ETHER | sc->arge_media_type | sc->arge_duplex_mode); arge_set_pll(sc, sc->arge_media_type, sc->arge_duplex_mode); } /* Call MI attach routine.
*/ ether_ifattach(sc->arge_ifp, sc->arge_eaddr); /* Hook interrupt last to avoid having to lock softc */ error = bus_setup_intr(sc->arge_dev, sc->arge_irq, INTR_TYPE_NET | INTR_MPSAFE, arge_intr_filter, arge_intr, sc, &sc->arge_intrhand); if (error) { device_printf(sc->arge_dev, "couldn't set up irq\n"); ether_ifdetach(sc->arge_ifp); goto fail; } /* setup sysctl variables */ arge_attach_sysctl(sc->arge_dev); fail: if (error) arge_detach(dev); return (error); } static int arge_detach(device_t dev) { struct arge_softc *sc = device_get_softc(dev); struct ifnet *ifp = sc->arge_ifp; KASSERT(mtx_initialized(&sc->arge_mtx), ("arge mutex not initialized")); /* These should only be active if attach succeeded */ if (device_is_attached(dev)) { ARGE_LOCK(sc); sc->arge_detach = 1; #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(ifp); #endif arge_stop(sc); ARGE_UNLOCK(sc); taskqueue_drain(taskqueue_swi, &sc->arge_link_task); ether_ifdetach(ifp); } if (sc->arge_miibus) device_delete_child(dev, sc->arge_miibus); if (sc->arge_miiproxy) device_delete_child(dev, sc->arge_miiproxy); bus_generic_detach(dev); if (sc->arge_intrhand) bus_teardown_intr(dev, sc->arge_irq, sc->arge_intrhand); if (sc->arge_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->arge_rid, sc->arge_res); if (ifp) if_free(ifp); arge_dma_free(sc); mtx_destroy(&sc->arge_mtx); return (0); } static int arge_suspend(device_t dev) { panic("%s", __func__); return 0; } static int arge_resume(device_t dev) { panic("%s", __func__); return 0; } static int arge_shutdown(device_t dev) { struct arge_softc *sc; sc = device_get_softc(dev); ARGE_LOCK(sc); arge_stop(sc); ARGE_UNLOCK(sc); return (0); } static void arge_hinted_child(device_t bus, const char *dname, int dunit) { BUS_ADD_CHILD(bus, 0, dname, dunit); device_printf(bus, "hinted child %s%d\n", dname, dunit); } static int arge_mdio_busy(struct arge_softc *sc) { int i, result; for (i = 0; i < ARGE_MII_TIMEOUT; i++) { DELAY(5); ARGE_MDIO_BARRIER_READ(sc); result = ARGE_MDIO_READ(sc, AR71XX_MAC_MII_INDICATOR); if (! result) return (0); DELAY(5); } return (-1); } static int arge_miibus_readreg(device_t dev, int phy, int reg) { struct arge_softc * sc = device_get_softc(dev); int result; uint32_t addr = (phy << MAC_MII_PHY_ADDR_SHIFT) | (reg & MAC_MII_REG_MASK); mtx_lock(&miibus_mtx); ARGE_MDIO_BARRIER_RW(sc); ARGE_MDIO_WRITE(sc, AR71XX_MAC_MII_CMD, MAC_MII_CMD_WRITE); ARGE_MDIO_BARRIER_WRITE(sc); ARGE_MDIO_WRITE(sc, AR71XX_MAC_MII_ADDR, addr); ARGE_MDIO_BARRIER_WRITE(sc); ARGE_MDIO_WRITE(sc, AR71XX_MAC_MII_CMD, MAC_MII_CMD_READ); if (arge_mdio_busy(sc) != 0) { mtx_unlock(&miibus_mtx); ARGEDEBUG(sc, ARGE_DBG_MII, "%s timed out\n", __func__); /* XXX: return ERRNO instead?
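 * (A -1 return at least reads back as all-ones in the 16-bit MII
 * register space, which the PHY probe path typically interprets as
 * 'no device present'.)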
*/ return (-1); } ARGE_MDIO_BARRIER_READ(sc); result = ARGE_MDIO_READ(sc, AR71XX_MAC_MII_STATUS) & MAC_MII_STATUS_MASK; ARGE_MDIO_BARRIER_RW(sc); ARGE_MDIO_WRITE(sc, AR71XX_MAC_MII_CMD, MAC_MII_CMD_WRITE); mtx_unlock(&miibus_mtx); ARGEDEBUG(sc, ARGE_DBG_MII, "%s: phy=%d, reg=%02x, value[%08x]=%04x\n", __func__, phy, reg, addr, result); return (result); } static int arge_miibus_writereg(device_t dev, int phy, int reg, int data) { struct arge_softc * sc = device_get_softc(dev); uint32_t addr = (phy << MAC_MII_PHY_ADDR_SHIFT) | (reg & MAC_MII_REG_MASK); ARGEDEBUG(sc, ARGE_DBG_MII, "%s: phy=%d, reg=%02x, value=%04x\n", __func__, phy, reg, data); mtx_lock(&miibus_mtx); ARGE_MDIO_BARRIER_RW(sc); ARGE_MDIO_WRITE(sc, AR71XX_MAC_MII_ADDR, addr); ARGE_MDIO_BARRIER_WRITE(sc); ARGE_MDIO_WRITE(sc, AR71XX_MAC_MII_CONTROL, data); ARGE_MDIO_BARRIER_WRITE(sc); if (arge_mdio_busy(sc) != 0) { mtx_unlock(&miibus_mtx); ARGEDEBUG(sc, ARGE_DBG_MII, "%s timed out\n", __func__); /* XXX: return ERRNO instead? */ return (-1); } mtx_unlock(&miibus_mtx); return (0); } static void arge_miibus_statchg(device_t dev) { struct arge_softc *sc; sc = device_get_softc(dev); taskqueue_enqueue(taskqueue_swi, &sc->arge_link_task); } static void arge_link_task(void *arg, int pending) { struct arge_softc *sc; sc = (struct arge_softc *)arg; ARGE_LOCK(sc); arge_update_link_locked(sc); ARGE_UNLOCK(sc); } static void arge_update_link_locked(struct arge_softc *sc) { struct mii_data *mii; struct ifnet *ifp; uint32_t media, duplex; mii = device_get_softc(sc->arge_miibus); ifp = sc->arge_ifp; if (mii == NULL || ifp == NULL || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { return; } /* * If we have a static media type configured, then * use that. Some PHY configurations (e.g. QCA955x -> AR8327) * use a static speed/duplex between the SoC and switch, * even though the front-facing PHY speed changes. */ if (sc->arge_media_type != 0) { ARGEDEBUG(sc, ARGE_DBG_MII, "%s: fixed; media=%d, duplex=%d\n", __func__, sc->arge_media_type, sc->arge_duplex_mode); if (mii->mii_media_status & IFM_ACTIVE) { sc->arge_link_status = 1; } else { sc->arge_link_status = 0; } arge_set_pll(sc, sc->arge_media_type, sc->arge_duplex_mode); } if (mii->mii_media_status & IFM_ACTIVE) { media = IFM_SUBTYPE(mii->mii_media_active); if (media != IFM_NONE) { sc->arge_link_status = 1; duplex = mii->mii_media_active & IFM_GMASK; ARGEDEBUG(sc, ARGE_DBG_MII, "%s: media=%d, duplex=%d\n", __func__, media, duplex); arge_set_pll(sc, media, duplex); } } else { sc->arge_link_status = 0; } } static void arge_set_pll(struct arge_softc *sc, int media, int duplex) { uint32_t cfg, ifcontrol, rx_filtmask; uint32_t fifo_tx, pll; int if_speed; /* * XXX Verify - is this valid for all chips? * QCA955x (and likely some of the earlier chips!) define * this as nibble mode and byte mode, and those have to do * with the interface type (MII/SMII versus GMII/RGMII.) */ ARGEDEBUG(sc, ARGE_DBG_PLL, "set_pll(%04x, %s)\n", media, duplex == IFM_FDX ?
"full" : "half"); cfg = ARGE_READ(sc, AR71XX_MAC_CFG2); cfg &= ~(MAC_CFG2_IFACE_MODE_1000 | MAC_CFG2_IFACE_MODE_10_100 | MAC_CFG2_FULL_DUPLEX); if (duplex == IFM_FDX) cfg |= MAC_CFG2_FULL_DUPLEX; ifcontrol = ARGE_READ(sc, AR71XX_MAC_IFCONTROL); ifcontrol &= ~MAC_IFCONTROL_SPEED; rx_filtmask = ARGE_READ(sc, AR71XX_MAC_FIFO_RX_FILTMASK); rx_filtmask &= ~FIFO_RX_MASK_BYTE_MODE; switch(media) { case IFM_10_T: cfg |= MAC_CFG2_IFACE_MODE_10_100; if_speed = 10; break; case IFM_100_TX: cfg |= MAC_CFG2_IFACE_MODE_10_100; ifcontrol |= MAC_IFCONTROL_SPEED; if_speed = 100; break; case IFM_1000_T: case IFM_1000_SX: cfg |= MAC_CFG2_IFACE_MODE_1000; rx_filtmask |= FIFO_RX_MASK_BYTE_MODE; if_speed = 1000; break; default: if_speed = 100; device_printf(sc->arge_dev, "Unknown media %d\n", media); } ARGEDEBUG(sc, ARGE_DBG_PLL, "%s: if_speed=%d\n", __func__, if_speed); switch (ar71xx_soc) { case AR71XX_SOC_AR7240: case AR71XX_SOC_AR7241: case AR71XX_SOC_AR7242: case AR71XX_SOC_AR9330: case AR71XX_SOC_AR9331: case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: fifo_tx = 0x01f00140; break; case AR71XX_SOC_AR9130: case AR71XX_SOC_AR9132: fifo_tx = 0x00780fff; break; /* AR71xx */ default: fifo_tx = 0x008001ff; } ARGE_WRITE(sc, AR71XX_MAC_CFG2, cfg); ARGE_WRITE(sc, AR71XX_MAC_IFCONTROL, ifcontrol); ARGE_WRITE(sc, AR71XX_MAC_FIFO_RX_FILTMASK, rx_filtmask); ARGE_WRITE(sc, AR71XX_MAC_FIFO_TX_THRESHOLD, fifo_tx); /* fetch PLL registers */ pll = ar71xx_device_get_eth_pll(sc->arge_mac_unit, if_speed); ARGEDEBUG(sc, ARGE_DBG_PLL, "%s: pll=0x%x\n", __func__, pll); /* Override if required by platform data */ if (if_speed == 10 && sc->arge_pllcfg.pll_10 != 0) pll = sc->arge_pllcfg.pll_10; else if (if_speed == 100 && sc->arge_pllcfg.pll_100 != 0) pll = sc->arge_pllcfg.pll_100; else if (if_speed == 1000 && sc->arge_pllcfg.pll_1000 != 0) pll = sc->arge_pllcfg.pll_1000; ARGEDEBUG(sc, ARGE_DBG_PLL, "%s: final pll=0x%x\n", __func__, pll); /* XXX ensure pll != 0 */ ar71xx_device_set_pll_ge(sc->arge_mac_unit, if_speed, pll); /* set MII registers */ /* * This was introduced to match what the Linux ag71xx ethernet * driver does. For the AR71xx case, it does set the port * MII speed. However, if this is done, non-gigabit speeds * are not at all reliable when speaking via RGMII through * 'bridge' PHY port that's pretending to be a local PHY. * * Until that gets root caused, and until an AR71xx + normal * PHY board is tested, leave this disabled. 
*/ #if 0 ar71xx_device_set_mii_speed(sc->arge_mac_unit, if_speed); #endif } static void arge_reset_dma(struct arge_softc *sc) { ARGEDEBUG(sc, ARGE_DBG_RESET, "%s: called\n", __func__); ARGE_WRITE(sc, AR71XX_DMA_RX_CONTROL, 0); ARGE_WRITE(sc, AR71XX_DMA_TX_CONTROL, 0); ARGE_WRITE(sc, AR71XX_DMA_RX_DESC, 0); ARGE_WRITE(sc, AR71XX_DMA_TX_DESC, 0); /* Clear all possible RX interrupts */ while(ARGE_READ(sc, AR71XX_DMA_RX_STATUS) & DMA_RX_STATUS_PKT_RECVD) ARGE_WRITE(sc, AR71XX_DMA_RX_STATUS, DMA_RX_STATUS_PKT_RECVD); /* * Clear all possible TX interrupts */ while(ARGE_READ(sc, AR71XX_DMA_TX_STATUS) & DMA_TX_STATUS_PKT_SENT) ARGE_WRITE(sc, AR71XX_DMA_TX_STATUS, DMA_TX_STATUS_PKT_SENT); /* * Now Rx/Tx errors */ ARGE_WRITE(sc, AR71XX_DMA_RX_STATUS, DMA_RX_STATUS_BUS_ERROR | DMA_RX_STATUS_OVERFLOW); ARGE_WRITE(sc, AR71XX_DMA_TX_STATUS, DMA_TX_STATUS_BUS_ERROR | DMA_TX_STATUS_UNDERRUN); /* * Force a DDR flush so any pending data is properly * flushed to RAM before underlying buffers are freed. */ arge_flush_ddr(sc); } static void arge_init(void *xsc) { struct arge_softc *sc = xsc; ARGE_LOCK(sc); arge_init_locked(sc); ARGE_UNLOCK(sc); } static void arge_init_locked(struct arge_softc *sc) { struct ifnet *ifp = sc->arge_ifp; struct mii_data *mii; ARGE_LOCK_ASSERT(sc); if ((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) return; /* Init circular RX list. */ if (arge_rx_ring_init(sc) != 0) { device_printf(sc->arge_dev, "initialization failed: no memory for rx buffers\n"); arge_stop(sc); return; } /* Init tx descriptors. */ arge_tx_ring_init(sc); arge_reset_dma(sc); if (sc->arge_miibus) { mii = device_get_softc(sc->arge_miibus); mii_mediachg(mii); } else { /* * Sun always shines over multiPHY interface */ sc->arge_link_status = 1; } ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if (sc->arge_miibus) { callout_reset(&sc->arge_stat_callout, hz, arge_tick, sc); arge_update_link_locked(sc); } ARGE_WRITE(sc, AR71XX_DMA_TX_DESC, ARGE_TX_RING_ADDR(sc, 0)); ARGE_WRITE(sc, AR71XX_DMA_RX_DESC, ARGE_RX_RING_ADDR(sc, 0)); /* Start listening */ ARGE_WRITE(sc, AR71XX_DMA_RX_CONTROL, DMA_RX_CONTROL_EN); /* Enable interrupts */ ARGE_WRITE(sc, AR71XX_DMA_INTR, DMA_INTR_ALL); } /* * Return whether the mbuf chain is correctly aligned * for the arge TX engine. * * All the MACs have a length requirement: any non-final * fragment (ie, descriptor with MORE bit set) needs to have * a length divisible by 4. * * The AR71xx, AR913x require the start address also be * DWORD aligned. The later MACs don't. */ static int arge_mbuf_chain_is_tx_aligned(struct arge_softc *sc, struct mbuf *m0) { struct mbuf *m; for (m = m0; m != NULL; m = m->m_next) { /* * Only do this for chips that require it. */ if ((sc->arge_hw_flags & ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE) && (mtod(m, intptr_t) & 3) != 0) { sc->stats.tx_pkts_unaligned_start++; return 0; } /* * All chips have this requirement for length. */ if ((m->m_next != NULL) && ((m->m_len & 0x03) != 0)) { sc->stats.tx_pkts_unaligned_len++; return 0; } } return 1; } /* * Encapsulate an mbuf chain in a descriptor by coupling the mbuf data * pointers to the fragment pointers. */ static int arge_encap(struct arge_softc *sc, struct mbuf **m_head) { struct arge_txdesc *txd; struct arge_desc *desc, *prev_desc; bus_dma_segment_t txsegs[ARGE_MAXFRAGS]; int error, i, nsegs, prod, prev_prod; struct mbuf *m; ARGE_LOCK_ASSERT(sc); /* * Fix mbuf chain based on hardware alignment constraints. */ m = *m_head; if (! 
arge_mbuf_chain_is_tx_aligned(sc, m)) { sc->stats.tx_pkts_unaligned++; m = m_defrag(*m_head, M_NOWAIT); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } *m_head = m; } else sc->stats.tx_pkts_aligned++; prod = sc->arge_cdata.arge_tx_prod; txd = &sc->arge_cdata.arge_txdesc[prod]; error = bus_dmamap_load_mbuf_sg(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap, *m_head, txsegs, &nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { panic("EFBIG"); } else if (error != 0) return (error); if (nsegs == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } /* Check number of available descriptors. */ if (sc->arge_cdata.arge_tx_cnt + nsegs >= (ARGE_TX_RING_COUNT - 2)) { bus_dmamap_unload(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap); sc->stats.tx_pkts_nosegs++; return (ENOBUFS); } txd->tx_m = *m_head; bus_dmamap_sync(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap, BUS_DMASYNC_PREWRITE); /* * Make a list of descriptors for this packet. DMA controller will * walk through it while arge_link is not zero. * * Since we're in an endless circular buffer, ensure that * the first descriptor in a multi-descriptor ring is always * set to EMPTY, then undo it when we're done populating. */ prev_prod = prod; desc = prev_desc = NULL; for (i = 0; i < nsegs; i++) { uint32_t tmp; desc = &sc->arge_rdata.arge_tx_ring[prod]; /* * Set DESC_EMPTY so the hardware (hopefully) stops at this * point. We don't want it to start transmitting descriptors * before we've finished fleshing this out. */ tmp = ARGE_DMASIZE(txsegs[i].ds_len); if (i == 0) tmp |= ARGE_DESC_EMPTY; desc->packet_ctrl = tmp; /* XXX Note: only relevant for older MACs; but check length! */ if ((sc->arge_hw_flags & ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE) && (txsegs[i].ds_addr & 3)) panic("TX packet address unaligned\n"); desc->packet_addr = txsegs[i].ds_addr; /* link with previous descriptor */ if (prev_desc) prev_desc->packet_ctrl |= ARGE_DESC_MORE; sc->arge_cdata.arge_tx_cnt++; prev_desc = desc; ARGE_INC(prod, ARGE_TX_RING_COUNT); } /* Update producer index. */ sc->arge_cdata.arge_tx_prod = prod; /* * The descriptors are updated, so enable the first one. */ desc = &sc->arge_rdata.arge_tx_ring[prev_prod]; desc->packet_ctrl &= ~ ARGE_DESC_EMPTY; /* Sync descriptors. */ bus_dmamap_sync(sc->arge_cdata.arge_tx_ring_tag, sc->arge_cdata.arge_tx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Flush writes */ ARGE_BARRIER_WRITE(sc); /* Start transmitting */ ARGEDEBUG(sc, ARGE_DBG_TX, "%s: setting DMA_TX_CONTROL_EN\n", __func__); ARGE_WRITE(sc, AR71XX_DMA_TX_CONTROL, DMA_TX_CONTROL_EN); return (0); } static void arge_start(struct ifnet *ifp) { struct arge_softc *sc; sc = ifp->if_softc; ARGE_LOCK(sc); arge_start_locked(ifp); ARGE_UNLOCK(sc); } static void arge_start_locked(struct ifnet *ifp) { struct arge_softc *sc; struct mbuf *m_head; int enq = 0; sc = ifp->if_softc; ARGE_LOCK_ASSERT(sc); ARGEDEBUG(sc, ARGE_DBG_TX, "%s: beginning\n", __func__); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || sc->arge_link_status == 0 ) return; /* * Before we go any further, check whether we're already full. * The below check errors out immediately if the ring is full * and never gets a chance to set this flag. Although it's * likely never needed, this at least avoids an unexpected * situation.
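 * Concrete (illustrative) numbers: with ARGE_TX_RING_COUNT at 128,
 * transmission pauses once 126 descriptors are outstanding, mirroring
 * the 'arge_tx_cnt + nsegs >= ARGE_TX_RING_COUNT - 2' test in
 * arge_encap() above.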
*/ if (sc->arge_cdata.arge_tx_cnt >= ARGE_TX_RING_COUNT - 2) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; ARGEDEBUG(sc, ARGE_DBG_ERR, "%s: tx_cnt %d >= max %d; setting IFF_DRV_OACTIVE\n", __func__, sc->arge_cdata.arge_tx_cnt, ARGE_TX_RING_COUNT - 2); return; } arge_flush_ddr(sc); for (enq = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->arge_cdata.arge_tx_cnt < ARGE_TX_RING_COUNT - 2; ) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* * Pack the data into the transmit ring. */ if (arge_encap(sc, &m_head)) { if (m_head == NULL) break; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } enq++; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ ETHER_BPF_MTAP(ifp, m_head); } ARGEDEBUG(sc, ARGE_DBG_TX, "%s: finished; queued %d packets\n", __func__, enq); } static void arge_stop(struct arge_softc *sc) { struct ifnet *ifp; ARGE_LOCK_ASSERT(sc); ifp = sc->arge_ifp; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if (sc->arge_miibus) callout_stop(&sc->arge_stat_callout); /* mask out interrupts */ ARGE_WRITE(sc, AR71XX_DMA_INTR, 0); arge_reset_dma(sc); /* Flush FIFO and free any existing mbufs */ arge_flush_ddr(sc); arge_rx_ring_free(sc); arge_tx_ring_free(sc); } static int arge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct arge_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; struct mii_data *mii; int error; #ifdef DEVICE_POLLING int mask; #endif switch (command) { case SIOCSIFFLAGS: ARGE_LOCK(sc); if ((ifp->if_flags & IFF_UP) != 0) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { if (((ifp->if_flags ^ sc->arge_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) != 0) { /* XXX: handle promisc & multi flags */ } } else { if (!sc->arge_detach) arge_init_locked(sc); } } else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; arge_stop(sc); } sc->arge_if_flags = ifp->if_flags; ARGE_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: /* XXX: implement SIOCDELMULTI */ error = 0; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: if (sc->arge_miibus) { mii = device_get_softc(sc->arge_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); } else error = ifmedia_ioctl(ifp, ifr, &sc->arge_ifmedia, command); break; case SIOCSIFCAP: /* XXX: Check other capabilities */ #ifdef DEVICE_POLLING mask = ifp->if_capenable ^ ifr->ifr_reqcap; if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { ARGE_WRITE(sc, AR71XX_DMA_INTR, 0); error = ether_poll_register(arge_poll, ifp); if (error) return error; ARGE_LOCK(sc); ifp->if_capenable |= IFCAP_POLLING; ARGE_UNLOCK(sc); } else { ARGE_WRITE(sc, AR71XX_DMA_INTR, DMA_INTR_ALL); error = ether_poll_deregister(ifp); ARGE_LOCK(sc); ifp->if_capenable &= ~IFCAP_POLLING; ARGE_UNLOCK(sc); } } error = 0; break; #endif default: error = ether_ioctl(ifp, command, data); break; } return (error); } /* * Set media options. */ static int arge_ifmedia_upd(struct ifnet *ifp) { struct arge_softc *sc; struct mii_data *mii; struct mii_softc *miisc; int error; sc = ifp->if_softc; ARGE_LOCK(sc); mii = device_get_softc(sc->arge_miibus); LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); error = mii_mediachg(mii); ARGE_UNLOCK(sc); return (error); } /* * Report current media status. 
*/ static void arge_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct arge_softc *sc = ifp->if_softc; struct mii_data *mii; mii = device_get_softc(sc->arge_miibus); ARGE_LOCK(sc); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; ARGE_UNLOCK(sc); } struct arge_dmamap_arg { bus_addr_t arge_busaddr; }; static void arge_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct arge_dmamap_arg *ctx; if (error != 0) return; ctx = arg; ctx->arge_busaddr = segs[0].ds_addr; } static int arge_dma_alloc(struct arge_softc *sc) { struct arge_dmamap_arg ctx; struct arge_txdesc *txd; struct arge_rxdesc *rxd; int error, i; int arge_tx_align, arge_rx_align; /* Assume 4 byte alignment by default */ arge_tx_align = 4; arge_rx_align = 4; if (sc->arge_hw_flags & ARGE_HW_FLG_TX_DESC_ALIGN_1BYTE) arge_tx_align = 1; if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE) arge_rx_align = 1; /* Create parent DMA tag. */ error = bus_dma_tag_create( bus_get_dma_tag(sc->arge_dev), /* parent */ 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT, /* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->arge_cdata.arge_parent_tag); if (error != 0) { device_printf(sc->arge_dev, "failed to create parent DMA tag\n"); goto fail; } /* Create tag for Tx ring. */ error = bus_dma_tag_create( sc->arge_cdata.arge_parent_tag, /* parent */ ARGE_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ARGE_TX_DMA_SIZE, /* maxsize */ 1, /* nsegments */ ARGE_TX_DMA_SIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->arge_cdata.arge_tx_ring_tag); if (error != 0) { device_printf(sc->arge_dev, "failed to create Tx ring DMA tag\n"); goto fail; } /* Create tag for Rx ring. */ error = bus_dma_tag_create( sc->arge_cdata.arge_parent_tag, /* parent */ ARGE_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ARGE_RX_DMA_SIZE, /* maxsize */ 1, /* nsegments */ ARGE_RX_DMA_SIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->arge_cdata.arge_rx_ring_tag); if (error != 0) { device_printf(sc->arge_dev, "failed to create Rx ring DMA tag\n"); goto fail; } /* Create tag for Tx buffers. */ error = bus_dma_tag_create( sc->arge_cdata.arge_parent_tag, /* parent */ arge_tx_align, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES * ARGE_MAXFRAGS, /* maxsize */ ARGE_MAXFRAGS, /* nsegments */ MCLBYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->arge_cdata.arge_tx_tag); if (error != 0) { device_printf(sc->arge_dev, "failed to create Tx DMA tag\n"); goto fail; } /* Create tag for Rx buffers. 
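 * As on the Tx side: the parent tag spans the 32-bit DMA space, the ring
 * tags add ARGE_RING_ALIGN alignment, and the buffer tags use 1- or
 * 4-byte alignment depending on which ARGE_HW_FLG_*_DESC_ALIGN_* quirk
 * arge_attach() selected.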
*/ error = bus_dma_tag_create( sc->arge_cdata.arge_parent_tag, /* parent */ arge_rx_align, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES, /* maxsize */ ARGE_MAXFRAGS, /* nsegments */ MCLBYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->arge_cdata.arge_rx_tag); if (error != 0) { device_printf(sc->arge_dev, "failed to create Rx DMA tag\n"); goto fail; } /* Allocate DMA'able memory and load the DMA map for Tx ring. */ error = bus_dmamem_alloc(sc->arge_cdata.arge_tx_ring_tag, (void **)&sc->arge_rdata.arge_tx_ring, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->arge_cdata.arge_tx_ring_map); if (error != 0) { device_printf(sc->arge_dev, "failed to allocate DMA'able memory for Tx ring\n"); goto fail; } ctx.arge_busaddr = 0; error = bus_dmamap_load(sc->arge_cdata.arge_tx_ring_tag, sc->arge_cdata.arge_tx_ring_map, sc->arge_rdata.arge_tx_ring, ARGE_TX_DMA_SIZE, arge_dmamap_cb, &ctx, 0); if (error != 0 || ctx.arge_busaddr == 0) { device_printf(sc->arge_dev, "failed to load DMA'able memory for Tx ring\n"); goto fail; } sc->arge_rdata.arge_tx_ring_paddr = ctx.arge_busaddr; /* Allocate DMA'able memory and load the DMA map for Rx ring. */ error = bus_dmamem_alloc(sc->arge_cdata.arge_rx_ring_tag, (void **)&sc->arge_rdata.arge_rx_ring, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->arge_cdata.arge_rx_ring_map); if (error != 0) { device_printf(sc->arge_dev, "failed to allocate DMA'able memory for Rx ring\n"); goto fail; } ctx.arge_busaddr = 0; error = bus_dmamap_load(sc->arge_cdata.arge_rx_ring_tag, sc->arge_cdata.arge_rx_ring_map, sc->arge_rdata.arge_rx_ring, ARGE_RX_DMA_SIZE, arge_dmamap_cb, &ctx, 0); if (error != 0 || ctx.arge_busaddr == 0) { device_printf(sc->arge_dev, "failed to load DMA'able memory for Rx ring\n"); goto fail; } sc->arge_rdata.arge_rx_ring_paddr = ctx.arge_busaddr; /* Create DMA maps for Tx buffers. */ for (i = 0; i < ARGE_TX_RING_COUNT; i++) { txd = &sc->arge_cdata.arge_txdesc[i]; txd->tx_m = NULL; txd->tx_dmamap = NULL; error = bus_dmamap_create(sc->arge_cdata.arge_tx_tag, 0, &txd->tx_dmamap); if (error != 0) { device_printf(sc->arge_dev, "failed to create Tx dmamap\n"); goto fail; } } /* Create DMA maps for Rx buffers. */ if ((error = bus_dmamap_create(sc->arge_cdata.arge_rx_tag, 0, &sc->arge_cdata.arge_rx_sparemap)) != 0) { device_printf(sc->arge_dev, "failed to create spare Rx dmamap\n"); goto fail; } for (i = 0; i < ARGE_RX_RING_COUNT; i++) { rxd = &sc->arge_cdata.arge_rxdesc[i]; rxd->rx_m = NULL; rxd->rx_dmamap = NULL; error = bus_dmamap_create(sc->arge_cdata.arge_rx_tag, 0, &rxd->rx_dmamap); if (error != 0) { device_printf(sc->arge_dev, "failed to create Rx dmamap\n"); goto fail; } } fail: return (error); } static void arge_dma_free(struct arge_softc *sc) { struct arge_txdesc *txd; struct arge_rxdesc *rxd; int i; /* Tx ring. */ if (sc->arge_cdata.arge_tx_ring_tag) { if (sc->arge_rdata.arge_tx_ring_paddr) bus_dmamap_unload(sc->arge_cdata.arge_tx_ring_tag, sc->arge_cdata.arge_tx_ring_map); if (sc->arge_rdata.arge_tx_ring) bus_dmamem_free(sc->arge_cdata.arge_tx_ring_tag, sc->arge_rdata.arge_tx_ring, sc->arge_cdata.arge_tx_ring_map); sc->arge_rdata.arge_tx_ring = NULL; sc->arge_rdata.arge_tx_ring_paddr = 0; bus_dma_tag_destroy(sc->arge_cdata.arge_tx_ring_tag); sc->arge_cdata.arge_tx_ring_tag = NULL; } /* Rx ring. 
*/ if (sc->arge_cdata.arge_rx_ring_tag) { if (sc->arge_rdata.arge_rx_ring_paddr) bus_dmamap_unload(sc->arge_cdata.arge_rx_ring_tag, sc->arge_cdata.arge_rx_ring_map); if (sc->arge_rdata.arge_rx_ring) bus_dmamem_free(sc->arge_cdata.arge_rx_ring_tag, sc->arge_rdata.arge_rx_ring, sc->arge_cdata.arge_rx_ring_map); sc->arge_rdata.arge_rx_ring = NULL; sc->arge_rdata.arge_rx_ring_paddr = 0; bus_dma_tag_destroy(sc->arge_cdata.arge_rx_ring_tag); sc->arge_cdata.arge_rx_ring_tag = NULL; } /* Tx buffers. */ if (sc->arge_cdata.arge_tx_tag) { for (i = 0; i < ARGE_TX_RING_COUNT; i++) { txd = &sc->arge_cdata.arge_txdesc[i]; if (txd->tx_dmamap) { bus_dmamap_destroy(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap); txd->tx_dmamap = NULL; } } bus_dma_tag_destroy(sc->arge_cdata.arge_tx_tag); sc->arge_cdata.arge_tx_tag = NULL; } /* Rx buffers. */ if (sc->arge_cdata.arge_rx_tag) { for (i = 0; i < ARGE_RX_RING_COUNT; i++) { rxd = &sc->arge_cdata.arge_rxdesc[i]; if (rxd->rx_dmamap) { bus_dmamap_destroy(sc->arge_cdata.arge_rx_tag, rxd->rx_dmamap); rxd->rx_dmamap = NULL; } } if (sc->arge_cdata.arge_rx_sparemap) { bus_dmamap_destroy(sc->arge_cdata.arge_rx_tag, sc->arge_cdata.arge_rx_sparemap); sc->arge_cdata.arge_rx_sparemap = 0; } bus_dma_tag_destroy(sc->arge_cdata.arge_rx_tag); sc->arge_cdata.arge_rx_tag = NULL; } if (sc->arge_cdata.arge_parent_tag) { bus_dma_tag_destroy(sc->arge_cdata.arge_parent_tag); sc->arge_cdata.arge_parent_tag = NULL; } } /* * Initialize the transmit descriptors. */ static int arge_tx_ring_init(struct arge_softc *sc) { struct arge_ring_data *rd; struct arge_txdesc *txd; bus_addr_t addr; int i; sc->arge_cdata.arge_tx_prod = 0; sc->arge_cdata.arge_tx_cons = 0; sc->arge_cdata.arge_tx_cnt = 0; rd = &sc->arge_rdata; bzero(rd->arge_tx_ring, sizeof(rd->arge_tx_ring)); for (i = 0; i < ARGE_TX_RING_COUNT; i++) { if (i == ARGE_TX_RING_COUNT - 1) addr = ARGE_TX_RING_ADDR(sc, 0); else addr = ARGE_TX_RING_ADDR(sc, i + 1); rd->arge_tx_ring[i].packet_ctrl = ARGE_DESC_EMPTY; rd->arge_tx_ring[i].next_desc = addr; txd = &sc->arge_cdata.arge_txdesc[i]; txd->tx_m = NULL; } bus_dmamap_sync(sc->arge_cdata.arge_tx_ring_tag, sc->arge_cdata.arge_tx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /* * Free the Tx ring, unload any pending dma transaction and free the mbuf. */ static void arge_tx_ring_free(struct arge_softc *sc) { struct arge_txdesc *txd; int i; /* Free the Tx buffers. */ for (i = 0; i < ARGE_TX_RING_COUNT; i++) { txd = &sc->arge_cdata.arge_txdesc[i]; if (txd->tx_dmamap) { bus_dmamap_sync(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap); } if (txd->tx_m) m_freem(txd->tx_m); txd->tx_m = NULL; } } /* * Initialize the RX descriptors and allocate mbufs for them. Note that * we arrange the descriptors in a closed ring, so that the last descriptor * points back to the first. 
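 *
 * (Concretely, with a hypothetical 4-entry ring the next_desc chain is
 * 0 -> 1 -> 2 -> 3 -> 0, i.e. ring[i].next_desc =
 * ARGE_RX_RING_ADDR(sc, (i + 1) % ARGE_RX_RING_COUNT); the loop below
 * computes the same thing without the modulo by special-casing the
 * last entry.)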
*/ static int arge_rx_ring_init(struct arge_softc *sc) { struct arge_ring_data *rd; struct arge_rxdesc *rxd; bus_addr_t addr; int i; sc->arge_cdata.arge_rx_cons = 0; rd = &sc->arge_rdata; bzero(rd->arge_rx_ring, sizeof(rd->arge_rx_ring)); for (i = 0; i < ARGE_RX_RING_COUNT; i++) { rxd = &sc->arge_cdata.arge_rxdesc[i]; if (rxd->rx_m != NULL) { device_printf(sc->arge_dev, "%s: ring[%d] rx_m wasn't free?\n", __func__, i); } rxd->rx_m = NULL; rxd->desc = &rd->arge_rx_ring[i]; if (i == ARGE_RX_RING_COUNT - 1) addr = ARGE_RX_RING_ADDR(sc, 0); else addr = ARGE_RX_RING_ADDR(sc, i + 1); rd->arge_rx_ring[i].next_desc = addr; if (arge_newbuf(sc, i) != 0) { return (ENOBUFS); } } bus_dmamap_sync(sc->arge_cdata.arge_rx_ring_tag, sc->arge_cdata.arge_rx_ring_map, BUS_DMASYNC_PREWRITE); return (0); } /* * Free all the buffers in the RX ring. * * TODO: ensure that DMA is disabled and no pending DMA * is lurking in the FIFO. */ static void arge_rx_ring_free(struct arge_softc *sc) { int i; struct arge_rxdesc *rxd; ARGE_LOCK_ASSERT(sc); for (i = 0; i < ARGE_RX_RING_COUNT; i++) { rxd = &sc->arge_cdata.arge_rxdesc[i]; /* Unmap the mbuf */ if (rxd->rx_m != NULL) { bus_dmamap_unload(sc->arge_cdata.arge_rx_tag, rxd->rx_dmamap); m_free(rxd->rx_m); rxd->rx_m = NULL; } } } /* * Initialize an RX descriptor and attach an MBUF cluster. */ static int arge_newbuf(struct arge_softc *sc, int idx) { struct arge_desc *desc; struct arge_rxdesc *rxd; struct mbuf *m; bus_dma_segment_t segs[1]; bus_dmamap_t map; int nsegs; /* XXX TODO: should just allocate an explicit 2KiB buffer */ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MCLBYTES; /* * Add extra space to "adjust" (copy) the packet back to be aligned * for purposes of IPv4/IPv6 header contents. */ if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE) m_adj(m, sizeof(uint64_t)); /* * If it's a 1-byte aligned buffer, then just offset it two bytes * and that will give us a hopefully correctly DWORD aligned * L3 payload - and we won't have to undo it afterwards. */ else if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE) m_adj(m, sizeof(uint16_t)); if (bus_dmamap_load_mbuf_sg(sc->arge_cdata.arge_rx_tag, sc->arge_cdata.arge_rx_sparemap, m, segs, &nsegs, 0) != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); rxd = &sc->arge_cdata.arge_rxdesc[idx]; if (rxd->rx_m != NULL) { bus_dmamap_unload(sc->arge_cdata.arge_rx_tag, rxd->rx_dmamap); } map = rxd->rx_dmamap; rxd->rx_dmamap = sc->arge_cdata.arge_rx_sparemap; sc->arge_cdata.arge_rx_sparemap = map; rxd->rx_m = m; desc = rxd->desc; if ((sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE) && segs[0].ds_addr & 3) panic("RX packet address unaligned"); desc->packet_addr = segs[0].ds_addr; desc->packet_ctrl = ARGE_DESC_EMPTY | ARGE_DMASIZE(segs[0].ds_len); bus_dmamap_sync(sc->arge_cdata.arge_rx_ring_tag, sc->arge_cdata.arge_rx_ring_map, BUS_DMASYNC_PREWRITE); return (0); } /* * Move the data backwards 16 bits to (hopefully!) ensure the * IPv4/IPv6 payload is aligned. * * This is required for earlier hardware where the RX path * requires DWORD aligned buffers. 
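 *
 * (Worked example with hypothetical addresses: on this path
 * arge_newbuf() did m_adj(m, sizeof(uint64_t)), so a cluster at 0x1000
 * has m_data = 0x1008; after the 14-byte Ethernet header the IPv4/IPv6
 * header sits at 0x1016, which is only 2-byte aligned.  Copying the
 * frame back by one uint16_t and subtracting ETHER_ALIGN gives
 * m_data = 0x1006 and a header at 0x1014, a DWORD boundary.)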
*/ static __inline void arge_fixup_rx(struct mbuf *m) { int i; uint16_t *src, *dst; src = mtod(m, uint16_t *); dst = src - 1; for (i = 0; i < m->m_len / sizeof(uint16_t); i++) { *dst++ = *src++; } if (m->m_len % sizeof(uint16_t)) *(uint8_t *)dst = *(uint8_t *)src; m->m_data -= ETHER_ALIGN; } #ifdef DEVICE_POLLING static int arge_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct arge_softc *sc = ifp->if_softc; int rx_npkts = 0; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ARGE_LOCK(sc); arge_tx_locked(sc); rx_npkts = arge_rx_locked(sc); ARGE_UNLOCK(sc); } return (rx_npkts); } #endif /* DEVICE_POLLING */ static void arge_tx_locked(struct arge_softc *sc) { struct arge_txdesc *txd; struct arge_desc *cur_tx; struct ifnet *ifp; uint32_t ctrl; int cons, prod; ARGE_LOCK_ASSERT(sc); cons = sc->arge_cdata.arge_tx_cons; prod = sc->arge_cdata.arge_tx_prod; ARGEDEBUG(sc, ARGE_DBG_TX, "%s: cons=%d, prod=%d\n", __func__, cons, prod); if (cons == prod) return; bus_dmamap_sync(sc->arge_cdata.arge_tx_ring_tag, sc->arge_cdata.arge_tx_ring_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); ifp = sc->arge_ifp; /* * Go through our tx list and free mbufs for those * frames that have been transmitted. */ for (; cons != prod; ARGE_INC(cons, ARGE_TX_RING_COUNT)) { cur_tx = &sc->arge_rdata.arge_tx_ring[cons]; ctrl = cur_tx->packet_ctrl; /* Check if descriptor has "finished" flag */ if ((ctrl & ARGE_DESC_EMPTY) == 0) break; ARGE_WRITE(sc, AR71XX_DMA_TX_STATUS, DMA_TX_STATUS_PKT_SENT); sc->arge_cdata.arge_tx_cnt--; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; txd = &sc->arge_cdata.arge_txdesc[cons]; if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); bus_dmamap_sync(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->arge_cdata.arge_tx_tag, txd->tx_dmamap); /* Free only if it's first descriptor in list */ if (txd->tx_m) m_freem(txd->tx_m); txd->tx_m = NULL; /* reset descriptor */ cur_tx->packet_addr = 0; } sc->arge_cdata.arge_tx_cons = cons; bus_dmamap_sync(sc->arge_cdata.arge_tx_ring_tag, sc->arge_cdata.arge_tx_ring_map, BUS_DMASYNC_PREWRITE); } static int arge_rx_locked(struct arge_softc *sc) { struct arge_rxdesc *rxd; struct ifnet *ifp = sc->arge_ifp; int cons, prog, packet_len, i; struct arge_desc *cur_rx; struct mbuf *m; int rx_npkts = 0; ARGE_LOCK_ASSERT(sc); cons = sc->arge_cdata.arge_rx_cons; bus_dmamap_sync(sc->arge_cdata.arge_rx_ring_tag, sc->arge_cdata.arge_rx_ring_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (prog = 0; prog < ARGE_RX_RING_COUNT; ARGE_INC(cons, ARGE_RX_RING_COUNT)) { cur_rx = &sc->arge_rdata.arge_rx_ring[cons]; rxd = &sc->arge_cdata.arge_rxdesc[cons]; m = rxd->rx_m; if ((cur_rx->packet_ctrl & ARGE_DESC_EMPTY) != 0) break; ARGE_WRITE(sc, AR71XX_DMA_RX_STATUS, DMA_RX_STATUS_PKT_RECVD); prog++; packet_len = ARGE_DMASIZE(cur_rx->packet_ctrl); bus_dmamap_sync(sc->arge_cdata.arge_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); m = rxd->rx_m; /* * If the MAC requires 4 byte alignment then the RX setup * routine will have pre-offset things; so un-offset it here. 
*/ if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE) arge_fixup_rx(m); m->m_pkthdr.rcvif = ifp; /* Skip 4 bytes of CRC */ m->m_pkthdr.len = m->m_len = packet_len - ETHER_CRC_LEN; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); rx_npkts++; ARGE_UNLOCK(sc); (*ifp->if_input)(ifp, m); ARGE_LOCK(sc); cur_rx->packet_addr = 0; } if (prog > 0) { i = sc->arge_cdata.arge_rx_cons; for (; prog > 0 ; prog--) { if (arge_newbuf(sc, i) != 0) { device_printf(sc->arge_dev, "Failed to allocate buffer\n"); break; } ARGE_INC(i, ARGE_RX_RING_COUNT); } bus_dmamap_sync(sc->arge_cdata.arge_rx_ring_tag, sc->arge_cdata.arge_rx_ring_map, BUS_DMASYNC_PREWRITE); sc->arge_cdata.arge_rx_cons = cons; } return (rx_npkts); } static int arge_intr_filter(void *arg) { struct arge_softc *sc = arg; uint32_t status, ints; status = ARGE_READ(sc, AR71XX_DMA_INTR_STATUS); ints = ARGE_READ(sc, AR71XX_DMA_INTR); ARGEDEBUG(sc, ARGE_DBG_INTR, "int mask(filter) = %b\n", ints, "\20\10RX_BUS_ERROR\7RX_OVERFLOW\5RX_PKT_RCVD" "\4TX_BUS_ERROR\2TX_UNDERRUN\1TX_PKT_SENT"); ARGEDEBUG(sc, ARGE_DBG_INTR, "status(filter) = %b\n", status, "\20\10RX_BUS_ERROR\7RX_OVERFLOW\5RX_PKT_RCVD" "\4TX_BUS_ERROR\2TX_UNDERRUN\1TX_PKT_SENT"); if (status & DMA_INTR_ALL) { sc->arge_intr_status |= status; ARGE_WRITE(sc, AR71XX_DMA_INTR, 0); sc->stats.intr_ok++; return (FILTER_SCHEDULE_THREAD); } sc->arge_intr_status = 0; sc->stats.intr_stray++; return (FILTER_STRAY); } static void arge_intr(void *arg) { struct arge_softc *sc = arg; uint32_t status; struct ifnet *ifp = sc->arge_ifp; #ifdef ARGE_DEBUG int i; #endif status = ARGE_READ(sc, AR71XX_DMA_INTR_STATUS); status |= sc->arge_intr_status; ARGEDEBUG(sc, ARGE_DBG_INTR, "int status(intr) = %b\n", status, "\20\10\7RX_OVERFLOW\5RX_PKT_RCVD" "\4TX_BUS_ERROR\2TX_UNDERRUN\1TX_PKT_SENT"); /* * Is it our interrupt at all? */ if (status == 0) { sc->stats.intr_stray2++; return; } #ifdef ARGE_DEBUG for (i = 0; i < 32; i++) { if (status & (1U << i)) { sc->intr_stats.count[i]++; } } #endif if (status & DMA_INTR_RX_BUS_ERROR) { ARGE_WRITE(sc, AR71XX_DMA_RX_STATUS, DMA_RX_STATUS_BUS_ERROR); device_printf(sc->arge_dev, "RX bus error"); return; } if (status & DMA_INTR_TX_BUS_ERROR) { ARGE_WRITE(sc, AR71XX_DMA_TX_STATUS, DMA_TX_STATUS_BUS_ERROR); device_printf(sc->arge_dev, "TX bus error"); return; } ARGE_LOCK(sc); arge_flush_ddr(sc); if (status & DMA_INTR_RX_PKT_RCVD) arge_rx_locked(sc); /* * RX overrun disables the receiver. * Clear indication and re-enable rx. */ if ( status & DMA_INTR_RX_OVERFLOW) { ARGE_WRITE(sc, AR71XX_DMA_RX_STATUS, DMA_RX_STATUS_OVERFLOW); ARGE_WRITE(sc, AR71XX_DMA_RX_CONTROL, DMA_RX_CONTROL_EN); sc->stats.rx_overflow++; } if (status & DMA_INTR_TX_PKT_SENT) arge_tx_locked(sc); /* * Underrun turns off TX. Clear underrun indication. * If there's anything left in the ring, reactivate the tx. */ if (status & DMA_INTR_TX_UNDERRUN) { ARGE_WRITE(sc, AR71XX_DMA_TX_STATUS, DMA_TX_STATUS_UNDERRUN); sc->stats.tx_underflow++; ARGEDEBUG(sc, ARGE_DBG_TX, "%s: TX underrun; tx_cnt=%d\n", __func__, sc->arge_cdata.arge_tx_cnt); if (sc->arge_cdata.arge_tx_cnt > 0 ) { ARGE_WRITE(sc, AR71XX_DMA_TX_CONTROL, DMA_TX_CONTROL_EN); } } /* * If we've finished TXing and there's space for more packets * to be queued for TX, do so. 
Otherwise we may end up in a * situation where the interface send queue was filled * whilst the hardware queue was full; the hardware * queue was then drained, but the interface send queue wasn't, * and thus if_start() is never called to kick-start * the send process (and all subsequent packets are simply * discarded). * * XXX TODO: make sure that the hardware deals nicely * with the possibility of the queue being enabled above * after a TX underrun, then having the hardware queue added * to below. */ if (status & (DMA_INTR_TX_PKT_SENT | DMA_INTR_TX_UNDERRUN) && (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { if (!IFQ_IS_EMPTY(&ifp->if_snd)) arge_start_locked(ifp); } /* * We handled all bits, clear status */ sc->arge_intr_status = 0; ARGE_UNLOCK(sc); /* * re-enable all interrupts */ ARGE_WRITE(sc, AR71XX_DMA_INTR, DMA_INTR_ALL); } static void arge_tick(void *xsc) { struct arge_softc *sc = xsc; struct mii_data *mii; ARGE_LOCK_ASSERT(sc); if (sc->arge_miibus) { mii = device_get_softc(sc->arge_miibus); mii_tick(mii); callout_reset(&sc->arge_stat_callout, hz, arge_tick, sc); } } int arge_multiphy_mediachange(struct ifnet *ifp) { struct arge_softc *sc = ifp->if_softc; struct ifmedia *ifm = &sc->arge_ifmedia; struct ifmedia_entry *ife = ifm->ifm_cur; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if (IFM_SUBTYPE(ife->ifm_media) == IFM_AUTO) { device_printf(sc->arge_dev, "AUTO is not supported for multiphy MAC\n"); return (EINVAL); } /* * Ignore everything else. */ return (0); } void arge_multiphy_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr) { struct arge_softc *sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER | sc->arge_media_type | sc->arge_duplex_mode; } #if defined(ARGE_MDIO) static int argemdio_probe(device_t dev) { device_set_desc(dev, "Atheros AR71xx built-in ethernet interface, MDIO controller"); return (0); } static int argemdio_attach(device_t dev) { struct arge_softc *sc; int error = 0; sc = device_get_softc(dev); sc->arge_dev = dev; sc->arge_mac_unit = device_get_unit(dev); sc->arge_rid = 0; sc->arge_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->arge_rid, RF_ACTIVE | RF_SHAREABLE); if (sc->arge_res == NULL) { device_printf(dev, "couldn't map memory\n"); error = ENXIO; goto fail; } /* Reset MAC - required for AR71xx MDIO to successfully occur */ arge_reset_mac(sc); /* Reset MII bus */ arge_reset_miibus(sc); bus_generic_probe(dev); bus_enumerate_hinted_children(dev); error = bus_generic_attach(dev); fail: return (error); } static int argemdio_detach(device_t dev) { return (0); } #endif Index: projects/powernv/mips/atheros/qca953x_chip.c =================================================================== --- projects/powernv/mips/atheros/qca953x_chip.c (nonexistent) +++ projects/powernv/mips/atheros/qca953x_chip.c (revision 290991) @@ -0,0 +1,393 @@ +/*- + * Copyright (c) 2015 Adrian Chadd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_ddb.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include + +#include + +static void +qca953x_chip_detect_mem_size(void) +{ +} + +static void +qca953x_chip_detect_sys_frequency(void) +{ + unsigned long ref_rate; + unsigned long cpu_rate; + unsigned long ddr_rate; + unsigned long ahb_rate; + uint32_t pll, out_div, ref_div, nint, frac, clk_ctrl, postdiv; + uint32_t cpu_pll, ddr_pll; + uint32_t bootstrap; + + bootstrap = ATH_READ_REG(QCA953X_RESET_REG_BOOTSTRAP); + if (bootstrap & QCA953X_BOOTSTRAP_REF_CLK_40) + ref_rate = 40 * 1000 * 1000; + else + ref_rate = 25 * 1000 * 1000; + + pll = ATH_READ_REG(QCA953X_PLL_CPU_CONFIG_REG); + out_div = (pll >> QCA953X_PLL_CPU_CONFIG_OUTDIV_SHIFT) & + QCA953X_PLL_CPU_CONFIG_OUTDIV_MASK; + ref_div = (pll >> QCA953X_PLL_CPU_CONFIG_REFDIV_SHIFT) & + QCA953X_PLL_CPU_CONFIG_REFDIV_MASK; + nint = (pll >> QCA953X_PLL_CPU_CONFIG_NINT_SHIFT) & + QCA953X_PLL_CPU_CONFIG_NINT_MASK; + frac = (pll >> QCA953X_PLL_CPU_CONFIG_NFRAC_SHIFT) & + QCA953X_PLL_CPU_CONFIG_NFRAC_MASK; + + cpu_pll = nint * ref_rate / ref_div; + cpu_pll += frac * (ref_rate >> 6) / ref_div; + cpu_pll /= (1 << out_div); + + pll = ATH_READ_REG(QCA953X_PLL_DDR_CONFIG_REG); + out_div = (pll >> QCA953X_PLL_DDR_CONFIG_OUTDIV_SHIFT) & + QCA953X_PLL_DDR_CONFIG_OUTDIV_MASK; + ref_div = (pll >> QCA953X_PLL_DDR_CONFIG_REFDIV_SHIFT) & + QCA953X_PLL_DDR_CONFIG_REFDIV_MASK; + nint = (pll >> QCA953X_PLL_DDR_CONFIG_NINT_SHIFT) & + QCA953X_PLL_DDR_CONFIG_NINT_MASK; + frac = (pll >> QCA953X_PLL_DDR_CONFIG_NFRAC_SHIFT) & + QCA953X_PLL_DDR_CONFIG_NFRAC_MASK; + + ddr_pll = nint * ref_rate / ref_div; + ddr_pll += frac * (ref_rate >> 6) / (ref_div << 4); + ddr_pll /= (1 << out_div); + + clk_ctrl = ATH_READ_REG(QCA953X_PLL_CLK_CTRL_REG); + + postdiv = (clk_ctrl >> QCA953X_PLL_CLK_CTRL_CPU_POST_DIV_SHIFT) & + QCA953X_PLL_CLK_CTRL_CPU_POST_DIV_MASK; + + if (clk_ctrl & QCA953X_PLL_CLK_CTRL_CPU_PLL_BYPASS) + cpu_rate = ref_rate; + else if (clk_ctrl & QCA953X_PLL_CLK_CTRL_CPUCLK_FROM_CPUPLL) + cpu_rate = cpu_pll / (postdiv + 1); + else + cpu_rate = ddr_pll / (postdiv + 1); + + postdiv = (clk_ctrl >> QCA953X_PLL_CLK_CTRL_DDR_POST_DIV_SHIFT) & + QCA953X_PLL_CLK_CTRL_DDR_POST_DIV_MASK; + + if (clk_ctrl & QCA953X_PLL_CLK_CTRL_DDR_PLL_BYPASS) + ddr_rate = ref_rate; + else if (clk_ctrl & QCA953X_PLL_CLK_CTRL_DDRCLK_FROM_DDRPLL) + ddr_rate = ddr_pll / (postdiv + 1); + else + ddr_rate = cpu_pll / (postdiv + 1); + + postdiv = (clk_ctrl >> QCA953X_PLL_CLK_CTRL_AHB_POST_DIV_SHIFT) & + 
QCA953X_PLL_CLK_CTRL_AHB_POST_DIV_MASK; + + if (clk_ctrl & QCA953X_PLL_CLK_CTRL_AHB_PLL_BYPASS) + ahb_rate = ref_rate; + else if (clk_ctrl & QCA953X_PLL_CLK_CTRL_AHBCLK_FROM_DDRPLL) + ahb_rate = ddr_pll / (postdiv + 1); + else + ahb_rate = cpu_pll / (postdiv + 1); + + u_ar71xx_ddr_freq = ddr_rate; + u_ar71xx_cpu_freq = cpu_rate; + u_ar71xx_ahb_freq = ahb_rate; + + u_ar71xx_wdt_freq = ref_rate; + u_ar71xx_uart_freq = ref_rate; + u_ar71xx_mdio_freq = ref_rate; + u_ar71xx_refclk = ref_rate; +} + +static void +qca953x_chip_device_stop(uint32_t mask) +{ + uint32_t reg; + + reg = ATH_READ_REG(QCA953X_RESET_REG_RESET_MODULE); + ATH_WRITE_REG(QCA953X_RESET_REG_RESET_MODULE, reg | mask); +} + +static void +qca953x_chip_device_start(uint32_t mask) +{ + uint32_t reg; + + reg = ATH_READ_REG(QCA953X_RESET_REG_RESET_MODULE); + ATH_WRITE_REG(QCA953X_RESET_REG_RESET_MODULE, reg & ~mask); +} + +static int +qca953x_chip_device_stopped(uint32_t mask) +{ + uint32_t reg; + + reg = ATH_READ_REG(QCA953X_RESET_REG_RESET_MODULE); + return ((reg & mask) == mask); +} + +static void +qca953x_chip_set_mii_speed(uint32_t unit, uint32_t speed) +{ + + /* XXX TODO */ + return; +} + +static void +qca953x_chip_set_pll_ge(int unit, int speed, uint32_t pll) +{ + switch (unit) { + case 0: + ATH_WRITE_REG(QCA953X_PLL_ETH_XMII_CONTROL_REG, pll); + break; + case 1: + ATH_WRITE_REG(QCA953X_PLL_ETH_SGMII_CONTROL_REG, pll); + break; + default: + printf("%s: invalid PLL set for arge unit: %d\n", + __func__, unit); + return; + } +} + +static void +qca953x_chip_ddr_flush(ar71xx_flush_ddr_id_t id) +{ + + switch (id) { + case AR71XX_CPU_DDR_FLUSH_GE0: + ar71xx_ddr_flush(QCA953X_DDR_REG_FLUSH_GE0); + break; + case AR71XX_CPU_DDR_FLUSH_GE1: + ar71xx_ddr_flush(QCA953X_DDR_REG_FLUSH_GE1); + break; + case AR71XX_CPU_DDR_FLUSH_USB: + ar71xx_ddr_flush(QCA953X_DDR_REG_FLUSH_USB); + break; + case AR71XX_CPU_DDR_FLUSH_PCIE: + ar71xx_ddr_flush(QCA953X_DDR_REG_FLUSH_PCIE); + break; + case AR71XX_CPU_DDR_FLUSH_WMAC: + ar71xx_ddr_flush(QCA953X_DDR_REG_FLUSH_WMAC); + break; + default: + printf("%s: invalid flush (%d)\n", __func__, id); + } +} + +static uint32_t +qca953x_chip_get_eth_pll(unsigned int mac, int speed) +{ + uint32_t pll; + + switch (speed) { + case 10: + pll = QCA953X_PLL_VAL_10; + break; + case 100: + pll = QCA953X_PLL_VAL_100; + break; + case 1000: + pll = QCA953X_PLL_VAL_1000; + break; + default: + printf("%s%d: invalid speed %d\n", __func__, mac, speed); + pll = 0; + } + return (pll); +} + +static void +qca953x_chip_reset_ethernet_switch(void) +{ +} + +static void +qca953x_configure_gmac(uint32_t gmac_cfg) +{ + uint32_t reg; + + reg = ATH_READ_REG(QCA953X_GMAC_REG_ETH_CFG); + printf("%s: ETH_CFG=0x%08x\n", __func__, reg); + reg &= ~(QCA953X_ETH_CFG_SW_ONLY_MODE | + QCA953X_ETH_CFG_SW_PHY_SWAP | + QCA953X_ETH_CFG_SW_APB_ACCESS | + QCA953X_ETH_CFG_SW_ACC_MSB_FIRST); + + reg |= gmac_cfg; + ATH_WRITE_REG(QCA953X_GMAC_REG_ETH_CFG, reg); +} + +static void +qca953x_chip_init_usb_peripheral(void) +{ + uint32_t bootstrap; + + bootstrap = ATH_READ_REG(QCA953X_RESET_REG_BOOTSTRAP); + + ar71xx_device_stop(QCA953X_RESET_USBSUS_OVERRIDE); + DELAY(1000); + + ar71xx_device_start(QCA953X_RESET_USB_PHY); + DELAY(1000); + + ar71xx_device_start(QCA953X_RESET_USB_PHY_ANALOG); + DELAY(1000); + + ar71xx_device_start(QCA953X_RESET_USB_HOST); + DELAY(1000); +} + +static void +qca953x_chip_set_mii_if(uint32_t unit, uint32_t mii_mode) +{ + + /* + * XXX ! 
+ * + * Nothing to see here; although gmac0 can have its + * MII configuration changed, the register values + * are slightly different. + */ +} + +/* + * XXX TODO: fetch default MII divider configuration + */ + +static void +qca953x_chip_reset_wmac(void) +{ + + /* XXX TODO */ +} + +static void +qca953x_chip_init_gmac(void) +{ + long gmac_cfg; + + if (resource_long_value("qca953x_gmac", 0, "gmac_cfg", + &gmac_cfg) == 0) { + printf("%s: gmac_cfg=0x%08lx\n", + __func__, + (long) gmac_cfg); + qca953x_configure_gmac((uint32_t) gmac_cfg); + } +} + +/* + * Reset the NAND Flash Controller. + * + * + active=1 means "make it active". + * + active=0 means "make it inactive". + */ +static void +qca953x_chip_reset_nfc(int active) +{ +} + +/* + * Configure the GPIO output mux setup. + * + * The QCA953x has an output mux which allows + * certain functions to be configured on any pin. + * Specifically, the switch PHY link LEDs and + * WMAC external RX LNA switches are not limited to + * a specific GPIO pin. + * + * For example, qca953x_chip_gpio_output_configure(13, + * QCA953X_GPIO_OUT_MUX_LED_LINK1) would route switch link + * LED 1 to (hypothetical) GPIO pin 13. + */ +static void +qca953x_chip_gpio_output_configure(int gpio, uint8_t func) +{ + uint32_t reg, s; + uint32_t t; + + /* Valid pins are 0 .. QCA953X_GPIO_COUNT - 1. */ + if (gpio >= QCA953X_GPIO_COUNT) + return; + + reg = QCA953X_GPIO_REG_OUT_FUNC0 + 4 * (gpio / 4); + s = 8 * (gpio % 4); + + /* read-modify-write */ + t = ATH_READ_REG(AR71XX_GPIO_BASE + reg); + t &= ~(0xff << s); + t |= func << s; + ATH_WRITE_REG(AR71XX_GPIO_BASE + reg, t); + + /* flush write */ + ATH_READ_REG(AR71XX_GPIO_BASE + reg); +} + +struct ar71xx_cpu_def qca953x_chip_def = { + &qca953x_chip_detect_mem_size, + &qca953x_chip_detect_sys_frequency, + &qca953x_chip_device_stop, + &qca953x_chip_device_start, + &qca953x_chip_device_stopped, + &qca953x_chip_set_pll_ge, + &qca953x_chip_set_mii_speed, + &qca953x_chip_set_mii_if, + &qca953x_chip_get_eth_pll, + &qca953x_chip_ddr_flush, + &qca953x_chip_init_usb_peripheral, + &qca953x_chip_reset_ethernet_switch, + &qca953x_chip_reset_wmac, + &qca953x_chip_init_gmac, + &qca953x_chip_reset_nfc, + &qca953x_chip_gpio_output_configure, +}; Property changes on: projects/powernv/mips/atheros/qca953x_chip.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/powernv/mips/atheros/qca953x_chip.h =================================================================== --- projects/powernv/mips/atheros/qca953x_chip.h (nonexistent) +++ projects/powernv/mips/atheros/qca953x_chip.h (revision 290991) @@ -0,0 +1,34 @@ +/*- + * Copyright (c) 2015 Adrian Chadd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* $FreeBSD$ */ + +#ifndef __QCA953X_CHIP_H__ +#define __QCA953X_CHIP_H__ + +extern struct ar71xx_cpu_def qca953x_chip_def; + +#endif Property changes on: projects/powernv/mips/atheros/qca953x_chip.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/powernv/mips/atheros/qca953xreg.h =================================================================== --- projects/powernv/mips/atheros/qca953xreg.h (nonexistent) +++ projects/powernv/mips/atheros/qca953xreg.h (revision 290991) @@ -0,0 +1,195 @@ +/*- + * Copyright (c) 2015 Adrian Chadd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ +#ifndef __QCA953XREG_H__ +#define __QCA953XREG_H__ + +#define BIT(x) (1 << (x)) + +/* Revision ID information */ +#define REV_ID_MAJOR_QCA9533 0x0140 +#define REV_ID_MAJOR_QCA9533_V2 0x0160 +#define QCA953X_REV_ID_REVISION_MASK 0xf + +/* Big enough to cover APB and SPI, and most peripherals */ +/* + * it needs to cover SPI because right now the if_ath_ahb + * code uses rman to map in the SPI address into memory + * to read data instead of us squirreling it away at early + * boot-time and using the firmware interface. + * + * if_ath_ahb.c should use the same firmware interface + * that if_ath_pci.c uses. 
+ */ +#define QCA953X_GMAC_BASE (AR71XX_APB_BASE + 0x00070000) +#define QCA953X_GMAC_SIZE 0x14 +#define QCA953X_WMAC_BASE (AR71XX_APB_BASE + 0x00100000) +#define QCA953X_WMAC_SIZE 0x20000 +#define QCA953X_EHCI_BASE 0x1b000000 +#define QCA953X_EHCI_SIZE 0x200 +#define QCA953X_SRIF_BASE (AR71XX_APB_BASE + 0x00116000) +#define QCA953X_SRIF_SIZE 0x1000 + +#define QCA953X_PCI_CFG_BASE0 0x14000000 +#define QCA953X_PCI_CTRL_BASE0 (AR71XX_APB_BASE + 0x000f0000) +#define QCA953X_PCI_CRP_BASE0 (AR71XX_APB_BASE + 0x000c0000) +#define QCA953X_PCI_MEM_BASE0 0x10000000 +#define QCA953X_PCI_MEM_SIZE 0x02000000 + +/* PLL Block */ +#define QCA953X_PLL_CPU_CONFIG_REG (AR71XX_PLL_CPU_BASE + 0x00) +#define QCA953X_PLL_DDR_CONFIG_REG (AR71XX_PLL_CPU_BASE + 0x04) +#define QCA953X_PLL_CLK_CTRL_REG (AR71XX_PLL_CPU_BASE + 0x08) + +#define QCA953X_PLL_ETH_XMII_CONTROL_REG (AR71XX_PLL_CPU_BASE + 0x2c) +#define QCA953X_PLL_ETH_SGMII_CONTROL_REG (AR71XX_PLL_CPU_BASE + 0x48) + +#define QCA953X_PLL_CPU_CONFIG_NFRAC_SHIFT 0 +#define QCA953X_PLL_CPU_CONFIG_NFRAC_MASK 0x3f +#define QCA953X_PLL_CPU_CONFIG_NINT_SHIFT 6 +#define QCA953X_PLL_CPU_CONFIG_NINT_MASK 0x3f +#define QCA953X_PLL_CPU_CONFIG_REFDIV_SHIFT 12 +#define QCA953X_PLL_CPU_CONFIG_REFDIV_MASK 0x1f +#define QCA953X_PLL_CPU_CONFIG_OUTDIV_SHIFT 19 +#define QCA953X_PLL_CPU_CONFIG_OUTDIV_MASK 0x3 + +#define QCA953X_PLL_DDR_CONFIG_NFRAC_SHIFT 0 +#define QCA953X_PLL_DDR_CONFIG_NFRAC_MASK 0x3ff +#define QCA953X_PLL_DDR_CONFIG_NINT_SHIFT 10 +#define QCA953X_PLL_DDR_CONFIG_NINT_MASK 0x3f +#define QCA953X_PLL_DDR_CONFIG_REFDIV_SHIFT 16 +#define QCA953X_PLL_DDR_CONFIG_REFDIV_MASK 0x1f +#define QCA953X_PLL_DDR_CONFIG_OUTDIV_SHIFT 23 +#define QCA953X_PLL_DDR_CONFIG_OUTDIV_MASK 0x7 + +#define QCA953X_PLL_CLK_CTRL_CPU_PLL_BYPASS BIT(2) +#define QCA953X_PLL_CLK_CTRL_DDR_PLL_BYPASS BIT(3) +#define QCA953X_PLL_CLK_CTRL_AHB_PLL_BYPASS BIT(4) +#define QCA953X_PLL_CLK_CTRL_CPU_POST_DIV_SHIFT 5 +#define QCA953X_PLL_CLK_CTRL_CPU_POST_DIV_MASK 0x1f +#define QCA953X_PLL_CLK_CTRL_DDR_POST_DIV_SHIFT 10 +#define QCA953X_PLL_CLK_CTRL_DDR_POST_DIV_MASK 0x1f +#define QCA953X_PLL_CLK_CTRL_AHB_POST_DIV_SHIFT 15 +#define QCA953X_PLL_CLK_CTRL_AHB_POST_DIV_MASK 0x1f +#define QCA953X_PLL_CLK_CTRL_CPUCLK_FROM_CPUPLL BIT(20) +#define QCA953X_PLL_CLK_CTRL_DDRCLK_FROM_DDRPLL BIT(21) +#define QCA953X_PLL_CLK_CTRL_AHBCLK_FROM_DDRPLL BIT(24) + +#define QCA953X_PLL_VAL_1000 0x16000000 +#define QCA953X_PLL_VAL_100 0x00000101 +#define QCA953X_PLL_VAL_10 0x00001616 + +/* Reset block */ + +#define QCA953X_RESET_REG_RESET_MODULE (AR71XX_RST_BLOCK_BASE + 0x1c) +#define QCA953X_RESET_USB_EXT_PWR BIT(29) +#define QCA953X_RESET_EXTERNAL BIT(28) +#define QCA953X_RESET_RTC BIT(27) +#define QCA953X_RESET_FULL_CHIP BIT(24) +#define QCA953X_RESET_GE1_MDIO BIT(23) +#define QCA953X_RESET_GE0_MDIO BIT(22) +#define QCA953X_RESET_CPU_NMI BIT(21) +#define QCA953X_RESET_CPU_COLD BIT(20) +#define QCA953X_RESET_DDR BIT(16) +#define QCA953X_RESET_USB_PHY_PLL_PWD_EXT BIT(15) +#define QCA953X_RESET_GE1_MAC BIT(13) +#define QCA953X_RESET_ETH_SWITCH_ANALOG BIT(12) +#define QCA953X_RESET_USB_PHY_ANALOG BIT(11) +#define QCA953X_RESET_GE0_MAC BIT(9) +#define QCA953X_RESET_ETH_SWITCH BIT(8) +#define QCA953X_RESET_PCIE_PHY BIT(7) +#define QCA953X_RESET_PCIE BIT(6) +#define QCA953X_RESET_USB_HOST BIT(5) +#define QCA953X_RESET_USB_PHY BIT(4) +#define QCA953X_RESET_USBSUS_OVERRIDE BIT(3) + +#define QCA953X_RESET_REG_BOOTSTRAP (AR71XX_RST_BLOCK_BASE + 0xb0) +#define QCA953X_BOOTSTRAP_SW_OPTION2 BIT(12) +#define QCA953X_BOOTSTRAP_SW_OPTION1 
BIT(11) +#define QCA953X_BOOTSTRAP_EJTAG_MODE BIT(5) +#define QCA953X_BOOTSTRAP_REF_CLK_40 BIT(4) +#define QCA953X_BOOTSTRAP_SDRAM_DISABLED BIT(1) +#define QCA953X_BOOTSTRAP_DDR1 BIT(0) + +#define QCA953X_RESET_REG_EXT_INT_STATUS (AR71XX_RST_BLOCK_BASE + 0xac) + +#define QCA953X_DDR_REG_FLUSH_GE0 (AR71XX_APB_BASE + 0x9c) +#define QCA953X_DDR_REG_FLUSH_GE1 (AR71XX_APB_BASE + 0xa0) +#define QCA953X_DDR_REG_FLUSH_USB (AR71XX_APB_BASE + 0xa4) +#define QCA953X_DDR_REG_FLUSH_PCIE (AR71XX_APB_BASE + 0xa8) +#define QCA953X_DDR_REG_FLUSH_WMAC (AR71XX_APB_BASE + 0xac) + +/* GPIO block */ +#define QCA953X_GPIO_REG_OUT_FUNC0 0x2c +#define QCA953X_GPIO_REG_OUT_FUNC1 0x30 +#define QCA953X_GPIO_REG_OUT_FUNC2 0x34 +#define QCA953X_GPIO_REG_OUT_FUNC3 0x38 +#define QCA953X_GPIO_REG_OUT_FUNC4 0x3c +#define QCA953X_GPIO_REG_IN_ENABLE0 0x44 +#define QCA953X_GPIO_REG_FUNC 0x6c + +#define QCA953X_GPIO_OUT_MUX_SPI_CS1 10 +#define QCA953X_GPIO_OUT_MUX_SPI_CS2 11 +#define QCA953X_GPIO_OUT_MUX_SPI_CS0 9 +#define QCA953X_GPIO_OUT_MUX_SPI_CLK 8 +#define QCA953X_GPIO_OUT_MUX_SPI_MOSI 12 +#define QCA953X_GPIO_OUT_MUX_LED_LINK1 41 +#define QCA953X_GPIO_OUT_MUX_LED_LINK2 42 +#define QCA953X_GPIO_OUT_MUX_LED_LINK3 43 +#define QCA953X_GPIO_OUT_MUX_LED_LINK4 44 +#define QCA953X_GPIO_OUT_MUX_LED_LINK5 45 + +#define QCA953X_GPIO_COUNT 18 + +/* GMAC block */ +#define QCA953X_GMAC_REG_ETH_CFG (QCA953X_GMAC_BASE + 0x00) + +#define QCA953X_ETH_CFG_SW_ONLY_MODE BIT(6) +#define QCA953X_ETH_CFG_SW_PHY_SWAP BIT(7) +#define QCA953X_ETH_CFG_SW_APB_ACCESS BIT(9) +#define QCA953X_ETH_CFG_SW_ACC_MSB_FIRST BIT(13) + +/* SRIF block */ +#define QCA953X_SRIF_CPU_DPLL1_REG 0x1c0 +#define QCA953X_SRIF_CPU_DPLL2_REG 0x1c4 +#define QCA953X_SRIF_CPU_DPLL3_REG 0x1c8 + +#define QCA953X_SRIF_DDR_DPLL1_REG 0x240 +#define QCA953X_SRIF_DDR_DPLL2_REG 0x244 +#define QCA953X_SRIF_DDR_DPLL3_REG 0x248 + +#define QCA953X_SRIF_DPLL1_REFDIV_SHIFT 27 +#define QCA953X_SRIF_DPLL1_REFDIV_MASK 0x1f +#define QCA953X_SRIF_DPLL1_NINT_SHIFT 18 +#define QCA953X_SRIF_DPLL1_NINT_MASK 0x1ff +#define QCA953X_SRIF_DPLL1_NFRAC_MASK 0x0003ffff + +#define QCA953X_SRIF_DPLL2_LOCAL_PLL BIT(30) +#define QCA953X_SRIF_DPLL2_OUTDIV_SHIFT 13 +#define QCA953X_SRIF_DPLL2_OUTDIV_MASK 0x7 + +#endif /* __QCA953XREG_H__ */ Property changes on: projects/powernv/mips/atheros/qca953xreg.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/powernv/net/pfkeyv2.h =================================================================== --- projects/powernv/net/pfkeyv2.h (revision 290990) +++ projects/powernv/net/pfkeyv2.h (revision 290991) @@ -1,441 +1,441 @@ /* $FreeBSD$ */ /* $KAME: pfkeyv2.h,v 1.37 2003/09/06 05:15:43 itojun Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This file has been derived rfc 2367, * And added some flags of SADB_KEY_FLAGS_ as SADB_X_EXT_. * sakane@ydc.co.jp */ #ifndef _NET_PFKEYV2_H_ #define _NET_PFKEYV2_H_ /* This file defines structures and symbols for the PF_KEY Version 2 key management interface. It was written at the U.S. Naval Research Laboratory. This file is in the public domain. The authors ask that you leave this credit intact on any copies of this file. */ #ifndef __PFKEY_V2_H #define __PFKEY_V2_H 1 #define PF_KEY_V2 2 #define PFKEYV2_REVISION 199806L #define SADB_RESERVED 0 #define SADB_GETSPI 1 #define SADB_UPDATE 2 #define SADB_ADD 3 #define SADB_DELETE 4 #define SADB_GET 5 #define SADB_ACQUIRE 6 #define SADB_REGISTER 7 #define SADB_EXPIRE 8 #define SADB_FLUSH 9 #define SADB_DUMP 10 #define SADB_X_PROMISC 11 #define SADB_X_PCHANGE 12 #define SADB_X_SPDUPDATE 13 #define SADB_X_SPDADD 14 #define SADB_X_SPDDELETE 15 /* by policy index */ #define SADB_X_SPDGET 16 #define SADB_X_SPDACQUIRE 17 #define SADB_X_SPDDUMP 18 #define SADB_X_SPDFLUSH 19 #define SADB_X_SPDSETIDX 20 #define SADB_X_SPDEXPIRE 21 #define SADB_X_SPDDELETE2 22 /* by policy id */ #define SADB_MAX 22 struct sadb_msg { u_int8_t sadb_msg_version; u_int8_t sadb_msg_type; u_int8_t sadb_msg_errno; u_int8_t sadb_msg_satype; u_int16_t sadb_msg_len; u_int16_t sadb_msg_reserved; u_int32_t sadb_msg_seq; u_int32_t sadb_msg_pid; }; struct sadb_ext { u_int16_t sadb_ext_len; u_int16_t sadb_ext_type; }; struct sadb_sa { u_int16_t sadb_sa_len; u_int16_t sadb_sa_exttype; u_int32_t sadb_sa_spi; u_int8_t sadb_sa_replay; u_int8_t sadb_sa_state; u_int8_t sadb_sa_auth; u_int8_t sadb_sa_encrypt; u_int32_t sadb_sa_flags; }; struct sadb_lifetime { u_int16_t sadb_lifetime_len; u_int16_t sadb_lifetime_exttype; u_int32_t sadb_lifetime_allocations; u_int64_t sadb_lifetime_bytes; u_int64_t sadb_lifetime_addtime; u_int64_t sadb_lifetime_usetime; }; struct sadb_address { u_int16_t sadb_address_len; u_int16_t sadb_address_exttype; u_int8_t sadb_address_proto; u_int8_t sadb_address_prefixlen; u_int16_t sadb_address_reserved; }; struct sadb_key { u_int16_t sadb_key_len; u_int16_t sadb_key_exttype; u_int16_t sadb_key_bits; u_int16_t sadb_key_reserved; }; struct sadb_ident { u_int16_t sadb_ident_len; u_int16_t sadb_ident_exttype; u_int16_t sadb_ident_type; u_int16_t sadb_ident_reserved; u_int64_t sadb_ident_id; }; struct sadb_sens { u_int16_t sadb_sens_len; u_int16_t sadb_sens_exttype; u_int32_t sadb_sens_dpd; u_int8_t sadb_sens_sens_level; u_int8_t sadb_sens_sens_len; u_int8_t sadb_sens_integ_level; u_int8_t sadb_sens_integ_len; u_int32_t 
sadb_sens_reserved; }; struct sadb_prop { u_int16_t sadb_prop_len; u_int16_t sadb_prop_exttype; u_int8_t sadb_prop_replay; u_int8_t sadb_prop_reserved[3]; }; struct sadb_comb { u_int8_t sadb_comb_auth; u_int8_t sadb_comb_encrypt; u_int16_t sadb_comb_flags; u_int16_t sadb_comb_auth_minbits; u_int16_t sadb_comb_auth_maxbits; u_int16_t sadb_comb_encrypt_minbits; u_int16_t sadb_comb_encrypt_maxbits; u_int32_t sadb_comb_reserved; u_int32_t sadb_comb_soft_allocations; u_int32_t sadb_comb_hard_allocations; u_int64_t sadb_comb_soft_bytes; u_int64_t sadb_comb_hard_bytes; u_int64_t sadb_comb_soft_addtime; u_int64_t sadb_comb_hard_addtime; u_int64_t sadb_comb_soft_usetime; u_int64_t sadb_comb_hard_usetime; }; struct sadb_supported { u_int16_t sadb_supported_len; u_int16_t sadb_supported_exttype; u_int32_t sadb_supported_reserved; }; struct sadb_alg { u_int8_t sadb_alg_id; u_int8_t sadb_alg_ivlen; u_int16_t sadb_alg_minbits; u_int16_t sadb_alg_maxbits; u_int16_t sadb_alg_reserved; }; struct sadb_spirange { u_int16_t sadb_spirange_len; u_int16_t sadb_spirange_exttype; u_int32_t sadb_spirange_min; u_int32_t sadb_spirange_max; u_int32_t sadb_spirange_reserved; }; struct sadb_x_kmprivate { u_int16_t sadb_x_kmprivate_len; u_int16_t sadb_x_kmprivate_exttype; u_int32_t sadb_x_kmprivate_reserved; }; /* * XXX Additional SA Extension. * mode: tunnel or transport * reqid: to make SA unique nevertheless the address pair of SA are same. * Mainly it's for VPN. */ struct sadb_x_sa2 { u_int16_t sadb_x_sa2_len; u_int16_t sadb_x_sa2_exttype; u_int8_t sadb_x_sa2_mode; u_int8_t sadb_x_sa2_reserved1; u_int16_t sadb_x_sa2_reserved2; u_int32_t sadb_x_sa2_sequence; /* lowermost 32bit of sequence number */ u_int32_t sadb_x_sa2_reqid; }; /* XXX Policy Extension */ struct sadb_x_policy { u_int16_t sadb_x_policy_len; u_int16_t sadb_x_policy_exttype; u_int16_t sadb_x_policy_type; /* See policy type of ipsec.h */ u_int8_t sadb_x_policy_dir; /* direction, see ipsec.h */ u_int8_t sadb_x_policy_reserved; u_int32_t sadb_x_policy_id; - u_int32_t sadb_x_policy_reserved2; + u_int32_t sadb_x_policy_priority; }; _Static_assert(sizeof(struct sadb_x_policy) == 16, "struct size mismatch"); /* * When policy_type == IPSEC, it is followed by some of * the ipsec policy request. * [total length of ipsec policy requests] * = (sadb_x_policy_len * sizeof(uint64_t) - sizeof(struct sadb_x_policy)) */ /* XXX IPsec Policy Request Extension */ /* * This structure is aligned 8 bytes. */ struct sadb_x_ipsecrequest { u_int16_t sadb_x_ipsecrequest_len; /* structure length in 64 bits. */ u_int16_t sadb_x_ipsecrequest_proto; /* See ipsec.h */ u_int8_t sadb_x_ipsecrequest_mode; /* See IPSEC_MODE_XX in ipsec.h. */ u_int8_t sadb_x_ipsecrequest_level; /* See IPSEC_LEVEL_XX in ipsec.h */ u_int16_t sadb_x_ipsecrequest_reqid; /* See ipsec.h */ /* * followed by source IP address of SA, and immediately followed by * destination IP address of SA. These encoded into two of sockaddr * structure without any padding. Must set each sa_len exactly. * Each of length of the sockaddr structure are not aligned to 64bits, * but sum of x_request and addresses is aligned to 64bits. */ }; /* NAT-Traversal type, see RFC 3948 (and drafts). */ struct sadb_x_nat_t_type { u_int16_t sadb_x_nat_t_type_len; u_int16_t sadb_x_nat_t_type_exttype; u_int8_t sadb_x_nat_t_type_type; u_int8_t sadb_x_nat_t_type_reserved[3]; }; _Static_assert(sizeof(struct sadb_x_nat_t_type) == 8, "struct size mismatch"); /* NAT-Traversal source or destination port. 
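 *
 * (Like every PF_KEY extension header, the len field below counts
 * 8-byte units.  For this 8-byte structure a sender would set, e.g.,
 *
 *	p.sadb_x_nat_t_port_len = PFKEY_UNIT64(sizeof(p));
 *
 * which evaluates to 1; PFKEY_UNUNIT64() converts back to bytes.
 * Both macros are defined in the Utilities section at the end of
 * this header.)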
*/ struct sadb_x_nat_t_port { u_int16_t sadb_x_nat_t_port_len; u_int16_t sadb_x_nat_t_port_exttype; u_int16_t sadb_x_nat_t_port_port; u_int16_t sadb_x_nat_t_port_reserved; }; _Static_assert(sizeof(struct sadb_x_nat_t_port) == 8, "struct size mismatch"); /* ESP fragmentation size. */ struct sadb_x_nat_t_frag { u_int16_t sadb_x_nat_t_frag_len; u_int16_t sadb_x_nat_t_frag_exttype; u_int16_t sadb_x_nat_t_frag_fraglen; u_int16_t sadb_x_nat_t_frag_reserved; }; _Static_assert(sizeof(struct sadb_x_nat_t_frag) == 8, "struct size mismatch"); #define SADB_EXT_RESERVED 0 #define SADB_EXT_SA 1 #define SADB_EXT_LIFETIME_CURRENT 2 #define SADB_EXT_LIFETIME_HARD 3 #define SADB_EXT_LIFETIME_SOFT 4 #define SADB_EXT_ADDRESS_SRC 5 #define SADB_EXT_ADDRESS_DST 6 #define SADB_EXT_ADDRESS_PROXY 7 #define SADB_EXT_KEY_AUTH 8 #define SADB_EXT_KEY_ENCRYPT 9 #define SADB_EXT_IDENTITY_SRC 10 #define SADB_EXT_IDENTITY_DST 11 #define SADB_EXT_SENSITIVITY 12 #define SADB_EXT_PROPOSAL 13 #define SADB_EXT_SUPPORTED_AUTH 14 #define SADB_EXT_SUPPORTED_ENCRYPT 15 #define SADB_EXT_SPIRANGE 16 #define SADB_X_EXT_KMPRIVATE 17 #define SADB_X_EXT_POLICY 18 #define SADB_X_EXT_SA2 19 #define SADB_X_EXT_NAT_T_TYPE 20 #define SADB_X_EXT_NAT_T_SPORT 21 #define SADB_X_EXT_NAT_T_DPORT 22 #define SADB_X_EXT_NAT_T_OA 23 /* Deprecated. */ #define SADB_X_EXT_NAT_T_OAI 23 /* Peer's NAT_OA for src of SA. */ #define SADB_X_EXT_NAT_T_OAR 24 /* Peer's NAT_OA for dst of SA. */ #define SADB_X_EXT_NAT_T_FRAG 25 /* Manual MTU override. */ #define SADB_EXT_MAX 25 #define SADB_SATYPE_UNSPEC 0 #define SADB_SATYPE_AH 2 #define SADB_SATYPE_ESP 3 #define SADB_SATYPE_RSVP 5 #define SADB_SATYPE_OSPFV2 6 #define SADB_SATYPE_RIPV2 7 #define SADB_SATYPE_MIP 8 #define SADB_X_SATYPE_IPCOMP 9 /*#define SADB_X_SATYPE_POLICY 10 obsolete, do not reuse */ #define SADB_X_SATYPE_TCPSIGNATURE 11 #define SADB_SATYPE_MAX 12 #define SADB_SASTATE_LARVAL 0 #define SADB_SASTATE_MATURE 1 #define SADB_SASTATE_DYING 2 #define SADB_SASTATE_DEAD 3 #define SADB_SASTATE_MAX 3 #define SADB_SAFLAGS_PFS 1 /* * Though some of these numbers (both _AALG and _EALG) appear to be * IKEv2 numbers and others original IKE numbers, they have no meaning. * These are constants that the various IKE daemons use to tell the kernel * what cipher to use. * * Do not use these constants directly to decide which Transformation ID * to send. You are responsible for mapping them yourself. */ #define SADB_AALG_NONE 0 #define SADB_AALG_MD5HMAC 2 #define SADB_AALG_SHA1HMAC 3 #define SADB_AALG_MAX 252 #define SADB_X_AALG_SHA2_256 5 #define SADB_X_AALG_SHA2_384 6 #define SADB_X_AALG_SHA2_512 7 #define SADB_X_AALG_RIPEMD160HMAC 8 #define SADB_X_AALG_AES_XCBC_MAC 9 /* RFC3566 */ #define SADB_X_AALG_AES128GMAC 11 /* RFC4543 + Errata1821 */ #define SADB_X_AALG_AES192GMAC 12 #define SADB_X_AALG_AES256GMAC 13 #define SADB_X_AALG_MD5 249 /* Keyed MD5 */ #define SADB_X_AALG_SHA 250 /* Keyed SHA */ #define SADB_X_AALG_NULL 251 /* null authentication */ #define SADB_X_AALG_TCP_MD5 252 /* Keyed TCP-MD5 (RFC2385) */ #define SADB_EALG_NONE 0 #define SADB_EALG_DESCBC 2 #define SADB_EALG_3DESCBC 3 #define SADB_X_EALG_CAST128CBC 6 #define SADB_X_EALG_BLOWFISHCBC 7 #define SADB_EALG_NULL 11 #define SADB_X_EALG_RIJNDAELCBC 12 #define SADB_X_EALG_AES 12 #define SADB_X_EALG_AESCTR 13 #define SADB_X_EALG_AESGCM8 18 /* RFC4106 */ #define SADB_X_EALG_AESGCM12 19 #define SADB_X_EALG_AESGCM16 20 #define SADB_X_EALG_CAMELLIACBC 22 #define SADB_X_EALG_AESGMAC 23 /* RFC4543 + Errata1821 */ #define SADB_EALG_MAX 23 /* !!! keep updated !!! 
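 *
 * (That is, SADB_EALG_MAX must track the highest encryption
 * algorithm number above; it is 23 because SADB_X_EALG_AESGMAC is
 * currently the largest assigned value.)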
*/ /* private allocations - based on RFC2407/IANA assignment */ #define SADB_X_CALG_NONE 0 #define SADB_X_CALG_OUI 1 #define SADB_X_CALG_DEFLATE 2 #define SADB_X_CALG_LZS 3 #define SADB_X_CALG_MAX 4 #define SADB_IDENTTYPE_RESERVED 0 #define SADB_IDENTTYPE_PREFIX 1 #define SADB_IDENTTYPE_FQDN 2 #define SADB_IDENTTYPE_USERFQDN 3 #define SADB_X_IDENTTYPE_ADDR 4 #define SADB_IDENTTYPE_MAX 4 /* `flags' in sadb_sa structure holds followings */ #define SADB_X_EXT_NONE 0x0000 /* i.e. new format. */ #define SADB_X_EXT_OLD 0x0001 /* old format. */ #define SADB_X_EXT_IV4B 0x0010 /* IV length of 4 bytes in use */ #define SADB_X_EXT_DERIV 0x0020 /* DES derived */ #define SADB_X_EXT_CYCSEQ 0x0040 /* allowing to cyclic sequence. */ /* three of followings are exclusive flags each them */ #define SADB_X_EXT_PSEQ 0x0000 /* sequencial padding for ESP */ #define SADB_X_EXT_PRAND 0x0100 /* random padding for ESP */ #define SADB_X_EXT_PZERO 0x0200 /* zero padding for ESP */ #define SADB_X_EXT_PMASK 0x0300 /* mask for padding flag */ #if 1 #define SADB_X_EXT_RAWCPI 0x0080 /* use well known CPI (IPComp) */ #endif #define SADB_KEY_FLAGS_MAX 0x0fff /* SPI size for PF_KEYv2 */ #define PFKEY_SPI_SIZE sizeof(u_int32_t) /* Identifier for menber of lifetime structure */ #define SADB_X_LIFETIME_ALLOCATIONS 0 #define SADB_X_LIFETIME_BYTES 1 #define SADB_X_LIFETIME_ADDTIME 2 #define SADB_X_LIFETIME_USETIME 3 /* The rate for SOFT lifetime against HARD one. */ #define PFKEY_SOFT_LIFETIME_RATE 80 /* Utilities */ #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1))) #define PFKEY_EXTLEN(msg) \ PFKEY_UNUNIT64(((struct sadb_ext *)(msg))->sadb_ext_len) #define PFKEY_ADDR_PREFIX(ext) \ (((struct sadb_address *)(ext))->sadb_address_prefixlen) #define PFKEY_ADDR_PROTO(ext) \ (((struct sadb_address *)(ext))->sadb_address_proto) #define PFKEY_ADDR_SADDR(ext) \ ((struct sockaddr *)((caddr_t)(ext) + sizeof(struct sadb_address))) /* in 64bits */ #define PFKEY_UNUNIT64(a) ((a) << 3) #define PFKEY_UNIT64(a) ((a) >> 3) #endif /* __PFKEY_V2_H */ #endif /* _NET_PFKEYV2_H_ */ Index: projects/powernv/netinet6/ip6_forward.c =================================================================== --- projects/powernv/netinet6/ip6_forward.c (revision 290990) +++ projects/powernv/netinet6/ip6_forward.c (revision 290991) @@ -1,627 +1,627 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_forward.c,v 1.69 2001/05/17 03:48:30 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipfw.h" #include "opt_ipsec.h" #include "opt_ipstealth.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #include #include #endif /* IPSEC */ /* * Forward a packet. If some error occurs return the sender * an icmp packet. Note we can't always generate a meaningful * icmp message because icmp doesn't have a large enough repertoire * of codes and types. * * If not forwarding, just drop the packet. This could be confusing * if ipforwarding was zero but some routing protocol was advancing * us as a gateway to somewhere. However, we must let the routing * protocol deal with that. * */ void ip6_forward(struct mbuf *m, int srcrt) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct sockaddr_in6 *dst = NULL; struct rtentry *rt = NULL; struct route_in6 rin6; int error, type = 0, code = 0; struct mbuf *mcopy = NULL; struct ifnet *origifp; /* maybe unnecessary */ u_int32_t inzone, outzone; struct in6_addr src_in6, dst_in6, odst; #ifdef IPSEC struct secpolicy *sp = NULL; #endif #ifdef SCTP int sw_csum; #endif struct m_tag *fwd_tag; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; /* * Do not forward packets to multicast destination (should be handled * by ip6_mforward(). * Do not forward packets with unspecified source. It was discussed * in July 2000, on the ipngwg mailing list. */ if ((m->m_flags & (M_BCAST|M_MCAST)) != 0 || IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { IP6STAT_INC(ip6s_cantforward); /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ if (V_ip6_log_time + V_ip6_log_interval < time_uptime) { V_ip6_log_time = time_uptime; log(LOG_DEBUG, "cannot forward " "from %s to %s nxt %d received on %s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif)); } m_freem(m); return; } #ifdef IPSEC /* * Check if this packet has an active SA and needs to be dropped * instead of forwarded. */ if (ip6_ipsec_fwd(m) != 0) { IP6STAT_INC(ip6s_cantforward); m_freem(m); return; } #endif /* IPSEC */ #ifdef IPSTEALTH if (!V_ip6stealth) { #endif if (ip6->ip6_hlim <= IPV6_HLIMDEC) { /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ icmp6_error(m, ICMP6_TIME_EXCEEDED, ICMP6_TIME_EXCEED_TRANSIT, 0); return; } ip6->ip6_hlim -= IPV6_HLIMDEC; #ifdef IPSTEALTH } #endif /* * Save at most ICMPV6_PLD_MAXLEN (= the min IPv6 MTU - * size of IPv6 + ICMPv6 headers) bytes of the packet in case * we need to generate an ICMP6 message to the src. * Thanks to M_EXT, in most cases copy will not occur. * * It is important to save it before IPsec processing as IPsec * processing may modify the mbuf. 
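 *
 * (ICMPV6_PLD_MAXLEN works out to 1232 bytes: the 1280-byte IPv6
 * minimum MTU less the 40-byte IPv6 header and the 8-byte ICMPv6
 * header, so the resulting error packet never itself needs
 * fragmentation.)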
*/ mcopy = m_copy(m, 0, imin(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN)); #ifdef IPSEC /* get a security policy for this packet */ sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, &error); if (sp == NULL) { IPSEC6STAT_INC(ips_out_inval); IP6STAT_INC(ip6s_cantforward); if (mcopy) { #if 0 /* XXX: what icmp ? */ #else m_freem(mcopy); #endif } m_freem(m); return; } error = 0; /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: /* * This packet is just discarded. */ IPSEC6STAT_INC(ips_out_polvio); IP6STAT_INC(ip6s_cantforward); KEY_FREESP(&sp); if (mcopy) { #if 0 /* XXX: what icmp ? */ #else m_freem(mcopy); #endif } m_freem(m); return; case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: /* no need to do IPsec. */ KEY_FREESP(&sp); goto skip_ipsec; case IPSEC_POLICY_IPSEC: if (sp->req == NULL) { /* XXX should be panic ? */ printf("ip6_forward: No IPsec request specified.\n"); IP6STAT_INC(ip6s_cantforward); KEY_FREESP(&sp); if (mcopy) { #if 0 /* XXX: what icmp ? */ #else m_freem(mcopy); #endif } m_freem(m); return; } /* do IPsec */ break; case IPSEC_POLICY_ENTRUST: default: /* should be panic ?? */ printf("ip6_forward: Invalid policy found. %d\n", sp->policy); KEY_FREESP(&sp); goto skip_ipsec; } { struct ipsecrequest *isr = NULL; /* * When the kernel forwards a packet, it is not proper to apply * IPsec transport mode to it; this check avoids that. * At present, if there is even a transport mode SA request in the * security policy, the kernel does not apply IPsec to the packet. * This check is not enough, because the following case is valid: * ipsec esp/tunnel/xxx-xxx/require esp/transport//require; */ for (isr = sp->req; isr; isr = isr->next) { if (isr->saidx.mode == IPSEC_MODE_ANY) goto doipsectunnel; if (isr->saidx.mode == IPSEC_MODE_TUNNEL) goto doipsectunnel; } /* * If there's no need for tunnel mode IPsec, skip. */ if (!isr) goto skip_ipsec; doipsectunnel: /* * All the extension headers will become inaccessible * (since they can be encrypted). * Don't panic, we need no more updates to extension headers * on the inner IPv6 packet (since they are now encapsulated). * * IPv6 [ESP|AH] IPv6 [extension headers] payload */ /* * If we need to encapsulate the packet, do it here; * ipsec6_process_packet() will send the packet using ip6_output(). */ error = ipsec6_process_packet(m, sp->req); /* Release the SP if an error occurred */ if (error != 0) KEY_FREESP(&sp); if (error == EJUSTRETURN) { /* * We had an SP with a level of 'use' and no SA. We * will just continue to process the packet without * IPsec processing. */ error = 0; goto skip_ipsec; } if (error) { /* mbuf is already reclaimed in ipsec6_process_packet. */ switch (error) { case EHOSTUNREACH: case ENETUNREACH: case EMSGSIZE: case ENOBUFS: case ENOMEM: break; default: printf("ip6_output (ipsec): error code %d\n", error); /* FALLTHROUGH */ case ENOENT: /* don't show these error codes to the user */ break; } IP6STAT_INC(ip6s_cantforward); if (mcopy) { #if 0 /* XXX: what icmp ? */ #else m_freem(mcopy); #endif } return; } else { /* * In the FAST IPSec case we have already * re-injected the packet and it has been freed * by the ipsec_done() function. So, just clean * up after ourselves.
*/ m = NULL; goto freecopy; } } skip_ipsec: #endif again: bzero(&rin6, sizeof(struct route_in6)); dst = (struct sockaddr_in6 *)&rin6.ro_dst; dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_family = AF_INET6; dst->sin6_addr = ip6->ip6_dst; again2: rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m)); if (rin6.ro_rt != NULL) RT_UNLOCK(rin6.ro_rt); else { IP6STAT_INC(ip6s_noroute); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute); if (mcopy) { icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOROUTE, 0); } goto bad; } rt = rin6.ro_rt; /* * Source scope check: if a packet can't be delivered to its * destination for the reason that the destination is beyond the scope * of the source address, discard the packet and return an icmp6 * destination unreachable error with Code 2 (beyond scope of source * address). We use a local copy of ip6_src, since in6_setscope() * will possibly modify its first argument. * [draft-ietf-ipngwg-icmp-v3-04.txt, Section 3.1] */ src_in6 = ip6->ip6_src; if (in6_setscope(&src_in6, rt->rt_ifp, &outzone)) { /* XXX: this should not happen */ IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); goto bad; } if (in6_setscope(&src_in6, m->m_pkthdr.rcvif, &inzone)) { IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); goto bad; } if (inzone != outzone) { IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard); if (V_ip6_log_time + V_ip6_log_interval < time_uptime) { V_ip6_log_time = time_uptime; log(LOG_DEBUG, "cannot forward " "src %s, dst %s, nxt %d, rcvif %s, outif %s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp)); } if (mcopy) icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_BEYONDSCOPE, 0); goto bad; } /* * Destination scope check: if a packet is going to break the scope * zone of packet's destination address, discard it. This case should * usually be prevented by appropriately-configured routing table, but * we need an explicit check because we may mistakenly forward the * packet to a different zone by (e.g.) a default route. */ dst_in6 = ip6->ip6_dst; if (in6_setscope(&dst_in6, m->m_pkthdr.rcvif, &inzone) != 0 || in6_setscope(&dst_in6, rt->rt_ifp, &outzone) != 0 || inzone != outzone) { IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); goto bad; } if (rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in6 *)rt->rt_gateway; /* * If we are to forward the packet using the same interface * as one we got the packet from, perhaps we should send a redirect * to sender to shortcut a hop. * Only send redirect if source is sending directly to us, * and if packet was not source routed (or has any options). * Also, don't send redirect if forwarding using a route * modified by a redirect. */ if (V_ip6_sendredirects && rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) { if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) != 0) { /* * If the incoming interface is equal to the outgoing * one, and the link attached to the interface is * point-to-point, then it will be highly probable * that a routing loop occurs. Thus, we immediately * drop the packet and send an ICMPv6 error message. * * type/code is based on suggestion by Rich Draves. * not sure if it is the best pick. */ icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0); goto bad; } type = ND_REDIRECT; } /* * Fake scoped addresses. 
Note that even link-local source or * destinaion can appear, if the originating node just sends the * packet to us (without address resolution for the destination). * Since both icmp6_error and icmp6_redirect_output fill the embedded * link identifiers, we can do this stuff after making a copy for * returning an error. */ if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { /* * See corresponding comments in ip6_output. * XXX: but is it possible that ip6_forward() sends a packet * to a loopback interface? I don't think so, and thus * I bark here. (jinmei@kame.net) * XXX: it is common to route invalid packets to loopback. * also, the codepath will be visited on use of ::1 in * rthdr. (itojun) */ #if 1 if (0) #else if ((rt->rt_flags & (RTF_BLACKHOLE|RTF_REJECT)) == 0) #endif { printf("ip6_forward: outgoing interface is loopback. " "src %s, dst %s, nxt %d, rcvif %s, outif %s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp)); } /* we can just use rcvif in forwarding. */ origifp = m->m_pkthdr.rcvif; } else origifp = rt->rt_ifp; /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet6_pfil_hook)) goto pass; odst = ip6->ip6_dst; /* Run through list of hooks for output packets. */ error = pfil_run_hooks(&V_inet6_pfil_hook, &m, rt->rt_ifp, PFIL_OUT, NULL); if (error != 0 || m == NULL) goto freecopy; /* consumed by filter */ ip6 = mtod(m, struct ip6_hdr *); /* See if destination IP address was changed by packet filter. */ if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) { m->m_flags |= M_SKIP_FIREWALL; /* If destination is now ourself drop to ip6_input(). */ if (in6_localip(&ip6->ip6_dst)) m->m_flags |= M_FASTFWD_OURS; else goto again; /* Redo the routing table lookup. */ } /* See if local, if yes, send it to netisr. */ if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto out; } /* Or forward to some other address? */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { dst = (struct sockaddr_in6 *)&rin6.ro_dst; bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in6)); m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP6_NEXTHOP; m_tag_delete(m, fwd_tag); goto again2; } pass: /* See if the size was changed by the packet filter. */ if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) { in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig); if (mcopy) { u_long mtu; #ifdef IPSEC size_t ipsechdrsiz; #endif /* IPSEC */ mtu = IN6_LINKMTU(rt->rt_ifp); #ifdef IPSEC /* * When we do IPsec tunnel ingress, we need to play * with the link value (decrement IPsec header size * from mtu value). The code is much simpler than v4 * case, as we have the outgoing interface for * encapsulated packet as "rt->rt_ifp". */ ipsechdrsiz = ipsec_hdrsiz(mcopy, IPSEC_DIR_OUTBOUND, NULL); if (ipsechdrsiz < mtu) mtu -= ipsechdrsiz; /* * if mtu becomes less than minimum MTU, * tell minimum MTU (and I'll need to fragment it). 
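 *
 * [Editor's example, not part of the diff; the overhead figure is
 * illustrative] On a 1500-byte link with tunnel-mode ESP costing
 * roughly 70 bytes (outer IPv6 header plus ESP framing), the MTU
 * advertised in the Packet Too Big message drops to about 1430, and
 * the clamp below keeps it from ever going under IPV6_MMTU (1280).
 *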
*/ if (mtu < IPV6_MMTU) mtu = IPV6_MMTU; #endif /* IPSEC */ icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu); } goto bad; } - error = nd6_output_ifp(rt->rt_ifp, origifp, m, dst); + error = nd6_output_ifp(rt->rt_ifp, origifp, m, dst, NULL); if (error) { in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard); IP6STAT_INC(ip6s_cantforward); } else { IP6STAT_INC(ip6s_forward); in6_ifstat_inc(rt->rt_ifp, ifs6_out_forward); if (type) IP6STAT_INC(ip6s_redirectsent); else { if (mcopy) goto freecopy; } } if (mcopy == NULL) goto out; switch (error) { case 0: if (type == ND_REDIRECT) { icmp6_redirect_output(mcopy, rt); goto out; } goto freecopy; case EMSGSIZE: /* xxx MTU is constant in PPP? */ goto freecopy; case ENOBUFS: /* Tell source to slow down like source quench in IP? */ goto freecopy; case ENETUNREACH: /* shouldn't happen, checked above */ case EHOSTUNREACH: case ENETDOWN: case EHOSTDOWN: default: type = ICMP6_DST_UNREACH; code = ICMP6_DST_UNREACH_ADDR; break; } icmp6_error(mcopy, type, code, 0); goto out; freecopy: m_freem(mcopy); goto out; bad: m_freem(m); out: if (rt != NULL) RTFREE(rt); } Index: projects/powernv/netinet6/ip6_output.c =================================================================== --- projects/powernv/netinet6/ip6_output.c (revision 290990) +++ projects/powernv/netinet6/ip6_output.c (revision 290991) @@ -1,2997 +1,2997 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipfw.h" #include "opt_ipsec.h" #include "opt_sctp.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #include #include #endif /* IPSEC */ #ifdef SCTP #include #include #endif #include #include #ifdef FLOWTABLE #include #endif extern int in6_mcast_loop; struct ip6_exthdrs { struct mbuf *ip6e_ip6; struct mbuf *ip6e_hbh; struct mbuf *ip6e_dest1; struct mbuf *ip6e_rthdr; struct mbuf *ip6e_dest2; }; static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, struct ucred *, int); static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *, struct sockopt *); static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *); static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, struct ucred *, int, int, int); static int ip6_copyexthdr(struct mbuf **, caddr_t, int); static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, struct ip6_frag **); static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); static int ip6_getpmtu(struct route_in6 *, struct route_in6 *, struct ifnet *, struct in6_addr *, u_long *, int *, u_int); static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); /* * Make an extension header from option data. hp is the source, and * mp is the destination. */ #define MAKE_EXTHDR(hp, mp) \ do { \ if (hp) { \ struct ip6_ext *eh = (struct ip6_ext *)(hp); \ error = ip6_copyexthdr((mp), (caddr_t)(hp), \ ((eh)->ip6e_len + 1) << 3); \ if (error) \ goto freehdrs; \ } \ } while (/*CONSTCOND*/ 0) /* * Form a chain of extension headers. * m is the extension header mbuf * mp is the previous mbuf in the chain * p is the next header * i is the type of option. 
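 *
 * [Editor's example, not part of the diff] MAKE_CHAIN() splices a
 * header in by swapping next-header values; for hop-by-hop options
 * in front of TCP:
 *
 *	before:	ip6->ip6_nxt == IPPROTO_TCP
 *	after:	ip6->ip6_nxt == IPPROTO_HOPOPTS and the first byte
 *		of the hbh mbuf (its next-header field) == IPPROTO_TCP
 *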
*/ #define MAKE_CHAIN(m, mp, p, i)\ do {\ if (m) {\ if (!hdrsplit) \ panic("assumption failed: hdr not split"); \ *mtod((m), u_char *) = *(p);\ *(p) = (i);\ p = mtod((m), u_char *);\ (m)->m_next = (mp)->m_next;\ (mp)->m_next = (m);\ (mp) = (m);\ }\ } while (/*CONSTCOND*/ 0) void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) { u_short csum; csum = in_cksum_skip(m, offset + plen, offset); if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0) csum = 0xffff; offset += m->m_pkthdr.csum_data; /* checksum offset */ if (offset + sizeof(u_short) > m->m_len) { printf("%s: delayed m_pullup, m->len: %d plen %u off %u " "csum_flags=%b\n", __func__, m->m_len, plen, offset, (int)m->m_pkthdr.csum_flags, CSUM_BITS); /* * XXX this should not happen, but if it does, the correct * behavior may be to insert the checksum in the appropriate * next mbuf in the chain. */ return; } *(u_short *)(m->m_data + offset) = csum; } int ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto, int mtu, uint32_t id) { struct mbuf *m, **mnext, *m_frgpart; struct ip6_hdr *ip6, *mhip6; struct ip6_frag *ip6f; int off; int error; int tlen = m0->m_pkthdr.len; m = m0; ip6 = mtod(m, struct ip6_hdr *); mnext = &m->m_nextpkt; for (off = hlen; off < tlen; off += mtu) { m = m_gethdr(M_NOWAIT, MT_DATA); if (!m) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m->m_flags = m0->m_flags & M_COPYFLAGS; *mnext = m; mnext = &m->m_nextpkt; m->m_data += max_linkhdr; mhip6 = mtod(m, struct ip6_hdr *); *mhip6 = *ip6; m->m_len = sizeof(*mhip6); error = ip6_insertfraghdr(m0, m, hlen, &ip6f); if (error) { IP6STAT_INC(ip6s_odropped); return (error); } ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); if (off + mtu >= tlen) mtu = tlen - off; else ip6f->ip6f_offlg |= IP6F_MORE_FRAG; mhip6->ip6_plen = htons((u_short)(mtu + hlen + sizeof(*ip6f) - sizeof(struct ip6_hdr))); if ((m_frgpart = m_copy(m0, off, mtu)) == 0) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m_cat(m, m_frgpart); m->m_pkthdr.len = mtu + hlen + sizeof(*ip6f); m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum; m->m_pkthdr.rcvif = NULL; ip6f->ip6f_reserved = 0; ip6f->ip6f_ident = id; ip6f->ip6f_nxt = nextproto; IP6STAT_INC(ip6s_ofragments); in6_ifstat_inc(ifp, ifs6_out_fragcreat); } return (0); } /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). * This function may modify ver and hlim only. * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * If route_in6 ro is present and has ro_rt initialized, route lookup would be * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, * then result of route lookup is stored in ro->ro_rt. * * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, * which is rt_mtu. * * ifpp - XXX: just for statistics */ /* * XXX TODO: no flowid is assigned for outbound flows? 
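 *
 * [Editor's note, not part of the diff] When an inpcb is supplied,
 * the flowid is in fact stamped near the top of the function (unless
 * the caller passes IP_NODEFAULTFLOWID):
 *
 *	m->m_pkthdr.flowid = inp->inp_flowid;
 *	M_HASHTYPE_SET(m, inp->inp_flowtype);
 *
 * so the TODO above concerns inp-less callers, ICMPv6 for example,
 * whose packets reach the driver without a flowid.
 *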
*/ int ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, struct inpcb *inp) { struct ip6_hdr *ip6; struct ifnet *ifp, *origifp; struct mbuf *m = m0; struct mbuf *mprev = NULL; int hlen, tlen, len; struct route_in6 ip6route; struct rtentry *rt = NULL; struct sockaddr_in6 *dst, src_sa, dst_sa; struct in6_addr odst; int error = 0; struct in6_ifaddr *ia = NULL; u_long mtu; int alwaysfrag, dontfrag; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; struct in6_addr finaldst, src0, dst0; u_int32_t zone; struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; int sw_csum, tso; int needfiblookup; uint32_t fibnum; struct m_tag *fwd_tag = NULL; uint32_t id; ip6 = mtod(m, struct ip6_hdr *); if (ip6 == NULL) { printf ("ip6 is NULL"); goto bad; } if (inp != NULL) { M_SETFIB(m, inp->inp_inc.inc_fibnum); if ((flags & IP_NODEFAULTFLOWID) == 0) { /* unconditionally set flowid */ m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } } finaldst = ip6->ip6_dst; bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); /* Destination options header(1st part) */ if (opt->ip6po_rthdr) { /* * Destination options header(1st part) * This only makes sense with a routing header. * See Section 9.2 of RFC 3542. * Disabling this part just for MIP6 convenience is * a bad idea. We need to think carefully about a * way to make the advanced API coexist with MIP6 * options, which might automatically be inserted in * the kernel. */ MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); } /* Routing header */ MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); /* Destination options header(2nd part) */ MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); } #ifdef IPSEC /* * IPSec checking which handles several cases. * FAST IPSEC: We re-injected the packet. * XXX: need scope argument. */ switch(ip6_ipsec_output(&m, inp, &error)) { case 1: /* Bad packet */ goto freehdrs; case -1: /* IPSec done */ goto done; case 0: /* No IPSec */ default: break; } #endif /* IPSEC */ /* * Calculate the total length of the extension header chain. * Keep the length of the unfragmentable part for fragmentation. */ optlen = 0; if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len; if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len; if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len; unfragpartlen = optlen + sizeof(struct ip6_hdr); /* NOTE: we don't add AH/ESP length here (done in ip6_ipsec_output) */ if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len; /* * If there is at least one extension header, * separate IP6 header from the payload. */ if (optlen && !hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); /* adjust mbuf packet header length */ m->m_pkthdr.len += optlen; plen = m->m_pkthdr.len - sizeof(*ip6); /* If this is a jumbo payload, insert a jumbo payload option. */ if (plen > IPV6_MAXPACKET) { if (!hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) goto freehdrs; ip6->ip6_plen = 0; } else ip6->ip6_plen = htons(plen); /* * Concatenate headers and fill in next header fields. 
* Here we have, on "m" * IPv6 payload * and we insert headers accordingly. Finally, we should be getting: * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] * * during the header composing process, "m" points to IPv6 header. * "mprev" points to an extension header prior to esp. */ u_char *nexthdrp = &ip6->ip6_nxt; mprev = m; /* * we treat dest2 specially. this makes IPsec processing * much easier. the goal here is to make mprev point the * mbuf prior to dest2. * * result: IPv6 dest2 payload * m and mprev will point to IPv6 header. */ if (exthdrs.ip6e_dest2) { if (!hdrsplit) panic("assumption failed: hdr not split"); exthdrs.ip6e_dest2->m_next = m->m_next; m->m_next = exthdrs.ip6e_dest2; *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_DSTOPTS; } /* * result: IPv6 hbh dest1 rthdr dest2 payload * m will point to IPv6 header. mprev will point to the * extension header prior to dest2 (rthdr in the above case). */ MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS); MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); /* * If there is a routing header, discard the packet. */ if (exthdrs.ip6e_rthdr) { error = EINVAL; goto bad; } /* Source address validation */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && (flags & IPV6_UNSPECSRC) == 0) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } IP6STAT_INC(ip6s_localout); /* * Route packet. */ if (ro == 0) { ro = &ip6route; bzero((caddr_t)ro, sizeof(*ro)); } ro_pmtu = ro; if (opt && opt->ip6po_rthdr) ro = &opt->ip6po_route; dst = (struct sockaddr_in6 *)&ro->ro_dst; #ifdef FLOWTABLE if (ro->ro_rt == NULL) (void )flowtable_lookup(AF_INET6, m, (struct route *)ro); #endif fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); again: /* * if specified, try to fill in the traffic class field. * do not override if a non-zero value is already set. * we check the diffserv field and the ecn field separately. */ if (opt && opt->ip6po_tclass >= 0) { int mask = 0; if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) mask |= 0xfc; if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) mask |= 0x03; if (mask != 0) ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20); } /* fill in or override the hop limit field, if necessary. */ if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (im6o != NULL) ip6->ip6_hlim = im6o->im6o_multicast_hlim; else ip6->ip6_hlim = V_ip6_defmcasthlim; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if (ro->ro_rt && fwd_tag == NULL) { rt = ro->ro_rt; ifp = ro->ro_rt->rt_ifp; } else { if (fwd_tag == NULL) { bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; } error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp, &rt, fibnum); if (error != 0) { if (ifp != NULL) in6_ifstat_inc(ifp, ifs6_out_discard); goto bad; } } if (rt == NULL) { /* * If in6_selectroute() does not return a route entry, * dst may not have been updated. */ *dst = dst_sa; /* XXX */ } /* * then rt (for unicast) and ifp must be non-NULL valid values. */ if ((flags & IPV6_FORWARDING) == 0) { /* XXX: the FORWARDING flag can be set for mrouting. 
*/ in6_ifstat_inc(ifp, ifs6_out_request); } if (rt != NULL) { ia = (struct in6_ifaddr *)(rt->rt_ifa); counter_u64_add(rt->rt_pksent, 1); } /* * The outgoing interface must be in the zone of source and * destination addresses. */ origifp = ifp; src0 = ip6->ip6_src; if (in6_setscope(&src0, origifp, &zone)) goto badscope; bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = ip6->ip6_src; if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id) goto badscope; dst0 = ip6->ip6_dst; if (in6_setscope(&dst0, origifp, &zone)) goto badscope; /* re-initialize to be sure */ bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) { goto badscope; } /* We should use ia_ifp to support the case of * sending packets to an address of our own. */ if (ia != NULL && ia->ia_ifp) ifp = ia->ia_ifp; /* scope check is done. */ goto routefound; badscope: IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(origifp, ifs6_out_discard); if (error == 0) error = EHOSTUNREACH; /* XXX */ goto bad; routefound: if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (opt && opt->ip6po_nextroute.ro_rt) { /* * The nexthop is explicitly specified by the * application. We assume the next hop is an IPv6 * address. */ dst = (struct sockaddr_in6 *)opt->ip6po_nexthop; } else if ((rt->rt_flags & RTF_GATEWAY)) dst = (struct sockaddr_in6 *)rt->rt_gateway; } if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ } else { m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; in6_ifstat_inc(ifp, ifs6_out_mcast); /* * Confirm that the outgoing interface supports multicast. */ if (!(ifp->if_flags & IFF_MULTICAST)) { IP6STAT_INC(ip6s_noroute); in6_ifstat_inc(ifp, ifs6_out_discard); error = ENETUNREACH; goto bad; } if ((im6o == NULL && in6_mcast_loop) || (im6o && im6o->im6o_multicast_loop)) { /* * Loop back multicast datagram if not expressly * forbidden to do so, even if we have not joined * the address; protocols will filter it later, * thus deferring a hash lookup and lock acquisition * at the expense of an m_copym(). */ ip6_mloopback(ifp, m); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. The multicast forwarding function * recursively calls this function, using the * IPV6_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip6_mloopback(), * above, will be forwarded by the ip6_input() routine, * if necessary. */ if (V_ip6_mrouter && (flags & IPV6_FORWARDING) == 0) { /* * XXX: ip6_mforward expects that rcvif is NULL * when it is called from the originating path. * However, it may not always be the case. */ m->m_pkthdr.rcvif = NULL; if (ip6_mforward(ip6, ifp, m) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a hoplimit of zero may be looped back, * above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip6_mloopback() will * loop back a copy if this host actually belongs to the * destination group on the loopback interface. */ if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) { m_freem(m); goto done; } } /* * Fill the outgoing inteface to tell the upper layer * to increment per-interface statistics. 
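 *
 * [Editor's note, not part of the diff] Callers that pass a non-NULL
 * ifpp (ICMPv6, for instance) use the interface returned just below
 * to charge their own per-interface output statistics.
 *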
*/ if (ifpp) *ifpp = ifp; /* Determine path MTU. */ if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu, &alwaysfrag, fibnum)) != 0) goto bad; /* * The caller of this function may specify to use the minimum MTU * in some cases. * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU * setting. The logic is a bit complicated; by default, unicast * packets will follow path MTU while multicast packets will be sent at * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets * including unicast ones will be sent at the minimum MTU. Multicast * packets will always be sent at the minimum MTU unless * IP6PO_MINMTU_DISABLE is explicitly specified. * See RFC 3542 for more details. */ if (mtu > IPV6_MMTU) { if ((flags & IPV6_MINMTU)) mtu = IPV6_MMTU; else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) mtu = IPV6_MMTU; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && (opt == NULL || opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) { mtu = IPV6_MMTU; } } /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); /* * If the outgoing packet contains a hop-by-hop options header, * it must be examined and processed even by the source node. * (RFC 2460, section 4.) */ if (exthdrs.ip6e_hbh) { struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); u_int32_t dummy; /* XXX unused */ u_int32_t plen = 0; /* XXX: ip6_process will check the value */ #ifdef DIAGNOSTIC if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) panic("ip6e_hbh is not contiguous"); #endif /* * XXX: if we have to send an ICMPv6 error to the sender, * we need the M_LOOP flag since icmp6_error() expects * the IPv6 and the hop-by-hop options header are * contiguous unless the flag is set. */ m->m_flags |= M_LOOP; m->m_pkthdr.rcvif = ifp; if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), &dummy, &plen) < 0) { /* m was already freed at this point */ error = EINVAL;/* better error? */ goto done; } m->m_flags &= ~M_LOOP; /* XXX */ m->m_pkthdr.rcvif = NULL; } /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet6_pfil_hook)) goto passout; odst = ip6->ip6_dst; /* Run through list of hooks for output packets. */ error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp); if (error != 0 || m == NULL) goto done; ip6 = mtod(m, struct ip6_hdr *); needfiblookup = 0; /* See if destination IP address was changed by packet filter. */ if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) { m->m_flags |= M_SKIP_FIREWALL; /* If destination is now ourself drop to ip6_input(). */ if (in6_localip(&ip6->ip6_dst)) { m->m_flags |= M_FASTFWD_OURS; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } else needfiblookup = 1; /* Redo the routing table lookup. */ } /* See if fib was changed by packet filter. */ if (fibnum != M_GETFIB(m)) { m->m_flags |= M_SKIP_FIREWALL; fibnum = M_GETFIB(m); RO_RTFREE(ro); needfiblookup = 1; } if (needfiblookup) goto again; /* See if local, if yes, send it to netisr. 
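 *
 * [Editor's note, not part of the diff] The checksum handling in the
 * block below exists because a filter-redirected local packet never
 * crosses the wire: pending delayed checksums are declared already
 * verified (csum_data = 0xffff) before the mbuf is requeued to
 * NETISR_IPV6, so the transport layer skips recomputation.
 *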
*/ if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } /* Or forward to some other address? */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { dst = (struct sockaddr_in6 *)&ro->ro_dst; bcopy((fwd_tag+1), &dst_sa, sizeof(struct sockaddr_in6)); m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP6_NEXTHOP; m_tag_delete(m, fwd_tag); goto again; } passout: /* * Send the packet to the outgoing interface. * If necessary, do IPv6 fragmentation before sending. * * the logic here is rather complex: * 1: normal case (dontfrag == 0, alwaysfrag == 0) * 1-a: send as is if tlen <= path mtu * 1-b: fragment if tlen > path mtu * * 2: if user asks us not to fragment (dontfrag == 1) * 2-a: send as is if tlen <= interface mtu * 2-b: error if tlen > interface mtu * * 3: if we always need to attach fragment header (alwaysfrag == 1) * always fragment * * 4: if dontfrag == 1 && alwaysfrag == 1 * error, as we cannot handle this conflicting request */ sw_csum = m->m_pkthdr.csum_flags; if (!hdrsplit) { tso = ((sw_csum & ifp->if_hwassist & CSUM_TSO) != 0) ? 1 : 0; sw_csum &= ~ifp->if_hwassist; } else tso = 0; /* * If we added extension headers, we will not do TSO and calculate the * checksums ourselves for now. * XXX-BZ Need a framework to know when the NIC can handle it, even * with ext. hdrs. */ if (sw_csum & CSUM_DELAY_DATA_IPV6) { sw_csum &= ~CSUM_DELAY_DATA_IPV6; in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr)); } #ifdef SCTP if (sw_csum & CSUM_SCTP_IPV6) { sw_csum &= ~CSUM_SCTP_IPV6; sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); } #endif m->m_pkthdr.csum_flags &= ifp->if_hwassist; tlen = m->m_pkthdr.len; if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso) dontfrag = 1; else dontfrag = 0; if (dontfrag && alwaysfrag) { /* case 4 */ /* conflicting request - can't transmit */ error = EMSGSIZE; goto bad; } if (dontfrag && tlen > IN6_LINKMTU(ifp) && !tso) { /* case 2-b */ /* * Even if the DONTFRAG option is specified, we cannot send the * packet when the data length is larger than the MTU of the * outgoing interface. * Notify the error by sending IPV6_PATHMTU ancillary data if * application wanted to know the MTU value. Also return an * error code (this is not described in the API spec). */ if (inp != NULL) ip6_notify_pmtu(inp, &dst_sa, (u_int32_t)mtu); error = EMSGSIZE; goto bad; } /* * transmit packet without fragmentation */ if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */ struct in6_ifaddr *ia6; ip6 = mtod(m, struct ip6_hdr *); ia6 = in6_ifawithifp(ifp, &ip6->ip6_src); if (ia6) { /* Record statistics for this interface address. */ counter_u64_add(ia6->ia_ifa.ifa_opackets, 1); counter_u64_add(ia6->ia_ifa.ifa_obytes, m->m_pkthdr.len); ifa_free(&ia6->ia_ifa); } - error = nd6_output_ifp(ifp, origifp, m, dst); + error = nd6_output_ifp(ifp, origifp, m, dst, NULL); goto done; } /* * try to fragment the packet. 
case 1-b and 3 */ if (mtu < IPV6_MMTU) { /* path MTU cannot be less than IPV6_MMTU */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else { u_char nextproto; /* * Too large for the destination or interface; * fragment if possible. * Must be able to put at least 8 bytes per fragment. */ hlen = unfragpartlen; if (mtu > IPV6_MAXPACKET) mtu = IPV6_MAXPACKET; len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; if (len < 8) { error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } /* * If the interface will not calculate checksums on * fragmented packets, then do it here. * XXX-BZ handle the hw offloading case. Need flags. */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { in6_delayed_cksum(m, plen, hlen); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { sctp_delayed_cksum(m, hlen); m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; } #endif /* * Change the next header field of the last header in the * unfragmentable part. */ if (exthdrs.ip6e_rthdr) { nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_dest1) { nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_hbh) { nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; } else { nextproto = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_FRAGMENT; } /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto * chain. */ m0 = m; id = htonl(ip6_randomid()); if ((error = ip6_fragment(ifp, m, hlen, nextproto, len, id))) goto sendorfree; in6_ifstat_inc(ifp, ifs6_out_fragok); } /* * Remove leading garbages. */ sendorfree: m = m0->m_nextpkt; m0->m_nextpkt = 0; m_freem(m0); for (m0 = m; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { /* Record statistics for this interface address. */ if (ia) { counter_u64_add(ia->ia_ifa.ifa_opackets, 1); counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } - error = nd6_output_ifp(ifp, origifp, m, dst); + error = nd6_output_ifp(ifp, origifp, m, dst, NULL); } else m_freem(m); } if (error == 0) IP6STAT_INC(ip6s_fragmented); done: if (ro == &ip6route) RO_RTFREE(ro); if (ro_pmtu == &ip6route) RO_RTFREE(ro_pmtu); return (error); freehdrs: m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */ m_freem(exthdrs.ip6e_dest1); m_freem(exthdrs.ip6e_rthdr); m_freem(exthdrs.ip6e_dest2); /* FALLTHROUGH */ bad: if (m) m_freem(m); goto done; } static int ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen) { struct mbuf *m; if (hlen > MCLBYTES) return (ENOBUFS); /* XXX */ if (hlen > MLEN) m = m_getcl(M_NOWAIT, MT_DATA, 0); else m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_len = hlen; if (hdr) bcopy(hdr, mtod(m, caddr_t), hlen); *mp = m; return (0); } /* * Insert jumbo payload option. */ static int ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen) { struct mbuf *mopt; u_char *optbuf; u_int32_t v; #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ /* * If there is no hop-by-hop options header, allocate new one. * If there is one but it doesn't have enough space to store the * jumbo payload option, allocate a cluster to store the whole options. 
* Otherwise, use it to store the options. */ if (exthdrs->ip6e_hbh == 0) { mopt = m_get(M_NOWAIT, MT_DATA); if (mopt == NULL) return (ENOBUFS); mopt->m_len = JUMBOOPTLEN; optbuf = mtod(mopt, u_char *); optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */ exthdrs->ip6e_hbh = mopt; } else { struct ip6_hbh *hbh; mopt = exthdrs->ip6e_hbh; if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) { /* * XXX assumption: * - exthdrs->ip6e_hbh is not referenced from places * other than exthdrs. * - exthdrs->ip6e_hbh is not an mbuf chain. */ int oldoptlen = mopt->m_len; struct mbuf *n; /* * XXX: give up if the whole (new) hbh header does * not fit even in an mbuf cluster. */ if (oldoptlen + JUMBOOPTLEN > MCLBYTES) return (ENOBUFS); /* * As a consequence, we must always prepare a cluster * at this point. */ n = m_getcl(M_NOWAIT, MT_DATA, 0); if (n == NULL) return (ENOBUFS); n->m_len = oldoptlen + JUMBOOPTLEN; bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), oldoptlen); optbuf = mtod(n, caddr_t) + oldoptlen; m_freem(mopt); mopt = exthdrs->ip6e_hbh = n; } else { optbuf = mtod(mopt, u_char *) + mopt->m_len; mopt->m_len += JUMBOOPTLEN; } optbuf[0] = IP6OPT_PADN; optbuf[1] = 1; /* * Adjust the header length according to the pad and * the jumbo payload option. */ hbh = mtod(mopt, struct ip6_hbh *); hbh->ip6h_len += (JUMBOOPTLEN >> 3); } /* fill in the option. */ optbuf[2] = IP6OPT_JUMBO; optbuf[3] = 4; v = (u_int32_t)htonl(plen + JUMBOOPTLEN); bcopy(&v, &optbuf[4], sizeof(u_int32_t)); /* finally, adjust the packet header length */ exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; return (0); #undef JUMBOOPTLEN } /* * Insert fragment header and copy unfragmentable header portions. */ static int ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, struct ip6_frag **frghdrp) { struct mbuf *n, *mlast; if (hlen > sizeof(struct ip6_hdr)) { n = m_copym(m0, sizeof(struct ip6_hdr), hlen - sizeof(struct ip6_hdr), M_NOWAIT); if (n == 0) return (ENOBUFS); m->m_next = n; } else n = m; /* Search for the last mbuf of unfragmentable part. */ for (mlast = n; mlast->m_next; mlast = mlast->m_next) ; if (M_WRITABLE(mlast) && M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) { /* use the trailing space of the last mbuf for the fragment hdr */ *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len); mlast->m_len += sizeof(struct ip6_frag); m->m_pkthdr.len += sizeof(struct ip6_frag); } else { /* allocate a new mbuf for the fragment header */ struct mbuf *mfrg; mfrg = m_get(M_NOWAIT, MT_DATA); if (mfrg == NULL) return (ENOBUFS); mfrg->m_len = sizeof(struct ip6_frag); *frghdrp = mtod(mfrg, struct ip6_frag *); mlast->m_next = mfrg; } return (0); } static int ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, struct ifnet *ifp, struct in6_addr *dst, u_long *mtup, int *alwaysfragp, u_int fibnum) { u_int32_t mtu = 0; int alwaysfrag = 0; int error = 0; if (ro_pmtu != ro) { /* The first hop and the final destination may differ. 
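 *
 * [Editor's summary, not part of the diff] Effective PMTU selection
 * in the body below, in order of preference:
 *
 *	1. the TCP hostcache value, clamped by the route's rt_mtu;
 *	2. rt_mtu itself, else the link MTU IN6_LINKMTU(ifp);
 *	3. anything under IPV6_MMTU is raised back to 1280 with
 *	   alwaysfrag set, so a fragment header is still attached for
 *	   translators (RFC 2460, section 5).
 *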
*/ struct sockaddr_in6 *sa6_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst; if (ro_pmtu->ro_rt && ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 || !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) { RTFREE(ro_pmtu->ro_rt); ro_pmtu->ro_rt = (struct rtentry *)NULL; } if (ro_pmtu->ro_rt == NULL) { bzero(sa6_dst, sizeof(*sa6_dst)); sa6_dst->sin6_family = AF_INET6; sa6_dst->sin6_len = sizeof(struct sockaddr_in6); sa6_dst->sin6_addr = *dst; in6_rtalloc(ro_pmtu, fibnum); } } if (ro_pmtu->ro_rt) { u_int32_t ifmtu; struct in_conninfo inc; bzero(&inc, sizeof(inc)); inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; if (ifp == NULL) ifp = ro_pmtu->ro_rt->rt_ifp; ifmtu = IN6_LINKMTU(ifp); mtu = tcp_hc_getmtu(&inc); if (mtu) mtu = min(mtu, ro_pmtu->ro_rt->rt_mtu); else mtu = ro_pmtu->ro_rt->rt_mtu; if (mtu == 0) mtu = ifmtu; else if (mtu < IPV6_MMTU) { /* * RFC2460 section 5, last paragraph: * if we record ICMPv6 too big message with * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU * or smaller, with framgent header attached. * (fragment header is needed regardless from the * packet size, for translators to identify packets) */ alwaysfrag = 1; mtu = IPV6_MMTU; } } else if (ifp) { mtu = IN6_LINKMTU(ifp); } else error = EHOSTUNREACH; /* XXX */ *mtup = mtu; if (alwaysfragp) *alwaysfragp = alwaysfrag; return (error); } /* * IP6 socket option processing. */ int ip6_ctloutput(struct socket *so, struct sockopt *sopt) { int optdatalen, uproto; void *optdata; struct inpcb *in6p = sotoinpcb(so); int error, optval; int level, op, optname; int optlen; struct thread *td; #ifdef RSS uint32_t rss_bucket; int retval; #endif level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; td = sopt->sopt_td; error = 0; optval = 0; uproto = (int)so->so_proto->pr_protocol; if (level != IPPROTO_IPV6) { error = EINVAL; if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_dir == SOPT_SET) { switch (sopt->sopt_name) { case SO_REUSEADDR: INP_WLOCK(in6p); if ((so->so_options & SO_REUSEADDR) != 0) in6p->inp_flags2 |= INP_REUSEADDR; else in6p->inp_flags2 &= ~INP_REUSEADDR; INP_WUNLOCK(in6p); error = 0; break; case SO_REUSEPORT: INP_WLOCK(in6p); if ((so->so_options & SO_REUSEPORT) != 0) in6p->inp_flags2 |= INP_REUSEPORT; else in6p->inp_flags2 &= ~INP_REUSEPORT; INP_WUNLOCK(in6p); error = 0; break; case SO_SETFIB: INP_WLOCK(in6p); in6p->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(in6p); error = 0; break; default: break; } } } else { /* level == IPPROTO_IPV6 */ switch (op) { case SOPT_SET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif { struct mbuf *m; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; error = ip6_pcbopts(&in6p->in6p_outputopts, m, so, sopt); m_freem(m); /* XXX */ break; } /* * Use of some Hop-by-Hop options or some * Destination options, might require special * privilege. That is, normal applications * (without special privilege) might be forbidden * from setting certain options in outgoing packets, * and might never see certain options in received * packets. [RFC 2292 Section 6] * KAME specific note: * KAME prevents non-privileged users from sending or * receiving ANY hbh/dst options in order to avoid * overhead of parsing options in the kernel. 
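 *
 * [Editor's example, not part of the diff] In userland this surfaces
 * as EPERM for an unprivileged process:
 *
 *	int on = 1;
 *	if (setsockopt(s, IPPROTO_IPV6, IPV6_RECVHOPOPTS,
 *	    &on, sizeof(on)) == -1)
 *		err(1, "IPV6_RECVHOPOPTS");
 *
 * where err(3) reports EPERM unless the caller holds
 * PRIV_NETINET_SETHDROPTS.
 *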
*/ case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) break; } /* FALLTHROUGH */ case IPV6_UNICAST_HOPS: case IPV6_HOPLIMIT: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_RECVTCLASS: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RECVRSSBUCKETID: #endif case IPV6_V6ONLY: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_BINDMULTI: #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: #endif if (optname == IPV6_BINDANY && td != NULL) { error = priv_check(td, PRIV_NETINET_BINDANY); if (error) break; } if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_UNICAST_HOPS: if (optval < -1 || optval >= 256) error = EINVAL; else { /* -1 = kernel default */ in6p->in6p_hops = optval; if ((in6p->inp_vflag & INP_IPV4) != 0) in6p->inp_ip_ttl = optval; } break; #define OPTSET(bit) \ do { \ INP_WLOCK(in6p); \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTSET2292(bit) \ do { \ INP_WLOCK(in6p); \ in6p->inp_flags |= IN6P_RFC2292; \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0) #define OPTSET2(bit, val) do { \ INP_WLOCK(in6p); \ if (val) \ in6p->inp_flags2 |= bit; \ else \ in6p->inp_flags2 &= ~bit; \ INP_WUNLOCK(in6p); \ } while (0) #define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 1 : 0) case IPV6_RECVPKTINFO: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: { struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } optp = &in6p->in6p_outputopts; error = ip6_pcbopt(IPV6_HOPLIMIT, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_RECVHOPLIMIT: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPLIMIT); break; case IPV6_RECVHOPOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDRDSTOPTS); break; case IPV6_RECVRTHDR: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: /* * We ignore this option for TCP * sockets. * (RFC3542 leaves this case * unspecified.) */ if (uproto != IPPROTO_TCP) OPTSET(IN6P_MTU); break; case IPV6_RECVFLOWID: OPTSET2(INP_RECVFLOWID, optval); break; #ifdef RSS case IPV6_RECVRSSBUCKETID: OPTSET2(INP_RECVRSSBUCKETID, optval); break; #endif case IPV6_V6ONLY: /* * make setsockopt(IPV6_V6ONLY) * available only prior to bind(2). * see ipng mailing list, Jun 22 2001. 
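 *
 * [Editor's example, not part of the diff] Because the option flips
 * the INP_IPV4 vflag, it only has effect before the socket is bound:
 *
 *	int off = 0;
 *	setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &off, sizeof(off));
 *	bind(s, (struct sockaddr *)&sin6, sizeof(sin6));
 *
 * after which the socket can also reach v4-mapped (::ffff:a.b.c.d)
 * peers; issued after bind(2), the call fails with EINVAL as below.
 *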
*/ if (in6p->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { error = EINVAL; break; } OPTSET(IN6P_IPV6_V6ONLY); if (optval) in6p->inp_vflag &= ~INP_IPV4; else in6p->inp_vflag |= INP_IPV4; break; case IPV6_RECVTCLASS: /* cannot mix with RFC2292 XXX */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: OPTSET(IN6P_AUTOFLOWLABEL); break; case IPV6_BINDANY: OPTSET(INP_BINDANY); break; case IPV6_BINDMULTI: OPTSET2(INP_BINDMULTI, optval); break; #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: if ((optval >= 0) && (optval < rss_getnumbuckets())) { in6p->inp_rss_listen_bucket = optval; OPTSET2(INP_RSS_BUCKET_SET, 1); } else { error = EINVAL; } break; #endif } break; case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: if (optlen != sizeof(optval)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; { struct ip6_pktopts **optp; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: /* RFC 2292 */ if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_2292PKTINFO: OPTSET2292(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: OPTSET2292(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: /* * Check super-user privilege. * See comments for IPV6_RECVHOPOPTS. */ if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_HOPOPTS); break; case IPV6_2292DSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ break; case IPV6_2292RTHDR: OPTSET2292(IN6P_RTHDR); break; } break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: { /* new advanced API (RFC3542) */ u_char *optbuf; u_char optbuf_storage[MCLBYTES]; int optlen; struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } /* * We only ensure valsize is not too large * here. Further validation will be done * later. */ error = sooptcopyin(sopt, optbuf_storage, sizeof(optbuf_storage), 0); if (error) break; optlen = sopt->sopt_valsize; optbuf = optbuf_storage; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, optbuf, optlen, optp, (td != NULL) ? 
td->td_ucred : NULL, uproto); break; } #undef OPTSET case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: case IPV6_MSFILTER: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: error = ip6_setmoptions(in6p, sopt); break; case IPV6_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; INP_WLOCK(in6p); switch (optval) { case IPV6_PORTRANGE_DEFAULT: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags &= ~(INP_HIGHPORT); break; case IPV6_PORTRANGE_HIGH: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags |= INP_HIGHPORT; break; case IPV6_PORTRANGE_LOW: in6p->inp_flags &= ~(INP_HIGHPORT); in6p->inp_flags |= INP_LOWPORT; break; default: error = EINVAL; break; } INP_WUNLOCK(in6p); break; #ifdef IPSEC case IPV6_IPSEC_POLICY: { caddr_t req; struct mbuf *m; if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ break; req = mtod(m, caddr_t); error = ipsec_set_policy(in6p, optname, req, m->m_len, (sopt->sopt_td != NULL) ? sopt->sopt_td->td_ucred : NULL); m_freem(m); break; } #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif /* * RFC3542 (effectively) deprecated the * semantics of the 2292-style pktoptions. * Since it was not reliable in nature (i.e., * applications had to expect the lack of some * information after all), it would make sense * to simplify this part by always returning * empty data. */ sopt->sopt_valsize = 0; break; case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: case IPV6_UNICAST_HOPS: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_V6ONLY: case IPV6_PORTRANGE: case IPV6_RECVTCLASS: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_FLOWID: case IPV6_FLOWTYPE: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RSSBUCKETID: case IPV6_RECVRSSBUCKETID: #endif switch (optname) { case IPV6_RECVHOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: optval = OPTBIT(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: optval = OPTBIT(IN6P_RTHDRDSTOPTS); break; case IPV6_UNICAST_HOPS: optval = in6p->in6p_hops; break; case IPV6_RECVPKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_RECVHOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_RECVRTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: optval = OPTBIT(IN6P_MTU); break; case IPV6_V6ONLY: optval = OPTBIT(IN6P_IPV6_V6ONLY); break; case IPV6_PORTRANGE: { int flags; flags = in6p->inp_flags; if (flags & INP_HIGHPORT) optval = IPV6_PORTRANGE_HIGH; else if (flags & INP_LOWPORT) optval = IPV6_PORTRANGE_LOW; else optval = 0; break; } case IPV6_RECVTCLASS: optval = OPTBIT(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: optval = OPTBIT(IN6P_AUTOFLOWLABEL); break; case IPV6_BINDANY: optval = OPTBIT(INP_BINDANY); break; case IPV6_FLOWID: optval = in6p->inp_flowid; break; case IPV6_FLOWTYPE: optval = in6p->inp_flowtype; break; case IPV6_RECVFLOWID: optval = OPTBIT2(INP_RECVFLOWID); break; #ifdef RSS case IPV6_RSSBUCKETID: retval = rss_hash2bucket(in6p->inp_flowid, in6p->inp_flowtype, &rss_bucket); if (retval == 0) optval = rss_bucket; else error = EINVAL; break; case IPV6_RECVRSSBUCKETID: optval = OPTBIT2(INP_RECVRSSBUCKETID); break; #endif case 
IPV6_BINDMULTI: optval = OPTBIT2(INP_BINDMULTI); break; } if (error) break; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PATHMTU: { u_long pmtu = 0; struct ip6_mtuinfo mtuinfo; struct route_in6 sro; bzero(&sro, sizeof(sro)); if (!(so->so_state & SS_ISCONNECTED)) return (ENOTCONN); /* * XXX: we dot not consider the case of source * routing, or optional information to specify * the outgoing interface. */ error = ip6_getpmtu(&sro, NULL, NULL, &in6p->in6p_faddr, &pmtu, NULL, so->so_fibnum); if (sro.ro_rt) RTFREE(sro.ro_rt); if (error) break; if (pmtu > IPV6_MAXPACKET) pmtu = IPV6_MAXPACKET; bzero(&mtuinfo, sizeof(mtuinfo)); mtuinfo.ip6m_mtu = (u_int32_t)pmtu; optdata = (void *)&mtuinfo; optdatalen = sizeof(mtuinfo); error = sooptcopyout(sopt, optdata, optdatalen); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292RTHDR: case IPV6_2292DSTOPTS: switch (optname) { case IPV6_2292PKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_2292RTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_2292DSTOPTS: optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); break; } error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: error = ip6_getpcbopt(in6p->in6p_outputopts, optname, sopt); break; case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_MSFILTER: error = ip6_getmoptions(in6p, sopt); break; #ifdef IPSEC case IPV6_IPSEC_POLICY: { caddr_t req = NULL; size_t len = 0; struct mbuf *m = NULL; struct mbuf **mp = &m; size_t ovalsize = sopt->sopt_valsize; caddr_t oval = (caddr_t)sopt->sopt_val; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; sopt->sopt_valsize = ovalsize; sopt->sopt_val = oval; if (m) { req = mtod(m, caddr_t); len = m->m_len; } error = ipsec_get_policy(in6p, req, len, mp); if (error == 0) error = soopt_mcopyout(sopt, m); /* XXX */ if (error == 0 && m) m_freem(m); break; } #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; } } return (error); } int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt) { int error = 0, optval, optlen; const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum); struct inpcb *in6p = sotoinpcb(so); int level, op, optname; level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; if (level != IPPROTO_IPV6) { return (EINVAL); } switch (optname) { case IPV6_CHECKSUM: /* * For ICMPv6 sockets, no modification allowed for checksum * offset, permit "no change" values to help existing apps. * * RFC3542 says: "An attempt to set IPV6_CHECKSUM * for an ICMPv6 socket will fail." * The current behavior does not meet RFC3542. 
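 *
 * [Editor's example, not part of the diff; my_hdr is a hypothetical
 * application header] For a non-ICMPv6 raw socket the kernel fills
 * the 16-bit checksum at the given even payload offset:
 *
 *	int off = offsetof(struct my_hdr, cksum);
 *	setsockopt(s, IPPROTO_IPV6, IPV6_CHECKSUM, &off, sizeof(off));
 *
 * An ICMPv6 socket only accepts the fixed icmp6_cksum offset, the
 * "no change" escape hatch noted above.
 *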
*/ switch (op) { case SOPT_SET: if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error) break; if ((optval % 2) != 0) { /* the API assumes even offset values */ error = EINVAL; } else if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (optval != icmp6off) error = EINVAL; } else in6p->in6p_cksum = optval; break; case SOPT_GET: if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) optval = icmp6off; else optval = in6p->in6p_cksum; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: error = EINVAL; break; } break; default: error = ENOPROTOOPT; break; } return (error); } /* * Set up IP6 options in pcb for insertion in output packets or * specifying behavior of outgoing packets. */ static int ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so, struct sockopt *sopt) { struct ip6_pktopts *opt = *pktopt; int error = 0; struct thread *td = sopt->sopt_td; /* turn off any old options. */ if (opt) { #ifdef DIAGNOSTIC if (opt->ip6po_pktinfo || opt->ip6po_nexthop || opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 || opt->ip6po_rhinfo.ip6po_rhi_rthdr) printf("ip6_pcbopts: all specified options are cleared.\n"); #endif ip6_clearpktopts(opt, -1); } else opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK); *pktopt = NULL; if (!m || m->m_len == 0) { /* * Only turning off any previous options, regardless of * whether the opt is just created or given. */ free(opt, M_IP6OPT); return (0); } /* set options specified by user. */ if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ? td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) { ip6_clearpktopts(opt, -1); /* XXX: discard all options */ free(opt, M_IP6OPT); return (error); } *pktopt = opt; return (0); } /* * initialize ip6_pktopts. beware that there are non-zero default values in * the struct. */ void ip6_initpktopts(struct ip6_pktopts *opt) { bzero(opt, sizeof(*opt)); opt->ip6po_hlim = -1; /* -1 means default hop limit */ opt->ip6po_tclass = -1; /* -1 means default traffic class */ opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY; opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM; } static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, struct ucred *cred, int uproto) { struct ip6_pktopts *opt; if (*pktopt == NULL) { *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT, M_WAITOK); ip6_initpktopts(*pktopt); } opt = *pktopt; return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto)); } static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) { void *optdata = NULL; int optdatalen = 0; struct ip6_ext *ip6e; int error = 0; struct in6_pktinfo null_pktinfo; int deftclass = 0, on; int defminmtu = IP6PO_MINMTU_MCASTONLY; int defpreftemp = IP6PO_TEMPADDR_SYSTEM; switch (optname) { case IPV6_PKTINFO: optdata = (void *)&null_pktinfo; if (pktopt && pktopt->ip6po_pktinfo) { bcopy(pktopt->ip6po_pktinfo, &null_pktinfo, sizeof(null_pktinfo)); in6_clearscope(&null_pktinfo.ipi6_addr); } else { /* XXX: we don't have to do this every time... 
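 *
 * [Editor's summary, not part of the diff] The cases below fall back
 * to these defaults when no sticky option is set, matching
 * ip6_initpktopts() above:
 *
 *	IPV6_TCLASS		0
 *	IPV6_USE_MIN_MTU	IP6PO_MINMTU_MCASTONLY
 *	IPV6_PREFER_TEMPADDR	IP6PO_TEMPADDR_SYSTEM
 *	IPV6_DONTFRAG		0 (off)
 *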
*/ bzero(&null_pktinfo, sizeof(null_pktinfo)); } optdatalen = sizeof(struct in6_pktinfo); break; case IPV6_TCLASS: if (pktopt && pktopt->ip6po_tclass >= 0) optdata = (void *)&pktopt->ip6po_tclass; else optdata = (void *)&deftclass; optdatalen = sizeof(int); break; case IPV6_HOPOPTS: if (pktopt && pktopt->ip6po_hbh) { optdata = (void *)pktopt->ip6po_hbh; ip6e = (struct ip6_ext *)pktopt->ip6po_hbh; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDR: if (pktopt && pktopt->ip6po_rthdr) { optdata = (void *)pktopt->ip6po_rthdr; ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDRDSTOPTS: if (pktopt && pktopt->ip6po_dest1) { optdata = (void *)pktopt->ip6po_dest1; ip6e = (struct ip6_ext *)pktopt->ip6po_dest1; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_DSTOPTS: if (pktopt && pktopt->ip6po_dest2) { optdata = (void *)pktopt->ip6po_dest2; ip6e = (struct ip6_ext *)pktopt->ip6po_dest2; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_NEXTHOP: if (pktopt && pktopt->ip6po_nexthop) { optdata = (void *)pktopt->ip6po_nexthop; optdatalen = pktopt->ip6po_nexthop->sa_len; } break; case IPV6_USE_MIN_MTU: if (pktopt) optdata = (void *)&pktopt->ip6po_minmtu; else optdata = (void *)&defminmtu; optdatalen = sizeof(int); break; case IPV6_DONTFRAG: if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) on = 1; else on = 0; optdata = (void *)&on; optdatalen = sizeof(on); break; case IPV6_PREFER_TEMPADDR: if (pktopt) optdata = (void *)&pktopt->ip6po_prefer_tempaddr; else optdata = (void *)&defpreftemp; optdatalen = sizeof(int); break; default: /* should not happen */ #ifdef DIAGNOSTIC panic("ip6_getpcbopt: unexpected option\n"); #endif return (ENOPROTOOPT); } error = sooptcopyout(sopt, optdata, optdatalen); return (error); } void ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) { if (pktopt == NULL) return; if (optname == -1 || optname == IPV6_PKTINFO) { if (pktopt->ip6po_pktinfo) free(pktopt->ip6po_pktinfo, M_IP6OPT); pktopt->ip6po_pktinfo = NULL; } if (optname == -1 || optname == IPV6_HOPLIMIT) pktopt->ip6po_hlim = -1; if (optname == -1 || optname == IPV6_TCLASS) pktopt->ip6po_tclass = -1; if (optname == -1 || optname == IPV6_NEXTHOP) { if (pktopt->ip6po_nextroute.ro_rt) { RTFREE(pktopt->ip6po_nextroute.ro_rt); pktopt->ip6po_nextroute.ro_rt = NULL; } if (pktopt->ip6po_nexthop) free(pktopt->ip6po_nexthop, M_IP6OPT); pktopt->ip6po_nexthop = NULL; } if (optname == -1 || optname == IPV6_HOPOPTS) { if (pktopt->ip6po_hbh) free(pktopt->ip6po_hbh, M_IP6OPT); pktopt->ip6po_hbh = NULL; } if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) { if (pktopt->ip6po_dest1) free(pktopt->ip6po_dest1, M_IP6OPT); pktopt->ip6po_dest1 = NULL; } if (optname == -1 || optname == IPV6_RTHDR) { if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; if (pktopt->ip6po_route.ro_rt) { RTFREE(pktopt->ip6po_route.ro_rt); pktopt->ip6po_route.ro_rt = NULL; } } if (optname == -1 || optname == IPV6_DSTOPTS) { if (pktopt->ip6po_dest2) free(pktopt->ip6po_dest2, M_IP6OPT); pktopt->ip6po_dest2 = NULL; } } #define PKTOPT_EXTHDRCPY(type) \ do {\ if (src->type) {\ int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ dst->type = malloc(hlen, M_IP6OPT, canwait);\ if (dst->type == NULL && canwait == M_NOWAIT)\ goto bad;\ bcopy(src->type, dst->type, hlen);\ }\ } while (/*CONSTCOND*/ 0) static int copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int 
canwait) { if (dst == NULL || src == NULL) { printf("copypktopts: invalid argument\n"); return (EINVAL); } dst->ip6po_hlim = src->ip6po_hlim; dst->ip6po_tclass = src->ip6po_tclass; dst->ip6po_flags = src->ip6po_flags; dst->ip6po_minmtu = src->ip6po_minmtu; dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr; if (src->ip6po_pktinfo) { dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo), M_IP6OPT, canwait); if (dst->ip6po_pktinfo == NULL) goto bad; *dst->ip6po_pktinfo = *src->ip6po_pktinfo; } if (src->ip6po_nexthop) { dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len, M_IP6OPT, canwait); if (dst->ip6po_nexthop == NULL) goto bad; bcopy(src->ip6po_nexthop, dst->ip6po_nexthop, src->ip6po_nexthop->sa_len); } PKTOPT_EXTHDRCPY(ip6po_hbh); PKTOPT_EXTHDRCPY(ip6po_dest1); PKTOPT_EXTHDRCPY(ip6po_dest2); PKTOPT_EXTHDRCPY(ip6po_rthdr); /* do not copy the cached route */ return (0); bad: ip6_clearpktopts(dst, -1); return (ENOBUFS); } #undef PKTOPT_EXTHDRCPY struct ip6_pktopts * ip6_copypktopts(struct ip6_pktopts *src, int canwait) { int error; struct ip6_pktopts *dst; dst = malloc(sizeof(*dst), M_IP6OPT, canwait); if (dst == NULL) return (NULL); ip6_initpktopts(dst); if ((error = copypktopts(dst, src, canwait)) != 0) { free(dst, M_IP6OPT); return (NULL); } return (dst); } void ip6_freepcbopts(struct ip6_pktopts *pktopt) { if (pktopt == NULL) return; ip6_clearpktopts(pktopt, -1); free(pktopt, M_IP6OPT); } /* * Set IPv6 outgoing packet options based on advanced API. */ int ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto) { struct cmsghdr *cm = 0; if (control == NULL || opt == NULL) return (EINVAL); ip6_initpktopts(opt); if (stickyopt) { int error; /* * If stickyopt is provided, make a local copy of the options * for this particular packet, then override them with ancillary * objects. * XXX: copypktopts() does not copy the cached route to a next * hop (if any). This is not very good in terms of efficiency, * but we can allow this since this option should be rarely * used. */ if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0) return (error); } /* * XXX: Currently, we assume all the optional information is stored * in a single mbuf. */ if (control->m_next) return (EINVAL); for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { int error; if (control->m_len < CMSG_LEN(0)) return (EINVAL); cm = mtod(control, struct cmsghdr *); if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) return (EINVAL); if (cm->cmsg_level != IPPROTO_IPV6) continue; error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm), cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto); if (error) return (error); } return (0); } /* * Set a particular packet option, as a sticky option or an ancillary data * item. "len" can be 0 only when it's a sticky option. * We have 4 cases of combination of "sticky" and "cmsg": * "sticky=0, cmsg=0": impossible * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data * "sticky=1, cmsg=0": RFC3542 socket option * "sticky=1, cmsg=1": RFC2292 socket option */ static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, struct ucred *cred, int sticky, int cmsg, int uproto) { int minmtupolicy, preftemp; int error; if (!sticky && !cmsg) { #ifdef DIAGNOSTIC printf("ip6_setpktopt: impossible case\n"); #endif return (EINVAL); } /* * IPV6_2292xxx is for backward compatibility to RFC2292, and should * not be specified in the context of RFC3542.
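The sticky/cmsg matrix above corresponds to two userland idioms: setsockopt(2) installs a sticky option that applies to every subsequent packet, while ancillary data passed to sendmsg(2) overrides it for one packet only. A hedged sketch of both paths for IPV6_TCLASS; the socket `s`, destination `dst` (a sockaddr_in6), and `payload`/`paylen` are assumed to exist already.

#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <string.h>

	int tclass = 0x20;	/* illustrative value */

	/* Sticky: recorded in ip6po_tclass, used for all later packets. */
	setsockopt(s, IPPROTO_IPV6, IPV6_TCLASS, &tclass, sizeof(tclass));

	/* Ancillary: overrides the sticky value for this sendmsg() only. */
	char cbuf[CMSG_SPACE(sizeof(int))];
	struct iovec iov = { .iov_base = payload, .iov_len = paylen };
	struct msghdr msg = {
		.msg_name = &dst, .msg_namelen = sizeof(dst),
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
	cm->cmsg_level = IPPROTO_IPV6;
	cm->cmsg_type = IPV6_TCLASS;
	cm->cmsg_len = CMSG_LEN(sizeof(int));
	tclass = 0;
	memcpy(CMSG_DATA(cm), &tclass, sizeof(int));
	sendmsg(s, &msg, 0);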
Conversely, * RFC3542 types should not be specified in the context of RFC2292. */ if (!cmsg) { switch (optname) { case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292NEXTHOP: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: case IPV6_2292PKTOPTIONS: return (ENOPROTOOPT); } } if (sticky && cmsg) { switch (optname) { case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_NEXTHOP: case IPV6_HOPOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_USE_MIN_MTU: case IPV6_DONTFRAG: case IPV6_TCLASS: case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */ return (ENOPROTOOPT); } } switch (optname) { case IPV6_2292PKTINFO: case IPV6_PKTINFO: { struct ifnet *ifp = NULL; struct in6_pktinfo *pktinfo; if (len != sizeof(struct in6_pktinfo)) return (EINVAL); pktinfo = (struct in6_pktinfo *)buf; /* * An application can clear any sticky IPV6_PKTINFO option by * doing a "regular" setsockopt with ipi6_addr being * in6addr_any and ipi6_ifindex being zero. * [RFC 3542, Section 6] */ if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo && pktinfo->ipi6_ifindex == 0 && IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { ip6_clearpktopts(opt, optname); break; } if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO && sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { return (EINVAL); } if (IN6_IS_ADDR_MULTICAST(&pktinfo->ipi6_addr)) return (EINVAL); /* validate the interface index if specified. */ if (pktinfo->ipi6_ifindex > V_if_index) return (ENXIO); if (pktinfo->ipi6_ifindex) { ifp = ifnet_byindex(pktinfo->ipi6_ifindex); if (ifp == NULL) return (ENXIO); } if (ifp != NULL && ( ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) return (ENETDOWN); if (ifp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { struct in6_ifaddr *ia; in6_setscope(&pktinfo->ipi6_addr, ifp, NULL); ia = in6ifa_ifpwithaddr(ifp, &pktinfo->ipi6_addr); if (ia == NULL) return (EADDRNOTAVAIL); ifa_free(&ia->ia_ifa); } /* * We store the address anyway, and let in6_selectsrc() * validate the specified address. This is because ipi6_addr * may not have enough information about its scope zone, and * we may need additional information (such as outgoing * interface or the scope zone of a destination address) to * disambiguate the scope. * XXX: the delay of the validation may confuse the * application when it is used as a sticky option. */ if (opt->ip6po_pktinfo == NULL) { opt->ip6po_pktinfo = malloc(sizeof(*pktinfo), M_IP6OPT, M_NOWAIT); if (opt->ip6po_pktinfo == NULL) return (ENOBUFS); } bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo)); break; } case IPV6_2292HOPLIMIT: case IPV6_HOPLIMIT: { int *hlimp; /* * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT * to simplify the ordering among hoplimit options. 
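Worth restating from the IPV6_PKTINFO case above: RFC 3542 defines no separate "remove" operation for sticky packet info; an application clears it by storing the unspecified address and a zero interface index. A minimal sketch, assuming `s` is an existing IPv6 socket and <netinet/in.h>, <string.h>, <stdio.h> are included:

	struct in6_pktinfo pi;

	/* in6addr_any plus ipi6_ifindex == 0 clears the sticky option. */
	memset(&pi, 0, sizeof(pi));
	if (setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO, &pi, sizeof(pi)) == -1)
		perror("clearing sticky IPV6_PKTINFO");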
*/ if (optname == IPV6_HOPLIMIT && sticky) return (ENOPROTOOPT); if (len != sizeof(int)) return (EINVAL); hlimp = (int *)buf; if (*hlimp < -1 || *hlimp > 255) return (EINVAL); opt->ip6po_hlim = *hlimp; break; } case IPV6_TCLASS: { int tclass; if (len != sizeof(int)) return (EINVAL); tclass = *(int *)buf; if (tclass < -1 || tclass > 255) return (EINVAL); opt->ip6po_tclass = tclass; break; } case IPV6_2292NEXTHOP: case IPV6_NEXTHOP: if (cred != NULL) { error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { /* just remove the option */ ip6_clearpktopts(opt, IPV6_NEXTHOP); break; } /* check if cmsg_len is large enough for sa_len */ if (len < sizeof(struct sockaddr) || len < *buf) return (EINVAL); switch (((struct sockaddr *)buf)->sa_family) { case AF_INET6: { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf; int error; if (sa6->sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) || IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) { return (EINVAL); } if ((error = sa6_embedscope(sa6, V_ip6_use_defzone)) != 0) { return (error); } break; } case AF_LINK: /* should eventually be supported */ default: return (EAFNOSUPPORT); } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, IPV6_NEXTHOP); opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT); if (opt->ip6po_nexthop == NULL) return (ENOBUFS); bcopy(buf, opt->ip6po_nexthop, *buf); break; case IPV6_2292HOPOPTS: case IPV6_HOPOPTS: { struct ip6_hbh *hbh; int hbhlen; /* * XXX: We don't allow a non-privileged user to set ANY HbH * options, since per-option restriction has too much * overhead. */ if (cred != NULL) { error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { ip6_clearpktopts(opt, IPV6_HOPOPTS); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_hbh)) return (EINVAL); hbh = (struct ip6_hbh *)buf; hbhlen = (hbh->ip6h_len + 1) << 3; if (len != hbhlen) return (EINVAL); /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, IPV6_HOPOPTS); opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_hbh == NULL) return (ENOBUFS); bcopy(hbh, opt->ip6po_hbh, hbhlen); break; } case IPV6_2292DSTOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: { struct ip6_dest *dest, **newdest = NULL; int destlen; if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */ error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { ip6_clearpktopts(opt, optname); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_dest)) return (EINVAL); dest = (struct ip6_dest *)buf; destlen = (dest->ip6d_len + 1) << 3; if (len != destlen) return (EINVAL); /* * Determine the position at which the destination options header * should be inserted: before or after the routing header. */ switch (optname) { case IPV6_2292DSTOPTS: /* * The old advanced API is ambiguous on this point. * Our approach is to determine the position * according to the existence of a routing header. * Note, however, that this depends on the order of the * extension headers in the ancillary data; the 1st * part of the destination options header must appear * before the routing header in the ancillary data, * too. * RFC3542 solved the ambiguity by introducing * separate ancillary data or option types.
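The dest1/dest2 split that this switch resolves follows the RFC 2460 extension-header order, reproduced here for reference, together with a minimal valid destination-options buffer (8 bytes, padded with a PadN option) of the kind the length check above would accept:

/*
 * Wire order assembled from struct ip6_pktopts:
 *   IPv6 header
 *   hop-by-hop options     ip6po_hbh
 *   destination options 1  ip6po_dest1 (before the routing header)
 *   routing header         ip6po_rthdr
 *   destination options 2  ip6po_dest2 (after the routing header)
 *   upper-layer header
 */
uint8_t dstopts[8] = {
	0,		/* ip6d_nxt: rewritten by the kernel */
	0,		/* ip6d_len: (0 + 1) << 3 == 8 bytes total */
	1, 4,		/* PadN option: type 1, 4 bytes of padding */
	0, 0, 0, 0	/* the padding itself */
};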
*/ if (opt->ip6po_rthdr == NULL) newdest = &opt->ip6po_dest1; else newdest = &opt->ip6po_dest2; break; case IPV6_RTHDRDSTOPTS: newdest = &opt->ip6po_dest1; break; case IPV6_DSTOPTS: newdest = &opt->ip6po_dest2; break; } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, optname); *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT); if (*newdest == NULL) return (ENOBUFS); bcopy(dest, *newdest, destlen); break; } case IPV6_2292RTHDR: case IPV6_RTHDR: { struct ip6_rthdr *rth; int rthlen; if (len == 0) { ip6_clearpktopts(opt, IPV6_RTHDR); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_rthdr)) return (EINVAL); rth = (struct ip6_rthdr *)buf; rthlen = (rth->ip6r_len + 1) << 3; if (len != rthlen) return (EINVAL); switch (rth->ip6r_type) { case IPV6_RTHDR_TYPE_0: if (rth->ip6r_len == 0) /* must contain one addr */ return (EINVAL); if (rth->ip6r_len % 2) /* length must be even */ return (EINVAL); if (rth->ip6r_len / 2 != rth->ip6r_segleft) return (EINVAL); break; default: return (EINVAL); /* not supported */ } /* turn off the previous option */ ip6_clearpktopts(opt, IPV6_RTHDR); opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_rthdr == NULL) return (ENOBUFS); bcopy(rth, opt->ip6po_rthdr, rthlen); break; } case IPV6_USE_MIN_MTU: if (len != sizeof(int)) return (EINVAL); minmtupolicy = *(int *)buf; if (minmtupolicy != IP6PO_MINMTU_MCASTONLY && minmtupolicy != IP6PO_MINMTU_DISABLE && minmtupolicy != IP6PO_MINMTU_ALL) { return (EINVAL); } opt->ip6po_minmtu = minmtupolicy; break; case IPV6_DONTFRAG: if (len != sizeof(int)) return (EINVAL); if (uproto == IPPROTO_TCP || *(int *)buf == 0) { /* * we ignore this option for TCP sockets. * (RFC3542 leaves this case unspecified.) */ opt->ip6po_flags &= ~IP6PO_DONTFRAG; } else opt->ip6po_flags |= IP6PO_DONTFRAG; break; case IPV6_PREFER_TEMPADDR: if (len != sizeof(int)) return (EINVAL); preftemp = *(int *)buf; if (preftemp != IP6PO_TEMPADDR_SYSTEM && preftemp != IP6PO_TEMPADDR_NOTPREFER && preftemp != IP6PO_TEMPADDR_PREFER) { return (EINVAL); } opt->ip6po_prefer_tempaddr = preftemp; break; default: return (ENOPROTOOPT); } /* end of switch */ return (0); } /* * Routine called from ip6_output() to loop back a copy of an IP6 multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be &loif -- easier than replicating that code here. */ void ip6_mloopback(struct ifnet *ifp, const struct mbuf *m) { struct mbuf *copym; struct ip6_hdr *ip6; copym = m_copy(m, 0, M_COPYALL); if (copym == NULL) return; /* * Make sure to deep-copy IPv6 header portion in case the data * is in an mbuf cluster, so that we can safely override the IPv6 * header portion later. */ if (!M_WRITABLE(copym) || copym->m_len < sizeof(struct ip6_hdr)) { copym = m_pullup(copym, sizeof(struct ip6_hdr)); if (copym == NULL) return; } ip6 = mtod(copym, struct ip6_hdr *); /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; copym->m_pkthdr.csum_data = 0xffff; } if_simloop(ifp, copym, AF_INET6, 0); } /* * Chop IPv6 header off from the payload. 
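The type-0 routing-header validation earlier in this case reduces to one size invariant: every embedded IPv6 address occupies two 8-octet units, so ip6r_len must be non-zero and even, and ip6r_segleft must equal ip6r_len / 2 while no hops have been visited. A worked check with hypothetical values:

#include <assert.h>
#include <netinet/ip6.h>

	struct ip6_rthdr0 rh = { .ip6r0_len = 4, .ip6r0_segleft = 2 };
	size_t rthlen = (rh.ip6r0_len + 1) << 3; /* (4 + 1) * 8 = 40 bytes */
	int naddrs = rh.ip6r0_len / 2;           /* 2 embedded addresses   */

	/* The same three checks the kernel applies above. */
	assert(rh.ip6r0_len != 0);
	assert(rh.ip6r0_len % 2 == 0);
	assert(rh.ip6r0_segleft == naddrs);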
*/ static int ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs) { struct mbuf *mh; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); if (m->m_len > sizeof(*ip6)) { mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) { m_freem(m); return ENOBUFS; } m_move_pkthdr(mh, m); M_ALIGN(mh, sizeof(*ip6)); m->m_len -= sizeof(*ip6); m->m_data += sizeof(*ip6); mh->m_next = m; m = mh; m->m_len = sizeof(*ip6); bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); } exthdrs->ip6e_ip6 = m; return 0; } /* * Compute IPv6 extension header length. */ int ip6_optlen(struct inpcb *in6p) { int len; if (!in6p->in6p_outputopts) return 0; len = 0; #define elen(x) \ (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) len += elen(in6p->in6p_outputopts->ip6po_hbh); if (in6p->in6p_outputopts->ip6po_rthdr) /* dest1 is valid with rthdr only */ len += elen(in6p->in6p_outputopts->ip6po_dest1); len += elen(in6p->in6p_outputopts->ip6po_rthdr); len += elen(in6p->in6p_outputopts->ip6po_dest2); return len; #undef elen } Index: projects/powernv/netinet6/nd6.c =================================================================== --- projects/powernv/netinet6/nd6.c (revision 290990) +++ projects/powernv/netinet6/nd6.c (revision 290991) @@ -1,2431 +1,2431 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */ #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */ #define SIN6(s) ((const struct sockaddr_in6 *)(s)) /* timer values */ VNET_DEFINE(int, nd6_prune) = 1; /* walk list every 1 seconds */ VNET_DEFINE(int, nd6_delay) = 5; /* delay first probe time 5 second */ VNET_DEFINE(int, nd6_umaxtries) = 3; /* maximum unicast query */ VNET_DEFINE(int, nd6_mmaxtries) = 3; /* maximum multicast query */ VNET_DEFINE(int, nd6_useloopback) = 1; /* use loopback interface for * local traffic */ VNET_DEFINE(int, nd6_gctimer) = (60 * 60 * 24); /* 1 day: garbage * collection timer */ /* preventing too many loops in ND option parsing */ static VNET_DEFINE(int, nd6_maxndopt) = 10; /* max # of ND options allowed */ VNET_DEFINE(int, nd6_maxnudhint) = 0; /* max # of subsequent upper * layer hints */ static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved * ND entries */ #define V_nd6_maxndopt VNET(nd6_maxndopt) #define V_nd6_maxqueuelen VNET(nd6_maxqueuelen) #ifdef ND6_DEBUG VNET_DEFINE(int, nd6_debug) = 1; #else VNET_DEFINE(int, nd6_debug) = 0; #endif static eventhandler_tag lle_event_eh; /* for debugging? */ #if 0 static int nd6_inuse, nd6_allocated; #endif VNET_DEFINE(struct nd_drhead, nd_defrouter); VNET_DEFINE(struct nd_prhead, nd_prefix); VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL; #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) int (*send_sendso_input_hook)(struct mbuf *, struct ifnet *, int, int); static int nd6_is_new_addr_neighbor(const struct sockaddr_in6 *, struct ifnet *); static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *); static void nd6_slowtimo(void *); static int regen_tmpaddr(struct in6_ifaddr *); static void nd6_free(struct llentry *, int); static void nd6_free_redirect(const struct llentry *); static void nd6_llinfo_timer(void *); static void nd6_llinfo_settimer_locked(struct llentry *, long); static void clear_llinfo_pqueue(struct llentry *); static void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *); static int nd6_resolve_slow(struct ifnet *, struct mbuf *, const struct sockaddr_in6 *, u_char *, uint32_t *); static int nd6_need_cache(struct ifnet *); static VNET_DEFINE(struct callout, nd6_slowtimo_ch); #define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch) VNET_DEFINE(struct callout, nd6_timer_ch); static void nd6_lle_event(void *arg __unused, struct llentry *lle, int evt) { struct rt_addrinfo rtinfo; struct sockaddr_in6 dst; struct sockaddr_dl gw; struct ifnet *ifp; int type; LLE_WLOCK_ASSERT(lle); if (lltable_get_af(lle->lle_tbl) != AF_INET6) return; switch (evt) { case LLENTRY_RESOLVED: type = RTM_ADD; KASSERT(lle->la_flags & LLE_VALID, ("%s: %p resolved but not valid?", __func__, lle)); break; case LLENTRY_EXPIRED: type = RTM_DELETE; break; default: return; } ifp = lltable_get_ifp(lle->lle_tbl); bzero(&dst, sizeof(dst)); bzero(&gw, sizeof(gw)); bzero(&rtinfo, sizeof(rtinfo)); lltable_fill_sa_entry(lle, (struct sockaddr *)&dst); dst.sin6_scope_id = 
in6_getscopezone(ifp, in6_addrscope(&dst.sin6_addr)); gw.sdl_len = sizeof(struct sockaddr_dl); gw.sdl_family = AF_LINK; gw.sdl_alen = ifp->if_addrlen; gw.sdl_index = ifp->if_index; gw.sdl_type = ifp->if_type; if (evt == LLENTRY_RESOLVED) bcopy(&lle->ll_addr, gw.sdl_data, ifp->if_addrlen); rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst; rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw; rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY; rt_missmsg_fib(type, &rtinfo, RTF_HOST | RTF_LLDATA | ( type == RTM_ADD ? RTF_UP: 0), 0, RT_DEFAULT_FIB); } void nd6_init(void) { LIST_INIT(&V_nd_prefix); /* initialization of the default router list */ TAILQ_INIT(&V_nd_defrouter); /* start timer */ callout_init(&V_nd6_slowtimo_ch, 0); callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, curvnet); nd6_dad_init(); if (IS_DEFAULT_VNET(curvnet)) lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event, NULL, EVENTHANDLER_PRI_ANY); } #ifdef VIMAGE void nd6_destroy() { callout_drain(&V_nd6_slowtimo_ch); callout_drain(&V_nd6_timer_ch); if (IS_DEFAULT_VNET(curvnet)) EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh); } #endif struct nd_ifinfo * nd6_ifattach(struct ifnet *ifp) { struct nd_ifinfo *nd; nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK|M_ZERO); nd->initialized = 1; nd->chlim = IPV6_DEFHLIM; nd->basereachable = REACHABLE_TIME; nd->reachable = ND_COMPUTE_RTIME(nd->basereachable); nd->retrans = RETRANS_TIMER; nd->flags = ND6_IFF_PERFORMNUD; /* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL. * XXXHRS: Clear ND6_IFF_AUTO_LINKLOCAL on an IFT_BRIDGE interface by * default regardless of the V_ip6_auto_linklocal configuration to * give a reasonable default behavior. */ if ((V_ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) || (ifp->if_flags & IFF_LOOPBACK)) nd->flags |= ND6_IFF_AUTO_LINKLOCAL; /* * A loopback interface does not need to accept RTADV. * XXXHRS: Clear ND6_IFF_ACCEPT_RTADV on an IFT_BRIDGE interface by * default regardless of the V_ip6_accept_rtadv configuration to * prevent the interface from accepting RA messages arrived * on one of the member interfaces with ND6_IFF_ACCEPT_RTADV. */ if (V_ip6_accept_rtadv && !(ifp->if_flags & IFF_LOOPBACK) && (ifp->if_type != IFT_BRIDGE)) nd->flags |= ND6_IFF_ACCEPT_RTADV; if (V_ip6_no_radr && !(ifp->if_flags & IFF_LOOPBACK)) nd->flags |= ND6_IFF_NO_RADR; /* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */ nd6_setmtu0(ifp, nd); return nd; } void nd6_ifdetach(struct nd_ifinfo *nd) { free(nd, M_IP6NDP); } /* * Reset ND level link MTU. This function is called when the physical MTU * changes, which means we might have to adjust the ND level MTU. */ void nd6_setmtu(struct ifnet *ifp) { nd6_setmtu0(ifp, ND_IFINFO(ifp)); } /* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */ void nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi) { u_int32_t omaxmtu; omaxmtu = ndi->maxmtu; switch (ifp->if_type) { case IFT_ARCNET: ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */ break; case IFT_FDDI: ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); /* RFC2467 */ break; case IFT_ISO88025: ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu); break; default: ndi->maxmtu = ifp->if_mtu; break; } /* * Decreasing the interface MTU under IPV6 minimum MTU may cause * undesirable situation. We thus notify the operator of the change * explicitly. The check for omaxmtu is necessary to restrict the * log to the case of changing the MTU, not initializing it. 
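nd6_setmtu0() above amounts to clamping the ND-level MTU to the smaller of the media-specific ceiling and the configured if_mtu, and the warning fires only when a change drags an MTU that used to satisfy the IPv6 minimum below IPV6_MMTU (1280). A condensed sketch of that rule; the helper name is hypothetical:

static uint32_t
nd6_clamp_mtu(uint32_t omaxmtu, uint32_t media_max, uint32_t if_mtu)
{
	uint32_t maxmtu = (media_max < if_mtu) ? media_max : if_mtu;

	/* Warn only when the change crosses the IPv6 minimum MTU. */
	if (omaxmtu >= IPV6_MMTU && maxmtu < IPV6_MMTU)
		printf("new link MTU (%u) is too small for IPv6\n", maxmtu);
	return (maxmtu);
}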
*/ if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) { log(LOG_NOTICE, "nd6_setmtu0: " "new link MTU on %s (%lu) is too small for IPv6\n", if_name(ifp), (unsigned long)ndi->maxmtu); } if (ndi->maxmtu > V_in6_maxmtu) in6_setmaxmtu(); /* check all interfaces just in case */ } void nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts) { bzero(ndopts, sizeof(*ndopts)); ndopts->nd_opts_search = (struct nd_opt_hdr *)opt; ndopts->nd_opts_last = (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len); if (icmp6len == 0) { ndopts->nd_opts_done = 1; ndopts->nd_opts_search = NULL; } } /* * Take one ND option. */ struct nd_opt_hdr * nd6_option(union nd_opts *ndopts) { struct nd_opt_hdr *nd_opt; int olen; KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__)); KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts", __func__)); if (ndopts->nd_opts_search == NULL) return NULL; if (ndopts->nd_opts_done) return NULL; nd_opt = ndopts->nd_opts_search; /* make sure nd_opt_len is inside the buffer */ if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) { bzero(ndopts, sizeof(*ndopts)); return NULL; } olen = nd_opt->nd_opt_len << 3; if (olen == 0) { /* * Message validation requires that all included * options have a length that is greater than zero. */ bzero(ndopts, sizeof(*ndopts)); return NULL; } ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen); if (ndopts->nd_opts_search > ndopts->nd_opts_last) { /* option overruns the end of buffer, invalid */ bzero(ndopts, sizeof(*ndopts)); return NULL; } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) { /* reached the end of options chain */ ndopts->nd_opts_done = 1; ndopts->nd_opts_search = NULL; } return nd_opt; } /* * Parse multiple ND options. * This function is much easier to use for ND routines that do not need * multiple options of the same type. */ int nd6_options(union nd_opts *ndopts) { struct nd_opt_hdr *nd_opt; int i = 0; KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__)); KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts", __func__)); if (ndopts->nd_opts_search == NULL) return 0; while (1) { nd_opt = nd6_option(ndopts); if (nd_opt == NULL && ndopts->nd_opts_last == NULL) { /* * Message validation requires that all included * options have a length that is greater than zero. */ ICMP6STAT_INC(icp6s_nd_badopt); bzero(ndopts, sizeof(*ndopts)); return -1; } if (nd_opt == NULL) goto skip1; switch (nd_opt->nd_opt_type) { case ND_OPT_SOURCE_LINKADDR: case ND_OPT_TARGET_LINKADDR: case ND_OPT_MTU: case ND_OPT_REDIRECTED_HEADER: case ND_OPT_NONCE: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { nd6log((LOG_INFO, "duplicated ND6 option found (type=%d)\n", nd_opt->nd_opt_type)); /* XXX bark? */ } else { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } break; case ND_OPT_PREFIX_INFORMATION: if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } ndopts->nd_opts_pi_end = (struct nd_opt_prefix_info *)nd_opt; break; /* What about ND_OPT_ROUTE_INFO? RFC 4191 */ case ND_OPT_RDNSS: /* RFC 6106 */ case ND_OPT_DNSSL: /* RFC 6106 */ /* * Silently ignore options we know and do not care about * in the kernel. */ break; default: /* * Unknown options must be silently ignored, * to accommodate future extension to the protocol.
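nd6_option() and nd6_options() above implement the RFC 4861 option walk: advance by (nd_opt_len << 3) bytes per option and abandon the whole message on a zero length or an overrun. The same loop over a flat buffer, as a hypothetical standalone helper:

#include <stddef.h>
#include <stdint.h>

/*
 * Walk ND options in buf[0..len); 0 on a clean parse, -1 if malformed.
 * Layout per option: type (1 byte), length in 8-octet units (1 byte),
 * then payload.
 */
static int
walk_nd_options(const uint8_t *buf, size_t len)
{
	size_t off = 0;

	while (off + 2 <= len) {
		size_t olen = (size_t)buf[off + 1] << 3;

		if (olen == 0 || olen > len - off)
			return (-1);	/* zero length or overrun */
		/* buf[off] holds the type; a real parser dispatches here. */
		off += olen;
	}
	return (off == len ? 0 : -1);
}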
*/ nd6log((LOG_DEBUG, "nd6_options: unsupported option %d - " "option ignored\n", nd_opt->nd_opt_type)); } skip1: i++; if (i > V_nd6_maxndopt) { ICMP6STAT_INC(icp6s_nd_toomanyopt); nd6log((LOG_INFO, "too many loops in nd opt\n")); break; } if (ndopts->nd_opts_done) break; } return 0; } /* * ND6 timer routine to handle ND6 entries */ static void nd6_llinfo_settimer_locked(struct llentry *ln, long tick) { int canceled; LLE_WLOCK_ASSERT(ln); if (tick < 0) { ln->la_expire = 0; ln->ln_ntick = 0; canceled = callout_stop(&ln->lle_timer); } else { ln->la_expire = time_uptime + tick / hz; LLE_ADDREF(ln); if (tick > INT_MAX) { ln->ln_ntick = tick - INT_MAX; canceled = callout_reset(&ln->lle_timer, INT_MAX, nd6_llinfo_timer, ln); } else { ln->ln_ntick = 0; canceled = callout_reset(&ln->lle_timer, tick, nd6_llinfo_timer, ln); } } if (canceled > 0) LLE_REMREF(ln); } /* * Gets source address of the first packet in hold queue * and stores it in @src. * Returns pointer to @src (if hold queue is not empty) or NULL. * * Set noinline to be dtrace-friendly */ static __noinline struct in6_addr * nd6_llinfo_get_holdsrc(struct llentry *ln, struct in6_addr *src) { struct ip6_hdr hdr; struct mbuf *m; if (ln->la_hold == NULL) return (NULL); /* * assume every packet in la_hold has the same IP header */ m = ln->la_hold; if (sizeof(hdr) > m->m_len) return (NULL); m_copydata(m, 0, sizeof(hdr), (caddr_t)&hdr); *src = hdr.ip6_src; return (src); } /* * Switch @lle state to new state optionally arming timers. * * Set noinline to be dtrace-friendly */ __noinline void nd6_llinfo_setstate(struct llentry *lle, int newstate) { struct ifnet *ifp; long delay; delay = 0; switch (newstate) { case ND6_LLINFO_INCOMPLETE: ifp = lle->lle_tbl->llt_ifp; delay = (long)ND_IFINFO(ifp)->retrans * hz / 1000; break; case ND6_LLINFO_REACHABLE: if (!ND6_LLINFO_PERMANENT(lle)) { ifp = lle->lle_tbl->llt_ifp; delay = (long)ND_IFINFO(ifp)->reachable * hz; } break; case ND6_LLINFO_STALE: delay = (long)V_nd6_gctimer * hz; break; case ND6_LLINFO_DELAY: lle->la_asked = 0; delay = (long)V_nd6_delay * hz; break; } if (delay > 0) nd6_llinfo_settimer_locked(lle, delay); lle->ln_state = newstate; } /* * Timer-dependent part of nd state machine. * * Set noinline to be dtrace-friendly */ static __noinline void nd6_llinfo_timer(void *arg) { struct llentry *ln; struct in6_addr *dst, *pdst, *psrc, src; struct ifnet *ifp; struct nd_ifinfo *ndi = NULL; int send_ns; KASSERT(arg != NULL, ("%s: arg NULL", __func__)); ln = (struct llentry *)arg; LLE_WLOCK(ln); if (callout_pending(&ln->lle_timer)) { /* * We are a bit odd here in the treatment of * active/pending. If the pending bit is set, it got * rescheduled before I ran. The active * bit we ignore, since if it was stopped * in ll_tablefree() and was currently running * it would have returned 0 so the code would * not have deleted it since the callout could * not be stopped so we want to go through * with the delete here now. If the callout * was restarted, the pending bit will be back on and * we just want to bail since the callout_reset would * return 1 and our reference would have been removed * by nd6_llinfo_settimer_locked above since canceled * would have been 1.
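Because callout_reset(9) takes an int tick count, nd6_llinfo_settimer_locked() parks any excess in ln_ntick and arms at most INT_MAX ticks at a time; nd6_llinfo_timer() then re-arms until the remainder drains. The chunking in isolation, as a hypothetical condensation with the reference counting elided:

static void
arm_long_timer(struct llentry *ln, long ticks)
{
	if (ticks > INT_MAX) {
		/* Remainder for the handler to re-arm on expiry. */
		ln->ln_ntick = ticks - INT_MAX;
		callout_reset(&ln->lle_timer, INT_MAX, nd6_llinfo_timer, ln);
	} else {
		ln->ln_ntick = 0;
		callout_reset(&ln->lle_timer, (int)ticks,
		    nd6_llinfo_timer, ln);
	}
}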
*/ LLE_WUNLOCK(ln); return; } ifp = ln->lle_tbl->llt_ifp; CURVNET_SET(ifp->if_vnet); ndi = ND_IFINFO(ifp); send_ns = 0; dst = &ln->r_l3addr.addr6; pdst = dst; if (ln->ln_ntick > 0) { if (ln->ln_ntick > INT_MAX) { ln->ln_ntick -= INT_MAX; nd6_llinfo_settimer_locked(ln, INT_MAX); } else { long ntick = ln->ln_ntick; ln->ln_ntick = 0; /* arm the remaining ticks, not the just-cleared counter */ nd6_llinfo_settimer_locked(ln, ntick); } goto done; } if (ln->la_flags & LLE_STATIC) { goto done; } if (ln->la_flags & LLE_DELETED) { nd6_free(ln, 0); ln = NULL; goto done; } switch (ln->ln_state) { case ND6_LLINFO_INCOMPLETE: if (ln->la_asked < V_nd6_mmaxtries) { ln->la_asked++; send_ns = 1; /* Send NS to multicast address */ pdst = NULL; } else { struct mbuf *m = ln->la_hold; if (m) { struct mbuf *m0; /* * assuming every packet in la_hold has the * same IP header. Send error after unlock. */ m0 = m->m_nextpkt; m->m_nextpkt = NULL; ln->la_hold = m0; clear_llinfo_pqueue(ln); } EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_TIMEDOUT); nd6_free(ln, 0); ln = NULL; if (m != NULL) icmp6_error2(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0, ifp); } break; case ND6_LLINFO_REACHABLE: if (!ND6_LLINFO_PERMANENT(ln)) nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); break; case ND6_LLINFO_STALE: /* Garbage Collection (RFC 2461 5.3) */ if (!ND6_LLINFO_PERMANENT(ln)) { EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); nd6_free(ln, 1); ln = NULL; } break; case ND6_LLINFO_DELAY: if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) { /* We need NUD */ ln->la_asked = 1; nd6_llinfo_setstate(ln, ND6_LLINFO_PROBE); send_ns = 1; } else nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); /* XXX */ break; case ND6_LLINFO_PROBE: if (ln->la_asked < V_nd6_umaxtries) { ln->la_asked++; send_ns = 1; } else { EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); nd6_free(ln, 0); ln = NULL; } break; default: panic("%s: paths in a dark night can be confusing: %d", __func__, ln->ln_state); } done: if (send_ns != 0) { nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); psrc = nd6_llinfo_get_holdsrc(ln, &src); LLE_FREE_LOCKED(ln); ln = NULL; nd6_ns_output(ifp, psrc, pdst, dst, NULL); } if (ln != NULL) LLE_FREE_LOCKED(ln); CURVNET_RESTORE(); } /* * ND6 timer routine to expire default route list and prefix list */ void nd6_timer(void *arg) { CURVNET_SET((struct vnet *) arg); struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; struct in6_ifaddr *ia6, *nia6; callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz, nd6_timer, curvnet); /* expire default router list */ TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (dr->expire && dr->expire < time_uptime) defrtrlist_del(dr); } /* * expire interface addresses. * in the past the loop was inside prefix expiry processing. * However, from a stricter spec-conformance standpoint, we should * rather separate address lifetimes and prefix lifetimes. * * XXXRW: in6_ifaddrhead locking. */ addrloop: TAILQ_FOREACH_SAFE(ia6, &V_in6_ifaddrhead, ia_link, nia6) { /* check address lifetime */ if (IFA6_IS_INVALID(ia6)) { int regen = 0; /* * If the expiring address is temporary, try * regenerating a new one. This would be useful when * we suspended a laptop PC, then turned it on after a * period that could invalidate all temporary * addresses. Although we may have to restart the * loop (see below), it must be after purging the * address. Otherwise, we'd see an infinite loop of * regeneration.
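Taken together, nd6_llinfo_setstate() and nd6_llinfo_timer() above implement the RFC 4861 NUD state machine. The timers and expiry actions, using the defaults declared earlier in this file, summarize as:

state        timer armed                 action on expiry
INCOMPLETE   retrans (per-ifp, ms)       resend multicast NS, up to
                                         nd6_mmaxtries, then ICMP error
REACHABLE    reachable (per-ifp, s)      demote to STALE
STALE        nd6_gctimer (1 day)         entry garbage-collected
DELAY        nd6_delay (5 s)             promote to PROBE, send unicast NS
PROBE        retrans (per-ifp, ms)       resend NS, up to nd6_umaxtries,
                                         then free the entry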
*/ if (V_ip6_use_tempaddr && (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) { if (regen_tmpaddr(ia6) == 0) regen = 1; } in6_purgeaddr(&ia6->ia_ifa); if (regen) goto addrloop; /* XXX: see below */ } else if (IFA6_IS_DEPRECATED(ia6)) { int oldflags = ia6->ia6_flags; ia6->ia6_flags |= IN6_IFF_DEPRECATED; /* * If a temporary address has just become deprecated, * regenerate a new one if possible. */ if (V_ip6_use_tempaddr && (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (oldflags & IN6_IFF_DEPRECATED) == 0) { if (regen_tmpaddr(ia6) == 0) { /* * A new temporary address is * generated. * XXX: this means the address chain * has changed while we are still in * the loop. Although the change * would not cause disaster (because * it's not a deletion, but an * addition,) we'd rather restart the * loop just for safety. Or does this * significantly reduce performance?? */ goto addrloop; } } } else if ((ia6->ia6_flags & IN6_IFF_TENTATIVE) != 0) { /* * Schedule DAD for a tentative address. This happens * if the interface was down or not running * when the address was configured. */ int delay; delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); nd6_dad_start((struct ifaddr *)ia6, delay); } else { /* * Check status of the interface. If it is down, * mark the address as tentative for future DAD. */ if ((ia6->ia_ifp->if_flags & IFF_UP) == 0 || (ia6->ia_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (ND_IFINFO(ia6->ia_ifp)->flags & ND6_IFF_IFDISABLED) != 0) { ia6->ia6_flags &= ~IN6_IFF_DUPLICATED; ia6->ia6_flags |= IN6_IFF_TENTATIVE; } /* * A new RA might have made a deprecated address * preferred. */ ia6->ia6_flags &= ~IN6_IFF_DEPRECATED; } } /* expire prefix list */ LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { /* * check prefix lifetime. * since pltime is just for autoconf, pltime processing for * prefix is not necessary. */ if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME && time_uptime - pr->ndpr_lastupdate > pr->ndpr_vltime) { /* * address expiration and prefix expiration are * separate. NEVER perform in6_purgeaddr here. */ prelist_remove(pr); } } CURVNET_RESTORE(); } /* * ia6 - deprecated/invalidated temporary address */ static int regen_tmpaddr(struct in6_ifaddr *ia6) { struct ifaddr *ifa; struct ifnet *ifp; struct in6_ifaddr *public_ifa6 = NULL; ifp = ia6->ia_ifa.ifa_ifp; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct in6_ifaddr *it6; if (ifa->ifa_addr->sa_family != AF_INET6) continue; it6 = (struct in6_ifaddr *)ifa; /* ignore no autoconf addresses. */ if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; /* ignore autoconf addresses with different prefixes. */ if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr) continue; /* * Now we are looking at an autoconf address with the same * prefix as ours. If the address is temporary and is still * preferred, do not create another one. It would be rare, but * could happen, for example, when we resume a laptop PC after * a long period. */ if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && !IFA6_IS_DEPRECATED(it6)) { public_ifa6 = NULL; break; } /* * This is a public autoconf address that has the same prefix * as ours. If it is preferred, keep it. We can't break the * loop here, because there may be a still-preferred temporary * address with the prefix. 
*/ if (!IFA6_IS_DEPRECATED(it6)) public_ifa6 = it6; } if (public_ifa6 != NULL) ifa_ref(&public_ifa6->ia_ifa); IF_ADDR_RUNLOCK(ifp); if (public_ifa6 != NULL) { int e; if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) { ifa_free(&public_ifa6->ia_ifa); log(LOG_NOTICE, "regen_tmpaddr: failed to create a new" " tmp addr,errno=%d\n", e); return (-1); } ifa_free(&public_ifa6->ia_ifa); return (0); } return (-1); } /* * Nuke neighbor cache/prefix/default router management table, right before * ifp goes away. */ void nd6_purge(struct ifnet *ifp) { struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; /* * Nuke default router list entries toward ifp. * We defer removal of default router list entries that is installed * in the routing table, in order to keep additional side effects as * small as possible. */ TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (dr->installed) continue; if (dr->ifp == ifp) defrtrlist_del(dr); } TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (!dr->installed) continue; if (dr->ifp == ifp) defrtrlist_del(dr); } /* Nuke prefix list entries toward ifp */ LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { if (pr->ndpr_ifp == ifp) { /* * Because if_detach() does *not* release prefixes * while purging addresses the reference count will * still be above zero. We therefore reset it to * make sure that the prefix really gets purged. */ pr->ndpr_refcnt = 0; /* * Previously, pr->ndpr_addr is removed as well, * but I strongly believe we don't have to do it. * nd6_purge() is only called from in6_ifdetach(), * which removes all the associated interface addresses * by itself. * (jinmei@kame.net 20010129) */ prelist_remove(pr); } } /* cancel default outgoing interface setting */ if (V_nd6_defifindex == ifp->if_index) nd6_setdefaultiface(0); if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* Refresh default router list. */ defrouter_select(); } /* XXXXX * We do not nuke the neighbor cache entries here any more * because the neighbor cache is kept in if_afdata[AF_INET6]. * nd6_purge() is invoked by in6_ifdetach() which is called * from if_detach() where everything gets purged. So let * in6_domifdetach() do the actual L2 table purging work. */ } /* * the caller acquires and releases the lock on the lltbls * Returns the llentry locked */ struct llentry * nd6_lookup(const struct in6_addr *addr6, int flags, struct ifnet *ifp) { struct sockaddr_in6 sin6; struct llentry *ln; bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr6; IF_AFDATA_LOCK_ASSERT(ifp); ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)&sin6); return (ln); } struct llentry * nd6_alloc(const struct in6_addr *addr6, int flags, struct ifnet *ifp) { struct sockaddr_in6 sin6; struct llentry *ln; bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr6; ln = lltable_alloc_entry(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6); if (ln != NULL) ln->ln_state = ND6_LLINFO_NOSTATE; return (ln); } /* * Test whether a given IPv6 address is a neighbor or not, ignoring * the actual neighbor cache. The neighbor cache is ignored in order * to not reenter the routing code from within itself. */ static int nd6_is_new_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) { struct nd_prefix *pr; struct ifaddr *dstaddr; /* * A link-local address is always a neighbor. * XXX: a link does not necessarily specify a single interface. 
*/ if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) { struct sockaddr_in6 sin6_copy; u_int32_t zone; /* * We need sin6_copy since sa6_recoverscope() may modify the * content (XXX). */ sin6_copy = *addr; if (sa6_recoverscope(&sin6_copy)) return (0); /* XXX: should be impossible */ if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone)) return (0); if (sin6_copy.sin6_scope_id == zone) return (1); else return (0); } /* * If the address matches one of our addresses, * it should be a neighbor. * If the address matches one of our on-link prefixes, it should be a * neighbor. */ LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { if (pr->ndpr_ifp != ifp) continue; if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) { struct rtentry *rt; /* Always use the default FIB here. */ rt = in6_rtalloc1((struct sockaddr *)&pr->ndpr_prefix, 0, 0, RT_DEFAULT_FIB); if (rt == NULL) continue; /* * This is the case where multiple interfaces * have the same prefix, but only one is installed * into the routing table and that prefix entry * is not the one being examined here. In the case * where RADIX_MPATH is enabled, multiple route * entries (of the same rt_key value) will be * installed because the interface addresses all * differ. */ if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr)) { RTFREE_LOCKED(rt); continue; } RTFREE_LOCKED(rt); } if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, &addr->sin6_addr, &pr->ndpr_mask)) return (1); } /* * If the address is assigned on the node of the other side of * a p2p interface, the address should be a neighbor. */ dstaddr = ifa_ifwithdstaddr((const struct sockaddr *)addr, RT_ALL_FIBS); if (dstaddr != NULL) { if (dstaddr->ifa_ifp == ifp) { ifa_free(dstaddr); return (1); } ifa_free(dstaddr); } /* * If the default router list is empty, all addresses are regarded * as on-link, and thus, as a neighbor. */ if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && TAILQ_EMPTY(&V_nd_defrouter) && V_nd6_defifindex == ifp->if_index) { return (1); } return (0); } /* * Detect if a given IPv6 address identifies a neighbor on a given link. * XXX: should take care of the destination of a p2p link? */ int nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) { struct llentry *lle; int rc = 0; IF_AFDATA_UNLOCK_ASSERT(ifp); if (nd6_is_new_addr_neighbor(addr, ifp)) return (1); /* * Even if the address matches none of our addresses, it might be * in the neighbor cache. */ IF_AFDATA_RLOCK(ifp); if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) { LLE_RUNLOCK(lle); rc = 1; } IF_AFDATA_RUNLOCK(ifp); return (rc); } /* * Free an nd6 llinfo entry. * Since the function would cause significant changes in the kernel, DO NOT * make it global, unless you have a strong reason for the change, and are sure * that the change is safe. * * Set noinline to be dtrace-friendly */ static __noinline void nd6_free(struct llentry *ln, int gc) { struct nd_defrouter *dr; struct ifnet *ifp; LLE_WLOCK_ASSERT(ln); /* * we used to have pfctlinput(PRC_HOSTDEAD) here. * even though it is not harmful, it was not really necessary. */ /* cancel timer */ nd6_llinfo_settimer_locked(ln, -1); ifp = ln->lle_tbl->llt_ifp; if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { dr = defrouter_lookup(&ln->r_l3addr.addr6, ifp); if (dr != NULL && dr->expire && ln->ln_state == ND6_LLINFO_STALE && gc) { /* * If the reason for the deletion is just garbage * collection, and the neighbor is an active default * router, do not delete it. Instead, reset the GC * timer using the router's lifetime. 
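nd6_is_new_addr_neighbor() above decides on-link status without touching the neighbor cache, in a fixed order; nd6_is_addr_neighbor() then adds the cache lookup as a final step. A condensed sketch of that order follows; all helper names are hypothetical stand-ins for the inline logic shown above.

static int
is_on_link(const struct sockaddr_in6 *dst, struct ifnet *ifp)
{
	/* 1. Link-local: a neighbor when the scope zone matches ifp. */
	if (IN6_IS_ADDR_LINKLOCAL(&dst->sin6_addr))
		return (zone_matches(dst, ifp));
	/* 2. Covered by one of our on-link prefixes on this interface. */
	if (matches_onlink_prefix(dst, ifp))
		return (1);
	/* 3. The remote address of a point-to-point interface. */
	if (is_p2p_peer(dst, ifp))
		return (1);
	/* 4. Empty default-router list: everything is treated as on-link
	 *    when this interface is the default (cf. RFC 4861, 5.2). */
	return (accepts_rtadv(ifp) && no_default_routers() &&
	    is_default_iface(ifp));
}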
* Simply deleting the entry would affect default * router selection, which is not necessarily a good * thing, especially when we're using router preference * values. * XXX: the check for ln_state would be redundant, * but we intentionally keep it just in case. */ if (dr->expire > time_uptime) nd6_llinfo_settimer_locked(ln, (dr->expire - time_uptime) * hz); else nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); LLE_REMREF(ln); LLE_WUNLOCK(ln); return; } if (dr) { /* * Unreachability of a router might affect the default * router selection and on-link detection of advertised * prefixes. */ /* * Temporarily fake the state to choose a new default * router and to perform on-link determination of * prefixes correctly. * Below the state will be set correctly, * or the entry itself will be deleted. */ ln->ln_state = ND6_LLINFO_INCOMPLETE; } if (ln->ln_router || dr) { /* * We need to unlock to avoid a LOR with rt6_flush() with the * rnh and for the calls to pfxlist_onlink_check() and * defrouter_select() in the block further down for calls * into nd6_lookup(). We still hold a ref. */ LLE_WUNLOCK(ln); /* * rt6_flush must be called whether or not the neighbor * is in the Default Router List. * See a corresponding comment in nd6_na_input(). */ rt6_flush(&ln->r_l3addr.addr6, ifp); } if (dr) { /* * Since defrouter_select() does not affect the * on-link determination and MIP6 needs the check * before the default router selection, we perform * the check now. */ pfxlist_onlink_check(); /* * Refresh default router list. */ defrouter_select(); } /* * If this entry was added by an on-link redirect, remove the * corresponding host route. */ if (ln->la_flags & LLE_REDIRECT) nd6_free_redirect(ln); if (ln->ln_router || dr) LLE_WLOCK(ln); } /* * Safe to unlock. We still hold an extra reference and will not * free(9) in llentry_free() if someone else holds one as well. */ LLE_WUNLOCK(ln); IF_AFDATA_LOCK(ifp); LLE_WLOCK(ln); /* Guard against race with other llentry_free(). */ if (ln->la_flags & LLE_LINKED) { /* Remove callout reference */ LLE_REMREF(ln); lltable_unlink_entry(ln->lle_tbl, ln); } IF_AFDATA_UNLOCK(ifp); llentry_free(ln); } /* * Remove the rtentry for the given llentry, * both of which were installed by a redirect. */ static void nd6_free_redirect(const struct llentry *ln) { int fibnum; struct rtentry *rt; struct radix_node_head *rnh; struct sockaddr_in6 sin6; lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6); for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { rnh = rt_tables_get_rnh(fibnum, AF_INET6); if (rnh == NULL) continue; RADIX_NODE_HEAD_LOCK(rnh); rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, RTF_RNH_LOCKED, fibnum); if (rt) { if (rt->rt_flags == (RTF_UP | RTF_HOST | RTF_DYNAMIC)) rt_expunge(rnh, rt); RTFREE_LOCKED(rt); } RADIX_NODE_HEAD_UNLOCK(rnh); } } /* * Rejuvenate this function for processing related to routing * operations. */ void nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) { struct sockaddr_in6 *gateway; struct nd_defrouter *dr; struct ifnet *ifp; gateway = (struct sockaddr_in6 *)rt->rt_gateway; ifp = rt->rt_ifp; switch (req) { case RTM_ADD: break; case RTM_DELETE: if (!ifp) return; /* * Only indirect routes are interesting.
*/ if ((rt->rt_flags & RTF_GATEWAY) == 0) return; /* * check for default route */ if (IN6_ARE_ADDR_EQUAL(&in6addr_any, &SIN6(rt_key(rt))->sin6_addr)) { dr = defrouter_lookup(&gateway->sin6_addr, ifp); if (dr != NULL) dr->installed = 0; } break; } } int nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) { struct in6_ndireq *ndi = (struct in6_ndireq *)data; struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data; struct in6_ndifreq *ndif = (struct in6_ndifreq *)data; int error = 0; if (ifp->if_afdata[AF_INET6] == NULL) return (EPFNOSUPPORT); switch (cmd) { case OSIOCGIFINFO_IN6: #define ND ndi->ndi /* XXX: old ndp(8) assumes a positive value for linkmtu. */ bzero(&ND, sizeof(ND)); ND.linkmtu = IN6_LINKMTU(ifp); ND.maxmtu = ND_IFINFO(ifp)->maxmtu; ND.basereachable = ND_IFINFO(ifp)->basereachable; ND.reachable = ND_IFINFO(ifp)->reachable; ND.retrans = ND_IFINFO(ifp)->retrans; ND.flags = ND_IFINFO(ifp)->flags; ND.recalctm = ND_IFINFO(ifp)->recalctm; ND.chlim = ND_IFINFO(ifp)->chlim; break; case SIOCGIFINFO_IN6: ND = *ND_IFINFO(ifp); break; case SIOCSIFINFO_IN6: /* * used to change host variables from userland. * intended for use on a router to reflect RA configurations. */ /* 0 means 'unspecified' */ if (ND.linkmtu != 0) { if (ND.linkmtu < IPV6_MMTU || ND.linkmtu > IN6_LINKMTU(ifp)) { error = EINVAL; break; } ND_IFINFO(ifp)->linkmtu = ND.linkmtu; } if (ND.basereachable != 0) { int obasereachable = ND_IFINFO(ifp)->basereachable; ND_IFINFO(ifp)->basereachable = ND.basereachable; if (ND.basereachable != obasereachable) ND_IFINFO(ifp)->reachable = ND_COMPUTE_RTIME(ND.basereachable); } if (ND.retrans != 0) ND_IFINFO(ifp)->retrans = ND.retrans; if (ND.chlim != 0) ND_IFINFO(ifp)->chlim = ND.chlim; /* FALLTHROUGH */ case SIOCSIFINFO_FLAGS: { struct ifaddr *ifa; struct in6_ifaddr *ia; if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) && !(ND.flags & ND6_IFF_IFDISABLED)) { /* ifdisabled 1->0 transition */ /* * If the interface is marked as ND6_IFF_IFDISABLED and * has a link-local address with IN6_IFF_DUPLICATED, * do not clear ND6_IFF_IFDISABLED. * See RFC 4862, Section 5.4.5. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if ((ia->ia6_flags & IN6_IFF_DUPLICATED) && IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) break; } IF_ADDR_RUNLOCK(ifp); if (ifa != NULL) { /* LLA is duplicated. */ ND.flags |= ND6_IFF_IFDISABLED; log(LOG_ERR, "Cannot enable an interface" " with a link-local address marked" " duplicate.\n"); } else { ND_IFINFO(ifp)->flags &= ~ND6_IFF_IFDISABLED; if (ifp->if_flags & IFF_UP) in6_if_up(ifp); } } else if (!(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) && (ND.flags & ND6_IFF_IFDISABLED)) { /* ifdisabled 0->1 transition */ /* Mark all IPv6 addresses as tentative.
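The ifdisabled transitions handled here are what ndp(8) drives through SIOCSIFINFO_FLAGS; as the code shows, the kernel vetoes the 1->0 transition while a duplicated link-local address is present. A userland sketch of reading and rewriting the flags; `s` is assumed to be an AF_INET6 datagram socket, `ifname` a valid interface name, and error checks are elided:

#include <sys/ioctl.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet6/nd6.h>
#include <string.h>

	struct in6_ndireq nd;

	memset(&nd, 0, sizeof(nd));
	strlcpy(nd.ifname, ifname, sizeof(nd.ifname));
	ioctl(s, SIOCGIFINFO_IN6, &nd);		/* fetch current nd_ifinfo */
	nd.ndi.flags &= ~ND6_IFF_IFDISABLED;	/* request 1->0 transition */
	ioctl(s, SIOCSIFINFO_FLAGS, &nd);	/* may be refused, see above */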
*/ ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED; if (V_ip6_dad_count > 0 && (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; ia->ia6_flags |= IN6_IFF_TENTATIVE; } IF_ADDR_RUNLOCK(ifp); } } if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) { if (!(ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL)) { /* auto_linklocal 0->1 transition */ /* If no link-local address on ifp, configure */ ND_IFINFO(ifp)->flags |= ND6_IFF_AUTO_LINKLOCAL; in6_ifattach(ifp, NULL); } else if (!(ND.flags & ND6_IFF_IFDISABLED) && ifp->if_flags & IFF_UP) { /* * When the IF already has * ND6_IFF_AUTO_LINKLOCAL, no link-local * address is assigned, and IFF_UP, try to * assign one. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) break; } IF_ADDR_RUNLOCK(ifp); if (ifa != NULL) /* No LLA is configured. */ in6_ifattach(ifp, NULL); } } } ND_IFINFO(ifp)->flags = ND.flags; break; #undef ND case SIOCSNDFLUSH_IN6: /* XXX: the ioctl name is confusing... */ /* sync kernel routing table with the default router list */ defrouter_reset(); defrouter_select(); break; case SIOCSPFXFLUSH_IN6: { /* flush all the prefixes advertised by routers */ struct nd_prefix *pr, *next; LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) { struct in6_ifaddr *ia, *ia_next; if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) continue; /* XXX */ /* do we really have to remove addresses as well? */ /* XXXRW: in6_ifaddrhead locking. */ TAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link, ia_next) { if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; if (ia->ia6_ndpr == pr) in6_purgeaddr(&ia->ia_ifa); } prelist_remove(pr); } break; } case SIOCSRTRFLUSH_IN6: { /* flush all the default routers */ struct nd_defrouter *dr, *next; defrouter_reset(); TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, next) { defrtrlist_del(dr); } defrouter_select(); break; } case SIOCGNBRINFO_IN6: { struct llentry *ln; struct in6_addr nb_addr = nbi->addr; /* make local for safety */ if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0) return (error); IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(&nb_addr, 0, ifp); IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) { error = EINVAL; break; } nbi->state = ln->ln_state; nbi->asked = ln->la_asked; nbi->isrouter = ln->ln_router; if (ln->la_expire == 0) nbi->expire = 0; else nbi->expire = ln->la_expire + (time_second - time_uptime); LLE_RUNLOCK(ln); break; } case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ ndif->ifindex = V_nd6_defifindex; break; case SIOCSDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ return (nd6_setdefaultiface(ndif->ifindex)); } return (error); } /* * Calculates new isRouter value based on provided parameters and * returns it. */ static int nd6_is_router(int type, int code, int is_new, int old_addr, int new_addr, int ln_router) { /* * ICMP6 type dependent behavior. * * NS: clear IsRouter if new entry * RS: clear IsRouter * RA: set IsRouter if there's lladdr * redir: clear IsRouter if new entry * * RA case, (1): * The spec says that we must set IsRouter in the following cases: * - If lladdr exists, set IsRouter. This means (1-5). * - If it is an old entry (!newentry), set IsRouter. This means (7). * So, based on the spec, in (1-5) and (7) cases we must set IsRouter. * A question arises for (1) case.
(1) case has no lladdr in the * neighbor cache, this is similar to (6). * This case is rare but we figured that we MUST NOT set IsRouter. * * is_new old_addr new_addr NS RS RA redir * D R * 0 n n (1) c ? s * 0 y n (2) c s s * 0 n y (3) c s s * 0 y y (4) c s s * 0 y y (5) c s s * 1 -- n (6) c c c s * 1 -- y (7) c c s c s * * (c=clear s=set) */ switch (type & 0xff) { case ND_NEIGHBOR_SOLICIT: /* * New entry must have is_router flag cleared. */ if (is_new) /* (6-7) */ ln_router = 0; break; case ND_REDIRECT: /* * If the icmp is a redirect to a better router, always set the * is_router flag. Otherwise, if the entry is newly created, * clear the flag. [RFC 2461, sec 8.3] */ if (code == ND_REDIRECT_ROUTER) ln_router = 1; else { if (is_new) /* (6-7) */ ln_router = 0; } break; case ND_ROUTER_SOLICIT: /* * is_router flag must always be cleared. */ ln_router = 0; break; case ND_ROUTER_ADVERT: /* * Mark an entry with lladdr as a router. */ if ((!is_new && (old_addr || new_addr)) || /* (2-5) */ (is_new && new_addr)) { /* (7) */ ln_router = 1; } break; } return (ln_router); } /* * Create neighbor cache entry and cache link-layer address, * on reception of inbound ND6 packets. (RS/RA/NS/redirect) * * type - ICMP6 type * code - type dependent information * */ void nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, int lladdrlen, int type, int code) { struct llentry *ln = NULL, *ln_tmp; int is_newentry; int do_update; int olladdr; int llchange; int flags; uint16_t router = 0; struct sockaddr_in6 sin6; struct mbuf *chain = NULL; IF_AFDATA_UNLOCK_ASSERT(ifp); KASSERT(ifp != NULL, ("%s: ifp == NULL", __func__)); KASSERT(from != NULL, ("%s: from == NULL", __func__)); /* nothing must be updated for unspecified address */ if (IN6_IS_ADDR_UNSPECIFIED(from)) return; /* * Validation of ifp->if_addrlen and lladdrlen must be done by * the caller. * * XXX If the link does not have a link-layer address, what should * we do? (ifp->if_addrlen == 0) * Spec says nothing in sections for RA, RS and NA. There's a small * description of it in the NS section (RFC 2461 7.2.3). */ flags = lladdr ? LLE_EXCLUSIVE : 0; IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(from, flags, ifp); IF_AFDATA_RUNLOCK(ifp); is_newentry = 0; if (ln == NULL) { flags |= LLE_EXCLUSIVE; ln = nd6_alloc(from, 0, ifp); if (ln == NULL) return; /* * Since we already know all the data for the new entry, * fill it before insertion. */ if (lladdr != NULL) lltable_set_entry_addr(ifp, ln, lladdr); IF_AFDATA_WLOCK(ifp); LLE_WLOCK(ln); /* Prefer any existing lle over newly-created one */ ln_tmp = nd6_lookup(from, LLE_EXCLUSIVE, ifp); if (ln_tmp == NULL) lltable_link_entry(LLTABLE6(ifp), ln); IF_AFDATA_WUNLOCK(ifp); if (ln_tmp == NULL) { /* No existing lle, mark as new entry (6,7) */ is_newentry = 1; nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); if (lladdr != NULL) /* (7) */ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); } else { lltable_free_entry(LLTABLE6(ifp), ln); ln = ln_tmp; ln_tmp = NULL; } } /* do nothing if static ndp is set */ if ((ln->la_flags & LLE_STATIC)) { if (flags & LLE_EXCLUSIVE) LLE_WUNLOCK(ln); else LLE_RUNLOCK(ln); return; } olladdr = (ln->la_flags & LLE_VALID) ?
1 : 0; if (olladdr && lladdr) { llchange = bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen); } else if (!olladdr && lladdr) llchange = 1; else llchange = 0; /* * newentry olladdr lladdr llchange (*=record) * 0 n n -- (1) * 0 y n -- (2) * 0 n y y (3) * STALE * 0 y y n (4) * * 0 y y y (5) * STALE * 1 -- n -- (6) NOSTATE(= PASSIVE) * 1 -- y -- (7) * STALE */ do_update = 0; if (is_newentry == 0 && llchange != 0) { do_update = 1; /* (3,5) */ /* * Record source link-layer address * XXX is it dependent on ifp->if_type? */ lltable_set_entry_addr(ifp, ln, lladdr); nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); if (ln->la_hold != NULL) nd6_grab_holdchain(ln, &chain, &sin6); } /* Calculate the new router status */ router = nd6_is_router(type, code, is_newentry, olladdr, lladdr != NULL ? 1 : 0, ln->ln_router); ln->ln_router = router; /* Mark non-router redirects with special flag */ if ((type & 0xFF) == ND_REDIRECT && code != ND_REDIRECT_ROUTER) ln->la_flags |= LLE_REDIRECT; if (flags & LLE_EXCLUSIVE) LLE_WUNLOCK(ln); else LLE_RUNLOCK(ln); if (chain != NULL) nd6_flush_holdchain(ifp, ifp, chain, &sin6); /* * When the link-layer address of a router changes, select the * best router again. In particular, when the neighbor entry is newly * created, it might affect the selection policy. * Question: can we restrict the first condition to the "is_newentry" * case? * XXX: when we hear an RA from a new router with the link-layer * address option, defrouter_select() is called twice, since * defrtrlist_update called the function as well. However, I believe * we can compromise the overhead, since it only happens the first * time. * XXX: although defrouter_select() should not have a bad effect * on hosts that are not autoconfigured, we explicitly avoid such * cases for safety. */ if ((do_update || is_newentry) && router && ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* * guaranteed recursion */ defrouter_select(); } } static void nd6_slowtimo(void *arg) { CURVNET_SET((struct vnet *) arg); struct nd_ifinfo *nd6if; struct ifnet *ifp; callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, curvnet); IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (ifp->if_afdata[AF_INET6] == NULL) continue; nd6if = ND_IFINFO(ifp); if (nd6if->basereachable && /* already initialized */ (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) { /* * Since the reachable time rarely changes via router * advertisements, we SHOULD ensure that a new random * value gets recomputed at least once every few hours. * (RFC 2461, 6.3.4) */ nd6if->recalctm = V_nd6_recalc_reachtm_interval; nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable); } } IFNET_RUNLOCK_NOSLEEP(); CURVNET_RESTORE(); } void nd6_grab_holdchain(struct llentry *ln, struct mbuf **chain, struct sockaddr_in6 *sin6) { LLE_WLOCK_ASSERT(ln); *chain = ln->la_hold; ln->la_hold = NULL; lltable_fill_sa_entry(ln, (struct sockaddr *)sin6); if (ln->ln_state == ND6_LLINFO_STALE) { /* * The first time we send a packet to a * neighbor whose entry is STALE, we have * to change the state to DELAY and set * a timer to expire in DELAY_FIRST_PROBE_TIME * seconds so that neighbor unreachability * detection runs on expiration.
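 *
 * (An illustrative aside: the recomputation in nd6_slowtimo() above
 * uses ND_COMPUTE_RTIME(). Written out, with base in milliseconds:
 *
 *	uint32_t unit = base >> 10;	// base / 1024
 *	uint32_t reachable =
 *	    (512 * unit + (arc4random() & (1024 * unit))) / 1000;
 *
 * i.e. roughly [0.5, 1.5) * base, scaled down to seconds by the final
 * division. Note that the kernel masks arc4random() rather than taking
 * a modulus, so the spread is only approximately uniform.)
 *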
* (RFC 2461 7.3.3) */ nd6_llinfo_setstate(ln, ND6_LLINFO_DELAY); } } int nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m, - struct sockaddr_in6 *dst) + struct sockaddr_in6 *dst, struct route *ro) { int error; int ip6len; struct ip6_hdr *ip6; struct m_tag *mtag; #ifdef MAC mac_netinet6_nd6_send(ifp, m); #endif /* * If called from nd6_ns_output() (NS), nd6_na_output() (NA), * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA * as handled by rtsol and rtadvd), mbufs will be tagged for SeND * to be diverted to user space. When re-injected into the kernel, * send_output() will directly dispatch them to the outgoing interface. */ if (send_sendso_input_hook != NULL) { mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL); if (mtag != NULL) { ip6 = mtod(m, struct ip6_hdr *); ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); /* Use the SEND socket */ error = send_sendso_input_hook(m, ifp, SND_OUT, ip6len); /* -1 == no app on SEND socket */ if (error == 0 || error != -1) return (error); } } m_clrprotoflags(m); /* Avoid confusing lower layers. */ IP_PROBE(send, NULL, NULL, mtod(m, struct ip6_hdr *), ifp, NULL, mtod(m, struct ip6_hdr *)); if ((ifp->if_flags & IFF_LOOPBACK) == 0) origifp = ifp; - error = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, NULL); + error = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, ro); return (error); } /* * Do L2 address resolution for the @sa_dst address. Stores the found * address in the @desten buffer. A copy of the lle ln_flags can also be * saved in @pflags if @pflags is non-NULL. * * If the destination LLE does not exist, or lle state modification * is required, call the "slow" version. * * Return values: * - 0 on success (address copied to buffer). * - EWOULDBLOCK (no local error, but address is still unresolved) * - other errors (alloc failure, etc) */ int nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m, const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags) { struct llentry *ln = NULL; const struct sockaddr_in6 *dst6; if (pflags != NULL) *pflags = 0; dst6 = (const struct sockaddr_in6 *)sa_dst; /* discard the packet if IPv6 operation is disabled on the interface */ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) { m_freem(m); return (ENETDOWN); /* better error? */ } if (m != NULL && m->m_flags & M_MCAST) { switch (ifp->if_type) { case IFT_ETHER: case IFT_FDDI: case IFT_L2VLAN: case IFT_IEEE80211: case IFT_BRIDGE: case IFT_ISO88025: ETHER_MAP_IPV6_MULTICAST(&dst6->sin6_addr, desten); return (0); default: m_freem(m); return (EAFNOSUPPORT); } } IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(&dst6->sin6_addr, 0, ifp); IF_AFDATA_RUNLOCK(ifp); /* * Perform the fast path for the following cases: * 1) lle state is REACHABLE * 2) lle state is DELAY (NS message sent) * * Every other case involves lle modification, so we handle * them separately. */ if (ln == NULL || (ln->ln_state != ND6_LLINFO_REACHABLE && ln->ln_state != ND6_LLINFO_DELAY)) { /* Fall back to the slow processing path */ if (ln != NULL) LLE_RUNLOCK(ln); return (nd6_resolve_slow(ifp, m, dst6, desten, pflags)); } bcopy(&ln->ll_addr, desten, ifp->if_addrlen); if (pflags != NULL) *pflags = ln->la_flags; LLE_RUNLOCK(ln); return (0); } /* * Do L2 address resolution for the @sa_dst address. Stores the found * address in the @desten buffer. A copy of the lle ln_flags can also be * saved in @pflags if @pflags is non-NULL. * * Heavy version. * The function assumes that the destination LLE does not exist, * is invalid or stale, so the LLE_EXCLUSIVE lock needs to be acquired.
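 *
 * Before the heavy path, a hypothetical caller of nd6_resolve() for
 * reference, modeled loosely on ether_output(); example_transmit() is
 * illustrative and not part of this file. The key point: EWOULDBLOCK
 * means the mbuf was queued on the llentry pending the NS/NA exchange
 * and must not be treated as an error.
 */
static int
example_transmit(struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr *dst, int is_gw)
{
	u_char edst[ETHER_ADDR_LEN];
	uint32_t pflags;
	int error;

	error = nd6_resolve(ifp, is_gw, m, dst, edst, &pflags);
	if (error == EWOULDBLOCK)
		return (0);	/* held on the llentry; sent on resolution */
	if (error != 0)
		return (error);	/* mbuf already freed by nd6_resolve() */
	/* ... fill the L2 header from edst and hand m to if_output ... */
	return (0);
}
/*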
* * Set noinline to be dtrace-friendly */ static __noinline int nd6_resolve_slow(struct ifnet *ifp, struct mbuf *m, const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags) { struct llentry *lle = NULL, *lle_tmp; struct in6_addr *psrc, src; int send_ns; /* * Address resolution or Neighbor Unreachability Detection * for the next hop. * At this point, the destination of the packet must be a unicast * or an anycast address (i.e., not a multicast). */ if (lle == NULL) { IF_AFDATA_RLOCK(ifp); lle = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); IF_AFDATA_RUNLOCK(ifp); if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp)) { /* * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), * the condition below is not very efficient. But we believe * it is tolerable, because this should be a rare case. */ lle = nd6_alloc(&dst->sin6_addr, 0, ifp); if (lle == NULL) { char ip6buf[INET6_ADDRSTRLEN]; log(LOG_DEBUG, "nd6_output: can't allocate llinfo for %s " "(ln=%p)\n", ip6_sprintf(ip6buf, &dst->sin6_addr), lle); m_freem(m); return (ENOBUFS); } IF_AFDATA_WLOCK(ifp); LLE_WLOCK(lle); /* Prefer any existing entry over the newly-created one */ lle_tmp = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); if (lle_tmp == NULL) lltable_link_entry(LLTABLE6(ifp), lle); IF_AFDATA_WUNLOCK(ifp); if (lle_tmp != NULL) { lltable_free_entry(LLTABLE6(ifp), lle); lle = lle_tmp; lle_tmp = NULL; } } } if (lle == NULL) { if (!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) { m_freem(m); return (ENOBUFS); } if (m != NULL) m_freem(m); return (ENOBUFS); } LLE_WLOCK_ASSERT(lle); /* * The first time we send a packet to a neighbor whose entry is * STALE, we have to change the state to DELAY and set a timer to * expire in DELAY_FIRST_PROBE_TIME seconds so that * neighbor unreachability detection runs on expiration. * (RFC 2461 7.3.3) */ if (lle->ln_state == ND6_LLINFO_STALE) nd6_llinfo_setstate(lle, ND6_LLINFO_DELAY); /* * If the neighbor cache entry has a state other than INCOMPLETE * (i.e. its link-layer address is already resolved), just * send the packet. */ if (lle->ln_state > ND6_LLINFO_INCOMPLETE) { bcopy(&lle->ll_addr, desten, ifp->if_addrlen); if (pflags != NULL) *pflags = lle->la_flags; LLE_WUNLOCK(lle); return (0); } /* * There is a neighbor cache entry, but no ethernet address * response yet. Append this latest packet to the end of the * packet queue held on the entry, as long as the number of queued * packets does not exceed nd6_maxqueuelen; when it does, * the oldest packet in the queue is removed. */ if (lle->la_hold != NULL) { struct mbuf *m_hold; int i; i = 0; for (m_hold = lle->la_hold; m_hold; m_hold = m_hold->m_nextpkt){ i++; if (m_hold->m_nextpkt == NULL) { m_hold->m_nextpkt = m; break; } } while (i >= V_nd6_maxqueuelen) { m_hold = lle->la_hold; lle->la_hold = lle->la_hold->m_nextpkt; m_freem(m_hold); i--; } } else { lle->la_hold = m; } /* * If there has been no NS for the neighbor after entering the * INCOMPLETE state, send the first solicitation. * Note that for a newly-created lle la_asked will be 0, * so we will transition from the ND6_LLINFO_NOSTATE to the * ND6_LLINFO_INCOMPLETE state here.
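 *
 * (A worked example of the queueing above, assuming
 * V_nd6_maxqueuelen == 3: with la_hold == A->B->C, queueing packet D
 * first appends it, the for loop having counted i == 3 existing
 * packets; the while loop then frees A, leaving B->C->D. The oldest
 * packet is always the one dropped.)
 *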
*/ psrc = NULL; send_ns = 0; if (lle->la_asked == 0) { lle->la_asked++; send_ns = 1; psrc = nd6_llinfo_get_holdsrc(lle, &src); nd6_llinfo_setstate(lle, ND6_LLINFO_INCOMPLETE); } LLE_WUNLOCK(lle); if (send_ns != 0) nd6_ns_output(ifp, psrc, NULL, &dst->sin6_addr, NULL); return (EWOULDBLOCK); } int nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain, struct sockaddr_in6 *dst) { struct mbuf *m, *m_head; struct ifnet *outifp; int error = 0; m_head = chain; if ((ifp->if_flags & IFF_LOOPBACK) != 0) outifp = origifp; else outifp = ifp; while (m_head) { m = m_head; m_head = m_head->m_nextpkt; - error = nd6_output_ifp(ifp, origifp, m, dst); + error = nd6_output_ifp(ifp, origifp, m, dst, NULL); } /* * XXX * note that intermediate errors are blindly ignored */ return (error); } static int nd6_need_cache(struct ifnet *ifp) { /* * XXX: we currently do not maintain a neighbor cache on any interface * other than ARCnet, Ethernet, FDDI and GIF. * * RFC2893 says: * - unidirectional tunnels need no ND */ switch (ifp->if_type) { case IFT_ARCNET: case IFT_ETHER: case IFT_FDDI: case IFT_IEEE1394: case IFT_L2VLAN: case IFT_IEEE80211: case IFT_INFINIBAND: case IFT_BRIDGE: case IFT_PROPVIRTUAL: return (1); default: return (0); } } /* * Add a permanent ND6 link-layer record for the given * interface address. * * Very similar to IPv4 arp_ifinit(), but: * 1) IPv6 DAD is performed in a different place * 2) It is called by the IPv6 protocol stack, in contrast to * arp_ifinit(), which is typically called from the SIOCSIFADDR * driver ioctl handler. * */ int nd6_add_ifa_lle(struct in6_ifaddr *ia) { struct ifnet *ifp; struct llentry *ln, *ln_tmp; struct sockaddr *dst; ifp = ia->ia_ifa.ifa_ifp; if (nd6_need_cache(ifp) == 0) return (0); ia->ia_ifa.ifa_rtrequest = nd6_rtrequest; dst = (struct sockaddr *)&ia->ia_addr; ln = lltable_alloc_entry(LLTABLE6(ifp), LLE_IFADDR, dst); if (ln == NULL) return (ENOBUFS); IF_AFDATA_WLOCK(ifp); LLE_WLOCK(ln); /* Unlink any existing entry */ ln_tmp = lla_lookup(LLTABLE6(ifp), LLE_EXCLUSIVE, dst); if (ln_tmp != NULL) lltable_unlink_entry(LLTABLE6(ifp), ln_tmp); lltable_link_entry(LLTABLE6(ifp), ln); IF_AFDATA_WUNLOCK(ifp); if (ln_tmp != NULL) EVENTHANDLER_INVOKE(lle_event, ln_tmp, LLENTRY_EXPIRED); EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); LLE_WUNLOCK(ln); if (ln_tmp != NULL) llentry_free(ln_tmp); return (0); } /* * Removes either all lle entries for the given @ia, or the lle * corresponding to the @ia address.
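 *
 * A hypothetical caller showing the intended pairing with
 * nd6_add_ifa_lle() above, modeled on the in6_update_ifa() and
 * in6_purgeaddr() paths; example_ifa_lle_lifecycle() is illustrative
 * and not part of this file.
 */
static int
example_ifa_lle_lifecycle(struct in6_ifaddr *ia)
{
	int error;

	/* Install the LLE_IFADDR record when the address is configured. */
	error = nd6_add_ifa_lle(ia);
	if (error != 0)
		return (error);
	/* ... address in use ... */
	/* On deletion, remove only this address's entry (all == 0). */
	nd6_rem_ifa_lle(ia, 0);
	return (0);
}
/*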
*/ void nd6_rem_ifa_lle(struct in6_ifaddr *ia, int all) { struct sockaddr_in6 mask, addr; struct sockaddr *saddr, *smask; struct ifnet *ifp; ifp = ia->ia_ifa.ifa_ifp; memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); saddr = (struct sockaddr *)&addr; smask = (struct sockaddr *)&mask; if (all != 0) lltable_prefix_free(AF_INET6, saddr, smask, LLE_STATIC); else lltable_delete_addr(LLTABLE6(ifp), LLE_IFADDR, saddr); } static void clear_llinfo_pqueue(struct llentry *ln) { struct mbuf *m_hold, *m_hold_next; for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; m_freem(m_hold); } ln->la_hold = NULL; return; } static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS); static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS); #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet6_icmp6); #endif SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist, CTLFLAG_RD, nd6_sysctl_drlist, ""); SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist, CTLFLAG_RD, nd6_sysctl_prlist, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, ""); SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), ""); static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS) { struct in6_defrouter d; struct nd_defrouter *dr; int error; if (req->newptr) return (EPERM); bzero(&d, sizeof(d)); d.rtaddr.sin6_family = AF_INET6; d.rtaddr.sin6_len = sizeof(d.rtaddr); /* * XXX locking */ TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { d.rtaddr.sin6_addr = dr->rtaddr; error = sa6_recoverscope(&d.rtaddr); if (error != 0) return (error); d.flags = dr->flags; d.rtlifetime = dr->rtlifetime; d.expire = dr->expire + (time_second - time_uptime); d.if_index = dr->ifp->if_index; error = SYSCTL_OUT(req, &d, sizeof(d)); if (error != 0) return (error); } return (0); } static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS) { struct in6_prefix p; struct sockaddr_in6 s6; struct nd_prefix *pr; struct nd_pfxrouter *pfr; time_t maxexpire; int error; char ip6buf[INET6_ADDRSTRLEN]; if (req->newptr) return (EPERM); bzero(&p, sizeof(p)); p.origin = PR_ORIG_RA; bzero(&s6, sizeof(s6)); s6.sin6_family = AF_INET6; s6.sin6_len = sizeof(s6); /* * XXX locking */ LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { p.prefix = pr->ndpr_prefix; if (sa6_recoverscope(&p.prefix)) { log(LOG_ERR, "scope error in prefix list (%s)\n", ip6_sprintf(ip6buf, &p.prefix.sin6_addr)); /* XXX: press on... */ } p.raflags = pr->ndpr_raf; p.prefixlen = pr->ndpr_plen; p.vltime = pr->ndpr_vltime; p.pltime = pr->ndpr_pltime; p.if_index = pr->ndpr_ifp->if_index; if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME) p.expire = 0; else { /* XXX: we assume time_t is signed. 
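 *
 * (Written out: the next statement builds the largest positive value a
 * signed time_t can hold, all bits set except the sign bit, e.g.
 * 0x7fffffffffffffff for a 64-bit time_t. The comparison that follows
 * then adds ndpr_vltime to ndpr_lastupdate only when the sum cannot
 * exceed that maximum, so the addition never overflows:
 *
 *	if (vltime < maxexpire - lastupdate)
 *		expire = lastupdate + vltime;	// + wall-clock offset
 *	else
 *		expire = maxexpire;
 * )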
*/ maxexpire = (-1) & ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); if (pr->ndpr_vltime < maxexpire - pr->ndpr_lastupdate) p.expire = pr->ndpr_lastupdate + pr->ndpr_vltime + (time_second - time_uptime); else p.expire = maxexpire; } p.refcnt = pr->ndpr_refcnt; p.flags = pr->ndpr_stateflags; p.advrtrs = 0; LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) p.advrtrs++; error = SYSCTL_OUT(req, &p, sizeof(p)); if (error != 0) return (error); LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) { s6.sin6_addr = pfr->router->rtaddr; if (sa6_recoverscope(&s6)) log(LOG_ERR, "scope error in prefix list (%s)\n", ip6_sprintf(ip6buf, &pfr->router->rtaddr)); error = SYSCTL_OUT(req, &s6, sizeof(s6)); if (error != 0) return (error); } } return (0); } Index: projects/powernv/netinet6/nd6.h =================================================================== --- projects/powernv/netinet6/nd6.h (revision 290990) +++ projects/powernv/netinet6/nd6.h (revision 290991) @@ -1,458 +1,458 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: nd6.h,v 1.76 2001/12/18 02:10:31 itojun Exp $ * $FreeBSD$ */ #ifndef _NETINET6_ND6_H_ #define _NETINET6_ND6_H_ /* see net/route.h, or net/if_inarp.h */ #ifndef RTF_ANNOUNCE #define RTF_ANNOUNCE RTF_PROTO2 #endif #include #include struct llentry; #define ND6_LLINFO_NOSTATE -2 /* * We don't need the WAITDELETE state any more, but we keep the definition * in a comment line instead of removing it. This is necessary to avoid * unintentionally reusing the value for another purpose, which might * affect backward compatibility with old applications. 
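 *
 * (Note that the remaining ND6_LLINFO_* values below are deliberately
 * ordered: every "resolved" state compares greater than INCOMPLETE,
 * which is what ND6_IS_LLINFO_PROBREACH() and ND6_LLINFO_PERMANENT()
 * rely on. Illustration, with ln a hypothetical struct llentry *:
 *
 *	int resolved = (ln->ln_state > ND6_LLINFO_INCOMPLETE);
 *	int permanent = (resolved && ln->la_expire == 0);
 * )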
* (20000711 jinmei@kame.net) */ /* #define ND6_LLINFO_WAITDELETE -1 */ #define ND6_LLINFO_INCOMPLETE 0 #define ND6_LLINFO_REACHABLE 1 #define ND6_LLINFO_STALE 2 #define ND6_LLINFO_DELAY 3 #define ND6_LLINFO_PROBE 4 #define ND6_IS_LLINFO_PROBREACH(n) ((n)->ln_state > ND6_LLINFO_INCOMPLETE) #define ND6_LLINFO_PERMANENT(n) (((n)->la_expire == 0) && ((n)->ln_state > ND6_LLINFO_INCOMPLETE)) struct nd_ifinfo { u_int32_t linkmtu; /* LinkMTU */ u_int32_t maxmtu; /* Upper bound of LinkMTU */ u_int32_t basereachable; /* BaseReachableTime */ u_int32_t reachable; /* Reachable Time */ u_int32_t retrans; /* Retrans Timer */ u_int32_t flags; /* Flags */ int recalctm; /* BaseReacable re-calculation timer */ u_int8_t chlim; /* CurHopLimit */ u_int8_t initialized; /* Flag to see the entry is initialized */ /* the following 3 members are for privacy extension for addrconf */ u_int8_t randomseed0[8]; /* upper 64 bits of MD5 digest */ u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */ u_int8_t randomid[8]; /* current random ID */ }; #define ND6_IFF_PERFORMNUD 0x1 #define ND6_IFF_ACCEPT_RTADV 0x2 #define ND6_IFF_PREFER_SOURCE 0x4 /* Not used in FreeBSD. */ #define ND6_IFF_IFDISABLED 0x8 /* IPv6 operation is disabled due to * DAD failure. (XXX: not ND-specific) */ #define ND6_IFF_DONT_SET_IFROUTE 0x10 #define ND6_IFF_AUTO_LINKLOCAL 0x20 #define ND6_IFF_NO_RADR 0x40 #define ND6_IFF_NO_PREFER_IFACE 0x80 /* XXX: not related to ND. */ #define ND6_IFF_NO_DAD 0x100 #ifdef _KERNEL #define ND_IFINFO(ifp) \ (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->nd_ifinfo) #define IN6_LINKMTU(ifp) \ ((ND_IFINFO(ifp)->linkmtu && ND_IFINFO(ifp)->linkmtu < (ifp)->if_mtu) \ ? ND_IFINFO(ifp)->linkmtu \ : ((ND_IFINFO(ifp)->maxmtu && ND_IFINFO(ifp)->maxmtu < (ifp)->if_mtu) \ ? ND_IFINFO(ifp)->maxmtu : (ifp)->if_mtu)) #endif struct in6_nbrinfo { char ifname[IFNAMSIZ]; /* if name, e.g. 
"en0" */ struct in6_addr addr; /* IPv6 address of the neighbor */ long asked; /* number of queries already sent for this addr */ int isrouter; /* if it acts as a router */ int state; /* reachability state */ int expire; /* lifetime for NDP state transition */ }; #define DRLSTSIZ 10 #define PRLSTSIZ 10 struct in6_drlist { char ifname[IFNAMSIZ]; struct { struct in6_addr rtaddr; u_char flags; u_short rtlifetime; u_long expire; u_short if_index; } defrouter[DRLSTSIZ]; }; struct in6_defrouter { struct sockaddr_in6 rtaddr; u_char flags; u_short rtlifetime; u_long expire; u_short if_index; }; #ifdef _KERNEL struct in6_oprlist { char ifname[IFNAMSIZ]; struct { struct in6_addr prefix; struct prf_ra raflags; u_char prefixlen; u_char origin; u_long vltime; u_long pltime; u_long expire; u_short if_index; u_short advrtrs; /* number of advertisement routers */ struct in6_addr advrtr[DRLSTSIZ]; /* XXX: explicit limit */ } prefix[PRLSTSIZ]; }; #endif struct in6_prlist { char ifname[IFNAMSIZ]; struct { struct in6_addr prefix; struct prf_ra raflags; u_char prefixlen; u_char origin; u_int32_t vltime; u_int32_t pltime; time_t expire; u_short if_index; u_short advrtrs; /* number of advertisement routers */ struct in6_addr advrtr[DRLSTSIZ]; /* XXX: explicit limit */ } prefix[PRLSTSIZ]; }; struct in6_prefix { struct sockaddr_in6 prefix; struct prf_ra raflags; u_char prefixlen; u_char origin; u_int32_t vltime; u_int32_t pltime; time_t expire; u_int32_t flags; int refcnt; u_short if_index; u_short advrtrs; /* number of advertisement routers */ /* struct sockaddr_in6 advrtr[] */ }; #ifdef _KERNEL struct in6_ondireq { char ifname[IFNAMSIZ]; struct { u_int32_t linkmtu; /* LinkMTU */ u_int32_t maxmtu; /* Upper bound of LinkMTU */ u_int32_t basereachable; /* BaseReachableTime */ u_int32_t reachable; /* Reachable Time */ u_int32_t retrans; /* Retrans Timer */ u_int32_t flags; /* Flags */ int recalctm; /* BaseReacable re-calculation timer */ u_int8_t chlim; /* CurHopLimit */ u_int8_t receivedra; } ndi; }; #endif struct in6_ndireq { char ifname[IFNAMSIZ]; struct nd_ifinfo ndi; }; struct in6_ndifreq { char ifname[IFNAMSIZ]; u_long ifindex; }; /* Prefix status */ #define NDPRF_ONLINK 0x1 #define NDPRF_DETACHED 0x2 /* protocol constants */ #define MAX_RTR_SOLICITATION_DELAY 1 /* 1sec */ #define RTR_SOLICITATION_INTERVAL 4 /* 4sec */ #define MAX_RTR_SOLICITATIONS 3 #define ND6_INFINITE_LIFETIME 0xffffffff #ifdef _KERNEL /* node constants */ #define MAX_REACHABLE_TIME 3600000 /* msec */ #define REACHABLE_TIME 30000 /* msec */ #define RETRANS_TIMER 1000 /* msec */ #define MIN_RANDOM_FACTOR 512 /* 1024 * 0.5 */ #define MAX_RANDOM_FACTOR 1536 /* 1024 * 1.5 */ #define DEF_TEMP_VALID_LIFETIME 604800 /* 1 week */ #define DEF_TEMP_PREFERRED_LIFETIME 86400 /* 1 day */ #define TEMPADDR_REGEN_ADVANCE 5 /* sec */ #define MAX_TEMP_DESYNC_FACTOR 600 /* 10 min */ #define ND_COMPUTE_RTIME(x) \ (((MIN_RANDOM_FACTOR * (x >> 10)) + (arc4random() & \ ((MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * (x >> 10)))) /1000) TAILQ_HEAD(nd_drhead, nd_defrouter); struct nd_defrouter { TAILQ_ENTRY(nd_defrouter) dr_entry; struct in6_addr rtaddr; u_char flags; /* flags on RA message */ u_short rtlifetime; u_long expire; struct ifnet *ifp; int installed; /* is installed into kernel routing table */ }; struct nd_prefixctl { struct ifnet *ndpr_ifp; /* prefix */ struct sockaddr_in6 ndpr_prefix; u_char ndpr_plen; u_int32_t ndpr_vltime; /* advertised valid lifetime */ u_int32_t ndpr_pltime; /* advertised preferred lifetime */ struct prf_ra ndpr_flags; }; struct 
nd_prefix { struct ifnet *ndpr_ifp; LIST_ENTRY(nd_prefix) ndpr_entry; struct sockaddr_in6 ndpr_prefix; /* prefix */ struct in6_addr ndpr_mask; /* netmask derived from the prefix */ u_int32_t ndpr_vltime; /* advertised valid lifetime */ u_int32_t ndpr_pltime; /* advertised preferred lifetime */ time_t ndpr_expire; /* expiration time of the prefix */ time_t ndpr_preferred; /* preferred time of the prefix */ time_t ndpr_lastupdate; /* reception time of last advertisement */ struct prf_ra ndpr_flags; u_int32_t ndpr_stateflags; /* actual state flags */ /* list of routers that advertise the prefix: */ LIST_HEAD(pr_rtrhead, nd_pfxrouter) ndpr_advrtrs; u_char ndpr_plen; int ndpr_refcnt; /* reference couter from addresses */ }; #define ndpr_raf ndpr_flags #define ndpr_raf_onlink ndpr_flags.onlink #define ndpr_raf_auto ndpr_flags.autonomous #define ndpr_raf_router ndpr_flags.router /* * Message format for use in obtaining information about prefixes * from inet6 sysctl function */ struct inet6_ndpr_msghdr { u_short inpm_msglen; /* to skip over non-understood messages */ u_char inpm_version; /* future binary compatibility */ u_char inpm_type; /* message type */ struct in6_addr inpm_prefix; u_long prm_vltim; u_long prm_pltime; u_long prm_expire; u_long prm_preferred; struct in6_prflags prm_flags; u_short prm_index; /* index for associated ifp */ u_char prm_plen; /* length of prefix in bits */ }; #define prm_raf_onlink prm_flags.prf_ra.onlink #define prm_raf_auto prm_flags.prf_ra.autonomous #define prm_statef_onlink prm_flags.prf_state.onlink #define prm_rrf_decrvalid prm_flags.prf_rr.decrvalid #define prm_rrf_decrprefd prm_flags.prf_rr.decrprefd struct nd_pfxrouter { LIST_ENTRY(nd_pfxrouter) pfr_entry; struct nd_defrouter *router; }; LIST_HEAD(nd_prhead, nd_prefix); /* nd6.c */ VNET_DECLARE(int, nd6_prune); VNET_DECLARE(int, nd6_delay); VNET_DECLARE(int, nd6_umaxtries); VNET_DECLARE(int, nd6_mmaxtries); VNET_DECLARE(int, nd6_useloopback); VNET_DECLARE(int, nd6_maxnudhint); VNET_DECLARE(int, nd6_gctimer); VNET_DECLARE(struct nd_drhead, nd_defrouter); VNET_DECLARE(struct nd_prhead, nd_prefix); VNET_DECLARE(int, nd6_debug); VNET_DECLARE(int, nd6_onlink_ns_rfc4861); #define V_nd6_prune VNET(nd6_prune) #define V_nd6_delay VNET(nd6_delay) #define V_nd6_umaxtries VNET(nd6_umaxtries) #define V_nd6_mmaxtries VNET(nd6_mmaxtries) #define V_nd6_useloopback VNET(nd6_useloopback) #define V_nd6_maxnudhint VNET(nd6_maxnudhint) #define V_nd6_gctimer VNET(nd6_gctimer) #define V_nd_defrouter VNET(nd_defrouter) #define V_nd_prefix VNET(nd_prefix) #define V_nd6_debug VNET(nd6_debug) #define V_nd6_onlink_ns_rfc4861 VNET(nd6_onlink_ns_rfc4861) #define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0) VNET_DECLARE(struct callout, nd6_timer_ch); #define V_nd6_timer_ch VNET(nd6_timer_ch) /* nd6_rtr.c */ VNET_DECLARE(int, nd6_defifindex); VNET_DECLARE(int, ip6_desync_factor); /* seconds */ VNET_DECLARE(u_int32_t, ip6_temp_preferred_lifetime); /* seconds */ VNET_DECLARE(u_int32_t, ip6_temp_valid_lifetime); /* seconds */ VNET_DECLARE(int, ip6_temp_regen_advance); /* seconds */ #define V_nd6_defifindex VNET(nd6_defifindex) #define V_ip6_desync_factor VNET(ip6_desync_factor) #define V_ip6_temp_preferred_lifetime VNET(ip6_temp_preferred_lifetime) #define V_ip6_temp_valid_lifetime VNET(ip6_temp_valid_lifetime) #define V_ip6_temp_regen_advance VNET(ip6_temp_regen_advance) union nd_opts { struct nd_opt_hdr *nd_opt_array[16]; /* max = ND_OPT_NONCE */ struct { struct nd_opt_hdr *zero; struct nd_opt_hdr *src_lladdr; struct 
nd_opt_hdr *tgt_lladdr; struct nd_opt_prefix_info *pi_beg; /* multiple opts, start */ struct nd_opt_rd_hdr *rh; struct nd_opt_mtu *mtu; struct nd_opt_hdr *__res6; struct nd_opt_hdr *__res7; struct nd_opt_hdr *__res8; struct nd_opt_hdr *__res9; struct nd_opt_hdr *__res10; struct nd_opt_hdr *__res11; struct nd_opt_hdr *__res12; struct nd_opt_hdr *__res13; struct nd_opt_nonce *nonce; struct nd_opt_hdr *__res15; struct nd_opt_hdr *search; /* multiple opts */ struct nd_opt_hdr *last; /* multiple opts */ int done; struct nd_opt_prefix_info *pi_end;/* multiple opts, end */ } nd_opt_each; }; #define nd_opts_src_lladdr nd_opt_each.src_lladdr #define nd_opts_tgt_lladdr nd_opt_each.tgt_lladdr #define nd_opts_pi nd_opt_each.pi_beg #define nd_opts_pi_end nd_opt_each.pi_end #define nd_opts_rh nd_opt_each.rh #define nd_opts_mtu nd_opt_each.mtu #define nd_opts_nonce nd_opt_each.nonce #define nd_opts_search nd_opt_each.search #define nd_opts_last nd_opt_each.last #define nd_opts_done nd_opt_each.done /* XXX: need nd6_var.h?? */ /* nd6.c */ void nd6_init(void); #ifdef VIMAGE void nd6_destroy(void); #endif struct nd_ifinfo *nd6_ifattach(struct ifnet *); void nd6_ifdetach(struct nd_ifinfo *); int nd6_is_addr_neighbor(const struct sockaddr_in6 *, struct ifnet *); void nd6_option_init(void *, int, union nd_opts *); struct nd_opt_hdr *nd6_option(union nd_opts *); int nd6_options(union nd_opts *); struct llentry *nd6_lookup(const struct in6_addr *, int, struct ifnet *); struct llentry *nd6_alloc(const struct in6_addr *, int, struct ifnet *); void nd6_setmtu(struct ifnet *); void nd6_llinfo_setstate(struct llentry *lle, int newstate); void nd6_timer(void *); void nd6_purge(struct ifnet *); int nd6_resolve(struct ifnet *, int, struct mbuf *, const struct sockaddr *, u_char *, uint32_t *); int nd6_ioctl(u_long, caddr_t, struct ifnet *); void nd6_cache_lladdr(struct ifnet *, struct in6_addr *, char *, int, int, int); void nd6_grab_holdchain(struct llentry *, struct mbuf **, struct sockaddr_in6 *); int nd6_flush_holdchain(struct ifnet *, struct ifnet *, struct mbuf *, struct sockaddr_in6 *); int nd6_add_ifa_lle(struct in6_ifaddr *); void nd6_rem_ifa_lle(struct in6_ifaddr *, int); int nd6_output_ifp(struct ifnet *, struct ifnet *, struct mbuf *, - struct sockaddr_in6 *); + struct sockaddr_in6 *, struct route *); /* nd6_nbr.c */ void nd6_na_input(struct mbuf *, int, int); void nd6_na_output(struct ifnet *, const struct in6_addr *, const struct in6_addr *, u_long, int, struct sockaddr *); void nd6_ns_input(struct mbuf *, int, int); void nd6_ns_output(struct ifnet *, const struct in6_addr *, const struct in6_addr *, const struct in6_addr *, uint8_t *); caddr_t nd6_ifptomac(struct ifnet *); void nd6_dad_init(void); void nd6_dad_start(struct ifaddr *, int); void nd6_dad_stop(struct ifaddr *); /* nd6_rtr.c */ void nd6_rs_input(struct mbuf *, int, int); void nd6_ra_input(struct mbuf *, int, int); void prelist_del(struct nd_prefix *); void defrouter_reset(void); void defrouter_select(void); void defrtrlist_del(struct nd_defrouter *); void prelist_remove(struct nd_prefix *); int nd6_prelist_add(struct nd_prefixctl *, struct nd_defrouter *, struct nd_prefix **); void pfxlist_onlink_check(void); struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *); struct nd_prefix *nd6_prefix_lookup(struct nd_prefixctl *); void rt6_flush(struct in6_addr *, struct ifnet *); int nd6_setdefaultiface(int); int in6_tmpifadd(const struct in6_ifaddr *, int, int); #endif /* _KERNEL */ #endif /* _NETINET6_ND6_H_ */ Index: 
projects/powernv/netipsec/ipsec.h =================================================================== --- projects/powernv/netipsec/ipsec.h (revision 290990) +++ projects/powernv/netipsec/ipsec.h (revision 290991) @@ -1,372 +1,373 @@ /* $FreeBSD$ */ /* $KAME: ipsec.h,v 1.53 2001/11/20 08:32:38 itojun Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * IPsec controller part. */ #ifndef _NETIPSEC_IPSEC_H_ #define _NETIPSEC_IPSEC_H_ #if defined(_KERNEL) && !defined(_LKM) && !defined(KLD_MODULE) #include "opt_inet.h" #include "opt_ipsec.h" #endif #include #include #ifdef _KERNEL #include #include #include #define IPSEC_ASSERT(_c,_m) KASSERT(_c, _m) #define IPSEC_IS_PRIVILEGED_SO(_so) \ ((_so)->so_cred != NULL && \ priv_check_cred((_so)->so_cred, PRIV_NETINET_IPSEC, 0) \ == 0) /* * Security Policy Index * Ensure that both address families in the "src" and "dst" are same. * When the value of the ul_proto is ICMPv6, the port field in "src" * specifies ICMPv6 type, and the port field in "dst" specifies ICMPv6 code. */ struct secpolicyindex { u_int8_t dir; /* direction of packet flow, see below */ union sockaddr_union src; /* IP src address for SP */ union sockaddr_union dst; /* IP dst address for SP */ u_int8_t prefs; /* prefix length in bits for src */ u_int8_t prefd; /* prefix length in bits for dst */ u_int16_t ul_proto; /* upper layer Protocol */ #ifdef notyet uid_t uids; uid_t uidd; gid_t gids; gid_t gidd; #endif }; /* Security Policy Data Base */ struct secpolicy { TAILQ_ENTRY(secpolicy) chain; struct secpolicyindex spidx; /* selector */ struct ipsecrequest *req; /* pointer to the ipsec request tree, */ /* if policy == IPSEC else this value == NULL.*/ u_int refcnt; /* reference count */ u_int policy; /* policy_type per pfkeyv2.h */ u_int state; #define IPSEC_SPSTATE_DEAD 0 #define IPSEC_SPSTATE_ALIVE 1 + u_int32_t priority; /* priority of this policy */ u_int32_t id; /* It's unique number on the system. */ /* * lifetime handler. * the policy can be used without limitiation if both lifetime and * validtime are zero. 
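 *
 * (Restated as code, a hedged sketch of that rule; the timer code in
 * key.c is the authoritative implementation:
 *
 *	if ((sp->lifetime != 0 && now - sp->created > sp->lifetime) ||
 *	    (sp->validtime != 0 && now - sp->lastused > sp->validtime))
 *		;	// the policy has expired
 * )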
* "lifetime" is passed by sadb_lifetime.sadb_lifetime_addtime. * "validtime" is passed by sadb_lifetime.sadb_lifetime_usetime. */ time_t created; /* time created the policy */ time_t lastused; /* updated every when kernel sends a packet */ long lifetime; /* duration of the lifetime of this policy */ long validtime; /* duration this policy is valid without use */ }; /* Request for IPsec */ struct ipsecrequest { struct ipsecrequest *next; /* pointer to next structure */ /* If NULL, it means the end of chain. */ struct secasindex saidx;/* hint for search proper SA */ /* if __ss_len == 0 then no address specified.*/ u_int level; /* IPsec level defined below. */ struct secasvar *sav; /* place holder of SA for use */ struct secpolicy *sp; /* back pointer to SP */ struct rwlock lock; /* to interlock updates */ }; /* * Need recursion for when crypto callbacks happen directly, * as in the case of software crypto. Need to look at how * hard it is to remove this... */ #define IPSECREQUEST_LOCK_INIT(_isr) \ rw_init_flags(&(_isr)->lock, "ipsec request", RW_RECURSE) #define IPSECREQUEST_LOCK(_isr) rw_rlock(&(_isr)->lock) #define IPSECREQUEST_UNLOCK(_isr) rw_runlock(&(_isr)->lock) #define IPSECREQUEST_WLOCK(_isr) rw_wlock(&(_isr)->lock) #define IPSECREQUEST_WUNLOCK(_isr) rw_wunlock(&(_isr)->lock) #define IPSECREQUEST_UPGRADE(_isr) rw_try_upgrade(&(_isr)->lock) #define IPSECREQUEST_DOWNGRADE(_isr) rw_downgrade(&(_isr)->lock) #define IPSECREQUEST_LOCK_DESTROY(_isr) rw_destroy(&(_isr)->lock) #define IPSECREQUEST_LOCK_ASSERT(_isr) rw_assert(&(_isr)->lock, RA_LOCKED) /* security policy in PCB */ struct inpcbpolicy { struct secpolicy *sp_in; struct secpolicy *sp_out; int priv; /* privileged socket ? */ }; /* SP acquiring list table. */ struct secspacq { LIST_ENTRY(secspacq) chain; struct secpolicyindex spidx; time_t created; /* for lifetime */ int count; /* for lifetime */ /* XXX: here is mbuf place holder to be sent ? */ }; #endif /* _KERNEL */ /* according to IANA assignment, port 0x0000 and proto 0xff are reserved. */ #define IPSEC_PORT_ANY 0 #define IPSEC_ULPROTO_ANY 255 #define IPSEC_PROTO_ANY 255 /* mode of security protocol */ /* NOTE: DON'T use IPSEC_MODE_ANY at SPD. It's only use in SAD */ #define IPSEC_MODE_ANY 0 /* i.e. wildcard. */ #define IPSEC_MODE_TRANSPORT 1 #define IPSEC_MODE_TUNNEL 2 #define IPSEC_MODE_TCPMD5 3 /* TCP MD5 mode */ /* * Direction of security policy. * NOTE: Since INVALID is used just as flag. * The other are used for loop counter too. */ #define IPSEC_DIR_ANY 0 #define IPSEC_DIR_INBOUND 1 #define IPSEC_DIR_OUTBOUND 2 #define IPSEC_DIR_MAX 3 #define IPSEC_DIR_INVALID 4 /* Policy level */ /* * IPSEC, ENTRUST and BYPASS are allowed for setsockopt() in PCB, * DISCARD, IPSEC and NONE are allowed for setkey() in SPD. * DISCARD and NONE are allowed for system default. */ #define IPSEC_POLICY_DISCARD 0 /* discarding packet */ #define IPSEC_POLICY_NONE 1 /* through IPsec engine */ #define IPSEC_POLICY_IPSEC 2 /* do IPsec */ #define IPSEC_POLICY_ENTRUST 3 /* consulting SPD if present. */ #define IPSEC_POLICY_BYPASS 4 /* only for privileged socket. */ /* Security protocol level */ #define IPSEC_LEVEL_DEFAULT 0 /* reference to system default */ #define IPSEC_LEVEL_USE 1 /* use SA if present. */ #define IPSEC_LEVEL_REQUIRE 2 /* require SA. */ #define IPSEC_LEVEL_UNIQUE 3 /* unique SA. */ #define IPSEC_MANUAL_REQID_MAX 0x3fff /* * if security policy level == unique, this id * indicate to a relative SA for use, else is * zero. * 1 - 0x3fff are reserved for manual keying. 
* 0 are reserved for above reason. Others is * for kernel use. * Note that this id doesn't identify SA * by only itself. */ #define IPSEC_REPLAYWSIZE 32 /* statistics for ipsec processing */ struct ipsecstat { uint64_t ips_in_polvio; /* input: sec policy violation */ uint64_t ips_in_nomem; /* input: no memory available */ uint64_t ips_in_inval; /* input: generic error */ uint64_t ips_out_polvio; /* output: sec policy violation */ uint64_t ips_out_nosa; /* output: SA unavailable */ uint64_t ips_out_nomem; /* output: no memory available */ uint64_t ips_out_noroute; /* output: no route available */ uint64_t ips_out_inval; /* output: generic error */ uint64_t ips_out_bundlesa; /* output: bundled SA processed */ uint64_t ips_mbcoalesced; /* mbufs coalesced during clone */ uint64_t ips_clcoalesced; /* clusters coalesced during clone */ uint64_t ips_clcopied; /* clusters copied during clone */ uint64_t ips_mbinserted; /* mbufs inserted during makespace */ /* * Temporary statistics for performance analysis. */ /* See where ESP/AH/IPCOMP header land in mbuf on input */ uint64_t ips_input_front; uint64_t ips_input_middle; uint64_t ips_input_end; }; /* * Definitions for IPsec & Key sysctl operations. */ #define IPSECCTL_STATS 1 /* stats */ #define IPSECCTL_DEF_POLICY 2 #define IPSECCTL_DEF_ESP_TRANSLEV 3 /* int; ESP transport mode */ #define IPSECCTL_DEF_ESP_NETLEV 4 /* int; ESP tunnel mode */ #define IPSECCTL_DEF_AH_TRANSLEV 5 /* int; AH transport mode */ #define IPSECCTL_DEF_AH_NETLEV 6 /* int; AH tunnel mode */ #if 0 /* obsolete, do not reuse */ #define IPSECCTL_INBOUND_CALL_IKE 7 #endif #define IPSECCTL_AH_CLEARTOS 8 #define IPSECCTL_AH_OFFSETMASK 9 #define IPSECCTL_DFBIT 10 #define IPSECCTL_ECN 11 #define IPSECCTL_DEBUG 12 #define IPSECCTL_ESP_RANDPAD 13 #ifdef _KERNEL #include VNET_DECLARE(int, ipsec_debug); #define V_ipsec_debug VNET(ipsec_debug) #ifdef REGRESSION VNET_DECLARE(int, ipsec_replay); VNET_DECLARE(int, ipsec_integrity); #define V_ipsec_replay VNET(ipsec_replay) #define V_ipsec_integrity VNET(ipsec_integrity) #endif VNET_PCPUSTAT_DECLARE(struct ipsecstat, ipsec4stat); VNET_DECLARE(int, ip4_esp_trans_deflev); VNET_DECLARE(int, ip4_esp_net_deflev); VNET_DECLARE(int, ip4_ah_trans_deflev); VNET_DECLARE(int, ip4_ah_net_deflev); VNET_DECLARE(int, ip4_ah_offsetmask); VNET_DECLARE(int, ip4_ipsec_dfbit); VNET_DECLARE(int, ip4_ipsec_ecn); VNET_DECLARE(int, ip4_esp_randpad); VNET_DECLARE(int, crypto_support); #define IPSECSTAT_INC(name) \ VNET_PCPUSTAT_ADD(struct ipsecstat, ipsec4stat, name, 1) #define V_ip4_esp_trans_deflev VNET(ip4_esp_trans_deflev) #define V_ip4_esp_net_deflev VNET(ip4_esp_net_deflev) #define V_ip4_ah_trans_deflev VNET(ip4_ah_trans_deflev) #define V_ip4_ah_net_deflev VNET(ip4_ah_net_deflev) #define V_ip4_ah_offsetmask VNET(ip4_ah_offsetmask) #define V_ip4_ipsec_dfbit VNET(ip4_ipsec_dfbit) #define V_ip4_ipsec_ecn VNET(ip4_ipsec_ecn) #define V_ip4_esp_randpad VNET(ip4_esp_randpad) #define V_crypto_support VNET(crypto_support) #define ipseclog(x) do { if (V_ipsec_debug) log x; } while (0) /* for openbsd compatibility */ #define DPRINTF(x) do { if (V_ipsec_debug) printf x; } while (0) extern struct ipsecrequest *ipsec_newisr(void); extern void ipsec_delisr(struct ipsecrequest *); struct tdb_ident; extern struct secpolicy *ipsec_getpolicy(struct tdb_ident*, u_int); struct inpcb; extern struct secpolicy *ipsec4_checkpolicy(struct mbuf *, u_int, int *, struct inpcb *); extern struct secpolicy * ipsec_getpolicybyaddr(struct mbuf *, u_int, int *); struct inpcb; extern int 
ipsec_init_policy(struct socket *so, struct inpcbpolicy **); extern int ipsec_copy_policy(struct inpcbpolicy *, struct inpcbpolicy *); extern u_int ipsec_get_reqlevel(struct ipsecrequest *); extern int ipsec_set_policy(struct inpcb *inp, int optname, caddr_t request, size_t len, struct ucred *cred); extern int ipsec_get_policy(struct inpcb *inpcb, caddr_t request, size_t len, struct mbuf **mp); extern int ipsec_delete_pcbpolicy(struct inpcb *); extern int ipsec4_in_reject(struct mbuf *, struct inpcb *); struct secas; struct tcpcb; extern int ipsec_chkreplay(u_int32_t, struct secasvar *); extern int ipsec_updatereplay(u_int32_t, struct secasvar *); extern size_t ipsec_hdrsiz(struct mbuf *, u_int, struct inpcb *); extern size_t ipsec_hdrsiz_tcp(struct tcpcb *); union sockaddr_union; extern char *ipsec_address(union sockaddr_union *, char *, socklen_t); extern char *ipsec_logsastr(struct secasvar *, char *, size_t); extern void ipsec_dumpmbuf(struct mbuf *); struct m_tag; extern int ah4_input(struct mbuf **mp, int *offp, int proto); extern void ah4_ctlinput(int cmd, struct sockaddr *sa, void *); extern int esp4_input(struct mbuf **mp, int *offp, int proto); extern void esp4_ctlinput(int cmd, struct sockaddr *sa, void *); extern int ipcomp4_input(struct mbuf **mp, int *offp, int proto); extern int ipsec_common_input(struct mbuf *m, int, int, int, int); extern int ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int protoff); extern int ipsec4_process_packet(struct mbuf *, struct ipsecrequest *); extern int ipsec_process_done(struct mbuf *, struct ipsecrequest *); extern struct mbuf *ipsec_copypkt(struct mbuf *); extern void m_checkalignment(const char* where, struct mbuf *m0, int off, int len); extern struct mbuf *m_makespace(struct mbuf *m0, int skip, int hlen, int *off); extern caddr_t m_pad(struct mbuf *m, int n); extern int m_striphdr(struct mbuf *m, int skip, int hlen); #ifdef DEV_ENC #define ENC_BEFORE 0x0001 #define ENC_AFTER 0x0002 #define ENC_IN 0x0100 #define ENC_OUT 0x0200 extern int ipsec_filter(struct mbuf **, int, int); extern void ipsec_bpf(struct mbuf *, struct secasvar *, int, int); #endif #endif /* _KERNEL */ #ifndef _KERNEL extern caddr_t ipsec_set_policy(char *, int); extern int ipsec_get_policylen(caddr_t); extern char *ipsec_dump_policy(caddr_t, char *); extern const char *ipsec_strerror(void); #endif /* ! KERNEL */ #endif /* _NETIPSEC_IPSEC_H_ */ Index: projects/powernv/netipsec/key.c =================================================================== --- projects/powernv/netipsec/key.c (revision 290990) +++ projects/powernv/netipsec/key.c (revision 290991) @@ -1,7847 +1,7870 @@ /* $FreeBSD$ */ /* $KAME: key.c,v 1.191 2001/06/27 10:46:49 sakane Exp $ */ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This code refers to RFC 2367. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #endif /* INET6 */ #if defined(INET) || defined(INET6) #include #endif #ifdef INET6 #include #endif /* INET6 */ #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include /* randomness */ #include #define FULLMASK 0xff #define _BITS(bytes) ((bytes) << 3) /* * Note on SA reference counting: * - SAs that are not in DEAD state hold (total external references + 1) * in the reference count field; they cannot be freed and are * referenced from the SA header. * - SAs that are in DEAD state hold (total external references) * in the reference count field; they are ready to be freed. The reference * from the SA header is removed in key_delsav(), when the reference count * field hits 0 (= no external reference other than from the SA header).
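 *
 * A toy model of this invariant, for reference; the struct and
 * function names here are hypothetical and not part of this file.
 */
struct sa_refmodel {
	u_int refcnt;		/* external refs, +1 while not DEAD */
	u_int state;		/* SADB_SASTATE_* */
};

static void
sa_refmodel_kill(struct sa_refmodel *sav)
{
	/* Entering DEAD gives up the SA header's implicit reference, */
	sav->state = SADB_SASTATE_DEAD;
	sav->refcnt--;
	/* so the SA is freed when the last external reference is
	 * dropped and refcnt reaches 0, as key_delsav() does. */
}
/*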
*/ VNET_DEFINE(u_int32_t, key_debug_level) = 0; static VNET_DEFINE(u_int, key_spi_trycnt) = 1000; static VNET_DEFINE(u_int32_t, key_spi_minval) = 0x100; static VNET_DEFINE(u_int32_t, key_spi_maxval) = 0x0fffffff; /* XXX */ static VNET_DEFINE(u_int32_t, policy_id) = 0; /*interval to initialize randseed,1(m)*/ static VNET_DEFINE(u_int, key_int_random) = 60; /* interval to expire acquiring, 30(s)*/ static VNET_DEFINE(u_int, key_larval_lifetime) = 30; /* counter for blocking SADB_ACQUIRE.*/ static VNET_DEFINE(int, key_blockacq_count) = 10; /* lifetime for blocking SADB_ACQUIRE.*/ static VNET_DEFINE(int, key_blockacq_lifetime) = 20; /* preferred old sa rather than new sa.*/ static VNET_DEFINE(int, key_preferred_oldsa) = 1; #define V_key_spi_trycnt VNET(key_spi_trycnt) #define V_key_spi_minval VNET(key_spi_minval) #define V_key_spi_maxval VNET(key_spi_maxval) #define V_policy_id VNET(policy_id) #define V_key_int_random VNET(key_int_random) #define V_key_larval_lifetime VNET(key_larval_lifetime) #define V_key_blockacq_count VNET(key_blockacq_count) #define V_key_blockacq_lifetime VNET(key_blockacq_lifetime) #define V_key_preferred_oldsa VNET(key_preferred_oldsa) static VNET_DEFINE(u_int32_t, acq_seq) = 0; #define V_acq_seq VNET(acq_seq) /* SPD */ static VNET_DEFINE(TAILQ_HEAD(_sptree, secpolicy), sptree[IPSEC_DIR_MAX]); static struct rmlock sptree_lock; #define V_sptree VNET(sptree) #define SPTREE_LOCK_INIT() rm_init(&sptree_lock, "sptree") #define SPTREE_LOCK_DESTROY() rm_destroy(&sptree_lock) #define SPTREE_RLOCK_TRACKER struct rm_priotracker sptree_tracker #define SPTREE_RLOCK() rm_rlock(&sptree_lock, &sptree_tracker) #define SPTREE_RUNLOCK() rm_runlock(&sptree_lock, &sptree_tracker) #define SPTREE_RLOCK_ASSERT() rm_assert(&sptree_lock, RA_RLOCKED) #define SPTREE_WLOCK() rm_wlock(&sptree_lock) #define SPTREE_WUNLOCK() rm_wunlock(&sptree_lock) #define SPTREE_WLOCK_ASSERT() rm_assert(&sptree_lock, RA_WLOCKED) #define SPTREE_UNLOCK_ASSERT() rm_assert(&sptree_lock, RA_UNLOCKED) static VNET_DEFINE(LIST_HEAD(_sahtree, secashead), sahtree); /* SAD */ #define V_sahtree VNET(sahtree) static struct mtx sahtree_lock; #define SAHTREE_LOCK_INIT() \ mtx_init(&sahtree_lock, "sahtree", \ "fast ipsec security association database", MTX_DEF) #define SAHTREE_LOCK_DESTROY() mtx_destroy(&sahtree_lock) #define SAHTREE_LOCK() mtx_lock(&sahtree_lock) #define SAHTREE_UNLOCK() mtx_unlock(&sahtree_lock) #define SAHTREE_LOCK_ASSERT() mtx_assert(&sahtree_lock, MA_OWNED) /* registed list */ static VNET_DEFINE(LIST_HEAD(_regtree, secreg), regtree[SADB_SATYPE_MAX + 1]); #define V_regtree VNET(regtree) static struct mtx regtree_lock; #define REGTREE_LOCK_INIT() \ mtx_init(®tree_lock, "regtree", "fast ipsec regtree", MTX_DEF) #define REGTREE_LOCK_DESTROY() mtx_destroy(®tree_lock) #define REGTREE_LOCK() mtx_lock(®tree_lock) #define REGTREE_UNLOCK() mtx_unlock(®tree_lock) #define REGTREE_LOCK_ASSERT() mtx_assert(®tree_lock, MA_OWNED) static VNET_DEFINE(LIST_HEAD(_acqtree, secacq), acqtree); /* acquiring list */ #define V_acqtree VNET(acqtree) static struct mtx acq_lock; #define ACQ_LOCK_INIT() \ mtx_init(&acq_lock, "acqtree", "fast ipsec acquire list", MTX_DEF) #define ACQ_LOCK_DESTROY() mtx_destroy(&acq_lock) #define ACQ_LOCK() mtx_lock(&acq_lock) #define ACQ_UNLOCK() mtx_unlock(&acq_lock) #define ACQ_LOCK_ASSERT() mtx_assert(&acq_lock, MA_OWNED) /* SP acquiring list */ static VNET_DEFINE(LIST_HEAD(_spacqtree, secspacq), spacqtree); #define V_spacqtree VNET(spacqtree) static struct mtx spacq_lock; #define SPACQ_LOCK_INIT() \ 
mtx_init(&spacq_lock, "spacqtree", \ "fast ipsec security policy acquire list", MTX_DEF) #define SPACQ_LOCK_DESTROY() mtx_destroy(&spacq_lock) #define SPACQ_LOCK() mtx_lock(&spacq_lock) #define SPACQ_UNLOCK() mtx_unlock(&spacq_lock) #define SPACQ_LOCK_ASSERT() mtx_assert(&spacq_lock, MA_OWNED) /* search order for SAs */ static const u_int saorder_state_valid_prefer_old[] = { SADB_SASTATE_DYING, SADB_SASTATE_MATURE, }; static const u_int saorder_state_valid_prefer_new[] = { SADB_SASTATE_MATURE, SADB_SASTATE_DYING, }; static const u_int saorder_state_alive[] = { /* except DEAD */ SADB_SASTATE_MATURE, SADB_SASTATE_DYING, SADB_SASTATE_LARVAL }; static const u_int saorder_state_any[] = { SADB_SASTATE_MATURE, SADB_SASTATE_DYING, SADB_SASTATE_LARVAL, SADB_SASTATE_DEAD }; static const int minsize[] = { sizeof(struct sadb_msg), /* SADB_EXT_RESERVED */ sizeof(struct sadb_sa), /* SADB_EXT_SA */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_CURRENT */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_HARD */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_SOFT */ sizeof(struct sadb_address), /* SADB_EXT_ADDRESS_SRC */ sizeof(struct sadb_address), /* SADB_EXT_ADDRESS_DST */ sizeof(struct sadb_address), /* SADB_EXT_ADDRESS_PROXY */ sizeof(struct sadb_key), /* SADB_EXT_KEY_AUTH */ sizeof(struct sadb_key), /* SADB_EXT_KEY_ENCRYPT */ sizeof(struct sadb_ident), /* SADB_EXT_IDENTITY_SRC */ sizeof(struct sadb_ident), /* SADB_EXT_IDENTITY_DST */ sizeof(struct sadb_sens), /* SADB_EXT_SENSITIVITY */ sizeof(struct sadb_prop), /* SADB_EXT_PROPOSAL */ sizeof(struct sadb_supported), /* SADB_EXT_SUPPORTED_AUTH */ sizeof(struct sadb_supported), /* SADB_EXT_SUPPORTED_ENCRYPT */ sizeof(struct sadb_spirange), /* SADB_EXT_SPIRANGE */ 0, /* SADB_X_EXT_KMPRIVATE */ sizeof(struct sadb_x_policy), /* SADB_X_EXT_POLICY */ sizeof(struct sadb_x_sa2), /* SADB_X_SA2 */ sizeof(struct sadb_x_nat_t_type),/* SADB_X_EXT_NAT_T_TYPE */ sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_SPORT */ sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_DPORT */ sizeof(struct sadb_address), /* SADB_X_EXT_NAT_T_OAI */ sizeof(struct sadb_address), /* SADB_X_EXT_NAT_T_OAR */ sizeof(struct sadb_x_nat_t_frag),/* SADB_X_EXT_NAT_T_FRAG */ }; static const int maxsize[] = { sizeof(struct sadb_msg), /* SADB_EXT_RESERVED */ sizeof(struct sadb_sa), /* SADB_EXT_SA */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_CURRENT */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_HARD */ sizeof(struct sadb_lifetime), /* SADB_EXT_LIFETIME_SOFT */ 0, /* SADB_EXT_ADDRESS_SRC */ 0, /* SADB_EXT_ADDRESS_DST */ 0, /* SADB_EXT_ADDRESS_PROXY */ 0, /* SADB_EXT_KEY_AUTH */ 0, /* SADB_EXT_KEY_ENCRYPT */ 0, /* SADB_EXT_IDENTITY_SRC */ 0, /* SADB_EXT_IDENTITY_DST */ 0, /* SADB_EXT_SENSITIVITY */ 0, /* SADB_EXT_PROPOSAL */ 0, /* SADB_EXT_SUPPORTED_AUTH */ 0, /* SADB_EXT_SUPPORTED_ENCRYPT */ sizeof(struct sadb_spirange), /* SADB_EXT_SPIRANGE */ 0, /* SADB_X_EXT_KMPRIVATE */ 0, /* SADB_X_EXT_POLICY */ sizeof(struct sadb_x_sa2), /* SADB_X_SA2 */ sizeof(struct sadb_x_nat_t_type),/* SADB_X_EXT_NAT_T_TYPE */ sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_SPORT */ sizeof(struct sadb_x_nat_t_port),/* SADB_X_EXT_NAT_T_DPORT */ 0, /* SADB_X_EXT_NAT_T_OAI */ 0, /* SADB_X_EXT_NAT_T_OAR */ sizeof(struct sadb_x_nat_t_frag),/* SADB_X_EXT_NAT_T_FRAG */ }; static VNET_DEFINE(int, ipsec_esp_keymin) = 256; static VNET_DEFINE(int, ipsec_esp_auth) = 0; static VNET_DEFINE(int, ipsec_ah_keymin) = 128; #define V_ipsec_esp_keymin VNET(ipsec_esp_keymin) #define V_ipsec_esp_auth 
VNET(ipsec_esp_auth) #define V_ipsec_ah_keymin VNET(ipsec_ah_keymin) #ifdef SYSCTL_DECL SYSCTL_DECL(_net_key); #endif SYSCTL_INT(_net_key, KEYCTL_DEBUG_LEVEL, debug, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_debug_level), 0, ""); /* max count of trial for the decision of spi value */ SYSCTL_INT(_net_key, KEYCTL_SPI_TRY, spi_trycnt, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_spi_trycnt), 0, ""); /* minimum spi value to allocate automatically. */ SYSCTL_INT(_net_key, KEYCTL_SPI_MIN_VALUE, spi_minval, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_spi_minval), 0, ""); /* maximun spi value to allocate automatically. */ SYSCTL_INT(_net_key, KEYCTL_SPI_MAX_VALUE, spi_maxval, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_spi_maxval), 0, ""); /* interval to initialize randseed */ SYSCTL_INT(_net_key, KEYCTL_RANDOM_INT, int_random, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_int_random), 0, ""); /* lifetime for larval SA */ SYSCTL_INT(_net_key, KEYCTL_LARVAL_LIFETIME, larval_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_larval_lifetime), 0, ""); /* counter for blocking to send SADB_ACQUIRE to IKEd */ SYSCTL_INT(_net_key, KEYCTL_BLOCKACQ_COUNT, blockacq_count, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_blockacq_count), 0, ""); /* lifetime for blocking to send SADB_ACQUIRE to IKEd */ SYSCTL_INT(_net_key, KEYCTL_BLOCKACQ_LIFETIME, blockacq_lifetime, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_blockacq_lifetime), 0, ""); /* ESP auth */ SYSCTL_INT(_net_key, KEYCTL_ESP_AUTH, esp_auth, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_esp_auth), 0, ""); /* minimum ESP key length */ SYSCTL_INT(_net_key, KEYCTL_ESP_KEYMIN, esp_keymin, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_esp_keymin), 0, ""); /* minimum AH key length */ SYSCTL_INT(_net_key, KEYCTL_AH_KEYMIN, ah_keymin, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_ah_keymin), 0, ""); /* perfered old SA rather than new SA */ SYSCTL_INT(_net_key, KEYCTL_PREFERED_OLDSA, preferred_oldsa, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_preferred_oldsa), 0, ""); #define __LIST_CHAINED(elm) \ (!((elm)->chain.le_next == NULL && (elm)->chain.le_prev == NULL)) #define LIST_INSERT_TAIL(head, elm, type, field) \ do {\ struct type *curelm = LIST_FIRST(head); \ if (curelm == NULL) {\ LIST_INSERT_HEAD(head, elm, field); \ } else { \ while (LIST_NEXT(curelm, field)) \ curelm = LIST_NEXT(curelm, field);\ LIST_INSERT_AFTER(curelm, elm, field);\ }\ } while (0) #define KEY_CHKSASTATE(head, sav, name) \ do { \ if ((head) != (sav)) { \ ipseclog((LOG_DEBUG, "%s: state mismatched (TREE=%d SA=%d)\n", \ (name), (head), (sav))); \ continue; \ } \ } while (0) #define KEY_CHKSPDIR(head, sp, name) \ do { \ if ((head) != (sp)) { \ ipseclog((LOG_DEBUG, "%s: direction mismatched (TREE=%d SP=%d), " \ "anyway continue.\n", \ (name), (head), (sp))); \ } \ } while (0) MALLOC_DEFINE(M_IPSEC_SA, "secasvar", "ipsec security association"); MALLOC_DEFINE(M_IPSEC_SAH, "sahead", "ipsec sa head"); MALLOC_DEFINE(M_IPSEC_SP, "ipsecpolicy", "ipsec security policy"); MALLOC_DEFINE(M_IPSEC_SR, "ipsecrequest", "ipsec security request"); MALLOC_DEFINE(M_IPSEC_MISC, "ipsec-misc", "ipsec miscellaneous"); MALLOC_DEFINE(M_IPSEC_SAQ, "ipsec-saq", "ipsec sa acquire"); MALLOC_DEFINE(M_IPSEC_SAR, "ipsec-reg", "ipsec sa acquire"); /* * set parameters into secpolicyindex buffer. * Must allocate secpolicyindex buffer passed to this function. 
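 *
 * For reference, what the macro below does, written long-hand for one
 * concrete outbound selector; example_fill_spidx() is hypothetical and
 * not part of this file.
 */
static void
example_fill_spidx(struct secpolicyindex *idx,
    const struct sockaddr *src, const struct sockaddr *dst)
{
	bzero(idx, sizeof(struct secpolicyindex));
	idx->dir = IPSEC_DIR_OUTBOUND;
	idx->prefs = 128;		/* host-to-host: full-length prefixes */
	idx->prefd = 128;
	idx->ul_proto = IPSEC_ULPROTO_ANY;
	bcopy(src, &idx->src, src->sa_len);	/* sa_len bounds the copy */
	bcopy(dst, &idx->dst, dst->sa_len);
}
/*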
*/ #define KEY_SETSECSPIDX(_dir, s, d, ps, pd, ulp, idx) \ do { \ bzero((idx), sizeof(struct secpolicyindex)); \ (idx)->dir = (_dir); \ (idx)->prefs = (ps); \ (idx)->prefd = (pd); \ (idx)->ul_proto = (ulp); \ bcopy((s), &(idx)->src, ((const struct sockaddr *)(s))->sa_len); \ bcopy((d), &(idx)->dst, ((const struct sockaddr *)(d))->sa_len); \ } while (0) /* * set parameters into a secasindex buffer. * The secasindex buffer must be allocated by the caller before calling this function. */ #define KEY_SETSECASIDX(p, m, r, s, d, idx) \ do { \ bzero((idx), sizeof(struct secasindex)); \ (idx)->proto = (p); \ (idx)->mode = (m); \ (idx)->reqid = (r); \ bcopy((s), &(idx)->src, ((const struct sockaddr *)(s))->sa_len); \ bcopy((d), &(idx)->dst, ((const struct sockaddr *)(d))->sa_len); \ } while (0) /* key statistics */ struct _keystat { u_long getspi_count; /* the average number of attempts needed to get a new SPI */ } keystat; struct sadb_msghdr { struct sadb_msg *msg; struct sadb_ext *ext[SADB_EXT_MAX + 1]; int extoff[SADB_EXT_MAX + 1]; int extlen[SADB_EXT_MAX + 1]; }; #ifndef IPSEC_DEBUG2 static struct callout key_timer; #endif static struct secasvar *key_allocsa_policy(const struct secasindex *); static void key_freesp_so(struct secpolicy **); static struct secasvar *key_do_allocsa_policy(struct secashead *, u_int); static void key_unlink(struct secpolicy *); static struct secpolicy *key_getsp(struct secpolicyindex *); static struct secpolicy *key_getspbyid(u_int32_t); static u_int32_t key_newreqid(void); static struct mbuf *key_gather_mbuf(struct mbuf *, const struct sadb_msghdr *, int, int, ...); static int key_spdadd(struct socket *, struct mbuf *, const struct sadb_msghdr *); static u_int32_t key_getnewspid(void); static int key_spddelete(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_spddelete2(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_spdget(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_spdflush(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_spddump(struct socket *, struct mbuf *, const struct sadb_msghdr *); static struct mbuf *key_setdumpsp(struct secpolicy *, u_int8_t, u_int32_t, u_int32_t); static u_int key_getspreqmsglen(struct secpolicy *); static int key_spdexpire(struct secpolicy *); static struct secashead *key_newsah(struct secasindex *); static void key_delsah(struct secashead *); static struct secasvar *key_newsav(struct mbuf *, const struct sadb_msghdr *, struct secashead *, int *, const char*, int); #define KEY_NEWSAV(m, sadb, sah, e) \ key_newsav(m, sadb, sah, e, __FILE__, __LINE__) static void key_delsav(struct secasvar *); static struct secashead *key_getsah(struct secasindex *); static struct secasvar *key_checkspidup(struct secasindex *, u_int32_t); static struct secasvar *key_getsavbyspi(struct secashead *, u_int32_t); static int key_setsaval(struct secasvar *, struct mbuf *, const struct sadb_msghdr *); static int key_mature(struct secasvar *); static struct mbuf *key_setdumpsa(struct secasvar *, u_int8_t, u_int8_t, u_int32_t, u_int32_t); static struct mbuf *key_setsadbmsg(u_int8_t, u_int16_t, u_int8_t, u_int32_t, pid_t, u_int16_t); static struct mbuf *key_setsadbsa(struct secasvar *); static struct mbuf *key_setsadbaddr(u_int16_t, const struct sockaddr *, u_int8_t, u_int16_t); #ifdef IPSEC_NAT_T static struct mbuf *key_setsadbxport(u_int16_t, u_int16_t); static struct mbuf *key_setsadbxtype(u_int16_t); #endif static void key_porttosaddr(struct sockaddr *, u_int16_t); #define 
KEY_PORTTOSADDR(saddr, port) \ key_porttosaddr((struct sockaddr *)(saddr), (port)) static struct mbuf *key_setsadbxsa2(u_int8_t, u_int32_t, u_int32_t); static struct mbuf *key_setsadbxpolicy(u_int16_t, u_int8_t, - u_int32_t); + u_int32_t, u_int32_t); static struct seckey *key_dup_keymsg(const struct sadb_key *, u_int, struct malloc_type *); static struct seclifetime *key_dup_lifemsg(const struct sadb_lifetime *src, struct malloc_type *type); #ifdef INET6 static int key_ismyaddr6(struct sockaddr_in6 *); #endif /* flags for key_cmpsaidx() */ #define CMP_HEAD 1 /* protocol, addresses. */ #define CMP_MODE_REQID 2 /* additionally HEAD, reqid, mode. */ #define CMP_REQID 3 /* additionally HEAD, reqid. */ #define CMP_EXACTLY 4 /* all elements. */ static int key_cmpsaidx(const struct secasindex *, const struct secasindex *, int); static int key_cmpspidx_exactly(struct secpolicyindex *, struct secpolicyindex *); static int key_cmpspidx_withmask(struct secpolicyindex *, struct secpolicyindex *); static int key_sockaddrcmp(const struct sockaddr *, const struct sockaddr *, int); static int key_bbcmp(const void *, const void *, u_int); static u_int16_t key_satype2proto(u_int8_t); static u_int8_t key_proto2satype(u_int16_t); static int key_getspi(struct socket *, struct mbuf *, const struct sadb_msghdr *); static u_int32_t key_do_getnewspi(struct sadb_spirange *, struct secasindex *); static int key_update(struct socket *, struct mbuf *, const struct sadb_msghdr *); #ifdef IPSEC_DOSEQCHECK static struct secasvar *key_getsavbyseq(struct secashead *, u_int32_t); #endif static int key_add(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_setident(struct secashead *, struct mbuf *, const struct sadb_msghdr *); static struct mbuf *key_getmsgbuf_x1(struct mbuf *, const struct sadb_msghdr *); static int key_delete(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_delete_all(struct socket *, struct mbuf *, const struct sadb_msghdr *, u_int16_t); static int key_get(struct socket *, struct mbuf *, const struct sadb_msghdr *); static void key_getcomb_setlifetime(struct sadb_comb *); static struct mbuf *key_getcomb_esp(void); static struct mbuf *key_getcomb_ah(void); static struct mbuf *key_getcomb_ipcomp(void); static struct mbuf *key_getprop(const struct secasindex *); static int key_acquire(const struct secasindex *, struct secpolicy *); static struct secacq *key_newacq(const struct secasindex *); static struct secacq *key_getacq(const struct secasindex *); static struct secacq *key_getacqbyseq(u_int32_t); static struct secspacq *key_newspacq(struct secpolicyindex *); static struct secspacq *key_getspacq(struct secpolicyindex *); static int key_acquire2(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_register(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_expire(struct secasvar *, int); static int key_flush(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_dump(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_promisc(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_senderror(struct socket *, struct mbuf *, int); static int key_validate_ext(const struct sadb_ext *, int); static int key_align(struct mbuf *, struct sadb_msghdr *); static struct mbuf *key_setlifetime(struct seclifetime *src, u_int16_t exttype); static struct mbuf *key_setkey(struct seckey *src, u_int16_t exttype); #if 0 static const char *key_getfqdn(void); static 
const char *key_getuserfqdn(void); #endif static void key_sa_chgstate(struct secasvar *, u_int8_t); static __inline void sa_initref(struct secasvar *sav) { refcount_init(&sav->refcnt, 1); } static __inline void sa_addref(struct secasvar *sav) { refcount_acquire(&sav->refcnt); IPSEC_ASSERT(sav->refcnt != 0, ("SA refcnt overflow")); } static __inline int sa_delref(struct secasvar *sav) { IPSEC_ASSERT(sav->refcnt > 0, ("SA refcnt underflow")); return (refcount_release(&sav->refcnt)); } #define SP_ADDREF(p) refcount_acquire(&(p)->refcnt) #define SP_DELREF(p) refcount_release(&(p)->refcnt) /* * Update the refcnt while holding the SPTREE lock. */ void key_addref(struct secpolicy *sp) { SP_ADDREF(sp); } /* * Return 0 when there are known to be no SP's for the specified * direction. Otherwise return 1. This is used by IPsec code * to optimize performance. */ int key_havesp(u_int dir) { return (dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND ? TAILQ_FIRST(&V_sptree[dir]) != NULL : 1); } /* %%% IPsec policy management */ /* * allocating a SP for OUTBOUND or INBOUND packet. * Must call key_freesp() later. * OUT: NULL: not found * others: found and return the pointer. */ struct secpolicy * key_allocsp(struct secpolicyindex *spidx, u_int dir, const char* where, int tag) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; IPSEC_ASSERT(spidx != NULL, ("null spidx")); IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, ("invalid direction %u", dir)); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s from %s:%u\n", __func__, where, tag)); /* get a SP entry */ KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("*** objects\n"); kdebug_secpolicyindex(spidx)); SPTREE_RLOCK(); TAILQ_FOREACH(sp, &V_sptree[dir], chain) { KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("*** in SPD\n"); kdebug_secpolicyindex(&sp->spidx)); if (key_cmpspidx_withmask(&sp->spidx, spidx)) goto found; } sp = NULL; found: if (sp) { /* sanity check */ KEY_CHKSPDIR(sp->spidx.dir, dir, __func__); /* found a SPD entry */ sp->lastused = time_second; SP_ADDREF(sp); } SPTREE_RUNLOCK(); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, sp, sp ? sp->id : 0, sp ? sp->refcnt : 0)); return sp; } /* * allocating a SP for OUTBOUND or INBOUND packet. * Must call key_freesp() later. * OUT: NULL: not found * others: found and return the pointer. 
*/ struct secpolicy * key_allocsp2(u_int32_t spi, union sockaddr_union *dst, u_int8_t proto, u_int dir, const char* where, int tag) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; IPSEC_ASSERT(dst != NULL, ("null dst")); IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, ("invalid direction %u", dir)); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s from %s:%u\n", __func__, where, tag)); /* get a SP entry */ KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("*** objects\n"); printf("spi %u proto %u dir %u\n", spi, proto, dir); kdebug_sockaddr(&dst->sa)); SPTREE_RLOCK(); TAILQ_FOREACH(sp, &V_sptree[dir], chain) { KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("*** in SPD\n"); kdebug_secpolicyindex(&sp->spidx)); /* compare simple values, then dst address */ if (sp->spidx.ul_proto != proto) continue; /* NB: spi's must exist and match */ if (!sp->req || !sp->req->sav || sp->req->sav->spi != spi) continue; if (key_sockaddrcmp(&sp->spidx.dst.sa, &dst->sa, 1) == 0) goto found; } sp = NULL; found: if (sp) { /* sanity check */ KEY_CHKSPDIR(sp->spidx.dir, dir, __func__); /* found a SPD entry */ sp->lastused = time_second; SP_ADDREF(sp); } SPTREE_RUNLOCK(); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, sp, sp ? sp->id : 0, sp ? sp->refcnt : 0)); return sp; } #if 0 /* * return a policy that matches this particular inbound packet. * XXX slow */ struct secpolicy * key_gettunnel(const struct sockaddr *osrc, const struct sockaddr *odst, const struct sockaddr *isrc, const struct sockaddr *idst, const char* where, int tag) { struct secpolicy *sp; const int dir = IPSEC_DIR_INBOUND; struct ipsecrequest *r1, *r2, *p; struct secpolicyindex spidx; KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s from %s:%u\n", __func__, where, tag)); if (isrc->sa_family != idst->sa_family) { ipseclog((LOG_ERR, "%s: protocol family mismatched %d != %d.\n", __func__, isrc->sa_family, idst->sa_family)); sp = NULL; goto done; } SPTREE_LOCK(); LIST_FOREACH(sp, &V_sptree[dir], chain) { if (sp->state == IPSEC_SPSTATE_DEAD) continue; r1 = r2 = NULL; for (p = sp->req; p; p = p->next) { if (p->saidx.mode != IPSEC_MODE_TUNNEL) continue; r1 = r2; r2 = p; if (!r1) { /* here we look at address matches only */ spidx = sp->spidx; if (isrc->sa_len > sizeof(spidx.src) || idst->sa_len > sizeof(spidx.dst)) continue; bcopy(isrc, &spidx.src, isrc->sa_len); bcopy(idst, &spidx.dst, idst->sa_len); if (!key_cmpspidx_withmask(&sp->spidx, &spidx)) continue; } else { if (key_sockaddrcmp(&r1->saidx.src.sa, isrc, 0) || key_sockaddrcmp(&r1->saidx.dst.sa, idst, 0)) continue; } if (key_sockaddrcmp(&r2->saidx.src.sa, osrc, 0) || key_sockaddrcmp(&r2->saidx.dst.sa, odst, 0)) continue; goto found; } } sp = NULL; found: if (sp) { sp->lastused = time_second; SP_ADDREF(sp); } SPTREE_UNLOCK(); done: KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, sp, sp ? sp->id : 0, sp ? sp->refcnt : 0)); return sp; } #endif /* * allocate an SA entry for an *OUTBOUND* packet. * check each request entry in the SP, and acquire an SA if needed. * OUT: 0: there are valid requests. * ENOENT: policy may be valid, but an SA with level REQUIRE is still being acquired. 
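* For example (illustrative): with level REQUIRE and no usable SA installed
* yet, an SADB_ACQUIRE is sent up to the registered key daemon via
* key_acquire() and ENOENT is returned; with level USE or DEFAULT the
* missing SA is tolerated and 0 is returned.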
*/ int key_checkrequest(struct ipsecrequest *isr, const struct secasindex *saidx) { u_int level; int error; struct secasvar *sav; IPSEC_ASSERT(isr != NULL, ("null isr")); IPSEC_ASSERT(saidx != NULL, ("null saidx")); IPSEC_ASSERT(saidx->mode == IPSEC_MODE_TRANSPORT || saidx->mode == IPSEC_MODE_TUNNEL, ("unexpected policy %u", saidx->mode)); /* * XXX guard against protocol callbacks from the crypto * thread as they reference ipsecrequest.sav which we * temporarily null out below. Need to rethink how we * handle bundled SA's in the callback thread. */ IPSECREQUEST_LOCK_ASSERT(isr); /* get current level */ level = ipsec_get_reqlevel(isr); /* * We check new SA in the IPsec request because a different * SA may be involved each time this request is checked, either * because new SAs are being configured, or this request is * associated with an unconnected datagram socket, or this request * is associated with a system default policy. * * key_allocsa_policy should allocate the oldest SA available. * See key_do_allocsa_policy(), and draft-jenkins-ipsec-rekeying-03.txt. */ sav = key_allocsa_policy(saidx); if (sav != isr->sav) { /* The SA needs to be updated. */ if (!IPSECREQUEST_UPGRADE(isr)) { /* Kick everyone off. */ IPSECREQUEST_UNLOCK(isr); IPSECREQUEST_WLOCK(isr); } if (isr->sav != NULL) KEY_FREESAV(&isr->sav); isr->sav = sav; IPSECREQUEST_DOWNGRADE(isr); } else if (sav != NULL) KEY_FREESAV(&sav); /* When there is an SA. */ if (isr->sav != NULL) { if (isr->sav->state != SADB_SASTATE_MATURE && isr->sav->state != SADB_SASTATE_DYING) return EINVAL; return 0; } /* there is no SA */ error = key_acquire(saidx, isr->sp); if (error != 0) { /* XXX What should I do ? */ ipseclog((LOG_DEBUG, "%s: error %d returned from key_acquire\n", __func__, error)); return error; } if (level != IPSEC_LEVEL_REQUIRE) { /* XXX sigh, the interface to this routine is botched */ IPSEC_ASSERT(isr->sav == NULL, ("unexpected SA")); return 0; } else { return ENOENT; } } /* * allocate an SA for a policy entry from the SAD. * NOTE: only SAs in a live (non-DEAD) state are searched. * OUT: NULL: not found. * others: found and return the pointer. */ static struct secasvar * key_allocsa_policy(const struct secasindex *saidx) { #define N(a) _ARRAYLEN(a) struct secashead *sah; struct secasvar *sav; u_int stateidx, arraysize; const u_int *state_valid; state_valid = NULL; /* silence gcc */ arraysize = 0; /* silence gcc */ SAHTREE_LOCK(); LIST_FOREACH(sah, &V_sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) continue; if (key_cmpsaidx(&sah->saidx, saidx, CMP_MODE_REQID)) { if (V_key_preferred_oldsa) { state_valid = saorder_state_valid_prefer_old; arraysize = N(saorder_state_valid_prefer_old); } else { state_valid = saorder_state_valid_prefer_new; arraysize = N(saorder_state_valid_prefer_new); } break; } } SAHTREE_UNLOCK(); if (sah == NULL) return NULL; /* search valid state */ for (stateidx = 0; stateidx < arraysize; stateidx++) { sav = key_do_allocsa_policy(sah, state_valid[stateidx]); if (sav != NULL) return sav; } return NULL; #undef N } /* * search the SAD by direction, protocol, mode and state. * called by key_allocsa_policy(). * OUT: * NULL : not found * others : found, pointer to a SA. 
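* Whether the oldest or the newest live SA wins is controlled by the
* net.key.preferred_oldsa sysctl declared above; for example (illustrative):
*
*	sysctl net.key.preferred_oldsa=1
*
* makes the search return the oldest usable SA instead of the newest.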
*/ static struct secasvar * key_do_allocsa_policy(struct secashead *sah, u_int state) { struct secasvar *sav, *nextsav, *candidate, *d; /* initialize */ candidate = NULL; SAHTREE_LOCK(); for (sav = LIST_FIRST(&sah->savtree[state]); sav != NULL; sav = nextsav) { nextsav = LIST_NEXT(sav, chain); /* sanity check */ KEY_CHKSASTATE(sav->state, state, __func__); /* initialize */ if (candidate == NULL) { candidate = sav; continue; } /* Which SA is better? */ IPSEC_ASSERT(candidate->lft_c != NULL, ("null candidate lifetime")); IPSEC_ASSERT(sav->lft_c != NULL, ("null sav lifetime")); /* What is the best way to compare them? */ if (V_key_preferred_oldsa) { if (candidate->lft_c->addtime > sav->lft_c->addtime) { candidate = sav; } continue; /*NOTREACHED*/ } /* prefer new sa rather than old sa */ if (candidate->lft_c->addtime < sav->lft_c->addtime) { d = candidate; candidate = sav; } else d = sav; /* * be prepared to delete the SA when there is a more * suitable candidate and the lifetime of the SA is not * permanent. */ if (d->lft_h->addtime != 0) { struct mbuf *m, *result; u_int8_t satype; key_sa_chgstate(d, SADB_SASTATE_DEAD); IPSEC_ASSERT(d->refcnt > 0, ("bogus ref count")); satype = key_proto2satype(d->sah->saidx.proto); if (satype == 0) goto msgfail; m = key_setsadbmsg(SADB_DELETE, 0, satype, 0, 0, d->refcnt - 1); if (!m) goto msgfail; result = m; /* set sadb_address for saidx's. */ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &d->sah->saidx.src.sa, d->sah->saidx.src.sa.sa_len << 3, IPSEC_ULPROTO_ANY); if (!m) goto msgfail; m_cat(result, m); /* set sadb_address for saidx's. */ m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &d->sah->saidx.dst.sa, d->sah->saidx.dst.sa.sa_len << 3, IPSEC_ULPROTO_ANY); if (!m) goto msgfail; m_cat(result, m); /* create SA extension */ m = key_setsadbsa(d); if (!m) goto msgfail; m_cat(result, m); if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) goto msgfail; } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); if (key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED)) goto msgfail; msgfail: KEY_FREESAV(&d); } } if (candidate) { sa_addref(candidate); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s cause refcnt++:%d SA:%p\n", __func__, candidate->refcnt, candidate)); } SAHTREE_UNLOCK(); return candidate; } /* * allocate a usable SA entry for an *INBOUND* packet. * Must call key_freesav() later. * OUT: positive: pointer to a usable sav (i.e. MATURE or DYING state). * NULL: not found, or error occurred. * * In the comparison, no source address is used--for RFC2401 conformance. * To quote, from section 4.1: * A security association is uniquely identified by a triple consisting * of a Security Parameter Index (SPI), an IP Destination Address, and a * security protocol (AH or ESP) identifier. * Note that, however, we do need to keep source address in IPsec SA. * IKE specification and PF_KEY specification do assume that we * keep source address in IPsec SA. We see a tricky situation here. 
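* An illustrative lookup for an inbound ESP packet (a sketch only; dst and
* spi come from the packet, and KEY_ALLOCSA is assumed to be the usual
* wrapper macro that supplies __FILE__/__LINE__ to this function):
*
*	sav = KEY_ALLOCSA(&dst, IPPROTO_ESP, spi);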
*/ struct secasvar * key_allocsa(union sockaddr_union *dst, u_int proto, u_int32_t spi, const char* where, int tag) { struct secashead *sah; struct secasvar *sav; u_int stateidx, arraysize, state; const u_int *saorder_state_valid; #ifdef IPSEC_NAT_T int natt_chkport; #endif IPSEC_ASSERT(dst != NULL, ("null dst address")); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s from %s:%u\n", __func__, where, tag)); #ifdef IPSEC_NAT_T natt_chkport = (dst->sa.sa_family == AF_INET && dst->sa.sa_len == sizeof(struct sockaddr_in) && dst->sin.sin_port != 0); #endif /* * searching SAD. * XXX: to be checked internal IP header somewhere. Also when * IPsec tunnel packet is received. But ESP tunnel mode is * encrypted so we can't check internal IP header. */ SAHTREE_LOCK(); if (V_key_preferred_oldsa) { saorder_state_valid = saorder_state_valid_prefer_old; arraysize = _ARRAYLEN(saorder_state_valid_prefer_old); } else { saorder_state_valid = saorder_state_valid_prefer_new; arraysize = _ARRAYLEN(saorder_state_valid_prefer_new); } LIST_FOREACH(sah, &V_sahtree, chain) { int checkport; /* search valid state */ for (stateidx = 0; stateidx < arraysize; stateidx++) { state = saorder_state_valid[stateidx]; LIST_FOREACH(sav, &sah->savtree[state], chain) { /* sanity check */ KEY_CHKSASTATE(sav->state, state, __func__); /* do not return entries w/ unusable state */ if (sav->state != SADB_SASTATE_MATURE && sav->state != SADB_SASTATE_DYING) continue; if (proto != sav->sah->saidx.proto) continue; if (spi != sav->spi) continue; checkport = 0; #ifdef IPSEC_NAT_T /* * Really only check ports when this is a NAT-T * SA. Otherwise other lookups providing ports * might suffer. */ if (sav->natt_type && natt_chkport) checkport = 1; #endif #if 0 /* don't check src */ /* check src address */ if (key_sockaddrcmp(&src->sa, &sav->sah->saidx.src.sa, checkport) != 0) continue; #endif /* check dst address */ if (key_sockaddrcmp(&dst->sa, &sav->sah->saidx.dst.sa, checkport) != 0) continue; sa_addref(sav); goto done; } } } sav = NULL; done: SAHTREE_UNLOCK(); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s return SA:%p; refcnt %u\n", __func__, sav, sav ? sav->refcnt : 0)); return sav; } /* * Must be called after calling key_allocsp(). * For both the packet without socket and key_freeso(). */ void _key_freesp(struct secpolicy **spp, const char* where, int tag) { struct ipsecrequest *isr, *nextisr; struct secpolicy *sp = *spp; IPSEC_ASSERT(sp != NULL, ("null sp")); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s SP:%p (ID=%u) from %s:%u; refcnt now %u\n", __func__, sp, sp->id, where, tag, sp->refcnt)); if (SP_DELREF(sp) == 0) return; *spp = NULL; for (isr = sp->req; isr != NULL; isr = nextisr) { if (isr->sav != NULL) { KEY_FREESAV(&isr->sav); isr->sav = NULL; } nextisr = isr->next; ipsec_delisr(isr); } free(sp, M_IPSEC_SP); } static void key_unlink(struct secpolicy *sp) { IPSEC_ASSERT(sp != NULL, ("null sp")); IPSEC_ASSERT(sp->spidx.dir == IPSEC_DIR_INBOUND || sp->spidx.dir == IPSEC_DIR_OUTBOUND, ("invalid direction %u", sp->spidx.dir)); SPTREE_UNLOCK_ASSERT(); SPTREE_WLOCK(); if (sp->state == IPSEC_SPSTATE_DEAD) { SPTREE_WUNLOCK(); return; } sp->state = IPSEC_SPSTATE_DEAD; TAILQ_REMOVE(&V_sptree[sp->spidx.dir], sp, chain); SPTREE_WUNLOCK(); KEY_FREESP(&sp); } /* + * insert a secpolicy into the SP database. 
Lower priorities first + */ +static void +key_insertsp(struct secpolicy *newsp) +{ + struct secpolicy *sp; + + SPTREE_WLOCK(); + TAILQ_FOREACH(sp, &V_sptree[newsp->spidx.dir], chain) { + if (newsp->priority < sp->priority) { + TAILQ_INSERT_BEFORE(sp, newsp, chain); + goto done; + } + } + + TAILQ_INSERT_TAIL(&V_sptree[newsp->spidx.dir], newsp, chain); + +done: + newsp->state = IPSEC_SPSTATE_ALIVE; + SPTREE_WUNLOCK(); +} + +/* * Must be called after calling key_allocsp(). * For the packet with socket. */ void key_freeso(struct socket *so) { IPSEC_ASSERT(so != NULL, ("null so")); switch (so->so_proto->pr_domain->dom_family) { #if defined(INET) || defined(INET6) #ifdef INET case PF_INET: #endif #ifdef INET6 case PF_INET6: #endif { struct inpcb *pcb = sotoinpcb(so); /* Does it have a PCB ? */ if (pcb == NULL) return; key_freesp_so(&pcb->inp_sp->sp_in); key_freesp_so(&pcb->inp_sp->sp_out); } break; #endif /* INET || INET6 */ default: ipseclog((LOG_DEBUG, "%s: unknown address family=%d.\n", __func__, so->so_proto->pr_domain->dom_family)); return; } } static void key_freesp_so(struct secpolicy **sp) { IPSEC_ASSERT(sp != NULL && *sp != NULL, ("null sp")); if ((*sp)->policy == IPSEC_POLICY_ENTRUST || (*sp)->policy == IPSEC_POLICY_BYPASS) return; IPSEC_ASSERT((*sp)->policy == IPSEC_POLICY_IPSEC, ("invalid policy %u", (*sp)->policy)); KEY_FREESP(sp); } void key_addrefsa(struct secasvar *sav, const char* where, int tag) { IPSEC_ASSERT(sav != NULL, ("null sav")); IPSEC_ASSERT(sav->refcnt > 0, ("refcount must exist")); sa_addref(sav); } /* * Must be called after calling key_allocsa(). * This function is called by key_freesp() to free some SA allocated * for a policy. */ void key_freesav(struct secasvar **psav, const char* where, int tag) { struct secasvar *sav = *psav; IPSEC_ASSERT(sav != NULL, ("null sav")); if (sa_delref(sav)) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s SA:%p (SPI %u) from %s:%u; refcnt now %u\n", __func__, sav, ntohl(sav->spi), where, tag, sav->refcnt)); *psav = NULL; key_delsav(sav); } else { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s SA:%p (SPI %u) from %s:%u; refcnt now %u\n", __func__, sav, ntohl(sav->spi), where, tag, sav->refcnt)); } } /* %%% SPD management */ /* * search SPD * OUT: NULL : not found * others : found, pointer to a SP. */ static struct secpolicy * key_getsp(struct secpolicyindex *spidx) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; IPSEC_ASSERT(spidx != NULL, ("null spidx")); SPTREE_RLOCK(); TAILQ_FOREACH(sp, &V_sptree[spidx->dir], chain) { if (key_cmpspidx_exactly(spidx, &sp->spidx)) { SP_ADDREF(sp); break; } } SPTREE_RUNLOCK(); return sp; } /* * get SP by index. * OUT: NULL : not found * others : found, pointer to a SP. */ static struct secpolicy * key_getspbyid(u_int32_t id) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; SPTREE_RLOCK(); TAILQ_FOREACH(sp, &V_sptree[IPSEC_DIR_INBOUND], chain) { if (sp->id == id) { SP_ADDREF(sp); goto done; } } TAILQ_FOREACH(sp, &V_sptree[IPSEC_DIR_OUTBOUND], chain) { if (sp->id == id) { SP_ADDREF(sp); goto done; } } done: SPTREE_RUNLOCK(); return sp; } struct secpolicy * key_newsp(const char* where, int tag) { struct secpolicy *newsp = NULL; newsp = (struct secpolicy *) malloc(sizeof(struct secpolicy), M_IPSEC_SP, M_NOWAIT|M_ZERO); if (newsp) refcount_init(&newsp->refcnt, 1); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s from %s:%u return SP:%p\n", __func__, where, tag, newsp)); return newsp; } /* * create secpolicy structure from sadb_x_policy structure. 
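* (The sadb_x_policy extension arrives from user space via PF_KEY; the new
* sadb_x_policy_priority field introduced by this change is copied into
* sp->priority below and later used by key_insertsp() to keep the SPD
* sorted.)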
* NOTE: `state', `secpolicyindex' in secpolicy structure are not set, * so must be set properly later. */ struct secpolicy * key_msg2sp(struct sadb_x_policy *xpl0, size_t len, int *error) { struct secpolicy *newsp; IPSEC_ASSERT(xpl0 != NULL, ("null xpl0")); IPSEC_ASSERT(len >= sizeof(*xpl0), ("policy too short: %zu", len)); if (len != PFKEY_EXTLEN(xpl0)) { ipseclog((LOG_DEBUG, "%s: Invalid msg length.\n", __func__)); *error = EINVAL; return NULL; } if ((newsp = KEY_NEWSP()) == NULL) { *error = ENOBUFS; return NULL; } newsp->spidx.dir = xpl0->sadb_x_policy_dir; newsp->policy = xpl0->sadb_x_policy_type; + newsp->priority = xpl0->sadb_x_policy_priority; /* check policy */ switch (xpl0->sadb_x_policy_type) { case IPSEC_POLICY_DISCARD: case IPSEC_POLICY_NONE: case IPSEC_POLICY_ENTRUST: case IPSEC_POLICY_BYPASS: newsp->req = NULL; break; case IPSEC_POLICY_IPSEC: { int tlen; struct sadb_x_ipsecrequest *xisr; struct ipsecrequest **p_isr = &newsp->req; /* validity check */ if (PFKEY_EXTLEN(xpl0) < sizeof(*xpl0)) { ipseclog((LOG_DEBUG, "%s: Invalid msg length.\n", __func__)); KEY_FREESP(&newsp); *error = EINVAL; return NULL; } tlen = PFKEY_EXTLEN(xpl0) - sizeof(*xpl0); xisr = (struct sadb_x_ipsecrequest *)(xpl0 + 1); while (tlen > 0) { /* length check */ if (xisr->sadb_x_ipsecrequest_len < sizeof(*xisr)) { ipseclog((LOG_DEBUG, "%s: invalid ipsecrequest " "length.\n", __func__)); KEY_FREESP(&newsp); *error = EINVAL; return NULL; } /* allocate request buffer */ /* NB: data structure is zero'd */ *p_isr = ipsec_newisr(); if ((*p_isr) == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); KEY_FREESP(&newsp); *error = ENOBUFS; return NULL; } /* set values */ switch (xisr->sadb_x_ipsecrequest_proto) { case IPPROTO_ESP: case IPPROTO_AH: case IPPROTO_IPCOMP: break; default: ipseclog((LOG_DEBUG, "%s: invalid proto type=%u\n", __func__, xisr->sadb_x_ipsecrequest_proto)); KEY_FREESP(&newsp); *error = EPROTONOSUPPORT; return NULL; } (*p_isr)->saidx.proto = xisr->sadb_x_ipsecrequest_proto; switch (xisr->sadb_x_ipsecrequest_mode) { case IPSEC_MODE_TRANSPORT: case IPSEC_MODE_TUNNEL: break; case IPSEC_MODE_ANY: default: ipseclog((LOG_DEBUG, "%s: invalid mode=%u\n", __func__, xisr->sadb_x_ipsecrequest_mode)); KEY_FREESP(&newsp); *error = EINVAL; return NULL; } (*p_isr)->saidx.mode = xisr->sadb_x_ipsecrequest_mode; switch (xisr->sadb_x_ipsecrequest_level) { case IPSEC_LEVEL_DEFAULT: case IPSEC_LEVEL_USE: case IPSEC_LEVEL_REQUIRE: break; case IPSEC_LEVEL_UNIQUE: /* validity check */ /* * If range violation of reqid, kernel will * update it, don't refuse it. */ if (xisr->sadb_x_ipsecrequest_reqid > IPSEC_MANUAL_REQID_MAX) { ipseclog((LOG_DEBUG, "%s: reqid=%d range " "violation, updated by kernel.\n", __func__, xisr->sadb_x_ipsecrequest_reqid)); xisr->sadb_x_ipsecrequest_reqid = 0; } /* allocate new reqid id if reqid is zero. */ if (xisr->sadb_x_ipsecrequest_reqid == 0) { u_int32_t reqid; if ((reqid = key_newreqid()) == 0) { KEY_FREESP(&newsp); *error = ENOBUFS; return NULL; } (*p_isr)->saidx.reqid = reqid; xisr->sadb_x_ipsecrequest_reqid = reqid; } else { /* set it for manual keying. 
*/ (*p_isr)->saidx.reqid = xisr->sadb_x_ipsecrequest_reqid; } break; default: ipseclog((LOG_DEBUG, "%s: invalid level=%u\n", __func__, xisr->sadb_x_ipsecrequest_level)); KEY_FREESP(&newsp); *error = EINVAL; return NULL; } (*p_isr)->level = xisr->sadb_x_ipsecrequest_level; /* set IP addresses if present */ if (xisr->sadb_x_ipsecrequest_len > sizeof(*xisr)) { struct sockaddr *paddr; paddr = (struct sockaddr *)(xisr + 1); /* validity check */ if (paddr->sa_len > sizeof((*p_isr)->saidx.src)) { ipseclog((LOG_DEBUG, "%s: invalid " "request address length.\n", __func__)); KEY_FREESP(&newsp); *error = EINVAL; return NULL; } bcopy(paddr, &(*p_isr)->saidx.src, paddr->sa_len); paddr = (struct sockaddr *)((caddr_t)paddr + paddr->sa_len); /* validity check */ if (paddr->sa_len > sizeof((*p_isr)->saidx.dst)) { ipseclog((LOG_DEBUG, "%s: invalid " "request address length.\n", __func__)); KEY_FREESP(&newsp); *error = EINVAL; return NULL; } bcopy(paddr, &(*p_isr)->saidx.dst, paddr->sa_len); } (*p_isr)->sp = newsp; /* initialization for the next. */ p_isr = &(*p_isr)->next; tlen -= xisr->sadb_x_ipsecrequest_len; /* validity check */ if (tlen < 0) { ipseclog((LOG_DEBUG, "%s: becoming tlen < 0.\n", __func__)); KEY_FREESP(&newsp); *error = EINVAL; return NULL; } xisr = (struct sadb_x_ipsecrequest *)((caddr_t)xisr + xisr->sadb_x_ipsecrequest_len); } } break; default: ipseclog((LOG_DEBUG, "%s: invalid policy type.\n", __func__)); KEY_FREESP(&newsp); *error = EINVAL; return NULL; } *error = 0; return newsp; } static u_int32_t key_newreqid() { static u_int32_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1; auto_reqid = (auto_reqid == ~0 ? IPSEC_MANUAL_REQID_MAX + 1 : auto_reqid + 1); /* XXX should check for uniqueness */ return auto_reqid; } /* * copy secpolicy struct to sadb_x_policy structure indicated. */ struct mbuf * key_sp2msg(struct secpolicy *sp) { struct sadb_x_policy *xpl; int tlen; caddr_t p; struct mbuf *m; IPSEC_ASSERT(sp != NULL, ("null policy")); tlen = key_getspreqmsglen(sp); m = m_get2(tlen, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, tlen); m->m_len = tlen; xpl = mtod(m, struct sadb_x_policy *); bzero(xpl, tlen); xpl->sadb_x_policy_len = PFKEY_UNIT64(tlen); xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY; xpl->sadb_x_policy_type = sp->policy; xpl->sadb_x_policy_dir = sp->spidx.dir; xpl->sadb_x_policy_id = sp->id; + xpl->sadb_x_policy_priority = sp->priority; p = (caddr_t)xpl + sizeof(*xpl); /* is this the policy for ipsec? */ if (sp->policy == IPSEC_POLICY_IPSEC) { struct sadb_x_ipsecrequest *xisr; struct ipsecrequest *isr; for (isr = sp->req; isr != NULL; isr = isr->next) { xisr = (struct sadb_x_ipsecrequest *)p; xisr->sadb_x_ipsecrequest_proto = isr->saidx.proto; xisr->sadb_x_ipsecrequest_mode = isr->saidx.mode; xisr->sadb_x_ipsecrequest_level = isr->level; xisr->sadb_x_ipsecrequest_reqid = isr->saidx.reqid; p += sizeof(*xisr); bcopy(&isr->saidx.src, p, isr->saidx.src.sa.sa_len); p += isr->saidx.src.sa.sa_len; bcopy(&isr->saidx.dst, p, isr->saidx.dst.sa.sa_len); p += isr->saidx.dst.sa.sa_len; xisr->sadb_x_ipsecrequest_len = PFKEY_ALIGN8(sizeof(*xisr) + isr->saidx.src.sa.sa_len + isr->saidx.dst.sa.sa_len); } } return m; }
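/*
 * An illustrative note on key_gather_mbuf() below (explanatory, not part of
 * this change): the first `ndeep' of the `nitem' extensions named in the
 * variadic list are deep-copied into fresh mbufs, while the remainder are
 * shared read-only via m_copym(). For example, the reply path in
 * key_spdadd() uses
 *
 *	n = key_gather_mbuf(m, mhp, 2, 4, SADB_EXT_RESERVED,
 *	    SADB_X_EXT_POLICY, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST);
 *
 * so the message header and policy extension are writable copies (the policy
 * id is patched in afterwards) while the address extensions still reference
 * the request mbuf.
 */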
/* m will not be freed nor modified */ static struct mbuf * key_gather_mbuf(struct mbuf *m, const struct sadb_msghdr *mhp, int ndeep, int nitem, ...) { va_list ap; int idx; int i; struct mbuf *result = NULL, *n; int len; IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); va_start(ap, nitem); for (i = 0; i < nitem; i++) { idx = va_arg(ap, int); if (idx < 0 || idx > SADB_EXT_MAX) goto fail; /* don't attempt to pull empty extension */ if (idx == SADB_EXT_RESERVED && mhp->msg == NULL) continue; if (idx != SADB_EXT_RESERVED && (mhp->ext[idx] == NULL || mhp->extlen[idx] == 0)) continue; if (idx == SADB_EXT_RESERVED) { len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); IPSEC_ASSERT(len <= MHLEN, ("header too big %u", len)); MGETHDR(n, M_NOWAIT, MT_DATA); if (!n) goto fail; n->m_len = len; n->m_next = NULL; m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t)); } else if (i < ndeep) { len = mhp->extlen[idx]; n = m_get2(len, M_NOWAIT, MT_DATA, 0); if (n == NULL) goto fail; m_align(n, len); n->m_len = len; m_copydata(m, mhp->extoff[idx], mhp->extlen[idx], mtod(n, caddr_t)); } else { n = m_copym(m, mhp->extoff[idx], mhp->extlen[idx], M_NOWAIT); } if (n == NULL) goto fail; if (result) m_cat(result, n); else result = n; } va_end(ap); if ((result->m_flags & M_PKTHDR) != 0) { result->m_pkthdr.len = 0; for (n = result; n; n = n->m_next) result->m_pkthdr.len += n->m_len; } return result; fail: m_freem(result); va_end(ap); return NULL; } /* * SADB_X_SPDADD, SADB_X_SPDSETIDX or SADB_X_SPDUPDATE processing * add an entry to SP database, when received * * from the user(?). * Adding to SP database, * and send * * to the socket which sent the message. * * SPDADD sets a unique policy entry. * SPDSETIDX is like SPDADD but without the policy request part. * SPDUPDATE replaces a unique policy entry. * * m will always be freed. */ static int key_spdadd(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct sadb_address *src0, *dst0; struct sadb_x_policy *xpl0, *xpl; struct sadb_lifetime *lft = NULL; struct secpolicyindex spidx; struct secpolicy *newsp; int error; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || mhp->ext[SADB_X_EXT_POLICY] == NULL) { ipseclog((LOG_DEBUG, "key_spdadd: invalid message is passed.\n")); return key_senderror(so, m, EINVAL); } if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) || mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_EXT_LIFETIME_HARD] != NULL) { if (mhp->extlen[SADB_EXT_LIFETIME_HARD] < sizeof(struct sadb_lifetime)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } lft = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_HARD]; } src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; xpl0 = (struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY]; /* * Note: do not parse SADB_X_EXT_NAT_T_* here: * we are processing traffic endpoints. */ /* make secindex */ /* XXX boundary check against sa_len */ KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, src0 + 1, dst0 + 1, src0->sadb_address_prefixlen, dst0->sadb_address_prefixlen, src0->sadb_address_proto, &spidx); /* checking the direction. 
*/ switch (xpl0->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: case IPSEC_DIR_OUTBOUND: break; default: ipseclog((LOG_DEBUG, "%s: Invalid SP direction.\n", __func__)); mhp->msg->sadb_msg_errno = EINVAL; return 0; } /* check policy */ /* key_spdadd() accepts DISCARD, NONE and IPSEC. */ if (xpl0->sadb_x_policy_type == IPSEC_POLICY_ENTRUST || xpl0->sadb_x_policy_type == IPSEC_POLICY_BYPASS) { ipseclog((LOG_DEBUG, "%s: Invalid policy type.\n", __func__)); return key_senderror(so, m, EINVAL); } /* policy requests are mandatory when action is ipsec. */ if (mhp->msg->sadb_msg_type != SADB_X_SPDSETIDX && xpl0->sadb_x_policy_type == IPSEC_POLICY_IPSEC && mhp->extlen[SADB_X_EXT_POLICY] <= sizeof(*xpl0)) { ipseclog((LOG_DEBUG, "%s: some policy requests part required\n", __func__)); return key_senderror(so, m, EINVAL); } /* * checking there is SP already or not. * SPDUPDATE doesn't depend on whether there is a SP or not. * If the type is either SPDADD or SPDSETIDX AND a SP is found, * then error. */ newsp = key_getsp(&spidx); if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { if (newsp) { key_unlink(newsp); KEY_FREESP(&newsp); } } else { if (newsp != NULL) { KEY_FREESP(&newsp); ipseclog((LOG_DEBUG, "%s: a SP entry exists already.\n", __func__)); return key_senderror(so, m, EEXIST); } } /* XXX: there is race between key_getsp and key_msg2sp. */ /* allocation new SP entry */ if ((newsp = key_msg2sp(xpl0, PFKEY_EXTLEN(xpl0), &error)) == NULL) { return key_senderror(so, m, error); } if ((newsp->id = key_getnewspid()) == 0) { KEY_FREESP(&newsp); return key_senderror(so, m, ENOBUFS); } /* XXX boundary check against sa_len */ KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, src0 + 1, dst0 + 1, src0->sadb_address_prefixlen, dst0->sadb_address_prefixlen, src0->sadb_address_proto, &newsp->spidx); /* sanity check on addr pair */ if (((struct sockaddr *)(src0 + 1))->sa_family != ((struct sockaddr *)(dst0+ 1))->sa_family) { KEY_FREESP(&newsp); return key_senderror(so, m, EINVAL); } if (((struct sockaddr *)(src0 + 1))->sa_len != ((struct sockaddr *)(dst0+ 1))->sa_len) { KEY_FREESP(&newsp); return key_senderror(so, m, EINVAL); } #if 1 if (newsp->req && newsp->req->saidx.src.sa.sa_family && newsp->req->saidx.dst.sa.sa_family) { if (newsp->req->saidx.src.sa.sa_family != newsp->req->saidx.dst.sa.sa_family) { KEY_FREESP(&newsp); return key_senderror(so, m, EINVAL); } } #endif newsp->created = time_second; newsp->lastused = newsp->created; newsp->lifetime = lft ? lft->sadb_lifetime_addtime : 0; newsp->validtime = lft ? lft->sadb_lifetime_usetime : 0; - SPTREE_WLOCK(); - TAILQ_INSERT_TAIL(&V_sptree[newsp->spidx.dir], newsp, chain); - newsp->state = IPSEC_SPSTATE_ALIVE; - SPTREE_WUNLOCK(); + key_insertsp(newsp); /* delete the entry in spacqtree */ if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { struct secspacq *spacq = key_getspacq(&spidx); if (spacq != NULL) { /* reset counter in order to deletion by timehandler. */ spacq->created = time_second; spacq->count = 0; SPACQ_UNLOCK(); } } { struct mbuf *n, *mpolicy; struct sadb_msg *newmsg; int off; /* * Note: do not send SADB_X_EXT_NAT_T_* here: * we are sending traffic endpoints. */ /* create new sadb_msg to reply. 
*/ if (lft) { n = key_gather_mbuf(m, mhp, 2, 5, SADB_EXT_RESERVED, SADB_X_EXT_POLICY, SADB_EXT_LIFETIME_HARD, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); } else { n = key_gather_mbuf(m, mhp, 2, 4, SADB_EXT_RESERVED, SADB_X_EXT_POLICY, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); } if (!n) return key_senderror(so, m, ENOBUFS); if (n->m_len < sizeof(*newmsg)) { n = m_pullup(n, sizeof(*newmsg)); if (!n) return key_senderror(so, m, ENOBUFS); } newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); off = 0; mpolicy = m_pulldown(n, PFKEY_ALIGN8(sizeof(struct sadb_msg)), sizeof(*xpl), &off); if (mpolicy == NULL) { /* n is already freed */ return key_senderror(so, m, ENOBUFS); } xpl = (struct sadb_x_policy *)(mtod(mpolicy, caddr_t) + off); if (xpl->sadb_x_policy_exttype != SADB_X_EXT_POLICY) { m_freem(n); return key_senderror(so, m, EINVAL); } xpl->sadb_x_policy_id = newsp->id; m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* * get new policy id. * OUT: * 0: failure. * others: success. */ static u_int32_t key_getnewspid() { u_int32_t newid = 0; int count = V_key_spi_trycnt; /* XXX */ struct secpolicy *sp; /* loop until an unused policy id is found */ while (count--) { newid = (V_policy_id = (V_policy_id == ~0 ? 1 : V_policy_id + 1)); if ((sp = key_getspbyid(newid)) == NULL) break; KEY_FREESP(&sp); } if (count == 0 || newid == 0) { ipseclog((LOG_DEBUG, "%s: failed to allocate a new policy id.\n", __func__)); return 0; } return newid; } /* * SADB_SPDDELETE processing * receive * * from the user(?), and set SADB_SASTATE_DEAD, * and send, * * to the ikmpd. * policy(*) including direction of policy. * * m will always be freed. */ static int key_spddelete(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct sadb_address *src0, *dst0; struct sadb_x_policy *xpl0; struct secpolicyindex spidx; struct secpolicy *sp; IPSEC_ASSERT(so != NULL, ("null so")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || mhp->ext[SADB_X_EXT_POLICY] == NULL) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) || mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; xpl0 = (struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY]; /* * Note: do not parse SADB_X_EXT_NAT_T_* here: * we are processing traffic endpoints. */ /* make secindex */ /* XXX boundary check against sa_len */ KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, src0 + 1, dst0 + 1, src0->sadb_address_prefixlen, dst0->sadb_address_prefixlen, src0->sadb_address_proto, &spidx); /* checking the direction. */ switch (xpl0->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: case IPSEC_DIR_OUTBOUND: break; default: ipseclog((LOG_DEBUG, "%s: Invalid SP direction.\n", __func__)); return key_senderror(so, m, EINVAL); } /* Is there an SP in the SPD? 
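* (key_getsp() returns the entry with an extra reference; it is dropped
* via KEY_FREESP() right after key_unlink() below.)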
*/ if ((sp = key_getsp(&spidx)) == NULL) { ipseclog((LOG_DEBUG, "%s: no SP found.\n", __func__)); return key_senderror(so, m, EINVAL); } /* save policy id to buffer to be returned. */ xpl0->sadb_x_policy_id = sp->id; key_unlink(sp); KEY_FREESP(&sp); { struct mbuf *n; struct sadb_msg *newmsg; /* * Note: do not send SADB_X_EXT_NAT_T_* here: * we are sending traffic endpoints. */ /* create new sadb_msg to reply. */ n = key_gather_mbuf(m, mhp, 1, 4, SADB_EXT_RESERVED, SADB_X_EXT_POLICY, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); if (!n) return key_senderror(so, m, ENOBUFS); newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* * SADB_SPDDELETE2 processing * receive * * from the user(?), and set SADB_SASTATE_DEAD, * and send, * * to the ikmpd. * policy(*) including direction of policy. * * m will always be freed. */ static int key_spddelete2(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { u_int32_t id; struct secpolicy *sp; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); if (mhp->ext[SADB_X_EXT_POLICY] == NULL || mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } id = ((struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; /* Is there SP in SPD ? */ if ((sp = key_getspbyid(id)) == NULL) { ipseclog((LOG_DEBUG, "%s: no SP found id:%u.\n", __func__, id)); return key_senderror(so, m, EINVAL); } key_unlink(sp); KEY_FREESP(&sp); { struct mbuf *n, *nn; struct sadb_msg *newmsg; int off, len; /* create new sadb_msg to reply. */ len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); MGETHDR(n, M_NOWAIT, MT_DATA); if (n && len > MHLEN) { if (!(MCLGET(n, M_NOWAIT))) { m_freem(n); n = NULL; } } if (!n) return key_senderror(so, m, ENOBUFS); n->m_len = len; n->m_next = NULL; off = 0; m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t) + off); off += PFKEY_ALIGN8(sizeof(struct sadb_msg)); IPSEC_ASSERT(off == len, ("length inconsistency (off %u len %u)", off, len)); n->m_next = m_copym(m, mhp->extoff[SADB_X_EXT_POLICY], mhp->extlen[SADB_X_EXT_POLICY], M_NOWAIT); if (!n->m_next) { m_freem(n); return key_senderror(so, m, ENOBUFS); } n->m_pkthdr.len = 0; for (nn = n; nn; nn = nn->m_next) n->m_pkthdr.len += nn->m_len; newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* * SADB_X_SPDGET processing * receive * * from the user(?), * and send, * * to the ikmpd. * policy(*) including direction of policy. * * m will always be freed. */ static int key_spdget(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { u_int32_t id; struct secpolicy *sp; struct mbuf *n; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); if (mhp->ext[SADB_X_EXT_POLICY] == NULL || mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } id = ((struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; /* Is there SP in SPD ? 
*/ if ((sp = key_getspbyid(id)) == NULL) { ipseclog((LOG_DEBUG, "%s: no SP found id:%u.\n", __func__, id)); return key_senderror(so, m, ENOENT); } n = key_setdumpsp(sp, SADB_X_SPDGET, mhp->msg->sadb_msg_seq, mhp->msg->sadb_msg_pid); KEY_FREESP(&sp); if (n != NULL) { m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } else return key_senderror(so, m, ENOBUFS); } /* * SADB_X_SPDACQUIRE processing. * Acquire policy and SA(s) for an *OUTBOUND* packet. * send * * to KMD, and expect to receive * with SADB_X_SPDACQUIRE if error occurred, * or * * with SADB_X_SPDUPDATE from KMD by PF_KEY. * policy(*) is without policy requests. * * 0 : succeed * others: error number */ int key_spdacquire(struct secpolicy *sp) { struct mbuf *result = NULL, *m; struct secspacq *newspacq; IPSEC_ASSERT(sp != NULL, ("null secpolicy")); IPSEC_ASSERT(sp->req == NULL, ("policy exists")); IPSEC_ASSERT(sp->policy == IPSEC_POLICY_IPSEC, ("policy not IPSEC %u", sp->policy)); /* Get an entry to check whether a message has already been sent. */ newspacq = key_getspacq(&sp->spidx); if (newspacq != NULL) { if (V_key_blockacq_count < newspacq->count) { /* reset the counter and send the message. */ newspacq->count = 0; } else { /* increment counter and do nothing. */ newspacq->count++; SPACQ_UNLOCK(); return (0); } SPACQ_UNLOCK(); } else { /* make new entry for blocking to send SADB_ACQUIRE. */ newspacq = key_newspacq(&sp->spidx); if (newspacq == NULL) return ENOBUFS; } /* create new sadb_msg to reply. */ m = key_setsadbmsg(SADB_X_SPDACQUIRE, 0, 0, 0, 0, 0); if (!m) return ENOBUFS; result = m; result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); } /* * SADB_SPDFLUSH processing * receive * * from the user, and free all entries in secpctree. * and send, * * to the user. * NOTE: all this does is mark the entries SADB_SASTATE_DEAD. * * m will always be freed. */ static int key_spdflush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { TAILQ_HEAD(, secpolicy) drainq; struct sadb_msg *newmsg; struct secpolicy *sp, *nextsp; u_int dir; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); if (m->m_len != PFKEY_ALIGN8(sizeof(struct sadb_msg))) return key_senderror(so, m, EINVAL); TAILQ_INIT(&drainq); SPTREE_WLOCK(); for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { TAILQ_CONCAT(&drainq, &V_sptree[dir], chain); } /* * We need to set state to DEAD for each policy to be sure * that another thread won't try to unlink it. */ TAILQ_FOREACH(sp, &drainq, chain) sp->state = IPSEC_SPSTATE_DEAD; SPTREE_WUNLOCK(); sp = TAILQ_FIRST(&drainq); while (sp != NULL) { nextsp = TAILQ_NEXT(sp, chain); KEY_FREESP(&sp); sp = nextsp; } if (sizeof(struct sadb_msg) > m->m_len + M_TRAILINGSPACE(m)) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return key_senderror(so, m, ENOBUFS); } if (m->m_next) m_freem(m->m_next); m->m_next = NULL; m->m_pkthdr.len = m->m_len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); newmsg = mtod(m, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(m->m_pkthdr.len); return key_sendup_mbuf(so, m, KEY_SENDUP_ALL); } /* * SADB_SPDDUMP processing * receive * * from the user, and dump all SP leaves * and send, * ..... * to the ikmpd. * * m will always be freed. 
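* (Each matching SP is encoded by key_setdumpsp() and delivered to the
* requesting socket as a separate message; the sequence number counts
* down and reaches zero on the final one.)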
*/ static int key_spddump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; int cnt; u_int dir; struct mbuf *n; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* search SPD entry and get buffer size. */ cnt = 0; SPTREE_RLOCK(); for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { TAILQ_FOREACH(sp, &V_sptree[dir], chain) { cnt++; } } if (cnt == 0) { SPTREE_RUNLOCK(); return key_senderror(so, m, ENOENT); } for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { TAILQ_FOREACH(sp, &V_sptree[dir], chain) { --cnt; n = key_setdumpsp(sp, SADB_X_SPDDUMP, cnt, mhp->msg->sadb_msg_pid); if (n) key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } } SPTREE_RUNLOCK(); m_freem(m); return 0; } static struct mbuf * key_setdumpsp(struct secpolicy *sp, u_int8_t type, u_int32_t seq, u_int32_t pid) { struct mbuf *result = NULL, *m; struct seclifetime lt; m = key_setsadbmsg(type, 0, SADB_SATYPE_UNSPEC, seq, pid, sp->refcnt); if (!m) goto fail; result = m; /* * Note: do not send SADB_X_EXT_NAT_T_* here: * we are sending traffic endpoints. */ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sp->spidx.src.sa, sp->spidx.prefs, sp->spidx.ul_proto); if (!m) goto fail; m_cat(result, m); m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &sp->spidx.dst.sa, sp->spidx.prefd, sp->spidx.ul_proto); if (!m) goto fail; m_cat(result, m); m = key_sp2msg(sp); if (!m) goto fail; m_cat(result, m); if(sp->lifetime){ lt.addtime=sp->created; lt.usetime= sp->lastused; m = key_setlifetime(<, SADB_EXT_LIFETIME_CURRENT); if (!m) goto fail; m_cat(result, m); lt.addtime=sp->lifetime; lt.usetime= sp->validtime; m = key_setlifetime(<, SADB_EXT_LIFETIME_HARD); if (!m) goto fail; m_cat(result, m); } if ((result->m_flags & M_PKTHDR) == 0) goto fail; if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) goto fail; } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return result; fail: m_freem(result); return NULL; } /* * get PFKEY message length for security policy and request. */ static u_int key_getspreqmsglen(struct secpolicy *sp) { u_int tlen; tlen = sizeof(struct sadb_x_policy); /* if is the policy for ipsec ? */ if (sp->policy != IPSEC_POLICY_IPSEC) return tlen; /* get length of ipsec requests */ { struct ipsecrequest *isr; int len; for (isr = sp->req; isr != NULL; isr = isr->next) { len = sizeof(struct sadb_x_ipsecrequest) + isr->saidx.src.sa.sa_len + isr->saidx.dst.sa.sa_len; tlen += PFKEY_ALIGN8(len); } } return tlen; } /* * SADB_SPDEXPIRE processing * send * * to KMD by PF_KEY. * * OUT: 0 : succeed * others : error number */ static int key_spdexpire(struct secpolicy *sp) { struct mbuf *result = NULL, *m; int len; int error = -1; struct sadb_lifetime *lt; /* XXX: Why do we lock ? 
*/ IPSEC_ASSERT(sp != NULL, ("null secpolicy")); /* set msg header */ m = key_setsadbmsg(SADB_X_SPDEXPIRE, 0, 0, 0, 0, 0); if (!m) { error = ENOBUFS; goto fail; } result = m; /* create lifetime extension (current and hard) */ len = PFKEY_ALIGN8(sizeof(*lt)) * 2; m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) { error = ENOBUFS; goto fail; } m_align(m, len); m->m_len = len; bzero(mtod(m, caddr_t), len); lt = mtod(m, struct sadb_lifetime *); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; lt->sadb_lifetime_allocations = 0; lt->sadb_lifetime_bytes = 0; lt->sadb_lifetime_addtime = sp->created; lt->sadb_lifetime_usetime = sp->lastused; lt = (struct sadb_lifetime *)(mtod(m, caddr_t) + len / 2); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD; lt->sadb_lifetime_allocations = 0; lt->sadb_lifetime_bytes = 0; lt->sadb_lifetime_addtime = sp->lifetime; lt->sadb_lifetime_usetime = sp->validtime; m_cat(result, m); /* * Note: do not send SADB_X_EXT_NAT_T_* here: * we are sending traffic endpoints. */ /* set sadb_address for source */ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sp->spidx.src.sa, sp->spidx.prefs, sp->spidx.ul_proto); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* set sadb_address for destination */ m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &sp->spidx.dst.sa, sp->spidx.prefd, sp->spidx.ul_proto); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* set secpolicy */ m = key_sp2msg(sp); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); if ((result->m_flags & M_PKTHDR) == 0) { error = EINVAL; goto fail; } if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) { error = ENOBUFS; goto fail; } } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); fail: if (result) m_freem(result); return error; } /* %%% SAD management */ /* * allocate memory for a new SA head, and copy values from mhp. * OUT: NULL : failure due to the lack of memory. * others : pointer to new SA head. */ static struct secashead * key_newsah(struct secasindex *saidx) { struct secashead *newsah; IPSEC_ASSERT(saidx != NULL, ("null saidx")); newsah = malloc(sizeof(struct secashead), M_IPSEC_SAH, M_NOWAIT|M_ZERO); if (newsah != NULL) { int i; for (i = 0; i < sizeof(newsah->savtree)/sizeof(newsah->savtree[0]); i++) LIST_INIT(&newsah->savtree[i]); newsah->saidx = *saidx; /* add to saidxtree */ newsah->state = SADB_SASTATE_MATURE; SAHTREE_LOCK(); LIST_INSERT_HEAD(&V_sahtree, newsah, chain); SAHTREE_UNLOCK(); } return(newsah); } /* * delete the SA index and all SAs registered to it. */ static void key_delsah(struct secashead *sah) { struct secasvar *sav, *nextsav; u_int stateidx; int zombie = 0; IPSEC_ASSERT(sah != NULL, ("NULL sah")); SAHTREE_LOCK_ASSERT(); /* search all SAs registered in the secindex. 
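* (An SA that is still referenced cannot be freed here; it is counted as
* a zombie and the sah itself is preserved until those references drain.)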
*/ for (stateidx = 0; stateidx < _ARRAYLEN(saorder_state_any); stateidx++) { u_int state = saorder_state_any[stateidx]; LIST_FOREACH_SAFE(sav, &sah->savtree[state], chain, nextsav) { if (sav->refcnt == 0) { /* sanity check */ KEY_CHKSASTATE(state, sav->state, __func__); /* * do NOT call KEY_FREESAV here: * it will only delete the sav if refcnt == 1, * where we already know that refcnt == 0 */ key_delsav(sav); } else { /* give up on deleting this sa */ zombie++; } } } if (!zombie) { /* delete only if no savs remain */ /* remove from tree of SA index */ if (__LIST_CHAINED(sah)) LIST_REMOVE(sah, chain); free(sah, M_IPSEC_SAH); } } /* * allocate a new SA with LARVAL state. key_add() and key_getspi() call this, * and copy the values of mhp into the new buffer. * When the SADB message type is GETSPI: * to set sequence number from acq_seq++, * to set zero to SPI. * not to call key_setsaval(). * OUT: NULL : fail * others : pointer to new secasvar. * * does not modify mbuf. does not free mbuf on error. */ static struct secasvar * key_newsav(struct mbuf *m, const struct sadb_msghdr *mhp, struct secashead *sah, int *errp, const char *where, int tag) { struct secasvar *newsav; const struct sadb_sa *xsa; IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); IPSEC_ASSERT(sah != NULL, ("null secashead")); newsav = malloc(sizeof(struct secasvar), M_IPSEC_SA, M_NOWAIT|M_ZERO); if (newsav == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); *errp = ENOBUFS; goto done; } switch (mhp->msg->sadb_msg_type) { case SADB_GETSPI: newsav->spi = 0; #ifdef IPSEC_DOSEQCHECK /* sync sequence number */ if (mhp->msg->sadb_msg_seq == 0) newsav->seq = (V_acq_seq = (V_acq_seq == ~0 ? 1 : ++V_acq_seq)); else #endif newsav->seq = mhp->msg->sadb_msg_seq; break; case SADB_ADD: /* sanity check */ if (mhp->ext[SADB_EXT_SA] == NULL) { free(newsav, M_IPSEC_SA); newsav = NULL; ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); *errp = EINVAL; goto done; } xsa = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA]; newsav->spi = xsa->sadb_sa_spi; newsav->seq = mhp->msg->sadb_msg_seq; break; default: free(newsav, M_IPSEC_SA); newsav = NULL; *errp = EINVAL; goto done; } /* copy sav values */ if (mhp->msg->sadb_msg_type != SADB_GETSPI) { *errp = key_setsaval(newsav, m, mhp); if (*errp) { free(newsav, M_IPSEC_SA); newsav = NULL; goto done; } } SECASVAR_LOCK_INIT(newsav); /* reset created */ newsav->created = time_second; newsav->pid = mhp->msg->sadb_msg_pid; /* add to satree */ newsav->sah = sah; sa_initref(newsav); newsav->state = SADB_SASTATE_LARVAL; SAHTREE_LOCK(); LIST_INSERT_TAIL(&sah->savtree[SADB_SASTATE_LARVAL], newsav, secasvar, chain); SAHTREE_UNLOCK(); done: KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s from %s:%u return SA:%p\n", __func__, where, tag, newsav)); return newsav; } /* * clean up an SA variable entry. */ static void key_cleansav(struct secasvar *sav) { /* * Cleanup xform state. Note that zeroize'ing causes the * keys to be cleared; otherwise we must do it ourselves. 
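* (Key material is always zeroed before its backing memory is freed, so
* stale keys never linger in the malloc pool.)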
*/ if (sav->tdb_xform != NULL) { sav->tdb_xform->xf_zeroize(sav); sav->tdb_xform = NULL; } else { if (sav->key_auth != NULL) bzero(sav->key_auth->key_data, _KEYLEN(sav->key_auth)); if (sav->key_enc != NULL) bzero(sav->key_enc->key_data, _KEYLEN(sav->key_enc)); } if (sav->key_auth != NULL) { if (sav->key_auth->key_data != NULL) free(sav->key_auth->key_data, M_IPSEC_MISC); free(sav->key_auth, M_IPSEC_MISC); sav->key_auth = NULL; } if (sav->key_enc != NULL) { if (sav->key_enc->key_data != NULL) free(sav->key_enc->key_data, M_IPSEC_MISC); free(sav->key_enc, M_IPSEC_MISC); sav->key_enc = NULL; } if (sav->sched) { bzero(sav->sched, sav->schedlen); free(sav->sched, M_IPSEC_MISC); sav->sched = NULL; } if (sav->replay != NULL) { free(sav->replay, M_IPSEC_MISC); sav->replay = NULL; } if (sav->lft_c != NULL) { free(sav->lft_c, M_IPSEC_MISC); sav->lft_c = NULL; } if (sav->lft_h != NULL) { free(sav->lft_h, M_IPSEC_MISC); sav->lft_h = NULL; } if (sav->lft_s != NULL) { free(sav->lft_s, M_IPSEC_MISC); sav->lft_s = NULL; } } /* * free() an SA variable entry. */ static void key_delsav(struct secasvar *sav) { IPSEC_ASSERT(sav != NULL, ("null sav")); IPSEC_ASSERT(sav->refcnt == 0, ("reference count %u > 0", sav->refcnt)); /* remove from SA header */ if (__LIST_CHAINED(sav)) LIST_REMOVE(sav, chain); key_cleansav(sav); SECASVAR_LOCK_DESTROY(sav); free(sav, M_IPSEC_SA); } /* * search SAD. * OUT: * NULL : not found * others : found, pointer to a SA. */ static struct secashead * key_getsah(struct secasindex *saidx) { struct secashead *sah; SAHTREE_LOCK(); LIST_FOREACH(sah, &V_sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) continue; if (key_cmpsaidx(&sah->saidx, saidx, CMP_REQID)) break; } SAHTREE_UNLOCK(); return sah; } /* * Check that the given SPI is not a duplicate. * NOTE: this function is slow because it searches the whole SAD. * OUT: * NULL : not found * others : found, pointer to a SA. */ static struct secasvar * key_checkspidup(struct secasindex *saidx, u_int32_t spi) { struct secashead *sah; struct secasvar *sav; /* check address family */ if (saidx->src.sa.sa_family != saidx->dst.sa.sa_family) { ipseclog((LOG_DEBUG, "%s: address family mismatched.\n", __func__)); return NULL; } sav = NULL; /* check all SAD */ SAHTREE_LOCK(); LIST_FOREACH(sah, &V_sahtree, chain) { if (!key_ismyaddr((struct sockaddr *)&sah->saidx.dst)) continue; sav = key_getsavbyspi(sah, spi); if (sav != NULL) break; } SAHTREE_UNLOCK(); return sav; } /* * Search the SAD, limited to alive SAs, for the given SPI. * OUT: * NULL : not found * others : found, pointer to a SA. */ static struct secasvar * key_getsavbyspi(struct secashead *sah, u_int32_t spi) { struct secasvar *sav; u_int stateidx, state; sav = NULL; SAHTREE_LOCK_ASSERT(); /* search all status */ for (stateidx = 0; stateidx < _ARRAYLEN(saorder_state_alive); stateidx++) { state = saorder_state_alive[stateidx]; LIST_FOREACH(sav, &sah->savtree[state], chain) { /* sanity check */ if (sav->state != state) { ipseclog((LOG_DEBUG, "%s: " "invalid sav->state (queue: %d SA: %d)\n", __func__, state, sav->state)); continue; } if (sav->spi == spi) return sav; } } return NULL; } /* * copy SA values from PF_KEY message except *SPI, SEQ, PID, STATE and TYPE*. * You must update these if needed. * OUT: 0: success. * !0: failure. * * does not modify mbuf. does not free mbuf on error.
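 *
 * Illustrative layout of the extensions a typical SADB_ADD for ESP
 * delivers into this function (a sketch, not a wire dump):
 *
 *	sadb_msg                 base header
 *	sadb_sa                  SPI, replay window, algorithms, flags
 *	sadb_lifetime            HARD and/or SOFT, both optional
 *	sadb_address x 2         SRC and DST traffic endpoints
 *	sadb_key                 AUTH and/or ENCRYPT, length in bits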
*/ static int key_setsaval(struct secasvar *sav, struct mbuf *m, const struct sadb_msghdr *mhp) { int error = 0; IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* initialization */ sav->replay = NULL; sav->key_auth = NULL; sav->key_enc = NULL; sav->sched = NULL; sav->schedlen = 0; sav->lft_c = NULL; sav->lft_h = NULL; sav->lft_s = NULL; sav->tdb_xform = NULL; /* transform */ sav->tdb_encalgxform = NULL; /* encoding algorithm */ sav->tdb_authalgxform = NULL; /* authentication algorithm */ sav->tdb_compalgxform = NULL; /* compression algorithm */ /* Initialize even if NAT-T not compiled in: */ sav->natt_type = 0; sav->natt_esp_frag_len = 0; /* SA */ if (mhp->ext[SADB_EXT_SA] != NULL) { const struct sadb_sa *sa0; sa0 = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA]; if (mhp->extlen[SADB_EXT_SA] < sizeof(*sa0)) { error = EINVAL; goto fail; } sav->alg_auth = sa0->sadb_sa_auth; sav->alg_enc = sa0->sadb_sa_encrypt; sav->flags = sa0->sadb_sa_flags; /* replay window */ if ((sa0->sadb_sa_flags & SADB_X_EXT_OLD) == 0) { sav->replay = (struct secreplay *) malloc(sizeof(struct secreplay)+sa0->sadb_sa_replay, M_IPSEC_MISC, M_NOWAIT|M_ZERO); if (sav->replay == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); error = ENOBUFS; goto fail; } if (sa0->sadb_sa_replay != 0) sav->replay->bitmap = (caddr_t)(sav->replay+1); sav->replay->wsize = sa0->sadb_sa_replay; } } /* Authentication keys */ if (mhp->ext[SADB_EXT_KEY_AUTH] != NULL) { const struct sadb_key *key0; int len; key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_AUTH]; len = mhp->extlen[SADB_EXT_KEY_AUTH]; error = 0; if (len < sizeof(*key0)) { error = EINVAL; goto fail; } switch (mhp->msg->sadb_msg_satype) { case SADB_SATYPE_AH: case SADB_SATYPE_ESP: case SADB_X_SATYPE_TCPSIGNATURE: if (len == PFKEY_ALIGN8(sizeof(struct sadb_key)) && sav->alg_auth != SADB_X_AALG_NULL) error = EINVAL; break; case SADB_X_SATYPE_IPCOMP: default: error = EINVAL; break; } if (error) { ipseclog((LOG_DEBUG, "%s: invalid key_auth values.\n", __func__)); goto fail; } sav->key_auth = (struct seckey *)key_dup_keymsg(key0, len, M_IPSEC_MISC); if (sav->key_auth == NULL ) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); error = ENOBUFS; goto fail; } } /* Encryption key */ if (mhp->ext[SADB_EXT_KEY_ENCRYPT] != NULL) { const struct sadb_key *key0; int len; key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_ENCRYPT]; len = mhp->extlen[SADB_EXT_KEY_ENCRYPT]; error = 0; if (len < sizeof(*key0)) { error = EINVAL; goto fail; } switch (mhp->msg->sadb_msg_satype) { case SADB_SATYPE_ESP: if (len == PFKEY_ALIGN8(sizeof(struct sadb_key)) && sav->alg_enc != SADB_EALG_NULL) { error = EINVAL; break; } sav->key_enc = (struct seckey *)key_dup_keymsg(key0, len, M_IPSEC_MISC); if (sav->key_enc == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); error = ENOBUFS; goto fail; } break; case SADB_X_SATYPE_IPCOMP: if (len != PFKEY_ALIGN8(sizeof(struct sadb_key))) error = EINVAL; sav->key_enc = NULL; /*just in case*/ break; case SADB_SATYPE_AH: case SADB_X_SATYPE_TCPSIGNATURE: default: error = EINVAL; break; } if (error) { ipseclog((LOG_DEBUG, "%s: invalid key_enc value.\n", __func__)); goto fail; } } /* set iv */ sav->ivlen = 0; switch (mhp->msg->sadb_msg_satype) { case SADB_SATYPE_AH: error = xform_init(sav, XF_AH); break; case SADB_SATYPE_ESP: error = xform_init(sav, XF_ESP); break; case SADB_X_SATYPE_IPCOMP: error = xform_init(sav, XF_IPCOMP); break; case 
SADB_X_SATYPE_TCPSIGNATURE: error = xform_init(sav, XF_TCPSIGNATURE); break; } if (error) { ipseclog((LOG_DEBUG, "%s: unable to initialize SA type %u.\n", __func__, mhp->msg->sadb_msg_satype)); goto fail; } /* reset created */ sav->created = time_second; /* make lifetime for CURRENT */ sav->lft_c = malloc(sizeof(struct seclifetime), M_IPSEC_MISC, M_NOWAIT); if (sav->lft_c == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); error = ENOBUFS; goto fail; } sav->lft_c->allocations = 0; sav->lft_c->bytes = 0; sav->lft_c->addtime = time_second; sav->lft_c->usetime = 0; /* lifetimes for HARD and SOFT */ { const struct sadb_lifetime *lft0; lft0 = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_HARD]; if (lft0 != NULL) { if (mhp->extlen[SADB_EXT_LIFETIME_HARD] < sizeof(*lft0)) { error = EINVAL; goto fail; } sav->lft_h = key_dup_lifemsg(lft0, M_IPSEC_MISC); if (sav->lft_h == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); error = ENOBUFS; goto fail; } /* XXX more initialization needed? */ } lft0 = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_SOFT]; if (lft0 != NULL) { if (mhp->extlen[SADB_EXT_LIFETIME_SOFT] < sizeof(*lft0)) { error = EINVAL; goto fail; } sav->lft_s = key_dup_lifemsg(lft0, M_IPSEC_MISC); if (sav->lft_s == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); error = ENOBUFS; goto fail; } /* XXX more initialization needed? */ } } return 0; fail: /* undo partial initialization */ key_cleansav(sav); return error; } /* * Validate a secasvar entry and set its state to SADB_SASTATE_MATURE. * OUT: 0: valid * other: errno */ static int key_mature(struct secasvar *sav) { int error; /* check SPI value */ switch (sav->sah->saidx.proto) { case IPPROTO_ESP: case IPPROTO_AH: /* * RFC 4302, 2.4. Security Parameters Index (SPI), SPI values * 1-255 reserved by IANA for future use, * 0 for implementation specific, local use. */ if (ntohl(sav->spi) <= 255) { ipseclog((LOG_DEBUG, "%s: illegal range of SPI %u.\n", __func__, (u_int32_t)ntohl(sav->spi))); return EINVAL; } break; } /* check satype */ switch (sav->sah->saidx.proto) { case IPPROTO_ESP: /* check flags */ if ((sav->flags & (SADB_X_EXT_OLD|SADB_X_EXT_DERIV)) == (SADB_X_EXT_OLD|SADB_X_EXT_DERIV)) { ipseclog((LOG_DEBUG, "%s: invalid flag (derived) " "given to old-esp.\n", __func__)); return EINVAL; } error = xform_init(sav, XF_ESP); break; case IPPROTO_AH: /* check flags */ if (sav->flags & SADB_X_EXT_DERIV) { ipseclog((LOG_DEBUG, "%s: invalid flag (derived) " "given to AH SA.\n", __func__)); return EINVAL; } if (sav->alg_enc != SADB_EALG_NONE) { ipseclog((LOG_DEBUG, "%s: protocol and algorithm " "mismatched.\n", __func__)); return(EINVAL); } error = xform_init(sav, XF_AH); break; case IPPROTO_IPCOMP: if (sav->alg_auth != SADB_AALG_NONE) { ipseclog((LOG_DEBUG, "%s: protocol and algorithm " "mismatched.\n", __func__)); return(EINVAL); } if ((sav->flags & SADB_X_EXT_RAWCPI) == 0 && ntohl(sav->spi) >= 0x10000) { ipseclog((LOG_DEBUG, "%s: invalid cpi for IPComp.\n", __func__)); return(EINVAL); } error = xform_init(sav, XF_IPCOMP); break; case IPPROTO_TCP: if (sav->alg_enc != SADB_EALG_NONE) { ipseclog((LOG_DEBUG, "%s: protocol and algorithm " "mismatched.\n", __func__)); return(EINVAL); } error = xform_init(sav, XF_TCPSIGNATURE); break; default: ipseclog((LOG_DEBUG, "%s: Invalid satype.\n", __func__)); error = EPROTONOSUPPORT; break; } if (error == 0) { SAHTREE_LOCK(); key_sa_chgstate(sav, SADB_SASTATE_MATURE); SAHTREE_UNLOCK(); } return (error); } /* * subroutine for SADB_GET and SADB_DUMP.
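 *
 * A minimal sketch of how a userland consumer might walk the reply
 * assembled here; "msg" names a complete sadb_msg in a local buffer,
 * and msg and handle() are illustrative placeholders only:
 *
 *	struct sadb_ext *ext = (struct sadb_ext *)(msg + 1);
 *	caddr_t end = (caddr_t)msg + PFKEY_UNUNIT64(msg->sadb_msg_len);
 *	while ((caddr_t)ext < end) {
 *		handle(ext->sadb_ext_type, ext);
 *		ext = (struct sadb_ext *)((caddr_t)ext +
 *		    PFKEY_UNUNIT64(ext->sadb_ext_len));
 *	}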
*/ static struct mbuf * key_setdumpsa(struct secasvar *sav, u_int8_t type, u_int8_t satype, u_int32_t seq, u_int32_t pid) { struct mbuf *result = NULL, *tres = NULL, *m; int i; int dumporder[] = { SADB_EXT_SA, SADB_X_EXT_SA2, SADB_EXT_LIFETIME_HARD, SADB_EXT_LIFETIME_SOFT, SADB_EXT_LIFETIME_CURRENT, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST, SADB_EXT_ADDRESS_PROXY, SADB_EXT_KEY_AUTH, SADB_EXT_KEY_ENCRYPT, SADB_EXT_IDENTITY_SRC, SADB_EXT_IDENTITY_DST, SADB_EXT_SENSITIVITY, #ifdef IPSEC_NAT_T SADB_X_EXT_NAT_T_TYPE, SADB_X_EXT_NAT_T_SPORT, SADB_X_EXT_NAT_T_DPORT, SADB_X_EXT_NAT_T_OAI, SADB_X_EXT_NAT_T_OAR, SADB_X_EXT_NAT_T_FRAG, #endif }; m = key_setsadbmsg(type, 0, satype, seq, pid, sav->refcnt); if (m == NULL) goto fail; result = m; for (i = sizeof(dumporder)/sizeof(dumporder[0]) - 1; i >= 0; i--) { m = NULL; switch (dumporder[i]) { case SADB_EXT_SA: m = key_setsadbsa(sav); if (!m) goto fail; break; case SADB_X_EXT_SA2: m = key_setsadbxsa2(sav->sah->saidx.mode, sav->replay ? sav->replay->count : 0, sav->sah->saidx.reqid); if (!m) goto fail; break; case SADB_EXT_ADDRESS_SRC: m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sav->sah->saidx.src.sa, FULLMASK, IPSEC_ULPROTO_ANY); if (!m) goto fail; break; case SADB_EXT_ADDRESS_DST: m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &sav->sah->saidx.dst.sa, FULLMASK, IPSEC_ULPROTO_ANY); if (!m) goto fail; break; case SADB_EXT_KEY_AUTH: if (!sav->key_auth) continue; m = key_setkey(sav->key_auth, SADB_EXT_KEY_AUTH); if (!m) goto fail; break; case SADB_EXT_KEY_ENCRYPT: if (!sav->key_enc) continue; m = key_setkey(sav->key_enc, SADB_EXT_KEY_ENCRYPT); if (!m) goto fail; break; case SADB_EXT_LIFETIME_CURRENT: if (!sav->lft_c) continue; m = key_setlifetime(sav->lft_c, SADB_EXT_LIFETIME_CURRENT); if (!m) goto fail; break; case SADB_EXT_LIFETIME_HARD: if (!sav->lft_h) continue; m = key_setlifetime(sav->lft_h, SADB_EXT_LIFETIME_HARD); if (!m) goto fail; break; case SADB_EXT_LIFETIME_SOFT: if (!sav->lft_s) continue; m = key_setlifetime(sav->lft_s, SADB_EXT_LIFETIME_SOFT); if (!m) goto fail; break; #ifdef IPSEC_NAT_T case SADB_X_EXT_NAT_T_TYPE: m = key_setsadbxtype(sav->natt_type); if (!m) goto fail; break; case SADB_X_EXT_NAT_T_DPORT: m = key_setsadbxport( KEY_PORTFROMSADDR(&sav->sah->saidx.dst), SADB_X_EXT_NAT_T_DPORT); if (!m) goto fail; break; case SADB_X_EXT_NAT_T_SPORT: m = key_setsadbxport( KEY_PORTFROMSADDR(&sav->sah->saidx.src), SADB_X_EXT_NAT_T_SPORT); if (!m) goto fail; break; case SADB_X_EXT_NAT_T_OAI: case SADB_X_EXT_NAT_T_OAR: case SADB_X_EXT_NAT_T_FRAG: /* We do not (yet) support those. */ continue; #endif case SADB_EXT_ADDRESS_PROXY: case SADB_EXT_IDENTITY_SRC: case SADB_EXT_IDENTITY_DST: /* XXX: should we brought from SPD ? */ case SADB_EXT_SENSITIVITY: default: continue; } if (!m) goto fail; if (tres) m_cat(m, tres); tres = m; } m_cat(result, tres); if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) goto fail; } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return result; fail: m_freem(result); m_freem(tres); return NULL; } /* * set data into sadb_msg. 
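 *
 * All PF_KEY lengths are expressed in 8-byte units: for instance a
 * bare 16-byte sadb_msg has PFKEY_UNIT64(16) == 2 stored in
 * sadb_msg_len, and a receiver recovers the byte count with
 * PFKEY_UNUNIT64(2) == 16.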
*/ static struct mbuf * key_setsadbmsg(u_int8_t type, u_int16_t tlen, u_int8_t satype, u_int32_t seq, pid_t pid, u_int16_t reserved) { struct mbuf *m; struct sadb_msg *p; int len; len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); if (len > MCLBYTES) return NULL; MGETHDR(m, M_NOWAIT, MT_DATA); if (m && len > MHLEN) { if (!(MCLGET(m, M_NOWAIT))) { m_freem(m); m = NULL; } } if (!m) return NULL; m->m_pkthdr.len = m->m_len = len; m->m_next = NULL; p = mtod(m, struct sadb_msg *); bzero(p, len); p->sadb_msg_version = PF_KEY_V2; p->sadb_msg_type = type; p->sadb_msg_errno = 0; p->sadb_msg_satype = satype; p->sadb_msg_len = PFKEY_UNIT64(tlen); p->sadb_msg_reserved = reserved; p->sadb_msg_seq = seq; p->sadb_msg_pid = (u_int32_t)pid; return m; } /* * copy secasvar data into sadb_address. */ static struct mbuf * key_setsadbsa(struct secasvar *sav) { struct mbuf *m; struct sadb_sa *p; int len; len = PFKEY_ALIGN8(sizeof(struct sadb_sa)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_sa *); bzero(p, len); p->sadb_sa_len = PFKEY_UNIT64(len); p->sadb_sa_exttype = SADB_EXT_SA; p->sadb_sa_spi = sav->spi; p->sadb_sa_replay = (sav->replay != NULL ? sav->replay->wsize : 0); p->sadb_sa_state = sav->state; p->sadb_sa_auth = sav->alg_auth; p->sadb_sa_encrypt = sav->alg_enc; p->sadb_sa_flags = sav->flags; return m; } /* * set data into sadb_address. */ static struct mbuf * key_setsadbaddr(u_int16_t exttype, const struct sockaddr *saddr, u_int8_t prefixlen, u_int16_t ul_proto) { struct mbuf *m; struct sadb_address *p; size_t len; len = PFKEY_ALIGN8(sizeof(struct sadb_address)) + PFKEY_ALIGN8(saddr->sa_len); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_address *); bzero(p, len); p->sadb_address_len = PFKEY_UNIT64(len); p->sadb_address_exttype = exttype; p->sadb_address_proto = ul_proto; if (prefixlen == FULLMASK) { switch (saddr->sa_family) { case AF_INET: prefixlen = sizeof(struct in_addr) << 3; break; case AF_INET6: prefixlen = sizeof(struct in6_addr) << 3; break; default: ; /*XXX*/ } } p->sadb_address_prefixlen = prefixlen; p->sadb_address_reserved = 0; bcopy(saddr, mtod(m, caddr_t) + PFKEY_ALIGN8(sizeof(struct sadb_address)), saddr->sa_len); return m; } /* * set data into sadb_x_sa2. */ static struct mbuf * key_setsadbxsa2(u_int8_t mode, u_int32_t seq, u_int32_t reqid) { struct mbuf *m; struct sadb_x_sa2 *p; size_t len; len = PFKEY_ALIGN8(sizeof(struct sadb_x_sa2)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_x_sa2 *); bzero(p, len); p->sadb_x_sa2_len = PFKEY_UNIT64(len); p->sadb_x_sa2_exttype = SADB_X_EXT_SA2; p->sadb_x_sa2_mode = mode; p->sadb_x_sa2_reserved1 = 0; p->sadb_x_sa2_reserved2 = 0; p->sadb_x_sa2_sequence = seq; p->sadb_x_sa2_reqid = reqid; return m; } #ifdef IPSEC_NAT_T /* * Set a type in sadb_x_nat_t_type. */ static struct mbuf * key_setsadbxtype(u_int16_t type) { struct mbuf *m; size_t len; struct sadb_x_nat_t_type *p; len = PFKEY_ALIGN8(sizeof(struct sadb_x_nat_t_type)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_x_nat_t_type *); bzero(p, len); p->sadb_x_nat_t_type_len = PFKEY_UNIT64(len); p->sadb_x_nat_t_type_exttype = SADB_X_EXT_NAT_T_TYPE; p->sadb_x_nat_t_type_type = type; return (m); } /* * Set a port in sadb_x_nat_t_port. 
* In contrast to default RFC 2367 behaviour, port is in network byte order. */ static struct mbuf * key_setsadbxport(u_int16_t port, u_int16_t type) { struct mbuf *m; size_t len; struct sadb_x_nat_t_port *p; len = PFKEY_ALIGN8(sizeof(struct sadb_x_nat_t_port)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_x_nat_t_port *); bzero(p, len); p->sadb_x_nat_t_port_len = PFKEY_UNIT64(len); p->sadb_x_nat_t_port_exttype = type; p->sadb_x_nat_t_port_port = port; return (m); } /* * Get port from sockaddr. Port is in network byte order. */ u_int16_t key_portfromsaddr(struct sockaddr *sa) { switch (sa->sa_family) { #ifdef INET case AF_INET: return ((struct sockaddr_in *)sa)->sin_port; #endif #ifdef INET6 case AF_INET6: return ((struct sockaddr_in6 *)sa)->sin6_port; #endif } KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s unexpected address family %d\n", __func__, sa->sa_family)); return (0); } #endif /* IPSEC_NAT_T */ /* * Set port in struct sockaddr. Port is in network byte order. */ static void key_porttosaddr(struct sockaddr *sa, u_int16_t port) { switch (sa->sa_family) { #ifdef INET case AF_INET: ((struct sockaddr_in *)sa)->sin_port = port; break; #endif #ifdef INET6 case AF_INET6: ((struct sockaddr_in6 *)sa)->sin6_port = port; break; #endif default: ipseclog((LOG_DEBUG, "%s: unexpected address family %d.\n", __func__, sa->sa_family)); break; } } /* * set data into sadb_x_policy */ static struct mbuf * -key_setsadbxpolicy(u_int16_t type, u_int8_t dir, u_int32_t id) +key_setsadbxpolicy(u_int16_t type, u_int8_t dir, u_int32_t id, u_int32_t priority) { struct mbuf *m; struct sadb_x_policy *p; size_t len; len = PFKEY_ALIGN8(sizeof(struct sadb_x_policy)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_x_policy *); bzero(p, len); p->sadb_x_policy_len = PFKEY_UNIT64(len); p->sadb_x_policy_exttype = SADB_X_EXT_POLICY; p->sadb_x_policy_type = type; p->sadb_x_policy_dir = dir; p->sadb_x_policy_id = id; + p->sadb_x_policy_priority = priority; return m; } /* %%% utilities */ /* Take a key message (sadb_key) from the socket and turn it into one * of the kernel's key structures (seckey). * * IN: pointer to the src * OUT: NULL no more memory */ struct seckey * key_dup_keymsg(const struct sadb_key *src, u_int len, struct malloc_type *type) { struct seckey *dst; dst = (struct seckey *)malloc(sizeof(struct seckey), type, M_NOWAIT); if (dst != NULL) { dst->bits = src->sadb_key_bits; dst->key_data = (char *)malloc(len, type, M_NOWAIT); if (dst->key_data != NULL) { bcopy((const char *)src + sizeof(struct sadb_key), dst->key_data, len); } else { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); free(dst, type); dst = NULL; } } else { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); } return dst; } /* Take a lifetime message (sadb_lifetime) passed in on a socket and * turn it into one of the kernel's lifetime structures (seclifetime). 
* * IN: pointer to the destination, source and malloc type * OUT: NULL, no more memory */ static struct seclifetime * key_dup_lifemsg(const struct sadb_lifetime *src, struct malloc_type *type) { struct seclifetime *dst = NULL; dst = (struct seclifetime *)malloc(sizeof(struct seclifetime), type, M_NOWAIT); if (dst == NULL) { /* XXX counter */ ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); } else { dst->allocations = src->sadb_lifetime_allocations; dst->bytes = src->sadb_lifetime_bytes; dst->addtime = src->sadb_lifetime_addtime; dst->usetime = src->sadb_lifetime_usetime; } return dst; } /* compare my own address * OUT: 1: true, i.e. my address. * 0: false */ int key_ismyaddr(struct sockaddr *sa) { IPSEC_ASSERT(sa != NULL, ("null sockaddr")); switch (sa->sa_family) { #ifdef INET case AF_INET: return (in_localip(satosin(sa)->sin_addr)); #endif #ifdef INET6 case AF_INET6: return key_ismyaddr6((struct sockaddr_in6 *)sa); #endif } return 0; } #ifdef INET6 /* * compare my own address for IPv6. * 1: ours * 0: other */ static int key_ismyaddr6(struct sockaddr_in6 *sin6) { struct in6_addr in6; if (!IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) return (in6_localip(&sin6->sin6_addr)); /* Convert address into kernel-internal form */ in6 = sin6->sin6_addr; in6.s6_addr16[1] = htons(sin6->sin6_scope_id & 0xffff); return (in6_localip(&in6)); } #endif /*INET6*/ /* * compare two secasindex structure. * flag can specify to compare 2 saidxes. * compare two secasindex structure without both mode and reqid. * don't compare port. * IN: * saidx0: source, it can be in SAD. * saidx1: object. * OUT: * 1 : equal * 0 : not equal */ static int key_cmpsaidx(const struct secasindex *saidx0, const struct secasindex *saidx1, int flag) { int chkport = 0; /* sanity */ if (saidx0 == NULL && saidx1 == NULL) return 1; if (saidx0 == NULL || saidx1 == NULL) return 0; if (saidx0->proto != saidx1->proto) return 0; if (flag == CMP_EXACTLY) { if (saidx0->mode != saidx1->mode) return 0; if (saidx0->reqid != saidx1->reqid) return 0; if (bcmp(&saidx0->src, &saidx1->src, saidx0->src.sa.sa_len) != 0 || bcmp(&saidx0->dst, &saidx1->dst, saidx0->dst.sa.sa_len) != 0) return 0; } else { /* CMP_MODE_REQID, CMP_REQID, CMP_HEAD */ if (flag == CMP_MODE_REQID ||flag == CMP_REQID) { /* * If reqid of SPD is non-zero, unique SA is required. * The result must be of same reqid in this case. */ if (saidx1->reqid != 0 && saidx0->reqid != saidx1->reqid) return 0; } if (flag == CMP_MODE_REQID) { if (saidx0->mode != IPSEC_MODE_ANY && saidx0->mode != saidx1->mode) return 0; } #ifdef IPSEC_NAT_T /* * If NAT-T is enabled, check ports for tunnel mode. * Do not check ports if they are set to zero in the SPD. * Also do not do it for native transport mode, as there * is no port information available in the SP. */ if ((saidx1->mode == IPSEC_MODE_TUNNEL || (saidx1->mode == IPSEC_MODE_TRANSPORT && saidx1->proto == IPPROTO_ESP)) && saidx1->src.sa.sa_family == AF_INET && saidx1->dst.sa.sa_family == AF_INET && ((const struct sockaddr_in *)(&saidx1->src))->sin_port && ((const struct sockaddr_in *)(&saidx1->dst))->sin_port) chkport = 1; #endif /* IPSEC_NAT_T */ if (key_sockaddrcmp(&saidx0->src.sa, &saidx1->src.sa, chkport) != 0) { return 0; } if (key_sockaddrcmp(&saidx0->dst.sa, &saidx1->dst.sa, chkport) != 0) { return 0; } } return 1; } /* * compare two secindex structure exactly. * IN: * spidx0: source, it is often in SPD. * spidx1: object, it is often from PFKEY message. 
* OUT: * 1 : equal * 0 : not equal */ static int key_cmpspidx_exactly(struct secpolicyindex *spidx0, struct secpolicyindex *spidx1) { /* sanity */ if (spidx0 == NULL && spidx1 == NULL) return 1; if (spidx0 == NULL || spidx1 == NULL) return 0; if (spidx0->prefs != spidx1->prefs || spidx0->prefd != spidx1->prefd || spidx0->ul_proto != spidx1->ul_proto) return 0; return key_sockaddrcmp(&spidx0->src.sa, &spidx1->src.sa, 1) == 0 && key_sockaddrcmp(&spidx0->dst.sa, &spidx1->dst.sa, 1) == 0; } /* * compare two secindex structure with mask. * IN: * spidx0: source, it is often in SPD. * spidx1: object, it is often from IP header. * OUT: * 1 : equal * 0 : not equal */ static int key_cmpspidx_withmask(struct secpolicyindex *spidx0, struct secpolicyindex *spidx1) { /* sanity */ if (spidx0 == NULL && spidx1 == NULL) return 1; if (spidx0 == NULL || spidx1 == NULL) return 0; if (spidx0->src.sa.sa_family != spidx1->src.sa.sa_family || spidx0->dst.sa.sa_family != spidx1->dst.sa.sa_family || spidx0->src.sa.sa_len != spidx1->src.sa.sa_len || spidx0->dst.sa.sa_len != spidx1->dst.sa.sa_len) return 0; /* if spidx.ul_proto == IPSEC_ULPROTO_ANY, ignore. */ if (spidx0->ul_proto != (u_int16_t)IPSEC_ULPROTO_ANY && spidx0->ul_proto != spidx1->ul_proto) return 0; switch (spidx0->src.sa.sa_family) { case AF_INET: if (spidx0->src.sin.sin_port != IPSEC_PORT_ANY && spidx0->src.sin.sin_port != spidx1->src.sin.sin_port) return 0; if (!key_bbcmp(&spidx0->src.sin.sin_addr, &spidx1->src.sin.sin_addr, spidx0->prefs)) return 0; break; case AF_INET6: if (spidx0->src.sin6.sin6_port != IPSEC_PORT_ANY && spidx0->src.sin6.sin6_port != spidx1->src.sin6.sin6_port) return 0; /* * scope_id check. if sin6_scope_id is 0, we regard it * as a wildcard scope, which matches any scope zone ID. */ if (spidx0->src.sin6.sin6_scope_id && spidx1->src.sin6.sin6_scope_id && spidx0->src.sin6.sin6_scope_id != spidx1->src.sin6.sin6_scope_id) return 0; if (!key_bbcmp(&spidx0->src.sin6.sin6_addr, &spidx1->src.sin6.sin6_addr, spidx0->prefs)) return 0; break; default: /* XXX */ if (bcmp(&spidx0->src, &spidx1->src, spidx0->src.sa.sa_len) != 0) return 0; break; } switch (spidx0->dst.sa.sa_family) { case AF_INET: if (spidx0->dst.sin.sin_port != IPSEC_PORT_ANY && spidx0->dst.sin.sin_port != spidx1->dst.sin.sin_port) return 0; if (!key_bbcmp(&spidx0->dst.sin.sin_addr, &spidx1->dst.sin.sin_addr, spidx0->prefd)) return 0; break; case AF_INET6: if (spidx0->dst.sin6.sin6_port != IPSEC_PORT_ANY && spidx0->dst.sin6.sin6_port != spidx1->dst.sin6.sin6_port) return 0; /* * scope_id check. if sin6_scope_id is 0, we regard it * as a wildcard scope, which matches any scope zone ID. */ if (spidx0->dst.sin6.sin6_scope_id && spidx1->dst.sin6.sin6_scope_id && spidx0->dst.sin6.sin6_scope_id != spidx1->dst.sin6.sin6_scope_id) return 0; if (!key_bbcmp(&spidx0->dst.sin6.sin6_addr, &spidx1->dst.sin6.sin6_addr, spidx0->prefd)) return 0; break; default: /* XXX */ if (bcmp(&spidx0->dst, &spidx1->dst, spidx0->dst.sa.sa_len) != 0) return 0; break; } /* XXX Do we check other field ? e.g. 
flowinfo */ return 1; } /* returns 0 on match */ static int key_sockaddrcmp(const struct sockaddr *sa1, const struct sockaddr *sa2, int port) { #ifdef satosin #undef satosin #endif #define satosin(s) ((const struct sockaddr_in *)s) #ifdef satosin6 #undef satosin6 #endif #define satosin6(s) ((const struct sockaddr_in6 *)s) if (sa1->sa_family != sa2->sa_family || sa1->sa_len != sa2->sa_len) return 1; switch (sa1->sa_family) { case AF_INET: if (sa1->sa_len != sizeof(struct sockaddr_in)) return 1; if (satosin(sa1)->sin_addr.s_addr != satosin(sa2)->sin_addr.s_addr) { return 1; } if (port && satosin(sa1)->sin_port != satosin(sa2)->sin_port) return 1; break; case AF_INET6: if (sa1->sa_len != sizeof(struct sockaddr_in6)) return 1; /*EINVAL*/ if (satosin6(sa1)->sin6_scope_id != satosin6(sa2)->sin6_scope_id) { return 1; } if (!IN6_ARE_ADDR_EQUAL(&satosin6(sa1)->sin6_addr, &satosin6(sa2)->sin6_addr)) { return 1; } if (port && satosin6(sa1)->sin6_port != satosin6(sa2)->sin6_port) { return 1; } break; default: if (bcmp(sa1, sa2, sa1->sa_len) != 0) return 1; break; } return 0; #undef satosin #undef satosin6 } /* * compare two buffers with mask. * IN: * addr1: source * addr2: object * bits: Number of bits to compare * OUT: * 1 : equal * 0 : not equal */ static int key_bbcmp(const void *a1, const void *a2, u_int bits) { const unsigned char *p1 = a1; const unsigned char *p2 = a2; /* XXX: This could be considerably faster if we compare a word * at a time, but it is complicated on LSB Endian machines */ /* Handle null pointers */ if (p1 == NULL || p2 == NULL) return (p1 == p2); while (bits >= 8) { if (*p1++ != *p2++) return 0; bits -= 8; } if (bits > 0) { u_int8_t mask = ~((1<<(8-bits))-1); if ((*p1 & mask) != (*p2 & mask)) return 0; } return 1; /* Match! */ } static void key_flush_spd(time_t now) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; u_int dir; /* SPD */ for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { restart: SPTREE_RLOCK(); TAILQ_FOREACH(sp, &V_sptree[dir], chain) { if (sp->lifetime == 0 && sp->validtime == 0) continue; if ((sp->lifetime && now - sp->created > sp->lifetime) || (sp->validtime && now - sp->lastused > sp->validtime)) { SP_ADDREF(sp); SPTREE_RUNLOCK(); key_spdexpire(sp); key_unlink(sp); KEY_FREESP(&sp); goto restart; } } SPTREE_RUNLOCK(); } } static void key_flush_sad(time_t now) { struct secashead *sah, *nextsah; struct secasvar *sav, *nextsav; /* SAD */ SAHTREE_LOCK(); LIST_FOREACH_SAFE(sah, &V_sahtree, chain, nextsah) { /* if sah has been dead, then delete it and process next sah. */ if (sah->state == SADB_SASTATE_DEAD) { key_delsah(sah); continue; } /* if LARVAL entry doesn't become MATURE, delete it. */ LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_LARVAL], chain, nextsav) { /* Need to also check refcnt for a larval SA ??? */ if (now - sav->created > V_key_larval_lifetime) KEY_FREESAV(&sav); } /* * check MATURE entry to start to send expire message * whether or not. */ LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_MATURE], chain, nextsav) { /* we don't need to check. */ if (sav->lft_s == NULL) continue; /* sanity check */ if (sav->lft_c == NULL) { ipseclog((LOG_DEBUG,"%s: there is no CURRENT " "time, why?\n", __func__)); continue; } /* * RFC 2367: * HARD lifetimes MUST take precedence over SOFT * lifetimes, meaning if the HARD and SOFT lifetimes * are the same, the HARD lifetime will appear on the * EXPIRE message. 
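 *
 * Worked example: with a SOFT addtime of 3600 and a HARD addtime of
 * 7200, an SA created at time t is moved to DYING (and a SOFT expire
 * message is sent) at t+3600, then to DEAD (HARD expire, SA freed)
 * at t+7200; the byte-count checks below compare lft_c->bytes the
 * same way.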
*/ /* check HARD lifetime */ if ((sav->lft_h->addtime != 0 && now - sav->created > sav->lft_h->addtime) || (sav->lft_h->bytes != 0 && sav->lft_h->bytes < sav->lft_c->bytes)) { key_sa_chgstate(sav, SADB_SASTATE_DEAD); key_expire(sav, 1); KEY_FREESAV(&sav); } /* check SOFT lifetime */ else if ((sav->lft_s->addtime != 0 && now - sav->created > sav->lft_s->addtime) || (sav->lft_s->bytes != 0 && sav->lft_s->bytes < sav->lft_c->bytes)) { key_sa_chgstate(sav, SADB_SASTATE_DYING); key_expire(sav, 0); } } /* check DYING entry to change status to DEAD. */ LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_DYING], chain, nextsav) { /* we don't need to check. */ if (sav->lft_h == NULL) continue; /* sanity check */ if (sav->lft_c == NULL) { ipseclog((LOG_DEBUG, "%s: there is no CURRENT " "time, why?\n", __func__)); continue; } if (sav->lft_h->addtime != 0 && now - sav->created > sav->lft_h->addtime) { key_sa_chgstate(sav, SADB_SASTATE_DEAD); key_expire(sav, 1); KEY_FREESAV(&sav); } #if 0 /* XXX Should we keep to send expire message until HARD lifetime ? */ else if (sav->lft_s != NULL && sav->lft_s->addtime != 0 && now - sav->created > sav->lft_s->addtime) { /* * XXX: should be checked to be * installed the valid SA. */ /* * If there is no SA then sending * expire message. */ key_expire(sav, 0); } #endif /* check HARD lifetime by bytes */ else if (sav->lft_h->bytes != 0 && sav->lft_h->bytes < sav->lft_c->bytes) { key_sa_chgstate(sav, SADB_SASTATE_DEAD); key_expire(sav, 1); KEY_FREESAV(&sav); } } /* delete entry in DEAD */ LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_DEAD], chain, nextsav) { /* sanity check */ if (sav->state != SADB_SASTATE_DEAD) { ipseclog((LOG_DEBUG, "%s: invalid sav->state " "(queue: %d SA: %d): kill it anyway\n", __func__, SADB_SASTATE_DEAD, sav->state)); } /* * do not call key_freesav() here. * sav should already be freed, and sav->refcnt * shows other references to sav * (such as from SPD). */ } } SAHTREE_UNLOCK(); } static void key_flush_acq(time_t now) { struct secacq *acq, *nextacq; /* ACQ tree */ ACQ_LOCK(); for (acq = LIST_FIRST(&V_acqtree); acq != NULL; acq = nextacq) { nextacq = LIST_NEXT(acq, chain); if (now - acq->created > V_key_blockacq_lifetime && __LIST_CHAINED(acq)) { LIST_REMOVE(acq, chain); free(acq, M_IPSEC_SAQ); } } ACQ_UNLOCK(); } static void key_flush_spacq(time_t now) { struct secspacq *acq, *nextacq; /* SP ACQ tree */ SPACQ_LOCK(); for (acq = LIST_FIRST(&V_spacqtree); acq != NULL; acq = nextacq) { nextacq = LIST_NEXT(acq, chain); if (now - acq->created > V_key_blockacq_lifetime && __LIST_CHAINED(acq)) { LIST_REMOVE(acq, chain); free(acq, M_IPSEC_SAQ); } } SPACQ_UNLOCK(); } /* * time handler. * scanning SPD and SAD to check status for each entries, * and do to remove or to expire. * XXX: year 2038 problem may remain. */ static void key_timehandler(void *arg) { VNET_ITERATOR_DECL(vnet_iter); time_t now = time_second; VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); key_flush_spd(now); key_flush_sad(now); key_flush_acq(now); key_flush_spacq(now); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); #ifndef IPSEC_DEBUG2 /* do exchange to tick time !! */ callout_schedule(&key_timer, hz); #endif /* IPSEC_DEBUG2 */ } u_long key_random() { u_long value; key_randomfill(&value, sizeof(value)); return value; } void key_randomfill(void *p, size_t l) { size_t n; u_long v; static int warn = 1; n = 0; n = (size_t)read_random(p, (u_int)l); /* last resort */ while (n < l) { v = random(); bcopy(&v, (u_int8_t *)p + n, l - n < sizeof(v) ? 
l - n : sizeof(v)); n += sizeof(v); if (warn) { printf("WARNING: pseudo-random number generator " "used for IPsec processing\n"); warn = 0; } } } /* * map SADB_SATYPE_* to IPPROTO_*. * if satype == SADB_SATYPE then satype is mapped to ~0. * OUT: * 0: invalid satype. */ static u_int16_t key_satype2proto(u_int8_t satype) { switch (satype) { case SADB_SATYPE_UNSPEC: return IPSEC_PROTO_ANY; case SADB_SATYPE_AH: return IPPROTO_AH; case SADB_SATYPE_ESP: return IPPROTO_ESP; case SADB_X_SATYPE_IPCOMP: return IPPROTO_IPCOMP; case SADB_X_SATYPE_TCPSIGNATURE: return IPPROTO_TCP; default: return 0; } /* NOTREACHED */ } /* * map IPPROTO_* to SADB_SATYPE_* * OUT: * 0: invalid protocol type. */ static u_int8_t key_proto2satype(u_int16_t proto) { switch (proto) { case IPPROTO_AH: return SADB_SATYPE_AH; case IPPROTO_ESP: return SADB_SATYPE_ESP; case IPPROTO_IPCOMP: return SADB_X_SATYPE_IPCOMP; case IPPROTO_TCP: return SADB_X_SATYPE_TCPSIGNATURE; default: return 0; } /* NOTREACHED */ } /* %%% PF_KEY */ /* * SADB_GETSPI processing is to receive * * from the IKMPd, to assign a unique spi value, to hang on the INBOUND * tree with the status of LARVAL, and send * * to the IKMPd. * * IN: mhp: pointer to the pointer to each header. * OUT: NULL if fail. * other if success, return pointer to the message to send. */ static int key_getspi(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct sadb_address *src0, *dst0; struct secasindex saidx; struct secashead *newsah; struct secasvar *newsav; u_int8_t proto; u_int32_t spi; u_int8_t mode; u_int32_t reqid; int error; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_X_EXT_SA2] != NULL) { mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } else { mode = IPSEC_MODE_ANY; reqid = 0; } src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } /* * Make sure the port numbers are zero. * In case of NAT-T we will update them later if needed. 
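 * Without this, a stray port number in the submitted address would
 * make the new SA's saidx compare unequal to later lookups that
 * carry zero ports; key_cmpsaidx() only honours ports when NAT-T
 * applies.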
*/ switch (((struct sockaddr *)(src0 + 1))->sa_family) { case AF_INET: if (((struct sockaddr *)(src0 + 1))->sa_len != sizeof(struct sockaddr_in)) return key_senderror(so, m, EINVAL); ((struct sockaddr_in *)(src0 + 1))->sin_port = 0; break; case AF_INET6: if (((struct sockaddr *)(src0 + 1))->sa_len != sizeof(struct sockaddr_in6)) return key_senderror(so, m, EINVAL); ((struct sockaddr_in6 *)(src0 + 1))->sin6_port = 0; break; default: ; /*???*/ } switch (((struct sockaddr *)(dst0 + 1))->sa_family) { case AF_INET: if (((struct sockaddr *)(dst0 + 1))->sa_len != sizeof(struct sockaddr_in)) return key_senderror(so, m, EINVAL); ((struct sockaddr_in *)(dst0 + 1))->sin_port = 0; break; case AF_INET6: if (((struct sockaddr *)(dst0 + 1))->sa_len != sizeof(struct sockaddr_in6)) return key_senderror(so, m, EINVAL); ((struct sockaddr_in6 *)(dst0 + 1))->sin6_port = 0; break; default: ; /*???*/ } /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); #ifdef IPSEC_NAT_T /* * Handle NAT-T info if present. * We made sure the port numbers are zero above, so we do * not have to worry in case we do not update them. */ if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL) ipseclog((LOG_DEBUG, "%s: NAT-T OAi present\n", __func__)); if (mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL) ipseclog((LOG_DEBUG, "%s: NAT-T OAr present\n", __func__)); if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL && mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { struct sadb_x_nat_t_type *type; struct sadb_x_nat_t_port *sport, *dport; if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) || mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { ipseclog((LOG_DEBUG, "%s: invalid nat-t message " "passed.\n", __func__)); return key_senderror(so, m, EINVAL); } sport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_SPORT]; dport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_DPORT]; if (sport) KEY_PORTTOSADDR(&saidx.src, sport->sadb_x_nat_t_port_port); if (dport) KEY_PORTTOSADDR(&saidx.dst, dport->sadb_x_nat_t_port_port); } #endif /* SPI allocation */ spi = key_do_getnewspi((struct sadb_spirange *)mhp->ext[SADB_EXT_SPIRANGE], &saidx); if (spi == 0) return key_senderror(so, m, EINVAL); /* get a SA index */ if ((newsah = key_getsah(&saidx)) == NULL) { /* create a new SA index */ if ((newsah = key_newsah(&saidx)) == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); return key_senderror(so, m, ENOBUFS); } } /* get a new SA */ /* XXX rewrite */ newsav = KEY_NEWSAV(m, mhp, newsah, &error); if (newsav == NULL) { /* XXX don't free new SA index allocated in above. */ return key_senderror(so, m, error); } /* set spi */ newsav->spi = htonl(spi); /* delete the entry in acqtree */ if (mhp->msg->sadb_msg_seq != 0) { struct secacq *acq; if ((acq = key_getacqbyseq(mhp->msg->sadb_msg_seq)) != NULL) { /* reset counter in order to deletion by timehandler. */ acq->created = time_second; acq->count = 0; } } { struct mbuf *n, *nn; struct sadb_sa *m_sa; struct sadb_msg *newmsg; int off, len; /* create new sadb_msg to reply. 
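 * Reply layout (assembled below): the requester's sadb_msg header is
 * copied first, an sadb_sa extension carrying the newly allocated
 * SPI in network byte order follows, and the SRC/DST address
 * extensions are then gathered verbatim from the request.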
*/ len = PFKEY_ALIGN8(sizeof(struct sadb_msg)) + PFKEY_ALIGN8(sizeof(struct sadb_sa)); MGETHDR(n, M_NOWAIT, MT_DATA); if (len > MHLEN) { if (!(MCLGET(n, M_NOWAIT))) { m_freem(n); n = NULL; } } if (!n) return key_senderror(so, m, ENOBUFS); n->m_len = len; n->m_next = NULL; off = 0; m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t) + off); off += PFKEY_ALIGN8(sizeof(struct sadb_msg)); m_sa = (struct sadb_sa *)(mtod(n, caddr_t) + off); m_sa->sadb_sa_len = PFKEY_UNIT64(sizeof(struct sadb_sa)); m_sa->sadb_sa_exttype = SADB_EXT_SA; m_sa->sadb_sa_spi = htonl(spi); off += PFKEY_ALIGN8(sizeof(struct sadb_sa)); IPSEC_ASSERT(off == len, ("length inconsistency (off %u len %u)", off, len)); n->m_next = key_gather_mbuf(m, mhp, 0, 2, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); if (!n->m_next) { m_freem(n); return key_senderror(so, m, ENOBUFS); } if (n->m_len < sizeof(struct sadb_msg)) { n = m_pullup(n, sizeof(struct sadb_msg)); if (n == NULL) return key_sendup_mbuf(so, m, KEY_SENDUP_ONE); } n->m_pkthdr.len = 0; for (nn = n; nn; nn = nn->m_next) n->m_pkthdr.len += nn->m_len; newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_seq = newsav->seq; newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } } /* * Allocate a new SPI; called by key_getspi(). * OUT: * 0: failure. * others: success. */ static u_int32_t key_do_getnewspi(struct sadb_spirange *spirange, struct secasindex *saidx) { u_int32_t newspi; u_int32_t min, max; int count = V_key_spi_trycnt; /* set spi range to allocate */ if (spirange != NULL) { min = spirange->sadb_spirange_min; max = spirange->sadb_spirange_max; } else { min = V_key_spi_minval; max = V_key_spi_maxval; } /* IPCOMP needs 2-byte SPI */ if (saidx->proto == IPPROTO_IPCOMP) { u_int32_t t; if (min >= 0x10000) min = 0xffff; if (max >= 0x10000) max = 0xffff; if (min > max) { t = min; min = max; max = t; } } if (min == max) { if (key_checkspidup(saidx, min) != NULL) { ipseclog((LOG_DEBUG, "%s: SPI %u exists already.\n", __func__, min)); return 0; } count--; /* this consumes one try. */ newspi = min; } else { /* init SPI */ newspi = 0; /* an SPI range was requested */ while (count--) { /* generate a pseudo-random SPI value within the range. */ newspi = min + (key_random() % (max - min + 1)); if (key_checkspidup(saidx, newspi) == NULL) break; } if (count == 0 || newspi == 0) { ipseclog((LOG_DEBUG, "%s: failed to allocate an SPI.\n", __func__)); return 0; } } /* statistics */ keystat.getspi_count = (keystat.getspi_count + V_key_spi_trycnt - count) / 2; return newspi; } /* * SADB_UPDATE processing * receive * * from the ikmpd, and update a secasvar entry whose status is SADB_SASTATE_LARVAL. * and send * * to the ikmpd. * * m will always be freed.
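 *
 * The target SA must already exist, normally created in LARVAL state
 * by a prior SADB_GETSPI.  The lookup below is by SPI unless
 * IPSEC_DOSEQCHECK is defined, in which case it matches on the
 * message sequence number instead; the sender's PID must also match
 * the PID recorded when the SA was created.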
*/ static int key_update(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct sadb_sa *sa0; struct sadb_address *src0, *dst0; #ifdef IPSEC_NAT_T struct sadb_x_nat_t_type *type; struct sadb_x_nat_t_port *sport, *dport; struct sadb_address *iaddr, *raddr; struct sadb_x_nat_t_frag *frag; #endif struct secasindex saidx; struct secashead *sah; struct secasvar *sav; u_int16_t proto; u_int8_t mode; u_int32_t reqid; int error; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_EXT_SA] == NULL || mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || (mhp->msg->sadb_msg_satype == SADB_SATYPE_ESP && mhp->ext[SADB_EXT_KEY_ENCRYPT] == NULL) || (mhp->msg->sadb_msg_satype == SADB_SATYPE_AH && mhp->ext[SADB_EXT_KEY_AUTH] == NULL) || (mhp->ext[SADB_EXT_LIFETIME_HARD] != NULL && mhp->ext[SADB_EXT_LIFETIME_SOFT] == NULL) || (mhp->ext[SADB_EXT_LIFETIME_HARD] == NULL && mhp->ext[SADB_EXT_LIFETIME_SOFT] != NULL)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_X_EXT_SA2] != NULL) { mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } else { mode = IPSEC_MODE_ANY; reqid = 0; } /* XXX boundary checking for other extensions */ sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); /* * Make sure the port numbers are zero. * In case of NAT-T we will update them later if needed. */ KEY_PORTTOSADDR(&saidx.src, 0); KEY_PORTTOSADDR(&saidx.dst, 0); #ifdef IPSEC_NAT_T /* * Handle NAT-T info if present. 
*/ if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL && mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) || mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { ipseclog((LOG_DEBUG, "%s: invalid message.\n", __func__)); return key_senderror(so, m, EINVAL); } type = (struct sadb_x_nat_t_type *) mhp->ext[SADB_X_EXT_NAT_T_TYPE]; sport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_SPORT]; dport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_DPORT]; } else { type = 0; sport = dport = 0; } if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL && mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL) { if (mhp->extlen[SADB_X_EXT_NAT_T_OAI] < sizeof(*iaddr) || mhp->extlen[SADB_X_EXT_NAT_T_OAR] < sizeof(*raddr)) { ipseclog((LOG_DEBUG, "%s: invalid message\n", __func__)); return key_senderror(so, m, EINVAL); } iaddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAI]; raddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAR]; ipseclog((LOG_DEBUG, "%s: NAT-T OAi/r present\n", __func__)); } else { iaddr = raddr = NULL; } if (mhp->ext[SADB_X_EXT_NAT_T_FRAG] != NULL) { if (mhp->extlen[SADB_X_EXT_NAT_T_FRAG] < sizeof(*frag)) { ipseclog((LOG_DEBUG, "%s: invalid message\n", __func__)); return key_senderror(so, m, EINVAL); } frag = (struct sadb_x_nat_t_frag *) mhp->ext[SADB_X_EXT_NAT_T_FRAG]; } else { frag = 0; } #endif /* get a SA header */ if ((sah = key_getsah(&saidx)) == NULL) { ipseclog((LOG_DEBUG, "%s: no SA index found.\n", __func__)); return key_senderror(so, m, ENOENT); } /* set spidx if there */ /* XXX rewrite */ error = key_setident(sah, m, mhp); if (error) return key_senderror(so, m, error); /* find a SA with sequence number. */ #ifdef IPSEC_DOSEQCHECK if (mhp->msg->sadb_msg_seq != 0 && (sav = key_getsavbyseq(sah, mhp->msg->sadb_msg_seq)) == NULL) { ipseclog((LOG_DEBUG, "%s: no larval SA with sequence %u " "exists.\n", __func__, mhp->msg->sadb_msg_seq)); return key_senderror(so, m, ENOENT); } #else SAHTREE_LOCK(); sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); SAHTREE_UNLOCK(); if (sav == NULL) { ipseclog((LOG_DEBUG, "%s: no such a SA found (spi:%u)\n", __func__, (u_int32_t)ntohl(sa0->sadb_sa_spi))); return key_senderror(so, m, EINVAL); } #endif /* validity check */ if (sav->sah->saidx.proto != proto) { ipseclog((LOG_DEBUG, "%s: protocol mismatched " "(DB=%u param=%u)\n", __func__, sav->sah->saidx.proto, proto)); return key_senderror(so, m, EINVAL); } #ifdef IPSEC_DOSEQCHECK if (sav->spi != sa0->sadb_sa_spi) { ipseclog((LOG_DEBUG, "%s: SPI mismatched (DB:%u param:%u)\n", __func__, (u_int32_t)ntohl(sav->spi), (u_int32_t)ntohl(sa0->sadb_sa_spi))); return key_senderror(so, m, EINVAL); } #endif if (sav->pid != mhp->msg->sadb_msg_pid) { ipseclog((LOG_DEBUG, "%s: pid mismatched (DB:%u param:%u)\n", __func__, sav->pid, mhp->msg->sadb_msg_pid)); return key_senderror(so, m, EINVAL); } /* copy sav values */ error = key_setsaval(sav, m, mhp); if (error) { KEY_FREESAV(&sav); return key_senderror(so, m, error); } #ifdef IPSEC_NAT_T /* * Handle more NAT-T info if present, * now that we have a sav to fill. */ if (type) sav->natt_type = type->sadb_x_nat_t_type_type; if (sport) KEY_PORTTOSADDR(&sav->sah->saidx.src, sport->sadb_x_nat_t_port_port); if (dport) KEY_PORTTOSADDR(&sav->sah->saidx.dst, dport->sadb_x_nat_t_port_port); #if 0 /* * In case SADB_X_EXT_NAT_T_FRAG was not given, leave it at 0. 
* We should actually check for a minimum MTU here, if we * want to support it in ip_output. */ if (frag) sav->natt_esp_frag_len = frag->sadb_x_nat_t_frag_fraglen; #endif #endif /* check SA values to be mature. */ if ((mhp->msg->sadb_msg_errno = key_mature(sav)) != 0) { KEY_FREESAV(&sav); return key_senderror(so, m, 0); } { struct mbuf *n; /* set msg buf from mhp */ n = key_getmsgbuf_x1(m, mhp); if (n == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return key_senderror(so, m, ENOBUFS); } m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* * search SAD with sequence for a SA which state is SADB_SASTATE_LARVAL. * only called by key_update(). * OUT: * NULL : not found * others : found, pointer to a SA. */ #ifdef IPSEC_DOSEQCHECK static struct secasvar * key_getsavbyseq(struct secashead *sah, u_int32_t seq) { struct secasvar *sav; u_int state; state = SADB_SASTATE_LARVAL; /* search SAD with sequence number ? */ LIST_FOREACH(sav, &sah->savtree[state], chain) { KEY_CHKSASTATE(state, sav->state, __func__); if (sav->seq == seq) { sa_addref(sav); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP %s cause refcnt++:%d SA:%p\n", __func__, sav->refcnt, sav)); return sav; } } return NULL; } #endif /* * SADB_ADD processing * add an entry to SA database, when received * * from the ikmpd, * and send * * to the ikmpd. * * IGNORE identity and sensitivity messages. * * m will always be freed. */ static int key_add(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct sadb_sa *sa0; struct sadb_address *src0, *dst0; #ifdef IPSEC_NAT_T struct sadb_x_nat_t_type *type; struct sadb_address *iaddr, *raddr; struct sadb_x_nat_t_frag *frag; #endif struct secasindex saidx; struct secashead *newsah; struct secasvar *newsav; u_int16_t proto; u_int8_t mode; u_int32_t reqid; int error; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_EXT_SA] == NULL || mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || (mhp->msg->sadb_msg_satype == SADB_SATYPE_ESP && mhp->ext[SADB_EXT_KEY_ENCRYPT] == NULL) || (mhp->msg->sadb_msg_satype == SADB_SATYPE_AH && mhp->ext[SADB_EXT_KEY_AUTH] == NULL) || (mhp->ext[SADB_EXT_LIFETIME_HARD] != NULL && mhp->ext[SADB_EXT_LIFETIME_SOFT] == NULL) || (mhp->ext[SADB_EXT_LIFETIME_HARD] == NULL && mhp->ext[SADB_EXT_LIFETIME_SOFT] != NULL)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { /* XXX need more */ ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_X_EXT_SA2] != NULL) { mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } else { mode = IPSEC_MODE_ANY; reqid = 0; } sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; /* XXX boundary check against sa_len */ 
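/*
 * KEY_SETSECASIDX fills saidx from proto/mode/reqid plus the two
 * sockaddrs that immediately follow their sadb_address extension
 * headers; src0 + 1 and dst0 + 1 use struct pointer arithmetic to
 * step over the fixed-size headers and land on those sockaddrs.
 */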
KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); /* * Make sure the port numbers are zero. * In case of NAT-T we will update them later if needed. */ KEY_PORTTOSADDR(&saidx.src, 0); KEY_PORTTOSADDR(&saidx.dst, 0); #ifdef IPSEC_NAT_T /* * Handle NAT-T info if present. */ if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL && mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { struct sadb_x_nat_t_port *sport, *dport; if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) || mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { ipseclog((LOG_DEBUG, "%s: invalid message.\n", __func__)); return key_senderror(so, m, EINVAL); } type = (struct sadb_x_nat_t_type *) mhp->ext[SADB_X_EXT_NAT_T_TYPE]; sport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_SPORT]; dport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_DPORT]; if (sport) KEY_PORTTOSADDR(&saidx.src, sport->sadb_x_nat_t_port_port); if (dport) KEY_PORTTOSADDR(&saidx.dst, dport->sadb_x_nat_t_port_port); } else { type = 0; } if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL && mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL) { if (mhp->extlen[SADB_X_EXT_NAT_T_OAI] < sizeof(*iaddr) || mhp->extlen[SADB_X_EXT_NAT_T_OAR] < sizeof(*raddr)) { ipseclog((LOG_DEBUG, "%s: invalid message\n", __func__)); return key_senderror(so, m, EINVAL); } iaddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAI]; raddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAR]; ipseclog((LOG_DEBUG, "%s: NAT-T OAi/r present\n", __func__)); } else { iaddr = raddr = NULL; } if (mhp->ext[SADB_X_EXT_NAT_T_FRAG] != NULL) { if (mhp->extlen[SADB_X_EXT_NAT_T_FRAG] < sizeof(*frag)) { ipseclog((LOG_DEBUG, "%s: invalid message\n", __func__)); return key_senderror(so, m, EINVAL); } frag = (struct sadb_x_nat_t_frag *) mhp->ext[SADB_X_EXT_NAT_T_FRAG]; } else { frag = 0; } #endif /* get a SA header */ if ((newsah = key_getsah(&saidx)) == NULL) { /* create a new SA header */ if ((newsah = key_newsah(&saidx)) == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); return key_senderror(so, m, ENOBUFS); } } /* set spidx if there */ /* XXX rewrite */ error = key_setident(newsah, m, mhp); if (error) { return key_senderror(so, m, error); } /* create new SA entry. */ /* We can create a new SA only if the SPI is different. */ SAHTREE_LOCK(); newsav = key_getsavbyspi(newsah, sa0->sadb_sa_spi); SAHTREE_UNLOCK(); if (newsav != NULL) { ipseclog((LOG_DEBUG, "%s: SA already exists.\n", __func__)); return key_senderror(so, m, EEXIST); } newsav = KEY_NEWSAV(m, mhp, newsah, &error); if (newsav == NULL) { return key_senderror(so, m, error); } #ifdef IPSEC_NAT_T /* * Handle more NAT-T info if present, * now that we have a sav to fill. */ if (type) newsav->natt_type = type->sadb_x_nat_t_type_type; #if 0 /* * In case SADB_X_EXT_NAT_T_FRAG was not given, leave it at 0. * We should actually check for a minimum MTU here, if we * want to support it in ip_output. */ if (frag) newsav->natt_esp_frag_len = frag->sadb_x_nat_t_frag_fraglen; #endif #endif /* check that the SA values are mature. */ if ((error = key_mature(newsav)) != 0) { KEY_FREESAV(&newsav); return key_senderror(so, m, error); } /* * don't call key_freesav() here, as we would like to keep the SA * in the database on success.
*/ { struct mbuf *n; /* set msg buf from mhp */ n = key_getmsgbuf_x1(m, mhp); if (n == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return key_senderror(so, m, ENOBUFS); } m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* m is retained */ static int key_setident(struct secashead *sah, struct mbuf *m, const struct sadb_msghdr *mhp) { const struct sadb_ident *idsrc, *iddst; int idsrclen, iddstlen; IPSEC_ASSERT(sah != NULL, ("null secashead")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* don't make buffer if not there */ if (mhp->ext[SADB_EXT_IDENTITY_SRC] == NULL && mhp->ext[SADB_EXT_IDENTITY_DST] == NULL) { sah->idents = NULL; sah->identd = NULL; return 0; } if (mhp->ext[SADB_EXT_IDENTITY_SRC] == NULL || mhp->ext[SADB_EXT_IDENTITY_DST] == NULL) { ipseclog((LOG_DEBUG, "%s: invalid identity.\n", __func__)); return EINVAL; } idsrc = (const struct sadb_ident *)mhp->ext[SADB_EXT_IDENTITY_SRC]; iddst = (const struct sadb_ident *)mhp->ext[SADB_EXT_IDENTITY_DST]; idsrclen = mhp->extlen[SADB_EXT_IDENTITY_SRC]; iddstlen = mhp->extlen[SADB_EXT_IDENTITY_DST]; /* validity check */ if (idsrc->sadb_ident_type != iddst->sadb_ident_type) { ipseclog((LOG_DEBUG, "%s: ident type mismatch.\n", __func__)); return EINVAL; } switch (idsrc->sadb_ident_type) { case SADB_IDENTTYPE_PREFIX: case SADB_IDENTTYPE_FQDN: case SADB_IDENTTYPE_USERFQDN: default: /* XXX do nothing */ sah->idents = NULL; sah->identd = NULL; return 0; } /* make structure */ sah->idents = malloc(sizeof(struct secident), M_IPSEC_MISC, M_NOWAIT); if (sah->idents == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return ENOBUFS; } sah->identd = malloc(sizeof(struct secident), M_IPSEC_MISC, M_NOWAIT); if (sah->identd == NULL) { free(sah->idents, M_IPSEC_MISC); sah->idents = NULL; ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return ENOBUFS; } sah->idents->type = idsrc->sadb_ident_type; sah->idents->id = idsrc->sadb_ident_id; sah->identd->type = iddst->sadb_ident_type; sah->identd->id = iddst->sadb_ident_id; return 0; } /* * m will not be freed on return. * it is caller's responsibility to free the result. */ static struct mbuf * key_getmsgbuf_x1(struct mbuf *m, const struct sadb_msghdr *mhp) { struct mbuf *n; IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* create new sadb_msg to reply. */ n = key_gather_mbuf(m, mhp, 1, 9, SADB_EXT_RESERVED, SADB_EXT_SA, SADB_X_EXT_SA2, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST, SADB_EXT_LIFETIME_HARD, SADB_EXT_LIFETIME_SOFT, SADB_EXT_IDENTITY_SRC, SADB_EXT_IDENTITY_DST); if (!n) return NULL; if (n->m_len < sizeof(struct sadb_msg)) { n = m_pullup(n, sizeof(struct sadb_msg)); if (n == NULL) return NULL; } mtod(n, struct sadb_msg *)->sadb_msg_errno = 0; mtod(n, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); return n; } /* * SADB_DELETE processing * receive * * from the ikmpd, and set SADB_SASTATE_DEAD, * and send, * * to the ikmpd. * * m will always be freed. 
*/ static int key_delete(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct sadb_sa *sa0; struct sadb_address *src0, *dst0; struct secasindex saidx; struct secashead *sah; struct secasvar *sav = NULL; u_int16_t proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_EXT_SA] == NULL) { /* * Caller wants us to delete all non-LARVAL SAs * that match the src/dst. This is used during * IKE INITIAL-CONTACT. */ ipseclog((LOG_DEBUG, "%s: doing delete all.\n", __func__)); return key_delete_all(so, m, mhp, proto); } else if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); /* * Make sure the port numbers are zero. * In case of NAT-T we will update them later if needed. */ KEY_PORTTOSADDR(&saidx.src, 0); KEY_PORTTOSADDR(&saidx.dst, 0); #ifdef IPSEC_NAT_T /* * Handle NAT-T info if present. */ if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { struct sadb_x_nat_t_port *sport, *dport; if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { ipseclog((LOG_DEBUG, "%s: invalid message.\n", __func__)); return key_senderror(so, m, EINVAL); } sport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_SPORT]; dport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_DPORT]; if (sport) KEY_PORTTOSADDR(&saidx.src, sport->sadb_x_nat_t_port_port); if (dport) KEY_PORTTOSADDR(&saidx.dst, dport->sadb_x_nat_t_port_port); } #endif /* get a SA header */ SAHTREE_LOCK(); LIST_FOREACH(sah, &V_sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) continue; if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) continue; /* get a SA with SPI. */ sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); if (sav) break; } if (sah == NULL) { SAHTREE_UNLOCK(); ipseclog((LOG_DEBUG, "%s: no SA found.\n", __func__)); return key_senderror(so, m, ENOENT); } key_sa_chgstate(sav, SADB_SASTATE_DEAD); KEY_FREESAV(&sav); SAHTREE_UNLOCK(); { struct mbuf *n; struct sadb_msg *newmsg; /* create new sadb_msg to reply. */ /* XXX-BZ NAT-T extensions? 
*/ n = key_gather_mbuf(m, mhp, 1, 4, SADB_EXT_RESERVED, SADB_EXT_SA, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); if (!n) return key_senderror(so, m, ENOBUFS); if (n->m_len < sizeof(struct sadb_msg)) { n = m_pullup(n, sizeof(struct sadb_msg)); if (n == NULL) return key_senderror(so, m, ENOBUFS); } newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* * delete all SAs for src/dst. Called from key_delete(). */ static int key_delete_all(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp, u_int16_t proto) { struct sadb_address *src0, *dst0; struct secasindex saidx; struct secashead *sah; struct secasvar *sav, *nextsav; u_int stateidx, state; src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); /* * Make sure the port numbers are zero. * In case of NAT-T we will update them later if needed. */ KEY_PORTTOSADDR(&saidx.src, 0); KEY_PORTTOSADDR(&saidx.dst, 0); #ifdef IPSEC_NAT_T /* * Handle NAT-T info if present. */ if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { struct sadb_x_nat_t_port *sport, *dport; if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { ipseclog((LOG_DEBUG, "%s: invalid message.\n", __func__)); return key_senderror(so, m, EINVAL); } sport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_SPORT]; dport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_DPORT]; if (sport) KEY_PORTTOSADDR(&saidx.src, sport->sadb_x_nat_t_port_port); if (dport) KEY_PORTTOSADDR(&saidx.dst, dport->sadb_x_nat_t_port_port); } #endif SAHTREE_LOCK(); LIST_FOREACH(sah, &V_sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) continue; if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) continue; /* Delete all non-LARVAL SAs. */ for (stateidx = 0; stateidx < _ARRAYLEN(saorder_state_alive); stateidx++) { state = saorder_state_alive[stateidx]; if (state == SADB_SASTATE_LARVAL) continue; for (sav = LIST_FIRST(&sah->savtree[state]); sav != NULL; sav = nextsav) { nextsav = LIST_NEXT(sav, chain); /* sanity check */ if (sav->state != state) { ipseclog((LOG_DEBUG, "%s: invalid " "sav->state (queue %d SA %d)\n", __func__, state, sav->state)); continue; } key_sa_chgstate(sav, SADB_SASTATE_DEAD); KEY_FREESAV(&sav); } } } SAHTREE_UNLOCK(); { struct mbuf *n; struct sadb_msg *newmsg; /* create new sadb_msg to reply. */ /* XXX-BZ NAT-T extensions? */ n = key_gather_mbuf(m, mhp, 1, 3, SADB_EXT_RESERVED, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); if (!n) return key_senderror(so, m, ENOBUFS); if (n->m_len < sizeof(struct sadb_msg)) { n = m_pullup(n, sizeof(struct sadb_msg)); if (n == NULL) return key_senderror(so, m, ENOBUFS); } newmsg = mtod(n, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } /* * SADB_GET processing * receive * * from the ikmpd, and get a SP and a SA to respond, * and send, * * to the ikmpd. * * m will always be freed. 
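 *
 * For illustration (sketch): the reply for the matching SA is assembled
 * by key_setdumpsa(), the same helper used for SADB_DUMP, but it is
 * delivered only to the requesting socket (KEY_SENDUP_ONE) rather than
 * broadcast to all PF_KEY listeners.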
*/ static int key_get(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct sadb_sa *sa0; struct sadb_address *src0, *dst0; struct secasindex saidx; struct secashead *sah; struct secasvar *sav = NULL; u_int16_t proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->ext[SADB_EXT_SA] == NULL || mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); /* * Make sure the port numbers are zero. * In case of NAT-T we will update them later if needed. */ KEY_PORTTOSADDR(&saidx.src, 0); KEY_PORTTOSADDR(&saidx.dst, 0); #ifdef IPSEC_NAT_T /* * Handle NAT-T info if present. */ if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { struct sadb_x_nat_t_port *sport, *dport; if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { ipseclog((LOG_DEBUG, "%s: invalid message.\n", __func__)); return key_senderror(so, m, EINVAL); } sport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_SPORT]; dport = (struct sadb_x_nat_t_port *) mhp->ext[SADB_X_EXT_NAT_T_DPORT]; if (sport) KEY_PORTTOSADDR(&saidx.src, sport->sadb_x_nat_t_port_port); if (dport) KEY_PORTTOSADDR(&saidx.dst, dport->sadb_x_nat_t_port_port); } #endif /* get a SA header */ SAHTREE_LOCK(); LIST_FOREACH(sah, &V_sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) continue; if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) continue; /* get a SA with SPI. */ sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); if (sav) break; } SAHTREE_UNLOCK(); if (sah == NULL) { ipseclog((LOG_DEBUG, "%s: no SA found.\n", __func__)); return key_senderror(so, m, ENOENT); } { struct mbuf *n; u_int8_t satype; /* map proto to satype */ if ((satype = key_proto2satype(sah->saidx.proto)) == 0) { ipseclog((LOG_DEBUG, "%s: there was invalid proto in SAD.\n", __func__)); return key_senderror(so, m, EINVAL); } /* create new sadb_msg to reply. */ n = key_setdumpsa(sav, SADB_GET, satype, mhp->msg->sadb_msg_seq, mhp->msg->sadb_msg_pid); if (!n) return key_senderror(so, m, ENOBUFS); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } } /* XXX make it sysctl-configurable? 
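 *
 * Worked example with the defaults set below (sketch): the hard add-time
 * is 86400 s (1 day), so the soft add-time becomes 86400 * 80 / 100 =
 * 69120 s; the hard use-time is 28800 s (8 hours), so the soft use-time
 * becomes 28800 * 80 / 100 = 23040 s.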
 */
static void
key_getcomb_setlifetime(struct sadb_comb *comb)
{

	comb->sadb_comb_soft_allocations = 1;
	comb->sadb_comb_hard_allocations = 1;
	comb->sadb_comb_soft_bytes = 0;
	comb->sadb_comb_hard_bytes = 0;
	comb->sadb_comb_hard_addtime = 86400;	/* 1 day */
	comb->sadb_comb_soft_addtime = comb->sadb_comb_hard_addtime * 80 / 100;
	comb->sadb_comb_hard_usetime = 28800;	/* 8 hours */
	comb->sadb_comb_soft_usetime = comb->sadb_comb_hard_usetime * 80 / 100;
}

/*
 * XXX reorder combinations by preference
 * XXX no idea if the user wants ESP authentication or not
 */
static struct mbuf *
key_getcomb_esp()
{
	struct sadb_comb *comb;
	struct enc_xform *algo;
	struct mbuf *result = NULL, *m, *n;
	int encmin;
	int i, off, o;
	int totlen;
	const int l = PFKEY_ALIGN8(sizeof(struct sadb_comb));

	m = NULL;
	for (i = 1; i <= SADB_EALG_MAX; i++) {
		algo = esp_algorithm_lookup(i);
		if (algo == NULL)
			continue;

		/* discard algorithms with key size smaller than system min */
		if (_BITS(algo->maxkey) < V_ipsec_esp_keymin)
			continue;
		if (_BITS(algo->minkey) < V_ipsec_esp_keymin)
			encmin = V_ipsec_esp_keymin;
		else
			encmin = _BITS(algo->minkey);

		if (V_ipsec_esp_auth)
			m = key_getcomb_ah();
		else {
			IPSEC_ASSERT(l <= MLEN,
			    ("l=%u > MLEN=%lu", l, (u_long) MLEN));
			MGET(m, M_NOWAIT, MT_DATA);
			if (m) {
				M_ALIGN(m, l);
				m->m_len = l;
				m->m_next = NULL;
				bzero(mtod(m, caddr_t), m->m_len);
			}
		}
		if (!m)
			goto fail;

		totlen = 0;
		for (n = m; n; n = n->m_next)
			totlen += n->m_len;
		IPSEC_ASSERT((totlen % l) == 0, ("totlen=%u, l=%u", totlen, l));

		for (off = 0; off < totlen; off += l) {
			n = m_pulldown(m, off, l, &o);
			if (!n) {
				/* m is already freed */
				goto fail;
			}
			comb = (struct sadb_comb *)(mtod(n, caddr_t) + o);
			bzero(comb, sizeof(*comb));
			key_getcomb_setlifetime(comb);
			comb->sadb_comb_encrypt = i;
			comb->sadb_comb_encrypt_minbits = encmin;
			comb->sadb_comb_encrypt_maxbits = _BITS(algo->maxkey);
		}

		if (!result)
			result = m;
		else
			m_cat(result, m);
	}

	return result;

 fail:
	if (result)
		m_freem(result);
	return NULL;
}

static void
key_getsizes_ah(const struct auth_hash *ah, int alg, u_int16_t* min,
    u_int16_t* max)
{

	*min = *max = ah->keysize;
	if (ah->keysize == 0) {
		/*
		 * Transform takes arbitrary key size but algorithm
		 * key size is restricted.  Enforce this here.
*/ switch (alg) { case SADB_X_AALG_MD5: *min = *max = 16; break; case SADB_X_AALG_SHA: *min = *max = 20; break; case SADB_X_AALG_NULL: *min = 1; *max = 256; break; case SADB_X_AALG_SHA2_256: *min = *max = 32; break; case SADB_X_AALG_SHA2_384: *min = *max = 48; break; case SADB_X_AALG_SHA2_512: *min = *max = 64; break; default: DPRINTF(("%s: unknown AH algorithm %u\n", __func__, alg)); break; } } } /* * XXX reorder combinations by preference */ static struct mbuf * key_getcomb_ah() { struct sadb_comb *comb; struct auth_hash *algo; struct mbuf *m; u_int16_t minkeysize, maxkeysize; int i; const int l = PFKEY_ALIGN8(sizeof(struct sadb_comb)); m = NULL; for (i = 1; i <= SADB_AALG_MAX; i++) { #if 1 /* we prefer HMAC algorithms, not old algorithms */ if (i != SADB_AALG_SHA1HMAC && i != SADB_AALG_MD5HMAC && i != SADB_X_AALG_SHA2_256 && i != SADB_X_AALG_SHA2_384 && i != SADB_X_AALG_SHA2_512) continue; #endif algo = ah_algorithm_lookup(i); if (!algo) continue; key_getsizes_ah(algo, i, &minkeysize, &maxkeysize); /* discard algorithms with key size smaller than system min */ if (_BITS(minkeysize) < V_ipsec_ah_keymin) continue; if (!m) { IPSEC_ASSERT(l <= MLEN, ("l=%u > MLEN=%lu", l, (u_long) MLEN)); MGET(m, M_NOWAIT, MT_DATA); if (m) { M_ALIGN(m, l); m->m_len = l; m->m_next = NULL; } } else M_PREPEND(m, l, M_NOWAIT); if (!m) return NULL; comb = mtod(m, struct sadb_comb *); bzero(comb, sizeof(*comb)); key_getcomb_setlifetime(comb); comb->sadb_comb_auth = i; comb->sadb_comb_auth_minbits = _BITS(minkeysize); comb->sadb_comb_auth_maxbits = _BITS(maxkeysize); } return m; } /* * not really an official behavior. discussed in pf_key@inner.net in Sep2000. * XXX reorder combinations by preference */ static struct mbuf * key_getcomb_ipcomp() { struct sadb_comb *comb; struct comp_algo *algo; struct mbuf *m; int i; const int l = PFKEY_ALIGN8(sizeof(struct sadb_comb)); m = NULL; for (i = 1; i <= SADB_X_CALG_MAX; i++) { algo = ipcomp_algorithm_lookup(i); if (!algo) continue; if (!m) { IPSEC_ASSERT(l <= MLEN, ("l=%u > MLEN=%lu", l, (u_long) MLEN)); MGET(m, M_NOWAIT, MT_DATA); if (m) { M_ALIGN(m, l); m->m_len = l; m->m_next = NULL; } } else M_PREPEND(m, l, M_NOWAIT); if (!m) return NULL; comb = mtod(m, struct sadb_comb *); bzero(comb, sizeof(*comb)); key_getcomb_setlifetime(comb); comb->sadb_comb_encrypt = i; /* what should we set into sadb_comb_*_{min,max}bits? */ } return m; } /* * XXX no way to pass mode (transport/tunnel) to userland * XXX replay checking? * XXX sysctl interface to ipsec_{ah,esp}_keymin */ static struct mbuf * key_getprop(const struct secasindex *saidx) { struct sadb_prop *prop; struct mbuf *m, *n; const int l = PFKEY_ALIGN8(sizeof(struct sadb_prop)); int totlen; switch (saidx->proto) { case IPPROTO_ESP: m = key_getcomb_esp(); break; case IPPROTO_AH: m = key_getcomb_ah(); break; case IPPROTO_IPCOMP: m = key_getcomb_ipcomp(); break; default: return NULL; } if (!m) return NULL; M_PREPEND(m, l, M_NOWAIT); if (!m) return NULL; totlen = 0; for (n = m; n; n = n->m_next) totlen += n->m_len; prop = mtod(m, struct sadb_prop *); bzero(prop, sizeof(*prop)); prop->sadb_prop_len = PFKEY_UNIT64(totlen); prop->sadb_prop_exttype = SADB_EXT_PROPOSAL; prop->sadb_prop_replay = 32; /* XXX */ return m; } /* * SADB_ACQUIRE processing called by key_checkrequest() and key_acquire2(). * send * * to KMD, and expect to receive * with SADB_ACQUIRE if error occured, * or * with SADB_GETSPI * from KMD by PF_KEY. * * XXX x_policy is outside of RFC2367 (KAME extension). * XXX sensitivity is not supported. 
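 * Example flow (sketch, pieced together from the code below): a packet
 * that matches a policy with no mature SA triggers key_acquire(), which
 * records a secacq entry with sequence number N and sends SADB_ACQUIRE
 * to every registered key socket; when the IKE daemon finishes
 * negotiating, it installs the SA with the same sequence number, and
 * repeated triggers in the meantime are suppressed by the blockacq
 * counter.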
 * XXX for ipcomp, RFC2367 does not define how to fill in proposal.
 * see comment for key_getcomb_ipcomp().
 *
 * OUT:
 *    0     : succeed
 *    others: error number
 */
static int
key_acquire(const struct secasindex *saidx, struct secpolicy *sp)
{
	union sockaddr_union addr;
	struct mbuf *result, *m;
	struct secacq *newacq;
	u_int32_t seq;
	int error;
	u_int16_t ul_proto;
	u_int8_t mask, satype;

	IPSEC_ASSERT(saidx != NULL, ("null saidx"));
	satype = key_proto2satype(saidx->proto);
	IPSEC_ASSERT(satype != 0, ("null satype, protocol %u", saidx->proto));

	error = -1;
	result = NULL;
	ul_proto = IPSEC_ULPROTO_ANY;

	/*
	 * We never do anything to acquire the SA ourselves.  An alternative
	 * design would be for the kernel to block sending the SADB_ACQUIRE
	 * message until it receives some reply from the IKE daemon; in that
	 * case the request would have to be tracked on an ACQUIRING list.
	 */

	/* Get an entry to check whether sending message or not. */
	if ((newacq = key_getacq(saidx)) != NULL) {
		if (V_key_blockacq_count < newacq->count) {
			/* reset counter and do send message. */
			newacq->count = 0;
		} else {
			/* increment counter and do nothing. */
			newacq->count++;
			return 0;
		}
	} else {
		/* make new entry for blocking to send SADB_ACQUIRE. */
		if ((newacq = key_newacq(saidx)) == NULL)
			return ENOBUFS;
	}

	seq = newacq->seq;
	m = key_setsadbmsg(SADB_ACQUIRE, 0, satype, seq, 0, 0);
	if (!m) {
		error = ENOBUFS;
		goto fail;
	}
	result = m;

	/*
	 * No SADB_X_EXT_NAT_T_* here: we do not know
	 * anything related to NAT-T at this time.
	 */

	/*
	 * set sadb_address for saidx's.
	 *
	 * Note that if sp is supplied, then we're being called from
	 * key_checkrequest and should supply port and protocol information.
	 */
	if (sp != NULL && (sp->spidx.ul_proto == IPPROTO_TCP ||
	    sp->spidx.ul_proto == IPPROTO_UDP))
		ul_proto = sp->spidx.ul_proto;

	addr = saidx->src;
	mask = FULLMASK;
	if (ul_proto != IPSEC_ULPROTO_ANY) {
		switch (sp->spidx.src.sa.sa_family) {
		case AF_INET:
			if (sp->spidx.src.sin.sin_port != IPSEC_PORT_ANY) {
				addr.sin.sin_port = sp->spidx.src.sin.sin_port;
				mask = sp->spidx.prefs;
			}
			break;
		case AF_INET6:
			if (sp->spidx.src.sin6.sin6_port != IPSEC_PORT_ANY) {
				addr.sin6.sin6_port =
				    sp->spidx.src.sin6.sin6_port;
				mask = sp->spidx.prefs;
			}
			break;
		default:
			break;
		}
	}
	m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &addr.sa, mask, ul_proto);
	if (!m) {
		error = ENOBUFS;
		goto fail;
	}
	m_cat(result, m);

	addr = saidx->dst;
	mask = FULLMASK;
	if (ul_proto != IPSEC_ULPROTO_ANY) {
		switch (sp->spidx.dst.sa.sa_family) {
		case AF_INET:
			if (sp->spidx.dst.sin.sin_port != IPSEC_PORT_ANY) {
				addr.sin.sin_port = sp->spidx.dst.sin.sin_port;
				mask = sp->spidx.prefd;
			}
			break;
		case AF_INET6:
			if (sp->spidx.dst.sin6.sin6_port != IPSEC_PORT_ANY) {
				addr.sin6.sin6_port =
				    sp->spidx.dst.sin6.sin6_port;
				mask = sp->spidx.prefd;
			}
			break;
		default:
			break;
		}
	}
	m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &addr.sa, mask, ul_proto);
	if (!m) {
		error = ENOBUFS;
		goto fail;
	}
	m_cat(result, m);

	/* XXX proxy address (optional) */

	/* set sadb_x_policy */
	if (sp) {
-		m = key_setsadbxpolicy(sp->policy, sp->spidx.dir, sp->id);
+		m = key_setsadbxpolicy(sp->policy, sp->spidx.dir, sp->id, sp->priority);
		if (!m) {
			error = ENOBUFS;
			goto fail;
		}
		m_cat(result, m);
	}

	/* XXX identity (optional) */
#if 0
	if (idexttype && fqdn) {
		/* create identity extension (FQDN) */
		struct sadb_ident *id;
		int fqdnlen;

		fqdnlen = strlen(fqdn) + 1;	/* +1 for terminating-NUL */
		id = (struct sadb_ident *)p;
		bzero(id, sizeof(*id) + PFKEY_ALIGN8(fqdnlen));
		id->sadb_ident_len = PFKEY_UNIT64(sizeof(*id) +
		    PFKEY_ALIGN8(fqdnlen));
		id->sadb_ident_exttype = idexttype;
		id->sadb_ident_type = SADB_IDENTTYPE_FQDN;
		bcopy(fqdn, id
+ 1, fqdnlen); p += sizeof(struct sadb_ident) + PFKEY_ALIGN8(fqdnlen); } if (idexttype) { /* create identity extension (USERFQDN) */ struct sadb_ident *id; int userfqdnlen; if (userfqdn) { /* +1 for terminating-NUL */ userfqdnlen = strlen(userfqdn) + 1; } else userfqdnlen = 0; id = (struct sadb_ident *)p; bzero(id, sizeof(*id) + PFKEY_ALIGN8(userfqdnlen)); id->sadb_ident_len = PFKEY_UNIT64(sizeof(*id) + PFKEY_ALIGN8(userfqdnlen)); id->sadb_ident_exttype = idexttype; id->sadb_ident_type = SADB_IDENTTYPE_USERFQDN; /* XXX is it correct? */ if (curproc && curproc->p_cred) id->sadb_ident_id = curproc->p_cred->p_ruid; if (userfqdn && userfqdnlen) bcopy(userfqdn, id + 1, userfqdnlen); p += sizeof(struct sadb_ident) + PFKEY_ALIGN8(userfqdnlen); } #endif /* XXX sensitivity (optional) */ /* create proposal/combination extension */ m = key_getprop(saidx); #if 0 /* * spec conformant: always attach proposal/combination extension, * the problem is that we have no way to attach it for ipcomp, * due to the way sadb_comb is declared in RFC2367. */ if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); #else /* * outside of spec; make proposal/combination extension optional. */ if (m) m_cat(result, m); #endif if ((result->m_flags & M_PKTHDR) == 0) { error = EINVAL; goto fail; } if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) { error = ENOBUFS; goto fail; } } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); fail: if (result) m_freem(result); return error; } static struct secacq * key_newacq(const struct secasindex *saidx) { struct secacq *newacq; /* get new entry */ newacq = malloc(sizeof(struct secacq), M_IPSEC_SAQ, M_NOWAIT|M_ZERO); if (newacq == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return NULL; } /* copy secindex */ bcopy(saidx, &newacq->saidx, sizeof(newacq->saidx)); newacq->seq = (V_acq_seq == ~0 ? 
	    1 : ++V_acq_seq);
	newacq->created = time_second;
	newacq->count = 0;

	/* add to acqtree */
	ACQ_LOCK();
	LIST_INSERT_HEAD(&V_acqtree, newacq, chain);
	ACQ_UNLOCK();

	return newacq;
}

static struct secacq *
key_getacq(const struct secasindex *saidx)
{
	struct secacq *acq;

	ACQ_LOCK();
	LIST_FOREACH(acq, &V_acqtree, chain) {
		if (key_cmpsaidx(saidx, &acq->saidx, CMP_EXACTLY))
			break;
	}
	ACQ_UNLOCK();

	return acq;
}

static struct secacq *
key_getacqbyseq(u_int32_t seq)
{
	struct secacq *acq;

	ACQ_LOCK();
	LIST_FOREACH(acq, &V_acqtree, chain) {
		if (acq->seq == seq)
			break;
	}
	ACQ_UNLOCK();

	return acq;
}

static struct secspacq *
key_newspacq(struct secpolicyindex *spidx)
{
	struct secspacq *acq;

	/* get new entry */
	acq = malloc(sizeof(struct secspacq), M_IPSEC_SAQ, M_NOWAIT|M_ZERO);
	if (acq == NULL) {
		ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__));
		return NULL;
	}

	/* copy secindex */
	bcopy(spidx, &acq->spidx, sizeof(acq->spidx));
	acq->created = time_second;
	acq->count = 0;

	/* add to spacqtree */
	SPACQ_LOCK();
	LIST_INSERT_HEAD(&V_spacqtree, acq, chain);
	SPACQ_UNLOCK();

	return acq;
}

static struct secspacq *
key_getspacq(struct secpolicyindex *spidx)
{
	struct secspacq *acq;

	SPACQ_LOCK();
	LIST_FOREACH(acq, &V_spacqtree, chain) {
		if (key_cmpspidx_exactly(spidx, &acq->spidx)) {
			/* NB: return holding spacq_lock */
			return acq;
		}
	}
	SPACQ_UNLOCK();

	return NULL;
}

/*
 * SADB_ACQUIRE processing.
 * In the first situation, we receive
 *   <base>
 * from the ikmpd and clear the sequence number of the matching secacq
 * entry.
 * In the second situation, we receive
 *   <base, address(SD), proposal>
 * from a userland process, trigger an ACQUIRE ourselves, and relay the
 * message to all registered sockets.
 *
 * m will always be freed.
 */
static int
key_acquire2(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp)
{
	const struct sadb_address *src0, *dst0;
	struct secasindex saidx;
	struct secashead *sah;
	u_int16_t proto;
	int error;

	IPSEC_ASSERT(so != NULL, ("null socket"));
	IPSEC_ASSERT(m != NULL, ("null mbuf"));
	IPSEC_ASSERT(mhp != NULL, ("null msghdr"));
	IPSEC_ASSERT(mhp->msg != NULL, ("null msg"));

	/*
	 * Error message from KMd.
	 * We assume that if an error occurred in the IKE daemon, the length
	 * of the PFKEY message is equal to the size of the sadb_msg
	 * structure.
	 * We do not raise an error even if an error occurs in this function.
	 */
	if (mhp->msg->sadb_msg_len == PFKEY_UNIT64(sizeof(struct sadb_msg))) {
		struct secacq *acq;

		/* check sequence number */
		if (mhp->msg->sadb_msg_seq == 0) {
			ipseclog((LOG_DEBUG, "%s: must specify sequence "
			    "number.\n", __func__));
			m_freem(m);
			return 0;
		}

		if ((acq = key_getacqbyseq(mhp->msg->sadb_msg_seq)) == NULL) {
			/*
			 * The specified larval SA is already gone, or we got
			 * a bogus sequence number.  We can silently ignore it.
			 */
			m_freem(m);
			return 0;
		}

		/*
		 * Reset the acq counter so that the entry will be deleted
		 * by the timehandler.
		 */
		acq->created = time_second;
		acq->count = 0;

		m_freem(m);
		return 0;
	}

	/*
	 * This message is from user land.
	 */
	/* map satype to proto */
	if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) {
		ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n",
		    __func__));
		return key_senderror(so, m, EINVAL);
	}

	if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL ||
	    mhp->ext[SADB_EXT_ADDRESS_DST] == NULL ||
	    mhp->ext[SADB_EXT_PROPOSAL] == NULL) {
		/* error */
		ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n",
		    __func__));
		return key_senderror(so, m, EINVAL);
	}
	if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) ||
	    mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) ||
	    mhp->extlen[SADB_EXT_PROPOSAL] < sizeof(struct sadb_prop)) {
		/* error */
		ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n",
		    __func__));
		return key_senderror(so, m, EINVAL);
	}

	src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC];
	dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST];

	/* XXX boundary check against sa_len */
	KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx);

	/*
	 * Make sure the port numbers are zero.
	 * In case of NAT-T we will update them later if needed.
	 */
	KEY_PORTTOSADDR(&saidx.src, 0);
	KEY_PORTTOSADDR(&saidx.dst, 0);

#ifdef IPSEC_NAT_T
	/*
	 * Handle NAT-T info if present.
	 */
	if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL &&
	    mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) {
		struct sadb_x_nat_t_port *sport, *dport;

		if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) ||
		    mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) {
			ipseclog((LOG_DEBUG, "%s: invalid message.\n",
			    __func__));
			return key_senderror(so, m, EINVAL);
		}

		sport = (struct sadb_x_nat_t_port *)
		    mhp->ext[SADB_X_EXT_NAT_T_SPORT];
		dport = (struct sadb_x_nat_t_port *)
		    mhp->ext[SADB_X_EXT_NAT_T_DPORT];

		if (sport)
			KEY_PORTTOSADDR(&saidx.src,
			    sport->sadb_x_nat_t_port_port);
		if (dport)
			KEY_PORTTOSADDR(&saidx.dst,
			    dport->sadb_x_nat_t_port_port);
	}
#endif

	/* get a SA index */
	SAHTREE_LOCK();
	LIST_FOREACH(sah, &V_sahtree, chain) {
		if (sah->state == SADB_SASTATE_DEAD)
			continue;
		if (key_cmpsaidx(&sah->saidx, &saidx, CMP_MODE_REQID))
			break;
	}
	SAHTREE_UNLOCK();
	if (sah != NULL) {
		ipseclog((LOG_DEBUG, "%s: a SA exists already.\n", __func__));
		return key_senderror(so, m, EEXIST);
	}

	error = key_acquire(&saidx, NULL);
	if (error != 0) {
		ipseclog((LOG_DEBUG,
		    "%s: error %d returned from key_acquire\n",
		    __func__, mhp->msg->sadb_msg_errno));
		return key_senderror(so, m, error);
	}

	return key_sendup_mbuf(so, m, KEY_SENDUP_REGISTERED);
}

/*
 * SADB_REGISTER processing.
 * If SATYPE_UNSPEC has been passed as satype, only return sadb_supported.
 * receive
 *   <base>
 * from the ikmpd, and register a socket to send PF_KEY messages,
 * and send
 *   <base, supported>
 * to KMD by PF_KEY.
 * If socket is detached, must free from regnode.
 *
 * m will always be freed.
 */
static int
key_register(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp)
{
	struct secreg *reg, *newreg = 0;

	IPSEC_ASSERT(so != NULL, ("null socket"));
	IPSEC_ASSERT(m != NULL, ("null mbuf"));
	IPSEC_ASSERT(mhp != NULL, ("null msghdr"));
	IPSEC_ASSERT(mhp->msg != NULL, ("null msg"));

	/* check for invalid register message */
	if (mhp->msg->sadb_msg_satype >= sizeof(V_regtree)/sizeof(V_regtree[0]))
		return key_senderror(so, m, EINVAL);
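#if 0
	/*
	 * Example of the userland side of this exchange (a sketch only;
	 * "fd" is a hypothetical PF_KEY socket obtained with
	 * socket(PF_KEY, SOCK_RAW, PF_KEY_V2), and error handling is
	 * omitted for brevity).  A bare base header is all a registration
	 * needs:
	 */
	struct sadb_msg msg;

	bzero(&msg, sizeof(msg));
	msg.sadb_msg_version = PF_KEY_V2;
	msg.sadb_msg_type = SADB_REGISTER;
	msg.sadb_msg_satype = SADB_SATYPE_ESP;
	msg.sadb_msg_len = PFKEY_UNIT64(sizeof(msg));	/* bare header: 2 */
	msg.sadb_msg_pid = getpid();
	write(fd, &msg, sizeof(msg));
	/*
	 * The kernel replies with <base, supported>, where each
	 * sadb_supported extension is followed by sadb_alg entries
	 * (id, ivlen, minbits, maxbits) built in the loops below.
	 */
#endif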
	/* When SATYPE_UNSPEC is specified, only return sadb_supported. */
	if (mhp->msg->sadb_msg_satype == SADB_SATYPE_UNSPEC)
		goto setmsg;

	/* check whether existing or not */
	REGTREE_LOCK();
	LIST_FOREACH(reg, &V_regtree[mhp->msg->sadb_msg_satype], chain) {
		if (reg->so == so) {
			REGTREE_UNLOCK();
			ipseclog((LOG_DEBUG, "%s: socket exists already.\n",
			    __func__));
			return key_senderror(so, m, EEXIST);
		}
	}

	/* create regnode */
	newreg = malloc(sizeof(struct secreg), M_IPSEC_SAR, M_NOWAIT|M_ZERO);
	if (newreg == NULL) {
		REGTREE_UNLOCK();
		ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__));
		return key_senderror(so, m, ENOBUFS);
	}

	newreg->so = so;
	((struct keycb *)sotorawcb(so))->kp_registered++;

	/* add regnode to regtree. */
	LIST_INSERT_HEAD(&V_regtree[mhp->msg->sadb_msg_satype], newreg, chain);
	REGTREE_UNLOCK();

  setmsg:
	{
	struct mbuf *n;
	struct sadb_msg *newmsg;
	struct sadb_supported *sup;
	u_int len, alen, elen;
	int off;
	int i;
	struct sadb_alg *alg;

	/* create new sadb_msg to reply. */
	alen = 0;
	for (i = 1; i <= SADB_AALG_MAX; i++) {
		if (ah_algorithm_lookup(i))
			alen += sizeof(struct sadb_alg);
	}
	if (alen)
		alen += sizeof(struct sadb_supported);
	elen = 0;
	for (i = 1; i <= SADB_EALG_MAX; i++) {
		if (esp_algorithm_lookup(i))
			elen += sizeof(struct sadb_alg);
	}
	if (elen)
		elen += sizeof(struct sadb_supported);

	len = sizeof(struct sadb_msg) + alen + elen;

	if (len > MCLBYTES)
		return key_senderror(so, m, ENOBUFS);

	MGETHDR(n, M_NOWAIT, MT_DATA);
	if (len > MHLEN) {
		if (!(MCLGET(n, M_NOWAIT))) {
			m_freem(n);
			n = NULL;
		}
	}
	if (!n)
		return key_senderror(so, m, ENOBUFS);

	n->m_pkthdr.len = n->m_len = len;
	n->m_next = NULL;
	off = 0;

	m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t) + off);
	newmsg = mtod(n, struct sadb_msg *);
	newmsg->sadb_msg_errno = 0;
	newmsg->sadb_msg_len = PFKEY_UNIT64(len);
	off += PFKEY_ALIGN8(sizeof(struct sadb_msg));

	/* for authentication algorithm */
	if (alen) {
		sup = (struct sadb_supported *)(mtod(n, caddr_t) + off);
		sup->sadb_supported_len = PFKEY_UNIT64(alen);
		sup->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
		off += PFKEY_ALIGN8(sizeof(*sup));

		for (i = 1; i <= SADB_AALG_MAX; i++) {
			struct auth_hash *aalgo;
			u_int16_t minkeysize, maxkeysize;

			aalgo = ah_algorithm_lookup(i);
			if (!aalgo)
				continue;
			alg = (struct sadb_alg *)(mtod(n, caddr_t) + off);
			alg->sadb_alg_id = i;
			alg->sadb_alg_ivlen = 0;
			key_getsizes_ah(aalgo, i, &minkeysize, &maxkeysize);
			alg->sadb_alg_minbits = _BITS(minkeysize);
			alg->sadb_alg_maxbits = _BITS(maxkeysize);
			off += PFKEY_ALIGN8(sizeof(*alg));
		}
	}

	/* for encryption algorithm */
	if (elen) {
		sup = (struct sadb_supported *)(mtod(n, caddr_t) + off);
		sup->sadb_supported_len = PFKEY_UNIT64(elen);
		sup->sadb_supported_exttype = SADB_EXT_SUPPORTED_ENCRYPT;
		off += PFKEY_ALIGN8(sizeof(*sup));

		for (i = 1; i <= SADB_EALG_MAX; i++) {
			struct enc_xform *ealgo;

			ealgo = esp_algorithm_lookup(i);
			if (!ealgo)
				continue;
			alg = (struct sadb_alg *)(mtod(n, caddr_t) + off);
			alg->sadb_alg_id = i;
-			alg->sadb_alg_ivlen = ealgo->blocksize;
+			alg->sadb_alg_ivlen = ealgo->ivsize;
			alg->sadb_alg_minbits = _BITS(ealgo->minkey);
			alg->sadb_alg_maxbits = _BITS(ealgo->maxkey);
			off += PFKEY_ALIGN8(sizeof(struct sadb_alg));
		}
	}

	IPSEC_ASSERT(off == len,
	    ("length assumption failed (off %u len %u)", off, len));

	m_freem(m);
	return key_sendup_mbuf(so, n, KEY_SENDUP_REGISTERED);
	}
}

/*
 * free secreg entry registered.
 * XXX: I want to also free the socket that was registered with
 * SADB_REGISTER when it is closed.
 */
void
key_freereg(struct socket *so)
{
	struct secreg *reg;
	int i;

	IPSEC_ASSERT(so != NULL, ("NULL so"));

	/*
	 * check whether existing or not.
* check all type of SA, because there is a potential that * one socket is registered to multiple type of SA. */ REGTREE_LOCK(); for (i = 0; i <= SADB_SATYPE_MAX; i++) { LIST_FOREACH(reg, &V_regtree[i], chain) { if (reg->so == so && __LIST_CHAINED(reg)) { LIST_REMOVE(reg, chain); free(reg, M_IPSEC_SAR); break; } } } REGTREE_UNLOCK(); } /* * SADB_EXPIRE processing * send * * to KMD by PF_KEY. * NOTE: We send only soft lifetime extension. * * OUT: 0 : succeed * others : error number */ static int key_expire(struct secasvar *sav, int hard) { int satype; struct mbuf *result = NULL, *m; int len; int error = -1; struct sadb_lifetime *lt; IPSEC_ASSERT (sav != NULL, ("null sav")); IPSEC_ASSERT (sav->sah != NULL, ("null sa header")); /* set msg header */ satype = key_proto2satype(sav->sah->saidx.proto); IPSEC_ASSERT(satype != 0, ("invalid proto, satype %u", satype)); m = key_setsadbmsg(SADB_EXPIRE, 0, satype, sav->seq, 0, sav->refcnt); if (!m) { error = ENOBUFS; goto fail; } result = m; /* create SA extension */ m = key_setsadbsa(sav); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* create SA extension */ m = key_setsadbxsa2(sav->sah->saidx.mode, sav->replay ? sav->replay->count : 0, sav->sah->saidx.reqid); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* create lifetime extension (current and soft) */ len = PFKEY_ALIGN8(sizeof(*lt)) * 2; m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) { error = ENOBUFS; goto fail; } m_align(m, len); m->m_len = len; bzero(mtod(m, caddr_t), len); lt = mtod(m, struct sadb_lifetime *); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; lt->sadb_lifetime_allocations = sav->lft_c->allocations; lt->sadb_lifetime_bytes = sav->lft_c->bytes; lt->sadb_lifetime_addtime = sav->lft_c->addtime; lt->sadb_lifetime_usetime = sav->lft_c->usetime; lt = (struct sadb_lifetime *)(mtod(m, caddr_t) + len / 2); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); if (hard) { lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD; lt->sadb_lifetime_allocations = sav->lft_h->allocations; lt->sadb_lifetime_bytes = sav->lft_h->bytes; lt->sadb_lifetime_addtime = sav->lft_h->addtime; lt->sadb_lifetime_usetime = sav->lft_h->usetime; } else { lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT; lt->sadb_lifetime_allocations = sav->lft_s->allocations; lt->sadb_lifetime_bytes = sav->lft_s->bytes; lt->sadb_lifetime_addtime = sav->lft_s->addtime; lt->sadb_lifetime_usetime = sav->lft_s->usetime; } m_cat(result, m); /* set sadb_address for source */ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sav->sah->saidx.src.sa, FULLMASK, IPSEC_ULPROTO_ANY); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* set sadb_address for destination */ m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, &sav->sah->saidx.dst.sa, FULLMASK, IPSEC_ULPROTO_ANY); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); /* * XXX-BZ Handle NAT-T extensions here. 
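 *
 * Worked example (sketch): for an SA whose hard add-time is 86400 s and
 * soft add-time 69120 s, the soft expire fires first and this function
 * sends the CURRENT plus SOFT lifetimes so the IKE daemon can rekey in
 * time; if the SA survives to 86400 s, the hard expire sends CURRENT
 * plus HARD instead.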
*/ if ((result->m_flags & M_PKTHDR) == 0) { error = EINVAL; goto fail; } if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) { error = ENOBUFS; goto fail; } } result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); fail: if (result) m_freem(result); return error; } /* * SADB_FLUSH processing * receive * * from the ikmpd, and free all entries in secastree. * and send, * * to the ikmpd. * NOTE: to do is only marking SADB_SASTATE_DEAD. * * m will always be freed. */ static int key_flush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct sadb_msg *newmsg; struct secashead *sah, *nextsah; struct secasvar *sav, *nextsav; u_int16_t proto; u_int8_t state; u_int stateidx; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } /* no SATYPE specified, i.e. flushing all SA. */ SAHTREE_LOCK(); for (sah = LIST_FIRST(&V_sahtree); sah != NULL; sah = nextsah) { nextsah = LIST_NEXT(sah, chain); if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC && proto != sah->saidx.proto) continue; for (stateidx = 0; stateidx < _ARRAYLEN(saorder_state_alive); stateidx++) { state = saorder_state_any[stateidx]; for (sav = LIST_FIRST(&sah->savtree[state]); sav != NULL; sav = nextsav) { nextsav = LIST_NEXT(sav, chain); key_sa_chgstate(sav, SADB_SASTATE_DEAD); KEY_FREESAV(&sav); } } sah->state = SADB_SASTATE_DEAD; } SAHTREE_UNLOCK(); if (m->m_len < sizeof(struct sadb_msg) || sizeof(struct sadb_msg) > m->m_len + M_TRAILINGSPACE(m)) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); return key_senderror(so, m, ENOBUFS); } if (m->m_next) m_freem(m->m_next); m->m_next = NULL; m->m_pkthdr.len = m->m_len = sizeof(struct sadb_msg); newmsg = mtod(m, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(m->m_pkthdr.len); return key_sendup_mbuf(so, m, KEY_SENDUP_ALL); } /* * SADB_DUMP processing * dump all entries including status of DEAD in SAD. * receive * * from the ikmpd, and dump all secasvar leaves * and send, * ..... * to the ikmpd. * * m will always be freed. */ static int key_dump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { struct secashead *sah; struct secasvar *sav; u_int16_t proto; u_int stateidx; u_int8_t satype; u_int8_t state; int cnt; struct sadb_msg *newmsg; struct mbuf *n; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); return key_senderror(so, m, EINVAL); } /* count sav entries to be sent to the userland. 
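 *
 * For example (sketch): with three matching SAs, the replies go up with
 * sadb_msg_seq 2, 1 and finally 0; userland keeps reading until it sees
 * seq == 0, which marks the end of the dump.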
 */
	cnt = 0;

	SAHTREE_LOCK();
	LIST_FOREACH(sah, &V_sahtree, chain) {
		if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC &&
		    proto != sah->saidx.proto)
			continue;

		for (stateidx = 0;
		     stateidx < _ARRAYLEN(saorder_state_any);
		     stateidx++) {
			state = saorder_state_any[stateidx];
			LIST_FOREACH(sav, &sah->savtree[state], chain) {
				cnt++;
			}
		}
	}

	if (cnt == 0) {
		SAHTREE_UNLOCK();
		return key_senderror(so, m, ENOENT);
	}

	/* send this to the userland, one at a time. */
	newmsg = NULL;
	LIST_FOREACH(sah, &V_sahtree, chain) {
		if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC &&
		    proto != sah->saidx.proto)
			continue;

		/* map proto to satype */
		if ((satype = key_proto2satype(sah->saidx.proto)) == 0) {
			SAHTREE_UNLOCK();
			ipseclog((LOG_DEBUG, "%s: there was invalid proto in "
			    "SAD.\n", __func__));
			return key_senderror(so, m, EINVAL);
		}

		for (stateidx = 0;
		     stateidx < _ARRAYLEN(saorder_state_any);
		     stateidx++) {
			state = saorder_state_any[stateidx];
			LIST_FOREACH(sav, &sah->savtree[state], chain) {
				n = key_setdumpsa(sav, SADB_DUMP, satype,
				    --cnt, mhp->msg->sadb_msg_pid);
				if (!n) {
					SAHTREE_UNLOCK();
					return key_senderror(so, m, ENOBUFS);
				}
				key_sendup_mbuf(so, n, KEY_SENDUP_ONE);
			}
		}
	}
	SAHTREE_UNLOCK();

	m_freem(m);
	return 0;
}

/*
 * SADB_X_PROMISC processing
 *
 * m will always be freed.
 */
static int
key_promisc(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp)
{
	int olen;

	IPSEC_ASSERT(so != NULL, ("null socket"));
	IPSEC_ASSERT(m != NULL, ("null mbuf"));
	IPSEC_ASSERT(mhp != NULL, ("null msghdr"));
	IPSEC_ASSERT(mhp->msg != NULL, ("null msg"));

	olen = PFKEY_UNUNIT64(mhp->msg->sadb_msg_len);

	if (olen < sizeof(struct sadb_msg)) {
#if 1
		return key_senderror(so, m, EINVAL);
#else
		m_freem(m);
		return 0;
#endif
	} else if (olen == sizeof(struct sadb_msg)) {
		/* enable/disable promisc mode */
		struct keycb *kp;

		if ((kp = (struct keycb *)sotorawcb(so)) == NULL)
			return key_senderror(so, m, EINVAL);
		mhp->msg->sadb_msg_errno = 0;
		switch (mhp->msg->sadb_msg_satype) {
		case 0:
		case 1:
			kp->kp_promisc = mhp->msg->sadb_msg_satype;
			break;
		default:
			return key_senderror(so, m, EINVAL);
		}

		/* send the original message back to everyone */
		mhp->msg->sadb_msg_errno = 0;
		return key_sendup_mbuf(so, m, KEY_SENDUP_ALL);
	} else {
		/* send packet as is */
		m_adj(m, PFKEY_ALIGN8(sizeof(struct sadb_msg)));

		/* TODO: if sadb_msg_seq is specified, send to specific pid */
		return key_sendup_mbuf(so, m, KEY_SENDUP_ALL);
	}
}

static int (*key_typesw[])(struct socket *, struct mbuf *,
		const struct sadb_msghdr *) = {
	NULL,			/* SADB_RESERVED */
	key_getspi,		/* SADB_GETSPI */
	key_update,		/* SADB_UPDATE */
	key_add,		/* SADB_ADD */
	key_delete,		/* SADB_DELETE */
	key_get,		/* SADB_GET */
	key_acquire2,		/* SADB_ACQUIRE */
	key_register,		/* SADB_REGISTER */
	NULL,			/* SADB_EXPIRE */
	key_flush,		/* SADB_FLUSH */
	key_dump,		/* SADB_DUMP */
	key_promisc,		/* SADB_X_PROMISC */
	NULL,			/* SADB_X_PCHANGE */
	key_spdadd,		/* SADB_X_SPDUPDATE */
	key_spdadd,		/* SADB_X_SPDADD */
	key_spddelete,		/* SADB_X_SPDDELETE */
	key_spdget,		/* SADB_X_SPDGET */
	NULL,			/* SADB_X_SPDACQUIRE */
	key_spddump,		/* SADB_X_SPDDUMP */
	key_spdflush,		/* SADB_X_SPDFLUSH */
	key_spdadd,		/* SADB_X_SPDSETIDX */
	NULL,			/* SADB_X_SPDEXPIRE */
	key_spddelete2,		/* SADB_X_SPDDELETE2 */
};
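/*
 * Worked example (sketch, reading the table above): an SADB_ADD message
 * (sadb_msg_type == 3) is dispatched as key_typesw[3] == key_add, while
 * types with NULL entries, such as SADB_EXPIRE, are rejected by
 * key_parse() with EINVAL.  PF_KEY lengths travel in 8-byte units, so
 * for the bare 16-byte base header PFKEY_UNIT64(16) == 2 and
 * PFKEY_UNUNIT64(2) == 16; key_parse() recomputes the byte length this
 * way and requires it to match the mbuf's m_pkthdr.len.
 */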
/*
 * Parse the sadb_msg buffer to process PFKEYv2 requests, and create the
 * response data if needed.  It would be cleaner to deal with the mbuf
 * directly.
 * IN:
 *    m : a received mbuf, pulled up so the base header is contiguous.
 *        It is rewritten into the response.
 *    so: pointer to the socket.
 * OUT:
 *    length of the buffer to send to the user process.
 */
int
key_parse(struct mbuf *m, struct socket *so)
{
	struct sadb_msg *msg;
	struct sadb_msghdr mh;
	u_int orglen;
	int error;
	int target;

	IPSEC_ASSERT(so != NULL, ("null socket"));
	IPSEC_ASSERT(m != NULL, ("null mbuf"));

#if 0	/*kdebug_sadb assumes msg in linear buffer*/
	KEYDEBUG(KEYDEBUG_KEY_DUMP,
		ipseclog((LOG_DEBUG, "%s: passed sadb_msg\n", __func__));
		kdebug_sadb(msg));
#endif

	if (m->m_len < sizeof(struct sadb_msg)) {
		m = m_pullup(m, sizeof(struct sadb_msg));
		if (!m)
			return ENOBUFS;
	}
	msg = mtod(m, struct sadb_msg *);
	orglen = PFKEY_UNUNIT64(msg->sadb_msg_len);
	target = KEY_SENDUP_ONE;

	if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len != orglen) {
		ipseclog((LOG_DEBUG, "%s: invalid message length.\n",
		    __func__));
		PFKEYSTAT_INC(out_invlen);
		error = EINVAL;
		goto senderror;
	}

	if (msg->sadb_msg_version != PF_KEY_V2) {
		ipseclog((LOG_DEBUG, "%s: PF_KEY version %u is mismatched.\n",
		    __func__, msg->sadb_msg_version));
		PFKEYSTAT_INC(out_invver);
		error = EINVAL;
		goto senderror;
	}

	if (msg->sadb_msg_type > SADB_MAX) {
		ipseclog((LOG_DEBUG, "%s: invalid type %u is passed.\n",
		    __func__, msg->sadb_msg_type));
		PFKEYSTAT_INC(out_invmsgtype);
		error = EINVAL;
		goto senderror;
	}

	/* for old-fashioned code - should be nuked */
	if (m->m_pkthdr.len > MCLBYTES) {
		m_freem(m);
		return ENOBUFS;
	}
	if (m->m_next) {
		struct mbuf *n;

		MGETHDR(n, M_NOWAIT, MT_DATA);
		if (n && m->m_pkthdr.len > MHLEN) {
			if (!(MCLGET(n, M_NOWAIT))) {
				m_free(n);
				n = NULL;
			}
		}
		if (!n) {
			m_freem(m);
			return ENOBUFS;
		}
		m_copydata(m, 0, m->m_pkthdr.len, mtod(n, caddr_t));
		n->m_pkthdr.len = n->m_len = m->m_pkthdr.len;
		n->m_next = NULL;
		m_freem(m);
		m = n;
	}

	/* align the mbuf chain so that extensions are in contiguous region. */
	error = key_align(m, &mh);
	if (error)
		return error;

	msg = mh.msg;

	/* check SA type */
	switch (msg->sadb_msg_satype) {
	case SADB_SATYPE_UNSPEC:
		switch (msg->sadb_msg_type) {
		case SADB_GETSPI:
		case SADB_UPDATE:
		case SADB_ADD:
		case SADB_DELETE:
		case SADB_GET:
		case SADB_ACQUIRE:
		case SADB_EXPIRE:
			ipseclog((LOG_DEBUG, "%s: must specify satype "
			    "when msg type=%u.\n", __func__,
			    msg->sadb_msg_type));
			PFKEYSTAT_INC(out_invsatype);
			error = EINVAL;
			goto senderror;
		}
		break;
	case SADB_SATYPE_AH:
	case SADB_SATYPE_ESP:
	case SADB_X_SATYPE_IPCOMP:
	case SADB_X_SATYPE_TCPSIGNATURE:
		switch (msg->sadb_msg_type) {
		case SADB_X_SPDADD:
		case SADB_X_SPDDELETE:
		case SADB_X_SPDGET:
		case SADB_X_SPDDUMP:
		case SADB_X_SPDFLUSH:
		case SADB_X_SPDSETIDX:
		case SADB_X_SPDUPDATE:
		case SADB_X_SPDDELETE2:
			ipseclog((LOG_DEBUG, "%s: illegal satype=%u\n",
			    __func__, msg->sadb_msg_type));
			PFKEYSTAT_INC(out_invsatype);
			error = EINVAL;
			goto senderror;
		}
		break;
	case SADB_SATYPE_RSVP:
	case SADB_SATYPE_OSPFV2:
	case SADB_SATYPE_RIPV2:
	case SADB_SATYPE_MIP:
		ipseclog((LOG_DEBUG, "%s: type %u isn't supported.\n",
		    __func__, msg->sadb_msg_satype));
		PFKEYSTAT_INC(out_invsatype);
		error = EOPNOTSUPP;
		goto senderror;
	case 1:	/* XXX: What does it do?
*/ if (msg->sadb_msg_type == SADB_X_PROMISC) break; /*FALLTHROUGH*/ default: ipseclog((LOG_DEBUG, "%s: invalid type %u is passed.\n", __func__, msg->sadb_msg_satype)); PFKEYSTAT_INC(out_invsatype); error = EINVAL; goto senderror; } /* check field of upper layer protocol and address family */ if (mh.ext[SADB_EXT_ADDRESS_SRC] != NULL && mh.ext[SADB_EXT_ADDRESS_DST] != NULL) { struct sadb_address *src0, *dst0; u_int plen; src0 = (struct sadb_address *)(mh.ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mh.ext[SADB_EXT_ADDRESS_DST]); /* check upper layer protocol */ if (src0->sadb_address_proto != dst0->sadb_address_proto) { ipseclog((LOG_DEBUG, "%s: upper layer protocol " "mismatched.\n", __func__)); PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } /* check family */ if (PFKEY_ADDR_SADDR(src0)->sa_family != PFKEY_ADDR_SADDR(dst0)->sa_family) { ipseclog((LOG_DEBUG, "%s: address family mismatched.\n", __func__)); PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } if (PFKEY_ADDR_SADDR(src0)->sa_len != PFKEY_ADDR_SADDR(dst0)->sa_len) { ipseclog((LOG_DEBUG, "%s: address struct size " "mismatched.\n", __func__)); PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } switch (PFKEY_ADDR_SADDR(src0)->sa_family) { case AF_INET: if (PFKEY_ADDR_SADDR(src0)->sa_len != sizeof(struct sockaddr_in)) { PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } break; case AF_INET6: if (PFKEY_ADDR_SADDR(src0)->sa_len != sizeof(struct sockaddr_in6)) { PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } break; default: ipseclog((LOG_DEBUG, "%s: unsupported address family\n", __func__)); PFKEYSTAT_INC(out_invaddr); error = EAFNOSUPPORT; goto senderror; } switch (PFKEY_ADDR_SADDR(src0)->sa_family) { case AF_INET: plen = sizeof(struct in_addr) << 3; break; case AF_INET6: plen = sizeof(struct in6_addr) << 3; break; default: plen = 0; /*fool gcc*/ break; } /* check max prefix length */ if (src0->sadb_address_prefixlen > plen || dst0->sadb_address_prefixlen > plen) { ipseclog((LOG_DEBUG, "%s: illegal prefixlen.\n", __func__)); PFKEYSTAT_INC(out_invaddr); error = EINVAL; goto senderror; } /* * prefixlen == 0 is valid because there can be a case when * all addresses are matched. */ } if (msg->sadb_msg_type >= sizeof(key_typesw)/sizeof(key_typesw[0]) || key_typesw[msg->sadb_msg_type] == NULL) { PFKEYSTAT_INC(out_invmsgtype); error = EINVAL; goto senderror; } return (*key_typesw[msg->sadb_msg_type])(so, m, &mh); senderror: msg->sadb_msg_errno = error; return key_sendup_mbuf(so, m, target); } static int key_senderror(struct socket *so, struct mbuf *m, int code) { struct sadb_msg *msg; IPSEC_ASSERT(m->m_len >= sizeof(struct sadb_msg), ("mbuf too small, len %u", m->m_len)); msg = mtod(m, struct sadb_msg *); msg->sadb_msg_errno = code; return key_sendup_mbuf(so, m, KEY_SENDUP_ONE); } /* * set the pointer to each header into message buffer. * m will be freed on error. * XXX larger-than-MCLBYTES extension? 
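 *
 * For example (sketch): for a message whose base header announces a
 * PFKEY_UNUNIT64 length of 48 bytes, the walk starts at off = 16 (just
 * past the base header), advances by extlen =
 * PFKEY_UNUNIT64(ext->sadb_ext_len) per extension, and must finish
 * exactly at off == end (48) or the message is rejected with EINVAL.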
*/ static int key_align(struct mbuf *m, struct sadb_msghdr *mhp) { struct mbuf *n; struct sadb_ext *ext; size_t off, end; int extlen; int toff; IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(m->m_len >= sizeof(struct sadb_msg), ("mbuf too small, len %u", m->m_len)); /* initialize */ bzero(mhp, sizeof(*mhp)); mhp->msg = mtod(m, struct sadb_msg *); mhp->ext[0] = (struct sadb_ext *)mhp->msg; /*XXX backward compat */ end = PFKEY_UNUNIT64(mhp->msg->sadb_msg_len); extlen = end; /*just in case extlen is not updated*/ for (off = sizeof(struct sadb_msg); off < end; off += extlen) { n = m_pulldown(m, off, sizeof(struct sadb_ext), &toff); if (!n) { /* m is already freed */ return ENOBUFS; } ext = (struct sadb_ext *)(mtod(n, caddr_t) + toff); /* set pointer */ switch (ext->sadb_ext_type) { case SADB_EXT_SA: case SADB_EXT_ADDRESS_SRC: case SADB_EXT_ADDRESS_DST: case SADB_EXT_ADDRESS_PROXY: case SADB_EXT_LIFETIME_CURRENT: case SADB_EXT_LIFETIME_HARD: case SADB_EXT_LIFETIME_SOFT: case SADB_EXT_KEY_AUTH: case SADB_EXT_KEY_ENCRYPT: case SADB_EXT_IDENTITY_SRC: case SADB_EXT_IDENTITY_DST: case SADB_EXT_SENSITIVITY: case SADB_EXT_PROPOSAL: case SADB_EXT_SUPPORTED_AUTH: case SADB_EXT_SUPPORTED_ENCRYPT: case SADB_EXT_SPIRANGE: case SADB_X_EXT_POLICY: case SADB_X_EXT_SA2: #ifdef IPSEC_NAT_T case SADB_X_EXT_NAT_T_TYPE: case SADB_X_EXT_NAT_T_SPORT: case SADB_X_EXT_NAT_T_DPORT: case SADB_X_EXT_NAT_T_OAI: case SADB_X_EXT_NAT_T_OAR: case SADB_X_EXT_NAT_T_FRAG: #endif /* duplicate check */ /* * XXX Are there duplication payloads of either * KEY_AUTH or KEY_ENCRYPT ? */ if (mhp->ext[ext->sadb_ext_type] != NULL) { ipseclog((LOG_DEBUG, "%s: duplicate ext_type " "%u\n", __func__, ext->sadb_ext_type)); m_freem(m); PFKEYSTAT_INC(out_dupext); return EINVAL; } break; default: ipseclog((LOG_DEBUG, "%s: invalid ext_type %u\n", __func__, ext->sadb_ext_type)); m_freem(m); PFKEYSTAT_INC(out_invexttype); return EINVAL; } extlen = PFKEY_UNUNIT64(ext->sadb_ext_len); if (key_validate_ext(ext, extlen)) { m_freem(m); PFKEYSTAT_INC(out_invlen); return EINVAL; } n = m_pulldown(m, off, extlen, &toff); if (!n) { /* m is already freed */ return ENOBUFS; } ext = (struct sadb_ext *)(mtod(n, caddr_t) + toff); mhp->ext[ext->sadb_ext_type] = ext; mhp->extoff[ext->sadb_ext_type] = off; mhp->extlen[ext->sadb_ext_type] = extlen; } if (off != end) { m_freem(m); PFKEYSTAT_INC(out_invlen); return EINVAL; } return 0; } static int key_validate_ext(const struct sadb_ext *ext, int len) { const struct sockaddr *sa; enum { NONE, ADDR } checktype = NONE; int baselen = 0; const int sal = offsetof(struct sockaddr, sa_len) + sizeof(sa->sa_len); if (len != PFKEY_UNUNIT64(ext->sadb_ext_len)) return EINVAL; /* if it does not match minimum/maximum length, bail */ if (ext->sadb_ext_type >= sizeof(minsize) / sizeof(minsize[0]) || ext->sadb_ext_type >= sizeof(maxsize) / sizeof(maxsize[0])) return EINVAL; if (!minsize[ext->sadb_ext_type] || len < minsize[ext->sadb_ext_type]) return EINVAL; if (maxsize[ext->sadb_ext_type] && len > maxsize[ext->sadb_ext_type]) return EINVAL; /* more checks based on sadb_ext_type XXX need more */ switch (ext->sadb_ext_type) { case SADB_EXT_ADDRESS_SRC: case SADB_EXT_ADDRESS_DST: case SADB_EXT_ADDRESS_PROXY: baselen = PFKEY_ALIGN8(sizeof(struct sadb_address)); checktype = ADDR; break; case SADB_EXT_IDENTITY_SRC: case SADB_EXT_IDENTITY_DST: if (((const struct sadb_ident *)ext)->sadb_ident_type == SADB_X_IDENTTYPE_ADDR) { baselen = PFKEY_ALIGN8(sizeof(struct sadb_ident)); checktype = ADDR; } 
else checktype = NONE; break; default: checktype = NONE; break; } switch (checktype) { case NONE: break; case ADDR: sa = (const struct sockaddr *)(((const u_int8_t*)ext)+baselen); if (len < baselen + sal) return EINVAL; if (baselen + PFKEY_ALIGN8(sa->sa_len) != len) return EINVAL; break; } return 0; } void key_init(void) { int i; for (i = 0; i < IPSEC_DIR_MAX; i++) TAILQ_INIT(&V_sptree[i]); LIST_INIT(&V_sahtree); for (i = 0; i <= SADB_SATYPE_MAX; i++) LIST_INIT(&V_regtree[i]); LIST_INIT(&V_acqtree); LIST_INIT(&V_spacqtree); if (!IS_DEFAULT_VNET(curvnet)) return; SPTREE_LOCK_INIT(); REGTREE_LOCK_INIT(); SAHTREE_LOCK_INIT(); ACQ_LOCK_INIT(); SPACQ_LOCK_INIT(); #ifndef IPSEC_DEBUG2 callout_init(&key_timer, 1); callout_reset(&key_timer, hz, key_timehandler, NULL); #endif /*IPSEC_DEBUG2*/ /* initialize key statistics */ keystat.getspi_count = 1; printf("IPsec: Initialized Security Association Processing.\n"); } #ifdef VIMAGE void key_destroy(void) { TAILQ_HEAD(, secpolicy) drainq; struct secpolicy *sp, *nextsp; struct secacq *acq, *nextacq; struct secspacq *spacq, *nextspacq; struct secashead *sah, *nextsah; struct secreg *reg; int i; TAILQ_INIT(&drainq); SPTREE_WLOCK(); for (i = 0; i < IPSEC_DIR_MAX; i++) { TAILQ_CONCAT(&drainq, &V_sptree[i], chain); } SPTREE_WUNLOCK(); sp = TAILQ_FIRST(&drainq); while (sp != NULL) { nextsp = TAILQ_NEXT(sp, chain); KEY_FREESP(&sp); sp = nextsp; } SAHTREE_LOCK(); for (sah = LIST_FIRST(&V_sahtree); sah != NULL; sah = nextsah) { nextsah = LIST_NEXT(sah, chain); if (__LIST_CHAINED(sah)) { LIST_REMOVE(sah, chain); free(sah, M_IPSEC_SAH); } } SAHTREE_UNLOCK(); REGTREE_LOCK(); for (i = 0; i <= SADB_SATYPE_MAX; i++) { LIST_FOREACH(reg, &V_regtree[i], chain) { if (__LIST_CHAINED(reg)) { LIST_REMOVE(reg, chain); free(reg, M_IPSEC_SAR); break; } } } REGTREE_UNLOCK(); ACQ_LOCK(); for (acq = LIST_FIRST(&V_acqtree); acq != NULL; acq = nextacq) { nextacq = LIST_NEXT(acq, chain); if (__LIST_CHAINED(acq)) { LIST_REMOVE(acq, chain); free(acq, M_IPSEC_SAQ); } } ACQ_UNLOCK(); SPACQ_LOCK(); for (spacq = LIST_FIRST(&V_spacqtree); spacq != NULL; spacq = nextspacq) { nextspacq = LIST_NEXT(spacq, chain); if (__LIST_CHAINED(spacq)) { LIST_REMOVE(spacq, chain); free(spacq, M_IPSEC_SAQ); } } SPACQ_UNLOCK(); } #endif /* * XXX: maybe This function is called after INBOUND IPsec processing. * * Special check for tunnel-mode packets. * We must make some checks for consistency between inner and outer IP header. * * xxx more checks to be provided */ int key_checktunnelsanity(struct secasvar *sav, u_int family, caddr_t src, caddr_t dst) { IPSEC_ASSERT(sav->sah != NULL, ("null SA header")); /* XXX: check inner IP header */ return 1; } /* record data transfer on SA, and update timestamps */ void key_sa_recordxfer(struct secasvar *sav, struct mbuf *m) { IPSEC_ASSERT(sav != NULL, ("Null secasvar")); IPSEC_ASSERT(m != NULL, ("Null mbuf")); if (!sav->lft_c) return; /* * XXX Currently, there is a difference of bytes size * between inbound and outbound processing. */ sav->lft_c->bytes += m->m_pkthdr.len; /* to check bytes lifetime is done in key_timehandler(). */ /* * We use the number of packets as the unit of * allocations. We increment the variable * whenever {esp,ah}_{in,out}put is called. */ sav->lft_c->allocations++; /* XXX check for expires? */ /* * NOTE: We record CURRENT usetime by using wall clock, * in seconds. HARD and SOFT lifetime are measured by the time * difference (again in seconds) from usetime. 
* * usetime * v expire expire * -----+-----+--------+---> t * <--------------> HARD * <-----> SOFT */ sav->lft_c->usetime = time_second; /* XXX check for expires? */ return; } static void key_sa_chgstate(struct secasvar *sav, u_int8_t state) { IPSEC_ASSERT(sav != NULL, ("NULL sav")); SAHTREE_LOCK_ASSERT(); if (sav->state != state) { if (__LIST_CHAINED(sav)) LIST_REMOVE(sav, chain); sav->state = state; LIST_INSERT_HEAD(&sav->sah->savtree[state], sav, chain); } } /* * Take one of the kernel's security keys and convert it into a PF_KEY * structure within an mbuf, suitable for sending up to a waiting * application in user land. * * IN: * src: A pointer to a kernel security key. * exttype: Which type of key this is. Refer to the PF_KEY data structures. * OUT: * a valid mbuf or NULL indicating an error * */ static struct mbuf * key_setkey(struct seckey *src, u_int16_t exttype) { struct mbuf *m; struct sadb_key *p; int len; if (src == NULL) return NULL; len = PFKEY_ALIGN8(sizeof(struct sadb_key) + _KEYLEN(src)); m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return NULL; m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_key *); bzero(p, len); p->sadb_key_len = PFKEY_UNIT64(len); p->sadb_key_exttype = exttype; p->sadb_key_bits = src->bits; bcopy(src->key_data, _KEYBUF(p), _KEYLEN(src)); return m; } /* * Take one of the kernel's lifetime data structures and convert it * into a PF_KEY structure within an mbuf, suitable for sending up to * a waiting application in user land. * * IN: * src: A pointer to a kernel lifetime structure. * exttype: Which type of lifetime this is. Refer to the PF_KEY * data structures for more information. * OUT: * a valid mbuf or NULL indicating an error * */ static struct mbuf * key_setlifetime(struct seclifetime *src, u_int16_t exttype) { struct mbuf *m = NULL; struct sadb_lifetime *p; int len = PFKEY_ALIGN8(sizeof(struct sadb_lifetime)); if (src == NULL) return NULL; m = m_get2(len, M_NOWAIT, MT_DATA, 0); if (m == NULL) return m; m_align(m, len); m->m_len = len; p = mtod(m, struct sadb_lifetime *); bzero(p, len); p->sadb_lifetime_len = PFKEY_UNIT64(len); p->sadb_lifetime_exttype = exttype; p->sadb_lifetime_allocations = src->allocations; p->sadb_lifetime_bytes = src->bytes; p->sadb_lifetime_addtime = src->addtime; p->sadb_lifetime_usetime = src->usetime; return m; } Index: projects/powernv/netipsec/xform_esp.c =================================================================== --- projects/powernv/netipsec/xform_esp.c (revision 290990) +++ projects/powernv/netipsec/xform_esp.c (revision 290991) @@ -1,1026 +1,1020 @@ /* $FreeBSD$ */ /* $OpenBSD: ip_esp.c,v 1.69 2001/06/26 06:18:59 angelos Exp $ */ /*- * The authors of this code are John Ioannidis (ji@tla.org), * Angelos D. Keromytis (kermit@csd.uch.gr) and * Niels Provos (provos@physnet.uni-hamburg.de). * * The original version of this code was written by John Ioannidis * for BSD/OS in Athens, Greece, in November 1995. * * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, * by Angelos D. Keromytis. * * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis * and Niels Provos. * * Additional features in 1999 by Angelos D. Keromytis. * * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, * Angelos D. Keromytis and Niels Provos. * Copyright (c) 2001 Angelos D. Keromytis. 
* * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in * all copies of any software which is or includes a copy or * modification of this software. * You may use this code under the GNU public license if you so wish. Please * contribute changes back to the authors under this freer than GPL license * so that we may further the use of strong encryption without limitations to * all. * * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR * PURPOSE. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #endif #include #include #include #include VNET_DEFINE(int, esp_enable) = 1; VNET_PCPUSTAT_DEFINE(struct espstat, espstat); VNET_PCPUSTAT_SYSINIT(espstat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(espstat); #endif /* VIMAGE */ SYSCTL_DECL(_net_inet_esp); SYSCTL_INT(_net_inet_esp, OID_AUTO, esp_enable, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(esp_enable), 0, ""); SYSCTL_VNET_PCPUSTAT(_net_inet_esp, IPSECCTL_STATS, stats, struct espstat, espstat, "ESP statistics (struct espstat, netipsec/esp_var.h"); static int esp_input_cb(struct cryptop *op); static int esp_output_cb(struct cryptop *crp); /* * NB: this is public for use by the PF_KEY support. * NB: if you add support here; be sure to add code to esp_attach below! */ struct enc_xform * esp_algorithm_lookup(int alg) { if (alg >= ESP_ALG_MAX) return NULL; switch (alg) { case SADB_EALG_DESCBC: return &enc_xform_des; case SADB_EALG_3DESCBC: return &enc_xform_3des; case SADB_X_EALG_AES: return &enc_xform_rijndael128; case SADB_X_EALG_BLOWFISHCBC: return &enc_xform_blf; case SADB_X_EALG_CAST128CBC: return &enc_xform_cast5; case SADB_EALG_NULL: return &enc_xform_null; case SADB_X_EALG_CAMELLIACBC: return &enc_xform_camellia; case SADB_X_EALG_AESCTR: return &enc_xform_aes_icm; case SADB_X_EALG_AESGCM16: return &enc_xform_aes_nist_gcm; case SADB_X_EALG_AESGMAC: return &enc_xform_aes_nist_gmac; } return NULL; } size_t esp_hdrsiz(struct secasvar *sav) { size_t size; if (sav != NULL) { /*XXX not right for null algorithm--does it matter??*/ IPSEC_ASSERT(sav->tdb_encalgxform != NULL, ("SA with null xform")); if (sav->flags & SADB_X_EXT_OLD) size = sizeof (struct esp); else size = sizeof (struct newesp); size += sav->tdb_encalgxform->blocksize + 9; /*XXX need alg check???*/ if (sav->tdb_authalgxform != NULL && sav->replay) size += ah_hdrsiz(sav); } else { /* * base header size * + max iv length for CBC mode * + max pad length * + sizeof (pad length field) * + sizeof (next header field) * + max icv supported. */ size = sizeof (struct newesp) + EALG_MAX_BLOCK_LEN + 9 + 16; } return size; } /* * esp_init() is called when an SPI is being set up. 
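 *
 * Worked example (sketch, from the counter/GCM handling below): an
 * AES-GCM-16 SA carries a 160-bit key_enc whose trailing 32 bits are the
 * RFC 4106 salt, so keylen = 20 - 4 = 16 bytes, the crypto session is
 * keyed with 160 - 32 = 128 bits, and the wire IV is fixed at 8 bytes.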
*/ static int esp_init(struct secasvar *sav, struct xformsw *xsp) { struct enc_xform *txform; struct cryptoini cria, crie; int keylen; int error; txform = esp_algorithm_lookup(sav->alg_enc); if (txform == NULL) { DPRINTF(("%s: unsupported encryption algorithm %d\n", __func__, sav->alg_enc)); return EINVAL; } if (sav->key_enc == NULL) { DPRINTF(("%s: no encoding key for %s algorithm\n", __func__, txform->name)); return EINVAL; } if ((sav->flags & (SADB_X_EXT_OLD | SADB_X_EXT_IV4B)) == SADB_X_EXT_IV4B) { DPRINTF(("%s: 4-byte IV not supported with protocol\n", __func__)); return EINVAL; } /* subtract off the salt, RFC4106, 8.1 and RFC3686, 5.1 */ keylen = _KEYLEN(sav->key_enc) - SAV_ISCTRORGCM(sav) * 4; if (txform->minkey > keylen || keylen > txform->maxkey) { DPRINTF(("%s: invalid key length %u, must be in the range " "[%u..%u] for algorithm %s\n", __func__, keylen, txform->minkey, txform->maxkey, txform->name)); return EINVAL; } - /* - * NB: The null xform needs a non-zero blocksize to keep the - * crypto code happy but if we use it to set ivlen then - * the ESP header will be processed incorrectly. The - * compromise is to force it to zero here. - */ if (SAV_ISCTRORGCM(sav)) sav->ivlen = 8; /* RFC4106 3.1 and RFC3686 3.1 */ else - sav->ivlen = (txform == &enc_xform_null ? 0 : txform->ivsize); + sav->ivlen = txform->ivsize; /* * Setup AH-related state. */ if (sav->alg_auth != 0) { error = ah_init0(sav, xsp, &cria); if (error) return error; } /* NB: override anything set in ah_init0 */ sav->tdb_xform = xsp; sav->tdb_encalgxform = txform; /* * Whenever AES-GCM is used for encryption, one * of the AES authentication algorithms is chosen * as well, based on the key size. */ if (sav->alg_enc == SADB_X_EALG_AESGCM16) { switch (keylen) { case AES_128_GMAC_KEY_LEN: sav->alg_auth = SADB_X_AALG_AES128GMAC; sav->tdb_authalgxform = &auth_hash_nist_gmac_aes_128; break; case AES_192_GMAC_KEY_LEN: sav->alg_auth = SADB_X_AALG_AES192GMAC; sav->tdb_authalgxform = &auth_hash_nist_gmac_aes_192; break; case AES_256_GMAC_KEY_LEN: sav->alg_auth = SADB_X_AALG_AES256GMAC; sav->tdb_authalgxform = &auth_hash_nist_gmac_aes_256; break; default: DPRINTF(("%s: invalid key length %u" "for algorithm %s\n", __func__, keylen, txform->name)); return EINVAL; } bzero(&cria, sizeof(cria)); cria.cri_alg = sav->tdb_authalgxform->type; cria.cri_key = sav->key_enc->key_data; cria.cri_klen = _KEYBITS(sav->key_enc) - SAV_ISGCM(sav) * 32; } /* Initialize crypto session. */ bzero(&crie, sizeof(crie)); crie.cri_alg = sav->tdb_encalgxform->type; crie.cri_key = sav->key_enc->key_data; crie.cri_klen = _KEYBITS(sav->key_enc) - SAV_ISCTRORGCM(sav) * 32; if (sav->tdb_authalgxform && sav->tdb_encalgxform) { /* init both auth & enc */ crie.cri_next = &cria; error = crypto_newsession(&sav->tdb_cryptoid, &crie, V_crypto_support); } else if (sav->tdb_encalgxform) { error = crypto_newsession(&sav->tdb_cryptoid, &crie, V_crypto_support); } else if (sav->tdb_authalgxform) { error = crypto_newsession(&sav->tdb_cryptoid, &cria, V_crypto_support); } else { /* XXX cannot happen? */ DPRINTF(("%s: no encoding OR authentication xform!\n", __func__)); error = EINVAL; } return error; } /* * Paranoia. */ static int esp_zeroize(struct secasvar *sav) { /* NB: ah_zerorize free's the crypto session state */ int error = ah_zeroize(sav); if (sav->key_enc) bzero(sav->key_enc->key_data, _KEYLEN(sav->key_enc)); sav->tdb_encalgxform = NULL; sav->tdb_xform = NULL; return error; } /* * ESP input processing, called (eventually) through the protocol switch. 
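The key-length arithmetic in esp_init() above strips the 4 salt (AES-GCM, RFC 4106 8.1) or nonce (AES-CTR, RFC 3686 5.1) bytes that ride at the tail of the key blob, both when validating the length and when deriving cri_klen. A standalone sketch of that bookkeeping; effective_key_bits() is a hypothetical helper, not a kernel function:

#include <stdio.h>

/* Mirrors the keylen and cri_klen math in esp_init() above. */
static int
effective_key_bits(size_t blob_len, int is_ctr_or_gcm)
{
	/* For CTR/GCM the last 4 bytes are salt/nonce, not key. */
	size_t klen = blob_len - (is_ctr_or_gcm ? 4 : 0);

	return ((int)(klen * 8));
}

int
main(void)
{
	/* AES-128-GCM key blob: 16 key bytes + 4 salt bytes. */
	printf("%d\n", effective_key_bits(20, 1));	/* 128 */
	/* 3DES-CBC carries no salt: 24 bytes give 192 bits. */
	printf("%d\n", effective_key_bits(24, 0));	/* 192 */
	return (0);
}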
*/ static int esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { char buf[128]; struct auth_hash *esph; struct enc_xform *espx; struct tdb_crypto *tc; uint8_t *ivp; int plen, alen, hlen; struct newesp *esp; struct cryptodesc *crde; struct cryptop *crp; IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->tdb_encalgxform != NULL, ("null encoding xform")); /* Valid IP Packet length ? */ if ( (skip&3) || (m->m_pkthdr.len&3) ){ DPRINTF(("%s: misaligned packet, skip %u pkt len %u", __func__, skip, m->m_pkthdr.len)); ESPSTAT_INC(esps_badilen); m_freem(m); return EINVAL; } /* XXX don't pullup, just copy header */ IP6_EXTHDR_GET(esp, struct newesp *, m, skip, sizeof (struct newesp)); esph = sav->tdb_authalgxform; espx = sav->tdb_encalgxform; /* Determine the ESP header and auth length */ if (sav->flags & SADB_X_EXT_OLD) hlen = sizeof (struct esp) + sav->ivlen; else hlen = sizeof (struct newesp) + sav->ivlen; alen = xform_ah_authsize(esph); /* * Verify payload length is multiple of encryption algorithm * block size. * * NB: This works for the null algorithm because the blocksize * is 4 and all packets must be 4-byte aligned regardless * of the algorithm. */ plen = m->m_pkthdr.len - (skip + hlen + alen); if ((plen & (espx->blocksize - 1)) || (plen <= 0)) { DPRINTF(("%s: payload of %d octets not a multiple of %d octets," " SA %s/%08lx\n", __func__, plen, espx->blocksize, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long)ntohl(sav->spi))); ESPSTAT_INC(esps_badilen); m_freem(m); return EINVAL; } /* * Check sequence number. */ if (esph != NULL && sav->replay != NULL && !ipsec_chkreplay(ntohl(esp->esp_seq), sav)) { DPRINTF(("%s: packet replay check for %s\n", __func__, ipsec_logsastr(sav, buf, sizeof(buf)))); /*XXX*/ ESPSTAT_INC(esps_replay); m_freem(m); return ENOBUFS; /*XXX*/ } /* Update the counters */ ESPSTAT_ADD(esps_ibytes, m->m_pkthdr.len - (skip + hlen + alen)); /* Get crypto descriptors */ crp = crypto_getreq(esph && espx ? 
2 : 1); if (crp == NULL) { DPRINTF(("%s: failed to acquire crypto descriptors\n", __func__)); ESPSTAT_INC(esps_crypto); m_freem(m); return ENOBUFS; } /* Get IPsec-specific opaque pointer */ tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto) + alen, M_XDATA, M_NOWAIT | M_ZERO); if (tc == NULL) { crypto_freereq(crp); DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); ESPSTAT_INC(esps_crypto); m_freem(m); return ENOBUFS; } if (esph != NULL) { struct cryptodesc *crda = crp->crp_desc; IPSEC_ASSERT(crda != NULL, ("null ah crypto descriptor")); /* Authentication descriptor */ crda->crd_skip = skip; if (SAV_ISGCM(sav)) crda->crd_len = 8; /* RFC4106 5, SPI + SN */ else crda->crd_len = m->m_pkthdr.len - (skip + alen); crda->crd_inject = m->m_pkthdr.len - alen; crda->crd_alg = esph->type; /* Copy the authenticator */ m_copydata(m, m->m_pkthdr.len - alen, alen, (caddr_t) (tc + 1)); /* Chain authentication request */ crde = crda->crd_next; } else { crde = crp->crp_desc; } /* Crypto operation descriptor */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length */ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = esp_input_cb; crp->crp_sid = sav->tdb_cryptoid; crp->crp_opaque = (caddr_t) tc; /* These are passed as-is to the callback */ tc->tc_spi = sav->spi; tc->tc_dst = sav->sah->saidx.dst; tc->tc_proto = sav->sah->saidx.proto; tc->tc_protoff = protoff; tc->tc_skip = skip; KEY_ADDREFSA(sav); tc->tc_sav = sav; /* Decryption descriptor */ IPSEC_ASSERT(crde != NULL, ("null esp crypto descriptor")); crde->crd_skip = skip + hlen; crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen); crde->crd_inject = skip + hlen - sav->ivlen; if (SAV_ISCTRORGCM(sav)) { ivp = &crde->crd_iv[0]; /* GCM IV Format: RFC4106 4 */ /* CTR IV Format: RFC3686 4 */ /* Salt is last four bytes of key, RFC4106 8.1 */ /* Nonce is last four bytes of key, RFC3686 5.1 */ memcpy(ivp, sav->key_enc->key_data + _KEYLEN(sav->key_enc) - 4, 4); if (SAV_ISCTR(sav)) { /* Initial block counter is 1, RFC3686 4 */ be32enc(&ivp[sav->ivlen + 4], 1); } m_copydata(m, skip + hlen - sav->ivlen, sav->ivlen, &ivp[4]); crde->crd_flags |= CRD_F_IV_EXPLICIT; } crde->crd_alg = espx->type; return (crypto_dispatch(crp)); } /* * ESP input callback from the crypto driver. */ static int esp_input_cb(struct cryptop *crp) { char buf[128]; u_int8_t lastthree[3], aalg[AH_HMAC_MAXHASHLEN]; int hlen, skip, protoff, error, alen; struct mbuf *m; struct cryptodesc *crd; struct auth_hash *esph; struct enc_xform *espx; struct tdb_crypto *tc; struct secasvar *sav; struct secasindex *saidx; caddr_t ptr; crd = crp->crp_desc; IPSEC_ASSERT(crd != NULL, ("null crypto descriptor!")); tc = (struct tdb_crypto *) crp->crp_opaque; IPSEC_ASSERT(tc != NULL, ("null opaque crypto data area!")); skip = tc->tc_skip; protoff = tc->tc_protoff; m = (struct mbuf *) crp->crp_buf; sav = tc->tc_sav; IPSEC_ASSERT(sav != NULL, ("null SA!")); saidx = &sav->sah->saidx; IPSEC_ASSERT(saidx->dst.sa.sa_family == AF_INET || saidx->dst.sa.sa_family == AF_INET6, ("unexpected protocol family %u", saidx->dst.sa.sa_family)); esph = sav->tdb_authalgxform; espx = sav->tdb_encalgxform; /* Check for crypto errors */ if (crp->crp_etype) { /* Reset the session ID */ if (sav->tdb_cryptoid != 0) sav->tdb_cryptoid = crp->crp_sid; if (crp->crp_etype == EAGAIN) return (crypto_dispatch(crp)); ESPSTAT_INC(esps_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; goto bad; } /* Shouldn't happen... 
*/ if (m == NULL) { ESPSTAT_INC(esps_crypto); DPRINTF(("%s: bogus returned buffer from crypto\n", __func__)); error = EINVAL; goto bad; } ESPSTAT_INC(esps_hist[sav->alg_enc]); /* If authentication was performed, check now. */ if (esph != NULL) { alen = xform_ah_authsize(esph); AHSTAT_INC(ahs_hist[sav->alg_auth]); /* Copy the authenticator from the packet */ m_copydata(m, m->m_pkthdr.len - alen, alen, aalg); ptr = (caddr_t) (tc + 1); /* Verify authenticator */ if (timingsafe_bcmp(ptr, aalg, alen) != 0) { DPRINTF(("%s: authentication hash mismatch for " "packet in SA %s/%08lx\n", __func__, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); ESPSTAT_INC(esps_badauth); error = EACCES; goto bad; } /* Remove trailing authenticator */ m_adj(m, -alen); } /* Release the crypto descriptors */ free(tc, M_XDATA), tc = NULL; crypto_freereq(crp), crp = NULL; /* * Packet is now decrypted. */ m->m_flags |= M_DECRYPTED; /* * Update replay sequence number, if appropriate. */ if (sav->replay) { u_int32_t seq; m_copydata(m, skip + offsetof(struct newesp, esp_seq), sizeof (seq), (caddr_t) &seq); if (ipsec_updatereplay(ntohl(seq), sav)) { DPRINTF(("%s: packet replay check for %s\n", __func__, ipsec_logsastr(sav, buf, sizeof(buf)))); ESPSTAT_INC(esps_replay); error = ENOBUFS; goto bad; } } /* Determine the ESP header length */ if (sav->flags & SADB_X_EXT_OLD) hlen = sizeof (struct esp) + sav->ivlen; else hlen = sizeof (struct newesp) + sav->ivlen; /* Remove the ESP header and IV from the mbuf. */ error = m_striphdr(m, skip, hlen); if (error) { ESPSTAT_INC(esps_hdrops); DPRINTF(("%s: bad mbuf chain, SA %s/%08lx\n", __func__, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); goto bad; } /* Save the last three bytes of decrypted data */ m_copydata(m, m->m_pkthdr.len - 3, 3, lastthree); /* Verify pad length */ if (lastthree[1] + 2 > m->m_pkthdr.len - skip) { ESPSTAT_INC(esps_badilen); DPRINTF(("%s: invalid padding length %d for %u byte packet " "in SA %s/%08lx\n", __func__, lastthree[1], m->m_pkthdr.len - skip, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); error = EINVAL; goto bad; } /* Verify correct decryption by checking the last padding bytes */ if ((sav->flags & SADB_X_EXT_PMASK) != SADB_X_EXT_PRAND) { if (lastthree[1] != lastthree[0] && lastthree[1] != 0) { ESPSTAT_INC(esps_badenc); DPRINTF(("%s: decryption failed for packet in " "SA %s/%08lx\n", __func__, ipsec_address( &sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); error = EINVAL; goto bad; } } /* Trim the mbuf chain to remove trailing authenticator and padding */ m_adj(m, -(lastthree[1] + 2)); /* Restore the Next Protocol field */ m_copyback(m, protoff, sizeof (u_int8_t), lastthree + 2); switch (saidx->dst.sa.sa_family) { #ifdef INET6 case AF_INET6: error = ipsec6_common_input_cb(m, sav, skip, protoff); break; #endif #ifdef INET case AF_INET: error = ipsec4_common_input_cb(m, sav, skip, protoff); break; #endif default: panic("%s: Unexpected address family: %d saidx=%p", __func__, saidx->dst.sa.sa_family, saidx); } KEY_FREESAV(&sav); return error; bad: if (sav) KEY_FREESAV(&sav); if (m != NULL) m_freem(m); if (tc != NULL) free(tc, M_XDATA); if (crp != NULL) crypto_freereq(crp); return error; } /* * ESP output routine, called by ipsec[46]_process_packet(). 
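The trailer handling in esp_input_cb() above reads the last three decrypted bytes as { last pad byte, pad length, next header } and sanity-checks them before trimming. A user-space sketch of those two checks, assuming self-describing (or empty) padding; in the kernel the second test is skipped when the SA uses random padding (SADB_X_EXT_PRAND):

#include <stdint.h>
#include <stdio.h>

/*
 * lastthree[] holds the final decrypted bytes:
 * { last pad byte, pad length, next header }.
 */
static int
esp_trailer_ok(const uint8_t lastthree[3], int payload_len)
{
	/* The pad plus the two trailer bytes must fit in the payload. */
	if (lastthree[1] + 2 > payload_len)
		return (0);
	/*
	 * Self-describing padding is 1, 2, ..., padlen, so the last pad
	 * byte must equal the pad length (or the pad must be empty).
	 */
	if (lastthree[1] != lastthree[0] && lastthree[1] != 0)
		return (0);
	return (1);
}

int
main(void)
{
	uint8_t good[3] = { 2, 2, 4 };	/* pad "1 2", padlen 2 */
	uint8_t bad[3] = { 7, 2, 4 };	/* last pad byte disagrees */

	printf("%d %d\n", esp_trailer_ok(good, 64), esp_trailer_ok(bad, 64));
	return (0);
}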
*/ static int esp_output(struct mbuf *m, struct ipsecrequest *isr, struct mbuf **mp, int skip, int protoff) { char buf[INET6_ADDRSTRLEN]; struct enc_xform *espx; struct auth_hash *esph; uint8_t *ivp; uint64_t cntr; int hlen, rlen, padding, blks, alen, i, roff; struct mbuf *mo = (struct mbuf *) NULL; struct tdb_crypto *tc; struct secasvar *sav; struct secasindex *saidx; unsigned char *pad; u_int8_t prot; int error, maxpacketsize; struct cryptodesc *crde = NULL, *crda = NULL; struct cryptop *crp; sav = isr->sav; IPSEC_ASSERT(sav != NULL, ("null SA")); esph = sav->tdb_authalgxform; espx = sav->tdb_encalgxform; IPSEC_ASSERT(espx != NULL, ("null encoding xform")); if (sav->flags & SADB_X_EXT_OLD) hlen = sizeof (struct esp) + sav->ivlen; else hlen = sizeof (struct newesp) + sav->ivlen; rlen = m->m_pkthdr.len - skip; /* Raw payload length. */ /* * RFC4303 2.4 Requires 4 byte alignment. */ blks = MAX(4, espx->blocksize); /* Cipher blocksize */ /* XXX clamp padding length a la KAME??? */ padding = ((blks - ((rlen + 2) % blks)) % blks) + 2; alen = xform_ah_authsize(esph); ESPSTAT_INC(esps_output); saidx = &sav->sah->saidx; /* Check for maximum packet size violations. */ switch (saidx->dst.sa.sa_family) { #ifdef INET case AF_INET: maxpacketsize = IP_MAXPACKET; break; #endif /* INET */ #ifdef INET6 case AF_INET6: maxpacketsize = IPV6_MAXPACKET; break; #endif /* INET6 */ default: DPRINTF(("%s: unknown/unsupported protocol " "family %d, SA %s/%08lx\n", __func__, saidx->dst.sa.sa_family, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); ESPSTAT_INC(esps_nopf); error = EPFNOSUPPORT; goto bad; } DPRINTF(("%s: skip %d hlen %d rlen %d padding %d alen %d blksd %d\n", __func__, skip, hlen, rlen, padding, alen, blks)); if (skip + hlen + rlen + padding + alen > maxpacketsize) { DPRINTF(("%s: packet in SA %s/%08lx got too big " "(len %u, max len %u)\n", __func__, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi), skip + hlen + rlen + padding + alen, maxpacketsize)); ESPSTAT_INC(esps_toobig); error = EMSGSIZE; goto bad; } /* Update the counters. */ ESPSTAT_ADD(esps_obytes, m->m_pkthdr.len - skip); m = m_unshare(m, M_NOWAIT); if (m == NULL) { DPRINTF(("%s: cannot clone mbuf chain, SA %s/%08lx\n", __func__, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); ESPSTAT_INC(esps_hdrops); error = ENOBUFS; goto bad; } /* Inject ESP header. */ mo = m_makespace(m, skip, hlen, &roff); if (mo == NULL) { DPRINTF(("%s: %u byte ESP hdr inject failed for SA %s/%08lx\n", __func__, hlen, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); ESPSTAT_INC(esps_hdrops); /* XXX diffs from openbsd */ error = ENOBUFS; goto bad; } /* Initialize ESP header. */ bcopy((caddr_t) &sav->spi, mtod(mo, caddr_t) + roff, sizeof(u_int32_t)); if (sav->replay) { u_int32_t replay; #ifdef REGRESSION /* Emulate replay attack when ipsec_replay is TRUE. */ if (!V_ipsec_replay) #endif sav->replay->count++; replay = htonl(sav->replay->count); bcopy((caddr_t) &replay, mtod(mo, caddr_t) + roff + sizeof(u_int32_t), sizeof(u_int32_t)); } /* * Add padding -- better to do it ourselves than use the crypto engine, * although if/when we support compression, we'd have to do that. 
*/ pad = (u_char *) m_pad(m, padding + alen); if (pad == NULL) { DPRINTF(("%s: m_pad failed for SA %s/%08lx\n", __func__, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); m = NULL; /* NB: free'd by m_pad */ error = ENOBUFS; goto bad; } /* * Add padding: random, zero, or self-describing. * XXX catch unexpected setting */ switch (sav->flags & SADB_X_EXT_PMASK) { case SADB_X_EXT_PRAND: (void) read_random(pad, padding - 2); break; case SADB_X_EXT_PZERO: bzero(pad, padding - 2); break; case SADB_X_EXT_PSEQ: for (i = 0; i < padding - 2; i++) pad[i] = i+1; break; } /* Fix padding length and Next Protocol in padding itself. */ pad[padding - 2] = padding - 2; m_copydata(m, protoff, sizeof(u_int8_t), pad + padding - 1); /* Fix Next Protocol in IPv4/IPv6 header. */ prot = IPPROTO_ESP; m_copyback(m, protoff, sizeof(u_int8_t), (u_char *) &prot); /* Get crypto descriptors. */ crp = crypto_getreq(esph != NULL ? 2 : 1); if (crp == NULL) { DPRINTF(("%s: failed to acquire crypto descriptors\n", __func__)); ESPSTAT_INC(esps_crypto); error = ENOBUFS; goto bad; } /* IPsec-specific opaque crypto info. */ tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto), M_XDATA, M_NOWAIT|M_ZERO); if (tc == NULL) { crypto_freereq(crp); DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); ESPSTAT_INC(esps_crypto); error = ENOBUFS; goto bad; } crde = crp->crp_desc; crda = crde->crd_next; /* Encryption descriptor. */ crde->crd_skip = skip + hlen; crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen); crde->crd_flags = CRD_F_ENCRYPT; crde->crd_inject = skip + hlen - sav->ivlen; /* Encryption operation. */ crde->crd_alg = espx->type; if (SAV_ISCTRORGCM(sav)) { ivp = &crde->crd_iv[0]; /* GCM IV Format: RFC4106 4 */ /* CTR IV Format: RFC3686 4 */ /* Salt is last four bytes of key, RFC4106 8.1 */ /* Nonce is last four bytes of key, RFC3686 5.1 */ memcpy(ivp, sav->key_enc->key_data + _KEYLEN(sav->key_enc) - 4, 4); SECASVAR_LOCK(sav); cntr = sav->cntr++; SECASVAR_UNLOCK(sav); be64enc(&ivp[4], cntr); if (SAV_ISCTR(sav)) { /* Initial block counter is 1, RFC3686 4 */ be32enc(&ivp[sav->ivlen + 4], 1); } m_copyback(m, skip + hlen - sav->ivlen, sav->ivlen, &ivp[4]); crde->crd_flags |= CRD_F_IV_EXPLICIT|CRD_F_IV_PRESENT; } /* Callback parameters */ key_addref(isr->sp); tc->tc_isr = isr; KEY_ADDREFSA(sav); tc->tc_sav = sav; tc->tc_spi = sav->spi; tc->tc_dst = saidx->dst; tc->tc_proto = saidx->proto; /* Crypto operation descriptor. */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = esp_output_cb; crp->crp_opaque = (caddr_t) tc; crp->crp_sid = sav->tdb_cryptoid; if (esph) { /* Authentication descriptor. */ crda->crd_alg = esph->type; crda->crd_skip = skip; if (SAV_ISGCM(sav)) crda->crd_len = 8; /* RFC4106 5, SPI + SN */ else crda->crd_len = m->m_pkthdr.len - (skip + alen); crda->crd_inject = m->m_pkthdr.len - alen; } return crypto_dispatch(crp); bad: if (m) m_freem(m); return (error); } /* * ESP output callback from the crypto driver. 
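For AES-CTR and AES-GCM the output path above assembles the counter block from three pieces: the 4-byte salt or nonce taken from the tail of the key, the 8-byte per-SA packet counter, and (for CTR) an initial block counter of 1; only the middle 8 bytes travel in the packet. A minimal sketch of that layout for the CTR case; ctr_counter_block() and the be*enc_buf() helpers are illustrative stand-ins for the kernel's be32enc()/be64enc():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void
be32enc_buf(uint8_t *p, uint32_t v)
{
	p[0] = v >> 24; p[1] = v >> 16; p[2] = v >> 8; p[3] = v;
}

static void
be64enc_buf(uint8_t *p, uint64_t v)
{
	be32enc_buf(p, (uint32_t)(v >> 32));
	be32enc_buf(p + 4, (uint32_t)v);
}

static void
ctr_counter_block(uint8_t block[16], const uint8_t *key, size_t keylen,
    uint64_t pkt_cntr)
{
	memcpy(block, key + keylen - 4, 4);	/* nonce: last 4 key bytes */
	be64enc_buf(block + 4, pkt_cntr);	/* per-SA packet counter */
	be32enc_buf(block + 12, 1);		/* initial block counter */
}

int
main(void)
{
	uint8_t key[20] = { 0 };	/* 16 key bytes + 4-byte nonce */
	uint8_t block[16];
	int i;

	key[16] = 0xde; key[17] = 0xad; key[18] = 0xbe; key[19] = 0xef;
	ctr_counter_block(block, key, sizeof(key), 1);
	for (i = 0; i < 16; i++)
		printf("%02x", block[i]);
	printf("\n");	/* deadbeef, 0000000000000001, 00000001 run together */
	return (0);
}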
*/ static int esp_output_cb(struct cryptop *crp) { char buf[INET6_ADDRSTRLEN]; struct tdb_crypto *tc; struct ipsecrequest *isr; struct secasvar *sav; struct mbuf *m; int error; tc = (struct tdb_crypto *) crp->crp_opaque; IPSEC_ASSERT(tc != NULL, ("null opaque data area!")); m = (struct mbuf *) crp->crp_buf; isr = tc->tc_isr; IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp")); IPSECREQUEST_LOCK(isr); sav = tc->tc_sav; /* With the isr lock released, SA pointer may have changed. */ if (sav != isr->sav) { ESPSTAT_INC(esps_notdb); DPRINTF(("%s: SA gone during crypto (SA %s/%08lx proto %u)\n", __func__, ipsec_address(&tc->tc_dst, buf, sizeof(buf)), (u_long) ntohl(tc->tc_spi), tc->tc_proto)); error = ENOBUFS; /*XXX*/ goto bad; } /* Check for crypto errors. */ if (crp->crp_etype) { /* Reset session ID. */ if (sav->tdb_cryptoid != 0) sav->tdb_cryptoid = crp->crp_sid; if (crp->crp_etype == EAGAIN) { IPSECREQUEST_UNLOCK(isr); return (crypto_dispatch(crp)); } ESPSTAT_INC(esps_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; goto bad; } /* Shouldn't happen... */ if (m == NULL) { ESPSTAT_INC(esps_crypto); DPRINTF(("%s: bogus returned buffer from crypto\n", __func__)); error = EINVAL; goto bad; } ESPSTAT_INC(esps_hist[sav->alg_enc]); if (sav->tdb_authalgxform != NULL) AHSTAT_INC(ahs_hist[sav->alg_auth]); /* Release crypto descriptors. */ free(tc, M_XDATA); crypto_freereq(crp); #ifdef REGRESSION /* Emulate man-in-the-middle attack when ipsec_integrity is TRUE. */ if (V_ipsec_integrity) { static unsigned char ipseczeroes[AH_HMAC_MAXHASHLEN]; struct auth_hash *esph; /* * Corrupt HMAC if we want to test integrity verification of * the other side. */ esph = sav->tdb_authalgxform; if (esph != NULL) { int alen; alen = xform_ah_authsize(esph); m_copyback(m, m->m_pkthdr.len - alen, alen, ipseczeroes); } } #endif /* NB: m is reclaimed by ipsec_process_done. */ error = ipsec_process_done(m, isr); KEY_FREESAV(&sav); IPSECREQUEST_UNLOCK(isr); KEY_FREESP(&isr->sp); return (error); bad: if (sav) KEY_FREESAV(&sav); IPSECREQUEST_UNLOCK(isr); KEY_FREESP(&isr->sp); if (m) m_freem(m); free(tc, M_XDATA); crypto_freereq(crp); return (error); } static struct xformsw esp_xformsw = { XF_ESP, XFT_CONF|XFT_AUTH, "IPsec ESP", esp_init, esp_zeroize, esp_input, esp_output }; static void esp_attach(void) { xform_register(&esp_xformsw); } SYSINIT(esp_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, esp_attach, NULL); Index: projects/powernv/netpfil/pf/pf.c =================================================================== --- projects/powernv/netpfil/pf/pf.c (revision 290990) +++ projects/powernv/netpfil/pf/pf.c (revision 290991) @@ -1,6526 +1,6526 @@ /*- * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2002 - 2008 Henning Brauer * Copyright (c) 2012 Gleb Smirnoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Effort sponsored in part by the Defense Advanced Research Projects * Agency (DARPA) and Air Force Research Laboratory, Air Force * Materiel Command, USAF, under agreement number F30602-01-2-0537. * * $OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_bpf.h" #include "opt_pf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* XXX: only for DIR_IN/DIR_OUT */ #ifdef INET6 #include #include #include #include #include #endif /* INET6 */ #include #include #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x /* * Global variables */ /* state tables */ VNET_DEFINE(struct pf_altqqueue, pf_altqs[2]); VNET_DEFINE(struct pf_palist, pf_pabuf); VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active); VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive); VNET_DEFINE(struct pf_kstatus, pf_status); VNET_DEFINE(u_int32_t, ticket_altqs_active); VNET_DEFINE(u_int32_t, ticket_altqs_inactive); VNET_DEFINE(int, altqs_inactive_open); VNET_DEFINE(u_int32_t, ticket_pabuf); VNET_DEFINE(MD5_CTX, pf_tcp_secret_ctx); #define V_pf_tcp_secret_ctx VNET(pf_tcp_secret_ctx) VNET_DEFINE(u_char, pf_tcp_secret[16]); #define V_pf_tcp_secret VNET(pf_tcp_secret) VNET_DEFINE(int, pf_tcp_secret_init); #define V_pf_tcp_secret_init VNET(pf_tcp_secret_init) VNET_DEFINE(int, pf_tcp_iss_off); #define V_pf_tcp_iss_off VNET(pf_tcp_iss_off) /* * Queue for pf_intr() sends. */ static MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations"); struct pf_send_entry { STAILQ_ENTRY(pf_send_entry) pfse_next; struct mbuf *pfse_m; enum { PFSE_IP, PFSE_IP6, PFSE_ICMP, PFSE_ICMP6, } pfse_type; struct { int type; int code; int mtu; } icmpopts; }; STAILQ_HEAD(pf_send_head, pf_send_entry); static VNET_DEFINE(struct pf_send_head, pf_sendqueue); #define V_pf_sendqueue VNET(pf_sendqueue) static struct mtx pf_sendqueue_mtx; MTX_SYSINIT(pf_sendqueue_mtx, &pf_sendqueue_mtx, "pf send queue", MTX_DEF); #define PF_SENDQ_LOCK() mtx_lock(&pf_sendqueue_mtx) #define PF_SENDQ_UNLOCK() mtx_unlock(&pf_sendqueue_mtx) /* * Queue for pf_overload_task() tasks. 
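The send and overload queues declared below are drained with a swap-under-lock pattern (see pf_intr() and pf_overload_task() later in this file): the whole list head is stolen while the mutex is held, then entries are processed and freed with the lock dropped, so producers block only for the pointer swap. A self-contained pthreads sketch of the same idea, using a LIFO list for brevity where the kernel uses a FIFO STAILQ:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	int		work;
	struct entry	*next;
};

static struct entry *head;	/* the shared queue */
static pthread_mutex_t qmtx = PTHREAD_MUTEX_INITIALIZER;

static void
enqueue(int work)
{
	struct entry *e = malloc(sizeof(*e));

	if (e == NULL)
		return;
	e->work = work;
	pthread_mutex_lock(&qmtx);
	e->next = head;		/* LIFO for brevity */
	head = e;
	pthread_mutex_unlock(&qmtx);
}

static void
drain(void)
{
	struct entry *q, *next;

	pthread_mutex_lock(&qmtx);
	q = head;		/* steal the whole queue ... */
	head = NULL;		/* ... leaving it empty for producers */
	pthread_mutex_unlock(&qmtx);

	for (; q != NULL; q = next) {	/* process with the lock dropped */
		next = q->next;
		printf("processing %d\n", q->work);
		free(q);
	}
}

int
main(void)
{
	enqueue(1);
	enqueue(2);
	drain();	/* prints 2 then 1 */
	return (0);
}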
*/ struct pf_overload_entry { SLIST_ENTRY(pf_overload_entry) next; struct pf_addr addr; sa_family_t af; uint8_t dir; struct pf_rule *rule; }; SLIST_HEAD(pf_overload_head, pf_overload_entry); static VNET_DEFINE(struct pf_overload_head, pf_overloadqueue); #define V_pf_overloadqueue VNET(pf_overloadqueue) static VNET_DEFINE(struct task, pf_overloadtask); #define V_pf_overloadtask VNET(pf_overloadtask) static struct mtx pf_overloadqueue_mtx; MTX_SYSINIT(pf_overloadqueue_mtx, &pf_overloadqueue_mtx, "pf overload/flush queue", MTX_DEF); #define PF_OVERLOADQ_LOCK() mtx_lock(&pf_overloadqueue_mtx) #define PF_OVERLOADQ_UNLOCK() mtx_unlock(&pf_overloadqueue_mtx) VNET_DEFINE(struct pf_rulequeue, pf_unlinked_rules); struct mtx pf_unlnkdrules_mtx; MTX_SYSINIT(pf_unlnkdrules_mtx, &pf_unlnkdrules_mtx, "pf unlinked rules", MTX_DEF); static VNET_DEFINE(uma_zone_t, pf_sources_z); #define V_pf_sources_z VNET(pf_sources_z) uma_zone_t pf_mtag_z; VNET_DEFINE(uma_zone_t, pf_state_z); VNET_DEFINE(uma_zone_t, pf_state_key_z); VNET_DEFINE(uint64_t, pf_stateid[MAXCPU]); #define PFID_CPUBITS 8 #define PFID_CPUSHIFT (sizeof(uint64_t) * NBBY - PFID_CPUBITS) #define PFID_CPUMASK ((uint64_t)((1 << PFID_CPUBITS) - 1) << PFID_CPUSHIFT) #define PFID_MAXID (~PFID_CPUMASK) CTASSERT((1 << PFID_CPUBITS) >= MAXCPU); static void pf_src_tree_remove_state(struct pf_state *); static void pf_init_threshold(struct pf_threshold *, u_int32_t, u_int32_t); static void pf_add_threshold(struct pf_threshold *); static int pf_check_threshold(struct pf_threshold *); static void pf_change_ap(struct mbuf *, struct pf_addr *, u_int16_t *, u_int16_t *, u_int16_t *, struct pf_addr *, u_int16_t, u_int8_t, sa_family_t); static int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, struct tcphdr *, struct pf_state_peer *); static void pf_change_icmp(struct pf_addr *, u_int16_t *, struct pf_addr *, struct pf_addr *, u_int16_t, u_int16_t *, u_int16_t *, u_int16_t *, u_int16_t *, u_int8_t, sa_family_t); static void pf_send_tcp(struct mbuf *, const struct pf_rule *, sa_family_t, const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, u_int16_t, struct ifnet *); static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, sa_family_t, struct pf_rule *); static void pf_detach_state(struct pf_state *); static int pf_state_key_attach(struct pf_state_key *, struct pf_state_key *, struct pf_state *); static void pf_state_key_detach(struct pf_state *, int); static int pf_state_key_ctor(void *, int, void *, int); static u_int32_t pf_tcp_iss(struct pf_pdesc *); static int pf_test_rule(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, struct pf_pdesc *, struct pf_rule **, struct pf_ruleset **, struct inpcb *); static int pf_create_state(struct pf_rule *, struct pf_rule *, struct pf_rule *, struct pf_pdesc *, struct pf_src_node *, struct pf_state_key *, struct pf_state_key *, struct mbuf *, int, u_int16_t, u_int16_t, int *, struct pfi_kif *, struct pf_state **, int, u_int16_t, u_int16_t, int); static int pf_test_fragment(struct pf_rule **, int, struct pfi_kif *, struct mbuf *, void *, struct pf_pdesc *, struct pf_rule **, struct pf_ruleset **); static int pf_tcp_track_full(struct pf_state_peer *, struct pf_state_peer *, struct pf_state **, struct pfi_kif *, struct mbuf *, int, struct pf_pdesc *, u_short *, int *); static int pf_tcp_track_sloppy(struct pf_state_peer *, struct pf_state_peer *, struct pf_state **, struct pf_pdesc *, u_short *); static 
int pf_test_state_tcp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); static int pf_test_state_udp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *); static int pf_test_state_icmp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); static int pf_test_state_other(struct pf_state **, int, struct pfi_kif *, struct mbuf *, struct pf_pdesc *); static u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, sa_family_t); static u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, sa_family_t); static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, int, u_int16_t); static int pf_check_proto_cksum(struct mbuf *, int, int, u_int8_t, sa_family_t); static void pf_print_state_parts(struct pf_state *, struct pf_state_key *, struct pf_state_key *); static int pf_addr_wrap_neq(struct pf_addr_wrap *, struct pf_addr_wrap *); static struct pf_state *pf_find_state(struct pfi_kif *, struct pf_state_key_cmp *, u_int); static int pf_src_connlimit(struct pf_state **); static void pf_overload_task(void *v, int pending); static int pf_insert_src_node(struct pf_src_node **, struct pf_rule *, struct pf_addr *, sa_family_t); static u_int pf_purge_expired_states(u_int, int); static void pf_purge_unlinked_rules(void); static int pf_mtag_uminit(void *, int, int); static void pf_mtag_free(struct m_tag *); #ifdef INET static void pf_route(struct mbuf **, struct pf_rule *, int, struct ifnet *, struct pf_state *, struct pf_pdesc *); #endif /* INET */ #ifdef INET6 static void pf_change_a6(struct pf_addr *, u_int16_t *, struct pf_addr *, u_int8_t); static void pf_route6(struct mbuf **, struct pf_rule *, int, struct ifnet *, struct pf_state *, struct pf_pdesc *); #endif /* INET6 */ int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len); VNET_DECLARE(int, pf_end_threads); VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); #define PACKET_LOOPED(pd) ((pd)->pf_mtag && \ (pd)->pf_mtag->flags & PF_PACKET_LOOPED) #define STATE_LOOKUP(i, k, d, s, pd) \ do { \ (s) = pf_find_state((i), (k), (d)); \ if ((s) == NULL) \ return (PF_DROP); \ if (PACKET_LOOPED(pd)) \ return (PF_PASS); \ if ((d) == PF_OUT && \ (((s)->rule.ptr->rt == PF_ROUTETO && \ (s)->rule.ptr->direction == PF_OUT) || \ ((s)->rule.ptr->rt == PF_REPLYTO && \ (s)->rule.ptr->direction == PF_IN)) && \ (s)->rt_kif != NULL && \ (s)->rt_kif != (i)) \ return (PF_PASS); \ } while (0) #define BOUND_IFACE(r, k) \ ((r)->rule_flag & PFRULE_IFBOUND) ? 
(k) : V_pfi_all #define STATE_INC_COUNTERS(s) \ do { \ counter_u64_add(s->rule.ptr->states_cur, 1); \ counter_u64_add(s->rule.ptr->states_tot, 1); \ if (s->anchor.ptr != NULL) { \ counter_u64_add(s->anchor.ptr->states_cur, 1); \ counter_u64_add(s->anchor.ptr->states_tot, 1); \ } \ if (s->nat_rule.ptr != NULL) { \ counter_u64_add(s->nat_rule.ptr->states_cur, 1);\ counter_u64_add(s->nat_rule.ptr->states_tot, 1);\ } \ } while (0) #define STATE_DEC_COUNTERS(s) \ do { \ if (s->nat_rule.ptr != NULL) \ counter_u64_add(s->nat_rule.ptr->states_cur, -1);\ if (s->anchor.ptr != NULL) \ counter_u64_add(s->anchor.ptr->states_cur, -1); \ counter_u64_add(s->rule.ptr->states_cur, -1); \ } while (0) static MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures"); VNET_DEFINE(struct pf_keyhash *, pf_keyhash); VNET_DEFINE(struct pf_idhash *, pf_idhash); VNET_DEFINE(struct pf_srchash *, pf_srchash); SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)"); u_long pf_hashmask; u_long pf_srchashmask; static u_long pf_hashsize; static u_long pf_srchashsize; SYSCTL_ULONG(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN, &pf_hashsize, 0, "Size of pf(4) states hashtable"); SYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN, &pf_srchashsize, 0, "Size of pf(4) source nodes hashtable"); VNET_DEFINE(void *, pf_swi_cookie); VNET_DEFINE(uint32_t, pf_hashseed); #define V_pf_hashseed VNET(pf_hashseed) int pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: if (a->addr32[0] > b->addr32[0]) return (1); if (a->addr32[0] < b->addr32[0]) return (-1); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (a->addr32[3] > b->addr32[3]) return (1); if (a->addr32[3] < b->addr32[3]) return (-1); if (a->addr32[2] > b->addr32[2]) return (1); if (a->addr32[2] < b->addr32[2]) return (-1); if (a->addr32[1] > b->addr32[1]) return (1); if (a->addr32[1] < b->addr32[1]) return (-1); if (a->addr32[0] > b->addr32[0]) return (1); if (a->addr32[0] < b->addr32[0]) return (-1); break; #endif /* INET6 */ default: panic("%s: unknown address family %u", __func__, af); } return (0); } static __inline uint32_t pf_hashkey(struct pf_state_key *sk) { uint32_t h; h = murmur3_32_hash32((uint32_t *)sk, sizeof(struct pf_state_key_cmp)/sizeof(uint32_t), V_pf_hashseed); return (h & pf_hashmask); } static __inline uint32_t pf_hashsrc(struct pf_addr *addr, sa_family_t af) { uint32_t h; switch (af) { case AF_INET: h = murmur3_32_hash32((uint32_t *)&addr->v4, sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed); break; case AF_INET6: h = murmur3_32_hash32((uint32_t *)&addr->v6, sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed); break; default: panic("%s: unknown address family %u", __func__, af); } return (h & pf_srchashmask); } #ifdef ALTQ static int pf_state_hash(struct pf_state *s) { u_int32_t hv = (intptr_t)s / sizeof(*s); hv ^= crc32(&s->src, sizeof(s->src)); hv ^= crc32(&s->dst, sizeof(s->dst)); if (hv == 0) hv = 1; return (hv); } #endif #ifdef INET6 void pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: dst->addr32[0] = src->addr32[0]; break; #endif /* INET */ case AF_INET6: dst->addr32[0] = src->addr32[0]; dst->addr32[1] = src->addr32[1]; dst->addr32[2] = src->addr32[2]; dst->addr32[3] = src->addr32[3]; break; } } #endif /* INET6 */ static void pf_init_threshold(struct pf_threshold *threshold, u_int32_t limit, u_int32_t seconds) { threshold->limit = limit * PF_THRESHOLD_MULT; threshold->seconds = seconds; 
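pf_hashkey() and pf_hashsrc() above reduce a seeded 32-bit hash to a bucket index with a bitwise AND, which works only because the table sizes are powers of two (pf_hashmask = pf_hashsize - 1). A toy sketch of the pattern; mix32() is a stand-in mixer, not the kernel's murmur3_32_hash32():

#include <stdint.h>
#include <stdio.h>

static uint32_t
mix32(const uint32_t *key, size_t nwords, uint32_t seed)
{
	uint32_t h = seed;

	while (nwords-- > 0) {
		h ^= *key++;
		h *= 0x9e3779b1u;	/* odd multiplier: invertible mix */
		h ^= h >> 16;
	}
	return (h);
}

int
main(void)
{
	uint32_t key[4] = { 0x0a000001, 0, 0, 0 };	/* 10.0.0.1, padded */
	uint32_t mask = 32768 - 1;	/* table size must be a power of 2 */

	printf("bucket %u\n", mix32(key, 4, 0xdeadbeef) & mask);
	return (0);
}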
threshold->count = 0; threshold->last = time_uptime; } static void pf_add_threshold(struct pf_threshold *threshold) { u_int32_t t = time_uptime, diff = t - threshold->last; if (diff >= threshold->seconds) threshold->count = 0; else threshold->count -= threshold->count * diff / threshold->seconds; threshold->count += PF_THRESHOLD_MULT; threshold->last = t; } static int pf_check_threshold(struct pf_threshold *threshold) { return (threshold->count > threshold->limit); } static int pf_src_connlimit(struct pf_state **state) { struct pf_overload_entry *pfoe; int bad = 0; PF_STATE_LOCK_ASSERT(*state); (*state)->src_node->conn++; (*state)->src.tcp_est = 1; pf_add_threshold(&(*state)->src_node->conn_rate); if ((*state)->rule.ptr->max_src_conn && (*state)->rule.ptr->max_src_conn < (*state)->src_node->conn) { counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONN], 1); bad++; } if ((*state)->rule.ptr->max_src_conn_rate.limit && pf_check_threshold(&(*state)->src_node->conn_rate)) { counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONNRATE], 1); bad++; } if (!bad) return (0); /* Kill this state. */ (*state)->timeout = PFTM_PURGE; (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; if ((*state)->rule.ptr->overload_tbl == NULL) return (1); /* Schedule overloading and flushing task. */ pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT); if (pfoe == NULL) return (1); /* too bad :( */ bcopy(&(*state)->src_node->addr, &pfoe->addr, sizeof(pfoe->addr)); pfoe->af = (*state)->key[PF_SK_WIRE]->af; pfoe->rule = (*state)->rule.ptr; pfoe->dir = (*state)->direction; PF_OVERLOADQ_LOCK(); SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next); PF_OVERLOADQ_UNLOCK(); taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask); return (1); } static void pf_overload_task(void *v, int pending) { struct pf_overload_head queue; struct pfr_addr p; struct pf_overload_entry *pfoe, *pfoe1; uint32_t killed = 0; CURVNET_SET((struct vnet *)v); PF_OVERLOADQ_LOCK(); queue = V_pf_overloadqueue; SLIST_INIT(&V_pf_overloadqueue); PF_OVERLOADQ_UNLOCK(); bzero(&p, sizeof(p)); SLIST_FOREACH(pfoe, &queue, next) { counter_u64_add(V_pf_status.lcounters[LCNT_OVERLOAD_TABLE], 1); if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("%s: blocking address ", __func__); pf_print_host(&pfoe->addr, 0, pfoe->af); printf("\n"); } p.pfra_af = pfoe->af; switch (pfoe->af) { #ifdef INET case AF_INET: p.pfra_net = 32; p.pfra_ip4addr = pfoe->addr.v4; break; #endif #ifdef INET6 case AF_INET6: p.pfra_net = 128; p.pfra_ip6addr = pfoe->addr.v6; break; #endif } PF_RULES_WLOCK(); pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second); PF_RULES_WUNLOCK(); } /* * Remove those entries, that don't need flushing. */ SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1) if (pfoe->rule->flush == 0) { SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next); free(pfoe, M_PFTEMP); } else counter_u64_add( V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH], 1); /* If nothing to flush, return. 
*/ if (SLIST_EMPTY(&queue)) { CURVNET_RESTORE(); return; } for (int i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; struct pf_state_key *sk; struct pf_state *s; PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { sk = s->key[PF_SK_WIRE]; SLIST_FOREACH(pfoe, &queue, next) if (sk->af == pfoe->af && ((pfoe->rule->flush & PF_FLUSH_GLOBAL) || pfoe->rule == s->rule.ptr) && ((pfoe->dir == PF_OUT && PF_AEQ(&pfoe->addr, &sk->addr[1], sk->af)) || (pfoe->dir == PF_IN && PF_AEQ(&pfoe->addr, &sk->addr[0], sk->af)))) { s->timeout = PFTM_PURGE; s->src.state = s->dst.state = TCPS_CLOSED; killed++; } } PF_HASHROW_UNLOCK(ih); } SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1) free(pfoe, M_PFTEMP); if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: %u states killed", __func__, killed); CURVNET_RESTORE(); } /* * Can return locked on failure, so that we can consistently * allocate and insert a new one. */ struct pf_src_node * pf_find_src_node(struct pf_addr *src, struct pf_rule *rule, sa_family_t af, int returnlocked) { struct pf_srchash *sh; struct pf_src_node *n; counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1); sh = &V_pf_srchash[pf_hashsrc(src, af)]; PF_HASHROW_LOCK(sh); LIST_FOREACH(n, &sh->nodes, entry) if (n->rule.ptr == rule && n->af == af && ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) || (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0))) break; if (n != NULL) { n->states++; PF_HASHROW_UNLOCK(sh); } else if (returnlocked == 0) PF_HASHROW_UNLOCK(sh); return (n); } static int pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, struct pf_addr *src, sa_family_t af) { KASSERT((rule->rule_flag & PFRULE_RULESRCTRACK || rule->rpool.opts & PF_POOL_STICKYADDR), ("%s for non-tracking rule %p", __func__, rule)); if (*sn == NULL) *sn = pf_find_src_node(src, rule, af, 1); if (*sn == NULL) { struct pf_srchash *sh = &V_pf_srchash[pf_hashsrc(src, af)]; PF_HASHROW_ASSERT(sh); if (!rule->max_src_nodes || counter_u64_fetch(rule->src_nodes) < rule->max_src_nodes) (*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO); else counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES], 1); if ((*sn) == NULL) { PF_HASHROW_UNLOCK(sh); return (-1); } pf_init_threshold(&(*sn)->conn_rate, rule->max_src_conn_rate.limit, rule->max_src_conn_rate.seconds); (*sn)->af = af; (*sn)->rule.ptr = rule; PF_ACPY(&(*sn)->addr, src, af); LIST_INSERT_HEAD(&sh->nodes, *sn, entry); (*sn)->creation = time_uptime; (*sn)->ruletype = rule->action; (*sn)->states = 1; if ((*sn)->rule.ptr != NULL) counter_u64_add((*sn)->rule.ptr->src_nodes, 1); PF_HASHROW_UNLOCK(sh); counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1); } else { if (rule->max_src_states && (*sn)->states >= rule->max_src_states) { counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES], 1); return (-1); } } return (0); } void pf_unlink_src_node(struct pf_src_node *src) { PF_HASHROW_ASSERT(&V_pf_srchash[pf_hashsrc(&src->addr, src->af)]); LIST_REMOVE(src, entry); if (src->rule.ptr) counter_u64_add(src->rule.ptr->src_nodes, -1); } u_int pf_free_src_nodes(struct pf_src_node_list *head) { struct pf_src_node *sn, *tmp; u_int count = 0; LIST_FOREACH_SAFE(sn, head, entry, tmp) { uma_zfree(V_pf_sources_z, sn); count++; } counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], count); return (count); } void pf_mtag_initialize() { pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) + sizeof(struct pf_mtag), NULL, NULL, pf_mtag_uminit, NULL, UMA_ALIGN_PTR, 0); } /* Per-vnet data storage structures 
initialization. */ void pf_initialize() { struct pf_keyhash *kh; struct pf_idhash *ih; struct pf_srchash *sh; u_int i; if (pf_hashsize == 0 || !powerof2(pf_hashsize)) pf_hashsize = PF_HASHSIZ; if (pf_srchashsize == 0 || !powerof2(pf_srchashsize)) pf_srchashsize = PF_HASHSIZ / 4; V_pf_hashseed = arc4random(); /* States and state keys storage. */ V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_state), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z; uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT); uma_zone_set_warning(V_pf_state_z, "PF states limit reached"); V_pf_state_key_z = uma_zcreate("pf state keys", sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); V_pf_keyhash = malloc(pf_hashsize * sizeof(struct pf_keyhash), M_PFHASH, M_WAITOK | M_ZERO); V_pf_idhash = malloc(pf_hashsize * sizeof(struct pf_idhash), M_PFHASH, M_WAITOK | M_ZERO); pf_hashmask = pf_hashsize - 1; for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask; i++, kh++, ih++) { mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK); mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF); } /* Source nodes. */ V_pf_sources_z = uma_zcreate("pf source nodes", sizeof(struct pf_src_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z; uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT); uma_zone_set_warning(V_pf_sources_z, "PF source nodes limit reached"); V_pf_srchash = malloc(pf_srchashsize * sizeof(struct pf_srchash), M_PFHASH, M_WAITOK|M_ZERO); pf_srchashmask = pf_srchashsize - 1; for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF); /* ALTQ */ TAILQ_INIT(&V_pf_altqs[0]); TAILQ_INIT(&V_pf_altqs[1]); TAILQ_INIT(&V_pf_pabuf); V_pf_altqs_active = &V_pf_altqs[0]; V_pf_altqs_inactive = &V_pf_altqs[1]; /* Send & overload+flush queues. */ STAILQ_INIT(&V_pf_sendqueue); SLIST_INIT(&V_pf_overloadqueue); TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, curvnet); /* Unlinked, but may be referenced rules. 
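pf_initialize() above accepts the states_hashsize and source_nodes_hashsize tunables only when they are powers of two, since size - 1 must serve as the bucket mask; anything else falls back to the built-in default. A sketch of that sizing rule, assuming a PF_HASHSIZ-like default of 32768:

#include <stdio.h>

#define DEFAULT_HASHSIZE	32768UL	/* assumed PF_HASHSIZ-like default */

static int
powerof2_ul(unsigned long x)
{
	return (x != 0 && (x & (x - 1)) == 0);
}

static unsigned long
pick_hashsize(unsigned long tunable)
{
	/* Only a power of two lets size - 1 act as the bucket mask. */
	if (tunable == 0 || !powerof2_ul(tunable))
		return (DEFAULT_HASHSIZE);
	return (tunable);
}

int
main(void)
{
	printf("%lu\n", pick_hashsize(0));	/* 32768 */
	printf("%lu\n", pick_hashsize(1000));	/* 32768: not a power of 2 */
	printf("%lu\n", pick_hashsize(4096));	/* 4096 */
	return (0);
}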
*/ TAILQ_INIT(&V_pf_unlinked_rules); } void pf_mtag_cleanup() { uma_zdestroy(pf_mtag_z); } void pf_cleanup() { struct pf_keyhash *kh; struct pf_idhash *ih; struct pf_srchash *sh; struct pf_send_entry *pfse, *next; u_int i; for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask; i++, kh++, ih++) { KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty", __func__)); KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty", __func__)); mtx_destroy(&kh->lock); mtx_destroy(&ih->lock); } free(V_pf_keyhash, M_PFHASH); free(V_pf_idhash, M_PFHASH); for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { KASSERT(LIST_EMPTY(&sh->nodes), ("%s: source node hash not empty", __func__)); mtx_destroy(&sh->lock); } free(V_pf_srchash, M_PFHASH); STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) { m_freem(pfse->pfse_m); free(pfse, M_PFTEMP); } uma_zdestroy(V_pf_sources_z); uma_zdestroy(V_pf_state_z); uma_zdestroy(V_pf_state_key_z); } static int pf_mtag_uminit(void *mem, int size, int how) { struct m_tag *t; t = (struct m_tag *)mem; t->m_tag_cookie = MTAG_ABI_COMPAT; t->m_tag_id = PACKET_TAG_PF; t->m_tag_len = sizeof(struct pf_mtag); t->m_tag_free = pf_mtag_free; return (0); } static void pf_mtag_free(struct m_tag *t) { uma_zfree(pf_mtag_z, t); } struct pf_mtag * pf_get_mtag(struct mbuf *m) { struct m_tag *mtag; if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) != NULL) return ((struct pf_mtag *)(mtag + 1)); mtag = uma_zalloc(pf_mtag_z, M_NOWAIT); if (mtag == NULL) return (NULL); bzero(mtag + 1, sizeof(struct pf_mtag)); m_tag_prepend(m, mtag); return ((struct pf_mtag *)(mtag + 1)); } static int pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks, struct pf_state *s) { struct pf_keyhash *khs, *khw, *kh; struct pf_state_key *sk, *cur; struct pf_state *si, *olds = NULL; int idx; KASSERT(s->refs == 0, ("%s: state not pristine", __func__)); KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__)); KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__)); /* * We need to lock hash slots of both keys. To avoid deadlock * we always lock the slot with lower address first. Unlock order * isn't important. * * We also need to lock ID hash slot before dropping key * locks. On success we return with ID hash slot locked. */ if (skw == sks) { khs = khw = &V_pf_keyhash[pf_hashkey(skw)]; PF_HASHROW_LOCK(khs); } else { khs = &V_pf_keyhash[pf_hashkey(sks)]; khw = &V_pf_keyhash[pf_hashkey(skw)]; if (khs == khw) { PF_HASHROW_LOCK(khs); } else if (khs < khw) { PF_HASHROW_LOCK(khs); PF_HASHROW_LOCK(khw); } else { PF_HASHROW_LOCK(khw); PF_HASHROW_LOCK(khs); } } #define KEYS_UNLOCK() do { \ if (khs != khw) { \ PF_HASHROW_UNLOCK(khs); \ PF_HASHROW_UNLOCK(khw); \ } else \ PF_HASHROW_UNLOCK(khs); \ } while (0) /* * First run: start with wire key. */ sk = skw; kh = khw; idx = PF_SK_WIRE; keyattach: LIST_FOREACH(cur, &kh->keys, entry) if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0) break; if (cur != NULL) { /* Key exists. Check for same kif, if none, add to key. */ TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) { struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)]; PF_HASHROW_LOCK(ih); if (si->kif == s->kif && si->direction == s->direction) { if (sk->proto == IPPROTO_TCP && si->src.state >= TCPS_FIN_WAIT_2 && si->dst.state >= TCPS_FIN_WAIT_2) { /* * New state matches an old >FIN_WAIT_2 * state. We can't drop key hash locks, * thus we can't unlink it properly. 
* * As a workaround we drop it into * TCPS_CLOSED state, schedule purge * ASAP and push it into the very end * of the slot TAILQ, so that it won't * conflict with our new state. */ si->src.state = si->dst.state = TCPS_CLOSED; si->timeout = PFTM_PURGE; olds = si; } else { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: %s key attach " "failed on %s: ", (idx == PF_SK_WIRE) ? "wire" : "stack", s->kif->pfik_name); pf_print_state_parts(s, (idx == PF_SK_WIRE) ? sk : NULL, (idx == PF_SK_STACK) ? sk : NULL); printf(", existing: "); pf_print_state_parts(si, (idx == PF_SK_WIRE) ? sk : NULL, (idx == PF_SK_STACK) ? sk : NULL); printf("\n"); } PF_HASHROW_UNLOCK(ih); KEYS_UNLOCK(); uma_zfree(V_pf_state_key_z, sk); if (idx == PF_SK_STACK) pf_detach_state(s); return (EEXIST); /* collision! */ } } PF_HASHROW_UNLOCK(ih); } uma_zfree(V_pf_state_key_z, sk); s->key[idx] = cur; } else { LIST_INSERT_HEAD(&kh->keys, sk, entry); s->key[idx] = sk; } stateattach: /* List is sorted, if-bound states before floating. */ if (s->kif == V_pfi_all) TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]); else TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]); if (olds) { TAILQ_REMOVE(&s->key[idx]->states[idx], olds, key_list[idx]); TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], olds, key_list[idx]); olds = NULL; } /* * Attach done. See how should we (or should not?) * attach a second key. */ if (sks == skw) { s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; idx = PF_SK_STACK; sks = NULL; goto stateattach; } else if (sks != NULL) { /* * Continue attaching with stack key. */ sk = sks; kh = khs; idx = PF_SK_STACK; sks = NULL; goto keyattach; } PF_STATE_LOCK(s); KEYS_UNLOCK(); KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL, ("%s failure", __func__)); return (0); #undef KEYS_UNLOCK } static void pf_detach_state(struct pf_state *s) { struct pf_state_key *sks = s->key[PF_SK_STACK]; struct pf_keyhash *kh; if (sks != NULL) { kh = &V_pf_keyhash[pf_hashkey(sks)]; PF_HASHROW_LOCK(kh); if (s->key[PF_SK_STACK] != NULL) pf_state_key_detach(s, PF_SK_STACK); /* * If both point to same key, then we are done. 
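The two-key attach above avoids deadlock by always taking the lower-addressed hash-row lock first, so any two threads that need the same pair contend in the same order; unlock order does not matter. A compact pthreads sketch of that discipline (comparing pointers to unrelated objects is formally unspecified in ISO C, but it is the same trick the kernel relies on):

#include <pthread.h>

static void
lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a == b) {
		pthread_mutex_lock(a);		/* same row: lock once */
	} else if (a < b) {
		pthread_mutex_lock(a);		/* lower address first ... */
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);		/* ... on both code paths */
		pthread_mutex_lock(a);
	}
}

static void
unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);		/* unlock order is irrelevant */
	if (a != b)
		pthread_mutex_unlock(b);
}

int
main(void)
{
	pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

	lock_pair(&m1, &m2);
	unlock_pair(&m1, &m2);
	return (0);
}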
*/ if (sks == s->key[PF_SK_WIRE]) { pf_state_key_detach(s, PF_SK_WIRE); PF_HASHROW_UNLOCK(kh); return; } PF_HASHROW_UNLOCK(kh); } if (s->key[PF_SK_WIRE] != NULL) { kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])]; PF_HASHROW_LOCK(kh); if (s->key[PF_SK_WIRE] != NULL) pf_state_key_detach(s, PF_SK_WIRE); PF_HASHROW_UNLOCK(kh); } } static void pf_state_key_detach(struct pf_state *s, int idx) { struct pf_state_key *sk = s->key[idx]; #ifdef INVARIANTS struct pf_keyhash *kh = &V_pf_keyhash[pf_hashkey(sk)]; PF_HASHROW_ASSERT(kh); #endif TAILQ_REMOVE(&sk->states[idx], s, key_list[idx]); s->key[idx] = NULL; if (TAILQ_EMPTY(&sk->states[0]) && TAILQ_EMPTY(&sk->states[1])) { LIST_REMOVE(sk, entry); uma_zfree(V_pf_state_key_z, sk); } } static int pf_state_key_ctor(void *mem, int size, void *arg, int flags) { struct pf_state_key *sk = mem; bzero(sk, sizeof(struct pf_state_key_cmp)); TAILQ_INIT(&sk->states[PF_SK_WIRE]); TAILQ_INIT(&sk->states[PF_SK_STACK]); return (0); } struct pf_state_key * pf_state_key_setup(struct pf_pdesc *pd, struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t sport, u_int16_t dport) { struct pf_state_key *sk; sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT); if (sk == NULL) return (NULL); PF_ACPY(&sk->addr[pd->sidx], saddr, pd->af); PF_ACPY(&sk->addr[pd->didx], daddr, pd->af); sk->port[pd->sidx] = sport; sk->port[pd->didx] = dport; sk->proto = pd->proto; sk->af = pd->af; return (sk); } struct pf_state_key * pf_state_key_clone(struct pf_state_key *orig) { struct pf_state_key *sk; sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT); if (sk == NULL) return (NULL); bcopy(orig, sk, sizeof(struct pf_state_key_cmp)); return (sk); } int pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw, struct pf_state_key *sks, struct pf_state *s) { struct pf_idhash *ih; struct pf_state *cur; int error; KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]), ("%s: sks not pristine", __func__)); KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]), ("%s: skw not pristine", __func__)); KASSERT(s->refs == 0, ("%s: state not pristine", __func__)); s->kif = kif; if (s->id == 0 && s->creatorid == 0) { /* XXX: should be atomic, but probability of collision low */ if ((s->id = V_pf_stateid[curcpu]++) == PFID_MAXID) V_pf_stateid[curcpu] = 1; s->id |= (uint64_t )curcpu << PFID_CPUSHIFT; s->id = htobe64(s->id); s->creatorid = V_pf_status.hostid; } /* Returns with ID locked on success. */ if ((error = pf_state_key_attach(skw, sks, s)) != 0) return (error); ih = &V_pf_idhash[PF_IDHASH(s)]; PF_HASHROW_ASSERT(ih); LIST_FOREACH(cur, &ih->states, entry) if (cur->id == s->id && cur->creatorid == s->creatorid) break; if (cur != NULL) { PF_HASHROW_UNLOCK(ih); if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: state ID collision: " "id: %016llx creatorid: %08x\n", (unsigned long long)be64toh(s->id), ntohl(s->creatorid)); } pf_detach_state(s); return (EEXIST); } LIST_INSERT_HEAD(&ih->states, s, entry); /* One for keys, one for ID hash. */ refcount_init(&s->refs, 2); counter_u64_add(V_pf_status.fcounters[FCNT_STATE_INSERT], 1); if (pfsync_insert_state_ptr != NULL) pfsync_insert_state_ptr(s); /* Returns locked. */ return (0); } /* * Find state by ID: returns with locked row on success. 
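pf_state_insert() above mints a 64-bit state ID by packing the originating CPU into the top PFID_CPUBITS bits and a per-CPU counter into the remainder, so CPUs never contend on a shared counter; the kernel additionally byte-swaps the result with htobe64(), which this user-space sketch (with renamed macros) omits:

#include <stdint.h>
#include <stdio.h>

#define ID_CPUBITS	8
#define ID_CPUSHIFT	(64 - ID_CPUBITS)
#define ID_CPUMASK	((((uint64_t)1 << ID_CPUBITS) - 1) << ID_CPUSHIFT)
#define ID_MAXID	(~ID_CPUMASK)

static uint64_t percpu_ctr[4];	/* one counter per CPU, no shared lock */

static uint64_t
alloc_id(unsigned int cpu)
{
	uint64_t id = percpu_ctr[cpu]++;

	if (id == ID_MAXID)	/* wrap the counter, skipping the max */
		percpu_ctr[cpu] = 1;
	return (id | ((uint64_t)cpu << ID_CPUSHIFT));
}

int
main(void)
{
	printf("%016jx\n", (uintmax_t)alloc_id(2));	/* 0200000000000000 */
	printf("%016jx\n", (uintmax_t)alloc_id(2));	/* 0200000000000001 */
	return (0);
}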
*/ struct pf_state * pf_find_state_byid(uint64_t id, uint32_t creatorid) { struct pf_idhash *ih; struct pf_state *s; counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1); ih = &V_pf_idhash[(be64toh(id) % (pf_hashmask + 1))]; PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) if (s->id == id && s->creatorid == creatorid) break; if (s == NULL) PF_HASHROW_UNLOCK(ih); return (s); } /* * Find state by key. * Returns with ID hash slot locked on success. */ static struct pf_state * pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir) { struct pf_keyhash *kh; struct pf_state_key *sk; struct pf_state *s; int idx; counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1); kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)]; PF_HASHROW_LOCK(kh); LIST_FOREACH(sk, &kh->keys, entry) if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0) break; if (sk == NULL) { PF_HASHROW_UNLOCK(kh); return (NULL); } idx = (dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK); /* List is sorted, if-bound states before floating ones. */ TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) if (s->kif == V_pfi_all || s->kif == kif) { PF_STATE_LOCK(s); PF_HASHROW_UNLOCK(kh); if (s->timeout >= PFTM_MAX) { /* * State is either being processed by * pf_unlink_state() in an other thread, or * is scheduled for immediate expiry. */ PF_STATE_UNLOCK(s); return (NULL); } return (s); } PF_HASHROW_UNLOCK(kh); return (NULL); } struct pf_state * pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) { struct pf_keyhash *kh; struct pf_state_key *sk; struct pf_state *s, *ret = NULL; int idx, inout = 0; counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1); kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)]; PF_HASHROW_LOCK(kh); LIST_FOREACH(sk, &kh->keys, entry) if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0) break; if (sk == NULL) { PF_HASHROW_UNLOCK(kh); return (NULL); } switch (dir) { case PF_IN: idx = PF_SK_WIRE; break; case PF_OUT: idx = PF_SK_STACK; break; case PF_INOUT: idx = PF_SK_WIRE; inout = 1; break; default: panic("%s: dir %u", __func__, dir); } second_run: TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) { if (more == NULL) { PF_HASHROW_UNLOCK(kh); return (s); } if (ret) (*more)++; else ret = s; } if (inout == 1) { inout = 0; idx = PF_SK_STACK; goto second_run; } PF_HASHROW_UNLOCK(kh); return (ret); } /* END state table stuff */ static void pf_send(struct pf_send_entry *pfse) { PF_SENDQ_LOCK(); STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next); PF_SENDQ_UNLOCK(); swi_sched(V_pf_swi_cookie, 0); } void pf_intr(void *v) { struct pf_send_head queue; struct pf_send_entry *pfse, *next; CURVNET_SET((struct vnet *)v); PF_SENDQ_LOCK(); queue = V_pf_sendqueue; STAILQ_INIT(&V_pf_sendqueue); PF_SENDQ_UNLOCK(); STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) { switch (pfse->pfse_type) { #ifdef INET case PFSE_IP: ip_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL); break; case PFSE_ICMP: icmp_error(pfse->pfse_m, pfse->icmpopts.type, pfse->icmpopts.code, 0, pfse->icmpopts.mtu); break; #endif /* INET */ #ifdef INET6 case PFSE_IP6: ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL, NULL); break; case PFSE_ICMP6: icmp6_error(pfse->pfse_m, pfse->icmpopts.type, pfse->icmpopts.code, pfse->icmpopts.mtu); break; #endif /* INET6 */ default: panic("%s: unknown type", __func__); } free(pfse, M_PFTEMP); } CURVNET_RESTORE(); } void pf_purge_thread(void *v) { u_int idx = 0; CURVNET_SET((struct vnet *)v); for (;;) { PF_RULES_RLOCK(); rw_sleep(pf_purge_thread, &pf_rules_lock, 
0, "pftm", hz / 10); if (V_pf_end_threads) { /* * To cleanse up all kifs and rules we need * two runs: first one clears reference flags, * then pf_purge_expired_states() doesn't * raise them, and then second run frees. */ PF_RULES_RUNLOCK(); pf_purge_unlinked_rules(); pfi_kif_purge(); /* * Now purge everything. */ pf_purge_expired_states(0, pf_hashmask); pf_purge_expired_fragments(); pf_purge_expired_src_nodes(); /* * Now all kifs & rules should be unreferenced, * thus should be successfully freed. */ pf_purge_unlinked_rules(); pfi_kif_purge(); /* * Announce success and exit. */ PF_RULES_RLOCK(); V_pf_end_threads++; PF_RULES_RUNLOCK(); wakeup(pf_purge_thread); kproc_exit(0); } PF_RULES_RUNLOCK(); /* Process 1/interval fraction of the state table every run. */ idx = pf_purge_expired_states(idx, pf_hashmask / (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10)); /* Purge other expired types every PFTM_INTERVAL seconds. */ if (idx == 0) { /* * Order is important: * - states and src nodes reference rules * - states and rules reference kifs */ pf_purge_expired_fragments(); pf_purge_expired_src_nodes(); pf_purge_unlinked_rules(); pfi_kif_purge(); } } /* not reached */ CURVNET_RESTORE(); } u_int32_t pf_state_expires(const struct pf_state *state) { u_int32_t timeout; u_int32_t start; u_int32_t end; u_int32_t states; /* handle all PFTM_* > PFTM_MAX here */ if (state->timeout == PFTM_PURGE) return (time_uptime); KASSERT(state->timeout != PFTM_UNLINKED, ("pf_state_expires: timeout == PFTM_UNLINKED")); KASSERT((state->timeout < PFTM_MAX), ("pf_state_expires: timeout > PFTM_MAX")); timeout = state->rule.ptr->timeout[state->timeout]; if (!timeout) timeout = V_pf_default_rule.timeout[state->timeout]; start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; if (start) { end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; states = counter_u64_fetch(state->rule.ptr->states_cur); } else { start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START]; end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END]; states = V_pf_status.states; } if (end && states > start && start < end) { if (states < end) return (state->expire + timeout * (end - states) / (end - start)); else return (time_uptime); } return (state->expire + timeout); } void pf_purge_expired_src_nodes() { struct pf_src_node_list freelist; struct pf_srchash *sh; struct pf_src_node *cur, *next; int i; LIST_INIT(&freelist); for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next) if (cur->states == 0 && cur->expire <= time_uptime) { pf_unlink_src_node(cur); LIST_INSERT_HEAD(&freelist, cur, entry); } else if (cur->rule.ptr != NULL) cur->rule.ptr->rule_flag |= PFRULE_REFS; PF_HASHROW_UNLOCK(sh); } pf_free_src_nodes(&freelist); V_pf_status.src_nodes = uma_zone_get_cur(V_pf_sources_z); } static void pf_src_tree_remove_state(struct pf_state *s) { struct pf_src_node *sn; struct pf_srchash *sh; uint32_t timeout; timeout = s->rule.ptr->timeout[PFTM_SRC_NODE] ? 
	    s->rule.ptr->timeout[PFTM_SRC_NODE] :
	    V_pf_default_rule.timeout[PFTM_SRC_NODE];

	if (s->src_node != NULL) {
		sn = s->src_node;
		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
		PF_HASHROW_LOCK(sh);
		if (s->src.tcp_est)
			--sn->conn;
		if (--sn->states == 0)
			sn->expire = time_uptime + timeout;
		PF_HASHROW_UNLOCK(sh);
	}
	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
		sn = s->nat_src_node;
		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
		PF_HASHROW_LOCK(sh);
		if (--sn->states == 0)
			sn->expire = time_uptime + timeout;
		PF_HASHROW_UNLOCK(sh);
	}
	s->src_node = s->nat_src_node = NULL;
}

/*
 * Unlink and potentially free a state. Function may be
 * called with ID hash row locked, but always returns
 * unlocked, since it needs to go through key hash locking.
 */
int
pf_unlink_state(struct pf_state *s, u_int flags)
{
	struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)];

	if ((flags & PF_ENTER_LOCKED) == 0)
		PF_HASHROW_LOCK(ih);
	else
		PF_HASHROW_ASSERT(ih);

	if (s->timeout == PFTM_UNLINKED) {
		/*
		 * State is being processed
		 * by pf_unlink_state() in
		 * another thread.
		 */
		PF_HASHROW_UNLOCK(ih);
		return (0);	/* XXXGL: undefined actually */
	}

	if (s->src.state == PF_TCPS_PROXY_DST) {
		/* XXX wire key the right one? */
		pf_send_tcp(NULL, s->rule.ptr, s->key[PF_SK_WIRE]->af,
		    &s->key[PF_SK_WIRE]->addr[1],
		    &s->key[PF_SK_WIRE]->addr[0],
		    s->key[PF_SK_WIRE]->port[1],
		    s->key[PF_SK_WIRE]->port[0],
		    s->src.seqhi, s->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, s->tag, NULL);
	}

	LIST_REMOVE(s, entry);
	pf_src_tree_remove_state(s);

	if (pfsync_delete_state_ptr != NULL)
		pfsync_delete_state_ptr(s);

	STATE_DEC_COUNTERS(s);

	s->timeout = PFTM_UNLINKED;
	PF_HASHROW_UNLOCK(ih);

	pf_detach_state(s);
	refcount_release(&s->refs);

	return (pf_release_state(s));
}

void
pf_free_state(struct pf_state *cur)
{

	KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur));
	KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__,
	    cur->timeout));

	pf_normalize_tcp_cleanup(cur);
	uma_zfree(V_pf_state_z, cur);
	counter_u64_add(V_pf_status.fcounters[FCNT_STATE_REMOVALS], 1);
}

/*
 * Called only from pf_purge_thread(), thus serialized.
 */
static u_int
pf_purge_expired_states(u_int i, int maxcheck)
{
	struct pf_idhash *ih;
	struct pf_state *s;

	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);

	/*
	 * Go through hash and unlink states that expire now.
	 */
	while (maxcheck > 0) {

		ih = &V_pf_idhash[i];
relock:
		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
			if (pf_state_expires(s) <= time_uptime) {
				V_pf_status.states -=
				    pf_unlink_state(s, PF_ENTER_LOCKED);
				goto relock;
			}
			s->rule.ptr->rule_flag |= PFRULE_REFS;
			if (s->nat_rule.ptr != NULL)
				s->nat_rule.ptr->rule_flag |= PFRULE_REFS;
			if (s->anchor.ptr != NULL)
				s->anchor.ptr->rule_flag |= PFRULE_REFS;
			s->kif->pfik_flags |= PFI_IFLAG_REFS;
			if (s->rt_kif)
				s->rt_kif->pfik_flags |= PFI_IFLAG_REFS;
		}
		PF_HASHROW_UNLOCK(ih);

		/* Return when we hit end of hash. */
		if (++i > pf_hashmask) {
			V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
			return (0);
		}

		maxcheck--;
	}

	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);

	return (i);
}

static void
pf_purge_unlinked_rules()
{
	struct pf_rulequeue tmpq;
	struct pf_rule *r, *r1;

	/*
	 * If we have overloading task pending, then we'd
	 * better skip purging this time. There is a tiny
	 * probability that overloading task references
	 * an already unlinked rule.
	 */
	PF_OVERLOADQ_LOCK();
	if (!SLIST_EMPTY(&V_pf_overloadqueue)) {
		PF_OVERLOADQ_UNLOCK();
		return;
	}
	PF_OVERLOADQ_UNLOCK();

	/*
	 * Do naive mark-and-sweep garbage collecting of old rules.
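	 * (An illustrative sketch of the idea, detached from pf's data
	 * structures; the queue macros and PFRULE_REFS are real, the helper
	 * free_rule() is hypothetical:
	 *
	 *	mark:	every scan that still sees the rule in use does
	 *		r->rule_flag |= PFRULE_REFS;
	 *	sweep:	TAILQ_FOREACH_SAFE(r, &unlinked, entries, tmp)
	 *			if (!(r->rule_flag & PFRULE_REFS))
	 *				free_rule(r);
	 *			else
	 *				r->rule_flag &= ~PFRULE_REFS;
	 *
	 * A rule thus has to stay unreferenced for one full purge cycle
	 * before it is freed.)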
* Reference flag is raised by pf_purge_expired_states() * and pf_purge_expired_src_nodes(). * * To avoid LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK, * use a temporary queue. */ TAILQ_INIT(&tmpq); PF_UNLNKDRULES_LOCK(); TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) { if (!(r->rule_flag & PFRULE_REFS)) { TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries); TAILQ_INSERT_TAIL(&tmpq, r, entries); } else r->rule_flag &= ~PFRULE_REFS; } PF_UNLNKDRULES_UNLOCK(); if (!TAILQ_EMPTY(&tmpq)) { PF_RULES_WLOCK(); TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) { TAILQ_REMOVE(&tmpq, r, entries); pf_free_rule(r); } PF_RULES_WUNLOCK(); } } void pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: { u_int32_t a = ntohl(addr->addr32[0]); printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, (a>>8)&255, a&255); if (p) { p = ntohs(p); printf(":%u", p); } break; } #endif /* INET */ #ifdef INET6 case AF_INET6: { u_int16_t b; u_int8_t i, curstart, curend, maxstart, maxend; curstart = curend = maxstart = maxend = 255; for (i = 0; i < 8; i++) { if (!addr->addr16[i]) { if (curstart == 255) curstart = i; curend = i; } else { if ((curend - curstart) > (maxend - maxstart)) { maxstart = curstart; maxend = curend; } curstart = curend = 255; } } if ((curend - curstart) > (maxend - maxstart)) { maxstart = curstart; maxend = curend; } for (i = 0; i < 8; i++) { if (i >= maxstart && i <= maxend) { if (i == 0) printf(":"); if (i == maxend) printf(":"); } else { b = ntohs(addr->addr16[i]); printf("%x", b); if (i < 7) printf(":"); } } if (p) { p = ntohs(p); printf("[%u]", p); } break; } #endif /* INET6 */ } } void pf_print_state(struct pf_state *s) { pf_print_state_parts(s, NULL, NULL); } static void pf_print_state_parts(struct pf_state *s, struct pf_state_key *skwp, struct pf_state_key *sksp) { struct pf_state_key *skw, *sks; u_int8_t proto, dir; /* Do our best to fill these, but they're skipped if NULL */ skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL); sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL); proto = skw ? skw->proto : (sks ? sks->proto : 0); dir = s ? 
s->direction : 0; switch (proto) { case IPPROTO_IPV4: printf("IPv4"); break; case IPPROTO_IPV6: printf("IPv6"); break; case IPPROTO_TCP: printf("TCP"); break; case IPPROTO_UDP: printf("UDP"); break; case IPPROTO_ICMP: printf("ICMP"); break; case IPPROTO_ICMPV6: printf("ICMPv6"); break; default: printf("%u", skw->proto); break; } switch (dir) { case PF_IN: printf(" in"); break; case PF_OUT: printf(" out"); break; } if (skw) { printf(" wire: "); pf_print_host(&skw->addr[0], skw->port[0], skw->af); printf(" "); pf_print_host(&skw->addr[1], skw->port[1], skw->af); } if (sks) { printf(" stack: "); if (sks != skw) { pf_print_host(&sks->addr[0], sks->port[0], sks->af); printf(" "); pf_print_host(&sks->addr[1], sks->port[1], sks->af); } else printf("-"); } if (s) { if (proto == IPPROTO_TCP) { printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo, s->src.seqhi, s->src.max_win, s->src.seqdiff); if (s->src.wscale && s->dst.wscale) printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK); printf("]"); printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo, s->dst.seqhi, s->dst.max_win, s->dst.seqdiff); if (s->src.wscale && s->dst.wscale) printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK); printf("]"); } printf(" %u:%u", s->src.state, s->dst.state); } } void pf_print_flags(u_int8_t f) { if (f) printf(" "); if (f & TH_FIN) printf("F"); if (f & TH_SYN) printf("S"); if (f & TH_RST) printf("R"); if (f & TH_PUSH) printf("P"); if (f & TH_ACK) printf("A"); if (f & TH_URG) printf("U"); if (f & TH_ECE) printf("E"); if (f & TH_CWR) printf("W"); } #define PF_SET_SKIP_STEPS(i) \ do { \ while (head[i] != cur) { \ head[i]->skip[i].ptr = cur; \ head[i] = TAILQ_NEXT(head[i], entries); \ } \ } while (0) void pf_calc_skip_steps(struct pf_rulequeue *rules) { struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; int i; cur = TAILQ_FIRST(rules); prev = cur; for (i = 0; i < PF_SKIP_COUNT; ++i) head[i] = cur; while (cur != NULL) { if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) PF_SET_SKIP_STEPS(PF_SKIP_IFP); if (cur->direction != prev->direction) PF_SET_SKIP_STEPS(PF_SKIP_DIR); if (cur->af != prev->af) PF_SET_SKIP_STEPS(PF_SKIP_AF); if (cur->proto != prev->proto) PF_SET_SKIP_STEPS(PF_SKIP_PROTO); if (cur->src.neg != prev->src.neg || pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); if (cur->src.port[0] != prev->src.port[0] || cur->src.port[1] != prev->src.port[1] || cur->src.port_op != prev->src.port_op) PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); if (cur->dst.neg != prev->dst.neg || pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); if (cur->dst.port[0] != prev->dst.port[0] || cur->dst.port[1] != prev->dst.port[1] || cur->dst.port_op != prev->dst.port_op) PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); prev = cur; cur = TAILQ_NEXT(cur, entries); } for (i = 0; i < PF_SKIP_COUNT; ++i) PF_SET_SKIP_STEPS(i); } static int pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) { if (aw1->type != aw2->type) return (1); switch (aw1->type) { case PF_ADDR_ADDRMASK: case PF_ADDR_RANGE: if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6)) return (1); if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6)) return (1); return (0); case PF_ADDR_DYNIFTL: return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); case PF_ADDR_NOROUTE: case PF_ADDR_URPFFAILED: return (0); case PF_ADDR_TABLE: return (aw1->p.tbl != aw2->p.tbl); default: printf("invalid address type: %d\n", aw1->type); return (1); } } /** * Checksum updates are a little complicated because the 
checksum in the TCP/UDP * header isn't always a full checksum. In some cases (i.e. output) it's a * pseudo-header checksum, which is a partial checksum over src/dst IP * addresses, protocol number and length. * * That means we have the following cases: * * Input or forwarding: we don't have TSO, the checksum fields are full * checksums, we need to update the checksum whenever we change anything. * * Output (i.e. the checksum is a pseudo-header checksum): * x The field being updated is src/dst address or affects the length of * the packet. We need to update the pseudo-header checksum (note that this * checksum is not ones' complement). * x Some other field is being modified (e.g. src/dst port numbers): We * don't have to update anything. **/ u_int16_t pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) { u_int32_t l; if (udp && !cksum) return (0x0000); l = cksum + old - new; l = (l >> 16) + (l & 65535); l = l & 65535; if (udp && !l) return (0xFFFF); return (l); } u_int16_t pf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) { if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) return (cksum); return (pf_cksum_fixup(cksum, old, new, udp)); } static void pf_change_ap(struct mbuf *m, struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc, struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af) { struct pf_addr ao; u_int16_t po = *p; PF_ACPY(&ao, a, af); PF_ACPY(a, an, af); if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) *pc = ~*pc; *p = pn; switch (af) { #ifdef INET case AF_INET: *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, ao.addr16[0], an->addr16[0], 0), ao.addr16[1], an->addr16[1], 0); *p = pn; *pc = pf_cksum_fixup(pf_cksum_fixup(*pc, ao.addr16[0], an->addr16[0], u), ao.addr16[1], an->addr16[1], u); *pc = pf_proto_cksum_fixup(m, *pc, po, pn, u); break; #endif /* INET */ #ifdef INET6 case AF_INET6: *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(*pc, ao.addr16[0], an->addr16[0], u), ao.addr16[1], an->addr16[1], u), ao.addr16[2], an->addr16[2], u), ao.addr16[3], an->addr16[3], u), ao.addr16[4], an->addr16[4], u), ao.addr16[5], an->addr16[5], u), ao.addr16[6], an->addr16[6], u), ao.addr16[7], an->addr16[7], u); *pc = pf_proto_cksum_fixup(m, *pc, po, pn, u); break; #endif /* INET6 */ } if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { *pc = ~*pc; if (! *pc) *pc = 0xffff; } } /* Changes a u_int32_t. 
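 * The 32-bit value is folded into the 16-bit ones' complement sum as
 * two 16-bit halves: ao / 65536 is the high word and ao % 65536 the
 * low word, so one call is equivalent to two pf_cksum_fixup() calls.
 * Worked example (illustrative values): rewriting 0x0a000001 (10.0.0.1)
 * to 0x0a000002 (10.0.0.2) fixes the sum with the old/new pairs
 * (0x0a00, 0x0a00) and (0x0001, 0x0002); the first pair is a no-op,
 * the second adjusts the sum by one.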
Uses a void * so there are no align restrictions */ void pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u) { u_int32_t ao; memcpy(&ao, a, sizeof(ao)); memcpy(a, &an, sizeof(u_int32_t)); *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u), ao % 65536, an % 65536, u); } void pf_change_proto_a(struct mbuf *m, void *a, u_int16_t *c, u_int32_t an, u_int8_t udp) { u_int32_t ao; memcpy(&ao, a, sizeof(ao)); memcpy(a, &an, sizeof(u_int32_t)); *c = pf_proto_cksum_fixup(m, pf_proto_cksum_fixup(m, *c, ao / 65536, an / 65536, udp), ao % 65536, an % 65536, udp); } #ifdef INET6 static void pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) { struct pf_addr ao; PF_ACPY(&ao, a, AF_INET6); PF_ACPY(a, an, AF_INET6); *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(*c, ao.addr16[0], an->addr16[0], u), ao.addr16[1], an->addr16[1], u), ao.addr16[2], an->addr16[2], u), ao.addr16[3], an->addr16[3], u), ao.addr16[4], an->addr16[4], u), ao.addr16[5], an->addr16[5], u), ao.addr16[6], an->addr16[6], u), ao.addr16[7], an->addr16[7], u); } #endif /* INET6 */ static void pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c, u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af) { struct pf_addr oia, ooa; PF_ACPY(&oia, ia, af); if (oa) PF_ACPY(&ooa, oa, af); /* Change inner protocol port, fix inner protocol checksum. */ if (ip != NULL) { u_int16_t oip = *ip; u_int32_t opc; if (pc != NULL) opc = *pc; *ip = np; if (pc != NULL) *pc = pf_cksum_fixup(*pc, oip, *ip, u); *ic = pf_cksum_fixup(*ic, oip, *ip, 0); if (pc != NULL) *ic = pf_cksum_fixup(*ic, opc, *pc, 0); } /* Change inner ip address, fix inner ip and icmp checksums. */ PF_ACPY(ia, na, af); switch (af) { #ifdef INET case AF_INET: { u_int32_t oh2c = *h2c; *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c, oia.addr16[0], ia->addr16[0], 0), oia.addr16[1], ia->addr16[1], 0); *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, oia.addr16[0], ia->addr16[0], 0), oia.addr16[1], ia->addr16[1], 0); *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0); break; } #endif /* INET */ #ifdef INET6 case AF_INET6: *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(*ic, oia.addr16[0], ia->addr16[0], u), oia.addr16[1], ia->addr16[1], u), oia.addr16[2], ia->addr16[2], u), oia.addr16[3], ia->addr16[3], u), oia.addr16[4], ia->addr16[4], u), oia.addr16[5], ia->addr16[5], u), oia.addr16[6], ia->addr16[6], u), oia.addr16[7], ia->addr16[7], u); break; #endif /* INET6 */ } /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. 
*/ if (oa) { PF_ACPY(oa, na, af); switch (af) { #ifdef INET case AF_INET: *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, ooa.addr16[0], oa->addr16[0], 0), ooa.addr16[1], oa->addr16[1], 0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(*ic, ooa.addr16[0], oa->addr16[0], u), ooa.addr16[1], oa->addr16[1], u), ooa.addr16[2], oa->addr16[2], u), ooa.addr16[3], oa->addr16[3], u), ooa.addr16[4], oa->addr16[4], u), ooa.addr16[5], oa->addr16[5], u), ooa.addr16[6], oa->addr16[6], u), ooa.addr16[7], oa->addr16[7], u); break; #endif /* INET6 */ } } } /* * Need to modulate the sequence numbers in the TCP SACK option * (credits to Krzysztof Pfaff for report and patch) */ static int pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, struct tcphdr *th, struct pf_state_peer *dst) { int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen; u_int8_t opts[TCP_MAXOLEN], *opt = opts; int copyback = 0, i, olen; struct sackblk sack; #define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) if (hlen < TCPOLEN_SACKLEN || !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) return 0; while (hlen >= TCPOLEN_SACKLEN) { olen = opt[1]; switch (*opt) { case TCPOPT_EOL: /* FALLTHROUGH */ case TCPOPT_NOP: opt++; hlen--; break; case TCPOPT_SACK: if (olen > hlen) olen = hlen; if (olen >= TCPOLEN_SACKLEN) { for (i = 2; i + TCPOLEN_SACK <= olen; i += TCPOLEN_SACK) { memcpy(&sack, &opt[i], sizeof(sack)); pf_change_proto_a(m, &sack.start, &th->th_sum, htonl(ntohl(sack.start) - dst->seqdiff), 0); pf_change_proto_a(m, &sack.end, &th->th_sum, htonl(ntohl(sack.end) - dst->seqdiff), 0); memcpy(&opt[i], &sack, sizeof(sack)); } copyback = 1; } /* FALLTHROUGH */ default: if (olen < 2) olen = 2; hlen -= olen; opt += olen; } } if (copyback) m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts); return (copyback); } static void pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af, const struct pf_addr *saddr, const struct pf_addr *daddr, u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, u_int16_t rtag, struct ifnet *ifp) { struct pf_send_entry *pfse; struct mbuf *m; int len, tlen; #ifdef INET struct ip *h = NULL; #endif /* INET */ #ifdef INET6 struct ip6_hdr *h6 = NULL; #endif /* INET6 */ struct tcphdr *th; char *opt; struct pf_mtag *pf_mtag; len = 0; th = NULL; /* maximum segment size tcp option */ tlen = sizeof(struct tcphdr); if (mss) tlen += 4; switch (af) { #ifdef INET case AF_INET: len = sizeof(struct ip) + tlen; break; #endif /* INET */ #ifdef INET6 case AF_INET6: len = sizeof(struct ip6_hdr) + tlen; break; #endif /* INET6 */ default: panic("%s: unsupported af %d", __func__, af); } /* Allocate outgoing queue entry, mbuf and mbuf tag. 
*/ pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); if (pfse == NULL) return; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { free(pfse, M_PFTEMP); return; } #ifdef MAC mac_netinet_firewall_send(m); #endif if ((pf_mtag = pf_get_mtag(m)) == NULL) { free(pfse, M_PFTEMP); m_freem(m); return; } if (tag) m->m_flags |= M_SKIP_FIREWALL; pf_mtag->tag = rtag; if (r != NULL && r->rtableid >= 0) M_SETFIB(m, r->rtableid); #ifdef ALTQ if (r != NULL && r->qid) { pf_mtag->qid = r->qid; /* add hints for ecn */ pf_mtag->hdr = mtod(m, struct ip *); } #endif /* ALTQ */ m->m_data += max_linkhdr; m->m_pkthdr.len = m->m_len = len; m->m_pkthdr.rcvif = NULL; bzero(m->m_data, len); switch (af) { #ifdef INET case AF_INET: h = mtod(m, struct ip *); /* IP header fields included in the TCP checksum */ h->ip_p = IPPROTO_TCP; h->ip_len = htons(tlen); h->ip_src.s_addr = saddr->v4.s_addr; h->ip_dst.s_addr = daddr->v4.s_addr; th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); break; #endif /* INET */ #ifdef INET6 case AF_INET6: h6 = mtod(m, struct ip6_hdr *); /* IP header fields included in the TCP checksum */ h6->ip6_nxt = IPPROTO_TCP; h6->ip6_plen = htons(tlen); memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); break; #endif /* INET6 */ } /* TCP header */ th->th_sport = sport; th->th_dport = dport; th->th_seq = htonl(seq); th->th_ack = htonl(ack); th->th_off = tlen >> 2; th->th_flags = flags; th->th_win = htons(win); if (mss) { opt = (char *)(th + 1); opt[0] = TCPOPT_MAXSEG; opt[1] = 4; HTONS(mss); bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2); } switch (af) { #ifdef INET case AF_INET: /* TCP checksum */ th->th_sum = in_cksum(m, len); /* Finish the IP header */ h->ip_v = 4; h->ip_hl = sizeof(*h) >> 2; h->ip_tos = IPTOS_LOWDELAY; h->ip_off = htons(V_path_mtu_discovery ? IP_DF : 0); h->ip_len = htons(len); h->ip_ttl = ttl ? ttl : V_ip_defttl; h->ip_sum = 0; pfse->pfse_type = PFSE_IP; break; #endif /* INET */ #ifdef INET6 case AF_INET6: /* TCP checksum */ th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr), tlen); h6->ip6_vfc |= IPV6_VERSION; h6->ip6_hlim = IPV6_DEFHLIM; pfse->pfse_type = PFSE_IP6; break; #endif /* INET6 */ } pfse->pfse_m = m; pf_send(pfse); } static void pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, struct pf_rule *r) { struct pf_send_entry *pfse; struct mbuf *m0; struct pf_mtag *pf_mtag; /* Allocate outgoing queue entry, mbuf and mbuf tag. */ pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); if (pfse == NULL) return; if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) { free(pfse, M_PFTEMP); return; } if ((pf_mtag = pf_get_mtag(m0)) == NULL) { free(pfse, M_PFTEMP); return; } /* XXX: revisit */ m0->m_flags |= M_SKIP_FIREWALL; if (r->rtableid >= 0) M_SETFIB(m0, r->rtableid); #ifdef ALTQ if (r->qid) { pf_mtag->qid = r->qid; /* add hints for ecn */ pf_mtag->hdr = mtod(m0, struct ip *); } #endif /* ALTQ */ switch (af) { #ifdef INET case AF_INET: pfse->pfse_type = PFSE_ICMP; break; #endif /* INET */ #ifdef INET6 case AF_INET6: pfse->pfse_type = PFSE_ICMP6; break; #endif /* INET6 */ } pfse->pfse_m = m0; pfse->icmpopts.type = type; pfse->icmpopts.code = code; pf_send(pfse); } /* * Return 1 if the addresses a and b match (with mask m), otherwise return 0. * If n is 0, they match if they are equal. If n is != 0, they match if they * are different. 
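 * Example (illustrative addresses): with a = 10.0.0.5, b = 10.0.0.0
 * and m = 255.255.255.0 the masked comparison succeeds, so n = 0
 * returns 1 (the address is inside 10.0.0.0/24) while n = 1 returns 0;
 * n simply inverts the result for negated ("!") rule addresses.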
*/ int pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, struct pf_addr *b, sa_family_t af) { int match = 0; switch (af) { #ifdef INET case AF_INET: if ((a->addr32[0] & m->addr32[0]) == (b->addr32[0] & m->addr32[0])) match++; break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (((a->addr32[0] & m->addr32[0]) == (b->addr32[0] & m->addr32[0])) && ((a->addr32[1] & m->addr32[1]) == (b->addr32[1] & m->addr32[1])) && ((a->addr32[2] & m->addr32[2]) == (b->addr32[2] & m->addr32[2])) && ((a->addr32[3] & m->addr32[3]) == (b->addr32[3] & m->addr32[3]))) match++; break; #endif /* INET6 */ } if (match) { if (n) return (0); else return (1); } else { if (n) return (1); else return (0); } } /* * Return 1 if b <= a <= e, otherwise return 0. */ int pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, struct pf_addr *a, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: if ((a->addr32[0] < b->addr32[0]) || (a->addr32[0] > e->addr32[0])) return (0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: { int i; /* check a >= b */ for (i = 0; i < 4; ++i) if (a->addr32[i] > b->addr32[i]) break; else if (a->addr32[i] < b->addr32[i]) return (0); /* check a <= e */ for (i = 0; i < 4; ++i) if (a->addr32[i] < e->addr32[i]) break; else if (a->addr32[i] > e->addr32[i]) return (0); break; } #endif /* INET6 */ } return (1); } static int pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) { switch (op) { case PF_OP_IRG: return ((p > a1) && (p < a2)); case PF_OP_XRG: return ((p < a1) || (p > a2)); case PF_OP_RRG: return ((p >= a1) && (p <= a2)); case PF_OP_EQ: return (p == a1); case PF_OP_NE: return (p != a1); case PF_OP_LT: return (p < a1); case PF_OP_LE: return (p <= a1); case PF_OP_GT: return (p > a1); case PF_OP_GE: return (p >= a1); } return (0); /* never reached */ } int pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) { NTOHS(a1); NTOHS(a2); NTOHS(p); return (pf_match(op, a1, a2, p)); } static int pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) { if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) return (0); return (pf_match(op, a1, a2, u)); } static int pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) { if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) return (0); return (pf_match(op, a1, a2, g)); } int pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, int mtag) { if (*tag == -1) *tag = mtag; return ((!r->match_tag_not && r->match_tag == *tag) || (r->match_tag_not && r->match_tag != *tag)); } int pf_tag_packet(struct mbuf *m, struct pf_pdesc *pd, int tag) { KASSERT(tag > 0, ("%s: tag %d", __func__, tag)); if (pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(m)) == NULL)) return (ENOMEM); pd->pf_mtag->tag = tag; return (0); } #define PF_ANCHOR_STACKSIZE 32 struct pf_anchor_stackframe { struct pf_ruleset *rs; struct pf_rule *r; /* XXX: + match bit */ struct pf_anchor *child; }; /* * XXX: We rely on malloc(9) returning pointer aligned addresses. 
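 * That alignment guarantee is what makes the bit-stealing below safe:
 * a struct pf_rule pointer is at least 2-byte aligned, so bit 0 of
 * f->r is always zero and can carry the "match" flag.  The pattern in
 * miniature (a sketch of what the macros below implement):
 *
 *	uintptr_t v = (uintptr_t)rule_ptr;
 *	v |= 1;						set match bit
 *	int matched = v & 1;				test it
 *	struct pf_rule *p = (struct pf_rule *)(v & ~(uintptr_t)1);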
*/ #define PF_ANCHORSTACK_MATCH 0x00000001 #define PF_ANCHORSTACK_MASK (PF_ANCHORSTACK_MATCH) #define PF_ANCHOR_MATCH(f) ((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH) #define PF_ANCHOR_RULE(f) (struct pf_rule *) \ ((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK) #define PF_ANCHOR_SET_MATCH(f) do { (f)->r = (void *) \ ((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH); \ } while (0) void pf_step_into_anchor(struct pf_anchor_stackframe *stack, int *depth, struct pf_ruleset **rs, int n, struct pf_rule **r, struct pf_rule **a, int *match) { struct pf_anchor_stackframe *f; PF_RULES_RASSERT(); if (match) *match = 0; if (*depth >= PF_ANCHOR_STACKSIZE) { printf("%s: anchor stack overflow on %s\n", __func__, (*r)->anchor->name); *r = TAILQ_NEXT(*r, entries); return; } else if (*depth == 0 && a != NULL) *a = *r; f = stack + (*depth)++; f->rs = *rs; f->r = *r; if ((*r)->anchor_wildcard) { struct pf_anchor_node *parent = &(*r)->anchor->children; if ((f->child = RB_MIN(pf_anchor_node, parent)) == NULL) { *r = NULL; return; } *rs = &f->child->ruleset; } else { f->child = NULL; *rs = &(*r)->anchor->ruleset; } *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); } int pf_step_out_of_anchor(struct pf_anchor_stackframe *stack, int *depth, struct pf_ruleset **rs, int n, struct pf_rule **r, struct pf_rule **a, int *match) { struct pf_anchor_stackframe *f; struct pf_rule *fr; int quick = 0; PF_RULES_RASSERT(); do { if (*depth <= 0) break; f = stack + *depth - 1; fr = PF_ANCHOR_RULE(f); if (f->child != NULL) { struct pf_anchor_node *parent; /* * This block traverses through * a wildcard anchor. */ parent = &fr->anchor->children; if (match != NULL && *match) { /* * If any of "*" matched, then * "foo/ *" matched, mark frame * appropriately. */ PF_ANCHOR_SET_MATCH(f); *match = 0; } f->child = RB_NEXT(pf_anchor_node, parent, f->child); if (f->child != NULL) { *rs = &f->child->ruleset; *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); if (*r == NULL) continue; else break; } } (*depth)--; if (*depth == 0 && a != NULL) *a = NULL; *rs = f->rs; if (PF_ANCHOR_MATCH(f) || (match != NULL && *match)) quick = fr->quick; *r = TAILQ_NEXT(fr, entries); } while (*r == NULL); return (quick); } #ifdef INET6 void pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); break; #endif /* INET */ case AF_INET6: naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); break; } } void pf_addr_inc(struct pf_addr *addr, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); break; #endif /* INET */ case AF_INET6: if (addr->addr32[3] == 0xffffffff) { addr->addr32[3] = 0; if (addr->addr32[2] == 0xffffffff) { addr->addr32[2] = 0; if (addr->addr32[1] == 0xffffffff) { addr->addr32[1] = 0; addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); } else addr->addr32[1] = htonl(ntohl(addr->addr32[1]) + 1); } else addr->addr32[2] = htonl(ntohl(addr->addr32[2]) + 1); } else addr->addr32[3] = 
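		/*
		 * The nested ifs above implement a ripple carry across the
		 * four 32-bit words, treating the address as one 128-bit
		 * big-endian integer: incrementing an address ending in
		 * ...:0000:0000:ffff:ffff yields ...:0000:0001:0000:0000,
		 * since the overflow of addr32[3] zeroes it and carries
		 * into addr32[2].  The common case, below, only touches
		 * the last word.
		 */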
htonl(ntohl(addr->addr32[3]) + 1); break; } } #endif /* INET6 */ int pf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m) { struct pf_addr *saddr, *daddr; u_int16_t sport, dport; struct inpcbinfo *pi; struct inpcb *inp; pd->lookup.uid = UID_MAX; pd->lookup.gid = GID_MAX; switch (pd->proto) { case IPPROTO_TCP: if (pd->hdr.tcp == NULL) return (-1); sport = pd->hdr.tcp->th_sport; dport = pd->hdr.tcp->th_dport; pi = &V_tcbinfo; break; case IPPROTO_UDP: if (pd->hdr.udp == NULL) return (-1); sport = pd->hdr.udp->uh_sport; dport = pd->hdr.udp->uh_dport; pi = &V_udbinfo; break; default: return (-1); } if (direction == PF_IN) { saddr = pd->src; daddr = pd->dst; } else { u_int16_t p; p = sport; sport = dport; dport = p; saddr = pd->dst; daddr = pd->src; } switch (pd->af) { #ifdef INET case AF_INET: inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4, dport, INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) { inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4, dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) return (-1); } break; #endif /* INET */ #ifdef INET6 case AF_INET6: inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6, dport, INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) { inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6, dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL, m); if (inp == NULL) return (-1); } break; #endif /* INET6 */ default: return (-1); } INP_RLOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; pd->lookup.gid = inp->inp_cred->cr_groups[0]; INP_RUNLOCK(inp); return (1); } static u_int8_t pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) { int hlen; u_int8_t hdr[60]; u_int8_t *opt, optlen; u_int8_t wscale = 0; hlen = th_off << 2; /* hlen <= sizeof(hdr) */ if (hlen <= sizeof(struct tcphdr)) return (0); if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) return (0); opt = hdr + sizeof(struct tcphdr); hlen -= sizeof(struct tcphdr); while (hlen >= 3) { switch (*opt) { case TCPOPT_EOL: case TCPOPT_NOP: ++opt; --hlen; break; case TCPOPT_WINDOW: wscale = opt[2]; if (wscale > TCP_MAX_WINSHIFT) wscale = TCP_MAX_WINSHIFT; wscale |= PF_WSCALE_FLAG; /* FALLTHROUGH */ default: optlen = opt[1]; if (optlen < 2) optlen = 2; hlen -= optlen; opt += optlen; break; } } return (wscale); } static u_int16_t pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) { int hlen; u_int8_t hdr[60]; u_int8_t *opt, optlen; u_int16_t mss = V_tcp_mssdflt; hlen = th_off << 2; /* hlen <= sizeof(hdr) */ if (hlen <= sizeof(struct tcphdr)) return (0); if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) return (0); opt = hdr + sizeof(struct tcphdr); hlen -= sizeof(struct tcphdr); while (hlen >= TCPOLEN_MAXSEG) { switch (*opt) { case TCPOPT_EOL: case TCPOPT_NOP: ++opt; --hlen; break; case TCPOPT_MAXSEG: bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2); NTOHS(mss); /* FALLTHROUGH */ default: optlen = opt[1]; if (optlen < 2) optlen = 2; hlen -= optlen; opt += optlen; break; } } return (mss); } static u_int16_t pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) { #ifdef INET struct sockaddr_in *dst; struct route ro; #endif /* INET */ #ifdef INET6 struct sockaddr_in6 *dst6; struct route_in6 ro6; #endif /* INET6 */ struct rtentry *rt = NULL; int hlen = 0; u_int16_t mss = V_tcp_mssdflt; switch (af) { #ifdef INET case AF_INET: hlen = sizeof(struct ip); bzero(&ro, sizeof(ro)); dst = (struct sockaddr_in *)&ro.ro_dst; dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = 
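		/*
		 * The MSS is derived below from the MTU of the route's
		 * interface: mss = MTU - hlen - sizeof(struct tcphdr),
		 * raised to at least V_tcp_mssdflt, capped by the peer's
		 * offer and floored at 64.  For a plain Ethernet route
		 * (MTU 1500, illustrative) that gives 1500 - 20 - 20 = 1460
		 * for IPv4 and 1500 - 40 - 20 = 1440 for IPv6.
		 */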
addr->v4; in_rtalloc_ign(&ro, 0, rtableid); rt = ro.ro_rt; break; #endif /* INET */ #ifdef INET6 case AF_INET6: hlen = sizeof(struct ip6_hdr); bzero(&ro6, sizeof(ro6)); dst6 = (struct sockaddr_in6 *)&ro6.ro_dst; dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; in6_rtalloc_ign(&ro6, 0, rtableid); rt = ro6.ro_rt; break; #endif /* INET6 */ } if (rt && rt->rt_ifp) { mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); mss = max(V_tcp_mssdflt, mss); RTFREE(rt); } mss = min(mss, offer); mss = max(mss, 64); /* sanity - at least max opt space */ return (mss); } static u_int32_t pf_tcp_iss(struct pf_pdesc *pd) { MD5_CTX ctx; u_int32_t digest[4]; if (V_pf_tcp_secret_init == 0) { read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret)); MD5Init(&V_pf_tcp_secret_ctx); MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret, sizeof(V_pf_tcp_secret)); V_pf_tcp_secret_init = 1; } ctx = V_pf_tcp_secret_ctx; MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short)); MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short)); if (pd->af == AF_INET6) { MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr)); MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr)); } else { MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr)); MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr)); } MD5Final((u_char *)digest, &ctx); V_pf_tcp_iss_off += 4096; #define ISN_RANDOM_INCREMENT (4096 - 1) return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) + V_pf_tcp_iss_off); #undef ISN_RANDOM_INCREMENT } static int pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, struct inpcb *inp) { struct pf_rule *nr = NULL; struct pf_addr * const saddr = pd->src; struct pf_addr * const daddr = pd->dst; sa_family_t af = pd->af; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; struct tcphdr *th = pd->hdr.tcp; struct pf_state_key *sk = NULL, *nk = NULL; u_short reason; int rewrite = 0, hdrlen = 0; int tag = -1, rtableid = -1; int asd = 0; int match = 0; int state_icmp = 0; u_int16_t sport = 0, dport = 0; u_int16_t bproto_sum = 0, bip_sum = 0; u_int8_t icmptype = 0, icmpcode = 0; struct pf_anchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE]; PF_RULES_RASSERT(); if (inp != NULL) { INP_LOCK_ASSERT(inp); pd->lookup.uid = inp->inp_cred->cr_uid; pd->lookup.gid = inp->inp_cred->cr_groups[0]; pd->lookup.done = 1; } switch (pd->proto) { case IPPROTO_TCP: sport = th->th_sport; dport = th->th_dport; hdrlen = sizeof(*th); break; case IPPROTO_UDP: sport = pd->hdr.udp->uh_sport; dport = pd->hdr.udp->uh_dport; hdrlen = sizeof(*pd->hdr.udp); break; #ifdef INET case IPPROTO_ICMP: if (pd->af != AF_INET) break; sport = dport = pd->hdr.icmp->icmp_id; hdrlen = sizeof(*pd->hdr.icmp); icmptype = pd->hdr.icmp->icmp_type; icmpcode = pd->hdr.icmp->icmp_code; if (icmptype == ICMP_UNREACH || icmptype == ICMP_SOURCEQUENCH || icmptype == ICMP_REDIRECT || icmptype == ICMP_TIMXCEED || icmptype == ICMP_PARAMPROB) state_icmp++; break; #endif /* INET */ #ifdef INET6 case IPPROTO_ICMPV6: if (af != AF_INET6) break; sport = dport = pd->hdr.icmp6->icmp6_id; hdrlen = sizeof(*pd->hdr.icmp6); icmptype = pd->hdr.icmp6->icmp6_type; icmpcode = pd->hdr.icmp6->icmp6_code; if (icmptype == ICMP6_DST_UNREACH || icmptype == ICMP6_PACKET_TOO_BIG || icmptype == ICMP6_TIME_EXCEEDED || icmptype == ICMP6_PARAM_PROB) state_icmp++; break; 
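		/*
		 * state_icmp marks ICMP/ICMPv6 error messages (unreachable,
		 * time exceeded, ...).  Errors refer to an existing flow and
		 * therefore never create a state of their own further down;
		 * they are instead matched against the state of the
		 * connection they complain about.  Queries such as echo
		 * request/reply leave state_icmp at 0 and are tracked like
		 * any other protocol.
		 */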
#endif /* INET6 */
	default:
		sport = dport = hdrlen = 0;
		break;
	}

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);

	/* check packet for BINAT/NAT/RDR */
	if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, &sk,
	    &nk, saddr, daddr, sport, dport, anchor_stack)) != NULL) {
		KASSERT(sk != NULL, ("%s: null sk", __func__));
		KASSERT(nk != NULL, ("%s: null nk", __func__));

		if (pd->ip_sum)
			bip_sum = *pd->ip_sum;

		switch (pd->proto) {
		case IPPROTO_TCP:
			bproto_sum = th->th_sum;
			pd->proto_sum = &th->th_sum;

			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
			    nk->port[pd->sidx] != sport) {
				pf_change_ap(m, saddr, &th->th_sport,
				    pd->ip_sum, &th->th_sum,
				    &nk->addr[pd->sidx],
				    nk->port[pd->sidx], 0, af);
				pd->sport = &th->th_sport;
				sport = th->th_sport;
			}

			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
			    nk->port[pd->didx] != dport) {
				pf_change_ap(m, daddr, &th->th_dport,
				    pd->ip_sum, &th->th_sum,
				    &nk->addr[pd->didx],
				    nk->port[pd->didx], 0, af);
				dport = th->th_dport;
				pd->dport = &th->th_dport;
			}
			rewrite++;
			break;
		case IPPROTO_UDP:
			bproto_sum = pd->hdr.udp->uh_sum;
			pd->proto_sum = &pd->hdr.udp->uh_sum;

			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
			    nk->port[pd->sidx] != sport) {
				pf_change_ap(m, saddr, &pd->hdr.udp->uh_sport,
				    pd->ip_sum, &pd->hdr.udp->uh_sum,
				    &nk->addr[pd->sidx],
				    nk->port[pd->sidx], 1, af);
				sport = pd->hdr.udp->uh_sport;
				pd->sport = &pd->hdr.udp->uh_sport;
			}

			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
			    nk->port[pd->didx] != dport) {
				pf_change_ap(m, daddr, &pd->hdr.udp->uh_dport,
				    pd->ip_sum, &pd->hdr.udp->uh_sum,
				    &nk->addr[pd->didx],
				    nk->port[pd->didx], 1, af);
				dport = pd->hdr.udp->uh_dport;
				pd->dport = &pd->hdr.udp->uh_dport;
			}
			rewrite++;
			break;
#ifdef INET
		case IPPROTO_ICMP:
			nk->port[0] = nk->port[1];
			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET))
				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
				    nk->addr[pd->sidx].v4.s_addr, 0);

			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET))
				pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
				    nk->addr[pd->didx].v4.s_addr, 0);

			if (nk->port[1] != pd->hdr.icmp->icmp_id) {
				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
				    pd->hdr.icmp->icmp_cksum, sport,
				    nk->port[1], 0);
				pd->hdr.icmp->icmp_id = nk->port[1];
				pd->sport = &pd->hdr.icmp->icmp_id;
			}

			m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
			break;
#endif /* INET */
#ifdef INET6
		case IPPROTO_ICMPV6:
			nk->port[0] = nk->port[1];
			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6))
				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
				    &nk->addr[pd->sidx], 0);

			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6))
				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
				    &nk->addr[pd->didx], 0);
			rewrite++;
			break;
#endif /* INET6 */
		default:
			switch (af) {
#ifdef INET
			case AF_INET:
				if (PF_ANEQ(saddr,
				    &nk->addr[pd->sidx], AF_INET))
					pf_change_a(&saddr->v4.s_addr,
					    pd->ip_sum,
					    nk->addr[pd->sidx].v4.s_addr, 0);

				if (PF_ANEQ(daddr,
				    &nk->addr[pd->didx], AF_INET))
					pf_change_a(&daddr->v4.s_addr,
					    pd->ip_sum,
					    nk->addr[pd->didx].v4.s_addr, 0);
				break;
#endif /* INET */
#ifdef INET6
			case AF_INET6:
				if (PF_ANEQ(saddr,
				    &nk->addr[pd->sidx], AF_INET6))
					PF_ACPY(saddr, &nk->addr[pd->sidx], af);

				if (PF_ANEQ(daddr,
				    &nk->addr[pd->didx], AF_INET6))
					PF_ACPY(daddr, &nk->addr[pd->didx], af);
				break;
#endif /* INET6 */
			}
			break;
		}

		if (nr->natpass)
			r = NULL;
		pd->nat_rule = nr;
	}

	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != direction)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto !=
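		/*
		 * A failed comparison does not simply step to the next rule:
		 * r->skip[] (precomputed by pf_calc_skip_steps() above) jumps
		 * directly to the next rule whose value for that field
		 * differs.  If, say, 500 consecutive rules are all bound to
		 * the same interface (illustrative), a packet arriving on a
		 * different interface passes over all of them with a single
		 * pointer chase instead of 500 evaluations.
		 */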
pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, saddr, af, r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; /* tcp/udp only. port_op always 0 in other cases */ else if (r->src.port_op && !pf_match_port(r->src.port_op, r->src.port[0], r->src.port[1], sport)) r = r->skip[PF_SKIP_SRC_PORT].ptr; else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; /* tcp/udp only. port_op always 0 in other cases */ else if (r->dst.port_op && !pf_match_port(r->dst.port_op, r->dst.port[0], r->dst.port[1], dport)) r = r->skip[PF_SKIP_DST_PORT].ptr; /* icmp only. type always 0 in other cases */ else if (r->type && r->type != icmptype + 1) r = TAILQ_NEXT(r, entries); /* icmp only. type always 0 in other cases */ else if (r->code && r->code != icmpcode + 1) r = TAILQ_NEXT(r, entries); else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->rule_flag & PFRULE_FRAGMENT) r = TAILQ_NEXT(r, entries); else if (pd->proto == IPPROTO_TCP && (r->flagset & th->th_flags) != r->flags) r = TAILQ_NEXT(r, entries); /* tcp/udp only. uid.op always 0 in other cases */ else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = pf_socket_lookup(direction, pd, m), 1)) && !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], pd->lookup.uid)) r = TAILQ_NEXT(r, entries); /* tcp/udp only. gid.op always 0 in other cases */ else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = pf_socket_lookup(direction, pd, m), 1)) && !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], pd->lookup.gid)) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= arc4random()) r = TAILQ_NEXT(r, entries); else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag ? pd->pf_mtag->tag : 0)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != IPPROTO_TCP || !pf_osfp_match( pf_osfp_fingerprint(pd, m, off, th), r->os_fingerprint))) r = TAILQ_NEXT(r, entries); else { if (r->tag) tag = r->tag; if (r->rtableid >= 0) rtableid = r->rtableid; if (r->anchor == NULL) { match = 1; *rm = r; *am = a; *rsm = ruleset; if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(anchor_stack, &asd, &ruleset, PF_RULESET_FILTER, &r, &a, &match); } if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd, &ruleset, PF_RULESET_FILTER, &r, &a, &match)) break; } r = *rm; a = *am; ruleset = *rsm; REASON_SET(&reason, PFRES_MATCH); if (r->log || (nr != NULL && nr->log)) { if (rewrite) m_copyback(m, off, hdrlen, pd->hdr.any); PFLOG_PACKET(kif, m, af, direction, reason, r->log ? 
		    r : nr, a, ruleset, pd, 1);
	}

	if ((r->action == PF_DROP) &&
	    ((r->rule_flag & PFRULE_RETURNRST) ||
	    (r->rule_flag & PFRULE_RETURNICMP) ||
	    (r->rule_flag & PFRULE_RETURN))) {
		/* undo NAT changes, if they have taken place */
		if (nr != NULL) {
			PF_ACPY(saddr, &sk->addr[pd->sidx], af);
			PF_ACPY(daddr, &sk->addr[pd->didx], af);
			if (pd->sport)
				*pd->sport = sk->port[pd->sidx];
			if (pd->dport)
				*pd->dport = sk->port[pd->didx];
			if (pd->proto_sum)
				*pd->proto_sum = bproto_sum;
			if (pd->ip_sum)
				*pd->ip_sum = bip_sum;
			m_copyback(m, off, hdrlen, pd->hdr.any);
		}
		if (pd->proto == IPPROTO_TCP &&
		    ((r->rule_flag & PFRULE_RETURNRST) ||
		    (r->rule_flag & PFRULE_RETURN)) &&
		    !(th->th_flags & TH_RST)) {
			u_int32_t	 ack = ntohl(th->th_seq) + pd->p_len;
			int		 len = 0;
#ifdef INET
			struct ip	*h4;
#endif
#ifdef INET6
			struct ip6_hdr	*h6;
#endif

			switch (af) {
#ifdef INET
			case AF_INET:
				h4 = mtod(m, struct ip *);
				len = ntohs(h4->ip_len) - off;
				break;
#endif
#ifdef INET6
			case AF_INET6:
				h6 = mtod(m, struct ip6_hdr *);
				len = ntohs(h6->ip6_plen) -
				    (off - sizeof(*h6));
				break;
#endif
			}

			if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
				REASON_SET(&reason, PFRES_PROTCKSUM);
			else {
				if (th->th_flags & TH_SYN)
					ack++;
				if (th->th_flags & TH_FIN)
					ack++;
				pf_send_tcp(m, r, af, pd->dst,
				    pd->src, th->th_dport, th->th_sport,
				    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
				    r->return_ttl, 1, 0, kif->pfik_ifp);
			}
		} else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
		    r->return_icmp)
			pf_send_icmp(m, r->return_icmp >> 8,
			    r->return_icmp & 255, af, r);
		else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
		    r->return_icmp6)
			pf_send_icmp(m, r->return_icmp6 >> 8,
			    r->return_icmp6 & 255, af, r);
	}

	if (r->action == PF_DROP)
		goto cleanup;

	if (tag > 0 && pf_tag_packet(m, pd, tag)) {
		REASON_SET(&reason, PFRES_MEMORY);
		goto cleanup;
	}
	if (rtableid >= 0)
		M_SETFIB(m, rtableid);

	if (!state_icmp && (r->keep_state || nr != NULL ||
	    (pd->flags & PFDESC_TCP_NORM))) {
		int action;
		action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off,
		    sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum,
		    hdrlen);
		if (action != PF_PASS)
			return (action);
	} else {
		if (sk != NULL)
			uma_zfree(V_pf_state_key_z, sk);
		if (nk != NULL)
			uma_zfree(V_pf_state_key_z, nk);
	}

	/* copy back packet headers if we performed NAT operations */
	if (rewrite)
		m_copyback(m, off, hdrlen, pd->hdr.any);

	if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) &&
	    direction == PF_OUT &&
	    pfsync_defer_ptr != NULL && pfsync_defer_ptr(*sm, m))
		/*
		 * We want the state created, but we don't
		 * want to send this in case a partner
		 * firewall has to know about it to allow
		 * replies through it.
*/ return (PF_DEFER); return (PF_PASS); cleanup: if (sk != NULL) uma_zfree(V_pf_state_key_z, sk); if (nk != NULL) uma_zfree(V_pf_state_key_z, nk); return (PF_DROP); } static int pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *nk, struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite, struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen) { struct pf_state *s = NULL; struct pf_src_node *sn = NULL; struct tcphdr *th = pd->hdr.tcp; u_int16_t mss = V_tcp_mssdflt; u_short reason; /* check maximums */ if (r->max_states && (counter_u64_fetch(r->states_cur) >= r->max_states)) { counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1); REASON_SET(&reason, PFRES_MAXSTATES); return (PF_DROP); } /* src node for filter rule */ if ((r->rule_flag & PFRULE_SRCTRACK || r->rpool.opts & PF_POOL_STICKYADDR) && pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) { REASON_SET(&reason, PFRES_SRCLIMIT); goto csfailed; } /* src node for translation rule */ if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) { REASON_SET(&reason, PFRES_SRCLIMIT); goto csfailed; } s = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO); if (s == NULL) { REASON_SET(&reason, PFRES_MEMORY); goto csfailed; } s->rule.ptr = r; s->nat_rule.ptr = nr; s->anchor.ptr = a; STATE_INC_COUNTERS(s); if (r->allow_opts) s->state_flags |= PFSTATE_ALLOWOPTS; if (r->rule_flag & PFRULE_STATESLOPPY) s->state_flags |= PFSTATE_SLOPPY; s->log = r->log & PF_LOG_ALL; s->sync_state = PFSYNC_S_NONE; if (nr != NULL) s->log |= nr->log & PF_LOG_ALL; switch (pd->proto) { case IPPROTO_TCP: s->src.seqlo = ntohl(th->th_seq); s->src.seqhi = s->src.seqlo + pd->p_len + 1; if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && r->keep_state == PF_STATE_MODULATE) { /* Generate sequence number modulator */ if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == 0) s->src.seqdiff = 1; pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(s->src.seqlo + s->src.seqdiff), 0); *rewrite = 1; } else s->src.seqdiff = 0; if (th->th_flags & TH_SYN) { s->src.seqhi++; s->src.wscale = pf_get_wscale(m, off, th->th_off, pd->af); } s->src.max_win = MAX(ntohs(th->th_win), 1); if (s->src.wscale & PF_WSCALE_MASK) { /* Remove scale factor from initial window */ int win = s->src.max_win; win += 1 << (s->src.wscale & PF_WSCALE_MASK); s->src.max_win = (win - 1) >> (s->src.wscale & PF_WSCALE_MASK); } if (th->th_flags & TH_FIN) s->src.seqhi++; s->dst.seqhi = 1; s->dst.max_win = 1; s->src.state = TCPS_SYN_SENT; s->dst.state = TCPS_CLOSED; s->timeout = PFTM_TCP_FIRST_PACKET; break; case IPPROTO_UDP: s->src.state = PFUDPS_SINGLE; s->dst.state = PFUDPS_NO_TRAFFIC; s->timeout = PFTM_UDP_FIRST_PACKET; break; case IPPROTO_ICMP: #ifdef INET6 case IPPROTO_ICMPV6: #endif s->timeout = PFTM_ICMP_FIRST_PACKET; break; default: s->src.state = PFOTHERS_SINGLE; s->dst.state = PFOTHERS_NO_TRAFFIC; s->timeout = PFTM_OTHER_FIRST_PACKET; } if (r->rt && r->rt != PF_FASTROUTE) { if (pf_map_addr(pd->af, r, pd->src, &s->rt_addr, NULL, &sn)) { REASON_SET(&reason, PFRES_MAPFAILED); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); uma_zfree(V_pf_state_z, s); goto csfailed; } s->rt_kif = r->rpool.cur->kif; } s->creation = time_uptime; s->expire = time_uptime; if (sn != NULL) s->src_node = sn; if (nsn != NULL) { /* XXX We only modify one side for now. 
		 */
		PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
		s->nat_src_node = nsn;
	}
	if (pd->proto == IPPROTO_TCP) {
		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
		    off, pd, th, &s->src, &s->dst)) {
			REASON_SET(&reason, PFRES_MEMORY);
			pf_src_tree_remove_state(s);
			STATE_DEC_COUNTERS(s);
			uma_zfree(V_pf_state_z, s);
			return (PF_DROP);
		}
		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
		    &s->src, &s->dst, rewrite)) {
			/* This really shouldn't happen!!! */
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_normalize_tcp_stateful failed on first pkt"));
			pf_normalize_tcp_cleanup(s);
			pf_src_tree_remove_state(s);
			STATE_DEC_COUNTERS(s);
			uma_zfree(V_pf_state_z, s);
			return (PF_DROP);
		}
	}
	s->direction = pd->dir;

	/*
	 * sk/nk could already have been set up by pf_get_translation().
	 */
	if (nr == NULL) {
		KASSERT((sk == NULL && nk == NULL), ("%s: nr %p sk %p, nk %p",
		    __func__, nr, sk, nk));
		sk = pf_state_key_setup(pd, pd->src, pd->dst, sport, dport);
		if (sk == NULL)
			goto csfailed;
		nk = sk;
	} else
		KASSERT((sk != NULL && nk != NULL), ("%s: nr %p sk %p, nk %p",
		    __func__, nr, sk, nk));

	/* Swap sk/nk for PF_OUT. */
	if (pf_state_insert(BOUND_IFACE(r, kif),
	    (pd->dir == PF_IN) ? sk : nk,
	    (pd->dir == PF_IN) ? nk : sk, s)) {
		if (pd->proto == IPPROTO_TCP)
			pf_normalize_tcp_cleanup(s);
		REASON_SET(&reason, PFRES_STATEINS);
		pf_src_tree_remove_state(s);
		STATE_DEC_COUNTERS(s);
		uma_zfree(V_pf_state_z, s);
		return (PF_DROP);
	} else
		*sm = s;

	if (tag > 0)
		s->tag = tag;
	if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
	    TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
		s->src.state = PF_TCPS_PROXY_SRC;
		/* undo NAT changes, if they have taken place */
		if (nr != NULL) {
			struct pf_state_key *skt = s->key[PF_SK_WIRE];
			if (pd->dir == PF_OUT)
				skt = s->key[PF_SK_STACK];
			PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
			PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
			if (pd->sport)
				*pd->sport = skt->port[pd->sidx];
			if (pd->dport)
				*pd->dport = skt->port[pd->didx];
			if (pd->proto_sum)
				*pd->proto_sum = bproto_sum;
			if (pd->ip_sum)
				*pd->ip_sum = bip_sum;
			m_copyback(m, off, hdrlen, pd->hdr.any);
		}
		s->src.seqhi = htonl(arc4random());
		/* Find mss option */
		int rtid = M_GETFIB(m);
		mss = pf_get_mss(m, off, th->th_off, pd->af);
		mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
		mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
		s->src.mss = mss;
		pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport,
		    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
		    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL);
		REASON_SET(&reason, PFRES_SYNPROXY);
		return (PF_SYNPROXY_DROP);
	}

	return (PF_PASS);

csfailed:
	if (sk != NULL)
		uma_zfree(V_pf_state_key_z, sk);
	if (nk != NULL)
		uma_zfree(V_pf_state_key_z, nk);

	if (sn != NULL) {
		struct pf_srchash *sh;

		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
		PF_HASHROW_LOCK(sh);
		if (--sn->states == 0 && sn->expire == 0) {
			pf_unlink_src_node(sn);
			uma_zfree(V_pf_sources_z, sn);
			counter_u64_add(
			    V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
		}
		PF_HASHROW_UNLOCK(sh);
	}

	if (nsn != sn && nsn != NULL) {
		struct pf_srchash *sh;

		sh = &V_pf_srchash[pf_hashsrc(&nsn->addr, nsn->af)];
		PF_HASHROW_LOCK(sh);
		if (--nsn->states == 0 && nsn->expire == 0) {
			pf_unlink_src_node(nsn);
			uma_zfree(V_pf_sources_z, nsn);
			counter_u64_add(
			    V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
		}
		PF_HASHROW_UNLOCK(sh);
	}

	return (PF_DROP);
}

static int
pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
    struct pf_ruleset **rsm)
{
	struct
pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; sa_family_t af = pd->af; u_short reason; int tag = -1; int asd = 0; int match = 0; struct pf_anchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE]; PF_RULES_RASSERT(); r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); while (r != NULL) { r->evaluations++; if (pfi_kif_match(r->kif, kif) == r->ifnot) r = r->skip[PF_SKIP_IFP].ptr; else if (r->direction && r->direction != direction) r = r->skip[PF_SKIP_DIR].ptr; else if (r->af && r->af != af) r = r->skip[PF_SKIP_AF].ptr; else if (r->proto && r->proto != pd->proto) r = r->skip[PF_SKIP_PROTO].ptr; else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, r->src.neg, kif, M_GETFIB(m))) r = r->skip[PF_SKIP_SRC_ADDR].ptr; else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, r->dst.neg, NULL, M_GETFIB(m))) r = r->skip[PF_SKIP_DST_ADDR].ptr; else if (r->tos && !(r->tos == pd->tos)) r = TAILQ_NEXT(r, entries); else if (r->os_fingerprint != PF_OSFP_ANY) r = TAILQ_NEXT(r, entries); else if (pd->proto == IPPROTO_UDP && (r->src.port_op || r->dst.port_op)) r = TAILQ_NEXT(r, entries); else if (pd->proto == IPPROTO_TCP && (r->src.port_op || r->dst.port_op || r->flagset)) r = TAILQ_NEXT(r, entries); else if ((pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) && (r->type || r->code)) r = TAILQ_NEXT(r, entries); else if (r->prob && r->prob <= (arc4random() % (UINT_MAX - 1) + 1)) r = TAILQ_NEXT(r, entries); else if (r->match_tag && !pf_match_tag(m, r, &tag, pd->pf_mtag ? pd->pf_mtag->tag : 0)) r = TAILQ_NEXT(r, entries); else { if (r->anchor == NULL) { match = 1; *rm = r; *am = a; *rsm = ruleset; if ((*rm)->quick) break; r = TAILQ_NEXT(r, entries); } else pf_step_into_anchor(anchor_stack, &asd, &ruleset, PF_RULESET_FILTER, &r, &a, &match); } if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd, &ruleset, PF_RULESET_FILTER, &r, &a, &match)) break; } r = *rm; a = *am; ruleset = *rsm; REASON_SET(&reason, PFRES_MATCH); if (r->log) PFLOG_PACKET(kif, m, af, direction, reason, r, a, ruleset, pd, 1); if (r->action != PF_PASS) return (PF_DROP); if (tag > 0 && pf_tag_packet(m, pd, tag)) { REASON_SET(&reason, PFRES_MEMORY); return (PF_DROP); } return (PF_PASS); } static int pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd, u_short *reason, int *copyback) { struct tcphdr *th = pd->hdr.tcp; u_int16_t win = ntohs(th->th_win); u_int32_t ack, end, seq, orig_seq; u_int8_t sws, dws; int ackskew; if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { sws = src->wscale & PF_WSCALE_MASK; dws = dst->wscale & PF_WSCALE_MASK; } else sws = dws = 0; /* * Sequence tracking algorithm from Guido van Rooij's paper: * http://www.madison-gurkha.com/publications/tcp_filtering/ * tcp_filtering.ps */ orig_seq = seq = ntohl(th->th_seq); if (src->seqlo == 0) { /* First packet from this end. 
	   Set its state */
		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
		    src->scrub == NULL) {
			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
				REASON_SET(reason, PFRES_MEMORY);
				return (PF_DROP);
			}
		}

		/* Deferred generation of sequence number modulator */
		if (dst->seqdiff && !src->seqdiff) {
			/* use random iss for the TCP server */
			while ((src->seqdiff = arc4random() - seq) == 0)
				;
			ack = ntohl(th->th_ack) - dst->seqdiff;
			pf_change_proto_a(m, &th->th_seq, &th->th_sum,
			    htonl(seq + src->seqdiff), 0);
			pf_change_proto_a(m, &th->th_ack, &th->th_sum,
			    htonl(ack), 0);
			*copyback = 1;
		} else {
			ack = ntohl(th->th_ack);
		}

		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN) {
			end++;
			if (dst->wscale & PF_WSCALE_FLAG) {
				src->wscale = pf_get_wscale(m, off, th->th_off,
				    pd->af);
				if (src->wscale & PF_WSCALE_FLAG) {
					/*
					 * Remove scale factor from initial
					 * window
					 */
					sws = src->wscale & PF_WSCALE_MASK;
					win = ((u_int32_t)win + (1 << sws) - 1)
					    >> sws;
					dws = dst->wscale & PF_WSCALE_MASK;
				} else {
					/* fixup other window */
					dst->max_win <<= dst->wscale &
					    PF_WSCALE_MASK;
					/* in case of a retrans SYN|ACK */
					dst->wscale = 0;
				}
			}
		}
		if (th->th_flags & TH_FIN)
			end++;

		src->seqlo = seq;
		if (src->state < TCPS_SYN_SENT)
			src->state = TCPS_SYN_SENT;

		/*
		 * May need to slide the window (seqhi may have been set by
		 * the crappy stack check or if we picked up the connection
		 * after establishment)
		 */
		if (src->seqhi == 1 ||
		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
			src->seqhi = end + MAX(1, dst->max_win << dws);
		if (win > src->max_win)
			src->max_win = win;

	} else {
		ack = ntohl(th->th_ack) - dst->seqdiff;
		if (src->seqdiff) {
			/* Modulate sequence numbers */
			pf_change_proto_a(m, &th->th_seq, &th->th_sum,
			    htonl(seq + src->seqdiff), 0);
			pf_change_proto_a(m, &th->th_ack, &th->th_sum,
			    htonl(ack), 0);
			*copyback = 1;
		}
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN)
			end++;
		if (th->th_flags & TH_FIN)
			end++;
	}

	if ((th->th_flags & TH_ACK) == 0) {
		/* Let it pass through the ack skew check */
		ack = dst->seqlo;
	} else if ((ack == 0 &&
	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
	    /* broken tcp stacks do not set ack */
	    (dst->state < TCPS_SYN_SENT)) {
		/*
		 * Many stacks (ours included) will set the ACK number in a
		 * FIN|ACK if the SYN times out -- no sequence to ACK.
		 */
		ack = dst->seqlo;
	}

	if (seq == end) {
		/* Ease sequencing restrictions on no data packets */
		seq = src->seqlo;
		end = seq;
	}

	ackskew = dst->seqlo - ack;

	/*
	 * Need to demodulate the sequence numbers in any TCP SACK options
	 * (Selective ACK). We could optionally validate the SACK values
	 * against the current ACK window, either forwards or backwards, but
	 * I'm not confident that SACK has been implemented properly
	 * everywhere. It wouldn't surprise me if several stacks accidentally
	 * SACK too far backwards of previously ACKed data. There really aren't
	 * any security implications of bad SACKing unless the target stack
	 * doesn't validate the option length correctly. Someone trying to
	 * spoof into a TCP connection won't bother blindly sending SACK
	 * options anyway.
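 * Worked example (illustrative numbers): if this connection's server
 * side was given dst->seqdiff = 1000, the incoming th_ack was
 * demodulated above by subtracting 1000, and pf_modulate_sack()
 * applies the same shift to every SACK block, so a wire block
 * [6000, 7000] becomes [5000, 6000] and stays consistent with the
 * rewritten acknowledgment.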
*/ if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { if (pf_modulate_sack(m, off, pd, th, dst)) *copyback = 1; } #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ if (SEQ_GEQ(src->seqhi, end) && /* Last octet inside other's window space */ SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && /* Retrans: not more than one window back */ (ackskew >= -MAXACKWINDOW) && /* Acking not more than one reassembled fragment backwards */ (ackskew <= (MAXACKWINDOW << sws)) && /* Acking not more than one window forward */ ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) || (pd->flags & PFDESC_IP_REAS) == 0)) { /* Require an exact/+1 sequence match on resets when possible */ if (dst->scrub || src->scrub) { if (pf_normalize_tcp_stateful(m, off, pd, reason, th, *state, src, dst, copyback)) return (PF_DROP); } /* update max window */ if (src->max_win < win) src->max_win = win; /* synchronize sequencing */ if (SEQ_GT(end, src->seqlo)) src->seqlo = end; /* slide the window of what the other end can send */ if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) dst->seqhi = ack + MAX((win << sws), 1); /* update states */ if (th->th_flags & TH_SYN) if (src->state < TCPS_SYN_SENT) src->state = TCPS_SYN_SENT; if (th->th_flags & TH_FIN) if (src->state < TCPS_CLOSING) src->state = TCPS_CLOSING; if (th->th_flags & TH_ACK) { if (dst->state == TCPS_SYN_SENT) { dst->state = TCPS_ESTABLISHED; if (src->state == TCPS_ESTABLISHED && (*state)->src_node != NULL && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } } else if (dst->state == TCPS_CLOSING) dst->state = TCPS_FIN_WAIT_2; } if (th->th_flags & TH_RST) src->state = dst->state = TCPS_TIME_WAIT; /* update expire time */ (*state)->expire = time_uptime; if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2) (*state)->timeout = PFTM_TCP_CLOSED; else if (src->state >= TCPS_CLOSING && dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_FIN_WAIT; else if (src->state < TCPS_ESTABLISHED || dst->state < TCPS_ESTABLISHED) (*state)->timeout = PFTM_TCP_OPENING; else if (src->state >= TCPS_CLOSING || dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_CLOSING; else (*state)->timeout = PFTM_TCP_ESTABLISHED; /* Fall through to PASS packet */ } else if ((dst->state < TCPS_SYN_SENT || dst->state >= TCPS_FIN_WAIT_2 || src->state >= TCPS_FIN_WAIT_2) && SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) && /* Within a window forward of the originating packet */ SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { /* Within a window backward of the originating packet */ /* * This currently handles three situations: * 1) Stupid stacks will shotgun SYNs before their peer * replies. * 2) When PF catches an already established stream (the * firewall rebooted, the state table was flushed, routes * changed...) * 3) Packets get funky immediately after the connection * closes (this should catch Solaris spurious ACK|FINs * that web servers like to spew after a close) * * This must be a little more careful than the above code * since packet floods will also be caught here. We don't * update the TTL here to mitigate the damage of a packet * flood and so the same code can handle awkward establishment * and a loosened connection close. * In the establishment case, a correct peer response will * validate the connection, go through the normal state code * and keep updating the state TTL. 
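 * In numbers (illustrative): MAXACKWINDOW above is 0xffff + 1500 =
 * 67035, so this loose path accepts sequence numbers within roughly
 * 64 KB on either side of the originating end's window, without the
 * ACK-skew restrictions of the strict path; and since the timeout is
 * deliberately not refreshed here, a flood abusing this path can at
 * worst ride an existing state until its normal expiry.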
*/ if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: loose state match: "); pf_print_state(*state); pf_print_flags(th->th_flags); printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len, ackskew, (unsigned long long)(*state)->packets[0], (unsigned long long)(*state)->packets[1], pd->dir == PF_IN ? "in" : "out", pd->dir == (*state)->direction ? "fwd" : "rev"); } if (dst->scrub || src->scrub) { if (pf_normalize_tcp_stateful(m, off, pd, reason, th, *state, src, dst, copyback)) return (PF_DROP); } /* update max window */ if (src->max_win < win) src->max_win = win; /* synchronize sequencing */ if (SEQ_GT(end, src->seqlo)) src->seqlo = end; /* slide the window of what the other end can send */ if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) dst->seqhi = ack + MAX((win << sws), 1); /* * Cannot set dst->seqhi here since this could be a shotgunned * SYN and not an already established connection. */ if (th->th_flags & TH_FIN) if (src->state < TCPS_CLOSING) src->state = TCPS_CLOSING; if (th->th_flags & TH_RST) src->state = dst->state = TCPS_TIME_WAIT; /* Fall through to PASS packet */ } else { if ((*state)->dst.state == TCPS_SYN_SENT && (*state)->src.state == TCPS_SYN_SENT) { /* Send RST for state mismatches during handshake */ if (!(th->th_flags & TH_RST)) pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), 0, TH_RST, 0, 0, (*state)->rule.ptr->return_ttl, 1, 0, kif->pfik_ifp); src->seqlo = 0; src->seqhi = 1; src->max_win = 1; } else if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: BAD state: "); pf_print_state(*state); pf_print_flags(th->th_flags); printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len, ackskew, (unsigned long long)(*state)->packets[0], (unsigned long long)(*state)->packets[1], pd->dir == PF_IN ? "in" : "out", pd->dir == (*state)->direction ? "fwd" : "rev"); printf("pf: State failure on: %c %c %c %c | %c %c\n", SEQ_GEQ(src->seqhi, end) ? ' ' : '1', SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? ' ': '2', (ackskew >= -MAXACKWINDOW) ? ' ' : '3', (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5', SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6'); } REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); } return (PF_PASS); } static int pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst, struct pf_state **state, struct pf_pdesc *pd, u_short *reason) { struct tcphdr *th = pd->hdr.tcp; if (th->th_flags & TH_SYN) if (src->state < TCPS_SYN_SENT) src->state = TCPS_SYN_SENT; if (th->th_flags & TH_FIN) if (src->state < TCPS_CLOSING) src->state = TCPS_CLOSING; if (th->th_flags & TH_ACK) { if (dst->state == TCPS_SYN_SENT) { dst->state = TCPS_ESTABLISHED; if (src->state == TCPS_ESTABLISHED && (*state)->src_node != NULL && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } } else if (dst->state == TCPS_CLOSING) { dst->state = TCPS_FIN_WAIT_2; } else if (src->state == TCPS_SYN_SENT && dst->state < TCPS_SYN_SENT) { /* * Handle a special sloppy case where we only see one * half of the connection. If there is a ACK after * the initial SYN without ever seeing a packet from * the destination, set the connection to established. 
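A sketch of the sloppy tracker's flag-driven transitions described here, for the side that sent the packet (src) against its peer (dst). Only the one-sided-handshake part is shown (the CLOSING/FIN_WAIT_2 promotions and connection limits are omitted), and the enum uses a simplified ordering that preserves the comparisons pf relies on.

#include <stdint.h>

enum tcp_state { TCPS_CLOSED, TCPS_SYN_SENT, TCPS_ESTABLISHED,
	TCPS_CLOSING, TCPS_FIN_WAIT_2, TCPS_TIME_WAIT };

#define TH_FIN	0x01
#define TH_SYN	0x02
#define TH_RST	0x04
#define TH_ACK	0x10

static void
sloppy_update(enum tcp_state *src, enum tcp_state *dst, uint8_t flags)
{
	if ((flags & TH_SYN) && *src < TCPS_SYN_SENT)
		*src = TCPS_SYN_SENT;
	if ((flags & TH_FIN) && *src < TCPS_CLOSING)
		*src = TCPS_CLOSING;
	if (flags & TH_ACK) {
		if (*dst == TCPS_SYN_SENT)
			*dst = TCPS_ESTABLISHED;
		else if (*src == TCPS_SYN_SENT && *dst < TCPS_SYN_SENT)
			/* one-sided view: ACK after our SYN, peer never seen */
			*src = *dst = TCPS_ESTABLISHED;
	}
	if (flags & TH_RST)
		*src = *dst = TCPS_TIME_WAIT;
}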
*/ dst->state = src->state = TCPS_ESTABLISHED; if ((*state)->src_node != NULL && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } } else if (src->state == TCPS_CLOSING && dst->state == TCPS_ESTABLISHED && dst->seqlo == 0) { /* * Handle the closing of half connections where we * don't see the full bidirectional FIN/ACK+ACK * handshake. */ dst->state = TCPS_CLOSING; } } if (th->th_flags & TH_RST) src->state = dst->state = TCPS_TIME_WAIT; /* update expire time */ (*state)->expire = time_uptime; if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2) (*state)->timeout = PFTM_TCP_CLOSED; else if (src->state >= TCPS_CLOSING && dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_FIN_WAIT; else if (src->state < TCPS_ESTABLISHED || dst->state < TCPS_ESTABLISHED) (*state)->timeout = PFTM_TCP_OPENING; else if (src->state >= TCPS_CLOSING || dst->state >= TCPS_CLOSING) (*state)->timeout = PFTM_TCP_CLOSING; else (*state)->timeout = PFTM_TCP_ESTABLISHED; return (PF_PASS); } static int pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { struct pf_state_key_cmp key; struct tcphdr *th = pd->hdr.tcp; int copyback = 0; struct pf_state_peer *src, *dst; struct pf_state_key *sk; bzero(&key, sizeof(key)); key.af = pd->af; key.proto = IPPROTO_TCP; if (direction == PF_IN) { /* wire side, straight */ PF_ACPY(&key.addr[0], pd->src, key.af); PF_ACPY(&key.addr[1], pd->dst, key.af); key.port[0] = th->th_sport; key.port[1] = th->th_dport; } else { /* stack side, reverse */ PF_ACPY(&key.addr[1], pd->src, key.af); PF_ACPY(&key.addr[0], pd->dst, key.af); key.port[1] = th->th_sport; key.port[0] = th->th_dport; } STATE_LOOKUP(kif, &key, direction, *state, pd); if (direction == (*state)->direction) { src = &(*state)->src; dst = &(*state)->dst; } else { src = &(*state)->dst; dst = &(*state)->src; } sk = (*state)->key[pd->didx]; if ((*state)->src.state == PF_TCPS_PROXY_SRC) { if (direction != (*state)->direction) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } if (th->th_flags & TH_SYN) { if (ntohl(th->th_seq) != (*state)->src.seqlo) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, (*state)->src.seqhi, ntohl(th->th_seq) + 1, TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 0, NULL); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } else if (!(th->th_flags & TH_ACK) || (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } else if ((*state)->src_node != NULL && pf_src_connlimit(state)) { REASON_SET(reason, PFRES_SRCLIMIT); return (PF_DROP); } else (*state)->src.state = PF_TCPS_PROXY_DST; } if ((*state)->src.state == PF_TCPS_PROXY_DST) { if (direction == (*state)->direction) { if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } (*state)->src.max_win = MAX(ntohs(th->th_win), 1); if ((*state)->dst.seqhi == 1) (*state)->dst.seqhi = htonl(arc4random()); pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, &sk->addr[pd->sidx], &sk->addr[pd->didx], sk->port[pd->sidx], sk->port[pd->didx], (*state)->dst.seqhi, 0, TH_SYN, 0, (*state)->src.mss, 0, 0, (*state)->tag, NULL); REASON_SET(reason, PFRES_SYNPROXY); return 
(PF_SYNPROXY_DROP); } else if (((th->th_flags & (TH_SYN|TH_ACK)) != (TH_SYN|TH_ACK)) || (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { REASON_SET(reason, PFRES_SYNPROXY); return (PF_DROP); } else { (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); (*state)->dst.seqlo = ntohl(th->th_seq); pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst, pd->src, th->th_dport, th->th_sport, ntohl(th->th_ack), ntohl(th->th_seq) + 1, TH_ACK, (*state)->src.max_win, 0, 0, 0, (*state)->tag, NULL); pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, &sk->addr[pd->sidx], &sk->addr[pd->didx], sk->port[pd->sidx], sk->port[pd->didx], (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, TH_ACK, (*state)->dst.max_win, 0, 0, 1, 0, NULL); (*state)->src.seqdiff = (*state)->dst.seqhi - (*state)->src.seqlo; (*state)->dst.seqdiff = (*state)->src.seqhi - (*state)->dst.seqlo; (*state)->src.seqhi = (*state)->src.seqlo + (*state)->dst.max_win; (*state)->dst.seqhi = (*state)->dst.seqlo + (*state)->src.max_win; (*state)->src.wscale = (*state)->dst.wscale = 0; (*state)->src.state = (*state)->dst.state = TCPS_ESTABLISHED; REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } } if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && dst->state >= TCPS_FIN_WAIT_2 && src->state >= TCPS_FIN_WAIT_2) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: state reuse "); pf_print_state(*state); pf_print_flags(th->th_flags); printf("\n"); } /* XXX make sure it's the same direction ?? */ (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; pf_unlink_state(*state, PF_ENTER_LOCKED); *state = NULL; return (PF_DROP); } if ((*state)->state_flags & PFSTATE_SLOPPY) { if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP) return (PF_DROP); } else { if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason, &copyback) == PF_DROP) return (PF_DROP); } /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || nk->port[pd->sidx] != th->th_sport) pf_change_ap(m, pd->src, &th->th_sport, pd->ip_sum, &th->th_sum, &nk->addr[pd->sidx], nk->port[pd->sidx], 0, pd->af); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || nk->port[pd->didx] != th->th_dport) pf_change_ap(m, pd->dst, &th->th_dport, pd->ip_sum, &th->th_sum, &nk->addr[pd->didx], nk->port[pd->didx], 0, pd->af); copyback = 1; } /* Copyback sequence modulation or stateful scrub changes if needed */ if (copyback) m_copyback(m, off, sizeof(*th), (caddr_t)th); return (PF_PASS); } static int pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; struct pf_state_key_cmp key; struct udphdr *uh = pd->hdr.udp; bzero(&key, sizeof(key)); key.af = pd->af; key.proto = IPPROTO_UDP; if (direction == PF_IN) { /* wire side, straight */ PF_ACPY(&key.addr[0], pd->src, key.af); PF_ACPY(&key.addr[1], pd->dst, key.af); key.port[0] = uh->uh_sport; key.port[1] = uh->uh_dport; } else { /* stack side, reverse */ PF_ACPY(&key.addr[1], pd->src, key.af); PF_ACPY(&key.addr[0], pd->dst, key.af); key.port[1] = uh->uh_sport; key.port[0] = uh->uh_dport; } STATE_LOOKUP(kif, &key, direction, *state, pd); if (direction == (*state)->direction) { src = &(*state)->src; dst = &(*state)->dst; } else { src = &(*state)->dst; dst = &(*state)->src; } /* update states */ if (src->state < PFUDPS_SINGLE) src->state = PFUDPS_SINGLE; if (dst->state ==

PFUDPS_SINGLE) dst->state = PFUDPS_MULTIPLE; /* update expire time */ (*state)->expire = time_uptime; if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) (*state)->timeout = PFTM_UDP_MULTIPLE; else (*state)->timeout = PFTM_UDP_SINGLE; /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || nk->port[pd->sidx] != uh->uh_sport) pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum, &uh->uh_sum, &nk->addr[pd->sidx], nk->port[pd->sidx], 1, pd->af); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || nk->port[pd->didx] != uh->uh_dport) pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum, &uh->uh_sum, &nk->addr[pd->didx], nk->port[pd->didx], 1, pd->af); m_copyback(m, off, sizeof(*uh), (caddr_t)uh); } return (PF_PASS); } static int pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { struct pf_addr *saddr = pd->src, *daddr = pd->dst; u_int16_t icmpid = 0, *icmpsum; u_int8_t icmptype; int state_icmp = 0; struct pf_state_key_cmp key; bzero(&key, sizeof(key)); switch (pd->proto) { #ifdef INET case IPPROTO_ICMP: icmptype = pd->hdr.icmp->icmp_type; icmpid = pd->hdr.icmp->icmp_id; icmpsum = &pd->hdr.icmp->icmp_cksum; if (icmptype == ICMP_UNREACH || icmptype == ICMP_SOURCEQUENCH || icmptype == ICMP_REDIRECT || icmptype == ICMP_TIMXCEED || icmptype == ICMP_PARAMPROB) state_icmp++; break; #endif /* INET */ #ifdef INET6 case IPPROTO_ICMPV6: icmptype = pd->hdr.icmp6->icmp6_type; icmpid = pd->hdr.icmp6->icmp6_id; icmpsum = &pd->hdr.icmp6->icmp6_cksum; if (icmptype == ICMP6_DST_UNREACH || icmptype == ICMP6_PACKET_TOO_BIG || icmptype == ICMP6_TIME_EXCEEDED || icmptype == ICMP6_PARAM_PROB) state_icmp++; break; #endif /* INET6 */ } if (!state_icmp) { /* * ICMP query/reply message not related to a TCP/UDP packet. * Search for an ICMP state. 
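ICMP query states are keyed on the id field (both "ports" set to the id), and when NAT rewrites the id the checksum is patched incrementally rather than recomputed; this is the pf_cksum_fixup() idea from RFC 1624. A simplified sketch (the real routine also special-cases UDP's all-zero checksum, and operates on network-order values):

#include <stdint.h>
#include <stdio.h>

static uint16_t
cksum_fixup(uint16_t cksum, uint16_t old, uint16_t new)
{
	uint32_t l;

	l = cksum + old - new;		/* one's-complement adjust */
	l = (l >> 16) + (l & 0xffff);	/* fold the carry back in */
	return (l & 0xffff);
}

int
main(void)
{
	uint16_t cksum = 0x1234;		/* current icmp_cksum */
	uint16_t id = 0x0042, nat_id = 0x1042;	/* illustrative values */

	printf("old cksum %04x -> new cksum %04x\n",
	    cksum, cksum_fixup(cksum, id, nat_id));
	return (0);
}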
*/ key.af = pd->af; key.proto = pd->proto; key.port[0] = key.port[1] = icmpid; if (direction == PF_IN) { /* wire side, straight */ PF_ACPY(&key.addr[0], pd->src, key.af); PF_ACPY(&key.addr[1], pd->dst, key.af); } else { /* stack side, reverse */ PF_ACPY(&key.addr[1], pd->src, key.af); PF_ACPY(&key.addr[0], pd->dst, key.af); } STATE_LOOKUP(kif, &key, direction, *state, pd); (*state)->expire = time_uptime; (*state)->timeout = PFTM_ICMP_ERROR_REPLY; /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; switch (pd->af) { #ifdef INET case AF_INET: if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) pf_change_a(&saddr->v4.s_addr, pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, 0); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) pf_change_a(&daddr->v4.s_addr, pd->ip_sum, nk->addr[pd->didx].v4.s_addr, 0); if (nk->port[0] != pd->hdr.icmp->icmp_id) { pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( pd->hdr.icmp->icmp_cksum, icmpid, nk->port[pd->sidx], 0); pd->hdr.icmp->icmp_id = nk->port[pd->sidx]; } m_copyback(m, off, ICMP_MINLEN, (caddr_t )pd->hdr.icmp); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET6)) pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, &nk->addr[pd->sidx], 0); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET6)) pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, &nk->addr[pd->didx], 0); m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t )pd->hdr.icmp6); break; #endif /* INET6 */ } } return (PF_PASS); } else { /* * ICMP error message in response to a TCP/UDP packet. * Extract the inner TCP/UDP header and search for that state. */ struct pf_pdesc pd2; bzero(&pd2, sizeof pd2); #ifdef INET struct ip h2; #endif /* INET */ #ifdef INET6 struct ip6_hdr h2_6; int terminal = 0; #endif /* INET6 */ int ipoff2 = 0; int off2 = 0; pd2.af = pd->af; /* Payload packet is from the opposite direction. */ pd2.sidx = (direction == PF_IN) ? 1 : 0; pd2.didx = (direction == PF_IN) ? 
0 : 1; switch (pd->af) { #ifdef INET case AF_INET: /* offset of h2 in mbuf chain */ ipoff2 = off + ICMP_MINLEN; if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(ip)\n")); return (PF_DROP); } /* * ICMP error messages don't refer to non-first * fragments */ if (h2.ip_off & htons(IP_OFFMASK)) { REASON_SET(reason, PFRES_FRAG); return (PF_DROP); } /* offset of protocol header that follows h2 */ off2 = ipoff2 + (h2.ip_hl << 2); pd2.proto = h2.ip_p; pd2.src = (struct pf_addr *)&h2.ip_src; pd2.dst = (struct pf_addr *)&h2.ip_dst; pd2.ip_sum = &h2.ip_sum; break; #endif /* INET */ #ifdef INET6 case AF_INET6: ipoff2 = off + sizeof(struct icmp6_hdr); if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(ip6)\n")); return (PF_DROP); } pd2.proto = h2_6.ip6_nxt; pd2.src = (struct pf_addr *)&h2_6.ip6_src; pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; pd2.ip_sum = NULL; off2 = ipoff2 + sizeof(h2_6); do { switch (pd2.proto) { case IPPROTO_FRAGMENT: /* * ICMPv6 error messages for * non-first fragments */ REASON_SET(reason, PFRES_FRAG); return (PF_DROP); case IPPROTO_AH: case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: { /* get next header and header length */ struct ip6_ext opt6; if (!pf_pull_hdr(m, off2, &opt6, sizeof(opt6), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMPv6 short opt\n")); return (PF_DROP); } if (pd2.proto == IPPROTO_AH) off2 += (opt6.ip6e_len + 2) * 4; else off2 += (opt6.ip6e_len + 1) * 8; pd2.proto = opt6.ip6e_nxt; /* goto the next header */ break; } default: terminal++; break; } } while (!terminal); break; #endif /* INET6 */ } switch (pd2.proto) { case IPPROTO_TCP: { struct tcphdr th; u_int32_t seq; struct pf_state_peer *src, *dst; u_int8_t dws; int copyback = 0; /* * Only the first 8 bytes of the TCP header can be * expected. Don't access any TCP header fields after * th_seq, an ackskew test is not possible. 
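The extension-header walk above in miniature: ip6e_len counts 8-byte units excluding the first 8 bytes, except for AH, which counts 4-byte units excluding the first 8. A sketch with a hand-built header chain; the IPPROTO numbers are the standard ones.

#include <stdint.h>
#include <stdio.h>

#define IPPROTO_HOPOPTS	0
#define IPPROTO_TCP	6
#define IPPROTO_AH	51

struct ip6_ext_lite {
	uint8_t ip6e_nxt;
	uint8_t ip6e_len;
};

static int
ext_hdr_size(uint8_t proto, const struct ip6_ext_lite *opt)
{
	return (proto == IPPROTO_AH ? (opt->ip6e_len + 2) * 4
	    : (opt->ip6e_len + 1) * 8);
}

int
main(void)
{
	struct ip6_ext_lite hbh = { IPPROTO_AH, 0 };	/* 8 bytes */
	struct ip6_ext_lite ah = { IPPROTO_TCP, 4 };	/* (4+2)*4 = 24 bytes */
	int off = 40;			/* past the fixed ip6 header */

	off += ext_hdr_size(IPPROTO_HOPOPTS, &hbh);
	off += ext_hdr_size(IPPROTO_AH, &ah);
	printf("TCP header at offset %d\n", off);	/* 40+8+24 = 72 */
	return (0);
}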
*/ if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(tcp)\n")); return (PF_DROP); } key.af = pd2.af; key.proto = IPPROTO_TCP; PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = th.th_sport; key.port[pd2.didx] = th.th_dport; STATE_LOOKUP(kif, &key, direction, *state, pd); if (direction == (*state)->direction) { src = &(*state)->dst; dst = &(*state)->src; } else { src = &(*state)->src; dst = &(*state)->dst; } if (src->wscale && dst->wscale) dws = dst->wscale & PF_WSCALE_MASK; else dws = 0; /* Demodulate sequence number */ seq = ntohl(th.th_seq) - src->seqdiff; if (src->seqdiff) { pf_change_a(&th.th_seq, icmpsum, htonl(seq), 0); copyback = 1; } if (!((*state)->state_flags & PFSTATE_SLOPPY) && (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: BAD ICMP %d:%d ", icmptype, pd->hdr.icmp->icmp_code); pf_print_host(pd->src, 0, pd->af); printf(" -> "); pf_print_host(pd->dst, 0, pd->af); printf(" state: "); pf_print_state(*state); printf(" seq=%u\n", seq); } REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); } else { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: OK ICMP %d:%d ", icmptype, pd->hdr.icmp->icmp_code); pf_print_host(pd->src, 0, pd->af); printf(" -> "); pf_print_host(pd->dst, 0, pd->af); printf(" state: "); pf_print_state(*state); printf(" seq=%u\n", seq); } } /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd2.src, &nk->addr[pd2.sidx], pd2.af) || nk->port[pd2.sidx] != th.th_sport) pf_change_icmp(pd2.src, &th.th_sport, daddr, &nk->addr[pd2.sidx], nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], pd2.af) || nk->port[pd2.didx] != th.th_dport) pf_change_icmp(pd2.dst, &th.th_dport, NULL, /* XXX Inbound NAT? 
*/ &nk->addr[pd2.didx], nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); copyback = 1; } if (copyback) { switch (pd2.af) { #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, (caddr_t )pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t )&h2); break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t )pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t )&h2_6); break; #endif /* INET6 */ } m_copyback(m, off2, 8, (caddr_t)&th); } return (PF_PASS); break; } case IPPROTO_UDP: { struct udphdr uh; if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(udp)\n")); return (PF_DROP); } key.af = pd2.af; key.proto = IPPROTO_UDP; PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = uh.uh_sport; key.port[pd2.didx] = uh.uh_dport; STATE_LOOKUP(kif, &key, direction, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd2.src, &nk->addr[pd2.sidx], pd2.af) || nk->port[pd2.sidx] != uh.uh_sport) pf_change_icmp(pd2.src, &uh.uh_sport, daddr, &nk->addr[pd2.sidx], nk->port[pd2.sidx], &uh.uh_sum, pd2.ip_sum, icmpsum, pd->ip_sum, 1, pd2.af); if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], pd2.af) || nk->port[pd2.didx] != uh.uh_dport) pf_change_icmp(pd2.dst, &uh.uh_dport, NULL, /* XXX Inbound NAT? */ &nk->addr[pd2.didx], nk->port[pd2.didx], &uh.uh_sum, pd2.ip_sum, icmpsum, pd->ip_sum, 1, pd2.af); switch (pd2.af) { #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, (caddr_t )pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t )pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t )&h2_6); break; #endif /* INET6 */ } m_copyback(m, off2, sizeof(uh), (caddr_t)&uh); } return (PF_PASS); break; } #ifdef INET case IPPROTO_ICMP: { struct icmp iih; if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short i" "(icmp)\n")); return (PF_DROP); } key.af = pd2.af; key.proto = IPPROTO_ICMP; PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = iih.icmp_id; STATE_LOOKUP(kif, &key, direction, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd2.src, &nk->addr[pd2.sidx], pd2.af) || nk->port[pd2.sidx] != iih.icmp_id) pf_change_icmp(pd2.src, &iih.icmp_id, daddr, &nk->addr[pd2.sidx], nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], pd2.af) || nk->port[pd2.didx] != iih.icmp_id) pf_change_icmp(pd2.dst, &iih.icmp_id, NULL, /* XXX Inbound NAT? 
*/ &nk->addr[pd2.didx], nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih); } return (PF_PASS); break; } #endif /* INET */ #ifdef INET6 case IPPROTO_ICMPV6: { struct icmp6_hdr iih; if (!pf_pull_hdr(m, off2, &iih, sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: ICMP error message too short " "(icmp6)\n")); return (PF_DROP); } key.af = pd2.af; key.proto = IPPROTO_ICMPV6; PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = iih.icmp6_id; STATE_LOOKUP(kif, &key, direction, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd2.src, &nk->addr[pd2.sidx], pd2.af) || nk->port[pd2.sidx] != iih.icmp6_id) pf_change_icmp(pd2.src, &iih.icmp6_id, daddr, &nk->addr[pd2.sidx], nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], pd2.af) || nk->port[pd2.didx] != iih.icmp6_id) pf_change_icmp(pd2.dst, &iih.icmp6_id, NULL, /* XXX Inbound NAT? */ &nk->addr[pd2.didx], nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t)pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6); m_copyback(m, off2, sizeof(struct icmp6_hdr), (caddr_t)&iih); } return (PF_PASS); break; } #endif /* INET6 */ default: { key.af = pd2.af; key.proto = pd2.proto; PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = 0; STATE_LOOKUP(kif, &key, direction, *state, pd); /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; if (PF_ANEQ(pd2.src, &nk->addr[pd2.sidx], pd2.af)) pf_change_icmp(pd2.src, NULL, daddr, &nk->addr[pd2.sidx], 0, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], pd2.af)) pf_change_icmp(pd2.src, NULL, NULL, /* XXX Inbound NAT? 
*/ &nk->addr[pd2.didx], 0, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); switch (pd2.af) { #ifdef INET case AF_INET: m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); break; #endif /* INET */ #ifdef INET6 case AF_INET6: m_copyback(m, off, sizeof(struct icmp6_hdr), (caddr_t )pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t )&h2_6); break; #endif /* INET6 */ } } return (PF_PASS); break; } } } } static int pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; struct pf_state_key_cmp key; bzero(&key, sizeof(key)); key.af = pd->af; key.proto = pd->proto; if (direction == PF_IN) { PF_ACPY(&key.addr[0], pd->src, key.af); PF_ACPY(&key.addr[1], pd->dst, key.af); key.port[0] = key.port[1] = 0; } else { PF_ACPY(&key.addr[1], pd->src, key.af); PF_ACPY(&key.addr[0], pd->dst, key.af); key.port[1] = key.port[0] = 0; } STATE_LOOKUP(kif, &key, direction, *state, pd); if (direction == (*state)->direction) { src = &(*state)->src; dst = &(*state)->dst; } else { src = &(*state)->dst; dst = &(*state)->src; } /* update states */ if (src->state < PFOTHERS_SINGLE) src->state = PFOTHERS_SINGLE; if (dst->state == PFOTHERS_SINGLE) dst->state = PFOTHERS_MULTIPLE; /* update expire time */ (*state)->expire = time_uptime; if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) (*state)->timeout = PFTM_OTHER_MULTIPLE; else (*state)->timeout = PFTM_OTHER_SINGLE; /* translate source/destination address, if necessary */ if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { struct pf_state_key *nk = (*state)->key[pd->didx]; KASSERT(nk, ("%s: nk is null", __func__)); KASSERT(pd, ("%s: pd is null", __func__)); KASSERT(pd->src, ("%s: pd->src is null", __func__)); KASSERT(pd->dst, ("%s: pd->dst is null", __func__)); switch (pd->af) { #ifdef INET case AF_INET: if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) pf_change_a(&pd->src->v4.s_addr, pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, 0); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum, nk->addr[pd->didx].v4.s_addr, 0); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); #endif /* INET6 */ } } return (PF_PASS); } /* * ipoff and off are measured from the start of the mbuf chain. * h must be at "ipoff" on the mbuf chain. 
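The fragment test at the top of pf_pull_hdr() that follows, sketched: the IP fragment offset is in 8-byte units, so it is shifted left by 3 to get bytes. A non-first fragment that starts at or beyond the header being pulled cannot contain it and is passed through (the first fragment carried the policy decision); one that overlaps the header is dropped. A simplified user-space version in host byte order.

#include <stdint.h>
#include <stdio.h>

#define IP_OFFMASK 0x1fff

enum verdict { PULL, PASS, DROP };

static enum verdict
frag_verdict(uint16_t ip_off_host, int want_len)
{
	uint16_t fragoff = (ip_off_host & IP_OFFMASK) << 3;

	if (fragoff == 0)
		return (PULL);		/* first fragment: try to pull */
	return (fragoff >= want_len ? PASS : DROP);
}

int
main(void)
{
	printf("%d %d %d\n",
	    frag_verdict(0, 20),	/* PULL */
	    frag_verdict(3, 20),	/* offset 24 >= 20: PASS */
	    frag_verdict(1, 20));	/* offset 8 < 20: DROP */
	return (0);
}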
*/ void * pf_pull_hdr(struct mbuf *m, int off, void *p, int len, u_short *actionp, u_short *reasonp, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: { struct ip *h = mtod(m, struct ip *); u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; if (fragoff) { if (fragoff >= len) ACTION_SET(actionp, PF_PASS); else { ACTION_SET(actionp, PF_DROP); REASON_SET(reasonp, PFRES_FRAG); } return (NULL); } if (m->m_pkthdr.len < off + len || ntohs(h->ip_len) < off + len) { ACTION_SET(actionp, PF_DROP); REASON_SET(reasonp, PFRES_SHORT); return (NULL); } break; } #endif /* INET */ #ifdef INET6 case AF_INET6: { struct ip6_hdr *h = mtod(m, struct ip6_hdr *); if (m->m_pkthdr.len < off + len || (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) < (unsigned)(off + len)) { ACTION_SET(actionp, PF_DROP); REASON_SET(reasonp, PFRES_SHORT); return (NULL); } break; } #endif /* INET6 */ } m_copydata(m, off, len, p); return (p); } int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, int rtableid) { #ifdef RADIX_MPATH struct radix_node_head *rnh; #endif struct sockaddr_in *dst; int ret = 1; int check_mpath; #ifdef INET6 struct sockaddr_in6 *dst6; struct route_in6 ro; #else struct route ro; #endif struct radix_node *rn; struct rtentry *rt; struct ifnet *ifp; check_mpath = 0; #ifdef RADIX_MPATH /* XXX: stick to table 0 for now */ rnh = rt_tables_get_rnh(0, af); if (rnh != NULL && rn_mpath_capable(rnh)) check_mpath = 1; #endif bzero(&ro, sizeof(ro)); switch (af) { case AF_INET: dst = satosin(&ro.ro_dst); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = addr->v4; break; #ifdef INET6 case AF_INET6: /* * Skip check for addresses with embedded interface scope, * as they would always match anyway. */ if (IN6_IS_SCOPE_EMBED(&addr->v6)) goto out; dst6 = (struct sockaddr_in6 *)&ro.ro_dst; dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = addr->v6; break; #endif /* INET6 */ default: return (0); } /* Skip checks for ipsec interfaces */ if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) goto out; switch (af) { #ifdef INET6 case AF_INET6: in6_rtalloc_ign(&ro, 0, rtableid); break; #endif #ifdef INET case AF_INET: in_rtalloc_ign((struct route *)&ro, 0, rtableid); break; #endif } if (ro.ro_rt != NULL) { /* No interface given, this is a no-route check */ if (kif == NULL) goto out; if (kif->pfik_ifp == NULL) { ret = 0; goto out; } /* Perform uRPF check if passed input interface */ ret = 0; rn = (struct radix_node *)ro.ro_rt; do { rt = (struct rtentry *)rn; ifp = rt->rt_ifp; if (kif->pfik_ifp == ifp) ret = 1; #ifdef RADIX_MPATH rn = rn_mpath_next(rn); #endif } while (check_mpath == 1 && rn != NULL && ret == 0); } else ret = 0; out: if (ro.ro_rt != NULL) RTFREE(ro.ro_rt); return (ret); } #ifdef INET static void pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0, *m1; struct sockaddr_in dst; struct ip *ip; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; int error = 0; uint16_t ip_len, ip_off; KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction", __func__)); if ((pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) || pd->pf_mtag->routed++ > 3) { m0 = *m; *m = NULL; goto bad_locked; } if (r->rt == PF_DUPTO) { if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) { if (s) PF_STATE_UNLOCK(s); return; } } else { if ((r->rt == PF_REPLYTO) == (r->direction == dir)) { if (s) 
PF_STATE_UNLOCK(s); return; } m0 = *m; } ip = mtod(m0, struct ip *); bzero(&dst, sizeof(dst)); dst.sin_family = AF_INET; dst.sin_len = sizeof(dst); dst.sin_addr = ip->ip_dst; if (r->rt == PF_FASTROUTE) { struct rtentry *rt; if (s) PF_STATE_UNLOCK(s); rt = rtalloc1_fib(sintosa(&dst), 0, 0, M_GETFIB(m0)); if (rt == NULL) { KMOD_IPSTAT_INC(ips_noroute); error = EHOSTUNREACH; goto bad; } ifp = rt->rt_ifp; counter_u64_add(rt->rt_pksent, 1); if (rt->rt_flags & RTF_GATEWAY) bcopy(satosin(rt->rt_gateway), &dst, sizeof(dst)); RTFREE_LOCKED(rt); } else { if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); goto bad_locked; } if (s == NULL) { pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src, &naddr, NULL, &sn); if (!PF_AZERO(&naddr, AF_INET)) dst.sin_addr.s_addr = naddr.v4.s_addr; ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; } else { if (!PF_AZERO(&s->rt_addr, AF_INET)) dst.sin_addr.s_addr = s->rt_addr.v4.s_addr; ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; PF_STATE_UNLOCK(s); } } if (ifp == NULL) goto bad; if (oifp != ifp) { if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) goto done; if (m0->m_len < sizeof(struct ip)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: m0->m_len < sizeof(struct ip)\n", __func__)); goto bad; } ip = mtod(m0, struct ip *); } if (ifp->if_flags & IFF_LOOPBACK) m0->m_flags |= M_SKIP_FIREWALL; ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); /* Copied from FreeBSD 10.0-CURRENT ip_output. */ m0->m_pkthdr.csum_flags |= CSUM_IP; if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) { in_delayed_cksum(m0); m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } #ifdef SCTP if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) { sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); m0->m_pkthdr.csum_flags &= ~CSUM_SCTP; } #endif /* * If small enough for interface, or the interface will take * care of the fragmentation for us, we can just send directly. */ if (ip_len <= ifp->if_mtu || (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) { ip->ip_sum = 0; if (m0->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) { ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); m0->m_pkthdr.csum_flags &= ~CSUM_IP; } m_clrprotoflags(m0); /* Avoid confusing lower layers. */ error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL); goto done; } /* Balk when DF bit is set or the interface didn't support TSO. 
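The forwarding decision at the end of pf_route(), as a table: small enough for the interface (or TSO will handle it) means send directly; too big with DF set means answer with ICMP "fragmentation needed"; otherwise fragment and send the chain. A sketch that collapses the hardware-assist flags into a single boolean, not the kernel path.

#include <stdbool.h>
#include <stdio.h>

enum route_action { SEND_DIRECT, ICMP_NEEDFRAG, FRAGMENT };

static enum route_action
route_decision(unsigned ip_len, unsigned mtu, bool df, bool tso)
{
	if (ip_len <= mtu || tso)
		return (SEND_DIRECT);
	if (df)
		return (ICMP_NEEDFRAG);
	return (FRAGMENT);
}

int
main(void)
{
	printf("%d %d %d\n",
	    route_decision(1400, 1500, false, false),	/* SEND_DIRECT */
	    route_decision(1600, 1500, true, false),	/* ICMP_NEEDFRAG */
	    route_decision(1600, 1500, false, false));	/* FRAGMENT */
	return (0);
}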
*/ if ((ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) { error = EMSGSIZE; KMOD_IPSTAT_INC(ips_cantfrag); if (r->rt != PF_DUPTO) { icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, ifp->if_mtu); goto done; } else goto bad; } error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist); if (error) goto bad; for (; m0; m0 = m1) { m1 = m0->m_nextpkt; m0->m_nextpkt = NULL; if (error == 0) { m_clrprotoflags(m0); error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL); } else m_freem(m0); } if (error == 0) KMOD_IPSTAT_INC(ips_fragmented); done: if (r->rt != PF_DUPTO) *m = NULL; return; bad_locked: if (s) PF_STATE_UNLOCK(s); bad: m_freem(m0); goto done; } #endif /* INET */ #ifdef INET6 static void pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0; struct sockaddr_in6 dst; struct ip6_hdr *ip6; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction", __func__)); if ((pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) || pd->pf_mtag->routed++ > 3) { m0 = *m; *m = NULL; goto bad_locked; } if (r->rt == PF_DUPTO) { if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) { if (s) PF_STATE_UNLOCK(s); return; } } else { if ((r->rt == PF_REPLYTO) == (r->direction == dir)) { if (s) PF_STATE_UNLOCK(s); return; } m0 = *m; } ip6 = mtod(m0, struct ip6_hdr *); bzero(&dst, sizeof(dst)); dst.sin6_family = AF_INET6; dst.sin6_len = sizeof(dst); dst.sin6_addr = ip6->ip6_dst; /* Cheat. XXX why only in the v6 case??? */ if (r->rt == PF_FASTROUTE) { if (s) PF_STATE_UNLOCK(s); m0->m_flags |= M_SKIP_FIREWALL; ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); *m = NULL; return; } if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); goto bad_locked; } if (s == NULL) { pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, &naddr, NULL, &sn); if (!PF_AZERO(&naddr, AF_INET6)) PF_ACPY((struct pf_addr *)&dst.sin6_addr, &naddr, AF_INET6); ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; } else { if (!PF_AZERO(&s->rt_addr, AF_INET6)) PF_ACPY((struct pf_addr *)&dst.sin6_addr, &s->rt_addr, AF_INET6); ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; } if (s) PF_STATE_UNLOCK(s); if (ifp == NULL) goto bad; if (oifp != ifp) { if (pf_test6(PF_FWD, ifp, &m0, NULL) != PF_PASS) goto bad; else if (m0 == NULL) goto done; if (m0->m_len < sizeof(struct ip6_hdr)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: m0->m_len < sizeof(struct ip6_hdr)\n", __func__)); goto bad; } ip6 = mtod(m0, struct ip6_hdr *); } if (ifp->if_flags & IFF_LOOPBACK) m0->m_flags |= M_SKIP_FIREWALL; if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 & ~ifp->if_hwassist) { uint32_t plen = m0->m_pkthdr.len - sizeof(*ip6); in6_delayed_cksum(m0, plen, sizeof(struct ip6_hdr)); m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } /* * If the packet is too large for the outgoing interface, * send back an icmp6 error. 
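A sketch of the two v6-specific steps here: re-embedding the interface index into the second 16-bit word of a scoped destination before output, and the MTU check that either hands the packet to the neighbour-discovery output path or answers with ICMP6 "packet too big". The scope test is simplified to link-local fe80::/10 (IN6_IS_SCOPE_EMBED also covers link- and interface-local multicast), and the struct is a stand-in for in6_addr.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

struct in6_lite { uint16_t s6_addr16[8]; };

static bool
is_scope_embed(const struct in6_lite *a)
{
	/* link-local fe80::/10, simplified */
	return ((ntohs(a->s6_addr16[0]) & 0xffc0) == 0xfe80);
}

int
main(void)
{
	struct in6_lite dst;
	unsigned ifindex = 3, mtu = 1500, pktlen = 1400;

	memset(&dst, 0, sizeof(dst));
	dst.s6_addr16[0] = htons(0xfe80);

	if (is_scope_embed(&dst))
		dst.s6_addr16[1] = htons(ifindex);	/* embed scope zone */

	if (pktlen <= mtu)
		printf("nd6 output, zone %u\n", ntohs(dst.s6_addr16[1]));
	else
		printf("icmp6 packet too big, mtu %u\n", mtu);
	return (0);
}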
*/ if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr)) dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index); if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) - nd6_output_ifp(ifp, ifp, m0, &dst); + nd6_output_ifp(ifp, ifp, m0, &dst, NULL); else { in6_ifstat_inc(ifp, ifs6_in_toobig); if (r->rt != PF_DUPTO) icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); else goto bad; } done: if (r->rt != PF_DUPTO) *m = NULL; return; bad_locked: if (s) PF_STATE_UNLOCK(s); bad: m_freem(m0); goto done; } #endif /* INET6 */ /* * FreeBSD supports cksum offloads for the following drivers. * em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4), * ti(4), txp(4), xl(4) * * CSUM_DATA_VALID | CSUM_PSEUDO_HDR : * network driver performed cksum including pseudo header, need to verify * csum_data * CSUM_DATA_VALID : * network driver performed cksum, needs to additional pseudo header * cksum computation with partial csum_data(i.e. lack of H/W support for * pseudo header, for instance hme(4), sk(4) and possibly gem(4)) * * After validating the cksum of packet, set both flag CSUM_DATA_VALID and * CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper * TCP/UDP layer. * Also, set csum_data to 0xffff to force cksum validation. */ static int pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af) { u_int16_t sum = 0; int hw_assist = 0; struct ip *ip; if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) return (1); if (m->m_pkthdr.len < off + len) return (1); switch (p) { case IPPROTO_TCP: if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { sum = m->m_pkthdr.csum_data; } else { ip = mtod(m, struct ip *); sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + IPPROTO_TCP)); } sum ^= 0xffff; ++hw_assist; } break; case IPPROTO_UDP: if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { sum = m->m_pkthdr.csum_data; } else { ip = mtod(m, struct ip *); sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + IPPROTO_UDP)); } sum ^= 0xffff; ++hw_assist; } break; case IPPROTO_ICMP: #ifdef INET6 case IPPROTO_ICMPV6: #endif /* INET6 */ break; default: return (1); } if (!hw_assist) { switch (af) { case AF_INET: if (p == IPPROTO_ICMP) { if (m->m_len < off) return (1); m->m_data += off; m->m_len -= off; sum = in_cksum(m, len); m->m_data -= off; m->m_len += off; } else { if (m->m_len < sizeof(struct ip)) return (1); sum = in4_cksum(m, p, off, len); } break; #ifdef INET6 case AF_INET6: if (m->m_len < sizeof(struct ip6_hdr)) return (1); sum = in6_cksum(m, p, off, len); break; #endif /* INET6 */ default: return (1); } } if (sum) { switch (p) { case IPPROTO_TCP: { KMOD_TCPSTAT_INC(tcps_rcvbadsum); break; } case IPPROTO_UDP: { KMOD_UDPSTAT_INC(udps_badsum); break; } #ifdef INET case IPPROTO_ICMP: { KMOD_ICMPSTAT_INC(icps_checksum); break; } #endif #ifdef INET6 case IPPROTO_ICMPV6: { KMOD_ICMP6STAT_INC(icp6s_checksum); break; } #endif /* INET6 */ } return (1); } else { if (p == IPPROTO_TCP || p == IPPROTO_UDP) { m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; } } return (0); } #ifdef INET int pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) { struct pfi_kif *kif; u_short action, reason = 0, log = 0; struct mbuf *m = *m0; struct ip *h = NULL; struct m_tag *ipfwtag; struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; struct pf_state *s = NULL; struct pf_ruleset 
*ruleset = NULL; struct pf_pdesc pd; int off, dirndx, pqid = 0; M_ASSERTPKTHDR(m); if (!V_pf_status.running) return (PF_PASS); memset(&pd, 0, sizeof(pd)); kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) return (PF_PASS); if (m->m_flags & M_SKIP_FIREWALL) return (PF_PASS); pd.pf_mtag = pf_find_mtag(m); PF_RULES_RLOCK(); if (ip_divert_ptr != NULL && ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) { struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1); if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) { if (pd.pf_mtag == NULL && ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { action = PF_DROP; goto done; } pd.pf_mtag->flags |= PF_PACKET_LOOPED; m_tag_delete(m, ipfwtag); } if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) { m->m_flags |= M_FASTFWD_OURS; pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT; } } else if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { /* We do IP header normalization and packet reassembly here */ action = PF_DROP; goto done; } m = *m0; /* pf_normalize messes with m0 */ h = mtod(m, struct ip *); off = h->ip_hl << 2; if (off < (int)sizeof(struct ip)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); log = 1; goto done; } pd.src = (struct pf_addr *)&h->ip_src; pd.dst = (struct pf_addr *)&h->ip_dst; pd.sport = pd.dport = NULL; pd.ip_sum = &h->ip_sum; pd.proto_sum = NULL; pd.proto = h->ip_p; pd.dir = dir; pd.sidx = (dir == PF_IN) ? 0 : 1; pd.didx = (dir == PF_IN) ? 1 : 0; pd.af = AF_INET; pd.tos = h->ip_tos; pd.tot_len = ntohs(h->ip_len); /* handle fragments that didn't get reassembled by normalization */ if (h->ip_off & htons(IP_MF | IP_OFFMASK)) { action = pf_test_fragment(&r, dir, kif, m, h, &pd, &a, &ruleset); goto done; } switch (h->ip_p) { case IPPROTO_TCP: { struct tcphdr th; pd.hdr.tcp = &th; if (!pf_pull_hdr(m, off, &th, sizeof(th), &action, &reason, AF_INET)) { log = action != PF_PASS; goto done; } pd.p_len = pd.tot_len - off - (th.th_off << 2); if ((th.th_flags & TH_ACK) && pd.p_len == 0) pqid = 1; action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); if (action == PF_DROP) goto done; action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, &a, &ruleset, inp); break; } case IPPROTO_UDP: { struct udphdr uh; pd.hdr.udp = &uh; if (!pf_pull_hdr(m, off, &uh, sizeof(uh), &action, &reason, AF_INET)) { log = action != PF_PASS; goto done; } if (uh.uh_dport == 0 || ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); goto done; } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, &a, &ruleset, inp); break; } case IPPROTO_ICMP: { struct icmp ih; pd.hdr.icmp = &ih; if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN, &action, &reason, AF_INET)) { log = action != PF_PASS; goto done; } action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; a = 
s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, &a, &ruleset, inp); break; } #ifdef INET6 case IPPROTO_ICMPV6: { action = PF_DROP; DPFPRINTF(PF_DEBUG_MISC, ("pf: dropping IPv4 packet with ICMPv6 payload\n")); goto done; } #endif default: action = pf_test_state_other(&s, dir, kif, m, &pd); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, &a, &ruleset, inp); break; } done: PF_RULES_RUNLOCK(); if (action == PF_PASS && h->ip_hl > 5 && !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); log = r->log; DPFPRINTF(PF_DEBUG_MISC, ("pf: dropping packet with ip options\n")); } if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); } if (r->rtableid >= 0) M_SETFIB(m, r->rtableid); #ifdef ALTQ if (action == PF_PASS && r->qid) { if (pd.pf_mtag == NULL && ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); } else { if (s != NULL) pd.pf_mtag->qid_hash = pf_state_hash(s); if (pqid || (pd.tos & IPTOS_LOWDELAY)) pd.pf_mtag->qid = r->pqid; else pd.pf_mtag->qid = r->qid; /* Add hints for ecn. */ pd.pf_mtag->hdr = h; } } #endif /* ALTQ */ /* * connections redirected to loopback should not match sockets * bound specifically to loopback due to security implications, * see tcp_input() and in_pcblookup_listen(). */ if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) m->m_flags |= M_SKIP_FIREWALL; if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL && !PACKET_LOOPED(&pd)) { ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0, sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); if (ipfwtag != NULL) { ((struct ipfw_rule_ref *)(ipfwtag+1))->info = ntohs(r->divert.port); ((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir; if (s) PF_STATE_UNLOCK(s); m_tag_prepend(m, ipfwtag); if (m->m_flags & M_FASTFWD_OURS) { if (pd.pf_mtag == NULL && ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); log = 1; DPFPRINTF(PF_DEBUG_MISC, ("pf: failed to allocate tag\n")); } else { pd.pf_mtag->flags |= PF_FASTFWD_OURS_PRESENT; m->m_flags &= ~M_FASTFWD_OURS; } } ip_divert_ptr(*m0, dir == PF_IN ? DIR_IN : DIR_OUT); *m0 = NULL; return (action); } else { /* XXX: ipfw has the same behaviour! 
*/ action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); log = 1; DPFPRINTF(PF_DEBUG_MISC, ("pf: failed to allocate divert tag\n")); } } if (log) { struct pf_rule *lr; if (s != NULL && s->nat_rule.ptr != NULL && s->nat_rule.ptr->log & PF_LOG_ALL) lr = s->nat_rule.ptr; else lr = r; PFLOG_PACKET(kif, m, AF_INET, dir, reason, lr, a, ruleset, &pd, (s == NULL)); } kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len; kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++; if (action == PF_PASS || r->action == PF_DROP) { dirndx = (dir == PF_OUT); r->packets[dirndx]++; r->bytes[dirndx] += pd.tot_len; if (a != NULL) { a->packets[dirndx]++; a->bytes[dirndx] += pd.tot_len; } if (s != NULL) { if (s->nat_rule.ptr != NULL) { s->nat_rule.ptr->packets[dirndx]++; s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; } if (s->src_node != NULL) { s->src_node->packets[dirndx]++; s->src_node->bytes[dirndx] += pd.tot_len; } if (s->nat_src_node != NULL) { s->nat_src_node->packets[dirndx]++; s->nat_src_node->bytes[dirndx] += pd.tot_len; } dirndx = (dir == s->direction) ? 0 : 1; s->packets[dirndx]++; s->bytes[dirndx] += pd.tot_len; } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; if (nr != NULL && r == &V_pf_default_rule) tr = nr; if (tr->src.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->src.addr.p.tbl, (s == NULL) ? pd.src : &s->key[(s->direction == PF_IN)]-> addr[(s->direction == PF_OUT)], pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL) ? pd.dst : &s->key[(s->direction == PF_IN)]-> addr[(s->direction == PF_IN)], pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, tr->dst.neg); } switch (action) { case PF_SYNPROXY_DROP: m_freem(*m0); case PF_DEFER: *m0 = NULL; action = PF_PASS; break; case PF_DROP: m_freem(*m0); *m0 = NULL; break; default: /* pf_route() returns unlocked. */ if (r->rt) { pf_route(m0, r, dir, kif->pfik_ifp, s, &pd); return (action); } break; } if (s) PF_STATE_UNLOCK(s); return (action); } #endif /* INET */ #ifdef INET6 int pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp) { struct pfi_kif *kif; u_short action, reason = 0, log = 0; struct mbuf *m = *m0, *n = NULL; struct m_tag *mtag; struct ip6_hdr *h = NULL; struct pf_rule *a = NULL, *r = &V_pf_default_rule, *tr, *nr; struct pf_state *s = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; int off, terminal = 0, dirndx, rh_cnt = 0; int fwdir = dir; M_ASSERTPKTHDR(m); /* Detect packet forwarding. * If the input interface is different from the output interface we're * forwarding. * We do need to be careful about bridges. If the * net.link.bridge.pfil_bridge sysctl is set we can be filtering on a * bridge, so if the input interface is a bridge member and the output * interface is its bridge we're not actually forwarding but bridging. 
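The forwarding test described in that comment, as a predicate: traffic is forwarded when it goes out a different interface than it came in on, unless the input interface is a member of the output bridge, in which case it is being bridged, not forwarded. The struct is a stand-in for the real ifnet, so the field names are illustrative.

#include <stdbool.h>
#include <stddef.h>

struct ifnet_lite {
	void *if_bridge;	/* bridge we are a member of, if any */
	void *if_softc;		/* driver softc; the bridge's own on bridge ifs */
};

static bool
is_forwarding(int dir_out, const struct ifnet_lite *rcvif,
    const struct ifnet_lite *ifp)
{
	if (!dir_out || rcvif == NULL || rcvif == ifp)
		return (false);
	/* input if is a member of the output bridge: bridging, not forwarding */
	if (rcvif->if_bridge != NULL && rcvif->if_bridge == ifp->if_softc)
		return (false);
	return (true);
}

int
main(void)
{
	struct ifnet_lite a = { NULL, NULL }, b = { NULL, NULL };

	return (is_forwarding(1, &a, &b) ? 0 : 1);	/* distinct ifs: forwarding */
}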
*/ if (dir == PF_OUT && m->m_pkthdr.rcvif && ifp != m->m_pkthdr.rcvif && (m->m_pkthdr.rcvif->if_bridge == NULL || m->m_pkthdr.rcvif->if_bridge != ifp->if_softc)) fwdir = PF_FWD; if (!V_pf_status.running) return (PF_PASS); memset(&pd, 0, sizeof(pd)); pd.pf_mtag = pf_find_mtag(m); if (pd.pf_mtag && pd.pf_mtag->flags & PF_TAG_GENERATED) return (PF_PASS); kif = (struct pfi_kif *)ifp->if_pf_kif; if (kif == NULL) { DPFPRINTF(PF_DEBUG_URGENT, ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); return (PF_DROP); } if (kif->pfik_flags & PFI_IFLAG_SKIP) return (PF_PASS); if (m->m_flags & M_SKIP_FIREWALL) return (PF_PASS); PF_RULES_RLOCK(); /* We do IP header normalization and packet reassembly here */ if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { action = PF_DROP; goto done; } m = *m0; /* pf_normalize messes with m0 */ h = mtod(m, struct ip6_hdr *); #if 1 /* * we do not support jumbogram yet. if we keep going, zero ip6_plen * will do something bad, so drop the packet for now. */ if (htons(h->ip6_plen) == 0) { action = PF_DROP; REASON_SET(&reason, PFRES_NORM); /*XXX*/ goto done; } #endif pd.src = (struct pf_addr *)&h->ip6_src; pd.dst = (struct pf_addr *)&h->ip6_dst; pd.sport = pd.dport = NULL; pd.ip_sum = NULL; pd.proto_sum = NULL; pd.dir = dir; pd.sidx = (dir == PF_IN) ? 0 : 1; pd.didx = (dir == PF_IN) ? 1 : 0; pd.af = AF_INET6; pd.tos = 0; pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); pd.proto = h->ip6_nxt; do { switch (pd.proto) { case IPPROTO_FRAGMENT: action = pf_test_fragment(&r, dir, kif, m, h, &pd, &a, &ruleset); if (action == PF_DROP) REASON_SET(&reason, PFRES_FRAG); goto done; case IPPROTO_ROUTING: { struct ip6_rthdr rthdr; if (rh_cnt++) { DPFPRINTF(PF_DEBUG_MISC, ("pf: IPv6 more than one rthdr\n")); action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); log = 1; goto done; } if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL, &reason, pd.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: IPv6 short rthdr\n")); action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); log = 1; goto done; } if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { DPFPRINTF(PF_DEBUG_MISC, ("pf: IPv6 rthdr0\n")); action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); log = 1; goto done; } /* FALLTHROUGH */ } case IPPROTO_AH: case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: { /* get next header and header length */ struct ip6_ext opt6; if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6), NULL, &reason, pd.af)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: IPv6 short opt\n")); action = PF_DROP; log = 1; goto done; } if (pd.proto == IPPROTO_AH) off += (opt6.ip6e_len + 2) * 4; else off += (opt6.ip6e_len + 1) * 8; pd.proto = opt6.ip6e_nxt; /* goto the next header */ break; } default: terminal++; break; } } while (!terminal); /* if there's no routing header, use unmodified mbuf for checksumming */ if (!n) n = m; switch (pd.proto) { case IPPROTO_TCP: { struct tcphdr th; pd.hdr.tcp = &th; if (!pf_pull_hdr(m, off, &th, sizeof(th), &action, &reason, AF_INET6)) { log = action != PF_PASS; goto done; } pd.p_len = pd.tot_len - off - (th.th_off << 2); action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); if (action == PF_DROP) goto done; action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, &a, &ruleset, inp); break; } case IPPROTO_UDP: { struct udphdr uh; 
pd.hdr.udp = &uh; if (!pf_pull_hdr(m, off, &uh, sizeof(uh), &action, &reason, AF_INET6)) { log = action != PF_PASS; goto done; } if (uh.uh_dport == 0 || ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { action = PF_DROP; REASON_SET(&reason, PFRES_SHORT); goto done; } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, &a, &ruleset, inp); break; } case IPPROTO_ICMP: { action = PF_DROP; DPFPRINTF(PF_DEBUG_MISC, ("pf: dropping IPv6 packet with ICMPv4 payload\n")); goto done; } case IPPROTO_ICMPV6: { struct icmp6_hdr ih; pd.hdr.icmp6 = &ih; if (!pf_pull_hdr(m, off, &ih, sizeof(ih), &action, &reason, AF_INET6)) { log = action != PF_PASS; goto done; } action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, &a, &ruleset, inp); break; } default: action = pf_test_state_other(&s, dir, kif, m, &pd); if (action == PF_PASS) { if (pfsync_update_state_ptr != NULL) pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, &pd, &a, &ruleset, inp); break; } done: PF_RULES_RUNLOCK(); if (n != m) { m_freem(n); n = NULL; } /* handle dangerous IPv6 extension headers. */ if (action == PF_PASS && rh_cnt && !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); log = r->log; DPFPRINTF(PF_DEBUG_MISC, ("pf: dropping packet with dangerous v6 headers\n")); } if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); } if (r->rtableid >= 0) M_SETFIB(m, r->rtableid); #ifdef ALTQ if (action == PF_PASS && r->qid) { if (pd.pf_mtag == NULL && ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) { action = PF_DROP; REASON_SET(&reason, PFRES_MEMORY); } else { if (s != NULL) pd.pf_mtag->qid_hash = pf_state_hash(s); if (pd.tos & IPTOS_LOWDELAY) pd.pf_mtag->qid = r->pqid; else pd.pf_mtag->qid = r->qid; /* Add hints for ecn. */ pd.pf_mtag->hdr = h; } } #endif /* ALTQ */ if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && (s->nat_rule.ptr->action == PF_RDR || s->nat_rule.ptr->action == PF_BINAT) && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) m->m_flags |= M_SKIP_FIREWALL; /* XXX: Anybody working on it?! 
*/ if (r->divert.port) printf("pf: divert(9) is not supported for IPv6\n"); if (log) { struct pf_rule *lr; if (s != NULL && s->nat_rule.ptr != NULL && s->nat_rule.ptr->log & PF_LOG_ALL) lr = s->nat_rule.ptr; else lr = r; PFLOG_PACKET(kif, m, AF_INET6, dir, reason, lr, a, ruleset, &pd, (s == NULL)); } kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len; kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++; if (action == PF_PASS || r->action == PF_DROP) { dirndx = (dir == PF_OUT); r->packets[dirndx]++; r->bytes[dirndx] += pd.tot_len; if (a != NULL) { a->packets[dirndx]++; a->bytes[dirndx] += pd.tot_len; } if (s != NULL) { if (s->nat_rule.ptr != NULL) { s->nat_rule.ptr->packets[dirndx]++; s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; } if (s->src_node != NULL) { s->src_node->packets[dirndx]++; s->src_node->bytes[dirndx] += pd.tot_len; } if (s->nat_src_node != NULL) { s->nat_src_node->packets[dirndx]++; s->nat_src_node->bytes[dirndx] += pd.tot_len; } dirndx = (dir == s->direction) ? 0 : 1; s->packets[dirndx]++; s->bytes[dirndx] += pd.tot_len; } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; if (nr != NULL && r == &V_pf_default_rule) tr = nr; if (tr->src.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->src.addr.p.tbl, (s == NULL) ? pd.src : &s->key[(s->direction == PF_IN)]->addr[0], pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) pfr_update_stats(tr->dst.addr.p.tbl, (s == NULL) ? pd.dst : &s->key[(s->direction == PF_IN)]->addr[1], pd.af, pd.tot_len, dir == PF_OUT, r->action == PF_PASS, tr->dst.neg); } switch (action) { case PF_SYNPROXY_DROP: m_freem(*m0); case PF_DEFER: *m0 = NULL; action = PF_PASS; break; case PF_DROP: m_freem(*m0); *m0 = NULL; break; default: /* pf_route6() returns unlocked. */ if (r->rt) { pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd); return (action); } break; } if (s) PF_STATE_UNLOCK(s); /* If reassembled packet passed, create new fragments. */ if (action == PF_PASS && *m0 && fwdir == PF_FWD && (mtag = m_tag_find(m, PF_REASSEMBLED, NULL)) != NULL) action = pf_refragment6(ifp, m0, mtag); return (action); } #endif /* INET6 */ Index: projects/powernv/netsmb/smb_iod.c =================================================================== --- projects/powernv/netsmb/smb_iod.c (revision 290990) +++ projects/powernv/netsmb/smb_iod.c (revision 290991) @@ -1,715 +1,717 @@ /*- * Copyright (c) 2000-2001 Boris Popov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SMBIOD_SLEEP_TIMO 2 #define SMBIOD_PING_TIMO 60 /* seconds */ #define SMB_IOD_EVLOCKPTR(iod) (&((iod)->iod_evlock)) #define SMB_IOD_EVLOCK(iod) smb_sl_lock(&((iod)->iod_evlock)) #define SMB_IOD_EVUNLOCK(iod) smb_sl_unlock(&((iod)->iod_evlock)) #define SMB_IOD_RQLOCKPTR(iod) (&((iod)->iod_rqlock)) #define SMB_IOD_RQLOCK(iod) smb_sl_lock(&((iod)->iod_rqlock)) #define SMB_IOD_RQUNLOCK(iod) smb_sl_unlock(&((iod)->iod_rqlock)) #define smb_iod_wakeup(iod) wakeup(&(iod)->iod_flags) static MALLOC_DEFINE(M_SMBIOD, "SMBIOD", "SMB network io daemon"); static int smb_iod_next; static int smb_iod_sendall(struct smbiod *iod); static int smb_iod_disconnect(struct smbiod *iod); static void smb_iod_thread(void *); static __inline void smb_iod_rqprocessed(struct smb_rq *rqp, int error) { SMBRQ_SLOCK(rqp); rqp->sr_lerror = error; rqp->sr_rpgen++; rqp->sr_state = SMBRQ_NOTIFIED; wakeup(&rqp->sr_state); SMBRQ_SUNLOCK(rqp); } static void smb_iod_invrq(struct smbiod *iod) { struct smb_rq *rqp; /* * Invalidate all outstanding requests for this connection */ SMB_IOD_RQLOCK(iod); TAILQ_FOREACH(rqp, &iod->iod_rqlist, sr_link) { rqp->sr_flags |= SMBR_RESTART; smb_iod_rqprocessed(rqp, ENOTCONN); } SMB_IOD_RQUNLOCK(iod); } static void smb_iod_closetran(struct smbiod *iod) { struct smb_vc *vcp = iod->iod_vc; struct thread *td = iod->iod_td; if (vcp->vc_tdata == NULL) return; SMB_TRAN_DISCONNECT(vcp, td); SMB_TRAN_DONE(vcp, td); vcp->vc_tdata = NULL; } static void smb_iod_dead(struct smbiod *iod) { iod->iod_state = SMBIOD_ST_DEAD; smb_iod_closetran(iod); smb_iod_invrq(iod); } static int smb_iod_connect(struct smbiod *iod) { struct smb_vc *vcp = iod->iod_vc; struct thread *td = iod->iod_td; int error; SMBIODEBUG("%d\n", iod->iod_state); switch(iod->iod_state) { case SMBIOD_ST_VCACTIVE: SMBERROR("called for already opened connection\n"); return EISCONN; case SMBIOD_ST_DEAD: return ENOTCONN; /* XXX: last error code ? 
*/ default: break; } vcp->vc_genid++; error = 0; error = (int)SMB_TRAN_CREATE(vcp, td); if (error) goto fail; SMBIODEBUG("tcreate\n"); if (vcp->vc_laddr) { error = (int)SMB_TRAN_BIND(vcp, vcp->vc_laddr, td); if (error) goto fail; } SMBIODEBUG("tbind\n"); error = (int)SMB_TRAN_CONNECT(vcp, vcp->vc_paddr, td); if (error) goto fail; SMB_TRAN_SETPARAM(vcp, SMBTP_SELECTID, &iod->iod_flags); iod->iod_state = SMBIOD_ST_TRANACTIVE; SMBIODEBUG("tconnect\n"); /* vcp->vc_mid = 0;*/ error = (int)smb_smb_negotiate(vcp, &iod->iod_scred); if (error) goto fail; SMBIODEBUG("snegotiate\n"); error = (int)smb_smb_ssnsetup(vcp, &iod->iod_scred); if (error) goto fail; iod->iod_state = SMBIOD_ST_VCACTIVE; SMBIODEBUG("completed\n"); smb_iod_invrq(iod); return (0); fail: smb_iod_dead(iod); return (error); } static int smb_iod_disconnect(struct smbiod *iod) { struct smb_vc *vcp = iod->iod_vc; SMBIODEBUG("\n"); if (iod->iod_state == SMBIOD_ST_VCACTIVE) { smb_smb_ssnclose(vcp, &iod->iod_scred); iod->iod_state = SMBIOD_ST_TRANACTIVE; } vcp->vc_smbuid = SMB_UID_UNKNOWN; smb_iod_closetran(iod); iod->iod_state = SMBIOD_ST_NOTCONN; return 0; } static int smb_iod_treeconnect(struct smbiod *iod, struct smb_share *ssp) { int error; if (iod->iod_state != SMBIOD_ST_VCACTIVE) { if (iod->iod_state != SMBIOD_ST_DEAD) return ENOTCONN; iod->iod_state = SMBIOD_ST_RECONNECT; error = smb_iod_connect(iod); if (error) return error; } SMBIODEBUG("tree reconnect\n"); SMBS_ST_LOCK(ssp); ssp->ss_flags |= SMBS_RECONNECTING; SMBS_ST_UNLOCK(ssp); error = smb_smb_treeconnect(ssp, &iod->iod_scred); SMBS_ST_LOCK(ssp); ssp->ss_flags &= ~SMBS_RECONNECTING; SMBS_ST_UNLOCK(ssp); wakeup(&ssp->ss_vcgenid); return error; } static int smb_iod_sendrq(struct smbiod *iod, struct smb_rq *rqp) { struct thread *td = iod->iod_td; struct smb_vc *vcp = iod->iod_vc; struct smb_share *ssp = rqp->sr_share; struct mbuf *m; int error; SMBIODEBUG("iod_state = %d\n", iod->iod_state); switch (iod->iod_state) { case SMBIOD_ST_NOTCONN: smb_iod_rqprocessed(rqp, ENOTCONN); return 0; case SMBIOD_ST_DEAD: iod->iod_state = SMBIOD_ST_RECONNECT; return 0; case SMBIOD_ST_RECONNECT: return 0; default: break; } if (rqp->sr_sendcnt == 0) { #ifdef movedtoanotherplace if (vcp->vc_maxmux != 0 && iod->iod_muxcnt >= vcp->vc_maxmux) return 0; #endif le16enc(rqp->sr_rqtid, ssp ? ssp->ss_tid : SMB_TID_UNKNOWN); le16enc(rqp->sr_rquid, vcp ? vcp->vc_smbuid : 0); mb_fixhdr(&rqp->sr_rq); if (vcp->vc_hflags2 & SMB_FLAGS2_SECURITY_SIGNATURE) smb_rq_sign(rqp); } if (rqp->sr_sendcnt++ > 5) { rqp->sr_flags |= SMBR_RESTART; smb_iod_rqprocessed(rqp, rqp->sr_lerror); /* * If all attempts to send a request failed, then * something is seriously hosed. 
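The bounded-retry logic above gives up after five transmissions and flags the request for restart. A minimal standalone sketch of the same pattern, with hypothetical xmit() and is_fatal() stand-ins for the transport calls:

    #include <errno.h>

    #define MAX_SEND_ATTEMPTS 5
    #define RQ_RESTART        0x01

    struct rq {
            int attempts;
            int flags;
    };

    /* Hypothetical transport send: 0 on success, an errno value otherwise. */
    extern int xmit(struct rq *rq);
    /* Hypothetical test for errors that no retry can fix. */
    extern int is_fatal(int error);

    static int
    send_with_retry(struct rq *rq)
    {
            int error;

            while (rq->attempts++ < MAX_SEND_ATTEMPTS) {
                    error = xmit(rq);
                    if (error == 0)
                            return (0);     /* sent; reply handled elsewhere */
                    if (is_fatal(error))
                            break;          /* no point in retrying */
            }
            rq->flags |= RQ_RESTART;        /* mirrors SMBR_RESTART above */
            return (ENOTCONN);
    }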
*/ return ENOTCONN; } SMBSDEBUG("M:%04x, P:%04x, U:%04x, T:%04x\n", rqp->sr_mid, 0, 0, 0); m_dumpm(rqp->sr_rq.mb_top); m = m_copym(rqp->sr_rq.mb_top, 0, M_COPYALL, M_WAITOK); error = rqp->sr_lerror = SMB_TRAN_SEND(vcp, m, td); if (error == 0) { getnanotime(&rqp->sr_timesent); iod->iod_lastrqsent = rqp->sr_timesent; rqp->sr_flags |= SMBR_SENT; rqp->sr_state = SMBRQ_SENT; return 0; } /* * Check for fatal errors */ if (SMB_TRAN_FATAL(vcp, error)) { /* * No further attempts should be made */ return ENOTCONN; } if (smb_rq_intr(rqp)) smb_iod_rqprocessed(rqp, EINTR); return 0; } /* * Process incoming packets */ static int smb_iod_recvall(struct smbiod *iod) { struct smb_vc *vcp = iod->iod_vc; struct thread *td = iod->iod_td; struct smb_rq *rqp; struct mbuf *m; u_char *hp; u_short mid; int error; switch (iod->iod_state) { case SMBIOD_ST_NOTCONN: case SMBIOD_ST_DEAD: case SMBIOD_ST_RECONNECT: return 0; default: break; } for (;;) { m = NULL; error = SMB_TRAN_RECV(vcp, &m, td); if (error == EWOULDBLOCK) break; if (SMB_TRAN_FATAL(vcp, error)) { smb_iod_dead(iod); break; } if (error) break; if (m == NULL) { SMBERROR("tran return NULL without error\n"); error = EPIPE; continue; } m = m_pullup(m, SMB_HDRLEN); if (m == NULL) continue; /* wait for a good packet */ /* * Now we got an entire and possibly invalid SMB packet. * Be careful while parsing it. */ m_dumpm(m); hp = mtod(m, u_char*); if (bcmp(hp, SMB_SIGNATURE, SMB_SIGLEN) != 0) { m_freem(m); continue; } mid = SMB_HDRMID(hp); SMBSDEBUG("mid %04x\n", (u_int)mid); SMB_IOD_RQLOCK(iod); TAILQ_FOREACH(rqp, &iod->iod_rqlist, sr_link) { if (rqp->sr_mid != mid) continue; SMBRQ_SLOCK(rqp); if (rqp->sr_rp.md_top == NULL) { md_initm(&rqp->sr_rp, m); } else { if (rqp->sr_flags & SMBR_MULTIPACKET) { md_append_record(&rqp->sr_rp, m); } else { SMBRQ_SUNLOCK(rqp); SMBERROR("duplicate response %d (ignored)\n", mid); break; } } SMBRQ_SUNLOCK(rqp); smb_iod_rqprocessed(rqp, 0); break; } SMB_IOD_RQUNLOCK(iod); if (rqp == NULL) { SMBERROR("drop resp with mid %d\n", (u_int)mid); /* smb_printrqlist(vcp);*/ m_freem(m); } } /* * check for interrupts */ SMB_IOD_RQLOCK(iod); TAILQ_FOREACH(rqp, &iod->iod_rqlist, sr_link) { if (smb_td_intr(rqp->sr_cred->scr_td)) { smb_iod_rqprocessed(rqp, EINTR); } } SMB_IOD_RQUNLOCK(iod); return 0; } int smb_iod_request(struct smbiod *iod, int event, void *ident) { struct smbiod_event *evp; int error; SMBIODEBUG("\n"); evp = smb_zmalloc(sizeof(*evp), M_SMBIOD, M_WAITOK); evp->ev_type = event; evp->ev_ident = ident; SMB_IOD_EVLOCK(iod); STAILQ_INSERT_TAIL(&iod->iod_evlist, evp, ev_link); if ((event & SMBIOD_EV_SYNC) == 0) { SMB_IOD_EVUNLOCK(iod); smb_iod_wakeup(iod); return 0; } smb_iod_wakeup(iod); msleep(evp, SMB_IOD_EVLOCKPTR(iod), PWAIT | PDROP, "90evw", 0); error = evp->ev_error; free(evp, M_SMBIOD); return error; } /* * Place request in the queue. * Request from smbiod have a high priority. 
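The priority described in the comment above is implemented purely by queue position: requests issued by the iod thread itself are inserted at the head of the request list, everything else at the tail. The queue(3) idiom in isolation, with a hypothetical element type:

    #include <sys/queue.h>

    struct req {
            int internal;           /* nonzero: issued by the daemon itself */
            TAILQ_ENTRY(req) link;
    };
    TAILQ_HEAD(reqlist, req);

    /* Internal requests jump the queue; ordinary ones wait their turn. */
    static void
    enqueue(struct reqlist *q, struct req *r)
    {
            if (r->internal)
                    TAILQ_INSERT_HEAD(q, r, link);
            else
                    TAILQ_INSERT_TAIL(q, r, link);
    }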
*/ int smb_iod_addrq(struct smb_rq *rqp) { struct smb_vc *vcp = rqp->sr_vc; struct smbiod *iod = vcp->vc_iod; int error; SMBIODEBUG("\n"); if (rqp->sr_cred->scr_td != NULL && rqp->sr_cred->scr_td->td_proc == iod->iod_p) { rqp->sr_flags |= SMBR_INTERNAL; SMB_IOD_RQLOCK(iod); TAILQ_INSERT_HEAD(&iod->iod_rqlist, rqp, sr_link); SMB_IOD_RQUNLOCK(iod); for (;;) { if (smb_iod_sendrq(iod, rqp) != 0) { smb_iod_dead(iod); break; } /* * we don't need to lock state field here */ if (rqp->sr_state != SMBRQ_NOTSENT) break; tsleep(&iod->iod_flags, PWAIT, "90sndw", hz); } if (rqp->sr_lerror) smb_iod_removerq(rqp); return rqp->sr_lerror; } switch (iod->iod_state) { case SMBIOD_ST_NOTCONN: return ENOTCONN; case SMBIOD_ST_DEAD: error = smb_iod_request(vcp->vc_iod, SMBIOD_EV_CONNECT | SMBIOD_EV_SYNC, NULL); if (error) return error; return EXDEV; default: break; } SMB_IOD_RQLOCK(iod); for (;;) { if (vcp->vc_maxmux == 0) { SMBERROR("maxmux == 0\n"); break; } if (iod->iod_muxcnt < vcp->vc_maxmux) break; iod->iod_muxwant++; msleep(&iod->iod_muxwant, SMB_IOD_RQLOCKPTR(iod), PWAIT, "90mux", 0); } iod->iod_muxcnt++; TAILQ_INSERT_TAIL(&iod->iod_rqlist, rqp, sr_link); SMB_IOD_RQUNLOCK(iod); smb_iod_wakeup(iod); return 0; } int smb_iod_removerq(struct smb_rq *rqp) { struct smb_vc *vcp = rqp->sr_vc; struct smbiod *iod = vcp->vc_iod; SMBIODEBUG("\n"); if (rqp->sr_flags & SMBR_INTERNAL) { SMB_IOD_RQLOCK(iod); TAILQ_REMOVE(&iod->iod_rqlist, rqp, sr_link); SMB_IOD_RQUNLOCK(iod); return 0; } SMB_IOD_RQLOCK(iod); while (rqp->sr_flags & SMBR_XLOCK) { rqp->sr_flags |= SMBR_XLOCKWANT; msleep(rqp, SMB_IOD_RQLOCKPTR(iod), PWAIT, "90xrm", 0); } TAILQ_REMOVE(&iod->iod_rqlist, rqp, sr_link); iod->iod_muxcnt--; if (iod->iod_muxwant) { iod->iod_muxwant--; wakeup(&iod->iod_muxwant); } SMB_IOD_RQUNLOCK(iod); return 0; } int smb_iod_waitrq(struct smb_rq *rqp) { struct smbiod *iod = rqp->sr_vc->vc_iod; int error; SMBIODEBUG("\n"); if (rqp->sr_flags & SMBR_INTERNAL) { for (;;) { smb_iod_sendall(iod); smb_iod_recvall(iod); if (rqp->sr_rpgen != rqp->sr_rplast) break; tsleep(&iod->iod_flags, PWAIT, "90irq", hz); } smb_iod_removerq(rqp); return rqp->sr_lerror; } SMBRQ_SLOCK(rqp); if (rqp->sr_rpgen == rqp->sr_rplast) msleep(&rqp->sr_state, SMBRQ_SLOCKPTR(rqp), PWAIT, "90wrq", 0); rqp->sr_rplast++; SMBRQ_SUNLOCK(rqp); error = rqp->sr_lerror; if (rqp->sr_flags & SMBR_MULTIPACKET) { /* * If request should stay in the list, then reinsert it * at the end of queue so other waiters have chance to concur */ SMB_IOD_RQLOCK(iod); TAILQ_REMOVE(&iod->iod_rqlist, rqp, sr_link); TAILQ_INSERT_TAIL(&iod->iod_rqlist, rqp, sr_link); SMB_IOD_RQUNLOCK(iod); } else smb_iod_removerq(rqp); return error; } static int smb_iod_sendall(struct smbiod *iod) { struct smb_vc *vcp = iod->iod_vc; struct smb_rq *rqp; struct timespec ts, tstimeout; int herror; herror = 0; /* * Loop through the list of requests and send them if possible */ SMB_IOD_RQLOCK(iod); TAILQ_FOREACH(rqp, &iod->iod_rqlist, sr_link) { switch (rqp->sr_state) { case SMBRQ_NOTSENT: rqp->sr_flags |= SMBR_XLOCK; SMB_IOD_RQUNLOCK(iod); herror = smb_iod_sendrq(iod, rqp); SMB_IOD_RQLOCK(iod); rqp->sr_flags &= ~SMBR_XLOCK; if (rqp->sr_flags & SMBR_XLOCKWANT) { rqp->sr_flags &= ~SMBR_XLOCKWANT; wakeup(rqp); } break; case SMBRQ_SENT: SMB_TRAN_GETPARAM(vcp, SMBTP_TIMEOUT, &tstimeout); timespecadd(&tstimeout, &tstimeout); getnanotime(&ts); timespecsub(&ts, &tstimeout); if (timespeccmp(&ts, &rqp->sr_timesent, >)) { smb_iod_rqprocessed(rqp, ETIMEDOUT); } break; default: break; } if (herror) break; } SMB_IOD_RQUNLOCK(iod); 
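In the SMBRQ_SENT arm above, the transport timeout is doubled with timespecadd() and compared against the time the request went out. That deadline test, reduced to one helper using the same legacy two-argument timespec macros this file relies on (later FreeBSD switched to three-argument forms):

    /* Kernel-context sketch: has 'sent' aged past twice 'timeout'? */
    static int
    request_timed_out(const struct timespec *sent, struct timespec timeout)
    {
            struct timespec now;

            timespecadd(&timeout, &timeout);        /* timeout *= 2 */
            getnanotime(&now);
            timespecsub(&now, &timeout);            /* now -= 2 * timeout */
            return (timespeccmp(&now, sent, >));    /* deadline passed? */
    }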
if (herror == ENOTCONN) smb_iod_dead(iod); return 0; } /* * "main" function for smbiod daemon */ static __inline void smb_iod_main(struct smbiod *iod) { /* struct smb_vc *vcp = iod->iod_vc;*/ struct smbiod_event *evp; /* struct timespec tsnow;*/ int error; SMBIODEBUG("\n"); error = 0; /* * Check all interesting events */ for (;;) { SMB_IOD_EVLOCK(iod); evp = STAILQ_FIRST(&iod->iod_evlist); if (evp == NULL) { SMB_IOD_EVUNLOCK(iod); break; } STAILQ_REMOVE_HEAD(&iod->iod_evlist, ev_link); evp->ev_type |= SMBIOD_EV_PROCESSING; SMB_IOD_EVUNLOCK(iod); switch (evp->ev_type & SMBIOD_EV_MASK) { case SMBIOD_EV_CONNECT: iod->iod_state = SMBIOD_ST_RECONNECT; evp->ev_error = smb_iod_connect(iod); break; case SMBIOD_EV_DISCONNECT: evp->ev_error = smb_iod_disconnect(iod); break; case SMBIOD_EV_TREECONNECT: evp->ev_error = smb_iod_treeconnect(iod, evp->ev_ident); break; case SMBIOD_EV_SHUTDOWN: iod->iod_flags |= SMBIOD_SHUTDOWN; break; case SMBIOD_EV_NEWRQ: break; } if (evp->ev_type & SMBIOD_EV_SYNC) { SMB_IOD_EVLOCK(iod); wakeup(evp); SMB_IOD_EVUNLOCK(iod); } else free(evp, M_SMBIOD); } #if 0 if (iod->iod_state == SMBIOD_ST_VCACTIVE) { getnanotime(&tsnow); timespecsub(&tsnow, &iod->iod_pingtimo); if (timespeccmp(&tsnow, &iod->iod_lastrqsent, >)) { smb_smb_echo(vcp, &iod->iod_scred); } } #endif smb_iod_sendall(iod); smb_iod_recvall(iod); return; } void smb_iod_thread(void *arg) { struct smbiod *iod = arg; mtx_lock(&Giant); /* * Here we assume that the thread structure will be the same * for an entire kthread (kproc, to be more precise) life. */ iod->iod_td = curthread; smb_makescred(&iod->iod_scred, iod->iod_td, NULL); while ((iod->iod_flags & SMBIOD_SHUTDOWN) == 0) { smb_iod_main(iod); SMBIODEBUG("going to sleep for %d ticks\n", iod->iod_sleeptimo); if (iod->iod_flags & SMBIOD_SHUTDOWN) break; tsleep(&iod->iod_flags, PWAIT, "90idle", iod->iod_sleeptimo); } + + /* We can now safely destroy the mutexes and free the iod structure. */ + smb_sl_destroy(&iod->iod_rqlock); + smb_sl_destroy(&iod->iod_evlock); + free(iod, M_SMBIOD); mtx_unlock(&Giant); kproc_exit(0); } int smb_iod_create(struct smb_vc *vcp) { struct smbiod *iod; int error; iod = smb_zmalloc(sizeof(*iod), M_SMBIOD, M_WAITOK); iod->iod_id = smb_iod_next++; iod->iod_state = SMBIOD_ST_NOTCONN; iod->iod_vc = vcp; iod->iod_sleeptimo = hz * SMBIOD_SLEEP_TIMO; iod->iod_pingtimo.tv_sec = SMBIOD_PING_TIMO; getnanotime(&iod->iod_lastrqsent); vcp->vc_iod = iod; smb_sl_init(&iod->iod_rqlock, "90rql"); TAILQ_INIT(&iod->iod_rqlist); smb_sl_init(&iod->iod_evlock, "90evl"); STAILQ_INIT(&iod->iod_evlist); error = kproc_create(smb_iod_thread, iod, &iod->iod_p, RFNOWAIT, 0, "smbiod%d", iod->iod_id); if (error) { SMBERROR("can't start smbiod: %d", error); free(iod, M_SMBIOD); return error; } return 0; } int smb_iod_destroy(struct smbiod *iod) { smb_iod_request(iod, SMBIOD_EV_SHUTDOWN | SMBIOD_EV_SYNC, NULL); - smb_sl_destroy(&iod->iod_rqlock); - smb_sl_destroy(&iod->iod_evlock); - free(iod, M_SMBIOD); return 0; } int smb_iod_init(void) { return 0; } int smb_iod_done(void) { return 0; } Index: projects/powernv/opencrypto/xform.c =================================================================== --- projects/powernv/opencrypto/xform.c (revision 290990) +++ projects/powernv/opencrypto/xform.c (revision 290991) @@ -1,985 +1,985 @@ /* $OpenBSD: xform.c,v 1.16 2001/08/28 12:20:43 ben Exp $ */ /*- * The authors of this code are John Ioannidis (ji@tla.org), * Angelos D. 
Keromytis (kermit@csd.uch.gr), * Niels Provos (provos@physnet.uni-hamburg.de) and * Damien Miller (djm@mindrot.org). * * This code was written by John Ioannidis for BSD/OS in Athens, Greece, * in November 1995. * * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, * by Angelos D. Keromytis. * * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis * and Niels Provos. * * Additional features in 1999 by Angelos D. Keromytis. * * AES XTS implementation in 2008 by Damien Miller * * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, * Angelos D. Keromytis and Niels Provos. * * Copyright (C) 2001, Angelos D. Keromytis. * * Copyright (C) 2008, Damien Miller * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by John-Mark Gurney * under sponsorship of the FreeBSD Foundation and * Rubicon Communications, LLC (Netgate). * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in * all copies of any software which is or includes a copy or * modification of this software. * You may use this code under the GNU public license if you so wish. Please * contribute changes back to the authors under this freer than GPL license * so that we may further the use of strong encryption without limitations to * all. * * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR * PURPOSE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int null_setkey(u_int8_t **, u_int8_t *, int); static int des1_setkey(u_int8_t **, u_int8_t *, int); static int des3_setkey(u_int8_t **, u_int8_t *, int); static int blf_setkey(u_int8_t **, u_int8_t *, int); static int cast5_setkey(u_int8_t **, u_int8_t *, int); static int skipjack_setkey(u_int8_t **, u_int8_t *, int); static int rijndael128_setkey(u_int8_t **, u_int8_t *, int); static int aes_icm_setkey(u_int8_t **, u_int8_t *, int); static int aes_xts_setkey(u_int8_t **, u_int8_t *, int); static int cml_setkey(u_int8_t **, u_int8_t *, int); static void null_encrypt(caddr_t, u_int8_t *); static void des1_encrypt(caddr_t, u_int8_t *); static void des3_encrypt(caddr_t, u_int8_t *); static void blf_encrypt(caddr_t, u_int8_t *); static void cast5_encrypt(caddr_t, u_int8_t *); static void skipjack_encrypt(caddr_t, u_int8_t *); static void rijndael128_encrypt(caddr_t, u_int8_t *); static void aes_xts_encrypt(caddr_t, u_int8_t *); static void cml_encrypt(caddr_t, u_int8_t *); static void null_decrypt(caddr_t, u_int8_t *); static void des1_decrypt(caddr_t, u_int8_t *); static void des3_decrypt(caddr_t, u_int8_t *); static void blf_decrypt(caddr_t, u_int8_t *); static void cast5_decrypt(caddr_t, u_int8_t *); static void skipjack_decrypt(caddr_t, u_int8_t *); static void rijndael128_decrypt(caddr_t, u_int8_t *); static void aes_xts_decrypt(caddr_t, u_int8_t *); static void cml_decrypt(caddr_t, u_int8_t *); static void aes_icm_crypt(caddr_t, u_int8_t *); static void null_zerokey(u_int8_t **); static void des1_zerokey(u_int8_t **); static void des3_zerokey(u_int8_t **); static void blf_zerokey(u_int8_t **); static void 
cast5_zerokey(u_int8_t **); static void skipjack_zerokey(u_int8_t **); static void rijndael128_zerokey(u_int8_t **); static void aes_icm_zerokey(u_int8_t **); static void aes_xts_zerokey(u_int8_t **); static void cml_zerokey(u_int8_t **); static void aes_icm_reinit(caddr_t, u_int8_t *); static void aes_xts_reinit(caddr_t, u_int8_t *); static void aes_gcm_reinit(caddr_t, u_int8_t *); static void null_init(void *); static void null_reinit(void *ctx, const u_int8_t *buf, u_int16_t len); static int null_update(void *, const u_int8_t *, u_int16_t); static void null_final(u_int8_t *, void *); static int MD5Update_int(void *, const u_int8_t *, u_int16_t); static void SHA1Init_int(void *); static int SHA1Update_int(void *, const u_int8_t *, u_int16_t); static void SHA1Final_int(u_int8_t *, void *); static int RMD160Update_int(void *, const u_int8_t *, u_int16_t); static int SHA256Update_int(void *, const u_int8_t *, u_int16_t); static int SHA384Update_int(void *, const u_int8_t *, u_int16_t); static int SHA512Update_int(void *, const u_int8_t *, u_int16_t); static u_int32_t deflate_compress(u_int8_t *, u_int32_t, u_int8_t **); static u_int32_t deflate_decompress(u_int8_t *, u_int32_t, u_int8_t **); #define AESICM_BLOCKSIZE AES_BLOCK_LEN struct aes_icm_ctx { u_int32_t ac_ek[4*(RIJNDAEL_MAXNR + 1)]; /* ac_block is initalized to IV */ u_int8_t ac_block[AESICM_BLOCKSIZE]; int ac_nr; }; MALLOC_DEFINE(M_XDATA, "xform", "xform data buffers"); /* Encryption instances */ struct enc_xform enc_xform_null = { CRYPTO_NULL_CBC, "NULL", /* NB: blocksize of 4 is to generate a properly aligned ESP header */ - NULL_BLOCK_LEN, NULL_BLOCK_LEN, NULL_MIN_KEY, NULL_MAX_KEY, + NULL_BLOCK_LEN, 0, NULL_MIN_KEY, NULL_MAX_KEY, null_encrypt, null_decrypt, null_setkey, null_zerokey, NULL, }; struct enc_xform enc_xform_des = { CRYPTO_DES_CBC, "DES", DES_BLOCK_LEN, DES_BLOCK_LEN, DES_MIN_KEY, DES_MAX_KEY, des1_encrypt, des1_decrypt, des1_setkey, des1_zerokey, NULL, }; struct enc_xform enc_xform_3des = { CRYPTO_3DES_CBC, "3DES", DES3_BLOCK_LEN, DES3_BLOCK_LEN, TRIPLE_DES_MIN_KEY, TRIPLE_DES_MAX_KEY, des3_encrypt, des3_decrypt, des3_setkey, des3_zerokey, NULL, }; struct enc_xform enc_xform_blf = { CRYPTO_BLF_CBC, "Blowfish", BLOWFISH_BLOCK_LEN, BLOWFISH_BLOCK_LEN, BLOWFISH_MIN_KEY, BLOWFISH_MAX_KEY, blf_encrypt, blf_decrypt, blf_setkey, blf_zerokey, NULL, }; struct enc_xform enc_xform_cast5 = { CRYPTO_CAST_CBC, "CAST-128", CAST128_BLOCK_LEN, CAST128_BLOCK_LEN, CAST_MIN_KEY, CAST_MAX_KEY, cast5_encrypt, cast5_decrypt, cast5_setkey, cast5_zerokey, NULL, }; struct enc_xform enc_xform_skipjack = { CRYPTO_SKIPJACK_CBC, "Skipjack", SKIPJACK_BLOCK_LEN, SKIPJACK_BLOCK_LEN, SKIPJACK_MIN_KEY, SKIPJACK_MAX_KEY, skipjack_encrypt, skipjack_decrypt, skipjack_setkey, skipjack_zerokey, NULL, }; struct enc_xform enc_xform_rijndael128 = { CRYPTO_RIJNDAEL128_CBC, "Rijndael-128/AES", RIJNDAEL128_BLOCK_LEN, RIJNDAEL128_BLOCK_LEN, RIJNDAEL_MIN_KEY, RIJNDAEL_MAX_KEY, rijndael128_encrypt, rijndael128_decrypt, rijndael128_setkey, rijndael128_zerokey, NULL, }; struct enc_xform enc_xform_aes_icm = { CRYPTO_AES_ICM, "AES-ICM", AES_BLOCK_LEN, AES_BLOCK_LEN, AES_MIN_KEY, AES_MAX_KEY, aes_icm_crypt, aes_icm_crypt, aes_icm_setkey, rijndael128_zerokey, aes_icm_reinit, }; struct enc_xform enc_xform_aes_nist_gcm = { CRYPTO_AES_NIST_GCM_16, "AES-GCM", AES_ICM_BLOCK_LEN, AES_GCM_IV_LEN, AES_MIN_KEY, AES_MAX_KEY, aes_icm_crypt, aes_icm_crypt, aes_icm_setkey, aes_icm_zerokey, aes_gcm_reinit, }; struct enc_xform enc_xform_aes_nist_gmac = { CRYPTO_AES_NIST_GMAC, 
"AES-GMAC", AES_ICM_BLOCK_LEN, AES_GCM_IV_LEN, AES_MIN_KEY, AES_MAX_KEY, NULL, NULL, NULL, NULL, NULL, }; struct enc_xform enc_xform_aes_xts = { CRYPTO_AES_XTS, "AES-XTS", AES_BLOCK_LEN, AES_XTS_IV_LEN, AES_XTS_MIN_KEY, AES_XTS_MAX_KEY, aes_xts_encrypt, aes_xts_decrypt, aes_xts_setkey, aes_xts_zerokey, aes_xts_reinit }; struct enc_xform enc_xform_arc4 = { CRYPTO_ARC4, "ARC4", ARC4_BLOCK_LEN, ARC4_IV_LEN, ARC4_MIN_KEY, ARC4_MAX_KEY, NULL, NULL, NULL, NULL, NULL, }; struct enc_xform enc_xform_camellia = { CRYPTO_CAMELLIA_CBC, "Camellia", CAMELLIA_BLOCK_LEN, CAMELLIA_BLOCK_LEN, CAMELLIA_MIN_KEY, CAMELLIA_MAX_KEY, cml_encrypt, cml_decrypt, cml_setkey, cml_zerokey, NULL, }; /* Authentication instances */ struct auth_hash auth_hash_null = { /* NB: context isn't used */ CRYPTO_NULL_HMAC, "NULL-HMAC", NULL_HMAC_KEY_LEN, NULL_HASH_LEN, sizeof(int), NULL_HMAC_BLOCK_LEN, null_init, null_reinit, null_reinit, null_update, null_final }; struct auth_hash auth_hash_hmac_md5 = { CRYPTO_MD5_HMAC, "HMAC-MD5", MD5_HMAC_KEY_LEN, MD5_HASH_LEN, sizeof(MD5_CTX), MD5_HMAC_BLOCK_LEN, (void (*) (void *)) MD5Init, NULL, NULL, MD5Update_int, (void (*) (u_int8_t *, void *)) MD5Final }; struct auth_hash auth_hash_hmac_sha1 = { CRYPTO_SHA1_HMAC, "HMAC-SHA1", SHA1_HMAC_KEY_LEN, SHA1_HASH_LEN, sizeof(SHA1_CTX), SHA1_HMAC_BLOCK_LEN, SHA1Init_int, NULL, NULL, SHA1Update_int, SHA1Final_int }; struct auth_hash auth_hash_hmac_ripemd_160 = { CRYPTO_RIPEMD160_HMAC, "HMAC-RIPEMD-160", RIPEMD160_HMAC_KEY_LEN, RIPEMD160_HASH_LEN, sizeof(RMD160_CTX), RIPEMD160_HMAC_BLOCK_LEN, (void (*)(void *)) RMD160Init, NULL, NULL, RMD160Update_int, (void (*)(u_int8_t *, void *)) RMD160Final }; struct auth_hash auth_hash_key_md5 = { CRYPTO_MD5_KPDK, "Keyed MD5", NULL_HMAC_KEY_LEN, MD5_KPDK_HASH_LEN, sizeof(MD5_CTX), 0, (void (*)(void *)) MD5Init, NULL, NULL, MD5Update_int, (void (*)(u_int8_t *, void *)) MD5Final }; struct auth_hash auth_hash_key_sha1 = { CRYPTO_SHA1_KPDK, "Keyed SHA1", NULL_HMAC_KEY_LEN, SHA1_KPDK_HASH_LEN, sizeof(SHA1_CTX), 0, SHA1Init_int, NULL, NULL, SHA1Update_int, SHA1Final_int }; struct auth_hash auth_hash_hmac_sha2_256 = { CRYPTO_SHA2_256_HMAC, "HMAC-SHA2-256", SHA2_256_HMAC_KEY_LEN, SHA2_256_HASH_LEN, sizeof(SHA256_CTX), SHA2_256_HMAC_BLOCK_LEN, (void (*)(void *)) SHA256_Init, NULL, NULL, SHA256Update_int, (void (*)(u_int8_t *, void *)) SHA256_Final }; struct auth_hash auth_hash_hmac_sha2_384 = { CRYPTO_SHA2_384_HMAC, "HMAC-SHA2-384", SHA2_384_HMAC_KEY_LEN, SHA2_384_HASH_LEN, sizeof(SHA384_CTX), SHA2_384_HMAC_BLOCK_LEN, (void (*)(void *)) SHA384_Init, NULL, NULL, SHA384Update_int, (void (*)(u_int8_t *, void *)) SHA384_Final }; struct auth_hash auth_hash_hmac_sha2_512 = { CRYPTO_SHA2_512_HMAC, "HMAC-SHA2-512", SHA2_512_HMAC_KEY_LEN, SHA2_512_HASH_LEN, sizeof(SHA512_CTX), SHA2_512_HMAC_BLOCK_LEN, (void (*)(void *)) SHA512_Init, NULL, NULL, SHA512Update_int, (void (*)(u_int8_t *, void *)) SHA512_Final }; struct auth_hash auth_hash_nist_gmac_aes_128 = { CRYPTO_AES_128_NIST_GMAC, "GMAC-AES-128", AES_128_GMAC_KEY_LEN, AES_GMAC_HASH_LEN, sizeof(struct aes_gmac_ctx), GMAC_BLOCK_LEN, (void (*)(void *)) AES_GMAC_Init, (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Setkey, (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Reinit, (int (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Update, (void (*)(u_int8_t *, void *)) AES_GMAC_Final }; struct auth_hash auth_hash_nist_gmac_aes_192 = { CRYPTO_AES_192_NIST_GMAC, "GMAC-AES-192", AES_192_GMAC_KEY_LEN, AES_GMAC_HASH_LEN, sizeof(struct aes_gmac_ctx), GMAC_BLOCK_LEN, 
(void (*)(void *)) AES_GMAC_Init, (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Setkey, (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Reinit, (int (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Update, (void (*)(u_int8_t *, void *)) AES_GMAC_Final }; struct auth_hash auth_hash_nist_gmac_aes_256 = { CRYPTO_AES_256_NIST_GMAC, "GMAC-AES-256", AES_256_GMAC_KEY_LEN, AES_GMAC_HASH_LEN, sizeof(struct aes_gmac_ctx), GMAC_BLOCK_LEN, (void (*)(void *)) AES_GMAC_Init, (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Setkey, (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Reinit, (int (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Update, (void (*)(u_int8_t *, void *)) AES_GMAC_Final }; /* Compression instance */ struct comp_algo comp_algo_deflate = { CRYPTO_DEFLATE_COMP, "Deflate", 90, deflate_compress, deflate_decompress }; /* * Encryption wrapper routines. */ static void null_encrypt(caddr_t key, u_int8_t *blk) { } static void null_decrypt(caddr_t key, u_int8_t *blk) { } static int null_setkey(u_int8_t **sched, u_int8_t *key, int len) { *sched = NULL; return 0; } static void null_zerokey(u_int8_t **sched) { *sched = NULL; } static void des1_encrypt(caddr_t key, u_int8_t *blk) { des_cblock *cb = (des_cblock *) blk; des_key_schedule *p = (des_key_schedule *) key; des_ecb_encrypt(cb, cb, p[0], DES_ENCRYPT); } static void des1_decrypt(caddr_t key, u_int8_t *blk) { des_cblock *cb = (des_cblock *) blk; des_key_schedule *p = (des_key_schedule *) key; des_ecb_encrypt(cb, cb, p[0], DES_DECRYPT); } static int des1_setkey(u_int8_t **sched, u_int8_t *key, int len) { des_key_schedule *p; int err; p = malloc(sizeof (des_key_schedule), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (p != NULL) { des_set_key((des_cblock *) key, p[0]); err = 0; } else err = ENOMEM; *sched = (u_int8_t *) p; return err; } static void des1_zerokey(u_int8_t **sched) { bzero(*sched, sizeof (des_key_schedule)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } static void des3_encrypt(caddr_t key, u_int8_t *blk) { des_cblock *cb = (des_cblock *) blk; des_key_schedule *p = (des_key_schedule *) key; des_ecb3_encrypt(cb, cb, p[0], p[1], p[2], DES_ENCRYPT); } static void des3_decrypt(caddr_t key, u_int8_t *blk) { des_cblock *cb = (des_cblock *) blk; des_key_schedule *p = (des_key_schedule *) key; des_ecb3_encrypt(cb, cb, p[0], p[1], p[2], DES_DECRYPT); } static int des3_setkey(u_int8_t **sched, u_int8_t *key, int len) { des_key_schedule *p; int err; p = malloc(3*sizeof (des_key_schedule), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (p != NULL) { des_set_key((des_cblock *)(key + 0), p[0]); des_set_key((des_cblock *)(key + 8), p[1]); des_set_key((des_cblock *)(key + 16), p[2]); err = 0; } else err = ENOMEM; *sched = (u_int8_t *) p; return err; } static void des3_zerokey(u_int8_t **sched) { bzero(*sched, 3*sizeof (des_key_schedule)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } static void blf_encrypt(caddr_t key, u_int8_t *blk) { BF_LONG t[2]; memcpy(t, blk, sizeof (t)); t[0] = ntohl(t[0]); t[1] = ntohl(t[1]); /* NB: BF_encrypt expects the block in host order! */ BF_encrypt(t, (BF_KEY *) key); t[0] = htonl(t[0]); t[1] = htonl(t[1]); memcpy(blk, t, sizeof (t)); } static void blf_decrypt(caddr_t key, u_int8_t *blk) { BF_LONG t[2]; memcpy(t, blk, sizeof (t)); t[0] = ntohl(t[0]); t[1] = ntohl(t[1]); /* NB: BF_decrypt expects the block in host order! 
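The byte-order round trip in these Blowfish wrappers is what keeps the on-the-wire block format stable regardless of host endianness: convert to host order, run the cipher, convert back. The shape of that wrapper, abstracted away from the BF_KEY plumbing:

    #include <stdint.h>
    #include <sys/endian.h>   /* be32toh()/htobe32(); userland could use ntohl()/htonl() */

    /* Apply a host-order transform to an 8-byte big-endian block. */
    static void
    swap_transform_swap(uint32_t t[2], void (*transform)(uint32_t *))
    {
            t[0] = be32toh(t[0]);
            t[1] = be32toh(t[1]);
            transform(t);           /* stand-in for BF_encrypt()/BF_decrypt() */
            t[0] = htobe32(t[0]);
            t[1] = htobe32(t[1]);
    }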
*/ BF_decrypt(t, (BF_KEY *) key); t[0] = htonl(t[0]); t[1] = htonl(t[1]); memcpy(blk, t, sizeof (t)); } static int blf_setkey(u_int8_t **sched, u_int8_t *key, int len) { int err; *sched = malloc(sizeof(BF_KEY), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (*sched != NULL) { BF_set_key((BF_KEY *) *sched, len, key); err = 0; } else err = ENOMEM; return err; } static void blf_zerokey(u_int8_t **sched) { bzero(*sched, sizeof(BF_KEY)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } static void cast5_encrypt(caddr_t key, u_int8_t *blk) { cast_encrypt((cast_key *) key, blk, blk); } static void cast5_decrypt(caddr_t key, u_int8_t *blk) { cast_decrypt((cast_key *) key, blk, blk); } static int cast5_setkey(u_int8_t **sched, u_int8_t *key, int len) { int err; *sched = malloc(sizeof(cast_key), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (*sched != NULL) { cast_setkey((cast_key *)*sched, key, len); err = 0; } else err = ENOMEM; return err; } static void cast5_zerokey(u_int8_t **sched) { bzero(*sched, sizeof(cast_key)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } static void skipjack_encrypt(caddr_t key, u_int8_t *blk) { skipjack_forwards(blk, blk, (u_int8_t **) key); } static void skipjack_decrypt(caddr_t key, u_int8_t *blk) { skipjack_backwards(blk, blk, (u_int8_t **) key); } static int skipjack_setkey(u_int8_t **sched, u_int8_t *key, int len) { int err; /* NB: allocate all the memory that's needed at once */ *sched = malloc(10 * (sizeof(u_int8_t *) + 0x100), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (*sched != NULL) { u_int8_t** key_tables = (u_int8_t**) *sched; u_int8_t* table = (u_int8_t*) &key_tables[10]; int k; for (k = 0; k < 10; k++) { key_tables[k] = table; table += 0x100; } subkey_table_gen(key, (u_int8_t **) *sched); err = 0; } else err = ENOMEM; return err; } static void skipjack_zerokey(u_int8_t **sched) { bzero(*sched, 10 * (sizeof(u_int8_t *) + 0x100)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } static void rijndael128_encrypt(caddr_t key, u_int8_t *blk) { rijndael_encrypt((rijndael_ctx *) key, (u_char *) blk, (u_char *) blk); } static void rijndael128_decrypt(caddr_t key, u_int8_t *blk) { rijndael_decrypt(((rijndael_ctx *) key), (u_char *) blk, (u_char *) blk); } static int rijndael128_setkey(u_int8_t **sched, u_int8_t *key, int len) { int err; if (len != 16 && len != 24 && len != 32) return (EINVAL); *sched = malloc(sizeof(rijndael_ctx), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (*sched != NULL) { rijndael_set_key((rijndael_ctx *) *sched, (u_char *) key, len * 8); err = 0; } else err = ENOMEM; return err; } static void rijndael128_zerokey(u_int8_t **sched) { bzero(*sched, sizeof(rijndael_ctx)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } void aes_icm_reinit(caddr_t key, u_int8_t *iv) { struct aes_icm_ctx *ctx; ctx = (struct aes_icm_ctx *)key; bcopy(iv, ctx->ac_block, AESICM_BLOCKSIZE); } void aes_gcm_reinit(caddr_t key, u_int8_t *iv) { struct aes_icm_ctx *ctx; aes_icm_reinit(key, iv); ctx = (struct aes_icm_ctx *)key; /* GCM starts with 2 as counter 1 is used for final xor of tag. 
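aes_gcm_reinit() below zeroes the low 32 counter bits and starts them at 2, and aes_icm_crypt() then advances the block as a big-endian integer. The ripple-carry increment on its own:

    #define BLKLEN 16   /* AES block size */

    /* Increment a big-endian counter block; the carry ripples toward byte 0. */
    static void
    ctr_increment(unsigned char blk[BLKLEN])
    {
            int i;

            for (i = BLKLEN - 1; i >= 0; i--)
                    if (++blk[i] != 0)      /* stop once a byte doesn't wrap */
                            break;
    }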
*/ bzero(&ctx->ac_block[AESICM_BLOCKSIZE - 4], 4); ctx->ac_block[AESICM_BLOCKSIZE - 1] = 2; } void aes_icm_crypt(caddr_t key, u_int8_t *data) { struct aes_icm_ctx *ctx; u_int8_t keystream[AESICM_BLOCKSIZE]; int i; ctx = (struct aes_icm_ctx *)key; rijndaelEncrypt(ctx->ac_ek, ctx->ac_nr, ctx->ac_block, keystream); for (i = 0; i < AESICM_BLOCKSIZE; i++) data[i] ^= keystream[i]; explicit_bzero(keystream, sizeof(keystream)); /* increment counter */ for (i = AESICM_BLOCKSIZE - 1; i >= 0; i--) if (++ctx->ac_block[i]) /* continue on overflow */ break; } int aes_icm_setkey(u_int8_t **sched, u_int8_t *key, int len) { struct aes_icm_ctx *ctx; if (len != 16 && len != 24 && len != 32) return EINVAL; *sched = malloc(sizeof(struct aes_icm_ctx), M_CRYPTO_DATA, M_NOWAIT | M_ZERO); if (*sched == NULL) return ENOMEM; ctx = (struct aes_icm_ctx *)*sched; ctx->ac_nr = rijndaelKeySetupEnc(ctx->ac_ek, (u_char *)key, len * 8); return 0; } void aes_icm_zerokey(u_int8_t **sched) { bzero(*sched, sizeof(struct aes_icm_ctx)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } #define AES_XTS_BLOCKSIZE 16 #define AES_XTS_IVSIZE 8 #define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */ struct aes_xts_ctx { rijndael_ctx key1; rijndael_ctx key2; u_int8_t tweak[AES_XTS_BLOCKSIZE]; }; void aes_xts_reinit(caddr_t key, u_int8_t *iv) { struct aes_xts_ctx *ctx = (struct aes_xts_ctx *)key; u_int64_t blocknum; u_int i; /* * Prepare tweak as E_k2(IV). IV is specified as LE representation * of a 64-bit block number which we allow to be passed in directly. */ bcopy(iv, &blocknum, AES_XTS_IVSIZE); for (i = 0; i < AES_XTS_IVSIZE; i++) { ctx->tweak[i] = blocknum & 0xff; blocknum >>= 8; } /* Last 64 bits of IV are always zero */ bzero(ctx->tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE); rijndael_encrypt(&ctx->key2, ctx->tweak, ctx->tweak); } static void aes_xts_crypt(struct aes_xts_ctx *ctx, u_int8_t *data, u_int do_encrypt) { u_int8_t block[AES_XTS_BLOCKSIZE]; u_int i, carry_in, carry_out; for (i = 0; i < AES_XTS_BLOCKSIZE; i++) block[i] = data[i] ^ ctx->tweak[i]; if (do_encrypt) rijndael_encrypt(&ctx->key1, block, data); else rijndael_decrypt(&ctx->key1, block, data); for (i = 0; i < AES_XTS_BLOCKSIZE; i++) data[i] ^= ctx->tweak[i]; /* Exponentiate tweak */ carry_in = 0; for (i = 0; i < AES_XTS_BLOCKSIZE; i++) { carry_out = ctx->tweak[i] & 0x80; ctx->tweak[i] = (ctx->tweak[i] << 1) | (carry_in ? 
1 : 0); carry_in = carry_out; } if (carry_in) ctx->tweak[0] ^= AES_XTS_ALPHA; bzero(block, sizeof(block)); } void aes_xts_encrypt(caddr_t key, u_int8_t *data) { aes_xts_crypt((struct aes_xts_ctx *)key, data, 1); } void aes_xts_decrypt(caddr_t key, u_int8_t *data) { aes_xts_crypt((struct aes_xts_ctx *)key, data, 0); } int aes_xts_setkey(u_int8_t **sched, u_int8_t *key, int len) { struct aes_xts_ctx *ctx; if (len != 32 && len != 64) return EINVAL; *sched = malloc(sizeof(struct aes_xts_ctx), M_CRYPTO_DATA, M_NOWAIT | M_ZERO); if (*sched == NULL) return ENOMEM; ctx = (struct aes_xts_ctx *)*sched; rijndael_set_key(&ctx->key1, key, len * 4); rijndael_set_key(&ctx->key2, key + (len / 2), len * 4); return 0; } void aes_xts_zerokey(u_int8_t **sched) { bzero(*sched, sizeof(struct aes_xts_ctx)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } static void cml_encrypt(caddr_t key, u_int8_t *blk) { camellia_encrypt((camellia_ctx *) key, (u_char *) blk, (u_char *) blk); } static void cml_decrypt(caddr_t key, u_int8_t *blk) { camellia_decrypt(((camellia_ctx *) key), (u_char *) blk, (u_char *) blk); } static int cml_setkey(u_int8_t **sched, u_int8_t *key, int len) { int err; if (len != 16 && len != 24 && len != 32) return (EINVAL); *sched = malloc(sizeof(camellia_ctx), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (*sched != NULL) { camellia_set_key((camellia_ctx *) *sched, (u_char *) key, len * 8); err = 0; } else err = ENOMEM; return err; } static void cml_zerokey(u_int8_t **sched) { bzero(*sched, sizeof(camellia_ctx)); free(*sched, M_CRYPTO_DATA); *sched = NULL; } /* * And now for auth. */ static void null_init(void *ctx) { } static void null_reinit(void *ctx, const u_int8_t *buf, u_int16_t len) { } static int null_update(void *ctx, const u_int8_t *buf, u_int16_t len) { return 0; } static void null_final(u_int8_t *buf, void *ctx) { if (buf != (u_int8_t *) 0) bzero(buf, 12); } static int RMD160Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { RMD160Update(ctx, buf, len); return 0; } static int MD5Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { MD5Update(ctx, buf, len); return 0; } static void SHA1Init_int(void *ctx) { SHA1Init(ctx); } static int SHA1Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { SHA1Update(ctx, buf, len); return 0; } static void SHA1Final_int(u_int8_t *blk, void *ctx) { SHA1Final(blk, ctx); } static int SHA256Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { SHA256_Update(ctx, buf, len); return 0; } static int SHA384Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { SHA384_Update(ctx, buf, len); return 0; } static int SHA512Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { SHA512_Update(ctx, buf, len); return 0; } /* * And compression */ static u_int32_t deflate_compress(data, size, out) u_int8_t *data; u_int32_t size; u_int8_t **out; { return deflate_global(data, size, 0, out); } static u_int32_t deflate_decompress(data, size, out) u_int8_t *data; u_int32_t size; u_int8_t **out; { return deflate_global(data, size, 1, out); } Index: projects/powernv/powerpc/aim/mmu_oea64.c =================================================================== --- projects/powernv/powerpc/aim/mmu_oea64.c (revision 290990) +++ projects/powernv/powerpc/aim/mmu_oea64.c (revision 290991) @@ -1,2725 +1,2725 @@ /*- * Copyright (c) 2008-2015 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is also stored by the * logical address mapping module, this module may throw away valid virtual * to physical mappings at almost any time. However, invalidations of * mappings must be done as requested. * * In order to cope with hardware architectures which make virtual to * physical map invalidates expensive, this module may delay invalidate * reduced protection operations until such time as they are actually * necessary. This module is given full information as to which processors * are currently using which maps, and to when physical maps must be made * correct. */ #include "opt_compat.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmu_oea64.h" #include "mmu_if.h" #include "moea64_if.h" void moea64_release_vsid(uint64_t vsid); uintptr_t moea64_get_unique_vsid(void); #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) #define ENABLE_TRANS(msr) mtmsr(msr) #define VSID_MAKE(sr, hash) ((sr) | (((hash) & 0xfffff) << 4)) #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff) #define VSID_HASH_MASK 0x0000007fffffffffULL /* * Locking semantics: * * There are two locks of interest: the page locks and the pmap locks, which * protect their individual PVO lists and are locked in that order. The contents * of all PVO entries are protected by the locks of their respective pmaps. * The pmap of any PVO is guaranteed not to change so long as the PVO is linked * into any list. 
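The two-level discipline in the comment above (page lock first, pmap lock second) is the usual fixed lock order that keeps PVO list manipulation deadlock-free; the pv_lock[] array defined just below then spreads the page side across PA_LOCK_COUNT*3 slots hashed by physical address. A minimal mtx(9) sketch of honoring that order:

    /* Kernel-context sketch: always take the page (PV) lock before the pmap lock. */
    static void
    pvo_update_locked(struct mtx *pv_lock, struct mtx *pmap_lock)
    {
            mtx_lock(pv_lock);              /* level 1: page lock */
            mtx_lock(pmap_lock);            /* level 2: pmap lock */
            /* ... manipulate the PVO lists ... */
            mtx_unlock(pmap_lock);          /* release in reverse order */
            mtx_unlock(pv_lock);
    }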
* */ #define PV_LOCK_COUNT PA_LOCK_COUNT*3 static struct mtx_padalign pv_lock[PV_LOCK_COUNT]; #define PV_LOCKPTR(pa) ((struct mtx *)(&pv_lock[pa_index(pa) % PV_LOCK_COUNT])) #define PV_LOCK(pa) mtx_lock(PV_LOCKPTR(pa)) #define PV_UNLOCK(pa) mtx_unlock(PV_LOCKPTR(pa)) #define PV_LOCKASSERT(pa) mtx_assert(PV_LOCKPTR(pa), MA_OWNED) #define PV_PAGE_LOCK(m) PV_LOCK(VM_PAGE_TO_PHYS(m)) #define PV_PAGE_UNLOCK(m) PV_UNLOCK(VM_PAGE_TO_PHYS(m)) #define PV_PAGE_LOCKASSERT(m) PV_LOCKASSERT(VM_PAGE_TO_PHYS(m)) struct ofw_map { cell_t om_va; cell_t om_len; uint64_t om_pa; cell_t om_mode; }; extern unsigned char _etext[]; extern unsigned char _end[]; /* * Map of physical memory regions. */ static struct mem_region *regions; static struct mem_region *pregions; static u_int phys_avail_count; static int regions_sz, pregions_sz; extern void bs_remap_earlyboot(void); /* * Lock for the SLB tables. */ struct mtx moea64_slb_mutex; /* * PTEG data. */ u_int moea64_pteg_count; u_int moea64_pteg_mask; /* * PVO data. */ uma_zone_t moea64_pvo_zone; /* zone for pvo entries */ static struct pvo_entry *moea64_bpvo_pool; static int moea64_bpvo_pool_index = 0; static int moea64_bpvo_pool_size = 327680; TUNABLE_INT("machdep.moea64_bpvo_pool_size", &moea64_bpvo_pool_size); SYSCTL_INT(_machdep, OID_AUTO, moea64_allocated_bpvo_entries, CTLFLAG_RD, &moea64_bpvo_pool_index, 0, ""); #define VSID_NBPW (sizeof(u_int32_t) * 8) #ifdef __powerpc64__ #define NVSIDS (NPMAPS * 16) #define VSID_HASHMASK 0xffffffffUL #else #define NVSIDS NPMAPS #define VSID_HASHMASK 0xfffffUL #endif static u_int moea64_vsid_bitmap[NVSIDS / VSID_NBPW]; static boolean_t moea64_initialized = FALSE; /* * Statistics. */ u_int moea64_pte_valid = 0; u_int moea64_pte_overflow = 0; u_int moea64_pvo_entries = 0; u_int moea64_pvo_enter_calls = 0; u_int moea64_pvo_remove_calls = 0; SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_valid, CTLFLAG_RD, &moea64_pte_valid, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_overflow, CTLFLAG_RD, &moea64_pte_overflow, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_entries, CTLFLAG_RD, &moea64_pvo_entries, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_enter_calls, CTLFLAG_RD, &moea64_pvo_enter_calls, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_remove_calls, CTLFLAG_RD, &moea64_pvo_remove_calls, 0, ""); vm_offset_t moea64_scratchpage_va[2]; struct pvo_entry *moea64_scratchpage_pvo[2]; struct mtx moea64_scratchpage_mtx; uint64_t moea64_large_page_mask = 0; uint64_t moea64_large_page_size = 0; int moea64_large_page_shift = 0; /* * PVO calls. */ static int moea64_pvo_enter(mmu_t mmu, struct pvo_entry *pvo, struct pvo_head *pvo_head); static void moea64_pvo_remove_from_pmap(mmu_t mmu, struct pvo_entry *pvo); static void moea64_pvo_remove_from_page(mmu_t mmu, struct pvo_entry *pvo); static struct pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t); /* * Utility routines. 
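The moea64 statistics above are exported as read-only integers under the machdep tree; the sysctl(9) idiom, reduced to a single hypothetical counter:

    #include <sys/param.h>
    #include <sys/kernel.h>
    #include <sys/sysctl.h>

    static u_int my_counter = 0;
    SYSCTL_UINT(_debug, OID_AUTO, my_counter, CTLFLAG_RD,
        &my_counter, 0, "example read-only counter (hypothetical knob)");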
*/ static boolean_t moea64_query_bit(mmu_t, vm_page_t, uint64_t); static u_int moea64_clear_bit(mmu_t, vm_page_t, uint64_t); static void moea64_kremove(mmu_t, vm_offset_t); static void moea64_syncicache(mmu_t, pmap_t pmap, vm_offset_t va, vm_paddr_t pa, vm_size_t sz); static void moea64_pmap_init_qpages(void); /* * Kernel MMU interface */ void moea64_clear_modify(mmu_t, vm_page_t); void moea64_copy_page(mmu_t, vm_page_t, vm_page_t); void moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize); int moea64_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, u_int flags, int8_t psind); void moea64_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t, vm_prot_t); void moea64_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t); vm_paddr_t moea64_extract(mmu_t, pmap_t, vm_offset_t); vm_page_t moea64_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t); void moea64_init(mmu_t); boolean_t moea64_is_modified(mmu_t, vm_page_t); boolean_t moea64_is_prefaultable(mmu_t, pmap_t, vm_offset_t); boolean_t moea64_is_referenced(mmu_t, vm_page_t); int moea64_ts_referenced(mmu_t, vm_page_t); vm_offset_t moea64_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, int); boolean_t moea64_page_exists_quick(mmu_t, pmap_t, vm_page_t); int moea64_page_wired_mappings(mmu_t, vm_page_t); void moea64_pinit(mmu_t, pmap_t); void moea64_pinit0(mmu_t, pmap_t); void moea64_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t); void moea64_qenter(mmu_t, vm_offset_t, vm_page_t *, int); void moea64_qremove(mmu_t, vm_offset_t, int); void moea64_release(mmu_t, pmap_t); void moea64_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); void moea64_remove_pages(mmu_t, pmap_t); void moea64_remove_all(mmu_t, vm_page_t); void moea64_remove_write(mmu_t, vm_page_t); void moea64_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t); void moea64_zero_page(mmu_t, vm_page_t); void moea64_zero_page_area(mmu_t, vm_page_t, int, int); void moea64_zero_page_idle(mmu_t, vm_page_t); void moea64_activate(mmu_t, struct thread *); void moea64_deactivate(mmu_t, struct thread *); void *moea64_mapdev(mmu_t, vm_paddr_t, vm_size_t); void *moea64_mapdev_attr(mmu_t, vm_paddr_t, vm_size_t, vm_memattr_t); void moea64_unmapdev(mmu_t, vm_offset_t, vm_size_t); vm_paddr_t moea64_kextract(mmu_t, vm_offset_t); void moea64_page_set_memattr(mmu_t, vm_page_t m, vm_memattr_t ma); void moea64_kenter_attr(mmu_t, vm_offset_t, vm_paddr_t, vm_memattr_t ma); void moea64_kenter(mmu_t, vm_offset_t, vm_paddr_t); boolean_t moea64_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); static void moea64_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t); void moea64_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va); void moea64_scan_init(mmu_t mmu); vm_offset_t moea64_quick_enter_page(mmu_t mmu, vm_page_t m); void moea64_quick_remove_page(mmu_t mmu, vm_offset_t addr); static mmu_method_t moea64_methods[] = { MMUMETHOD(mmu_clear_modify, moea64_clear_modify), MMUMETHOD(mmu_copy_page, moea64_copy_page), MMUMETHOD(mmu_copy_pages, moea64_copy_pages), MMUMETHOD(mmu_enter, moea64_enter), MMUMETHOD(mmu_enter_object, moea64_enter_object), MMUMETHOD(mmu_enter_quick, moea64_enter_quick), MMUMETHOD(mmu_extract, moea64_extract), MMUMETHOD(mmu_extract_and_hold, moea64_extract_and_hold), MMUMETHOD(mmu_init, moea64_init), MMUMETHOD(mmu_is_modified, moea64_is_modified), MMUMETHOD(mmu_is_prefaultable, moea64_is_prefaultable), MMUMETHOD(mmu_is_referenced, moea64_is_referenced), MMUMETHOD(mmu_ts_referenced, 
moea64_ts_referenced), MMUMETHOD(mmu_map, moea64_map), MMUMETHOD(mmu_page_exists_quick,moea64_page_exists_quick), MMUMETHOD(mmu_page_wired_mappings,moea64_page_wired_mappings), MMUMETHOD(mmu_pinit, moea64_pinit), MMUMETHOD(mmu_pinit0, moea64_pinit0), MMUMETHOD(mmu_protect, moea64_protect), MMUMETHOD(mmu_qenter, moea64_qenter), MMUMETHOD(mmu_qremove, moea64_qremove), MMUMETHOD(mmu_release, moea64_release), MMUMETHOD(mmu_remove, moea64_remove), MMUMETHOD(mmu_remove_pages, moea64_remove_pages), MMUMETHOD(mmu_remove_all, moea64_remove_all), MMUMETHOD(mmu_remove_write, moea64_remove_write), MMUMETHOD(mmu_sync_icache, moea64_sync_icache), MMUMETHOD(mmu_unwire, moea64_unwire), MMUMETHOD(mmu_zero_page, moea64_zero_page), MMUMETHOD(mmu_zero_page_area, moea64_zero_page_area), MMUMETHOD(mmu_zero_page_idle, moea64_zero_page_idle), MMUMETHOD(mmu_activate, moea64_activate), MMUMETHOD(mmu_deactivate, moea64_deactivate), MMUMETHOD(mmu_page_set_memattr, moea64_page_set_memattr), MMUMETHOD(mmu_quick_enter_page, moea64_quick_enter_page), MMUMETHOD(mmu_quick_remove_page, moea64_quick_remove_page), /* Internal interfaces */ MMUMETHOD(mmu_mapdev, moea64_mapdev), MMUMETHOD(mmu_mapdev_attr, moea64_mapdev_attr), MMUMETHOD(mmu_unmapdev, moea64_unmapdev), MMUMETHOD(mmu_kextract, moea64_kextract), MMUMETHOD(mmu_kenter, moea64_kenter), MMUMETHOD(mmu_kenter_attr, moea64_kenter_attr), MMUMETHOD(mmu_dev_direct_mapped,moea64_dev_direct_mapped), MMUMETHOD(mmu_scan_init, moea64_scan_init), MMUMETHOD(mmu_dumpsys_map, moea64_dumpsys_map), { 0, 0 } }; MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods, 0); static struct pvo_head * vm_page_to_pvoh(vm_page_t m) { mtx_assert(PV_LOCKPTR(VM_PAGE_TO_PHYS(m)), MA_OWNED); return (&m->md.mdpg_pvoh); } static struct pvo_entry * alloc_pvo_entry(int bootstrap) { struct pvo_entry *pvo; if (!moea64_initialized || bootstrap) { if (moea64_bpvo_pool_index >= moea64_bpvo_pool_size) { panic("moea64_enter: bpvo pool exhausted, %d, %d, %zd", moea64_bpvo_pool_index, moea64_bpvo_pool_size, moea64_bpvo_pool_size * sizeof(struct pvo_entry)); } pvo = &moea64_bpvo_pool[ atomic_fetchadd_int(&moea64_bpvo_pool_index, 1)]; bzero(pvo, sizeof(*pvo)); pvo->pvo_vaddr = PVO_BOOTSTRAP; } else { pvo = uma_zalloc(moea64_pvo_zone, M_NOWAIT); bzero(pvo, sizeof(*pvo)); } return (pvo); } static void init_pvo_entry(struct pvo_entry *pvo, pmap_t pmap, vm_offset_t va) { uint64_t vsid; uint64_t hash; int shift; PMAP_LOCK_ASSERT(pmap, MA_OWNED); pvo->pvo_pmap = pmap; va &= ~ADDR_POFF; pvo->pvo_vaddr |= va; vsid = va_to_vsid(pmap, va); pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT) | (vsid << 16); shift = (pvo->pvo_vaddr & PVO_LARGE) ? 
moea64_large_page_shift : ADDR_PIDX_SHFT; hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)va & ADDR_PIDX) >> shift); pvo->pvo_pte.slot = (hash & moea64_pteg_mask) << 3; } static void free_pvo_entry(struct pvo_entry *pvo) { if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP)) uma_zfree(moea64_pvo_zone, pvo); } void moea64_pte_from_pvo(const struct pvo_entry *pvo, struct lpte *lpte) { lpte->pte_hi = (pvo->pvo_vpn >> (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) & LPTE_AVPN_MASK; lpte->pte_hi |= LPTE_VALID; if (pvo->pvo_vaddr & PVO_LARGE) lpte->pte_hi |= LPTE_BIG; if (pvo->pvo_vaddr & PVO_WIRED) lpte->pte_hi |= LPTE_WIRED; if (pvo->pvo_vaddr & PVO_HID) lpte->pte_hi |= LPTE_HID; lpte->pte_lo = pvo->pvo_pte.pa; /* Includes WIMG bits */ if (pvo->pvo_pte.prot & VM_PROT_WRITE) lpte->pte_lo |= LPTE_BW; else lpte->pte_lo |= LPTE_BR; if (!(pvo->pvo_pte.prot & VM_PROT_EXECUTE)) lpte->pte_lo |= LPTE_NOEXEC; } static __inline uint64_t moea64_calc_wimg(vm_paddr_t pa, vm_memattr_t ma) { uint64_t pte_lo; int i; if (ma != VM_MEMATTR_DEFAULT) { switch (ma) { case VM_MEMATTR_UNCACHEABLE: return (LPTE_I | LPTE_G); case VM_MEMATTR_WRITE_COMBINING: case VM_MEMATTR_WRITE_BACK: case VM_MEMATTR_PREFETCHABLE: return (LPTE_I); case VM_MEMATTR_WRITE_THROUGH: return (LPTE_W | LPTE_M); } } /* * Assume the page is cache inhibited and access is guarded unless * it's in our available memory array. */ pte_lo = LPTE_I | LPTE_G; for (i = 0; i < pregions_sz; i++) { if ((pa >= pregions[i].mr_start) && (pa < (pregions[i].mr_start + pregions[i].mr_size))) { pte_lo &= ~(LPTE_I | LPTE_G); pte_lo |= LPTE_M; break; } } return pte_lo; } /* * Quick sort callout for comparing memory regions. */ static int om_cmp(const void *a, const void *b); static int om_cmp(const void *a, const void *b) { const struct ofw_map *mapa; const struct ofw_map *mapb; mapa = a; mapb = b; if (mapa->om_pa < mapb->om_pa) return (-1); else if (mapa->om_pa > mapb->om_pa) return (1); else return (0); } static void moea64_add_ofw_mappings(mmu_t mmup, phandle_t mmu, size_t sz) { struct ofw_map translations[sz/(4*sizeof(cell_t))]; /*>= 4 cells per */ pcell_t acells, trans_cells[sz/sizeof(cell_t)]; struct pvo_entry *pvo; register_t msr; vm_offset_t off; vm_paddr_t pa_base; int i, j; bzero(translations, sz); - OF_getprop(OF_finddevice("/"), "#address-cells", &acells, + OF_getencprop(OF_finddevice("/"), "#address-cells", &acells, sizeof(acells)); - if (OF_getprop(mmu, "translations", trans_cells, sz) == -1) + if (OF_getencprop(mmu, "translations", trans_cells, sz) == -1) panic("moea64_bootstrap: can't get ofw translations"); CTR0(KTR_PMAP, "moea64_add_ofw_mappings: translations"); sz /= sizeof(cell_t); for (i = 0, j = 0; i < sz; j++) { translations[j].om_va = trans_cells[i++]; translations[j].om_len = trans_cells[i++]; translations[j].om_pa = trans_cells[i++]; if (acells == 2) { translations[j].om_pa <<= 32; translations[j].om_pa |= trans_cells[i++]; } translations[j].om_mode = trans_cells[i++]; } KASSERT(i == sz, ("Translations map has incorrect cell count (%d/%zd)", i, sz)); sz = j; qsort(translations, sz, sizeof (*translations), om_cmp); for (i = 0; i < sz; i++) { pa_base = translations[i].om_pa; #ifndef __powerpc64__ if ((translations[i].om_pa >> 32) != 0) panic("OFW translations above 32-bit boundary!"); #endif if (pa_base % PAGE_SIZE) panic("OFW translation not page-aligned (phys)!"); if (translations[i].om_va % PAGE_SIZE) panic("OFW translation not page-aligned (virt)!"); CTR3(KTR_PMAP, "translation: pa=%#zx va=%#x len=%#x", pa_base, translations[i].om_va, translations[i].om_len); /* Now 
enter the pages for this mapping */ DISABLE_TRANS(msr); for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) { /* If this address is direct-mapped, skip remapping */ if (hw_direct_map && translations[i].om_va == pa_base && moea64_calc_wimg(pa_base + off, VM_MEMATTR_DEFAULT) == LPTE_M) continue; PMAP_LOCK(kernel_pmap); pvo = moea64_pvo_find_va(kernel_pmap, translations[i].om_va + off); PMAP_UNLOCK(kernel_pmap); if (pvo != NULL) continue; moea64_kenter(mmup, translations[i].om_va + off, pa_base + off); } ENABLE_TRANS(msr); } } #ifdef __powerpc64__ static void moea64_probe_large_page(void) { uint16_t pvr = mfpvr() >> 16; switch (pvr) { case IBM970: case IBM970FX: case IBM970MP: powerpc_sync(); isync(); mtspr(SPR_HID4, mfspr(SPR_HID4) & ~HID4_970_DISABLE_LG_PG); powerpc_sync(); isync(); /* FALLTHROUGH */ default: moea64_large_page_size = 0x1000000; /* 16 MB */ moea64_large_page_shift = 24; } moea64_large_page_mask = moea64_large_page_size - 1; } static void moea64_bootstrap_slb_prefault(vm_offset_t va, int large) { struct slb *cache; struct slb entry; uint64_t esid, slbe; uint64_t i; cache = PCPU_GET(slb); esid = va >> ADDR_SR_SHFT; slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID; for (i = 0; i < 64; i++) { if (cache[i].slbe == (slbe | i)) return; } entry.slbe = slbe; entry.slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT; if (large) entry.slbv |= SLBV_L; slb_insert_kernel(entry.slbe, entry.slbv); } #endif static void moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { struct pvo_entry *pvo; register_t msr; vm_paddr_t pa; vm_offset_t size, off; uint64_t pte_lo; int i; if (moea64_large_page_size == 0) hw_direct_map = 0; DISABLE_TRANS(msr); if (hw_direct_map) { PMAP_LOCK(kernel_pmap); for (i = 0; i < pregions_sz; i++) { for (pa = pregions[i].mr_start; pa < pregions[i].mr_start + pregions[i].mr_size; pa += moea64_large_page_size) { pte_lo = LPTE_M; pvo = alloc_pvo_entry(1 /* bootstrap */); pvo->pvo_vaddr |= PVO_WIRED | PVO_LARGE; init_pvo_entry(pvo, kernel_pmap, pa); /* * Set memory access as guarded if prefetch within * the page could exit the available physmem area. */ if (pa & moea64_large_page_mask) { pa &= moea64_large_page_mask; pte_lo |= LPTE_G; } if (pa + moea64_large_page_size > pregions[i].mr_start + pregions[i].mr_size) pte_lo |= LPTE_G; pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; pvo->pvo_pte.pa = pa | pte_lo; moea64_pvo_enter(mmup, pvo, NULL); } } PMAP_UNLOCK(kernel_pmap); } else { size = moea64_bpvo_pool_size*sizeof(struct pvo_entry); off = (vm_offset_t)(moea64_bpvo_pool); for (pa = off; pa < off + size; pa += PAGE_SIZE) moea64_kenter(mmup, pa, pa); /* * Map certain important things, like ourselves. * * NOTE: We do not map the exception vector space. That code is * used only in real mode, and leaving it unmapped allows us to * catch NULL pointer deferences, instead of making NULL a valid * address. */ for (pa = kernelstart & ~PAGE_MASK; pa < kernelend; pa += PAGE_SIZE) moea64_kenter(mmup, pa, pa); } ENABLE_TRANS(msr); /* * Allow user to override unmapped_buf_allowed for testing. * XXXKIB Only direct map implementation was tested. 
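The fetch that follows is the standard bootstrap idiom for loader(8) tunables: TUNABLE_INT_FETCH() returns nonzero and overwrites the variable only when the knob was actually set, so a computed default survives otherwise. Generically, with a hypothetical knob name:

    #include <sys/param.h>
    #include <sys/kernel.h>

    static int feature_enabled;

    static void
    feature_init(int computed_default)
    {
            feature_enabled = computed_default;
            /* Overrides the default only if the loader set the knob. */
            TUNABLE_INT_FETCH("hw.myfeature.enabled", &feature_enabled);
    }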
*/ if (!TUNABLE_INT_FETCH("vfs.unmapped_buf_allowed", &unmapped_buf_allowed)) unmapped_buf_allowed = hw_direct_map; } void moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { int i, j; vm_size_t physsz, hwphyssz; #ifndef __powerpc64__ /* We don't have a direct map since there is no BAT */ hw_direct_map = 0; /* Make sure battable is zero, since we have no BAT */ for (i = 0; i < 16; i++) { battable[i].batu = 0; battable[i].batl = 0; } #else moea64_probe_large_page(); /* Use a direct map if we have large page support */ if (moea64_large_page_size > 0) hw_direct_map = 1; else hw_direct_map = 0; #endif /* Get physical memory regions from firmware */ mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); CTR0(KTR_PMAP, "moea64_bootstrap: physical memory"); if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz) panic("moea64_bootstrap: phys_avail too small"); phys_avail_count = 0; physsz = 0; hwphyssz = 0; TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); for (i = 0, j = 0; i < regions_sz; i++, j += 2) { CTR3(KTR_PMAP, "region: %#zx - %#zx (%#zx)", regions[i].mr_start, regions[i].mr_start + regions[i].mr_size, regions[i].mr_size); if (hwphyssz != 0 && (physsz + regions[i].mr_size) >= hwphyssz) { if (physsz < hwphyssz) { phys_avail[j] = regions[i].mr_start; phys_avail[j + 1] = regions[i].mr_start + hwphyssz - physsz; physsz = hwphyssz; phys_avail_count++; } break; } phys_avail[j] = regions[i].mr_start; phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size; phys_avail_count++; physsz += regions[i].mr_size; } /* Check for overlap with the kernel and exception vectors */ for (j = 0; j < 2*phys_avail_count; j+=2) { if (phys_avail[j] < EXC_LAST) phys_avail[j] += EXC_LAST; if (kernelstart >= phys_avail[j] && kernelstart < phys_avail[j+1]) { if (kernelend < phys_avail[j+1]) { phys_avail[2*phys_avail_count] = (kernelend & ~PAGE_MASK) + PAGE_SIZE; phys_avail[2*phys_avail_count + 1] = phys_avail[j+1]; phys_avail_count++; } phys_avail[j+1] = kernelstart & ~PAGE_MASK; } if (kernelend >= phys_avail[j] && kernelend < phys_avail[j+1]) { if (kernelstart > phys_avail[j]) { phys_avail[2*phys_avail_count] = phys_avail[j]; phys_avail[2*phys_avail_count + 1] = kernelstart & ~PAGE_MASK; phys_avail_count++; } phys_avail[j] = (kernelend & ~PAGE_MASK) + PAGE_SIZE; } } physmem = btoc(physsz); #ifdef PTEGCOUNT moea64_pteg_count = PTEGCOUNT; #else moea64_pteg_count = 0x1000; while (moea64_pteg_count < physmem) moea64_pteg_count <<= 1; moea64_pteg_count >>= 1; #endif /* PTEGCOUNT */ } void moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { int i; /* * Set PTEG mask */ moea64_pteg_mask = moea64_pteg_count - 1; /* * Initialize SLB table lock and page locks */ mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF); for (i = 0; i < PV_LOCK_COUNT; i++) mtx_init(&pv_lock[i], "page pv", NULL, MTX_DEF); /* * Initialise the bootstrap pvo pool. */ moea64_bpvo_pool = (struct pvo_entry *)moea64_bootstrap_alloc( moea64_bpvo_pool_size*sizeof(struct pvo_entry), 0); moea64_bpvo_pool_index = 0; /* * Make sure kernel vsid is allocated as well as VSID 0. */ #ifndef __powerpc64__ moea64_vsid_bitmap[(KERNEL_VSIDBITS & (NVSIDS - 1)) / VSID_NBPW] |= 1 << (KERNEL_VSIDBITS % VSID_NBPW); moea64_vsid_bitmap[0] |= 1; #endif /* * Initialize the kernel pmap (which is statically allocated). 
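 *
 * Review aside (not part of this change; the helper name and plain
 * uint64_t types are hypothetical): before this point,
 * moea64_early_bootstrap() above sized the page table. Its loop picks
 * the largest power of two below physmem, clamped to at least 0x800
 * PTEGs:
 *
 *	static uint64_t
 *	pteg_count_for(uint64_t physmem_pages)
 *	{
 *		uint64_t n = 0x1000;
 *
 *		while (n < physmem_pages)
 *			n <<= 1;
 *		return (n >> 1);
 *	}
 *
 * For example, 0x30000 pages (3 GB of 4 KB pages) yields 0x20000 PTEGs.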
*/ #ifdef __powerpc64__ for (i = 0; i < 64; i++) { pcpup->pc_slb[i].slbv = 0; pcpup->pc_slb[i].slbe = 0; } #else for (i = 0; i < 16; i++) kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i; #endif kernel_pmap->pmap_phys = kernel_pmap; CPU_FILL(&kernel_pmap->pm_active); RB_INIT(&kernel_pmap->pmap_pvo); PMAP_LOCK_INIT(kernel_pmap); /* * Now map in all the other buffers we allocated earlier */ moea64_setup_direct_map(mmup, kernelstart, kernelend); } void moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { ihandle_t mmui; phandle_t chosen; phandle_t mmu; ssize_t sz; int i; vm_offset_t pa, va; void *dpcpu; /* * Set up the Open Firmware pmap and add its mappings if not in real * mode. */ chosen = OF_finddevice("/chosen"); - if (chosen != -1 && OF_getprop(chosen, "mmu", &mmui, 4) != -1) { + if (chosen != -1 && OF_getencprop(chosen, "mmu", &mmui, 4) != -1) { mmu = OF_instance_to_package(mmui); if (mmu == -1 || (sz = OF_getproplen(mmu, "translations")) == -1) sz = 0; if (sz > 6144 /* tmpstksz - 2 KB headroom */) panic("moea64_bootstrap: too many ofw translations"); if (sz > 0) moea64_add_ofw_mappings(mmup, mmu, sz); } /* * Calculate the last available physical address. */ for (i = 0; phys_avail[i + 2] != 0; i += 2) ; Maxmem = powerpc_btop(phys_avail[i + 1]); /* * Initialize MMU and remap early physical mappings */ MMU_CPU_BOOTSTRAP(mmup,0); mtmsr(mfmsr() | PSL_DR | PSL_IR); pmap_bootstrapped++; bs_remap_earlyboot(); /* * Set the start and end of kva. */ virtual_avail = VM_MIN_KERNEL_ADDRESS; virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS; /* * Map the entire KVA range into the SLB. We must not fault there. */ #ifdef __powerpc64__ for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH) moea64_bootstrap_slb_prefault(va, 0); #endif /* * Figure out how far we can extend virtual_end into segment 16 * without running into existing mappings. Segment 16 is guaranteed * to contain neither RAM nor devices (at least on Apple hardware), * but will generally contain some OFW mappings we should not * step on. */ #ifndef __powerpc64__ /* KVA is in high memory on PPC64 */ PMAP_LOCK(kernel_pmap); while (virtual_end < VM_MAX_KERNEL_ADDRESS && moea64_pvo_find_va(kernel_pmap, virtual_end+1) == NULL) virtual_end += PAGE_SIZE; PMAP_UNLOCK(kernel_pmap); #endif /* * Allocate a kernel stack with a guard page for thread0 and map it * into the kernel page map. */ pa = moea64_bootstrap_alloc(kstack_pages * PAGE_SIZE, PAGE_SIZE); va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; virtual_avail = va + kstack_pages * PAGE_SIZE; CTR2(KTR_PMAP, "moea64_bootstrap: kstack0 at %#x (%#x)", pa, va); thread0.td_kstack = va; thread0.td_kstack_pages = kstack_pages; for (i = 0; i < kstack_pages; i++) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } /* * Allocate virtual address space for the message buffer. */ pa = msgbuf_phys = moea64_bootstrap_alloc(msgbufsize, PAGE_SIZE); msgbufp = (struct msgbuf *)virtual_avail; va = virtual_avail; virtual_avail += round_page(msgbufsize); while (va < virtual_avail) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } /* * Allocate virtual address space for the dynamic percpu area. */ pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE); dpcpu = (void *)virtual_avail; va = virtual_avail; virtual_avail += DPCPU_SIZE; while (va < virtual_avail) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } dpcpu_init(dpcpu, 0); /* * Allocate some things for page zeroing. 
We put this directly * in the page table and use MOEA64_PTE_REPLACE to avoid any * of the PVO book-keeping or other parts of the VM system * from even knowing that this hack exists. */ if (!hw_direct_map) { mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL, MTX_DEF); for (i = 0; i < 2; i++) { moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE; virtual_end -= PAGE_SIZE; moea64_kenter(mmup, moea64_scratchpage_va[i], 0); PMAP_LOCK(kernel_pmap); moea64_scratchpage_pvo[i] = moea64_pvo_find_va( kernel_pmap, (vm_offset_t)moea64_scratchpage_va[i]); PMAP_UNLOCK(kernel_pmap); } } } static void moea64_pmap_init_qpages(void) { struct pcpu *pc; int i; if (hw_direct_map) return; CPU_FOREACH(i) { pc = pcpu_find(i); pc->pc_qmap_addr = kva_alloc(PAGE_SIZE); if (pc->pc_qmap_addr == 0) panic("pmap_init_qpages: unable to allocate KVA"); PMAP_LOCK(kernel_pmap); pc->pc_qmap_pvo = moea64_pvo_find_va(kernel_pmap, pc->pc_qmap_addr); PMAP_UNLOCK(kernel_pmap); mtx_init(&pc->pc_qmap_lock, "qmap lock", NULL, MTX_DEF); } } SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, moea64_pmap_init_qpages, NULL); /* * Activate a user pmap. This mostly involves setting some non-CPU * state. */ void moea64_activate(mmu_t mmu, struct thread *td) { pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; CPU_SET(PCPU_GET(cpuid), &pm->pm_active); #ifdef __powerpc64__ PCPU_SET(userslb, pm->pm_slb); __asm __volatile("slbmte %0, %1; isync" :: "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); #else PCPU_SET(curpmap, pm->pmap_phys); mtsrin(USER_SR << ADDR_SR_SHFT, td->td_pcb->pcb_cpu.aim.usr_vsid); #endif } void moea64_deactivate(mmu_t mmu, struct thread *td) { pmap_t pm; __asm __volatile("isync; slbie %0" :: "r"(USER_ADDR)); pm = &td->td_proc->p_vmspace->vm_pmap; CPU_CLR(PCPU_GET(cpuid), &pm->pm_active); #ifdef __powerpc64__ PCPU_SET(userslb, NULL); #else PCPU_SET(curpmap, NULL); #endif } void moea64_unwire(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) { struct pvo_entry key, *pvo; vm_page_t m; int64_t refchg; key.pvo_vaddr = sva; PMAP_LOCK(pm); for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { if ((pvo->pvo_vaddr & PVO_WIRED) == 0) panic("moea64_unwire: pvo %p is missing PVO_WIRED", pvo); pvo->pvo_vaddr &= ~PVO_WIRED; refchg = MOEA64_PTE_REPLACE(mmu, pvo, 0 /* No invalidation */); if ((pvo->pvo_vaddr & PVO_MANAGED) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { if (refchg < 0) refchg = LPTE_CHG; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); refchg |= atomic_readandclear_32(&m->md.mdpg_attrs); if (refchg & LPTE_CHG) vm_page_dirty(m); if (refchg & LPTE_REF) vm_page_aflag_set(m, PGA_REFERENCED); } pm->pm_stats.wired_count--; } PMAP_UNLOCK(pm); } /* * This goes through and sets the physical address of our * special scratch PTE to the PA we want to zero or copy. 
Because * of locking issues (this can get called in pvo_enter() by * the UMA allocator), we can't use most other utility functions here */ static __inline void moea64_set_scratchpage_pa(mmu_t mmup, int which, vm_paddr_t pa) { KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!")); mtx_assert(&moea64_scratchpage_mtx, MA_OWNED); moea64_scratchpage_pvo[which]->pvo_pte.pa = moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa; MOEA64_PTE_REPLACE(mmup, moea64_scratchpage_pvo[which], MOEA64_PTE_INVALIDATE); isync(); } void moea64_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) { vm_offset_t dst; vm_offset_t src; dst = VM_PAGE_TO_PHYS(mdst); src = VM_PAGE_TO_PHYS(msrc); if (hw_direct_map) { bcopy((void *)src, (void *)dst, PAGE_SIZE); } else { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, src); moea64_set_scratchpage_pa(mmu, 1, dst); bcopy((void *)moea64_scratchpage_va[0], (void *)moea64_scratchpage_va[1], PAGE_SIZE); mtx_unlock(&moea64_scratchpage_mtx); } } static inline void moea64_copy_pages_dmap(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; int cnt; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); a_cp = (char *)VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]) + a_pg_offset; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); b_cp = (char *)VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]) + b_pg_offset; bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } static inline void moea64_copy_pages_nodmap(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; int cnt; mtx_lock(&moea64_scratchpage_mtx); while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); moea64_set_scratchpage_pa(mmu, 0, VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); a_cp = (char *)moea64_scratchpage_va[0] + a_pg_offset; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); moea64_set_scratchpage_pa(mmu, 1, VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); b_cp = (char *)moea64_scratchpage_va[1] + b_pg_offset; bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } mtx_unlock(&moea64_scratchpage_mtx); } void moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize) { if (hw_direct_map) { moea64_copy_pages_dmap(mmu, ma, a_offset, mb, b_offset, xfersize); } else { moea64_copy_pages_nodmap(mmu, ma, a_offset, mb, b_offset, xfersize); } } void moea64_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) { vm_paddr_t pa = VM_PAGE_TO_PHYS(m); if (size + off > PAGE_SIZE) panic("moea64_zero_page: size + off > PAGE_SIZE"); if (hw_direct_map) { bzero((caddr_t)pa + off, size); } else { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, pa); bzero((caddr_t)moea64_scratchpage_va[0] + off, size); mtx_unlock(&moea64_scratchpage_mtx); } } /* * Zero a page of physical memory by temporarily mapping it */ void moea64_zero_page(mmu_t mmu, vm_page_t m) { vm_paddr_t pa = VM_PAGE_TO_PHYS(m); vm_offset_t va, off; if (!hw_direct_map) { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, pa); va = moea64_scratchpage_va[0]; } else { va = pa; } for (off = 0; off < PAGE_SIZE; off += cacheline_size) __asm __volatile("dcbz 
0,%0" :: "r"(va + off)); if (!hw_direct_map) mtx_unlock(&moea64_scratchpage_mtx); } void moea64_zero_page_idle(mmu_t mmu, vm_page_t m) { moea64_zero_page(mmu, m); } vm_offset_t moea64_quick_enter_page(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; vm_paddr_t pa = VM_PAGE_TO_PHYS(m); if (hw_direct_map) return (pa); /* * MOEA64_PTE_REPLACE does some locking, so we can't just grab * a critical section and access the PCPU data like on i386. * Instead, pin the thread and grab the PCPU lock to prevent * a preempting thread from using the same PCPU data. */ sched_pin(); mtx_assert(PCPU_PTR(qmap_lock), MA_NOTOWNED); pvo = PCPU_GET(qmap_pvo); mtx_lock(PCPU_PTR(qmap_lock)); pvo->pvo_pte.pa = moea64_calc_wimg(pa, pmap_page_get_memattr(m)) | (uint64_t)pa; MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_INVALIDATE); isync(); return (PCPU_GET(qmap_addr)); } void moea64_quick_remove_page(mmu_t mmu, vm_offset_t addr) { if (hw_direct_map) return; mtx_assert(PCPU_PTR(qmap_lock), MA_OWNED); KASSERT(PCPU_GET(qmap_addr) == addr, ("moea64_quick_remove_page: invalid address")); mtx_unlock(PCPU_PTR(qmap_lock)); sched_unpin(); } /* * Map the given physical page at the specified virtual address in the * target pmap with the protection requested. If specified the page * will be wired down. */ int moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { struct pvo_entry *pvo, *oldpvo; struct pvo_head *pvo_head; uint64_t pte_lo; int error; if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); pvo = alloc_pvo_entry(0); pvo->pvo_pmap = NULL; /* to be filled in later */ pvo->pvo_pte.prot = prot; pte_lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m)); pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | pte_lo; if ((flags & PMAP_ENTER_WIRED) != 0) pvo->pvo_vaddr |= PVO_WIRED; if ((m->oflags & VPO_UNMANAGED) != 0 || !moea64_initialized) { pvo_head = NULL; } else { pvo_head = &m->md.mdpg_pvoh; pvo->pvo_vaddr |= PVO_MANAGED; } for (;;) { PV_PAGE_LOCK(m); PMAP_LOCK(pmap); if (pvo->pvo_pmap == NULL) init_pvo_entry(pvo, pmap, va); if (prot & VM_PROT_WRITE) if (pmap_bootstrapped && (m->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(m, PGA_WRITEABLE); oldpvo = moea64_pvo_find_va(pmap, va); if (oldpvo != NULL) { if (oldpvo->pvo_vaddr == pvo->pvo_vaddr && oldpvo->pvo_pte.pa == pvo->pvo_pte.pa && oldpvo->pvo_pte.prot == prot) { /* Identical mapping already exists */ error = 0; /* If not in page table, reinsert it */ if (MOEA64_PTE_SYNCH(mmu, oldpvo) < 0) { moea64_pte_overflow--; MOEA64_PTE_INSERT(mmu, oldpvo); } /* Then just clean up and go home */ PV_PAGE_UNLOCK(m); PMAP_UNLOCK(pmap); free_pvo_entry(pvo); break; } /* Otherwise, need to kill it first */ KASSERT(oldpvo->pvo_pmap == pmap, ("pmap of old " "mapping does not match new mapping")); moea64_pvo_remove_from_pmap(mmu, oldpvo); } error = moea64_pvo_enter(mmu, pvo, pvo_head); PV_PAGE_UNLOCK(m); PMAP_UNLOCK(pmap); /* Free any dead pages */ if (oldpvo != NULL) { PV_LOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); moea64_pvo_remove_from_page(mmu, oldpvo); PV_UNLOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); free_pvo_entry(oldpvo); } if (error != ENOMEM) break; if ((flags & PMAP_ENTER_NOSLEEP) != 0) return (KERN_RESOURCE_SHORTAGE); VM_OBJECT_ASSERT_UNLOCKED(m->object); VM_WAIT; } /* * Flush the page from the instruction cache if this page is * mapped executable and cacheable. 
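 *
 * Review restatement (not part of this change; the predicate name is
 * hypothetical) of the test below: the sync happens only on the first
 * executable-capable mapping of a cacheable page; cache-inhibited or
 * guarded mappings are skipped, as are LPTE_NOEXEC ones:
 *
 *	static int
 *	needs_icache_sync(int user_pmap, int already_executable,
 *	    uint64_t pte_lo)
 *	{
 *		return (user_pmap && !already_executable &&
 *		    (pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0);
 *	}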
*/ if (pmap != kernel_pmap && !(m->aflags & PGA_EXECUTABLE) && (pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { vm_page_aflag_set(m, PGA_EXECUTABLE); moea64_syncicache(mmu, pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE); } return (KERN_SUCCESS); } static void moea64_syncicache(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_paddr_t pa, vm_size_t sz) { /* * This is much trickier than on older systems because * we can't sync the icache on physical addresses directly * without a direct map. Instead we check a couple of cases * where the memory is already mapped in and, failing that, * use the same trick we use for page zeroing to create * a temporary mapping for this physical address. */ if (!pmap_bootstrapped) { /* * If PMAP is not bootstrapped, we are likely to be * in real mode. */ __syncicache((void *)pa, sz); } else if (pmap == kernel_pmap) { __syncicache((void *)va, sz); } else if (hw_direct_map) { __syncicache((void *)pa, sz); } else { /* Use the scratch page to set up a temp mapping */ mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 1, pa & ~ADDR_POFF); __syncicache((void *)(moea64_scratchpage_va[1] + (va & ADDR_POFF)), sz); mtx_unlock(&moea64_scratchpage_mtx); } } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void moea64_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_page_t m; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); m = m_start; while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { moea64_enter(mmu, pm, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP, 0); m = TAILQ_NEXT(m, listq); } } void moea64_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot) { moea64_enter(mmu, pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP, 0); } vm_paddr_t moea64_extract(mmu_t mmu, pmap_t pm, vm_offset_t va) { struct pvo_entry *pvo; vm_paddr_t pa; PMAP_LOCK(pm); pvo = moea64_pvo_find_va(pm, va); if (pvo == NULL) pa = 0; else pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va - PVO_VADDR(pvo)); PMAP_UNLOCK(pm); return (pa); } /* * Atomically extract and hold the physical page with the given * pmap and virtual address pair if that mapping permits the given * protection. 
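 *
 * Review note (not part of this change): the physical address is
 * rebuilt exactly as in moea64_extract() above, page frame bits from
 * the PTE plus the byte offset of the VA within the mapping. A sketch
 * with a hypothetical name and plain uint64_t types:
 *
 *	static uint64_t
 *	pvo_to_pa(uint64_t pte_pa, uint64_t pvo_va, uint64_t va)
 *	{
 *		return ((pte_pa & LPTE_RPGN) | (va - pvo_va));
 *	}
 *
 * The vm_page_pa_tryrelock() loop below simply restarts the lookup
 * whenever the page lock could not be taken without dropping the pmap
 * lock.
 */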
*/ vm_page_t moea64_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct pvo_entry *pvo; vm_page_t m; vm_paddr_t pa; m = NULL; pa = 0; PMAP_LOCK(pmap); retry: pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); if (pvo != NULL && (pvo->pvo_pte.prot & prot) == prot) { if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.pa & LPTE_RPGN, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); vm_page_hold(m); } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } static mmu_t installed_mmu; static void * moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) { struct pvo_entry *pvo; vm_offset_t va; vm_page_t m; int pflags, needed_lock; /* * This entire routine is a horrible hack to avoid bothering kmem * for new KVA addresses. Because this can get called from inside * kmem allocation routines, calling kmem for a new address here * can lead to multiply locking non-recursive mutexes. */ *flags = UMA_SLAB_PRIV; needed_lock = !PMAP_LOCKED(kernel_pmap); pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED; for (;;) { m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ); if (m == NULL) { if (wait & M_NOWAIT) return (NULL); VM_WAIT; } else break; } va = VM_PAGE_TO_PHYS(m); pvo = alloc_pvo_entry(1 /* bootstrap */); pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE; pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | LPTE_M; if (needed_lock) PMAP_LOCK(kernel_pmap); init_pvo_entry(pvo, kernel_pmap, va); pvo->pvo_vaddr |= PVO_WIRED; moea64_pvo_enter(installed_mmu, pvo, NULL); if (needed_lock) PMAP_UNLOCK(kernel_pmap); if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) bzero((void *)va, PAGE_SIZE); return (void *)va; } extern int elf32_nxstack; void moea64_init(mmu_t mmu) { CTR0(KTR_PMAP, "moea64_init"); moea64_pvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); if (!hw_direct_map) { installed_mmu = mmu; uma_zone_set_allocf(moea64_pvo_zone,moea64_uma_page_alloc); } #ifdef COMPAT_FREEBSD32 elf32_nxstack = 1; #endif moea64_initialized = TRUE; } boolean_t moea64_is_referenced(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_is_referenced: page %p is not managed", m)); return (moea64_query_bit(mmu, m, LPTE_REF)); } boolean_t moea64_is_modified(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have LPTE_CHG set. */ VM_OBJECT_ASSERT_LOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (moea64_query_bit(mmu, m, LPTE_CHG)); } boolean_t moea64_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va) { struct pvo_entry *pvo; boolean_t rv = TRUE; PMAP_LOCK(pmap); pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); if (pvo != NULL) rv = FALSE; PMAP_UNLOCK(pmap); return (rv); } void moea64_clear_modify(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("moea64_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have LPTE_CHG * set. If the object containing the page is locked and the page is * not exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 
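 *
 * Review note (not part of this change): modified/referenced queries
 * in this file are cache-then-scan (see moea64_query_bit() later in
 * the file). Roughly:
 *
 *	if (m->md.mdpg_attrs & ptebit)	// bit already latched
 *		return (TRUE);
 *	// otherwise powerpc_sync(), MOEA64_PTE_SYNCH() each live PVO,
 *	// and atomic_set_32() the RC bits back into mdpg_attrs.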
*/ if ((m->aflags & PGA_WRITEABLE) == 0) return; moea64_clear_bit(mmu, m, LPTE_CHG); } /* * Clear the write and modified bits in each of the given page's mappings. */ void moea64_remove_write(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; int64_t refchg, ret; pmap_t pmap; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; powerpc_sync(); PV_PAGE_LOCK(m); refchg = 0; LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); if (!(pvo->pvo_vaddr & PVO_DEAD) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { pvo->pvo_pte.prot &= ~VM_PROT_WRITE; ret = MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_PROT_UPDATE); if (ret < 0) ret = LPTE_CHG; refchg |= ret; if (pvo->pvo_pmap == kernel_pmap) isync(); } PMAP_UNLOCK(pmap); } if ((refchg | atomic_readandclear_32(&m->md.mdpg_attrs)) & LPTE_CHG) vm_page_dirty(m); vm_page_aflag_clear(m, PGA_WRITEABLE); PV_PAGE_UNLOCK(m); } /* * moea64_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int moea64_ts_referenced(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_ts_referenced: page %p is not managed", m)); return (moea64_clear_bit(mmu, m, LPTE_REF)); } /* * Modify the WIMG settings of all mappings for a page. */ void moea64_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) { struct pvo_entry *pvo; int64_t refchg; pmap_t pmap; uint64_t lo; if ((m->oflags & VPO_UNMANAGED) != 0) { m->md.mdpg_cache_attrs = ma; return; } lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), ma); PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); if (!(pvo->pvo_vaddr & PVO_DEAD)) { pvo->pvo_pte.pa &= ~LPTE_WIMG; pvo->pvo_pte.pa |= lo; refchg = MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_INVALIDATE); if (refchg < 0) refchg = (pvo->pvo_pte.prot & VM_PROT_WRITE) ? LPTE_CHG : 0; if ((pvo->pvo_vaddr & PVO_MANAGED) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { refchg |= atomic_readandclear_32(&m->md.mdpg_attrs); if (refchg & LPTE_CHG) vm_page_dirty(m); if (refchg & LPTE_REF) vm_page_aflag_set(m, PGA_REFERENCED); } if (pvo->pvo_pmap == kernel_pmap) isync(); } PMAP_UNLOCK(pmap); } m->md.mdpg_cache_attrs = ma; PV_PAGE_UNLOCK(m); } /* * Map a wired page into kernel virtual address space. 
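 *
 * Review usage sketch (not part of this change), assuming a free
 * kernel VA 'va' and a page-aligned physical address 'pa':
 *
 *	moea64_kenter(mmu, va, pa);		// wire pa at va
 *	KASSERT(moea64_kextract(mmu, va) == pa,
 *	    ("kenter/kextract mismatch"));
 *	moea64_kremove(mmu, va);		// tear the mapping down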
*/ void moea64_kenter_attr(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma) { int error; struct pvo_entry *pvo, *oldpvo; pvo = alloc_pvo_entry(0); pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; pvo->pvo_pte.pa = (pa & ~ADDR_POFF) | moea64_calc_wimg(pa, ma); pvo->pvo_vaddr |= PVO_WIRED; PMAP_LOCK(kernel_pmap); oldpvo = moea64_pvo_find_va(kernel_pmap, va); if (oldpvo != NULL) moea64_pvo_remove_from_pmap(mmu, oldpvo); init_pvo_entry(pvo, kernel_pmap, va); error = moea64_pvo_enter(mmu, pvo, NULL); PMAP_UNLOCK(kernel_pmap); /* Free any dead pages */ if (oldpvo != NULL) { PV_LOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); moea64_pvo_remove_from_page(mmu, oldpvo); PV_UNLOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); free_pvo_entry(oldpvo); } if (error != 0 && error != ENOENT) panic("moea64_kenter: failed to enter va %#zx pa %#zx: %d", va, pa, error); } void moea64_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) { moea64_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); } /* * Extract the physical page address associated with the given kernel virtual * address. */ vm_paddr_t moea64_kextract(mmu_t mmu, vm_offset_t va) { struct pvo_entry *pvo; vm_paddr_t pa; /* * Shortcut the direct-mapped case when applicable. We never put * anything but 1:1 mappings below VM_MIN_KERNEL_ADDRESS. */ if (va < VM_MIN_KERNEL_ADDRESS) return (va); PMAP_LOCK(kernel_pmap); pvo = moea64_pvo_find_va(kernel_pmap, va); KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR, va)); pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va - PVO_VADDR(pvo)); PMAP_UNLOCK(kernel_pmap); return (pa); } /* * Remove a wired page from kernel virtual address space. */ void moea64_kremove(mmu_t mmu, vm_offset_t va) { moea64_remove(mmu, kernel_pmap, va, va + PAGE_SIZE); } /* * Map a range of physical addresses into kernel virtual address space. * * The value passed in *virt is a suggested virtual address for the mapping. * Architectures which can support a direct-mapped physical to virtual region * can return the appropriate address within that region, leaving '*virt' * unchanged. Other architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped region. */ vm_offset_t moea64_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, vm_paddr_t pa_end, int prot) { vm_offset_t sva, va; if (hw_direct_map) { /* * Check if every page in the region is covered by the direct * map. The direct map covers all of physical memory. Use * moea64_calc_wimg() as a shortcut to see if the page is in * physical memory as a way to see if the direct map covers it. */ for (va = pa_start; va < pa_end; va += PAGE_SIZE) if (moea64_calc_wimg(va, VM_MEMATTR_DEFAULT) != LPTE_M) break; if (va == pa_end) return (pa_start); } sva = *virt; va = sva; /* XXX respect prot argument */ for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE) moea64_kenter(mmu, va, pa_start); *virt = va; return (sva); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. 
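 *
 * Review sketch (not part of this change; the struct and helper are
 * hypothetical stand-ins for the PVO list) of the capped scan below:
 *
 *	struct pv { struct pv *next; void *pmap; int dead; };
 *
 *	static int
 *	pv_in_first_16(struct pv *head, void *pmap)
 *	{
 *		struct pv *pv;
 *		int loops = 0;
 *
 *		for (pv = head; pv != NULL; pv = pv->next) {
 *			if (!pv->dead && pv->pmap == pmap)
 *				return (1);
 *			if (++loops >= 16)
 *				break;
 *		}
 *		return (0);
 *	}
 */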
boolean_t moea64_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) { int loops; struct pvo_entry *pvo; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_page_exists_quick: page %p is not managed", m)); loops = 0; rv = FALSE; PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { if (!(pvo->pvo_vaddr & PVO_DEAD) && pvo->pvo_pmap == pmap) { rv = TRUE; break; } if (++loops >= 16) break; } PV_PAGE_UNLOCK(m); return (rv); } /* * Return the number of managed mappings to the given physical page * that are wired. */ int moea64_page_wired_mappings(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; int count; count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) if ((pvo->pvo_vaddr & (PVO_DEAD | PVO_WIRED)) == PVO_WIRED) count++; PV_PAGE_UNLOCK(m); return (count); } static uintptr_t moea64_vsidcontext; uintptr_t moea64_get_unique_vsid(void) { u_int entropy; register_t hash; uint32_t mask; int i; entropy = 0; __asm __volatile("mftb %0" : "=r"(entropy)); mtx_lock(&moea64_slb_mutex); for (i = 0; i < NVSIDS; i += VSID_NBPW) { u_int n; /* * Create a new value by multiplying by a prime and adding in * entropy from the timebase register. This is to make the * VSID more random so that the PT hash function collides * less often. (Note that the prime causes gcc to do shifts * instead of a multiply.) */ moea64_vsidcontext = (moea64_vsidcontext * 0x1105) + entropy; hash = moea64_vsidcontext & (NVSIDS - 1); if (hash == 0) /* 0 is special, avoid it */ continue; n = hash >> 5; mask = 1 << (hash & (VSID_NBPW - 1)); hash = (moea64_vsidcontext & VSID_HASHMASK); if (moea64_vsid_bitmap[n] & mask) { /* collision? */ /* anything free in this bucket? */ if (moea64_vsid_bitmap[n] == 0xffffffff) { entropy = (moea64_vsidcontext >> 20); continue; } i = ffs(~moea64_vsid_bitmap[n]) - 1; mask = 1 << i; hash &= VSID_HASHMASK & ~(VSID_NBPW - 1); hash |= i; } if (hash == VSID_VRMA) /* also special, avoid this too */ continue; KASSERT(!(moea64_vsid_bitmap[n] & mask), ("Allocating in-use VSID %#zx\n", hash)); moea64_vsid_bitmap[n] |= mask; mtx_unlock(&moea64_slb_mutex); return (hash); } mtx_unlock(&moea64_slb_mutex); panic("%s: out of segments",__func__); } #ifdef __powerpc64__ void moea64_pinit(mmu_t mmu, pmap_t pmap) { RB_INIT(&pmap->pmap_pvo); pmap->pm_slb_tree_root = slb_alloc_tree(); pmap->pm_slb = slb_alloc_user_cache(); pmap->pm_slb_len = 0; } #else void moea64_pinit(mmu_t mmu, pmap_t pmap) { int i; uint32_t hash; RB_INIT(&pmap->pmap_pvo); if (pmap_bootstrapped) pmap->pmap_phys = (pmap_t)moea64_kextract(mmu, (vm_offset_t)pmap); else pmap->pmap_phys = pmap; /* * Allocate some segment registers for this pmap. */ hash = moea64_get_unique_vsid(); for (i = 0; i < 16; i++) pmap->pm_sr[i] = VSID_MAKE(i, hash); KASSERT(pmap->pm_sr[0] != 0, ("moea64_pinit: pm_sr[0] = 0")); } #endif /* * Initialize the pmap associated with process 0. */ void moea64_pinit0(mmu_t mmu, pmap_t pm) { PMAP_LOCK_INIT(pm); moea64_pinit(mmu, pm); bzero(&pm->pm_stats, sizeof(pm->pm_stats)); } /* * Set the physical protection on the specified range of this map as requested. */ static void moea64_pvo_protect(mmu_t mmu, pmap_t pm, struct pvo_entry *pvo, vm_prot_t prot) { struct vm_page *pg; vm_prot_t oldprot; int32_t refchg; PMAP_LOCK_ASSERT(pm, MA_OWNED); /* * Change the protection of the page.
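 *
 * Review sketch (not part of this change; the helper name is
 * hypothetical): the ref/chg convention used here and throughout the
 * file. A negative return from the PTE layer means the bits were lost
 * to eviction, so assume the worst for a formerly writable mapping:
 *
 *	static int64_t
 *	fold_refchg(int64_t refchg, int was_writable)
 *	{
 *		if (refchg < 0)
 *			return (was_writable ? LPTE_CHG : 0);
 *		return (refchg & (LPTE_REF | LPTE_CHG));
 *	}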
*/ oldprot = pvo->pvo_pte.prot; pvo->pvo_pte.prot = prot; pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); /* * If the PVO is in the page table, update mapping */ refchg = MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_PROT_UPDATE); if (refchg < 0) refchg = (oldprot & VM_PROT_WRITE) ? LPTE_CHG : 0; if (pm != kernel_pmap && pg != NULL && !(pg->aflags & PGA_EXECUTABLE) && (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { if ((pg->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(pg, PGA_EXECUTABLE); moea64_syncicache(mmu, pm, PVO_VADDR(pvo), pvo->pvo_pte.pa & LPTE_RPGN, PAGE_SIZE); } /* * Update vm about the REF/CHG bits if the page is managed and we have * removed write access. */ if (pg != NULL && (pvo->pvo_vaddr & PVO_MANAGED) && (oldprot & VM_PROT_WRITE)) { refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs); if (refchg & LPTE_CHG) vm_page_dirty(pg); if (refchg & LPTE_REF) vm_page_aflag_set(pg, PGA_REFERENCED); } } void moea64_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { struct pvo_entry *pvo, *tpvo, key; CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm, sva, eva, prot); KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap, ("moea64_protect: non current pmap")); if ((prot & VM_PROT_READ) == VM_PROT_NONE) { moea64_remove(mmu, pm, sva, eva); return; } PMAP_LOCK(pm); key.pvo_vaddr = sva; for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); moea64_pvo_protect(mmu, pm, pvo, prot); } PMAP_UNLOCK(pm); } /* * Map a list of wired pages into kernel virtual address space. This is * intended for temporary mappings which do not need page modification or * references recorded. Existing mappings in the region are overwritten. */ void moea64_qenter(mmu_t mmu, vm_offset_t va, vm_page_t *m, int count) { while (count-- > 0) { moea64_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); va += PAGE_SIZE; m++; } } /* * Remove page mappings from kernel virtual address space. Intended for * temporary mappings entered by moea64_qenter. 
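 *
 * Review usage sketch (not part of this change; the wrapper is a
 * hypothetical illustration): a temporary window pairs the two calls
 * around the access:
 *
 *	static void
 *	zero_pages_via_window(mmu_t mmu, vm_offset_t va, vm_page_t *m,
 *	    int npages)
 *	{
 *		moea64_qenter(mmu, va, m, npages);	// map the batch
 *		bzero((void *)va, (size_t)npages * PAGE_SIZE);
 *		moea64_qremove(mmu, va, npages);	// unmap it again
 *	}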
*/ void moea64_qremove(mmu_t mmu, vm_offset_t va, int count) { while (count-- > 0) { moea64_kremove(mmu, va); va += PAGE_SIZE; } } void moea64_release_vsid(uint64_t vsid) { int idx, mask; mtx_lock(&moea64_slb_mutex); idx = vsid & (NVSIDS-1); mask = 1 << (idx % VSID_NBPW); idx /= VSID_NBPW; KASSERT(moea64_vsid_bitmap[idx] & mask, ("Freeing unallocated VSID %#jx", vsid)); moea64_vsid_bitmap[idx] &= ~mask; mtx_unlock(&moea64_slb_mutex); } void moea64_release(mmu_t mmu, pmap_t pmap) { /* * Free segment registers' VSIDs */ #ifdef __powerpc64__ slb_free_tree(pmap); slb_free_user_cache(pmap->pm_slb); #else KASSERT(pmap->pm_sr[0] != 0, ("moea64_release: pm_sr[0] = 0")); moea64_release_vsid(VSID_TO_HASH(pmap->pm_sr[0])); #endif } /* * Remove all pages mapped by the specified pmap */ void moea64_remove_pages(mmu_t mmu, pmap_t pm) { struct pvo_entry *pvo, *tpvo; struct pvo_tree tofree; RB_INIT(&tofree); PMAP_LOCK(pm); RB_FOREACH_SAFE(pvo, pvo_tree, &pm->pmap_pvo, tpvo) { if (pvo->pvo_vaddr & PVO_WIRED) continue; /* * For locking reasons, remove this from the page table and * pmap, but save delinking from the vm_page for a second * pass */ moea64_pvo_remove_from_pmap(mmu, pvo); RB_INSERT(pvo_tree, &tofree, pvo); } PMAP_UNLOCK(pm); RB_FOREACH_SAFE(pvo, pvo_tree, &tofree, tpvo) { PV_LOCK(pvo->pvo_pte.pa & LPTE_RPGN); moea64_pvo_remove_from_page(mmu, pvo); PV_UNLOCK(pvo->pvo_pte.pa & LPTE_RPGN); RB_REMOVE(pvo_tree, &tofree, pvo); free_pvo_entry(pvo); } } /* * Remove the given range of addresses from the specified map. */ void moea64_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) { struct pvo_entry *pvo, *tpvo, key; struct pvo_tree tofree; /* * Perform an unsynchronized read. This is, however, safe. */ if (pm->pm_stats.resident_count == 0) return; key.pvo_vaddr = sva; RB_INIT(&tofree); PMAP_LOCK(pm); for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); /* * For locking reasons, remove this from the page table and * pmap, but save delinking from the vm_page for a second * pass */ moea64_pvo_remove_from_pmap(mmu, pvo); RB_INSERT(pvo_tree, &tofree, pvo); } PMAP_UNLOCK(pm); RB_FOREACH_SAFE(pvo, pvo_tree, &tofree, tpvo) { PV_LOCK(pvo->pvo_pte.pa & LPTE_RPGN); moea64_pvo_remove_from_page(mmu, pvo); PV_UNLOCK(pvo->pvo_pte.pa & LPTE_RPGN); RB_REMOVE(pvo_tree, &tofree, pvo); free_pvo_entry(pvo); } } /* * Remove physical page from all pmaps in which it resides. moea64_pvo_remove() * will reflect changes in pte's back to the vm_page. */ void moea64_remove_all(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo, *next_pvo; struct pvo_head freequeue; int wasdead; pmap_t pmap; LIST_INIT(&freequeue); PV_PAGE_LOCK(m); LIST_FOREACH_SAFE(pvo, vm_page_to_pvoh(m), pvo_vlink, next_pvo) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); wasdead = (pvo->pvo_vaddr & PVO_DEAD); if (!wasdead) moea64_pvo_remove_from_pmap(mmu, pvo); moea64_pvo_remove_from_page(mmu, pvo); if (!wasdead) LIST_INSERT_HEAD(&freequeue, pvo, pvo_vlink); PMAP_UNLOCK(pmap); } KASSERT(!pmap_page_is_mapped(m), ("Page still has mappings")); KASSERT(!(m->aflags & PGA_WRITEABLE), ("Page still writable")); PV_PAGE_UNLOCK(m); /* Clean up UMA allocations */ LIST_FOREACH_SAFE(pvo, &freequeue, pvo_vlink, next_pvo) free_pvo_entry(pvo); } /* * Allocate a physical page of memory directly from the phys_avail map. * Can only be called from moea64_bootstrap before avail start and end are * calculated. 
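 *
 * Review worked example (not part of this change): with
 * phys_avail = { 0x3000, 0x20000 } and a request for 0x4000 bytes
 * aligned to 0x4000, the start rounds up to
 * s = (0x3000 + 0x4000 - 1) & ~(0x4000 - 1) = 0x4000 and e = 0x8000;
 * since s != 0x3000 and e != 0x20000, the entry is split in place into
 * { 0x3000, 0x4000 } and { 0x8000, 0x20000 }.
 */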
vm_offset_t moea64_bootstrap_alloc(vm_size_t size, u_int align) { vm_offset_t s, e; int i, j; size = round_page(size); for (i = 0; phys_avail[i + 1] != 0; i += 2) { if (align != 0) s = (phys_avail[i] + align - 1) & ~(align - 1); else s = phys_avail[i]; e = s + size; if (s < phys_avail[i] || e > phys_avail[i + 1]) continue; if (s + size > platform_real_maxaddr()) continue; if (s == phys_avail[i]) { phys_avail[i] += size; } else if (e == phys_avail[i + 1]) { phys_avail[i + 1] -= size; } else { for (j = phys_avail_count * 2; j > i; j -= 2) { phys_avail[j] = phys_avail[j - 2]; phys_avail[j + 1] = phys_avail[j - 1]; } phys_avail[i + 3] = phys_avail[i + 1]; phys_avail[i + 1] = s; phys_avail[i + 2] = e; phys_avail_count++; } return (s); } panic("moea64_bootstrap_alloc: could not allocate memory"); } static int moea64_pvo_enter(mmu_t mmu, struct pvo_entry *pvo, struct pvo_head *pvo_head) { int first = 0, err; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); KASSERT(moea64_pvo_find_va(pvo->pvo_pmap, PVO_VADDR(pvo)) == NULL, ("Existing mapping for VA %#jx", (uintmax_t)PVO_VADDR(pvo))); moea64_pvo_enter_calls++; /* * Add to pmap list */ RB_INSERT(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo); /* * Remember if the list was empty and therefore will be the first * item. */ if (pvo_head != NULL) { if (LIST_FIRST(pvo_head) == NULL) first = 1; LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink); } if (pvo->pvo_vaddr & PVO_WIRED) pvo->pvo_pmap->pm_stats.wired_count++; pvo->pvo_pmap->pm_stats.resident_count++; /* * Insert it into the hardware page table */ err = MOEA64_PTE_INSERT(mmu, pvo); if (err != 0) { panic("moea64_pvo_enter: overflow"); } moea64_pvo_entries++; if (pvo->pvo_pmap == kernel_pmap) isync(); #ifdef __powerpc64__ /* * Make sure all our bootstrap mappings are in the SLB as soon * as virtual memory is switched on. */ if (!pmap_bootstrapped) moea64_bootstrap_slb_prefault(PVO_VADDR(pvo), pvo->pvo_vaddr & PVO_LARGE); #endif return (first ? ENOENT : 0); } static void moea64_pvo_remove_from_pmap(mmu_t mmu, struct pvo_entry *pvo) { struct vm_page *pg; int32_t refchg; KASSERT(pvo->pvo_pmap != NULL, ("Trying to remove PVO with no pmap")); PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); KASSERT(!(pvo->pvo_vaddr & PVO_DEAD), ("Trying to remove dead PVO")); /* * If there is an active pte entry, we need to deactivate it */ refchg = MOEA64_PTE_UNSET(mmu, pvo); if (refchg < 0) { /* * If it was evicted from the page table, be pessimistic and * dirty the page. */ if (pvo->pvo_pte.prot & VM_PROT_WRITE) refchg = LPTE_CHG; else refchg = 0; } /* * Update our statistics. */ pvo->pvo_pmap->pm_stats.resident_count--; if (pvo->pvo_vaddr & PVO_WIRED) pvo->pvo_pmap->pm_stats.wired_count--; /* * Remove this PVO from the pmap list.
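 *
 * Review note (not part of this change): removal is deliberately
 * two-phase, as the callers above show; this function runs under the
 * pmap lock, and delinking from the vm_page happens afterwards under
 * the PV lock:
 *
 *	moea64_pvo_remove_from_pmap(mmu, pvo);	// pmap and page table
 *	PV_LOCK(pvo->pvo_pte.pa & LPTE_RPGN);
 *	moea64_pvo_remove_from_page(mmu, pvo);	// vm_page side
 *	PV_UNLOCK(pvo->pvo_pte.pa & LPTE_RPGN);
 *	free_pvo_entry(pvo);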
*/ RB_REMOVE(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo); /* * Mark this for the next sweep */ pvo->pvo_vaddr |= PVO_DEAD; /* Send RC bits to VM */ if ((pvo->pvo_vaddr & PVO_MANAGED) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); if (pg != NULL) { refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs); if (refchg & LPTE_CHG) vm_page_dirty(pg); if (refchg & LPTE_REF) vm_page_aflag_set(pg, PGA_REFERENCED); } } } static void moea64_pvo_remove_from_page(mmu_t mmu, struct pvo_entry *pvo) { struct vm_page *pg; KASSERT(pvo->pvo_vaddr & PVO_DEAD, ("Trying to delink live page")); /* Use NULL pmaps as a sentinel for races in page deletion */ if (pvo->pvo_pmap == NULL) return; pvo->pvo_pmap = NULL; /* * Update vm about page writeability/executability if managed */ PV_LOCKASSERT(pvo->pvo_pte.pa & LPTE_RPGN); pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); if ((pvo->pvo_vaddr & PVO_MANAGED) && pg != NULL) { LIST_REMOVE(pvo, pvo_vlink); if (LIST_EMPTY(vm_page_to_pvoh(pg))) vm_page_aflag_clear(pg, PGA_WRITEABLE | PGA_EXECUTABLE); } moea64_pvo_entries--; moea64_pvo_remove_calls++; } static struct pvo_entry * moea64_pvo_find_va(pmap_t pm, vm_offset_t va) { struct pvo_entry key; PMAP_LOCK_ASSERT(pm, MA_OWNED); key.pvo_vaddr = va & ~ADDR_POFF; return (RB_FIND(pvo_tree, &pm->pmap_pvo, &key)); } static boolean_t moea64_query_bit(mmu_t mmu, vm_page_t m, uint64_t ptebit) { struct pvo_entry *pvo; int64_t ret; boolean_t rv; /* * See if this bit is stored in the page already. */ if (m->md.mdpg_attrs & ptebit) return (TRUE); /* * Examine each PTE. Sync so that any pending REF/CHG bits are * flushed to the PTEs. */ rv = FALSE; powerpc_sync(); PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { ret = 0; /* * See if this pvo has a valid PTE. if so, fetch the * REF/CHG bits from the valid PTE. If the appropriate * ptebit is set, return success. */ PMAP_LOCK(pvo->pvo_pmap); if (!(pvo->pvo_vaddr & PVO_DEAD)) ret = MOEA64_PTE_SYNCH(mmu, pvo); PMAP_UNLOCK(pvo->pvo_pmap); if (ret > 0) { atomic_set_32(&m->md.mdpg_attrs, ret & (LPTE_CHG | LPTE_REF)); if (ret & ptebit) { rv = TRUE; break; } } } PV_PAGE_UNLOCK(m); return (rv); } static u_int moea64_clear_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit) { u_int count; struct pvo_entry *pvo; int64_t ret; /* * Sync so that any pending REF/CHG bits are flushed to the PTEs (so * we can reset the right ones). */ powerpc_sync(); /* * For each pvo entry, clear the pte's ptebit. */ count = 0; PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { ret = 0; PMAP_LOCK(pvo->pvo_pmap); if (!(pvo->pvo_vaddr & PVO_DEAD)) ret = MOEA64_PTE_CLEAR(mmu, pvo, ptebit); PMAP_UNLOCK(pvo->pvo_pmap); if (ret > 0 && (ret & ptebit)) count++; } atomic_clear_32(&m->md.mdpg_attrs, ptebit); PV_PAGE_UNLOCK(m); return (count); } boolean_t moea64_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { struct pvo_entry *pvo, key; vm_offset_t ppa; int error = 0; PMAP_LOCK(kernel_pmap); key.pvo_vaddr = ppa = pa & ~ADDR_POFF; for (pvo = RB_FIND(pvo_tree, &kernel_pmap->pmap_pvo, &key); ppa < pa + size; ppa += PAGE_SIZE, pvo = RB_NEXT(pvo_tree, &kernel_pmap->pmap_pvo, pvo)) { if (pvo == NULL || (pvo->pvo_pte.pa & LPTE_RPGN) != ppa) { error = EFAULT; break; } } PMAP_UNLOCK(kernel_pmap); return (error); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. 
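 *
 * Review sketch (not part of this change; the helper name and plain
 * uint64_t types are hypothetical): the rounding below maps whole
 * pages but hands back a pointer that keeps the sub-page offset:
 *
 *	static uint64_t			// returns the span to map
 *	mapdev_span(uint64_t pa, uint64_t size, uint64_t *base,
 *	    uint64_t *off)
 *	{
 *		*base = pa & ~(uint64_t)PAGE_MASK;	// trunc_page(pa)
 *		*off = pa & PAGE_MASK;
 *		return ((*off + size + PAGE_MASK) &
 *		    ~(uint64_t)PAGE_MASK);		// roundup2()
 *	}
 *
 * E.g. pa = 0x1fff with size = 2 crosses a page boundary: base 0x1000,
 * off 0xfff, span 0x2000.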
*/ void * moea64_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) { vm_offset_t va, tmpva, ppa, offset; ppa = trunc_page(pa); offset = pa & PAGE_MASK; size = roundup2(offset + size, PAGE_SIZE); va = kva_alloc(size); if (!va) panic("moea64_mapdev: Couldn't alloc kernel virtual memory"); for (tmpva = va; size > 0;) { moea64_kenter_attr(mmu, tmpva, ppa, ma); size -= PAGE_SIZE; tmpva += PAGE_SIZE; ppa += PAGE_SIZE; } return ((void *)(va + offset)); } void * moea64_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { return moea64_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT); } void moea64_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) { vm_offset_t base, offset; base = trunc_page(va); offset = va & PAGE_MASK; size = roundup2(offset + size, PAGE_SIZE); kva_free(base, size); } void moea64_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) { struct pvo_entry *pvo; vm_offset_t lim; vm_paddr_t pa; vm_size_t len; PMAP_LOCK(pm); while (sz > 0) { lim = round_page(va); len = MIN(lim - va, sz); pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF); if (pvo != NULL && !(pvo->pvo_pte.pa & LPTE_I)) { pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va & ADDR_POFF); moea64_syncicache(mmu, pm, va, pa, len); } va += len; sz -= len; } PMAP_UNLOCK(pm); } void moea64_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va) { *va = (void *)pa; } extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1]; void moea64_scan_init(mmu_t mmu) { struct pvo_entry *pvo; vm_offset_t va; int i; if (!do_minidump) { /* Initialize phys. segments for dumpsys(). */ memset(&dump_map, 0, sizeof(dump_map)); mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); for (i = 0; i < pregions_sz; i++) { dump_map[i].pa_start = pregions[i].mr_start; dump_map[i].pa_size = pregions[i].mr_size; } return; } /* Virtual segments for minidumps: */ memset(&dump_map, 0, sizeof(dump_map)); /* 1st: kernel .data and .bss. */ dump_map[0].pa_start = trunc_page((uintptr_t)_etext); dump_map[0].pa_size = round_page((uintptr_t)_end) - dump_map[0].pa_start; /* 2nd: msgbuf and tables (see pmap_bootstrap()). */ dump_map[1].pa_start = (vm_paddr_t)msgbufp->msg_ptr; dump_map[1].pa_size = round_page(msgbufp->msg_size); /* 3rd: kernel VM. */ va = dump_map[1].pa_start + dump_map[1].pa_size; /* Find start of next chunk (from va). */ while (va < virtual_end) { /* Don't dump the buffer cache. */ if (va >= kmi.buffer_sva && va < kmi.buffer_eva) { va = kmi.buffer_eva; continue; } pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF); if (pvo != NULL && !(pvo->pvo_vaddr & PVO_DEAD)) break; va += PAGE_SIZE; } if (va < virtual_end) { dump_map[2].pa_start = va; va += PAGE_SIZE; /* Find last page in chunk. */ while (va < virtual_end) { /* Don't run into the buffer cache. */ if (va == kmi.buffer_sva) break; pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF); if (pvo != NULL && !(pvo->pvo_vaddr & PVO_DEAD)) break; va += PAGE_SIZE; } dump_map[2].pa_size = va - dump_map[2].pa_start; } } Index: projects/powernv/powerpc/aim/moea64_native.c =================================================================== --- projects/powernv/powerpc/aim/moea64_native.c (revision 290990) +++ projects/powernv/powerpc/aim/moea64_native.c (revision 290991) @@ -1,658 +1,662 @@ /*- * Copyright (c) 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Matt Thomas of Allegro Networks, Inc. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $ */ /*- * Copyright (C) 2001 Benno Rice. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Native 64-bit page table operations for running without a hypervisor. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmu_oea64.h" #include "mmu_if.h" #include "moea64_if.h" #define PTESYNC() __asm __volatile("ptesync"); #define TLBSYNC() __asm __volatile("tlbsync; ptesync"); #define SYNC() __asm __volatile("sync"); #define EIEIO() __asm __volatile("eieio"); #define VSID_HASH_MASK 0x0000007fffffffffULL static __inline void TLBIE(uint64_t vpn) { #ifndef __powerpc64__ register_t vpn_hi, vpn_lo; register_t msr; register_t scratch, intr; #endif static volatile u_int tlbie_lock = 0; vpn <<= ADDR_PIDX_SHFT; vpn &= ~(0xffffULL << 48); /* Hobo spinlock: we need stronger guarantees than mutexes provide */ while (!atomic_cmpset_int(&tlbie_lock, 0, 1)); isync(); /* Flush instruction queue once lock acquired */ #ifdef __powerpc64__ __asm __volatile("tlbie %0" :: "r"(vpn) : "memory"); __asm __volatile("eieio; tlbsync; ptesync" ::: "memory"); #else vpn_hi = (uint32_t)(vpn >> 32); vpn_lo = (uint32_t)vpn; intr = intr_disable(); __asm __volatile("\ mfmsr %0; \ mr %1, %0; \ insrdi %1,%5,1,0; \ mtmsrd %1; isync; \ \ sld %1,%2,%4; \ or %1,%1,%3; \ tlbie %1; \ \ mtmsrd %0; isync; \ eieio; \ tlbsync; \ ptesync;" : "=r"(msr), "=r"(scratch) : "r"(vpn_hi), "r"(vpn_lo), "r"(32), "r"(1) : "memory"); intr_restore(intr); #endif /* No barriers or special ops -- taken care of by ptesync above */ tlbie_lock = 0; } #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) #define ENABLE_TRANS(msr) mtmsr(msr) /* * PTEG data. */ static volatile struct lpte *moea64_pteg_table; static struct rwlock moea64_eviction_lock; /* * PTE calls. */ static int moea64_pte_insert_native(mmu_t, struct pvo_entry *); static int64_t moea64_pte_synch_native(mmu_t, struct pvo_entry *); static int64_t moea64_pte_clear_native(mmu_t, struct pvo_entry *, uint64_t); static int64_t moea64_pte_replace_native(mmu_t, struct pvo_entry *, int); static int64_t moea64_pte_unset_native(mmu_t mmu, struct pvo_entry *); /* * Utility routines. 
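 *
 * Review sketch (not part of this change; the function names are
 * hypothetical): the "hobo spinlock" serializing TLBIE() above,
 * reduced to its acquire/release pattern. The release can be a plain
 * store only because the ptesync issued inside the critical section
 * already ordered everything before it:
 *
 *	static volatile u_int tlbie_lk;
 *
 *	static void
 *	tlbie_lk_acquire(void)
 *	{
 *		while (!atomic_cmpset_int(&tlbie_lk, 0, 1))
 *			;		// spin
 *		isync();		// flush the instruction queue
 *	}
 *
 *	static void
 *	tlbie_lk_release(void)
 *	{
 *		tlbie_lk = 0;
 *	}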
*/ static void moea64_bootstrap_native(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend); static void moea64_cpu_bootstrap_native(mmu_t, int ap); static void tlbia(void); static mmu_method_t moea64_native_methods[] = { /* Internal interfaces */ MMUMETHOD(mmu_bootstrap, moea64_bootstrap_native), MMUMETHOD(mmu_cpu_bootstrap, moea64_cpu_bootstrap_native), MMUMETHOD(moea64_pte_synch, moea64_pte_synch_native), MMUMETHOD(moea64_pte_clear, moea64_pte_clear_native), MMUMETHOD(moea64_pte_unset, moea64_pte_unset_native), MMUMETHOD(moea64_pte_replace, moea64_pte_replace_native), MMUMETHOD(moea64_pte_insert, moea64_pte_insert_native), { 0, 0 } }; MMU_DEF_INHERIT(oea64_mmu_native, MMU_TYPE_G5, moea64_native_methods, 0, oea64_mmu); static int64_t moea64_pte_synch_native(mmu_t mmu, struct pvo_entry *pvo) { volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot; struct lpte properpt; uint64_t ptelo; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); moea64_pte_from_pvo(pvo, &properpt); rw_rlock(&moea64_eviction_lock); - if ((pt->pte_hi & LPTE_AVPN_MASK) != + if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != (properpt.pte_hi & LPTE_AVPN_MASK)) { /* Evicted */ rw_runlock(&moea64_eviction_lock); return (-1); } PTESYNC(); ptelo = be64toh(pt->pte_lo); rw_runlock(&moea64_eviction_lock); return (ptelo & (LPTE_REF | LPTE_CHG)); } static int64_t moea64_pte_clear_native(mmu_t mmu, struct pvo_entry *pvo, uint64_t ptebit) { volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot; struct lpte properpt; uint64_t ptelo; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); moea64_pte_from_pvo(pvo, &properpt); rw_rlock(&moea64_eviction_lock); - if ((pt->pte_hi & LPTE_AVPN_MASK) != + if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != (properpt.pte_hi & LPTE_AVPN_MASK)) { /* Evicted */ rw_runlock(&moea64_eviction_lock); return (-1); } if (ptebit == LPTE_REF) { /* See "Resetting the Reference Bit" in arch manual */ PTESYNC(); /* 2-step here safe: precision is not guaranteed */ - ptelo = pt->pte_lo; + ptelo = be64toh(pt->pte_lo); /* One-byte store to avoid touching the C bit */ ((volatile uint8_t *)(&pt->pte_lo))[6] = +#if BYTE_ORDER == BIG_ENDIAN ((uint8_t *)(&properpt.pte_lo))[6]; +#else + ((uint8_t *)(&properpt.pte_lo))[1]; +#endif rw_runlock(&moea64_eviction_lock); critical_enter(); TLBIE(pvo->pvo_vpn); critical_exit(); } else { rw_runlock(&moea64_eviction_lock); ptelo = moea64_pte_unset_native(mmu, pvo); moea64_pte_insert_native(mmu, pvo); } return (ptelo & (LPTE_REF | LPTE_CHG)); } static int64_t moea64_pte_unset_native(mmu_t mmu, struct pvo_entry *pvo) { volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot; struct lpte properpt; uint64_t ptelo; moea64_pte_from_pvo(pvo, &properpt); rw_rlock(&moea64_eviction_lock); - if ((pt->pte_hi & LPTE_AVPN_MASK) != + if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != (properpt.pte_hi & LPTE_AVPN_MASK)) { /* Evicted */ moea64_pte_overflow--; rw_runlock(&moea64_eviction_lock); return (-1); } /* * Invalidate the pte, briefly locking it to collect RC bits. No * atomics needed since this is protected against eviction by the lock.
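 *
 * Review restatement (not part of this change) of the sequence below,
 * in host byte order (the table entry itself is stored big-endian):
 *  1. clear LPTE_VALID and set LPTE_LOCKED in pte_hi;
 *  2. ptesync, then TLBIE the VPN so no stale translation remains;
 *  3. read the now-stable RC bits out of pte_lo;
 *  4. zero the low-order word of pte_hi, dropping LPTE_LOCKED.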
*/ isync(); critical_enter(); - pt->pte_hi = (pt->pte_hi & ~LPTE_VALID) | LPTE_LOCKED; + pt->pte_hi = be64toh((pt->pte_hi & ~LPTE_VALID) | LPTE_LOCKED); PTESYNC(); TLBIE(pvo->pvo_vpn); ptelo = be64toh(pt->pte_lo); *((volatile int32_t *)(&pt->pte_hi) + 1) = 0; /* Release lock */ critical_exit(); rw_runlock(&moea64_eviction_lock); /* Keep statistics */ moea64_pte_valid--; return (ptelo & (LPTE_CHG | LPTE_REF)); } static int64_t moea64_pte_replace_native(mmu_t mmu, struct pvo_entry *pvo, int flags) { volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot; struct lpte properpt; int64_t ptelo; if (flags == 0) { /* Just some software bits changing. */ moea64_pte_from_pvo(pvo, &properpt); rw_rlock(&moea64_eviction_lock); - if ((pt->pte_hi & LPTE_AVPN_MASK) != + if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != (properpt.pte_hi & LPTE_AVPN_MASK)) { rw_runlock(&moea64_eviction_lock); return (-1); } - pt->pte_hi = properpt.pte_hi; - ptelo = pt->pte_lo; + pt->pte_hi = htobe64(properpt.pte_hi); + ptelo = be64toh(pt->pte_lo); rw_runlock(&moea64_eviction_lock); } else { /* Otherwise, need reinsertion and deletion */ ptelo = moea64_pte_unset_native(mmu, pvo); moea64_pte_insert_native(mmu, pvo); } return (ptelo); } static void moea64_cpu_bootstrap_native(mmu_t mmup, int ap) { int i = 0; #ifdef __powerpc64__ struct slb *slb = PCPU_GET(slb); register_t seg0; #endif /* * Initialize segment registers and MMU */ mtmsr(mfmsr() & ~PSL_DR & ~PSL_IR); /* * Install kernel SLB entries */ #ifdef __powerpc64__ __asm __volatile ("slbia"); __asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : "r"(0)); for (i = 0; i < 64; i++) { if (!(slb[i].slbe & SLBE_VALID)) continue; __asm __volatile ("slbmte %0, %1" :: "r"(slb[i].slbv), "r"(slb[i].slbe)); } #else for (i = 0; i < 16; i++) mtsrin(i << ADDR_SR_SHFT, kernel_pmap->pm_sr[i]); #endif /* * Install page table */ __asm __volatile ("ptesync; mtsdr1 %0; isync" :: "r"((uintptr_t)moea64_pteg_table | (uintptr_t)(flsl(moea64_pteg_mask >> 11)))); tlbia(); } static void moea64_bootstrap_native(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { vm_size_t size; vm_offset_t off; vm_paddr_t pa; register_t msr; moea64_early_bootstrap(mmup, kernelstart, kernelend); /* * Allocate PTEG table. */ size = moea64_pteg_count * sizeof(struct lpteg); CTR2(KTR_PMAP, "moea64_bootstrap: %d PTEGs, %d bytes", moea64_pteg_count, size); rw_init(&moea64_eviction_lock, "pte eviction"); /* * We now need to allocate memory. This memory, to be allocated, * has to reside in a page table. The page table we are about to * allocate. We don't have BAT. So drop to data real mode for a minute * as a measure of last resort. We do this a couple times. */ moea64_pteg_table = (struct lpte *)moea64_bootstrap_alloc(size, size); DISABLE_TRANS(msr); bzero(__DEVOLATILE(void *, moea64_pteg_table), moea64_pteg_count * sizeof(struct lpteg)); ENABLE_TRANS(msr); CTR1(KTR_PMAP, "moea64_bootstrap: PTEG table at %p", moea64_pteg_table); moea64_mid_bootstrap(mmup, kernelstart, kernelend); /* * Add a mapping for the page table itself if there is no direct map. 
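 *
 * Note that both the bzero() above and the pmap_kenter() loop below run
 * bracketed by DISABLE_TRANS()/ENABLE_TRANS(), since the table is not
 * mapped yet. The usage pattern, as a sketch (illustrative only):
 *
 *	register_t msr;
 *
 *	DISABLE_TRANS(msr);	/* msr = mfmsr(); mtmsr(msr & ~PSL_DR) */
 *	/* ... touch physical addresses directly ... */
 *	ENABLE_TRANS(msr);	/* mtmsr(msr) */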
*/ if (!hw_direct_map) { size = moea64_pteg_count * sizeof(struct lpteg); off = (vm_offset_t)(moea64_pteg_table); DISABLE_TRANS(msr); for (pa = off; pa < off + size; pa += PAGE_SIZE) pmap_kenter(pa, pa); ENABLE_TRANS(msr); } /* Bring up virtual memory */ moea64_late_bootstrap(mmup, kernelstart, kernelend); } static void tlbia(void) { vm_offset_t i; #ifndef __powerpc64__ register_t msr, scratch; #endif TLBSYNC(); for (i = 0; i < 0xFF000; i += 0x00001000) { #ifdef __powerpc64__ __asm __volatile("tlbiel %0" :: "r"(i)); #else __asm __volatile("\ mfmsr %0; \ mr %1, %0; \ insrdi %1,%3,1,0; \ mtmsrd %1; \ isync; \ \ tlbiel %2; \ \ mtmsrd %0; \ isync;" : "=r"(msr), "=r"(scratch) : "r"(i), "r"(1)); #endif } EIEIO(); TLBSYNC(); } static int atomic_pte_lock(volatile struct lpte *pte, uint64_t bitmask, uint64_t *oldhi) { int ret; uint32_t oldhihalf; /* * Note: in principle, if just the locked bit were set here, we * could avoid needing the eviction lock. However, eviction occurs * so rarely that it isn't worth bothering about in practice. */ __asm __volatile ( "1:\tlwarx %1, 0, %3\n\t" /* load old value */ "and. %0,%1,%4\n\t" /* check if any bits set */ "bne 2f\n\t" /* exit if any set */ "stwcx. %5, 0, %3\n\t" /* attempt to store */ "bne- 1b\n\t" /* spin if failed */ "li %0, 1\n\t" /* success - retval = 1 */ "b 3f\n\t" /* we've succeeded */ "2:\n\t" "stwcx. %1, 0, %3\n\t" /* clear reservation (74xx) */ "li %0, 0\n\t" /* failure - retval = 0 */ "3:\n\t" : "=&r" (ret), "=&r"(oldhihalf), "=m" (pte->pte_hi) : "r" ((volatile char *)&pte->pte_hi + 4), "r" ((uint32_t)bitmask), "r" ((uint32_t)LPTE_LOCKED), "m" (pte->pte_hi) : "cr0", "cr1", "cr2", "memory"); *oldhi = (pte->pte_hi & 0xffffffff00000000ULL) | oldhihalf; return (ret); } static uintptr_t moea64_insert_to_pteg_native(struct lpte *pvo_pt, uintptr_t slotbase, uint64_t mask) { volatile struct lpte *pt; uint64_t oldptehi, va; uintptr_t k; int i, j; /* Start at a random slot */ i = mftb() % 8; for (j = 0; j < 8; j++) { k = slotbase + (i + j) % 8; pt = &moea64_pteg_table[k]; /* Invalidate and seize lock only if no bits in mask set */ if (atomic_pte_lock(pt, mask, &oldptehi)) /* Lock obtained */ break; } if (j == 8) return (-1); if (oldptehi & LPTE_VALID) { KASSERT(!(oldptehi & LPTE_WIRED), ("Unmapped wired entry")); /* * Need to invalidate old entry completely: see * "Modifying a Page Table Entry". Need to reconstruct * the virtual address for the outgoing entry to do that. */ if (oldptehi & LPTE_BIG) va = oldptehi >> moea64_large_page_shift; else va = oldptehi >> ADDR_PIDX_SHFT; if (oldptehi & LPTE_HID) va = (((k >> 3) ^ moea64_pteg_mask) ^ va) & VSID_HASH_MASK; else va = ((k >> 3) ^ va) & VSID_HASH_MASK; va |= (oldptehi & LPTE_AVPN_MASK) << (ADDR_API_SHFT64 - ADDR_PIDX_SHFT); PTESYNC(); TLBIE(va); moea64_pte_valid--; moea64_pte_overflow++; } /* * Update the PTE as per "Adding a Page Table Entry". Lock is released * by setting the high doubleword. */ - pt->pte_lo = pvo_pt->pte_lo; + pt->pte_lo = htobe64(pvo_pt->pte_lo); EIEIO(); - pt->pte_hi = pvo_pt->pte_hi; + pt->pte_hi = htobe64(pvo_pt->pte_hi); PTESYNC(); /* Keep statistics */ moea64_pte_valid++; return (k); } static int moea64_pte_insert_native(mmu_t mmu, struct pvo_entry *pvo) { struct lpte insertpt; uintptr_t slot; /* Initialize PTE */ moea64_pte_from_pvo(pvo, &insertpt); /* Make sure further insertion is locked out during evictions */ rw_rlock(&moea64_eviction_lock); /* * First try primary hash.
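 *
 * Every PTE hashes to two candidate PTEGs; the secondary group and its
 * hash ID are derived from the primary by XOR, which is exactly what
 * the fallback path below does. In sketch form (illustrative only):
 *
 *	insertpt.pte_hi ^= LPTE_HID;			/* flip hash ID */
 *	pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);	/* other PTEG */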
*/ pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */ slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, LPTE_VALID | LPTE_WIRED | LPTE_LOCKED); if (slot != -1) { rw_runlock(&moea64_eviction_lock); pvo->pvo_pte.slot = slot; return (0); } /* * Now try secondary hash. */ pvo->pvo_vaddr ^= PVO_HID; insertpt.pte_hi ^= LPTE_HID; pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, LPTE_VALID | LPTE_WIRED | LPTE_LOCKED); if (slot != -1) { rw_runlock(&moea64_eviction_lock); pvo->pvo_pte.slot = slot; return (0); } /* * Out of luck. Find a PTE to sacrifice. */ /* Lock out all insertions for a bit */ if (!rw_try_upgrade(&moea64_eviction_lock)) { rw_runlock(&moea64_eviction_lock); rw_wlock(&moea64_eviction_lock); } slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, LPTE_WIRED | LPTE_LOCKED); if (slot != -1) { rw_wunlock(&moea64_eviction_lock); pvo->pvo_pte.slot = slot; return (0); } /* Try other hash table. Now we're getting desperate... */ pvo->pvo_vaddr ^= PVO_HID; insertpt.pte_hi ^= LPTE_HID; pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, LPTE_WIRED | LPTE_LOCKED); if (slot != -1) { rw_wunlock(&moea64_eviction_lock); pvo->pvo_pte.slot = slot; return (0); } /* No freeable slots in either PTEG? We're hosed. */ rw_wunlock(&moea64_eviction_lock); panic("moea64_pte_insert: overflow"); return (-1); } Index: projects/powernv/powerpc/ofw/ofw_machdep.c =================================================================== --- projects/powernv/powerpc/ofw/ofw_machdep.c (revision 290990) +++ projects/powernv/powerpc/ofw/ofw_machdep.c (revision 290991) @@ -1,699 +1,699 @@ /*- * Copyright (C) 1996 Wolfgang Solfrank. * Copyright (C) 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $NetBSD: ofw_machdep.c,v 1.5 2000/05/23 13:25:43 tsubai Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void *fdt; int ofw_real_mode; #ifdef AIM extern register_t ofmsr[5]; extern void *openfirmware_entry; char save_trap_init[0x2f00]; /* EXC_LAST */ char save_trap_of[0x2f00]; /* EXC_LAST */ int ofwcall(void *); static int openfirmware(void *args); __inline void ofw_save_trap_vec(char *save_trap_vec) { if (!ofw_real_mode) return; bcopy((void *)EXC_RST, save_trap_vec, EXC_LAST - EXC_RST); } static __inline void ofw_restore_trap_vec(char *restore_trap_vec) { if (!ofw_real_mode) return; bcopy(restore_trap_vec, (void *)EXC_RST, EXC_LAST - EXC_RST); __syncicache(EXC_RSVD, EXC_LAST - EXC_RSVD); } /* * Saved SPRG0-3 from OpenFirmware. Will be restored prior to the callback. */ register_t ofw_sprg0_save; static __inline void ofw_sprg_prepare(void) { if (ofw_real_mode) return; /* * Assume that interrupts are disabled at this point, or * SPRG1-3 could be trashed */ __asm __volatile("mfsprg0 %0\n\t" "mtsprg0 %1\n\t" "mtsprg1 %2\n\t" "mtsprg2 %3\n\t" "mtsprg3 %4\n\t" : "=&r"(ofw_sprg0_save) : "r"(ofmsr[1]), "r"(ofmsr[2]), "r"(ofmsr[3]), "r"(ofmsr[4])); } static __inline void ofw_sprg_restore(void) { if (ofw_real_mode) return; /* * Note that SPRG1-3 contents are irrelevant. They are scratch * registers used in the early portion of trap handling when * interrupts are disabled. * * PCPU data cannot be used until this routine is called! */ __asm __volatile("mtsprg0 %0" :: "r"(ofw_sprg0_save)); } #endif static int parse_ofw_memory(phandle_t node, const char *prop, struct mem_region *output) { cell_t address_cells, size_cells; cell_t OFmem[4 * PHYS_AVAIL_SZ]; int sz, i, j; phandle_t phandle; sz = 0; /* * Get #address-cells from root node, defaulting to 1 if it cannot * be found. */ phandle = OF_finddevice("/"); - if (OF_getprop(phandle, "#address-cells", &address_cells, + if (OF_getencprop(phandle, "#address-cells", &address_cells, sizeof(address_cells)) < (ssize_t)sizeof(address_cells)) address_cells = 1; - if (OF_getprop(phandle, "#size-cells", &size_cells, + if (OF_getencprop(phandle, "#size-cells", &size_cells, sizeof(size_cells)) < (ssize_t)sizeof(size_cells)) size_cells = 1; /* * Get memory. */ - if (node == -1 || (sz = OF_getprop(node, prop, + if (node == -1 || (sz = OF_getencprop(node, prop, OFmem, sizeof(OFmem))) <= 0) panic("Physical memory map not found"); i = 0; j = 0; while (i < sz/sizeof(cell_t)) { #ifndef __powerpc64__ /* On 32-bit PPC, ignore regions starting above 4 GB */ if (address_cells > 1 && OFmem[i] > 0) { i += address_cells + size_cells; continue; } #endif output[j].mr_start = OFmem[i++]; if (address_cells == 2) { #ifdef __powerpc64__ output[j].mr_start <<= 32; #endif output[j].mr_start += OFmem[i++]; } output[j].mr_size = OFmem[i++]; if (size_cells == 2) { #ifdef __powerpc64__ output[j].mr_size <<= 32; #endif output[j].mr_size += OFmem[i++]; } #ifndef __powerpc64__ /* * Check for memory regions extending above 32-bit * memory space, and restrict them to stay there.
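 *
 * A worked example with hypothetical values: a region with
 * mr_start = 0xc0000000 and mr_size = 0x80000000 would end at
 * 0x140000000, beyond BUS_SPACE_MAXADDR_32BIT, so the clamp below
 * shrinks mr_size to 0xffffffff - 0xc0000000 = 0x3fffffff.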
*/ if (((uint64_t)output[j].mr_start + (uint64_t)output[j].mr_size) > BUS_SPACE_MAXADDR_32BIT) { output[j].mr_size = BUS_SPACE_MAXADDR_32BIT - output[j].mr_start; } #endif j++; } sz = j*sizeof(output[0]); return (sz); } static int excise_fdt_reserved(struct mem_region *avail, int asz) { struct { uint64_t address; uint64_t size; } fdtmap[16]; ssize_t fdtmapsize; phandle_t chosen; int i, j, k; chosen = OF_finddevice("/chosen"); fdtmapsize = OF_getprop(chosen, "fdtmemreserv", fdtmap, sizeof(fdtmap)); for (j = 0; j < fdtmapsize/sizeof(fdtmap[0]); j++) { fdtmap[j].address = be64toh(fdtmap[j].address); fdtmap[j].size = be64toh(fdtmap[j].size); } for (i = 0; i < asz; i++) { for (j = 0; j < fdtmapsize/sizeof(fdtmap[0]); j++) { /* * Case 1: Exclusion region encloses complete * available entry. Drop it and move on. */ if (fdtmap[j].address <= avail[i].mr_start && fdtmap[j].address + fdtmap[j].size >= avail[i].mr_start + avail[i].mr_size) { for (k = i+1; k < asz; k++) avail[k-1] = avail[k]; asz--; i--; /* Repeat some entries */ continue; } /* * Case 2: Exclusion region starts in available entry. * Trim it to where the entry begins and append * a new available entry with the region after * the excluded region, if any. */ if (fdtmap[j].address >= avail[i].mr_start && fdtmap[j].address < avail[i].mr_start + avail[i].mr_size) { if (fdtmap[j].address + fdtmap[j].size < avail[i].mr_start + avail[i].mr_size) { avail[asz].mr_start = fdtmap[j].address + fdtmap[j].size; avail[asz].mr_size = avail[i].mr_start + avail[i].mr_size - avail[asz].mr_start; asz++; } avail[i].mr_size = fdtmap[j].address - avail[i].mr_start; } /* * Case 3: Exclusion region ends in available entry. * Move start point to where the exclusion zone ends. * The case of a contained exclusion zone has already * been caught in case 2. */ if (fdtmap[j].address + fdtmap[j].size >= avail[i].mr_start && fdtmap[j].address + fdtmap[j].size < avail[i].mr_start + avail[i].mr_size) { avail[i].mr_size += avail[i].mr_start; avail[i].mr_start = fdtmap[j].address + fdtmap[j].size; avail[i].mr_size -= avail[i].mr_start; } } } return (asz); } /* * This is called during powerpc_init, before the system is really initialized. * It shall provide the total and the available regions of RAM. * The available regions need not take the kernel into account. */ void ofw_mem_regions(struct mem_region *memp, int *memsz, struct mem_region *availp, int *availsz) { phandle_t phandle; int asz, msz; int res; char name[31]; asz = msz = 0; /* * Get memory from all the /memory nodes. 
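 *
 * A typical node, as a hypothetical example (2 GB of RAM with the
 * first 16 MB reserved by firmware):
 *
 *	memory@0 {
 *		device_type = "memory";
 *		reg = <0x0 0x80000000>;
 *		available = <0x01000000 0x7f000000>;
 *	};
 *
 * "reg" feeds the total-memory list; "available", when present, feeds
 * the available list. Both are decoded by parse_ofw_memory() above.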
*/ for (phandle = OF_child(OF_peer(0)); phandle != 0; phandle = OF_peer(phandle)) { if (OF_getprop(phandle, "name", name, sizeof(name)) <= 0) continue; if (strncmp(name, "memory", sizeof(name)) != 0 && strncmp(name, "memory@", strlen("memory@")) != 0) continue; res = parse_ofw_memory(phandle, "reg", &memp[msz]); msz += res/sizeof(struct mem_region); if (OF_getproplen(phandle, "available") >= 0) res = parse_ofw_memory(phandle, "available", &availp[asz]); else res = parse_ofw_memory(phandle, "reg", &availp[asz]); asz += res/sizeof(struct mem_region); } phandle = OF_finddevice("/chosen"); if (OF_hasprop(phandle, "fdtmemreserv")) asz = excise_fdt_reserved(availp, asz); *memsz = msz; *availsz = asz; } void OF_initial_setup(void *fdt_ptr, void *junk, int (*openfirm)(void *)) { #ifdef AIM ofmsr[0] = mfmsr(); #ifdef __powerpc64__ ofmsr[0] &= ~PSL_SF; #endif __asm __volatile("mfsprg0 %0" : "=&r"(ofmsr[1])); __asm __volatile("mfsprg1 %0" : "=&r"(ofmsr[2])); __asm __volatile("mfsprg2 %0" : "=&r"(ofmsr[3])); __asm __volatile("mfsprg3 %0" : "=&r"(ofmsr[4])); openfirmware_entry = openfirm; if (ofmsr[0] & PSL_DR) ofw_real_mode = 0; else ofw_real_mode = 1; ofw_save_trap_vec(save_trap_init); #else ofw_real_mode = 1; #endif fdt = fdt_ptr; #ifdef FDT_DTB_STATIC /* Check for a statically included blob */ if (fdt == NULL) fdt = &fdt_static_dtb; #endif } boolean_t OF_bootstrap() { boolean_t status = FALSE; #ifdef AIM if (openfirmware_entry != NULL) { if (ofw_real_mode) { status = OF_install(OFW_STD_REAL, 0); } else { #ifdef __powerpc64__ status = OF_install(OFW_STD_32BIT, 0); #else status = OF_install(OFW_STD_DIRECT, 0); #endif } if (status != TRUE) return status; OF_init(openfirmware); } else #endif if (fdt != NULL) { status = OF_install(OFW_FDT, 0); if (status != TRUE) return status; OF_init(fdt); OF_interpret("perform-fixup", 0); } return (status); } #ifdef AIM void ofw_quiesce(void) { struct { cell_t name; cell_t nargs; cell_t nreturns; } args; KASSERT(!pmap_bootstrapped, ("Cannot call ofw_quiesce after VM is up")); args.name = (cell_t)(uintptr_t)"quiesce"; args.nargs = 0; args.nreturns = 0; openfirmware(&args); } static int openfirmware_core(void *args) { int result; register_t oldmsr; if (openfirmware_entry == NULL) return (-1); /* * Turn off exceptions - we really don't want to end up * anywhere unexpected with PCPU set to something strange * or the stack pointer wrong. */ oldmsr = intr_disable(); ofw_sprg_prepare(); /* Save trap vectors */ ofw_save_trap_vec(save_trap_of); /* Restore initially saved trap vectors */ ofw_restore_trap_vec(save_trap_init); #if defined(AIM) && !defined(__powerpc64__) /* * Clear battable[] translations */ if (!(cpu_features & PPC_FEATURE_64)) __asm __volatile("mtdbatu 2, %0\n" "mtdbatu 3, %0" : : "r" (0)); isync(); #endif result = ofwcall(args); /* Restore trap vectors */ ofw_restore_trap_vec(save_trap_of); ofw_sprg_restore(); intr_restore(oldmsr); return (result); } #ifdef SMP struct ofw_rv_args { void *args; int retval; volatile int in_progress; }; static void ofw_rendezvous_dispatch(void *xargs) { struct ofw_rv_args *rv_args = xargs; /* NOTE: Interrupts are disabled here */ if (PCPU_GET(cpuid) == 0) { /* * Execute all OF calls on CPU 0 */ rv_args->retval = openfirmware_core(rv_args->args); rv_args->in_progress = 0; } else { /* * Spin with interrupts off on other CPUs while OF has * control of the machine.
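 *
 * in_progress is declared volatile int in struct ofw_rv_args precisely
 * so that the loop below re-reads memory on every pass; without it the
 * compiler could hoist the load and spin forever on a stale value.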
*/ while (rv_args->in_progress) cpu_spinwait(); } } #endif static int openfirmware(void *args) { int result; #ifdef SMP struct ofw_rv_args rv_args; #endif if (openfirmware_entry == NULL) return (-1); #ifdef SMP rv_args.args = args; rv_args.in_progress = 1; smp_rendezvous(smp_no_rendevous_barrier, ofw_rendezvous_dispatch, smp_no_rendevous_barrier, &rv_args); result = rv_args.retval; #else result = openfirmware_core(args); #endif return (result); } void OF_reboot() { struct { cell_t name; cell_t nargs; cell_t nreturns; cell_t arg; } args; args.name = (cell_t)(uintptr_t)"interpret"; args.nargs = 1; args.nreturns = 0; args.arg = (cell_t)(uintptr_t)"reset-all"; openfirmware_core(&args); /* Don't do rendezvous! */ for (;;); /* just in case */ } #endif /* AIM */ void OF_getetheraddr(device_t dev, u_char *addr) { phandle_t node; node = ofw_bus_get_node(dev); OF_getprop(node, "local-mac-address", addr, ETHER_ADDR_LEN); } /* * Return a bus handle and bus tag that corresponds to the register * numbered regno for the device referenced by the package handle * dev. This function is intended to be used by console drivers in * early boot only. It works by mapping the address of the device's * register in the address space of its parent and recursively walking * the device tree upward this way. */ static void OF_get_addr_props(phandle_t node, uint32_t *addrp, uint32_t *sizep, int *pcip) { char type[64]; uint32_t addr, size; int pci, res; - res = OF_getprop(node, "#address-cells", &addr, sizeof(addr)); + res = OF_getencprop(node, "#address-cells", &addr, sizeof(addr)); if (res == -1) addr = 2; - res = OF_getprop(node, "#size-cells", &size, sizeof(size)); + res = OF_getencprop(node, "#size-cells", &size, sizeof(size)); if (res == -1) size = 1; pci = 0; if (addr == 3 && size == 2) { res = OF_getprop(node, "device_type", type, sizeof(type)); if (res != -1) { type[sizeof(type) - 1] = '\0'; pci = (strcmp(type, "pci") == 0) ? 1 : 0; } } if (addrp != NULL) *addrp = addr; if (sizep != NULL) *sizep = size; if (pcip != NULL) *pcip = pci; } int OF_decode_addr(phandle_t dev, int regno, bus_space_tag_t *tag, bus_space_handle_t *handle) { uint32_t cell[32]; bus_addr_t addr, raddr, baddr; bus_size_t size, rsize; uint32_t c, nbridge, naddr, nsize; phandle_t bridge, parent; u_int spc, rspc, prefetch; int pci, pcib, res; /* Sanity checking. */ if (dev == 0) return (EINVAL); bridge = OF_parent(dev); if (bridge == 0) return (EINVAL); if (regno < 0) return (EINVAL); if (tag == NULL || handle == NULL) return (EINVAL); /* Assume big-endian unless we find a PCI device */ *tag = &bs_be_tag; /* Get the requested register. */ OF_get_addr_props(bridge, &naddr, &nsize, &pci); if (pci) *tag = &bs_le_tag; - res = OF_getprop(dev, (pci) ? "assigned-addresses" : "reg", + res = OF_getencprop(dev, (pci) ? "assigned-addresses" : "reg", cell, sizeof(cell)); if (res == -1) return (ENXIO); if (res % sizeof(cell[0])) return (ENXIO); res /= sizeof(cell[0]); regno *= naddr + nsize; if (regno + naddr + nsize > res) return (EINVAL); spc = (pci) ? cell[regno] & OFW_PCI_PHYS_HI_SPACEMASK : ~0; prefetch = (pci) ? cell[regno] & OFW_PCI_PHYS_HI_PREFETCHABLE : 0; addr = 0; for (c = 0; c < naddr; c++) addr = ((uint64_t)addr << 32) | cell[regno++]; size = 0; for (c = 0; c < nsize; c++) size = ((uint64_t)size << 32) | cell[regno++]; /* * Map the address range in the bridge's decoding window as given * by the "ranges" property. If a node doesn't have such property * then no mapping is done.
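 *
 * A worked translation with hypothetical cell values: a ranges entry
 * mapping child address 0x80000000 (raddr) onto parent address
 * 0xf2000000 (baddr) for 0x01000000 bytes (rsize) rewrites a register
 * at child address 0x80004000, since it falls in [raddr, raddr + rsize),
 * to 0x80004000 - 0x80000000 + 0xf2000000 = 0xf2004000, which is the
 * "addr = addr - raddr + baddr" step in the loop below.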
*/ parent = OF_parent(bridge); while (parent != 0) { OF_get_addr_props(parent, &nbridge, NULL, &pcib); if (pcib) *tag = &bs_le_tag; - res = OF_getprop(bridge, "ranges", cell, sizeof(cell)); + res = OF_getencprop(bridge, "ranges", cell, sizeof(cell)); if (res == -1) goto next; if (res % sizeof(cell[0])) return (ENXIO); res /= sizeof(cell[0]); regno = 0; while (regno < res) { rspc = (pci) ? cell[regno] & OFW_PCI_PHYS_HI_SPACEMASK : ~0; if (rspc != spc) { regno += naddr + nbridge + nsize; continue; } raddr = 0; for (c = 0; c < naddr; c++) raddr = ((uint64_t)raddr << 32) | cell[regno++]; rspc = (pcib) ? cell[regno] & OFW_PCI_PHYS_HI_SPACEMASK : ~0; baddr = 0; for (c = 0; c < nbridge; c++) baddr = ((uint64_t)baddr << 32) | cell[regno++]; rsize = 0; for (c = 0; c < nsize; c++) rsize = ((uint64_t)rsize << 32) | cell[regno++]; if (addr < raddr || addr >= raddr + rsize) continue; addr = addr - raddr + baddr; if (rspc != ~0) spc = rspc; } next: bridge = parent; parent = OF_parent(bridge); OF_get_addr_props(bridge, &naddr, &nsize, &pci); } return (bus_space_map(*tag, addr, size, prefetch ? BUS_SPACE_MAP_PREFETCHABLE : 0, handle)); } Index: projects/powernv/powerpc/ofw/ofw_pci.c =================================================================== --- projects/powernv/powerpc/ofw/ofw_pci.c (revision 290990) +++ projects/powernv/powerpc/ofw/ofw_pci.c (revision 290991) @@ -1,561 +1,562 @@ /*- * Copyright (c) 2011 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" /* * Bus interface. 
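 *
 * Children reach these methods through the ordinary newbus entry
 * points; a hedged usage sketch (hypothetical driver code, not part of
 * this file):
 *
 *	struct resource *res;
 *	int rid = PCIR_BAR(0);
 *
 *	res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
 *	if (res == NULL)
 *		return (ENXIO);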
*/ static int ofw_pci_read_ivar(device_t, device_t, int, uintptr_t *); static struct resource * ofw_pci_alloc_resource(device_t bus, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags); static int ofw_pci_release_resource(device_t bus, device_t child, int type, int rid, struct resource *res); static int ofw_pci_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *res); static int ofw_pci_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *res); static int ofw_pci_adjust_resource(device_t bus, device_t child, int type, struct resource *res, u_long start, u_long end); /* * pcib interface. */ static int ofw_pci_maxslots(device_t); static int ofw_pci_route_interrupt(device_t, device_t, int); /* * ofw_bus interface */ static phandle_t ofw_pci_get_node(device_t bus, device_t dev); /* * local methods */ static int ofw_pci_nranges(phandle_t node); static int ofw_pci_fill_ranges(phandle_t node, struct ofw_pci_range *ranges); /* * Driver methods. */ static device_method_t ofw_pci_methods[] = { /* Device interface */ DEVMETHOD(device_attach, ofw_pci_attach), /* Bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_read_ivar, ofw_pci_read_ivar), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), DEVMETHOD(bus_alloc_resource, ofw_pci_alloc_resource), DEVMETHOD(bus_release_resource, ofw_pci_release_resource), DEVMETHOD(bus_activate_resource, ofw_pci_activate_resource), DEVMETHOD(bus_deactivate_resource, ofw_pci_deactivate_resource), DEVMETHOD(bus_adjust_resource, ofw_pci_adjust_resource), /* pcib interface */ DEVMETHOD(pcib_maxslots, ofw_pci_maxslots), DEVMETHOD(pcib_route_interrupt, ofw_pci_route_interrupt), /* ofw_bus interface */ DEVMETHOD(ofw_bus_get_node, ofw_pci_get_node), DEVMETHOD_END }; DEFINE_CLASS_0(ofw_pci, ofw_pci_driver, ofw_pci_methods, 0); int ofw_pci_init(device_t dev) { struct ofw_pci_softc *sc; phandle_t node; u_int32_t busrange[2]; struct ofw_pci_range *rp; int error; node = ofw_bus_get_node(dev); sc = device_get_softc(dev); sc->sc_initialized = 1; - if (OF_getprop(node, "reg", &sc->sc_pcir, sizeof(sc->sc_pcir)) == -1) + if (OF_getencprop(node, "reg", (pcell_t *)&sc->sc_pcir, + sizeof(sc->sc_pcir)) == -1) return (ENXIO); - if (OF_getprop(node, "bus-range", busrange, sizeof(busrange)) != 8) + if (OF_getencprop(node, "bus-range", busrange, sizeof(busrange)) != 8) busrange[0] = 0; sc->sc_dev = dev; sc->sc_node = node; sc->sc_bus = busrange[0]; if (sc->sc_quirks & OFW_PCI_QUIRK_RANGES_ON_CHILDREN) { phandle_t c; int n, i; sc->sc_nrange = 0; for (c = OF_child(node); c != 0; c = OF_peer(c)) { n = ofw_pci_nranges(c); if (n > 0) sc->sc_nrange += n; } if (sc->sc_nrange == 0) return (ENXIO); sc->sc_range = malloc(sc->sc_nrange * sizeof(sc->sc_range[0]), M_DEVBUF, M_WAITOK); i = 0; for (c = OF_child(node); c != 0; c = OF_peer(c)) { n = ofw_pci_fill_ranges(c, &sc->sc_range[i]); if (n > 0) i += n; } KASSERT(i == sc->sc_nrange, ("range count mismatch")); } else { sc->sc_nrange = ofw_pci_nranges(node); if (sc->sc_nrange <= 0) { device_printf(dev, "could not get ranges\n"); return (ENXIO); } sc->sc_range = malloc(sc->sc_nrange * sizeof(sc->sc_range[0]), M_DEVBUF, M_WAITOK); ofw_pci_fill_ranges(node, sc->sc_range); } sc->sc_io_rman.rm_type = RMAN_ARRAY; sc->sc_io_rman.rm_descr = "PCI I/O Ports"; error = rman_init(&sc->sc_io_rman); if (error) { device_printf(dev, "rman_init() failed. 
error = %d\n", error); return (error); } sc->sc_mem_rman.rm_type = RMAN_ARRAY; sc->sc_mem_rman.rm_descr = "PCI Memory"; error = rman_init(&sc->sc_mem_rman); if (error) { device_printf(dev, "rman_init() failed. error = %d\n", error); return (error); } for (rp = sc->sc_range; rp < sc->sc_range + sc->sc_nrange && rp->pci_hi != 0; rp++) { error = 0; switch (rp->pci_hi & OFW_PCI_PHYS_HI_SPACEMASK) { case OFW_PCI_PHYS_HI_SPACE_CONFIG: break; case OFW_PCI_PHYS_HI_SPACE_IO: error = rman_manage_region(&sc->sc_io_rman, rp->pci, rp->pci + rp->size - 1); break; case OFW_PCI_PHYS_HI_SPACE_MEM32: case OFW_PCI_PHYS_HI_SPACE_MEM64: error = rman_manage_region(&sc->sc_mem_rman, rp->pci, rp->pci + rp->size - 1); break; } if (error) { device_printf(dev, "rman_manage_region(%x, %#jx, %#jx) failed. " "error = %d\n", rp->pci_hi & OFW_PCI_PHYS_HI_SPACEMASK, rp->pci, rp->pci + rp->size - 1, error); return (error); } } ofw_bus_setup_iinfo(node, &sc->sc_pci_iinfo, sizeof(cell_t)); return (error); } int ofw_pci_attach(device_t dev) { struct ofw_pci_softc *sc; int error; sc = device_get_softc(dev); if (!sc->sc_initialized) { error = ofw_pci_init(dev); if (error) return (error); } device_add_child(dev, "pci", -1); return (bus_generic_attach(dev)); } static int ofw_pci_maxslots(device_t dev) { return (PCI_SLOTMAX); } static int ofw_pci_route_interrupt(device_t bus, device_t dev, int pin) { struct ofw_pci_softc *sc; struct ofw_pci_register reg; uint32_t pintr, mintr[2]; int intrcells; phandle_t iparent; sc = device_get_softc(bus); pintr = pin; /* Fabricate imap information in case this isn't an OFW device */ bzero(®, sizeof(reg)); reg.phys_hi = (pci_get_bus(dev) << OFW_PCI_PHYS_HI_BUSSHIFT) | (pci_get_slot(dev) << OFW_PCI_PHYS_HI_DEVICESHIFT) | (pci_get_function(dev) << OFW_PCI_PHYS_HI_FUNCTIONSHIFT); intrcells = ofw_bus_lookup_imap(ofw_bus_get_node(dev), &sc->sc_pci_iinfo, ®, sizeof(reg), &pintr, sizeof(pintr), mintr, sizeof(mintr), &iparent); if (intrcells) { pintr = ofw_bus_map_intr(dev, iparent, intrcells, mintr); return (pintr); } /* Maybe it's a real interrupt, not an intpin */ if (pin > 4) return (pin); device_printf(bus, "could not route pin %d for device %d.%d\n", pin, pci_get_slot(dev), pci_get_function(dev)); return (PCI_INVALID_IRQ); } static int ofw_pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct ofw_pci_softc *sc; sc = device_get_softc(dev); switch (which) { case PCIB_IVAR_DOMAIN: *result = device_get_unit(dev); return (0); case PCIB_IVAR_BUS: *result = sc->sc_bus; return (0); } return (ENOENT); } static struct resource * ofw_pci_alloc_resource(device_t bus, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct ofw_pci_softc *sc; struct resource *rv; struct rman *rm; int needactivate; needactivate = flags & RF_ACTIVE; flags &= ~RF_ACTIVE; sc = device_get_softc(bus); switch (type) { case SYS_RES_MEMORY: rm = &sc->sc_mem_rman; break; case SYS_RES_IOPORT: rm = &sc->sc_io_rman; break; case SYS_RES_IRQ: return (bus_alloc_resource(bus, type, rid, start, end, count, flags)); default: device_printf(bus, "unknown resource request from %s\n", device_get_nameunit(child)); return (NULL); } rv = rman_reserve_resource(rm, start, end, count, flags, child); if (rv == NULL) { device_printf(bus, "failed to reserve resource for %s\n", device_get_nameunit(child)); return (NULL); } rman_set_rid(rv, *rid); if (needactivate) { if (bus_activate_resource(child, type, *rid, rv) != 0) { device_printf(bus, "failed to activate resource for %s\n", 
device_get_nameunit(child)); rman_release_resource(rv); return (NULL); } } return (rv); } static int ofw_pci_release_resource(device_t bus, device_t child, int type, int rid, struct resource *res) { if (rman_get_flags(res) & RF_ACTIVE) { int error = bus_deactivate_resource(child, type, rid, res); if (error) return error; } return (rman_release_resource(res)); } static int ofw_pci_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *res) { struct ofw_pci_softc *sc; void *p; sc = device_get_softc(bus); if (type == SYS_RES_IRQ) { return (bus_activate_resource(bus, type, rid, res)); } if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) { struct ofw_pci_range *rp; vm_offset_t start; int space; start = (vm_offset_t)rman_get_start(res); /* * Map this through the ranges list */ for (rp = sc->sc_range; rp < sc->sc_range + sc->sc_nrange && rp->pci_hi != 0; rp++) { if (start < rp->pci || start >= rp->pci + rp->size) continue; switch (rp->pci_hi & OFW_PCI_PHYS_HI_SPACEMASK) { case OFW_PCI_PHYS_HI_SPACE_IO: space = SYS_RES_IOPORT; break; case OFW_PCI_PHYS_HI_SPACE_MEM32: case OFW_PCI_PHYS_HI_SPACE_MEM64: space = SYS_RES_MEMORY; break; default: space = -1; } if (type == space) { start += (rp->host - rp->pci); break; } } if (bootverbose) printf("ofw_pci mapdev: start %zx, len %ld\n", start, rman_get_size(res)); p = pmap_mapdev(start, (vm_size_t)rman_get_size(res)); if (p == NULL) return (ENOMEM); rman_set_virtual(res, p); rman_set_bustag(res, &bs_le_tag); rman_set_bushandle(res, (u_long)p); } return (rman_activate_resource(res)); } static int ofw_pci_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *res) { /* * If this is a memory resource, unmap it. */ if ((type == SYS_RES_MEMORY) || (type == SYS_RES_IOPORT)) { u_int32_t psize; psize = rman_get_size(res); pmap_unmapdev((vm_offset_t)rman_get_virtual(res), psize); } return (rman_deactivate_resource(res)); } static int ofw_pci_adjust_resource(device_t bus, device_t child, int type, struct resource *res, u_long start, u_long end) { struct rman *rm = NULL; struct ofw_pci_softc *sc = device_get_softc(bus); KASSERT(!(rman_get_flags(res) & RF_ACTIVE), ("active resources cannot be adjusted")); if (rman_get_flags(res) & RF_ACTIVE) return (EINVAL); switch (type) { case SYS_RES_MEMORY: rm = &sc->sc_mem_rman; break; case SYS_RES_IOPORT: rm = &sc->sc_io_rman; break; default: return (ENXIO); } if (!rman_is_region_manager(res, rm)) return (EINVAL); return (rman_adjust_resource(res, start, end)); } static phandle_t ofw_pci_get_node(device_t bus, device_t dev) { struct ofw_pci_softc *sc; sc = device_get_softc(bus); /* We only have one child, the PCI bus, which needs our own node. 
*/ return (sc->sc_node); } static int ofw_pci_nranges(phandle_t node) { int host_address_cells = 1, pci_address_cells = 3, size_cells = 2; ssize_t nbase_ranges; - OF_getprop(OF_parent(node), "#address-cells", &host_address_cells, + OF_getencprop(OF_parent(node), "#address-cells", &host_address_cells, sizeof(host_address_cells)); - OF_getprop(node, "#address-cells", &pci_address_cells, + OF_getencprop(node, "#address-cells", &pci_address_cells, sizeof(pci_address_cells)); - OF_getprop(node, "#size-cells", &size_cells, sizeof(size_cells)); + OF_getencprop(node, "#size-cells", &size_cells, sizeof(size_cells)); nbase_ranges = OF_getproplen(node, "ranges"); if (nbase_ranges <= 0) return (-1); return (nbase_ranges / sizeof(cell_t) / (pci_address_cells + host_address_cells + size_cells)); } static int ofw_pci_fill_ranges(phandle_t node, struct ofw_pci_range *ranges) { int host_address_cells = 1, pci_address_cells = 3, size_cells = 2; cell_t *base_ranges; ssize_t nbase_ranges; int nranges; int i, j, k; - OF_getprop(OF_parent(node), "#address-cells", &host_address_cells, + OF_getencprop(OF_parent(node), "#address-cells", &host_address_cells, sizeof(host_address_cells)); - OF_getprop(node, "#address-cells", &pci_address_cells, + OF_getencprop(node, "#address-cells", &pci_address_cells, sizeof(pci_address_cells)); - OF_getprop(node, "#size-cells", &size_cells, sizeof(size_cells)); + OF_getencprop(node, "#size-cells", &size_cells, sizeof(size_cells)); nbase_ranges = OF_getproplen(node, "ranges"); if (nbase_ranges <= 0) return (-1); nranges = nbase_ranges / sizeof(cell_t) / (pci_address_cells + host_address_cells + size_cells); base_ranges = malloc(nbase_ranges, M_DEVBUF, M_WAITOK); - OF_getprop(node, "ranges", base_ranges, nbase_ranges); + OF_getencprop(node, "ranges", base_ranges, nbase_ranges); for (i = 0, j = 0; i < nranges; i++) { ranges[i].pci_hi = base_ranges[j++]; ranges[i].pci = 0; for (k = 0; k < pci_address_cells - 1; k++) { ranges[i].pci <<= 32; ranges[i].pci |= base_ranges[j++]; } ranges[i].host = 0; for (k = 0; k < host_address_cells; k++) { ranges[i].host <<= 32; ranges[i].host |= base_ranges[j++]; } ranges[i].size = 0; for (k = 0; k < size_cells; k++) { ranges[i].size <<= 32; ranges[i].size |= base_ranges[j++]; } } free(base_ranges, M_DEVBUF); return (nranges); } Index: projects/powernv/powerpc/ofw/ofw_pcibus.c =================================================================== --- projects/powernv/powerpc/ofw/ofw_pcibus.c (revision 290990) +++ projects/powernv/powerpc/ofw/ofw_pcibus.c (revision 290991) @@ -1,355 +1,357 @@ /*- * Copyright (c) 1997, Stefan Esser * Copyright (c) 2000, Michael Smith * Copyright (c) 2000, BSDi * Copyright (c) 2003, Thomas Moestl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ofw_pcibus.h" #include "pcib_if.h" #include "pci_if.h" typedef uint32_t ofw_pci_intr_t; /* Methods */ static device_probe_t ofw_pcibus_probe; static device_attach_t ofw_pcibus_attach; static pci_assign_interrupt_t ofw_pcibus_assign_interrupt; static ofw_bus_get_devinfo_t ofw_pcibus_get_devinfo; static int ofw_pcibus_child_pnpinfo_str_method(device_t cbdev, device_t child, char *buf, size_t buflen); static void ofw_pcibus_enum_devtree(device_t dev, u_int domain, u_int busno); static void ofw_pcibus_enum_bus(device_t dev, u_int domain, u_int busno); static device_method_t ofw_pcibus_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ofw_pcibus_probe), DEVMETHOD(device_attach, ofw_pcibus_attach), /* Bus interface */ DEVMETHOD(bus_child_pnpinfo_str, ofw_pcibus_child_pnpinfo_str_method), /* PCI interface */ DEVMETHOD(pci_assign_interrupt, ofw_pcibus_assign_interrupt), /* ofw_bus interface */ DEVMETHOD(ofw_bus_get_devinfo, ofw_pcibus_get_devinfo), DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat), DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model), DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name), DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node), DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type), DEVMETHOD_END }; static devclass_t pci_devclass; DEFINE_CLASS_1(pci, ofw_pcibus_driver, ofw_pcibus_methods, sizeof(struct pci_softc), pci_driver); DRIVER_MODULE(ofw_pcibus, pcib, ofw_pcibus_driver, pci_devclass, 0, 0); MODULE_VERSION(ofw_pcibus, 1); MODULE_DEPEND(ofw_pcibus, pci, 1, 1, 1); static int ofw_devices_only = 0; TUNABLE_INT("hw.pci.ofw_devices_only", &ofw_devices_only); static int ofw_pcibus_probe(device_t dev) { if (ofw_bus_get_node(dev) == -1) return (ENXIO); device_set_desc(dev, "OFW PCI bus"); return (BUS_PROBE_DEFAULT); } static int ofw_pcibus_attach(device_t dev) { u_int busno, domain; int error; error = pci_attach_common(dev); if (error) return (error); domain = pcib_get_domain(dev); busno = pcib_get_bus(dev); /* * Attach those children represented in the device tree. */ ofw_pcibus_enum_devtree(dev, domain, busno); /* * We now attach any laggard devices. FDT, for instance, allows * the device tree to enumerate only some PCI devices. Apple's * OF device tree on some Grackle-based hardware can also miss * functions on multi-function cards. 
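 *
 * The fallback bus scan below can be disabled from the loader when the
 * firmware tree is known to be complete; a hedged example
 * (loader.conf):
 *
 *	hw.pci.ofw_devices_only="1"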
*/ if (!ofw_devices_only) ofw_pcibus_enum_bus(dev, domain, busno); return (bus_generic_attach(dev)); } static void ofw_pcibus_enum_devtree(device_t dev, u_int domain, u_int busno) { device_t pcib; struct ofw_pci_register pcir; struct ofw_pcibus_devinfo *dinfo; phandle_t node, child; u_int func, slot; int intline; pcib = device_get_parent(dev); node = ofw_bus_get_node(dev); for (child = OF_child(node); child != 0; child = OF_peer(child)) { - if (OF_getprop(child, "reg", &pcir, sizeof(pcir)) == -1) + if (OF_getencprop(child, "reg", (pcell_t *)&pcir, + sizeof(pcir)) == -1) continue; slot = OFW_PCI_PHYS_HI_DEVICE(pcir.phys_hi); func = OFW_PCI_PHYS_HI_FUNCTION(pcir.phys_hi); /* Some OFW device trees contain dupes. */ if (pci_find_dbsf(domain, busno, slot, func) != NULL) continue; /* * The preset in the intline register is usually bogus. Reset * it such that the PCI code will reroute the interrupt if * needed. */ intline = PCI_INVALID_IRQ; if (OF_getproplen(child, "interrupts") > 0) intline = 0; PCIB_WRITE_CONFIG(pcib, busno, slot, func, PCIR_INTLINE, intline, 1); /* * Now set up the PCI and OFW bus layer devinfo and add it * to the PCI bus. */ dinfo = (struct ofw_pcibus_devinfo *)pci_read_device(pcib, domain, busno, slot, func, sizeof(*dinfo)); if (dinfo == NULL) continue; if (ofw_bus_gen_setup_devinfo(&dinfo->opd_obdinfo, child) != 0) { pci_freecfg((struct pci_devinfo *)dinfo); continue; } dinfo->opd_dma_tag = NULL; pci_add_child(dev, (struct pci_devinfo *)dinfo); /* * Some devices don't have an intpin set, but do have * interrupts. These are fully specified, and set in the * interrupts property, so add that value to the device's * resource list. */ if (dinfo->opd_dinfo.cfg.intpin == 0) ofw_bus_intr_to_rl(dev, child, &dinfo->opd_dinfo.resources, NULL); } } /* * The following is an almost exact clone of pci_add_children(), with the * addition that it (a) will not add children that have already been added, * and (b) will set up the OFW devinfo to point to invalid values. This is * to handle non-enumerated PCI children as exist in FDT and on the second * function of the Rage 128 in my Blue & White G3. */ static void ofw_pcibus_enum_bus(device_t dev, u_int domain, u_int busno) { device_t pcib; struct ofw_pcibus_devinfo *dinfo; int maxslots; int s, f, pcifunchigh; uint8_t hdrtype; pcib = device_get_parent(dev); maxslots = PCIB_MAXSLOTS(pcib); for (s = 0; s <= maxslots; s++) { pcifunchigh = 0; f = 0; DELAY(1); hdrtype = PCIB_READ_CONFIG(pcib, busno, s, f, PCIR_HDRTYPE, 1); if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE) continue; if (hdrtype & PCIM_MFDEV) pcifunchigh = PCI_FUNCMAX; for (f = 0; f <= pcifunchigh; f++) { /* Filter devices we have already added */ if (pci_find_dbsf(domain, busno, s, f) != NULL) continue; dinfo = (struct ofw_pcibus_devinfo *)pci_read_device( pcib, domain, busno, s, f, sizeof(*dinfo)); if (dinfo == NULL) continue; dinfo->opd_dma_tag = NULL; dinfo->opd_obdinfo.obd_node = -1; dinfo->opd_obdinfo.obd_name = NULL; dinfo->opd_obdinfo.obd_compat = NULL; dinfo->opd_obdinfo.obd_type = NULL; dinfo->opd_obdinfo.obd_model = NULL; /* * For non OFW-devices, don't believe 0 * for an interrupt. 
*/ if (dinfo->opd_dinfo.cfg.intline == 0) { dinfo->opd_dinfo.cfg.intline = PCI_INVALID_IRQ; PCIB_WRITE_CONFIG(pcib, busno, s, f, PCIR_INTLINE, PCI_INVALID_IRQ, 1); } pci_add_child(dev, (struct pci_devinfo *)dinfo); } } } static int ofw_pcibus_child_pnpinfo_str_method(device_t cbdev, device_t child, char *buf, size_t buflen) { pci_child_pnpinfo_str_method(cbdev, child, buf, buflen); if (ofw_bus_get_node(child) != -1) { strlcat(buf, " ", buflen); /* Separate info */ ofw_bus_gen_child_pnpinfo_str(cbdev, child, buf, buflen); } return (0); } static int ofw_pcibus_assign_interrupt(device_t dev, device_t child) { ofw_pci_intr_t intr[2]; phandle_t node, iparent; int isz, icells; node = ofw_bus_get_node(child); if (node == -1) { /* Non-firmware enumerated child, use standard routing */ intr[0] = pci_get_intpin(child); return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child, intr[0])); } /* * Try to determine the node's interrupt parent so we know which * PIC to use. */ iparent = -1; - if (OF_getprop(node, "interrupt-parent", &iparent, sizeof(iparent)) < 0) + if (OF_getencprop(node, "interrupt-parent", &iparent, + sizeof(iparent)) < 0) iparent = -1; icells = 1; if (iparent != -1) - OF_getprop(OF_node_from_xref(iparent), "#interrupt-cells", + OF_getencprop(OF_node_from_xref(iparent), "#interrupt-cells", &icells, sizeof(icells)); /* * Any AAPL,interrupts property gets priority and is * fully specified (i.e. does not need routing) */ - isz = OF_getprop(node, "AAPL,interrupts", intr, sizeof(intr)); + isz = OF_getencprop(node, "AAPL,interrupts", intr, sizeof(intr)); if (isz == sizeof(intr[0])*icells) return ((iparent == -1) ? intr[0] : ofw_bus_map_intr(dev, iparent, icells, intr)); - isz = OF_getprop(node, "interrupts", intr, sizeof(intr)); + isz = OF_getencprop(node, "interrupts", intr, sizeof(intr)); if (isz == sizeof(intr[0])*icells) { if (iparent != -1) intr[0] = ofw_bus_map_intr(dev, iparent, icells, intr); } else { /* No property: our best guess is the intpin. */ intr[0] = pci_get_intpin(child); } /* * If we got intr from a property, it may or may not be an intpin. * For on-board devices, it frequently is not, and is completely out * of the valid intpin range. For PCI slots, it hopefully is, * otherwise we will have trouble interfacing with non-OFW buses * such as cardbus. * Since we cannot tell which it is without violating layering, we * will always use the route_interrupt method, and treat exceptions * on the level they become apparent. */ return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child, intr[0])); } static const struct ofw_bus_devinfo * ofw_pcibus_get_devinfo(device_t bus, device_t dev) { struct ofw_pcibus_devinfo *dinfo; dinfo = device_get_ivars(dev); return (&dinfo->opd_obdinfo); } Index: projects/powernv/powerpc/ofw/openpic_ofw.c =================================================================== --- projects/powernv/powerpc/ofw/openpic_ofw.c (revision 290990) +++ projects/powernv/powerpc/ofw/openpic_ofw.c (revision 290991) @@ -1,168 +1,168 @@ /*- * Copyright 2003 by Peter Grehan. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pic_if.h" /* * OFW interface */ static int openpic_ofw_probe(device_t); static int openpic_ofw_attach(device_t); static void openpic_ofw_translate_code(device_t, u_int irq, int code, enum intr_trigger *trig, enum intr_polarity *pol); static device_method_t openpic_ofw_methods[] = { /* Device interface */ DEVMETHOD(device_probe, openpic_ofw_probe), DEVMETHOD(device_attach, openpic_ofw_attach), DEVMETHOD(device_suspend, openpic_suspend), DEVMETHOD(device_resume, openpic_resume), /* PIC interface */ DEVMETHOD(pic_bind, openpic_bind), DEVMETHOD(pic_config, openpic_config), DEVMETHOD(pic_dispatch, openpic_dispatch), DEVMETHOD(pic_enable, openpic_enable), DEVMETHOD(pic_eoi, openpic_eoi), DEVMETHOD(pic_ipi, openpic_ipi), DEVMETHOD(pic_mask, openpic_mask), DEVMETHOD(pic_unmask, openpic_unmask), DEVMETHOD(pic_translate_code, openpic_ofw_translate_code), DEVMETHOD_END }; static driver_t openpic_ofw_driver = { "openpic", openpic_ofw_methods, sizeof(struct openpic_softc), }; DRIVER_MODULE(openpic, ofwbus, openpic_ofw_driver, openpic_devclass, 0, 0); DRIVER_MODULE(openpic, simplebus, openpic_ofw_driver, openpic_devclass, 0, 0); DRIVER_MODULE(openpic, macio, openpic_ofw_driver, openpic_devclass, 0, 0); static int openpic_ofw_probe(device_t dev) { const char *type = ofw_bus_get_type(dev); if (type == NULL) return (ENXIO); if (!ofw_bus_is_compatible(dev, "chrp,open-pic") && strcmp(type, "open-pic") != 0) return (ENXIO); /* * On some U4 systems, there is a phantom MPIC in the mac-io cell. * The uninorth driver will pick up the real PIC, so ignore it here. 
*/ if (OF_finddevice("/u4") != (phandle_t)-1) return (ENXIO); device_set_desc(dev, OPENPIC_DEVSTR); return (0); } static int openpic_ofw_attach(device_t dev) { phandle_t xref, node; node = ofw_bus_get_node(dev); - if (OF_getprop(node, "phandle", &xref, sizeof(xref)) == -1 && - OF_getprop(node, "ibm,phandle", &xref, sizeof(xref)) == -1 && - OF_getprop(node, "linux,phandle", &xref, sizeof(xref)) == -1) + if (OF_getencprop(node, "phandle", &xref, sizeof(xref)) == -1 && + OF_getencprop(node, "ibm,phandle", &xref, sizeof(xref)) == -1 && + OF_getencprop(node, "linux,phandle", &xref, sizeof(xref)) == -1) xref = node; return (openpic_common_attach(dev, xref)); } static void openpic_ofw_translate_code(device_t dev, u_int irq, int code, enum intr_trigger *trig, enum intr_polarity *pol) { switch (code) { case 0: /* L to H edge */ *trig = INTR_TRIGGER_EDGE; *pol = INTR_POLARITY_HIGH; break; case 1: /* Active L level */ *trig = INTR_TRIGGER_LEVEL; *pol = INTR_POLARITY_LOW; break; case 2: /* Active H level */ *trig = INTR_TRIGGER_LEVEL; *pol = INTR_POLARITY_HIGH; break; case 3: /* H to L edge */ *trig = INTR_TRIGGER_EDGE; *pol = INTR_POLARITY_LOW; break; default: *trig = INTR_TRIGGER_CONFORM; *pol = INTR_POLARITY_CONFORM; } } Index: projects/powernv/powerpc/powermac/cpcht.c =================================================================== --- projects/powernv/powerpc/powermac/cpcht.c (revision 290990) +++ projects/powernv/powerpc/powermac/cpcht.c (revision 290991) @@ -1,735 +1,735 @@ /*- * Copyright (C) 2008-2010 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" #include "pic_if.h" /* * IBM CPC9X5 Hypertransport Device interface. */ static int cpcht_probe(device_t); static int cpcht_attach(device_t); static void cpcht_configure_htbridge(device_t, phandle_t); /* * pcib interface. 
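 *
 * Config space on this bridge is memory-mapped, so cpcht_read_config()
 * and cpcht_write_config() compute register addresses directly. A
 * worked example with hypothetical values: bus 0, slot 2, function 1,
 * register 0x3c yields sc_data + ((((2 << 3) | 1) << 8) | 0x3c) =
 * sc_data + 0x113c.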
*/ static u_int32_t cpcht_read_config(device_t, u_int, u_int, u_int, u_int, int); static void cpcht_write_config(device_t, u_int, u_int, u_int, u_int, u_int32_t, int); static int cpcht_route_interrupt(device_t, device_t, int); static int cpcht_alloc_msi(device_t dev, device_t child, int count, int maxcount, int *irqs); static int cpcht_release_msi(device_t dev, device_t child, int count, int *irqs); static int cpcht_alloc_msix(device_t dev, device_t child, int *irq); static int cpcht_release_msix(device_t dev, device_t child, int irq); static int cpcht_map_msi(device_t dev, device_t child, int irq, uint64_t *addr, uint32_t *data); /* * Driver methods. */ static device_method_t cpcht_methods[] = { /* Device interface */ DEVMETHOD(device_probe, cpcht_probe), DEVMETHOD(device_attach, cpcht_attach), /* pcib interface */ DEVMETHOD(pcib_read_config, cpcht_read_config), DEVMETHOD(pcib_write_config, cpcht_write_config), DEVMETHOD(pcib_route_interrupt, cpcht_route_interrupt), DEVMETHOD(pcib_alloc_msi, cpcht_alloc_msi), DEVMETHOD(pcib_release_msi, cpcht_release_msi), DEVMETHOD(pcib_alloc_msix, cpcht_alloc_msix), DEVMETHOD(pcib_release_msix, cpcht_release_msix), DEVMETHOD(pcib_map_msi, cpcht_map_msi), DEVMETHOD_END }; struct cpcht_irq { enum { IRQ_NONE, IRQ_HT, IRQ_MSI, IRQ_INTERNAL } irq_type; int ht_source; vm_offset_t ht_base; vm_offset_t apple_eoi; uint32_t eoi_data; int edge; }; static struct cpcht_irq *cpcht_irqmap = NULL; uint32_t cpcht_msipic = 0; struct cpcht_softc { struct ofw_pci_softc pci_sc; vm_offset_t sc_data; uint64_t sc_populated_slots; struct cpcht_irq htirq_map[128]; struct mtx htirq_mtx; }; static devclass_t cpcht_devclass; DEFINE_CLASS_1(pcib, cpcht_driver, cpcht_methods, sizeof(struct cpcht_softc), ofw_pci_driver); DRIVER_MODULE(cpcht, ofwbus, cpcht_driver, cpcht_devclass, 0, 0); #define CPCHT_IOPORT_BASE 0xf4000000UL /* Hardwired */ #define CPCHT_IOPORT_SIZE 0x00400000UL #define HTAPIC_REQUEST_EOI 0x20 #define HTAPIC_TRIGGER_LEVEL 0x02 #define HTAPIC_MASK 0x01 static int cpcht_probe(device_t dev) { const char *type, *compatible; type = ofw_bus_get_type(dev); compatible = ofw_bus_get_compat(dev); if (type == NULL || compatible == NULL) return (ENXIO); if (strcmp(type, "ht") != 0) return (ENXIO); if (strcmp(compatible, "u3-ht") != 0) return (ENXIO); device_set_desc(dev, "IBM CPC9X5 HyperTransport Tunnel"); return (0); } static int cpcht_attach(device_t dev) { struct cpcht_softc *sc; phandle_t node, child; u_int32_t reg[3]; int i; node = ofw_bus_get_node(dev); sc = device_get_softc(dev); - if (OF_getprop(node, "reg", reg, sizeof(reg)) < 12) + if (OF_getencprop(node, "reg", reg, sizeof(reg)) < 12) return (ENXIO); if (OF_getproplen(node, "ranges") <= 0) sc->pci_sc.sc_quirks = OFW_PCI_QUIRK_RANGES_ON_CHILDREN; sc->sc_populated_slots = 0; sc->sc_data = (vm_offset_t)pmap_mapdev(reg[1], reg[2]); /* * Set up the resource manager and the HT->MPIC mapping. For cpcht, * the ranges are properties of the child bridges, and this is also * where we get the HT interrupts properties. 
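 *
 * cpcht_configure_htbridge() below finds those interrupt properties by
 * the standard PCI capability-list walk; in sketch form (illustrative
 * only):
 *
 *	ptr = PCIB_READ_CONFIG(dev, b, s, f, PCIR_CAP_PTR, 1);
 *	while (ptr != 0) {
 *		if (PCIB_READ_CONFIG(dev, b, s, f, ptr + PCICAP_ID, 1) ==
 *		    PCIY_HT)
 *			; /* examine the HT interrupt capability */
 *		ptr = PCIB_READ_CONFIG(dev, b, s, f,
 *		    ptr + PCICAP_NEXTPTR, 1);
 *	}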
*/ #if 0 /* I/O port mappings are usually not in the device tree */ rman_manage_region(&sc->pci_sc.sc_io_rman, 0, CPCHT_IOPORT_SIZE - 1); #endif bzero(sc->htirq_map, sizeof(sc->htirq_map)); mtx_init(&sc->htirq_mtx, "cpcht irq", NULL, MTX_DEF); for (i = 0; i < 8; i++) sc->htirq_map[i].irq_type = IRQ_INTERNAL; for (child = OF_child(node); child != 0; child = OF_peer(child)) cpcht_configure_htbridge(dev, child); /* Now make the mapping table available to the MPIC */ cpcht_irqmap = sc->htirq_map; return (ofw_pci_attach(dev)); } static void cpcht_configure_htbridge(device_t dev, phandle_t child) { struct cpcht_softc *sc; struct ofw_pci_register pcir; int ptr, nextptr; uint32_t vend, val; int i, nirq, irq; u_int b, f, s; sc = device_get_softc(dev); - if (OF_getprop(child, "reg", &pcir, sizeof(pcir)) == -1) + if (OF_getencprop(child, "reg", (pcell_t *)&pcir, sizeof(pcir)) == -1) return; b = OFW_PCI_PHYS_HI_BUS(pcir.phys_hi); s = OFW_PCI_PHYS_HI_DEVICE(pcir.phys_hi); f = OFW_PCI_PHYS_HI_FUNCTION(pcir.phys_hi); /* * Mark this slot as populated. The remote south bridge does * not like us talking to unpopulated slots on the root bus. */ sc->sc_populated_slots |= (1 << s); /* * Next build up any HT->MPIC mappings for this sub-bus. One would * naively hope that enabling, disabling, and EOIing interrupts would * cause the appropriate HT bus transactions to that effect. This is * not the case. * * Instead, we have to muck about on the HT peer's root PCI bridges, * figure out what interrupts they send, enable them, and cache * the location of their WaitForEOI registers so that we can * send EOIs later. */ /* All the devices we are interested in have caps */ if (!(PCIB_READ_CONFIG(dev, b, s, f, PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)) return; nextptr = PCIB_READ_CONFIG(dev, b, s, f, PCIR_CAP_PTR, 1); while (nextptr != 0) { ptr = nextptr; nextptr = PCIB_READ_CONFIG(dev, b, s, f, ptr + PCICAP_NEXTPTR, 1); /* Find the HT IRQ capabilities */ if (PCIB_READ_CONFIG(dev, b, s, f, ptr + PCICAP_ID, 1) != PCIY_HT) continue; val = PCIB_READ_CONFIG(dev, b, s, f, ptr + PCIR_HT_COMMAND, 2); if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_INTERRUPT) continue; /* Ask for the IRQ count */ PCIB_WRITE_CONFIG(dev, b, s, f, ptr + PCIR_HT_COMMAND, 0x1, 1); nirq = PCIB_READ_CONFIG(dev, b, s, f, ptr + 4, 4); nirq = ((nirq >> 16) & 0xff) + 1; device_printf(dev, "%d HT IRQs on device %d.%d\n", nirq, s, f); for (i = 0; i < nirq; i++) { PCIB_WRITE_CONFIG(dev, b, s, f, ptr + PCIR_HT_COMMAND, 0x10 + (i << 1), 1); irq = PCIB_READ_CONFIG(dev, b, s, f, ptr + 4, 4); /* * Mask this interrupt for now. */ PCIB_WRITE_CONFIG(dev, b, s, f, ptr + 4, irq | HTAPIC_MASK, 4); irq = (irq >> 16) & 0xff; sc->htirq_map[irq].irq_type = IRQ_HT; sc->htirq_map[irq].ht_source = i; sc->htirq_map[irq].ht_base = sc->sc_data + (((((s & 0x1f) << 3) | (f & 0x07)) << 8) | (ptr)); PCIB_WRITE_CONFIG(dev, b, s, f, ptr + PCIR_HT_COMMAND, 0x11 + (i << 1), 1); sc->htirq_map[irq].eoi_data = PCIB_READ_CONFIG(dev, b, s, f, ptr + 4, 4) | 0x80000000; /* * Apple uses a non-compliant IO/APIC that differs * in how we signal EOIs. Check if this device was * made by Apple, and act accordingly.
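 *
 * Compliant parts take EOIs through the HT WaitForEOI data port (see
 * openpic_cpcht_eoi()); Apple parts, identified by vendor ID 0x106b,
 * instead expose a bitmap EOI register 0x60 past the function's
 * config-space base, which is what apple_eoi caches.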
*/ vend = PCIB_READ_CONFIG(dev, b, s, f, PCIR_DEVVENDOR, 4); if ((vend & 0xffff) == 0x106b) sc->htirq_map[irq].apple_eoi = (sc->htirq_map[irq].ht_base - ptr) + 0x60; } } } static u_int32_t cpcht_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, int width) { struct cpcht_softc *sc; vm_offset_t caoff; sc = device_get_softc(dev); caoff = sc->sc_data + (((((slot & 0x1f) << 3) | (func & 0x07)) << 8) | reg); if (bus == 0 && (!(sc->sc_populated_slots & (1 << slot)) || func > 0)) return (0xffffffff); if (bus > 0) caoff += 0x01000000UL + (bus << 16); switch (width) { case 1: return (in8rb(caoff)); break; case 2: return (in16rb(caoff)); break; case 4: return (in32rb(caoff)); break; } return (0xffffffff); } static void cpcht_write_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, u_int32_t val, int width) { struct cpcht_softc *sc; vm_offset_t caoff; sc = device_get_softc(dev); caoff = sc->sc_data + (((((slot & 0x1f) << 3) | (func & 0x07)) << 8) | reg); if (bus == 0 && (!(sc->sc_populated_slots & (1 << slot)) || func > 0)) return; if (bus > 0) caoff += 0x01000000UL + (bus << 16); switch (width) { case 1: out8rb(caoff, val); break; case 2: out16rb(caoff, val); break; case 4: out32rb(caoff, val); break; } } static int cpcht_route_interrupt(device_t bus, device_t dev, int pin) { return (pin); } static int cpcht_alloc_msi(device_t dev, device_t child, int count, int maxcount, int *irqs) { struct cpcht_softc *sc; int i, j; sc = device_get_softc(dev); j = 0; /* Bail if no MSI PIC yet */ if (cpcht_msipic == 0) return (ENXIO); mtx_lock(&sc->htirq_mtx); for (i = 8; i < 124 - count; i++) { for (j = 0; j < count; j++) { if (sc->htirq_map[i+j].irq_type != IRQ_NONE) break; } if (j == count) break; i += j; /* We know there isn't a large enough run */ } if (j != count) { mtx_unlock(&sc->htirq_mtx); return (ENXIO); } for (j = 0; j < count; j++) { irqs[j] = MAP_IRQ(cpcht_msipic, i+j); sc->htirq_map[i+j].irq_type = IRQ_MSI; } mtx_unlock(&sc->htirq_mtx); return (0); } static int cpcht_release_msi(device_t dev, device_t child, int count, int *irqs) { struct cpcht_softc *sc; int i; sc = device_get_softc(dev); mtx_lock(&sc->htirq_mtx); for (i = 0; i < count; i++) sc->htirq_map[irqs[i] & 0xff].irq_type = IRQ_NONE; mtx_unlock(&sc->htirq_mtx); return (0); } static int cpcht_alloc_msix(device_t dev, device_t child, int *irq) { struct cpcht_softc *sc; int i; sc = device_get_softc(dev); /* Bail if no MSI PIC yet */ if (cpcht_msipic == 0) return (ENXIO); mtx_lock(&sc->htirq_mtx); for (i = 8; i < 124; i++) { if (sc->htirq_map[i].irq_type == IRQ_NONE) { sc->htirq_map[i].irq_type = IRQ_MSI; *irq = MAP_IRQ(cpcht_msipic, i); mtx_unlock(&sc->htirq_mtx); return (0); } } mtx_unlock(&sc->htirq_mtx); return (ENXIO); } static int cpcht_release_msix(device_t dev, device_t child, int irq) { struct cpcht_softc *sc; sc = device_get_softc(dev); mtx_lock(&sc->htirq_mtx); sc->htirq_map[irq & 0xff].irq_type = IRQ_NONE; mtx_unlock(&sc->htirq_mtx); return (0); } static int cpcht_map_msi(device_t dev, device_t child, int irq, uint64_t *addr, uint32_t *data) { device_t pcib; struct pci_devinfo *dinfo; struct pcicfg_ht *ht = NULL; for (pcib = child; pcib != dev; pcib = device_get_parent(device_get_parent(pcib))) { dinfo = device_get_ivars(pcib); ht = &dinfo->cfg.ht; if (ht == NULL) continue; } if (ht == NULL) return (ENXIO); *addr = ht->ht_msiaddr; *data = irq & 0xff; return (0); } /* * Driver for the integrated MPIC on U3/U4 (CPC925/CPC945) */ static int openpic_cpcht_probe(device_t); static int 
openpic_cpcht_attach(device_t); static void openpic_cpcht_config(device_t, u_int irq, enum intr_trigger trig, enum intr_polarity pol); static void openpic_cpcht_enable(device_t, u_int irq, u_int vector); static void openpic_cpcht_unmask(device_t, u_int irq); static void openpic_cpcht_eoi(device_t, u_int irq); static device_method_t openpic_cpcht_methods[] = { /* Device interface */ DEVMETHOD(device_probe, openpic_cpcht_probe), DEVMETHOD(device_attach, openpic_cpcht_attach), /* PIC interface */ DEVMETHOD(pic_bind, openpic_bind), DEVMETHOD(pic_config, openpic_cpcht_config), DEVMETHOD(pic_dispatch, openpic_dispatch), DEVMETHOD(pic_enable, openpic_cpcht_enable), DEVMETHOD(pic_eoi, openpic_cpcht_eoi), DEVMETHOD(pic_ipi, openpic_ipi), DEVMETHOD(pic_mask, openpic_mask), DEVMETHOD(pic_unmask, openpic_cpcht_unmask), { 0, 0 }, }; struct openpic_cpcht_softc { struct openpic_softc sc_openpic; struct mtx sc_ht_mtx; }; static driver_t openpic_cpcht_driver = { "htpic", openpic_cpcht_methods, sizeof(struct openpic_cpcht_softc), }; DRIVER_MODULE(openpic, unin, openpic_cpcht_driver, openpic_devclass, 0, 0); static int openpic_cpcht_probe(device_t dev) { const char *type = ofw_bus_get_type(dev); if (strcmp(type, "open-pic") != 0) return (ENXIO); device_set_desc(dev, OPENPIC_DEVSTR); return (0); } static int openpic_cpcht_attach(device_t dev) { struct openpic_cpcht_softc *sc; phandle_t node; int err, irq; node = ofw_bus_get_node(dev); err = openpic_common_attach(dev, node); if (err != 0) return (err); /* * The HT APIC stuff is not thread-safe, so we need a mutex to * protect it. */ sc = device_get_softc(dev); mtx_init(&sc->sc_ht_mtx, "htpic", NULL, MTX_SPIN); /* * Interrupts 0-3 are internally sourced and are level triggered * active low. Interrupts 4-123 are connected to a pulse generator * and should be programmed as edge triggered low-to-high. * * IBM CPC945 Manual, Section 9.3. */ for (irq = 0; irq < 4; irq++) openpic_config(dev, irq, INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW); for (irq = 4; irq < 124; irq++) openpic_config(dev, irq, INTR_TRIGGER_EDGE, INTR_POLARITY_LOW); /* * Use this PIC for MSI only if it is the root PIC. This may not * be necessary, but Linux does it, and I cannot find any U3 machines * with MSI devices to test. */ if (dev == root_pic) cpcht_msipic = node; return (0); } static void openpic_cpcht_config(device_t dev, u_int irq, enum intr_trigger trig, enum intr_polarity pol) { struct openpic_cpcht_softc *sc; uint32_t ht_irq; /* * The interrupt settings for the MPIC are completely determined * by the internal wiring in the northbridge. Real changes to these * settings need to be negotiated with the remote IO-APIC on the HT * link. 
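 *
 * Those negotiations use the interrupt-definition registers behind the
 * bridge's HT interrupt capability: writing (0x10 + 2 * source) to the
 * select byte at ht_base + PCIR_HT_COMMAND exposes a 32-bit register at
 * ht_base + 4, in which we toggle
 *
 *	HTAPIC_MASK		0x01	interrupt masked
 *	HTAPIC_TRIGGER_LEVEL	0x02	level- rather than edge-triggered
 *	HTAPIC_REQUEST_EOI	0x20	remote APIC wants an explicit EOI
 *
 * (a restatement of the constants defined earlier in this file).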
*/ sc = device_get_softc(dev); if (cpcht_irqmap != NULL && irq < 128 && cpcht_irqmap[irq].ht_base > 0 && !cpcht_irqmap[irq].edge) { mtx_lock_spin(&sc->sc_ht_mtx); /* Program the data port */ out8rb(cpcht_irqmap[irq].ht_base + PCIR_HT_COMMAND, 0x10 + (cpcht_irqmap[irq].ht_source << 1)); /* Grab the IRQ config register */ ht_irq = in32rb(cpcht_irqmap[irq].ht_base + 4); /* Mask the IRQ while we fiddle settings */ out32rb(cpcht_irqmap[irq].ht_base + 4, ht_irq | HTAPIC_MASK); /* Program the interrupt sense */ ht_irq &= ~(HTAPIC_TRIGGER_LEVEL | HTAPIC_REQUEST_EOI); if (trig == INTR_TRIGGER_EDGE) { cpcht_irqmap[irq].edge = 1; } else { cpcht_irqmap[irq].edge = 0; ht_irq |= HTAPIC_TRIGGER_LEVEL | HTAPIC_REQUEST_EOI; } out32rb(cpcht_irqmap[irq].ht_base + 4, ht_irq); mtx_unlock_spin(&sc->sc_ht_mtx); } } static void openpic_cpcht_enable(device_t dev, u_int irq, u_int vec) { struct openpic_cpcht_softc *sc; uint32_t ht_irq; openpic_enable(dev, irq, vec); sc = device_get_softc(dev); if (cpcht_irqmap != NULL && irq < 128 && cpcht_irqmap[irq].ht_base > 0) { mtx_lock_spin(&sc->sc_ht_mtx); /* Program the data port */ out8rb(cpcht_irqmap[irq].ht_base + PCIR_HT_COMMAND, 0x10 + (cpcht_irqmap[irq].ht_source << 1)); /* Unmask the interrupt */ ht_irq = in32rb(cpcht_irqmap[irq].ht_base + 4); ht_irq &= ~HTAPIC_MASK; out32rb(cpcht_irqmap[irq].ht_base + 4, ht_irq); mtx_unlock_spin(&sc->sc_ht_mtx); } openpic_cpcht_eoi(dev, irq); } static void openpic_cpcht_unmask(device_t dev, u_int irq) { struct openpic_cpcht_softc *sc; uint32_t ht_irq; openpic_unmask(dev, irq); sc = device_get_softc(dev); if (cpcht_irqmap != NULL && irq < 128 && cpcht_irqmap[irq].ht_base > 0) { mtx_lock_spin(&sc->sc_ht_mtx); /* Program the data port */ out8rb(cpcht_irqmap[irq].ht_base + PCIR_HT_COMMAND, 0x10 + (cpcht_irqmap[irq].ht_source << 1)); /* Unmask the interrupt */ ht_irq = in32rb(cpcht_irqmap[irq].ht_base + 4); ht_irq &= ~HTAPIC_MASK; out32rb(cpcht_irqmap[irq].ht_base + 4, ht_irq); mtx_unlock_spin(&sc->sc_ht_mtx); } openpic_cpcht_eoi(dev, irq); } static void openpic_cpcht_eoi(device_t dev, u_int irq) { struct openpic_cpcht_softc *sc; uint32_t off, mask; if (irq == 255) return; sc = device_get_softc(dev); if (cpcht_irqmap != NULL && irq < 128 && cpcht_irqmap[irq].ht_base > 0 && !cpcht_irqmap[irq].edge) { /* If this is an HT IRQ, acknowledge it at the remote APIC */ if (cpcht_irqmap[irq].apple_eoi) { off = (cpcht_irqmap[irq].ht_source >> 3) & ~3; mask = 1 << (cpcht_irqmap[irq].ht_source & 0x1f); out32rb(cpcht_irqmap[irq].apple_eoi + off, mask); } else { mtx_lock_spin(&sc->sc_ht_mtx); out8rb(cpcht_irqmap[irq].ht_base + PCIR_HT_COMMAND, 0x11 + (cpcht_irqmap[irq].ht_source << 1)); out32rb(cpcht_irqmap[irq].ht_base + 4, cpcht_irqmap[irq].eoi_data); mtx_unlock_spin(&sc->sc_ht_mtx); } } openpic_eoi(dev, irq); } Index: projects/powernv/powerpc/powermac/kiic.c =================================================================== --- projects/powernv/powerpc/powermac/kiic.c (revision 290990) +++ projects/powernv/powerpc/powermac/kiic.c (revision 290991) @@ -1,444 +1,444 @@ /*- * Copyright (c) 2001 Tsubai Masanari. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ * NetBSD: ki2c.c,v 1.11 2007/12/06 17:00:33 ad Exp * Id: ki2c.c,v 1.7 2002/10/05 09:56:05 tsubai Exp */ /* * Support routines for the Keywest I2C controller. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "iicbus_if.h" /* Keywest I2C Register offsets */ #define MODE 0 #define CONTROL 1 #define STATUS 2 #define ISR 3 #define IER 4 #define ADDR 5 #define SUBADDR 6 #define DATA 7 #define REV 8 /* MODE */ #define I2C_SPEED 0x03 /* Speed mask */ #define I2C_100kHz 0x00 #define I2C_50kHz 0x01 #define I2C_25kHz 0x02 #define I2C_MODE 0x0c /* Mode mask */ #define I2C_DUMBMODE 0x00 /* Dumb mode */ #define I2C_STDMODE 0x04 /* Standard mode */ #define I2C_STDSUBMODE 0x08 /* Standard mode + sub address */ #define I2C_COMBMODE 0x0c /* Combined mode */ #define I2C_PORT 0xf0 /* Port mask */ /* CONTROL */ #define I2C_CT_AAK 0x01 /* Send AAK */ #define I2C_CT_ADDR 0x02 /* Send address(es) */ #define I2C_CT_STOP 0x04 /* Send STOP */ #define I2C_CT_START 0x08 /* Send START */ /* STATUS */ #define I2C_ST_BUSY 0x01 /* Busy */ #define I2C_ST_LASTAAK 0x02 /* Last AAK */ #define I2C_ST_LASTRW 0x04 /* Last R/W */ #define I2C_ST_SDA 0x08 /* SDA */ #define I2C_ST_SCL 0x10 /* SCL */ /* ISR/IER */ #define I2C_INT_DATA 0x01 /* Data byte sent/received */ #define I2C_INT_ADDR 0x02 /* Address sent */ #define I2C_INT_STOP 0x04 /* STOP condition sent */ #define I2C_INT_START 0x08 /* START condition sent */ /* I2C flags */ #define I2C_BUSY 0x01 #define I2C_READING 0x02 #define I2C_ERROR 0x04 #define I2C_SELECTED 0x08 struct kiic_softc { device_t sc_dev; phandle_t sc_node; struct mtx sc_mutex; struct resource *sc_reg; int sc_irqrid; struct resource *sc_irq; void *sc_ih; u_int sc_regstep; u_int sc_flags; u_char *sc_data; int sc_resid; uint16_t sc_i2c_base; device_t sc_iicbus; }; static int kiic_probe(device_t dev); static int kiic_attach(device_t dev); static void kiic_writereg(struct kiic_softc *sc, u_int, u_int); static u_int kiic_readreg(struct kiic_softc *, u_int); static void kiic_setport(struct kiic_softc *, u_int); static void kiic_setmode(struct kiic_softc *, u_int); static void kiic_setspeed(struct kiic_softc *, u_int); static void kiic_intr(void *xsc); static int kiic_transfer(device_t dev, struct iic_msg *msgs, uint32_t nmsgs); static phandle_t kiic_get_node(device_t bus, device_t dev); static device_method_t kiic_methods[] = { /* device interface */ 
DEVMETHOD(device_probe, kiic_probe), DEVMETHOD(device_attach, kiic_attach), /* iicbus interface */ DEVMETHOD(iicbus_callback, iicbus_null_callback), DEVMETHOD(iicbus_transfer, kiic_transfer), /* ofw_bus interface */ DEVMETHOD(ofw_bus_get_node, kiic_get_node), { 0, 0 } }; static driver_t kiic_driver = { "iichb", kiic_methods, sizeof(struct kiic_softc) }; static devclass_t kiic_devclass; DRIVER_MODULE(kiic, macio, kiic_driver, kiic_devclass, 0, 0); DRIVER_MODULE(kiic, unin, kiic_driver, kiic_devclass, 0, 0); static int kiic_probe(device_t self) { const char *name; name = ofw_bus_get_name(self); if (name && strcmp(name, "i2c") == 0) { device_set_desc(self, "Keywest I2C controller"); return (0); } return (ENXIO); } static int kiic_attach(device_t self) { struct kiic_softc *sc = device_get_softc(self); int rid, rate; phandle_t node; char name[64]; bzero(sc, sizeof(*sc)); sc->sc_dev = self; node = ofw_bus_get_node(self); if (node == 0 || node == -1) { return (EINVAL); } rid = 0; sc->sc_reg = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->sc_reg == NULL) { return (ENOMEM); } - if (OF_getprop(node, "AAPL,i2c-rate", &rate, 4) != 4) { + if (OF_getencprop(node, "AAPL,i2c-rate", &rate, 4) != 4) { device_printf(self, "cannot get i2c-rate\n"); return (ENXIO); } - if (OF_getprop(node, "AAPL,address-step", &sc->sc_regstep, 4) != 4) { + if (OF_getencprop(node, "AAPL,address-step", &sc->sc_regstep, 4) != 4) { device_printf(self, "unable to find i2c address step\n"); return (ENXIO); } /* * Some Keywest I2C devices have their children attached directly * underneath them. Some have a single 'iicbus' child with the * devices underneath that. Sort this out, and make sure that the * OFW I2C layer has the correct node. * * Note: the I2C children of the Uninorth bridges have two ports. * In general, the port is designated in the 9th bit of the I2C * address. However, for kiic devices with children attached below * an i2c-bus node, the port is indicated in the 'reg' property * of the i2c-bus node. 
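 *
 * For example, assuming a hypothetical device at 8-bit slave address
 * 0xD2 behind port 1 (so sc_i2c_base = 1 << 8): kiic_transfer()
 * computes addr = 0xD2 | 0x100 = 0x1D2, selects port
 * (0x1D2 & 0x100) >> 8 = 1 and writes 0xD2 into the ADDR register.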
*/ sc->sc_node = node; node = OF_child(node); if (OF_getprop(node, "name", name, sizeof(name)) > 0) { if (strcmp(name,"i2c-bus") == 0) { phandle_t reg; if (OF_getprop(node, "reg", &reg, sizeof(reg)) > 0) sc->sc_i2c_base = reg << 8; sc->sc_node = node; } } mtx_init(&sc->sc_mutex, "kiic", NULL, MTX_DEF); sc->sc_irq = bus_alloc_resource_any(self, SYS_RES_IRQ, &sc->sc_irqrid, RF_ACTIVE); bus_setup_intr(self, sc->sc_irq, INTR_TYPE_MISC | INTR_MPSAFE, NULL, kiic_intr, sc, &sc->sc_ih); kiic_writereg(sc, ISR, kiic_readreg(sc, ISR)); kiic_writereg(sc, STATUS, 0); kiic_writereg(sc, IER, 0); kiic_setmode(sc, I2C_STDMODE); kiic_setspeed(sc, I2C_100kHz); /* XXX rate */ kiic_writereg(sc, IER, I2C_INT_DATA | I2C_INT_ADDR | I2C_INT_STOP); if (bootverbose) device_printf(self, "Revision: %02X\n", kiic_readreg(sc, REV)); /* Add the IIC bus layer */ sc->sc_iicbus = device_add_child(self, "iicbus", -1); return (bus_generic_attach(self)); } static void kiic_writereg(struct kiic_softc *sc, u_int reg, u_int val) { bus_write_4(sc->sc_reg, sc->sc_regstep * reg, val); DELAY(100); /* register access delay */ } static u_int kiic_readreg(struct kiic_softc *sc, u_int reg) { return bus_read_4(sc->sc_reg, sc->sc_regstep * reg) & 0xff; } static void kiic_setmode(struct kiic_softc *sc, u_int mode) { u_int x; KASSERT((mode & ~I2C_MODE) == 0, ("bad mode")); x = kiic_readreg(sc, MODE); x &= ~I2C_MODE; x |= mode; kiic_writereg(sc, MODE, x); } static void kiic_setport(struct kiic_softc *sc, u_int port) { u_int x; KASSERT(port == 1 || port == 0, ("bad port")); x = kiic_readreg(sc, MODE); x &= ~I2C_PORT; x |= (port << 4); kiic_writereg(sc, MODE, x); } static void kiic_setspeed(struct kiic_softc *sc, u_int speed) { u_int x; KASSERT((speed & ~I2C_SPEED) == 0, ("bad speed")); x = kiic_readreg(sc, MODE); x &= ~I2C_SPEED; x |= speed; kiic_writereg(sc, MODE, x); } static void kiic_intr(void *xsc) { struct kiic_softc *sc = xsc; u_int isr; uint32_t x; mtx_lock(&sc->sc_mutex); isr = kiic_readreg(sc, ISR); if (isr & I2C_INT_ADDR) { sc->sc_flags |= I2C_SELECTED; if (sc->sc_flags & I2C_READING) { if (sc->sc_resid > 1) { x = kiic_readreg(sc, CONTROL); x |= I2C_CT_AAK; kiic_writereg(sc, CONTROL, x); } } else { kiic_writereg(sc, DATA, *sc->sc_data++); sc->sc_resid--; } } if (isr & I2C_INT_DATA) { if (sc->sc_flags & I2C_READING) { if (sc->sc_resid > 0) { *sc->sc_data++ = kiic_readreg(sc, DATA); sc->sc_resid--; } if (sc->sc_resid == 0) /* done */ kiic_writereg(sc, CONTROL, 0); } else { if (sc->sc_resid == 0) { x = kiic_readreg(sc, CONTROL); x |= I2C_CT_STOP; kiic_writereg(sc, CONTROL, x); } else { kiic_writereg(sc, DATA, *sc->sc_data++); sc->sc_resid--; } } } if (isr & I2C_INT_STOP) { kiic_writereg(sc, CONTROL, 0); sc->sc_flags &= ~I2C_SELECTED; wakeup(sc->sc_dev); } kiic_writereg(sc, ISR, isr); mtx_unlock(&sc->sc_mutex); } static int kiic_transfer(device_t dev, struct iic_msg *msgs, uint32_t nmsgs) { struct kiic_softc *sc; int i, x, timo, err; uint16_t addr; uint8_t subaddr; sc = device_get_softc(dev); timo = 100; subaddr = 0; mtx_lock(&sc->sc_mutex); if (sc->sc_flags & I2C_BUSY) mtx_sleep(dev, &sc->sc_mutex, 0, "kiic", timo); if (sc->sc_flags & I2C_BUSY) { mtx_unlock(&sc->sc_mutex); return (ETIMEDOUT); } sc->sc_flags = I2C_BUSY; /* Clear pending interrupts, and reset controller */ kiic_writereg(sc, ISR, kiic_readreg(sc, ISR)); kiic_writereg(sc, STATUS, 0); for (i = 0; i < nmsgs; i++) { if (msgs[i].flags & IIC_M_NOSTOP) { if (msgs[i+1].flags & IIC_M_RD) kiic_setmode(sc, I2C_COMBMODE); else kiic_setmode(sc, I2C_STDSUBMODE); KASSERT(msgs[i].len == 1,
("oversize I2C message")); subaddr = msgs[i].buf[0]; i++; } else { kiic_setmode(sc, I2C_STDMODE); } sc->sc_data = msgs[i].buf; sc->sc_resid = msgs[i].len; sc->sc_flags = I2C_BUSY; addr = msgs[i].slave; timo = 1000 + sc->sc_resid * 200; timo += 100000; if (msgs[i].flags & IIC_M_RD) { sc->sc_flags |= I2C_READING; addr |= 1; } addr |= sc->sc_i2c_base; kiic_setport(sc, (addr & 0x100) >> 8); kiic_writereg(sc, ADDR, addr & 0xff); kiic_writereg(sc, SUBADDR, subaddr); x = kiic_readreg(sc, CONTROL) | I2C_CT_ADDR; kiic_writereg(sc, CONTROL, x); err = mtx_sleep(dev, &sc->sc_mutex, 0, "kiic", timo); msgs[i].len -= sc->sc_resid; if ((sc->sc_flags & I2C_ERROR) || err == EWOULDBLOCK) { device_printf(sc->sc_dev, "I2C error\n"); sc->sc_flags = 0; mtx_unlock(&sc->sc_mutex); return (EIO); } } sc->sc_flags = 0; mtx_unlock(&sc->sc_mutex); return (0); } static phandle_t kiic_get_node(device_t bus, device_t dev) { struct kiic_softc *sc; sc = device_get_softc(bus); /* We only have one child, the I2C bus, which needs our own node. */ return sc->sc_node; } Index: projects/powernv/powerpc/powermac/macgpio.c =================================================================== --- projects/powernv/powerpc/powermac/macgpio.c (revision 290990) +++ projects/powernv/powerpc/powermac/macgpio.c (revision 290991) @@ -1,403 +1,403 @@ /*- * Copyright 2008 by Nathan Whitehorn. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ /* * Driver for MacIO GPIO controller */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Macgpio softc */ struct macgpio_softc { phandle_t sc_node; struct resource *sc_gpios; int sc_gpios_rid; uint32_t sc_saved_gpio_levels[2]; uint32_t sc_saved_gpios[GPIO_COUNT]; uint32_t sc_saved_extint_gpios[GPIO_EXTINT_COUNT]; }; static MALLOC_DEFINE(M_MACGPIO, "macgpio", "macgpio device information"); static int macgpio_probe(device_t); static int macgpio_attach(device_t); static int macgpio_print_child(device_t dev, device_t child); static void macgpio_probe_nomatch(device_t, device_t); static struct resource *macgpio_alloc_resource(device_t, device_t, int, int *, u_long, u_long, u_long, u_int); static int macgpio_activate_resource(device_t, device_t, int, int, struct resource *); static int macgpio_deactivate_resource(device_t, device_t, int, int, struct resource *); static ofw_bus_get_devinfo_t macgpio_get_devinfo; static int macgpio_suspend(device_t dev); static int macgpio_resume(device_t dev); /* * Bus interface definition */ static device_method_t macgpio_methods[] = { /* Device interface */ DEVMETHOD(device_probe, macgpio_probe), DEVMETHOD(device_attach, macgpio_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, macgpio_suspend), DEVMETHOD(device_resume, macgpio_resume), /* Bus interface */ DEVMETHOD(bus_print_child, macgpio_print_child), DEVMETHOD(bus_probe_nomatch, macgpio_probe_nomatch), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), DEVMETHOD(bus_alloc_resource, macgpio_alloc_resource), DEVMETHOD(bus_activate_resource, macgpio_activate_resource), DEVMETHOD(bus_deactivate_resource, macgpio_deactivate_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_child_pnpinfo_str, ofw_bus_gen_child_pnpinfo_str), /* ofw_bus interface */ DEVMETHOD(ofw_bus_get_devinfo, macgpio_get_devinfo), DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat), DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model), DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name), DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node), DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type), { 0, 0 } }; static driver_t macgpio_pci_driver = { "macgpio", macgpio_methods, sizeof(struct macgpio_softc) }; devclass_t macgpio_devclass; DRIVER_MODULE(macgpio, macio, macgpio_pci_driver, macgpio_devclass, 0, 0); struct macgpio_devinfo { struct ofw_bus_devinfo mdi_obdinfo; struct resource_list mdi_resources; int gpio_num; }; static int macgpio_probe(device_t dev) { const char *name; name = ofw_bus_get_name(dev); if (name && strcmp(name, "gpio") == 0) { device_set_desc(dev, "MacIO GPIO Controller"); return (0); } return (ENXIO); } /* * Scan Open Firmware child nodes, and attach these as children * of the macgpio bus */ static int macgpio_attach(device_t dev) { struct macgpio_softc *sc; struct macgpio_devinfo *dinfo; phandle_t root, child, iparent; device_t cdev; uint32_t irq; sc = device_get_softc(dev); root = sc->sc_node = ofw_bus_get_node(dev); sc->sc_gpios = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->sc_gpios_rid, RF_ACTIVE); /* * Iterate through the sub-devices */ for (child = OF_child(root); child != 0; child = OF_peer(child)) { dinfo = malloc(sizeof(*dinfo), M_MACGPIO, M_WAITOK | M_ZERO); if (ofw_bus_gen_setup_devinfo(&dinfo->mdi_obdinfo, 
child) != 0) { free(dinfo, M_MACGPIO); continue; } - if (OF_getprop(child,"reg",&dinfo->gpio_num, + if (OF_getencprop(child, "reg", &dinfo->gpio_num, sizeof(dinfo->gpio_num)) != sizeof(dinfo->gpio_num)) { /* * Some early GPIO controllers don't provide GPIO * numbers for GPIOs designed only to provide * interrupt resources. We should still allow these * to attach, but with caution. */ dinfo->gpio_num = -1; } resource_list_init(&dinfo->mdi_resources); - if (OF_getprop(child, "interrupts", &irq, sizeof(irq)) == + if (OF_getencprop(child, "interrupts", &irq, sizeof(irq)) == sizeof(irq)) { - OF_searchprop(child, "interrupt-parent", &iparent, + OF_searchencprop(child, "interrupt-parent", &iparent, sizeof(iparent)); resource_list_add(&dinfo->mdi_resources, SYS_RES_IRQ, 0, MAP_IRQ(iparent, irq), MAP_IRQ(iparent, irq), 1); } /* Fix messed-up offsets */ if (dinfo->gpio_num > 0x50) dinfo->gpio_num -= 0x50; cdev = device_add_child(dev, NULL, -1); if (cdev == NULL) { device_printf(dev, "<%s>: device_add_child failed\n", dinfo->mdi_obdinfo.obd_name); ofw_bus_gen_destroy_devinfo(&dinfo->mdi_obdinfo); free(dinfo, M_MACGPIO); continue; } device_set_ivars(cdev, dinfo); } return (bus_generic_attach(dev)); } static int macgpio_print_child(device_t dev, device_t child) { struct macgpio_devinfo *dinfo; int retval = 0; dinfo = device_get_ivars(child); retval += bus_print_child_header(dev, child); if (dinfo->gpio_num >= GPIO_BASE) printf(" gpio %d", dinfo->gpio_num - GPIO_BASE); else if (dinfo->gpio_num >= GPIO_EXTINT_BASE) printf(" extint-gpio %d", dinfo->gpio_num - GPIO_EXTINT_BASE); else if (dinfo->gpio_num >= 0) printf(" addr 0x%02x", dinfo->gpio_num); /* should not happen */ resource_list_print_type(&dinfo->mdi_resources, "irq", SYS_RES_IRQ, "%ld"); retval += bus_print_child_footer(dev, child); return (retval); } static void macgpio_probe_nomatch(device_t dev, device_t child) { struct macgpio_devinfo *dinfo; const char *type; if (bootverbose) { dinfo = device_get_ivars(child); if ((type = ofw_bus_get_type(child)) == NULL) type = "(unknown)"; device_printf(dev, "<%s, %s>", type, ofw_bus_get_name(child)); if (dinfo->gpio_num >= 0) printf(" gpio %d",dinfo->gpio_num); resource_list_print_type(&dinfo->mdi_resources, "irq", SYS_RES_IRQ, "%ld"); printf(" (no driver attached)\n"); } } static struct resource * macgpio_alloc_resource(device_t bus, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct macgpio_devinfo *dinfo; dinfo = device_get_ivars(child); if (type != SYS_RES_IRQ) return (NULL); return (resource_list_alloc(&dinfo->mdi_resources, bus, child, type, rid, start, end, count, flags)); } static int macgpio_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *res) { struct macgpio_softc *sc; struct macgpio_devinfo *dinfo; u_char val; sc = device_get_softc(bus); dinfo = device_get_ivars(child); if (type != SYS_RES_IRQ) return ENXIO; if (dinfo->gpio_num >= 0) { val = bus_read_1(sc->sc_gpios,dinfo->gpio_num); val |= 0x80; bus_write_1(sc->sc_gpios,dinfo->gpio_num,val); } return (bus_activate_resource(bus, type, rid, res)); } static int macgpio_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *res) { struct macgpio_softc *sc; struct macgpio_devinfo *dinfo; u_char val; sc = device_get_softc(bus); dinfo = device_get_ivars(child); if (type != SYS_RES_IRQ) return ENXIO; if (dinfo->gpio_num >= 0) { val = bus_read_1(sc->sc_gpios,dinfo->gpio_num); val &= ~0x80; bus_write_1(sc->sc_gpios,dinfo->gpio_num,val); } 
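	/*
	 * Clearing bit 0x80 mirrors macgpio_activate_resource() above,
	 * which sets it when the IRQ resource is activated; the bit
	 * appears to gate whether the pin raises its interrupt.
	 */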
return (bus_deactivate_resource(bus, type, rid, res)); } uint8_t macgpio_read(device_t dev) { struct macgpio_softc *sc; struct macgpio_devinfo *dinfo; sc = device_get_softc(device_get_parent(dev)); dinfo = device_get_ivars(dev); if (dinfo->gpio_num < 0) return (0); return (bus_read_1(sc->sc_gpios,dinfo->gpio_num)); } void macgpio_write(device_t dev, uint8_t val) { struct macgpio_softc *sc; struct macgpio_devinfo *dinfo; sc = device_get_softc(device_get_parent(dev)); dinfo = device_get_ivars(dev); if (dinfo->gpio_num < 0) return; bus_write_1(sc->sc_gpios,dinfo->gpio_num,val); } static const struct ofw_bus_devinfo * macgpio_get_devinfo(device_t dev, device_t child) { struct macgpio_devinfo *dinfo; dinfo = device_get_ivars(child); return (&dinfo->mdi_obdinfo); } static int macgpio_suspend(device_t dev) { struct macgpio_softc *sc; int i; sc = device_get_softc(dev); sc->sc_saved_gpio_levels[0] = bus_read_4(sc->sc_gpios, GPIO_LEVELS_0); sc->sc_saved_gpio_levels[1] = bus_read_4(sc->sc_gpios, GPIO_LEVELS_1); for (i = 0; i < GPIO_COUNT; i++) sc->sc_saved_gpios[i] = bus_read_1(sc->sc_gpios, GPIO_BASE + i); for (i = 0; i < GPIO_EXTINT_COUNT; i++) sc->sc_saved_extint_gpios[i] = bus_read_1(sc->sc_gpios, GPIO_EXTINT_BASE + i); return (0); } static int macgpio_resume(device_t dev) { struct macgpio_softc *sc; int i; sc = device_get_softc(dev); bus_write_4(sc->sc_gpios, GPIO_LEVELS_0, sc->sc_saved_gpio_levels[0]); bus_write_4(sc->sc_gpios, GPIO_LEVELS_1, sc->sc_saved_gpio_levels[1]); for (i = 0; i < GPIO_COUNT; i++) bus_write_1(sc->sc_gpios, GPIO_BASE + i, sc->sc_saved_gpios[i]); for (i = 0; i < GPIO_EXTINT_COUNT; i++) bus_write_1(sc->sc_gpios, GPIO_EXTINT_BASE + i, sc->sc_saved_extint_gpios[i]); return (0); } Index: projects/powernv/powerpc/pseries/mmu_phyp.c =================================================================== --- projects/powernv/powerpc/pseries/mmu_phyp.c (revision 290990) +++ projects/powernv/powerpc/pseries/mmu_phyp.c (revision 290991) @@ -1,474 +1,474 @@ /* * Copyright (C) 2010 Andreas Tobler * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmu_if.h" #include "moea64_if.h" #include "phyp-hvcall.h" extern int n_slbs; static struct rmlock mphyp_eviction_lock; /* * Kernel MMU interface */ static void mphyp_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend); static void mphyp_cpu_bootstrap(mmu_t mmup, int ap); static int64_t mphyp_pte_synch(mmu_t, struct pvo_entry *pvo); static int64_t mphyp_pte_clear(mmu_t, struct pvo_entry *pvo, uint64_t ptebit); static int64_t mphyp_pte_unset(mmu_t, struct pvo_entry *pvo); static int mphyp_pte_insert(mmu_t, struct pvo_entry *pvo); static mmu_method_t mphyp_methods[] = { MMUMETHOD(mmu_bootstrap, mphyp_bootstrap), MMUMETHOD(mmu_cpu_bootstrap, mphyp_cpu_bootstrap), MMUMETHOD(moea64_pte_synch, mphyp_pte_synch), MMUMETHOD(moea64_pte_clear, mphyp_pte_clear), MMUMETHOD(moea64_pte_unset, mphyp_pte_unset), MMUMETHOD(moea64_pte_insert, mphyp_pte_insert), /* XXX: pmap_copy_page, pmap_init_page with H_PAGE_INIT */ { 0, 0 } }; MMU_DEF_INHERIT(pseries_mmu, "mmu_phyp", mphyp_methods, 0, oea64_mmu); static int brokenkvm = 0; static void print_kvm_bug_warning(void *data) { if (brokenkvm) printf("WARNING: Running on a broken hypervisor that does " "not support mandatory H_CLEAR_MOD and H_CLEAR_REF " "hypercalls. Performance will be suboptimal.\n"); } SYSINIT(kvmbugwarn1, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1, print_kvm_bug_warning, NULL); SYSINIT(kvmbugwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 1, print_kvm_bug_warning, NULL); static void mphyp_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { uint64_t final_pteg_count = 0; char buf[8]; uint32_t prop[2]; uint32_t nptlp, shift = 0, slb_encoding = 0; uint32_t lp_size, lp_encoding; struct lpte old; uint64_t vsid; phandle_t dev, node, root; int idx, len, res; rm_init(&mphyp_eviction_lock, "pte eviction"); moea64_early_bootstrap(mmup, kernelstart, kernelend); root = OF_peer(0); dev = OF_child(root); while (dev != 0) { res = OF_getprop(dev, "name", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpus") == 0) break; dev = OF_peer(dev); } node = OF_child(dev); while (node != 0) { res = OF_getprop(node, "device_type", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpu") == 0) break; node = OF_peer(node); } - res = OF_getprop(node, "ibm,pft-size", prop, sizeof(prop)); + res = OF_getencprop(node, "ibm,pft-size", prop, sizeof(prop)); if (res <= 0) panic("mmu_phyp: unknown PFT size"); final_pteg_count = 1 << prop[1]; - res = OF_getprop(node, "ibm,slb-size", prop, sizeof(prop[0])); + res = OF_getencprop(node, "ibm,slb-size", prop, sizeof(prop[0])); if (res > 0) n_slbs = prop[0]; moea64_pteg_count = final_pteg_count / sizeof(struct lpteg); /* Clear any old page table entries */ for (idx = 0; idx < moea64_pteg_count*8; idx++) { phyp_pft_hcall(H_READ, 0, idx, 0, 0, &old.pte_hi, &old.pte_lo, &old.pte_lo); vsid = (old.pte_hi << (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) >> 28; if (vsid == VSID_VRMA || vsid == 0 /* Older VRMA */) continue; if (old.pte_hi & LPTE_VALID) phyp_hcall(H_REMOVE, 0, idx, 0); } /* * Scan the large page size property for PAPR compatible machines. * See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties' * for the encoding of the property. 
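 *
 * The property is a flat list of cells of the form
 *
 *	{ shift, slb_encoding, nptlp, { lp_size, lp_encoding } * nptlp } ...
 *
 * so a hypothetical 16 MB large-page entry might read
 * { 24, SLBV_L, 1, 24, 0 }. The loop below hunts for exactly that
 * shape: SLB[L] set with a zero PTE[LP] encoding.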
*/ len = OF_getproplen(node, "ibm,segment-page-sizes"); if (len > 0) { /* * We have to use a variable length array on the stack * since we have very limited stack space. */ pcell_t arr[len/sizeof(cell_t)]; res = OF_getencprop(node, "ibm,segment-page-sizes", arr, sizeof(arr)); len /= 4; idx = 0; while (len > 0) { shift = arr[idx]; slb_encoding = arr[idx + 1]; nptlp = arr[idx + 2]; idx += 3; len -= 3; while (len > 0 && nptlp) { lp_size = arr[idx]; lp_encoding = arr[idx+1]; if (slb_encoding == SLBV_L && lp_encoding == 0) break; idx += 2; len -= 2; nptlp--; } if (nptlp && slb_encoding == SLBV_L && lp_encoding == 0) break; } if (len == 0) panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) " "not supported by this system. Please enable huge " "page backing if running under PowerKVM."); moea64_large_page_shift = shift; moea64_large_page_size = 1ULL << lp_size; } moea64_mid_bootstrap(mmup, kernelstart, kernelend); moea64_late_bootstrap(mmup, kernelstart, kernelend); /* Test for broken versions of KVM that don't conform to the spec */ if (phyp_hcall(H_CLEAR_MOD, 0, 0) == H_FUNCTION) brokenkvm = 1; } static void mphyp_cpu_bootstrap(mmu_t mmup, int ap) { struct slb *slb = PCPU_GET(slb); register_t seg0; int i; /* * Install kernel SLB entries */ __asm __volatile ("slbia"); __asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : "r"(0)); for (i = 0; i < 64; i++) { if (!(slb[i].slbe & SLBE_VALID)) continue; __asm __volatile ("slbmte %0, %1" :: "r"(slb[i].slbv), "r"(slb[i].slbe)); } } static int64_t mphyp_pte_synch(mmu_t mmu, struct pvo_entry *pvo) { struct lpte pte; uint64_t junk; __asm __volatile("ptesync"); phyp_pft_hcall(H_READ, 0, pvo->pvo_pte.slot, 0, 0, &pte.pte_hi, &pte.pte_lo, &junk); if ((pte.pte_hi & LPTE_AVPN_MASK) != ((pvo->pvo_vpn >> (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) & LPTE_AVPN_MASK)) return (-1); if (!(pte.pte_hi & LPTE_VALID)) return (-1); return (pte.pte_lo & (LPTE_CHG | LPTE_REF)); } static int64_t mphyp_pte_clear(mmu_t mmu, struct pvo_entry *pvo, uint64_t ptebit) { struct rm_priotracker track; int64_t refchg; uint64_t ptelo, junk; int err; /* * This involves two steps (synch and clear) so we need the entry * not to change in the middle. We are protected against deliberate * unset by virtue of holding the pmap lock. Protection against * incidental unset (page table eviction) comes from holding the * shared eviction lock. */ PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); rm_rlock(&mphyp_eviction_lock, &track); refchg = mphyp_pte_synch(mmu, pvo); if (refchg < 0) { rm_runlock(&mphyp_eviction_lock, &track); return (refchg); } if (brokenkvm) { /* * No way to clear either bit, which is total madness. * Pessimistically claim that, once modified, it stays so * forever and that it is never referenced. 
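 *
 * Concretely: keep whatever LPTE_CHG bit the synch above returned,
 * and strip LPTE_REF from the result unconditionally.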
*/ rm_runlock(&mphyp_eviction_lock, &track); return (refchg & ~LPTE_REF); } if (ptebit & LPTE_CHG) { err = phyp_pft_hcall(H_CLEAR_MOD, 0, pvo->pvo_pte.slot, 0, 0, &ptelo, &junk, &junk); KASSERT(err == H_SUCCESS, ("Error clearing page change bit: %d", err)); refchg |= (ptelo & LPTE_CHG); } if (ptebit & LPTE_REF) { err = phyp_pft_hcall(H_CLEAR_REF, 0, pvo->pvo_pte.slot, 0, 0, &ptelo, &junk, &junk); KASSERT(err == H_SUCCESS, ("Error clearing page reference bit: %d", err)); refchg |= (ptelo & LPTE_REF); } rm_runlock(&mphyp_eviction_lock, &track); return (refchg); } static int64_t mphyp_pte_unset(mmu_t mmu, struct pvo_entry *pvo) { struct lpte pte; uint64_t junk; int err; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); moea64_pte_from_pvo(pvo, &pte); err = phyp_pft_hcall(H_REMOVE, H_AVPN, pvo->pvo_pte.slot, pte.pte_hi & LPTE_AVPN_MASK, 0, &pte.pte_hi, &pte.pte_lo, &junk); KASSERT(err == H_SUCCESS || err == H_NOT_FOUND, ("Error removing page: %d", err)); if (err == H_NOT_FOUND) { moea64_pte_overflow--; return (-1); } return (pte.pte_lo & (LPTE_REF | LPTE_CHG)); } static uintptr_t mphyp_pte_spillable_ident(uintptr_t ptegbase, struct lpte *to_evict) { uint64_t slot, junk, k; struct lpte pt; int i, j; /* Start at a random slot */ i = mftb() % 8; k = -1; for (j = 0; j < 8; j++) { slot = ptegbase + (i + j) % 8; phyp_pft_hcall(H_READ, 0, slot, 0, 0, &pt.pte_hi, &pt.pte_lo, &junk); if (pt.pte_hi & LPTE_WIRED) continue; /* This is a candidate, so remember it */ k = slot; /* Try to get a page that has not been used lately */ if (!(pt.pte_hi & LPTE_VALID) || !(pt.pte_lo & LPTE_REF)) { memcpy(to_evict, &pt, sizeof(struct lpte)); return (k); } } if (k == -1) return (k); phyp_pft_hcall(H_READ, 0, k, 0, 0, &to_evict->pte_hi, &to_evict->pte_lo, &junk); return (k); } static int mphyp_pte_insert(mmu_t mmu, struct pvo_entry *pvo) { struct rm_priotracker track; int64_t result; struct lpte evicted, pte; uint64_t index, junk, lastptelo; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); /* Initialize PTE */ moea64_pte_from_pvo(pvo, &pte); evicted.pte_hi = 0; /* Make sure further insertion is locked out during evictions */ rm_rlock(&mphyp_eviction_lock, &track); /* * First try primary hash. */ pvo->pvo_pte.slot &= ~7UL; /* Base slot address */ result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte.pte_hi, pte.pte_lo, &index, &evicted.pte_lo, &junk); if (result == H_SUCCESS) { rm_runlock(&mphyp_eviction_lock, &track); pvo->pvo_pte.slot = index; return (0); } KASSERT(result == H_PTEG_FULL, ("Page insertion error: %ld " "(ptegidx: %#zx/%#x, PTE %#lx/%#lx", result, pvo->pvo_pte.slot, moea64_pteg_count, pte.pte_hi, pte.pte_lo)); /* * Next try secondary hash. */ pvo->pvo_vaddr ^= PVO_HID; pte.pte_hi ^= LPTE_HID; pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte.pte_hi, pte.pte_lo, &index, &evicted.pte_lo, &junk); if (result == H_SUCCESS) { rm_runlock(&mphyp_eviction_lock, &track); pvo->pvo_pte.slot = index; return (0); } KASSERT(result == H_PTEG_FULL, ("Secondary page insertion error: %ld", result)); /* * Out of luck. Find a PTE to sacrifice. */ /* Lock out all insertions for a bit */ rm_runlock(&mphyp_eviction_lock, &track); rm_wlock(&mphyp_eviction_lock); index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted); if (index == -1L) { /* Try other hash table? 
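 * Flipping PVO_HID/LPTE_HID and XORing the slot with
 * (moea64_pteg_mask << 3) moves us to the secondary PTEG, the same
 * dance the H_ENTER retry above performed.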
*/ pvo->pvo_vaddr ^= PVO_HID; pte.pte_hi ^= LPTE_HID; pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted); } if (index == -1L) { /* No freeable slots in either PTEG? We're hosed. */ rm_wunlock(&mphyp_eviction_lock); panic("mphyp_pte_insert: overflow"); return (-1); } /* Victim acquired: update page before waving goodbye */ if (evicted.pte_hi & LPTE_VALID) { result = phyp_pft_hcall(H_REMOVE, H_AVPN, index, evicted.pte_hi & LPTE_AVPN_MASK, 0, &junk, &lastptelo, &junk); moea64_pte_overflow++; KASSERT(result == H_SUCCESS, ("Error evicting page: %d", (int)result)); } /* * Set the new PTE. */ result = phyp_pft_hcall(H_ENTER, H_EXACT, index, pte.pte_hi, pte.pte_lo, &index, &evicted.pte_lo, &junk); rm_wunlock(&mphyp_eviction_lock); /* All clear */ pvo->pvo_pte.slot = index; if (result == H_SUCCESS) return (0); panic("Page replacement error: %ld", result); return (result); } Index: projects/powernv/powerpc/pseries/phyp_console.c =================================================================== --- projects/powernv/powerpc/pseries/phyp_console.c (revision 290990) +++ projects/powernv/powerpc/pseries/phyp_console.c (revision 290991) @@ -1,433 +1,433 @@ /*- * Copyright (C) 2011 by Nathan Whitehorn. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "phyp-hvcall.h" #include "uart_if.h" struct uart_phyp_softc { device_t dev; phandle_t node; int vtermid; struct tty *tp; struct resource *irqres; int irqrid; struct callout callout; void *sc_icookie; int polltime; struct mtx sc_mtx; int protocol; union { uint64_t u64[2]; char str[16]; } phyp_inbuf; uint64_t inbuflen; uint8_t outseqno; }; static struct uart_phyp_softc *console_sc = NULL; #if defined(KDB) static int alt_break_state; #endif enum { HVTERM1, HVTERMPROT }; #define VS_DATA_PACKET_HEADER 0xff #define VS_CONTROL_PACKET_HEADER 0xfe #define VSV_SET_MODEM_CTL 0x01 #define VSV_MODEM_CTL_UPDATE 0x02 #define VSV_RENEGOTIATE_CONNECTION 0x03 #define VS_QUERY_PACKET_HEADER 0xfd #define VSV_SEND_VERSION_NUMBER 0x01 #define VSV_SEND_MODEM_CTL_STATUS 0x02 #define VS_QUERY_RESPONSE_PACKET_HEADER 0xfc static int uart_phyp_probe(device_t dev); static int uart_phyp_attach(device_t dev); static void uart_phyp_intr(void *v); static device_method_t uart_phyp_methods[] = { /* Device interface */ DEVMETHOD(device_probe, uart_phyp_probe), DEVMETHOD(device_attach, uart_phyp_attach), DEVMETHOD_END }; static driver_t uart_phyp_driver = { "uart", uart_phyp_methods, sizeof(struct uart_phyp_softc), }; DRIVER_MODULE(uart_phyp, vdevice, uart_phyp_driver, uart_devclass, 0, 0); static cn_probe_t uart_phyp_cnprobe; static cn_init_t uart_phyp_cninit; static cn_term_t uart_phyp_cnterm; static cn_getc_t uart_phyp_cngetc; static cn_putc_t uart_phyp_cnputc; static cn_grab_t uart_phyp_cngrab; static cn_ungrab_t uart_phyp_cnungrab; CONSOLE_DRIVER(uart_phyp); static void uart_phyp_ttyoutwakeup(struct tty *tp); static struct ttydevsw uart_phyp_tty_class = { .tsw_flags = TF_INITLOCK|TF_CALLOUT, .tsw_outwakeup = uart_phyp_ttyoutwakeup, }; static int uart_phyp_probe_node(struct uart_phyp_softc *sc) { phandle_t node = sc->node; uint32_t reg; char buf[64]; sc->inbuflen = 0; sc->outseqno = 0; if (OF_getprop(node, "name", buf, sizeof(buf)) <= 0) return (ENXIO); if (strcmp(buf, "vty") != 0) return (ENXIO); if (OF_getprop(node, "device_type", buf, sizeof(buf)) <= 0) return (ENXIO); if (strcmp(buf, "serial") != 0) return (ENXIO); reg = -1; - OF_getprop(node, "reg", &reg, sizeof(reg)); + OF_getencprop(node, "reg", &reg, sizeof(reg)); if (reg == -1) return (ENXIO); sc->vtermid = reg; sc->node = node; if (OF_getprop(node, "compatible", buf, sizeof(buf)) <= 0) return (ENXIO); if (strcmp(buf, "hvterm1") == 0) { sc->protocol = HVTERM1; return (0); } else if (strcmp(buf, "hvterm-protocol") == 0) { sc->protocol = HVTERMPROT; return (0); } return (ENXIO); } static int uart_phyp_probe(device_t dev) { const char *name; struct uart_phyp_softc sc; int err; name = ofw_bus_get_name(dev); if (name == NULL || strcmp(name, "vty") != 0) return (ENXIO); sc.node = ofw_bus_get_node(dev); err = uart_phyp_probe_node(&sc); if (err != 0) return (err); device_set_desc(dev, "POWER Hypervisor Virtual Serial Port"); return (err); } static void uart_phyp_cnprobe(struct consdev *cp) { char buf[64]; ihandle_t stdout; phandle_t input, chosen; static struct uart_phyp_softc sc; if ((chosen = OF_finddevice("/chosen")) == -1) goto fail; /* Check if OF has an active stdin/stdout */ input = -1; - if (OF_getprop(chosen, "stdout", &stdout, + if (OF_getencprop(chosen, "stdout", &stdout, sizeof(stdout)) == sizeof(stdout) && stdout != 0) input = OF_instance_to_package(stdout); if (input ==
-1) goto fail; if (OF_getprop(input, "device_type", buf, sizeof(buf)) == -1) goto fail; if (strcmp(buf, "serial") != 0) goto fail; sc.node = input; if (uart_phyp_probe_node(&sc) != 0) goto fail; mtx_init(&sc.sc_mtx, "uart_phyp", NULL, MTX_SPIN | MTX_QUIET | MTX_NOWITNESS); cp->cn_pri = CN_NORMAL; console_sc = &sc; return; fail: cp->cn_pri = CN_DEAD; return; } static int uart_phyp_attach(device_t dev) { struct uart_phyp_softc *sc; int unit; sc = device_get_softc(dev); sc->dev = dev; sc->node = ofw_bus_get_node(dev); uart_phyp_probe_node(sc); unit = device_get_unit(dev); sc->tp = tty_alloc(&uart_phyp_tty_class, sc); mtx_init(&sc->sc_mtx, device_get_nameunit(dev), NULL, MTX_SPIN | MTX_QUIET | MTX_NOWITNESS); if (console_sc != NULL && console_sc->vtermid == sc->vtermid) { sc->outseqno = console_sc->outseqno; console_sc = sc; sprintf(uart_phyp_consdev.cn_name, "ttyu%r", unit); tty_init_console(sc->tp, 0); } sc->irqrid = 0; sc->irqres = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irqrid, RF_ACTIVE | RF_SHAREABLE); if (sc->irqres != NULL) { bus_setup_intr(dev, sc->irqres, INTR_TYPE_TTY | INTR_MPSAFE, NULL, uart_phyp_intr, sc, &sc->sc_icookie); } else { callout_init(&sc->callout, 1); sc->polltime = hz / 20; if (sc->polltime < 1) sc->polltime = 1; callout_reset(&sc->callout, sc->polltime, uart_phyp_intr, sc); } tty_makedev(sc->tp, NULL, "u%r", unit); return (0); } static void uart_phyp_cninit(struct consdev *cp) { strcpy(cp->cn_name, "phypcons"); } static void uart_phyp_cnterm(struct consdev *cp) { } static int uart_phyp_get(struct uart_phyp_softc *sc, void *buffer, size_t bufsize) { int err; int hdr = 0; uart_lock(&sc->sc_mtx); if (sc->inbuflen == 0) { err = phyp_pft_hcall(H_GET_TERM_CHAR, sc->vtermid, 0, 0, 0, &sc->inbuflen, &sc->phyp_inbuf.u64[0], &sc->phyp_inbuf.u64[1]); if (err != H_SUCCESS) { uart_unlock(&sc->sc_mtx); return (-1); } hdr = 1; } if (sc->inbuflen == 0) { uart_unlock(&sc->sc_mtx); return (0); } if (bufsize > sc->inbuflen) bufsize = sc->inbuflen; if ((sc->protocol == HVTERMPROT) && (hdr == 1)) { sc->inbuflen = sc->inbuflen - 4; /* The VTERM protocol has a 4 byte header, skip it here.
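 * Data packets in the hvterm-protocol variant are framed as
 *
 *	byte 0:    VS_DATA_PACKET_HEADER (0xff)
 *	byte 1:    total length, header included (4 + payload, max 16)
 *	bytes 2-3: big-endian sequence number
 *	bytes 4+:  payload, at most 12 bytes
 *
 * so, e.g., sending "ok" as sequence 1 puts ff 06 00 01 'o' 'k' on
 * the wire; uart_phyp_put() below builds exactly this frame.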
*/ memmove(&sc->phyp_inbuf.str[0], &sc->phyp_inbuf.str[4], sc->inbuflen); } memcpy(buffer, sc->phyp_inbuf.str, bufsize); sc->inbuflen -= bufsize; if (sc->inbuflen > 0) memmove(&sc->phyp_inbuf.str[0], &sc->phyp_inbuf.str[bufsize], sc->inbuflen); uart_unlock(&sc->sc_mtx); return (bufsize); } static int uart_phyp_put(struct uart_phyp_softc *sc, void *buffer, size_t bufsize) { uint16_t seqno; uint64_t len = 0; int err; union { uint64_t u64[2]; char bytes[16]; } cbuf; uart_lock(&sc->sc_mtx); switch (sc->protocol) { case HVTERM1: if (bufsize > 16) bufsize = 16; memcpy(&cbuf, buffer, bufsize); len = bufsize; break; case HVTERMPROT: if (bufsize > 12) bufsize = 12; seqno = sc->outseqno++; cbuf.bytes[0] = VS_DATA_PACKET_HEADER; cbuf.bytes[1] = 4 + bufsize; /* total length, max 16 bytes */ cbuf.bytes[2] = (seqno >> 8) & 0xff; cbuf.bytes[3] = seqno & 0xff; memcpy(&cbuf.bytes[4], buffer, bufsize); len = 4 + bufsize; break; } do { err = phyp_hcall(H_PUT_TERM_CHAR, sc->vtermid, len, cbuf.u64[0], cbuf.u64[1]); DELAY(100); } while (err == H_BUSY); uart_unlock(&sc->sc_mtx); return (bufsize); } static int uart_phyp_cngetc(struct consdev *cp) { unsigned char c; int retval; retval = uart_phyp_get(console_sc, &c, 1); if (retval != 1) return (-1); #if defined(KDB) kdb_alt_break(c, &alt_break_state); #endif return (c); } static void uart_phyp_cnputc(struct consdev *cp, int c) { unsigned char ch = c; uart_phyp_put(console_sc, &ch, 1); } static void uart_phyp_cngrab(struct consdev *cp) { } static void uart_phyp_cnungrab(struct consdev *cp) { } static void uart_phyp_ttyoutwakeup(struct tty *tp) { struct uart_phyp_softc *sc; char buffer[8]; int len; sc = tty_softc(tp); while ((len = ttydisc_getc(tp, buffer, sizeof(buffer))) != 0) uart_phyp_put(sc, buffer, len); } static void uart_phyp_intr(void *v) { struct uart_phyp_softc *sc = v; struct tty *tp = sc->tp; unsigned char c; int len; tty_lock(tp); while ((len = uart_phyp_get(sc, &c, 1)) > 0) ttydisc_rint(tp, c, 0); ttydisc_rint_done(tp); tty_unlock(tp); if (sc->irqres == NULL) callout_reset(&sc->callout, sc->polltime, uart_phyp_intr, sc); } Index: projects/powernv/powerpc/pseries/phyp_llan.c =================================================================== --- projects/powernv/powerpc/pseries/phyp_llan.c (revision 290990) +++ projects/powernv/powerpc/pseries/phyp_llan.c (revision 290991) @@ -1,512 +1,512 @@ /*- * Copyright 2013 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define LLAN_MAX_RX_PACKETS 100 #define LLAN_MAX_TX_PACKETS 100 #define LLAN_RX_BUF_LEN 8*PAGE_SIZE #define LLAN_BUFDESC_VALID (1ULL << 63) #define LLAN_ADD_MULTICAST 0x1 #define LLAN_DEL_MULTICAST 0x2 #define LLAN_CLEAR_MULTICAST 0x3 struct llan_xfer { struct mbuf *rx_mbuf; bus_dmamap_t rx_dmamap; uint64_t rx_bufdesc; }; struct llan_receive_queue_entry { /* PAPR page 539 */ uint8_t control; uint8_t reserved; uint16_t offset; uint32_t length; uint64_t handle; } __packed; struct llan_softc { device_t dev; struct mtx io_lock; cell_t unit; uint8_t mac_address[8]; int irqid; struct resource *irq; void *irq_cookie; bus_dma_tag_t rx_dma_tag; bus_dma_tag_t rxbuf_dma_tag; bus_dma_tag_t tx_dma_tag; bus_dmamap_t tx_dma_map; struct llan_receive_queue_entry *rx_buf; int rx_dma_slot; int rx_valid_val; bus_dmamap_t rx_buf_map; bus_addr_t rx_buf_phys; bus_size_t rx_buf_len; bus_addr_t input_buf_phys; bus_addr_t filter_buf_phys; struct llan_xfer rx_xfer[LLAN_MAX_RX_PACKETS]; struct ifnet *ifp; }; static int llan_probe(device_t); static int llan_attach(device_t); static void llan_intr(void *xsc); static void llan_init(void *xsc); static void llan_start(struct ifnet *ifp); static int llan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static void llan_rx_load_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int err); static int llan_add_rxbuf(struct llan_softc *sc, struct llan_xfer *rx); static int llan_set_multicast(struct llan_softc *sc); static devclass_t llan_devclass; static device_method_t llan_methods[] = { DEVMETHOD(device_probe, llan_probe), DEVMETHOD(device_attach, llan_attach), DEVMETHOD_END }; static driver_t llan_driver = { "llan", llan_methods, sizeof(struct llan_softc) }; DRIVER_MODULE(llan, vdevice, llan_driver, llan_devclass, 0, 0); static int llan_probe(device_t dev) { if (!ofw_bus_is_compatible(dev,"IBM,l-lan")) return (ENXIO); device_set_desc(dev, "POWER Hypervisor Virtual Ethernet"); return (0); } static int llan_attach(device_t dev) { struct llan_softc *sc; phandle_t node; int error, i; sc = device_get_softc(dev); sc->dev = dev; /* Get firmware properties */ node = ofw_bus_get_node(dev); OF_getprop(node, "local-mac-address", sc->mac_address, sizeof(sc->mac_address)); - OF_getprop(node, "reg", &sc->unit, sizeof(sc->unit)); + OF_getencprop(node, "reg", &sc->unit, sizeof(sc->unit)); mtx_init(&sc->io_lock, "llan", NULL, MTX_DEF); /* Setup interrupt */ sc->irqid = 0; sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irqid, RF_ACTIVE); if (!sc->irq) { device_printf(dev, "Could not allocate IRQ\n"); mtx_destroy(&sc->io_lock); return (ENXIO); } bus_setup_intr(dev, sc->irq, INTR_TYPE_MISC | INTR_MPSAFE | INTR_ENTROPY, NULL, llan_intr, sc, &sc->irq_cookie); /* Setup DMA */ error = 
bus_dma_tag_create(bus_get_dma_tag(dev), 16, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, LLAN_RX_BUF_LEN, 1, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->rx_dma_tag); error = bus_dma_tag_create(bus_get_dma_tag(dev), 4, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, 1, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->rxbuf_dma_tag); error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, 6, BUS_SPACE_MAXSIZE_32BIT, 0, busdma_lock_mutex, &sc->io_lock, &sc->tx_dma_tag); error = bus_dmamem_alloc(sc->rx_dma_tag, (void **)&sc->rx_buf, BUS_DMA_WAITOK | BUS_DMA_ZERO, &sc->rx_buf_map); error = bus_dmamap_load(sc->rx_dma_tag, sc->rx_buf_map, sc->rx_buf, LLAN_RX_BUF_LEN, llan_rx_load_cb, sc, 0); /* TX DMA maps */ bus_dmamap_create(sc->tx_dma_tag, 0, &sc->tx_dma_map); /* RX DMA */ for (i = 0; i < LLAN_MAX_RX_PACKETS; i++) { error = bus_dmamap_create(sc->rxbuf_dma_tag, 0, &sc->rx_xfer[i].rx_dmamap); sc->rx_xfer[i].rx_mbuf = NULL; } /* Attach to network stack */ sc->ifp = if_alloc(IFT_ETHER); sc->ifp->if_softc = sc; if_initname(sc->ifp, device_get_name(dev), device_get_unit(dev)); sc->ifp->if_mtu = ETHERMTU; /* XXX max-frame-size from OF? */ sc->ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; sc->ifp->if_hwassist = 0; /* XXX: ibm,illan-options */ sc->ifp->if_capabilities = 0; sc->ifp->if_capenable = 0; sc->ifp->if_start = llan_start; sc->ifp->if_ioctl = llan_ioctl; sc->ifp->if_init = llan_init; IFQ_SET_MAXLEN(&sc->ifp->if_snd, LLAN_MAX_TX_PACKETS); sc->ifp->if_snd.ifq_drv_maxlen = LLAN_MAX_TX_PACKETS; IFQ_SET_READY(&sc->ifp->if_snd); ether_ifattach(sc->ifp, &sc->mac_address[2]); return (0); } static void llan_rx_load_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int err) { struct llan_softc *sc = xsc; sc->rx_buf_phys = segs[0].ds_addr; sc->rx_buf_len = segs[0].ds_len - 2*PAGE_SIZE; sc->input_buf_phys = segs[0].ds_addr + segs[0].ds_len - PAGE_SIZE; sc->filter_buf_phys = segs[0].ds_addr + segs[0].ds_len - 2*PAGE_SIZE; } static void llan_init(void *xsc) { struct llan_softc *sc = xsc; uint64_t rx_buf_desc; uint64_t macaddr; int err, i; mtx_lock(&sc->io_lock); phyp_hcall(H_FREE_LOGICAL_LAN, sc->unit); /* Create buffers (page 539) */ sc->rx_dma_slot = 0; sc->rx_valid_val = 1; rx_buf_desc = LLAN_BUFDESC_VALID; rx_buf_desc |= (sc->rx_buf_len << 32); rx_buf_desc |= sc->rx_buf_phys; memcpy(&macaddr, sc->mac_address, 8); err = phyp_hcall(H_REGISTER_LOGICAL_LAN, sc->unit, sc->input_buf_phys, rx_buf_desc, sc->filter_buf_phys, macaddr); for (i = 0; i < LLAN_MAX_RX_PACKETS; i++) llan_add_rxbuf(sc, &sc->rx_xfer[i]); phyp_hcall(H_VIO_SIGNAL, sc->unit, 1); /* Enable interrupts */ /* Tell stack we're up */ sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; mtx_unlock(&sc->io_lock); /* Check for pending receives scheduled before interrupt enable */ llan_intr(sc); } static int llan_add_rxbuf(struct llan_softc *sc, struct llan_xfer *rx) { struct mbuf *m; bus_dma_segment_t segs[1]; int error, nsegs; mtx_assert(&sc->io_lock, MA_OWNED); m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = m->m_ext.ext_size; if (rx->rx_mbuf != NULL) { bus_dmamap_sync(sc->rxbuf_dma_tag, rx->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rxbuf_dma_tag, rx->rx_dmamap); } /* Save pointer to buffer structure */ m_copyback(m, 0, 8, (void *)&rx); error = bus_dmamap_load_mbuf_sg(sc->rxbuf_dma_tag, rx->rx_dmamap, m, segs, &nsegs, 
BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->dev, "cannot load RX DMA map %p, error = %d\n", rx, error); m_freem(m); return (error); } /* If nsegs is wrong then the stack is corrupt. */ KASSERT(nsegs == 1, ("%s: too many DMA segments (%d)", __func__, nsegs)); rx->rx_mbuf = m; bus_dmamap_sync(sc->rxbuf_dma_tag, rx->rx_dmamap, BUS_DMASYNC_PREREAD); rx->rx_bufdesc = LLAN_BUFDESC_VALID; rx->rx_bufdesc |= (((uint64_t)segs[0].ds_len) << 32); rx->rx_bufdesc |= segs[0].ds_addr; error = phyp_hcall(H_ADD_LOGICAL_LAN_BUFFER, sc->unit, rx->rx_bufdesc); if (error != 0) { m_freem(m); rx->rx_mbuf = NULL; return (ENOBUFS); } return (0); } static void llan_intr(void *xsc) { struct llan_softc *sc = xsc; struct llan_xfer *rx; struct mbuf *m; mtx_lock(&sc->io_lock); restart: phyp_hcall(H_VIO_SIGNAL, sc->unit, 0); while ((sc->rx_buf[sc->rx_dma_slot].control >> 7) == sc->rx_valid_val) { rx = (struct llan_xfer *)sc->rx_buf[sc->rx_dma_slot].handle; m = rx->rx_mbuf; m_adj(m, sc->rx_buf[sc->rx_dma_slot].offset - 8); m->m_len = sc->rx_buf[sc->rx_dma_slot].length; /* llan_add_rxbuf does DMA sync and unload as well as requeue */ if (llan_add_rxbuf(sc, rx) != 0) { if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); phyp_hcall(H_ADD_LOGICAL_LAN_BUFFER, sc->unit, rx->rx_bufdesc); continue; } if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1); m_adj(m, sc->rx_buf[sc->rx_dma_slot].offset); m->m_len = sc->rx_buf[sc->rx_dma_slot].length; m->m_pkthdr.rcvif = sc->ifp; m->m_pkthdr.len = m->m_len; sc->rx_dma_slot++; if (sc->rx_dma_slot >= sc->rx_buf_len/sizeof(sc->rx_buf[0])) { sc->rx_dma_slot = 0; sc->rx_valid_val = !sc->rx_valid_val; } mtx_unlock(&sc->io_lock); (*sc->ifp->if_input)(sc->ifp, m); mtx_lock(&sc->io_lock); } phyp_hcall(H_VIO_SIGNAL, sc->unit, 1); /* * H_VIO_SIGNAL enables interrupts for future packets only. * Make sure none were queued between the end of the loop and the * enable interrupts call. */ if ((sc->rx_buf[sc->rx_dma_slot].control >> 7) == sc->rx_valid_val) goto restart; mtx_unlock(&sc->io_lock); } static void llan_send_packet(void *xsc, bus_dma_segment_t *segs, int nsegs, bus_size_t mapsize, int error) { struct llan_softc *sc = xsc; uint64_t bufdescs[6]; int i; bzero(bufdescs, sizeof(bufdescs)); for (i = 0; i < nsegs; i++) { bufdescs[i] = LLAN_BUFDESC_VALID; bufdescs[i] |= (((uint64_t)segs[i].ds_len) << 32); bufdescs[i] |= segs[i].ds_addr; } phyp_hcall(H_SEND_LOGICAL_LAN, sc->unit, bufdescs[0], bufdescs[1], bufdescs[2], bufdescs[3], bufdescs[4], bufdescs[5], 0); /* * The hypercall returning implies completion -- or that the call will * not complete. In principle, we should try a few times if we get back * H_BUSY based on the continuation token in R4. For now, just drop * the packet in such cases. 
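 *
 * A hypothetical retry sketch (added for illustration, not part of
 * this driver): the continuation token would come back in R4 and be
 * passed in place of the trailing 0, assuming the hypercall wrapper
 * exposed that register:
 *
 *	do {
 *		err = phyp_hcall(H_SEND_LOGICAL_LAN, sc->unit,
 *		    bufdescs[0], bufdescs[1], bufdescs[2], bufdescs[3],
 *		    bufdescs[4], bufdescs[5], continue_token);
 *	} while (err == H_BUSY);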
*/ } static void llan_start_locked(struct ifnet *ifp) { struct llan_softc *sc = ifp->if_softc; bus_addr_t first; int nsegs; struct mbuf *mb_head, *m; mtx_assert(&sc->io_lock, MA_OWNED); first = 0; if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { IFQ_DRV_DEQUEUE(&ifp->if_snd, mb_head); if (mb_head == NULL) break; BPF_MTAP(ifp, mb_head); for (m = mb_head, nsegs = 0; m != NULL; m = m->m_next) nsegs++; if (nsegs > 6) { m = m_collapse(mb_head, M_NOWAIT, 6); if (m == NULL) { m_freem(mb_head); continue; } } bus_dmamap_load_mbuf(sc->tx_dma_tag, sc->tx_dma_map, mb_head, llan_send_packet, sc, 0); bus_dmamap_unload(sc->tx_dma_tag, sc->tx_dma_map); m_freem(mb_head); } } static void llan_start(struct ifnet *ifp) { struct llan_softc *sc = ifp->if_softc; mtx_lock(&sc->io_lock); llan_start_locked(ifp); mtx_unlock(&sc->io_lock); } static int llan_set_multicast(struct llan_softc *sc) { struct ifnet *ifp = sc->ifp; struct ifmultiaddr *inm; uint64_t macaddr; mtx_assert(&sc->io_lock, MA_OWNED); phyp_hcall(H_MULTICAST_CTRL, sc->unit, LLAN_CLEAR_MULTICAST, 0); if_maddr_rlock(ifp); TAILQ_FOREACH(inm, &ifp->if_multiaddrs, ifma_link) { if (inm->ifma_addr->sa_family != AF_LINK) continue; memcpy((uint8_t *)&macaddr + 2, LLADDR((struct sockaddr_dl *)inm->ifma_addr), 6); phyp_hcall(H_MULTICAST_CTRL, sc->unit, LLAN_ADD_MULTICAST, macaddr); } if_maddr_runlock(ifp); return (0); } static int llan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { int err = 0; struct llan_softc *sc = ifp->if_softc; switch (cmd) { case SIOCADDMULTI: case SIOCDELMULTI: mtx_lock(&sc->io_lock); if ((sc->ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) llan_set_multicast(sc); mtx_unlock(&sc->io_lock); break; case SIOCSIFFLAGS: default: err = ether_ioctl(ifp, cmd, data); break; } return (err); } Index: projects/powernv/powerpc/pseries/phyp_vscsi.c =================================================================== --- projects/powernv/powerpc/pseries/phyp_vscsi.c (revision 290990) +++ projects/powernv/powerpc/pseries/phyp_vscsi.c (revision 290991) @@ -1,992 +1,993 @@ /*- * Copyright 2013 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct vscsi_softc; /* VSCSI CRQ format from table 260 of PAPR spec 2.4 (page 760) */ struct vscsi_crq { uint8_t valid; uint8_t format; uint8_t reserved; uint8_t status; uint16_t timeout; uint16_t iu_length; uint64_t iu_data; }; struct vscsi_xfer { TAILQ_ENTRY(vscsi_xfer) queue; struct vscsi_softc *sc; union ccb *ccb; bus_dmamap_t dmamap; uint64_t tag; vmem_addr_t srp_iu_offset; vmem_size_t srp_iu_size; }; TAILQ_HEAD(vscsi_xferq, vscsi_xfer); struct vscsi_softc { device_t dev; struct cam_devq *devq; struct cam_sim *sim; struct cam_path *path; struct mtx io_lock; cell_t unit; int bus_initialized; int bus_logged_in; int max_transactions; int irqid; struct resource *irq; void *irq_cookie; bus_dma_tag_t crq_tag; struct vscsi_crq *crq_queue; int n_crqs, cur_crq; bus_dmamap_t crq_map; bus_addr_t crq_phys; vmem_t *srp_iu_arena; void *srp_iu_queue; bus_addr_t srp_iu_phys; bus_dma_tag_t data_tag; struct vscsi_xfer loginxp; struct vscsi_xfer *xfer; struct vscsi_xferq active_xferq; struct vscsi_xferq free_xferq; }; struct srp_login { uint8_t type; uint8_t reserved[7]; uint64_t tag; uint64_t max_cmd_length; uint32_t reserved2; uint16_t buffer_formats; uint8_t flags; uint8_t reserved3[5]; uint8_t initiator_port_id[16]; uint8_t target_port_id[16]; } __packed; struct srp_login_rsp { uint8_t type; uint8_t reserved[3]; uint32_t request_limit_delta; uint8_t tag; uint32_t max_i_to_t_len; uint32_t max_t_to_i_len; uint16_t buffer_formats; uint8_t flags; /* Some reserved bits follow */ } __packed; struct srp_cmd { uint8_t type; uint8_t flags1; uint8_t reserved[3]; uint8_t formats; uint8_t out_buffer_count; uint8_t in_buffer_count; uint64_t tag; uint32_t reserved2; uint64_t lun; uint8_t reserved3[3]; uint8_t additional_cdb; uint8_t cdb[16]; uint8_t data_payload[0]; } __packed; struct srp_rsp { uint8_t type; uint8_t reserved[3]; uint32_t request_limit_delta; uint64_t tag; uint16_t reserved2; uint8_t flags; uint8_t status; uint32_t data_out_resid; uint32_t data_in_resid; uint32_t sense_data_len; uint32_t response_data_len; uint8_t data_payload[0]; } __packed; struct srp_tsk_mgmt { uint8_t type; uint8_t reserved[7]; uint64_t tag; uint32_t reserved2; uint64_t lun; uint8_t reserved3[2]; uint8_t function; uint8_t reserved4; uint64_t manage_tag; uint64_t reserved5; } __packed; /* Message code type */ #define SRP_LOGIN_REQ 0x00 #define SRP_TSK_MGMT 0x01 #define SRP_CMD 0x02 #define SRP_I_LOGOUT 0x03 #define SRP_LOGIN_RSP 0xC0 #define SRP_RSP 0xC1 #define SRP_LOGIN_REJ 0xC2 #define SRP_T_LOGOUT 0x80 #define SRP_CRED_REQ 0x81 #define SRP_AER_REQ 0x82 #define SRP_CRED_RSP 0x41 #define SRP_AER_RSP 0x41 /* Flags for srp_rsp flags field */ #define SRP_RSPVALID 0x01 #define SRP_SNSVALID 0x02 #define SRP_DOOVER 0x04 #define SRP_DOUNDER 0x08 #define SRP_DIOVER 0x10 #define SRP_DIUNDER 0x20 #define MAD_SUCESS 0x00 #define MAD_NOT_SUPPORTED 0xf1 #define MAD_FAILED 0xf7 #define MAD_EMPTY_IU 0x01 #define MAD_ERROR_LOGGING_REQUEST 0x02 #define MAD_ADAPTER_INFO_REQUEST 0x03 #define MAD_CAPABILITIES_EXCHANGE 0x05 #define MAD_PHYS_ADAP_INFO_REQUEST 0x06 #define MAD_TAPE_PASSTHROUGH_REQUEST 0x07 #define MAD_ENABLE_FAST_FAIL 0x08 static int vscsi_probe(device_t); static int vscsi_attach(device_t); static 
int vscsi_detach(device_t); static void vscsi_cam_action(struct cam_sim *, union ccb *); static void vscsi_cam_poll(struct cam_sim *); static void vscsi_intr(void *arg); static void vscsi_check_response_queue(struct vscsi_softc *sc); static void vscsi_setup_bus(struct vscsi_softc *sc); static void vscsi_srp_login(struct vscsi_softc *sc); static void vscsi_crq_load_cb(void *, bus_dma_segment_t *, int, int); static void vscsi_scsi_command(void *xxp, bus_dma_segment_t *segs, int nsegs, int err); static void vscsi_task_management(struct vscsi_softc *sc, union ccb *ccb); static void vscsi_srp_response(struct vscsi_xfer *, struct vscsi_crq *); static devclass_t vscsi_devclass; static device_method_t vscsi_methods[] = { DEVMETHOD(device_probe, vscsi_probe), DEVMETHOD(device_attach, vscsi_attach), DEVMETHOD(device_detach, vscsi_detach), DEVMETHOD_END }; static driver_t vscsi_driver = { "vscsi", vscsi_methods, sizeof(struct vscsi_softc) }; DRIVER_MODULE(vscsi, vdevice, vscsi_driver, vscsi_devclass, 0, 0); MALLOC_DEFINE(M_VSCSI, "vscsi", "CAM device queue for VSCSI"); static int vscsi_probe(device_t dev) { if (!ofw_bus_is_compatible(dev, "IBM,v-scsi")) return (ENXIO); device_set_desc(dev, "POWER Hypervisor Virtual SCSI Bus"); return (0); } static int vscsi_attach(device_t dev) { struct vscsi_softc *sc; struct vscsi_xfer *xp; int error, i; sc = device_get_softc(dev); if (sc == NULL) return (EINVAL); sc->dev = dev; mtx_init(&sc->io_lock, "vscsi", NULL, MTX_DEF); /* Get properties */ - OF_getprop(ofw_bus_get_node(dev), "reg", &sc->unit, sizeof(sc->unit)); + OF_getencprop(ofw_bus_get_node(dev), "reg", &sc->unit, + sizeof(sc->unit)); /* Setup interrupt */ sc->irqid = 0; sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irqid, RF_ACTIVE); if (!sc->irq) { device_printf(dev, "Could not allocate IRQ\n"); mtx_destroy(&sc->io_lock); return (ENXIO); } bus_setup_intr(dev, sc->irq, INTR_TYPE_CAM | INTR_MPSAFE | INTR_ENTROPY, NULL, vscsi_intr, sc, &sc->irq_cookie); /* Data DMA */ error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, 256, BUS_SPACE_MAXSIZE_32BIT, 0, busdma_lock_mutex, &sc->io_lock, &sc->data_tag); TAILQ_INIT(&sc->active_xferq); TAILQ_INIT(&sc->free_xferq); /* First XFER for login data */ sc->loginxp.sc = sc; bus_dmamap_create(sc->data_tag, 0, &sc->loginxp.dmamap); TAILQ_INSERT_TAIL(&sc->free_xferq, &sc->loginxp, queue); /* CRQ area */ error = bus_dma_tag_create(bus_get_dma_tag(dev), PAGE_SIZE, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, 8*PAGE_SIZE, 1, BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->crq_tag); error = bus_dmamem_alloc(sc->crq_tag, (void **)&sc->crq_queue, BUS_DMA_WAITOK | BUS_DMA_ZERO, &sc->crq_map); sc->crq_phys = 0; sc->n_crqs = 0; error = bus_dmamap_load(sc->crq_tag, sc->crq_map, sc->crq_queue, 8*PAGE_SIZE, vscsi_crq_load_cb, sc, 0); mtx_lock(&sc->io_lock); vscsi_setup_bus(sc); sc->xfer = malloc(sizeof(sc->xfer[0])*sc->max_transactions, M_VSCSI, M_NOWAIT); for (i = 0; i < sc->max_transactions; i++) { xp = &sc->xfer[i]; xp->sc = sc; error = bus_dmamap_create(sc->data_tag, 0, &xp->dmamap); if (error) { device_printf(dev, "Could not create DMA map (%d)\n", error); break; } TAILQ_INSERT_TAIL(&sc->free_xferq, xp, queue); } mtx_unlock(&sc->io_lock); /* Allocate CAM bits */ if ((sc->devq = cam_simq_alloc(sc->max_transactions)) == NULL) return (ENOMEM); sc->sim = cam_sim_alloc(vscsi_cam_action, vscsi_cam_poll, "vscsi", sc, device_get_unit(dev), &sc->io_lock, sc->max_transactions, sc->max_transactions, sc->devq); 
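	/*
	 * Note added for clarity: sc->max_transactions was filled in
	 * from the SRP login response (request_limit_delta) during
	 * vscsi_setup_bus() above, so the CAM SIM queue depth matches
	 * the command limit the target granted.
	 */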
if (sc->sim == NULL) { cam_simq_free(sc->devq); sc->devq = NULL; device_printf(dev, "CAM SIM attach failed\n"); return (EINVAL); } mtx_lock(&sc->io_lock); if (xpt_bus_register(sc->sim, dev, 0) != 0) { device_printf(dev, "XPT bus registration failed\n"); cam_sim_free(sc->sim, FALSE); sc->sim = NULL; cam_simq_free(sc->devq); sc->devq = NULL; mtx_unlock(&sc->io_lock); return (EINVAL); } mtx_unlock(&sc->io_lock); return (0); } static int vscsi_detach(device_t dev) { struct vscsi_softc *sc; sc = device_get_softc(dev); if (sc == NULL) return (EINVAL); if (sc->sim != NULL) { mtx_lock(&sc->io_lock); xpt_bus_deregister(cam_sim_path(sc->sim)); cam_sim_free(sc->sim, FALSE); sc->sim = NULL; mtx_unlock(&sc->io_lock); } if (sc->devq != NULL) { cam_simq_free(sc->devq); sc->devq = NULL; } mtx_destroy(&sc->io_lock); return (0); } static void vscsi_cam_action(struct cam_sim *sim, union ccb *ccb) { struct vscsi_softc *sc = cam_sim_softc(sim); mtx_assert(&sc->io_lock, MA_OWNED); switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_TAG_ABLE; cpi->hba_misc = PIM_EXTLUNS; cpi->target_sprt = 0; cpi->hba_eng_cnt = 0; cpi->max_target = 0; cpi->max_lun = 0; cpi->initiator_id = ~0; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, "IBM", HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 150000; cpi->transport = XPORT_SRP; cpi->transport_version = 0; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_SPC4; cpi->ccb_h.status = CAM_REQ_CMP; break; } case XPT_RESET_BUS: ccb->ccb_h.status = CAM_REQ_CMP; break; case XPT_RESET_DEV: ccb->ccb_h.status = CAM_REQ_INPROG; vscsi_task_management(sc, ccb); return; case XPT_GET_TRAN_SETTINGS: ccb->cts.protocol = PROTO_SCSI; ccb->cts.protocol_version = SCSI_REV_SPC4; ccb->cts.transport = XPORT_SRP; ccb->cts.transport_version = 0; ccb->cts.proto_specific.valid = 0; ccb->cts.xport_specific.valid = 0; ccb->ccb_h.status = CAM_REQ_CMP; break; case XPT_SET_TRAN_SETTINGS: ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; break; case XPT_SCSI_IO: { struct vscsi_xfer *xp; ccb->ccb_h.status = CAM_REQ_INPROG; xp = TAILQ_FIRST(&sc->free_xferq); if (xp == NULL) panic("SCSI queue flooded"); xp->ccb = ccb; TAILQ_REMOVE(&sc->free_xferq, xp, queue); TAILQ_INSERT_TAIL(&sc->active_xferq, xp, queue); bus_dmamap_load_ccb(sc->data_tag, xp->dmamap, ccb, vscsi_scsi_command, xp, 0); return; } default: ccb->ccb_h.status = CAM_REQ_INVALID; break; } xpt_done(ccb); return; } static void vscsi_srp_login(struct vscsi_softc *sc) { struct vscsi_xfer *xp; struct srp_login *login; struct vscsi_crq crq; int err; mtx_assert(&sc->io_lock, MA_OWNED); xp = TAILQ_FIRST(&sc->free_xferq); if (xp == NULL) panic("SCSI queue flooded"); xp->ccb = NULL; TAILQ_REMOVE(&sc->free_xferq, xp, queue); TAILQ_INSERT_TAIL(&sc->active_xferq, xp, queue); /* Set up command */ xp->srp_iu_size = crq.iu_length = 64; err = vmem_alloc(xp->sc->srp_iu_arena, xp->srp_iu_size, M_BESTFIT | M_NOWAIT, &xp->srp_iu_offset); if (err) panic("Error during VMEM allocation (%d)", err); login = (struct srp_login *)((uint8_t *)xp->sc->srp_iu_queue + (uintptr_t)xp->srp_iu_offset); bzero(login, xp->srp_iu_size); login->type = SRP_LOGIN_REQ; login->tag = (uint64_t)(xp); login->max_cmd_length = htobe64(256); login->buffer_formats = htobe16(0x1 | 0x2); /* Direct and indirect */ login->flags = 0; /* Create CRQ entry */ crq.valid = 0x80; crq.format = 0x01; 
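	/*
	 * Added note: H_SEND_CRQ takes the 16-byte CRQ entry as two
	 * 64-bit register arguments. A minimal sketch of the packing
	 * performed by the call below (assuming struct vscsi_crq is
	 * exactly 16 bytes in native big-endian layout):
	 *
	 *	uint64_t *w = (uint64_t *)&crq;
	 *	err = phyp_hcall(H_SEND_CRQ, xp->sc->unit, w[0], w[1]);
	 */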
crq.iu_data = xp->sc->srp_iu_phys + xp->srp_iu_offset; bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_PREWRITE); err = phyp_hcall(H_SEND_CRQ, xp->sc->unit, ((uint64_t *)(&crq))[0], ((uint64_t *)(&crq))[1]); if (err != 0) panic("CRQ send failure (%d)", err); } static void vscsi_task_management(struct vscsi_softc *sc, union ccb *ccb) { struct srp_tsk_mgmt *cmd; struct vscsi_xfer *xp; struct vscsi_crq crq; int err; mtx_assert(&sc->io_lock, MA_OWNED); xp = TAILQ_FIRST(&sc->free_xferq); if (xp == NULL) panic("SCSI queue flooded"); xp->ccb = ccb; TAILQ_REMOVE(&sc->free_xferq, xp, queue); TAILQ_INSERT_TAIL(&sc->active_xferq, xp, queue); xp->srp_iu_size = crq.iu_length = sizeof(*cmd); err = vmem_alloc(xp->sc->srp_iu_arena, xp->srp_iu_size, M_BESTFIT | M_NOWAIT, &xp->srp_iu_offset); if (err) panic("Error during VMEM allocation (%d)", err); cmd = (struct srp_tsk_mgmt *)((uint8_t *)xp->sc->srp_iu_queue + (uintptr_t)xp->srp_iu_offset); bzero(cmd, xp->srp_iu_size); cmd->type = SRP_TSK_MGMT; cmd->tag = (uint64_t)xp; cmd->lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun)); switch (ccb->ccb_h.func_code) { case XPT_RESET_DEV: cmd->function = 0x08; break; default: panic("Unimplemented code %d", ccb->ccb_h.func_code); break; } bus_dmamap_sync(xp->sc->crq_tag, xp->sc->crq_map, BUS_DMASYNC_PREWRITE); /* Create CRQ entry */ crq.valid = 0x80; crq.format = 0x01; crq.iu_data = xp->sc->srp_iu_phys + xp->srp_iu_offset; err = phyp_hcall(H_SEND_CRQ, xp->sc->unit, ((uint64_t *)(&crq))[0], ((uint64_t *)(&crq))[1]); if (err != 0) panic("CRQ send failure (%d)", err); } static void vscsi_scsi_command(void *xxp, bus_dma_segment_t *segs, int nsegs, int err) { struct vscsi_xfer *xp = xxp; uint8_t *cdb; union ccb *ccb = xp->ccb; struct srp_cmd *cmd; uint64_t chunk_addr; uint32_t chunk_size; int desc_start, i; struct vscsi_crq crq; KASSERT(err == 0, ("DMA error %d\n", err)); mtx_assert(&xp->sc->io_lock, MA_OWNED); cdb = (ccb->ccb_h.flags & CAM_CDB_POINTER) ? ccb->csio.cdb_io.cdb_ptr : ccb->csio.cdb_io.cdb_bytes; /* Command format from Table 20, page 37 of SRP spec */ crq.iu_length = 48 + ((nsegs > 1) ? 20 : 16) + ((ccb->csio.cdb_len > 16) ? (ccb->csio.cdb_len - 16) : 0); xp->srp_iu_size = crq.iu_length; if (nsegs > 1) xp->srp_iu_size += nsegs*16; xp->srp_iu_size = roundup(xp->srp_iu_size, 16); err = vmem_alloc(xp->sc->srp_iu_arena, xp->srp_iu_size, M_BESTFIT | M_NOWAIT, &xp->srp_iu_offset); if (err) panic("Error during VMEM allocation (%d)", err); cmd = (struct srp_cmd *)((uint8_t *)xp->sc->srp_iu_queue + (uintptr_t)xp->srp_iu_offset); bzero(cmd, xp->srp_iu_size); cmd->type = SRP_CMD; if (ccb->csio.cdb_len > 16) cmd->additional_cdb = (ccb->csio.cdb_len - 16) << 2; memcpy(cmd->cdb, cdb, ccb->csio.cdb_len); cmd->tag = (uint64_t)(xp); /* Let the responder find this again */ cmd->lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun)); if (nsegs > 1) { /* Use indirect descriptors */ switch (ccb->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_OUT: cmd->formats = (2 << 4); break; case CAM_DIR_IN: cmd->formats = 2; break; default: panic("Does not support bidirectional commands (%d)", ccb->ccb_h.flags & CAM_DIR_MASK); break; } desc_start = ((ccb->csio.cdb_len > 16) ? 
ccb->csio.cdb_len - 16 : 0); chunk_addr = xp->sc->srp_iu_phys + xp->srp_iu_offset + 20 + desc_start + sizeof(*cmd); chunk_size = 16*nsegs; memcpy(&cmd->data_payload[desc_start], &chunk_addr, 8); memcpy(&cmd->data_payload[desc_start+12], &chunk_size, 4); chunk_size = 0; for (i = 0; i < nsegs; i++) chunk_size += segs[i].ds_len; memcpy(&cmd->data_payload[desc_start+16], &chunk_size, 4); desc_start += 20; for (i = 0; i < nsegs; i++) { chunk_addr = segs[i].ds_addr; chunk_size = segs[i].ds_len; memcpy(&cmd->data_payload[desc_start + 16*i], &chunk_addr, 8); /* Set handle tag to 0 */ memcpy(&cmd->data_payload[desc_start + 16*i + 12], &chunk_size, 4); } } else if (nsegs == 1) { switch (ccb->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_OUT: cmd->formats = (1 << 4); break; case CAM_DIR_IN: cmd->formats = 1; break; default: panic("Does not support bidirectional commands (%d)", ccb->ccb_h.flags & CAM_DIR_MASK); break; } /* * Memory descriptor: * 8 byte address * 4 byte handle * 4 byte length */ chunk_addr = segs[0].ds_addr; chunk_size = segs[0].ds_len; desc_start = ((ccb->csio.cdb_len > 16) ? ccb->csio.cdb_len - 16 : 0); memcpy(&cmd->data_payload[desc_start], &chunk_addr, 8); /* Set handle tag to 0 */ memcpy(&cmd->data_payload[desc_start+12], &chunk_size, 4); KASSERT(xp->srp_iu_size >= 48 + ((ccb->csio.cdb_len > 16) ? ccb->csio.cdb_len : 16), ("SRP IU command length")); } else { cmd->formats = 0; } bus_dmamap_sync(xp->sc->crq_tag, xp->sc->crq_map, BUS_DMASYNC_PREWRITE); /* Create CRQ entry */ crq.valid = 0x80; crq.format = 0x01; crq.iu_data = xp->sc->srp_iu_phys + xp->srp_iu_offset; err = phyp_hcall(H_SEND_CRQ, xp->sc->unit, ((uint64_t *)(&crq))[0], ((uint64_t *)(&crq))[1]); if (err != 0) panic("CRQ send failure (%d)", err); } static void vscsi_crq_load_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int err) { struct vscsi_softc *sc = xsc; sc->crq_phys = segs[0].ds_addr; sc->n_crqs = PAGE_SIZE/sizeof(struct vscsi_crq); sc->srp_iu_queue = (uint8_t *)(sc->crq_queue); sc->srp_iu_phys = segs[0].ds_addr; sc->srp_iu_arena = vmem_create("VSCSI SRP IU", PAGE_SIZE, segs[0].ds_len - PAGE_SIZE, 16, 0, M_BESTFIT | M_NOWAIT); } static void vscsi_setup_bus(struct vscsi_softc *sc) { struct vscsi_crq crq; struct vscsi_xfer *xp; int error; struct { uint32_t type; uint16_t status; uint16_t length; uint64_t tag; uint64_t buffer; struct { char srp_version[8]; char partition_name[96]; uint32_t partition_number; uint32_t mad_version; uint32_t os_type; uint32_t port_max_txu[8]; } payload; } mad_adapter_info; bzero(&crq, sizeof(crq)); /* Init message */ crq.valid = 0xc0; crq.format = 0x01; do { error = phyp_hcall(H_FREE_CRQ, sc->unit); } while (error == H_BUSY); /* See initialization sequence page 757 */ bzero(sc->crq_queue, sc->n_crqs*sizeof(sc->crq_queue[0])); sc->cur_crq = 0; sc->bus_initialized = 0; sc->bus_logged_in = 0; bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_PREWRITE); error = phyp_hcall(H_REG_CRQ, sc->unit, sc->crq_phys, sc->n_crqs*sizeof(sc->crq_queue[0])); KASSERT(error == 0, ("CRQ registration success")); error = phyp_hcall(H_SEND_CRQ, sc->unit, ((uint64_t *)(&crq))[0], ((uint64_t *)(&crq))[1]); if (error != 0) panic("CRQ setup failure (%d)", error); while (sc->bus_initialized == 0) vscsi_check_response_queue(sc); /* Send MAD adapter info */ mad_adapter_info.type = MAD_ADAPTER_INFO_REQUEST; mad_adapter_info.status = 0; mad_adapter_info.length = sizeof(mad_adapter_info.payload); strcpy(mad_adapter_info.payload.srp_version, "16.a"); strcpy(mad_adapter_info.payload.partition_name, "UNKNOWN"); 
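	/*
	 * Added note: the "buffer" field set below points 24 bytes past
	 * the start of the IU, i.e. just past the MAD header (4-byte
	 * type + 2-byte status + 2-byte length + 8-byte tag + 8-byte
	 * buffer), so the hypervisor writes its reply directly over the
	 * payload struct.
	 */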
mad_adapter_info.payload.partition_number = -1; mad_adapter_info.payload.mad_version = 1; mad_adapter_info.payload.os_type = 2; /* Claim we are Linux */ mad_adapter_info.payload.port_max_txu[0] = 0; /* If this fails, we get the defaults above */ OF_getprop(OF_finddevice("/"), "ibm,partition-name", mad_adapter_info.payload.partition_name, sizeof(mad_adapter_info.payload.partition_name)); OF_getprop(OF_finddevice("/"), "ibm,partition-no", &mad_adapter_info.payload.partition_number, sizeof(mad_adapter_info.payload.partition_number)); xp = TAILQ_FIRST(&sc->free_xferq); xp->ccb = NULL; TAILQ_REMOVE(&sc->free_xferq, xp, queue); TAILQ_INSERT_TAIL(&sc->active_xferq, xp, queue); xp->srp_iu_size = crq.iu_length = sizeof(mad_adapter_info); vmem_alloc(xp->sc->srp_iu_arena, xp->srp_iu_size, M_BESTFIT | M_NOWAIT, &xp->srp_iu_offset); mad_adapter_info.buffer = xp->sc->srp_iu_phys + xp->srp_iu_offset + 24; mad_adapter_info.tag = (uint64_t)xp; memcpy((uint8_t *)xp->sc->srp_iu_queue + (uintptr_t)xp->srp_iu_offset, &mad_adapter_info, sizeof(mad_adapter_info)); crq.valid = 0x80; crq.format = 0x02; crq.iu_data = xp->sc->srp_iu_phys + xp->srp_iu_offset; bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_PREWRITE); phyp_hcall(H_SEND_CRQ, xp->sc->unit, ((uint64_t *)(&crq))[0], ((uint64_t *)(&crq))[1]); while (TAILQ_EMPTY(&sc->free_xferq)) vscsi_check_response_queue(sc); /* Send SRP login */ vscsi_srp_login(sc); while (sc->bus_logged_in == 0) vscsi_check_response_queue(sc); error = phyp_hcall(H_VIO_SIGNAL, sc->unit, 1); /* Enable interrupts */ } static void vscsi_intr(void *xsc) { struct vscsi_softc *sc = xsc; mtx_lock(&sc->io_lock); vscsi_check_response_queue(sc); mtx_unlock(&sc->io_lock); } static void vscsi_srp_response(struct vscsi_xfer *xp, struct vscsi_crq *crq) { union ccb *ccb = xp->ccb; struct vscsi_softc *sc = xp->sc; struct srp_rsp *rsp; uint32_t sense_len; /* SRP response packet in original request */ rsp = (struct srp_rsp *)((uint8_t *)sc->srp_iu_queue + (uintptr_t)xp->srp_iu_offset); ccb->csio.scsi_status = rsp->status; if (ccb->csio.scsi_status == SCSI_STATUS_OK) ccb->ccb_h.status = CAM_REQ_CMP; else ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR; #ifdef NOTYET /* Collect fast fail codes */ if (crq->status != 0) ccb->ccb_h.status = CAM_REQ_CMP_ERR; #endif if (ccb->ccb_h.status != CAM_REQ_CMP) { ccb->ccb_h.status |= CAM_DEV_QFRZN; xpt_freeze_devq(ccb->ccb_h.path, /*count*/ 1); } if (!(rsp->flags & SRP_RSPVALID)) rsp->response_data_len = 0; if (!(rsp->flags & SRP_SNSVALID)) rsp->sense_data_len = 0; if (!(rsp->flags & (SRP_DOOVER | SRP_DOUNDER))) rsp->data_out_resid = 0; if (!(rsp->flags & (SRP_DIOVER | SRP_DIUNDER))) rsp->data_in_resid = 0; if (rsp->flags & SRP_SNSVALID) { bzero(&ccb->csio.sense_data, sizeof(struct scsi_sense_data)); ccb->ccb_h.status |= CAM_AUTOSNS_VALID; sense_len = min(be32toh(rsp->sense_data_len), ccb->csio.sense_len); memcpy(&ccb->csio.sense_data, &rsp->data_payload[be32toh(rsp->response_data_len)], sense_len); ccb->csio.sense_resid = ccb->csio.sense_len - be32toh(rsp->sense_data_len); } switch (ccb->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_OUT: ccb->csio.resid = rsp->data_out_resid; break; case CAM_DIR_IN: ccb->csio.resid = rsp->data_in_resid; break; } bus_dmamap_sync(sc->data_tag, xp->dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->data_tag, xp->dmamap); xpt_done(ccb); xp->ccb = NULL; } static void vscsi_login_response(struct vscsi_xfer *xp, struct vscsi_crq *crq) { struct vscsi_softc *sc = xp->sc; struct srp_login_rsp *rsp; /* SRP response packet in original request */ 
rsp = (struct srp_login_rsp *)((uint8_t *)sc->srp_iu_queue + (uintptr_t)xp->srp_iu_offset); KASSERT(be16toh(rsp->buffer_formats) & 0x3, ("Both direct and indirect " "buffers supported")); sc->max_transactions = be32toh(rsp->request_limit_delta); device_printf(sc->dev, "Queue depth %d commands\n", sc->max_transactions); sc->bus_logged_in = 1; } static void vscsi_cam_poll(struct cam_sim *sim) { struct vscsi_softc *sc = cam_sim_softc(sim); vscsi_check_response_queue(sc); } static void vscsi_check_response_queue(struct vscsi_softc *sc) { struct vscsi_crq *crq; struct vscsi_xfer *xp; int code; mtx_assert(&sc->io_lock, MA_OWNED); while (sc->crq_queue[sc->cur_crq].valid != 0) { /* The hypercalls at both ends of this are not optimal */ phyp_hcall(H_VIO_SIGNAL, sc->unit, 0); bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_POSTREAD); crq = &sc->crq_queue[sc->cur_crq]; switch (crq->valid) { case 0xc0: if (crq->format == 0x02) sc->bus_initialized = 1; break; case 0x80: /* IU data is set to tag pointer (the XP) */ xp = (struct vscsi_xfer *)crq->iu_data; switch (crq->format) { case 0x01: code = *((uint8_t *)sc->srp_iu_queue + (uintptr_t)xp->srp_iu_offset); switch (code) { case SRP_RSP: vscsi_srp_response(xp, crq); break; case SRP_LOGIN_RSP: vscsi_login_response(xp, crq); break; default: device_printf(sc->dev, "Unknown SRP " "response code %d\n", code); break; } break; case 0x02: /* Ignore management datagrams */ break; default: panic("Unknown CRQ format %d\n", crq->format); break; } vmem_free(sc->srp_iu_arena, xp->srp_iu_offset, xp->srp_iu_size); TAILQ_REMOVE(&sc->active_xferq, xp, queue); TAILQ_INSERT_TAIL(&sc->free_xferq, xp, queue); break; default: device_printf(sc->dev, "Unknown CRQ message type %d\n", crq->valid); break; } crq->valid = 0; sc->cur_crq = (sc->cur_crq + 1) % sc->n_crqs; bus_dmamap_sync(sc->crq_tag, sc->crq_map, BUS_DMASYNC_PREWRITE); phyp_hcall(H_VIO_SIGNAL, sc->unit, 1); } } Index: projects/powernv/powerpc/pseries/platform_chrp.c =================================================================== --- projects/powernv/powerpc/pseries/platform_chrp.c (revision 290990) +++ projects/powernv/powerpc/pseries/platform_chrp.c (revision 290991) @@ -1,516 +1,517 @@ /*- * Copyright (c) 2008 Marcel Moolenaar * Copyright (c) 2009 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "platform_if.h" #ifdef SMP extern void *ap_pcpu; #endif #ifdef __powerpc64__ static uint8_t splpar_vpa[MAXCPU][640] __aligned(128); /* XXX: dpcpu */ #endif static vm_offset_t realmaxaddr = VM_MAX_ADDRESS; static int chrp_probe(platform_t); static int chrp_attach(platform_t); void chrp_mem_regions(platform_t, struct mem_region *phys, int *physsz, struct mem_region *avail, int *availsz); static vm_offset_t chrp_real_maxaddr(platform_t); static u_long chrp_timebase_freq(platform_t, struct cpuref *cpuref); static int chrp_smp_first_cpu(platform_t, struct cpuref *cpuref); static int chrp_smp_next_cpu(platform_t, struct cpuref *cpuref); static int chrp_smp_get_bsp(platform_t, struct cpuref *cpuref); static void chrp_smp_ap_init(platform_t); #ifdef SMP static int chrp_smp_start_cpu(platform_t, struct pcpu *cpu); static struct cpu_group *chrp_smp_topo(platform_t plat); #endif static void chrp_reset(platform_t); #ifdef __powerpc64__ #include "phyp-hvcall.h" static void phyp_cpu_idle(sbintime_t sbt); #endif static platform_method_t chrp_methods[] = { PLATFORMMETHOD(platform_probe, chrp_probe), PLATFORMMETHOD(platform_attach, chrp_attach), PLATFORMMETHOD(platform_mem_regions, chrp_mem_regions), PLATFORMMETHOD(platform_real_maxaddr, chrp_real_maxaddr), PLATFORMMETHOD(platform_timebase_freq, chrp_timebase_freq), PLATFORMMETHOD(platform_smp_ap_init, chrp_smp_ap_init), PLATFORMMETHOD(platform_smp_first_cpu, chrp_smp_first_cpu), PLATFORMMETHOD(platform_smp_next_cpu, chrp_smp_next_cpu), PLATFORMMETHOD(platform_smp_get_bsp, chrp_smp_get_bsp), #ifdef SMP PLATFORMMETHOD(platform_smp_start_cpu, chrp_smp_start_cpu), PLATFORMMETHOD(platform_smp_topo, chrp_smp_topo), #endif PLATFORMMETHOD(platform_reset, chrp_reset), { 0, 0 } }; static platform_def_t chrp_platform = { "chrp", chrp_methods, 0 }; PLATFORM_DEF(chrp_platform); static int chrp_probe(platform_t plat) { if (OF_finddevice("/memory") != -1 || OF_finddevice("/memory@0") != -1) return (BUS_PROBE_GENERIC); return (ENXIO); } static int chrp_attach(platform_t plat) { #ifdef __powerpc64__ int i; /* XXX: check for /rtas/ibm,hypertas-functions? */ if (!(mfmsr() & PSL_HV)) { struct mem_region *phys, *avail; int nphys, navail; mem_regions(&phys, &nphys, &avail, &navail); realmaxaddr = phys[0].mr_size; pmap_mmu_install("mmu_phyp", BUS_PROBE_SPECIFIC); cpu_idle_hook = phyp_cpu_idle; /* Set up important VPA fields */ for (i = 0; i < MAXCPU; i++) { bzero(splpar_vpa[i], sizeof(splpar_vpa)); /* First two: VPA size */ splpar_vpa[i][4] = (uint8_t)((sizeof(splpar_vpa[i]) >> 8) & 0xff); splpar_vpa[i][5] = (uint8_t)(sizeof(splpar_vpa[i]) & 0xff); splpar_vpa[i][0xba] = 1; /* Maintain FPRs */ splpar_vpa[i][0xbb] = 1; /* Maintain PMCs */ splpar_vpa[i][0xfc] = 0xff; /* Maintain full SLB */ splpar_vpa[i][0xfd] = 0xff; splpar_vpa[i][0xff] = 1; /* Maintain Altivec */ } mb(); /* Set up hypervisor CPU stuff */ chrp_smp_ap_init(plat); } #endif /* Some systems (e.g. 
QEMU) need Open Firmware to stand down */ ofw_quiesce(); return (0); } static int parse_drconf_memory(struct mem_region *ofmem, int *msz, struct mem_region *ofavail, int *asz) { phandle_t phandle; vm_offset_t base; int i, idx, len, lasz, lmsz, res; uint32_t flags, lmb_size[2]; - uint64_t *dmem; + uint32_t *dmem; lmsz = *msz; lasz = *asz; phandle = OF_finddevice("/ibm,dynamic-reconfiguration-memory"); if (phandle == -1) /* No drconf node, return. */ return (0); - res = OF_getprop(phandle, "ibm,lmb-size", lmb_size, sizeof(lmb_size)); + res = OF_getencprop(phandle, "ibm,lmb-size", lmb_size, + sizeof(lmb_size)); if (res == -1) return (0); printf("Logical Memory Block size: %d MB\n", lmb_size[1] >> 20); /* Parse the /ibm,dynamic-memory. The first position gives the # of entries. The next two words reflect the address of the memory block. The next four words are the DRC index, reserved, list index and flags. (see PAPR C.6.6.2 ibm,dynamic-reconfiguration-memory) #el Addr DRC-idx res list-idx flags ------------------------------------------------- | 4 | 8 | 4 | 4 | 4 | 4 |.... ------------------------------------------------- */ len = OF_getproplen(phandle, "ibm,dynamic-memory"); if (len > 0) { /* We have to use a variable length array on the stack since we have very limited stack space. */ cell_t arr[len/sizeof(cell_t)]; - res = OF_getprop(phandle, "ibm,dynamic-memory", &arr, - sizeof(arr)); + res = OF_getencprop(phandle, "ibm,dynamic-memory", arr, + sizeof(arr)); if (res == -1) return (0); /* Number of elements */ idx = arr[0]; /* First address, in arr[1], arr[2]*/ - dmem = (uint64_t*)&arr[1]; + dmem = &arr[1]; for (i = 0; i < idx; i++) { - base = *dmem; - dmem += 2; - flags = *dmem; + base = ((uint64_t)dmem[0] << 32) + dmem[1]; + dmem += 4; + flags = dmem[1]; /* Use region only if available and not reserved. */ if ((flags & 0x8) && !(flags & 0x80)) { ofmem[lmsz].mr_start = base; ofmem[lmsz].mr_size = (vm_size_t)lmb_size[1]; ofavail[lasz].mr_start = base; ofavail[lasz].mr_size = (vm_size_t)lmb_size[1]; lmsz++; lasz++; } - dmem++; + dmem += 2; } } *msz = lmsz; *asz = lasz; return (1); } void chrp_mem_regions(platform_t plat, struct mem_region *phys, int *physsz, struct mem_region *avail, int *availsz) { vm_offset_t maxphysaddr; int i; ofw_mem_regions(phys, physsz, avail, availsz); parse_drconf_memory(phys, physsz, avail, availsz); /* * On some firmwares (SLOF), some memory may be marked available that * doesn't actually exist. This manifests as an extension of the last * available segment past the end of physical memory, so truncate that * one. 
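 *
 * Worked example (values assumed for illustration): if the last
 * available region is { start 0x60000000, size 0x20000000 } but
 * physical memory ends at 0x70000000, the loop below truncates its
 * size to 0x10000000.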
*/ maxphysaddr = 0; for (i = 0; i < *physsz; i++) if (phys[i].mr_start + phys[i].mr_size > maxphysaddr) maxphysaddr = phys[i].mr_start + phys[i].mr_size; for (i = 0; i < *availsz; i++) if (avail[i].mr_start + avail[i].mr_size > maxphysaddr) avail[i].mr_size = maxphysaddr - avail[i].mr_start; } static vm_offset_t chrp_real_maxaddr(platform_t plat) { return (realmaxaddr); } static u_long chrp_timebase_freq(platform_t plat, struct cpuref *cpuref) { phandle_t phandle; int32_t ticks = -1; phandle = cpuref->cr_hwref; - OF_getprop(phandle, "timebase-frequency", &ticks, sizeof(ticks)); + OF_getencprop(phandle, "timebase-frequency", &ticks, sizeof(ticks)); if (ticks <= 0) panic("Unable to determine timebase frequency!"); return (ticks); } static int chrp_smp_first_cpu(platform_t plat, struct cpuref *cpuref) { char buf[8]; phandle_t cpu, dev, root; int res, cpuid; root = OF_peer(0); dev = OF_child(root); while (dev != 0) { res = OF_getprop(dev, "name", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpus") == 0) break; dev = OF_peer(dev); } if (dev == 0) { /* * psim doesn't have a name property on the /cpus node, * but it can be found directly */ dev = OF_finddevice("/cpus"); if (dev == 0) return (ENOENT); } cpu = OF_child(dev); while (cpu != 0) { res = OF_getprop(cpu, "device_type", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpu") == 0) break; cpu = OF_peer(cpu); } if (cpu == 0) return (ENOENT); cpuref->cr_hwref = cpu; - res = OF_getprop(cpu, "ibm,ppc-interrupt-server#s", &cpuid, + res = OF_getencprop(cpu, "ibm,ppc-interrupt-server#s", &cpuid, sizeof(cpuid)); if (res <= 0) - res = OF_getprop(cpu, "reg", &cpuid, sizeof(cpuid)); + res = OF_getencprop(cpu, "reg", &cpuid, sizeof(cpuid)); if (res <= 0) cpuid = 0; cpuref->cr_cpuid = cpuid; return (0); } static int chrp_smp_next_cpu(platform_t plat, struct cpuref *cpuref) { char buf[8]; phandle_t cpu; int i, res, cpuid; /* Check for whether it should be the next thread */ res = OF_getproplen(cpuref->cr_hwref, "ibm,ppc-interrupt-server#s"); if (res > 0) { cell_t interrupt_servers[res/sizeof(cell_t)]; - OF_getprop(cpuref->cr_hwref, "ibm,ppc-interrupt-server#s", + OF_getencprop(cpuref->cr_hwref, "ibm,ppc-interrupt-server#s", interrupt_servers, res); for (i = 0; i < res/sizeof(cell_t) - 1; i++) { if (interrupt_servers[i] == cpuref->cr_cpuid) { cpuref->cr_cpuid = interrupt_servers[i+1]; return (0); } } } /* Next CPU core/package */ cpu = OF_peer(cpuref->cr_hwref); while (cpu != 0) { res = OF_getprop(cpu, "device_type", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpu") == 0) break; cpu = OF_peer(cpu); } if (cpu == 0) return (ENOENT); cpuref->cr_hwref = cpu; - res = OF_getprop(cpu, "ibm,ppc-interrupt-server#s", &cpuid, + res = OF_getencprop(cpu, "ibm,ppc-interrupt-server#s", &cpuid, sizeof(cpuid)); if (res <= 0) - res = OF_getprop(cpu, "reg", &cpuid, sizeof(cpuid)); + res = OF_getencprop(cpu, "reg", &cpuid, sizeof(cpuid)); if (res <= 0) cpuid = 0; cpuref->cr_cpuid = cpuid; return (0); } static int chrp_smp_get_bsp(platform_t plat, struct cpuref *cpuref) { ihandle_t inst; phandle_t bsp, chosen; int res, cpuid; chosen = OF_finddevice("/chosen"); if (chosen == 0) return (ENXIO); - res = OF_getprop(chosen, "cpu", &inst, sizeof(inst)); + res = OF_getencprop(chosen, "cpu", &inst, sizeof(inst)); if (res < 0) return (ENXIO); bsp = OF_instance_to_package(inst); /* Pick the primary thread. Can it be any other? 
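 *
 * (Added note: ibm,ppc-interrupt-server#s carries one cell per SMT
 * thread, e.g. an assumed { 8, 9, 10, 11 } for a four-thread core;
 * reading a single cell below therefore selects the first, primary
 * thread.)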
*/ cpuref->cr_hwref = bsp; - res = OF_getprop(bsp, "ibm,ppc-interrupt-server#s", &cpuid, + res = OF_getencprop(bsp, "ibm,ppc-interrupt-server#s", &cpuid, sizeof(cpuid)); if (res <= 0) - res = OF_getprop(bsp, "reg", &cpuid, sizeof(cpuid)); + res = OF_getencprop(bsp, "reg", &cpuid, sizeof(cpuid)); if (res <= 0) cpuid = 0; cpuref->cr_cpuid = cpuid; return (0); } #ifdef SMP static int chrp_smp_start_cpu(platform_t plat, struct pcpu *pc) { cell_t start_cpu; int result, err, timeout; if (!rtas_exists()) { printf("RTAS uninitialized: unable to start AP %d\n", pc->pc_cpuid); return (ENXIO); } start_cpu = rtas_token_lookup("start-cpu"); if (start_cpu == -1) { printf("RTAS unknown method: unable to start AP %d\n", pc->pc_cpuid); return (ENXIO); } ap_pcpu = pc; powerpc_sync(); result = rtas_call_method(start_cpu, 3, 1, pc->pc_cpuid, EXC_RST, pc, &err); if (result < 0 || err != 0) { printf("RTAS error (%d/%d): unable to start AP %d\n", result, err, pc->pc_cpuid); return (ENXIO); } timeout = 10000; while (!pc->pc_awake && timeout--) DELAY(100); return ((pc->pc_awake) ? 0 : EBUSY); } static struct cpu_group * chrp_smp_topo(platform_t plat) { struct pcpu *pc, *last_pc; int i, ncores, ncpus; ncores = ncpus = 0; last_pc = NULL; for (i = 0; i <= mp_maxid; i++) { pc = pcpu_find(i); if (pc == NULL) continue; if (last_pc == NULL || pc->pc_hwref != last_pc->pc_hwref) ncores++; last_pc = pc; ncpus++; } if (ncpus % ncores != 0) { printf("WARNING: Irregular SMP topology. Performance may be " "suboptimal (%d CPUS, %d cores)\n", ncpus, ncores); return (smp_topo_none()); } /* Don't do anything fancier for non-threaded SMP */ if (ncpus == ncores) return (smp_topo_none()); return (smp_topo_1level(CG_SHARE_L1, ncpus / ncores, CG_FLAG_SMT)); } #endif static void chrp_reset(platform_t platform) { OF_reboot(); } #ifdef __powerpc64__ static void phyp_cpu_idle(sbintime_t sbt) { phyp_hcall(H_CEDE); } static void chrp_smp_ap_init(platform_t platform) { if (!(mfmsr() & PSL_HV)) { /* Register VPA */ phyp_hcall(H_REGISTER_VPA, 1UL, PCPU_GET(cpuid), splpar_vpa[PCPU_GET(cpuid)]); /* Set interrupt priority */ phyp_hcall(H_CPPR, 0xff); } } #else static void chrp_smp_ap_init(platform_t platform) { } #endif Index: projects/powernv/powerpc/pseries/plpar_iommu.c =================================================================== --- projects/powernv/powerpc/pseries/plpar_iommu.c (revision 290990) +++ projects/powernv/powerpc/pseries/plpar_iommu.c (revision 290991) @@ -1,244 +1,245 @@ /*- * Copyright (c) 2013, Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_PHYPIOMMU, "iommu", "IOMMU data for PAPR LPARs"); struct papr_iommu_map { uint32_t iobn; vmem_t *vmem; struct papr_iommu_map *next; }; static SLIST_HEAD(iommu_maps, iommu_map) iommu_map_head = SLIST_HEAD_INITIALIZER(iommu_map_head); static int papr_supports_stuff_tce = -1; struct iommu_map { uint32_t iobn; vmem_t *vmem; SLIST_ENTRY(iommu_map) entries; }; struct dma_window { struct iommu_map *map; bus_addr_t start; bus_addr_t end; }; int phyp_iommu_set_dma_tag(device_t bus, device_t dev, bus_dma_tag_t tag) { device_t p; phandle_t node; cell_t dma_acells, dma_scells, dmawindow[6]; struct iommu_map *i; int cell; for (p = dev; device_get_parent(p) != NULL; p = device_get_parent(p)) { if (ofw_bus_has_prop(p, "ibm,my-dma-window")) break; if (ofw_bus_has_prop(p, "ibm,dma-window")) break; } if (p == NULL) return (ENXIO); node = ofw_bus_get_node(p); - if (OF_getprop(node, "ibm,#dma-size-cells", &dma_scells, + if (OF_getencprop(node, "ibm,#dma-size-cells", &dma_scells, sizeof(cell_t)) <= 0) - OF_searchprop(node, "#size-cells", &dma_scells, sizeof(cell_t)); - if (OF_getprop(node, "ibm,#dma-address-cells", &dma_acells, + OF_searchencprop(node, "#size-cells", &dma_scells, + sizeof(cell_t)); + if (OF_getencprop(node, "ibm,#dma-address-cells", &dma_acells, sizeof(cell_t)) <= 0) - OF_searchprop(node, "#address-cells", &dma_acells, + OF_searchencprop(node, "#address-cells", &dma_acells, sizeof(cell_t)); if (ofw_bus_has_prop(p, "ibm,my-dma-window")) - OF_getprop(node, "ibm,my-dma-window", dmawindow, + OF_getencprop(node, "ibm,my-dma-window", dmawindow, sizeof(cell_t)*(dma_scells + dma_acells + 1)); else - OF_getprop(node, "ibm,dma-window", dmawindow, + OF_getencprop(node, "ibm,dma-window", dmawindow, sizeof(cell_t)*(dma_scells + dma_acells + 1)); struct dma_window *window = malloc(sizeof(struct dma_window), M_PHYPIOMMU, M_WAITOK); window->start = 0; for (cell = 1; cell < 1 + dma_acells; cell++) { window->start <<= 32; window->start |= dmawindow[cell]; } window->end = 0; for (; cell < 1 + dma_acells + dma_scells; cell++) { window->end <<= 32; window->end |= dmawindow[cell]; } window->end += window->start; if (bootverbose) device_printf(dev, "Mapping IOMMU domain %#x\n", dmawindow[0]); window->map = NULL; SLIST_FOREACH(i, &iommu_map_head, entries) { if (i->iobn == dmawindow[0]) { window->map = i; break; } } if (window->map == NULL) { window->map = malloc(sizeof(struct iommu_map), M_PHYPIOMMU, M_WAITOK); window->map->iobn = dmawindow[0]; /* * Allocate IOMMU range beginning at PAGE_SIZE. Some drivers * (em(4), for example) do not like getting mappings at 0. */ window->map->vmem = vmem_create("IOMMU mappings", PAGE_SIZE, trunc_page(VMEM_ADDR_MAX) - PAGE_SIZE, PAGE_SIZE, 0, M_BESTFIT | M_NOWAIT); SLIST_INSERT_HEAD(&iommu_map_head, window->map, entries); } /* * Check experimentally whether we can use H_STUFF_TCE. It is required * by the spec but some firmware (e.g. 
QEMU) does not actually support * it */ if (papr_supports_stuff_tce == -1) papr_supports_stuff_tce = !(phyp_hcall(H_STUFF_TCE, window->map->iobn, 0, 0, 0) == H_FUNCTION); bus_dma_tag_set_iommu(tag, bus, window); return (0); } int phyp_iommu_map(device_t dev, bus_dma_segment_t *segs, int *nsegs, bus_addr_t min, bus_addr_t max, bus_size_t alignment, bus_addr_t boundary, void *cookie) { struct dma_window *window = cookie; bus_addr_t minaddr, maxaddr; bus_addr_t alloced; bus_size_t allocsize; int error, i, j; uint64_t tce; minaddr = window->start; maxaddr = window->end; /* XXX: handle exclusion range in a more useful way */ if (min < maxaddr) maxaddr = min; /* XXX: consolidate segs? */ for (i = 0; i < *nsegs; i++) { allocsize = round_page(segs[i].ds_len + (segs[i].ds_addr & PAGE_MASK)); error = vmem_xalloc(window->map->vmem, allocsize, (alignment < PAGE_SIZE) ? PAGE_SIZE : alignment, 0, boundary, minaddr, maxaddr, M_BESTFIT | M_NOWAIT, &alloced); if (error != 0) { panic("VMEM failure: %d\n", error); return (error); } KASSERT(alloced % PAGE_SIZE == 0, ("Alloc not page aligned")); KASSERT((alloced + (segs[i].ds_addr & PAGE_MASK)) % alignment == 0, ("Allocated segment does not match alignment constraint")); tce = trunc_page(segs[i].ds_addr); tce |= 0x3; /* read/write */ for (j = 0; j < allocsize; j += PAGE_SIZE) { error = phyp_hcall(H_PUT_TCE, window->map->iobn, alloced + j, tce + j); if (error < 0) { panic("IOMMU mapping error: %d\n", error); return (ENOMEM); } } segs[i].ds_addr = alloced + (segs[i].ds_addr & PAGE_MASK); KASSERT(segs[i].ds_addr > 0, ("Address needs to be positive")); KASSERT(segs[i].ds_addr + segs[i].ds_len < maxaddr, ("Address not in range")); if (error < 0) { panic("IOMMU mapping error: %d\n", error); return (ENOMEM); } } return (0); } int phyp_iommu_unmap(device_t dev, bus_dma_segment_t *segs, int nsegs, void *cookie) { struct dma_window *window = cookie; bus_addr_t pageround; bus_size_t roundedsize; int i; bus_addr_t j; for (i = 0; i < nsegs; i++) { pageround = trunc_page(segs[i].ds_addr); roundedsize = round_page(segs[i].ds_len + (segs[i].ds_addr & PAGE_MASK)); if (papr_supports_stuff_tce) { phyp_hcall(H_STUFF_TCE, window->map->iobn, pageround, 0, roundedsize/PAGE_SIZE); } else { for (j = 0; j < roundedsize; j += PAGE_SIZE) phyp_hcall(H_PUT_TCE, window->map->iobn, pageround + j, 0); } vmem_xfree(window->map->vmem, pageround, roundedsize); } return (0); } Index: projects/powernv/powerpc/pseries/rtas_pci.c =================================================================== --- projects/powernv/powerpc/pseries/rtas_pci.c (revision 290990) +++ projects/powernv/powerpc/pseries/rtas_pci.c (revision 290991) @@ -1,205 +1,205 @@ /*- * Copyright (c) 2011 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" #include "iommu_if.h" /* * Device interface. */ static int rtaspci_probe(device_t); static int rtaspci_attach(device_t); /* * pcib interface. */ static u_int32_t rtaspci_read_config(device_t, u_int, u_int, u_int, u_int, int); static void rtaspci_write_config(device_t, u_int, u_int, u_int, u_int, u_int32_t, int); /* * Driver methods. */ static device_method_t rtaspci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, rtaspci_probe), DEVMETHOD(device_attach, rtaspci_attach), /* pcib interface */ DEVMETHOD(pcib_read_config, rtaspci_read_config), DEVMETHOD(pcib_write_config, rtaspci_write_config), DEVMETHOD_END }; struct rtaspci_softc { struct ofw_pci_softc pci_sc; cell_t read_pci_config, write_pci_config; cell_t ex_read_pci_config, ex_write_pci_config; int sc_extended_config; }; static devclass_t rtaspci_devclass; DEFINE_CLASS_1(pcib, rtaspci_driver, rtaspci_methods, sizeof(struct rtaspci_softc), ofw_pci_driver); DRIVER_MODULE(rtaspci, ofwbus, rtaspci_driver, rtaspci_devclass, 0, 0); static int rtaspci_probe(device_t dev) { const char *type; if (!rtas_exists()) return (ENXIO); type = ofw_bus_get_type(dev); if (OF_getproplen(ofw_bus_get_node(dev), "used-by-rtas") < 0) return (ENXIO); if (type == NULL || strcmp(type, "pci") != 0) return (ENXIO); device_set_desc(dev, "RTAS Host-PCI bridge"); return (BUS_PROBE_GENERIC); } static int rtaspci_attach(device_t dev) { struct rtaspci_softc *sc; sc = device_get_softc(dev); sc->read_pci_config = rtas_token_lookup("read-pci-config"); sc->write_pci_config = rtas_token_lookup("write-pci-config"); sc->ex_read_pci_config = rtas_token_lookup("ibm,read-pci-config"); sc->ex_write_pci_config = rtas_token_lookup("ibm,write-pci-config"); sc->sc_extended_config = 0; - OF_getprop(ofw_bus_get_node(dev), "ibm,pci-config-space-type", + OF_getencprop(ofw_bus_get_node(dev), "ibm,pci-config-space-type", &sc->sc_extended_config, sizeof(sc->sc_extended_config)); return (ofw_pci_attach(dev)); } static uint32_t rtaspci_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, int width) { struct rtaspci_softc *sc; uint32_t retval = 0xffffffff; uint32_t config_addr; int error, pcierror; sc = device_get_softc(dev); config_addr = ((bus & 0xff) << 16) | ((slot & 0x1f) << 11) | ((func & 0x7) << 8) | (reg & 0xff); if (sc->sc_extended_config) config_addr |= (reg & 0xf00) << 16; if (sc->ex_read_pci_config != -1) error = rtas_call_method(sc->ex_read_pci_config, 4, 2, config_addr, sc->pci_sc.sc_pcir.phys_hi, sc->pci_sc.sc_pcir.phys_mid, width, &pcierror, &retval); else error = rtas_call_method(sc->read_pci_config, 2, 2, config_addr, width, &pcierror, &retval); /* Sign-extend output */ switch (width) { case 1: retval = (int32_t)(int8_t)(retval); break; case 2: retval = (int32_t)(int16_t)(retval); 
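		/*
		 * Added example: an assumed 16-bit read returning 0x8000
		 * becomes 0xffff8000 after this sign-extension.
		 */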
break; } if (error < 0 || pcierror != 0) retval = 0xffffffff; return (retval); } static void rtaspci_write_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, uint32_t val, int width) { struct rtaspci_softc *sc; uint32_t config_addr; int pcierror; sc = device_get_softc(dev); config_addr = ((bus & 0xff) << 16) | ((slot & 0x1f) << 11) | ((func & 0x7) << 8) | (reg & 0xff); if (sc->sc_extended_config) config_addr |= (reg & 0xf00) << 16; if (sc->ex_write_pci_config != -1) rtas_call_method(sc->ex_write_pci_config, 5, 1, config_addr, sc->pci_sc.sc_pcir.phys_hi, sc->pci_sc.sc_pcir.phys_mid, width, val, &pcierror); else rtas_call_method(sc->write_pci_config, 3, 1, config_addr, width, val, &pcierror); } Index: projects/powernv/sparc64/include/dump.h =================================================================== --- projects/powernv/sparc64/include/dump.h (revision 290990) +++ projects/powernv/sparc64/include/dump.h (revision 290991) @@ -1,76 +1,68 @@ /*- * Copyright (c) 2014 EMC Corp. * Author: Conrad Meyer * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_DUMP_H_ #define _MACHINE_DUMP_H_ #define DUMPSYS_MD_PA_NPAIRS 128 #define DUMPSYS_NUM_AUX_HDRS 0 #define KERNELDUMP_ARCH_VERSION KERNELDUMP_SPARC64_VERSION #define EM_VALUE EM_SPARCV9 -void dumpsys_pa_init(void); int dumpsys(struct dumperinfo *); static inline struct dump_pa * dumpsys_pa_next(struct dump_pa *p) { return (dumpsys_gen_pa_next(p)); } static inline void dumpsys_wbinv_all(void) { dumpsys_gen_wbinv_all(); } static inline void dumpsys_unmap_chunk(vm_paddr_t pa, size_t s, void *va) { dumpsys_gen_unmap_chunk(pa, s, va); -} - -static inline int -dumpsys_write_aux_headers(struct dumperinfo *di) -{ - - return (dumpsys_gen_write_aux_headers(di)); } static inline int minidumpsys(struct dumperinfo *di) { return (-ENOSYS); } #endif /* !_MACHINE_DUMP_H_ */ Index: projects/powernv/sparc64/sparc64/dump_machdep.c =================================================================== --- projects/powernv/sparc64/sparc64/dump_machdep.c (revision 290990) +++ projects/powernv/sparc64/sparc64/dump_machdep.c (revision 290991) @@ -1,172 +1,162 @@ /*- * Copyright (c) 2002 Marcel Moolenaar * Copyright (c) 2002 Thomas Moestl * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static off_t fileofs; extern off_t dumplo; extern struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS]; int do_minidump = 0; void -dumpsys_pa_init(void) -{ - int i; - - memset(dump_map, 0, sizeof(dump_map)); - for (i = 0; i < sparc64_nmemreg; i++) { - dump_map[i].pa_start = sparc64_memreg[i].mr_start; - dump_map[i].pa_size = sparc64_memreg[i].mr_size; - } -} - -void dumpsys_map_chunk(vm_paddr_t pa, size_t chunk __unused, void **va) { *va = (void *)TLB_PHYS_TO_DIRECT(pa); } static int reg_write(struct dumperinfo *di, vm_paddr_t pa, vm_size_t size) { struct sparc64_dump_reg r; r.dr_pa = pa; r.dr_size = size; r.dr_offs = fileofs; fileofs += size; return (dumpsys_buf_write(di, (char *)&r, sizeof(r))); } int dumpsys(struct dumperinfo *di) { static struct kerneldumpheader kdh; - struct sparc64_dump_hdr hdr; vm_size_t size, totsize, hdrsize; int error, i, nreg; - /* Calculate dump size. */ + /* Set up dump_map and calculate dump size. */ size = 0; nreg = sparc64_nmemreg; - for (i = 0; i < sparc64_nmemreg; i++) - size += sparc64_memreg[i].mr_size; + memset(dump_map, 0, sizeof(dump_map)); + for (i = 0; i < nreg; i++) { + dump_map[i].pa_start = sparc64_memreg[i].mr_start; + size += dump_map[i].pa_size = sparc64_memreg[i].mr_size; + } /* Account for the header size. */ hdrsize = roundup2(sizeof(hdr) + sizeof(struct sparc64_dump_reg) * nreg, DEV_BSIZE); size += hdrsize; totsize = size + 2 * sizeof(kdh); if (totsize > di->mediasize) { printf("Insufficient space on device (need %ld, have %ld), " "refusing to dump.\n", (long)totsize, (long)di->mediasize); error = ENOSPC; goto fail; } /* Determine dump offset on device. */ dumplo = di->mediaoffset + di->mediasize - totsize; mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_SPARC64_VERSION, size, di->blocksize); printf("Dumping %lu MB (%d chunks)\n", (u_long)(size >> 20), nreg); /* Dump leader */ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); if (error) goto fail; dumplo += sizeof(kdh); /* Dump the private header. 
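* Together with the writes below, the on-media layout works out to
* (illustrative sketch):
*
*	[leader kdh][sparc64_dump_hdr + nreg region descriptors,
*	 padded to DEV_BSIZE][memory chunks][trailer kdh]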
*/ hdr.dh_hdr_size = hdrsize; hdr.dh_tsb_pa = tsb_kernel_phys; hdr.dh_tsb_size = tsb_kernel_size; hdr.dh_tsb_mask = tsb_kernel_mask; hdr.dh_nregions = nreg; if (dumpsys_buf_write(di, (char *)&hdr, sizeof(hdr)) != 0) goto fail; fileofs = hdrsize; /* Now, write out the region descriptors. */ - for (i = 0; i < sparc64_nmemreg; i++) { + for (i = 0; i < nreg; i++) { error = reg_write(di, sparc64_memreg[i].mr_start, sparc64_memreg[i].mr_size); if (error != 0) goto fail; } dumpsys_buf_flush(di); /* Dump memory chunks. */ error = dumpsys_foreach_chunk(dumpsys_cb_dumpdata, di); if (error < 0) goto fail; /* Dump trailer */ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); if (error) goto fail; /* Signal completion, signoff and exit stage left. */ dump_write(di, NULL, 0, 0, 0); printf("\nDump complete\n"); return (0); fail: if (error < 0) error = -error; /* XXX It should look more like VMS :-) */ printf("** DUMP FAILED (ERROR %d) **\n", error); return (error); } Index: projects/powernv/sys/jail.h =================================================================== --- projects/powernv/sys/jail.h (revision 290990) +++ projects/powernv/sys/jail.h (revision 290991) @@ -1,414 +1,415 @@ /*- * Copyright (c) 1999 Poul-Henning Kamp. * Copyright (c) 2009 James Gritton. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_JAIL_H_ #define _SYS_JAIL_H_ #ifdef _KERNEL struct jail_v0 { u_int32_t version; char *path; char *hostname; u_int32_t ip_number; }; #endif struct jail { uint32_t version; char *path; char *hostname; char *jailname; uint32_t ip4s; uint32_t ip6s; struct in_addr *ip4; struct in6_addr *ip6; }; #define JAIL_API_VERSION 2 /* * For all xprison structs, always keep the pr_version an int and * the first variable so userspace can easily distinguish them. */ #ifndef _KERNEL struct xprison_v1 { int pr_version; int pr_id; char pr_path[MAXPATHLEN]; char pr_host[MAXHOSTNAMELEN]; u_int32_t pr_ip; }; #endif struct xprison { int pr_version; int pr_id; int pr_state; cpusetid_t pr_cpusetid; char pr_path[MAXPATHLEN]; char pr_host[MAXHOSTNAMELEN]; char pr_name[MAXHOSTNAMELEN]; uint32_t pr_ip4s; uint32_t pr_ip6s; #if 0 /* * sizeof(xprison) will be malloced + size needed for all * IPv4 and IPv6 addesses. Offsets are based numbers of addresses. 
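* For example (hypothetical counts), with pr_ip4s = 2 and pr_ip6s = 1
* the caller allocates sizeof(struct xprison) + 2 * sizeof(struct
* in_addr) + 1 * sizeof(struct in6_addr) bytes, and pr_ip6[] begins
* immediately after the two in_addr entries.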
*/ struct in_addr pr_ip4[]; struct in6_addr pr_ip6[]; #endif }; #define XPRISON_VERSION 3 #define PRISON_STATE_INVALID 0 #define PRISON_STATE_ALIVE 1 #define PRISON_STATE_DYING 2 /* * Flags for jail_set and jail_get. */ #define JAIL_CREATE 0x01 /* Create jail if it doesn't exist */ #define JAIL_UPDATE 0x02 /* Update parameters of existing jail */ #define JAIL_ATTACH 0x04 /* Attach to jail upon creation */ #define JAIL_DYING 0x08 /* Allow getting a dying jail */ #define JAIL_SET_MASK 0x0f #define JAIL_GET_MASK 0x08 #define JAIL_SYS_DISABLE 0 #define JAIL_SYS_NEW 1 #define JAIL_SYS_INHERIT 2 #ifndef _KERNEL struct iovec; int jail(struct jail *); int jail_set(struct iovec *, unsigned int, int); int jail_get(struct iovec *, unsigned int, int); int jail_attach(int); int jail_remove(int); #else /* _KERNEL */ #include #include #include #include #include #define JAIL_MAX 999999 #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_PRISON); #endif #endif /* _KERNEL */ #if defined(_KERNEL) || defined(_WANT_PRISON) #include #define HOSTUUIDLEN 64 #define OSRELEASELEN 32 struct racct; struct prison_racct; /* * This structure describes a prison. It is pointed to by all struct * ucreds's of the inmates. pr_ref keeps track of them and is used to * delete the struture when the last inmate is dead. * * Lock key: * (a) allprison_lock * (p) locked by pr_mtx * (c) set only during creation before the structure is shared, no mutex * required to read * (d) set only during destruction of jail, no mutex needed */ struct prison { TAILQ_ENTRY(prison) pr_list; /* (a) all prisons */ int pr_id; /* (c) prison id */ int pr_ref; /* (p) refcount */ int pr_uref; /* (p) user (alive) refcount */ unsigned pr_flags; /* (p) PR_* flags */ LIST_HEAD(, prison) pr_children; /* (a) list of child jails */ LIST_ENTRY(prison) pr_sibling; /* (a) next in parent's list */ struct prison *pr_parent; /* (c) containing jail */ struct mtx pr_mtx; struct task pr_task; /* (d) destroy task */ struct osd pr_osd; /* (p) additional data */ struct cpuset *pr_cpuset; /* (p) cpuset */ struct vnet *pr_vnet; /* (c) network stack */ struct vnode *pr_root; /* (c) vnode to rdir */ int pr_ip4s; /* (p) number of v4 IPs */ int pr_ip6s; /* (p) number of v6 IPs */ struct in_addr *pr_ip4; /* (p) v4 IPs of jail */ struct in6_addr *pr_ip6; /* (p) v6 IPs of jail */ struct prison_racct *pr_prison_racct; /* (c) racct jail proxy */ void *pr_sparep[3]; int pr_childcount; /* (a) number of child jails */ int pr_childmax; /* (p) maximum child jails */ unsigned pr_allow; /* (p) PR_ALLOW_* flags */ int pr_securelevel; /* (p) securelevel */ int pr_enforce_statfs; /* (p) statfs permission */ int pr_devfs_rsnum; /* (p) devfs ruleset */ int pr_spare[3]; int pr_osreldate; /* (c) kern.osreldate value */ unsigned long pr_hostid; /* (p) jail hostid */ char pr_name[MAXHOSTNAMELEN]; /* (p) admin jail name */ char pr_path[MAXPATHLEN]; /* (c) chroot path */ char pr_hostname[MAXHOSTNAMELEN]; /* (p) jail hostname */ char pr_domainname[MAXHOSTNAMELEN]; /* (p) jail domainname */ char pr_hostuuid[HOSTUUIDLEN]; /* (p) jail hostuuid */ char pr_osrelease[OSRELEASELEN]; /* (c) kern.osrelease value */ }; struct prison_racct { LIST_ENTRY(prison_racct) prr_next; char prr_name[MAXHOSTNAMELEN]; u_int prr_refcount; struct racct *prr_racct; }; #endif /* _KERNEL || _WANT_PRISON */ #ifdef _KERNEL /* Flag bits set via options */ #define PR_PERSIST 0x00000001 /* Can exist without processes */ #define PR_HOST 0x00000002 /* Virtualize hostname et al */ #define PR_IP4_USER 0x00000004 /* Restrict IPv4 addresses */ #define 
PR_IP6_USER 0x00000008 /* Restrict IPv6 addresses */ #define PR_VNET 0x00000010 /* Virtual network stack */ #define PR_IP4_SADDRSEL 0x00000080 /* Do IPv4 src addr sel. or use the */ /* primary jail address. */ #define PR_IP6_SADDRSEL 0x00000100 /* Do IPv6 src addr sel. or use the */ /* primary jail address. */ /* Internal flag bits */ #define PR_REMOVE 0x01000000 /* In process of being removed */ #define PR_IP4 0x02000000 /* IPv4 restricted or disabled */ /* by this jail or an ancestor */ #define PR_IP6 0x04000000 /* IPv6 restricted or disabled */ /* by this jail or an ancestor */ /* Flags for pr_allow */ #define PR_ALLOW_SET_HOSTNAME 0x0001 #define PR_ALLOW_SYSVIPC 0x0002 #define PR_ALLOW_RAW_SOCKETS 0x0004 #define PR_ALLOW_CHFLAGS 0x0008 #define PR_ALLOW_MOUNT 0x0010 #define PR_ALLOW_QUOTAS 0x0020 #define PR_ALLOW_SOCKET_AF 0x0040 #define PR_ALLOW_MOUNT_DEVFS 0x0080 #define PR_ALLOW_MOUNT_NULLFS 0x0100 #define PR_ALLOW_MOUNT_ZFS 0x0200 #define PR_ALLOW_MOUNT_PROCFS 0x0400 #define PR_ALLOW_MOUNT_TMPFS 0x0800 #define PR_ALLOW_MOUNT_FDESCFS 0x1000 #define PR_ALLOW_MOUNT_LINPROCFS 0x2000 #define PR_ALLOW_MOUNT_LINSYSFS 0x4000 #define PR_ALLOW_ALL 0x7fff /* * OSD methods */ #define PR_METHOD_CREATE 0 #define PR_METHOD_GET 1 #define PR_METHOD_SET 2 #define PR_METHOD_CHECK 3 #define PR_METHOD_ATTACH 4 #define PR_MAXMETHOD 5 /* * Lock/unlock a prison. * XXX These exist not so much for general convenience, but to be useable in * the FOREACH_PRISON_DESCENDANT_LOCKED macro which can't handle them in * non-function form as currently defined. */ static __inline void prison_lock(struct prison *pr) { mtx_lock(&pr->pr_mtx); } static __inline void prison_unlock(struct prison *pr) { mtx_unlock(&pr->pr_mtx); } /* Traverse a prison's immediate children. */ #define FOREACH_PRISON_CHILD(ppr, cpr) \ LIST_FOREACH(cpr, &(ppr)->pr_children, pr_sibling) /* * Preorder traversal of all of a prison's descendants. * This ugly loop allows the macro to be followed by a single block * as expected in a looping primitive. */ #define FOREACH_PRISON_DESCENDANT(ppr, cpr, descend) \ for ((cpr) = (ppr), (descend) = 1; \ ((cpr) = (((descend) && !LIST_EMPTY(&(cpr)->pr_children)) \ ? LIST_FIRST(&(cpr)->pr_children) \ : ((cpr) == (ppr) \ ? NULL \ : (((descend) = LIST_NEXT(cpr, pr_sibling) != NULL) \ ? LIST_NEXT(cpr, pr_sibling) \ : (cpr)->pr_parent))));) \ if (!(descend)) \ ; \ else /* * As above, but lock descendants on the way down and unlock on the way up. */ #define FOREACH_PRISON_DESCENDANT_LOCKED(ppr, cpr, descend) \ for ((cpr) = (ppr), (descend) = 1; \ ((cpr) = (((descend) && !LIST_EMPTY(&(cpr)->pr_children)) \ ? LIST_FIRST(&(cpr)->pr_children) \ : ((cpr) == (ppr) \ ? NULL \ : ((prison_unlock(cpr), \ (descend) = LIST_NEXT(cpr, pr_sibling) != NULL) \ ? LIST_NEXT(cpr, pr_sibling) \ : (cpr)->pr_parent))));) \ if ((descend) ? (prison_lock(cpr), 0) : 1) \ ; \ else /* * As above, but also keep track of the level descended to. */ #define FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(ppr, cpr, descend, level)\ for ((cpr) = (ppr), (descend) = 1, (level) = 0; \ ((cpr) = (((descend) && !LIST_EMPTY(&(cpr)->pr_children)) \ ? (level++, LIST_FIRST(&(cpr)->pr_children)) \ : ((cpr) == (ppr) \ ? NULL \ : ((prison_unlock(cpr), \ (descend) = LIST_NEXT(cpr, pr_sibling) != NULL) \ ? LIST_NEXT(cpr, pr_sibling) \ : (level--, (cpr)->pr_parent)))));) \ if ((descend) ? (prison_lock(cpr), 0) : 1) \ ; \ else /* * Attributes of the physical system, and the root of the jail tree. 
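* A sketch of walking the whole jail tree from this root with the
* traversal macros above (assuming the caller holds allprison_lock,
* per the (a) annotations):
*
*	FOREACH_PRISON_DESCENDANT_LOCKED(&prison0, cpr, descend) {
*		... cpr is locked at this point; clearing 'descend'
*		    skips cpr's subtree on the next iteration ...
*	}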
*/ extern struct prison prison0; TAILQ_HEAD(prisonlist, prison); extern struct prisonlist allprison; extern struct sx allprison_lock; /* * Sysctls to describe jail parameters. */ SYSCTL_DECL(_security_jail_param); #define SYSCTL_JAIL_PARAM(module, param, type, fmt, descr) \ SYSCTL_PROC(_security_jail_param ## module, OID_AUTO, param, \ (type) | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_param, fmt, descr) #define SYSCTL_JAIL_PARAM_STRING(module, param, access, len, descr) \ SYSCTL_PROC(_security_jail_param ## module, OID_AUTO, param, \ CTLTYPE_STRING | CTLFLAG_MPSAFE | (access), NULL, len, \ sysctl_jail_param, "A", descr) #define SYSCTL_JAIL_PARAM_STRUCT(module, param, access, len, fmt, descr)\ SYSCTL_PROC(_security_jail_param ## module, OID_AUTO, param, \ CTLTYPE_STRUCT | CTLFLAG_MPSAFE | (access), NULL, len, \ sysctl_jail_param, fmt, descr) #define SYSCTL_JAIL_PARAM_NODE(module, descr) \ SYSCTL_NODE(_security_jail_param, OID_AUTO, module, 0, 0, descr) #define SYSCTL_JAIL_PARAM_SUBNODE(parent, module, descr) \ SYSCTL_NODE(_security_jail_param_##parent, OID_AUTO, module, 0, 0, descr) #define SYSCTL_JAIL_PARAM_SYS_NODE(module, access, descr) \ SYSCTL_JAIL_PARAM_NODE(module, descr); \ SYSCTL_JAIL_PARAM(_##module, , CTLTYPE_INT | (access), "E,jailsys", \ descr) /* * Kernel support functions for jail(). */ struct ucred; struct mount; struct sockaddr; struct statfs; int jailed(struct ucred *cred); int jailed_without_vnet(struct ucred *); void getcredhostname(struct ucred *, char *, size_t); void getcreddomainname(struct ucred *, char *, size_t); void getcredhostuuid(struct ucred *, char *, size_t); void getcredhostid(struct ucred *, unsigned long *); void prison0_init(void); int prison_allow(struct ucred *, unsigned); int prison_check(struct ucred *cred1, struct ucred *cred2); int prison_owns_vnet(struct ucred *); int prison_canseemount(struct ucred *cred, struct mount *mp); void prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp); struct prison *prison_find(int prid); struct prison *prison_find_child(struct prison *, int); struct prison *prison_find_name(struct prison *, const char *); int prison_flag(struct ucred *, unsigned); void prison_free(struct prison *pr); void prison_free_locked(struct prison *pr); void prison_hold(struct prison *pr); void prison_hold_locked(struct prison *pr); void prison_proc_hold(struct prison *); void prison_proc_free(struct prison *); int prison_ischild(struct prison *, struct prison *); int prison_equal_ip4(struct prison *, struct prison *); int prison_get_ip4(struct ucred *cred, struct in_addr *ia); int prison_local_ip4(struct ucred *cred, struct in_addr *ia); int prison_remote_ip4(struct ucred *cred, struct in_addr *ia); int prison_check_ip4(const struct ucred *, const struct in_addr *); int prison_saddrsel_ip4(struct ucred *, struct in_addr *); #ifdef INET6 int prison_equal_ip6(struct prison *, struct prison *); int prison_get_ip6(struct ucred *, struct in6_addr *); int prison_local_ip6(struct ucred *, struct in6_addr *, int); int prison_remote_ip6(struct ucred *, struct in6_addr *); int prison_check_ip6(struct ucred *, struct in6_addr *); int prison_saddrsel_ip6(struct ucred *, struct in6_addr *); #endif int prison_check_af(struct ucred *cred, int af); int prison_if(struct ucred *cred, struct sockaddr *sa); char *prison_name(struct prison *, struct prison *); int prison_priv_check(struct ucred *cred, int priv); int sysctl_jail_param(SYSCTL_HANDLER_ARGS); void prison_racct_foreach(void (*callback)(struct racct *racct, - void *arg2, void 
*arg3), void *arg2, void *arg3); + void *arg2, void *arg3), void (*pre)(void), void (*post)(void), + void *arg2, void *arg3); struct prison_racct *prison_racct_find(const char *name); void prison_racct_hold(struct prison_racct *prr); void prison_racct_free(struct prison_racct *prr); #endif /* _KERNEL */ #endif /* !_SYS_JAIL_H_ */ Index: projects/powernv/sys/loginclass.h =================================================================== --- projects/powernv/sys/loginclass.h (revision 290990) +++ projects/powernv/sys/loginclass.h (revision 290991) @@ -1,53 +1,54 @@ /*- * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_LOGINCLASS_H_ #define _SYS_LOGINCLASS_H_ struct racct; /* * Exactly one of these structures exists per login class. */ struct loginclass { LIST_ENTRY(loginclass) lc_next; char lc_name[MAXLOGNAME]; u_int lc_refcount; struct racct *lc_racct; }; void loginclass_hold(struct loginclass *lc); void loginclass_free(struct loginclass *lc); struct loginclass *loginclass_find(const char *name); void loginclass_racct_foreach(void (*callback)(struct racct *racct, - void *arg2, void *arg3), void *arg2, void *arg3); + void *arg2, void *arg3), void (*pre)(void), void (*post)(void), + void *arg2, void *arg3); #endif /* !_SYS_LOGINCLASS_H_ */ Index: projects/powernv/sys/pmc.h =================================================================== --- projects/powernv/sys/pmc.h (revision 290990) +++ projects/powernv/sys/pmc.h (revision 290991) @@ -1,1158 +1,1159 @@ /*- * Copyright (c) 2003-2008, Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_PMC_H_ #define _SYS_PMC_H_ #include #include #include #define PMC_MODULE_NAME "hwpmc" #define PMC_NAME_MAX 64 /* HW counter name size */ #define PMC_CLASS_MAX 8 /* max #classes of PMCs per-system */ /* * Kernel<->userland API version number [MMmmpppp] * * Major numbers are to be incremented when an incompatible change to * the ABI occurs that older clients will not be able to handle. * * Minor numbers are incremented when a backwards compatible change * occurs that allows older correct programs to run unchanged. For * example, when support for a new PMC type is added. * * The patch version is incremented for every bug fix. */ #define PMC_VERSION_MAJOR 0x03 #define PMC_VERSION_MINOR 0x01 #define PMC_VERSION_PATCH 0x0000 #define PMC_VERSION (PMC_VERSION_MAJOR << 24 | \ PMC_VERSION_MINOR << 16 | PMC_VERSION_PATCH) /* * Kinds of CPUs known. * * We keep track of CPU variants that need to be distinguished in * some way for PMC operations. CPU names are grouped by manufacturer * and numbered sparsely in order to minimize changes to the ABI involved * when new CPUs are added. 
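* Concretely, the table below keeps AMD at 0x00-, Intel at 0x80-,
* XScale at 0x100, MIPS at 0x200-, PowerPC at 0x300- and ARMv7/ARMv8
* at 0x500-/0x600-, leaving gaps for future parts in each family.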
*/ #define __PMC_CPUS() \ __PMC_CPU(AMD_K7, 0x00, "AMD K7") \ __PMC_CPU(AMD_K8, 0x01, "AMD K8") \ __PMC_CPU(INTEL_P5, 0x80, "Intel Pentium") \ __PMC_CPU(INTEL_P6, 0x81, "Intel Pentium Pro") \ __PMC_CPU(INTEL_CL, 0x82, "Intel Celeron") \ __PMC_CPU(INTEL_PII, 0x83, "Intel Pentium II") \ __PMC_CPU(INTEL_PIII, 0x84, "Intel Pentium III") \ __PMC_CPU(INTEL_PM, 0x85, "Intel Pentium M") \ __PMC_CPU(INTEL_PIV, 0x86, "Intel Pentium IV") \ __PMC_CPU(INTEL_CORE, 0x87, "Intel Core Solo/Duo") \ __PMC_CPU(INTEL_CORE2, 0x88, "Intel Core2") \ __PMC_CPU(INTEL_CORE2EXTREME, 0x89, "Intel Core2 Extreme") \ __PMC_CPU(INTEL_ATOM, 0x8A, "Intel Atom") \ __PMC_CPU(INTEL_COREI7, 0x8B, "Intel Core i7") \ __PMC_CPU(INTEL_WESTMERE, 0x8C, "Intel Westmere") \ __PMC_CPU(INTEL_SANDYBRIDGE, 0x8D, "Intel Sandy Bridge") \ __PMC_CPU(INTEL_IVYBRIDGE, 0x8E, "Intel Ivy Bridge") \ __PMC_CPU(INTEL_SANDYBRIDGE_XEON, 0x8F, "Intel Sandy Bridge Xeon") \ __PMC_CPU(INTEL_IVYBRIDGE_XEON, 0x90, "Intel Ivy Bridge Xeon") \ __PMC_CPU(INTEL_HASWELL, 0x91, "Intel Haswell") \ __PMC_CPU(INTEL_ATOM_SILVERMONT, 0x92, "Intel Atom Silvermont") \ __PMC_CPU(INTEL_NEHALEM_EX, 0x93, "Intel Nehalem Xeon 7500") \ __PMC_CPU(INTEL_WESTMERE_EX, 0x94, "Intel Westmere Xeon E7") \ __PMC_CPU(INTEL_HASWELL_XEON, 0x95, "Intel Haswell Xeon E5 v3") \ __PMC_CPU(INTEL_BROADWELL, 0x96, "Intel Broadwell") \ __PMC_CPU(INTEL_XSCALE, 0x100, "Intel XScale") \ __PMC_CPU(MIPS_24K, 0x200, "MIPS 24K") \ __PMC_CPU(MIPS_OCTEON, 0x201, "Cavium Octeon") \ __PMC_CPU(MIPS_74K, 0x202, "MIPS 74K") \ __PMC_CPU(PPC_7450, 0x300, "PowerPC MPC7450") \ __PMC_CPU(PPC_E500, 0x340, "PowerPC e500 Core") \ __PMC_CPU(PPC_MPC85XX, 0x340, "Freescale PowerPC MPC85XX") \ __PMC_CPU(PPC_970, 0x380, "IBM PowerPC 970") \ __PMC_CPU(GENERIC, 0x400, "Generic") \ __PMC_CPU(ARMV7_CORTEX_A5, 0x500, "ARMv7 Cortex A5") \ __PMC_CPU(ARMV7_CORTEX_A7, 0x501, "ARMv7 Cortex A7") \ __PMC_CPU(ARMV7_CORTEX_A8, 0x502, "ARMv7 Cortex A8") \ __PMC_CPU(ARMV7_CORTEX_A9, 0x503, "ARMv7 Cortex A9") \ __PMC_CPU(ARMV7_CORTEX_A15, 0x504, "ARMv7 Cortex A15") \ __PMC_CPU(ARMV7_CORTEX_A17, 0x505, "ARMv7 Cortex A17") \ __PMC_CPU(ARMV8_CORTEX_A53, 0x600, "ARMv8 Cortex A53") \ __PMC_CPU(ARMV8_CORTEX_A57, 0x601, "ARMv8 Cortex A57") enum pmc_cputype { #undef __PMC_CPU #define __PMC_CPU(S,V,D) PMC_CPU_##S = V, __PMC_CPUS() }; #define PMC_CPU_FIRST PMC_CPU_AMD_K7 #define PMC_CPU_LAST PMC_CPU_GENERIC /* * Classes of PMCs */ #define __PMC_CLASSES() \ __PMC_CLASS(TSC, 0x00, "CPU Timestamp counter") \ __PMC_CLASS(K7, 0x01, "AMD K7 performance counters") \ __PMC_CLASS(K8, 0x02, "AMD K8 performance counters") \ __PMC_CLASS(P5, 0x03, "Intel Pentium counters") \ __PMC_CLASS(P6, 0x04, "Intel Pentium Pro counters") \ __PMC_CLASS(P4, 0x05, "Intel Pentium-IV counters") \ __PMC_CLASS(IAF, 0x06, "Intel Core2/Atom, fixed function") \ __PMC_CLASS(IAP, 0x07, "Intel Core...Atom, programmable") \ __PMC_CLASS(UCF, 0x08, "Intel Uncore fixed function") \ __PMC_CLASS(UCP, 0x09, "Intel Uncore programmable") \ __PMC_CLASS(XSCALE, 0x0A, "Intel XScale counters") \ __PMC_CLASS(MIPS24K, 0x0B, "MIPS 24K") \ __PMC_CLASS(OCTEON, 0x0C, "Cavium Octeon") \ __PMC_CLASS(PPC7450, 0x0D, "Motorola MPC7450 class") \ __PMC_CLASS(PPC970, 0x0E, "IBM PowerPC 970 class") \ __PMC_CLASS(SOFT, 0x0F, "Software events") \ __PMC_CLASS(ARMV7, 0x10, "ARMv7") \ __PMC_CLASS(ARMV8, 0x11, "ARMv8") \ __PMC_CLASS(MIPS74K, 0x12, "MIPS 74K") \ __PMC_CLASS(E500, 0x13, "Freescale e500 class") enum pmc_class { #undef __PMC_CLASS #define __PMC_CLASS(S,V,D) PMC_CLASS_##S = V, __PMC_CLASSES() }; #define 
PMC_CLASS_FIRST PMC_CLASS_TSC #define PMC_CLASS_LAST PMC_CLASS_E500 /* * A PMC can be in the following states: * * Hardware states: * DISABLED -- administratively prohibited from being used. * FREE -- HW available for use * Software states: * ALLOCATED -- allocated * STOPPED -- allocated, but not counting events * RUNNING -- allocated, and in operation; 'pm_runcount' * holds the number of CPUs using this PMC at * a given instant * DELETED -- being destroyed */ #define __PMC_HWSTATES() \ __PMC_STATE(DISABLED) \ __PMC_STATE(FREE) #define __PMC_SWSTATES() \ __PMC_STATE(ALLOCATED) \ __PMC_STATE(STOPPED) \ __PMC_STATE(RUNNING) \ __PMC_STATE(DELETED) #define __PMC_STATES() \ __PMC_HWSTATES() \ __PMC_SWSTATES() enum pmc_state { #undef __PMC_STATE #define __PMC_STATE(S) PMC_STATE_##S, __PMC_STATES() __PMC_STATE(MAX) }; #define PMC_STATE_FIRST PMC_STATE_DISABLED #define PMC_STATE_LAST PMC_STATE_DELETED /* * An allocated PMC may be used as a 'global' counter or as a * 'thread-private' one. Each such mode of use can be in either * statistical sampling mode or in counting mode. Thus a PMC in use * is in one of four modes: * * SS i.e., SYSTEM STATISTICAL -- system-wide statistical profiling * SC i.e., SYSTEM COUNTER -- system-wide counting mode * TS i.e., THREAD STATISTICAL -- thread virtual, statistical profiling * TC i.e., THREAD COUNTER -- thread virtual, counting mode * * Statistical profiling modes rely on the PMC periodically delivering * an interrupt to the CPU (when the configured number of events have * been measured), so the PMC must have the ability to generate * interrupts. * * In counting modes, the PMC counts its configured events, with the * value of the PMC being read whenever needed by its owner process. * * The thread specific modes "virtualize" the PMCs -- the PMCs appear * to be thread private and count events only when the profiled thread * actually executes on the CPU. * * The system-wide "global" modes keep the PMCs running all the time * and are used to measure the behaviour of the whole system.
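* For example, counting one process's instructions retired calls for
* a TC-mode PMC attached to that process, while a periodic profile
* of everything executing on all CPUs calls for an SS-mode PMC.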
*/ #define __PMC_MODES() \ __PMC_MODE(SS, 0) \ __PMC_MODE(SC, 1) \ __PMC_MODE(TS, 2) \ __PMC_MODE(TC, 3) enum pmc_mode { #undef __PMC_MODE #define __PMC_MODE(M,N) PMC_MODE_##M = N, __PMC_MODES() }; #define PMC_MODE_FIRST PMC_MODE_SS #define PMC_MODE_LAST PMC_MODE_TC #define PMC_IS_COUNTING_MODE(mode) \ ((mode) == PMC_MODE_SC || (mode) == PMC_MODE_TC) #define PMC_IS_SYSTEM_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_SC) #define PMC_IS_SAMPLING_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_TS) #define PMC_IS_VIRTUAL_MODE(mode) \ ((mode) == PMC_MODE_TS || (mode) == PMC_MODE_TC) /* * PMC row disposition */ #define __PMC_DISPOSITIONS(N) \ __PMC_DISP(STANDALONE) /* global/disabled counters */ \ __PMC_DISP(FREE) /* free/available */ \ __PMC_DISP(THREAD) /* thread-virtual PMCs */ \ __PMC_DISP(UNKNOWN) /* sentinel */ enum pmc_disp { #undef __PMC_DISP #define __PMC_DISP(D) PMC_DISP_##D , __PMC_DISPOSITIONS() }; #define PMC_DISP_FIRST PMC_DISP_STANDALONE #define PMC_DISP_LAST PMC_DISP_THREAD /* * Counter capabilities * * __PMC_CAPS(NAME, VALUE, DESCRIPTION) */ #define __PMC_CAPS() \ __PMC_CAP(INTERRUPT, 0, "generate interrupts") \ __PMC_CAP(USER, 1, "count user-mode events") \ __PMC_CAP(SYSTEM, 2, "count system-mode events") \ __PMC_CAP(EDGE, 3, "do edge detection of events") \ __PMC_CAP(THRESHOLD, 4, "ignore events below a threshold") \ __PMC_CAP(READ, 5, "read PMC counter") \ __PMC_CAP(WRITE, 6, "reprogram PMC counter") \ __PMC_CAP(INVERT, 7, "invert comparision sense") \ __PMC_CAP(QUALIFIER, 8, "further qualify monitored events") \ __PMC_CAP(PRECISE, 9, "perform precise sampling") \ __PMC_CAP(TAGGING, 10, "tag upstream events") \ __PMC_CAP(CASCADE, 11, "cascade counters") enum pmc_caps { #undef __PMC_CAP #define __PMC_CAP(NAME, VALUE, DESCR) PMC_CAP_##NAME = (1 << VALUE) , __PMC_CAPS() }; #define PMC_CAP_FIRST PMC_CAP_INTERRUPT #define PMC_CAP_LAST PMC_CAP_CASCADE /* * PMC Event Numbers * * These are generated from the definitions in "dev/hwpmc/pmc_events.h". */ enum pmc_event { #undef __PMC_EV #undef __PMC_EV_BLOCK #define __PMC_EV_BLOCK(C,V) PMC_EV_ ## C ## __BLOCK_START = (V) - 1 , #define __PMC_EV(C,N) PMC_EV_ ## C ## _ ## N , __PMC_EVENTS() }; /* * PMC SYSCALL INTERFACE */ /* * "PMC_OPS" -- these are the commands recognized by the kernel * module, and are used when performing a system call from userland. */ #define __PMC_OPS() \ __PMC_OP(CONFIGURELOG, "Set log file") \ __PMC_OP(FLUSHLOG, "Flush log file") \ __PMC_OP(GETCPUINFO, "Get system CPU information") \ __PMC_OP(GETDRIVERSTATS, "Get driver statistics") \ __PMC_OP(GETMODULEVERSION, "Get module version") \ __PMC_OP(GETPMCINFO, "Get per-cpu PMC information") \ __PMC_OP(PMCADMIN, "Set PMC state") \ __PMC_OP(PMCALLOCATE, "Allocate and configure a PMC") \ __PMC_OP(PMCATTACH, "Attach a PMC to a process") \ __PMC_OP(PMCDETACH, "Detach a PMC from a process") \ __PMC_OP(PMCGETMSR, "Get a PMC's hardware address") \ __PMC_OP(PMCRELEASE, "Release a PMC") \ __PMC_OP(PMCRW, "Read/Set a PMC") \ __PMC_OP(PMCSETCOUNT, "Set initial count/sampling rate") \ __PMC_OP(PMCSTART, "Start a PMC") \ __PMC_OP(PMCSTOP, "Stop a PMC") \ __PMC_OP(WRITELOG, "Write a cookie to the log file") \ __PMC_OP(CLOSELOG, "Close log file") \ __PMC_OP(GETDYNEVENTINFO, "Get dynamic events list") enum pmc_ops { #undef __PMC_OP #define __PMC_OP(N, D) PMC_OP_##N, __PMC_OPS() }; /* * Flags used in operations on PMCs. 
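* For instance, an OP PMCRW carrying both PMC_F_NEWVALUE and
* PMC_F_OLDVALUE writes pm_value into the PMC and hands the previous
* reading back in the same field (see struct pmc_op_pmcrw below).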
*/ #define PMC_F_FORCE 0x00000001 /*OP ADMIN force operation */ #define PMC_F_DESCENDANTS 0x00000002 /*OP ALLOCATE track descendants */ #define PMC_F_LOG_PROCCSW 0x00000004 /*OP ALLOCATE track ctx switches */ #define PMC_F_LOG_PROCEXIT 0x00000008 /*OP ALLOCATE log proc exits */ #define PMC_F_NEWVALUE 0x00000010 /*OP RW write new value */ #define PMC_F_OLDVALUE 0x00000020 /*OP RW get old value */ #define PMC_F_KGMON 0x00000040 /*OP ALLOCATE kgmon(8) profiling */ /* V2 API */ #define PMC_F_CALLCHAIN 0x00000080 /*OP ALLOCATE capture callchains */ /* internal flags */ #define PMC_F_ATTACHED_TO_OWNER 0x00010000 /*attached to owner*/ #define PMC_F_NEEDS_LOGFILE 0x00020000 /*needs log file */ #define PMC_F_ATTACH_DONE 0x00040000 /*attached at least once */ #define PMC_CALLCHAIN_DEPTH_MAX 128 #define PMC_CC_F_USERSPACE 0x01 /*userspace callchain*/ /* * Cookies used to denote allocated PMCs, and the values of PMCs. */ typedef uint32_t pmc_id_t; typedef uint64_t pmc_value_t; #define PMC_ID_INVALID (~ (pmc_id_t) 0) /* * PMC IDs have the following format: * * +--------+----------+-----------+-----------+ * | CPU | PMC MODE | PMC CLASS | ROW INDEX | * +--------+----------+-----------+-----------+ * * where each field is 8 bits wide. Field 'CPU' is set to the * requested CPU for system-wide PMCs or PMC_CPU_ANY for process-mode * PMCs. Field 'PMC MODE' is the allocated PMC mode. Field 'PMC * CLASS' is the class of the PMC. Field 'ROW INDEX' is the row index * for the PMC. * * The 'ROW INDEX' ranges over 0..NWPMCS where NHWPMCS is the total * number of hardware PMCs on this cpu. */ #define PMC_ID_TO_ROWINDEX(ID) ((ID) & 0xFF) #define PMC_ID_TO_CLASS(ID) (((ID) & 0xFF00) >> 8) #define PMC_ID_TO_MODE(ID) (((ID) & 0xFF0000) >> 16) #define PMC_ID_TO_CPU(ID) (((ID) & 0xFF000000) >> 24) #define PMC_ID_MAKE_ID(CPU,MODE,CLASS,ROWINDEX) \ ((((CPU) & 0xFF) << 24) | (((MODE) & 0xFF) << 16) | \ (((CLASS) & 0xFF) << 8) | ((ROWINDEX) & 0xFF)) /* * Data structures for system calls supported by the pmc driver. */ /* * OP PMCALLOCATE * * Allocate a PMC on the named CPU. */ #define PMC_CPU_ANY ~0 struct pmc_op_pmcallocate { uint32_t pm_caps; /* PMC_CAP_* */ uint32_t pm_cpu; /* CPU number or PMC_CPU_ANY */ enum pmc_class pm_class; /* class of PMC desired */ enum pmc_event pm_ev; /* [enum pmc_event] desired */ uint32_t pm_flags; /* additional modifiers PMC_F_* */ enum pmc_mode pm_mode; /* desired mode */ pmc_id_t pm_pmcid; /* [return] process pmc id */ union pmc_md_op_pmcallocate pm_md; /* MD layer extensions */ }; /* * OP PMCADMIN * * Set the administrative state (i.e., whether enabled or disabled) of * a PMC 'pm_pmc' on CPU 'pm_cpu'. Note that 'pm_pmc' specifies an * absolute PMC number and need not have been first allocated by the * calling process. */ struct pmc_op_pmcadmin { int pm_cpu; /* CPU# */ uint32_t pm_flags; /* flags */ int pm_pmc; /* PMC# */ enum pmc_state pm_state; /* desired state */ }; /* * OP PMCATTACH / OP PMCDETACH * * Attach/detach a PMC and a process. */ struct pmc_op_pmcattach { pmc_id_t pm_pmc; /* PMC to attach to */ pid_t pm_pid; /* target process */ }; /* * OP PMCSETCOUNT * * Set the sampling rate (i.e., the reload count) for statistical counters. * 'pm_pmcid' need to have been previously allocated using PMCALLOCATE. */ struct pmc_op_pmcsetcount { pmc_value_t pm_count; /* initial/sample count */ pmc_id_t pm_pmcid; /* PMC id to set */ }; /* * OP PMCRW * * Read the value of a PMC named by 'pm_pmcid'. 'pm_pmcid' needs * to have been previously allocated using PMCALLOCATE. 
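* As an illustration of the ID encoding above, a (hypothetical)
* pm_pmcid of 0x00020304 decodes via the PMC_ID_TO_*() macros as
* CPU 0, mode 2 (PMC_MODE_TS), class 3 and row index 4.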
*/ struct pmc_op_pmcrw { uint32_t pm_flags; /* PMC_F_{OLD,NEW}VALUE*/ pmc_id_t pm_pmcid; /* pmc id */ pmc_value_t pm_value; /* new&returned value */ }; /* * OP GETPMCINFO * * retrieve PMC state for a named CPU. The caller is expected to * allocate 'npmc' * 'struct pmc_info' bytes of space for the return * values. */ struct pmc_info { char pm_name[PMC_NAME_MAX]; /* pmc name */ enum pmc_class pm_class; /* enum pmc_class */ int pm_enabled; /* whether enabled */ enum pmc_disp pm_rowdisp; /* FREE, THREAD or STANDLONE */ pid_t pm_ownerpid; /* owner, or -1 */ enum pmc_mode pm_mode; /* current mode [enum pmc_mode] */ enum pmc_event pm_event; /* current event */ uint32_t pm_flags; /* current flags */ pmc_value_t pm_reloadcount; /* sampling counters only */ }; struct pmc_op_getpmcinfo { int32_t pm_cpu; /* 0 <= cpu < mp_maxid */ struct pmc_info pm_pmcs[]; /* space for 'npmc' structures */ }; /* * OP GETCPUINFO * * Retrieve system CPU information. */ struct pmc_classinfo { enum pmc_class pm_class; /* class id */ uint32_t pm_caps; /* counter capabilities */ uint32_t pm_width; /* width of the PMC */ uint32_t pm_num; /* number of PMCs in class */ }; struct pmc_op_getcpuinfo { enum pmc_cputype pm_cputype; /* what kind of CPU */ uint32_t pm_ncpu; /* max CPU number */ uint32_t pm_npmc; /* #PMCs per CPU */ uint32_t pm_nclass; /* #classes of PMCs */ struct pmc_classinfo pm_classes[PMC_CLASS_MAX]; }; /* * OP CONFIGURELOG * * Configure a log file for writing system-wide statistics to. */ struct pmc_op_configurelog { int pm_flags; int pm_logfd; /* logfile fd (or -1) */ }; /* * OP GETDRIVERSTATS * * Retrieve pmc(4) driver-wide statistics. */ struct pmc_op_getdriverstats { - int pm_intr_ignored; /* #interrupts ignored */ - int pm_intr_processed; /* #interrupts processed */ - int pm_intr_bufferfull; /* #interrupts with ENOSPC */ - int pm_syscalls; /* #syscalls */ - int pm_syscall_errors; /* #syscalls with errors */ - int pm_buffer_requests; /* #buffer requests */ - int pm_buffer_requests_failed; /* #failed buffer requests */ - int pm_log_sweeps; /* #sample buffer processing passes */ + unsigned int pm_intr_ignored; /* #interrupts ignored */ + unsigned int pm_intr_processed; /* #interrupts processed */ + unsigned int pm_intr_bufferfull; /* #interrupts with ENOSPC */ + unsigned int pm_syscalls; /* #syscalls */ + unsigned int pm_syscall_errors; /* #syscalls with errors */ + unsigned int pm_buffer_requests; /* #buffer requests */ + unsigned int pm_buffer_requests_failed; /* #failed buffer requests */ + unsigned int pm_log_sweeps; /* #sample buffer processing + passes */ }; /* * OP RELEASE / OP START / OP STOP * * Simple operations on a PMC id. */ struct pmc_op_simple { pmc_id_t pm_pmcid; }; /* * OP WRITELOG * * Flush the current log buffer and write 4 bytes of user data to it. */ struct pmc_op_writelog { uint32_t pm_userdata; }; /* * OP GETMSR * * Retrieve the machine specific address assoicated with the allocated * PMC. This number can be used subsequently with a read-performance-counter * instruction. */ struct pmc_op_getmsr { uint32_t pm_msr; /* machine specific address */ pmc_id_t pm_pmcid; /* allocated pmc id */ }; /* * OP GETDYNEVENTINFO * * Retrieve a PMC dynamic class events list. 
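* The caller names the class of interest in pm_class; the kernel
* fills in pm_nevent and that many pm_events[] entries, the array
* being sized for the worst case (PMC_EV_DYN_COUNT).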
*/ struct pmc_dyn_event_descr { char pm_ev_name[PMC_NAME_MAX]; enum pmc_event pm_ev_code; }; struct pmc_op_getdyneventinfo { enum pmc_class pm_class; unsigned int pm_nevent; struct pmc_dyn_event_descr pm_events[PMC_EV_DYN_COUNT]; }; #ifdef _KERNEL #include #include #include #include #define PMC_HASH_SIZE 1024 #define PMC_MTXPOOL_SIZE 2048 #define PMC_LOG_BUFFER_SIZE 4 #define PMC_NLOGBUFFERS 1024 #define PMC_NSAMPLES 1024 #define PMC_CALLCHAIN_DEPTH 32 #define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "." /* * Locking keys * * (b) - pmc_bufferlist_mtx (spin lock) * (k) - pmc_kthread_mtx (sleep lock) * (o) - po->po_mtx (spin lock) */ /* * PMC commands */ struct pmc_syscall_args { register_t pmop_code; /* one of PMC_OP_* */ void *pmop_data; /* syscall parameter */ }; /* * Interface to processor specific stuff */ /* * struct pmc_descr * * Machine independent (i.e., the common parts) of a human readable * PMC description. */ struct pmc_descr { char pd_name[PMC_NAME_MAX]; /* name */ uint32_t pd_caps; /* capabilities */ enum pmc_class pd_class; /* class of the PMC */ uint32_t pd_width; /* width in bits */ }; /* * struct pmc_target * * This structure records all the target processes associated with a * PMC. */ struct pmc_target { LIST_ENTRY(pmc_target) pt_next; struct pmc_process *pt_process; /* target descriptor */ }; /* * struct pmc * * Describes each allocated PMC. * * Each PMC has precisely one owner, namely the process that allocated * the PMC. * * A PMC may be attached to multiple target processes. The * 'pm_targets' field links all the target processes being monitored * by this PMC. * * The 'pm_savedvalue' field is protected by a mutex. * * On a multi-cpu machine, multiple target threads associated with a * process-virtual PMC could be concurrently executing on different * CPUs. The 'pm_runcount' field is atomically incremented every time * the PMC gets scheduled on a CPU and atomically decremented when it * gets descheduled. Deletion of a PMC is only permitted when this * field is '0'. * */ struct pmc { LIST_HEAD(,pmc_target) pm_targets; /* list of target processes */ LIST_ENTRY(pmc) pm_next; /* owner's list */ /* * System-wide PMCs are allocated on a CPU and are not moved * around. For system-wide PMCs we record the CPU the PMC was * allocated on in the 'CPU' field of the pmc ID. * * Virtual PMCs run on whichever CPU is currently executing * their targets' threads. For these PMCs we need to save * their current PMC counter values when they are taken off * CPU. */ union { pmc_value_t pm_savedvalue; /* Virtual PMCS */ } pm_gv; /* * For sampling mode PMCs, we keep track of the PMC's "reload * count", which is the counter value to be loaded in when * arming the PMC for the next counting session. For counting * modes on PMCs that are read-only (e.g., the x86 TSC), we * keep track of the initial value at the start of * counting-mode operation. */ union { pmc_value_t pm_reloadcount; /* sampling PMC modes */ pmc_value_t pm_initial; /* counting PMC modes */ } pm_sc; volatile cpuset_t pm_stalled; /* marks stalled sampling PMCs */ volatile cpuset_t pm_cpustate; /* CPUs where PMC should be active */ uint32_t pm_caps; /* PMC capabilities */ enum pmc_event pm_event; /* event being measured */ uint32_t pm_flags; /* additional flags PMC_F_... */ struct pmc_owner *pm_owner; /* owner thread state */ int pm_runcount; /* #cpus currently on */ enum pmc_state pm_state; /* current PMC state */ /* * The PMC ID field encodes the row-index for the PMC, its * mode, class and the CPU# associated with the PMC.
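* The PMC_TO_MODE()/PMC_TO_CLASS()/PMC_TO_ROWINDEX()/PMC_TO_CPU()
* accessors defined below unpack these fields so callers never
* need to open-code the bit layout.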
*/ pmc_id_t pm_id; /* allocated PMC id */ /* md extensions */ union pmc_md_pmc pm_md; }; /* * Accessor macros for 'struct pmc' */ #define PMC_TO_MODE(P) PMC_ID_TO_MODE((P)->pm_id) #define PMC_TO_CLASS(P) PMC_ID_TO_CLASS((P)->pm_id) #define PMC_TO_ROWINDEX(P) PMC_ID_TO_ROWINDEX((P)->pm_id) #define PMC_TO_CPU(P) PMC_ID_TO_CPU((P)->pm_id) /* * struct pmc_process * * Record a 'target' process being profiled. * * The target process being profiled could be different from the owner * process which allocated the PMCs. Each target process descriptor * is associated with NHWPMC 'struct pmc *' pointers. Each PMC at a * given hardware row-index 'n' will use slot 'n' of the 'pp_pmcs[]' * array. The size of this structure is thus PMC architecture * dependent. * */ struct pmc_targetstate { struct pmc *pp_pmc; /* target PMC */ pmc_value_t pp_pmcval; /* per-process value */ }; struct pmc_process { LIST_ENTRY(pmc_process) pp_next; /* hash chain */ int pp_refcnt; /* reference count */ uint32_t pp_flags; /* flags PMC_PP_* */ struct proc *pp_proc; /* target thread */ struct pmc_targetstate pp_pmcs[]; /* NHWPMCs */ }; #define PMC_PP_ENABLE_MSR_ACCESS 0x00000001 /* * struct pmc_owner * * We associate a PMC with an 'owner' process. * * A process can be associated with 0..NCPUS*NHWPMC PMCs during its * lifetime, where NCPUS is the numbers of CPUS in the system and * NHWPMC is the number of hardware PMCs per CPU. These are * maintained in the list headed by the 'po_pmcs' to save on space. * */ struct pmc_owner { LIST_ENTRY(pmc_owner) po_next; /* hash chain */ LIST_ENTRY(pmc_owner) po_ssnext; /* list of SS PMC owners */ LIST_HEAD(, pmc) po_pmcs; /* owned PMC list */ TAILQ_HEAD(, pmclog_buffer) po_logbuffers; /* (o) logbuffer list */ struct mtx po_mtx; /* spin lock for (o) */ struct proc *po_owner; /* owner proc */ uint32_t po_flags; /* (k) flags PMC_PO_* */ struct proc *po_kthread; /* (k) helper kthread */ struct pmclog_buffer *po_curbuf; /* current log buffer */ struct file *po_file; /* file reference */ int po_error; /* recorded error */ short po_sscount; /* # SS PMCs owned */ short po_logprocmaps; /* global mappings done */ }; #define PMC_PO_OWNS_LOGFILE 0x00000001 /* has a log file */ #define PMC_PO_SHUTDOWN 0x00000010 /* in the process of shutdown */ #define PMC_PO_INITIAL_MAPPINGS_DONE 0x00000020 /* * struct pmc_hw -- describe the state of the PMC hardware * * When in use, a HW PMC is associated with one allocated 'struct pmc' * pointed to by field 'phw_pmc'. When inactive, this field is NULL. * * On an SMP box, one or more HW PMC's in process virtual mode with * the same 'phw_pmc' could be executing on different CPUs. In order * to handle this case correctly, we need to ensure that only * incremental counts get added to the saved value in the associated * 'struct pmc'. The 'phw_save' field is used to keep the saved PMC * value at the time the hardware is started during this context * switch (i.e., the difference between the new (hardware) count and * the saved count is atomically added to the count field in 'struct * pmc' at context switch time). 
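* A worked example with made-up numbers: if the counter read 100
* when this thread was switched in and reads 250 at switch-out,
* only the delta of 150 is folded into pm_savedvalue, so counts
* accumulated concurrently on other CPUs are not clobbered.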
* */ struct pmc_hw { uint32_t phw_state; /* see PHW_* macros below */ struct pmc *phw_pmc; /* current thread PMC */ }; #define PMC_PHW_RI_MASK 0x000000FF #define PMC_PHW_CPU_SHIFT 8 #define PMC_PHW_CPU_MASK 0x0000FF00 #define PMC_PHW_FLAGS_SHIFT 16 #define PMC_PHW_FLAGS_MASK 0xFFFF0000 #define PMC_PHW_INDEX_TO_STATE(ri) ((ri) & PMC_PHW_RI_MASK) #define PMC_PHW_STATE_TO_INDEX(state) ((state) & PMC_PHW_RI_MASK) #define PMC_PHW_CPU_TO_STATE(cpu) (((cpu) << PMC_PHW_CPU_SHIFT) & \ PMC_PHW_CPU_MASK) #define PMC_PHW_STATE_TO_CPU(state) (((state) & PMC_PHW_CPU_MASK) >> \ PMC_PHW_CPU_SHIFT) #define PMC_PHW_FLAGS_TO_STATE(flags) (((flags) << PMC_PHW_FLAGS_SHIFT) & \ PMC_PHW_FLAGS_MASK) #define PMC_PHW_STATE_TO_FLAGS(state) (((state) & PMC_PHW_FLAGS_MASK) >> \ PMC_PHW_FLAGS_SHIFT) #define PMC_PHW_FLAG_IS_ENABLED (PMC_PHW_FLAGS_TO_STATE(0x01)) #define PMC_PHW_FLAG_IS_SHAREABLE (PMC_PHW_FLAGS_TO_STATE(0x02)) /* * struct pmc_sample * * Space for N (tunable) PC samples and associated control data. */ struct pmc_sample { uint16_t ps_nsamples; /* callchain depth */ uint8_t ps_cpu; /* cpu number */ uint8_t ps_flags; /* other flags */ pid_t ps_pid; /* process PID or -1 */ struct thread *ps_td; /* which thread */ struct pmc *ps_pmc; /* interrupting PMC */ uintptr_t *ps_pc; /* (const) callchain start */ }; #define PMC_SAMPLE_FREE ((uint16_t) 0) #define PMC_SAMPLE_INUSE ((uint16_t) 0xFFFF) struct pmc_samplebuffer { struct pmc_sample * volatile ps_read; /* read pointer */ struct pmc_sample * volatile ps_write; /* write pointer */ uintptr_t *ps_callchains; /* all saved call chains */ struct pmc_sample *ps_fence; /* one beyond ps_samples[] */ struct pmc_sample ps_samples[]; /* array of sample entries */ }; /* * struct pmc_cpustate * * A CPU is modelled as a collection of HW PMCs with space for additional * flags. */ struct pmc_cpu { uint32_t pc_state; /* physical cpu number + flags */ struct pmc_samplebuffer *pc_sb[2]; /* space for samples */ struct pmc_hw *pc_hwpmcs[]; /* 'npmc' pointers */ }; #define PMC_PCPU_CPU_MASK 0x000000FF #define PMC_PCPU_FLAGS_MASK 0xFFFFFF00 #define PMC_PCPU_FLAGS_SHIFT 8 #define PMC_PCPU_STATE_TO_CPU(S) ((S) & PMC_PCPU_CPU_MASK) #define PMC_PCPU_STATE_TO_FLAGS(S) (((S) & PMC_PCPU_FLAGS_MASK) >> PMC_PCPU_FLAGS_SHIFT) #define PMC_PCPU_FLAGS_TO_STATE(F) (((F) << PMC_PCPU_FLAGS_SHIFT) & PMC_PCPU_FLAGS_MASK) #define PMC_PCPU_CPU_TO_STATE(C) ((C) & PMC_PCPU_CPU_MASK) #define PMC_PCPU_FLAG_HTT (PMC_PCPU_FLAGS_TO_STATE(0x1)) /* * struct pmc_binding * * CPU binding information. */ struct pmc_binding { int pb_bound; /* is bound? */ int pb_cpu; /* if so, to which CPU */ }; struct pmc_mdep; /* * struct pmc_classdep * * PMC class-dependent operations. 
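* The row index _ri passed to these methods appears to be global
* across classes: pcd_ri records the first row owned by a class,
* so its rows would span pcd_ri .. pcd_ri + pcd_num - 1.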
*/ struct pmc_classdep { uint32_t pcd_caps; /* class capabilities */ enum pmc_class pcd_class; /* class id */ int pcd_num; /* number of PMCs */ int pcd_ri; /* row index of the first PMC in class */ int pcd_width; /* width of the PMC */ /* configuring/reading/writing the hardware PMCs */ int (*pcd_config_pmc)(int _cpu, int _ri, struct pmc *_pm); int (*pcd_get_config)(int _cpu, int _ri, struct pmc **_ppm); int (*pcd_read_pmc)(int _cpu, int _ri, pmc_value_t *_value); int (*pcd_write_pmc)(int _cpu, int _ri, pmc_value_t _value); /* pmc allocation/release */ int (*pcd_allocate_pmc)(int _cpu, int _ri, struct pmc *_t, const struct pmc_op_pmcallocate *_a); int (*pcd_release_pmc)(int _cpu, int _ri, struct pmc *_pm); /* starting and stopping PMCs */ int (*pcd_start_pmc)(int _cpu, int _ri); int (*pcd_stop_pmc)(int _cpu, int _ri); /* description */ int (*pcd_describe)(int _cpu, int _ri, struct pmc_info *_pi, struct pmc **_ppmc); /* class-dependent initialization & finalization */ int (*pcd_pcpu_init)(struct pmc_mdep *_md, int _cpu); int (*pcd_pcpu_fini)(struct pmc_mdep *_md, int _cpu); /* machine-specific interface */ int (*pcd_get_msr)(int _ri, uint32_t *_msr); }; /* * struct pmc_mdep * * Machine dependent bits needed per CPU type. */ struct pmc_mdep { uint32_t pmd_cputype; /* from enum pmc_cputype */ uint32_t pmd_npmc; /* number of PMCs per CPU */ uint32_t pmd_nclass; /* number of PMC classes present */ /* * Machine dependent methods. */ /* per-cpu initialization and finalization */ int (*pmd_pcpu_init)(struct pmc_mdep *_md, int _cpu); int (*pmd_pcpu_fini)(struct pmc_mdep *_md, int _cpu); /* thread context switch in/out */ int (*pmd_switch_in)(struct pmc_cpu *_p, struct pmc_process *_pp); int (*pmd_switch_out)(struct pmc_cpu *_p, struct pmc_process *_pp); /* handle a PMC interrupt */ int (*pmd_intr)(int _cpu, struct trapframe *_tf); /* * PMC class dependent information. */ struct pmc_classdep pmd_classdep[]; }; /* * Per-CPU state. This is an array of 'mp_ncpu' pointers * to struct pmc_cpu descriptors. 
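* The hardware state for row 'ri' on a given CPU is thus reached
* as pmc_pcpu[cpu]->pc_hwpmcs[ri] (sketch; bounds checking and
* NULL checks omitted).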
*/ extern struct pmc_cpu **pmc_pcpu; /* driver statistics */ extern struct pmc_op_getdriverstats pmc_stats; #if defined(HWPMC_DEBUG) #include /* debug flags, major flag groups */ struct pmc_debugflags { int pdb_CPU; int pdb_CSW; int pdb_LOG; int pdb_MDP; int pdb_MOD; int pdb_OWN; int pdb_PMC; int pdb_PRC; int pdb_SAM; }; extern struct pmc_debugflags pmc_debugflags; #define KTR_PMC KTR_SUBSYS #define PMC_DEBUG_STRSIZE 128 #define PMC_DEBUG_DEFAULT_FLAGS { 0, 0, 0, 0, 0, 0, 0, 0 } #define PMCDBG0(M, N, L, F) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR0(KTR_PMC, #M ":" #N ":" #L ": " F); \ } while (0) #define PMCDBG1(M, N, L, F, p1) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR1(KTR_PMC, #M ":" #N ":" #L ": " F, p1); \ } while (0) #define PMCDBG2(M, N, L, F, p1, p2) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR2(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2); \ } while (0) #define PMCDBG3(M, N, L, F, p1, p2, p3) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR3(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3); \ } while (0) #define PMCDBG4(M, N, L, F, p1, p2, p3, p4) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR4(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4);\ } while (0) #define PMCDBG5(M, N, L, F, p1, p2, p3, p4, p5) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR5(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4, \ p5); \ } while (0) #define PMCDBG6(M, N, L, F, p1, p2, p3, p4, p5, p6) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR6(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4, \ p5, p6); \ } while (0) /* Major numbers */ #define PMC_DEBUG_MAJ_CPU 0 /* cpu switches */ #define PMC_DEBUG_MAJ_CSW 1 /* context switches */ #define PMC_DEBUG_MAJ_LOG 2 /* logging */ #define PMC_DEBUG_MAJ_MDP 3 /* machine dependent */ #define PMC_DEBUG_MAJ_MOD 4 /* misc module infrastructure */ #define PMC_DEBUG_MAJ_OWN 5 /* owner */ #define PMC_DEBUG_MAJ_PMC 6 /* pmc management */ #define PMC_DEBUG_MAJ_PRC 7 /* processes */ #define PMC_DEBUG_MAJ_SAM 8 /* sampling */ /* Minor numbers */ /* Common (8 bits) */ #define PMC_DEBUG_MIN_ALL 0 /* allocation */ #define PMC_DEBUG_MIN_REL 1 /* release */ #define PMC_DEBUG_MIN_OPS 2 /* ops: start, stop, ... 
*/ #define PMC_DEBUG_MIN_INI 3 /* init */ #define PMC_DEBUG_MIN_FND 4 /* find */ /* MODULE */ #define PMC_DEBUG_MIN_PMH 14 /* pmc_hook */ #define PMC_DEBUG_MIN_PMS 15 /* pmc_syscall */ /* OWN */ #define PMC_DEBUG_MIN_ORM 8 /* owner remove */ #define PMC_DEBUG_MIN_OMR 9 /* owner maybe remove */ /* PROCESSES */ #define PMC_DEBUG_MIN_TLK 8 /* link target */ #define PMC_DEBUG_MIN_TUL 9 /* unlink target */ #define PMC_DEBUG_MIN_EXT 10 /* process exit */ #define PMC_DEBUG_MIN_EXC 11 /* process exec */ #define PMC_DEBUG_MIN_FRK 12 /* process fork */ #define PMC_DEBUG_MIN_ATT 13 /* attach/detach */ #define PMC_DEBUG_MIN_SIG 14 /* signalling */ /* CONTEXT SWITCHES */ #define PMC_DEBUG_MIN_SWI 8 /* switch in */ #define PMC_DEBUG_MIN_SWO 9 /* switch out */ /* PMC */ #define PMC_DEBUG_MIN_REG 8 /* pmc register */ #define PMC_DEBUG_MIN_ALR 9 /* allocate row */ /* MACHINE DEPENDENT LAYER */ #define PMC_DEBUG_MIN_REA 8 /* read */ #define PMC_DEBUG_MIN_WRI 9 /* write */ #define PMC_DEBUG_MIN_CFG 10 /* config */ #define PMC_DEBUG_MIN_STA 11 /* start */ #define PMC_DEBUG_MIN_STO 12 /* stop */ #define PMC_DEBUG_MIN_INT 13 /* interrupts */ /* CPU */ #define PMC_DEBUG_MIN_BND 8 /* bind */ #define PMC_DEBUG_MIN_SEL 9 /* select */ /* LOG */ #define PMC_DEBUG_MIN_GTB 8 /* get buf */ #define PMC_DEBUG_MIN_SIO 9 /* schedule i/o */ #define PMC_DEBUG_MIN_FLS 10 /* flush */ #define PMC_DEBUG_MIN_SAM 11 /* sample */ #define PMC_DEBUG_MIN_CLO 12 /* close */ #else #define PMCDBG0(M, N, L, F) /* nothing */ #define PMCDBG1(M, N, L, F, p1) #define PMCDBG2(M, N, L, F, p1, p2) #define PMCDBG3(M, N, L, F, p1, p2, p3) #define PMCDBG4(M, N, L, F, p1, p2, p3, p4) #define PMCDBG5(M, N, L, F, p1, p2, p3, p4, p5) #define PMCDBG6(M, N, L, F, p1, p2, p3, p4, p5, p6) #endif /* declare a dedicated memory pool */ MALLOC_DECLARE(M_PMC); /* * Functions */ struct pmc_mdep *pmc_md_initialize(void); /* MD init function */ void pmc_md_finalize(struct pmc_mdep *_md); /* MD fini function */ int pmc_getrowdisp(int _ri); int pmc_process_interrupt(int _cpu, int _soft, struct pmc *_pm, struct trapframe *_tf, int _inuserspace); int pmc_save_kernel_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); int pmc_save_user_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); struct pmc_mdep *pmc_mdep_alloc(int nclasses); void pmc_mdep_free(struct pmc_mdep *md); #endif /* _KERNEL */ #endif /* _SYS_PMC_H_ */ Index: projects/powernv/sys/resourcevar.h =================================================================== --- projects/powernv/sys/resourcevar.h (revision 290990) +++ projects/powernv/sys/resourcevar.h (revision 290991) @@ -1,163 +1,164 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)resourcevar.h 8.4 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _SYS_RESOURCEVAR_H_ #define _SYS_RESOURCEVAR_H_ #include #include #ifdef _KERNEL #include #include #endif /* * Kernel per-process accounting / statistics * (not necessarily resident except when running). * * Locking key: * b - created at fork, never changes * c - locked by proc mtx * k - only accessed by curthread * w - locked by proc itim lock * w2 - locked by proc prof lock */ struct pstats { #define pstat_startzero p_cru struct rusage p_cru; /* Stats for reaped children. */ struct itimerval p_timer[3]; /* (w) Virtual-time timers. */ #define pstat_endzero pstat_startcopy #define pstat_startcopy p_prof struct uprof { /* Profile arguments. */ caddr_t pr_base; /* (c + w2) Buffer base. */ u_long pr_size; /* (c + w2) Buffer size. */ u_long pr_off; /* (c + w2) PC offset. */ u_long pr_scale; /* (c + w2) PC scaling. */ } p_prof; #define pstat_endcopy p_start struct timeval p_start; /* (b) Starting time. */ }; #ifdef _KERNEL /* * Kernel shareable process resource limits. Because this structure * is moderately large but changes infrequently, it is normally * shared copy-on-write after forks. */ struct plimit { struct rlimit pl_rlimit[RLIM_NLIMITS]; int pl_refcnt; /* number of references */ }; struct racct; /*- * Per uid resource consumption. This structure is used to track * the total resource consumption (process count, socket buffer size, * etc) for the uid and impose limits. 
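To make the structure below concrete, here is a minimal editorial sketch of the atomic check-and-increment pattern behind the chg*() functions declared further down; the field names match this header, but the body is illustrative rather than the kernel's exact implementation:

	static int
	example_chgproccnt(struct uidinfo *uip, int diff, rlim_t max)
	{
		/* Don't allow the count to exceed max, but allow
		 * subtraction; ui_proccnt is keyed (b), so plain
		 * atomics suffice and no lock is taken. */
		if (diff > 0 && max != 0) {
			if (atomic_fetchadd_long(&uip->ui_proccnt, diff) +
			    diff > max) {
				atomic_subtract_long(&uip->ui_proccnt, diff);
				return (0);	/* over limit, undo charge */
			}
		} else
			atomic_add_long(&uip->ui_proccnt, diff);
		return (1);
	}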
* * Locking guide: * (a) Constant from inception * (b) Lockless, updated using atomics * (c) Locked by global uihashtbl_lock * (d) Locked by the ui_vmsize_mtx */ struct uidinfo { LIST_ENTRY(uidinfo) ui_hash; /* (c) hash chain of uidinfos */ struct mtx ui_vmsize_mtx; vm_ooffset_t ui_vmsize; /* (d) swap reservation by uid */ long ui_sbsize; /* (b) socket buffer space consumed */ long ui_proccnt; /* (b) number of processes */ long ui_ptscnt; /* (b) number of pseudo-terminals */ long ui_kqcnt; /* (b) number of kqueues */ uid_t ui_uid; /* (a) uid */ u_int ui_ref; /* (b) reference count */ #ifdef RACCT struct racct *ui_racct; /* (a) resource accounting */ #endif }; #define UIDINFO_VMSIZE_LOCK(ui) mtx_lock(&((ui)->ui_vmsize_mtx)) #define UIDINFO_VMSIZE_UNLOCK(ui) mtx_unlock(&((ui)->ui_vmsize_mtx)) struct proc; struct rusage_ext; struct thread; void addupc_intr(struct thread *td, uintfptr_t pc, u_int ticks); void addupc_task(struct thread *td, uintfptr_t pc, u_int ticks); void calccru(struct proc *p, struct timeval *up, struct timeval *sp); void calcru(struct proc *p, struct timeval *up, struct timeval *sp); int chgkqcnt(struct uidinfo *uip, int diff, rlim_t max); int chgproccnt(struct uidinfo *uip, int diff, rlim_t maxval); int chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, rlim_t maxval); int chgptscnt(struct uidinfo *uip, int diff, rlim_t maxval); int fuswintr(void *base); int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which, struct rlimit *limp); struct plimit *lim_alloc(void); void lim_copy(struct plimit *dst, struct plimit *src); rlim_t lim_cur(struct thread *td, int which); rlim_t lim_cur_proc(struct proc *p, int which); void lim_fork(struct proc *p1, struct proc *p2); void lim_free(struct plimit *limp); struct plimit *lim_hold(struct plimit *limp); rlim_t lim_max(struct thread *td, int which); rlim_t lim_max_proc(struct proc *p, int which); void lim_rlimit(struct thread *td, int which, struct rlimit *rlp); void lim_rlimit_proc(struct proc *p, int which, struct rlimit *rlp); void ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2, struct rusage_ext *rux2); void rucollect(struct rusage *ru, struct rusage *ru2); void rufetch(struct proc *p, struct rusage *ru); void rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up, struct timeval *sp); void rufetchtd(struct thread *td, struct rusage *ru); void ruxagg(struct proc *p, struct thread *td); int suswintr(void *base, int word); struct uidinfo *uifind(uid_t uid); void uifree(struct uidinfo *uip); void uihashinit(void); void uihold(struct uidinfo *uip); #ifdef RACCT void ui_racct_foreach(void (*callback)(struct racct *racct, - void *arg2, void *arg3), void *arg2, void *arg3); + void *arg2, void *arg3), void (*pre)(void), void (*post)(void), + void *arg2, void *arg3); #endif #endif /* _KERNEL */ #endif /* !_SYS_RESOURCEVAR_H_ */ Index: projects/powernv/vm/vm_page.h =================================================================== --- projects/powernv/vm/vm_page.h (revision 290990) +++ projects/powernv/vm/vm_page.h (revision 290991) @@ -1,680 +1,681 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_page.h 8.2 (Berkeley) 12/13/93 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * * $FreeBSD$ */ /* * Resident memory system definitions. */ #ifndef _VM_PAGE_ #define _VM_PAGE_ #include /* * Management of resident (logical) pages. * * A small structure is kept for each resident * page, indexed by page number. Each structure * is an element of several collections: * * A radix tree used to quickly * perform object/offset lookups * * A list of all pages for a given object, * so they can be quickly deactivated at * time of deallocation. * * An ordered list of pages due for pageout. * * In addition, the structure contains the object * and offset to which this page belongs (for pageout), * and sundry status bits. * * In general, operations on this structure's mutable fields are * synchronized using either one of or a combination of the lock on the * object that the page belongs to (O), the pool lock for the page (P), * or the lock for either the free or paging queue (Q). If a field is * annotated below with two of these locks, then holding either lock is * sufficient for read access, but both locks are required for write * access. * * In contrast, the synchronization of accesses to the page's * dirty field is machine dependent (M). 
In the * machine-independent layer, the lock on the object that the * page belongs to must be held in order to operate on the field. * However, the pmap layer is permitted to set all bits within * the field without holding that lock. If the underlying * architecture does not support atomic read-modify-write * operations on the field's type, then the machine-independent * layer uses a 32-bit atomic on the aligned 32-bit word that * contains the dirty field. In the machine-independent layer, * the implementation of read-modify-write operations on the * field is encapsulated in vm_page_clear_dirty_mask(). */ #if PAGE_SIZE == 4096 #define VM_PAGE_BITS_ALL 0xffu typedef uint8_t vm_page_bits_t; #elif PAGE_SIZE == 8192 #define VM_PAGE_BITS_ALL 0xffffu typedef uint16_t vm_page_bits_t; #elif PAGE_SIZE == 16384 #define VM_PAGE_BITS_ALL 0xffffffffu typedef uint32_t vm_page_bits_t; #elif PAGE_SIZE == 32768 #define VM_PAGE_BITS_ALL 0xfffffffffffffffflu typedef uint64_t vm_page_bits_t; #endif struct vm_page { union { TAILQ_ENTRY(vm_page) q; /* page queue or free list (Q) */ struct { SLIST_ENTRY(vm_page) ss; /* private slists */ void *pv; } s; struct { u_long p; u_long v; } memguard; } plinks; TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ vm_object_t object; /* which object am I in (O,P) */ vm_pindex_t pindex; /* offset into object (O,P) */ vm_paddr_t phys_addr; /* physical address of page */ struct md_page md; /* machine dependent stuff */ u_int wire_count; /* wired down maps refs (P) */ volatile u_int busy_lock; /* busy owners lock */ uint16_t hold_count; /* page hold count (P) */ uint16_t flags; /* page PG_* flags (P) */ uint8_t aflags; /* access is atomic */ uint8_t oflags; /* page VPO_* flags (O) */ uint8_t queue; /* page queue index (P,Q) */ int8_t psind; /* pagesizes[] index (O) */ int8_t segind; uint8_t order; /* index of the buddy queue */ uint8_t pool; u_char act_count; /* page usage count (P) */ /* NOTE that these must support one bit per DEV_BSIZE in a page */ /* so, on normal X86 kernels, they must be at least 8 bits wide */ vm_page_bits_t valid; /* map of valid DEV_BSIZE chunks (O) */ vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ }; /* * Page flags stored in oflags: * * Access to these page flags is synchronized by the lock on the object * containing the page (O). * * Note: VPO_UNMANAGED (used by OBJT_DEVICE, OBJT_PHYS and OBJT_SG) * indicates that the page is not under PV management but * otherwise should be treated as a normal page. Pages not * under PV management cannot be paged out via the * object/vm_page_t because there is no knowledge of their pte * mappings, and such pages are also not on any PQ queue. * */ #define VPO_UNUSED01 0x01 /* --available-- */ #define VPO_SWAPSLEEP 0x02 /* waiting for swap to finish */ #define VPO_UNMANAGED 0x04 /* no PV management for page */ #define VPO_SWAPINPROG 0x08 /* swap I/O in progress on page */ #define VPO_NOSYNC 0x10 /* do not collect for syncer */ /* * Busy page implementation details. * The algorithm is taken mostly from the rwlock(9) and sx(9) lock * implementations, although support for owner identity has been removed * because of size constraints. Checks on lock recursion are therefore not * possible, and the effectiveness of the lock assertions is somewhat reduced.
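Stepping back to the vm_page_bits_t typedefs above: the widths fall directly out of one valid/dirty bit per DEV_BSIZE chunk. An editorial illustration, assuming the standard 512-byte DEV_BSIZE:

	/* PAGE_SIZE / DEV_BSIZE bits are needed per page:
	 *    4096 / 512 =  8  -> uint8_t  (VM_PAGE_BITS_ALL == 0xffu)
	 *    8192 / 512 = 16  -> uint16_t (VM_PAGE_BITS_ALL == 0xffffu)
	 *   16384 / 512 = 32  -> uint32_t
	 *   32768 / 512 = 64  -> uint64_t
	 * For example, marking only the first two 512-byte chunks of a
	 * 4 KB page valid uses the mask: */
	vm_page_bits_t mask = (1u << (1024 / DEV_BSIZE)) - 1;	/* 0x03 */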
*/ #define VPB_BIT_SHARED 0x01 #define VPB_BIT_EXCLUSIVE 0x02 #define VPB_BIT_WAITERS 0x04 #define VPB_BIT_FLAGMASK \ (VPB_BIT_SHARED | VPB_BIT_EXCLUSIVE | VPB_BIT_WAITERS) #define VPB_SHARERS_SHIFT 3 #define VPB_SHARERS(x) \ (((x) & ~VPB_BIT_FLAGMASK) >> VPB_SHARERS_SHIFT) #define VPB_SHARERS_WORD(x) ((x) << VPB_SHARERS_SHIFT | VPB_BIT_SHARED) #define VPB_ONE_SHARER (1 << VPB_SHARERS_SHIFT) #define VPB_SINGLE_EXCLUSIVER VPB_BIT_EXCLUSIVE #define VPB_UNBUSIED VPB_SHARERS_WORD(0) #define PQ_NONE 255 #define PQ_INACTIVE 0 #define PQ_ACTIVE 1 #define PQ_COUNT 2 TAILQ_HEAD(pglist, vm_page); SLIST_HEAD(spglist, vm_page); struct vm_pagequeue { struct mtx pq_mutex; struct pglist pq_pl; int pq_cnt; int * const pq_vcnt; const char * const pq_name; } __aligned(CACHE_LINE_SIZE); struct vm_domain { struct vm_pagequeue vmd_pagequeues[PQ_COUNT]; u_int vmd_page_count; u_int vmd_free_count; long vmd_segs; /* bitmask of the segments */ boolean_t vmd_oom; int vmd_pass; /* local pagedaemon pass */ + int vmd_oom_seq; int vmd_last_active_scan; struct vm_page vmd_marker; /* marker for pagedaemon private use */ struct vm_page vmd_inacthead; /* marker for LRU-defeating insertions */ }; extern struct vm_domain vm_dom[MAXMEMDOM]; #define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED) #define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex) #define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex) #ifdef _KERNEL static __inline void vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend) { #ifdef notyet vm_pagequeue_assert_locked(pq); #endif pq->pq_cnt += addend; atomic_add_int(pq->pq_vcnt, addend); } #define vm_pagequeue_cnt_inc(pq) vm_pagequeue_cnt_add((pq), 1) #define vm_pagequeue_cnt_dec(pq) vm_pagequeue_cnt_add((pq), -1) #endif /* _KERNEL */ extern struct mtx_padalign vm_page_queue_free_mtx; extern struct mtx_padalign pa_lock[]; #if defined(__arm__) #define PDRSHIFT PDR_SHIFT #elif !defined(PDRSHIFT) #define PDRSHIFT 21 #endif #define pa_index(pa) ((pa) >> PDRSHIFT) #define PA_LOCKPTR(pa) ((struct mtx *)(&pa_lock[pa_index(pa) % PA_LOCK_COUNT])) #define PA_LOCKOBJPTR(pa) ((struct lock_object *)PA_LOCKPTR((pa))) #define PA_LOCK(pa) mtx_lock(PA_LOCKPTR(pa)) #define PA_TRYLOCK(pa) mtx_trylock(PA_LOCKPTR(pa)) #define PA_UNLOCK(pa) mtx_unlock(PA_LOCKPTR(pa)) #define PA_UNLOCK_COND(pa) \ do { \ if ((pa) != 0) { \ PA_UNLOCK((pa)); \ (pa) = 0; \ } \ } while (0) #define PA_LOCK_ASSERT(pa, a) mtx_assert(PA_LOCKPTR(pa), (a)) #ifdef KLD_MODULE #define vm_page_lock(m) vm_page_lock_KBI((m), LOCK_FILE, LOCK_LINE) #define vm_page_unlock(m) vm_page_unlock_KBI((m), LOCK_FILE, LOCK_LINE) #define vm_page_trylock(m) vm_page_trylock_KBI((m), LOCK_FILE, LOCK_LINE) #else /* !KLD_MODULE */ #define vm_page_lockptr(m) (PA_LOCKPTR(VM_PAGE_TO_PHYS((m)))) #define vm_page_lock(m) mtx_lock(vm_page_lockptr((m))) #define vm_page_unlock(m) mtx_unlock(vm_page_lockptr((m))) #define vm_page_trylock(m) mtx_trylock(vm_page_lockptr((m))) #endif #if defined(INVARIANTS) #define vm_page_assert_locked(m) \ vm_page_assert_locked_KBI((m), __FILE__, __LINE__) #define vm_page_lock_assert(m, a) \ vm_page_lock_assert_KBI((m), (a), __FILE__, __LINE__) #else #define vm_page_assert_locked(m) #define vm_page_lock_assert(m, a) #endif /* * The vm_page's aflags are updated using atomic operations. To set or clear * these flags, the functions vm_page_aflag_set() and vm_page_aflag_clear() * must be used. Neither these flags nor these functions are part of the KBI. * * PGA_REFERENCED may be cleared only if the page is locked. 
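An editorial note on the busy-word encoding defined at the top of this span: the sharer count lives above the three flag bits, so the concrete values are easy to read off:

	/* VPB_SHARERS_WORD(n) == (n << VPB_SHARERS_SHIFT) | VPB_BIT_SHARED:
	 *   VPB_UNBUSIED           == VPB_SHARERS_WORD(0)   == 0x01
	 *   one shared holder      == VPB_SHARERS_WORD(1)   == 0x09
	 *   two shared holders     == VPB_SHARERS_WORD(2)   == 0x11
	 *   exclusive holder       == VPB_SINGLE_EXCLUSIVER == 0x02
	 * VPB_SHARERS() inverts the encoding: (0x11 & ~0x07) >> 3 == 2,
	 * and acquiring one more shared reference is a single atomic add
	 * of VPB_ONE_SHARER (0x08) to busy_lock. */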
It is set by * both the MI and MD VM layers. However, kernel loadable modules should not * directly set this flag. They should call vm_page_reference() instead. * * PGA_WRITEABLE is set exclusively on managed pages by pmap_enter(). * When it does so, the object must be locked, or the page must be * exclusive busied. The MI VM layer must never access this flag * directly. Instead, it should call pmap_page_is_write_mapped(). * * PGA_EXECUTABLE may be set by pmap routines, and indicates that a page has * at least one executable mapping. It is not consumed by the MI VM layer. */ #define PGA_WRITEABLE 0x01 /* page may be mapped writeable */ #define PGA_REFERENCED 0x02 /* page has been referenced */ #define PGA_EXECUTABLE 0x04 /* page may be mapped executable */ /* * Page flags. If changed at any other time than page allocation or * freeing, the modification must be protected by the vm_page lock. */ #define PG_CACHED 0x0001 /* page is cached */ #define PG_FICTITIOUS 0x0004 /* physical page doesn't exist */ #define PG_ZERO 0x0008 /* page is zeroed */ #define PG_MARKER 0x0010 /* special queue marker page */ #define PG_WINATCFLS 0x0040 /* flush dirty page on inactive q */ #define PG_NODUMP 0x0080 /* don't include this page in a dump */ #define PG_UNHOLDFREE 0x0100 /* delayed free of a held page */ /* * Misc constants. */ #define ACT_DECLINE 1 #define ACT_ADVANCE 3 #define ACT_INIT 5 #define ACT_MAX 64 #ifdef _KERNEL #include #include /* * Each pageable resident page falls into one of four lists: * * free * Available for allocation now. * * cache * Almost available for allocation. Still associated with * an object, but clean and immediately freeable. * * The following lists are LRU sorted: * * inactive * Low activity, candidates for reclamation. * This is the list of pages that should be * paged out next. * * active * Pages that are "active" i.e. they have been * recently referenced. * */ extern int vm_page_zero_count; extern vm_page_t vm_page_array; /* First resident page in table */ extern long vm_page_array_size; /* number of vm_page_t's */ extern long first_page; /* first physical page number */ #define VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr) /* * PHYS_TO_VM_PAGE() returns the vm_page_t object that represents a memory * page to which the given physical address belongs. The correct vm_page_t * object is returned for addresses that are not page-aligned. */ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa); /* * Page allocation parameters for vm_page for the functions * vm_page_alloc(), vm_page_grab(), vm_page_alloc_contig() and * vm_page_alloc_freelist(). Some functions support only a subset * of the flags, and ignore others, see the flags legend. * * Bits 0 - 1 define class. * Bits 2 - 15 dedicated for flags. * Legend: * (a) - vm_page_alloc() supports the flag. * (c) - vm_page_alloc_contig() supports the flag. * (f) - vm_page_alloc_freelist() supports the flag. * (g) - vm_page_grab() supports the flag. * Bits above 15 define the count of additional pages that the caller * intends to allocate. 
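Since the flags legend above is terse, a hedged usage example may help; obj and pindex are hypothetical, and the VM_ALLOC_* names are defined immediately below:

	vm_page_t m;

	/* A wired, preferably pre-zeroed page for a kernel object, with
	 * a hint that roughly seven more allocations will follow. */
	m = vm_page_alloc(obj, pindex, VM_ALLOC_SYSTEM | VM_ALLOC_WIRED |
	    VM_ALLOC_ZERO | VM_ALLOC_COUNT(7));
	if (m != NULL && (m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);	/* VM_ALLOC_ZERO only tries for one */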
*/ #define VM_ALLOC_NORMAL 0 #define VM_ALLOC_INTERRUPT 1 #define VM_ALLOC_SYSTEM 2 #define VM_ALLOC_CLASS_MASK 3 #define VM_ALLOC_WIRED 0x0020 /* (acfg) Allocate non pageable page */ #define VM_ALLOC_ZERO 0x0040 /* (acfg) Try to obtain a zeroed page */ #define VM_ALLOC_NOOBJ 0x0100 /* (acg) No associated object */ #define VM_ALLOC_NOBUSY 0x0200 /* (acg) Do not busy the page */ #define VM_ALLOC_IFCACHED 0x0400 /* (ag) Fail if page is not cached */ #define VM_ALLOC_IFNOTCACHED 0x0800 /* (ag) Fail if page is cached */ #define VM_ALLOC_IGN_SBUSY 0x1000 /* (g) Ignore shared busy flag */ #define VM_ALLOC_NODUMP 0x2000 /* (ag) don't include in dump */ #define VM_ALLOC_SBUSY 0x4000 /* (acg) Shared busy the page */ #define VM_ALLOC_NOWAIT 0x8000 /* (g) Do not sleep, return NULL */ #define VM_ALLOC_COUNT_SHIFT 16 #define VM_ALLOC_COUNT(count) ((count) << VM_ALLOC_COUNT_SHIFT) #ifdef M_NOWAIT static inline int malloc2vm_flags(int malloc_flags) { int pflags; KASSERT((malloc_flags & M_USE_RESERVE) == 0 || (malloc_flags & M_NOWAIT) != 0, ("M_USE_RESERVE requires M_NOWAIT")); pflags = (malloc_flags & M_USE_RESERVE) != 0 ? VM_ALLOC_INTERRUPT : VM_ALLOC_SYSTEM; if ((malloc_flags & M_ZERO) != 0) pflags |= VM_ALLOC_ZERO; if ((malloc_flags & M_NODUMP) != 0) pflags |= VM_ALLOC_NODUMP; return (pflags); } #endif void vm_page_busy_downgrade(vm_page_t m); void vm_page_busy_sleep(vm_page_t m, const char *msg); void vm_page_flash(vm_page_t m); void vm_page_hold(vm_page_t mem); void vm_page_unhold(vm_page_t mem); void vm_page_free(vm_page_t m); void vm_page_free_zero(vm_page_t m); void vm_page_activate (vm_page_t); void vm_page_advise(vm_page_t m, int advice); vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int); vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr); vm_page_t vm_page_alloc_freelist(int, int); vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int); void vm_page_cache(vm_page_t); void vm_page_cache_free(vm_object_t, vm_pindex_t, vm_pindex_t); void vm_page_cache_transfer(vm_object_t, vm_pindex_t, vm_object_t); int vm_page_try_to_cache (vm_page_t); int vm_page_try_to_free (vm_page_t); void vm_page_deactivate (vm_page_t); void vm_page_deactivate_noreuse(vm_page_t); void vm_page_dequeue(vm_page_t m); void vm_page_dequeue_locked(vm_page_t m); vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t); vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t); boolean_t vm_page_is_cached(vm_object_t object, vm_pindex_t pindex); vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t); vm_page_t vm_page_next(vm_page_t m); int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *); struct vm_pagequeue *vm_page_pagequeue(vm_page_t m); vm_page_t vm_page_prev(vm_page_t m); boolean_t vm_page_ps_is_valid(vm_page_t m); void vm_page_putfake(vm_page_t m); void vm_page_readahead_finish(vm_page_t m); void vm_page_reference(vm_page_t m); void vm_page_remove (vm_page_t); int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t); vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex); void vm_page_requeue(vm_page_t m); void vm_page_requeue_locked(vm_page_t m); int vm_page_sbusied(vm_page_t m); void vm_page_set_valid_range(vm_page_t m, int base, int size); int vm_page_sleep_if_busy(vm_page_t m, const char *msg); vm_offset_t 
vm_page_startup(vm_offset_t vaddr); void vm_page_sunbusy(vm_page_t m); int vm_page_trysbusy(vm_page_t m); void vm_page_unhold_pages(vm_page_t *ma, int count); boolean_t vm_page_unwire(vm_page_t m, uint8_t queue); void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_wire (vm_page_t); void vm_page_xunbusy_hard(vm_page_t m); void vm_page_set_validclean (vm_page_t, int, int); void vm_page_clear_dirty (vm_page_t, int, int); void vm_page_set_invalid (vm_page_t, int, int); int vm_page_is_valid (vm_page_t, int, int); void vm_page_test_dirty (vm_page_t); vm_page_bits_t vm_page_bits(int base, int size); void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid); void vm_page_free_toq(vm_page_t m); void vm_page_zero_idle_wakeup(void); void vm_page_dirty_KBI(vm_page_t m); void vm_page_lock_KBI(vm_page_t m, const char *file, int line); void vm_page_unlock_KBI(vm_page_t m, const char *file, int line); int vm_page_trylock_KBI(vm_page_t m, const char *file, int line); #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line); void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line); #endif #define vm_page_assert_sbusied(m) \ KASSERT(vm_page_sbusied(m), \ ("vm_page_assert_sbusied: page %p not shared busy @ %s:%d", \ (void *)m, __FILE__, __LINE__)); #define vm_page_assert_unbusied(m) \ KASSERT(!vm_page_busied(m), \ ("vm_page_assert_unbusied: page %p busy @ %s:%d", \ (void *)m, __FILE__, __LINE__)); #define vm_page_assert_xbusied(m) \ KASSERT(vm_page_xbusied(m), \ ("vm_page_assert_xbusied: page %p not exclusive busy @ %s:%d", \ (void *)m, __FILE__, __LINE__)); #define vm_page_busied(m) \ ((m)->busy_lock != VPB_UNBUSIED) #define vm_page_sbusy(m) do { \ if (!vm_page_trysbusy(m)) \ panic("%s: page %p failed shared busing", __func__, m); \ } while (0) #define vm_page_tryxbusy(m) \ (atomic_cmpset_acq_int(&m->busy_lock, VPB_UNBUSIED, \ VPB_SINGLE_EXCLUSIVER)) #define vm_page_xbusied(m) \ ((m->busy_lock & VPB_SINGLE_EXCLUSIVER) != 0) #define vm_page_xbusy(m) do { \ if (!vm_page_tryxbusy(m)) \ panic("%s: page %p failed exclusive busing", __func__, \ m); \ } while (0) #define vm_page_xunbusy(m) do { \ if (!atomic_cmpset_rel_int(&(m)->busy_lock, \ VPB_SINGLE_EXCLUSIVER, VPB_UNBUSIED)) \ vm_page_xunbusy_hard(m); \ } while (0) #ifdef INVARIANTS void vm_page_object_lock_assert(vm_page_t m); #define VM_PAGE_OBJECT_LOCK_ASSERT(m) vm_page_object_lock_assert(m) void vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits); #define VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits) \ vm_page_assert_pga_writeable(m, bits) #else #define VM_PAGE_OBJECT_LOCK_ASSERT(m) (void)0 #define VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits) (void)0 #endif /* * We want to use atomic updates for the aflags field, which is 8 bits wide. * However, not all architectures support atomic operations on 8-bit * destinations. In order that we can easily use a 32-bit operation, we * require that the aflags field be 32-bit aligned. */ CTASSERT(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0); /* * Clear the given bits in the specified page. */ static inline void vm_page_aflag_clear(vm_page_t m, uint8_t bits) { uint32_t *addr, val; /* * The PGA_REFERENCED flag can only be cleared if the page is locked. */ if ((bits & PGA_REFERENCED) != 0) vm_page_assert_locked(m); /* * Access the whole 32-bit word containing the aflags field with an * atomic update. 
Parallel non-atomic updates to the other fields * within this word are handled properly by the atomic update. */ addr = (void *)&m->aflags; KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0, ("vm_page_aflag_clear: aflags is misaligned")); val = bits; #if BYTE_ORDER == BIG_ENDIAN val <<= 24; #endif atomic_clear_32(addr, val); } /* * Set the given bits in the specified page. */ static inline void vm_page_aflag_set(vm_page_t m, uint8_t bits) { uint32_t *addr, val; VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits); /* * Access the whole 32-bit word containing the aflags field with an * atomic update. Parallel non-atomic updates to the other fields * within this word are handled properly by the atomic update. */ addr = (void *)&m->aflags; KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0, ("vm_page_aflag_set: aflags is misaligned")); val = bits; #if BYTE_ORDER == BIG_ENDIAN val <<= 24; #endif atomic_set_32(addr, val); } /* * vm_page_dirty: * * Set all bits in the page's dirty field. * * The object containing the specified page must be locked if the * call is made from the machine-independent layer. * * See vm_page_clear_dirty_mask(). */ static __inline void vm_page_dirty(vm_page_t m) { /* Use vm_page_dirty_KBI() under INVARIANTS to save memory. */ #if defined(KLD_MODULE) || defined(INVARIANTS) vm_page_dirty_KBI(m); #else m->dirty = VM_PAGE_BITS_ALL; #endif } /* * vm_page_remque: * * If the given page is in a page queue, then remove it from that page * queue. * * The page must be locked. */ static inline void vm_page_remque(vm_page_t m) { if (m->queue != PQ_NONE) vm_page_dequeue(m); } /* * vm_page_undirty: * * Set page to not be dirty. Note: does not clear pmap modify bits */ static __inline void vm_page_undirty(vm_page_t m) { VM_PAGE_OBJECT_LOCK_ASSERT(m); m->dirty = 0; } #endif /* _KERNEL */ #endif /* !_VM_PAGE_ */ Index: projects/powernv/vm/vm_pageout.c =================================================================== --- projects/powernv/vm/vm_pageout.c (revision 290990) +++ projects/powernv/vm/vm_pageout.c (revision 290991) @@ -1,1938 +1,2015 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2005 Yahoo! Technologies Norway AS * All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * The proverbial page-out daemon. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include "opt_kdtrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * System initialization */ /* the kernel process "vm_pageout"*/ static void vm_pageout(void); static void vm_pageout_init(void); static int vm_pageout_clean(vm_page_t m); static int vm_pageout_cluster(vm_page_t m); static void vm_pageout_scan(struct vm_domain *vmd, int pass); -static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass); +static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage, + int starting_page_shortage); SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init, NULL); struct proc *pageproc; static struct kproc_desc page_kp = { "pagedaemon", vm_pageout, &pageproc }; SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, &page_kp); SDT_PROVIDER_DEFINE(vm); SDT_PROBE_DEFINE(vm, , , vm__lowmem_cache); SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan); #if !defined(NO_SWAPPING) /* the kernel process "vm_daemon"*/ static void vm_daemon(void); static struct proc *vmproc; static struct kproc_desc vm_kp = { "vmdaemon", vm_daemon, &vmproc }; SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp); #endif int vm_pages_needed; /* Event on which pageout daemon sleeps */ int vm_pageout_deficit; /* Estimated number of pages deficit */ int vm_pageout_wakeup_thresh; +static int vm_pageout_oom_seq = 12; #if !defined(NO_SWAPPING) static int vm_pageout_req_swapout; /* XXX */ static int vm_daemon_needed; static struct mtx vm_daemon_mtx; /* Allow for use by vm_pageout before vm_daemon is initialized. 
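The vm_pageout_oom_seq knob declared above (default 12, exported as the vm.pageout_oom_seq sysctl just below) pairs with the vmd_oom_seq field added to struct vm_domain in vm_page.h. The body of vm_pageout_mightbe_oom() lies outside this hunk, so the following is only a hedged sketch of the intended logic:

	/* Sketch: reset the streak whenever a scan recovered any of its
	 * shortage; only after vm_pageout_oom_seq consecutive
	 * no-progress passes fall through to vm_pageout_oom(). */
	if (page_shortage <= 0 || page_shortage < starting_page_shortage)
		vmd->vmd_oom_seq = 0;
	else if (++vmd->vmd_oom_seq < vm_pageout_oom_seq)
		return;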
*/ MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF); #endif static int vm_max_launder = 32; static int vm_pageout_update_period; static int defer_swap_pageouts; static int disable_swap_pageouts; static int lowmem_period = 10; static time_t lowmem_uptime; #if defined(NO_SWAPPING) static int vm_swap_enabled = 0; static int vm_swap_idle_enabled = 0; #else static int vm_swap_enabled = 1; static int vm_swap_idle_enabled = 0; #endif static int vm_panic_on_oom = 0; SYSCTL_INT(_vm, OID_AUTO, panic_on_oom, CTLFLAG_RWTUN, &vm_panic_on_oom, 0, "panic on out of memory instead of killing the largest process"); SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh, CTLFLAG_RW, &vm_pageout_wakeup_thresh, 0, "free page threshold for waking up the pageout daemon"); SYSCTL_INT(_vm, OID_AUTO, max_launder, CTLFLAG_RW, &vm_max_launder, 0, "Limit dirty flushes in pageout"); SYSCTL_INT(_vm, OID_AUTO, pageout_update_period, CTLFLAG_RW, &vm_pageout_update_period, 0, "Maximum active LRU update period"); SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RW, &lowmem_period, 0, "Low memory callback period"); #if defined(NO_SWAPPING) SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RD, &vm_swap_enabled, 0, "Enable entire process swapout"); SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RD, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); #else SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout"); SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); #endif SYSCTL_INT(_vm, OID_AUTO, defer_swapspace_pageouts, CTLFLAG_RW, &defer_swap_pageouts, 0, "Give preference to dirty pages in mem"); SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts, CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages"); static int pageout_lock_miss; SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss, CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout"); +SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq, + CTLFLAG_RW, &vm_pageout_oom_seq, 0, + "back-to-back calls to oom detector to start OOM"); + #define VM_PAGEOUT_PAGE_COUNT 16 int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT; int vm_page_max_wired; /* XXX max # of wired pages system-wide */ SYSCTL_INT(_vm, OID_AUTO, max_wired, CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count"); static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *); static boolean_t vm_pageout_launder(struct vm_pagequeue *pq, int, vm_paddr_t, vm_paddr_t); #if !defined(NO_SWAPPING) static void vm_pageout_map_deactivate_pages(vm_map_t, long); static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long); static void vm_req_vmdaemon(int req); #endif static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *); /* * Initialize a dummy page for marking the caller's place in the specified * paging queue. In principle, this function only needs to set the flag * PG_MARKER. Nonetheless, it write busies and initializes the hold count * to one as safety precautions. */ static void vm_pageout_init_marker(vm_page_t marker, u_short queue) { bzero(marker, sizeof(*marker)); marker->flags = PG_MARKER; marker->busy_lock = VPB_SINGLE_EXCLUSIVER; marker->queue = queue; marker->hold_count = 1; } /* * vm_pageout_fallback_object_lock: * * Lock vm object currently associated with `m'. VM_OBJECT_TRYWLOCK is * known to have failed and page queue must be either PQ_ACTIVE or * PQ_INACTIVE.
To avoid lock order violation, unlock the page queues * while locking the vm object. Use marker page to detect page queue * changes and maintain notion of next page on page queue. Return * TRUE if no changes were detected, FALSE otherwise. vm object is * locked on return. * * This function depends on both the lock portion of struct vm_object * and normal struct vm_page being type stable. */ static boolean_t vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next) { struct vm_page marker; struct vm_pagequeue *pq; boolean_t unchanged; u_short queue; vm_object_t object; queue = m->queue; vm_pageout_init_marker(&marker, queue); pq = vm_page_pagequeue(m); object = m->object; TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q); vm_pagequeue_unlock(pq); vm_page_unlock(m); VM_OBJECT_WLOCK(object); vm_page_lock(m); vm_pagequeue_lock(pq); /* * The page's object might have changed, and/or the page might * have moved from its original position in the queue. If the * page's object has changed, then the caller should abandon * processing the page because the wrong object lock was * acquired. Use the marker's plinks.q, not the page's, to * determine if the page has been moved. The state of the * page's plinks.q can be indeterminate; whereas, the marker's * plinks.q must be valid. */ *next = TAILQ_NEXT(&marker, plinks.q); unchanged = m->object == object && m == TAILQ_PREV(&marker, pglist, plinks.q); KASSERT(!unchanged || m->queue == queue, ("page %p queue %d %d", m, queue, m->queue)); TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q); return (unchanged); } /* * Lock the page while holding the page queue lock. Use marker page * to detect page queue changes and maintain notion of next page on * page queue. Return TRUE if no changes were detected, FALSE * otherwise. The page is locked on return. The page queue lock might * be dropped and reacquired. * * This function depends on normal struct vm_page being type stable. */ static boolean_t vm_pageout_page_lock(vm_page_t m, vm_page_t *next) { struct vm_page marker; struct vm_pagequeue *pq; boolean_t unchanged; u_short queue; vm_page_lock_assert(m, MA_NOTOWNED); if (vm_page_trylock(m)) return (TRUE); queue = m->queue; vm_pageout_init_marker(&marker, queue); pq = vm_page_pagequeue(m); TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q); vm_pagequeue_unlock(pq); vm_page_lock(m); vm_pagequeue_lock(pq); /* Page queue might have changed. */ *next = TAILQ_NEXT(&marker, plinks.q); unchanged = m == TAILQ_PREV(&marker, pglist, plinks.q); KASSERT(!unchanged || m->queue == queue, ("page %p queue %d %d", m, queue, m->queue)); TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q); return (unchanged); } /* * vm_pageout_clean: * * Clean the page and remove it from the laundry. * * We set the busy bit to cause potential page faults on this page to * block. Note the careful timing, however, the busy bit isn't set till * late and we cannot do anything that will mess with the page. */ static int vm_pageout_cluster(vm_page_t m) { vm_object_t object; vm_page_t mc[2*vm_pageout_page_count], pb, ps; int pageout_count; int ib, is, page_base; vm_pindex_t pindex = m->pindex; vm_page_lock_assert(m, MA_OWNED); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); /* * It doesn't cost us anything to pageout OBJT_DEFAULT or OBJT_SWAP * with the new swapper, but we could have serious problems paging * out other object types if there is insufficient memory. * * Unfortunately, checking free memory here is far too late, so the * check has been moved up a procedural level. 
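A worked editorial example of the alignment rule used by the clustering scan below, with vm_pageout_page_count = 16:

	/* Starting at pindex 37, the reverse loop gathers pages 37, 36,
	 * ..., 32 and stops because (37 - (ib - 1)) % 16 == 0 once
	 * ib == 6, i.e. at page 32, a 16-page boundary; the forward loop
	 * then fills 38, 39, ... up to the 16-page cluster limit.
	 * Aligned clusters keep the pager from punching sporadic
	 * out-of-order holes in the underlying file. */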
*/ /* * Can't clean the page if it's busy or held. */ vm_page_assert_unbusied(m); KASSERT(m->hold_count == 0, ("vm_pageout_clean: page %p is held", m)); vm_page_unlock(m); mc[vm_pageout_page_count] = pb = ps = m; pageout_count = 1; page_base = vm_pageout_page_count; ib = 1; is = 1; /* * Scan object for clusterable pages. * * We can cluster ONLY if: ->> the page is NOT * clean, wired, busy, held, or mapped into a * buffer, and one of the following: * 1) The page is inactive, or a seldom used * active page. * -or- * 2) we force the issue. * * During heavy mmap/modification loads the pageout * daemon can really fragment the underlying file * due to flushing pages out of order and not trying * to align the clusters (which leaves sporadic out-of-order * holes). To solve this problem we do the reverse scan * first and attempt to align our cluster, then do a * forward scan if room remains. */ more: while (ib && pageout_count < vm_pageout_page_count) { vm_page_t p; if (ib > pindex) { ib = 0; break; } if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) { ib = 0; break; } vm_page_test_dirty(p); if (p->dirty == 0) { ib = 0; break; } vm_page_lock(p); if (p->queue != PQ_INACTIVE || p->hold_count != 0) { /* may be undergoing I/O */ vm_page_unlock(p); ib = 0; break; } vm_page_unlock(p); mc[--page_base] = pb = p; ++pageout_count; ++ib; /* * alignment boundary, stop here and switch directions. Do * not clear ib. */ if ((pindex - (ib - 1)) % vm_pageout_page_count == 0) break; } while (pageout_count < vm_pageout_page_count && pindex + is < object->size) { vm_page_t p; if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p)) break; vm_page_test_dirty(p); if (p->dirty == 0) break; vm_page_lock(p); if (p->queue != PQ_INACTIVE || p->hold_count != 0) { /* may be undergoing I/O */ vm_page_unlock(p); break; } vm_page_unlock(p); mc[page_base + pageout_count] = ps = p; ++pageout_count; ++is; } /* * If we exhausted our forward scan, continue with the reverse scan * when possible, even past a page boundary. This catches boundary * conditions. */ if (ib && pageout_count < vm_pageout_page_count) goto more; /* * we allow reads during pageouts... */ return (vm_pageout_flush(&mc[page_base], pageout_count, 0, 0, NULL, NULL)); } /* * vm_pageout_flush() - launder the given pages * * The given pages are laundered. Note that we set up for the start of * I/O (i.e. busy the page), mark it read-only, and bump the object * reference count all in here rather than in the parent. If we want * the parent to do more sophisticated things we may have to change * the ordering. * * Returned runlen is the count of pages between mreq and the first * page after mreq with status VM_PAGER_AGAIN. * *eio is set to TRUE if the pager returned VM_PAGER_ERROR or VM_PAGER_FAIL * for any page in the runlen set. */ int vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen, boolean_t *eio) { vm_object_t object = mc[0]->object; int pageout_status[count]; int numpagedout = 0; int i, runlen; VM_OBJECT_ASSERT_WLOCKED(object); /* * Initiate I/O. Bump the vm_page_t->busy counter and * mark the pages read-only. * * We do not have to fix up the clean/dirty bits here... we can * allow the pager to do it after the I/O completes. * * NOTE! mc[i]->dirty may be partial or fragmented due to an * edge case with file fragments.
*/ for (i = 0; i < count; i++) { KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, ("vm_pageout_flush: partially invalid page %p index %d/%d", mc[i], i, count)); vm_page_sbusy(mc[i]); pmap_remove_write(mc[i]); } vm_object_pip_add(object, count); vm_pager_put_pages(object, mc, count, flags, pageout_status); runlen = count - mreq; if (eio != NULL) *eio = FALSE; for (i = 0; i < count; i++) { vm_page_t mt = mc[i]; KASSERT(pageout_status[i] == VM_PAGER_PEND || !pmap_page_is_write_mapped(mt), ("vm_pageout_flush: page %p is not write protected", mt)); switch (pageout_status[i]) { case VM_PAGER_OK: case VM_PAGER_PEND: numpagedout++; break; case VM_PAGER_BAD: /* * Page outside of range of object. Right now we * essentially lose the changes by pretending it * worked. */ vm_page_undirty(mt); break; case VM_PAGER_ERROR: case VM_PAGER_FAIL: /* * If the page couldn't be paged out, then reactivate the * page so it doesn't clog the inactive list. (We * will try paging it out again later). */ vm_page_lock(mt); vm_page_activate(mt); vm_page_unlock(mt); if (eio != NULL && i >= mreq && i - mreq < runlen) *eio = TRUE; break; case VM_PAGER_AGAIN: if (i >= mreq && i - mreq < runlen) runlen = i - mreq; break; } /* * If the operation is still going, leave the page busy to * block all other accesses. Also, leave the paging in * progress indicator set so that we don't attempt an object * collapse. */ if (pageout_status[i] != VM_PAGER_PEND) { vm_object_pip_wakeup(object); vm_page_sunbusy(mt); } } if (prunlen != NULL) *prunlen = runlen; return (numpagedout); } static boolean_t vm_pageout_launder(struct vm_pagequeue *pq, int tries, vm_paddr_t low, vm_paddr_t high) { struct mount *mp; struct vnode *vp; vm_object_t object; vm_paddr_t pa; vm_page_t m, m_tmp, next; int lockmode; vm_pagequeue_lock(pq); TAILQ_FOREACH_SAFE(m, &pq->pq_pl, plinks.q, next) { if ((m->flags & PG_MARKER) != 0) continue; pa = VM_PAGE_TO_PHYS(m); if (pa < low || pa + PAGE_SIZE > high) continue; if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) { vm_page_unlock(m); continue; } object = m->object; if ((!VM_OBJECT_TRYWLOCK(object) && (!vm_pageout_fallback_object_lock(m, &next) || m->hold_count != 0)) || vm_page_busied(m)) { vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); continue; } vm_page_test_dirty(m); if (m->dirty == 0 && object->ref_count != 0) pmap_remove_all(m); if (m->dirty != 0) { vm_page_unlock(m); if (tries == 0 || (object->flags & OBJ_DEAD) != 0) { VM_OBJECT_WUNLOCK(object); continue; } if (object->type == OBJT_VNODE) { vm_pagequeue_unlock(pq); vp = object->handle; vm_object_reference_locked(object); VM_OBJECT_WUNLOCK(object); (void)vn_start_write(vp, &mp, V_WAIT); lockmode = MNT_SHARED_WRITES(vp->v_mount) ? LK_SHARED : LK_EXCLUSIVE; vn_lock(vp, lockmode | LK_RETRY); VM_OBJECT_WLOCK(object); vm_object_page_clean(object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(object); VOP_UNLOCK(vp, 0); vm_object_deallocate(object); vn_finished_write(mp); return (TRUE); } else if (object->type == OBJT_SWAP || object->type == OBJT_DEFAULT) { vm_pagequeue_unlock(pq); m_tmp = m; vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC, 0, NULL, NULL); VM_OBJECT_WUNLOCK(object); return (TRUE); } } else { /* * Dequeue here to prevent lock recursion in * vm_page_cache(). */ vm_page_dequeue_locked(m); vm_page_cache(m); vm_page_unlock(m); } VM_OBJECT_WUNLOCK(object); } vm_pagequeue_unlock(pq); return (FALSE); } /* * Increase the number of cached pages.
The specified value, "tries", * determines which categories of pages are cached: * * 0: All clean, inactive pages within the specified physical address range * are cached. Will not sleep. * 1: The vm_lowmem handlers are called. All inactive pages within * the specified physical address range are cached. May sleep. * 2: The vm_lowmem handlers are called. All inactive and active pages * within the specified physical address range are cached. May sleep. */ void vm_pageout_grow_cache(int tries, vm_paddr_t low, vm_paddr_t high) { int actl, actmax, inactl, inactmax, dom, initial_dom; static int start_dom = 0; if (tries > 0) { /* * Decrease registered cache sizes. The vm_lowmem handlers * may acquire locks and/or sleep, so they can only be invoked * when "tries" is greater than zero. */ SDT_PROBE0(vm, , , vm__lowmem_cache); EVENTHANDLER_INVOKE(vm_lowmem, 0); /* * We do this explicitly after the caches have been drained * above. */ uma_reclaim(); } /* * Make the next scan start on the next domain. */ initial_dom = atomic_fetchadd_int(&start_dom, 1) % vm_ndomains; inactl = 0; inactmax = vm_cnt.v_inactive_count; actl = 0; actmax = tries < 2 ? 0 : vm_cnt.v_active_count; dom = initial_dom; /* * Scan domains in round-robin order, first inactive queues, * then active. Since a domain usually owns a large physically * contiguous chunk of memory, it makes sense to completely * exhaust one domain before switching to the next, while growing * the pool of contiguous physical pages. * * Do not even start laundering a domain which cannot contain * the specified address range, as indicated by the segments * constituting the domain. */ again: if (inactl < inactmax) { if (vm_phys_domain_intersects(vm_dom[dom].vmd_segs, low, high) && vm_pageout_launder(&vm_dom[dom].vmd_pagequeues[PQ_INACTIVE], tries, low, high)) { inactl++; goto again; } if (++dom == vm_ndomains) dom = 0; if (dom != initial_dom) goto again; } if (actl < actmax) { if (vm_phys_domain_intersects(vm_dom[dom].vmd_segs, low, high) && vm_pageout_launder(&vm_dom[dom].vmd_pagequeues[PQ_ACTIVE], tries, low, high)) { actl++; goto again; } if (++dom == vm_ndomains) dom = 0; if (dom != initial_dom) goto again; } } #if !defined(NO_SWAPPING) /* * vm_pageout_object_deactivate_pages * * Deactivate enough pages to satisfy the inactive target * requirements. * * The object and map must be locked. */ static void vm_pageout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object, long desired) { vm_object_t backing_object, object; vm_page_t p; int act_delta, remove_mode; VM_OBJECT_ASSERT_LOCKED(first_object); if ((first_object->flags & OBJ_FICTITIOUS) != 0) return; for (object = first_object;; object = backing_object) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; VM_OBJECT_ASSERT_LOCKED(object); if ((object->flags & OBJ_UNMANAGED) != 0 || object->paging_in_progress != 0) goto unlock_return; remove_mode = 0; if (object->shadow_count > 1) remove_mode = 1; /* * Scan the object's entire memory queue.
*/ TAILQ_FOREACH(p, &object->memq, listq) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; if (vm_page_busied(p)) continue; PCPU_INC(cnt.v_pdpages); vm_page_lock(p); if (p->wire_count != 0 || p->hold_count != 0 || !pmap_page_exists_quick(pmap, p)) { vm_page_unlock(p); continue; } act_delta = pmap_ts_referenced(p); if ((p->aflags & PGA_REFERENCED) != 0) { if (act_delta == 0) act_delta = 1; vm_page_aflag_clear(p, PGA_REFERENCED); } if (p->queue != PQ_ACTIVE && act_delta != 0) { vm_page_activate(p); p->act_count += act_delta; } else if (p->queue == PQ_ACTIVE) { if (act_delta == 0) { p->act_count -= min(p->act_count, ACT_DECLINE); if (!remove_mode && p->act_count == 0) { pmap_remove_all(p); vm_page_deactivate(p); } else vm_page_requeue(p); } else { vm_page_activate(p); if (p->act_count < ACT_MAX - ACT_ADVANCE) p->act_count += ACT_ADVANCE; vm_page_requeue(p); } } else if (p->queue == PQ_INACTIVE) pmap_remove_all(p); vm_page_unlock(p); } if ((backing_object = object->backing_object) == NULL) goto unlock_return; VM_OBJECT_RLOCK(backing_object); if (object != first_object) VM_OBJECT_RUNLOCK(object); } unlock_return: if (object != first_object) VM_OBJECT_RUNLOCK(object); } /* * deactivate some number of pages in a map, try to do it fairly, but * that is really hard to do. */ static void vm_pageout_map_deactivate_pages(map, desired) vm_map_t map; long desired; { vm_map_entry_t tmpe; vm_object_t obj, bigobj; int nothingwired; if (!vm_map_trylock(map)) return; bigobj = NULL; nothingwired = TRUE; /* * first, search out the biggest object, and try to free pages from * that. */ tmpe = map->header.next; while (tmpe != &map->header) { if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { obj = tmpe->object.vm_object; if (obj != NULL && VM_OBJECT_TRYRLOCK(obj)) { if (obj->shadow_count <= 1 && (bigobj == NULL || bigobj->resident_page_count < obj->resident_page_count)) { if (bigobj != NULL) VM_OBJECT_RUNLOCK(bigobj); bigobj = obj; } else VM_OBJECT_RUNLOCK(obj); } } if (tmpe->wired_count > 0) nothingwired = FALSE; tmpe = tmpe->next; } if (bigobj != NULL) { vm_pageout_object_deactivate_pages(map->pmap, bigobj, desired); VM_OBJECT_RUNLOCK(bigobj); } /* * Next, hunt around for other pages to deactivate. We actually * do this search sort of wrong -- .text first is not the best idea. */ tmpe = map->header.next; while (tmpe != &map->header) { if (pmap_resident_count(vm_map_pmap(map)) <= desired) break; if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { obj = tmpe->object.vm_object; if (obj != NULL) { VM_OBJECT_RLOCK(obj); vm_pageout_object_deactivate_pages(map->pmap, obj, desired); VM_OBJECT_RUNLOCK(obj); } } tmpe = tmpe->next; } /* * Remove all mappings if a process is swapped out, this will free page * table pages. */ if (desired == 0 && nothingwired) { pmap_remove(vm_map_pmap(map), vm_map_min(map), vm_map_max(map)); } vm_map_unlock(map); } #endif /* !defined(NO_SWAPPING) */ /* * Attempt to acquire all of the necessary locks to launder a page and * then call through the clustering layer to PUTPAGES. Wait a short * time for a vnode lock. * * Requires the page and object lock on entry, releases both before return. * Returns 0 on success and an errno otherwise. */ static int vm_pageout_clean(vm_page_t m) { struct vnode *vp; struct mount *mp; vm_object_t object; vm_pindex_t pindex; int error, lockmode; vm_page_assert_locked(m); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); error = 0; vp = NULL; mp = NULL; /* * The object is already known NOT to be dead. 
It * is possible for the vget() to block the whole * pageout daemon, but the new low-memory handling * code should prevent it. * * We can't wait forever for the vnode lock, we might * deadlock due to a vn_read() getting stuck in * vm_wait while holding this vnode. We skip the * vnode if we can't get it in a reasonable amount * of time. */ if (object->type == OBJT_VNODE) { vm_page_unlock(m); vp = object->handle; if (vp->v_type == VREG && vn_start_write(vp, &mp, V_NOWAIT) != 0) { mp = NULL; error = EDEADLK; goto unlock_all; } KASSERT(mp != NULL, ("vp %p with NULL v_mount", vp)); vm_object_reference_locked(object); pindex = m->pindex; VM_OBJECT_WUNLOCK(object); lockmode = MNT_SHARED_WRITES(vp->v_mount) ? LK_SHARED : LK_EXCLUSIVE; if (vget(vp, lockmode | LK_TIMELOCK, curthread)) { vp = NULL; error = EDEADLK; goto unlock_mp; } VM_OBJECT_WLOCK(object); vm_page_lock(m); /* * While the object and page were unlocked, the page * may have been: * (1) moved to a different queue, * (2) reallocated to a different object, * (3) reallocated to a different offset, or * (4) cleaned. */ if (m->queue != PQ_INACTIVE || m->object != object || m->pindex != pindex || m->dirty == 0) { vm_page_unlock(m); error = ENXIO; goto unlock_all; } /* * The page may have been busied or held while the object * and page locks were released. */ if (vm_page_busied(m) || m->hold_count != 0) { vm_page_unlock(m); error = EBUSY; goto unlock_all; } } /* * If a page is dirty, then it is either being washed * (but not yet cleaned) or it is still in the * laundry. If it is still in the laundry, then we * start the cleaning operation. */ if (vm_pageout_cluster(m) == 0) error = EIO; unlock_all: VM_OBJECT_WUNLOCK(object); unlock_mp: vm_page_lock_assert(m, MA_NOTOWNED); if (mp != NULL) { if (vp != NULL) vput(vp); vm_object_deallocate(object); vn_finished_write(mp); } return (error); } /* * vm_pageout_scan does the dirty work for the pageout daemon. * * pass 0 - Update active LRU/deactivate pages * pass 1 - Move inactive to cache or free * pass 2 - Launder dirty pages */ static void vm_pageout_scan(struct vm_domain *vmd, int pass) { vm_page_t m, next; struct vm_pagequeue *pq; vm_object_t object; long min_scan; int act_delta, addl_page_shortage, deficit, error, maxlaunder, maxscan; - int page_shortage, scan_tick, scanned, vnodes_skipped; + int page_shortage, scan_tick, scanned, starting_page_shortage; + int vnodes_skipped; boolean_t pageout_ok, queues_locked; /* * If we need to reclaim memory ask kernel caches to return * some. We rate limit to avoid thrashing. */ if (vmd == &vm_dom[0] && pass > 0 && (time_uptime - lowmem_uptime) >= lowmem_period) { /* * Decrease registered cache sizes. */ SDT_PROBE0(vm, , , vm__lowmem_scan); EVENTHANDLER_INVOKE(vm_lowmem, 0); /* * We do this explicitly after the caches have been * drained above. */ uma_reclaim(); lowmem_uptime = time_uptime; } /* * The addl_page_shortage is the number of temporarily * stuck pages in the inactive queue. In other words, the * number of pages from the inactive count that should be * discounted in setting the target for the active queue scan. */ addl_page_shortage = 0; /* * Calculate the number of pages we want to either free or move * to the cache. */ if (pass > 0) { deficit = atomic_readandclear_int(&vm_pageout_deficit); page_shortage = vm_paging_target() + deficit; } else page_shortage = deficit = 0; + starting_page_shortage = page_shortage; /* * maxlaunder limits the number of dirty pages we flush per scan. 
* For most systems a smaller value (16 or 32) is more robust under * extreme memory and disk pressure because any unnecessary writes * to disk can result in extreme performance degradation. However, * systems with excessive dirty pages (especially when MAP_NOSYNC is * used) will die horribly with limited laundering. If the pageout * daemon cannot clean enough pages in the first pass, we let it go * all out in succeeding passes. */ if ((maxlaunder = vm_max_launder) <= 1) maxlaunder = 1; if (pass > 1) maxlaunder = 10000; vnodes_skipped = 0; /* * Start scanning the inactive queue for pages we can move to the * cache or free. The scan will stop when the target is reached or * we have scanned the entire inactive queue. Note that m->act_count * is not used to form decisions for the inactive queue, only for the * active queue. */ pq = &vmd->vmd_pagequeues[PQ_INACTIVE]; maxscan = pq->pq_cnt; vm_pagequeue_lock(pq); queues_locked = TRUE; for (m = TAILQ_FIRST(&pq->pq_pl); m != NULL && maxscan-- > 0 && page_shortage > 0; m = next) { vm_pagequeue_assert_locked(pq); KASSERT(queues_locked, ("unlocked queues")); KASSERT(m->queue == PQ_INACTIVE, ("Inactive queue %p", m)); PCPU_INC(cnt.v_pdpages); next = TAILQ_NEXT(m, plinks.q); /* * skip marker pages */ if (m->flags & PG_MARKER) continue; KASSERT((m->flags & PG_FICTITIOUS) == 0, ("Fictitious page %p cannot be in inactive queue", m)); KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("Unmanaged page %p cannot be in inactive queue", m)); /* * The page or object lock acquisitions fail if the * page was removed from the queue or moved to a * different position within the queue. In either * case, addl_page_shortage should not be incremented. */ if (!vm_pageout_page_lock(m, &next)) goto unlock_page; else if (m->hold_count != 0) { /* * Held pages are essentially stuck in the * queue. So, they ought to be discounted * from the inactive count. See the * calculation of the page_shortage for the * loop over the active queue below. */ addl_page_shortage++; goto unlock_page; } object = m->object; if (!VM_OBJECT_TRYWLOCK(object)) { if (!vm_pageout_fallback_object_lock(m, &next)) goto unlock_object; else if (m->hold_count != 0) { addl_page_shortage++; goto unlock_object; } } if (vm_page_busied(m)) { /* * Don't mess with busy pages. Leave them at * the front of the queue. Most likely, they * are being paged out and will leave the * queue shortly after the scan finishes. So, * they ought to be discounted from the * inactive count. */ addl_page_shortage++; unlock_object: VM_OBJECT_WUNLOCK(object); unlock_page: vm_page_unlock(m); continue; } KASSERT(m->hold_count == 0, ("Held page %p", m)); /* * We unlock the inactive page queue, invalidating the * 'next' pointer. Use our marker to remember our * place. */ TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, plinks.q); vm_pagequeue_unlock(pq); queues_locked = FALSE; /* * Invalid pages can be easily freed. They cannot be * mapped, vm_page_free() asserts this. */ if (m->valid == 0) goto free_page; /* * If the page has been referenced and the object is not dead, * reactivate or requeue the page depending on whether the * object is mapped.
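 *
 * (Editorial sketch, not part of this change: assuming only the
 * checks visible in this loop, the disposition matrix implemented
 * below is roughly
 *
 *	referenced	object mapped	object dead	disposition
 *	----------	-------------	-----------	------------------
 *	yes		yes		n/a		reactivate, bump
 *							act_count
 *	yes		no		no		requeue at tail
 *	yes		no		yes		reclaim path below
 *	no		n/a		n/a		reclaim path below
 *
 * where "referenced" means PGA_REFERENCED was set or
 * pmap_ts_referenced() reported hardware reference bits, and
 * "object mapped" stands for object->ref_count != 0.)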
*/ if ((m->aflags & PGA_REFERENCED) != 0) { vm_page_aflag_clear(m, PGA_REFERENCED); act_delta = 1; } else act_delta = 0; if (object->ref_count != 0) { act_delta += pmap_ts_referenced(m); } else { KASSERT(!pmap_page_is_mapped(m), ("vm_pageout_scan: page %p is mapped", m)); } if (act_delta != 0) { if (object->ref_count != 0) { vm_page_activate(m); /* * Increase the activation count if the page * was referenced while in the inactive queue. * This makes it less likely that the page will * be returned prematurely to the inactive * queue. */ m->act_count += act_delta + ACT_ADVANCE; goto drop_page; } else if ((object->flags & OBJ_DEAD) == 0) goto requeue_page; } /* * If the page appears to be clean at the machine-independent * layer, then remove all of its mappings from the pmap in * anticipation of placing it onto the cache queue. If, * however, any of the page's mappings allow write access, * then the page may still be modified until the last of those * mappings are removed. */ if (object->ref_count != 0) { vm_page_test_dirty(m); if (m->dirty == 0) pmap_remove_all(m); } if (m->dirty == 0) { /* * Clean pages can be freed. */ free_page: vm_page_free(m); PCPU_INC(cnt.v_dfree); --page_shortage; } else if ((object->flags & OBJ_DEAD) != 0) { /* * Leave dirty pages from dead objects at the front of * the queue. They are being paged out and freed by * the thread that destroyed the object. They will * leave the queue shortly after the scan finishes, so * they should be discounted from the inactive count. */ addl_page_shortage++; } else if ((m->flags & PG_WINATCFLS) == 0 && pass < 2) { /* * Dirty pages need to be paged out, but flushing * a page is extremely expensive versus freeing * a clean page. Rather than artificially limiting * the number of pages we can flush, we instead give * dirty pages extra priority on the inactive queue * by forcing them to be cycled through the queue * twice before being flushed, after which the * (now clean) page will cycle through once more * before being freed. This significantly extends * the thrash point for a heavily loaded machine. */ m->flags |= PG_WINATCFLS; requeue_page: vm_pagequeue_lock(pq); queues_locked = TRUE; vm_page_requeue_locked(m); } else if (maxlaunder > 0) { /* * We always want to try to flush some dirty pages if * we encounter them, to keep the system stable. * Normally this number is small, but under extreme * pressure where there are insufficient clean pages * on the inactive queue, we may have to go all out. */ if (object->type != OBJT_SWAP && object->type != OBJT_DEFAULT) pageout_ok = TRUE; else if (disable_swap_pageouts) pageout_ok = FALSE; else if (defer_swap_pageouts) pageout_ok = vm_page_count_min(); else pageout_ok = TRUE; if (!pageout_ok) goto requeue_page; error = vm_pageout_clean(m); /* * Decrement page_shortage on success to account for * the (future) cleaned page. Otherwise we could wind * up laundering or cleaning too many pages. */ if (error == 0) { page_shortage--; maxlaunder--; } else if (error == EDEADLK) { pageout_lock_miss++; vnodes_skipped++; } else if (error == EBUSY) { addl_page_shortage++; } vm_page_lock_assert(m, MA_NOTOWNED); goto relock_queues; } drop_page: vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); relock_queues: if (!queues_locked) { vm_pagequeue_lock(pq); queues_locked = TRUE; } next = TAILQ_NEXT(&vmd->vmd_marker, plinks.q); TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q); } vm_pagequeue_unlock(pq); #if !defined(NO_SWAPPING) /* * Wakeup the swapout daemon if we didn't cache or free the targeted * number of pages.
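 *
 * (Editorial sketch, not part of this change: the swap-backed
 * laundering policy applied above can be read as a standalone
 * predicate; the helper name is hypothetical, while the tunables
 * and types are the ones already used in this file:
 *
 *	static boolean_t
 *	swap_pageout_ok(vm_object_t object)
 *	{
 *		if (object->type != OBJT_SWAP &&
 *		    object->type != OBJT_DEFAULT)
 *			return (TRUE);	-- vnode etc.: launder freely
 *		if (disable_swap_pageouts)
 *			return (FALSE);	-- never push pages to swap
 *		if (defer_swap_pageouts)
 *			return (vm_page_count_min()); -- only when desperate
 *		return (TRUE);
 *	}
 *
 * and a page failing the predicate is requeued instead of laundered.)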
*/ if (vm_swap_enabled && page_shortage > 0) vm_req_vmdaemon(VM_SWAP_NORMAL); #endif /* * Wakeup the sync daemon if we skipped a vnode in a writeable object * and we didn't cache or free enough pages. */ if (vnodes_skipped > 0 && page_shortage > vm_cnt.v_free_target - vm_cnt.v_free_min) (void)speedup_syncer(); /* + * If the inactive queue scan fails repeatedly to meet its + * target, kill the largest process. + */ + vm_pageout_mightbe_oom(vmd, page_shortage, starting_page_shortage); + + /* * Compute the number of pages we want to try to move from the * active queue to the inactive queue. */ page_shortage = vm_cnt.v_inactive_target - vm_cnt.v_inactive_count + vm_paging_target() + deficit + addl_page_shortage; pq = &vmd->vmd_pagequeues[PQ_ACTIVE]; vm_pagequeue_lock(pq); maxscan = pq->pq_cnt; /* * If we're just idle polling, attempt to visit every * active page within 'update_period' seconds. */ scan_tick = ticks; if (vm_pageout_update_period != 0) { min_scan = pq->pq_cnt; min_scan *= scan_tick - vmd->vmd_last_active_scan; min_scan /= hz * vm_pageout_update_period; } else min_scan = 0; if (min_scan > 0 || (page_shortage > 0 && maxscan > 0)) vmd->vmd_last_active_scan = scan_tick; /* * Scan the active queue for pages that can be deactivated. Update * the per-page activity counter and use it to identify deactivation * candidates. */ for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned < min_scan || (page_shortage > 0 && scanned < maxscan)); m = next, scanned++) { KASSERT(m->queue == PQ_ACTIVE, ("vm_pageout_scan: page %p isn't active", m)); next = TAILQ_NEXT(m, plinks.q); if ((m->flags & PG_MARKER) != 0) continue; KASSERT((m->flags & PG_FICTITIOUS) == 0, ("Fictitious page %p cannot be in active queue", m)); KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("Unmanaged page %p cannot be in active queue", m)); if (!vm_pageout_page_lock(m, &next)) { vm_page_unlock(m); continue; } /* * The count for pagedaemon pages is done after checking the * page for eligibility... */ PCPU_INC(cnt.v_pdpages); /* * Check to see "how much" the page has been used. */ if ((m->aflags & PGA_REFERENCED) != 0) { vm_page_aflag_clear(m, PGA_REFERENCED); act_delta = 1; } else act_delta = 0; /* * Unlocked object ref count check. Two races are possible. * 1) The ref was transitioning to zero and we saw non-zero, * the pmap bits will be checked unnecessarily. * 2) The ref was transitioning to one and we saw zero. * The page lock prevents a new reference to this page so * we need not check the reference bits. */ if (m->object->ref_count != 0) act_delta += pmap_ts_referenced(m); /* * Advance or decay the act_count based on recent usage. */ if (act_delta != 0) { m->act_count += ACT_ADVANCE + act_delta; if (m->act_count > ACT_MAX) m->act_count = ACT_MAX; } else m->act_count -= min(m->act_count, ACT_DECLINE); /* * Move this page to the tail of the active or inactive * queue depending on usage. */ if (m->act_count == 0) { /* Dequeue to avoid later lock recursion. */ vm_page_dequeue_locked(m); vm_page_deactivate(m); page_shortage--; } else vm_page_requeue_locked(m); vm_page_unlock(m); } vm_pagequeue_unlock(pq); #if !defined(NO_SWAPPING) /* * Idle process swapout -- run once per second. */ if (vm_swap_idle_enabled) { static long lsec; if (time_second != lsec) { vm_req_vmdaemon(VM_SWAP_IDLE); lsec = time_second; } } #endif - - /* - * If we are critically low on one of RAM or swap and low on - * the other, kill the largest process.
However, we avoid - doing this on the first pass in order to give ourselves a - chance to flush out dirty vnode-backed pages and to allow - active pages to be moved to the inactive queue and reclaimed. - */ - vm_pageout_mightbe_oom(vmd, pass); } static int vm_pageout_oom_vote; /* * The pagedaemon threads randomly select one to perform the * OOM. Trying to kill processes before all pagedaemons * have failed to reach the free page target is premature. */ static void -vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass) +vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage, + int starting_page_shortage) { int old_vote; - if (pass <= 1 || !((swap_pager_avail < 64 && vm_page_count_min()) || - (swap_pager_full && vm_paging_target() > 0))) { + if (starting_page_shortage <= 0 || starting_page_shortage != + page_shortage) + vmd->vmd_oom_seq = 0; + else + vmd->vmd_oom_seq++; + if (vmd->vmd_oom_seq < vm_pageout_oom_seq) { if (vmd->vmd_oom) { vmd->vmd_oom = FALSE; atomic_subtract_int(&vm_pageout_oom_vote, 1); } return; } + /* + * Do not follow the call sequence until OOM condition is + * cleared. + */ + vmd->vmd_oom_seq = 0; + if (vmd->vmd_oom) return; vmd->vmd_oom = TRUE; old_vote = atomic_fetchadd_int(&vm_pageout_oom_vote, 1); if (old_vote != vm_ndomains - 1) return; /* * The current pagedaemon thread is the last in the quorum to * start OOM. Initiate the selection and signaling of the * victim. */ vm_pageout_oom(VM_OOM_MEM); /* * After one round of OOM terror, recall our vote. On the * next pass, the current pagedaemon would vote again if the low * memory condition is still there, due to vmd_oom being * false. */ vmd->vmd_oom = FALSE; atomic_subtract_int(&vm_pageout_oom_vote, 1); } +/* + * The OOM killer is the page daemon's action of last resort when + * memory allocation requests have been stalled for a prolonged period + * of time because it cannot reclaim memory. This function computes + * the approximate number of physical pages that could be reclaimed if + * the specified address space is destroyed. + * + * Private, anonymous memory owned by the address space is the + * principal resource that we expect to recover after an OOM kill. + * Since the physical pages mapped by the address space's COW entries + * are typically shared pages, they are unlikely to be released and so + * they are not counted. + * + * To get to the point where the page daemon runs the OOM killer, its + * efforts to write-back vnode-backed pages may have stalled. This + * could be caused by a memory allocation deadlock in the write path + * that might be resolved by an OOM kill. Therefore, physical pages + * belonging to vnode-backed objects are counted, because they might + * be freed without being written out first if the address space holds + * the last reference to an unlinked vnode. + * + * Similarly, physical pages belonging to OBJT_PHYS objects are + * counted because the address space might hold the last reference to + * the object.
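 *
 * (Editorial usage sketch, not part of this change: the sx_assert()
 * in the function body requires the map lock to be held, so a caller
 * must bracket the call the way vm_pageout_oom() below does:
 *
 *	if (vm_map_trylock_read(&vm->vm_map)) {
 *		size = vmspace_swap_count(vm);
 *		if (shortage == VM_OOM_MEM)
 *			size += vm_pageout_oom_pagecount(vm);
 *		vm_map_unlock_read(&vm->vm_map);
 *	}
 * )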
+ */ +static long +vm_pageout_oom_pagecount(struct vmspace *vmspace) +{ + vm_map_t map; + vm_map_entry_t entry; + vm_object_t obj; + long res; + + map = &vmspace->vm_map; + KASSERT(!map->system_map, ("system map")); + sx_assert(&map->lock, SA_LOCKED); + res = 0; + for (entry = map->header.next; entry != &map->header; + entry = entry->next) { + if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) + continue; + obj = entry->object.vm_object; + if (obj == NULL) + continue; + if ((entry->eflags & MAP_ENTRY_NEEDS_COPY) != 0 && + obj->ref_count != 1) + continue; + switch (obj->type) { + case OBJT_DEFAULT: + case OBJT_SWAP: + case OBJT_PHYS: + case OBJT_VNODE: + res += obj->resident_page_count; + break; + } + } + return (res); +} + void vm_pageout_oom(int shortage) { struct proc *p, *bigproc; vm_offset_t size, bigsize; struct thread *td; struct vmspace *vm; /* * We keep the process bigproc locked once we find it to keep anyone * from messing with it; however, there is a possibility of * deadlock if process B is bigproc and one of its child processes * attempts to propagate a signal to B while we are waiting for A's * lock while walking this list. To avoid this, we don't block on * the process lock but just skip a process if it is already locked. */ bigproc = NULL; bigsize = 0; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { int breakout; PROC_LOCK(p); /* * If this is a system, protected or killed process, skip it. */ if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC | P_PROTECTED | P_SYSTEM | P_WEXIT)) != 0 || p->p_pid == 1 || P_KILLED(p) || (p->p_pid < 48 && swap_pager_avail != 0)) { PROC_UNLOCK(p); continue; } /* * If the process is in a non-running type state, * don't touch it. Check all the threads individually. */ breakout = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (!TD_ON_RUNQ(td) && !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td) && - !TD_IS_SUSPENDED(td)) { + !TD_IS_SUSPENDED(td) && + !TD_IS_SWAPPED(td)) { thread_unlock(td); breakout = 1; break; } thread_unlock(td); } if (breakout) { PROC_UNLOCK(p); continue; } /* * get the process size */ vm = vmspace_acquire_ref(p); if (vm == NULL) { PROC_UNLOCK(p); continue; } _PHOLD(p); if (!vm_map_trylock_read(&vm->vm_map)) { _PRELE(p); PROC_UNLOCK(p); vmspace_free(vm); continue; } PROC_UNLOCK(p); size = vmspace_swap_count(vm); - vm_map_unlock_read(&vm->vm_map); if (shortage == VM_OOM_MEM) - size += vmspace_resident_count(vm); + size += vm_pageout_oom_pagecount(vm); + vm_map_unlock_read(&vm->vm_map); vmspace_free(vm); + /* - * if the this process is bigger than the biggest one + * If this process is bigger than the biggest one, * remember it. */ if (size > bigsize) { if (bigproc != NULL) PRELE(bigproc); bigproc = p; bigsize = size; } else { PRELE(p); } } sx_sunlock(&allproc_lock); if (bigproc != NULL) { if (vm_panic_on_oom != 0) panic("out of swap space"); PROC_LOCK(bigproc); killproc(bigproc, "out of swap space"); sched_nice(bigproc, PRIO_MIN); _PRELE(bigproc); PROC_UNLOCK(bigproc); wakeup(&vm_cnt.v_free_count); } } static void vm_pageout_worker(void *arg) { struct vm_domain *domain; int domidx; domidx = (uintptr_t)arg; domain = &vm_dom[domidx]; /* * XXXKIB It could be useful to bind pageout daemon threads to * the cores belonging to the domain, from which vm_page_array * is allocated.
*/ KASSERT(domain->vmd_segs != 0, ("domain without segments")); domain->vmd_last_active_scan = ticks; vm_pageout_init_marker(&domain->vmd_marker, PQ_INACTIVE); vm_pageout_init_marker(&domain->vmd_inacthead, PQ_INACTIVE); TAILQ_INSERT_HEAD(&domain->vmd_pagequeues[PQ_INACTIVE].pq_pl, &domain->vmd_inacthead, plinks.q); /* * The pageout daemon worker is never done, so loop forever. */ while (TRUE) { /* * If we have enough free memory, wakeup waiters. Do * not clear vm_pages_needed until we reach our target, * otherwise we may be woken up over and over again and * waste a lot of cpu. */ mtx_lock(&vm_page_queue_free_mtx); if (vm_pages_needed && !vm_page_count_min()) { if (!vm_paging_needed()) vm_pages_needed = 0; wakeup(&vm_cnt.v_free_count); } if (vm_pages_needed) { /* * We're still not done. Either vm_pages_needed was * set by another thread during the previous scan * (typically, this happens during a level 0 scan) or * vm_pages_needed was already set and the scan failed * to free enough pages. If we haven't yet performed * a level >= 2 scan (unlimited dirty cleaning), then * upgrade the level and scan again now. Otherwise, * sleep a bit and try again later. While sleeping, * vm_pages_needed can be cleared. */ if (domain->vmd_pass > 1) msleep(&vm_pages_needed, &vm_page_queue_free_mtx, PVM, "psleep", hz / 2); } else { /* * Good enough, sleep until required to refresh * stats. */ msleep(&vm_pages_needed, &vm_page_queue_free_mtx, PVM, "psleep", hz); } if (vm_pages_needed) { vm_cnt.v_pdwakeups++; domain->vmd_pass++; } else domain->vmd_pass = 0; mtx_unlock(&vm_page_queue_free_mtx); vm_pageout_scan(domain, domain->vmd_pass); } } /* * vm_pageout_init initialises basic pageout daemon settings. */ static void vm_pageout_init(void) { /* * Initialize some paging parameters. */ vm_cnt.v_interrupt_free_min = 2; if (vm_cnt.v_page_count < 2000) vm_pageout_page_count = 8; /* * v_free_reserved needs to include enough for the largest * swap pager structures plus enough for any pv_entry structs * when paging. */ if (vm_cnt.v_page_count > 1024) vm_cnt.v_free_min = 4 + (vm_cnt.v_page_count - 1024) / 200; else vm_cnt.v_free_min = 4; vm_cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE + vm_cnt.v_interrupt_free_min; vm_cnt.v_free_reserved = vm_pageout_page_count + vm_cnt.v_pageout_free_min + (vm_cnt.v_page_count / 768); vm_cnt.v_free_severe = vm_cnt.v_free_min / 2; vm_cnt.v_free_target = 4 * vm_cnt.v_free_min + vm_cnt.v_free_reserved; vm_cnt.v_free_min += vm_cnt.v_free_reserved; vm_cnt.v_free_severe += vm_cnt.v_free_reserved; vm_cnt.v_inactive_target = (3 * vm_cnt.v_free_target) / 2; if (vm_cnt.v_inactive_target > vm_cnt.v_free_count / 3) vm_cnt.v_inactive_target = vm_cnt.v_free_count / 3; /* * Set the default wakeup threshold to be 10% above the minimum * page limit. This keeps the steady state out of shortfall. */ vm_pageout_wakeup_thresh = (vm_cnt.v_free_min / 10) * 11; /* * Set interval in seconds for active scan. We want to visit each * page at least once every ten minutes. This is to prevent worst * case paging behaviors with stale active LRU. */ if (vm_pageout_update_period == 0) vm_pageout_update_period = 600; /* XXX does not really belong here */ if (vm_page_max_wired == 0) vm_page_max_wired = vm_cnt.v_free_count / 3; } /* * vm_pageout is the high level pageout daemon. 
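 *
 * (Editorial worked example, not part of this change: for
 * vm_cnt.v_page_count = 1048576, i.e. 4GB of 4KB pages, and assuming
 * build-time defaults of MAXBSIZE = 64KB and vm_pageout_page_count =
 * 32, vm_pageout_init() above computes approximately
 *
 *	v_free_min          = 4 + (1048576 - 1024) / 200 = 5241
 *	v_pageout_free_min  = (2 * 65536) / 4096 + 2     = 34
 *	v_free_reserved     = 32 + 34 + 1048576 / 768    = 1431
 *	v_free_target       = 4 * 5241 + 1431            = 22395
 *	v_free_min, final   = 5241 + 1431                = 6672
 *	wakeup_thresh       = (6672 / 10) * 11           = 7337
 *
 * so the daemon wakes when free memory drops near 29MB and reclaims
 * until roughly 87MB is free; exact figures vary with the constants.)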
*/ static void vm_pageout(void) { int error; #if MAXMEMDOM > 1 int i; #endif swap_pager_swap_init(); #if MAXMEMDOM > 1 for (i = 1; i < vm_ndomains; i++) { error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i, curproc, NULL, 0, 0, "dom%d", i); if (error != 0) { panic("starting pageout for domain %d, error %d\n", i, error); } } #endif error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL, 0, 0, "uma"); if (error != 0) panic("starting uma_reclaim helper, error %d\n", error); vm_pageout_worker((void *)(uintptr_t)0); } /* * Unless the free page queue lock is held by the caller, this function * should be regarded as advisory. Specifically, the caller should * not msleep() on &vm_cnt.v_free_count following this function unless * the free page queue lock is held until the msleep() is performed. */ void pagedaemon_wakeup(void) { if (!vm_pages_needed && curthread->td_proc != pageproc) { vm_pages_needed = 1; wakeup(&vm_pages_needed); } } #if !defined(NO_SWAPPING) static void vm_req_vmdaemon(int req) { static int lastrun = 0; mtx_lock(&vm_daemon_mtx); vm_pageout_req_swapout |= req; if ((ticks > (lastrun + hz)) || (ticks < lastrun)) { wakeup(&vm_daemon_needed); lastrun = ticks; } mtx_unlock(&vm_daemon_mtx); } static void vm_daemon(void) { struct rlimit rsslim; struct proc *p; struct thread *td; struct vmspace *vm; int breakout, swapout_flags, tryagain, attempts; #ifdef RACCT uint64_t rsize, ravailable; #endif while (TRUE) { mtx_lock(&vm_daemon_mtx); msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep", #ifdef RACCT racct_enable ? hz : 0 #else 0 #endif ); swapout_flags = vm_pageout_req_swapout; vm_pageout_req_swapout = 0; mtx_unlock(&vm_daemon_mtx); if (swapout_flags) swapout_procs(swapout_flags); /* * Scan the processes: deactivate the pages of any process * that exceeds its rlimits or is swapped out. */ tryagain = 0; attempts = 0; again: attempts++; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { vm_pindex_t limit, size; /* * if this is a system process or if we have already * looked at this process, skip it. */ PROC_LOCK(p); if (p->p_state != PRS_NORMAL || p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) { PROC_UNLOCK(p); continue; } /* * if the process is in a non-running type state, * don't touch it. */ breakout = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (!TD_ON_RUNQ(td) && !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td) && !TD_IS_SUSPENDED(td)) { thread_unlock(td); breakout = 1; break; } thread_unlock(td); } if (breakout) { PROC_UNLOCK(p); continue; } /* * get a limit */ lim_rlimit_proc(p, RLIMIT_RSS, &rsslim); limit = OFF_TO_IDX( qmin(rsslim.rlim_cur, rsslim.rlim_max)); /* * Let processes that are swapped out really be * swapped out: set the limit to nothing, which will * force a swap-out. */ if ((p->p_flag & P_INMEM) == 0) limit = 0; /* XXX */ vm = vmspace_acquire_ref(p); PROC_UNLOCK(p); if (vm == NULL) continue; size = vmspace_resident_count(vm); if (size >= limit) { vm_pageout_map_deactivate_pages( &vm->vm_map, limit); } #ifdef RACCT if (racct_enable) { rsize = IDX_TO_OFF(size); PROC_LOCK(p); racct_set(p, RACCT_RSS, rsize); ravailable = racct_get_available(p, RACCT_RSS); PROC_UNLOCK(p); if (rsize > ravailable) { /* * Don't be overly aggressive; this * might be an innocent process, * and the limit could've been exceeded * by some memory hog. Don't try * to deactivate more than 1/4th * of process' resident set size.
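 *
 * (Editorial worked example, not part of this change: with an RSS of
 * rsize = 400MB against ravailable = 100MB, the clamp below raises
 * the per-pass target to rsize - rsize / 4 = 300MB for the first
 * eight attempts, so at most a quarter of the resident set is
 * deactivated per pass; if the RSS still exceeds the RACCT limit
 * afterwards, tryagain schedules another pass, up to 10 attempts.)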
*/ if (attempts <= 8) { if (ravailable < rsize - (rsize / 4)) { ravailable = rsize - (rsize / 4); } } vm_pageout_map_deactivate_pages( &vm->vm_map, OFF_TO_IDX(ravailable)); /* Update RSS usage after paging out. */ size = vmspace_resident_count(vm); rsize = IDX_TO_OFF(size); PROC_LOCK(p); racct_set(p, RACCT_RSS, rsize); PROC_UNLOCK(p); if (rsize > ravailable) tryagain = 1; } } #endif vmspace_free(vm); } sx_sunlock(&allproc_lock); if (tryagain != 0 && attempts <= 10) goto again; } } #endif /* !defined(NO_SWAPPING) */ Index: projects/powernv =================================================================== --- projects/powernv (revision 290990) +++ projects/powernv (revision 290991) Property changes on: projects/powernv ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys:r290829-290990