Index: stable/11/sys/dev/ntb/ntb.c =================================================================== --- stable/11/sys/dev/ntb/ntb.c (revision 323454) +++ stable/11/sys/dev/ntb/ntb.c (revision 323455) @@ -1,477 +1,520 @@ /*- - * Copyright (c) 2016 Alexander Motin + * Copyright (c) 2016-2017 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "ntb.h" devclass_t ntb_hw_devclass; SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls"); struct ntb_child { device_t dev; + int function; int enabled; int mwoff; int mwcnt; int spadoff; int spadcnt; int dboff; - int dbmask; + int dbcnt; + uint64_t dbmask; void *ctx; const struct ntb_ctx_ops *ctx_ops; struct rmlock ctx_lock; struct ntb_child *next; }; int ntb_register_device(device_t dev) { struct ntb_child **cpp = device_get_softc(dev); struct ntb_child *nc; int i, mw, mwu, mwt, spad, spadu, spadt, db, dbu, dbt; char cfg[128] = ""; char buf[32]; char *n, *np, *c, *p, *name; mwu = 0; mwt = NTB_MW_COUNT(dev); spadu = 0; spadt = NTB_SPAD_COUNT(dev); dbu = 0; dbt = flsll(NTB_DB_VALID_MASK(dev)); device_printf(dev, "%d memory windows, %d scratchpads, " "%d doorbells\n", mwt, spadt, dbt); snprintf(buf, sizeof(buf), "hint.%s.%d.config", device_get_name(dev), device_get_unit(dev)); TUNABLE_STR_FETCH(buf, cfg, sizeof(cfg)); n = cfg; i = 0; while ((c = strsep(&n, ",")) != NULL) { np = c; name = strsep(&np, ":"); if (name != NULL && name[0] == 0) name = NULL; p = strsep(&np, ":"); mw = (p && p[0] != 0) ? strtol(p, NULL, 10) : mwt - mwu; p = strsep(&np, ":"); spad = (p && p[0] != 0) ? strtol(p, NULL, 10) : spadt - spadu; db = (np && np[0] != 0) ? strtol(np, NULL, 10) : dbt - dbu; if (mw > mwt - mwu || spad > spadt - spadu || db > dbt - dbu) { device_printf(dev, "Not enough resources for config\n"); break; } nc = malloc(sizeof(*nc), M_DEVBUF, M_WAITOK | M_ZERO); + nc->function = i; nc->mwoff = mwu; nc->mwcnt = mw; nc->spadoff = spadu; nc->spadcnt = spad; nc->dboff = dbu; + nc->dbcnt = db; nc->dbmask = (db == 0) ? 0 : (0xffffffffffffffff >> (64 - db)); rm_init(&nc->ctx_lock, "ntb ctx"); nc->dev = device_add_child(dev, name, -1); if (nc->dev == NULL) { ntb_unregister_device(dev); return (ENOMEM); } device_set_ivars(nc->dev, nc); *cpp = nc; cpp = &nc->next; if (bootverbose) { device_printf(dev, "%d \"%s\":", i, name); if (mw > 0) { printf(" memory windows %d", mwu); if (mw > 1) printf("-%d", mwu + mw - 1); } if (spad > 0) { printf(" scratchpads %d", spadu); if (spad > 1) printf("-%d", spadu + spad - 1); } if (db > 0) { printf(" doorbells %d", dbu); if (db > 1) printf("-%d", dbu + db - 1); } printf("\n"); } mwu += mw; spadu += spad; dbu += db; i++; } bus_generic_attach(dev); return (0); } int ntb_unregister_device(device_t dev) { struct ntb_child **cpp = device_get_softc(dev); struct ntb_child *nc; int error = 0; while ((nc = *cpp) != NULL) { *cpp = (*cpp)->next; error = device_delete_child(dev, nc->dev); if (error) break; rm_destroy(&nc->ctx_lock); free(nc, M_DEVBUF); } return (error); +} + +int +ntb_child_location_str(device_t dev, device_t child, char *buf, + size_t buflen) +{ + struct ntb_child *nc = device_get_ivars(child); + + snprintf(buf, buflen, "function=%d", nc->function); + return (0); +} + +int +ntb_print_child(device_t dev, device_t child) +{ + struct ntb_child *nc = device_get_ivars(child); + int retval; + + retval = bus_print_child_header(dev, child); + if (nc->mwcnt > 0) { + printf(" mw %d", nc->mwoff); + if (nc->mwcnt > 1) + printf("-%d", nc->mwoff + nc->mwcnt - 1); + } + if (nc->spadcnt > 0) { + printf(" spad %d", nc->spadoff); + if (nc->spadcnt > 1) + printf("-%d", nc->spadoff + nc->spadcnt - 1); + } + if (nc->dbcnt > 0) { + printf(" db %d", nc->dboff); + if (nc->dbcnt > 1) + printf("-%d", nc->dboff + nc->dbcnt - 1); + } + retval += printf(" at function %d", nc->function); + retval += bus_print_child_domain(dev, child); + retval += bus_print_child_footer(dev, child); + + return (retval); } void ntb_link_event(device_t dev) { struct ntb_child **cpp = device_get_softc(dev); struct ntb_child *nc; struct rm_priotracker ctx_tracker; enum ntb_speed speed; enum ntb_width width; if (NTB_LINK_IS_UP(dev, &speed, &width)) { device_printf(dev, "Link is up (PCIe %d.x / x%d)\n", (int)speed, (int)width); } else { device_printf(dev, "Link is down\n"); } for (nc = *cpp; nc != NULL; nc = nc->next) { rm_rlock(&nc->ctx_lock, &ctx_tracker); if (nc->ctx_ops != NULL && nc->ctx_ops->link_event != NULL) nc->ctx_ops->link_event(nc->ctx); rm_runlock(&nc->ctx_lock, &ctx_tracker); } } void ntb_db_event(device_t dev, uint32_t vec) { struct ntb_child **cpp = device_get_softc(dev); struct ntb_child *nc; struct rm_priotracker ctx_tracker; for (nc = *cpp; nc != NULL; nc = nc->next) { rm_rlock(&nc->ctx_lock, &ctx_tracker); if (nc->ctx_ops != NULL && nc->ctx_ops->db_event != NULL) nc->ctx_ops->db_event(nc->ctx, vec); rm_runlock(&nc->ctx_lock, &ctx_tracker); } } bool ntb_link_is_up(device_t ntb, enum ntb_speed *speed, enum ntb_width *width) { return (NTB_LINK_IS_UP(device_get_parent(ntb), speed, width)); } int ntb_link_enable(device_t ntb, enum ntb_speed speed, enum ntb_width width) { struct ntb_child *nc = device_get_ivars(ntb); struct ntb_child **cpp = device_get_softc(device_get_parent(nc->dev)); struct ntb_child *nc1; for (nc1 = *cpp; nc1 != NULL; nc1 = nc1->next) { if (nc1->enabled) { nc->enabled = 1; return (0); } } nc->enabled = 1; return (NTB_LINK_ENABLE(device_get_parent(ntb), speed, width)); } int ntb_link_disable(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); struct ntb_child **cpp = device_get_softc(device_get_parent(nc->dev)); struct ntb_child *nc1; if (!nc->enabled) return (0); nc->enabled = 0; for (nc1 = *cpp; nc1 != NULL; nc1 = nc1->next) { if (nc1->enabled) return (0); } return (NTB_LINK_DISABLE(device_get_parent(ntb))); } bool ntb_link_enabled(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); return (nc->enabled && NTB_LINK_ENABLED(device_get_parent(ntb))); } int ntb_set_ctx(device_t ntb, void *ctx, const struct ntb_ctx_ops *ctx_ops) { struct ntb_child *nc = device_get_ivars(ntb); if (ctx == NULL || ctx_ops == NULL) return (EINVAL); rm_wlock(&nc->ctx_lock); if (nc->ctx_ops != NULL) { rm_wunlock(&nc->ctx_lock); return (EINVAL); } nc->ctx = ctx; nc->ctx_ops = ctx_ops; /* * If applicaiton driver asks for link events, generate fake one now * to let it update link state without races while we hold the lock. */ if (ctx_ops->link_event != NULL) ctx_ops->link_event(ctx); rm_wunlock(&nc->ctx_lock); return (0); } void * ntb_get_ctx(device_t ntb, const struct ntb_ctx_ops **ctx_ops) { struct ntb_child *nc = device_get_ivars(ntb); KASSERT(nc->ctx != NULL && nc->ctx_ops != NULL, ("bogus")); if (ctx_ops != NULL) *ctx_ops = nc->ctx_ops; return (nc->ctx); } void ntb_clear_ctx(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); rm_wlock(&nc->ctx_lock); nc->ctx = NULL; nc->ctx_ops = NULL; rm_wunlock(&nc->ctx_lock); } uint8_t ntb_mw_count(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); return (nc->mwcnt); } int ntb_mw_get_range(device_t ntb, unsigned mw_idx, vm_paddr_t *base, caddr_t *vbase, size_t *size, size_t *align, size_t *align_size, bus_addr_t *plimit) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_MW_GET_RANGE(device_get_parent(ntb), mw_idx + nc->mwoff, base, vbase, size, align, align_size, plimit)); } int ntb_mw_set_trans(device_t ntb, unsigned mw_idx, bus_addr_t addr, size_t size) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_MW_SET_TRANS(device_get_parent(ntb), mw_idx + nc->mwoff, addr, size)); } int ntb_mw_clear_trans(device_t ntb, unsigned mw_idx) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_MW_CLEAR_TRANS(device_get_parent(ntb), mw_idx + nc->mwoff)); } int ntb_mw_get_wc(device_t ntb, unsigned mw_idx, vm_memattr_t *mode) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_MW_GET_WC(device_get_parent(ntb), mw_idx + nc->mwoff, mode)); } int ntb_mw_set_wc(device_t ntb, unsigned mw_idx, vm_memattr_t mode) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_MW_SET_WC(device_get_parent(ntb), mw_idx + nc->mwoff, mode)); } uint8_t ntb_spad_count(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); return (nc->spadcnt); } void ntb_spad_clear(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); unsigned i; for (i = 0; i < nc->spadcnt; i++) NTB_SPAD_WRITE(device_get_parent(ntb), i + nc->spadoff, 0); } int ntb_spad_write(device_t ntb, unsigned int idx, uint32_t val) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_SPAD_WRITE(device_get_parent(ntb), idx + nc->spadoff, val)); } int ntb_spad_read(device_t ntb, unsigned int idx, uint32_t *val) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_SPAD_READ(device_get_parent(ntb), idx + nc->spadoff, val)); } int ntb_peer_spad_write(device_t ntb, unsigned int idx, uint32_t val) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_PEER_SPAD_WRITE(device_get_parent(ntb), idx + nc->spadoff, val)); } int ntb_peer_spad_read(device_t ntb, unsigned int idx, uint32_t *val) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_PEER_SPAD_READ(device_get_parent(ntb), idx + nc->spadoff, val)); } uint64_t ntb_db_valid_mask(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); return (nc->dbmask); } int ntb_db_vector_count(device_t ntb) { return (NTB_DB_VECTOR_COUNT(device_get_parent(ntb))); } uint64_t ntb_db_vector_mask(device_t ntb, uint32_t vector) { struct ntb_child *nc = device_get_ivars(ntb); return ((NTB_DB_VECTOR_MASK(device_get_parent(ntb), vector) >> nc->dboff) & nc->dbmask); } int ntb_peer_db_addr(device_t ntb, bus_addr_t *db_addr, vm_size_t *db_size) { return (NTB_PEER_DB_ADDR(device_get_parent(ntb), db_addr, db_size)); } void ntb_db_clear(device_t ntb, uint64_t bits) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_DB_CLEAR(device_get_parent(ntb), bits << nc->dboff)); } void ntb_db_clear_mask(device_t ntb, uint64_t bits) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_DB_CLEAR_MASK(device_get_parent(ntb), bits << nc->dboff)); } uint64_t ntb_db_read(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); return ((NTB_DB_READ(device_get_parent(ntb)) >> nc->dboff) & nc->dbmask); } void ntb_db_set_mask(device_t ntb, uint64_t bits) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_DB_SET_MASK(device_get_parent(ntb), bits << nc->dboff)); } void ntb_peer_db_set(device_t ntb, uint64_t bits) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_PEER_DB_SET(device_get_parent(ntb), bits << nc->dboff)); } MODULE_VERSION(ntb, 1); Index: stable/11/sys/dev/ntb/ntb.h =================================================================== --- stable/11/sys/dev/ntb/ntb.h (revision 323454) +++ stable/11/sys/dev/ntb/ntb.h (revision 323455) @@ -1,409 +1,412 @@ /*- * Copyright (c) 2016 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NTB_H_ #define _NTB_H_ #include "ntb_if.h" extern devclass_t ntb_hw_devclass; SYSCTL_DECL(_hw_ntb); int ntb_register_device(device_t ntb); int ntb_unregister_device(device_t ntb); +int ntb_child_location_str(device_t dev, device_t child, char *buf, + size_t buflen); +int ntb_print_child(device_t dev, device_t child); /* * ntb_link_event() - notify driver context of a change in link status * @ntb: NTB device context * * Notify the driver context that the link status may have changed. The driver * should call intb_link_is_up() to get the current status. */ void ntb_link_event(device_t ntb); /* * ntb_db_event() - notify driver context of a doorbell event * @ntb: NTB device context * @vector: Interrupt vector number * * Notify the driver context of a doorbell event. If hardware supports * multiple interrupt vectors for doorbells, the vector number indicates which * vector received the interrupt. The vector number is relative to the first * vector used for doorbells, starting at zero, and must be less than * ntb_db_vector_count(). The driver may call ntb_db_read() to check which * doorbell bits need service, and ntb_db_vector_mask() to determine which of * those bits are associated with the vector number. */ void ntb_db_event(device_t ntb, uint32_t vec); /* * ntb_link_is_up() - get the current ntb link state * @ntb: NTB device context * @speed: OUT - The link speed expressed as PCIe generation number * @width: OUT - The link width expressed as the number of PCIe lanes * * RETURNS: true or false based on the hardware link state */ bool ntb_link_is_up(device_t ntb, enum ntb_speed *speed, enum ntb_width *width); /* * ntb_link_enable() - enable the link on the secondary side of the ntb * @ntb: NTB device context * @max_speed: The maximum link speed expressed as PCIe generation number[0] * @max_width: The maximum link width expressed as the number of PCIe lanes[0] * * Enable the link on the secondary side of the ntb. This can only be done * from the primary side of the ntb in primary or b2b topology. The ntb device * should train the link to its maximum speed and width, or the requested speed * and width, whichever is smaller, if supported. * * Return: Zero on success, otherwise an error number. * * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed * and width input will be ignored. */ int ntb_link_enable(device_t ntb, enum ntb_speed speed, enum ntb_width width); /* * ntb_link_disable() - disable the link on the secondary side of the ntb * @ntb: NTB device context * * Disable the link on the secondary side of the ntb. This can only be done * from the primary side of the ntb in primary or b2b topology. The ntb device * should disable the link. Returning from this call must indicate that a * barrier has passed, though with no more writes may pass in either direction * across the link, except if this call returns an error number. * * Return: Zero on success, otherwise an error number. */ int ntb_link_disable(device_t ntb); /* * get enable status of the link on the secondary side of the ntb */ bool ntb_link_enabled(device_t ntb); /* * ntb_set_ctx() - associate a driver context with an ntb device * @ntb: NTB device context * @ctx: Driver context * @ctx_ops: Driver context operations * * Associate a driver context and operations with a ntb device. The context is * provided by the client driver, and the driver may associate a different * context with each ntb device. * * Return: Zero if the context is associated, otherwise an error number. */ int ntb_set_ctx(device_t ntb, void *ctx, const struct ntb_ctx_ops *ctx_ops); /* * ntb_set_ctx() - get a driver context associated with an ntb device * @ntb: NTB device context * @ctx_ops: Driver context operations * * Get a driver context and operations associated with a ntb device. */ void * ntb_get_ctx(device_t ntb, const struct ntb_ctx_ops **ctx_ops); /* * ntb_clear_ctx() - disassociate any driver context from an ntb device * @ntb: NTB device context * * Clear any association that may exist between a driver context and the ntb * device. */ void ntb_clear_ctx(device_t ntb); /* * ntb_mw_count() - Get the number of memory windows available for KPI * consumers. * * (Excludes any MW wholly reserved for register access.) */ uint8_t ntb_mw_count(device_t ntb); /* * ntb_mw_get_range() - get the range of a memory window * @ntb: NTB device context * @idx: Memory window number * @base: OUT - the base address for mapping the memory window * @size: OUT - the size for mapping the memory window * @align: OUT - the base alignment for translating the memory window * @align_size: OUT - the size alignment for translating the memory window * * Get the range of a memory window. NULL may be given for any output * parameter if the value is not needed. The base and size may be used for * mapping the memory window, to access the peer memory. The alignment and * size may be used for translating the memory window, for the peer to access * memory on the local system. * * Return: Zero on success, otherwise an error number. */ int ntb_mw_get_range(device_t ntb, unsigned mw_idx, vm_paddr_t *base, caddr_t *vbase, size_t *size, size_t *align, size_t *align_size, bus_addr_t *plimit); /* * ntb_mw_set_trans() - set the translation of a memory window * @ntb: NTB device context * @idx: Memory window number * @addr: The dma address local memory to expose to the peer * @size: The size of the local memory to expose to the peer * * Set the translation of a memory window. The peer may access local memory * through the window starting at the address, up to the size. The address * must be aligned to the alignment specified by ntb_mw_get_range(). The size * must be aligned to the size alignment specified by ntb_mw_get_range(). The * address must be below the plimit specified by ntb_mw_get_range() (i.e. for * 32-bit BARs). * * Return: Zero on success, otherwise an error number. */ int ntb_mw_set_trans(device_t ntb, unsigned mw_idx, bus_addr_t addr, size_t size); /* * ntb_mw_clear_trans() - clear the translation of a memory window * @ntb: NTB device context * @idx: Memory window number * * Clear the translation of a memory window. The peer may no longer access * local memory through the window. * * Return: Zero on success, otherwise an error number. */ int ntb_mw_clear_trans(device_t ntb, unsigned mw_idx); /* * ntb_mw_get_wc - Get the write-combine status of a memory window * * Returns: Zero on success, setting *wc; otherwise an error number (e.g. if * idx is an invalid memory window). * * Mode is a VM_MEMATTR_* type. */ int ntb_mw_get_wc(device_t ntb, unsigned mw_idx, vm_memattr_t *mode); /* * ntb_mw_set_wc - Set the write-combine status of a memory window * * If 'mode' matches the current status, this does nothing and succeeds. Mode * is a VM_MEMATTR_* type. * * Returns: Zero on success, setting the caching attribute on the virtual * mapping of the BAR; otherwise an error number (e.g. if idx is an invalid * memory window, or if changing the caching attribute fails). */ int ntb_mw_set_wc(device_t ntb, unsigned mw_idx, vm_memattr_t mode); /* * ntb_spad_count() - get the total scratch regs usable * @ntb: pointer to ntb_softc instance * * This function returns the max 32bit scratchpad registers usable by the * upper layer. * * RETURNS: total number of scratch pad registers available */ uint8_t ntb_spad_count(device_t ntb); /* * ntb_get_max_spads() - zero local scratch registers * @ntb: pointer to ntb_softc instance * * This functions overwrites all local scratchpad registers with zeroes. */ void ntb_spad_clear(device_t ntb); /* * ntb_spad_write() - write to the secondary scratchpad register * @ntb: pointer to ntb_softc instance * @idx: index to the scratchpad register, 0 based * @val: the data value to put into the register * * This function allows writing of a 32bit value to the indexed scratchpad * register. The register resides on the secondary (external) side. * * RETURNS: An appropriate ERRNO error value on error, or zero for success. */ int ntb_spad_write(device_t ntb, unsigned int idx, uint32_t val); /* * ntb_spad_read() - read from the primary scratchpad register * @ntb: pointer to ntb_softc instance * @idx: index to scratchpad register, 0 based * @val: pointer to 32bit integer for storing the register value * * This function allows reading of the 32bit scratchpad register on * the primary (internal) side. * * RETURNS: An appropriate ERRNO error value on error, or zero for success. */ int ntb_spad_read(device_t ntb, unsigned int idx, uint32_t *val); /* * ntb_peer_spad_write() - write to the secondary scratchpad register * @ntb: pointer to ntb_softc instance * @idx: index to the scratchpad register, 0 based * @val: the data value to put into the register * * This function allows writing of a 32bit value to the indexed scratchpad * register. The register resides on the secondary (external) side. * * RETURNS: An appropriate ERRNO error value on error, or zero for success. */ int ntb_peer_spad_write(device_t ntb, unsigned int idx, uint32_t val); /* * ntb_peer_spad_read() - read from the primary scratchpad register * @ntb: pointer to ntb_softc instance * @idx: index to scratchpad register, 0 based * @val: pointer to 32bit integer for storing the register value * * This function allows reading of the 32bit scratchpad register on * the primary (internal) side. * * RETURNS: An appropriate ERRNO error value on error, or zero for success. */ int ntb_peer_spad_read(device_t ntb, unsigned int idx, uint32_t *val); /* * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb * @ntb: NTB device context * * Hardware may support different number or arrangement of doorbell bits. * * Return: A mask of doorbell bits supported by the ntb. */ uint64_t ntb_db_valid_mask(device_t ntb); /* * ntb_db_vector_count() - get the number of doorbell interrupt vectors * @ntb: NTB device context. * * Hardware may support different number of interrupt vectors. * * Return: The number of doorbell interrupt vectors. */ int ntb_db_vector_count(device_t ntb); /* * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector * @ntb: NTB device context * @vector: Doorbell vector number * * Each interrupt vector may have a different number or arrangement of bits. * * Return: A mask of doorbell bits serviced by a vector. */ uint64_t ntb_db_vector_mask(device_t ntb, uint32_t vector); /* * ntb_peer_db_addr() - address and size of the peer doorbell register * @ntb: NTB device context. * @db_addr: OUT - The address of the peer doorbell register. * @db_size: OUT - The number of bytes to write the peer doorbell register. * * Return the address of the peer doorbell register. This may be used, for * example, by drivers that offload memory copy operations to a dma engine. * The drivers may wish to ring the peer doorbell at the completion of memory * copy operations. For efficiency, and to simplify ordering of operations * between the dma memory copies and the ringing doorbell, the driver may * append one additional dma memory copy with the doorbell register as the * destination, after the memory copy operations. * * Return: Zero on success, otherwise an error number. * * Note that writing the peer doorbell via a memory window will *not* generate * an interrupt on the remote host; that must be done separately. */ int ntb_peer_db_addr(device_t ntb, bus_addr_t *db_addr, vm_size_t *db_size); /* * ntb_db_clear() - clear bits in the local doorbell register * @ntb: NTB device context. * @db_bits: Doorbell bits to clear. * * Clear bits in the local doorbell register, arming the bits for the next * doorbell. * * Return: Zero on success, otherwise an error number. */ void ntb_db_clear(device_t ntb, uint64_t bits); /* * ntb_db_clear_mask() - clear bits in the local doorbell mask * @ntb: NTB device context. * @db_bits: Doorbell bits to clear. * * Clear bits in the local doorbell mask register, allowing doorbell interrupts * from being generated for those doorbell bits. If a doorbell bit is already * set at the time the mask is cleared, and the corresponding mask bit is * changed from set to clear, then the ntb driver must ensure that * ntb_db_event() is called. If the hardware does not generate the interrupt * on clearing the mask bit, then the driver must call ntb_db_event() anyway. * * Return: Zero on success, otherwise an error number. */ void ntb_db_clear_mask(device_t ntb, uint64_t bits); /* * ntb_db_read() - read the local doorbell register * @ntb: NTB device context. * * Read the local doorbell register, and return the bits that are set. * * Return: The bits currently set in the local doorbell register. */ uint64_t ntb_db_read(device_t ntb); /* * ntb_db_set_mask() - set bits in the local doorbell mask * @ntb: NTB device context. * @db_bits: Doorbell mask bits to set. * * Set bits in the local doorbell mask register, preventing doorbell interrupts * from being generated for those doorbell bits. Bits that were already set * must remain set. * * Return: Zero on success, otherwise an error number. */ void ntb_db_set_mask(device_t ntb, uint64_t bits); /* * ntb_peer_db_set() - Set the doorbell on the secondary/external side * @ntb: pointer to ntb_softc instance * @bit: doorbell bits to ring * * This function allows triggering of a doorbell on the secondary/external * side that will initiate an interrupt on the remote host */ void ntb_peer_db_set(device_t ntb, uint64_t bits); #endif /* _NTB_H_ */ Index: stable/11/sys/dev/ntb/ntb_hw/ntb_hw_intel.c =================================================================== --- stable/11/sys/dev/ntb/ntb_hw/ntb_hw_intel.c (revision 323454) +++ stable/11/sys/dev/ntb/ntb_hw/ntb_hw_intel.c (revision 323455) @@ -1,3121 +1,3124 @@ /*- - * Copyright (c) 2016 Alexander Motin + * Copyright (c) 2016-2017 Alexander Motin * Copyright (C) 2013 Intel Corporation * Copyright (C) 2015 EMC Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * The Non-Transparent Bridge (NTB) is a device that allows you to connect * two or more systems using a PCI-e links, providing remote memory access. * * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs. * * NOTE: Much of the code in this module is shared with Linux. Any patches may * be picked up and redistributed in Linux with a dual GPL/BSD license. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ntb_hw_intel.h" #include "../ntb.h" #define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT) #define NTB_HB_TIMEOUT 1 /* second */ #define ATOM_LINK_RECOVERY_TIME 500 /* ms */ #define BAR_HIGH_MASK (~((1ull << 12) - 1)) #define NTB_MSIX_VER_GUARD 0xaabbccdd #define NTB_MSIX_RECEIVED 0xe0f0e0f0 /* * PCI constants could be somewhere more generic, but aren't defined/used in * pci.c. */ #define PCI_MSIX_ENTRY_SIZE 16 #define PCI_MSIX_ENTRY_LOWER_ADDR 0 #define PCI_MSIX_ENTRY_UPPER_ADDR 4 #define PCI_MSIX_ENTRY_DATA 8 enum ntb_device_type { NTB_XEON, NTB_ATOM }; /* ntb_conn_type are hardware numbers, cannot change. */ enum ntb_conn_type { NTB_CONN_TRANSPARENT = 0, NTB_CONN_B2B = 1, NTB_CONN_RP = 2, }; enum ntb_b2b_direction { NTB_DEV_USD = 0, NTB_DEV_DSD = 1, }; enum ntb_bar { NTB_CONFIG_BAR = 0, NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3, NTB_MAX_BARS }; enum { NTB_MSIX_GUARD = 0, NTB_MSIX_DATA0, NTB_MSIX_DATA1, NTB_MSIX_DATA2, NTB_MSIX_OFS0, NTB_MSIX_OFS1, NTB_MSIX_OFS2, NTB_MSIX_DONE, NTB_MAX_MSIX_SPAD }; /* Device features and workarounds */ #define HAS_FEATURE(ntb, feature) \ (((ntb)->features & (feature)) != 0) struct ntb_hw_info { uint32_t device_id; const char *desc; enum ntb_device_type type; uint32_t features; }; struct ntb_pci_bar_info { bus_space_tag_t pci_bus_tag; bus_space_handle_t pci_bus_handle; int pci_resource_id; struct resource *pci_resource; vm_paddr_t pbase; caddr_t vbase; vm_size_t size; vm_memattr_t map_mode; /* Configuration register offsets */ uint32_t psz_off; uint32_t ssz_off; uint32_t pbarxlat_off; }; struct ntb_int_info { struct resource *res; int rid; void *tag; }; struct ntb_vec { struct ntb_softc *ntb; uint32_t num; unsigned masked; }; struct ntb_reg { uint32_t ntb_ctl; uint32_t lnk_sta; uint8_t db_size; unsigned mw_bar[NTB_MAX_BARS]; }; struct ntb_alt_reg { uint32_t db_bell; uint32_t db_mask; uint32_t spad; }; struct ntb_xlat_reg { uint32_t bar0_base; uint32_t bar2_base; uint32_t bar4_base; uint32_t bar5_base; uint32_t bar2_xlat; uint32_t bar4_xlat; uint32_t bar5_xlat; uint32_t bar2_limit; uint32_t bar4_limit; uint32_t bar5_limit; }; struct ntb_b2b_addr { uint64_t bar0_addr; uint64_t bar2_addr64; uint64_t bar4_addr64; uint64_t bar4_addr32; uint64_t bar5_addr32; }; struct ntb_msix_data { uint32_t nmd_ofs; uint32_t nmd_data; }; struct ntb_softc { /* ntb.c context. Do not move! Must go first! */ void *ntb_store; device_t device; enum ntb_device_type type; uint32_t features; struct ntb_pci_bar_info bar_info[NTB_MAX_BARS]; struct ntb_int_info int_info[MAX_MSIX_INTERRUPTS]; uint32_t allocated_interrupts; struct ntb_msix_data peer_msix_data[XEON_NONLINK_DB_MSIX_BITS]; struct ntb_msix_data msix_data[XEON_NONLINK_DB_MSIX_BITS]; bool peer_msix_good; bool peer_msix_done; struct ntb_pci_bar_info *peer_lapic_bar; struct callout peer_msix_work; struct callout heartbeat_timer; struct callout lr_timer; struct ntb_vec *msix_vec; uint32_t ppd; enum ntb_conn_type conn_type; enum ntb_b2b_direction dev_type; /* Offset of peer bar0 in B2B BAR */ uint64_t b2b_off; /* Memory window used to access peer bar0 */ #define B2B_MW_DISABLED UINT8_MAX uint8_t b2b_mw_idx; uint32_t msix_xlat; uint8_t msix_mw_idx; uint8_t mw_count; uint8_t spad_count; uint8_t db_count; uint8_t db_vec_count; uint8_t db_vec_shift; /* Protects local db_mask. */ #define DB_MASK_LOCK(sc) mtx_lock_spin(&(sc)->db_mask_lock) #define DB_MASK_UNLOCK(sc) mtx_unlock_spin(&(sc)->db_mask_lock) #define DB_MASK_ASSERT(sc,f) mtx_assert(&(sc)->db_mask_lock, (f)) struct mtx db_mask_lock; volatile uint32_t ntb_ctl; volatile uint32_t lnk_sta; uint64_t db_valid_mask; uint64_t db_link_mask; uint64_t db_mask; uint64_t fake_db; /* NTB_SB01BASE_LOCKUP*/ uint64_t force_db; /* NTB_SB01BASE_LOCKUP*/ int last_ts; /* ticks @ last irq */ const struct ntb_reg *reg; const struct ntb_alt_reg *self_reg; const struct ntb_alt_reg *peer_reg; const struct ntb_xlat_reg *xlat_reg; }; #ifdef __i386__ static __inline uint64_t bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { return (bus_space_read_4(tag, handle, offset) | ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32); } static __inline void bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset, uint64_t val) { bus_space_write_4(tag, handle, offset, val); bus_space_write_4(tag, handle, offset + 4, val >> 32); } #endif #define intel_ntb_bar_read(SIZE, bar, offset) \ bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \ ntb->bar_info[(bar)].pci_bus_handle, (offset)) #define intel_ntb_bar_write(SIZE, bar, offset, val) \ bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \ ntb->bar_info[(bar)].pci_bus_handle, (offset), (val)) #define intel_ntb_reg_read(SIZE, offset) \ intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset) #define intel_ntb_reg_write(SIZE, offset, val) \ intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val) #define intel_ntb_mw_read(SIZE, offset) \ intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \ offset) #define intel_ntb_mw_write(SIZE, offset, val) \ intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \ offset, val) static int intel_ntb_probe(device_t device); static int intel_ntb_attach(device_t device); static int intel_ntb_detach(device_t device); static uint64_t intel_ntb_db_valid_mask(device_t dev); static void intel_ntb_spad_clear(device_t dev); static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector); static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width); static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed, enum ntb_width width); static int intel_ntb_link_disable(device_t dev); static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val); static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val); static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx); static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw); static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar); static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar, uint32_t *base, uint32_t *xlat, uint32_t *lmt); static int intel_ntb_map_pci_bars(struct ntb_softc *ntb); static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx, vm_memattr_t); static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *, const char *); static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar); static int map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar); static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb); static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail); static int intel_ntb_init_isr(struct ntb_softc *ntb); static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb); static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors); static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb); static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector); static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec); static void ndev_vec_isr(void *arg); static void ndev_irq_isr(void *arg); static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff); static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t); static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t); static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors); static void intel_ntb_free_msix_vec(struct ntb_softc *ntb); static void intel_ntb_get_msix_info(struct ntb_softc *ntb); static void intel_ntb_exchange_msix(void *); static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id); static void intel_ntb_detect_max_mw(struct ntb_softc *ntb); static int intel_ntb_detect_xeon(struct ntb_softc *ntb); static int intel_ntb_detect_atom(struct ntb_softc *ntb); static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb); static int intel_ntb_atom_init_dev(struct ntb_softc *ntb); static void intel_ntb_teardown_xeon(struct ntb_softc *ntb); static void configure_atom_secondary_side_bars(struct ntb_softc *ntb); static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx, enum ntb_bar regbar); static void xeon_set_sbar_base_and_limit(struct ntb_softc *, uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar); static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr, enum ntb_bar idx); static int xeon_setup_b2b_mw(struct ntb_softc *, const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr); static inline bool link_is_up(struct ntb_softc *ntb); static inline bool _xeon_link_is_up(struct ntb_softc *ntb); static inline bool atom_link_is_err(struct ntb_softc *ntb); static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *); static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *); static void atom_link_hb(void *arg); static void recover_atom_link(void *arg); static bool intel_ntb_poll_link(struct ntb_softc *ntb); static void save_bar_parameters(struct ntb_pci_bar_info *bar); static void intel_ntb_sysctl_init(struct ntb_softc *); static int sysctl_handle_features(SYSCTL_HANDLER_ARGS); static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS); static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS); static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS); static int sysctl_handle_register(SYSCTL_HANDLER_ARGS); static unsigned g_ntb_hw_debug_level; SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN, &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose"); #define intel_ntb_printf(lvl, ...) do { \ if ((lvl) <= g_ntb_hw_debug_level) { \ device_printf(ntb->device, __VA_ARGS__); \ } \ } while (0) #define _NTB_PAT_UC 0 #define _NTB_PAT_WC 1 #define _NTB_PAT_WT 4 #define _NTB_PAT_WP 5 #define _NTB_PAT_WB 6 #define _NTB_PAT_UCM 7 static unsigned g_ntb_mw_pat = _NTB_PAT_UC; SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN, &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): " "UC: " __XSTRING(_NTB_PAT_UC) ", " "WC: " __XSTRING(_NTB_PAT_WC) ", " "WT: " __XSTRING(_NTB_PAT_WT) ", " "WP: " __XSTRING(_NTB_PAT_WP) ", " "WB: " __XSTRING(_NTB_PAT_WB) ", " "UC-: " __XSTRING(_NTB_PAT_UCM)); static inline vm_memattr_t intel_ntb_pat_flags(void) { switch (g_ntb_mw_pat) { case _NTB_PAT_WC: return (VM_MEMATTR_WRITE_COMBINING); case _NTB_PAT_WT: return (VM_MEMATTR_WRITE_THROUGH); case _NTB_PAT_WP: return (VM_MEMATTR_WRITE_PROTECTED); case _NTB_PAT_WB: return (VM_MEMATTR_WRITE_BACK); case _NTB_PAT_UCM: return (VM_MEMATTR_WEAK_UNCACHEABLE); case _NTB_PAT_UC: /* FALLTHROUGH */ default: return (VM_MEMATTR_UNCACHEABLE); } } /* * Well, this obviously doesn't belong here, but it doesn't seem to exist * anywhere better yet. */ static inline const char * intel_ntb_vm_memattr_to_str(vm_memattr_t pat) { switch (pat) { case VM_MEMATTR_WRITE_COMBINING: return ("WRITE_COMBINING"); case VM_MEMATTR_WRITE_THROUGH: return ("WRITE_THROUGH"); case VM_MEMATTR_WRITE_PROTECTED: return ("WRITE_PROTECTED"); case VM_MEMATTR_WRITE_BACK: return ("WRITE_BACK"); case VM_MEMATTR_WEAK_UNCACHEABLE: return ("UNCACHED"); case VM_MEMATTR_UNCACHEABLE: return ("UNCACHEABLE"); default: return ("UNKNOWN"); } } static int g_ntb_msix_idx = 1; SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx, 0, "Use this memory window to access the peer MSIX message complex on " "certain Xeon-based NTB systems, as a workaround for a hardware errata. " "Like b2b_mw_idx, negative values index from the last available memory " "window. (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)"); static int g_ntb_mw_idx = -1; SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx, 0, "Use this memory window to access the peer NTB registers. A " "non-negative value starts from the first MW index; a negative value " "starts from the last MW index. The default is -1, i.e., the last " "available memory window. Both sides of the NTB MUST set the same " "value here! (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)"); /* Hardware owns the low 16 bits of features. */ #define NTB_BAR_SIZE_4K (1 << 0) #define NTB_SDOORBELL_LOCKUP (1 << 1) #define NTB_SB01BASE_LOCKUP (1 << 2) #define NTB_B2BDOORBELL_BIT14 (1 << 3) /* Software/configuration owns the top 16 bits. */ #define NTB_SPLIT_BAR (1ull << 16) #define NTB_FEATURES_STR \ "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \ "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K" static struct ntb_hw_info pci_ids[] = { /* XXX: PS/SS IDs left out until they are supported. */ { 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B", NTB_ATOM, 0 }, { 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 }, { 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 }, { 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K }, { 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP }, { 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP }, { 0x00000000, NULL, NTB_ATOM, 0 } }; static const struct ntb_reg atom_reg = { .ntb_ctl = ATOM_NTBCNTL_OFFSET, .lnk_sta = ATOM_LINK_STATUS_OFFSET, .db_size = sizeof(uint64_t), .mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 }, }; static const struct ntb_alt_reg atom_pri_reg = { .db_bell = ATOM_PDOORBELL_OFFSET, .db_mask = ATOM_PDBMSK_OFFSET, .spad = ATOM_SPAD_OFFSET, }; static const struct ntb_alt_reg atom_b2b_reg = { .db_bell = ATOM_B2B_DOORBELL_OFFSET, .spad = ATOM_B2B_SPAD_OFFSET, }; static const struct ntb_xlat_reg atom_sec_xlat = { #if 0 /* "FIXME" says the Linux driver. */ .bar0_base = ATOM_SBAR0BASE_OFFSET, .bar2_base = ATOM_SBAR2BASE_OFFSET, .bar4_base = ATOM_SBAR4BASE_OFFSET, .bar2_limit = ATOM_SBAR2LMT_OFFSET, .bar4_limit = ATOM_SBAR4LMT_OFFSET, #endif .bar2_xlat = ATOM_SBAR2XLAT_OFFSET, .bar4_xlat = ATOM_SBAR4XLAT_OFFSET, }; static const struct ntb_reg xeon_reg = { .ntb_ctl = XEON_NTBCNTL_OFFSET, .lnk_sta = XEON_LINK_STATUS_OFFSET, .db_size = sizeof(uint16_t), .mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 }, }; static const struct ntb_alt_reg xeon_pri_reg = { .db_bell = XEON_PDOORBELL_OFFSET, .db_mask = XEON_PDBMSK_OFFSET, .spad = XEON_SPAD_OFFSET, }; static const struct ntb_alt_reg xeon_b2b_reg = { .db_bell = XEON_B2B_DOORBELL_OFFSET, .spad = XEON_B2B_SPAD_OFFSET, }; static const struct ntb_xlat_reg xeon_sec_xlat = { .bar0_base = XEON_SBAR0BASE_OFFSET, .bar2_base = XEON_SBAR2BASE_OFFSET, .bar4_base = XEON_SBAR4BASE_OFFSET, .bar5_base = XEON_SBAR5BASE_OFFSET, .bar2_limit = XEON_SBAR2LMT_OFFSET, .bar4_limit = XEON_SBAR4LMT_OFFSET, .bar5_limit = XEON_SBAR5LMT_OFFSET, .bar2_xlat = XEON_SBAR2XLAT_OFFSET, .bar4_xlat = XEON_SBAR4XLAT_OFFSET, .bar5_xlat = XEON_SBAR5XLAT_OFFSET, }; static struct ntb_b2b_addr xeon_b2b_usd_addr = { .bar0_addr = XEON_B2B_BAR0_ADDR, .bar2_addr64 = XEON_B2B_BAR2_ADDR64, .bar4_addr64 = XEON_B2B_BAR4_ADDR64, .bar4_addr32 = XEON_B2B_BAR4_ADDR32, .bar5_addr32 = XEON_B2B_BAR5_ADDR32, }; static struct ntb_b2b_addr xeon_b2b_dsd_addr = { .bar0_addr = XEON_B2B_BAR0_ADDR, .bar2_addr64 = XEON_B2B_BAR2_ADDR64, .bar4_addr64 = XEON_B2B_BAR4_ADDR64, .bar4_addr32 = XEON_B2B_BAR4_ADDR32, .bar5_addr32 = XEON_B2B_BAR5_ADDR32, }; SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW, 0, "B2B MW segment overrides -- MUST be the same on both sides"); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN, &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon " "hardware, use this 64-bit address on the bus between the NTB devices for " "the window at BAR2, on the upstream side of the link. MUST be the same " "address on both sides."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN, &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN, &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 " "(split-BAR mode)."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN, &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 " "(split-BAR mode)."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN, &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon " "hardware, use this 64-bit address on the bus between the NTB devices for " "the window at BAR2, on the downstream side of the link. MUST be the same" " address on both sides."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN, &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN, &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 " "(split-BAR mode)."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN, &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 " "(split-BAR mode)."); /* * OS <-> Driver interface structures */ MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations"); /* * OS <-> Driver linkage functions */ static int intel_ntb_probe(device_t device) { struct ntb_hw_info *p; p = intel_ntb_get_device_info(pci_get_devid(device)); if (p == NULL) return (ENXIO); device_set_desc(device, p->desc); return (0); } static int intel_ntb_attach(device_t device) { struct ntb_softc *ntb; struct ntb_hw_info *p; int error; ntb = device_get_softc(device); p = intel_ntb_get_device_info(pci_get_devid(device)); ntb->device = device; ntb->type = p->type; ntb->features = p->features; ntb->b2b_mw_idx = B2B_MW_DISABLED; ntb->msix_mw_idx = B2B_MW_DISABLED; /* Heartbeat timer for NTB_ATOM since there is no link interrupt */ callout_init(&ntb->heartbeat_timer, 1); callout_init(&ntb->lr_timer, 1); callout_init(&ntb->peer_msix_work, 1); mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN); if (ntb->type == NTB_ATOM) error = intel_ntb_detect_atom(ntb); else error = intel_ntb_detect_xeon(ntb); if (error != 0) goto out; intel_ntb_detect_max_mw(ntb); pci_enable_busmaster(ntb->device); error = intel_ntb_map_pci_bars(ntb); if (error != 0) goto out; if (ntb->type == NTB_ATOM) error = intel_ntb_atom_init_dev(ntb); else error = intel_ntb_xeon_init_dev(ntb); if (error != 0) goto out; intel_ntb_spad_clear(device); intel_ntb_poll_link(ntb); intel_ntb_sysctl_init(ntb); /* Attach children to this controller */ error = ntb_register_device(device); out: if (error != 0) intel_ntb_detach(device); return (error); } static int intel_ntb_detach(device_t device) { struct ntb_softc *ntb; ntb = device_get_softc(device); /* Detach & delete all children */ ntb_unregister_device(device); if (ntb->self_reg != NULL) { DB_MASK_LOCK(ntb); db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask); DB_MASK_UNLOCK(ntb); } callout_drain(&ntb->heartbeat_timer); callout_drain(&ntb->lr_timer); callout_drain(&ntb->peer_msix_work); pci_disable_busmaster(ntb->device); if (ntb->type == NTB_XEON) intel_ntb_teardown_xeon(ntb); intel_ntb_teardown_interrupts(ntb); mtx_destroy(&ntb->db_mask_lock); intel_ntb_unmap_pci_bar(ntb); return (0); } /* * Driver internal routines */ static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw) { KASSERT(mw < ntb->mw_count, ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count)); KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw")); return (ntb->reg->mw_bar[mw]); } static inline bool bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar) { /* XXX This assertion could be stronger. */ KASSERT(bar < NTB_MAX_BARS, ("bogus bar")); return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR)); } static inline void bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base, uint32_t *xlat, uint32_t *lmt) { uint32_t basev, lmtv, xlatv; switch (bar) { case NTB_B2B_BAR_1: basev = ntb->xlat_reg->bar2_base; lmtv = ntb->xlat_reg->bar2_limit; xlatv = ntb->xlat_reg->bar2_xlat; break; case NTB_B2B_BAR_2: basev = ntb->xlat_reg->bar4_base; lmtv = ntb->xlat_reg->bar4_limit; xlatv = ntb->xlat_reg->bar4_xlat; break; case NTB_B2B_BAR_3: basev = ntb->xlat_reg->bar5_base; lmtv = ntb->xlat_reg->bar5_limit; xlatv = ntb->xlat_reg->bar5_xlat; break; default: KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS, ("bad bar")); basev = lmtv = xlatv = 0; break; } if (base != NULL) *base = basev; if (xlat != NULL) *xlat = xlatv; if (lmt != NULL) *lmt = lmtv; } static int intel_ntb_map_pci_bars(struct ntb_softc *ntb) { int rc; ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0); rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]); if (rc != 0) goto out; ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2); rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]); if (rc != 0) goto out; ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET; ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4); rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]); if (rc != 0) goto out; ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET; if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR)) goto out; ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5); rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]); ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET; out: if (rc != 0) device_printf(ntb->device, "unable to allocate pci resource\n"); return (rc); } static void print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar, const char *kind) { device_printf(ntb->device, "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n", PCI_RID2BAR(bar->pci_resource_id), bar->vbase, (char *)bar->vbase + bar->size - 1, (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1), (uintmax_t)bar->size, kind); } static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar) { bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY, &bar->pci_resource_id, RF_ACTIVE); if (bar->pci_resource == NULL) return (ENXIO); save_bar_parameters(bar); bar->map_mode = VM_MEMATTR_UNCACHEABLE; print_map_success(ntb, bar, "mmr"); return (0); } static int map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar) { int rc; vm_memattr_t mapmode; uint8_t bar_size_bits = 0; bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY, &bar->pci_resource_id, RF_ACTIVE); if (bar->pci_resource == NULL) return (ENXIO); save_bar_parameters(bar); /* * Ivytown NTB BAR sizes are misreported by the hardware due to a * hardware issue. To work around this, query the size it should be * configured to by the device and modify the resource to correspond to * this new size. The BIOS on systems with this problem is required to * provide enough address space to allow the driver to make this change * safely. * * Ideally I could have just specified the size when I allocated the * resource like: * bus_alloc_resource(ntb->device, * SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul, * 1ul << bar_size_bits, RF_ACTIVE); * but the PCI driver does not honor the size in this call, so we have * to modify it after the fact. */ if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) { if (bar->pci_resource_id == PCIR_BAR(2)) bar_size_bits = pci_read_config(ntb->device, XEON_PBAR23SZ_OFFSET, 1); else bar_size_bits = pci_read_config(ntb->device, XEON_PBAR45SZ_OFFSET, 1); rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY, bar->pci_resource, bar->pbase, bar->pbase + (1ul << bar_size_bits) - 1); if (rc != 0) { device_printf(ntb->device, "unable to resize bar\n"); return (rc); } save_bar_parameters(bar); } bar->map_mode = VM_MEMATTR_UNCACHEABLE; print_map_success(ntb, bar, "mw"); /* * Optionally, mark MW BARs as anything other than UC to improve * performance. */ mapmode = intel_ntb_pat_flags(); if (mapmode == bar->map_mode) return (0); rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode); if (rc == 0) { bar->map_mode = mapmode; device_printf(ntb->device, "Marked BAR%d v:[%p-%p] p:[%p-%p] as " "%s.\n", PCI_RID2BAR(bar->pci_resource_id), bar->vbase, (char *)bar->vbase + bar->size - 1, (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1), intel_ntb_vm_memattr_to_str(mapmode)); } else device_printf(ntb->device, "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as " "%s: %d\n", PCI_RID2BAR(bar->pci_resource_id), bar->vbase, (char *)bar->vbase + bar->size - 1, (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1), intel_ntb_vm_memattr_to_str(mapmode), rc); /* Proceed anyway */ return (0); } static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb) { struct ntb_pci_bar_info *current_bar; int i; for (i = 0; i < NTB_MAX_BARS; i++) { current_bar = &ntb->bar_info[i]; if (current_bar->pci_resource != NULL) bus_release_resource(ntb->device, SYS_RES_MEMORY, current_bar->pci_resource_id, current_bar->pci_resource); } } static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors) { uint32_t i; int rc; for (i = 0; i < num_vectors; i++) { ntb->int_info[i].rid = i + 1; ntb->int_info[i].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE); if (ntb->int_info[i].res == NULL) { device_printf(ntb->device, "bus_alloc_resource failed\n"); return (ENOMEM); } ntb->int_info[i].tag = NULL; ntb->allocated_interrupts++; rc = bus_setup_intr(ntb->device, ntb->int_info[i].res, INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr, &ntb->msix_vec[i], &ntb->int_info[i].tag); if (rc != 0) { device_printf(ntb->device, "bus_setup_intr failed\n"); return (ENXIO); } } return (0); } /* * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector * cannot be allocated for each MSI-X message. JHB seems to think remapping * should be okay. This tunable should enable us to test that hypothesis * when someone gets their hands on some Xeon hardware. */ static int ntb_force_remap_mode; SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN, &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped" " to a smaller number of ithreads, even if the desired number are " "available"); /* * In case it is NOT ok, give consumers an abort button. */ static int ntb_prefer_intx; SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN, &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather " "than remapping MSI-X messages over available slots (match Linux driver " "behavior)"); /* * Remap the desired number of MSI-X messages to available ithreads in a simple * round-robin fashion. */ static int intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail) { u_int *vectors; uint32_t i; int rc; if (ntb_prefer_intx != 0) return (ENXIO); vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK); for (i = 0; i < desired; i++) vectors[i] = (i % avail) + 1; rc = pci_remap_msix(dev, desired, vectors); free(vectors, M_NTB); return (rc); } static int intel_ntb_init_isr(struct ntb_softc *ntb) { uint32_t desired_vectors, num_vectors; int rc; ntb->allocated_interrupts = 0; ntb->last_ts = ticks; /* * Mask all doorbell interrupts. (Except link events!) */ DB_MASK_LOCK(ntb); ntb->db_mask = ntb->db_valid_mask; db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask); DB_MASK_UNLOCK(ntb); num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device), ntb->db_count); if (desired_vectors >= 1) { rc = pci_alloc_msix(ntb->device, &num_vectors); if (ntb_force_remap_mode != 0 && rc == 0 && num_vectors == desired_vectors) num_vectors--; if (rc == 0 && num_vectors < desired_vectors) { rc = intel_ntb_remap_msix(ntb->device, desired_vectors, num_vectors); if (rc == 0) num_vectors = desired_vectors; else pci_release_msi(ntb->device); } if (rc != 0) num_vectors = 1; } else num_vectors = 1; if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) { if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { device_printf(ntb->device, "Errata workaround does not support MSI or INTX\n"); return (EINVAL); } ntb->db_vec_count = 1; ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT; rc = intel_ntb_setup_legacy_interrupt(ntb); } else { if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS && HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { device_printf(ntb->device, "Errata workaround expects %d doorbell bits\n", XEON_NONLINK_DB_MSIX_BITS); return (EINVAL); } intel_ntb_create_msix_vec(ntb, num_vectors); rc = intel_ntb_setup_msix(ntb, num_vectors); } if (rc != 0) { device_printf(ntb->device, "Error allocating interrupts: %d\n", rc); intel_ntb_free_msix_vec(ntb); } return (rc); } static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb) { int rc; ntb->int_info[0].rid = 0; ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ, &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE); if (ntb->int_info[0].res == NULL) { device_printf(ntb->device, "bus_alloc_resource failed\n"); return (ENOMEM); } ntb->int_info[0].tag = NULL; ntb->allocated_interrupts = 1; rc = bus_setup_intr(ntb->device, ntb->int_info[0].res, INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr, ntb, &ntb->int_info[0].tag); if (rc != 0) { device_printf(ntb->device, "bus_setup_intr failed\n"); return (ENXIO); } return (0); } static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb) { struct ntb_int_info *current_int; int i; for (i = 0; i < ntb->allocated_interrupts; i++) { current_int = &ntb->int_info[i]; if (current_int->tag != NULL) bus_teardown_intr(ntb->device, current_int->res, current_int->tag); if (current_int->res != NULL) bus_release_resource(ntb->device, SYS_RES_IRQ, rman_get_rid(current_int->res), current_int->res); } intel_ntb_free_msix_vec(ntb); pci_release_msi(ntb->device); } /* * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon. Abstract it * out to make code clearer. */ static inline uint64_t db_ioread(struct ntb_softc *ntb, uint64_t regoff) { if (ntb->type == NTB_ATOM) return (intel_ntb_reg_read(8, regoff)); KASSERT(ntb->type == NTB_XEON, ("bad ntb type")); return (intel_ntb_reg_read(2, regoff)); } static inline void db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val) { KASSERT((val & ~ntb->db_valid_mask) == 0, ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__, (uintmax_t)(val & ~ntb->db_valid_mask), (uintmax_t)ntb->db_valid_mask)); if (regoff == ntb->self_reg->db_mask) DB_MASK_ASSERT(ntb, MA_OWNED); db_iowrite_raw(ntb, regoff, val); } static inline void db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val) { if (ntb->type == NTB_ATOM) { intel_ntb_reg_write(8, regoff, val); return; } KASSERT(ntb->type == NTB_XEON, ("bad ntb type")); intel_ntb_reg_write(2, regoff, (uint16_t)val); } static void intel_ntb_db_set_mask(device_t dev, uint64_t bits) { struct ntb_softc *ntb = device_get_softc(dev); DB_MASK_LOCK(ntb); ntb->db_mask |= bits; if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask); DB_MASK_UNLOCK(ntb); } static void intel_ntb_db_clear_mask(device_t dev, uint64_t bits) { struct ntb_softc *ntb = device_get_softc(dev); uint64_t ibits; int i; KASSERT((bits & ~ntb->db_valid_mask) == 0, ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__, (uintmax_t)(bits & ~ntb->db_valid_mask), (uintmax_t)ntb->db_valid_mask)); DB_MASK_LOCK(ntb); ibits = ntb->fake_db & ntb->db_mask & bits; ntb->db_mask &= ~bits; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { /* Simulate fake interrupts if unmasked DB bits are set. */ ntb->force_db |= ibits; for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0) swi_sched(ntb->int_info[i].tag, 0); } } else { db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask); } DB_MASK_UNLOCK(ntb); } static uint64_t intel_ntb_db_read(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) return (ntb->fake_db); return (db_ioread(ntb, ntb->self_reg->db_bell)); } static void intel_ntb_db_clear(device_t dev, uint64_t bits) { struct ntb_softc *ntb = device_get_softc(dev); KASSERT((bits & ~ntb->db_valid_mask) == 0, ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__, (uintmax_t)(bits & ~ntb->db_valid_mask), (uintmax_t)ntb->db_valid_mask)); if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { DB_MASK_LOCK(ntb); ntb->fake_db &= ~bits; DB_MASK_UNLOCK(ntb); return; } db_iowrite(ntb, ntb->self_reg->db_bell, bits); } static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector) { uint64_t shift, mask; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { /* * Remap vectors in custom way to make at least first * three doorbells to not generate stray events. * This breaks Linux compatibility (if one existed) * when more then one DB is used (not by if_ntb). */ if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1) return (1 << db_vector); if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1) return (0x7ffc); } shift = ntb->db_vec_shift; mask = (1ull << shift) - 1; return (mask << (shift * db_vector)); } static void intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec) { uint64_t vec_mask; ntb->last_ts = ticks; vec_mask = intel_ntb_vec_mask(ntb, vec); if ((vec_mask & ntb->db_link_mask) != 0) { if (intel_ntb_poll_link(ntb)) ntb_link_event(ntb->device); } if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) && (vec_mask & ntb->db_link_mask) == 0) { DB_MASK_LOCK(ntb); /* * Do not report same DB events again if not cleared yet, * unless the mask was just cleared for them and this * interrupt handler call can be the consequence of it. */ vec_mask &= ~ntb->fake_db | ntb->force_db; ntb->force_db &= ~vec_mask; /* Update our internal doorbell register. */ ntb->fake_db |= vec_mask; /* Do not report masked DB events. */ vec_mask &= ~ntb->db_mask; DB_MASK_UNLOCK(ntb); } if ((vec_mask & ntb->db_valid_mask) != 0) ntb_db_event(ntb->device, vec); } static void ndev_vec_isr(void *arg) { struct ntb_vec *nvec = arg; intel_ntb_interrupt(nvec->ntb, nvec->num); } static void ndev_irq_isr(void *arg) { /* If we couldn't set up MSI-X, we only have the one vector. */ intel_ntb_interrupt(arg, 0); } static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors) { uint32_t i; ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB, M_ZERO | M_WAITOK); for (i = 0; i < num_vectors; i++) { ntb->msix_vec[i].num = i; ntb->msix_vec[i].ntb = ntb; } return (0); } static void intel_ntb_free_msix_vec(struct ntb_softc *ntb) { if (ntb->msix_vec == NULL) return; free(ntb->msix_vec, M_NTB); ntb->msix_vec = NULL; } static void intel_ntb_get_msix_info(struct ntb_softc *ntb) { struct pci_devinfo *dinfo; struct pcicfg_msix *msix; uint32_t laddr, data, i, offset; dinfo = device_get_ivars(ntb->device); msix = &dinfo->cfg.msix; CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data)); for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE; laddr = bus_read_4(msix->msix_table_res, offset + PCI_MSIX_ENTRY_LOWER_ADDR); intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr); KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE, ("local MSIX addr 0x%x not in MSI base 0x%x", laddr, MSI_INTEL_ADDR_BASE)); ntb->msix_data[i].nmd_ofs = laddr; data = bus_read_4(msix->msix_table_res, offset + PCI_MSIX_ENTRY_DATA); intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data); ntb->msix_data[i].nmd_data = data; } } static struct ntb_hw_info * intel_ntb_get_device_info(uint32_t device_id) { struct ntb_hw_info *ep = pci_ids; while (ep->device_id) { if (ep->device_id == device_id) return (ep); ++ep; } return (NULL); } static void intel_ntb_teardown_xeon(struct ntb_softc *ntb) { if (ntb->reg != NULL) intel_ntb_link_disable(ntb->device); } static void intel_ntb_detect_max_mw(struct ntb_softc *ntb) { if (ntb->type == NTB_ATOM) { ntb->mw_count = ATOM_MW_COUNT; return; } if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT; else ntb->mw_count = XEON_SNB_MW_COUNT; } static int intel_ntb_detect_xeon(struct ntb_softc *ntb) { uint8_t ppd, conn_type; ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1); ntb->ppd = ppd; if ((ppd & XEON_PPD_DEV_TYPE) != 0) ntb->dev_type = NTB_DEV_DSD; else ntb->dev_type = NTB_DEV_USD; if ((ppd & XEON_PPD_SPLIT_BAR) != 0) ntb->features |= NTB_SPLIT_BAR; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) && !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { device_printf(ntb->device, "Can not apply SB01BASE_LOCKUP workaround " "with split BARs disabled!\n"); device_printf(ntb->device, "Expect system hangs under heavy NTB traffic!\n"); ntb->features &= ~NTB_SB01BASE_LOCKUP; } /* * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP * errata workaround; only do one at a time. */ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) ntb->features &= ~NTB_SDOORBELL_LOCKUP; conn_type = ppd & XEON_PPD_CONN_TYPE; switch (conn_type) { case NTB_CONN_B2B: ntb->conn_type = conn_type; break; case NTB_CONN_RP: case NTB_CONN_TRANSPARENT: default: device_printf(ntb->device, "Unsupported connection type: %u\n", (unsigned)conn_type); return (ENXIO); } return (0); } static int intel_ntb_detect_atom(struct ntb_softc *ntb) { uint32_t ppd, conn_type; ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4); ntb->ppd = ppd; if ((ppd & ATOM_PPD_DEV_TYPE) != 0) ntb->dev_type = NTB_DEV_DSD; else ntb->dev_type = NTB_DEV_USD; conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8; switch (conn_type) { case NTB_CONN_B2B: ntb->conn_type = conn_type; break; default: device_printf(ntb->device, "Unsupported NTB configuration\n"); return (ENXIO); } return (0); } static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb) { int rc; ntb->spad_count = XEON_SPAD_COUNT; ntb->db_count = XEON_DB_COUNT; ntb->db_link_mask = XEON_DB_LINK_BIT; ntb->db_vec_count = XEON_DB_MSIX_VECTOR_COUNT; ntb->db_vec_shift = XEON_DB_MSIX_VECTOR_SHIFT; if (ntb->conn_type != NTB_CONN_B2B) { device_printf(ntb->device, "Connection type %d not supported\n", ntb->conn_type); return (ENXIO); } ntb->reg = &xeon_reg; ntb->self_reg = &xeon_pri_reg; ntb->peer_reg = &xeon_b2b_reg; ntb->xlat_reg = &xeon_sec_xlat; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { ntb->force_db = ntb->fake_db = 0; ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) % ntb->mw_count; intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n", g_ntb_msix_idx, ntb->msix_mw_idx); rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx, VM_MEMATTR_UNCACHEABLE); KASSERT(rc == 0, ("shouldn't fail")); } else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) { /* * There is a Xeon hardware errata related to writes to SDOORBELL or * B2BDOORBELL in conjunction with inbound access to NTB MMIO space, * which may hang the system. To workaround this, use a memory * window to access the interrupt and scratch pad registers on the * remote system. */ ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) % ntb->mw_count; intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n", g_ntb_mw_idx, ntb->b2b_mw_idx); rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx, VM_MEMATTR_UNCACHEABLE); KASSERT(rc == 0, ("shouldn't fail")); } else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14)) /* * HW Errata on bit 14 of b2bdoorbell register. Writes will not be * mirrored to the remote system. Shrink the number of bits by one, * since bit 14 is the last bit. * * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register * anyway. Nor for non-B2B connection types. */ ntb->db_count = XEON_DB_COUNT - 1; ntb->db_valid_mask = (1ull << ntb->db_count) - 1; if (ntb->dev_type == NTB_DEV_USD) rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr, &xeon_b2b_usd_addr); else rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr, &xeon_b2b_dsd_addr); if (rc != 0) return (rc); /* Enable Bus Master and Memory Space on the secondary side */ intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET, PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); /* * Mask all doorbell interrupts. */ DB_MASK_LOCK(ntb); ntb->db_mask = ntb->db_valid_mask; db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask); DB_MASK_UNLOCK(ntb); rc = intel_ntb_init_isr(ntb); return (rc); } static int intel_ntb_atom_init_dev(struct ntb_softc *ntb) { int error; KASSERT(ntb->conn_type == NTB_CONN_B2B, ("Unsupported NTB configuration (%d)\n", ntb->conn_type)); ntb->spad_count = ATOM_SPAD_COUNT; ntb->db_count = ATOM_DB_COUNT; ntb->db_vec_count = ATOM_DB_MSIX_VECTOR_COUNT; ntb->db_vec_shift = ATOM_DB_MSIX_VECTOR_SHIFT; ntb->db_valid_mask = (1ull << ntb->db_count) - 1; ntb->reg = &atom_reg; ntb->self_reg = &atom_pri_reg; ntb->peer_reg = &atom_b2b_reg; ntb->xlat_reg = &atom_sec_xlat; /* * FIXME - MSI-X bug on early Atom HW, remove once internal issue is * resolved. Mask transaction layer internal parity errors. */ pci_write_config(ntb->device, 0xFC, 0x4, 4); configure_atom_secondary_side_bars(ntb); /* Enable Bus Master and Memory Space on the secondary side */ intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET, PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); error = intel_ntb_init_isr(ntb); if (error != 0) return (error); /* Initiate PCI-E link training */ intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb); return (0); } /* XXX: Linux driver doesn't seem to do any of this for Atom. */ static void configure_atom_secondary_side_bars(struct ntb_softc *ntb) { if (ntb->dev_type == NTB_DEV_USD) { intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET, XEON_B2B_BAR2_ADDR64); intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET, XEON_B2B_BAR4_ADDR64); intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64); intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64); } else { intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET, XEON_B2B_BAR2_ADDR64); intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET, XEON_B2B_BAR4_ADDR64); intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64); intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64); } } /* * When working around Xeon SDOORBELL errata by remapping remote registers in a * MW, limit the B2B MW to half a MW. By sharing a MW, half the shared MW * remains for use by a higher layer. * * Will only be used if working around SDOORBELL errata and the BIOS-configured * MW size is sufficiently large. */ static unsigned int ntb_b2b_mw_share; SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share, 0, "If enabled (non-zero), prefer to share half of the B2B peer register " "MW with higher level consumers. Both sides of the NTB MUST set the same " "value here."); static void xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx, enum ntb_bar regbar) { struct ntb_pci_bar_info *bar; uint8_t bar_sz; if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3) return; bar = &ntb->bar_info[idx]; bar_sz = pci_read_config(ntb->device, bar->psz_off, 1); if (idx == regbar) { if (ntb->b2b_off != 0) bar_sz--; else bar_sz = 0; } pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1); bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1); (void)bar_sz; } static void xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr, enum ntb_bar idx, enum ntb_bar regbar) { uint64_t reg_val; uint32_t base_reg, lmt_reg; bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg); if (idx == regbar) { if (ntb->b2b_off) bar_addr += ntb->b2b_off; else bar_addr = 0; } if (!bar_is_64bit(ntb, idx)) { intel_ntb_reg_write(4, base_reg, bar_addr); reg_val = intel_ntb_reg_read(4, base_reg); (void)reg_val; intel_ntb_reg_write(4, lmt_reg, bar_addr); reg_val = intel_ntb_reg_read(4, lmt_reg); (void)reg_val; } else { intel_ntb_reg_write(8, base_reg, bar_addr); reg_val = intel_ntb_reg_read(8, base_reg); (void)reg_val; intel_ntb_reg_write(8, lmt_reg, bar_addr); reg_val = intel_ntb_reg_read(8, lmt_reg); (void)reg_val; } } static void xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx) { struct ntb_pci_bar_info *bar; bar = &ntb->bar_info[idx]; if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) { intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr); base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off); } else { intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr); base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off); } (void)base_addr; } static int xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr) { struct ntb_pci_bar_info *b2b_bar; vm_size_t bar_size; uint64_t bar_addr; enum ntb_bar b2b_bar_num, i; if (ntb->b2b_mw_idx == B2B_MW_DISABLED) { b2b_bar = NULL; b2b_bar_num = NTB_CONFIG_BAR; ntb->b2b_off = 0; } else { b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx); KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS, ("invalid b2b mw bar")); b2b_bar = &ntb->bar_info[b2b_bar_num]; bar_size = b2b_bar->size; if (ntb_b2b_mw_share != 0 && (bar_size >> 1) >= XEON_B2B_MIN_SIZE) ntb->b2b_off = bar_size >> 1; else if (bar_size >= XEON_B2B_MIN_SIZE) { ntb->b2b_off = 0; } else { device_printf(ntb->device, "B2B bar size is too small!\n"); return (EIO); } } /* * Reset the secondary bar sizes to match the primary bar sizes. * (Except, disable or halve the size of the B2B secondary bar.) */ for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++) xeon_reset_sbar_size(ntb, i, b2b_bar_num); bar_addr = 0; if (b2b_bar_num == NTB_CONFIG_BAR) bar_addr = addr->bar0_addr; else if (b2b_bar_num == NTB_B2B_BAR_1) bar_addr = addr->bar2_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) bar_addr = addr->bar4_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2) bar_addr = addr->bar4_addr32; else if (b2b_bar_num == NTB_B2B_BAR_3) bar_addr = addr->bar5_addr32; else KASSERT(false, ("invalid bar")); intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr); /* * Other SBARs are normally hit by the PBAR xlat, except for the b2b * register BAR. The B2B BAR is either disabled above or configured * half-size. It starts at PBAR xlat + offset. * * Also set up incoming BAR limits == base (zero length window). */ xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1, b2b_bar_num); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32, NTB_B2B_BAR_2, b2b_bar_num); xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32, NTB_B2B_BAR_3, b2b_bar_num); } else xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64, NTB_B2B_BAR_2, b2b_bar_num); /* Zero incoming translation addrs */ intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0); intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0); if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { uint32_t xlat_reg, lmt_reg; enum ntb_bar bar_num; /* * We point the chosen MSIX MW BAR xlat to remote LAPIC for * workaround */ bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx); bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg); if (bar_is_64bit(ntb, bar_num)) { intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE); ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg); intel_ntb_reg_write(8, lmt_reg, 0); } else { intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE); ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg); intel_ntb_reg_write(4, lmt_reg, 0); } ntb->peer_lapic_bar = &ntb->bar_info[bar_num]; } (void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET); (void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET); /* Zero outgoing translation limits (whole bar size windows) */ intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0); intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0); /* Set outgoing translation offsets */ xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2); xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3); } else xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2); /* Set the translation offset for B2B registers */ bar_addr = 0; if (b2b_bar_num == NTB_CONFIG_BAR) bar_addr = peer_addr->bar0_addr; else if (b2b_bar_num == NTB_B2B_BAR_1) bar_addr = peer_addr->bar2_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) bar_addr = peer_addr->bar4_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2) bar_addr = peer_addr->bar4_addr32; else if (b2b_bar_num == NTB_B2B_BAR_3) bar_addr = peer_addr->bar5_addr32; else KASSERT(false, ("invalid bar")); /* * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits * at a time. */ intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff); intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32); return (0); } static inline bool _xeon_link_is_up(struct ntb_softc *ntb) { if (ntb->conn_type == NTB_CONN_TRANSPARENT) return (true); return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0); } static inline bool link_is_up(struct ntb_softc *ntb) { if (ntb->type == NTB_XEON) return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good || !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))); KASSERT(ntb->type == NTB_ATOM, ("ntb type")); return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0); } static inline bool atom_link_is_err(struct ntb_softc *ntb) { uint32_t status; KASSERT(ntb->type == NTB_ATOM, ("ntb type")); status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET); if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0) return (true); status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET); return ((status & ATOM_IBIST_ERR_OFLOW) != 0); } /* Atom does not have link status interrupt, poll on that platform */ static void atom_link_hb(void *arg) { struct ntb_softc *ntb = arg; sbintime_t timo, poll_ts; timo = NTB_HB_TIMEOUT * hz; poll_ts = ntb->last_ts + timo; /* * Delay polling the link status if an interrupt was received, unless * the cached link status says the link is down. */ if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) { timo = poll_ts - ticks; goto out; } if (intel_ntb_poll_link(ntb)) ntb_link_event(ntb->device); if (!link_is_up(ntb) && atom_link_is_err(ntb)) { /* Link is down with error, proceed with recovery */ callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb); return; } out: callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb); } static void atom_perform_link_restart(struct ntb_softc *ntb) { uint32_t status; /* Driver resets the NTB ModPhy lanes - magic! */ intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0); intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40); intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60); intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60); /* Driver waits 100ms to allow the NTB ModPhy to settle */ pause("ModPhy", hz / 10); /* Clear AER Errors, write to clear */ status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET); status &= PCIM_AER_COR_REPLAY_ROLLOVER; intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status); /* Clear unexpected electrical idle event in LTSSM, write to clear */ status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET); status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI; intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status); /* Clear DeSkew Buffer error, write to clear */ status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET); status |= ATOM_DESKEWSTS_DBERR; intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status); status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET); status &= ATOM_IBIST_ERR_OFLOW; intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status); /* Releases the NTB state machine to allow the link to retrain */ status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET); status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT; intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status); } static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused, enum ntb_width width __unused) { struct ntb_softc *ntb = device_get_softc(dev); uint32_t cntl; intel_ntb_printf(2, "%s\n", __func__); if (ntb->type == NTB_ATOM) { pci_write_config(ntb->device, NTB_PPD_OFFSET, ntb->ppd | ATOM_PPD_INIT_LINK, 4); return (0); } if (ntb->conn_type == NTB_CONN_TRANSPARENT) { ntb_link_event(dev); return (0); } cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK); cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP; cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP; if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP; intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl); return (0); } static int intel_ntb_link_disable(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); uint32_t cntl; intel_ntb_printf(2, "%s\n", __func__); if (ntb->conn_type == NTB_CONN_TRANSPARENT) { ntb_link_event(dev); return (0); } cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP); cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP); cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK; intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl); return (0); } static bool intel_ntb_link_enabled(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); uint32_t cntl; if (ntb->type == NTB_ATOM) { cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4); return ((cntl & ATOM_PPD_INIT_LINK) != 0); } if (ntb->conn_type == NTB_CONN_TRANSPARENT) return (true); cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); return ((cntl & NTB_CNTL_LINK_DISABLE) == 0); } static void recover_atom_link(void *arg) { struct ntb_softc *ntb = arg; unsigned speed, width, oldspeed, oldwidth; uint32_t status32; atom_perform_link_restart(ntb); /* * There is a potential race between the 2 NTB devices recovering at * the same time. If the times are the same, the link will not recover * and the driver will be stuck in this loop forever. Add a random * interval to the recovery time to prevent this race. */ status32 = arc4random() % ATOM_LINK_RECOVERY_TIME; pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000); if (atom_link_is_err(ntb)) goto retry; status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); if ((status32 & ATOM_CNTL_LINK_DOWN) != 0) goto out; status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta); width = NTB_LNK_STA_WIDTH(status32); speed = status32 & NTB_LINK_SPEED_MASK; oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta); oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK; if (oldwidth != width || oldspeed != speed) goto retry; out: callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb, ntb); return; retry: callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link, ntb); } /* * Polls the HW link status register(s); returns true if something has changed. */ static bool intel_ntb_poll_link(struct ntb_softc *ntb) { uint32_t ntb_cntl; uint16_t reg_val; if (ntb->type == NTB_ATOM) { ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); if (ntb_cntl == ntb->ntb_ctl) return (false); ntb->ntb_ctl = ntb_cntl; ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta); } else { db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask); reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2); if (reg_val == ntb->lnk_sta) return (false); ntb->lnk_sta = reg_val; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { if (_xeon_link_is_up(ntb)) { if (!ntb->peer_msix_good) { callout_reset(&ntb->peer_msix_work, 0, intel_ntb_exchange_msix, ntb); return (false); } } else { ntb->peer_msix_good = false; ntb->peer_msix_done = false; } } } return (true); } static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *ntb) { if (!link_is_up(ntb)) return (NTB_SPEED_NONE); return (ntb->lnk_sta & NTB_LINK_SPEED_MASK); } static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *ntb) { if (!link_is_up(ntb)) return (NTB_WIDTH_NONE); return (NTB_LNK_STA_WIDTH(ntb->lnk_sta)); } SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0, "Driver state, statistics, and HW registers"); #define NTB_REGSZ_MASK (3ul << 30) #define NTB_REG_64 (1ul << 30) #define NTB_REG_32 (2ul << 30) #define NTB_REG_16 (3ul << 30) #define NTB_REG_8 (0ul << 30) #define NTB_DB_READ (1ul << 29) #define NTB_PCI_REG (1ul << 28) #define NTB_REGFLAGS_MASK (NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG) static void intel_ntb_sysctl_init(struct ntb_softc *ntb) { struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree, *tmptree; ctx = device_get_sysctl_ctx(ntb->device); globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device)); SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status", CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_link_status_human, "A", "Link status (human readable)"); SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active", CTLFLAG_RD | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_status, "IU", "Link status (1=active, 0=inactive)"); SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up", CTLFLAG_RW | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_admin, "IU", "Set/get interface status (1=UP, 0=DOWN)"); tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info", CTLFLAG_RD, NULL, "Driver state, statistics, and HW registers"); tree_par = SYSCTL_CHILDREN(tree); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD, &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port"); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD, &ntb->dev_type, 0, "0 - USD; 1 - DSD"); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ppd", CTLFLAG_RD, &ntb->ppd, 0, "Raw PPD register (cached)"); if (ntb->b2b_mw_idx != B2B_MW_DISABLED) { SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD, &ntb->b2b_mw_idx, 0, "Index of the MW used for B2B remote register access"); SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off", CTLFLAG_RD, &ntb->b2b_off, "If non-zero, offset of B2B register region in shared MW"); } SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features", CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_features, "A", "Features/errata of this NTB device"); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD, __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0, "NTB CTL register (cached)"); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD, __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0, "LNK STA register (cached)"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD, &ntb->mw_count, 0, "MW count"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD, &ntb->spad_count, 0, "Scratchpad count"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD, &ntb->db_count, 0, "Doorbell count"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD, &ntb->db_vec_count, 0, "Doorbell vector count"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD, &ntb->db_vec_shift, 0, "Doorbell vector shift"); SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD, &ntb->db_valid_mask, "Doorbell valid mask"); SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD, &ntb->db_link_mask, "Doorbell link mask"); SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD, &ntb->db_mask, "Doorbell mask (cached)"); tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers", CTLFLAG_RD, NULL, "Raw HW registers (big-endian)"); regpar = SYSCTL_CHILDREN(tmptree); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->reg->ntb_ctl, sysctl_handle_register, "IU", "NTB Control register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | 0x19c, sysctl_handle_register, "IU", "NTB Link Capabilities"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | 0x1a0, sysctl_handle_register, "IU", "NTB Link Control register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask, sysctl_handle_register, "QU", "Doorbell mask register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell, sysctl_handle_register, "QU", "Doorbell register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar2_xlat, sysctl_handle_register, "QU", "Incoming XLAT23 register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar4_xlat, sysctl_handle_register, "IU", "Incoming XLAT4 register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar5_xlat, sysctl_handle_register, "IU", "Incoming XLAT5 register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar4_xlat, sysctl_handle_register, "QU", "Incoming XLAT45 register"); } SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar2_limit, sysctl_handle_register, "QU", "Incoming LMT23 register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar4_limit, sysctl_handle_register, "IU", "Incoming LMT4 register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar5_limit, sysctl_handle_register, "IU", "Incoming LMT5 register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar4_limit, sysctl_handle_register, "QU", "Incoming LMT45 register"); } if (ntb->type == NTB_ATOM) return; tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats", CTLFLAG_RD, NULL, "Xeon HW statistics"); statpar = SYSCTL_CHILDREN(tmptree); SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_16 | XEON_USMEMMISS_OFFSET, sysctl_handle_register, "SU", "Upstream Memory Miss"); tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err", CTLFLAG_RD, NULL, "Xeon HW errors"); errpar = SYSCTL_CHILDREN(tmptree); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET, sysctl_handle_register, "CU", "PPD"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET, sysctl_handle_register, "CU", "PBAR23 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET, sysctl_handle_register, "CU", "PBAR4 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET, sysctl_handle_register, "CU", "PBAR5 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET, sysctl_handle_register, "CU", "SBAR23 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET, sysctl_handle_register, "CU", "SBAR4 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET, sysctl_handle_register, "CU", "SBAR5 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET, sysctl_handle_register, "SU", "DEVSTS"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET, sysctl_handle_register, "SU", "LNKSTS"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET, sysctl_handle_register, "SU", "SLNKSTS"); SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET, sysctl_handle_register, "IU", "UNCERRSTS"); SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET, sysctl_handle_register, "IU", "CORERRSTS"); if (ntb->conn_type != NTB_CONN_B2B) return; SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off, sysctl_handle_register, "QU", "Outgoing XLAT23 register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off, sysctl_handle_register, "IU", "Outgoing XLAT4 register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off, sysctl_handle_register, "IU", "Outgoing XLAT5 register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off, sysctl_handle_register, "QU", "Outgoing XLAT45 register"); } SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | XEON_PBAR2LMT_OFFSET, sysctl_handle_register, "QU", "Outgoing LMT23 register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | XEON_PBAR4LMT_OFFSET, sysctl_handle_register, "IU", "Outgoing LMT4 register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | XEON_PBAR5LMT_OFFSET, sysctl_handle_register, "IU", "Outgoing LMT5 register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | XEON_PBAR4LMT_OFFSET, sysctl_handle_register, "QU", "Outgoing LMT45 register"); } SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar0_base, sysctl_handle_register, "QU", "Secondary BAR01 base register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar2_base, sysctl_handle_register, "QU", "Secondary BAR23 base register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar4_base, sysctl_handle_register, "IU", "Secondary BAR4 base register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar5_base, sysctl_handle_register, "IU", "Secondary BAR5 base register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar4_base, sysctl_handle_register, "QU", "Secondary BAR45 base register"); } } static int sysctl_handle_features(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb = arg1; struct sbuf sb; int error; sbuf_new_for_sysctl(&sb, NULL, 256, req); sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR); error = sbuf_finish(&sb); sbuf_delete(&sb); if (error || !req->newptr) return (error); return (EINVAL); } static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb = arg1; unsigned old, new; int error; old = intel_ntb_link_enabled(ntb->device); error = SYSCTL_OUT(req, &old, sizeof(old)); if (error != 0 || req->newptr == NULL) return (error); error = SYSCTL_IN(req, &new, sizeof(new)); if (error != 0) return (error); intel_ntb_printf(0, "Admin set interface state to '%sabled'\n", (new != 0)? "en" : "dis"); if (new != 0) error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); else error = intel_ntb_link_disable(ntb->device); return (error); } static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb = arg1; struct sbuf sb; enum ntb_speed speed; enum ntb_width width; int error; sbuf_new_for_sysctl(&sb, NULL, 32, req); if (intel_ntb_link_is_up(ntb->device, &speed, &width)) sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u", (unsigned)speed, (unsigned)width); else sbuf_printf(&sb, "down"); error = sbuf_finish(&sb); sbuf_delete(&sb); if (error || !req->newptr) return (error); return (EINVAL); } static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb = arg1; unsigned res; int error; res = intel_ntb_link_is_up(ntb->device, NULL, NULL); error = SYSCTL_OUT(req, &res, sizeof(res)); if (error || !req->newptr) return (error); return (EINVAL); } static int sysctl_handle_register(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb; const void *outp; uintptr_t sz; uint64_t umv; char be[sizeof(umv)]; size_t outsz; uint32_t reg; bool db, pci; int error; ntb = arg1; reg = arg2 & ~NTB_REGFLAGS_MASK; sz = arg2 & NTB_REGSZ_MASK; db = (arg2 & NTB_DB_READ) != 0; pci = (arg2 & NTB_PCI_REG) != 0; KASSERT(!(db && pci), ("bogus")); if (db) { KASSERT(sz == NTB_REG_64, ("bogus")); umv = db_ioread(ntb, reg); outsz = sizeof(uint64_t); } else { switch (sz) { case NTB_REG_64: if (pci) umv = pci_read_config(ntb->device, reg, 8); else umv = intel_ntb_reg_read(8, reg); outsz = sizeof(uint64_t); break; case NTB_REG_32: if (pci) umv = pci_read_config(ntb->device, reg, 4); else umv = intel_ntb_reg_read(4, reg); outsz = sizeof(uint32_t); break; case NTB_REG_16: if (pci) umv = pci_read_config(ntb->device, reg, 2); else umv = intel_ntb_reg_read(2, reg); outsz = sizeof(uint16_t); break; case NTB_REG_8: if (pci) umv = pci_read_config(ntb->device, reg, 1); else umv = intel_ntb_reg_read(1, reg); outsz = sizeof(uint8_t); break; default: panic("bogus"); break; } } /* Encode bigendian so that sysctl -x is legible. */ be64enc(be, umv); outp = ((char *)be) + sizeof(umv) - outsz; error = SYSCTL_OUT(req, outp, outsz); if (error || !req->newptr) return (error); return (EINVAL); } static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx) { if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 && uidx >= ntb->b2b_mw_idx) || (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx)) uidx++; if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 && uidx >= ntb->b2b_mw_idx) && (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx)) uidx++; return (uidx); } #ifndef EARLY_AP_STARTUP static int msix_ready; static void intel_ntb_msix_ready(void *arg __unused) { msix_ready = 1; } SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY, intel_ntb_msix_ready, NULL); #endif static void intel_ntb_exchange_msix(void *ctx) { struct ntb_softc *ntb; uint32_t val; unsigned i; ntb = ctx; if (ntb->peer_msix_good) goto msix_good; if (ntb->peer_msix_done) goto msix_done; #ifndef EARLY_AP_STARTUP /* Block MSIX negotiation until SMP started and IRQ reshuffled. */ if (!msix_ready) goto reschedule; #endif intel_ntb_get_msix_info(ntb); for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i, ntb->msix_data[i].nmd_data); intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i, ntb->msix_data[i].nmd_ofs - ntb->msix_xlat); } intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD); intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val); if (val != NTB_MSIX_VER_GUARD) goto reschedule; for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val); intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val); ntb->peer_msix_data[i].nmd_data = val; intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val); intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val); ntb->peer_msix_data[i].nmd_ofs = val; } ntb->peer_msix_done = true; msix_done: intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE, NTB_MSIX_RECEIVED); intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val); if (val != NTB_MSIX_RECEIVED) goto reschedule; intel_ntb_spad_clear(ntb->device); ntb->peer_msix_good = true; /* Give peer time to see our NTB_MSIX_RECEIVED. */ goto reschedule; msix_good: intel_ntb_poll_link(ntb); ntb_link_event(ntb->device); return; reschedule: ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2); if (_xeon_link_is_up(ntb)) { callout_reset(&ntb->peer_msix_work, hz * (ntb->peer_msix_good ? 2 : 1) / 100, intel_ntb_exchange_msix, ntb); } else intel_ntb_spad_clear(ntb->device); } /* * Public API to the rest of the OS */ static uint8_t intel_ntb_spad_count(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); return (ntb->spad_count); } static uint8_t intel_ntb_mw_count(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); uint8_t res; res = ntb->mw_count; if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0) res--; if (ntb->msix_mw_idx != B2B_MW_DISABLED) res--; return (res); } static int intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val) { struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val); return (0); } /* * Zeros the local scratchpad. */ static void intel_ntb_spad_clear(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); unsigned i; for (i = 0; i < ntb->spad_count; i++) intel_ntb_spad_write(dev, i, 0); } static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val) { struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); *val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4); return (0); } static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val) { struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val); else intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val); return (0); } static int intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val) { struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) *val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4); else *val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4); return (0); } static int intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base, caddr_t *vbase, size_t *size, size_t *align, size_t *align_size, bus_addr_t *plimit) { struct ntb_softc *ntb = device_get_softc(dev); struct ntb_pci_bar_info *bar; bus_addr_t limit; size_t bar_b2b_off; enum ntb_bar bar_num; if (mw_idx >= intel_ntb_mw_count(dev)) return (EINVAL); mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx); bar_num = intel_ntb_mw_to_bar(ntb, mw_idx); bar = &ntb->bar_info[bar_num]; bar_b2b_off = 0; if (mw_idx == ntb->b2b_mw_idx) { KASSERT(ntb->b2b_off != 0, ("user shouldn't get non-shared b2b mw")); bar_b2b_off = ntb->b2b_off; } if (bar_is_64bit(ntb, bar_num)) limit = BUS_SPACE_MAXADDR; else limit = BUS_SPACE_MAXADDR_32BIT; if (base != NULL) *base = bar->pbase + bar_b2b_off; if (vbase != NULL) *vbase = bar->vbase + bar_b2b_off; if (size != NULL) *size = bar->size - bar_b2b_off; if (align != NULL) *align = bar->size; if (align_size != NULL) *align_size = 1; if (plimit != NULL) *plimit = limit; return (0); } static int intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size) { struct ntb_softc *ntb = device_get_softc(dev); struct ntb_pci_bar_info *bar; uint64_t base, limit, reg_val; size_t bar_size, mw_size; uint32_t base_reg, xlat_reg, limit_reg; enum ntb_bar bar_num; if (idx >= intel_ntb_mw_count(dev)) return (EINVAL); idx = intel_ntb_user_mw_to_idx(ntb, idx); bar_num = intel_ntb_mw_to_bar(ntb, idx); bar = &ntb->bar_info[bar_num]; bar_size = bar->size; if (idx == ntb->b2b_mw_idx) mw_size = bar_size - ntb->b2b_off; else mw_size = bar_size; /* Hardware requires that addr is aligned to bar size */ if ((addr & (bar_size - 1)) != 0) return (EINVAL); if (size > mw_size) return (EINVAL); bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg); limit = 0; if (bar_is_64bit(ntb, bar_num)) { base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK; if (limit_reg != 0 && size != mw_size) limit = base + size; /* Set and verify translation address */ intel_ntb_reg_write(8, xlat_reg, addr); reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK; if (reg_val != addr) { intel_ntb_reg_write(8, xlat_reg, 0); return (EIO); } /* Set and verify the limit */ intel_ntb_reg_write(8, limit_reg, limit); reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK; if (reg_val != limit) { intel_ntb_reg_write(8, limit_reg, base); intel_ntb_reg_write(8, xlat_reg, 0); return (EIO); } } else { /* Configure 32-bit (split) BAR MW */ if ((addr & UINT32_MAX) != addr) return (ERANGE); if (((addr + size) & UINT32_MAX) != (addr + size)) return (ERANGE); base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK; if (limit_reg != 0 && size != mw_size) limit = base + size; /* Set and verify translation address */ intel_ntb_reg_write(4, xlat_reg, addr); reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK; if (reg_val != addr) { intel_ntb_reg_write(4, xlat_reg, 0); return (EIO); } /* Set and verify the limit */ intel_ntb_reg_write(4, limit_reg, limit); reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK; if (reg_val != limit) { intel_ntb_reg_write(4, limit_reg, base); intel_ntb_reg_write(4, xlat_reg, 0); return (EIO); } } return (0); } static int intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx) { return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0)); } static int intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode) { struct ntb_softc *ntb = device_get_softc(dev); struct ntb_pci_bar_info *bar; if (idx >= intel_ntb_mw_count(dev)) return (EINVAL); idx = intel_ntb_user_mw_to_idx(ntb, idx); bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)]; *mode = bar->map_mode; return (0); } static int intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode) { struct ntb_softc *ntb = device_get_softc(dev); if (idx >= intel_ntb_mw_count(dev)) return (EINVAL); idx = intel_ntb_user_mw_to_idx(ntb, idx); return (intel_ntb_mw_set_wc_internal(ntb, idx, mode)); } static int intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode) { struct ntb_pci_bar_info *bar; int rc; bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)]; if (bar->map_mode == mode) return (0); rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode); if (rc == 0) bar->map_mode = mode; return (rc); } static void intel_ntb_peer_db_set(device_t dev, uint64_t bit) { struct ntb_softc *ntb = device_get_softc(dev); if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { struct ntb_pci_bar_info *lapic; unsigned i; lapic = ntb->peer_lapic_bar; for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { if ((bit & intel_ntb_db_vector_mask(dev, i)) != 0) bus_space_write_4(lapic->pci_bus_tag, lapic->pci_bus_handle, ntb->peer_msix_data[i].nmd_ofs, ntb->peer_msix_data[i].nmd_data); } return; } if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) { intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit); return; } db_iowrite(ntb, ntb->peer_reg->db_bell, bit); } static int intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size) { struct ntb_softc *ntb = device_get_softc(dev); struct ntb_pci_bar_info *bar; uint64_t regoff; KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL")); if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) { bar = &ntb->bar_info[NTB_CONFIG_BAR]; regoff = ntb->peer_reg->db_bell; } else { KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED, ("invalid b2b idx")); bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)]; regoff = XEON_PDOORBELL_OFFSET; } KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh")); /* HACK: Specific to current x86 bus implementation. */ *db_addr = ((uint64_t)bar->pci_bus_handle + regoff); *db_size = ntb->reg->db_size; return (0); } static uint64_t intel_ntb_db_valid_mask(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); return (ntb->db_valid_mask); } static int intel_ntb_db_vector_count(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); return (ntb->db_vec_count); } static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector) { struct ntb_softc *ntb = device_get_softc(dev); if (vector > ntb->db_vec_count) return (0); return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector)); } static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width) { struct ntb_softc *ntb = device_get_softc(dev); if (speed != NULL) *speed = intel_ntb_link_sta_speed(ntb); if (width != NULL) *width = intel_ntb_link_sta_width(ntb); return (link_is_up(ntb)); } static void save_bar_parameters(struct ntb_pci_bar_info *bar) { bar->pci_bus_tag = rman_get_bustag(bar->pci_resource); bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource); bar->pbase = rman_get_start(bar->pci_resource); bar->size = rman_get_size(bar->pci_resource); bar->vbase = rman_get_virtual(bar->pci_resource); } static device_method_t ntb_intel_methods[] = { /* Device interface */ DEVMETHOD(device_probe, intel_ntb_probe), DEVMETHOD(device_attach, intel_ntb_attach), DEVMETHOD(device_detach, intel_ntb_detach), + /* Bus interface */ + DEVMETHOD(bus_child_location_str, ntb_child_location_str), + DEVMETHOD(bus_print_child, ntb_print_child), /* NTB interface */ DEVMETHOD(ntb_link_is_up, intel_ntb_link_is_up), DEVMETHOD(ntb_link_enable, intel_ntb_link_enable), DEVMETHOD(ntb_link_disable, intel_ntb_link_disable), DEVMETHOD(ntb_link_enabled, intel_ntb_link_enabled), DEVMETHOD(ntb_mw_count, intel_ntb_mw_count), DEVMETHOD(ntb_mw_get_range, intel_ntb_mw_get_range), DEVMETHOD(ntb_mw_set_trans, intel_ntb_mw_set_trans), DEVMETHOD(ntb_mw_clear_trans, intel_ntb_mw_clear_trans), DEVMETHOD(ntb_mw_get_wc, intel_ntb_mw_get_wc), DEVMETHOD(ntb_mw_set_wc, intel_ntb_mw_set_wc), DEVMETHOD(ntb_spad_count, intel_ntb_spad_count), DEVMETHOD(ntb_spad_clear, intel_ntb_spad_clear), DEVMETHOD(ntb_spad_write, intel_ntb_spad_write), DEVMETHOD(ntb_spad_read, intel_ntb_spad_read), DEVMETHOD(ntb_peer_spad_write, intel_ntb_peer_spad_write), DEVMETHOD(ntb_peer_spad_read, intel_ntb_peer_spad_read), DEVMETHOD(ntb_db_valid_mask, intel_ntb_db_valid_mask), DEVMETHOD(ntb_db_vector_count, intel_ntb_db_vector_count), DEVMETHOD(ntb_db_vector_mask, intel_ntb_db_vector_mask), DEVMETHOD(ntb_db_clear, intel_ntb_db_clear), DEVMETHOD(ntb_db_clear_mask, intel_ntb_db_clear_mask), DEVMETHOD(ntb_db_read, intel_ntb_db_read), DEVMETHOD(ntb_db_set_mask, intel_ntb_db_set_mask), DEVMETHOD(ntb_peer_db_addr, intel_ntb_peer_db_addr), DEVMETHOD(ntb_peer_db_set, intel_ntb_peer_db_set), DEVMETHOD_END }; static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods, sizeof(struct ntb_softc)); DRIVER_MODULE(ntb_hw_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL); MODULE_DEPEND(ntb_hw_intel, ntb, 1, 1, 1); MODULE_VERSION(ntb_hw_intel, 1); Index: stable/11/sys/dev/ntb/ntb_hw/ntb_hw_plx.c =================================================================== --- stable/11/sys/dev/ntb/ntb_hw/ntb_hw_plx.c (revision 323454) +++ stable/11/sys/dev/ntb/ntb_hw/ntb_hw_plx.c (revision 323455) @@ -1,950 +1,953 @@ /*- * Copyright (c) 2017 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * The Non-Transparent Bridge (NTB) is a device that allows you to connect * two or more systems using a PCI-e links, providing remote memory access. * * This module contains a driver for NTBs in PLX/Avago/Broadcom PCIe bridges. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../ntb.h" #define PLX_MAX_BARS 4 /* There are at most 4 data BARs. */ #define PLX_NUM_SPAD 8 /* There are 8 scratchpads. */ #define PLX_NUM_SPAD_PATT 4 /* Use test pattern as 4 more. */ #define PLX_NUM_DB 16 /* There are 16 doorbells. */ struct ntb_plx_mw_info { int mw_bar; int mw_64bit; int mw_rid; struct resource *mw_res; vm_paddr_t mw_pbase; caddr_t mw_vbase; vm_size_t mw_size; vm_memattr_t mw_map_mode; bus_addr_t mw_xlat_addr; size_t mw_xlat_size; }; struct ntb_plx_softc { /* ntb.c context. Do not move! Must go first! */ void *ntb_store; device_t dev; struct resource *conf_res; int conf_rid; u_int ntx; /* NTx number within chip. */ u_int link; /* Link v/s Virtual side. */ u_int port; /* Port number within chip. */ int int_rid; struct resource *int_res; void *int_tag; struct ntb_plx_mw_info mw_info[PLX_MAX_BARS]; int mw_count; /* Number of memory windows. */ int spad_count1; /* Number of standard spads. */ int spad_count2; /* Number of extra spads. */ uint32_t spad_off1; /* Offset of our spads. */ uint32_t spad_off2; /* Offset of our extra spads. */ uint32_t spad_offp1; /* Offset of peer spads. */ uint32_t spad_offp2; /* Offset of peer extra spads. */ /* Parameters of window shared with peer config access in B2B mode. */ int b2b_mw; /* Shared window number. */ uint64_t b2b_off; /* Offset in shared window. */ }; #define PLX_NT0_BASE 0x3E000 #define PLX_NT1_BASE 0x3C000 #define PLX_NTX_BASE(sc) ((sc)->ntx ? PLX_NT1_BASE : PLX_NT0_BASE) #define PLX_NTX_LINK_OFFSET 0x01000 /* Bases of NTx our/peer interface registers */ #define PLX_NTX_OUR_BASE(sc) \ (PLX_NTX_BASE(sc) + ((sc)->link ? PLX_NTX_LINK_OFFSET : 0)) #define PLX_NTX_PEER_BASE(sc) \ (PLX_NTX_BASE(sc) + ((sc)->link ? 0 : PLX_NTX_LINK_OFFSET)) /* Read/write NTx our interface registers */ #define NTX_READ(sc, reg) \ bus_read_4((sc)->conf_res, PLX_NTX_OUR_BASE(sc) + (reg)) #define NTX_WRITE(sc, reg, val) \ bus_write_4((sc)->conf_res, PLX_NTX_OUR_BASE(sc) + (reg), (val)) /* Read/write NTx peer interface registers */ #define PNTX_READ(sc, reg) \ bus_read_4((sc)->conf_res, PLX_NTX_PEER_BASE(sc) + (reg)) #define PNTX_WRITE(sc, reg, val) \ bus_write_4((sc)->conf_res, PLX_NTX_PEER_BASE(sc) + (reg), (val)) /* Read/write B2B NTx registers */ #define BNTX_READ(sc, reg) \ bus_read_4((sc)->mw_info[(sc)->b2b_mw].mw_res, \ PLX_NTX_BASE(sc) + (reg)) #define BNTX_WRITE(sc, reg, val) \ bus_write_4((sc)->mw_info[(sc)->b2b_mw].mw_res, \ PLX_NTX_BASE(sc) + (reg), (val)) #define PLX_PORT_BASE(p) ((p) << 12) #define PLX_STATION_PORT_BASE(sc) PLX_PORT_BASE((sc)->port & ~7) #define PLX_PORT_CONTROL(sc) (PLX_STATION_PORT_BASE(sc) + 0x208) static int ntb_plx_init(device_t dev); static int ntb_plx_detach(device_t dev); static int ntb_plx_mw_set_trans_internal(device_t dev, unsigned mw_idx); static int ntb_plx_probe(device_t dev) { switch (pci_get_devid(dev)) { case 0x87a010b5: device_set_desc(dev, "PLX Non-Transparent Bridge NT0 Link"); return (BUS_PROBE_DEFAULT); case 0x87a110b5: device_set_desc(dev, "PLX Non-Transparent Bridge NT1 Link"); return (BUS_PROBE_DEFAULT); case 0x87b010b5: device_set_desc(dev, "PLX Non-Transparent Bridge NT0 Virtual"); return (BUS_PROBE_DEFAULT); case 0x87b110b5: device_set_desc(dev, "PLX Non-Transparent Bridge NT1 Virtual"); return (BUS_PROBE_DEFAULT); } return (ENXIO); } static int ntb_plx_init(device_t dev) { struct ntb_plx_softc *sc = device_get_softc(dev); struct ntb_plx_mw_info *mw; uint64_t val64; int i; uint32_t val; if (sc->b2b_mw >= 0) { /* Set peer BAR0/1 size and address for B2B NTx access. */ mw = &sc->mw_info[sc->b2b_mw]; if (mw->mw_64bit) { PNTX_WRITE(sc, 0xe4, 0x3); /* 64-bit */ val64 = 0x2000000000000000 * mw->mw_bar | 0x4; PNTX_WRITE(sc, PCIR_BAR(0), val64); PNTX_WRITE(sc, PCIR_BAR(0) + 4, val64 >> 32); } else { PNTX_WRITE(sc, 0xe4, 0x2); /* 32-bit */ val = 0x20000000 * mw->mw_bar; PNTX_WRITE(sc, PCIR_BAR(0), val); } /* Set Virtual to Link address translation for B2B. */ for (i = 0; i < sc->mw_count; i++) { mw = &sc->mw_info[i]; if (mw->mw_64bit) { val64 = 0x2000000000000000 * mw->mw_bar; NTX_WRITE(sc, 0xc3c + (mw->mw_bar - 2) * 4, val64); NTX_WRITE(sc, 0xc3c + (mw->mw_bar - 2) * 4 + 4, val64 >> 32); } else { val = 0x20000000 * mw->mw_bar; NTX_WRITE(sc, 0xc3c + (mw->mw_bar - 2) * 4, val); } } /* Enable Link Interface LUT entry 0 for 0:0.0. */ PNTX_WRITE(sc, 0xdb4, 1); } /* * Enable Virtual Interface LUT entry 0 for 0:0.0 and * entry 1 for our Requester ID reported by chip. */ val = (NTX_READ(sc, 0xc90) << 16) | 0x00010001; NTX_WRITE(sc, sc->link ? 0xdb4 : 0xd94, val); /* Set Link to Virtual address translation. */ for (i = 0; i < sc->mw_count; i++) { mw = &sc->mw_info[i]; if (mw->mw_xlat_size != 0) ntb_plx_mw_set_trans_internal(dev, i); } pci_enable_busmaster(dev); if (sc->b2b_mw >= 0) PNTX_WRITE(sc, PCIR_COMMAND, PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); return (0); } static void ntb_plx_isr(void *arg) { device_t dev = arg; struct ntb_plx_softc *sc = device_get_softc(dev); uint32_t val; ntb_db_event((device_t)arg, 0); if (sc->link) /* Link Interface has no Link Error registers. */ return; val = NTX_READ(sc, 0xfe0); if (val == 0) return; NTX_WRITE(sc, 0xfe0, val); if (val & 1) device_printf(dev, "Correctable Error\n"); if (val & 2) device_printf(dev, "Uncorrectable Error\n"); if (val & 4) { /* DL_Down resets link side registers, have to reinit. */ ntb_plx_init(dev); ntb_link_event(dev); } if (val & 8) device_printf(dev, "Uncorrectable Error Message Drop\n"); } static int ntb_plx_setup_intr(device_t dev) { struct ntb_plx_softc *sc = device_get_softc(dev); int error; /* * XXX: This hardware supports MSI, but I found it unusable. * It generates new MSI only when doorbell register goes from * zero, but does not generate it when another bit is set or on * partial clear. It makes operation very racy and unreliable. * The data book mentions some mask juggling magic to workaround * that, but I failed to make it work. */ sc->int_rid = 0; sc->int_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->int_rid, RF_SHAREABLE|RF_ACTIVE); if (sc->int_res == NULL) { device_printf(dev, "bus_alloc_resource failed\n"); return (ENOMEM); } error = bus_setup_intr(dev, sc->int_res, INTR_MPSAFE | INTR_TYPE_MISC, NULL, ntb_plx_isr, dev, &sc->int_tag); if (error != 0) { device_printf(dev, "bus_setup_intr failed: %d\n", error); return (error); } if (!sc->link) { /* Link Interface has no Link Error registers. */ NTX_WRITE(sc, 0xfe0, 0xf); /* Clear link interrupts. */ NTX_WRITE(sc, 0xfe4, 0x0); /* Unmask link interrupts. */ } return (0); } static void ntb_plx_teardown_intr(device_t dev) { struct ntb_plx_softc *sc = device_get_softc(dev); if (!sc->link) /* Link Interface has no Link Error registers. */ NTX_WRITE(sc, 0xfe4, 0xf); /* Mask link interrupts. */ if (sc->int_res) { bus_teardown_intr(dev, sc->int_res, sc->int_tag); bus_release_resource(dev, SYS_RES_IRQ, sc->int_rid, sc->int_res); } } static int ntb_plx_attach(device_t dev) { struct ntb_plx_softc *sc = device_get_softc(dev); struct ntb_plx_mw_info *mw; int error = 0, i; uint32_t val; char buf[32]; /* Identify what we are (what side of what NTx). */ sc->dev = dev; val = pci_read_config(dev, 0xc8c, 4); sc->ntx = (val & 1) != 0; sc->link = (val & 0x80000000) != 0; /* Get access to whole 256KB of chip configuration space via BAR0/1. */ sc->conf_rid = PCIR_BAR(0); sc->conf_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->conf_rid, RF_ACTIVE); if (sc->conf_res == NULL) { device_printf(dev, "Can't allocate configuration BAR.\n"); return (ENXIO); } /* Identify chip port we are connected to. */ val = bus_read_4(sc->conf_res, 0x360); sc->port = (val >> ((sc->ntx == 0) ? 8 : 16)) & 0x1f; /* Find configured memory windows at BAR2-5. */ sc->mw_count = 0; for (i = 2; i <= 5; i++) { mw = &sc->mw_info[sc->mw_count]; mw->mw_bar = i; mw->mw_rid = PCIR_BAR(mw->mw_bar); mw->mw_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &mw->mw_rid, RF_ACTIVE); if (mw->mw_res == NULL) continue; mw->mw_pbase = rman_get_start(mw->mw_res); mw->mw_size = rman_get_size(mw->mw_res); mw->mw_vbase = rman_get_virtual(mw->mw_res); mw->mw_map_mode = VM_MEMATTR_UNCACHEABLE; sc->mw_count++; /* Skip over adjacent BAR for 64-bit BARs. */ val = pci_read_config(dev, PCIR_BAR(mw->mw_bar), 4); if ((val & PCIM_BAR_MEM_TYPE) == PCIM_BAR_MEM_64) { mw->mw_64bit = 1; i++; } } /* Try to identify B2B mode. */ i = 1; snprintf(buf, sizeof(buf), "hint.%s.%d.b2b", device_get_name(dev), device_get_unit(dev)); TUNABLE_INT_FETCH(buf, &i); if (sc->link) { device_printf(dev, "NTB-to-Root Port mode (Link Interface)\n"); sc->b2b_mw = -1; } else if (i == 0) { device_printf(dev, "NTB-to-Root Port mode (Virtual Interface)\n"); sc->b2b_mw = -1; } else { device_printf(dev, "NTB-to-NTB (back-to-back) mode\n"); /* We need at least one memory window for B2B peer access. */ if (sc->mw_count == 0) { device_printf(dev, "No memory window BARs enabled.\n"); error = ENXIO; goto out; } sc->b2b_mw = sc->mw_count - 1; /* Use half of the window for B2B, but no less then 1MB. */ mw = &sc->mw_info[sc->b2b_mw]; if (mw->mw_size >= 2 * 1024 * 1024) sc->b2b_off = mw->mw_size / 2; else sc->b2b_off = 0; } /* * Use Physical Layer User Test Pattern as additional scratchpad. * Make sure they are present and enabled by writing to them. * XXX: Its a hack, but standard 8 registers are not enough. */ sc->spad_offp1 = sc->spad_off1 = PLX_NTX_OUR_BASE(sc) + 0xc6c; sc->spad_offp2 = sc->spad_off2 = PLX_PORT_BASE(sc->ntx * 8) + 0x20c; if (sc->b2b_mw >= 0) { /* In NTB-to-NTB mode each side has own scratchpads. */ sc->spad_count1 = PLX_NUM_SPAD; bus_write_4(sc->conf_res, sc->spad_off2, 0x12345678); if (bus_read_4(sc->conf_res, sc->spad_off2) == 0x12345678) sc->spad_count2 = PLX_NUM_SPAD_PATT; } else { /* Otherwise we have share scratchpads with the peer. */ if (sc->link) { sc->spad_off1 += PLX_NUM_SPAD / 2 * 4; sc->spad_off2 += PLX_NUM_SPAD_PATT / 2 * 4; } else { sc->spad_offp1 += PLX_NUM_SPAD / 2 * 4; sc->spad_offp2 += PLX_NUM_SPAD_PATT / 2 * 4; } sc->spad_count1 = PLX_NUM_SPAD / 2; bus_write_4(sc->conf_res, sc->spad_off2, 0x12345678); if (bus_read_4(sc->conf_res, sc->spad_off2) == 0x12345678) sc->spad_count2 = PLX_NUM_SPAD_PATT / 2; } /* Apply static part of NTB configuration. */ ntb_plx_init(dev); /* Allocate and setup interrupts. */ error = ntb_plx_setup_intr(dev); if (error) goto out; /* Attach children to this controller */ error = ntb_register_device(dev); out: if (error != 0) ntb_plx_detach(dev); return (error); } static int ntb_plx_detach(device_t dev) { struct ntb_plx_softc *sc = device_get_softc(dev); struct ntb_plx_mw_info *mw; int i; /* Detach & delete all children */ ntb_unregister_device(dev); /* Disable and free interrupts. */ ntb_plx_teardown_intr(dev); /* Free memory resources. */ for (i = 0; i < sc->mw_count; i++) { mw = &sc->mw_info[i]; bus_release_resource(dev, SYS_RES_MEMORY, mw->mw_rid, mw->mw_res); } bus_release_resource(dev, SYS_RES_MEMORY, sc->conf_rid, sc->conf_res); return (0); } static bool ntb_plx_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width) { uint16_t link; link = pcie_read_config(dev, PCIER_LINK_STA, 2); if (speed != NULL) *speed = (link & PCIEM_LINK_STA_SPEED); if (width != NULL) *width = (link & PCIEM_LINK_STA_WIDTH) >> 4; return ((link & PCIEM_LINK_STA_WIDTH) != 0); } static int ntb_plx_link_enable(device_t dev, enum ntb_speed speed __unused, enum ntb_width width __unused) { struct ntb_plx_softc *sc = device_get_softc(dev); uint32_t reg, val; /* The fact that we see the Link Interface means link is enabled. */ if (sc->link) { ntb_link_event(dev); return (0); } reg = PLX_PORT_CONTROL(sc); val = bus_read_4(sc->conf_res, reg); if ((val & (1 << (sc->port & 7))) == 0) { /* If already enabled, generate fake link event and exit. */ ntb_link_event(dev); return (0); } val &= ~(1 << (sc->port & 7)); bus_write_4(sc->conf_res, reg, val); return (0); } static int ntb_plx_link_disable(device_t dev) { struct ntb_plx_softc *sc = device_get_softc(dev); uint32_t reg, val; /* Link disable for Link Interface would be suicidal. */ if (sc->link) return (0); reg = PLX_PORT_CONTROL(sc); val = bus_read_4(sc->conf_res, reg); val |= (1 << (sc->port & 7)); bus_write_4(sc->conf_res, reg, val); return (0); } static bool ntb_plx_link_enabled(device_t dev) { struct ntb_plx_softc *sc = device_get_softc(dev); uint32_t reg, val; /* The fact that we see the Link Interface means link is enabled. */ if (sc->link) return (TRUE); reg = PLX_PORT_CONTROL(sc); val = bus_read_4(sc->conf_res, reg); return ((val & (1 << (sc->port & 7))) == 0); } static uint8_t ntb_plx_mw_count(device_t dev) { struct ntb_plx_softc *sc = device_get_softc(dev); if (sc->b2b_mw >= 0 && sc->b2b_off == 0) return (sc->mw_count - 1); /* B2B consumed whole window. */ return (sc->mw_count); } static int ntb_plx_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base, caddr_t *vbase, size_t *size, size_t *align, size_t *align_size, bus_addr_t *plimit) { struct ntb_plx_softc *sc = device_get_softc(dev); struct ntb_plx_mw_info *mw; size_t off; if (mw_idx >= sc->mw_count) return (EINVAL); off = 0; if (mw_idx == sc->b2b_mw) { KASSERT(sc->b2b_off != 0, ("user shouldn't get non-shared b2b mw")); off = sc->b2b_off; } mw = &sc->mw_info[mw_idx]; /* Local to remote memory window parameters. */ if (base != NULL) *base = mw->mw_pbase + off; if (vbase != NULL) *vbase = mw->mw_vbase + off; if (size != NULL) *size = mw->mw_size - off; /* * Remote to local memory window translation address alignment. * XXX: In B2B mode we can change window size (and so alignmet) * live, but there is no way to report it, so report safe value. */ if (align != NULL) *align = mw->mw_size - off; /* * Remote to local memory window size alignment. * XXX: The chip has no limit registers. In B2B case size must be * power of 2 (since we can reprogram BAR size), but there is no way * to report it, so report 1MB -- minimal BAR size. In non-B2B case * there is no control at all, so report the precofigured BAR size. */ if (align_size != NULL) { if (sc->b2b_mw >= 0) *align_size = 1024 * 1024; else *align_size = mw->mw_size - off; } /* Remote to local memory window translation address upper limit. */ if (plimit != NULL) *plimit = mw->mw_64bit ? BUS_SPACE_MAXADDR : BUS_SPACE_MAXADDR_32BIT; return (0); } static int ntb_plx_mw_set_trans_internal(device_t dev, unsigned mw_idx) { struct ntb_plx_softc *sc = device_get_softc(dev); struct ntb_plx_mw_info *mw; uint64_t addr, off, size, val64; uint32_t val; mw = &sc->mw_info[mw_idx]; addr = mw->mw_xlat_addr; size = mw->mw_xlat_size; off = 0; if (mw_idx == sc->b2b_mw) { off = sc->b2b_off; KASSERT(off != 0, ("user shouldn't get non-shared b2b mw")); /* * While generally we can set any BAR size on link side, * for B2B shared window we can't go above preconfigured * size due to BAR address alignment requirements. */ if (size > mw->mw_size - off) return (EINVAL); } if (size > 0) { /* Round BAR size to next power of 2 or at least 1MB. */ if (!powerof2(size)) size = 1LL << flsll(size); if (size < 1024 * 1024) size = 1024 * 1024; /* Hardware requires addr aligned to BAR size. */ if ((addr & (size - 1)) != 0) return (EINVAL); } if (mw->mw_64bit) { if (sc->b2b_mw >= 0) { /* Set Link Interface BAR size and enable/disable it. */ val64 = 0; if (size > 0) val64 = (~(size - 1) & ~0xfffff); val64 |= 0x4; PNTX_WRITE(sc, 0xe8 + (mw->mw_bar - 2) * 4, val64); PNTX_WRITE(sc, 0xe8 + (mw->mw_bar - 2) * 4 + 4, val64 >> 32); /* Set Link Interface BAR address. */ val64 = 0x2000000000000000 * mw->mw_bar + off; val64 |= 0x4; PNTX_WRITE(sc, PCIR_BAR(mw->mw_bar), val64); PNTX_WRITE(sc, PCIR_BAR(mw->mw_bar) + 4, val64 >> 32); } /* Set Virtual Interface BARs address translation */ PNTX_WRITE(sc, 0xc3c + (mw->mw_bar - 2) * 4, addr); PNTX_WRITE(sc, 0xc3c + (mw->mw_bar - 2) * 4 + 4, addr >> 32); } else { /* Make sure we fit into 32-bit address space. */ if ((addr & UINT32_MAX) != addr) return (ERANGE); if (((addr + size) & UINT32_MAX) != (addr + size)) return (ERANGE); if (sc->b2b_mw >= 0) { /* Set Link Interface BAR size and enable/disable it. */ val = 0; if (size > 0) val = (~(size - 1) & ~0xfffff); PNTX_WRITE(sc, 0xe8 + (mw->mw_bar - 2) * 4, val); /* Set Link Interface BAR address. */ val64 = 0x20000000 * mw->mw_bar + off; PNTX_WRITE(sc, PCIR_BAR(mw->mw_bar), val64); } /* Set Virtual Interface BARs address translation */ PNTX_WRITE(sc, 0xc3c + (mw->mw_bar - 2) * 4, addr); } return (0); } static int ntb_plx_mw_set_trans(device_t dev, unsigned mw_idx, bus_addr_t addr, size_t size) { struct ntb_plx_softc *sc = device_get_softc(dev); struct ntb_plx_mw_info *mw; if (mw_idx >= sc->mw_count) return (EINVAL); mw = &sc->mw_info[mw_idx]; mw->mw_xlat_addr = addr; mw->mw_xlat_size = size; return (ntb_plx_mw_set_trans_internal(dev, mw_idx)); } static int ntb_plx_mw_clear_trans(device_t dev, unsigned mw_idx) { return (ntb_plx_mw_set_trans(dev, mw_idx, 0, 0)); } static int ntb_plx_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode) { struct ntb_plx_softc *sc = device_get_softc(dev); struct ntb_plx_mw_info *mw; if (idx >= sc->mw_count) return (EINVAL); mw = &sc->mw_info[idx]; *mode = mw->mw_map_mode; return (0); } static int ntb_plx_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode) { struct ntb_plx_softc *sc = device_get_softc(dev); struct ntb_plx_mw_info *mw; uint64_t off; int rc; if (idx >= sc->mw_count) return (EINVAL); mw = &sc->mw_info[idx]; if (mw->mw_map_mode == mode) return (0); off = 0; if (idx == sc->b2b_mw) { KASSERT(sc->b2b_off != 0, ("user shouldn't get non-shared b2b mw")); off = sc->b2b_off; } rc = pmap_change_attr((vm_offset_t)mw->mw_vbase + off, mw->mw_size - off, mode); if (rc == 0) mw->mw_map_mode = mode; return (rc); } static uint8_t ntb_plx_spad_count(device_t dev) { struct ntb_plx_softc *sc = device_get_softc(dev); return (sc->spad_count1 + sc->spad_count2); } static int ntb_plx_spad_write(device_t dev, unsigned int idx, uint32_t val) { struct ntb_plx_softc *sc = device_get_softc(dev); u_int off; if (idx >= sc->spad_count1 + sc->spad_count2) return (EINVAL); if (idx < sc->spad_count1) off = sc->spad_off1 + idx * 4; else off = sc->spad_off2 + (idx - sc->spad_count1) * 4; bus_write_4(sc->conf_res, off, val); return (0); } static void ntb_plx_spad_clear(device_t dev) { struct ntb_plx_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->spad_count1 + sc->spad_count2; i++) ntb_plx_spad_write(dev, i, 0); } static int ntb_plx_spad_read(device_t dev, unsigned int idx, uint32_t *val) { struct ntb_plx_softc *sc = device_get_softc(dev); u_int off; if (idx >= sc->spad_count1 + sc->spad_count2) return (EINVAL); if (idx < sc->spad_count1) off = sc->spad_off1 + idx * 4; else off = sc->spad_off2 + (idx - sc->spad_count1) * 4; *val = bus_read_4(sc->conf_res, off); return (0); } static int ntb_plx_peer_spad_write(device_t dev, unsigned int idx, uint32_t val) { struct ntb_plx_softc *sc = device_get_softc(dev); u_int off; if (idx >= sc->spad_count1 + sc->spad_count2) return (EINVAL); if (idx < sc->spad_count1) off = sc->spad_offp1 + idx * 4; else off = sc->spad_offp2 + (idx - sc->spad_count1) * 4; if (sc->b2b_mw >= 0) bus_write_4(sc->mw_info[sc->b2b_mw].mw_res, off, val); else bus_write_4(sc->conf_res, off, val); return (0); } static int ntb_plx_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val) { struct ntb_plx_softc *sc = device_get_softc(dev); u_int off; if (idx >= sc->spad_count1 + sc->spad_count2) return (EINVAL); if (idx < sc->spad_count1) off = sc->spad_offp1 + idx * 4; else off = sc->spad_offp2 + (idx - sc->spad_count1) * 4; if (sc->b2b_mw >= 0) *val = bus_read_4(sc->mw_info[sc->b2b_mw].mw_res, off); else *val = bus_read_4(sc->conf_res, off); return (0); } static uint64_t ntb_plx_db_valid_mask(device_t dev) { return ((1LL << PLX_NUM_DB) - 1); } static int ntb_plx_db_vector_count(device_t dev) { return (1); } static uint64_t ntb_plx_db_vector_mask(device_t dev, uint32_t vector) { if (vector > 0) return (0); return ((1LL << PLX_NUM_DB) - 1); } static void ntb_plx_db_clear(device_t dev, uint64_t bits) { struct ntb_plx_softc *sc = device_get_softc(dev); NTX_WRITE(sc, sc->link ? 0xc60 : 0xc50, bits); } static void ntb_plx_db_clear_mask(device_t dev, uint64_t bits) { struct ntb_plx_softc *sc = device_get_softc(dev); NTX_WRITE(sc, sc->link ? 0xc68 : 0xc58, bits); } static uint64_t ntb_plx_db_read(device_t dev) { struct ntb_plx_softc *sc = device_get_softc(dev); return (NTX_READ(sc, sc->link ? 0xc5c : 0xc4c)); } static void ntb_plx_db_set_mask(device_t dev, uint64_t bits) { struct ntb_plx_softc *sc = device_get_softc(dev); NTX_WRITE(sc, sc->link ? 0xc64 : 0xc54, bits); } static int ntb_plx_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size) { struct ntb_plx_softc *sc = device_get_softc(dev); struct ntb_plx_mw_info *mw; KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL")); if (sc->b2b_mw >= 0) { mw = &sc->mw_info[sc->b2b_mw]; *db_addr = (uint64_t)mw->mw_pbase + PLX_NTX_BASE(sc) + 0xc4c; } else { *db_addr = rman_get_start(sc->conf_res) + PLX_NTX_BASE(sc); *db_addr += sc->link ? 0xc4c : 0xc5c; } *db_size = 4; return (0); } static void ntb_plx_peer_db_set(device_t dev, uint64_t bit) { struct ntb_plx_softc *sc = device_get_softc(dev); if (sc->b2b_mw >= 0) BNTX_WRITE(sc, 0xc4c, bit); else NTX_WRITE(sc, sc->link ? 0xc4c : 0xc5c, bit); } static device_method_t ntb_plx_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ntb_plx_probe), DEVMETHOD(device_attach, ntb_plx_attach), DEVMETHOD(device_detach, ntb_plx_detach), + /* Bus interface */ + DEVMETHOD(bus_child_location_str, ntb_child_location_str), + DEVMETHOD(bus_print_child, ntb_print_child), /* NTB interface */ DEVMETHOD(ntb_link_is_up, ntb_plx_link_is_up), DEVMETHOD(ntb_link_enable, ntb_plx_link_enable), DEVMETHOD(ntb_link_disable, ntb_plx_link_disable), DEVMETHOD(ntb_link_enabled, ntb_plx_link_enabled), DEVMETHOD(ntb_mw_count, ntb_plx_mw_count), DEVMETHOD(ntb_mw_get_range, ntb_plx_mw_get_range), DEVMETHOD(ntb_mw_set_trans, ntb_plx_mw_set_trans), DEVMETHOD(ntb_mw_clear_trans, ntb_plx_mw_clear_trans), DEVMETHOD(ntb_mw_get_wc, ntb_plx_mw_get_wc), DEVMETHOD(ntb_mw_set_wc, ntb_plx_mw_set_wc), DEVMETHOD(ntb_spad_count, ntb_plx_spad_count), DEVMETHOD(ntb_spad_clear, ntb_plx_spad_clear), DEVMETHOD(ntb_spad_write, ntb_plx_spad_write), DEVMETHOD(ntb_spad_read, ntb_plx_spad_read), DEVMETHOD(ntb_peer_spad_write, ntb_plx_peer_spad_write), DEVMETHOD(ntb_peer_spad_read, ntb_plx_peer_spad_read), DEVMETHOD(ntb_db_valid_mask, ntb_plx_db_valid_mask), DEVMETHOD(ntb_db_vector_count, ntb_plx_db_vector_count), DEVMETHOD(ntb_db_vector_mask, ntb_plx_db_vector_mask), DEVMETHOD(ntb_db_clear, ntb_plx_db_clear), DEVMETHOD(ntb_db_clear_mask, ntb_plx_db_clear_mask), DEVMETHOD(ntb_db_read, ntb_plx_db_read), DEVMETHOD(ntb_db_set_mask, ntb_plx_db_set_mask), DEVMETHOD(ntb_peer_db_addr, ntb_plx_peer_db_addr), DEVMETHOD(ntb_peer_db_set, ntb_plx_peer_db_set), DEVMETHOD_END }; static DEFINE_CLASS_0(ntb_hw, ntb_plx_driver, ntb_plx_methods, sizeof(struct ntb_plx_softc)); DRIVER_MODULE(ntb_hw_plx, pci, ntb_plx_driver, ntb_hw_devclass, NULL, NULL); MODULE_DEPEND(ntb_hw_plx, ntb, 1, 1, 1); MODULE_VERSION(ntb_hw_plx, 1); Index: stable/11/sys/dev/ntb/ntb_transport.c =================================================================== --- stable/11/sys/dev/ntb/ntb_transport.c (revision 323454) +++ stable/11/sys/dev/ntb/ntb_transport.c (revision 323455) @@ -1,1564 +1,1595 @@ /*- - * Copyright (c) 2016 Alexander Motin + * Copyright (c) 2016-2017 Alexander Motin * Copyright (C) 2013 Intel Corporation * Copyright (C) 2015 EMC Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * The Non-Transparent Bridge (NTB) is a device that allows you to connect * two or more systems using a PCI-e links, providing remote memory access. * * This module contains a transport for sending and receiving messages by * writing to remote memory window(s) provided by underlying NTB device. * * NOTE: Much of the code in this module is shared with Linux. Any patches may * be picked up and redistributed in Linux with a dual GPL/BSD license. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ntb.h" #include "ntb_transport.h" #define KTR_NTB KTR_SPARE3 #define NTB_TRANSPORT_VERSION 4 static SYSCTL_NODE(_hw, OID_AUTO, ntb_transport, CTLFLAG_RW, 0, "ntb_transport"); static unsigned g_ntb_transport_debug_level; SYSCTL_UINT(_hw_ntb_transport, OID_AUTO, debug_level, CTLFLAG_RWTUN, &g_ntb_transport_debug_level, 0, "ntb_transport log level -- higher is more verbose"); #define ntb_printf(lvl, ...) do { \ if ((lvl) <= g_ntb_transport_debug_level) { \ printf(__VA_ARGS__); \ } \ } while (0) static unsigned transport_mtu = 0x10000; static uint64_t max_mw_size; SYSCTL_UQUAD(_hw_ntb_transport, OID_AUTO, max_mw_size, CTLFLAG_RDTUN, &max_mw_size, 0, "If enabled (non-zero), limit the size of large memory windows. " "Both sides of the NTB MUST set the same value here."); static unsigned enable_xeon_watchdog; SYSCTL_UINT(_hw_ntb_transport, OID_AUTO, enable_xeon_watchdog, CTLFLAG_RDTUN, &enable_xeon_watchdog, 0, "If non-zero, write a register every second to " "keep a watchdog from tearing down the NTB link"); STAILQ_HEAD(ntb_queue_list, ntb_queue_entry); typedef uint32_t ntb_q_idx_t; struct ntb_queue_entry { /* ntb_queue list reference */ STAILQ_ENTRY(ntb_queue_entry) entry; /* info on data to be transferred */ void *cb_data; void *buf; uint32_t len; uint32_t flags; struct ntb_transport_qp *qp; struct ntb_payload_header *x_hdr; ntb_q_idx_t index; }; struct ntb_rx_info { ntb_q_idx_t entry; }; struct ntb_transport_qp { struct ntb_transport_ctx *transport; device_t dev; void *cb_data; bool client_ready; volatile bool link_is_up; uint8_t qp_num; /* Only 64 QPs are allowed. 0-63 */ struct ntb_rx_info *rx_info; struct ntb_rx_info *remote_rx_info; void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, void *data, int len); struct ntb_queue_list tx_free_q; struct mtx ntb_tx_free_q_lock; caddr_t tx_mw; bus_addr_t tx_mw_phys; ntb_q_idx_t tx_index; ntb_q_idx_t tx_max_entry; uint64_t tx_max_frame; void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, void *data, int len); struct ntb_queue_list rx_post_q; struct ntb_queue_list rx_pend_q; /* ntb_rx_q_lock: synchronize access to rx_XXXX_q */ struct mtx ntb_rx_q_lock; struct task rxc_db_work; struct taskqueue *rxc_tq; caddr_t rx_buff; ntb_q_idx_t rx_index; ntb_q_idx_t rx_max_entry; uint64_t rx_max_frame; void (*event_handler)(void *data, enum ntb_link_event status); struct callout link_work; struct callout rx_full; uint64_t last_rx_no_buf; /* Stats */ uint64_t rx_bytes; uint64_t rx_pkts; uint64_t rx_ring_empty; uint64_t rx_err_no_buf; uint64_t rx_err_oflow; uint64_t rx_err_ver; uint64_t tx_bytes; uint64_t tx_pkts; uint64_t tx_ring_full; uint64_t tx_err_no_buf; struct mtx tx_lock; }; struct ntb_transport_mw { vm_paddr_t phys_addr; size_t phys_size; size_t xlat_align; size_t xlat_align_size; bus_addr_t addr_limit; /* Tx buff is off vbase / phys_addr */ caddr_t vbase; size_t xlat_size; size_t buff_size; /* Rx buff is off virt_addr / dma_addr */ caddr_t virt_addr; bus_addr_t dma_addr; }; struct ntb_transport_child { device_t dev; + int consumer; int qpoff; int qpcnt; struct ntb_transport_child *next; }; struct ntb_transport_ctx { device_t dev; struct ntb_transport_child *child; struct ntb_transport_mw *mw_vec; struct ntb_transport_qp *qp_vec; unsigned mw_count; unsigned qp_count; uint64_t qp_bitmap; volatile bool link_is_up; enum ntb_speed link_speed; enum ntb_width link_width; struct callout link_work; struct callout link_watchdog; struct task link_cleanup; }; enum { NTBT_DESC_DONE_FLAG = 1 << 0, NTBT_LINK_DOWN_FLAG = 1 << 1, }; struct ntb_payload_header { ntb_q_idx_t ver; uint32_t len; uint32_t flags; }; enum { /* * The order of this enum is part of the remote protocol. Do not * reorder without bumping protocol version (and it's probably best * to keep the protocol in lock-step with the Linux NTB driver. */ NTBT_VERSION = 0, NTBT_QP_LINKS, NTBT_NUM_QPS, NTBT_NUM_MWS, /* * N.B.: transport_link_work assumes MW1 enums = MW0 + 2. */ NTBT_MW0_SZ_HIGH, NTBT_MW0_SZ_LOW, NTBT_MW1_SZ_HIGH, NTBT_MW1_SZ_LOW, /* * Some NTB-using hardware have a watchdog to work around NTB hangs; if * a register or doorbell isn't written every few seconds, the link is * torn down. Write an otherwise unused register every few seconds to * work around this watchdog. */ NTBT_WATCHDOG_SPAD = 15 }; #define QP_TO_MW(nt, qp) ((qp) % nt->mw_count) #define NTB_QP_DEF_NUM_ENTRIES 100 #define NTB_LINK_DOWN_TIMEOUT 10 static int ntb_transport_probe(device_t dev); static int ntb_transport_attach(device_t dev); static int ntb_transport_detach(device_t dev); static void ntb_transport_init_queue(struct ntb_transport_ctx *nt, unsigned int qp_num); static int ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry); static void ntb_transport_rxc_db(void *arg, int pending); static int ntb_process_rxc(struct ntb_transport_qp *qp); static void ntb_memcpy_rx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry, void *offset); static inline void ntb_rx_copy_callback(struct ntb_transport_qp *qp, void *data); static void ntb_complete_rxc(struct ntb_transport_qp *qp); static void ntb_transport_doorbell_callback(void *data, uint32_t vector); static void ntb_transport_event_callback(void *data); static void ntb_transport_link_work(void *arg); static int ntb_set_mw(struct ntb_transport_ctx *, int num_mw, size_t size); static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw); static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, unsigned int qp_num); static void ntb_qp_link_work(void *arg); static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt); static void ntb_transport_link_cleanup_work(void *, int); static void ntb_qp_link_down(struct ntb_transport_qp *qp); static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp); static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp); static void ntb_send_link_down(struct ntb_transport_qp *qp); static void ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry, struct ntb_queue_list *list); static struct ntb_queue_entry *ntb_list_rm(struct mtx *lock, struct ntb_queue_list *list); static struct ntb_queue_entry *ntb_list_mv(struct mtx *lock, struct ntb_queue_list *from, struct ntb_queue_list *to); static void xeon_link_watchdog_hb(void *); static const struct ntb_ctx_ops ntb_transport_ops = { .link_event = ntb_transport_event_callback, .db_event = ntb_transport_doorbell_callback, }; MALLOC_DEFINE(M_NTB_T, "ntb_transport", "ntb transport driver"); static inline void iowrite32(uint32_t val, void *addr) { bus_space_write_4(X86_BUS_SPACE_MEM, 0/* HACK */, (uintptr_t)addr, val); } /* Transport Init and teardown */ static void xeon_link_watchdog_hb(void *arg) { struct ntb_transport_ctx *nt; nt = arg; ntb_spad_write(nt->dev, NTBT_WATCHDOG_SPAD, 0); callout_reset(&nt->link_watchdog, 1 * hz, xeon_link_watchdog_hb, nt); } static int ntb_transport_probe(device_t dev) { device_set_desc(dev, "NTB Transport"); return (0); } static int ntb_transport_attach(device_t dev) { struct ntb_transport_ctx *nt = device_get_softc(dev); struct ntb_transport_child **cpp = &nt->child; struct ntb_transport_child *nc; struct ntb_transport_mw *mw; uint64_t db_bitmap; int rc, i, db_count, spad_count, qp, qpu, qpo, qpt; char cfg[128] = ""; char buf[32]; char *n, *np, *c, *name; nt->dev = dev; nt->mw_count = ntb_mw_count(dev); spad_count = ntb_spad_count(dev); db_bitmap = ntb_db_valid_mask(dev); db_count = flsll(db_bitmap); KASSERT(db_bitmap == (1 << db_count) - 1, ("Doorbells are not sequential (%jx).\n", db_bitmap)); - device_printf(dev, "%d memory windows, %d scratchpads, " - "%d doorbells\n", nt->mw_count, spad_count, db_count); - if (nt->mw_count == 0) { device_printf(dev, "At least 1 memory window required.\n"); return (ENXIO); } if (spad_count < 6) { device_printf(dev, "At least 6 scratchpads required.\n"); return (ENXIO); } if (spad_count < 4 + 2 * nt->mw_count) { nt->mw_count = (spad_count - 4) / 2; device_printf(dev, "Scratchpads enough only for %d " "memory windows.\n", nt->mw_count); } if (db_bitmap == 0) { device_printf(dev, "At least one doorbell required.\n"); return (ENXIO); } nt->mw_vec = malloc(nt->mw_count * sizeof(*nt->mw_vec), M_NTB_T, M_WAITOK | M_ZERO); for (i = 0; i < nt->mw_count; i++) { mw = &nt->mw_vec[i]; rc = ntb_mw_get_range(dev, i, &mw->phys_addr, &mw->vbase, &mw->phys_size, &mw->xlat_align, &mw->xlat_align_size, &mw->addr_limit); if (rc != 0) goto err; mw->buff_size = 0; mw->xlat_size = 0; mw->virt_addr = NULL; mw->dma_addr = 0; rc = ntb_mw_set_wc(dev, i, VM_MEMATTR_WRITE_COMBINING); if (rc) ntb_printf(0, "Unable to set mw%d caching\n", i); } qpu = 0; qpo = imin(db_count, nt->mw_count); qpt = db_count; snprintf(buf, sizeof(buf), "hint.%s.%d.config", device_get_name(dev), device_get_unit(dev)); TUNABLE_STR_FETCH(buf, cfg, sizeof(cfg)); n = cfg; i = 0; while ((c = strsep(&n, ",")) != NULL) { np = c; name = strsep(&np, ":"); if (name != NULL && name[0] == 0) name = NULL; qp = (np && np[0] != 0) ? strtol(np, NULL, 10) : qpo - qpu; if (qp <= 0) qp = 1; if (qp > qpt - qpu) { device_printf(dev, "Not enough resources for config\n"); break; } nc = malloc(sizeof(*nc), M_DEVBUF, M_WAITOK | M_ZERO); + nc->consumer = i; nc->qpoff = qpu; nc->qpcnt = qp; nc->dev = device_add_child(dev, name, -1); if (nc->dev == NULL) { device_printf(dev, "Can not add child.\n"); break; } device_set_ivars(nc->dev, nc); *cpp = nc; cpp = &nc->next; if (bootverbose) { device_printf(dev, "%d \"%s\": queues %d", i, name, qpu); if (qp > 1) printf("-%d", qpu + qp - 1); printf("\n"); } qpu += qp; i++; } nt->qp_count = qpu; nt->qp_vec = malloc(nt->qp_count * sizeof(*nt->qp_vec), M_NTB_T, M_WAITOK | M_ZERO); for (i = 0; i < nt->qp_count; i++) ntb_transport_init_queue(nt, i); callout_init(&nt->link_work, 0); callout_init(&nt->link_watchdog, 0); TASK_INIT(&nt->link_cleanup, 0, ntb_transport_link_cleanup_work, nt); nt->link_is_up = false; rc = ntb_set_ctx(dev, nt, &ntb_transport_ops); if (rc != 0) goto err; ntb_link_enable(dev, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); if (enable_xeon_watchdog != 0) callout_reset(&nt->link_watchdog, 0, xeon_link_watchdog_hb, nt); bus_generic_attach(dev); return (0); err: free(nt->qp_vec, M_NTB_T); free(nt->mw_vec, M_NTB_T); return (rc); } static int ntb_transport_detach(device_t dev) { struct ntb_transport_ctx *nt = device_get_softc(dev); struct ntb_transport_child **cpp = &nt->child; struct ntb_transport_child *nc; int error = 0, i; while ((nc = *cpp) != NULL) { *cpp = (*cpp)->next; error = device_delete_child(dev, nc->dev); if (error) break; free(nc, M_DEVBUF); } KASSERT(nt->qp_bitmap == 0, ("Some queues not freed on detach (%jx)", nt->qp_bitmap)); ntb_transport_link_cleanup(nt); taskqueue_drain(taskqueue_swi, &nt->link_cleanup); callout_drain(&nt->link_work); callout_drain(&nt->link_watchdog); ntb_link_disable(dev); ntb_clear_ctx(dev); for (i = 0; i < nt->mw_count; i++) ntb_free_mw(nt, i); free(nt->qp_vec, M_NTB_T); free(nt->mw_vec, M_NTB_T); return (0); } +static int +ntb_transport_print_child(device_t dev, device_t child) +{ + struct ntb_transport_child *nc = device_get_ivars(child); + int retval; + + retval = bus_print_child_header(dev, child); + if (nc->qpcnt > 0) { + printf(" queue %d", nc->qpoff); + if (nc->qpcnt > 1) + printf("-%d", nc->qpoff + nc->qpcnt - 1); + } + retval += printf(" at consumer %d", nc->consumer); + retval += bus_print_child_domain(dev, child); + retval += bus_print_child_footer(dev, child); + + return (retval); +} + +static int +ntb_transport_child_location_str(device_t dev, device_t child, char *buf, + size_t buflen) +{ + struct ntb_transport_child *nc = device_get_ivars(child); + + snprintf(buf, buflen, "consumer=%d", nc->consumer); + return (0); +} + int ntb_transport_queue_count(device_t dev) { struct ntb_transport_child *nc = device_get_ivars(dev); return (nc->qpcnt); } static void ntb_transport_init_queue(struct ntb_transport_ctx *nt, unsigned int qp_num) { struct ntb_transport_mw *mw; struct ntb_transport_qp *qp; vm_paddr_t mw_base; uint64_t mw_size, qp_offset; size_t tx_size; unsigned num_qps_mw, mw_num, mw_count; mw_count = nt->mw_count; mw_num = QP_TO_MW(nt, qp_num); mw = &nt->mw_vec[mw_num]; qp = &nt->qp_vec[qp_num]; qp->qp_num = qp_num; qp->transport = nt; qp->dev = nt->dev; qp->client_ready = false; qp->event_handler = NULL; ntb_qp_link_down_reset(qp); if (mw_num < nt->qp_count % mw_count) num_qps_mw = nt->qp_count / mw_count + 1; else num_qps_mw = nt->qp_count / mw_count; mw_base = mw->phys_addr; mw_size = mw->phys_size; tx_size = mw_size / num_qps_mw; qp_offset = tx_size * (qp_num / mw_count); qp->tx_mw = mw->vbase + qp_offset; KASSERT(qp->tx_mw != NULL, ("uh oh?")); /* XXX Assumes that a vm_paddr_t is equivalent to bus_addr_t */ qp->tx_mw_phys = mw_base + qp_offset; KASSERT(qp->tx_mw_phys != 0, ("uh oh?")); tx_size -= sizeof(struct ntb_rx_info); qp->rx_info = (void *)(qp->tx_mw + tx_size); /* Due to house-keeping, there must be at least 2 buffs */ qp->tx_max_frame = qmin(transport_mtu, tx_size / 2); qp->tx_max_entry = tx_size / qp->tx_max_frame; callout_init(&qp->link_work, 0); callout_init(&qp->rx_full, 1); mtx_init(&qp->ntb_rx_q_lock, "ntb rx q", NULL, MTX_SPIN); mtx_init(&qp->ntb_tx_free_q_lock, "ntb tx free q", NULL, MTX_SPIN); mtx_init(&qp->tx_lock, "ntb transport tx", NULL, MTX_DEF); TASK_INIT(&qp->rxc_db_work, 0, ntb_transport_rxc_db, qp); qp->rxc_tq = taskqueue_create("ntbt_rx", M_WAITOK, taskqueue_thread_enqueue, &qp->rxc_tq); taskqueue_start_threads(&qp->rxc_tq, 1, PI_NET, "%s rx%d", device_get_nameunit(nt->dev), qp_num); STAILQ_INIT(&qp->rx_post_q); STAILQ_INIT(&qp->rx_pend_q); STAILQ_INIT(&qp->tx_free_q); } void ntb_transport_free_queue(struct ntb_transport_qp *qp) { struct ntb_transport_ctx *nt = qp->transport; struct ntb_queue_entry *entry; callout_drain(&qp->link_work); ntb_db_set_mask(qp->dev, 1ull << qp->qp_num); taskqueue_drain_all(qp->rxc_tq); taskqueue_free(qp->rxc_tq); qp->cb_data = NULL; qp->rx_handler = NULL; qp->tx_handler = NULL; qp->event_handler = NULL; while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q))) free(entry, M_NTB_T); while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q))) free(entry, M_NTB_T); while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) free(entry, M_NTB_T); nt->qp_bitmap &= ~(1 << qp->qp_num); } /** * ntb_transport_create_queue - Create a new NTB transport layer queue * @rx_handler: receive callback function * @tx_handler: transmit callback function * @event_handler: event callback function * * Create a new NTB transport layer queue and provide the queue with a callback * routine for both transmit and receive. The receive callback routine will be * used to pass up data when the transport has received it on the queue. The * transmit callback routine will be called when the transport has completed the * transmission of the data on the queue and the data is ready to be freed. * * RETURNS: pointer to newly created ntb_queue, NULL on error. */ struct ntb_transport_qp * ntb_transport_create_queue(device_t dev, int q, const struct ntb_queue_handlers *handlers, void *data) { struct ntb_transport_child *nc = device_get_ivars(dev); struct ntb_transport_ctx *nt = device_get_softc(device_get_parent(dev)); struct ntb_queue_entry *entry; struct ntb_transport_qp *qp; int i; if (q < 0 || q >= nc->qpcnt) return (NULL); qp = &nt->qp_vec[nc->qpoff + q]; nt->qp_bitmap |= (1 << qp->qp_num); qp->cb_data = data; qp->rx_handler = handlers->rx_handler; qp->tx_handler = handlers->tx_handler; qp->event_handler = handlers->event_handler; for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { entry = malloc(sizeof(*entry), M_NTB_T, M_WAITOK | M_ZERO); entry->cb_data = data; entry->buf = NULL; entry->len = transport_mtu; entry->qp = qp; ntb_list_add(&qp->ntb_rx_q_lock, entry, &qp->rx_pend_q); } for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { entry = malloc(sizeof(*entry), M_NTB_T, M_WAITOK | M_ZERO); entry->qp = qp; ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); } ntb_db_clear(dev, 1ull << qp->qp_num); return (qp); } /** * ntb_transport_link_up - Notify NTB transport of client readiness to use queue * @qp: NTB transport layer queue to be enabled * * Notify NTB transport layer of client readiness to use queue */ void ntb_transport_link_up(struct ntb_transport_qp *qp) { struct ntb_transport_ctx *nt = qp->transport; qp->client_ready = true; ntb_printf(2, "qp %d client ready\n", qp->qp_num); if (nt->link_is_up) callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp); } /* Transport Tx */ /** * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry * @qp: NTB transport layer queue the entry is to be enqueued on * @cb: per buffer pointer for callback function to use * @data: pointer to data buffer that will be sent * @len: length of the data buffer * * Enqueue a new transmit buffer onto the transport queue from which a NTB * payload will be transmitted. This assumes that a lock is being held to * serialize access to the qp. * * RETURNS: An appropriate ERRNO error value on error, or zero for success. */ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, unsigned int len) { struct ntb_queue_entry *entry; int rc; if (!qp->link_is_up || len == 0) { CTR0(KTR_NTB, "TX: link not up"); return (EINVAL); } entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); if (entry == NULL) { CTR0(KTR_NTB, "TX: could not get entry from tx_free_q"); qp->tx_err_no_buf++; return (EBUSY); } CTR1(KTR_NTB, "TX: got entry %p from tx_free_q", entry); entry->cb_data = cb; entry->buf = data; entry->len = len; entry->flags = 0; mtx_lock(&qp->tx_lock); rc = ntb_process_tx(qp, entry); mtx_unlock(&qp->tx_lock); if (rc != 0) { ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); CTR1(KTR_NTB, "TX: process_tx failed. Returning entry %p to tx_free_q", entry); } return (rc); } static void ntb_tx_copy_callback(void *data) { struct ntb_queue_entry *entry = data; struct ntb_transport_qp *qp = entry->qp; struct ntb_payload_header *hdr = entry->x_hdr; iowrite32(entry->flags | NTBT_DESC_DONE_FLAG, &hdr->flags); CTR1(KTR_NTB, "TX: hdr %p set DESC_DONE", hdr); ntb_peer_db_set(qp->dev, 1ull << qp->qp_num); /* * The entry length can only be zero if the packet is intended to be a * "link down" or similar. Since no payload is being sent in these * cases, there is nothing to add to the completion queue. */ if (entry->len > 0) { qp->tx_bytes += entry->len; if (qp->tx_handler) qp->tx_handler(qp, qp->cb_data, entry->buf, entry->len); else m_freem(entry->buf); entry->buf = NULL; } CTR3(KTR_NTB, "TX: entry %p sent. hdr->ver = %u, hdr->flags = 0x%x, Returning " "to tx_free_q", entry, hdr->ver, hdr->flags); ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); } static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void *offset) { CTR2(KTR_NTB, "TX: copying %d bytes to offset %p", entry->len, offset); if (entry->buf != NULL) { m_copydata((struct mbuf *)entry->buf, 0, entry->len, offset); /* * Ensure that the data is fully copied before setting the * flags */ wmb(); } ntb_tx_copy_callback(entry); } static void ntb_async_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry) { struct ntb_payload_header *hdr; void *offset; offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index; hdr = (struct ntb_payload_header *)((char *)offset + qp->tx_max_frame - sizeof(struct ntb_payload_header)); entry->x_hdr = hdr; iowrite32(entry->len, &hdr->len); iowrite32(qp->tx_pkts, &hdr->ver); ntb_memcpy_tx(entry, offset); } static int ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry) { CTR3(KTR_NTB, "TX: process_tx: tx_pkts=%lu, tx_index=%u, remote entry=%u", qp->tx_pkts, qp->tx_index, qp->remote_rx_info->entry); if (qp->tx_index == qp->remote_rx_info->entry) { CTR0(KTR_NTB, "TX: ring full"); qp->tx_ring_full++; return (EAGAIN); } if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) { if (qp->tx_handler != NULL) qp->tx_handler(qp, qp->cb_data, entry->buf, EIO); else m_freem(entry->buf); entry->buf = NULL; ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q); CTR1(KTR_NTB, "TX: frame too big. returning entry %p to tx_free_q", entry); return (0); } CTR2(KTR_NTB, "TX: copying entry %p to index %u", entry, qp->tx_index); ntb_async_tx(qp, entry); qp->tx_index++; qp->tx_index %= qp->tx_max_entry; qp->tx_pkts++; return (0); } /* Transport Rx */ static void ntb_transport_rxc_db(void *arg, int pending __unused) { struct ntb_transport_qp *qp = arg; uint64_t qp_mask = 1ull << qp->qp_num; int rc; CTR0(KTR_NTB, "RX: transport_rx"); again: while ((rc = ntb_process_rxc(qp)) == 0) ; CTR1(KTR_NTB, "RX: process_rxc returned %d", rc); if ((ntb_db_read(qp->dev) & qp_mask) != 0) { /* If db is set, clear it and check queue once more. */ ntb_db_clear(qp->dev, qp_mask); goto again; } if (qp->link_is_up) ntb_db_clear_mask(qp->dev, qp_mask); } static int ntb_process_rxc(struct ntb_transport_qp *qp) { struct ntb_payload_header *hdr; struct ntb_queue_entry *entry; caddr_t offset; offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index; hdr = (void *)(offset + qp->rx_max_frame - sizeof(struct ntb_payload_header)); CTR1(KTR_NTB, "RX: process_rxc rx_index = %u", qp->rx_index); if ((hdr->flags & NTBT_DESC_DONE_FLAG) == 0) { CTR0(KTR_NTB, "RX: hdr not done"); qp->rx_ring_empty++; return (EAGAIN); } if ((hdr->flags & NTBT_LINK_DOWN_FLAG) != 0) { CTR0(KTR_NTB, "RX: link down"); ntb_qp_link_down(qp); hdr->flags = 0; return (EAGAIN); } if (hdr->ver != (uint32_t)qp->rx_pkts) { CTR2(KTR_NTB,"RX: ver != rx_pkts (%x != %lx). " "Returning entry to rx_pend_q", hdr->ver, qp->rx_pkts); qp->rx_err_ver++; return (EIO); } entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q); if (entry == NULL) { qp->rx_err_no_buf++; CTR0(KTR_NTB, "RX: No entries in rx_pend_q"); return (EAGAIN); } callout_stop(&qp->rx_full); CTR1(KTR_NTB, "RX: rx entry %p from rx_pend_q", entry); entry->x_hdr = hdr; entry->index = qp->rx_index; if (hdr->len > entry->len) { CTR2(KTR_NTB, "RX: len too long. Wanted %ju got %ju", (uintmax_t)hdr->len, (uintmax_t)entry->len); qp->rx_err_oflow++; entry->len = -EIO; entry->flags |= NTBT_DESC_DONE_FLAG; ntb_complete_rxc(qp); } else { qp->rx_bytes += hdr->len; qp->rx_pkts++; CTR1(KTR_NTB, "RX: received %ld rx_pkts", qp->rx_pkts); entry->len = hdr->len; ntb_memcpy_rx(qp, entry, offset); } qp->rx_index++; qp->rx_index %= qp->rx_max_entry; return (0); } static void ntb_memcpy_rx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry, void *offset) { struct ifnet *ifp = entry->cb_data; unsigned int len = entry->len; CTR2(KTR_NTB, "RX: copying %d bytes from offset %p", len, offset); entry->buf = (void *)m_devget(offset, len, 0, ifp, NULL); if (entry->buf == NULL) entry->len = -ENOMEM; /* Ensure that the data is globally visible before clearing the flag */ wmb(); CTR2(KTR_NTB, "RX: copied entry %p to mbuf %p.", entry, entry->buf); ntb_rx_copy_callback(qp, entry); } static inline void ntb_rx_copy_callback(struct ntb_transport_qp *qp, void *data) { struct ntb_queue_entry *entry; entry = data; entry->flags |= NTBT_DESC_DONE_FLAG; ntb_complete_rxc(qp); } static void ntb_complete_rxc(struct ntb_transport_qp *qp) { struct ntb_queue_entry *entry; struct mbuf *m; unsigned len; CTR0(KTR_NTB, "RX: rx_completion_task"); mtx_lock_spin(&qp->ntb_rx_q_lock); while (!STAILQ_EMPTY(&qp->rx_post_q)) { entry = STAILQ_FIRST(&qp->rx_post_q); if ((entry->flags & NTBT_DESC_DONE_FLAG) == 0) break; entry->x_hdr->flags = 0; iowrite32(entry->index, &qp->rx_info->entry); STAILQ_REMOVE_HEAD(&qp->rx_post_q, entry); len = entry->len; m = entry->buf; /* * Re-initialize queue_entry for reuse; rx_handler takes * ownership of the mbuf. */ entry->buf = NULL; entry->len = transport_mtu; entry->cb_data = qp->cb_data; STAILQ_INSERT_TAIL(&qp->rx_pend_q, entry, entry); mtx_unlock_spin(&qp->ntb_rx_q_lock); CTR2(KTR_NTB, "RX: completing entry %p, mbuf %p", entry, m); if (qp->rx_handler != NULL && qp->client_ready) qp->rx_handler(qp, qp->cb_data, m, len); else m_freem(m); mtx_lock_spin(&qp->ntb_rx_q_lock); } mtx_unlock_spin(&qp->ntb_rx_q_lock); } static void ntb_transport_doorbell_callback(void *data, uint32_t vector) { struct ntb_transport_ctx *nt = data; struct ntb_transport_qp *qp; uint64_t vec_mask; unsigned qp_num; vec_mask = ntb_db_vector_mask(nt->dev, vector); vec_mask &= nt->qp_bitmap; if ((vec_mask & (vec_mask - 1)) != 0) vec_mask &= ntb_db_read(nt->dev); if (vec_mask != 0) { ntb_db_set_mask(nt->dev, vec_mask); ntb_db_clear(nt->dev, vec_mask); } while (vec_mask != 0) { qp_num = ffsll(vec_mask) - 1; qp = &nt->qp_vec[qp_num]; if (qp->link_is_up) taskqueue_enqueue(qp->rxc_tq, &qp->rxc_db_work); vec_mask &= ~(1ull << qp_num); } } /* Link Event handler */ static void ntb_transport_event_callback(void *data) { struct ntb_transport_ctx *nt = data; if (ntb_link_is_up(nt->dev, &nt->link_speed, &nt->link_width)) { ntb_printf(1, "HW link up\n"); callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt); } else { ntb_printf(1, "HW link down\n"); taskqueue_enqueue(taskqueue_swi, &nt->link_cleanup); } } /* Link bring up */ static void ntb_transport_link_work(void *arg) { struct ntb_transport_ctx *nt = arg; device_t dev = nt->dev; struct ntb_transport_qp *qp; uint64_t val64, size; uint32_t val; unsigned i; int rc; /* send the local info, in the opposite order of the way we read it */ for (i = 0; i < nt->mw_count; i++) { size = nt->mw_vec[i].phys_size; if (max_mw_size != 0 && size > max_mw_size) size = max_mw_size; ntb_peer_spad_write(dev, NTBT_MW0_SZ_HIGH + (i * 2), size >> 32); ntb_peer_spad_write(dev, NTBT_MW0_SZ_LOW + (i * 2), size); } ntb_peer_spad_write(dev, NTBT_NUM_MWS, nt->mw_count); ntb_peer_spad_write(dev, NTBT_NUM_QPS, nt->qp_count); ntb_peer_spad_write(dev, NTBT_QP_LINKS, 0); ntb_peer_spad_write(dev, NTBT_VERSION, NTB_TRANSPORT_VERSION); /* Query the remote side for its info */ val = 0; ntb_spad_read(dev, NTBT_VERSION, &val); if (val != NTB_TRANSPORT_VERSION) goto out; ntb_spad_read(dev, NTBT_NUM_QPS, &val); if (val != nt->qp_count) goto out; ntb_spad_read(dev, NTBT_NUM_MWS, &val); if (val != nt->mw_count) goto out; for (i = 0; i < nt->mw_count; i++) { ntb_spad_read(dev, NTBT_MW0_SZ_HIGH + (i * 2), &val); val64 = (uint64_t)val << 32; ntb_spad_read(dev, NTBT_MW0_SZ_LOW + (i * 2), &val); val64 |= val; rc = ntb_set_mw(nt, i, val64); if (rc != 0) goto free_mws; } nt->link_is_up = true; ntb_printf(1, "transport link up\n"); for (i = 0; i < nt->qp_count; i++) { qp = &nt->qp_vec[i]; ntb_transport_setup_qp_mw(nt, i); if (qp->client_ready) callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp); } return; free_mws: for (i = 0; i < nt->mw_count; i++) ntb_free_mw(nt, i); out: if (ntb_link_is_up(dev, &nt->link_speed, &nt->link_width)) callout_reset(&nt->link_work, NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_transport_link_work, nt); } static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, size_t size) { struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; size_t xlat_size, buff_size; int rc; if (size == 0) return (EINVAL); xlat_size = roundup(size, mw->xlat_align_size); buff_size = xlat_size; /* No need to re-setup */ if (mw->xlat_size == xlat_size) return (0); if (mw->buff_size != 0) ntb_free_mw(nt, num_mw); /* Alloc memory for receiving data. Must be aligned */ mw->xlat_size = xlat_size; mw->buff_size = buff_size; mw->virt_addr = contigmalloc(mw->buff_size, M_NTB_T, M_ZERO, 0, mw->addr_limit, mw->xlat_align, 0); if (mw->virt_addr == NULL) { ntb_printf(0, "Unable to allocate MW buffer of size %zu/%zu\n", mw->buff_size, mw->xlat_size); mw->xlat_size = 0; mw->buff_size = 0; return (ENOMEM); } /* TODO: replace with bus_space_* functions */ mw->dma_addr = vtophys(mw->virt_addr); /* * Ensure that the allocation from contigmalloc is aligned as * requested. XXX: This may not be needed -- brought in for parity * with the Linux driver. */ if (mw->dma_addr % mw->xlat_align != 0) { ntb_printf(0, "DMA memory 0x%jx not aligned to BAR size 0x%zx\n", (uintmax_t)mw->dma_addr, size); ntb_free_mw(nt, num_mw); return (ENOMEM); } /* Notify HW the memory location of the receive buffer */ rc = ntb_mw_set_trans(nt->dev, num_mw, mw->dma_addr, mw->xlat_size); if (rc) { ntb_printf(0, "Unable to set mw%d translation\n", num_mw); ntb_free_mw(nt, num_mw); return (rc); } return (0); } static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw) { struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; if (mw->virt_addr == NULL) return; ntb_mw_clear_trans(nt->dev, num_mw); contigfree(mw->virt_addr, mw->xlat_size, M_NTB_T); mw->xlat_size = 0; mw->buff_size = 0; mw->virt_addr = NULL; } static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, unsigned int qp_num) { struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; struct ntb_transport_mw *mw; void *offset; ntb_q_idx_t i; size_t rx_size; unsigned num_qps_mw, mw_num, mw_count; mw_count = nt->mw_count; mw_num = QP_TO_MW(nt, qp_num); mw = &nt->mw_vec[mw_num]; if (mw->virt_addr == NULL) return (ENOMEM); if (mw_num < nt->qp_count % mw_count) num_qps_mw = nt->qp_count / mw_count + 1; else num_qps_mw = nt->qp_count / mw_count; rx_size = mw->xlat_size / num_qps_mw; qp->rx_buff = mw->virt_addr + rx_size * (qp_num / mw_count); rx_size -= sizeof(struct ntb_rx_info); qp->remote_rx_info = (void*)(qp->rx_buff + rx_size); /* Due to house-keeping, there must be at least 2 buffs */ qp->rx_max_frame = qmin(transport_mtu, rx_size / 2); qp->rx_max_entry = rx_size / qp->rx_max_frame; qp->rx_index = 0; qp->remote_rx_info->entry = qp->rx_max_entry - 1; /* Set up the hdr offsets with 0s */ for (i = 0; i < qp->rx_max_entry; i++) { offset = (void *)(qp->rx_buff + qp->rx_max_frame * (i + 1) - sizeof(struct ntb_payload_header)); memset(offset, 0, sizeof(struct ntb_payload_header)); } qp->rx_pkts = 0; qp->tx_pkts = 0; qp->tx_index = 0; return (0); } static void ntb_qp_link_work(void *arg) { struct ntb_transport_qp *qp = arg; device_t dev = qp->dev; struct ntb_transport_ctx *nt = qp->transport; int i; uint32_t val; /* Report queues that are up on our side */ for (i = 0, val = 0; i < nt->qp_count; i++) { if (nt->qp_vec[i].client_ready) val |= (1 << i); } ntb_peer_spad_write(dev, NTBT_QP_LINKS, val); /* See if the remote side is up */ ntb_spad_read(dev, NTBT_QP_LINKS, &val); if ((val & (1ull << qp->qp_num)) != 0) { ntb_printf(2, "qp %d link up\n", qp->qp_num); qp->link_is_up = true; if (qp->event_handler != NULL) qp->event_handler(qp->cb_data, NTB_LINK_UP); ntb_db_clear_mask(dev, 1ull << qp->qp_num); } else if (nt->link_is_up) callout_reset(&qp->link_work, NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp); } /* Link down event*/ static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt) { struct ntb_transport_qp *qp; int i; callout_drain(&nt->link_work); nt->link_is_up = 0; /* Pass along the info to any clients */ for (i = 0; i < nt->qp_count; i++) { if ((nt->qp_bitmap & (1 << i)) != 0) { qp = &nt->qp_vec[i]; ntb_qp_link_cleanup(qp); callout_drain(&qp->link_work); } } /* * The scratchpad registers keep the values if the remote side * goes down, blast them now to give them a sane value the next * time they are accessed */ ntb_spad_clear(nt->dev); } static void ntb_transport_link_cleanup_work(void *arg, int pending __unused) { ntb_transport_link_cleanup(arg); } static void ntb_qp_link_down(struct ntb_transport_qp *qp) { ntb_qp_link_cleanup(qp); } static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp) { qp->link_is_up = false; ntb_db_set_mask(qp->dev, 1ull << qp->qp_num); qp->tx_index = qp->rx_index = 0; qp->tx_bytes = qp->rx_bytes = 0; qp->tx_pkts = qp->rx_pkts = 0; qp->rx_ring_empty = 0; qp->tx_ring_full = 0; qp->rx_err_no_buf = qp->tx_err_no_buf = 0; qp->rx_err_oflow = qp->rx_err_ver = 0; } static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp) { callout_drain(&qp->link_work); ntb_qp_link_down_reset(qp); if (qp->event_handler != NULL) qp->event_handler(qp->cb_data, NTB_LINK_DOWN); } /* Link commanded down */ /** * ntb_transport_link_down - Notify NTB transport to no longer enqueue data * @qp: NTB transport layer queue to be disabled * * Notify NTB transport layer of client's desire to no longer receive data on * transport queue specified. It is the client's responsibility to ensure all * entries on queue are purged or otherwise handled appropriately. */ void ntb_transport_link_down(struct ntb_transport_qp *qp) { struct ntb_transport_ctx *nt = qp->transport; int i; uint32_t val; qp->client_ready = false; for (i = 0, val = 0; i < nt->qp_count; i++) { if (nt->qp_vec[i].client_ready) val |= (1 << i); } ntb_peer_spad_write(qp->dev, NTBT_QP_LINKS, val); if (qp->link_is_up) ntb_send_link_down(qp); else callout_drain(&qp->link_work); } /** * ntb_transport_link_query - Query transport link state * @qp: NTB transport layer queue to be queried * * Query connectivity to the remote system of the NTB transport queue * * RETURNS: true for link up or false for link down */ bool ntb_transport_link_query(struct ntb_transport_qp *qp) { return (qp->link_is_up); } /** * ntb_transport_link_speed - Query transport link speed * @qp: NTB transport layer queue to be queried * * Query connection speed to the remote system of the NTB transport queue * * RETURNS: link speed in bits per second */ uint64_t ntb_transport_link_speed(struct ntb_transport_qp *qp) { struct ntb_transport_ctx *nt = qp->transport; uint64_t rate; if (!nt->link_is_up) return (0); switch (nt->link_speed) { case NTB_SPEED_GEN1: rate = 2500000000 * 8 / 10; break; case NTB_SPEED_GEN2: rate = 5000000000 * 8 / 10; break; case NTB_SPEED_GEN3: rate = 8000000000 * 128 / 130; break; case NTB_SPEED_GEN4: rate = 16000000000 * 128 / 130; break; default: return (0); } if (nt->link_width <= 0) return (0); return (rate * nt->link_width); } static void ntb_send_link_down(struct ntb_transport_qp *qp) { struct ntb_queue_entry *entry; int i, rc; if (!qp->link_is_up) return; for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) { entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); if (entry != NULL) break; pause("NTB Wait for link down", hz / 10); } if (entry == NULL) return; entry->cb_data = NULL; entry->buf = NULL; entry->len = 0; entry->flags = NTBT_LINK_DOWN_FLAG; mtx_lock(&qp->tx_lock); rc = ntb_process_tx(qp, entry); mtx_unlock(&qp->tx_lock); if (rc != 0) printf("ntb: Failed to send link down\n"); ntb_qp_link_down_reset(qp); } /* List Management */ static void ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry, struct ntb_queue_list *list) { mtx_lock_spin(lock); STAILQ_INSERT_TAIL(list, entry, entry); mtx_unlock_spin(lock); } static struct ntb_queue_entry * ntb_list_rm(struct mtx *lock, struct ntb_queue_list *list) { struct ntb_queue_entry *entry; mtx_lock_spin(lock); if (STAILQ_EMPTY(list)) { entry = NULL; goto out; } entry = STAILQ_FIRST(list); STAILQ_REMOVE_HEAD(list, entry); out: mtx_unlock_spin(lock); return (entry); } static struct ntb_queue_entry * ntb_list_mv(struct mtx *lock, struct ntb_queue_list *from, struct ntb_queue_list *to) { struct ntb_queue_entry *entry; mtx_lock_spin(lock); if (STAILQ_EMPTY(from)) { entry = NULL; goto out; } entry = STAILQ_FIRST(from); STAILQ_REMOVE_HEAD(from, entry); STAILQ_INSERT_TAIL(to, entry, entry); out: mtx_unlock_spin(lock); return (entry); } /** * ntb_transport_qp_num - Query the qp number * @qp: NTB transport layer queue to be queried * * Query qp number of the NTB transport queue * * RETURNS: a zero based number specifying the qp number */ unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp) { return (qp->qp_num); } /** * ntb_transport_max_size - Query the max payload size of a qp * @qp: NTB transport layer queue to be queried * * Query the maximum payload size permissible on the given qp * * RETURNS: the max payload size of a qp */ unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp) { return (qp->tx_max_frame - sizeof(struct ntb_payload_header)); } unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp) { unsigned int head = qp->tx_index; unsigned int tail = qp->remote_rx_info->entry; return (tail >= head ? tail - head : qp->tx_max_entry + tail - head); } static device_method_t ntb_transport_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ntb_transport_probe), DEVMETHOD(device_attach, ntb_transport_attach), DEVMETHOD(device_detach, ntb_transport_detach), + /* Bus interface */ + DEVMETHOD(bus_child_location_str, ntb_transport_child_location_str), + DEVMETHOD(bus_print_child, ntb_transport_print_child), DEVMETHOD_END }; devclass_t ntb_transport_devclass; static DEFINE_CLASS_0(ntb_transport, ntb_transport_driver, ntb_transport_methods, sizeof(struct ntb_transport_ctx)); DRIVER_MODULE(ntb_transport, ntb_hw, ntb_transport_driver, ntb_transport_devclass, NULL, NULL); MODULE_DEPEND(ntb_transport, ntb, 1, 1, 1); MODULE_VERSION(ntb_transport, 1); Index: stable/11 =================================================================== --- stable/11 (revision 323454) +++ stable/11 (revision 323455) Property changes on: stable/11 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r323126