Index: ./sys/dev/ntb/test/ntb_perf.c =================================================================== --- ./sys/dev/ntb/test/ntb_perf.c +++ ./sys/dev/ntb/test/ntb_perf.c @@ -1,128 +1,153 @@ -/* +/*- * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. + * redistributing this file, you may do so under either license. * - * GPL LICENSE SUMMARY + * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. All rights reserved. - * Copyright(c) 2017 T-Platforms. All Rights Reserved. + * Copyright(c) 2019 Shreyank Amartya + * Copyright(c) 2019 Advanced Micro Devices, Inc. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. * - * BSD LICENSE + * BSD LICENSE * - * Copyright(c) 2015 Intel Corporation. All rights reserved. - * Copyright(c) 2017 T-Platforms. All Rights Reserved. + * Copyright(c) 2015 AMD Corporation. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copy - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copy + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of AMD Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * PCIe NTB Perf Linux driver + * PCIe NTB Perf FreeBSD driver */ /* * How to use this tool, by example. * - * Assuming $DBG_DIR is something like: - * '/sys/kernel/debug/ntb_perf/0000:00:03.0' * Suppose aside from local device there is at least one remote device * connected to NTB with index 0. *----------------------------------------------------------------------------- * Eg: install driver with specified chunk/total orders and dma-enabled flag * - * root@self# insmod ntb_perf.ko chunk_order=19 total_order=28 use_dma + * root@freebsd:~ #kldload ntb_perf *----------------------------------------------------------------------------- - * Eg: check NTB ports (index) and MW mapping information + * Eg: Set test parameter total order using sysctl total_order + * + * root@freebsd:~ #sysctl dev.ntb_perf.0.total_order=20 + *----------------------------------------------------------------------------- + * Eg: Set test parameter chunk order using sysctl data_order * - * root@self# cat $DBG_DIR/info + * root@freebsd:~ #sysctl dev.ntb_perf.0.data_order=20 *----------------------------------------------------------------------------- * Eg: start performance test with peer (index 0) and get the test metrics * - * root@self# echo 0 > $DBG_DIR/run - * root@self# cat $DBG_DIR/run + * root@freebsd:~ #sysctl dev.ntb_perf.0.run=0 + * root@freebsd:~ #sysctl dev.ntb_perf.0.read_stats + *----------------------------------------------------------------------------- + * Eg: check NTB ports (index) and MW mapping information + * + * root@freebsd:~ #sysctl dev.ntb_perf.0.info + *----------------------------------------------------------------------------- */ -#include -#include -#include -#include -#include -#include -#include -#include +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include + +#include #include -#include -#include -#include -#include -#include -#include -#include -#define DRIVER_NAME "ntb_perf" -#define DRIVER_VERSION "2.0" +#include "../ntb.h" -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRIVER_VERSION); -MODULE_AUTHOR("Dave Jiang "); -MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool"); +#define DRIVER_NAME "ntb_perf" +#define DRIVER_VERSION "1.0" #define MAX_THREADS_CNT 32 #define DEF_THREADS_CNT 1 -#define MAX_CHUNK_SIZE SZ_1M +#define MAX_CHUNK_SIZE 0x100000 #define MAX_CHUNK_ORDER 20 /* no larger than 1M */ -#define DMA_TRIES 100 -#define DMA_MDELAY 10 - #define MSG_TRIES 500 -#define MSG_UDELAY_LOW 1000 -#define MSG_UDELAY_HIGH 2000 -#define PERF_BUF_LEN 1024 +#define 
lower_32_bits(n) ((uint32_t)(n)) +#define upper_32_bits(n) ((uint32_t)((n) >> 32)) + +MALLOC_DEFINE(M_PERF, "ntb_perf","ntb perf data"); + +SYSCTL_NODE(_debug, OID_AUTO, ntb_perf, CTLFLAG_RWTUN, NULL, "NTB Perf Debugging"); -static unsigned long max_mw_size; -module_param(max_mw_size, ulong, 0644); -MODULE_PARM_DESC(max_mw_size, "Upper limit of memory window size"); +static unsigned g_ntb_perf_debug_level; +SYSCTL_UINT(_debug_ntb_perf, OID_AUTO, debug_level, CTLFLAG_RWTUN, + &g_ntb_perf_debug_level, 0, "NTB Perf log level -- higher is verbose"); -static unsigned char chunk_order = 19; /* 512K */ -module_param(chunk_order, byte, 0644); -MODULE_PARM_DESC(chunk_order, "Data chunk order [2^n] to transfer"); +#define ntb_perf_printf(lvl, ...) do { \ + if (lvl <= g_ntb_perf_debug_level) \ + device_printf(perf->dev, __VA_ARGS__); \ +} while (0) -static unsigned char total_order = 30; /* 1G */ -module_param(total_order, byte, 0644); -MODULE_PARM_DESC(total_order, "Total data order [2^n] to transfer"); +/* + *============================================================================== + * Static data declarations + *============================================================================== + */ -static bool use_dma; /* default to 0 */ -module_param(use_dma, bool, 0644); -MODULE_PARM_DESC(use_dma, "Use DMA engine to measure performance"); +static unsigned int max_mw_size = 0x100000; +static unsigned int chunk_order = 19; +static unsigned int total_order = 30; +static bool use_dma = false; +static struct taskqueue *work_queue; -/*============================================================================== +/* + *============================================================================== * Perf driver data definition *============================================================================== */ @@ -146,292 +171,196 @@ int gidx; /* Outbound MW params */ - u64 outbuf_xlat; - resource_size_t outbuf_size; - void __iomem *outbuf; + bus_addr_t addr_limit; + caddr_t outbuf; + size_t outbuf_size; + size_t xlat_align_size; + size_t xlat_align; + uint64_t outbuf_xlat; /* Inbound MW params */ - dma_addr_t inbuf_xlat; - resource_size_t inbuf_size; - void *inbuf; + bus_dmamap_t dma_map; + bus_dma_tag_t dma_tag; + caddr_t *inbuf; + size_t inbuf_size; + bus_addr_t inbuf_xlat; /* NTB connection setup service */ - struct work_struct service; - unsigned long sts; + unsigned long sts; + struct task service_task; }; -#define to_peer_service(__work) \ - container_of(__work, struct perf_peer, service) struct perf_thread { struct perf_ctx *perf; - int tidx; - - /* DMA-based test sync parameters */ - atomic_t dma_sync; - wait_queue_head_t dma_wait; - struct dma_chan *dma_chan; + int tidx; /* Data source and measured statistics */ - void *src; - u64 copied; - ktime_t duration; - int status; - struct work_struct work; + ktime_t duration; + void *src; + int status; + uint64_t copied; + struct task work_task; }; -#define to_thread_work(__work) \ - container_of(__work, struct perf_thread, work) struct perf_ctx { - struct ntb_dev *ntb; + device_t dev; /* Global device index and peers descriptors */ - int gidx; - int pcnt; + int gidx; + int pcnt; struct perf_peer *peers; /* Performance measuring work-threads interface */ - unsigned long busy_flag; - wait_queue_head_t twait; - atomic_t tsync; - u8 tcnt; - struct perf_peer *test_peer; - struct perf_thread threads[MAX_THREADS_CNT]; + size_t tcnt; + unsigned int tsync; + unsigned long busy_flag; + struct callout clout; + struct perf_peer *test_peer; + struct perf_thread 
threads[MAX_THREADS_CNT]; /* Scratchpad/Message IO operations */ - int (*cmd_send)(struct perf_peer *peer, enum perf_cmd cmd, u64 data); + int (*cmd_send)(struct perf_peer *peer, enum perf_cmd cmd, uint64_t data); int (*cmd_recv)(struct perf_ctx *perf, int *pidx, enum perf_cmd *cmd, - u64 *data); + uint64_t *data); - struct dentry *dbgfs_dir; + struct mtx lock; }; -/* - * Scratchpads-base commands interface - */ -#define PERF_SPAD_CNT(_pcnt) \ - (3*((_pcnt) + 1)) -#define PERF_SPAD_CMD(_gidx) \ - (3*(_gidx)) -#define PERF_SPAD_LDATA(_gidx) \ - (3*(_gidx) + 1) -#define PERF_SPAD_HDATA(_gidx) \ - (3*(_gidx) + 2) -#define PERF_SPAD_NOTIFY(_gidx) \ - (BIT_ULL(_gidx)) +struct ntb_load_cb_args { + bus_addr_t addr; + int error; +}; /* - * Messages-base commands interface - */ -#define PERF_MSG_CNT 3 -#define PERF_MSG_CMD 0 -#define PERF_MSG_LDATA 1 -#define PERF_MSG_HDATA 2 - -/*============================================================================== - * Static data declarations - *============================================================================== + * Scratchpads-base commands interface */ +#define PERF_SPAD_CNT(_pcnt) (3*((_pcnt) + 1)) +#define PERF_SPAD_CMD(_gidx) (3*(_gidx)) +#define PERF_SPAD_LDATA(_gidx) (3*(_gidx) + 1) +#define PERF_SPAD_HDATA(_gidx) (3*(_gidx) + 2) +#define PERF_SPAD_NOTIFY(_gidx) (BIT_ULL(_gidx)) -static struct dentry *perf_dbgfs_topdir; -static struct workqueue_struct *perf_wq __read_mostly; +#define BITS_PER_LONG_LONG 64 +#define BIT_ULL(nr) (1ULL << (nr)) +#define BIT_ULL_MASK(nr) (1ULL << ((nr) % BITS_PER_LONG_LONG)) +#define GENMASK_ULL(h, l) ((~0ULL >> (63 - (h))) & (~0ULL << (l))) -/*============================================================================== +/* + *============================================================================== * NTB cross-link commands execution service *============================================================================== */ static void perf_terminate_test(struct perf_ctx *perf); -static inline bool perf_link_is_up(struct perf_peer *peer) +static inline bool +perf_link_is_up(struct perf_peer *peer) { - u64 link; + uint64_t link; - link = ntb_link_is_up(peer->perf->ntb, NULL, NULL); + link = ntb_link_is_up(peer->perf->dev, NULL, NULL); return !!(link & BIT_ULL_MASK(peer->pidx)); } -static int perf_spad_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, - u64 data) +static int +perf_spad_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, + uint64_t data) { struct perf_ctx *perf = peer->perf; - int try; - u32 sts; - - dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data); + int try, ret; + uint32_t val = 0; - /* - * Perform predefined number of attempts before give up. - * We are sending the data to the port specific scratchpad, so - * to prevent a multi-port access race-condition. Additionally - * there is no need in local locking since only thread-safe - * service work is using this method.
- */ for (try = 0; try < MSG_TRIES; try++) { if (!perf_link_is_up(peer)) - return -ENOLINK; + return (ENOLINK); + + ret = ntb_peer_spad_read(perf->dev, PERF_SPAD_CMD(perf->gidx), + &val); - sts = ntb_peer_spad_read(perf->ntb, peer->pidx, - PERF_SPAD_CMD(perf->gidx)); - if (sts != PERF_CMD_INVAL) { - usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH); + if (val != PERF_CMD_INVAL) { + DELAY(2000); continue; } - ntb_peer_spad_write(perf->ntb, peer->pidx, - PERF_SPAD_LDATA(perf->gidx), - lower_32_bits(data)); - ntb_peer_spad_write(perf->ntb, peer->pidx, - PERF_SPAD_HDATA(perf->gidx), - upper_32_bits(data)); - mmiowb(); - ntb_peer_spad_write(perf->ntb, peer->pidx, - PERF_SPAD_CMD(perf->gidx), - cmd); - mmiowb(); - ntb_peer_db_set(perf->ntb, PERF_SPAD_NOTIFY(peer->gidx)); + ntb_peer_spad_write(perf->dev, + PERF_SPAD_LDATA(perf->gidx), lower_32_bits(data)); - dev_dbg(&perf->ntb->dev, "DB ring peer %#llx\n", - PERF_SPAD_NOTIFY(peer->gidx)); + ntb_peer_spad_write(perf->dev, + PERF_SPAD_HDATA(perf->gidx), upper_32_bits(data)); + barrier(); + + ntb_peer_spad_write(perf->dev, PERF_SPAD_CMD(perf->gidx), cmd); + + barrier(); + + ntb_peer_db_set(perf->dev, PERF_SPAD_NOTIFY(peer->gidx)); + + ntb_perf_printf(1, "%s: DB ring peer %#llx\n", __func__, + PERF_SPAD_NOTIFY(peer->gidx)); break; } - return try < MSG_TRIES ? 0 : -EAGAIN; + return (try < MSG_TRIES ? 0 : EAGAIN); } -static int perf_spad_cmd_recv(struct perf_ctx *perf, int *pidx, - enum perf_cmd *cmd, u64 *data) +static int +perf_spad_cmd_recv(struct perf_ctx *perf, int *pidx, + enum perf_cmd *cmd, uint64_t *data) { struct perf_peer *peer; - u32 val; + uint32_t val, ret; - ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx)); + ntb_db_clear(perf->dev, PERF_SPAD_NOTIFY(perf->gidx)); - /* - * We start scanning all over, since cleared DB may have been set - * by any peer. Yes, it makes peer with smaller index being - * serviced with greater priority, but it's convenient for spad - * and message code unification and simplicity. - */ for (*pidx = 0; *pidx < perf->pcnt; (*pidx)++) { peer = &perf->peers[*pidx]; if (!perf_link_is_up(peer)) continue; - val = ntb_spad_read(perf->ntb, PERF_SPAD_CMD(peer->gidx)); + ret = ntb_spad_read(perf->dev, PERF_SPAD_CMD(peer->gidx), &val); if (val == PERF_CMD_INVAL) continue; *cmd = val; - val = ntb_spad_read(perf->ntb, PERF_SPAD_LDATA(peer->gidx)); + ret = ntb_spad_read(perf->dev, PERF_SPAD_LDATA(peer->gidx), + &val); *data = val; - val = ntb_spad_read(perf->ntb, PERF_SPAD_HDATA(peer->gidx)); - *data |= (u64)val << 32; + ret = ntb_spad_read(perf->dev, PERF_SPAD_HDATA(peer->gidx), + &val); + *data |= (uint64_t)val << 32; /* Next command can be retrieved from now */ - ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx), - PERF_CMD_INVAL); - - dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data); - - return 0; - } - - return -ENODATA; -} - -static int perf_msg_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, - u64 data) -{ - struct perf_ctx *perf = peer->perf; - int try, ret; - u64 outbits; - - dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data); - - /* - * Perform predefined number of attempts before give up. Message - * registers are free of race-condition problem when accessed - * from different ports, so we don't need splitting registers - * by global device index. We also won't have local locking, - * since the method is used from service work only. 
- */ - outbits = ntb_msg_outbits(perf->ntb); - for (try = 0; try < MSG_TRIES; try++) { - if (!perf_link_is_up(peer)) - return -ENOLINK; + ntb_spad_write(perf->dev, PERF_SPAD_CMD(peer->gidx), + PERF_CMD_INVAL); - ret = ntb_msg_clear_sts(perf->ntb, outbits); - if (ret) - return ret; - - ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_LDATA, - lower_32_bits(data)); - - if (ntb_msg_read_sts(perf->ntb) & outbits) { - usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH); - continue; - } - - ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_HDATA, - upper_32_bits(data)); - mmiowb(); + ntb_perf_printf(1, "%s: CMD recv: %d 0x%lx\n", + __func__, *cmd, *data); - /* This call shall trigger peer message event */ - ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_CMD, cmd); - - break; + return (0); } - return try < MSG_TRIES ? 0 : -EAGAIN; + return (EINVAL); } -static int perf_msg_cmd_recv(struct perf_ctx *perf, int *pidx, - enum perf_cmd *cmd, u64 *data) -{ - u64 inbits; - u32 val; - - inbits = ntb_msg_inbits(perf->ntb); - - if (hweight64(ntb_msg_read_sts(perf->ntb) & inbits) < 3) - return -ENODATA; - - val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_CMD); - *cmd = val; - - val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_LDATA); - *data = val; - - val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_HDATA); - *data |= (u64)val << 32; - - /* Next command can be retrieved from now */ - ntb_msg_clear_sts(perf->ntb, inbits); - - dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data); - - return 0; -} - -static int perf_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, u64 data) +static int +perf_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, uint64_t data) { struct perf_ctx *perf = peer->perf; if (cmd == PERF_CMD_SSIZE || cmd == PERF_CMD_SXLAT) return perf->cmd_send(peer, cmd, data); - dev_err(&perf->ntb->dev, "Send invalid command\n"); - return -EINVAL; + return (EINVAL); } -static int perf_cmd_exec(struct perf_peer *peer, enum perf_cmd cmd) +static int +perf_cmd_exec(struct perf_peer *peer, enum perf_cmd cmd) { - switch (cmd) { + struct perf_ctx *perf = peer->perf; + + switch(cmd) { case PERF_CMD_SSIZE: case PERF_CMD_RSIZE: case PERF_CMD_SXLAT: @@ -439,30 +368,30 @@ case PERF_CMD_CLEAR: break; default: - dev_err(&peer->perf->ntb->dev, "Exec invalid command\n"); - return -EINVAL; + ntb_perf_printf(1, "%s: Exec invalid command\n", __func__); + return (EINVAL); } - /* No need of memory barrier, since bit ops have invernal lock */ set_bit(cmd, &peer->sts); + ntb_perf_printf(1, "%s: CMD exec: %d\n", __func__, cmd); + taskqueue_enqueue(taskqueue_swi, &peer->service_task); - dev_dbg(&peer->perf->ntb->dev, "CMD exec: %d\n", cmd); - - (void)queue_work(system_highpri_wq, &peer->service); - - return 0; + return (0); } -static int perf_cmd_recv(struct perf_ctx *perf) +static int +perf_cmd_recv(struct perf_ctx *perf) { struct perf_peer *peer; int ret, pidx, cmd; - u64 data; + uint64_t data; + + while (!ntb_link_is_up(perf->dev, NULL, NULL)); while (!(ret = perf->cmd_recv(perf, &pidx, &cmd, &data))) { peer = &perf->peers[pidx]; - switch (cmd) { + switch(cmd) { case PERF_CMD_SSIZE: peer->inbuf_size = data; return perf_cmd_exec(peer, PERF_CMD_RSIZE); @@ -470,16 +399,17 @@ peer->outbuf_xlat = data; return perf_cmd_exec(peer, PERF_CMD_RXLAT); default: - dev_err(&perf->ntb->dev, "Recv invalid command\n"); - return -EINVAL; + ntb_perf_printf(1, "%s: Received invalid command\n", + __func__); + return (EINVAL); } } - /* Return 0 if no data left to process, otherwise an error */ - return ret == -ENODATA ? 
0 : ret; + return (0); } -static void perf_link_event(void *ctx) +static void +perf_link_event(void *ctx) { struct perf_ctx *perf = ctx; struct perf_peer *peer; @@ -491,8 +421,8 @@ lnk_up = perf_link_is_up(peer); - if (lnk_up && - !test_and_set_bit(PERF_STS_LNKUP, &peer->sts)) { + ntb_perf_printf(1, "%s: Link status:%x\n", __func__, lnk_up); + if (lnk_up && !test_and_set_bit(PERF_STS_LNKUP, &peer->sts)) { perf_cmd_exec(peer, PERF_CMD_SSIZE); } else if (!lnk_up && test_and_clear_bit(PERF_STS_LNKUP, &peer->sts)) { @@ -501,405 +431,265 @@ } } -static void perf_db_event(void *ctx, int vec) +static void +perf_db_event(void *ctx, uint32_t vec) { struct perf_ctx *perf = ctx; - dev_dbg(&perf->ntb->dev, "DB vec %d mask %#llx bits %#llx\n", vec, - ntb_db_vector_mask(perf->ntb, vec), ntb_db_read(perf->ntb)); + ntb_perf_printf(1, "%s: DB vec %d mask %#lx bits %#lx\n", __func__, vec, + ntb_db_vector_mask(perf->dev, vec), ntb_db_read(perf->dev)); /* Just receive all available commands */ - (void)perf_cmd_recv(perf); -} - -static void perf_msg_event(void *ctx) -{ - struct perf_ctx *perf = ctx; - - dev_dbg(&perf->ntb->dev, "Msg status bits %#llx\n", - ntb_msg_read_sts(perf->ntb)); - - /* Messages are only sent one-by-one */ - (void)perf_cmd_recv(perf); + perf_cmd_recv(perf); } static const struct ntb_ctx_ops perf_ops = { .link_event = perf_link_event, .db_event = perf_db_event, - .msg_event = perf_msg_event }; -static void perf_free_outbuf(struct perf_peer *peer) +static int +perf_setup_outbuf(struct perf_peer *peer) { - (void)ntb_peer_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx); -} - -static int perf_setup_outbuf(struct perf_peer *peer) -{ - struct perf_ctx *perf = peer->perf; - int ret; - - /* Outbuf size can be unaligned due to custom max_mw_size */ - ret = ntb_peer_mw_set_trans(perf->ntb, peer->pidx, peer->gidx, - peer->outbuf_xlat, peer->outbuf_size); - if (ret) { - dev_err(&perf->ntb->dev, "Failed to set outbuf translation\n"); - return ret; - } - /* Initialization is finally done */ set_bit(PERF_STS_DONE, &peer->sts); - return 0; + return (0); } -static void perf_free_inbuf(struct perf_peer *peer) +static void +perf_free_inbuf(struct perf_peer *peer) { - if (!peer->inbuf) + if (peer->inbuf == NULL) return; - (void)ntb_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx); - dma_free_coherent(&peer->perf->ntb->dev, peer->inbuf_size, - peer->inbuf, peer->inbuf_xlat); + ntb_mw_clear_trans(peer->perf->dev, peer->gidx); + if (peer->dma_tag) { + bus_dmamap_unload(peer->dma_tag, peer->dma_map); + bus_dmamem_free(peer->dma_tag, peer->inbuf, peer->dma_map); + bus_dma_tag_destroy(peer->dma_tag); + } + peer->inbuf_size = 0; peer->inbuf = NULL; } -static int perf_setup_inbuf(struct perf_peer *peer) +static void +ntb_load_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int error) +{ + struct ntb_load_cb_args *cba = (struct ntb_load_cb_args *)xsc; + + if (!(cba->error = error)) + cba->addr = segs[0].ds_addr; +} + +static int +perf_setup_inbuf(struct perf_peer *peer) { - resource_size_t xlat_align, size_align, size_max; struct perf_ctx *perf = peer->perf; + struct ntb_load_cb_args cba; int ret; - /* Get inbound MW parameters */ - ret = ntb_mw_get_align(perf->ntb, peer->pidx, perf->gidx, - &xlat_align, &size_align, &size_max); - if (ret) { - dev_err(&perf->ntb->dev, "Couldn't get inbuf restrictions\n"); - return ret; - } - - if (peer->inbuf_size > size_max) { - dev_err(&perf->ntb->dev, "Too big inbuf size %pa > %pa\n", - &peer->inbuf_size, &size_max); - return -EINVAL; + if (peer->inbuf_size > 
peer->outbuf_size) { + ntb_perf_printf(1, "%s: Too big inbuf size %zu > %zu\n", __func__, + peer->inbuf_size, peer->outbuf_size); + return (EINVAL); } - peer->inbuf_size = round_up(peer->inbuf_size, size_align); - perf_free_inbuf(peer); - peer->inbuf = dma_alloc_coherent(&perf->ntb->dev, peer->inbuf_size, - &peer->inbuf_xlat, GFP_KERNEL); - if (!peer->inbuf) { - dev_err(&perf->ntb->dev, "Failed to alloc inbuf of %pa\n", - &peer->inbuf_size); - return -ENOMEM; - } - if (!IS_ALIGNED(peer->inbuf_xlat, xlat_align)) { - dev_err(&perf->ntb->dev, "Unaligned inbuf allocated\n"); - goto err_free_inbuf; + if (bus_dma_tag_create(bus_get_dma_tag(perf->dev), peer->xlat_align, 0, + peer->addr_limit, BUS_SPACE_MAXADDR, + NULL, NULL, peer->inbuf_size, 1, peer->inbuf_size, + 0, NULL, NULL, &peer->dma_tag)) { + ntb_perf_printf(1, + "%s: Unable to create MW tag of size %zu/%zu\n", + __func__, peer->inbuf_size, peer->outbuf_size); + peer->outbuf_size = 0; + peer->inbuf_size = 0; + return (ENOMEM); + } + if (bus_dmamem_alloc(peer->dma_tag, (void **)&peer->inbuf, + BUS_DMA_WAITOK | BUS_DMA_ZERO, &peer->dma_map)) { + bus_dma_tag_destroy(peer->dma_tag); + ntb_perf_printf(1, + "%s: Unable to allocate MW buffer of size %zu/%zu\n", + __func__, peer->inbuf_size, peer->outbuf_size); + peer->outbuf_size = 0; + peer->inbuf_size = 0; + return (ENOMEM); + } + if (bus_dmamap_load(peer->dma_tag, peer->dma_map, peer->inbuf, + peer->inbuf_size, ntb_load_cb, &cba, BUS_DMA_NOWAIT) || cba.error) { + bus_dmamem_free(peer->dma_tag, peer->inbuf, peer->dma_map); + bus_dma_tag_destroy(peer->dma_tag); + ntb_perf_printf(1, "%s: Unable to load MW buffer of size %zu/%zu\n", + __func__, peer->inbuf_size, peer->outbuf_size); + peer->outbuf_size = 0; + peer->inbuf_size = 0; + peer->inbuf = NULL; + return (ENOMEM); } - ret = ntb_mw_set_trans(perf->ntb, peer->pidx, peer->gidx, - peer->inbuf_xlat, peer->inbuf_size); + peer->inbuf_xlat = cba.addr; + ret = ntb_mw_set_trans(perf->dev, peer->gidx, peer->inbuf_xlat, peer->inbuf_size); if (ret) { - dev_err(&perf->ntb->dev, "Failed to set inbuf translation\n"); + ntb_perf_printf(1, "%s: Failed to set inbuf translation\n", + __func__); goto err_free_inbuf; } - /* - * We submit inbuf xlat transmission cmd for execution here to follow - * the code architecture, even though this method is called from service - * work itself so the command will be executed right after it returns.
- */ - (void)perf_cmd_exec(peer, PERF_CMD_SXLAT); + perf_cmd_exec(peer, PERF_CMD_SXLAT); - return 0; + return (0); err_free_inbuf: perf_free_inbuf(peer); - - return ret; + return (ret); } -static void perf_service_work(struct work_struct *work) +static int +perf_init_service(struct perf_ctx *perf) { - struct perf_peer *peer = to_peer_service(work); + uint64_t mask; - if (test_and_clear_bit(PERF_CMD_SSIZE, &peer->sts)) - perf_cmd_send(peer, PERF_CMD_SSIZE, peer->outbuf_size); - - if (test_and_clear_bit(PERF_CMD_RSIZE, &peer->sts)) - perf_setup_inbuf(peer); + /* Check MW count */ + mask = GENMASK_ULL(perf->pcnt, 0); + if (ntb_spad_count(perf->dev) >= PERF_SPAD_CNT(perf->pcnt) && + (ntb_db_valid_mask(perf->dev) & mask) == mask) { + perf->cmd_send = perf_spad_cmd_send; + perf->cmd_recv = perf_spad_cmd_recv; - if (test_and_clear_bit(PERF_CMD_SXLAT, &peer->sts)) - perf_cmd_send(peer, PERF_CMD_SXLAT, peer->inbuf_xlat); + ntb_perf_printf(1, "%s: Scratchpad service initialized\n", + __func__); + return (0); + } - if (test_and_clear_bit(PERF_CMD_RXLAT, &peer->sts)) - perf_setup_outbuf(peer); + ntb_perf_printf(1, "%s: Scratchpad service unsupported\n", __func__); - if (test_and_clear_bit(PERF_CMD_CLEAR, &peer->sts)) { - clear_bit(PERF_STS_DONE, &peer->sts); - if (test_bit(0, &peer->perf->busy_flag) && - peer == peer->perf->test_peer) { - dev_warn(&peer->perf->ntb->dev, - "Freeing while test on-fly\n"); - perf_terminate_test(peer->perf); - } - perf_free_outbuf(peer); - perf_free_inbuf(peer); - } + return (EINVAL); } -static int perf_init_service(struct perf_ctx *perf) +static int +perf_enable_service(struct perf_ctx *perf) { - u64 mask; - - if (ntb_peer_mw_count(perf->ntb) < perf->pcnt + 1) { - dev_err(&perf->ntb->dev, "Not enough memory windows\n"); - return -EINVAL; - } - - if (ntb_msg_count(perf->ntb) >= PERF_MSG_CNT) { - perf->cmd_send = perf_msg_cmd_send; - perf->cmd_recv = perf_msg_cmd_recv; - - dev_dbg(&perf->ntb->dev, "Message service initialized\n"); - - return 0; - } + uint64_t mask, incmd_bit; + int ret, sidx, scnt; - dev_dbg(&perf->ntb->dev, "Message service unsupported\n"); + mask = ntb_db_valid_mask(perf->dev); + ntb_db_set_mask(perf->dev, mask); - mask = GENMASK_ULL(perf->pcnt, 0); - if (ntb_spad_count(perf->ntb) >= PERF_SPAD_CNT(perf->pcnt) && - (ntb_db_valid_mask(perf->ntb) & mask) == mask) { - perf->cmd_send = perf_spad_cmd_send; - perf->cmd_recv = perf_spad_cmd_recv; + ret = ntb_set_ctx(perf->dev, perf, &perf_ops); + if(ret) + return (ret); - dev_dbg(&perf->ntb->dev, "Scratchpad service initialized\n"); + if (perf->cmd_send == perf_spad_cmd_send) { + scnt = ntb_spad_count(perf->dev); + for (sidx = 0; sidx < scnt; sidx++) + ntb_spad_write(perf->dev, sidx, PERF_CMD_INVAL); + incmd_bit = PERF_SPAD_NOTIFY(perf->gidx); + ntb_db_clear_mask(perf->dev, incmd_bit); - return 0; + ntb_perf_printf(1, "%s: DB bits unmasked %#lx\n", + __func__, incmd_bit); } - dev_dbg(&perf->ntb->dev, "Scratchpad service unsupported\n"); - - dev_err(&perf->ntb->dev, "Command services unsupported\n"); + ntb_link_enable(perf->dev, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + ntb_perf_printf(1, "%s: Service Enabled\n", __func__); - return -EINVAL; + return (0); } -static int perf_enable_service(struct perf_ctx *perf) +static void +perf_service_work(void *arg, int npending) { - u64 mask, incmd_bit; - int ret, sidx, scnt; - - mask = ntb_db_valid_mask(perf->ntb); - (void)ntb_db_set_mask(perf->ntb, mask); - - ret = ntb_set_ctx(perf->ntb, perf, &perf_ops); - if (ret) - return ret; + struct perf_peer *peer = arg; + struct perf_ctx 
*perf = peer->perf; - if (perf->cmd_send == perf_msg_cmd_send) { - u64 inbits, outbits; + if (test_and_clear_bit(PERF_CMD_SSIZE, &peer->sts)) + perf_cmd_send(peer, PERF_CMD_SSIZE, peer->outbuf_size); - inbits = ntb_msg_inbits(perf->ntb); - outbits = ntb_msg_outbits(perf->ntb); - (void)ntb_msg_set_mask(perf->ntb, inbits | outbits); + if (test_and_clear_bit(PERF_CMD_RSIZE, &peer->sts)) + perf_setup_inbuf(peer); - incmd_bit = BIT_ULL(__ffs64(inbits)); - ret = ntb_msg_clear_mask(perf->ntb, incmd_bit); + if (test_and_clear_bit(PERF_CMD_SXLAT, &peer->sts)) + perf_cmd_send(peer, PERF_CMD_SXLAT, peer->inbuf_xlat); - dev_dbg(&perf->ntb->dev, "MSG sts unmasked %#llx\n", incmd_bit); - } else { - scnt = ntb_spad_count(perf->ntb); - for (sidx = 0; sidx < scnt; sidx++) - ntb_spad_write(perf->ntb, sidx, PERF_CMD_INVAL); - incmd_bit = PERF_SPAD_NOTIFY(perf->gidx); - ret = ntb_db_clear_mask(perf->ntb, incmd_bit); + if (test_and_clear_bit(PERF_CMD_RXLAT, &peer->sts)) + perf_setup_outbuf(peer); - dev_dbg(&perf->ntb->dev, "DB bits unmasked %#llx\n", incmd_bit); - } - if (ret) { - ntb_clear_ctx(perf->ntb); - return ret; + if (test_and_clear_bit(PERF_CMD_CLEAR, &peer->sts)) { + clear_bit(PERF_STS_DONE, &peer->sts); + if (test_bit(0, &peer->perf->busy_flag) && + peer == peer->perf->test_peer) { + ntb_perf_printf(1, "%s: Freeing while test on-fly\n", + __func__); + perf_terminate_test(peer->perf); + } + perf_free_inbuf(peer); } - - ntb_link_enable(perf->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); - /* Might be not necessary */ - ntb_link_event(perf->ntb); - - return 0; } -static void perf_disable_service(struct perf_ctx *perf) +static void +perf_disable_service(struct perf_ctx *perf) { int pidx; - ntb_link_disable(perf->ntb); + ntb_link_disable(perf->dev); - if (perf->cmd_send == perf_msg_cmd_send) { - u64 inbits; + ntb_db_set_mask(perf->dev, PERF_SPAD_NOTIFY(perf->gidx)); - inbits = ntb_msg_inbits(perf->ntb); - (void)ntb_msg_set_mask(perf->ntb, inbits); - } else { - (void)ntb_db_set_mask(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx)); - } - - ntb_clear_ctx(perf->ntb); + ntb_clear_ctx(perf->dev); for (pidx = 0; pidx < perf->pcnt; pidx++) perf_cmd_exec(&perf->peers[pidx], PERF_CMD_CLEAR); for (pidx = 0; pidx < perf->pcnt; pidx++) - flush_work(&perf->peers[pidx].service); + taskqueue_drain(taskqueue_swi, &perf->peers[pidx].service_task); } -/*============================================================================== +/* + *============================================================================== * Performance measuring work-thread *============================================================================== */ - -static void perf_dma_copy_callback(void *data) -{ - struct perf_thread *pthr = data; - - atomic_dec(&pthr->dma_sync); - wake_up(&pthr->dma_wait); -} - -static int perf_copy_chunk(struct perf_thread *pthr, - void __iomem *dst, void *src, size_t len) +static int +perf_copy_chunk(struct perf_thread *pthr, + void *dst, void *src, size_t len) { - struct dma_async_tx_descriptor *tx; - struct dmaengine_unmap_data *unmap; - struct device *dma_dev; - int try = 0, ret = 0; - if (!use_dma) { - memcpy_toio(dst, src, len); + memcpy(dst, src, len); goto ret_check_tsync; } - dma_dev = pthr->dma_chan->device->dev; - - if (!is_dma_copy_aligned(pthr->dma_chan->device, offset_in_page(src), - offset_in_page(dst), len)) - return -EIO; - - unmap = dmaengine_get_unmap_data(dma_dev, 2, GFP_NOWAIT); - if (!unmap) - return -ENOMEM; - - unmap->len = len; - unmap->addr[0] = dma_map_page(dma_dev, virt_to_page(src), - 
offset_in_page(src), len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_dev, unmap->addr[0])) { - ret = -EIO; - goto err_free_resource; - } - unmap->to_cnt = 1; - - unmap->addr[1] = dma_map_page(dma_dev, virt_to_page(dst), - offset_in_page(dst), len, DMA_FROM_DEVICE); - if (dma_mapping_error(dma_dev, unmap->addr[1])) { - ret = -EIO; - goto err_free_resource; - } - unmap->from_cnt = 1; - - do { - tx = dmaengine_prep_dma_memcpy(pthr->dma_chan, unmap->addr[1], - unmap->addr[0], len, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); - if (!tx) - msleep(DMA_MDELAY); - } while (!tx && (try++ < DMA_TRIES)); - - if (!tx) { - ret = -EIO; - goto err_free_resource; - } - - tx->callback = perf_dma_copy_callback; - tx->callback_param = pthr; - dma_set_unmap(tx, unmap); - - ret = dma_submit_error(dmaengine_submit(tx)); - if (ret) { - dmaengine_unmap_put(unmap); - goto err_free_resource; - } - - dmaengine_unmap_put(unmap); - - atomic_inc(&pthr->dma_sync); - dma_async_issue_pending(pthr->dma_chan); - ret_check_tsync: - return likely(atomic_read(&pthr->perf->tsync) > 0) ? 0 : -EINTR; - -err_free_resource: - dmaengine_unmap_put(unmap); - - return ret; -} - -static bool perf_dma_filter(struct dma_chan *chan, void *data) -{ - struct perf_ctx *perf = data; - int node; - - node = dev_to_node(&perf->ntb->dev); - - return node == NUMA_NO_NODE || node == dev_to_node(chan->device->dev); + return (__predict_true((int)atomic_load_int(&pthr->perf->tsync) > 0) ? 0 : EINTR); } -static int perf_init_test(struct perf_thread *pthr) +static int +perf_init_test(struct perf_thread *pthr) { struct perf_ctx *perf = pthr->perf; - dma_cap_mask_t dma_mask; - pthr->src = kmalloc_node(perf->test_peer->outbuf_size, GFP_KERNEL, - dev_to_node(&perf->ntb->dev)); + pthr->src = malloc(perf->test_peer->outbuf_size, M_PERF, M_WAITOK); if (!pthr->src) - return -ENOMEM; + return (ENOMEM); + arc4rand(pthr->src, perf->test_peer->outbuf_size, 1); + ntb_perf_printf(1, "%s: Test init, alloc %zu random bytes\n", + __func__, perf->test_peer->outbuf_size); - get_random_bytes(pthr->src, perf->test_peer->outbuf_size); - - if (!use_dma) - return 0; - - dma_cap_zero(dma_mask); - dma_cap_set(DMA_MEMCPY, dma_mask); - pthr->dma_chan = dma_request_channel(dma_mask, perf_dma_filter, perf); - if (!pthr->dma_chan) { - dev_err(&perf->ntb->dev, "%d: Failed to get DMA channel\n", - pthr->tidx); - atomic_dec(&perf->tsync); - wake_up(&perf->twait); - kfree(pthr->src); - return -ENODEV; - } - - atomic_set(&pthr->dma_sync, 0); - - return 0; + return (0); } -static int perf_run_test(struct perf_thread *pthr) +static int +perf_run_test(struct perf_thread *pthr) { struct perf_peer *peer = pthr->perf->test_peer; struct perf_ctx *perf = pthr->perf; - void __iomem *flt_dst, *bnd_dst; - u64 total_size, chunk_size; + void *flt_dst, *bnd_dst; + uint64_t total_size, chunk_size; void *flt_src; int ret = 0; total_size = 1ULL << total_order; chunk_size = 1ULL << chunk_order; - chunk_size = min_t(u64, peer->outbuf_size, chunk_size); + chunk_size = MIN(peer->outbuf_size, chunk_size); flt_src = pthr->src; bnd_dst = peer->outbuf + peer->outbuf_size; @@ -911,86 +701,68 @@ while (pthr->copied < total_size) { ret = perf_copy_chunk(pthr, flt_dst, flt_src, chunk_size); if (ret) {
(char*)flt_dst + chunk_size; + flt_src = (char*)flt_src + chunk_size; + if (flt_dst >= bnd_dst || flt_dst < (void*)peer->outbuf) { flt_dst = peer->outbuf; flt_src = pthr->src; } - - /* Give up CPU to give a chance for other threads to use it */ - schedule(); + sched_relinquish(curthread); } + ntb_perf_printf(1, "%s: Data transfer complete\n", __func__); - return 0; + return (0); } -static int perf_sync_test(struct perf_thread *pthr) +static int +perf_sync_test(struct perf_thread *pthr) { struct perf_ctx *perf = pthr->perf; - if (!use_dma) - goto no_dma_ret; - - wait_event(pthr->dma_wait, - (atomic_read(&pthr->dma_sync) == 0 || - atomic_read(&perf->tsync) < 0)); - - if (atomic_read(&perf->tsync) < 0) - return -EINTR; - -no_dma_ret: pthr->duration = ktime_sub(ktime_get(), pthr->duration); - dev_dbg(&perf->ntb->dev, "%d: copied %llu bytes\n", - pthr->tidx, pthr->copied); + ntb_perf_printf(1, "%s: %d: copied %lu bytes\n", + __func__, pthr->tidx, pthr->copied); - dev_dbg(&perf->ntb->dev, "%d: lasted %llu usecs\n", - pthr->tidx, ktime_to_us(pthr->duration)); + ntb_perf_printf(1, "%s: %d: lasted %lu usecs\n", + __func__, pthr->tidx, ktime_to_us(pthr->duration)); - dev_dbg(&perf->ntb->dev, "%d: %llu MBytes/s\n", pthr->tidx, - div64_u64(pthr->copied, ktime_to_us(pthr->duration))); + ntb_perf_printf(1, "%s: %d: %lu MBytes/s\n", __func__, pthr->tidx, + (pthr->copied / ktime_to_us(pthr->duration))); - return 0; + return (0); } -static void perf_clear_test(struct perf_thread *pthr) +static void +perf_clear_test(struct perf_thread *pthr) { - struct perf_ctx *perf = pthr->perf; - - if (!use_dma) - goto no_dma_notify; - - /* - * If test finished without errors, termination isn't needed. - * We call it anyway just to be sure of the transfers completion. - */ - (void)dmaengine_terminate_sync(pthr->dma_chan); - - dma_release_channel(pthr->dma_chan); - -no_dma_notify: - atomic_dec(&perf->tsync); - wake_up(&perf->twait); - kfree(pthr->src); + atomic_subtract_int(&pthr->perf->tsync, 1); + free(pthr->src, M_PERF); + if (atomic_load_int(&pthr->perf->tsync) == 0) { + clear_bit(0, &pthr->perf->busy_flag); + } } -static void perf_thread_work(struct work_struct *work) +static void +perf_thread_work(void *arg, int npending) { - struct perf_thread *pthr = to_thread_work(work); + struct perf_thread *pthr = arg; + struct perf_ctx *perf = pthr->perf; int ret; + ntb_perf_printf(1, "%s: Perf thread work tidx: %d\n", + __func__, pthr->tidx); + /* * Perform stages in compliance with use_dma flag value. * Test status is changed only if error happened, otherwise - * status -ENODATA is kept while test is on-fly. Results + * status -EINVAL is kept while test is on-fly. Results * synchronization is performed only if test fininshed * without an error or interruption. 
*/ @@ -1012,504 +784,538 @@ -static int perf_set_tcnt(struct perf_ctx *perf, u8 tcnt) -{ - if (tcnt == 0 || tcnt > MAX_THREADS_CNT) - return -EINVAL; - - if (test_and_set_bit_lock(0, &perf->busy_flag)) - return -EBUSY; - - perf->tcnt = tcnt; - - clear_bit_unlock(0, &perf->busy_flag); - - return 0; -} - -static void perf_terminate_test(struct perf_ctx *perf) +static int +perf_submit_test(struct perf_peer *peer) { + struct perf_ctx *perf = peer->perf; + struct perf_thread *pthr; int tidx; - atomic_set(&perf->tsync, -1); - wake_up(&perf->twait); - - for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { - wake_up(&perf->threads[tidx].dma_wait); - cancel_work_sync(&perf->threads[tidx].work); - } -} + ntb_perf_printf(1, "%s: Perf test submitted\n", __func__); -static int perf_submit_test(struct perf_peer *peer) -{ - struct perf_ctx *perf = peer->perf; - struct perf_thread *pthr; - int tidx, ret; + callout_init(&perf->clout, 1); + taskqueue_start_threads(&work_queue, perf->tcnt, + PI_DISK, "ntbtqthread"); if (!test_bit(PERF_STS_DONE, &peer->sts)) - return -ENOLINK; + return (ENOLINK); - if (test_and_set_bit_lock(0, &perf->busy_flag)) - return -EBUSY; + if (test_and_set_bit(0, &perf->busy_flag)) + return (EBUSY); perf->test_peer = peer; - atomic_set(&perf->tsync, perf->tcnt); + atomic_store_int(&perf->tsync, perf->tcnt); for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { pthr = &perf->threads[tidx]; - pthr->status = -ENODATA; + pthr->status = -EINVAL; pthr->copied = 0; pthr->duration = ktime_set(0, 0); if (tidx < perf->tcnt) - (void)queue_work(perf_wq, &pthr->work); + taskqueue_enqueue(work_queue, &pthr->work_task); } - ret = wait_event_interruptible(perf->twait, - atomic_read(&perf->tsync) <= 0); - if (ret == -ERESTARTSYS) { - perf_terminate_test(perf); - ret = -EINTR; - } - - clear_bit_unlock(0, &perf->busy_flag); + return (0); +} - return ret; +static void +perf_terminate_test(struct perf_ctx *perf) +{ + atomic_store_int(&perf->tsync, -1); } -static int perf_read_stats(struct perf_ctx *perf, char *buf, - size_t size, ssize_t *pos) +static int +perf_read_stats(struct perf_ctx *perf, struct sysctl_req *req) { struct perf_thread *pthr; + struct sbuf *sb; int tidx; + int rc; + size_t size = 1024; - if (test_and_set_bit_lock(0, &perf->busy_flag)) - return -EBUSY; + if (!perf->test_peer) + return (0); - (*pos) += scnprintf(buf + *pos, size - *pos, - " Peer %d test statistics:\n", perf->test_peer->pidx); + if (test_bit(0, &perf->busy_flag)) + return (EBUSY); + + sb = sbuf_new_for_sysctl(NULL, NULL, size, req); + if (sb == NULL) + return (ENOMEM); + + sbuf_printf(sb, "\nPeer %d test statistics:\n", perf->test_peer->pidx); for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { pthr = &perf->threads[tidx]; - if (pthr->status == -ENODATA) + if (pthr->status == -EINVAL) continue; if (pthr->status) { - (*pos) += scnprintf(buf + *pos, size - *pos, - "%d: error status %d\n", tidx, pthr->status); + sbuf_printf(sb, "%d: error status %d\n", + tidx, pthr->status); continue; } - (*pos) += scnprintf(buf + *pos, size - *pos, - "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n", - tidx, pthr->copied, ktime_to_us(pthr->duration), - div64_u64(pthr->copied, ktime_to_us(pthr->duration))); + sbuf_printf(sb, "%d: copied %lu bytes in %lu usecs, %lu MBytes/s\n", + tidx, pthr->copied, ktime_to_us(pthr->duration), + pthr->copied / ktime_to_us(pthr->duration)); } + rc = sbuf_finish(sb); + sbuf_delete(sb); - clear_bit_unlock(0, &perf->busy_flag); - - return 0; + return (rc); } -static void 
perf_init_threads(struct perf_ctx *perf) { struct perf_thread *pthr; int tidx; perf->tcnt = DEF_THREADS_CNT; perf->test_peer = &perf->peers[0]; - init_waitqueue_head(&perf->twait); for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { pthr = &perf->threads[tidx]; - pthr->perf = perf; pthr->tidx = tidx; - pthr->status = -ENODATA; - init_waitqueue_head(&pthr->dma_wait); - INIT_WORK(&pthr->work, perf_thread_work); + pthr->status = -EINVAL; + TASK_INIT(&pthr->work_task, 0, perf_thread_work, pthr); } } -static void perf_clear_threads(struct perf_ctx *perf) +static void +perf_clear_threads(struct perf_ctx *perf) { perf_terminate_test(perf); } -/*============================================================================== - * DebugFS nodes +/* + *============================================================================== + * Sysctl functions *============================================================================== */ -static ssize_t perf_dbgfs_read_info(struct file *filep, char __user *ubuf, - size_t size, loff_t *offp) +static int +ntb_sysctl_info_handler(SYSCTL_HANDLER_ARGS) { - struct perf_ctx *perf = filep->private_data; + struct perf_ctx* perf = (struct perf_ctx*)arg1; struct perf_peer *peer; - size_t buf_size; - ssize_t pos = 0; - int ret, pidx; - char *buf; - - buf_size = min_t(size_t, size, 0x1000U); - - buf = kmalloc(buf_size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - pos += scnprintf(buf + pos, buf_size - pos, - " Performance measuring tool info:\n\n"); - - pos += scnprintf(buf + pos, buf_size - pos, - "Local port %d, Global index %d\n", ntb_port_number(perf->ntb), - perf->gidx); - pos += scnprintf(buf + pos, buf_size - pos, "Test status: "); + struct sbuf *sb; + int rc, pidx; + uint32_t size = 4096; + + mtx_lock(&perf->lock); + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + goto error; + + sb = sbuf_new_for_sysctl(NULL, NULL, size, req); + if (sb == NULL) { + rc = ENOMEM; + goto error; + } + + sbuf_printf(sb, "NTB Performance measuring tool info:\n\n"); + sbuf_printf(sb, "Local Port %d, Global Index %d\n", + ntb_port_number(perf->dev), perf->gidx); + sbuf_printf(sb, "Test status: "); if (test_bit(0, &perf->busy_flag)) { - pos += scnprintf(buf + pos, buf_size - pos, - "on-fly with port %d (%d)\n", - ntb_peer_port_number(perf->ntb, perf->test_peer->pidx), - perf->test_peer->pidx); + sbuf_printf(sb, "on-fly with port %d (%d)\n", + ntb_peer_port_number(perf->dev, + perf->test_peer->pidx), perf->test_peer->pidx); } else { - pos += scnprintf(buf + pos, buf_size - pos, "idle\n"); + sbuf_printf(sb, "idle\n"); } for (pidx = 0; pidx < perf->pcnt; pidx++) { peer = &perf->peers[pidx]; - pos += scnprintf(buf + pos, buf_size - pos, - "Port %d (%d), Global index %d:\n", - ntb_peer_port_number(perf->ntb, peer->pidx), peer->pidx, - peer->gidx); - - pos += scnprintf(buf + pos, buf_size - pos, - "\tLink status: %s\n", - test_bit(PERF_STS_LNKUP, &peer->sts) ? 
"up" : "down"); - - pos += scnprintf(buf + pos, buf_size - pos, - "\tOut buffer addr 0x%pK\n", peer->outbuf); - - pos += scnprintf(buf + pos, buf_size - pos, - "\tOut buffer size %pa\n", &peer->outbuf_size); - - pos += scnprintf(buf + pos, buf_size - pos, - "\tOut buffer xlat 0x%016llx[p]\n", peer->outbuf_xlat); - - if (!peer->inbuf) { - pos += scnprintf(buf + pos, buf_size - pos, - "\tIn buffer addr: unallocated\n"); - continue; - } - - pos += scnprintf(buf + pos, buf_size - pos, - "\tIn buffer addr 0x%pK\n", peer->inbuf); + sbuf_printf(sb, "Port %d (%d), Global index %d:\n", + ntb_peer_port_number(perf->dev, peer->pidx), + peer->pidx, peer->gidx); - pos += scnprintf(buf + pos, buf_size - pos, - "\tIn buffer size %pa\n", &peer->inbuf_size); - - pos += scnprintf(buf + pos, buf_size - pos, - "\tIn buffer xlat %pad[p]\n", &peer->inbuf_xlat); - } + sbuf_printf(sb, "\tLink Status: %s\n", + test_bit(PERF_STS_LNKUP, &peer->sts) ? "up" : "down"); - ret = simple_read_from_buffer(ubuf, size, offp, buf, pos); - kfree(buf); + sbuf_printf(sb, "\tOut buffer addr %pK\n", + peer->outbuf); - return ret; -} + sbuf_printf(sb, "\tOut buffer size 0x%lx\n", + peer->outbuf_size); -static const struct file_operations perf_dbgfs_info = { - .open = simple_open, - .read = perf_dbgfs_read_info -}; + sbuf_printf(sb, "\tOut buffer xlat 0x%pad[p]\n", + &peer->outbuf_xlat); -static ssize_t perf_dbgfs_read_run(struct file *filep, char __user *ubuf, - size_t size, loff_t *offp) -{ - struct perf_ctx *perf = filep->private_data; - ssize_t ret, pos = 0; - char *buf; - - buf = kmalloc(PERF_BUF_LEN, GFP_KERNEL); - if (!buf) - return -ENOMEM; + if (peer->inbuf == NULL) { + sbuf_printf(sb, "\tIn buffer addr: unallocated\n"); + continue; + } - ret = perf_read_stats(perf, buf, PERF_BUF_LEN, &pos); - if (ret) - goto err_free; + sbuf_printf(sb, "\tIn buffer addr %pK\n", + peer->inbuf); - ret = simple_read_from_buffer(ubuf, size, offp, buf, pos); -err_free: - kfree(buf); + sbuf_printf(sb, "\tIn buffer size 0x%lx\n", + peer->inbuf_size); - return ret; + sbuf_printf(sb, "\tIn buffer xlat %pad[p]\n", + &peer->inbuf_xlat); + } + rc = sbuf_finish(sb); + sbuf_delete(sb); +error: + mtx_unlock(&perf->lock); + return (rc); } -static ssize_t perf_dbgfs_write_run(struct file *filep, const char __user *ubuf, - size_t size, loff_t *offp) +static int +ntb_sysctl_run_handler(SYSCTL_HANDLER_ARGS) { - struct perf_ctx *perf = filep->private_data; - struct perf_peer *peer; - int pidx, ret; + struct perf_ctx* perf = (struct perf_ctx*)arg1; + struct perf_peer *peer = perf->test_peer; + int ret; + int pidx = -1; - ret = kstrtoint_from_user(ubuf, size, 0, &pidx); - if (ret) - return ret; + mtx_lock(&perf->lock); + if (peer) + pidx = peer->pidx; + else + pidx = -1; + + ret = sysctl_handle_int(oidp, &pidx, 0, req); + if (ret != 0 || req->newptr == NULL) + goto error; + + if (pidx < 0 || pidx >= perf->pcnt) { + ret = EINVAL; + goto error; + } + peer = &perf->peers[pidx]; + ret = perf_submit_test(peer); +error: + mtx_unlock(&perf->lock); + return (ret); +} - if (pidx < 0 || pidx >= perf->pcnt) - return -EINVAL; +static int +ntb_sysctl_tcount_handler(SYSCTL_HANDLER_ARGS) +{ + struct perf_ctx* perf = (struct perf_ctx*)arg1; + int ret = 0; + size_t tcnt = 0; - peer = &perf->peers[pidx]; + mtx_lock(&perf->lock); + tcnt = perf->tcnt; - ret = perf_submit_test(peer); - if (ret) - return ret; + ret = sysctl_handle_int(oidp, &tcnt, 0, req); + if (ret != 0 || req->newptr == NULL) + goto error; - return size; + if (tcnt <= MAX_THREADS_CNT) + perf->tcnt = tcnt; + else { + 
ret = EINVAL; + goto error; + } + ntb_perf_printf(1, "%s: Thread count set to: %zu\n", + __func__, perf->tcnt); +error: + mtx_unlock(&perf->lock); + return (ret); } -static const struct file_operations perf_dbgfs_run = { - .open = simple_open, - .read = perf_dbgfs_read_run, - .write = perf_dbgfs_write_run -}; - -static ssize_t perf_dbgfs_read_tcnt(struct file *filep, char __user *ubuf, - size_t size, loff_t *offp) +static int +ntb_sysctl_mwsize_handler(SYSCTL_HANDLER_ARGS) { - struct perf_ctx *perf = filep->private_data; - char buf[8]; - ssize_t pos; - - pos = scnprintf(buf, sizeof(buf), "%hhu\n", perf->tcnt); + struct perf_ctx* perf = (struct perf_ctx*)arg1; + int ret = 0; - return simple_read_from_buffer(ubuf, size, offp, buf, pos); + mtx_lock(&perf->lock); + ret = sysctl_handle_int(oidp, &max_mw_size, 0, req); + if (ret != 0 || req->newptr == NULL) + goto error; + + ntb_perf_printf(1, "%s: Max MW size set to: %u\n", + __func__, max_mw_size); +error: + mtx_unlock(&perf->lock); + return (ret); } -static ssize_t perf_dbgfs_write_tcnt(struct file *filep, - const char __user *ubuf, - size_t size, loff_t *offp) +static int +ntb_sysctl_corder_handler(SYSCTL_HANDLER_ARGS) { - struct perf_ctx *perf = filep->private_data; - int ret; - u8 val; + struct perf_ctx* perf = (struct perf_ctx*)arg1; + int ret = 0; - ret = kstrtou8_from_user(ubuf, size, 0, &val); - if (ret) - return ret; + mtx_lock(&perf->lock); + ret = sysctl_handle_int(oidp, &chunk_order, 0, req); + if (ret != 0 || req->newptr == NULL) + goto error; - ret = perf_set_tcnt(perf, val); - if (ret) - return ret; + if (chunk_order > MAX_CHUNK_ORDER) + chunk_order = MAX_CHUNK_ORDER; - return size; + ntb_perf_printf(1, "%s: Chunk order set to: %u\n", + __func__, chunk_order); +error: + mtx_unlock(&perf->lock); + return (ret); } -static const struct file_operations perf_dbgfs_tcnt = { - .open = simple_open, - .read = perf_dbgfs_read_tcnt, - .write = perf_dbgfs_write_tcnt -}; - -static void perf_setup_dbgfs(struct perf_ctx *perf) +static int +ntb_sysctl_torder_handler(SYSCTL_HANDLER_ARGS) { - struct pci_dev *pdev = perf->ntb->pdev; - - perf->dbgfs_dir = debugfs_create_dir(pci_name(pdev), perf_dbgfs_topdir); - if (!perf->dbgfs_dir) { - dev_warn(&perf->ntb->dev, "DebugFS unsupported\n"); - return; - } - - debugfs_create_file("info", 0600, perf->dbgfs_dir, perf, - &perf_dbgfs_info); + struct perf_ctx* perf = (struct perf_ctx*)arg1; + int ret = 0; - debugfs_create_file("run", 0600, perf->dbgfs_dir, perf, - &perf_dbgfs_run); + mtx_lock(&perf->lock); + ret = sysctl_handle_int(oidp, &total_order, 0, req); + if (ret != 0 || req->newptr == NULL) + goto error; - debugfs_create_file("threads_count", 0600, perf->dbgfs_dir, perf, - &perf_dbgfs_tcnt); + if (total_order < chunk_order) + total_order = chunk_order; - /* They are made read-only for test exec safety and integrity */ - debugfs_create_u8("chunk_order", 0500, perf->dbgfs_dir, &chunk_order); + ntb_perf_printf(1, "%s: Total order set to: %u\n", + __func__, total_order); +error: + mtx_unlock(&perf->lock); + return (ret); +} - debugfs_create_u8("total_order", 0500, perf->dbgfs_dir, &total_order); +static int +ntb_sysctl_usedma_handler(SYSCTL_HANDLER_ARGS) +{ + struct perf_ctx* perf = (struct perf_ctx*)arg1; + int ret = 0; - debugfs_create_bool("use_dma", 0500, perf->dbgfs_dir, &use_dma); + mtx_lock(&perf->lock); + ret = sysctl_handle_bool(oidp, &use_dma, 0, req); + if (ret != 0 || req->newptr == NULL) + goto error; + + ntb_perf_printf(1, "%s: Use DMA: %s\n", __func__, + use_dma ? 
"True" : "False"); +error: + mtx_unlock(&perf->lock); + return (ret); } -static void perf_clear_dbgfs(struct perf_ctx *perf) +static int +ntb_sysctl_read_stats_handler(SYSCTL_HANDLER_ARGS) { - debugfs_remove_recursive(perf->dbgfs_dir); + struct perf_ctx *perf = (struct perf_ctx*)arg1; + int ret; + + mtx_lock(&perf->lock); + ret = perf_read_stats(perf, req); + if (ret != 0 || req->newptr == NULL) + goto error; +error: + mtx_unlock(&perf->lock); + return (ret); +} + +static void +perf_setup_sysctl(struct perf_ctx *perf) +{ + struct sysctl_oid_list *globals; + struct sysctl_ctx_list *ctx; + + ctx = device_get_sysctl_ctx(perf->dev); + globals = SYSCTL_CHILDREN(device_get_sysctl_tree(perf->dev)); + + SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "info", + CTLTYPE_STRING | CTLFLAG_RD, perf, 0, + ntb_sysctl_info_handler, "A", + "NTB performance information" ); + SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "run", + CTLTYPE_INT | CTLFLAG_RW, perf, 0, + ntb_sysctl_run_handler, "I", "NTB run" ); + SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "thread_count", + CTLTYPE_UINT | CTLFLAG_RW, perf, 0, + ntb_sysctl_tcount_handler, "IU", "NTB thread count" ); + SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "mw_size", + CTLTYPE_UINT | CTLFLAG_RW, perf, 0, + ntb_sysctl_mwsize_handler, "IU", + "Upper limit of memory window size" ); + SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "data_order", + CTLTYPE_UINT | CTLFLAG_RW, perf, 0, + ntb_sysctl_corder_handler, "IU", + "Data chunk order [2^n] to transfer" ); + SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "total_order", + CTLTYPE_UINT | CTLFLAG_RW, perf, 0, + ntb_sysctl_torder_handler, "IU", + "Total data order [2^n] to transfer" ); + SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "use_dma", + CTLTYPE_U8 | CTLFLAG_RW, perf, 0, + ntb_sysctl_usedma_handler, "CU", + "Use DMA engine to measure performance" ); + SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "read_stats", + CTLTYPE_STRING | CTLFLAG_RW, perf, 0, + ntb_sysctl_read_stats_handler, "A", + "NTB Perf test statistics" ); } -/*============================================================================== +/* + *============================================================================== * Basic driver initialization *============================================================================== */ -static struct perf_ctx *perf_create_data(struct ntb_dev *ntb) +static int +perf_init(struct perf_ctx* perf) { - struct perf_ctx *perf; - - perf = devm_kzalloc(&ntb->dev, sizeof(*perf), GFP_KERNEL); - if (!perf) - return ERR_PTR(-ENOMEM); - - perf->pcnt = ntb_peer_port_count(ntb); - perf->peers = devm_kcalloc(&ntb->dev, perf->pcnt, sizeof(*perf->peers), - GFP_KERNEL); - if (!perf->peers) - return ERR_PTR(-ENOMEM); - - perf->ntb = ntb; + mtx_init(&perf->lock, "Perf mutex", "ntb_perf", MTX_DEF); - return perf; + work_queue = taskqueue_create("perf_wq", M_WAITOK | M_ZERO, + taskqueue_thread_enqueue, &work_queue); + return (0); } -static int perf_setup_peer_mw(struct perf_peer *peer) +static int +perf_setup_peer_mw(struct perf_peer *peer) { struct perf_ctx *perf = peer->perf; - phys_addr_t phys_addr; + vm_paddr_t bus_addr; int ret; /* Get outbound MW parameters and map it */ - ret = ntb_peer_mw_get_addr(perf->ntb, peer->gidx, &phys_addr, - &peer->outbuf_size); + ret = ntb_mw_get_range(perf->dev, peer->gidx, &bus_addr, + &peer->outbuf, &peer->outbuf_size, &peer->xlat_align, + &peer->xlat_align_size, &peer->addr_limit); + if (ret) - return ret; + return (ret); - peer->outbuf = devm_ioremap_wc(&perf->ntb->dev, phys_addr, - peer->outbuf_size); if (!peer->outbuf) - return 
-ENOMEM; + return (ENOMEM); + + ret = ntb_mw_set_wc(perf->dev, peer->gidx, VM_MEMATTR_WRITE_COMBINING); + if (ret) + return (ret); if (max_mw_size && peer->outbuf_size > max_mw_size) { peer->outbuf_size = max_mw_size; - dev_warn(&peer->perf->ntb->dev, - "Peer %d outbuf reduced to %pa\n", peer->pidx, - &peer->outbuf_size); + ntb_perf_printf(1, + "%s: Warning: Peer %d outbuf reduced to %lx\n", + __func__, peer->pidx, peer->outbuf_size); } - - return 0; + return (0); } -static int perf_init_peers(struct perf_ctx *perf) +static int +perf_init_peers(struct perf_ctx *perf) { struct perf_peer *peer; int pidx, lport, ret; - lport = ntb_port_number(perf->ntb); + perf->test_peer = NULL; + perf->pcnt = ntb_peer_port_count(perf->dev); + perf->peers = malloc(perf->pcnt*sizeof(*perf->peers), M_PERF, M_WAITOK); + if (!perf->peers) + return (ENOMEM); + + lport = ntb_port_number(perf->dev); perf->gidx = -1; for (pidx = 0; pidx < perf->pcnt; pidx++) { peer = &perf->peers[pidx]; peer->perf = perf; peer->pidx = pidx; - if (lport < ntb_peer_port_number(perf->ntb, pidx)) { + if (lport < ntb_peer_port_number(perf->dev, pidx)) { if (perf->gidx == -1) perf->gidx = pidx; peer->gidx = pidx + 1; } else { peer->gidx = pidx; } - INIT_WORK(&peer->service, perf_service_work); + TASK_INIT(&peer->service_task, 0, perf_service_work, peer); } if (perf->gidx == -1) perf->gidx = pidx; + ntb_perf_printf(1,"%s: Peer Count: %d\n",__func__, perf->pcnt); for (pidx = 0; pidx < perf->pcnt; pidx++) { ret = perf_setup_peer_mw(&perf->peers[pidx]); if (ret) - return ret; + return (ret); } - dev_dbg(&perf->ntb->dev, "Global port index %d\n", perf->gidx); + ntb_perf_printf(1, "%s: Global port index %d\n", __func__, perf->gidx); - return 0; + return (0); } -static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) +static int +ntb_perf_probe(device_t dev) { - struct perf_ctx *perf; - int ret; + device_set_desc(dev, "NTB Perf"); + return (0); +} + +static int +ntb_perf_attach(device_t dev) +{ + struct perf_ctx *perf = device_get_softc(dev); + int ret=0; - perf = perf_create_data(ntb); - if (IS_ERR(perf)) - return PTR_ERR(perf); + perf->dev = dev; + + perf_init(perf); ret = perf_init_peers(perf); if (ret) - return ret; + return (ret); perf_init_threads(perf); ret = perf_init_service(perf); if (ret) - return ret; + return (ret); ret = perf_enable_service(perf); if (ret) - return ret; - - perf_setup_dbgfs(perf); + return (ret); - return 0; + perf_setup_sysctl(perf); + return (0); } -static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb) -{ - struct perf_ctx *perf = ntb->ctx; - perf_clear_dbgfs(perf); +static int +ntb_perf_detach(device_t dev) +{ + struct perf_ctx *perf = device_get_softc(dev); + taskqueue_free(work_queue); perf_disable_service(perf); - perf_clear_threads(perf); -} - -static struct ntb_client perf_client = { - .ops = { - .probe = perf_probe, - .remove = perf_remove - } -}; + free(perf->peers, M_PERF); -static int __init perf_init(void) -{ - int ret; - - if (chunk_order > MAX_CHUNK_ORDER) { - chunk_order = MAX_CHUNK_ORDER; - pr_info("Chunk order reduced to %hhu\n", chunk_order); - } - - if (total_order < chunk_order) { - total_order = chunk_order; - pr_info("Total data order reduced to %hhu\n", total_order); - } - - perf_wq = alloc_workqueue("perf_wq", WQ_UNBOUND | WQ_SYSFS, 0); - if (!perf_wq) - return -ENOMEM; - - if (debugfs_initialized()) - perf_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL); + return (0); +} - ret = ntb_register_client(&perf_client); - if (ret) { - 
debugfs_remove_recursive(perf_dbgfs_topdir); - destroy_workqueue(perf_wq); - } - return ret; -} -module_init(perf_init); -static void __exit perf_exit(void) -{ - ntb_unregister_client(&perf_client); - debugfs_remove_recursive(perf_dbgfs_topdir); - destroy_workqueue(perf_wq); -} -module_exit(perf_exit); +static device_method_t ntb_perf_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, ntb_perf_probe), + DEVMETHOD(device_attach, ntb_perf_attach), + DEVMETHOD(device_detach, ntb_perf_detach), + DEVMETHOD_END +}; +devclass_t ntb_perf_devclass; +static DEFINE_CLASS_0(ntb_perf, ntb_perf_driver, + ntb_perf_methods, sizeof(struct perf_ctx)); +DRIVER_MODULE(ntb_perf, ntb_hw, ntb_perf_driver, + ntb_perf_devclass, NULL, NULL); +MODULE_DEPEND(ntb_perf, ntb, 1, 1, 1); +MODULE_VERSION(ntb_perf, 1);
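Note on the inbound-window setup: perf_setup_inbuf() in the patch follows the usual three-step bus_dma(9) recipe, which replaces Linux's dma_alloc_coherent(): create a constrained tag, allocate DMA-safe memory against it, then load the map to learn the bus address from a callback. A minimal self-contained sketch of that pattern follows; the names (struct mw_buf, mw_buf_alloc, mw_buf_cb) are hypothetical helpers for illustration, not part of the patch.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <machine/bus.h>

struct mw_buf {
	bus_dma_tag_t	tag;
	bus_dmamap_t	map;
	caddr_t		vaddr;		/* KVA of the buffer */
	bus_addr_t	busaddr;	/* bus address to program into the MW */
	int		err;		/* error reported by the load callback */
};

/* bus_dmamap_load() hands the resulting segment list to a callback. */
static void
mw_buf_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	struct mw_buf *mb = arg;

	if ((mb->err = error) == 0)
		mb->busaddr = segs[0].ds_addr;
}

/* Hypothetical helper: one aligned, physically contiguous DMA buffer. */
static int
mw_buf_alloc(device_t dev, struct mw_buf *mb, size_t size, bus_size_t align,
    bus_addr_t limit)
{
	/* One segment only: an NTB window translation needs one range. */
	if (bus_dma_tag_create(bus_get_dma_tag(dev), align, 0, limit,
	    BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size, 0, NULL, NULL,
	    &mb->tag) != 0)
		return (ENOMEM);
	if (bus_dmamem_alloc(mb->tag, (void **)&mb->vaddr,
	    BUS_DMA_WAITOK | BUS_DMA_ZERO, &mb->map) != 0) {
		bus_dma_tag_destroy(mb->tag);
		return (ENOMEM);
	}
	if (bus_dmamap_load(mb->tag, mb->map, mb->vaddr, size, mw_buf_cb,
	    mb, BUS_DMA_NOWAIT) != 0 || mb->err != 0) {
		bus_dmamem_free(mb->tag, mb->vaddr, mb->map);
		bus_dma_tag_destroy(mb->tag);
		return (ENOMEM);
	}
	/* mb->busaddr can now be handed to ntb_mw_set_trans(). */
	return (0);
}

Teardown mirrors perf_free_inbuf() in reverse order: bus_dmamap_unload(), then bus_dmamem_free(), then bus_dma_tag_destroy().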
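The sysctl knobs that replace the Linux debugfs nodes all follow the same sysctl(9) read-copy/modify-write idiom: copy the current value into a local, let sysctl_handle_int() move it to or from userland, and treat req->newptr == NULL as a plain read. A generic, hedged sketch of that handler shape is below; example_order and example_order_handler are illustrative stand-ins, not symbols from the patch.

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

/* Hypothetical tunable, standing in for chunk_order/total_order above. */
static unsigned int example_order = 19;

static int
example_order_handler(SYSCTL_HANDLER_ARGS)
{
	unsigned int val;
	int error;

	val = example_order;
	/* Copies the current value out; on a write, copies the new value in. */
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);		/* copy failure, or read-only access */

	if (val > 20)			/* validate before publishing, cf. MAX_CHUNK_ORDER */
		return (EINVAL);
	example_order = val;
	return (0);
}

SYSCTL_PROC(_hw, OID_AUTO, ntb_perf_example_order,
    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
    example_order_handler, "IU", "Illustrative order [2^n] tunable");

Validating before assigning to the published variable keeps readers from ever observing an out-of-range value, which is why the patch's handlers clamp chunk_order and total_order only after sysctl_handle_int() succeeds.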