diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c index 020ef6a39b5e..0a19fbba717d 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c @@ -1,339 +1,339 @@ /* * Copyright (c) 2020 iXsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_comutil.h" #include "zfs_deleg.h" #include "zfs_namecheck.h" #include "zfs_prop.h" SYSCTL_DECL(_vfs_zfs); SYSCTL_DECL(_vfs_zfs_vdev); extern uint_t rrw_tsd_key; static int zfs_version_ioctl = ZFS_IOCVER_OZFS; SYSCTL_DECL(_vfs_zfs_version); SYSCTL_INT(_vfs_zfs_version, OID_AUTO, ioctl, CTLFLAG_RD, &zfs_version_ioctl, 0, "ZFS_IOCTL_VERSION"); static struct cdev *zfsdev; static struct root_hold_token *zfs_root_token; extern uint_t rrw_tsd_key; extern uint_t zfs_allow_log_key; extern uint_t zfs_geom_probe_vdev_key; static int zfs__init(void); static int zfs__fini(void); static void zfs_shutdown(void *, int); static eventhandler_tag zfs_shutdown_event_tag; #define ZFS_MIN_KSTACK_PAGES 4 static int zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag, struct thread *td) { uint_t len; int vecnum; zfs_iocparm_t *zp; zfs_cmd_t *zc; zfs_cmd_legacy_t *zcl; int rc, error; void *uaddr; len = IOCPARM_LEN(zcmd); vecnum = zcmd & 0xff; zp = (void *)arg; - uaddr = (void *)zp->zfs_cmd; + uaddr = (void *)(uintptr_t)zp->zfs_cmd; error = 0; zcl = NULL; if (len != sizeof (zfs_iocparm_t)) { printf("len %d vecnum: %d sizeof (zfs_cmd_t) %ju\n", len, vecnum, (uintmax_t)sizeof (zfs_cmd_t)); return (EINVAL); } zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); /* * Remap ioctl code for legacy user binaries */ if (zp->zfs_ioctl_version == ZFS_IOCVER_LEGACY) { vecnum = zfs_ioctl_legacy_to_ozfs(vecnum); if (vecnum < 0) { 
kmem_free(zc, sizeof (zfs_cmd_t)); return (ENOTSUP); } zcl = kmem_zalloc(sizeof (zfs_cmd_legacy_t), KM_SLEEP); if (copyin(uaddr, zcl, sizeof (zfs_cmd_legacy_t))) { error = SET_ERROR(EFAULT); goto out; } zfs_cmd_legacy_to_ozfs(zcl, zc); } else if (copyin(uaddr, zc, sizeof (zfs_cmd_t))) { error = SET_ERROR(EFAULT); goto out; } error = zfsdev_ioctl_common(vecnum, zc, 0); if (zcl) { zfs_cmd_ozfs_to_legacy(zc, zcl); rc = copyout(zcl, uaddr, sizeof (*zcl)); } else { rc = copyout(zc, uaddr, sizeof (*zc)); } if (error == 0 && rc != 0) error = SET_ERROR(EFAULT); out: if (zcl) kmem_free(zcl, sizeof (zfs_cmd_legacy_t)); kmem_free(zc, sizeof (zfs_cmd_t)); MPASS(tsd_get(rrw_tsd_key) == NULL); return (error); } static void zfsdev_close(void *data) { zfsdev_state_destroy(data); } void zfsdev_private_set_state(void *priv __unused, zfsdev_state_t *zs) { devfs_set_cdevpriv(zs, zfsdev_close); } zfsdev_state_t * zfsdev_private_get_state(void *priv) { return (priv); } static int zfsdev_open(struct cdev *devp __unused, int flag __unused, int mode __unused, struct thread *td __unused) { int error; mutex_enter(&zfsdev_state_lock); error = zfsdev_state_init(NULL); mutex_exit(&zfsdev_state_lock); return (error); } static struct cdevsw zfs_cdevsw = { .d_version = D_VERSION, .d_open = zfsdev_open, .d_ioctl = zfsdev_ioctl, .d_name = ZFS_DRIVER }; int zfsdev_attach(void) { struct make_dev_args args; make_dev_args_init(&args); args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; args.mda_devsw = &zfs_cdevsw; args.mda_cr = NULL; args.mda_uid = UID_ROOT; args.mda_gid = GID_OPERATOR; args.mda_mode = 0666; return (make_dev_s(&args, &zfsdev, ZFS_DRIVER)); } void zfsdev_detach(void) { if (zfsdev != NULL) destroy_dev(zfsdev); } int zfs__init(void) { int error; #if KSTACK_PAGES < ZFS_MIN_KSTACK_PAGES printf("ZFS NOTICE: KSTACK_PAGES is %d which could result in stack " "overflow panic!\nPlease consider adding " "'options KSTACK_PAGES=%d' to your kernel config\n", KSTACK_PAGES, ZFS_MIN_KSTACK_PAGES); 
#endif zfs_root_token = root_mount_hold("ZFS"); if ((error = zfs_kmod_init()) != 0) { printf("ZFS: Failed to Load ZFS Filesystem" ", rc = %d\n", error); root_mount_rel(zfs_root_token); return (error); } tsd_create(&zfs_geom_probe_vdev_key, NULL); printf("ZFS storage pool version: features support (" SPA_VERSION_STRING ")\n"); root_mount_rel(zfs_root_token); ddi_sysevent_init(); return (0); } int zfs__fini(void) { if (zfs_busy() || zvol_busy() || zio_injection_enabled) { return (EBUSY); } zfs_kmod_fini(); tsd_destroy(&zfs_geom_probe_vdev_key); return (0); } static void zfs_shutdown(void *arg __unused, int howto __unused) { /* * ZFS fini routines can not properly work in a panic-ed system. */ if (panicstr == NULL) zfs__fini(); } static int zfs_modevent(module_t mod, int type, void *unused __unused) { int err; switch (type) { case MOD_LOAD: err = zfs__init(); if (err == 0) zfs_shutdown_event_tag = EVENTHANDLER_REGISTER( shutdown_post_sync, zfs_shutdown, NULL, SHUTDOWN_PRI_FIRST); return (err); case MOD_UNLOAD: err = zfs__fini(); if (err == 0 && zfs_shutdown_event_tag != NULL) EVENTHANDLER_DEREGISTER(shutdown_post_sync, zfs_shutdown_event_tag); return (err); case MOD_SHUTDOWN: return (0); default: break; } return (EOPNOTSUPP); } static moduledata_t zfs_mod = { "zfsctrl", zfs_modevent, 0 }; #ifdef _KERNEL EVENTHANDLER_DEFINE(mountroot, spa_boot_init, NULL, 0); #endif DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_CLOCKS, SI_ORDER_ANY); MODULE_VERSION(zfsctrl, 1); #if __FreeBSD_version > 1300092 MODULE_DEPEND(zfsctrl, xdr, 1, 1, 1); #else MODULE_DEPEND(zfsctrl, krpc, 1, 1, 1); #endif MODULE_DEPEND(zfsctrl, acl_nfs4, 1, 1, 1); MODULE_DEPEND(zfsctrl, crypto, 1, 1, 1); MODULE_DEPEND(zfsctrl, zlib, 1, 1, 1); diff --git a/sys/dev/iser/iser_initiator.c b/sys/dev/iser/iser_initiator.c index 49960c29a614..4e426240c882 100644 --- a/sys/dev/iser/iser_initiator.c +++ b/sys/dev/iser/iser_initiator.c @@ -1,538 +1,538 @@ /* $FreeBSD$ */ /*- * Copyright (c) 2015, Mellanox Technologies, Inc. 
All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "icl_iser.h" static MALLOC_DEFINE(M_ISER_INITIATOR, "iser_initiator", "iser initiator backend"); /* Register user buffer memory and initialize passive rdma * dto descriptor. 
Data size is stored in * task->data[ISER_DIR_IN].data_len, Protection size * is stored in task->prot[ISER_DIR_IN].data_len */ static int iser_prepare_read_cmd(struct icl_iser_pdu *iser_pdu) { struct iser_hdr *hdr = &iser_pdu->desc.iser_header; struct iser_data_buf *buf_in = &iser_pdu->data[ISER_DIR_IN]; struct iser_mem_reg *mem_reg; int err; err = iser_dma_map_task_data(iser_pdu, buf_in, ISER_DIR_IN, DMA_FROM_DEVICE); if (err) return (err); err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_IN); if (err) { ISER_ERR("Failed to set up Data-IN RDMA"); return (err); } mem_reg = &iser_pdu->rdma_reg[ISER_DIR_IN]; hdr->flags |= ISER_RSV; hdr->read_stag = cpu_to_be32(mem_reg->rkey); hdr->read_va = cpu_to_be64(mem_reg->sge.addr); return (0); } /* Register user buffer memory and initialize passive rdma * dto descriptor. Data size is stored in * task->data[ISER_DIR_OUT].data_len, Protection size * is stored at task->prot[ISER_DIR_OUT].data_len */ static int iser_prepare_write_cmd(struct icl_iser_pdu *iser_pdu) { struct iser_hdr *hdr = &iser_pdu->desc.iser_header; struct iser_data_buf *buf_out = &iser_pdu->data[ISER_DIR_OUT]; struct iser_mem_reg *mem_reg; int err; err = iser_dma_map_task_data(iser_pdu, buf_out, ISER_DIR_OUT, DMA_TO_DEVICE); if (err) return (err); err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_OUT); if (err) { ISER_ERR("Failed to set up Data-out RDMA"); return (err); } mem_reg = &iser_pdu->rdma_reg[ISER_DIR_OUT]; hdr->flags |= ISER_WSV; hdr->write_stag = cpu_to_be32(mem_reg->rkey); hdr->write_va = cpu_to_be64(mem_reg->sge.addr); return (0); } /* creates a new tx descriptor and adds header regd buffer: syncs the header for CPU access, zeroes the iSER header, sets ISER_VER, resets num_sge to 1 and repairs a stale lkey in tx_sg[0] */ void iser_create_send_desc(struct iser_conn *iser_conn, struct iser_tx_desc *tx_desc) { struct iser_device *device = iser_conn->ib_conn.device; ib_dma_sync_single_for_cpu(device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr)); tx_desc->iser_header.flags = ISER_VER; tx_desc->num_sge = 1; if 
(tx_desc->tx_sg[0].lkey != device->mr->lkey) { tx_desc->tx_sg[0].lkey = device->mr->lkey; ISER_DBG("sdesc %p lkey mismatch, fixing", tx_desc); } } /* Unmap whichever login DMA mappings are set, free the login buffer and clear the bookkeeping fields; returns immediately when login_buf is NULL, so a repeated call does nothing. */ void iser_free_login_buf(struct iser_conn *iser_conn) { struct iser_device *device = iser_conn->ib_conn.device; if (!iser_conn->login_buf) return; if (iser_conn->login_req_dma) ib_dma_unmap_single(device->ib_device, iser_conn->login_req_dma, ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); if (iser_conn->login_resp_dma) ib_dma_unmap_single(device->ib_device, iser_conn->login_resp_dma, ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); free(iser_conn->login_buf, M_ISER_INITIATOR); /* make sure we never redo any unmapping */ iser_conn->login_req_dma = 0; iser_conn->login_resp_dma = 0; iser_conn->login_buf = NULL; } /* Allocate one buffer holding the login request area followed by the login response area and DMA-map each part. If either mapping fails, the failed handle is zeroed so that iser_free_login_buf() skips it, everything is released and ENOMEM is returned. Returns 0 on success. */ int iser_alloc_login_buf(struct iser_conn *iser_conn) { struct iser_device *device = iser_conn->ib_conn.device; int req_err, resp_err; BUG_ON(device == NULL); iser_conn->login_buf = malloc(ISCSI_DEF_MAX_RECV_SEG_LEN + ISER_RX_LOGIN_SIZE, M_ISER_INITIATOR, M_WAITOK | M_ZERO); if (!iser_conn->login_buf) goto out_err; iser_conn->login_req_buf = iser_conn->login_buf; iser_conn->login_resp_buf = iser_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN; iser_conn->login_req_dma = ib_dma_map_single(device->ib_device, iser_conn->login_req_buf, ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device, iser_conn->login_resp_buf, ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); req_err = ib_dma_mapping_error(device->ib_device, iser_conn->login_req_dma); resp_err = ib_dma_mapping_error(device->ib_device, iser_conn->login_resp_dma); if (req_err || resp_err) { if (req_err) iser_conn->login_req_dma = 0; if (resp_err) iser_conn->login_resp_dma = 0; goto free_login_buf; } return (0); free_login_buf: iser_free_login_buf(iser_conn); out_err: ISER_DBG("unable to alloc or map login buf"); return (ENOMEM); } /* Create the fastreg pool, then allocate and DMA-map cmds_max receive descriptors. On failure the descriptors mapped so far are unmapped, the array is freed, the pool is released and ENOMEM is returned. Returns 0 on success. */ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, int cmds_max) { int i, j; u64 dma_addr; struct 
iser_rx_desc *rx_desc; struct ib_sge *rx_sg; struct ib_conn *ib_conn = &iser_conn->ib_conn; struct iser_device *device = ib_conn->device; iser_conn->qp_max_recv_dtos = cmds_max; iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2; if (iser_create_fastreg_pool(ib_conn, cmds_max)) goto create_rdma_reg_res_failed; iser_conn->num_rx_descs = cmds_max; iser_conn->rx_descs = malloc(iser_conn->num_rx_descs * sizeof(struct iser_rx_desc), M_ISER_INITIATOR, M_WAITOK | M_ZERO); if (!iser_conn->rx_descs) goto rx_desc_alloc_fail; rx_desc = iser_conn->rx_descs; for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) { dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(device->ib_device, dma_addr)) goto rx_desc_dma_map_failed; rx_desc->dma_addr = dma_addr; rx_sg = &rx_desc->rx_sg; rx_sg->addr = rx_desc->dma_addr; rx_sg->length = ISER_RX_PAYLOAD_SIZE; rx_sg->lkey = device->mr->lkey; } iser_conn->rx_desc_head = 0; return (0); rx_desc_dma_map_failed: rx_desc = iser_conn->rx_descs; for (j = 0; j < i; j++, rx_desc++) ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); free(iser_conn->rx_descs, M_ISER_INITIATOR); iser_conn->rx_descs = NULL; rx_desc_alloc_fail: iser_free_fastreg_pool(ib_conn); create_rdma_reg_res_failed: ISER_ERR("failed allocating rx descriptors / data buffers"); return (ENOMEM); } /* Release the fastreg pool, unmap qp_max_recv_dtos receive descriptors and free the array. NOTE(review): rx_descs is walked without a NULL check; confirm this is never reached after iser_alloc_rx_descriptors() has failed and reset rx_descs to NULL. */ void iser_free_rx_descriptors(struct iser_conn *iser_conn) { int i; struct iser_rx_desc *rx_desc; struct ib_conn *ib_conn = &iser_conn->ib_conn; struct iser_device *device = ib_conn->device; iser_free_fastreg_pool(ib_conn); rx_desc = iser_conn->rx_descs; for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); free(iser_conn->rx_descs, M_ISER_INITIATOR); /* make sure we never redo any unmapping */ iser_conn->rx_descs = NULL; } /* Fill data_buf->sgl from a virtually addressed buffer of data_buf->data_len bytes, one entry per page-bounded piece, and record the entry count in data_buf->size. NOTE(review): sg is only assigned inside the loop, so sg_mark_end(sg) relies on data_len being non-zero. */ static void 
iser_buf_to_sg(void *buf, struct iser_data_buf *data_buf) { struct scatterlist *sg; int i; size_t len, tlen; int offset; tlen = data_buf->data_len; for (i = 0; 0 < tlen; i++, tlen -= len) { sg = &data_buf->sgl[i]; offset = ((uintptr_t)buf) & ~PAGE_MASK; len = min(PAGE_SIZE - offset, tlen); sg_set_buf(sg, buf, len); - buf = (void *)(((u64)buf) + (u64)len); + buf = (void *)((uintptr_t)buf + len); } data_buf->size = i; sg_mark_end(sg); } /* Fill data_buf->sgl from the page array of a bio: entry i uses bio_ma[i], and only the first entry carries bio_ma_offset. NOTE(review): as in iser_buf_to_sg(), sg is unset when bio_bcount is 0. */ static void iser_bio_to_sg(struct bio *bp, struct iser_data_buf *data_buf) { struct scatterlist *sg; int i; size_t len, tlen; int offset; tlen = bp->bio_bcount; offset = bp->bio_ma_offset; for (i = 0; 0 < tlen; i++, tlen -= len) { sg = &data_buf->sgl[i]; len = min(PAGE_SIZE - offset, tlen); sg_set_page(sg, bp->bio_ma[i], len, offset); offset = 0; } data_buf->size = i; sg_mark_end(sg); } /* Build the scatterlist for a SCSI I/O CCB according to its CAM data type; returns 0, or EINVAL for types other than CAM_DATA_BIO and CAM_DATA_VADDR. */ static int iser_csio_to_sg(struct ccb_scsiio *csio, struct iser_data_buf *data_buf) { struct ccb_hdr *ccbh; int err = 0; ccbh = &csio->ccb_h; switch ((ccbh->flags & CAM_DATA_MASK)) { case CAM_DATA_BIO: iser_bio_to_sg((struct bio *) csio->data_ptr, data_buf); break; case CAM_DATA_VADDR: /* * Support KVA buffers for various scsi commands such as: * - REPORT_LUNS * - MODE_SENSE_6 * - INQUIRY * - SERVICE_ACTION_IN. * The data of these commands always mapped into KVA. 
*/ iser_buf_to_sg(csio->data_ptr, data_buf); break; default: ISER_ERR("flags 0x%X unimplemented", ccbh->flags); err = EINVAL; } return (err); } /* True when sig_count is a multiple of ISER_SIGNAL_CMD_COUNT. */ static inline bool iser_signal_comp(u8 sig_count) { return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0); } /* Build and post the send descriptor for a SCSI command PDU. The data buffer is taken from the CCB, converted to a scatterlist when dxfer_len is non-zero, and prepared as a read or write command according to the BHSSC R/W flags. Returns 0, or the first error encountered (also logged). */ int iser_send_command(struct iser_conn *iser_conn, struct icl_iser_pdu *iser_pdu) { struct iser_data_buf *data_buf; struct iser_tx_desc *tx_desc = &iser_pdu->desc; struct iscsi_bhs_scsi_command *hdr = (struct iscsi_bhs_scsi_command *) &(iser_pdu->desc.iscsi_header); struct ccb_scsiio *csio = iser_pdu->csio; int err = 0; u8 sig_count = ++iser_conn->ib_conn.sig_count; /* build the tx desc regd header and add it to the tx desc dto */ tx_desc->type = ISCSI_TX_SCSI_COMMAND; iser_create_send_desc(iser_conn, tx_desc); if (hdr->bhssc_flags & BHSSC_FLAGS_R) { data_buf = &iser_pdu->data[ISER_DIR_IN]; } else { data_buf = &iser_pdu->data[ISER_DIR_OUT]; } data_buf->sg = csio->data_ptr; data_buf->data_len = csio->dxfer_len; if (likely(csio->dxfer_len)) { err = iser_csio_to_sg(csio, data_buf); if (unlikely(err)) goto send_command_error; } if (hdr->bhssc_flags & BHSSC_FLAGS_R) { err = iser_prepare_read_cmd(iser_pdu); if (err) goto send_command_error; } else if (hdr->bhssc_flags & BHSSC_FLAGS_W) { err = iser_prepare_write_cmd(iser_pdu); if (err) goto send_command_error; } err = iser_post_send(&iser_conn->ib_conn, tx_desc, iser_signal_comp(sig_count)); if (!err) return (0); send_command_error: ISER_ERR("iser_conn %p itt %u len %u err %d", iser_conn, hdr->bhssc_initiator_task_tag, hdr->bhssc_expected_data_transfer_length, err); return (err); } /* Post a control PDU. A non-empty payload is sent from the login request mapping as a second SGE; before handoff a login receive is posted first. The send is always posted with the completion flag set to true. */ int iser_send_control(struct iser_conn *iser_conn, struct icl_iser_pdu *iser_pdu) { struct iser_tx_desc *mdesc; struct iser_device *device; size_t datalen = iser_pdu->icl_pdu.ip_data_len; int err; mdesc = &iser_pdu->desc; /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; iser_create_send_desc(iser_conn, mdesc); device = iser_conn->ib_conn.device; if 
(datalen > 0) { struct ib_sge *tx_dsg = &mdesc->tx_sg[1]; ib_dma_sync_single_for_cpu(device->ib_device, iser_conn->login_req_dma, datalen, DMA_TO_DEVICE); ib_dma_sync_single_for_device(device->ib_device, iser_conn->login_req_dma, datalen, DMA_TO_DEVICE); tx_dsg->addr = iser_conn->login_req_dma; tx_dsg->length = datalen; tx_dsg->lkey = device->mr->lkey; mdesc->num_sge = 2; } /* For login phase and discovery session we re-use the login buffer */ if (!iser_conn->handoff_done) { err = iser_post_recvl(iser_conn); if (err) goto send_control_error; } err = iser_post_send(&iser_conn->ib_conn, mdesc, true); if (!err) return (0); send_control_error: ISER_ERR("conn %p failed err %d", iser_conn, err); return (err); } /** * iser_rcv_completion - recv DTO completion. Wraps the received header (and payload, if any) in a new PDU, reposts receive buffers when enough have been consumed, and hands the PDU to ic->ic_receive. NOTE(review): the PDU is allocated with M_NOWAIT and dereferenced without a NULL check; confirm iser_new_pdu() cannot fail here. */ void iser_rcv_completion(struct iser_rx_desc *rx_desc, unsigned long rx_xfer_len, struct ib_conn *ib_conn) { struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, ib_conn); struct icl_conn *ic = &iser_conn->icl_conn; struct icl_pdu *response; struct iscsi_bhs *hdr; u64 rx_dma; int rx_buflen; int outstanding, count, err; /* differentiate between login to all other PDUs */ if ((char *)rx_desc == iser_conn->login_resp_buf) { rx_dma = iser_conn->login_resp_dma; rx_buflen = ISER_RX_LOGIN_SIZE; } else { rx_dma = rx_desc->dma_addr; rx_buflen = ISER_RX_PAYLOAD_SIZE; } ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma, rx_buflen, DMA_FROM_DEVICE); hdr = &rx_desc->iscsi_header; response = iser_new_pdu(ic, M_NOWAIT); response->ip_bhs = hdr; response->ip_data_len = rx_xfer_len - ISER_HEADERS_LEN; /* * In case we got data in the receive buffer, assign the ip_data_mbuf * to the rx_buffer - later we'll copy it to upper layer buffers */ if (response->ip_data_len) response->ip_data_mbuf = (struct mbuf *)(rx_desc->data); ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma, rx_buflen, DMA_FROM_DEVICE); /* decrementing conn->post_recv_buf_count only --after-- freeing the * * task 
eliminates the need to worry on tasks which are completed in * * parallel to the execution of iser_conn_term. So the code that waits * * for the posted rx bufs refcount to become zero handles everything */ ib_conn->post_recv_buf_count--; if (rx_dma == iser_conn->login_resp_dma) goto receive; outstanding = ib_conn->post_recv_buf_count; if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) { count = min(iser_conn->qp_max_recv_dtos - outstanding, iser_conn->min_posted_rx); err = iser_post_recvm(iser_conn, count); if (err) ISER_ERR("posting %d rx bufs err %d", count, err); } receive: (ic->ic_receive)(response); } /* Send completion: frees the PDU when the descriptor is a control descriptor; other types are left alone. NOTE(review): iser_pdu->iser_conn is read before tx_desc is tested for NULL, so the NULL test cannot protect that read. */ void iser_snd_completion(struct iser_tx_desc *tx_desc, struct ib_conn *ib_conn) { struct icl_iser_pdu *iser_pdu = container_of(tx_desc, struct icl_iser_pdu, desc); struct iser_conn *iser_conn = iser_pdu->iser_conn; if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) iser_pdu_free(&iser_conn->icl_conn, &iser_pdu->icl_pdu); } diff --git a/sys/dev/mana/gdma_main.c b/sys/dev/mana/gdma_main.c index 60ddde142f44..6bda6eae13eb 100644 --- a/sys/dev/mana/gdma_main.c +++ b/sys/dev/mana/gdma_main.c @@ -1,1942 +1,1942 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "gdma_util.h" #include "mana.h" static mana_vendor_id_t mana_id_table[] = { { PCI_VENDOR_ID_MICROSOFT, PCI_DEV_ID_MANA_VF}, /* Last entry */ { 0, 0} }; /* Read a 32-bit value from BAR0 at offset, followed by a read barrier. */ static inline uint32_t mana_gd_r32(struct gdma_context *g, uint64_t offset) { uint32_t v = bus_space_read_4(g->gd_bus.bar0_t, g->gd_bus.bar0_h, offset); rmb(); return (v); } /* 64-bit BAR0 read: a single 8-byte access on amd64; elsewhere two 32-bit reads at offset and offset + 4 stored into the two halves of the result. */ #if defined(__amd64__) static inline uint64_t mana_gd_r64(struct gdma_context *g, uint64_t offset) { uint64_t v = bus_space_read_8(g->gd_bus.bar0_t, g->gd_bus.bar0_h, offset); rmb(); return (v); } #else static inline uint64_t mana_gd_r64(struct gdma_context *g, uint64_t offset) { uint64_t v; uint32_t *vp = (uint32_t *)&v; *vp = mana_gd_r32(g, offset); *(vp + 1) = mana_gd_r32(g, offset + 4); rmb(); return (v); } #endif /* Send GDMA_QUERY_MAX_RESOURCES and clamp num_msix_usable and max_num_queues to the reported limits (max_num_queues also to mp_ncpus and MANA_MAX_NUM_QUEUES). Returns ENOSPC when at most one MSI-X vector is usable, the request error or EPROTO on a failed request, and 0 otherwise. */ static int mana_gd_query_max_resources(device_t dev) { struct gdma_context *gc = device_get_softc(dev); struct gdma_query_max_resources_resp resp = {}; struct gdma_general_req req = {}; int err; mana_gd_init_req_hdr(&req.hdr, 
GDMA_QUERY_MAX_RESOURCES, sizeof(req), sizeof(resp)); err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err || resp.hdr.status) { device_printf(gc->dev, "Failed to query resource info: %d, 0x%x\n", err, resp.hdr.status); return err ? err : EPROTO; } mana_dbg(NULL, "max_msix %u, max_eq %u, max_cq %u, " "max_sq %u, max_rq %u\n", resp.max_msix, resp.max_eq, resp.max_cq, resp.max_sq, resp.max_rq); if (gc->num_msix_usable > resp.max_msix) gc->num_msix_usable = resp.max_msix; if (gc->num_msix_usable <= 1) return ENOSPC; gc->max_num_queues = mp_ncpus; if (gc->max_num_queues > MANA_MAX_NUM_QUEUES) gc->max_num_queues = MANA_MAX_NUM_QUEUES; if (gc->max_num_queues > resp.max_eq) gc->max_num_queues = resp.max_eq; if (gc->max_num_queues > resp.max_cq) gc->max_num_queues = resp.max_cq; if (gc->max_num_queues > resp.max_sq) gc->max_num_queues = resp.max_sq; if (gc->max_num_queues > resp.max_rq) gc->max_num_queues = resp.max_rq; return 0; } /* Send GDMA_LIST_DEVICES and record the id of the GDMA_DEVICE_MANA entry in gc->mana; at most MAX_NUM_GDMA_DEVICES entries are examined. Returns ENODEV when gc->mana.dev_id.type is still 0 afterwards. */ static int mana_gd_detect_devices(device_t dev) { struct gdma_context *gc = device_get_softc(dev); struct gdma_list_devices_resp resp = {}; struct gdma_general_req req = {}; struct gdma_dev_id gd_dev; uint32_t i, max_num_devs; uint16_t dev_type; int err; mana_gd_init_req_hdr(&req.hdr, GDMA_LIST_DEVICES, sizeof(req), sizeof(resp)); err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err || resp.hdr.status) { device_printf(gc->dev, "Failed to detect devices: %d, 0x%x\n", err, resp.hdr.status); return err ? err : EPROTO; } max_num_devs = min_t(uint32_t, MAX_NUM_GDMA_DEVICES, resp.num_of_devs); for (i = 0; i < max_num_devs; i++) { gd_dev = resp.devs[i]; dev_type = gd_dev.type; mana_dbg(NULL, "gdma dev %d, type %u\n", i, dev_type); /* HWC is already detected in mana_hwc_create_channel(). */ if (dev_type == GDMA_DEVICE_HWC) continue; if (dev_type == GDMA_DEVICE_MANA) { gc->mana.gdma_context = gc; gc->mana.dev_id = gd_dev; } } return gc->mana.dev_id.type == 0 ? 
ENODEV : 0; } /* Forward a request to the hardware channel stored in gc->hwc.driver_data. */ int mana_gd_send_request(struct gdma_context *gc, uint32_t req_len, const void *req, uint32_t resp_len, void *resp) { struct hw_channel_context *hwc = gc->hwc.driver_data; return mana_hwc_send_request(hwc, req_len, req, resp_len, resp); } /* Callback passed to bus_dmamap_load(): stores the address of the (single) segment in *arg; on error *arg is left untouched. */ void mana_gd_dma_map_paddr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *paddr = arg; if (error) return; KASSERT(nseg == 1, ("too many segments %d!", nseg)); *paddr = segs->ds_addr; } /* Allocate a zeroed, PAGE_SIZE-aligned, single-segment DMA buffer of length bytes and describe it in *gmi. length must be a power of two no smaller than PAGE_SIZE, otherwise EINVAL. Each failure path releases what was set up before it. */ int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, struct gdma_mem_info *gmi) { bus_addr_t dma_handle; void *buf; int err; if (!gc || !gmi) return EINVAL; if (length < PAGE_SIZE || (length != roundup_pow_of_two(length))) return EINVAL; err = bus_dma_tag_create(bus_get_dma_tag(gc->dev), /* parent */ PAGE_SIZE, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ length, /* maxsize */ 1, /* nsegments */ length, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg*/ &gmi->dma_tag); if (err) { device_printf(gc->dev, "failed to create dma tag, err: %d\n", err); return (err); } /* * Must have BUS_DMA_ZERO flag to clear the dma memory. * Otherwise the queue overflow detection mechanism does * not work. 
*/ err = bus_dmamem_alloc(gmi->dma_tag, &buf, BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &gmi->dma_map); if (err) { device_printf(gc->dev, "failed to alloc dma mem, err: %d\n", err); bus_dma_tag_destroy(gmi->dma_tag); return (err); } err = bus_dmamap_load(gmi->dma_tag, gmi->dma_map, buf, length, mana_gd_dma_map_paddr, &dma_handle, BUS_DMA_NOWAIT); if (err) { device_printf(gc->dev, "failed to load dma mem, err: %d\n", err); bus_dmamem_free(gmi->dma_tag, buf, gmi->dma_map); bus_dma_tag_destroy(gmi->dma_tag); return (err); } gmi->dev = gc->dev; gmi->dma_handle = dma_handle; gmi->virt_addr = buf; gmi->length = length; return 0; } /* Release a buffer described by gmi: unload the map, free the memory, destroy the tag. */ void mana_gd_free_memory(struct gdma_mem_info *gmi) { bus_dmamap_unload(gmi->dma_tag, gmi->dma_map); bus_dmamem_free(gmi->dma_tag, gmi->virt_addr, gmi->dma_map); bus_dma_tag_destroy(gmi->dma_tag); } /* Ask the device to release one doorbell page; returns the request error, EPROTO on a non-zero response status, or 0. */ int mana_gd_destroy_doorbell_page(struct gdma_context *gc, int doorbell_page) { struct gdma_destroy_resource_range_req req = {}; struct gdma_resp_hdr resp = {}; int err; mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_RESOURCE_RANGE, sizeof(req), sizeof(resp)); req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE; req.num_resources = 1; req.allocated_resources = doorbell_page; err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err || resp.status) { device_printf(gc->dev, "Failed to destroy doorbell page: ret %d, 0x%x\n", err, resp.status); return err ? 
err : EPROTO; } return 0; } /* Ask the device for one doorbell page and store its index in *doorbell_page; *doorbell_page is only written on success. */ int mana_gd_allocate_doorbell_page(struct gdma_context *gc, int *doorbell_page) { struct gdma_allocate_resource_range_req req = {}; struct gdma_allocate_resource_range_resp resp = {}; int err; mana_gd_init_req_hdr(&req.hdr, GDMA_ALLOCATE_RESOURCE_RANGE, sizeof(req), sizeof(resp)); req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE; req.num_resources = 1; req.alignment = 1; /* Have GDMA start searching from 0 */ req.allocated_resources = 0; err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err || resp.hdr.status) { device_printf(gc->dev, "Failed to allocate doorbell page: ret %d, 0x%x\n", err, resp.hdr.status); return err ? err : EPROTO; } *doorbell_page = resp.allocated_resources; return 0; } /* Create the hardware event queue for queue (EINVAL unless it is a GDMA_EQ). On success records the queue index, marks the EQ as needing a disable, and resets the DMA region handle to GDMA_INVALID_DMA_REGION. */ static int mana_gd_create_hw_eq(struct gdma_context *gc, struct gdma_queue *queue) { struct gdma_create_queue_resp resp = {}; struct gdma_create_queue_req req = {}; int err; if (queue->type != GDMA_EQ) return EINVAL; mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_QUEUE, sizeof(req), sizeof(resp)); req.hdr.dev_id = queue->gdma_dev->dev_id; req.type = queue->type; req.pdid = queue->gdma_dev->pdid; req.doolbell_id = queue->gdma_dev->doorbell; req.gdma_region = queue->mem_info.dma_region_handle; req.queue_size = queue->queue_size; req.log2_throttle_limit = queue->eq.log2_throttle_limit; req.eq_pci_msix_index = queue->eq.msix_index; err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err || resp.hdr.status) { device_printf(gc->dev, "Failed to create queue: %d, 0x%x\n", err, resp.hdr.status); return err ? 
err : EPROTO; } queue->id = resp.queue_index; queue->eq.disable_needed = true; queue->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; return 0; } /* Send GDMA_DISABLE_QUEUE for queue. A non-EQ queue only triggers a warning; the request is still sent. */ static int mana_gd_disable_queue(struct gdma_queue *queue) { struct gdma_context *gc = queue->gdma_dev->gdma_context; struct gdma_disable_queue_req req = {}; struct gdma_general_resp resp = {}; int err; if (queue->type != GDMA_EQ) mana_warn(NULL, "Not event queue type 0x%x\n", queue->type); mana_gd_init_req_hdr(&req.hdr, GDMA_DISABLE_QUEUE, sizeof(req), sizeof(resp)); req.hdr.dev_id = queue->gdma_dev->dev_id; req.type = queue->type; req.queue_index = queue->id; req.alloc_res_id_on_creation = 1; err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err || resp.hdr.status) { device_printf(gc->dev, "Failed to disable queue: %d, 0x%x\n", err, resp.hdr.status); return err ? err : EPROTO; } return 0; } /* Byte offsets of the per-queue-type doorbell registers inside one doorbell page. */ #define DOORBELL_OFFSET_SQ 0x0 #define DOORBELL_OFFSET_RQ 0x400 #define DOORBELL_OFFSET_CQ 0x800 #define DOORBELL_OFFSET_EQ 0xFF8 /* Compose the doorbell entry for q_type and write it to that type's offset in doorbell page db_index. num_req is stored as the arm field for EQ/CQ, as wqe_cnt for RQ, and is unused for SQ. An unknown type only logs a warning. Non-amd64 builds issue the 64-bit value as two 32-bit writes. */ static void mana_gd_ring_doorbell(struct gdma_context *gc, uint32_t db_index, enum gdma_queue_type q_type, uint32_t qid, uint32_t tail_ptr, uint8_t num_req) { union gdma_doorbell_entry e = {}; void __iomem *addr; addr = (char *)gc->db_page_base + gc->db_page_size * db_index; switch (q_type) { case GDMA_EQ: e.eq.id = qid; e.eq.tail_ptr = tail_ptr; e.eq.arm = num_req; addr = (char *)addr + DOORBELL_OFFSET_EQ; break; case GDMA_CQ: e.cq.id = qid; e.cq.tail_ptr = tail_ptr; e.cq.arm = num_req; addr = (char *)addr + DOORBELL_OFFSET_CQ; break; case GDMA_RQ: e.rq.id = qid; e.rq.tail_ptr = tail_ptr; e.rq.wqe_cnt = num_req; addr = (char *)addr + DOORBELL_OFFSET_RQ; break; case GDMA_SQ: e.sq.id = qid; e.sq.tail_ptr = tail_ptr; addr = (char *)addr + DOORBELL_OFFSET_SQ; break; default: mana_warn(NULL, "Invalid queue type 0x%x\n", q_type); return; } /* Ensure all writes are done before ring doorbell */ wmb(); #if defined(__amd64__) writeq(addr, e.as_uint64); #else uint32_t *p = (uint32_t 
*)&e.as_uint64; writel(addr, *p); writel((char *)addr + 4, *(p + 1)); #endif } /* Ring the doorbell of a work queue with tail pointer head * GDMA_WQE_BU_SIZE and a request count of 1. */ void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue) { mana_gd_ring_doorbell(gc, queue->gdma_dev->doorbell, queue->type, queue->id, queue->head * GDMA_WQE_BU_SIZE, 1); } /* Ring a completion queue's doorbell; the head is reduced modulo (entry count << GDMA_CQE_OWNER_BITS) and arm_bit is passed through as the arm value. */ void mana_gd_ring_cq(struct gdma_queue *cq, uint8_t arm_bit) { struct gdma_context *gc = cq->gdma_dev->gdma_context; uint32_t num_cqe = cq->queue_size / GDMA_CQE_SIZE; uint32_t head = cq->head % (num_cqe << GDMA_CQE_OWNER_BITS); mana_gd_ring_doorbell(gc, cq->gdma_dev->doorbell, cq->type, cq->id, head, arm_bit); } /* Handle the event queue entry at the current head. Completion events invoke the callback of the referenced CQ after validating it, test events complete gc->eq_test_event, and HWC init events are copied into a gdma_event and passed to the EQ callback. Other types are ignored. NOTE(review): the first warning prints '>' although the check is '>='. */ static void mana_gd_process_eqe(struct gdma_queue *eq) { uint32_t head = eq->head % (eq->queue_size / GDMA_EQE_SIZE); struct gdma_context *gc = eq->gdma_dev->gdma_context; struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr; union gdma_eqe_info eqe_info; enum gdma_eqe_type type; struct gdma_event event; struct gdma_queue *cq; struct gdma_eqe *eqe; uint32_t cq_id; eqe = &eq_eqe_ptr[head]; eqe_info.as_uint32 = eqe->eqe_info; type = eqe_info.type; switch (type) { case GDMA_EQE_COMPLETION: cq_id = eqe->details[0] & 0xFFFFFF; if (cq_id >= gc->max_num_cqs) { mana_warn(NULL, "failed: cq_id %u > max_num_cqs %u\n", cq_id, gc->max_num_cqs); break; } cq = gc->cq_table[cq_id]; if (!cq || cq->type != GDMA_CQ || cq->id != cq_id) { mana_warn(NULL, "failed: invalid cq_id %u\n", cq_id); break; } if (cq->cq.callback) cq->cq.callback(cq->cq.context, cq); break; case GDMA_EQE_TEST_EVENT: gc->test_event_eq_id = eq->id; mana_dbg(NULL, "EQE TEST EVENT received for EQ %u\n", eq->id); complete(&gc->eq_test_event); break; case GDMA_EQE_HWC_INIT_EQ_ID_DB: case GDMA_EQE_HWC_INIT_DATA: case GDMA_EQE_HWC_INIT_DONE: if (!eq->eq.callback) break; event.type = type; memcpy(&event.details, &eqe->details, GDMA_EVENT_DATA_SIZE); eq->eq.callback(eq->eq.context, eq, &event); break; default: break; } } /* Drain an event queue (arg is the gdma_queue). Each entry's owner bits are compared with the values derived from eq->head: the previous-pass value means the queue is empty, anything other than the current-pass value is reported as an overflow together with a dump of the first 80 words of the queue. Afterwards the EQ doorbell is rung with SET_ARM_BIT. */ static void mana_gd_process_eq_events(void *arg) { uint32_t owner_bits, new_bits, old_bits; union gdma_eqe_info eqe_info; 
struct gdma_eqe *eq_eqe_ptr; struct gdma_queue *eq = arg; struct gdma_context *gc; uint32_t head, num_eqe; struct gdma_eqe *eqe; int i, j; gc = eq->gdma_dev->gdma_context; num_eqe = eq->queue_size / GDMA_EQE_SIZE; eq_eqe_ptr = eq->queue_mem_ptr; bus_dmamap_sync(eq->mem_info.dma_tag, eq->mem_info.dma_map, BUS_DMASYNC_POSTREAD); /* Process up to 5 EQEs at a time, and update the HW head. */ for (i = 0; i < 5; i++) { eqe = &eq_eqe_ptr[eq->head % num_eqe]; eqe_info.as_uint32 = eqe->eqe_info; owner_bits = eqe_info.owner_bits; old_bits = (eq->head / num_eqe - 1) & GDMA_EQE_OWNER_MASK; /* No more entries */ if (owner_bits == old_bits) break; new_bits = (eq->head / num_eqe) & GDMA_EQE_OWNER_MASK; if (owner_bits != new_bits) { /* Something wrong. Log for debugging purpose */ device_printf(gc->dev, "EQ %d: overflow detected, " "i = %d, eq->head = %u " "got owner_bits = %u, new_bits = %u " "eqe addr %p, eqe->eqe_info 0x%x, " "eqe type = %x, reserved1 = %x, client_id = %x, " "reserved2 = %x, owner_bits = %x\n", eq->id, i, eq->head, owner_bits, new_bits, eqe, eqe->eqe_info, eqe_info.type, eqe_info.reserved1, eqe_info.client_id, eqe_info.reserved2, eqe_info.owner_bits); uint32_t *eqe_dump = (uint32_t *) eq_eqe_ptr; for (j = 0; j < 20; j++) { device_printf(gc->dev, "%p: %x\t%x\t%x\t%x\n", &eqe_dump[j * 4], eqe_dump[j * 4], eqe_dump[j * 4 + 1], eqe_dump[j * 4 + 2], eqe_dump[j * 4 + 3]); } break; } rmb(); mana_gd_process_eqe(eq); eq->head++; } bus_dmamap_sync(eq->mem_info.dma_tag, eq->mem_info.dma_map, BUS_DMASYNC_PREREAD); head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS); mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id, head, SET_ARM_BIT); } static int mana_gd_register_irq(struct gdma_queue *queue, const struct gdma_queue_spec *spec) { struct gdma_dev *gd = queue->gdma_dev; struct gdma_irq_context *gic; struct gdma_context *gc; struct gdma_resource *r; unsigned int msi_index; int err; gc = gd->gdma_context; r = &gc->msix_resource; mtx_lock_spin(&r->lock_spin); 
msi_index = find_first_zero_bit(r->map, r->size); if (msi_index >= r->size) { err = ENOSPC; } else { bitmap_set(r->map, msi_index, 1); queue->eq.msix_index = msi_index; err = 0; } mtx_unlock_spin(&r->lock_spin); if (err) return err; if (unlikely(msi_index >= gc->num_msix_usable)) { device_printf(gc->dev, "chose an invalid msix index %d, usable %d\n", msi_index, gc->num_msix_usable); return ENOSPC; } gic = &gc->irq_contexts[msi_index]; if (unlikely(gic->handler || gic->arg)) { device_printf(gc->dev, "interrupt handler or arg already assigned, " "msix index: %d\n", msi_index); } gic->arg = queue; gic->handler = mana_gd_process_eq_events; mana_dbg(NULL, "registered msix index %d vector %d irq %ju\n", msi_index, gic->msix_e.vector, rman_get_start(gic->res)); return 0; } static void mana_gd_deregiser_irq(struct gdma_queue *queue) { struct gdma_dev *gd = queue->gdma_dev; struct gdma_irq_context *gic; struct gdma_context *gc; struct gdma_resource *r; unsigned int msix_index; gc = gd->gdma_context; r = &gc->msix_resource; /* At most num_online_cpus() + 1 interrupts are used. 
*/ msix_index = queue->eq.msix_index; if (unlikely(msix_index >= gc->num_msix_usable)) return; gic = &gc->irq_contexts[msix_index]; gic->handler = NULL; gic->arg = NULL; mtx_lock_spin(&r->lock_spin); bitmap_clear(r->map, msix_index, 1); mtx_unlock_spin(&r->lock_spin); queue->eq.msix_index = INVALID_PCI_MSIX_INDEX; mana_dbg(NULL, "deregistered msix index %d vector %d irq %ju\n", msix_index, gic->msix_e.vector, rman_get_start(gic->res)); } int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq) { struct gdma_generate_test_event_req req = {}; struct gdma_general_resp resp = {}; device_t dev = gc->dev; int err; sx_xlock(&gc->eq_test_event_sx); init_completion(&gc->eq_test_event); gc->test_event_eq_id = INVALID_QUEUE_ID; mana_gd_init_req_hdr(&req.hdr, GDMA_GENERATE_TEST_EQE, sizeof(req), sizeof(resp)); req.hdr.dev_id = eq->gdma_dev->dev_id; req.queue_index = eq->id; err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err) { device_printf(dev, "test_eq failed: %d\n", err); goto out; } err = EPROTO; if (resp.hdr.status) { device_printf(dev, "test_eq failed: 0x%x\n", resp.hdr.status); goto out; } if (wait_for_completion_timeout(&gc->eq_test_event, 30 * hz)) { device_printf(dev, "test_eq timed out on queue %d\n", eq->id); goto out; } if (eq->id != gc->test_event_eq_id) { device_printf(dev, "test_eq got an event on wrong queue %d (%d)\n", gc->test_event_eq_id, eq->id); goto out; } err = 0; out: sx_xunlock(&gc->eq_test_event_sx); return err; } static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets, struct gdma_queue *queue) { int err; if (flush_evenets) { err = mana_gd_test_eq(gc, queue); if (err) device_printf(gc->dev, "Failed to flush EQ: %d\n", err); } mana_gd_deregiser_irq(queue); if (queue->eq.disable_needed) mana_gd_disable_queue(queue); } static int mana_gd_create_eq(struct gdma_dev *gd, const struct gdma_queue_spec *spec, bool create_hwq, struct gdma_queue *queue) { struct gdma_context *gc = gd->gdma_context; 
device_t dev = gc->dev; uint32_t log2_num_entries; int err; queue->eq.msix_index = INVALID_PCI_MSIX_INDEX; log2_num_entries = ilog2(queue->queue_size / GDMA_EQE_SIZE); if (spec->eq.log2_throttle_limit > log2_num_entries) { device_printf(dev, "EQ throttling limit (%lu) > maximum EQE (%u)\n", spec->eq.log2_throttle_limit, log2_num_entries); return EINVAL; } err = mana_gd_register_irq(queue, spec); if (err) { device_printf(dev, "Failed to register irq: %d\n", err); return err; } queue->eq.callback = spec->eq.callback; queue->eq.context = spec->eq.context; queue->head |= INITIALIZED_OWNER_BIT(log2_num_entries); queue->eq.log2_throttle_limit = spec->eq.log2_throttle_limit ?: 1; if (create_hwq) { err = mana_gd_create_hw_eq(gc, queue); if (err) goto out; err = mana_gd_test_eq(gc, queue); if (err) goto out; } return 0; out: device_printf(dev, "Failed to create EQ: %d\n", err); mana_gd_destroy_eq(gc, false, queue); return err; } static void mana_gd_create_cq(const struct gdma_queue_spec *spec, struct gdma_queue *queue) { uint32_t log2_num_entries = ilog2(spec->queue_size / GDMA_CQE_SIZE); queue->head |= INITIALIZED_OWNER_BIT(log2_num_entries); queue->cq.parent = spec->cq.parent_eq; queue->cq.context = spec->cq.context; queue->cq.callback = spec->cq.callback; } static void mana_gd_destroy_cq(struct gdma_context *gc, struct gdma_queue *queue) { uint32_t id = queue->id; if (id >= gc->max_num_cqs) return; if (!gc->cq_table[id]) return; gc->cq_table[id] = NULL; } int mana_gd_create_hwc_queue(struct gdma_dev *gd, const struct gdma_queue_spec *spec, struct gdma_queue **queue_ptr) { struct gdma_context *gc = gd->gdma_context; struct gdma_mem_info *gmi; struct gdma_queue *queue; int err; queue = malloc(sizeof(*queue), M_DEVBUF, M_WAITOK | M_ZERO); if (!queue) return ENOMEM; gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); if (err) goto free_q; queue->head = 0; queue->tail = 0; queue->queue_mem_ptr = gmi->virt_addr; queue->queue_size = spec->queue_size; 
queue->monitor_avl_buf = spec->monitor_avl_buf; queue->type = spec->type; queue->gdma_dev = gd; if (spec->type == GDMA_EQ) err = mana_gd_create_eq(gd, spec, false, queue); else if (spec->type == GDMA_CQ) mana_gd_create_cq(spec, queue); if (err) goto out; *queue_ptr = queue; return 0; out: mana_gd_free_memory(gmi); free_q: free(queue, M_DEVBUF); return err; } int mana_gd_destroy_dma_region(struct gdma_context *gc, gdma_obj_handle_t dma_region_handle) { struct gdma_destroy_dma_region_req req = {}; struct gdma_general_resp resp = {}; int err; if (dma_region_handle == GDMA_INVALID_DMA_REGION) return 0; mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_DMA_REGION, sizeof(req), sizeof(resp)); req.dma_region_handle = dma_region_handle; err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err || resp.hdr.status) { device_printf(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n", err, resp.hdr.status); return EPROTO; } return 0; } static int mana_gd_create_dma_region(struct gdma_dev *gd, struct gdma_mem_info *gmi) { unsigned int num_page = gmi->length / PAGE_SIZE; struct gdma_create_dma_region_req *req = NULL; struct gdma_create_dma_region_resp resp = {}; struct gdma_context *gc = gd->gdma_context; struct hw_channel_context *hwc; uint32_t length = gmi->length; uint32_t req_msg_size; int err; int i; if (length < PAGE_SIZE || !is_power_of_2(length)) { mana_err(NULL, "gmi size incorrect: %u\n", length); return EINVAL; } - if (offset_in_page((uint64_t)gmi->virt_addr) != 0) { + if (offset_in_page((uintptr_t)gmi->virt_addr) != 0) { mana_err(NULL, "gmi not page aligned: %p\n", gmi->virt_addr); return EINVAL; } hwc = gc->hwc.driver_data; req_msg_size = sizeof(*req) + num_page * sizeof(uint64_t); if (req_msg_size > hwc->max_req_msg_size) { mana_err(NULL, "req msg size too large: %u, %u\n", req_msg_size, hwc->max_req_msg_size); return EINVAL; } req = malloc(req_msg_size, M_DEVBUF, M_WAITOK | M_ZERO); if (!req) return ENOMEM; mana_gd_init_req_hdr(&req->hdr, 
GDMA_CREATE_DMA_REGION, req_msg_size, sizeof(resp)); req->length = length; req->offset_in_page = 0; req->gdma_page_type = GDMA_PAGE_TYPE_4K; req->page_count = num_page; req->page_addr_list_len = num_page; for (i = 0; i < num_page; i++) req->page_addr_list[i] = gmi->dma_handle + i * PAGE_SIZE; err = mana_gd_send_request(gc, req_msg_size, req, sizeof(resp), &resp); if (err) goto out; if (resp.hdr.status || resp.dma_region_handle == GDMA_INVALID_DMA_REGION) { device_printf(gc->dev, "Failed to create DMA region: 0x%x\n", resp.hdr.status); err = EPROTO; goto out; } gmi->dma_region_handle = resp.dma_region_handle; out: free(req, M_DEVBUF); return err; } int mana_gd_create_mana_eq(struct gdma_dev *gd, const struct gdma_queue_spec *spec, struct gdma_queue **queue_ptr) { struct gdma_context *gc = gd->gdma_context; struct gdma_mem_info *gmi; struct gdma_queue *queue; int err; if (spec->type != GDMA_EQ) return EINVAL; queue = malloc(sizeof(*queue), M_DEVBUF, M_WAITOK | M_ZERO); if (!queue) return ENOMEM; gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); if (err) goto free_q; err = mana_gd_create_dma_region(gd, gmi); if (err) goto out; queue->head = 0; queue->tail = 0; queue->queue_mem_ptr = gmi->virt_addr; queue->queue_size = spec->queue_size; queue->monitor_avl_buf = spec->monitor_avl_buf; queue->type = spec->type; queue->gdma_dev = gd; err = mana_gd_create_eq(gd, spec, true, queue); if (err) goto out; *queue_ptr = queue; return 0; out: mana_gd_free_memory(gmi); free_q: free(queue, M_DEVBUF); return err; } int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, const struct gdma_queue_spec *spec, struct gdma_queue **queue_ptr) { struct gdma_context *gc = gd->gdma_context; struct gdma_mem_info *gmi; struct gdma_queue *queue; int err; if (spec->type != GDMA_CQ && spec->type != GDMA_SQ && spec->type != GDMA_RQ) return EINVAL; queue = malloc(sizeof(*queue), M_DEVBUF, M_WAITOK | M_ZERO); if (!queue) return ENOMEM; gmi = &queue->mem_info; err = 
mana_gd_alloc_memory(gc, spec->queue_size, gmi); if (err) goto free_q; err = mana_gd_create_dma_region(gd, gmi); if (err) goto out; queue->head = 0; queue->tail = 0; queue->queue_mem_ptr = gmi->virt_addr; queue->queue_size = spec->queue_size; queue->monitor_avl_buf = spec->monitor_avl_buf; queue->type = spec->type; queue->gdma_dev = gd; if (spec->type == GDMA_CQ) mana_gd_create_cq(spec, queue); *queue_ptr = queue; return 0; out: mana_gd_free_memory(gmi); free_q: free(queue, M_DEVBUF); return err; } void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue) { struct gdma_mem_info *gmi = &queue->mem_info; switch (queue->type) { case GDMA_EQ: mana_gd_destroy_eq(gc, queue->eq.disable_needed, queue); break; case GDMA_CQ: mana_gd_destroy_cq(gc, queue); break; case GDMA_RQ: break; case GDMA_SQ: break; default: device_printf(gc->dev, "Can't destroy unknown queue: type = %d\n", queue->type); return; } mana_gd_destroy_dma_region(gc, gmi->dma_region_handle); mana_gd_free_memory(gmi); free(queue, M_DEVBUF); } #define OS_MAJOR_DIV 100000 #define OS_BUILD_MOD 1000 int mana_gd_verify_vf_version(device_t dev) { struct gdma_context *gc = device_get_softc(dev); struct gdma_verify_ver_resp resp = {}; struct gdma_verify_ver_req req = {}; int err; mana_gd_init_req_hdr(&req.hdr, GDMA_VERIFY_VF_DRIVER_VERSION, sizeof(req), sizeof(resp)); req.protocol_ver_min = GDMA_PROTOCOL_FIRST; req.protocol_ver_max = GDMA_PROTOCOL_LAST; req.drv_ver = 0; /* Unused */ req.os_type = 0x30; /* Other */ req.os_ver_major = osreldate / OS_MAJOR_DIV; req.os_ver_minor = (osreldate % OS_MAJOR_DIV) / OS_BUILD_MOD; req.os_ver_build = osreldate % OS_BUILD_MOD; strncpy(req.os_ver_str1, ostype, sizeof(req.os_ver_str1) - 1); strncpy(req.os_ver_str2, osrelease, sizeof(req.os_ver_str2) - 1); err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err || resp.hdr.status) { device_printf(gc->dev, "VfVerifyVersionOutput: %d, status=0x%x\n", err, resp.hdr.status); return err ? 
err : EPROTO; } return 0; } int mana_gd_register_device(struct gdma_dev *gd) { struct gdma_context *gc = gd->gdma_context; struct gdma_register_device_resp resp = {}; struct gdma_general_req req = {}; int err; gd->pdid = INVALID_PDID; gd->doorbell = INVALID_DOORBELL; gd->gpa_mkey = INVALID_MEM_KEY; mana_gd_init_req_hdr(&req.hdr, GDMA_REGISTER_DEVICE, sizeof(req), sizeof(resp)); req.hdr.dev_id = gd->dev_id; err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err || resp.hdr.status) { device_printf(gc->dev, "gdma_register_device_resp failed: %d, 0x%x\n", err, resp.hdr.status); return err ? err : -EPROTO; } gd->pdid = resp.pdid; gd->gpa_mkey = resp.gpa_mkey; gd->doorbell = resp.db_id; mana_dbg(NULL, "mana device pdid %u, gpa_mkey %u, doorbell %u \n", gd->pdid, gd->gpa_mkey, gd->doorbell); return 0; } int mana_gd_deregister_device(struct gdma_dev *gd) { struct gdma_context *gc = gd->gdma_context; struct gdma_general_resp resp = {}; struct gdma_general_req req = {}; int err; if (gd->pdid == INVALID_PDID) return EINVAL; mana_gd_init_req_hdr(&req.hdr, GDMA_DEREGISTER_DEVICE, sizeof(req), sizeof(resp)); req.hdr.dev_id = gd->dev_id; err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err || resp.hdr.status) { device_printf(gc->dev, "Failed to deregister device: %d, 0x%x\n", err, resp.hdr.status); if (!err) err = EPROTO; } gd->pdid = INVALID_PDID; gd->doorbell = INVALID_DOORBELL; gd->gpa_mkey = INVALID_MEM_KEY; return err; } uint32_t mana_gd_wq_avail_space(struct gdma_queue *wq) { uint32_t used_space = (wq->head - wq->tail) * GDMA_WQE_BU_SIZE; uint32_t wq_size = wq->queue_size; if (used_space > wq_size) { mana_warn(NULL, "failed: used space %u > queue size %u\n", used_space, wq_size); } return wq_size - used_space; } uint8_t * mana_gd_get_wqe_ptr(const struct gdma_queue *wq, uint32_t wqe_offset) { uint32_t offset = (wqe_offset * GDMA_WQE_BU_SIZE) & (wq->queue_size - 1); if ((offset + GDMA_WQE_BU_SIZE) > wq->queue_size) { 
mana_warn(NULL, "failed: write end out of queue bound %u, " "queue size %u\n", offset + GDMA_WQE_BU_SIZE, wq->queue_size); } return (uint8_t *)wq->queue_mem_ptr + offset; } static uint32_t mana_gd_write_client_oob(const struct gdma_wqe_request *wqe_req, enum gdma_queue_type q_type, uint32_t client_oob_size, uint32_t sgl_data_size, uint8_t *wqe_ptr) { bool oob_in_sgl = !!(wqe_req->flags & GDMA_WR_OOB_IN_SGL); bool pad_data = !!(wqe_req->flags & GDMA_WR_PAD_BY_SGE0); struct gdma_wqe *header = (struct gdma_wqe *)wqe_ptr; uint8_t *ptr; memset(header, 0, sizeof(struct gdma_wqe)); header->num_sge = wqe_req->num_sge; header->inline_oob_size_div4 = client_oob_size / sizeof(uint32_t); if (oob_in_sgl) { if (!pad_data || wqe_req->num_sge < 2) { mana_warn(NULL, "no pad_data or num_sge < 2\n"); } header->client_oob_in_sgl = 1; if (pad_data) header->last_vbytes = wqe_req->sgl[0].size; } if (q_type == GDMA_SQ) header->client_data_unit = wqe_req->client_data_unit; /* * The size of gdma_wqe + client_oob_size must be less than or equal * to one Basic Unit (i.e. 32 bytes), so the pointer can't go beyond * the queue memory buffer boundary. 
*/ ptr = wqe_ptr + sizeof(header); if (wqe_req->inline_oob_data && wqe_req->inline_oob_size > 0) { memcpy(ptr, wqe_req->inline_oob_data, wqe_req->inline_oob_size); if (client_oob_size > wqe_req->inline_oob_size) memset(ptr + wqe_req->inline_oob_size, 0, client_oob_size - wqe_req->inline_oob_size); } return sizeof(header) + client_oob_size; } static void mana_gd_write_sgl(struct gdma_queue *wq, uint8_t *wqe_ptr, const struct gdma_wqe_request *wqe_req) { uint32_t sgl_size = sizeof(struct gdma_sge) * wqe_req->num_sge; const uint8_t *address = (uint8_t *)wqe_req->sgl; uint8_t *base_ptr, *end_ptr; uint32_t size_to_end; base_ptr = wq->queue_mem_ptr; end_ptr = base_ptr + wq->queue_size; size_to_end = (uint32_t)(end_ptr - wqe_ptr); if (size_to_end < sgl_size) { memcpy(wqe_ptr, address, size_to_end); wqe_ptr = base_ptr; address += size_to_end; sgl_size -= size_to_end; } memcpy(wqe_ptr, address, sgl_size); } int mana_gd_post_work_request(struct gdma_queue *wq, const struct gdma_wqe_request *wqe_req, struct gdma_posted_wqe_info *wqe_info) { uint32_t client_oob_size = wqe_req->inline_oob_size; struct gdma_context *gc; uint32_t sgl_data_size; uint32_t max_wqe_size; uint32_t wqe_size; uint8_t *wqe_ptr; if (wqe_req->num_sge == 0) return EINVAL; if (wq->type == GDMA_RQ) { if (client_oob_size != 0) return EINVAL; client_oob_size = INLINE_OOB_SMALL_SIZE; max_wqe_size = GDMA_MAX_RQE_SIZE; } else { if (client_oob_size != INLINE_OOB_SMALL_SIZE && client_oob_size != INLINE_OOB_LARGE_SIZE) return EINVAL; max_wqe_size = GDMA_MAX_SQE_SIZE; } sgl_data_size = sizeof(struct gdma_sge) * wqe_req->num_sge; wqe_size = ALIGN(sizeof(struct gdma_wqe) + client_oob_size + sgl_data_size, GDMA_WQE_BU_SIZE); if (wqe_size > max_wqe_size) return EINVAL; if (wq->monitor_avl_buf && wqe_size > mana_gd_wq_avail_space(wq)) { gc = wq->gdma_dev->gdma_context; device_printf(gc->dev, "unsuccessful flow control!\n"); return ENOSPC; } if (wqe_info) wqe_info->wqe_size_in_bu = wqe_size / GDMA_WQE_BU_SIZE; wqe_ptr = 
mana_gd_get_wqe_ptr(wq, wq->head); wqe_ptr += mana_gd_write_client_oob(wqe_req, wq->type, client_oob_size, sgl_data_size, wqe_ptr); if (wqe_ptr >= (uint8_t *)wq->queue_mem_ptr + wq->queue_size) wqe_ptr -= wq->queue_size; mana_gd_write_sgl(wq, wqe_ptr, wqe_req); wq->head += wqe_size / GDMA_WQE_BU_SIZE; bus_dmamap_sync(wq->mem_info.dma_tag, wq->mem_info.dma_map, BUS_DMASYNC_PREWRITE); return 0; } int mana_gd_post_and_ring(struct gdma_queue *queue, const struct gdma_wqe_request *wqe_req, struct gdma_posted_wqe_info *wqe_info) { struct gdma_context *gc = queue->gdma_dev->gdma_context; int err; err = mana_gd_post_work_request(queue, wqe_req, wqe_info); if (err) return err; mana_gd_wq_ring_doorbell(gc, queue); return 0; } static int mana_gd_read_cqe(struct gdma_queue *cq, struct gdma_comp *comp) { unsigned int num_cqe = cq->queue_size / sizeof(struct gdma_cqe); struct gdma_cqe *cq_cqe = cq->queue_mem_ptr; uint32_t owner_bits, new_bits, old_bits; struct gdma_cqe *cqe; cqe = &cq_cqe[cq->head % num_cqe]; owner_bits = cqe->cqe_info.owner_bits; old_bits = (cq->head / num_cqe - 1) & GDMA_CQE_OWNER_MASK; /* Return 0 if no more entries. */ if (owner_bits == old_bits) return 0; new_bits = (cq->head / num_cqe) & GDMA_CQE_OWNER_MASK; /* Return -1 if overflow detected. */ if (owner_bits != new_bits) { mana_warn(NULL, "overflow detected! 
owner_bits %u != new_bits %u\n", owner_bits, new_bits); return -1; } rmb(); comp->wq_num = cqe->cqe_info.wq_num; comp->is_sq = cqe->cqe_info.is_sq; memcpy(comp->cqe_data, cqe->cqe_data, GDMA_COMP_DATA_SIZE); return 1; } int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe) { int cqe_idx; int ret; bus_dmamap_sync(cq->mem_info.dma_tag, cq->mem_info.dma_map, BUS_DMASYNC_POSTREAD); for (cqe_idx = 0; cqe_idx < num_cqe; cqe_idx++) { ret = mana_gd_read_cqe(cq, &comp[cqe_idx]); if (ret < 0) { cq->head -= cqe_idx; return ret; } if (ret == 0) break; cq->head++; } return cqe_idx; } static void mana_gd_intr(void *arg) { struct gdma_irq_context *gic = arg; if (gic->handler) { gic->handler(gic->arg); } } int mana_gd_alloc_res_map(uint32_t res_avail, struct gdma_resource *r, const char *lock_name) { int n = howmany(res_avail, BITS_PER_LONG); r->map = malloc(n * sizeof(unsigned long), M_DEVBUF, M_WAITOK | M_ZERO); if (!r->map) return ENOMEM; r->size = res_avail; mtx_init(&r->lock_spin, lock_name, NULL, MTX_SPIN); mana_dbg(NULL, "total res %u, total number of unsigned longs %u\n", r->size, n); return (0); } void mana_gd_free_res_map(struct gdma_resource *r) { if (!r || !r->map) return; free(r->map, M_DEVBUF); r->map = NULL; r->size = 0; } static void mana_gd_init_registers(struct gdma_context *gc) { - uint64_t bar0_va = rman_get_bushandle(gc->bar0); + uintptr_t bar0_va = rman_get_bushandle(gc->bar0); vm_paddr_t bar0_pa = rman_get_start(gc->bar0); gc->db_page_size = mana_gd_r32(gc, GDMA_REG_DB_PAGE_SIZE) & 0xFFFF; gc->db_page_base = - (void *) (bar0_va + mana_gd_r64(gc, GDMA_REG_DB_PAGE_OFFSET)); + (void *)(bar0_va + (size_t)mana_gd_r64(gc, GDMA_REG_DB_PAGE_OFFSET)); gc->phys_db_page_base = bar0_pa + mana_gd_r64(gc, GDMA_REG_DB_PAGE_OFFSET); gc->shm_base = - (void *) (bar0_va + mana_gd_r64(gc, GDMA_REG_SHM_OFFSET)); + (void *)(bar0_va + (size_t)mana_gd_r64(gc, GDMA_REG_SHM_OFFSET)); mana_dbg(NULL, "db_page_size 0x%xx, db_page_base %p," " shm_base %p\n", 
gc->db_page_size, gc->db_page_base, gc->shm_base); } static struct resource * mana_gd_alloc_bar(device_t dev, int bar) { struct resource *res = NULL; struct pci_map *pm; int rid, type; if (bar < 0 || bar > PCIR_MAX_BAR_0) goto alloc_bar_out; pm = pci_find_bar(dev, PCIR_BAR(bar)); if (!pm) goto alloc_bar_out; if (PCI_BAR_IO(pm->pm_value)) type = SYS_RES_IOPORT; else type = SYS_RES_MEMORY; if (type < 0) goto alloc_bar_out; rid = PCIR_BAR(bar); res = bus_alloc_resource_any(dev, type, &rid, RF_ACTIVE); #if defined(__amd64__) if (res) mana_dbg(NULL, "bar %d: rid 0x%x, type 0x%jx," " handle 0x%jx\n", bar, rid, res->r_bustag, res->r_bushandle); #endif alloc_bar_out: return (res); } static void mana_gd_free_pci_res(struct gdma_context *gc) { if (!gc || gc->dev) return; if (gc->bar0 != NULL) { bus_release_resource(gc->dev, SYS_RES_MEMORY, PCIR_BAR(GDMA_BAR0), gc->bar0); } if (gc->msix != NULL) { bus_release_resource(gc->dev, SYS_RES_MEMORY, gc->msix_rid, gc->msix); } } static int mana_gd_setup_irqs(device_t dev) { unsigned int max_queues_per_port = mp_ncpus; struct gdma_context *gc = device_get_softc(dev); struct gdma_irq_context *gic; unsigned int max_irqs; int nvec; int rc, rcc, i; if (max_queues_per_port > MANA_MAX_NUM_QUEUES) max_queues_per_port = MANA_MAX_NUM_QUEUES; /* Need 1 interrupt for the Hardware communication Channel (HWC) */ max_irqs = max_queues_per_port + 1; nvec = max_irqs; rc = pci_alloc_msix(dev, &nvec); if (unlikely(rc != 0)) { device_printf(dev, "Failed to allocate MSIX, vectors %d, error: %d\n", nvec, rc); rc = ENOSPC; goto err_setup_irq_alloc; } if (nvec != max_irqs) { if (nvec == 1) { device_printf(dev, "Not enough number of MSI-x allocated: %d\n", nvec); rc = ENOSPC; goto err_setup_irq_release; } device_printf(dev, "Allocated only %d MSI-x (%d requested)\n", nvec, max_irqs); } gc->irq_contexts = malloc(nvec * sizeof(struct gdma_irq_context), M_DEVBUF, M_WAITOK | M_ZERO); if (!gc->irq_contexts) { rc = ENOMEM; goto err_setup_irq_release; } for (i = 0; 
i < nvec; i++) { gic = &gc->irq_contexts[i]; gic->msix_e.entry = i; /* Vector starts from 1. */ gic->msix_e.vector = i + 1; gic->handler = NULL; gic->arg = NULL; gic->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &gic->msix_e.vector, RF_ACTIVE | RF_SHAREABLE); if (unlikely(gic->res == NULL)) { rc = ENOMEM; device_printf(dev, "could not allocate resource " "for irq vector %d\n", gic->msix_e.vector); goto err_setup_irq; } rc = bus_setup_intr(dev, gic->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, mana_gd_intr, gic, &gic->cookie); if (unlikely(rc != 0)) { device_printf(dev, "failed to register interrupt " "handler for irq %ju vector %d: error %d\n", rman_get_start(gic->res), gic->msix_e.vector, rc); goto err_setup_irq; } gic->requested = true; mana_dbg(NULL, "added msix vector %d irq %ju\n", gic->msix_e.vector, rman_get_start(gic->res)); } rc = mana_gd_alloc_res_map(nvec, &gc->msix_resource, "gdma msix res lock"); if (rc != 0) { device_printf(dev, "failed to allocate memory " "for msix bitmap\n"); goto err_setup_irq; } gc->max_num_msix = nvec; gc->num_msix_usable = nvec; mana_dbg(NULL, "setup %d msix interrupts\n", nvec); return (0); err_setup_irq: for (; i >= 0; i--) { gic = &gc->irq_contexts[i]; rcc = 0; /* * If gic->requested is true, we need to free both intr and * resources. 
*/ if (gic->requested) rcc = bus_teardown_intr(dev, gic->res, gic->cookie); if (unlikely(rcc != 0)) device_printf(dev, "could not release " "irq vector %d, error: %d\n", gic->msix_e.vector, rcc); rcc = 0; if (gic->res != NULL) { rcc = bus_release_resource(dev, SYS_RES_IRQ, gic->msix_e.vector, gic->res); } if (unlikely(rcc != 0)) device_printf(dev, "dev has no parent while " "releasing resource for irq vector %d\n", gic->msix_e.vector); gic->requested = false; gic->res = NULL; } free(gc->irq_contexts, M_DEVBUF); gc->irq_contexts = NULL; err_setup_irq_release: pci_release_msi(dev); err_setup_irq_alloc: return (rc); } static void mana_gd_remove_irqs(device_t dev) { struct gdma_context *gc = device_get_softc(dev); struct gdma_irq_context *gic; int rc, i; mana_gd_free_res_map(&gc->msix_resource); for (i = 0; i < gc->max_num_msix; i++) { gic = &gc->irq_contexts[i]; if (gic->requested) { rc = bus_teardown_intr(dev, gic->res, gic->cookie); if (unlikely(rc != 0)) { device_printf(dev, "failed to tear down " "irq vector %d, error: %d\n", gic->msix_e.vector, rc); } gic->requested = false; } if (gic->res != NULL) { rc = bus_release_resource(dev, SYS_RES_IRQ, gic->msix_e.vector, gic->res); if (unlikely(rc != 0)) { device_printf(dev, "dev has no parent while " "releasing resource for irq vector %d\n", gic->msix_e.vector); } gic->res = NULL; } } gc->max_num_msix = 0; gc->num_msix_usable = 0; free(gc->irq_contexts, M_DEVBUF); gc->irq_contexts = NULL; pci_release_msi(dev); } static int mana_gd_probe(device_t dev) { mana_vendor_id_t *ent; char adapter_name[60]; uint16_t pci_vendor_id = 0; uint16_t pci_device_id = 0; pci_vendor_id = pci_get_vendor(dev); pci_device_id = pci_get_device(dev); ent = mana_id_table; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id)) { mana_dbg(NULL, "vendor=%x device=%x\n", pci_vendor_id, pci_device_id); sprintf(adapter_name, DEVICE_DESC); device_set_desc_copy(dev, adapter_name); return 
(BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /** * mana_attach - Device Initialization Routine * @dev: device information struct * * Returns 0 on success, otherwise on failure. * * mana_attach initializes a GDMA adapter identified by a device structure. **/ static int mana_gd_attach(device_t dev) { struct gdma_context *gc; int msix_rid; int rc; gc = device_get_softc(dev); gc->dev = dev; pci_enable_io(dev, SYS_RES_IOPORT); pci_enable_io(dev, SYS_RES_MEMORY); pci_enable_busmaster(dev); gc->bar0 = mana_gd_alloc_bar(dev, GDMA_BAR0); if (unlikely(gc->bar0 == NULL)) { device_printf(dev, "unable to allocate bus resource for bar0!\n"); rc = ENOMEM; goto err_disable_dev; } /* Store bar0 tage and handle for quick access */ gc->gd_bus.bar0_t = rman_get_bustag(gc->bar0); gc->gd_bus.bar0_h = rman_get_bushandle(gc->bar0); /* Map MSI-x vector table */ msix_rid = pci_msix_table_bar(dev); mana_dbg(NULL, "msix_rid 0x%x\n", msix_rid); gc->msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &msix_rid, RF_ACTIVE); if (unlikely(gc->msix == NULL)) { device_printf(dev, "unable to allocate bus resource for msix!\n"); rc = ENOMEM; goto err_free_pci_res; } gc->msix_rid = msix_rid; if (unlikely(gc->gd_bus.bar0_h == 0)) { device_printf(dev, "failed to map bar0!\n"); rc = ENXIO; goto err_free_pci_res; } mana_gd_init_registers(gc); mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base); rc = mana_gd_setup_irqs(dev); if (rc) { goto err_free_pci_res; } sx_init(&gc->eq_test_event_sx, "gdma test event sx"); rc = mana_hwc_create_channel(gc); if (rc) { mana_dbg(NULL, "Failed to create hwc channel\n"); if (rc == EIO) goto err_clean_up_gdma; else goto err_remove_irq; } rc = mana_gd_verify_vf_version(dev); if (rc) { mana_dbg(NULL, "Failed to verify vf\n"); goto err_clean_up_gdma; } rc = mana_gd_query_max_resources(dev); if (rc) { mana_dbg(NULL, "Failed to query max resources\n"); goto err_clean_up_gdma; } rc = mana_gd_detect_devices(dev); if (rc) { mana_dbg(NULL, "Failed to detect mana device\n"); 
goto err_clean_up_gdma; } rc = mana_probe(&gc->mana); if (rc) { mana_dbg(NULL, "Failed to probe mana device\n"); goto err_clean_up_gdma; } return (0); err_clean_up_gdma: mana_hwc_destroy_channel(gc); err_remove_irq: mana_gd_remove_irqs(dev); err_free_pci_res: mana_gd_free_pci_res(gc); err_disable_dev: pci_disable_busmaster(dev); return(rc); } /** * mana_detach - Device Removal Routine * @pdev: device information struct * * mana_detach is called by the device subsystem to alert the driver * that it should release a PCI device. **/ static int mana_gd_detach(device_t dev) { struct gdma_context *gc = device_get_softc(dev); mana_remove(&gc->mana); mana_hwc_destroy_channel(gc); mana_gd_remove_irqs(dev); mana_gd_free_pci_res(gc); pci_disable_busmaster(dev); return (bus_generic_detach(dev)); } /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t mana_methods[] = { /* Device interface */ DEVMETHOD(device_probe, mana_gd_probe), DEVMETHOD(device_attach, mana_gd_attach), DEVMETHOD(device_detach, mana_gd_detach), DEVMETHOD_END }; static driver_t mana_driver = { "mana", mana_methods, sizeof(struct gdma_context), }; DRIVER_MODULE(mana, pci, mana_driver, 0, 0); MODULE_PNP_INFO("U16:vendor;U16:device", pci, mana, mana_id_table, nitems(mana_id_table) - 1); MODULE_DEPEND(mana, pci, 1, 1, 1); MODULE_DEPEND(mana, ether, 1, 1, 1); /*********************************************************************/ diff --git a/sys/dev/mana/hw_channel.c b/sys/dev/mana/hw_channel.c index 19e25a8a49ab..845a47eac96a 100644 --- a/sys/dev/mana/hw_channel.c +++ b/sys/dev/mana/hw_channel.c @@ -1,943 +1,943 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 Microsoft Corp. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "mana.h" #include "hw_channel.h" static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, uint16_t *msg_id) { struct gdma_resource *r = &hwc->inflight_msg_res; uint32_t index; sema_wait(&hwc->sema); mtx_lock_spin(&r->lock_spin); index = find_first_zero_bit(hwc->inflight_msg_res.map, hwc->inflight_msg_res.size); bitmap_set(hwc->inflight_msg_res.map, index, 1); mtx_unlock_spin(&r->lock_spin); *msg_id = index; return 0; } static void mana_hwc_put_msg_index(struct hw_channel_context *hwc, uint16_t msg_id) { struct gdma_resource *r = &hwc->inflight_msg_res; mtx_lock_spin(&r->lock_spin); bitmap_clear(hwc->inflight_msg_res.map, msg_id, 1); mtx_unlock_spin(&r->lock_spin); sema_post(&hwc->sema); } static int mana_hwc_verify_resp_msg(const struct hwc_caller_ctx *caller_ctx, const struct gdma_resp_hdr *resp_msg, uint32_t resp_len) { if (resp_len < sizeof(*resp_msg)) return EPROTO; if (resp_len > caller_ctx->output_buflen) return EPROTO; return 0; } static void mana_hwc_handle_resp(struct hw_channel_context *hwc, uint32_t resp_len, const struct gdma_resp_hdr *resp_msg) { struct hwc_caller_ctx *ctx; int err; if (!test_bit(resp_msg->response.hwc_msg_id, hwc->inflight_msg_res.map)) { device_printf(hwc->dev, "hwc_rx: invalid msg_id = %u\n", resp_msg->response.hwc_msg_id); return; } ctx = hwc->caller_ctx + resp_msg->response.hwc_msg_id; err = mana_hwc_verify_resp_msg(ctx, resp_msg, resp_len); if (err) goto out; ctx->status_code = resp_msg->status; memcpy(ctx->output_buf, resp_msg, resp_len); out: ctx->error = err; complete(&ctx->comp_event); } static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, struct hwc_work_request *req) { device_t dev = hwc_rxq->hwc->dev; struct gdma_sge *sge; int err; sge = &req->sge; - sge->address = (uint64_t)req->buf_sge_addr; + sge->address = (uintptr_t)req->buf_sge_addr; sge->mem_key = 
hwc_rxq->msg_buf->gpa_mkey; sge->size = req->buf_len; memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); req->wqe_req.sgl = sge; req->wqe_req.num_sge = 1; req->wqe_req.client_data_unit = 0; err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL); if (err) device_printf(dev, "Failed to post WQE on HWC RQ: %d\n", err); return err; } static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self, struct gdma_event *event) { struct hw_channel_context *hwc = ctx; struct gdma_dev *gd = hwc->gdma_dev; union hwc_init_type_data type_data; union hwc_init_eq_id_db eq_db; uint32_t type, val; switch (event->type) { case GDMA_EQE_HWC_INIT_EQ_ID_DB: eq_db.as_uint32 = event->details[0]; hwc->cq->gdma_eq->id = eq_db.eq_id; gd->doorbell = eq_db.doorbell; break; case GDMA_EQE_HWC_INIT_DATA: type_data.as_uint32 = event->details[0]; type = type_data.type; val = type_data.value; switch (type) { case HWC_INIT_DATA_CQID: hwc->cq->gdma_cq->id = val; break; case HWC_INIT_DATA_RQID: hwc->rxq->gdma_wq->id = val; break; case HWC_INIT_DATA_SQID: hwc->txq->gdma_wq->id = val; break; case HWC_INIT_DATA_QUEUE_DEPTH: hwc->hwc_init_q_depth_max = (uint16_t)val; break; case HWC_INIT_DATA_MAX_REQUEST: hwc->hwc_init_max_req_msg_size = val; break; case HWC_INIT_DATA_MAX_RESPONSE: hwc->hwc_init_max_resp_msg_size = val; break; case HWC_INIT_DATA_MAX_NUM_CQS: gd->gdma_context->max_num_cqs = val; break; case HWC_INIT_DATA_PDID: hwc->gdma_dev->pdid = val; break; case HWC_INIT_DATA_GPA_MKEY: hwc->rxq->msg_buf->gpa_mkey = val; hwc->txq->msg_buf->gpa_mkey = val; break; } break; case GDMA_EQE_HWC_INIT_DONE: complete(&hwc->hwc_init_eqe_comp); break; default: /* Ignore unknown events, which should never happen. 
*/ break; } } static void mana_hwc_rx_event_handler(void *ctx, uint32_t gdma_rxq_id, const struct hwc_rx_oob *rx_oob) { struct hw_channel_context *hwc = ctx; struct hwc_wq *hwc_rxq = hwc->rxq; struct hwc_work_request *rx_req; struct gdma_resp_hdr *resp; struct gdma_wqe *dma_oob; struct gdma_queue *rq; struct gdma_sge *sge; uint64_t rq_base_addr; uint64_t rx_req_idx; uint8_t *wqe; if (hwc_rxq->gdma_wq->id != gdma_rxq_id) { mana_warn(NULL, "unmatched rx queue %u != %u\n", hwc_rxq->gdma_wq->id, gdma_rxq_id); return; } rq = hwc_rxq->gdma_wq; wqe = mana_gd_get_wqe_ptr(rq, rx_oob->wqe_offset / GDMA_WQE_BU_SIZE); dma_oob = (struct gdma_wqe *)wqe; bus_dmamap_sync(rq->mem_info.dma_tag, rq->mem_info.dma_map, BUS_DMASYNC_POSTREAD); sge = (struct gdma_sge *)(wqe + 8 + dma_oob->inline_oob_size_div4 * 4); /* Select the RX work request for virtual address and for reposting. */ rq_base_addr = hwc_rxq->msg_buf->mem_info.dma_handle; rx_req_idx = (sge->address - rq_base_addr) / hwc->max_req_msg_size; bus_dmamap_sync(hwc_rxq->msg_buf->mem_info.dma_tag, hwc_rxq->msg_buf->mem_info.dma_map, BUS_DMASYNC_POSTREAD); rx_req = &hwc_rxq->msg_buf->reqs[rx_req_idx]; resp = (struct gdma_resp_hdr *)rx_req->buf_va; if (resp->response.hwc_msg_id >= hwc->num_inflight_msg) { device_printf(hwc->dev, "HWC RX: wrong msg_id=%u\n", resp->response.hwc_msg_id); return; } mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, resp); /* Do no longer use 'resp', because the buffer is posted to the HW * in the below mana_hwc_post_rx_wqe(). 
*/ resp = NULL; bus_dmamap_sync(hwc_rxq->msg_buf->mem_info.dma_tag, hwc_rxq->msg_buf->mem_info.dma_map, BUS_DMASYNC_PREREAD); mana_hwc_post_rx_wqe(hwc_rxq, rx_req); } static void mana_hwc_tx_event_handler(void *ctx, uint32_t gdma_txq_id, const struct hwc_rx_oob *rx_oob) { struct hw_channel_context *hwc = ctx; struct hwc_wq *hwc_txq = hwc->txq; if (!hwc_txq || hwc_txq->gdma_wq->id != gdma_txq_id) { mana_warn(NULL, "unmatched tx queue %u != %u\n", hwc_txq->gdma_wq->id, gdma_txq_id); } bus_dmamap_sync(hwc_txq->gdma_wq->mem_info.dma_tag, hwc_txq->gdma_wq->mem_info.dma_map, BUS_DMASYNC_POSTWRITE); } static int mana_hwc_create_gdma_wq(struct hw_channel_context *hwc, enum gdma_queue_type type, uint64_t queue_size, struct gdma_queue **queue) { struct gdma_queue_spec spec = {}; if (type != GDMA_SQ && type != GDMA_RQ) return EINVAL; spec.type = type; spec.monitor_avl_buf = false; spec.queue_size = queue_size; return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue); } static int mana_hwc_create_gdma_cq(struct hw_channel_context *hwc, uint64_t queue_size, void *ctx, gdma_cq_callback *cb, struct gdma_queue *parent_eq, struct gdma_queue **queue) { struct gdma_queue_spec spec = {}; spec.type = GDMA_CQ; spec.monitor_avl_buf = false; spec.queue_size = queue_size; spec.cq.context = ctx; spec.cq.callback = cb; spec.cq.parent_eq = parent_eq; return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue); } static int mana_hwc_create_gdma_eq(struct hw_channel_context *hwc, uint64_t queue_size, void *ctx, gdma_eq_callback *cb, struct gdma_queue **queue) { struct gdma_queue_spec spec = {}; spec.type = GDMA_EQ; spec.monitor_avl_buf = false; spec.queue_size = queue_size; spec.eq.context = ctx; spec.eq.callback = cb; spec.eq.log2_throttle_limit = DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ; return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue); } static void mana_hwc_comp_event(void *ctx, struct gdma_queue *q_self) { struct hwc_rx_oob comp_data = {}; struct gdma_comp *completions; struct 
hwc_cq *hwc_cq = ctx; int comp_read, i; completions = hwc_cq->comp_buf; comp_read = mana_gd_poll_cq(q_self, completions, hwc_cq->queue_depth); for (i = 0; i < comp_read; ++i) { comp_data = *(struct hwc_rx_oob *)completions[i].cqe_data; if (completions[i].is_sq) hwc_cq->tx_event_handler(hwc_cq->tx_event_ctx, completions[i].wq_num, &comp_data); else hwc_cq->rx_event_handler(hwc_cq->rx_event_ctx, completions[i].wq_num, &comp_data); } bus_dmamap_sync(q_self->mem_info.dma_tag, q_self->mem_info.dma_map, BUS_DMASYNC_POSTREAD); mana_gd_ring_cq(q_self, SET_ARM_BIT); } static void mana_hwc_destroy_cq(struct gdma_context *gc, struct hwc_cq *hwc_cq) { if (hwc_cq->comp_buf) free(hwc_cq->comp_buf, M_DEVBUF); if (hwc_cq->gdma_cq) mana_gd_destroy_queue(gc, hwc_cq->gdma_cq); if (hwc_cq->gdma_eq) mana_gd_destroy_queue(gc, hwc_cq->gdma_eq); free(hwc_cq, M_DEVBUF); } static int mana_hwc_create_cq(struct hw_channel_context *hwc, uint16_t q_depth, gdma_eq_callback *callback, void *ctx, hwc_rx_event_handler_t *rx_ev_hdlr, void *rx_ev_ctx, hwc_tx_event_handler_t *tx_ev_hdlr, void *tx_ev_ctx, struct hwc_cq **hwc_cq_ptr) { struct gdma_queue *eq, *cq; struct gdma_comp *comp_buf; struct hwc_cq *hwc_cq; uint32_t eq_size, cq_size; int err; eq_size = roundup_pow_of_two(GDMA_EQE_SIZE * q_depth); if (eq_size < MINIMUM_SUPPORTED_PAGE_SIZE) eq_size = MINIMUM_SUPPORTED_PAGE_SIZE; cq_size = roundup_pow_of_two(GDMA_CQE_SIZE * q_depth); if (cq_size < MINIMUM_SUPPORTED_PAGE_SIZE) cq_size = MINIMUM_SUPPORTED_PAGE_SIZE; hwc_cq = malloc(sizeof(*hwc_cq), M_DEVBUF, M_WAITOK | M_ZERO); if (!hwc_cq) return ENOMEM; err = mana_hwc_create_gdma_eq(hwc, eq_size, ctx, callback, &eq); if (err) { device_printf(hwc->dev, "Failed to create HWC EQ for RQ: %d\n", err); goto out; } hwc_cq->gdma_eq = eq; err = mana_hwc_create_gdma_cq(hwc, cq_size, hwc_cq, mana_hwc_comp_event, eq, &cq); if (err) { device_printf(hwc->dev, "Failed to create HWC CQ for RQ: %d\n", err); goto out; } hwc_cq->gdma_cq = cq; comp_buf = 
mallocarray(q_depth, sizeof(struct gdma_comp), M_DEVBUF, M_WAITOK | M_ZERO); if (!comp_buf) { err = ENOMEM; goto out; } hwc_cq->hwc = hwc; hwc_cq->comp_buf = comp_buf; hwc_cq->queue_depth = q_depth; hwc_cq->rx_event_handler = rx_ev_hdlr; hwc_cq->rx_event_ctx = rx_ev_ctx; hwc_cq->tx_event_handler = tx_ev_hdlr; hwc_cq->tx_event_ctx = tx_ev_ctx; *hwc_cq_ptr = hwc_cq; return 0; out: mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc_cq); return err; } static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, uint16_t q_depth, uint32_t max_msg_size, struct hwc_dma_buf **dma_buf_ptr) { struct gdma_context *gc = hwc->gdma_dev->gdma_context; struct hwc_work_request *hwc_wr; struct hwc_dma_buf *dma_buf; struct gdma_mem_info *gmi; uint32_t buf_size; uint8_t *base_pa; void *virt_addr; uint16_t i; int err; dma_buf = malloc(sizeof(*dma_buf) + q_depth * sizeof(struct hwc_work_request), M_DEVBUF, M_WAITOK | M_ZERO); if (!dma_buf) return ENOMEM; dma_buf->num_reqs = q_depth; buf_size = ALIGN(q_depth * max_msg_size, PAGE_SIZE); gmi = &dma_buf->mem_info; err = mana_gd_alloc_memory(gc, buf_size, gmi); if (err) { device_printf(hwc->dev, "Failed to allocate DMA buffer: %d\n", err); goto out; } virt_addr = dma_buf->mem_info.virt_addr; base_pa = (uint8_t *)dma_buf->mem_info.dma_handle; for (i = 0; i < q_depth; i++) { hwc_wr = &dma_buf->reqs[i]; hwc_wr->buf_va = (char *)virt_addr + i * max_msg_size; hwc_wr->buf_sge_addr = base_pa + i * max_msg_size; hwc_wr->buf_len = max_msg_size; } *dma_buf_ptr = dma_buf; return 0; out: free(dma_buf, M_DEVBUF); return err; } static void mana_hwc_dealloc_dma_buf(struct hw_channel_context *hwc, struct hwc_dma_buf *dma_buf) { if (!dma_buf) return; mana_gd_free_memory(&dma_buf->mem_info); free(dma_buf, M_DEVBUF); } static void mana_hwc_destroy_wq(struct hw_channel_context *hwc, struct hwc_wq *hwc_wq) { mana_hwc_dealloc_dma_buf(hwc, hwc_wq->msg_buf); if (hwc_wq->gdma_wq) mana_gd_destroy_queue(hwc->gdma_dev->gdma_context, hwc_wq->gdma_wq); 
free(hwc_wq, M_DEVBUF); } static int mana_hwc_create_wq(struct hw_channel_context *hwc, enum gdma_queue_type q_type, uint16_t q_depth, uint32_t max_msg_size, struct hwc_cq *hwc_cq, struct hwc_wq **hwc_wq_ptr) { struct gdma_queue *queue; struct hwc_wq *hwc_wq; uint32_t queue_size; int err; if (q_type != GDMA_SQ && q_type != GDMA_RQ) { /* XXX should fail and return error? */ mana_warn(NULL, "Invalid q_type %u\n", q_type); } if (q_type == GDMA_RQ) queue_size = roundup_pow_of_two(GDMA_MAX_RQE_SIZE * q_depth); else queue_size = roundup_pow_of_two(GDMA_MAX_SQE_SIZE * q_depth); if (queue_size < MINIMUM_SUPPORTED_PAGE_SIZE) queue_size = MINIMUM_SUPPORTED_PAGE_SIZE; hwc_wq = malloc(sizeof(*hwc_wq), M_DEVBUF, M_WAITOK | M_ZERO); if (!hwc_wq) return ENOMEM; err = mana_hwc_create_gdma_wq(hwc, q_type, queue_size, &queue); if (err) goto out; hwc_wq->hwc = hwc; hwc_wq->gdma_wq = queue; hwc_wq->queue_depth = q_depth; hwc_wq->hwc_cq = hwc_cq; err = mana_hwc_alloc_dma_buf(hwc, q_depth, max_msg_size, &hwc_wq->msg_buf); if (err) goto out; *hwc_wq_ptr = hwc_wq; return 0; out: if (err) mana_hwc_destroy_wq(hwc, hwc_wq); return err; } static int mana_hwc_post_tx_wqe(const struct hwc_wq *hwc_txq, struct hwc_work_request *req, uint32_t dest_virt_rq_id, uint32_t dest_virt_rcq_id, bool dest_pf) { device_t dev = hwc_txq->hwc->dev; struct hwc_tx_oob *tx_oob; struct gdma_sge *sge; int err; if (req->msg_size == 0 || req->msg_size > req->buf_len) { device_printf(dev, "wrong msg_size: %u, buf_len: %u\n", req->msg_size, req->buf_len); return EINVAL; } tx_oob = &req->tx_oob; tx_oob->vrq_id = dest_virt_rq_id; tx_oob->dest_vfid = 0; tx_oob->vrcq_id = dest_virt_rcq_id; tx_oob->vscq_id = hwc_txq->hwc_cq->gdma_cq->id; tx_oob->loopback = false; tx_oob->lso_override = false; tx_oob->dest_pf = dest_pf; tx_oob->vsq_id = hwc_txq->gdma_wq->id; sge = &req->sge; - sge->address = (uint64_t)req->buf_sge_addr; + sge->address = (uintptr_t)req->buf_sge_addr; sge->mem_key = hwc_txq->msg_buf->gpa_mkey; sge->size = 
req->msg_size; memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); req->wqe_req.sgl = sge; req->wqe_req.num_sge = 1; req->wqe_req.inline_oob_size = sizeof(struct hwc_tx_oob); req->wqe_req.inline_oob_data = tx_oob; req->wqe_req.client_data_unit = 0; err = mana_gd_post_and_ring(hwc_txq->gdma_wq, &req->wqe_req, NULL); if (err) device_printf(dev, "Failed to post WQE on HWC SQ: %d\n", err); return err; } static int mana_hwc_init_inflight_msg(struct hw_channel_context *hwc, uint16_t num_msg) { int err; sema_init(&hwc->sema, num_msg, "gdma hwc sema"); err = mana_gd_alloc_res_map(num_msg, &hwc->inflight_msg_res, "gdma hwc res lock"); if (err) device_printf(hwc->dev, "Failed to init inflight_msg_res: %d\n", err); return (err); } static int mana_hwc_test_channel(struct hw_channel_context *hwc, uint16_t q_depth, uint32_t max_req_msg_size, uint32_t max_resp_msg_size) { struct gdma_context *gc = hwc->gdma_dev->gdma_context; struct hwc_wq *hwc_rxq = hwc->rxq; struct hwc_work_request *req; struct hwc_caller_ctx *ctx; int err; int i; /* Post all WQEs on the RQ */ for (i = 0; i < q_depth; i++) { req = &hwc_rxq->msg_buf->reqs[i]; err = mana_hwc_post_rx_wqe(hwc_rxq, req); if (err) return err; } ctx = malloc(q_depth * sizeof(struct hwc_caller_ctx), M_DEVBUF, M_WAITOK | M_ZERO); if (!ctx) return ENOMEM; for (i = 0; i < q_depth; ++i) init_completion(&ctx[i].comp_event); hwc->caller_ctx = ctx; return mana_gd_test_eq(gc, hwc->cq->gdma_eq); } static int mana_hwc_establish_channel(struct gdma_context *gc, uint16_t *q_depth, uint32_t *max_req_msg_size, uint32_t *max_resp_msg_size) { struct hw_channel_context *hwc = gc->hwc.driver_data; struct gdma_queue *rq = hwc->rxq->gdma_wq; struct gdma_queue *sq = hwc->txq->gdma_wq; struct gdma_queue *eq = hwc->cq->gdma_eq; struct gdma_queue *cq = hwc->cq->gdma_cq; int err; init_completion(&hwc->hwc_init_eqe_comp); err = mana_smc_setup_hwc(&gc->shm_channel, false, eq->mem_info.dma_handle, cq->mem_info.dma_handle, rq->mem_info.dma_handle, 
sq->mem_info.dma_handle, eq->eq.msix_index); if (err) return err; if (wait_for_completion_timeout(&hwc->hwc_init_eqe_comp, 60 * hz)) return ETIMEDOUT; *q_depth = hwc->hwc_init_q_depth_max; *max_req_msg_size = hwc->hwc_init_max_req_msg_size; *max_resp_msg_size = hwc->hwc_init_max_resp_msg_size; /* Both were set in mana_hwc_init_event_handler(). */ if (cq->id >= gc->max_num_cqs) { mana_warn(NULL, "invalid cq id %u > %u\n", cq->id, gc->max_num_cqs); return EPROTO; } gc->cq_table = malloc(gc->max_num_cqs * sizeof(struct gdma_queue *), M_DEVBUF, M_WAITOK | M_ZERO); if (!gc->cq_table) return ENOMEM; gc->cq_table[cq->id] = cq; return 0; } static int mana_hwc_init_queues(struct hw_channel_context *hwc, uint16_t q_depth, uint32_t max_req_msg_size, uint32_t max_resp_msg_size) { int err; err = mana_hwc_init_inflight_msg(hwc, q_depth); if (err) return err; /* CQ is shared by SQ and RQ, so CQ's queue depth is the sum of SQ * queue depth and RQ queue depth. */ err = mana_hwc_create_cq(hwc, q_depth * 2, mana_hwc_init_event_handler, hwc, mana_hwc_rx_event_handler, hwc, mana_hwc_tx_event_handler, hwc, &hwc->cq); if (err) { device_printf(hwc->dev, "Failed to create HWC CQ: %d\n", err); goto out; } err = mana_hwc_create_wq(hwc, GDMA_RQ, q_depth, max_req_msg_size, hwc->cq, &hwc->rxq); if (err) { device_printf(hwc->dev, "Failed to create HWC RQ: %d\n", err); goto out; } err = mana_hwc_create_wq(hwc, GDMA_SQ, q_depth, max_resp_msg_size, hwc->cq, &hwc->txq); if (err) { device_printf(hwc->dev, "Failed to create HWC SQ: %d\n", err); goto out; } hwc->num_inflight_msg = q_depth; hwc->max_req_msg_size = max_req_msg_size; return 0; out: /* mana_hwc_create_channel() will do the cleanup.*/ return err; } int mana_hwc_create_channel(struct gdma_context *gc) { uint32_t max_req_msg_size, max_resp_msg_size; struct gdma_dev *gd = &gc->hwc; struct hw_channel_context *hwc; uint16_t q_depth_max; int err; hwc = malloc(sizeof(*hwc), M_DEVBUF, M_WAITOK | M_ZERO); if (!hwc) return ENOMEM; gd->gdma_context = 
gc; gd->driver_data = hwc; hwc->gdma_dev = gd; hwc->dev = gc->dev; /* HWC's instance number is always 0. */ gd->dev_id.as_uint32 = 0; gd->dev_id.type = GDMA_DEVICE_HWC; gd->pdid = INVALID_PDID; gd->doorbell = INVALID_DOORBELL; /* * mana_hwc_init_queues() only creates the required data structures, * and doesn't touch the HWC device. */ err = mana_hwc_init_queues(hwc, HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH, HW_CHANNEL_MAX_REQUEST_SIZE, HW_CHANNEL_MAX_RESPONSE_SIZE); if (err) { device_printf(hwc->dev, "Failed to initialize HWC: %d\n", err); goto out; } err = mana_hwc_establish_channel(gc, &q_depth_max, &max_req_msg_size, &max_resp_msg_size); if (err) { device_printf(hwc->dev, "Failed to establish HWC: %d\n", err); goto out; } err = mana_hwc_test_channel(gc->hwc.driver_data, HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH, max_req_msg_size, max_resp_msg_size); if (err) { /* Test failed, but the channel has been established */ device_printf(hwc->dev, "Failed to test HWC: %d\n", err); return EIO; } return 0; out: mana_hwc_destroy_channel(gc); return (err); } void mana_hwc_destroy_channel(struct gdma_context *gc) { struct hw_channel_context *hwc = gc->hwc.driver_data; if (!hwc) return; /* * gc->max_num_cqs is set in mana_hwc_init_event_handler(). If it's * non-zero, the HWC worked and we should tear down the HWC here. 
*/ if (gc->max_num_cqs > 0) { mana_smc_teardown_hwc(&gc->shm_channel, false); gc->max_num_cqs = 0; } free(hwc->caller_ctx, M_DEVBUF); hwc->caller_ctx = NULL; if (hwc->txq) mana_hwc_destroy_wq(hwc, hwc->txq); if (hwc->rxq) mana_hwc_destroy_wq(hwc, hwc->rxq); if (hwc->cq) mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc->cq); mana_gd_free_res_map(&hwc->inflight_msg_res); hwc->num_inflight_msg = 0; hwc->gdma_dev->doorbell = INVALID_DOORBELL; hwc->gdma_dev->pdid = INVALID_PDID; free(hwc, M_DEVBUF); gc->hwc.driver_data = NULL; gc->hwc.gdma_context = NULL; free(gc->cq_table, M_DEVBUF); gc->cq_table = NULL; } int mana_hwc_send_request(struct hw_channel_context *hwc, uint32_t req_len, const void *req, uint32_t resp_len, void *resp) { struct hwc_work_request *tx_wr; struct hwc_wq *txq = hwc->txq; struct gdma_req_hdr *req_msg; struct hwc_caller_ctx *ctx; uint16_t msg_id; int err; mana_hwc_get_msg_index(hwc, &msg_id); tx_wr = &txq->msg_buf->reqs[msg_id]; if (req_len > tx_wr->buf_len) { device_printf(hwc->dev, "HWC: req msg size: %d > %d\n", req_len, tx_wr->buf_len); err = EINVAL; goto out; } ctx = hwc->caller_ctx + msg_id; ctx->output_buf = resp; ctx->output_buflen = resp_len; req_msg = (struct gdma_req_hdr *)tx_wr->buf_va; if (req) memcpy(req_msg, req, req_len); req_msg->req.hwc_msg_id = msg_id; tx_wr->msg_size = req_len; err = mana_hwc_post_tx_wqe(txq, tx_wr, 0, 0, false); if (err) { device_printf(hwc->dev, "HWC: Failed to post send WQE: %d\n", err); goto out; } if (wait_for_completion_timeout(&ctx->comp_event, 30 * hz)) { device_printf(hwc->dev, "HWC: Request timed out!\n"); err = ETIMEDOUT; goto out; } if (ctx->error) { err = ctx->error; goto out; } if (ctx->status_code && ctx->status_code != GDMA_STATUS_MORE_ENTRIES) { device_printf(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n", ctx->status_code); err = EPROTO; goto out; } out: mana_hwc_put_msg_index(hwc, msg_id); return err; }