Index: projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei.c =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei.c (revision 279870) +++ projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei.c (revision 279871) @@ -1,1491 +1,1495 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * * Chelsio T5xx iSCSI driver * * Written by: Sreenivasa Honnur * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ +#include +__FBSDID("$FreeBSD$"); + #include "opt_inet.h" +#include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for PCIE_MEM_ACCESS */ #include #include "cxgbei.h" #include "cxgbei_ulp2_ddp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* forward declarations */ struct icl_pdu * icl_pdu_new_empty(struct icl_conn *, int ); void icl_pdu_free(struct icl_pdu *); /* mbuf_tag management functions */ struct ulp_mbuf_cb * get_ulp_mbuf_cb(struct mbuf *m) { struct m_tag *mtag = NULL; mtag = m_tag_get(CXGBE_ISCSI_MBUF_TAG, sizeof(struct ulp_mbuf_cb), M_NOWAIT); if (mtag == NULL) { printf("%s: mtag alloc failed\n", __func__); return NULL; } bzero(mtag + 1, sizeof(struct ulp_mbuf_cb)); m_tag_prepend(m, mtag); return ((struct ulp_mbuf_cb *)(mtag + 1)); } static struct ulp_mbuf_cb * find_ulp_mbuf_cb(struct mbuf *m) { struct m_tag *mtag = NULL; if ((mtag = m_tag_find(m, CXGBE_ISCSI_MBUF_TAG, NULL)) == NULL) return (NULL); return ((struct ulp_mbuf_cb *)(mtag + 1)); } /* * Direct Data Placement - * Directly place the iSCSI Data-In or Data-Out PDU's payload into pre-posted * final destination host-memory buffers based on the Initiator Task Tag (ITT) * in Data-In or Target Task Tag (TTT) in Data-Out PDUs. * The host memory address is programmed into h/w in the format of pagepod * entries. * The location of the pagepod entry is encoded into ddp tag which is used as * the base for ITT/TTT. 
*/ #define T4_DDP #ifdef T4_DDP /* * functions to program the pagepod in h/w */ static void * t4_tdev2ddp(void *tdev) { struct adapter *sc = ((struct toedev *)tdev)->tod_softc; return (sc->iscsi_softc); } static void inline ppod_set(struct pagepod *ppod, struct cxgbei_ulp2_pagepod_hdr *hdr, struct cxgbei_ulp2_gather_list *gl, unsigned int pidx) { int i; memcpy(ppod, hdr, sizeof(*hdr)); for (i = 0; i < (PPOD_PAGES + 1); i++, pidx++) { ppod->addr[i] = pidx < gl->nelem ? cpu_to_be64(gl->dma_sg[pidx].phys_addr) : 0ULL; } } static void inline ppod_clear(struct pagepod *ppod) { memset(ppod, 0, sizeof(*ppod)); } static inline void ulp_mem_io_set_hdr(struct adapter *sc, int tid, struct ulp_mem_io *req, unsigned int wr_len, unsigned int dlen, unsigned int pm_addr) { struct ulptx_idata *idata = (struct ulptx_idata *)(req + 1); INIT_ULPTX_WR(req, wr_len, 0, 0); req->cmd = cpu_to_be32(V_ULPTX_CMD(ULP_TX_MEM_WRITE) | V_ULP_MEMIO_ORDER(is_t4(sc)) | V_T5_ULP_MEMIO_IMM(is_t5(sc))); req->dlen = htonl(V_ULP_MEMIO_DATA_LEN(dlen >> 5)); req->len16 = htonl(DIV_ROUND_UP(wr_len - sizeof(req->wr), 16) | V_FW_WR_FLOWID(tid)); req->lock_addr = htonl(V_ULP_MEMIO_ADDR(pm_addr >> 5)); idata->cmd_more = htonl(V_ULPTX_CMD(ULP_TX_SC_IMM)); idata->len = htonl(dlen); } #define PPOD_SIZE sizeof(struct pagepod) #define ULPMEM_IDATA_MAX_NPPODS 1 /* 256/PPOD_SIZE */ #define PCIE_MEMWIN_MAX_NPPODS 16 /* 1024/PPOD_SIZE */ static int ppod_write_idata(struct cxgbei_ulp2_ddp_info *ddp, struct cxgbei_ulp2_pagepod_hdr *hdr, unsigned int idx, unsigned int npods, struct cxgbei_ulp2_gather_list *gl, unsigned int gl_pidx, struct toepcb *toep) { unsigned int dlen = PPOD_SIZE * npods; unsigned int pm_addr = idx * PPOD_SIZE + ddp->llimit; unsigned int wr_len = roundup(sizeof(struct ulp_mem_io) + sizeof(struct ulptx_idata) + dlen, 16); struct ulp_mem_io *req; struct ulptx_idata *idata; struct pagepod *ppod; unsigned int i; struct wrqe *wr; struct adapter *sc = toep->port->adapter; wr = alloc_wrqe(wr_len, 
toep->ctrlq); if (wr == NULL) { printf("%s: alloc wrqe failed\n", __func__); return ENOMEM; } req = wrtod(wr); memset(req, 0, wr_len); ulp_mem_io_set_hdr(sc, toep->tid, req, wr_len, dlen, pm_addr); idata = (struct ulptx_idata *)(req + 1); ppod = (struct pagepod *)(idata + 1); for (i = 0; i < npods; i++, ppod++, gl_pidx += PPOD_PAGES) { if (!hdr) /* clear the pagepod */ ppod_clear(ppod); else /* set the pagepod */ ppod_set(ppod, hdr, gl, gl_pidx); } t4_wrq_tx(sc, wr); return 0; } static int t4_ddp_set_map(struct cxgbei_ulp2_ddp_info *ddp, void *isockp, struct cxgbei_ulp2_pagepod_hdr *hdr, unsigned int idx, unsigned int npods, struct cxgbei_ulp2_gather_list *gl, int reply) { iscsi_socket *isock = (iscsi_socket *)isockp; struct socket *sk; struct toepcb *toep; struct tcpcb *tp; int err; unsigned int pidx = 0, w_npods = 0, cnt; if (isock == NULL) return EINVAL; sk = isock->sock; tp = so_sototcpcb(sk); toep = tp->t_toe; /* * on T4, if we use a mix of IMMD and DSGL with ULP_MEM_WRITE, * the order would not be garanteed, so we will stick with IMMD */ gl->tid = toep->tid; gl->port_id = toep->port->port_id; gl->egress_dev = (void *)toep->port->ifp; /* send via immediate data */ for (; w_npods < npods; idx += cnt, w_npods += cnt, pidx += PPOD_PAGES) { cnt = npods - w_npods; if (cnt > ULPMEM_IDATA_MAX_NPPODS) cnt = ULPMEM_IDATA_MAX_NPPODS; err = ppod_write_idata(ddp, hdr, idx, cnt, gl, pidx, toep); if (err) { printf("%s: ppod_write_idata failed\n", __func__); break; } } return err; } static void t4_ddp_clear_map(struct cxgbei_ulp2_ddp_info *ddp, struct cxgbei_ulp2_gather_list *gl, unsigned int tag, unsigned int idx, unsigned int npods, iscsi_socket *isock) { struct socket *sk; struct toepcb *toep; struct tcpcb *tp; int err = -1; sk = isock->sock; tp = so_sototcpcb(sk); toep = tp->t_toe; /* send via immediate data */ unsigned int pidx = 0; unsigned int w_npods = 0; unsigned int cnt; for (; w_npods < npods; idx += cnt, w_npods += cnt, pidx += PPOD_PAGES) { cnt = npods - 
w_npods; if (cnt > ULPMEM_IDATA_MAX_NPPODS) cnt = ULPMEM_IDATA_MAX_NPPODS; err = ppod_write_idata(ddp, NULL, idx, cnt, gl, 0, toep); if (err) break; } } #endif /* * cxgbei device management * maintains a list of the cxgbei devices */ typedef struct offload_device { SLIST_ENTRY(offload_device) link; unsigned char d_version; unsigned char d_tx_hdrlen; /* CPL_TX_DATA, < 256 */ unsigned char d_ulp_rx_datagap; /* for coalesced iscsi msg */ unsigned char filler; unsigned int d_flag; unsigned int d_payload_tmax; unsigned int d_payload_rmax; struct cxgbei_ulp2_tag_format d_tag_format; void *d_tdev; void *d_pdev; void* (*tdev2ddp)(void *tdev); }offload_device; SLIST_HEAD(, offload_device) odev_list; static void t4_unregister_cpl_handler_with_tom(struct adapter *sc); static offload_device * offload_device_new(void *tdev) { offload_device *odev = NULL; odev = malloc(sizeof(struct offload_device), M_CXGBE, M_NOWAIT | M_ZERO); if (odev) { odev->d_tdev = tdev; SLIST_INSERT_HEAD(&odev_list, odev, link); } return odev; } static offload_device * offload_device_find(struct toedev *tdev) { offload_device *odev = NULL; if (!SLIST_EMPTY(&odev_list)) { SLIST_FOREACH(odev, &odev_list, link) { if (odev->d_tdev == tdev) break; } } return odev; } static void cxgbei_odev_cleanup(offload_device *odev) { struct toedev *tdev = odev->d_tdev; struct adapter *sc = (struct adapter *)tdev->tod_softc; /* de-register ULP CPL handlers with TOM */ t4_unregister_cpl_handler_with_tom(sc); if (odev->d_flag & ODEV_FLAG_ULP_DDP_ENABLED) { if (sc->iscsi_softc) cxgbei_ulp2_ddp_cleanup( (struct cxgbei_ulp2_ddp_info **)&sc->iscsi_softc); } return; } static void offload_device_remove() { offload_device *odev = NULL, *next = NULL; if (SLIST_EMPTY(&odev_list)) return; for (odev = SLIST_FIRST(&odev_list); odev != NULL; odev = next) { SLIST_REMOVE(&odev_list, odev, offload_device, link); next = SLIST_NEXT(odev, link); cxgbei_odev_cleanup(odev); free(odev, M_CXGBE); } return; } static int cxgbei_map_sg(cxgbei_sgl 
*sgl, struct ccb_scsiio *csio)
{
	/*
	 * Split the CAM data buffer into page-sized cxgbei_sgl entries so
	 * it can be turned into a DDP gather list.  Returns the number of
	 * page-aligned entries the buffer spans.
	 * NOTE(review): assumes csio->data_ptr is a virtually contiguous
	 * buffer (no CAM S/G list) -- confirm against callers.
	 */
	unsigned int data_len = csio->dxfer_len;
	unsigned int sgoffset = (uint64_t)csio->data_ptr & PAGE_MASK;
	unsigned int nsge;
	unsigned char *sgaddr = csio->data_ptr;
	unsigned int len = 0;

	/* total pages covered, counting the leading in-page offset */
	nsge = (csio->dxfer_len + sgoffset + PAGE_SIZE - 1) >> PAGE_SHIFT;

	/* first entry carries the in-page offset */
	sgl->sg_addr = sgaddr;
	sgl->sg_offset = sgoffset;
	if (data_len < (PAGE_SIZE - sgoffset))
		len = data_len;
	else
		len = PAGE_SIZE - sgoffset;
	sgl->sg_length = len;

	data_len -= len;
	sgaddr += len;
	sgl = sgl + 1;

	/* remaining entries are whole pages (last one may be short) */
	while (data_len > 0) {
		sgl->sg_addr = sgaddr;
		len = (data_len < PAGE_SIZE) ? data_len : PAGE_SIZE;
		sgl->sg_length = len;
		sgaddr += len;
		data_len -= len;
		sgl = sgl + 1;
	}

	return nsge;
}

/*
 * Target-side variant of cxgbei_map_sg(): walk the CTL S/G list (or the
 * single kern_data_ptr buffer) and emit page-sized cxgbei_sgl entries.
 * Returns the number of entries produced.
 * NOTE(review): nsge is computed from the total length plus the first
 * segment's page offset only; assumes subsequent CTL segments are
 * page-aligned -- confirm.
 */
static int
cxgbei_map_sg_tgt(cxgbei_sgl *sgl, union ctl_io *io)
{
	unsigned int data_len, sgoffset, nsge;
	unsigned char *sgaddr;
	unsigned int len = 0, index = 0, ctl_sg_count, i;
	struct ctl_sg_entry ctl_sg_entry, *ctl_sglist;

	/* use the CTL S/G list if present, else fake a one-entry list */
	if (io->scsiio.kern_sg_entries > 0) {
		ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr;
		ctl_sg_count = io->scsiio.kern_sg_entries;
	} else {
		ctl_sglist = &ctl_sg_entry;
		ctl_sglist->addr = io->scsiio.kern_data_ptr;
		ctl_sglist->len = io->scsiio.kern_data_len;
		ctl_sg_count = 1;
	}

	/* first output entry carries the in-page offset */
	sgaddr = sgl->sg_addr = ctl_sglist[index].addr;
	sgoffset = sgl->sg_offset = (uint64_t)sgl->sg_addr & PAGE_MASK;
	data_len = ctl_sglist[index].len;

	if (data_len < (PAGE_SIZE - sgoffset))
		len = data_len;
	else
		len = PAGE_SIZE - sgoffset;

	sgl->sg_length = len;

	data_len -= len;
	sgaddr += len;
	sgl = sgl + 1;

	/* total length across all CTL segments -> total page count */
	len = 0;
	for (i = 0; i < ctl_sg_count; i++)
		len += ctl_sglist[i].len;
	nsge = (len + sgoffset + PAGE_SIZE - 1) >> PAGE_SHIFT;

	/* emit page-sized chunks, advancing to the next CTL segment
	 * whenever the current one is exhausted */
	while (data_len > 0) {
		sgl->sg_addr = sgaddr;
		len = (data_len < PAGE_SIZE) ? data_len : PAGE_SIZE;
		sgl->sg_length = len;
		sgaddr += len;
		data_len -= len;
		sgl = sgl + 1;
		if (data_len == 0) {
			if (index == ctl_sg_count - 1)
				break;
			index++;
			sgaddr = ctl_sglist[index].addr;
			data_len = ctl_sglist[index].len;
		}
	}

	return nsge;
}

/*
 * Build a DDP gather list for "sgl"/"sgcnt" and reserve a DDP tag for it.
 * On success *ddp_tag holds the reserved tag; on failure the gather list
 * is released.  Returns 0, ENOMEM if DDP is not initialized, or a
 * negative errno from the reservation (note the mixed sign convention).
 */
static int
t4_sk_ddp_tag_reserve(iscsi_socket *isock, unsigned int xferlen,
    cxgbei_sgl *sgl, unsigned int sgcnt, unsigned int *ddp_tag)
{
	offload_device *odev = isock->s_odev;
	struct toedev *tdev = odev->d_tdev;
	struct cxgbei_ulp2_gather_list *gl;
	int err = -EINVAL;
	struct adapter *sc = tdev->tod_softc;
	struct cxgbei_ulp2_ddp_info *ddp;

	ddp = (struct cxgbei_ulp2_ddp_info *)sc->iscsi_softc;

	if (ddp == NULL)
		return ENOMEM;

	gl = cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec(xferlen, sgl, sgcnt,
	    odev->d_tdev, 0);
	if (gl) {
		err = cxgbei_ulp2_ddp_tag_reserve(odev->tdev2ddp(tdev),
		    isock, isock->s_tid, &odev->d_tag_format, ddp_tag,
		    gl, 0, 0);
		if (err) {
			CTR1(KTR_CXGBE,
			    "%s: ddp_tag_reserve failed\n", __func__);
			cxgbei_ulp2_ddp_release_gl(gl, odev->d_tdev);
		}
	}

	return err;
}

/*
 * Initiator: for a large-enough read (CAM_DIR_IN, >= DDP_THRESHOLD),
 * map the buffer and try to reserve a DDP tag based on the ITT; on any
 * failure fall back to a non-DDP tag.  Returns the tag to use as ITT
 * (0 only if the buffer could not be mapped at all).
 */
static unsigned int
cxgbei_task_reserve_itt(struct icl_conn *ic, void **prv,
    struct ccb_scsiio *scmd, unsigned int *itt)
{
	int xferlen = scmd->dxfer_len;
	cxgbei_task_data *tdata = NULL;
	cxgbei_sgl *sge = NULL;
	struct socket *so = ic->ic_socket;
	iscsi_socket *isock = (iscsi_socket *)(so)->so_emuldata;
	int err = -1;
	offload_device *odev = isock->s_odev;

	tdata = (cxgbei_task_data *)*prv;
	if ((xferlen == 0) || (tdata == NULL)) {
		goto out;
	}
	if (xferlen < DDP_THRESHOLD)
		goto out;

	if ((scmd->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
		tdata->nsge = cxgbei_map_sg(tdata->sgl, scmd);
		if (tdata->nsge == 0) {
			CTR1(KTR_CXGBE, "%s: map_sg failed\n", __func__);
			return 0;
		}
		sge = tdata->sgl;

		tdata->sc_ddp_tag = *itt;

		CTR3(KTR_CXGBE, "%s: *itt:0x%x sc_ddp_tag:0x%x\n",
		    __func__, *itt, tdata->sc_ddp_tag);
		if (cxgbei_ulp2_sw_tag_usable(&odev->d_tag_format,
		    tdata->sc_ddp_tag)) {
			err = t4_sk_ddp_tag_reserve(isock, scmd->dxfer_len,
			    sge, tdata->nsge, &tdata->sc_ddp_tag);
		} else {
CTR3(KTR_CXGBE, "%s: itt:0x%x sc_ddp_tag:0x%x not usable\n", __func__, *itt, tdata->sc_ddp_tag); } } out: if (err < 0) tdata->sc_ddp_tag = cxgbei_ulp2_set_non_ddp_tag(&odev->d_tag_format, *itt); return tdata->sc_ddp_tag; } static unsigned int cxgbei_task_reserve_ttt(struct icl_conn *ic, void **prv, union ctl_io *io, unsigned int *ttt) { struct socket *so = ic->ic_socket; iscsi_socket *isock = (iscsi_socket *)(so)->so_emuldata; cxgbei_task_data *tdata = NULL; offload_device *odev = isock->s_odev; int xferlen, err = -1; cxgbei_sgl *sge = NULL; xferlen = (io->scsiio.kern_data_len - io->scsiio.ext_data_filled); tdata = (cxgbei_task_data *)*prv; if ((xferlen == 0) || (tdata == NULL)) goto out; if (xferlen < DDP_THRESHOLD) goto out; tdata->nsge = cxgbei_map_sg_tgt(tdata->sgl, io); if (tdata->nsge == 0) { CTR1(KTR_CXGBE, "%s: map_sg failed\n", __func__); return 0; } sge = tdata->sgl; tdata->sc_ddp_tag = *ttt; if (cxgbei_ulp2_sw_tag_usable(&odev->d_tag_format, tdata->sc_ddp_tag)) { err = t4_sk_ddp_tag_reserve(isock, xferlen, sge, tdata->nsge, &tdata->sc_ddp_tag); } else { CTR2(KTR_CXGBE, "%s: sc_ddp_tag:0x%x not usable\n", __func__, tdata->sc_ddp_tag); } out: if (err < 0) tdata->sc_ddp_tag = cxgbei_ulp2_set_non_ddp_tag(&odev->d_tag_format, *ttt); return tdata->sc_ddp_tag; } static int t4_sk_ddp_tag_release(iscsi_socket *isock, unsigned int ddp_tag) { offload_device *odev = isock->s_odev; struct toedev *tdev = odev->d_tdev; cxgbei_ulp2_ddp_tag_release(odev->tdev2ddp(tdev), ddp_tag, isock); return 0; } #ifdef T4_DDP static struct cxgbei_ulp2_ddp_info * t4_ddp_init(struct ifnet *dev, struct toedev *tdev) { struct cxgbei_ulp2_ddp_info *ddp; struct adapter *sc = tdev->tod_softc; struct ulp_iscsi_info uinfo; memset(&uinfo, 0, sizeof(struct ulp_iscsi_info)); uinfo.llimit = sc->vres.iscsi.start; uinfo.ulimit = sc->vres.iscsi.start + sc->vres.iscsi.size - 1; uinfo.max_rxsz = uinfo.max_txsz = G_MAXRXDATA(t4_read_reg(sc, A_TP_PARA_REG2)); if (sc->vres.iscsi.size == 0) { printf("%s: 
iSCSI capabilities not enabled.\n", __func__); return NULL; } printf("T4, ddp 0x%x ~ 0x%x, size %u, iolen %u, ulpddp:0x%p\n", uinfo.llimit, uinfo.ulimit, sc->vres.iscsi.size, uinfo.max_rxsz, sc->iscsi_softc); cxgbei_ulp2_ddp_init((void *)tdev, (struct cxgbei_ulp2_ddp_info **)&sc->iscsi_softc, &uinfo); ddp = (struct cxgbei_ulp2_ddp_info *)sc->iscsi_softc; if (ddp) { unsigned int pgsz_order[4]; int i; for (i = 0; i < 4; i++) pgsz_order[i] = uinfo.pgsz_factor[i]; t4_iscsi_init(dev, uinfo.tagmask, pgsz_order); ddp->ddp_set_map = t4_ddp_set_map; ddp->ddp_clear_map = t4_ddp_clear_map; } return ddp; } #endif static struct socket * cpl_find_sock(struct adapter *sc, unsigned int hwtid) { struct socket *sk; struct toepcb *toep = lookup_tid(sc, hwtid); struct inpcb *inp = toep->inp; INP_WLOCK(inp); sk = inp->inp_socket; INP_WUNLOCK(inp); if (sk == NULL) CTR2(KTR_CXGBE, "%s: T4 CPL tid 0x%x, sk NULL.\n", __func__, hwtid); return sk; } static void process_rx_iscsi_hdr(struct socket *sk, struct mbuf *m) { struct tcpcb *tp = so_sototcpcb(sk); struct toepcb *toep = tp->t_toe; struct cpl_iscsi_hdr *cpl = mtod(m, struct cpl_iscsi_hdr *); struct ulp_mbuf_cb *cb, *lcb; struct mbuf *lmbuf; unsigned char *byte; iscsi_socket *isock = (iscsi_socket *)(sk)->so_emuldata; unsigned int hlen, dlen, plen; if (isock == NULL) goto err_out; if (toep == NULL) goto err_out; if ((m->m_flags & M_PKTHDR) == 0) { printf("%s: m:%p no M_PKTHDR can't allocate m_tag\n", __func__, m); goto err_out; } mtx_lock(&isock->iscsi_rcv_mbufq.lock); /* allocate m_tag to hold ulp info */ cb = get_ulp_mbuf_cb(m); if (cb == NULL) { printf("%s: Error allocation m_tag\n", __func__); goto err_out1; } cb->seq = ntohl(cpl->seq); /* strip off CPL header */ m_adj(m, sizeof(*cpl)); /* figure out if this is the pdu header or data */ cb->ulp_mode = ULP_MODE_ISCSI; if (isock->mbuf_ulp_lhdr == NULL) { iscsi_socket *isock = (iscsi_socket *)(sk)->so_emuldata; isock->mbuf_ulp_lhdr = lmbuf = m; lcb = cb; cb->flags = 
SBUF_ULP_FLAG_HDR_RCVD | SBUF_ULP_FLAG_COALESCE_OFF; /* we only update tp->rcv_nxt once per pdu */ if (cb->seq != tp->rcv_nxt) { CTR3(KTR_CXGBE, "tid 0x%x, CPL_ISCSI_HDR, BAD seq got 0x%x exp 0x%x.\n", toep->tid, cb->seq, tp->rcv_nxt); goto err_out1; } byte = m->m_data; hlen = ntohs(cpl->len); dlen = ntohl(*(unsigned int *)(byte + 4)) & 0xFFFFFF; plen = ntohs(cpl->pdu_len_ddp); lcb->ulp.iscsi.pdulen = (hlen + dlen + 3) & (~0x3); /* workaround for cpl->pdu_len_ddp since it does not include the data digest count */ if (dlen) lcb->ulp.iscsi.pdulen += isock->s_dcrc_len; tp->rcv_nxt += lcb->ulp.iscsi.pdulen; if (tp->rcv_wnd <= lcb->ulp.iscsi.pdulen) CTR3(KTR_CXGBE, "%s: Neg rcv_wnd:0x%lx pdulen:0x%x\n", __func__, tp->rcv_wnd, lcb->ulp.iscsi.pdulen); tp->rcv_wnd -= lcb->ulp.iscsi.pdulen; tp->t_rcvtime = ticks; } else { lmbuf = isock->mbuf_ulp_lhdr; lcb = find_ulp_mbuf_cb(lmbuf); if (lcb == NULL) { printf("%s: lmbuf:%p lcb is NULL\n", __func__, lmbuf); goto err_out1; } lcb->flags |= SBUF_ULP_FLAG_DATA_RCVD | SBUF_ULP_FLAG_COALESCE_OFF; cb->flags = SBUF_ULP_FLAG_DATA_RCVD; /* padding */ if ((m->m_len % 4) != 0) { m->m_len += 4 - (m->m_len % 4); } } mbufq_tail(&isock->iscsi_rcv_mbufq, m); mtx_unlock(&isock->iscsi_rcv_mbufq.lock); return; err_out1: mtx_unlock(&isock->iscsi_rcv_mbufq.lock); err_out: m_freem(m); return; } /* hand over received PDU to iscsi_initiator */ static void iscsi_conn_receive_pdu(struct iscsi_socket *isock) { struct icl_pdu *response = NULL; struct icl_conn *ic = (struct icl_conn*)isock->s_conn; struct mbuf *m; struct ulp_mbuf_cb *cb = NULL; int data_len; response = icl_pdu_new_empty(isock->s_conn, M_NOWAIT); if (response == NULL) { panic("%s: failed to alloc icl_pdu\n", __func__); return; } m = mbufq_peek(&isock->iscsi_rcv_mbufq); if (m) { cb = find_ulp_mbuf_cb(m); if (cb == NULL) { panic("%s: m:%p cb is NULL\n", __func__, m); goto err_out; } if (!(cb->flags & SBUF_ULP_FLAG_STATUS_RCVD)) goto err_out; } /* BHS */ mbufq_dequeue(&isock->iscsi_rcv_mbufq); 
data_len = cb->ulp.iscsi.pdulen; CTR5(KTR_CXGBE, "%s: response:%p m:%p m_len:%d data_len:%d\n", __func__, response, m, m->m_len, data_len); response->ip_bhs_mbuf = m; response->ip_bhs = mtod(response->ip_bhs_mbuf, struct iscsi_bhs *); /* data */ if (cb->flags & SBUF_ULP_FLAG_DATA_RCVD) { m = mbufq_peek(&isock->iscsi_rcv_mbufq); if (m == NULL) { CTR1(KTR_CXGBE, "%s:No Data\n", __func__); goto err_out; } mbufq_dequeue(&isock->iscsi_rcv_mbufq); response->ip_data_mbuf = m; response->ip_data_len += response->ip_data_mbuf->m_len; } else { /* Data is DDP'ed */ response->ip_ofld_prv0 = 1; } (ic->ic_receive)(response); return; err_out: icl_pdu_free(response); return; } static void process_rx_data_ddp(struct socket *sk, void *m) { struct cpl_rx_data_ddp *cpl = (struct cpl_rx_data_ddp *)m; struct tcpcb *tp = so_sototcpcb(sk); struct toepcb *toep = tp->t_toe; struct inpcb *inp = toep->inp; struct mbuf *lmbuf; struct ulp_mbuf_cb *lcb, *lcb1; unsigned int val, pdulen; iscsi_socket *isock = (iscsi_socket *)(sk)->so_emuldata; if (isock == NULL) return; if (isock->mbuf_ulp_lhdr == NULL) { panic("%s: tid 0x%x, rcv RX_DATA_DDP w/o pdu header.\n", __func__, toep->tid); return; } mtx_lock(&isock->iscsi_rcv_mbufq.lock); lmbuf = isock->mbuf_ulp_lhdr; if (lmbuf->m_nextpkt) { lcb1 = find_ulp_mbuf_cb(lmbuf->m_nextpkt); lcb1->flags |= SBUF_ULP_FLAG_STATUS_RCVD; } lcb = find_ulp_mbuf_cb(isock->mbuf_ulp_lhdr); if (lcb == NULL) { CTR2(KTR_CXGBE, "%s: mtag NULL lmbuf :%p\n", __func__, lmbuf); mtx_unlock(&isock->iscsi_rcv_mbufq.lock); return; } lcb->flags |= SBUF_ULP_FLAG_STATUS_RCVD; isock->mbuf_ulp_lhdr = NULL; if (ntohs(cpl->len) != lcb->ulp.iscsi.pdulen) { CTR3(KTR_CXGBE, "tid 0x%x, RX_DATA_DDP pdulen %u != %u.\n", toep->tid, ntohs(cpl->len), lcb->ulp.iscsi.pdulen); CTR4(KTR_CXGBE, "%s: lmbuf:%p lcb:%p lcb->flags:0x%x\n", __func__, lmbuf, lcb, lcb->flags); } lcb->ulp.iscsi.ddigest = ntohl(cpl->ulp_crc); pdulen = lcb->ulp.iscsi.pdulen; val = ntohl(cpl->ddpvld); if (val & F_DDP_PADDING_ERR) 
lcb->flags |= SBUF_ULP_FLAG_PAD_ERROR; if (val & F_DDP_HDRCRC_ERR) lcb->flags |= SBUF_ULP_FLAG_HCRC_ERROR; if (val & F_DDP_DATACRC_ERR) lcb->flags |= SBUF_ULP_FLAG_DCRC_ERROR; if (!(lcb->flags & SBUF_ULP_FLAG_DATA_RCVD)) { lcb->flags |= SBUF_ULP_FLAG_DATA_DDPED; } #ifdef __T4_DBG_DDP_FAILURE__ // else { unsigned char *bhs = lmbuf->m_data; unsigned char opcode = bhs[0]; unsigned int dlen = ntohl(*(unsigned int *)(bhs + 4)) & 0xFFFFFF; unsigned int ttt = ntohl(*(unsigned int *)(bhs + 20)); unsigned int offset = ntohl(*(unsigned int *)(bhs + 40)); if (dlen >= 2096) { /* data_out and should be ddp'ed */ if ((opcode & 0x3F) == 0x05 && ttt != 0xFFFFFFFF) { printf("CPL_RX_DATA_DDP: tid 0x%x, data-out %s ddp'ed\ (%u+%u), ttt 0x%x, seq 0x%x, ddpvld 0x%x.\n", toep->tid, (lcb->flags & SBUF_ULP_FLAG_DATA_DDPED) ? "IS" : "NOT", offset, dlen, ttt, ntohl(cpl->seq), ntohl(cpl->ddpvld)); } if ((opcode & 0x3F) == 0x25) { //if (!(lcb->flags & SBUF_ULP_FLAG_DATA_DDPED)) printf("CPL_RX_DATA_DDP: tid 0x%x, data-in %s ddp'ed\ (%u+%u), seq 0x%x, ddpvld 0x%x.\n", toep->tid, (lcb->flags & SBUF_ULP_FLAG_DATA_DDPED) ? 
"IS" : "NOT", offset, dlen, ntohl(cpl->seq), ntohl(cpl->ddpvld)); } } } #endif iscsi_conn_receive_pdu(isock); mtx_unlock(&isock->iscsi_rcv_mbufq.lock); /* update rx credits */ INP_WLOCK(inp); SOCK_LOCK(sk); toep->sb_cc += pdulen; SOCK_UNLOCK(sk); CTR4(KTR_CXGBE, "sk:%p sb_cc 0x%x, rcv_nxt 0x%x rcv_wnd:0x%lx.\n", sk, toep->sb_cc, tp->rcv_nxt, tp->rcv_wnd); t4_rcvd(&toep->td->tod, tp); INP_WUNLOCK(inp); return; } static void drop_fw_acked_ulp_data(struct socket *sk, struct toepcb *toep, int len) { struct mbuf *m, *next; struct ulp_mbuf_cb *cb; iscsi_socket *isock = (iscsi_socket *)(sk)->so_emuldata; struct icl_pdu *req; if (len == 0 || (isock == NULL)) return; mtx_lock(&isock->ulp2_wrq.lock); while (len > 0) { m = mbufq_dequeue(&isock->ulp2_wrq); if(m == NULL) break; for(next = m; next !=NULL; next = next->m_next) len -= next->m_len; cb = find_ulp_mbuf_cb(m); if (cb && isock && cb->pdu) { req = (struct icl_pdu *)cb->pdu; req->ip_bhs_mbuf = NULL; icl_pdu_free(req); } m_freem(m); } mtx_unlock(&isock->ulp2_wrq.lock); return; } static void process_fw4_ack(struct socket *sk, int *plen) { struct tcpcb *tp = so_sototcpcb(sk); struct toepcb *toep = tp->t_toe; drop_fw_acked_ulp_data(sk, toep, *plen); return; } static int do_set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { return 0; } static int do_rx_iscsi_hdr(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct socket *sk; struct adapter *sc = iq->adapter; struct cpl_iscsi_hdr *cpl = mtod(m, struct cpl_iscsi_hdr *); sk = cpl_find_sock(sc, GET_TID(cpl)); if (sk == NULL) return CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE; process_rx_iscsi_hdr(sk, m); return 0; } static int do_rx_data_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { return 0; } static int do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct socket *sk; struct adapter *sc; const struct cpl_rx_iscsi_ddp *cpl = (const void *)(rss + 1); if (iq == NULL) return 0; 
sc = iq->adapter; if (sc == NULL) return 0; sk = cpl_find_sock(sc, GET_TID(cpl)); if (sk == NULL) return CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE; process_rx_data_ddp(sk, (void *)cpl); return 0; } static int t4_ulp_mbuf_push(struct socket *so, struct mbuf *m) { struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; struct inpcb *inp = so_sotoinpcb(so); iscsi_socket *isock = (iscsi_socket *)(so)->so_emuldata;; if (isock == NULL) { m_freem(m); return EINVAL; } /* append mbuf to ULP queue */ mtx_lock(&isock->ulp2_writeq.lock); mbufq_tail(&isock->ulp2_writeq, m); mtx_unlock(&isock->ulp2_writeq.lock); INP_WLOCK(inp); t4_ulp_push_frames(toep->td->tod.tod_softc, toep, 0); INP_WUNLOCK(inp); return 0; } static struct mbuf * iscsi_queue_handler_callback(struct socket *sk, unsigned int cmd, int *qlen) { iscsi_socket *isock; struct mbuf *m0 = NULL; if (sk == NULL) return NULL; isock = (iscsi_socket *)(sk)->so_emuldata; if (isock == NULL) return NULL; switch (cmd) { case 0:/* PEEK */ m0 = mbufq_peek(&isock->ulp2_writeq); break; case 1:/* QUEUE_LEN */ *qlen = mbufq_len(&isock->ulp2_writeq); m0 = mbufq_peek(&isock->ulp2_writeq); break; case 2:/* DEQUEUE */ mtx_lock(&isock->ulp2_writeq.lock); m0 = mbufq_dequeue(&isock->ulp2_writeq); mtx_unlock(&isock->ulp2_writeq.lock); mtx_lock(&isock->ulp2_wrq.lock); mbufq_tail(&isock->ulp2_wrq, m0); mtx_unlock(&isock->ulp2_wrq.lock); m0 = mbufq_peek(&isock->ulp2_writeq); break; } return m0; } static void iscsi_cpl_handler_callback(struct tom_data *td, struct socket *sk, void *m, unsigned int op) { if ((sk == NULL) || (sk->so_emuldata == NULL)) return; switch (op) { case CPL_ISCSI_HDR: process_rx_iscsi_hdr(sk, m); break; case CPL_RX_DATA_DDP: process_rx_data_ddp(sk, m); break; case CPL_SET_TCB_RPL: break; case CPL_FW4_ACK: process_fw4_ack(sk, m); break; default: CTR2(KTR_CXGBE, "sk 0x%p, op 0x%x from TOM, NOT supported.\n", sk, op); break; } } static void t4_register_cpl_handler_with_tom(struct adapter *sc) { 
t4tom_register_cpl_iscsi_callback(iscsi_cpl_handler_callback);
	t4tom_register_queue_iscsi_callback(iscsi_queue_handler_callback);

	/* iSCSI PDU CPLs are always ours */
	t4_register_cpl_handler(sc, CPL_ISCSI_HDR, do_rx_iscsi_hdr);
	t4_register_cpl_handler(sc, CPL_ISCSI_DATA, do_rx_iscsi_hdr);
	t4tom_cpl_handler_register_flag |=
	    1 << TOM_CPL_ISCSI_HDR_REGISTERED_BIT;

	/* only take over CPLs TOM has not already claimed, and record
	 * which ones we took so unregister can undo exactly that */
	if (!t4tom_cpl_handler_registered(sc, CPL_SET_TCB_RPL)) {
		t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl);
		t4tom_cpl_handler_register_flag |=
		    1 << TOM_CPL_SET_TCB_RPL_REGISTERED_BIT;
		CTR0(KTR_CXGBE,
		    "register t4 cpl handler CPL_SET_TCB_RPL.\n");
	}

	t4_register_cpl_handler(sc, CPL_RX_ISCSI_DDP, do_rx_iscsi_ddp);
	if (!t4tom_cpl_handler_registered(sc, CPL_RX_DATA_DDP)) {
		t4_register_cpl_handler(sc, CPL_RX_DATA_DDP, do_rx_data_ddp);
		t4tom_cpl_handler_register_flag |=
		    1 << TOM_CPL_RX_DATA_DDP_REGISTERED_BIT;
		CTR0(KTR_CXGBE,
		    "register t4 cpl handler CPL_RX_DATA_DDP.\n");
	}
}

/*
 * Undo t4_register_cpl_handler_with_tom(): drop the TOM callbacks and
 * unhook only the CPL handlers that the register path recorded as ours
 * in t4tom_cpl_handler_register_flag.
 */
static void
t4_unregister_cpl_handler_with_tom(struct adapter *sc)
{
	/* de-register CPL handles */
	t4tom_register_cpl_iscsi_callback(NULL);
	t4tom_register_queue_iscsi_callback(NULL);
	if (t4tom_cpl_handler_register_flag &
	    (1 << TOM_CPL_ISCSI_HDR_REGISTERED_BIT)) {
		t4_register_cpl_handler(sc, CPL_ISCSI_HDR, NULL);
		t4_register_cpl_handler(sc, CPL_ISCSI_DATA, NULL);
	}
	if (t4tom_cpl_handler_register_flag &
	    (1 << TOM_CPL_SET_TCB_RPL_REGISTERED_BIT))
		t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, NULL);
	t4_register_cpl_handler(sc, CPL_RX_ISCSI_DDP, NULL);
	if (t4tom_cpl_handler_register_flag &
	    (1 << TOM_CPL_RX_DATA_DDP_REGISTERED_BIT))
		t4_register_cpl_handler(sc, CPL_RX_DATA_DDP, NULL);
}

/*
 * Send a CPL_SET_TCB_FIELD work request updating "word" of the
 * connection's TCB: new value "val" under "mask"; "no_reply" suppresses
 * the completion message.
 * NOTE(review): returns EINVAL when alloc_wrqe() fails -- ENOMEM would
 * describe that failure more accurately; confirm no caller keys on it.
 */
static int
send_set_tcb_field(struct socket *sk, u16 word, u64 mask, u64 val,
    int no_reply)
{
	struct wrqe *wr;
	struct cpl_set_tcb_field *req;
	struct inpcb *inp = sotoinpcb(sk);
	struct tcpcb *tp = intotcpcb(inp);
	struct toepcb *toep = tp->t_toe;

	wr = alloc_wrqe(sizeof(*req), toep->ctrlq);
	if (wr == NULL)
		return EINVAL;
	req = wrtod(wr);
	INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid);
req->reply_ctrl = htobe16(V_NO_REPLY(no_reply) | V_QUEUENO(toep->ofld_rxq->iq.abs_id)); req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); req->mask = htobe64(mask); req->val = htobe64(val); t4_wrq_tx(toep->td->tod.tod_softc, wr); return 0; } static int cxgbei_set_ulp_mode(struct socket *so, struct toepcb *toep, unsigned char hcrc, unsigned char dcrc) { int rv = 0, val = 0; toep->ulp_mode = ULP_MODE_ISCSI; if (hcrc) val |= ULP_CRC_HEADER; if (dcrc) val |= ULP_CRC_DATA; val <<= 4; val |= ULP_MODE_ISCSI; rv = send_set_tcb_field(so, 0, 0xfff, val, 0); return rv; } static offload_device * add_cxgbei_dev(struct ifnet *dev, struct toedev *tdev) { #ifdef T4_DDP struct cxgbei_ulp2_ddp_info *ddp; #endif offload_device *odev = NULL; odev = offload_device_new(tdev); if (odev == NULL) { printf("%s: odev is NULL\n", __func__); return odev; } printf("%s:New T4 %s, tdev 0x%p, odev 0x%p.\n", __func__, dev->if_xname, tdev, odev); odev->d_tdev = tdev; odev->d_ulp_rx_datagap = sizeof(struct cpl_iscsi_hdr_no_rss); odev->d_flag = ODEV_FLAG_ULP_CRC_ENABLED; #ifdef T4_DDP odev->tdev2ddp = t4_tdev2ddp; ddp = t4_ddp_init(dev, tdev); if (ddp) { printf("T4 %s, odev 0x%p, ddp 0x%p initialized.\n", dev->if_xname, odev, ddp); odev->d_flag |= ODEV_FLAG_ULP_DDP_ENABLED; cxgbei_ulp2_adapter_ddp_info(ddp, (struct cxgbei_ulp2_tag_format *)&odev->d_tag_format, &odev->d_payload_tmax, &odev->d_payload_rmax); } #endif return odev; } /* initiator */ void cxgbei_conn_task_reserve_itt(void *conn, void **prv, void *scmd, unsigned int *itt) { unsigned int tag; tag = cxgbei_task_reserve_itt(conn, prv, scmd, itt); if (tag) *itt = htonl(tag); return; } /* target */ void cxgbei_conn_transfer_reserve_ttt(void *conn, void **prv, void *scmd, unsigned int *ttt) { unsigned int tag; tag = cxgbei_task_reserve_ttt(conn, prv, scmd, ttt); if (tag) *ttt = htonl(tag); return; } void cxgbei_cleanup_task(void *conn, void *ofld_priv) { struct icl_conn *ic = (struct icl_conn *)conn; cxgbei_task_data *tdata = NULL; struct 
socket *so = NULL; iscsi_socket *isock = NULL; offload_device *odev = NULL; if (ic->ic_socket == NULL) return; so = ic->ic_socket; isock = (iscsi_socket *)(so)->so_emuldata; if (isock == NULL) return; odev = isock->s_odev; tdata = (cxgbei_task_data *)(ofld_priv); if (tdata == NULL) return; if (cxgbei_ulp2_is_ddp_tag(&odev->d_tag_format, tdata->sc_ddp_tag)) t4_sk_ddp_tag_release(isock, tdata->sc_ddp_tag); memset(tdata, 0, sizeof(*tdata)); return; } static void t4_sk_tx_mbuf_setmode(struct icl_pdu *req, void *toep, void *mbuf, unsigned char mode, unsigned char hcrc, unsigned char dcrc) { struct mbuf *m = (struct mbuf *)mbuf; struct ulp_mbuf_cb *cb; cb = get_ulp_mbuf_cb(m); if (cb == NULL) return; cb->ulp_mode = ULP_MODE_ISCSI << 4; if (hcrc) cb->ulp_mode |= 1; if (dcrc) cb->ulp_mode |= 2; cb->pdu = req; return; } int cxgbei_conn_xmit_pdu(void *conn, void *ioreq) { struct icl_conn *ic = (struct icl_conn *)conn; struct icl_pdu *req = (struct icl_pdu *)ioreq; struct mbuf *m = req->ip_bhs_mbuf; struct socket *so = ic->ic_socket; struct tcpcb *tp = so_sototcpcb(so); t4_sk_tx_mbuf_setmode(req, tp->t_toe, m, 2, ic->ic_header_crc32c ? ISCSI_HEADER_DIGEST_SIZE : 0, (req->ip_data_len && ic->ic_data_crc32c) ? 
ISCSI_DATA_DIGEST_SIZE : 0); t4_ulp_mbuf_push(ic->ic_socket, m); return 0; } /* called from host iscsi, socket is passed as argument */ int cxgbei_conn_set_ulp_mode(struct socket *so, void *conn) { struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; struct adapter *sc = NULL; struct toedev *tdev = NULL; iscsi_socket *isock = NULL; struct ifnet *ifp = NULL; unsigned int tid = toep->tid; offload_device *odev = NULL; struct icl_conn *ic = (struct icl_conn*)conn; if (toep == NULL) return EINVAL; ifp = toep->port->ifp; if (ifp == NULL) return EINVAL; if (!(sototcpcb(so)->t_flags & TF_TOE) || !(ifp->if_capenable & IFCAP_TOE)) { printf("%s: TOE not enabled on:%s\n", __func__, ifp->if_xname); return EINVAL; } /* if ULP_MODE is set by TOE driver, treat it as non-offloaded */ if (toep->ulp_mode) { CTR3(KTR_CXGBE, "%s: T4 sk 0x%p, ulp mode already set 0x%x.\n", __func__, so, toep->ulp_mode); return EINVAL; } sc = toep->port->adapter; tdev = &toep->td->tod; /* if toe dev is not set, treat it as non-offloaded */ if (tdev == NULL) { CTR2(KTR_CXGBE, "%s: T4 sk 0x%p, tdev NULL.\n", __func__, so); return EINVAL; } isock = (iscsi_socket *)malloc(sizeof(iscsi_socket), M_CXGBE, M_NOWAIT | M_ZERO); if (isock == NULL) { printf("%s: T4 sk 0x%p, isock alloc failed.\n", __func__, so); return EINVAL; } isock->mbuf_ulp_lhdr = NULL; isock->sock = so; isock->s_conn = conn; so->so_emuldata = isock; mtx_init(&isock->iscsi_rcv_mbufq.lock,"isock_lock" , NULL, MTX_DEF); mtx_init(&isock->ulp2_wrq.lock,"ulp2_wrq lock" , NULL, MTX_DEF); mtx_init(&isock->ulp2_writeq.lock,"ulp2_writeq lock" , NULL, MTX_DEF); CTR6(KTR_CXGBE, "%s: sc:%p toep:%p iscsi_start:0x%x iscsi_size:0x%x caps:%d.\n", __func__, sc, toep, sc->vres.iscsi.start, sc->vres.iscsi.size, sc->iscsicaps); /* * Register ULP CPL handlers with TOM * Register CPL_RX_ISCSI_HDR, CPL_RX_DATA_DDP callbacks with TOM */ t4_register_cpl_handler_with_tom(sc); /* * DDP initialization. 
Once for each tdev * check if DDP is already configured for this tdev */ odev = offload_device_find(tdev); if (odev == NULL) /* for each tdev we have a corresponding odev */ { if ((odev = add_cxgbei_dev(ifp, tdev)) == NULL) { CTR3(KTR_CXGBE, "T4 sk 0x%p, tdev %s, 0x%p, odev NULL.\n", so, ifp->if_xname, tdev); return EINVAL; } } CTR3(KTR_CXGBE, "tdev:%p sc->iscsi_softc:%p odev:%p\n", tdev, sc->iscsi_softc, odev); isock->s_odev = odev; isock->s_tid = tid; isock->s_rmax = odev->d_payload_rmax; isock->s_tmax = odev->d_payload_tmax; /* XXX cap the xmit pdu size to be 12K for now until f/w is ready */ if (isock->s_tmax > (12288 + ISCSI_PDU_NONPAYLOAD_LEN)) isock->s_tmax = 12288 + ISCSI_PDU_NONPAYLOAD_LEN; /* set toe DDP off */ so->so_options |= SO_NO_DDP; /* Move connection to ULP mode, SET_TCB_FIELD */ cxgbei_set_ulp_mode(so, toep, ic->ic_header_crc32c, ic->ic_data_crc32c); isock->s_hcrc_len = (ic->ic_header_crc32c ? 4 : 0); isock->s_dcrc_len = (ic->ic_data_crc32c ? 4 : 0); return 0; } int cxgbei_conn_close(struct socket *so) { iscsi_socket *isock = NULL; isock = (iscsi_socket *)(so)->so_emuldata; struct mbuf *m; struct ulp_mbuf_cb *cb; struct icl_pdu *req; so->so_emuldata = NULL; /* free isock Qs */ while ((m = mbufq_dequeue(&isock->iscsi_rcv_mbufq)) != NULL) m_freem(m); while ((m = mbufq_dequeue(&isock->ulp2_writeq)) != NULL) m_freem(m); mtx_lock(&isock->ulp2_wrq.lock); while ((m = mbufq_dequeue(&isock->ulp2_wrq)) != NULL) { cb = find_ulp_mbuf_cb(m); if (cb && isock && cb->pdu) { req = (struct icl_pdu *)cb->pdu; req->ip_bhs_mbuf = NULL; icl_pdu_free(req); } m_freem(m); } mtx_unlock(&isock->ulp2_wrq.lock); if (mtx_initialized(&isock->iscsi_rcv_mbufq.lock)) mtx_destroy(&isock->iscsi_rcv_mbufq.lock); if (mtx_initialized(&isock->ulp2_wrq.lock)) mtx_destroy(&isock->ulp2_wrq.lock); if (mtx_initialized(&isock->ulp2_writeq.lock)) mtx_destroy(&isock->ulp2_writeq.lock); free(isock, M_CXGBE); return 0; } static int cxgbei_loader(struct module *mod, int cmd, void *arg) { int err 
= 0; switch (cmd) { case MOD_LOAD: SLIST_INIT(&odev_list); printf("cxgbei module loaded Sucessfully.\n"); break; case MOD_UNLOAD: offload_device_remove(); printf("cxgbei cleanup completed sucessfully.\n"); break; default: err = (EINVAL); break; } return (err); } static moduledata_t cxgbei_mod = { "cxgbei", cxgbei_loader, NULL, }; MODULE_VERSION(cxgbei, 1); DECLARE_MODULE(cxgbei, cxgbei_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(cxgbei, t4_tom, 1, 1, 1); MODULE_DEPEND(cxgbei, cxgbe, 1, 1, 1); MODULE_DEPEND(cxgbei, icl, 1, 1, 1); Index: projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei_ulp2_ddp.c =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei_ulp2_ddp.c (revision 279870) +++ projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei_ulp2_ddp.c (revision 279871) @@ -1,698 +1,703 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * * Chelsio T5xx iSCSI driver * cxgbei_ulp2_ddp.c: Chelsio iSCSI DDP Manager. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ +#include +__FBSDID("$FreeBSD$"); + #include "opt_inet.h" +#include "opt_inet6.h" + #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for PCIE_MEM_ACCESS */ #include #include "cxgbei.h" #include "cxgbei_ulp2_ddp.h" static inline int cxgbei_counter_dec_and_read(volatile int *p) { atomic_subtract_acq_int(p, 1); return atomic_load_acq_int(p); } static inline int get_order(unsigned long size) { int order; size = (size - 1) >> PAGE_SHIFT; order = 0; while (size) { order++; size >>= 1; } return (order); } /* * Map a single buffer address. 
*/ static void ulp2_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *ba = arg; if (error) return; KASSERT(nseg == 1, ("%s: %d segments returned!", __func__, nseg)); *ba = segs->ds_addr; } static int ulp2_dma_tag_create(struct cxgbei_ulp2_ddp_info *ddp) { int rc; rc = bus_dma_tag_create(NULL, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, UINT32_MAX , 8, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &ddp->ulp_ddp_tag); if (rc != 0) { printf("%s(%d): bus_dma_tag_create() " "failed (rc = %d)!\n", __FILE__, __LINE__, rc); return rc; } return 0; } /* * iSCSI Direct Data Placement * * T4/5 ulp2 h/w can directly place the iSCSI Data-In or Data-Out PDU's * payload into pre-posted final destination host-memory buffers based on the * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT) in Data-Out * PDUs. * * The host memory address is programmed into h/w in the format of pagepod * entries. * The location of the pagepod entry is encoded into ddp tag which is used or * is the base for ITT/TTT. 
*/ unsigned char ddp_page_order[DDP_PGIDX_MAX] = {0, 1, 2, 4}; unsigned char ddp_page_shift[DDP_PGIDX_MAX] = {12, 13, 14, 16}; unsigned char page_idx = DDP_PGIDX_MAX; static inline int ddp_find_unused_entries(struct cxgbei_ulp2_ddp_info *ddp, unsigned int start, unsigned int max, unsigned int count, unsigned int *idx, struct cxgbei_ulp2_gather_list *gl) { unsigned int i, j, k; /* not enough entries */ if ((max - start) < count) return EBUSY; max -= count; mtx_lock(&ddp->map_lock); for (i = start; i < max;) { for (j = 0, k = i; j < count; j++, k++) { if (ddp->gl_map[k]) break; } if (j == count) { for (j = 0, k = i; j < count; j++, k++) ddp->gl_map[k] = gl; mtx_unlock(&ddp->map_lock); *idx = i; return 0; } i += j + 1; } mtx_unlock(&ddp->map_lock); return EBUSY; } static inline void ddp_unmark_entries(struct cxgbei_ulp2_ddp_info *ddp, int start, int count) { mtx_lock(&ddp->map_lock); memset(&ddp->gl_map[start], 0, count * sizeof(struct cxgbei_ulp2_gather_list *)); mtx_unlock(&ddp->map_lock); } /** * cxgbei_ulp2_ddp_find_page_index - return ddp page index for a given page size * @pgsz: page size * return the ddp page index, if no match is found return DDP_PGIDX_MAX. */ int cxgbei_ulp2_ddp_find_page_index(unsigned long pgsz) { int i; for (i = 0; i < DDP_PGIDX_MAX; i++) { if (pgsz == (1UL << ddp_page_shift[i])) return i; } CTR1(KTR_CXGBE, "ddp page size 0x%lx not supported.\n", pgsz); return DDP_PGIDX_MAX; } static int cxgbei_ulp2_ddp_adjust_page_table(void) { int i; unsigned int base_order, order; if (PAGE_SIZE < (1UL << ddp_page_shift[0])) { CTR2(KTR_CXGBE, "PAGE_SIZE %u too small, min. 
%lu.\n", PAGE_SIZE, 1UL << ddp_page_shift[0]); return EINVAL; } base_order = get_order(1UL << ddp_page_shift[0]); order = get_order(1 << PAGE_SHIFT); for (i = 0; i < DDP_PGIDX_MAX; i++) { /* first is the kernel page size, then just doubling the size */ ddp_page_order[i] = order - base_order + i; ddp_page_shift[i] = PAGE_SHIFT + i; } return 0; } static inline void ddp_gl_unmap(struct toedev *tdev, struct cxgbei_ulp2_gather_list *gl) { int i; struct adapter *sc = tdev->tod_softc; struct cxgbei_ulp2_ddp_info *ddp = sc->iscsi_softc; if (!gl->pages[0]) return; for (i = 0; i < gl->nelem; i++) { bus_dmamap_unload(ddp->ulp_ddp_tag, gl->dma_sg[i].bus_map); bus_dmamap_destroy(ddp->ulp_ddp_tag, gl->dma_sg[i].bus_map); } } static inline int ddp_gl_map(struct toedev *tdev, struct cxgbei_ulp2_gather_list *gl) { int i, rc; bus_addr_t pa; struct cxgbei_ulp2_ddp_info *ddp; struct adapter *sc = tdev->tod_softc; ddp = (struct cxgbei_ulp2_ddp_info *)sc->iscsi_softc; if (ddp == NULL) { printf("%s: DDP is NULL tdev:%p sc:%p ddp:%p\n", __func__, tdev, sc, ddp); return ENOMEM; } mtx_lock(&ddp->map_lock); for (i = 0; i < gl->nelem; i++) { rc = bus_dmamap_create(ddp->ulp_ddp_tag, 0, &gl->dma_sg[i].bus_map); if (rc != 0) { printf("%s: unable to map page 0x%p.\n", __func__, gl->pages[i]); goto unmap; } rc = bus_dmamap_load(ddp->ulp_ddp_tag, gl->dma_sg[i].bus_map, gl->pages[i], PAGE_SIZE, ulp2_dma_map_addr, &pa, BUS_DMA_NOWAIT); if (rc != 0) { printf("%s:unable to load page 0x%p.\n", __func__, gl->pages[i]); goto unmap; } gl->dma_sg[i].phys_addr = pa; } mtx_unlock(&ddp->map_lock); return 0; unmap: if (i) { unsigned int nelem = gl->nelem; gl->nelem = i; ddp_gl_unmap(tdev, gl); gl->nelem = nelem; } return ENOMEM; } /** * cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec - build ddp page buffer list * @xferlen: total buffer length * @sgl: page buffer scatter-gather list (struct cxgbei_sgl) * @sgcnt: # of page buffers * @gfp: allocation mode * * construct a ddp page buffer list from the scsi scattergather 
list.
 * coalesce buffers as much as possible, and obtain dma addresses for
 * each page.
 *
 * Return the cxgbei_ulp2_gather_list constructed from the page buffers if the
 * memory can be used for ddp. Return NULL otherwise.
 */
struct cxgbei_ulp2_gather_list *
cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec
		(unsigned int xferlen, cxgbei_sgl *sgl,
		 unsigned int sgcnt, void *tdev, int gfp)
{
	struct cxgbei_ulp2_gather_list *gl;
	cxgbei_sgl *sg = sgl;
	void *sgpage = (void *)((u64)sg->sg_addr & (~PAGE_MASK));
	unsigned int sglen = sg->sg_length;
	unsigned int sgoffset = (u64)sg->sg_addr & PAGE_MASK;
	unsigned int npages = (xferlen + sgoffset + PAGE_SIZE - 1) >>
			      PAGE_SHIFT;
	int i = 1, j = 0;

	/* Small transfers are not worth the pagepod programming overhead. */
	if (xferlen <= DDP_THRESHOLD) {
		CTR2(KTR_CXGBE, "xfer %u < threshold %u, no ddp.\n",
		    xferlen, DDP_THRESHOLD);
		return NULL;
	}

	/* One allocation holds the header, dma_sg[] and pages[] arrays. */
	gl = malloc(sizeof(struct cxgbei_ulp2_gather_list) +
	    npages * (sizeof(struct dma_segments) + sizeof(void *)),
	    M_DEVBUF, M_NOWAIT | M_ZERO);
	if (gl == NULL) {
		printf("%s: gl alloc failed\n", __func__);
		return NULL;
	}

	gl->pages = (void **)&gl->dma_sg[npages];
	gl->length = xferlen;
	gl->offset = sgoffset;
	gl->pages[0] = sgpage;
	CTR6(KTR_CXGBE,
	    "%s: xferlen:0x%x len:0x%x off:0x%x sg_addr:%p npages:%d\n",
	    __func__, xferlen, gl->length, gl->offset, sg->sg_addr, npages);

	/*
	 * Walk the remaining sg entries, coalescing physically contiguous
	 * runs into the same page slot, and reject layouts h/w cannot DDP.
	 */
	for (i = 1, sg = sg_next(sg); i < sgcnt; i++, sg = sg_next(sg)) {
		void *page = sg->sg_addr;

		if (sgpage == page && sg->sg_offset == sgoffset + sglen)
			sglen += sg->sg_length;
		else {
			/* make sure the sgl is fit for ddp:
			 * each has the same page size, and
			 * all of the middle pages are used completely
			 */
			if ((j && sgoffset) ||
			    ((i != sgcnt - 1) &&
			     ((sglen + sgoffset) & ~CXGBEI_PAGE_MASK))){
				goto error_out;
			}

			j++;
			if (j == gl->nelem || sg->sg_offset) {
				goto error_out;
			}
			gl->pages[j] = page;
			sglen = sg->sg_length;
			sgoffset = sg->sg_offset;
			sgpage = page;
		}
	}
	gl->nelem = ++j;

	if (ddp_gl_map(tdev, gl) < 0)
		goto error_out;

	return gl;

error_out:
	free(gl, M_DEVBUF);
	return NULL;
}

/**
 * cxgbei_ulp2_ddp_release_gl - release a page
buffer list * @gl: a ddp page buffer list * @pdev: pci_dev used for pci_unmap * free a ddp page buffer list resulted from cxgbei_ulp2_ddp_make_gl(). */ void cxgbei_ulp2_ddp_release_gl(struct cxgbei_ulp2_gather_list *gl, void *tdev) { ddp_gl_unmap(tdev, gl); free(gl, M_DEVBUF); } /** * cxgbei_ulp2_ddp_tag_reserve - set up ddp for a data transfer * @ddp: adapter's ddp info * @tid: connection id * @tformat: tag format * @tagp: contains s/w tag initially, will be updated with ddp/hw tag * @gl: the page momory list * @gfp: allocation mode * * ddp setup for a given page buffer list and construct the ddp tag. * return 0 if success, < 0 otherwise. */ int cxgbei_ulp2_ddp_tag_reserve(struct cxgbei_ulp2_ddp_info *ddp, void *isock, unsigned int tid, struct cxgbei_ulp2_tag_format *tformat, u32 *tagp, struct cxgbei_ulp2_gather_list *gl, int gfp, int reply) { struct cxgbei_ulp2_pagepod_hdr hdr; unsigned int npods, idx; int rv; u32 sw_tag = *tagp; u32 tag; if (page_idx >= DDP_PGIDX_MAX || !ddp || !gl || !gl->nelem || gl->length < DDP_THRESHOLD) { CTR3(KTR_CXGBE, "pgidx %u, xfer %u/%u, NO ddp.\n", page_idx, gl->length, DDP_THRESHOLD); return EINVAL; } npods = (gl->nelem + IPPOD_PAGES_MAX - 1) >> IPPOD_PAGES_SHIFT; if (ddp->idx_last == ddp->nppods) rv = ddp_find_unused_entries(ddp, 0, ddp->nppods, npods, &idx, gl); else { rv = ddp_find_unused_entries(ddp, ddp->idx_last + 1, ddp->nppods, npods, &idx, gl); if (rv && ddp->idx_last >= npods) { rv = ddp_find_unused_entries(ddp, 0, min(ddp->idx_last + npods, ddp->nppods), npods, &idx, gl); } } if (rv) { CTR3(KTR_CXGBE, "xferlen %u, gl %u, npods %u NO DDP.\n", gl->length, gl->nelem, npods); return rv; } tag = cxgbei_ulp2_ddp_tag_base(idx, ddp, tformat, sw_tag); CTR4(KTR_CXGBE, "%s: sw_tag:0x%x idx:0x%x tag:0x%x\n", __func__, sw_tag, idx, tag); hdr.rsvd = 0; hdr.vld_tid = htonl(F_IPPOD_VALID | V_IPPOD_TID(tid)); hdr.pgsz_tag_clr = htonl(tag & ddp->rsvd_tag_mask); hdr.maxoffset = htonl(gl->length); hdr.pgoffset = htonl(gl->offset); rv = 
ddp->ddp_set_map(ddp, isock, &hdr, idx, npods, gl, reply); if (rv < 0) goto unmark_entries; ddp->idx_last = idx; *tagp = tag; return 0; unmark_entries: ddp_unmark_entries(ddp, idx, npods); return rv; } /** * cxgbei_ulp2_ddp_tag_release - release a ddp tag * @ddp: adapter's ddp info * @tag: ddp tag * ddp cleanup for a given ddp tag and release all the resources held */ void cxgbei_ulp2_ddp_tag_release(struct cxgbei_ulp2_ddp_info *ddp, u32 tag, iscsi_socket *isock) { u32 idx; if (ddp == NULL) { CTR2(KTR_CXGBE, "%s:release ddp tag 0x%x, ddp NULL.\n", __func__, tag); return; } if (isock == NULL) return; idx = (tag >> IPPOD_IDX_SHIFT) & ddp->idx_mask; CTR3(KTR_CXGBE, "tag:0x%x idx:0x%x nppods:0x%x\n", tag, idx, ddp->nppods); if (idx < ddp->nppods) { struct cxgbei_ulp2_gather_list *gl = ddp->gl_map[idx]; unsigned int npods; if (!gl || !gl->nelem) { CTR4(KTR_CXGBE, "release 0x%x, idx 0x%x, gl 0x%p, %u.\n", tag, idx, gl, gl ? gl->nelem : 0); return; } npods = (gl->nelem + IPPOD_PAGES_MAX - 1) >> IPPOD_PAGES_SHIFT; CTR3(KTR_CXGBE, "ddp tag 0x%x, release idx 0x%x, npods %u.\n", tag, idx, npods); ddp->ddp_clear_map(ddp, gl, tag, idx, npods, isock); ddp_unmark_entries(ddp, idx, npods); cxgbei_ulp2_ddp_release_gl(gl, ddp->tdev); } else CTR3(KTR_CXGBE, "ddp tag 0x%x, idx 0x%x > max 0x%x.\n", tag, idx, ddp->nppods); } /** * cxgbei_ulp2_adapter_ddp_info - read the adapter's ddp information * @ddp: adapter's ddp info * @tformat: tag format * @txsz: max tx pdu payload size, filled in by this func. * @rxsz: max rx pdu payload size, filled in by this func. 
* setup the tag format for a given iscsi entity */ int cxgbei_ulp2_adapter_ddp_info(struct cxgbei_ulp2_ddp_info *ddp, struct cxgbei_ulp2_tag_format *tformat, unsigned int *txsz, unsigned int *rxsz) { unsigned char idx_bits; if (tformat == NULL) return EINVAL; if (ddp == NULL) return EINVAL; idx_bits = 32 - tformat->sw_bits; tformat->sw_bits = ddp->idx_bits; tformat->rsvd_bits = ddp->idx_bits; tformat->rsvd_shift = IPPOD_IDX_SHIFT; tformat->rsvd_mask = (1 << tformat->rsvd_bits) - 1; CTR4(KTR_CXGBE, "tag format: sw %u, rsvd %u,%u, mask 0x%x.\n", tformat->sw_bits, tformat->rsvd_bits, tformat->rsvd_shift, tformat->rsvd_mask); *txsz = min(ULP2_MAX_PDU_PAYLOAD, ddp->max_txsz - ISCSI_PDU_NONPAYLOAD_LEN); *rxsz = min(ULP2_MAX_PDU_PAYLOAD, ddp->max_rxsz - ISCSI_PDU_NONPAYLOAD_LEN); CTR4(KTR_CXGBE, "max payload size: %u/%u, %u/%u.\n", *txsz, ddp->max_txsz, *rxsz, ddp->max_rxsz); return 0; } /** * cxgbei_ulp2_ddp_cleanup - release the cxgbX adapter's ddp resource * @tdev: t4cdev adapter * release all the resource held by the ddp pagepod manager for a given * adapter if needed */ void cxgbei_ulp2_ddp_cleanup(struct cxgbei_ulp2_ddp_info **ddp_pp) { int i = 0; struct cxgbei_ulp2_ddp_info *ddp = *ddp_pp; if (ddp == NULL) return; CTR2(KTR_CXGBE, "tdev, release ddp 0x%p, ref %d.\n", ddp, atomic_load_acq_int(&ddp->refcnt)); if (ddp && (cxgbei_counter_dec_and_read(&ddp->refcnt) == 0)) { *ddp_pp = NULL; while (i < ddp->nppods) { struct cxgbei_ulp2_gather_list *gl = ddp->gl_map[i]; if (gl) { int npods = (gl->nelem + IPPOD_PAGES_MAX - 1) >> IPPOD_PAGES_SHIFT; CTR2(KTR_CXGBE, "tdev, ddp %d + %d.\n", i, npods); free(gl, M_DEVBUF); i += npods; } else i++; } bus_dmamap_unload(ddp->ulp_ddp_tag, ddp->ulp_ddp_map); cxgbei_ulp2_free_big_mem(ddp); } } /** * ddp_init - initialize the cxgb3/4 adapter's ddp resource * @tdev_name: device name * @tdev: device * @ddp: adapter's ddp info * @uinfo: adapter's iscsi info * initialize the ddp pagepod manager for a given adapter */ static void ddp_init(void 
*tdev, struct cxgbei_ulp2_ddp_info **ddp_pp, struct ulp_iscsi_info *uinfo) { struct cxgbei_ulp2_ddp_info *ddp = *ddp_pp; unsigned int ppmax, bits; int i, rc; if (uinfo->ulimit <= uinfo->llimit) { printf("%s: tdev, ddp 0x%x >= 0x%x.\n", __func__, uinfo->llimit, uinfo->ulimit); return; } if (ddp) { atomic_add_acq_int(&ddp->refcnt, 1); CTR2(KTR_CXGBE, "tdev, ddp 0x%p already set up, %d.\n", ddp, atomic_load_acq_int(&ddp->refcnt)); return; } ppmax = (uinfo->ulimit - uinfo->llimit + 1) >> IPPOD_SIZE_SHIFT; if (ppmax <= 1024) { CTR3(KTR_CXGBE, "tdev, ddp 0x%x ~ 0x%x, nppod %u < 1K.\n", uinfo->llimit, uinfo->ulimit, ppmax); return; } bits = (fls(ppmax) - 1) + 1; if (bits > IPPOD_IDX_MAX_SIZE) bits = IPPOD_IDX_MAX_SIZE; ppmax = (1 << (bits - 1)) - 1; ddp = cxgbei_ulp2_alloc_big_mem(sizeof(struct cxgbei_ulp2_ddp_info) + ppmax * (sizeof(struct cxgbei_ulp2_gather_list *) + sizeof(unsigned char))); if (ddp == NULL) { CTR1(KTR_CXGBE, "unable to alloc ddp 0x%d, ddp disabled.\n", ppmax); return; } ddp->colors = (unsigned char *)(ddp + 1); ddp->gl_map = (struct cxgbei_ulp2_gather_list **)(ddp->colors + ppmax * sizeof(unsigned char)); *ddp_pp = ddp; mtx_init(&ddp->map_lock, "ddp lock", NULL, MTX_DEF | MTX_DUPOK| MTX_RECURSE); atomic_set_acq_int(&ddp->refcnt, 1); /* dma_tag create */ rc = ulp2_dma_tag_create(ddp); if (rc) { printf("%s: unable to alloc ddp 0x%d, ddp disabled.\n", __func__, ppmax); return; } ddp->tdev = tdev; ddp->max_txsz = min(uinfo->max_txsz, ULP2_MAX_PKT_SIZE); ddp->max_rxsz = min(uinfo->max_rxsz, ULP2_MAX_PKT_SIZE); ddp->llimit = uinfo->llimit; ddp->ulimit = uinfo->ulimit; ddp->nppods = ppmax; ddp->idx_last = ppmax; ddp->idx_bits = bits; ddp->idx_mask = (1 << bits) - 1; ddp->rsvd_tag_mask = (1 << (bits + IPPOD_IDX_SHIFT)) - 1; CTR2(KTR_CXGBE, "gl map 0x%p, idx_last %u.\n", ddp->gl_map, ddp->idx_last); uinfo->tagmask = ddp->idx_mask << IPPOD_IDX_SHIFT; for (i = 0; i < DDP_PGIDX_MAX; i++) uinfo->pgsz_factor[i] = ddp_page_order[i]; uinfo->ulimit = uinfo->llimit + 
(ppmax << IPPOD_SIZE_SHIFT); printf("nppods %u, bits %u, mask 0x%x,0x%x pkt %u/%u," " %u/%u.\n", ppmax, ddp->idx_bits, ddp->idx_mask, ddp->rsvd_tag_mask, ddp->max_txsz, uinfo->max_txsz, ddp->max_rxsz, uinfo->max_rxsz); rc = bus_dmamap_create(ddp->ulp_ddp_tag, 0, &ddp->ulp_ddp_map); if (rc != 0) { printf("%s: bus_dmamap_Create failed\n", __func__); return; } } /** * cxgbei_ulp2_ddp_init - initialize ddp functions */ void cxgbei_ulp2_ddp_init(void *tdev, struct cxgbei_ulp2_ddp_info **ddp_pp, struct ulp_iscsi_info *uinfo) { if (page_idx == DDP_PGIDX_MAX) { page_idx = cxgbei_ulp2_ddp_find_page_index(PAGE_SIZE); if (page_idx == DDP_PGIDX_MAX) { if (cxgbei_ulp2_ddp_adjust_page_table()) { CTR1(KTR_CXGBE, "PAGE_SIZE %x, ddp disabled.\n", PAGE_SIZE); return; } } page_idx = cxgbei_ulp2_ddp_find_page_index(PAGE_SIZE); } ddp_init(tdev, ddp_pp, uinfo); } Index: projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/icl_cxgbei.c =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/icl_cxgbei.c (revision 279870) +++ projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/icl_cxgbei.c (revision 279871) @@ -1,816 +1,819 @@ /*- * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * cxgbei implementation of iSCSI Common Layer kobj(9) interface. */ #include __FBSDID("$FreeBSD$"); +#include "opt_inet.h" +#include "opt_inet6.h" + #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cxgbei.h" SYSCTL_NODE(_kern_icl, OID_AUTO, cxgbei, CTLFLAG_RD, 0, "Chelsio iSCSI offload"); static int coalesce = 1; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, coalesce, CTLFLAG_RWTUN, &coalesce, 0, "Try to coalesce PDUs before sending"); static int partial_receive_len = 128 * 1024; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, &partial_receive_len, 0, "Minimum read size for partially received " "data segment"); static int sendspace = 1048576; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, sendspace, CTLFLAG_RWTUN, &sendspace, 0, "Default send socket buffer size"); static int recvspace = 1048576; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, recvspace, CTLFLAG_RWTUN, &recvspace, 0, "Default receive socket buffer size"); static MALLOC_DEFINE(M_ICL_CXGBEI, "icl_cxgbei", "iSCSI software backend"); static uma_zone_t icl_pdu_zone; static 
uma_zone_t icl_transfer_zone; static volatile u_int icl_ncons; #define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) #define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) #define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) #define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) STAILQ_HEAD(icl_pdu_stailq, icl_pdu); static icl_conn_new_pdu_t icl_cxgbei_conn_new_pdu; static icl_conn_pdu_free_t icl_cxgbei_conn_pdu_free; static icl_conn_pdu_data_segment_length_t icl_cxgbei_conn_pdu_data_segment_length; static icl_conn_pdu_append_data_t icl_cxgbei_conn_pdu_append_data; static icl_conn_pdu_get_data_t icl_cxgbei_conn_pdu_get_data; static icl_conn_pdu_queue_t icl_cxgbei_conn_pdu_queue; static icl_conn_handoff_t icl_cxgbei_conn_handoff; static icl_conn_free_t icl_cxgbei_conn_free; static icl_conn_close_t icl_cxgbei_conn_close; static icl_conn_connected_t icl_cxgbei_conn_connected; static icl_conn_task_setup_t icl_cxgbei_conn_task_setup; static icl_conn_task_done_t icl_cxgbei_conn_task_done; static icl_conn_transfer_setup_t icl_cxgbei_conn_transfer_setup; static icl_conn_transfer_done_t icl_cxgbei_conn_transfer_done; static kobj_method_t icl_cxgbei_methods[] = { KOBJMETHOD(icl_conn_new_pdu, icl_cxgbei_conn_new_pdu), KOBJMETHOD(icl_conn_pdu_free, icl_cxgbei_conn_pdu_free), KOBJMETHOD(icl_conn_pdu_data_segment_length, icl_cxgbei_conn_pdu_data_segment_length), KOBJMETHOD(icl_conn_pdu_append_data, icl_cxgbei_conn_pdu_append_data), KOBJMETHOD(icl_conn_pdu_get_data, icl_cxgbei_conn_pdu_get_data), KOBJMETHOD(icl_conn_pdu_queue, icl_cxgbei_conn_pdu_queue), KOBJMETHOD(icl_conn_handoff, icl_cxgbei_conn_handoff), KOBJMETHOD(icl_conn_free, icl_cxgbei_conn_free), KOBJMETHOD(icl_conn_close, icl_cxgbei_conn_close), KOBJMETHOD(icl_conn_connected, icl_cxgbei_conn_connected), KOBJMETHOD(icl_conn_task_setup, icl_cxgbei_conn_task_setup), KOBJMETHOD(icl_conn_task_done, icl_cxgbei_conn_task_done), KOBJMETHOD(icl_conn_transfer_setup, icl_cxgbei_conn_transfer_setup), 
KOBJMETHOD(icl_conn_transfer_done, icl_cxgbei_conn_transfer_done), { 0, 0 } }; DEFINE_CLASS(icl_cxgbei, icl_cxgbei_methods, sizeof(struct icl_conn)); struct icl_pdu * icl_pdu_new_empty(struct icl_conn *ic, int flags); void icl_pdu_free(struct icl_pdu *ip); struct icl_pdu * icl_pdu_new_empty(struct icl_conn *ic, int flags) { struct icl_pdu *ip; #ifdef DIAGNOSTIC refcount_acquire(&ic->ic_outstanding_pdus); #endif ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO); if (ip == NULL) { ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); #ifdef DIAGNOSTIC refcount_release(&ic->ic_outstanding_pdus); #endif return (NULL); } ip->ip_conn = ic; return (ip); } void icl_pdu_free(struct icl_pdu *ip) { struct icl_conn *ic; ic = ip->ip_conn; m_freem(ip->ip_bhs_mbuf); m_freem(ip->ip_ahs_mbuf); m_freem(ip->ip_data_mbuf); uma_zfree(icl_pdu_zone, ip); #ifdef DIAGNOSTIC refcount_release(&ic->ic_outstanding_pdus); #endif } void icl_cxgbei_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) { icl_pdu_free(ip); } /* * Allocate icl_pdu with empty BHS to fill up by the caller. 
*/ struct icl_pdu * icl_cxgbei_conn_new_pdu(struct icl_conn *ic, int flags) { struct icl_pdu *ip; ip = icl_pdu_new_empty(ic, flags); if (ip == NULL) return (NULL); ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs), flags, MT_DATA, M_PKTHDR); if (ip->ip_bhs_mbuf == NULL) { ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); icl_pdu_free(ip); return (NULL); } ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); return (ip); } static size_t icl_pdu_data_segment_length(const struct icl_pdu *request) { uint32_t len = 0; len += request->ip_bhs->bhs_data_segment_len[0]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[1]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[2]; return (len); } size_t icl_cxgbei_conn_pdu_data_segment_length(struct icl_conn *ic, const struct icl_pdu *request) { return (icl_pdu_data_segment_length(request)); } static void icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) { response->ip_bhs->bhs_data_segment_len[2] = len; response->ip_bhs->bhs_data_segment_len[1] = len >> 8; response->ip_bhs->bhs_data_segment_len[0] = len >> 16; } static size_t icl_pdu_padding(const struct icl_pdu *ip) { if ((ip->ip_data_len % 4) != 0) return (4 - (ip->ip_data_len % 4)); return (0); } static size_t icl_pdu_size(const struct icl_pdu *response) { size_t len; KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); len = sizeof(struct iscsi_bhs) + response->ip_data_len + icl_pdu_padding(response); return (len); } static uint32_t icl_conn_build_tasktag(struct icl_conn *ic, uint32_t tag) { return tag; } static int icl_soupcall_receive(struct socket *so, void *arg, int waitflag) { struct icl_conn *ic; if (!soreadable(so)) return (SU_OK); ic = arg; cv_signal(&ic->ic_receive_cv); return (SU_OK); } static int icl_pdu_finalize(struct icl_pdu *request) { size_t padding, pdu_len; uint32_t zero = 0; int ok; struct icl_conn 
*ic; ic = request->ip_conn; icl_pdu_set_data_segment_length(request, request->ip_data_len); pdu_len = icl_pdu_size(request); if (request->ip_data_len != 0) { padding = icl_pdu_padding(request); if (padding > 0) { ok = m_append(request->ip_data_mbuf, padding, (void *)&zero); if (ok != 1) { ICL_WARN("failed to append padding"); return (1); } } m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); request->ip_data_mbuf = NULL; } request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; return (0); } static int icl_soupcall_send(struct socket *so, void *arg, int waitflag) { struct icl_conn *ic; if (!sowriteable(so)) return (SU_OK); ic = arg; ICL_CONN_LOCK(ic); ic->ic_check_send_space = true; ICL_CONN_UNLOCK(ic); cv_signal(&ic->ic_send_cv); return (SU_OK); } static int icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len, int flags) { struct mbuf *mb, *newmb; size_t copylen, off = 0; KASSERT(len > 0, ("len == 0")); newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR); if (newmb == NULL) { ICL_WARN("failed to allocate mbuf for %zd bytes", len); return (ENOMEM); } for (mb = newmb; mb != NULL; mb = mb->m_next) { copylen = min(M_TRAILINGSPACE(mb), len - off); memcpy(mtod(mb, char *), (const char *)addr + off, copylen); mb->m_len = copylen; off += copylen; } KASSERT(off == len, ("%s: off != len", __func__)); if (request->ip_data_mbuf == NULL) { request->ip_data_mbuf = newmb; request->ip_data_len = len; } else { m_cat(request->ip_data_mbuf, newmb); request->ip_data_len += len; } return (0); } int icl_cxgbei_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, const void *addr, size_t len, int flags) { return (icl_pdu_append_data(request, addr, len, flags)); } static void icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len) { /* data is DDP'ed, no need to copy */ if (ip->ip_ofld_prv0) return; m_copydata(ip->ip_data_mbuf, off, len, addr); } void icl_cxgbei_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, size_t off, void *addr, 
size_t len) { return (icl_pdu_get_data(ip, off, addr, len)); } static void icl_pdu_queue(struct icl_pdu *ip) { struct icl_conn *ic; ic = ip->ip_conn; ICL_CONN_LOCK_ASSERT(ic); if (ic->ic_disconnecting || ic->ic_socket == NULL) { ICL_DEBUG("icl_pdu_queue on closed connection"); icl_pdu_free(ip); return; } icl_pdu_finalize(ip); cxgbei_conn_xmit_pdu(ic, ip); } void icl_cxgbei_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) { icl_pdu_queue(ip); } static struct icl_conn * icl_cxgbei_new_conn(const char *name, struct mtx *lock) { struct icl_conn *ic; refcount_acquire(&icl_ncons); ic = (struct icl_conn *)kobj_create(&icl_cxgbei_class, M_ICL_CXGBEI, M_WAITOK | M_ZERO); STAILQ_INIT(&ic->ic_to_send); ic->ic_lock = lock; cv_init(&ic->ic_send_cv, "icl_tx"); cv_init(&ic->ic_receive_cv, "icl_rx"); #ifdef DIAGNOSTIC refcount_init(&ic->ic_outstanding_pdus, 0); #endif ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH; ic->ic_name = name; ic->ic_offload = strdup("cxgbei", M_TEMP);; return (ic); } void icl_cxgbei_conn_free(struct icl_conn *ic) { cv_destroy(&ic->ic_send_cv); cv_destroy(&ic->ic_receive_cv); kobj_delete((struct kobj *)ic, M_ICL_CXGBEI); refcount_release(&icl_ncons); } static int icl_conn_start(struct icl_conn *ic) { size_t minspace; struct sockopt opt; int error, one = 1; ICL_CONN_LOCK(ic); /* * XXX: Ugly hack. */ if (ic->ic_socket == NULL) { ICL_CONN_UNLOCK(ic); return (EINVAL); } ic->ic_receive_state = ICL_CONN_STATE_BHS; ic->ic_receive_len = sizeof(struct iscsi_bhs); ic->ic_disconnecting = false; ICL_CONN_UNLOCK(ic); /* * For sendspace, this is required because the current code cannot * send a PDU in pieces; thus, the minimum buffer size is equal * to the maximum PDU size. "+4" is to account for possible padding. * * What we should actually do here is to use autoscaling, but set * some minimal buffer size to "minspace". I don't know a way to do * that, though. 
*/ minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; if (sendspace < minspace) { ICL_WARN("kern.icl.sendspace too low; must be at least %zd", minspace); sendspace = minspace; } if (recvspace < minspace) { ICL_WARN("kern.icl.recvspace too low; must be at least %zd", minspace); recvspace = minspace; } error = soreserve(ic->ic_socket, sendspace, recvspace); if (error != 0) { ICL_WARN("soreserve failed with error %d", error); icl_cxgbei_conn_close(ic); return (error); } ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; /* * Disable Nagle. */ bzero(&opt, sizeof(opt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = IPPROTO_TCP; opt.sopt_name = TCP_NODELAY; opt.sopt_val = &one; opt.sopt_valsize = sizeof(one); error = sosetopt(ic->ic_socket, &opt); if (error != 0) { ICL_WARN("disabling TCP_NODELAY failed with error %d", error); icl_cxgbei_conn_close(ic); return (error); } /* * Register socket upcall, to get notified about incoming PDUs * and free space to send outgoing ones. */ SOCKBUF_LOCK(&ic->ic_socket->so_snd); soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic); SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); SOCKBUF_LOCK(&ic->ic_socket->so_rcv); soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic); SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); return (0); } int icl_cxgbei_conn_handoff(struct icl_conn *ic, int fd) { struct file *fp; struct socket *so; cap_rights_t rights; int error; ICL_CONN_LOCK_ASSERT_NOT(ic); /* * Steal the socket from userland. 
 */
	error = fget(curthread, fd,
	    cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
	if (error != 0)
		return (error);
	if (fp->f_type != DTYPE_SOCKET) {
		fdrop(fp, curthread);
		return (EINVAL);
	}
	so = fp->f_data;
	/* iSCSI requires a stream socket (TCP). */
	if (so->so_type != SOCK_STREAM) {
		fdrop(fp, curthread);
		return (EINVAL);
	}

	ICL_CONN_LOCK(ic);

	if (ic->ic_socket != NULL) {
		ICL_CONN_UNLOCK(ic);
		fdrop(fp, curthread);
		return (EBUSY);
	}

	/*
	 * Detach the socket from the file so userland can no longer reach
	 * it; the connection now owns the socket reference.
	 */
	ic->ic_socket = fp->f_data;
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	fdrop(fp, curthread);
	ICL_CONN_UNLOCK(ic);

	error = icl_conn_start(ic);
	if(!error) {
		/* Enable the Chelsio ULP (offload) mode on the socket. */
		cxgbei_conn_set_ulp_mode(ic->ic_socket, ic);
	}
	return (error);
}

/*
 * Tear down the connection: clear the socket upcalls, wake up and wait
 * for the send/receive threads to exit, close the socket, and free any
 * partially-received or still-queued PDUs.  Returns quietly if the
 * connection is already closed.
 */
void
icl_cxgbei_conn_close(struct icl_conn *ic)
{
	struct icl_pdu *pdu;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	ICL_CONN_LOCK(ic);
	if (ic->ic_socket == NULL) {
		ICL_CONN_UNLOCK(ic);
		return;
	}

	/*
	 * Deregister socket upcalls.
	 *
	 * NOTE(review): ic_socket is dereferenced here after dropping the
	 * connection lock — presumably safe because only this function
	 * clears ic_socket, but worth confirming against other callers.
	 */
	ICL_CONN_UNLOCK(ic);
	SOCKBUF_LOCK(&ic->ic_socket->so_snd);
	if (ic->ic_socket->so_snd.sb_upcall != NULL)
		soupcall_clear(ic->ic_socket, SO_SND);
	SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
	SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
	if (ic->ic_socket->so_rcv.sb_upcall != NULL)
		soupcall_clear(ic->ic_socket, SO_RCV);
	SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);

	ICL_CONN_LOCK(ic);
	ic->ic_disconnecting = true;

	/*
	 * Wake up the threads, so they can properly terminate.
	 */
	while (ic->ic_receive_running || ic->ic_send_running) {
		//ICL_DEBUG("waiting for send/receive threads to terminate");
		cv_signal(&ic->ic_receive_cv);
		cv_signal(&ic->ic_send_cv);
		cv_wait(&ic->ic_send_cv, ic->ic_lock);
	}
	//ICL_DEBUG("send/receive threads terminated");

	ICL_CONN_UNLOCK(ic);
	/* Let the ULP release its per-connection state before soclose(). */
	cxgbei_conn_close(ic->ic_socket);
	soclose(ic->ic_socket);
	ICL_CONN_LOCK(ic);
	ic->ic_socket = NULL;

	if (ic->ic_receive_pdu != NULL) {
		//ICL_DEBUG("freeing partially received PDU");
		icl_pdu_free(ic->ic_receive_pdu);
		ic->ic_receive_pdu = NULL;
	}

	/*
	 * Remove any outstanding PDUs from the send queue.
 */
	while (!STAILQ_EMPTY(&ic->ic_to_send)) {
		pdu = STAILQ_FIRST(&ic->ic_to_send);
		STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
		icl_pdu_free(pdu);
	}

	KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
	    ("destroying session with non-empty send queue"));
#ifdef DIAGNOSTIC
	KASSERT(ic->ic_outstanding_pdus == 0,
	    ("destroying session with %d outstanding PDUs",
	     ic->ic_outstanding_pdus));
#endif
	ICL_CONN_UNLOCK(ic);
}

/*
 * Return true if the connection has a socket with no pending error.
 */
bool
icl_cxgbei_conn_connected(struct icl_conn *ic)
{
	ICL_CONN_LOCK_ASSERT_NOT(ic);

	ICL_CONN_LOCK(ic);
	if (ic->ic_socket == NULL) {
		ICL_CONN_UNLOCK(ic);
		return (false);
	}
	if (ic->ic_socket->so_error != 0) {
		ICL_CONN_UNLOCK(ic);
		return (false);
	}
	ICL_CONN_UNLOCK(ic);
	return (true);
}

/*
 * Set up per-task state for an initiator task: build the task tag,
 * allocate the transfer-zone buffer returned via *prvp, and reserve a
 * DDP ITT for the tag.  Returns 0 or ENOMEM.
 */
int
icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct ccb_scsiio *csio,
    uint32_t *task_tagp, void **prvp)
{
	void *prv;

	*task_tagp = icl_conn_build_tasktag(ic, *task_tagp);

	prv = uma_zalloc(icl_transfer_zone, M_NOWAIT | M_ZERO);
	if (prv == NULL)
		return (ENOMEM);

	*prvp = prv;

	cxgbei_conn_task_reserve_itt(ic, prvp, csio, task_tagp);

	return (0);
}

/*
 * Release the per-task state allocated by icl_cxgbei_conn_task_setup().
 */
void
icl_cxgbei_conn_task_done(struct icl_conn *ic, void *prv)
{
	cxgbei_cleanup_task(ic, prv);
	uma_zfree(icl_transfer_zone, prv);
}

/*
 * Target-side counterpart of task_setup: build the transfer tag,
 * allocate per-transfer state, and reserve a DDP TTT for the tag.
 * Returns 0 or ENOMEM.
 */
int
icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
    uint32_t *transfer_tag, void **prvp)
{
	void *prv;

	*transfer_tag = icl_conn_build_tasktag(ic, *transfer_tag);

	prv = uma_zalloc(icl_transfer_zone, M_NOWAIT | M_ZERO);
	if (prv == NULL)
		return (ENOMEM);

	*prvp = prv;

	cxgbei_conn_transfer_reserve_ttt(ic, prvp, io, transfer_tag);

	return (0);
}

/*
 * Release the per-transfer state allocated by
 * icl_cxgbei_conn_transfer_setup().
 */
void
icl_cxgbei_conn_transfer_done(struct icl_conn *ic, void *prv)
{
	cxgbei_cleanup_task(ic, prv);
	uma_zfree(icl_transfer_zone, prv);
}

/*
 * Report this backend's maximum data segment length (8 KB) to the
 * ICL core.
 */
static int
icl_cxgbei_limits(size_t *limitp)
{
	*limitp = 8 * 1024;
	return (0);
}

#ifdef ICL_KERNEL_PROXY
/*
 * Kernel-proxy variant of the handoff: attach an already-open kernel
 * socket to the connection instead of stealing one from userland.
 */
int
icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so)
{
	int error;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	if (so->so_type != SOCK_STREAM)
		return (EINVAL);

	ICL_CONN_LOCK(ic);
	if (ic->ic_socket != NULL) {
		ICL_CONN_UNLOCK(ic);
		return (EBUSY);
	}
	ic->ic_socket = so;
	ICL_CONN_UNLOCK(ic);

	error = icl_conn_start(ic);

	return (error);
}
#endif /* ICL_KERNEL_PROXY */

/*
 * Module load: create the PDU and per-transfer UMA zones and register
 * this backend with the ICL core.
 */
static int
icl_cxgbei_load(void)
{
	int error;

	icl_pdu_zone = uma_zcreate("icl_pdu",
	    sizeof(struct icl_pdu), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	icl_transfer_zone = uma_zcreate("icl_transfer",
	    16 * 1024, NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);

	refcount_init(&icl_ncons, 0);

	/*
	 * Register as the "cxgbei" offload backend with priority 100.
	 */
	error = icl_register("cxgbei", 100, icl_cxgbei_limits,
	    icl_cxgbei_new_conn);
	/* Fires only on INVARIANTS kernels; error is still propagated. */
	KASSERT(error == 0, ("failed to register"));

	return (error);
}

/*
 * Module unload: refuse while connections exist, then unregister and
 * destroy the UMA zones.
 */
static int
icl_cxgbei_unload(void)
{
	if (icl_ncons != 0)
		return (EBUSY);

	icl_unregister("cxgbei");

	uma_zdestroy(icl_pdu_zone);
	uma_zdestroy(icl_transfer_zone);

	return (0);
}

/*
 * Module event dispatcher.
 * NOTE(review): unhandled events (e.g. MOD_QUIESCE) return EINVAL
 * rather than EOPNOTSUPP — confirm this matches the intended unload
 * behavior.
 */
static int
icl_cxgbei_modevent(module_t mod, int what, void *arg)
{
	switch (what) {
	case MOD_LOAD:
		return (icl_cxgbei_load());
	case MOD_UNLOAD:
		return (icl_cxgbei_unload());
	default:
		return (EINVAL);
	}
}

moduledata_t icl_cxgbei_data = {
	"icl_cxgbei",
	icl_cxgbei_modevent,
	0
};

/* Depends on the core icl module; loaded with the other drivers. */
DECLARE_MODULE(icl_cxgbei, icl_cxgbei_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_cxgbei, icl, 1, 1, 1);
MODULE_VERSION(icl_cxgbei, 1);