Index: projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei.c =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei.c (revision 290971) +++ projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei.c (revision 290972) @@ -1,1378 +1,977 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * * Chelsio T5xx iSCSI driver * * Written by: Sreenivasa Honnur * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include -#include -#include /* for PCIE_MEM_ACCESS */ -#include -#include "cxgbei.h" - -#include "cxgbei_ulp2_ddp.h" - #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include -/* forward declarations */ -struct icl_pdu * icl_pdu_new_empty(struct icl_conn *, int ); -void icl_pdu_free(struct icl_pdu *); +#include "common/common.h" +#include "common/t4_msg.h" +#include "common/t4_regs.h" /* for PCIE_MEM_ACCESS */ +#include "tom/t4_tom.h" +#include "cxgbei.h" +#include "cxgbei_ulp2_ddp.h" -/* mbuf_tag management functions */ -struct ulp_mbuf_cb * -get_ulp_mbuf_cb(struct mbuf *m) -{ - struct m_tag *mtag = NULL; +/* XXX some header instead. */ +struct icl_pdu *icl_cxgbei_conn_new_pdu(struct icl_conn *, int); +void icl_cxgbei_conn_pdu_free(struct icl_conn *, struct icl_pdu *); - mtag = m_tag_get(CXGBE_ISCSI_MBUF_TAG, sizeof(struct ulp_mbuf_cb), - M_NOWAIT); - if (mtag == NULL) { - printf("%s: mtag alloc failed\n", __func__); - return NULL; - } - bzero(mtag + 1, sizeof(struct ulp_mbuf_cb)); - m_tag_prepend(m, mtag); - - return ((struct ulp_mbuf_cb *)(mtag + 1)); -} - -static struct ulp_mbuf_cb * -find_ulp_mbuf_cb(struct mbuf *m) -{ - struct m_tag *mtag = NULL; - - if ((mtag = m_tag_find(m, CXGBE_ISCSI_MBUF_TAG, NULL)) == NULL) - return (NULL); - - return ((struct ulp_mbuf_cb *)(mtag + 1)); -} - /* * Direct Data Placement - * Directly place the iSCSI Data-In or Data-Out PDU's payload into pre-posted * final destination host-memory buffers based on the Initiator Task Tag (ITT) * in Data-In or Target Task Tag (TTT) in Data-Out PDUs. * The host memory address is programmed into h/w in the format of pagepod * entries. * The location of the pagepod entry is encoded into ddp tag which is used as * the base for ITT/TTT. */ /* * functions to program the pagepod in h/w */ static void inline ppod_set(struct pagepod *ppod, struct cxgbei_ulp2_pagepod_hdr *hdr, struct cxgbei_ulp2_gather_list *gl, unsigned int pidx) { int i; memcpy(ppod, hdr, sizeof(*hdr)); for (i = 0; i < (PPOD_PAGES + 1); i++, pidx++) { ppod->addr[i] = pidx < gl->nelem ? cpu_to_be64(gl->dma_sg[pidx].phys_addr) : 0ULL; } } static void inline ppod_clear(struct pagepod *ppod) { memset(ppod, 0, sizeof(*ppod)); } static inline void ulp_mem_io_set_hdr(struct adapter *sc, int tid, struct ulp_mem_io *req, unsigned int wr_len, unsigned int dlen, unsigned int pm_addr) { struct ulptx_idata *idata = (struct ulptx_idata *)(req + 1); INIT_ULPTX_WR(req, wr_len, 0, 0); req->cmd = cpu_to_be32(V_ULPTX_CMD(ULP_TX_MEM_WRITE) | V_ULP_MEMIO_ORDER(is_t4(sc)) | V_T5_ULP_MEMIO_IMM(is_t5(sc))); req->dlen = htonl(V_ULP_MEMIO_DATA_LEN(dlen >> 5)); req->len16 = htonl(DIV_ROUND_UP(wr_len - sizeof(req->wr), 16) | V_FW_WR_FLOWID(tid)); req->lock_addr = htonl(V_ULP_MEMIO_ADDR(pm_addr >> 5)); idata->cmd_more = htonl(V_ULPTX_CMD(ULP_TX_SC_IMM)); idata->len = htonl(dlen); } #define PPOD_SIZE sizeof(struct pagepod) #define ULPMEM_IDATA_MAX_NPPODS 1 /* 256/PPOD_SIZE */ #define PCIE_MEMWIN_MAX_NPPODS 16 /* 1024/PPOD_SIZE */ static int ppod_write_idata(struct cxgbei_data *ci, struct cxgbei_ulp2_pagepod_hdr *hdr, unsigned int idx, unsigned int npods, struct cxgbei_ulp2_gather_list *gl, unsigned int gl_pidx, struct toepcb *toep) { u_int dlen = PPOD_SIZE * npods; u_int pm_addr = idx * PPOD_SIZE + ci->llimit; u_int wr_len = roundup(sizeof(struct ulp_mem_io) + sizeof(struct ulptx_idata) + dlen, 16); struct ulp_mem_io *req; struct ulptx_idata *idata; struct pagepod *ppod; u_int i; struct wrqe *wr; struct adapter *sc = toep->port->adapter; wr = alloc_wrqe(wr_len, toep->ctrlq); if (wr == NULL) { CXGBE_UNIMPLEMENTED("ppod_write_idata: alloc_wrqe failure"); return (ENOMEM); } req = wrtod(wr); memset(req, 0, wr_len); ulp_mem_io_set_hdr(sc, toep->tid, req, wr_len, dlen, pm_addr); idata = (struct ulptx_idata *)(req + 1); ppod = (struct pagepod *)(idata + 1); for (i = 0; i < npods; i++, ppod++, gl_pidx += PPOD_PAGES) { if (!hdr) /* clear the pagepod */ ppod_clear(ppod); else /* set the pagepod */ ppod_set(ppod, hdr, gl, gl_pidx); } t4_wrq_tx(sc, wr); return 0; } int -t4_ddp_set_map(struct cxgbei_data *ci, void *isockp, +t4_ddp_set_map(struct cxgbei_data *ci, void *iccp, struct cxgbei_ulp2_pagepod_hdr *hdr, u_int idx, u_int npods, struct cxgbei_ulp2_gather_list *gl, int reply) { - struct iscsi_socket *isock = (struct iscsi_socket *)isockp; - struct toepcb *toep = isock->toep; + struct icl_cxgbei_conn *icc = (struct icl_cxgbei_conn *)iccp; + struct toepcb *toep = icc->toep; int err; unsigned int pidx = 0, w_npods = 0, cnt; /* * on T4, if we use a mix of IMMD and DSGL with ULP_MEM_WRITE, * the order would not be garanteed, so we will stick with IMMD */ gl->tid = toep->tid; gl->port_id = toep->port->port_id; gl->egress_dev = (void *)toep->port->ifp; /* send via immediate data */ for (; w_npods < npods; idx += cnt, w_npods += cnt, pidx += PPOD_PAGES) { cnt = npods - w_npods; if (cnt > ULPMEM_IDATA_MAX_NPPODS) cnt = ULPMEM_IDATA_MAX_NPPODS; - err = ppod_write_idata(ci, hdr, idx, cnt, gl, - pidx, toep); + err = ppod_write_idata(ci, hdr, idx, cnt, gl, pidx, toep); if (err) { printf("%s: ppod_write_idata failed\n", __func__); break; } } return err; } void t4_ddp_clear_map(struct cxgbei_data *ci, struct cxgbei_ulp2_gather_list *gl, - u_int tag, u_int idx, u_int npods, struct iscsi_socket *isock) + u_int tag, u_int idx, u_int npods, struct icl_cxgbei_conn *icc) { - struct toepcb *toep = isock->toep; + struct toepcb *toep = icc->toep; int err = -1; u_int pidx = 0; u_int w_npods = 0; u_int cnt; for (; w_npods < npods; idx += cnt, w_npods += cnt, pidx += PPOD_PAGES) { cnt = npods - w_npods; if (cnt > ULPMEM_IDATA_MAX_NPPODS) cnt = ULPMEM_IDATA_MAX_NPPODS; err = ppod_write_idata(ci, NULL, idx, cnt, gl, 0, toep); if (err) break; } } static int cxgbei_map_sg(struct cxgbei_sgl *sgl, struct ccb_scsiio *csio) { unsigned int data_len = csio->dxfer_len; unsigned int sgoffset = (uint64_t)csio->data_ptr & PAGE_MASK; unsigned int nsge; unsigned char *sgaddr = csio->data_ptr; unsigned int len = 0; nsge = (csio->dxfer_len + sgoffset + PAGE_SIZE - 1) >> PAGE_SHIFT; sgl->sg_addr = sgaddr; sgl->sg_offset = sgoffset; if (data_len < (PAGE_SIZE - sgoffset)) len = data_len; else len = PAGE_SIZE - sgoffset; sgl->sg_length = len; data_len -= len; sgaddr += len; sgl = sgl+1; while (data_len > 0) { sgl->sg_addr = sgaddr; len = (data_len < PAGE_SIZE)? data_len: PAGE_SIZE; sgl->sg_length = len; sgaddr += len; data_len -= len; sgl = sgl + 1; } return nsge; } static int cxgbei_map_sg_tgt(struct cxgbei_sgl *sgl, union ctl_io *io) { unsigned int data_len, sgoffset, nsge; unsigned char *sgaddr; unsigned int len = 0, index = 0, ctl_sg_count, i; struct ctl_sg_entry ctl_sg_entry, *ctl_sglist; if (io->scsiio.kern_sg_entries > 0) { ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr; ctl_sg_count = io->scsiio.kern_sg_entries; } else { ctl_sglist = &ctl_sg_entry; ctl_sglist->addr = io->scsiio.kern_data_ptr; ctl_sglist->len = io->scsiio.kern_data_len; ctl_sg_count = 1; } sgaddr = sgl->sg_addr = ctl_sglist[index].addr; sgoffset = sgl->sg_offset = (uint64_t)sgl->sg_addr & PAGE_MASK; data_len = ctl_sglist[index].len; if (data_len < (PAGE_SIZE - sgoffset)) len = data_len; else len = PAGE_SIZE - sgoffset; sgl->sg_length = len; data_len -= len; sgaddr += len; sgl = sgl+1; len = 0; for (i = 0; i< ctl_sg_count; i++) len += ctl_sglist[i].len; nsge = (len + sgoffset + PAGE_SIZE -1) >> PAGE_SHIFT; while (data_len > 0) { sgl->sg_addr = sgaddr; len = (data_len < PAGE_SIZE)? data_len: PAGE_SIZE; sgl->sg_length = len; sgaddr += len; data_len -= len; sgl = sgl + 1; if (data_len == 0) { if (index == ctl_sg_count - 1) break; index++; sgaddr = ctl_sglist[index].addr; data_len = ctl_sglist[index].len; } } return nsge; } static int -t4_sk_ddp_tag_reserve(struct cxgbei_data *ci, struct iscsi_socket *isock, +t4_sk_ddp_tag_reserve(struct cxgbei_data *ci, struct icl_cxgbei_conn *icc, u_int xferlen, struct cxgbei_sgl *sgl, u_int sgcnt, u_int *ddp_tag) { struct cxgbei_ulp2_gather_list *gl; int err = -EINVAL; - struct toepcb *toep = isock->toep; + struct toepcb *toep = icc->toep; gl = cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec(xferlen, sgl, sgcnt, ci, 0); if (gl) { - err = cxgbei_ulp2_ddp_tag_reserve(ci, isock, toep->tid, + err = cxgbei_ulp2_ddp_tag_reserve(ci, icc, toep->tid, &ci->tag_format, ddp_tag, gl, 0, 0); if (err) { cxgbei_ulp2_ddp_release_gl(ci, gl); } } return err; } static unsigned int cxgbei_task_reserve_itt(struct icl_conn *ic, void **prv, struct ccb_scsiio *scmd, unsigned int *itt) { + struct icl_cxgbei_conn *icc = ic_to_icc(ic); int xferlen = scmd->dxfer_len; struct cxgbei_task_data *tdata = NULL; struct cxgbei_sgl *sge = NULL; - struct iscsi_socket *isock = ic->ic_ofld_prv0; - struct toepcb *toep = isock->toep; + struct toepcb *toep = icc->toep; struct adapter *sc = td_adapter(toep->td); struct cxgbei_data *ci = sc->iscsi_softc; int err = -1; + MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); + tdata = (struct cxgbei_task_data *)*prv; if (xferlen == 0 || tdata == NULL) goto out; if (xferlen < DDP_THRESHOLD) goto out; if ((scmd->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { tdata->nsge = cxgbei_map_sg(tdata->sgl, scmd); if (tdata->nsge == 0) { CTR1(KTR_CXGBE, "%s: map_sg failed", __func__); return 0; } sge = tdata->sgl; tdata->sc_ddp_tag = *itt; CTR3(KTR_CXGBE, "%s: *itt:0x%x sc_ddp_tag:0x%x", __func__, *itt, tdata->sc_ddp_tag); if (cxgbei_ulp2_sw_tag_usable(&ci->tag_format, tdata->sc_ddp_tag)) { - err = t4_sk_ddp_tag_reserve(ci, isock, scmd->dxfer_len, + err = t4_sk_ddp_tag_reserve(ci, icc, scmd->dxfer_len, sge, tdata->nsge, &tdata->sc_ddp_tag); } else { CTR3(KTR_CXGBE, "%s: itt:0x%x sc_ddp_tag:0x%x not usable", __func__, *itt, tdata->sc_ddp_tag); } } out: if (err < 0) tdata->sc_ddp_tag = cxgbei_ulp2_set_non_ddp_tag(&ci->tag_format, *itt); return tdata->sc_ddp_tag; } static unsigned int cxgbei_task_reserve_ttt(struct icl_conn *ic, void **prv, union ctl_io *io, unsigned int *ttt) { - struct iscsi_socket *isock = ic->ic_ofld_prv0; - struct toepcb *toep = isock->toep; + struct icl_cxgbei_conn *icc = ic_to_icc(ic); + struct toepcb *toep = icc->toep; struct adapter *sc = td_adapter(toep->td); struct cxgbei_data *ci = sc->iscsi_softc; struct cxgbei_task_data *tdata = NULL; int xferlen, err = -1; struct cxgbei_sgl *sge = NULL; + MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); + xferlen = (io->scsiio.kern_data_len - io->scsiio.ext_data_filled); tdata = (struct cxgbei_task_data *)*prv; if ((xferlen == 0) || (tdata == NULL)) goto out; if (xferlen < DDP_THRESHOLD) goto out; tdata->nsge = cxgbei_map_sg_tgt(tdata->sgl, io); if (tdata->nsge == 0) { CTR1(KTR_CXGBE, "%s: map_sg failed", __func__); return 0; } sge = tdata->sgl; tdata->sc_ddp_tag = *ttt; if (cxgbei_ulp2_sw_tag_usable(&ci->tag_format, tdata->sc_ddp_tag)) { - err = t4_sk_ddp_tag_reserve(ci, isock, xferlen, sge, + err = t4_sk_ddp_tag_reserve(ci, icc, xferlen, sge, tdata->nsge, &tdata->sc_ddp_tag); } else { CTR2(KTR_CXGBE, "%s: sc_ddp_tag:0x%x not usable", __func__, tdata->sc_ddp_tag); } out: if (err < 0) tdata->sc_ddp_tag = cxgbei_ulp2_set_non_ddp_tag(&ci->tag_format, *ttt); return tdata->sc_ddp_tag; } static int -t4_sk_ddp_tag_release(struct iscsi_socket *isock, unsigned int ddp_tag) +t4_sk_ddp_tag_release(struct icl_cxgbei_conn *icc, unsigned int ddp_tag) { - struct toepcb *toep = isock->toep; + struct toepcb *toep = icc->toep; struct adapter *sc = td_adapter(toep->td); struct cxgbei_data *ci = sc->iscsi_softc; - cxgbei_ulp2_ddp_tag_release(ci, ddp_tag, isock); + cxgbei_ulp2_ddp_tag_release(ci, ddp_tag, icc); return (0); } static int cxgbei_ddp_init(struct adapter *sc, struct cxgbei_data *ci) { int nppods, bits, max_sz, rc; static const u_int pgsz_order[] = {0, 1, 2, 3}; MPASS(sc->vres.iscsi.size > 0); ci->llimit = sc->vres.iscsi.start; ci->ulimit = sc->vres.iscsi.start + sc->vres.iscsi.size - 1; max_sz = G_MAXRXDATA(t4_read_reg(sc, A_TP_PARA_REG2)); nppods = sc->vres.iscsi.size >> IPPOD_SIZE_SHIFT; if (nppods <= 1024) return (ENXIO); bits = fls(nppods); if (bits > IPPOD_IDX_MAX_SIZE) bits = IPPOD_IDX_MAX_SIZE; nppods = (1 << (bits - 1)) - 1; rc = bus_dma_tag_create(NULL, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, UINT32_MAX , 8, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &ci->ulp_ddp_tag); if (rc != 0) { device_printf(sc->dev, "%s: failed to create DMA tag: %u.\n", __func__, rc); return (rc); } ci->colors = malloc(nppods * sizeof(char), M_CXGBE, M_NOWAIT | M_ZERO); ci->gl_map = malloc(nppods * sizeof(struct cxgbei_ulp2_gather_list *), M_CXGBE, M_NOWAIT | M_ZERO); if (ci->colors == NULL || ci->gl_map == NULL) { bus_dma_tag_destroy(ci->ulp_ddp_tag); free(ci->colors, M_CXGBE); free(ci->gl_map, M_CXGBE); return (ENOMEM); } mtx_init(&ci->map_lock, "ddp lock", NULL, MTX_DEF | MTX_DUPOK); ci->max_txsz = ci->max_rxsz = min(max_sz, ULP2_MAX_PKT_SIZE); ci->nppods = nppods; ci->idx_last = nppods; ci->idx_bits = bits; ci->idx_mask = (1 << bits) - 1; ci->rsvd_tag_mask = (1 << (bits + IPPOD_IDX_SHIFT)) - 1; ci->tag_format.sw_bits = bits; ci->tag_format.rsvd_bits = bits; ci->tag_format.rsvd_shift = IPPOD_IDX_SHIFT; ci->tag_format.rsvd_mask = ci->idx_mask; t4_iscsi_init(sc, ci->idx_mask << IPPOD_IDX_SHIFT, pgsz_order); return (rc); } -static void -process_rx_iscsi_hdr(struct toepcb *toep, struct mbuf *m) +static int +do_rx_iscsi_hdr(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { - struct cpl_iscsi_hdr *cpl = mtod(m, struct cpl_iscsi_hdr *); - struct ulp_mbuf_cb *cb, *lcb; - struct mbuf *lmbuf; - u_char *byte; - struct iscsi_socket *isock = toep->ulpcb; - struct tcpcb *tp = intotcpcb(toep->inp); - u_int hlen, dlen, plen; + struct adapter *sc = iq->adapter; + struct cpl_iscsi_hdr *cpl = mtod(m, struct cpl_iscsi_hdr *); + u_int tid = GET_TID(cpl); + struct toepcb *toep = lookup_tid(sc, tid); + struct icl_cxgbei_conn *icc = toep->ulpcb; + struct icl_pdu *ip; + struct icl_cxgbei_pdu *icp; - MPASS(isock != NULL); + MPASS(icc != NULL); + MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); M_ASSERTPKTHDR(m); - mtx_lock(&isock->iscsi_rcvq_lock); + ip = icl_cxgbei_conn_new_pdu(&icc->ic, M_NOWAIT); + if (ip == NULL) + CXGBE_UNIMPLEMENTED("PDU allocation failure"); + icp = ip_to_icp(ip); + bcopy(mtod(m, caddr_t) + sizeof(*cpl), icp->ip.ip_bhs, sizeof(struct + iscsi_bhs)); + icp->pdu_flags = SBUF_ULP_FLAG_HDR_RCVD; - /* allocate m_tag to hold ulp info */ - cb = get_ulp_mbuf_cb(m); - if (cb == NULL) - CXGBE_UNIMPLEMENTED(__func__); + /* This is the start of a new PDU. There should be no old state. */ + MPASS(icc->icp == NULL); + icc->icp = icp; + icc->pdu_seq = ntohl(cpl->seq); - /* strip off CPL header */ - m_adj(m, sizeof(*cpl)); - - /* figure out if this is the pdu header or data */ - cb->ulp_mode = ULP_MODE_ISCSI; - if (isock->mbuf_ulp_lhdr == NULL) { - - isock->mbuf_ulp_lhdr = lmbuf = m; - lcb = cb; - cb->flags = SBUF_ULP_FLAG_HDR_RCVD; - /* we only update tp->rcv_nxt once per pdu */ - if (__predict_false(ntohl(cpl->seq) != tp->rcv_nxt)) { - panic("%s: seq# 0x%x (expected 0x%x) for tid %u", - __func__, ntohl(cpl->seq), tp->rcv_nxt, toep->tid); - } - byte = m->m_data; - hlen = ntohs(cpl->len); - dlen = ntohl(*(unsigned int *)(byte + 4)) & 0xFFFFFF; - - plen = ntohs(cpl->pdu_len_ddp); - lcb->pdulen = (hlen + dlen + 3) & (~0x3); - /* workaround for cpl->pdu_len_ddp since it does not include - the data digest count */ - if (dlen) - lcb->pdulen += isock->s_dcrc_len; - - tp->rcv_nxt += lcb->pdulen; - if (tp->rcv_wnd <= lcb->pdulen) - CTR3(KTR_CXGBE, "%s: Neg rcv_wnd:0x%lx pdulen:0x%x", - __func__, tp->rcv_wnd, lcb->pdulen); - tp->rcv_wnd -= lcb->pdulen; - tp->t_rcvtime = ticks; - } else { - lmbuf = isock->mbuf_ulp_lhdr; - lcb = find_ulp_mbuf_cb(lmbuf); - if (lcb == NULL) - CXGBE_UNIMPLEMENTED(__func__); - lcb->flags |= SBUF_ULP_FLAG_DATA_RCVD; - cb->flags = SBUF_ULP_FLAG_DATA_RCVD; - - /* padding */ - if ((m->m_len % 4) != 0) { - m->m_len += 4 - (m->m_len % 4); - } - } - mbufq_enqueue(&isock->iscsi_rcvq, m); - mtx_unlock(&isock->iscsi_rcvq_lock); -} - -/* hand over received PDU to iscsi_initiator */ -static void -iscsi_conn_receive_pdu(struct iscsi_socket *isock) -{ - struct icl_pdu *response = NULL; - struct icl_conn *ic = (struct icl_conn*)isock->s_conn; - struct mbuf *m; - struct ulp_mbuf_cb *cb = NULL; - int data_len; - - response = icl_pdu_new_empty(isock->s_conn, M_NOWAIT); - if (response == NULL) { - panic("%s: failed to alloc icl_pdu\n", __func__); - return; - } - m = mbufq_first(&isock->iscsi_rcvq); - if (m) { - cb = find_ulp_mbuf_cb(m); - if (cb == NULL) { - panic("%s: m:%p cb is NULL\n", __func__, m); - goto err_out; - } - if (!(cb->flags & SBUF_ULP_FLAG_STATUS_RCVD)) - goto err_out; - } - /* BHS */ - mbufq_dequeue(&isock->iscsi_rcvq); - data_len = cb->pdulen; - - CTR5(KTR_CXGBE, "%s: response:%p m:%p m_len:%d data_len:%d", - __func__, response, m, m->m_len, data_len); - response->ip_bhs_mbuf = m; - response->ip_bhs = mtod(response->ip_bhs_mbuf, struct iscsi_bhs *); - - /* data */ - if (cb->flags & SBUF_ULP_FLAG_DATA_RCVD) { - m = mbufq_first(&isock->iscsi_rcvq); - if (m == NULL) { - CTR1(KTR_CXGBE, "%s:No Data", __func__); - goto err_out; - } - mbufq_dequeue(&isock->iscsi_rcvq); - response->ip_data_mbuf = m; - response->ip_data_len += response->ip_data_mbuf->m_len; - } else { - /* Data is DDP'ed */ - response->ip_ofld_prv0 = 1; - } - (ic->ic_receive)(response); - return; - -err_out: - icl_pdu_free(response); - return; -} - -static void -process_rx_data_ddp(struct toepcb *toep, const struct cpl_rx_data_ddp *cpl) -{ - struct mbuf *lmbuf; - struct ulp_mbuf_cb *lcb, *lcb1; - unsigned int val, pdulen; - struct iscsi_socket *isock = toep->ulpcb; - struct inpcb *inp = toep->inp; - - MPASS(isock != NULL); - - if (isock->mbuf_ulp_lhdr == NULL) { - panic("%s: tid 0x%x, rcv RX_DATA_DDP w/o pdu header.\n", - __func__, toep->tid); - return; - } - mtx_lock(&isock->iscsi_rcvq_lock); - lmbuf = isock->mbuf_ulp_lhdr; - if (lmbuf->m_nextpkt) { - lcb1 = find_ulp_mbuf_cb(lmbuf->m_nextpkt); - lcb1->flags |= SBUF_ULP_FLAG_STATUS_RCVD; - } - lcb = find_ulp_mbuf_cb(isock->mbuf_ulp_lhdr); - if (lcb == NULL) { - CTR2(KTR_CXGBE, "%s: mtag NULL lmbuf :%p", __func__, lmbuf); - mtx_unlock(&isock->iscsi_rcvq_lock); - return; - } - lcb->flags |= SBUF_ULP_FLAG_STATUS_RCVD; - isock->mbuf_ulp_lhdr = NULL; - - if (ntohs(cpl->len) != lcb->pdulen) { - CTR3(KTR_CXGBE, "tid 0x%x, RX_DATA_DDP pdulen %u != %u.", - toep->tid, ntohs(cpl->len), lcb->pdulen); - CTR4(KTR_CXGBE, "%s: lmbuf:%p lcb:%p lcb->flags:0x%x", - __func__, lmbuf, lcb, lcb->flags); - } - - lcb->ddigest = ntohl(cpl->ulp_crc); - pdulen = lcb->pdulen; - - val = ntohl(cpl->ddpvld); - if (val & F_DDP_PADDING_ERR) - lcb->flags |= SBUF_ULP_FLAG_PAD_ERROR; - if (val & F_DDP_HDRCRC_ERR) - lcb->flags |= SBUF_ULP_FLAG_HCRC_ERROR; - if (val & F_DDP_DATACRC_ERR) - lcb->flags |= SBUF_ULP_FLAG_DCRC_ERROR; - if (!(lcb->flags & SBUF_ULP_FLAG_DATA_RCVD)) { - lcb->flags |= SBUF_ULP_FLAG_DATA_DDPED; - } -#ifdef __T4_DBG_DDP_FAILURE__ -// else - { - unsigned char *bhs = lmbuf->m_data; - unsigned char opcode = bhs[0]; - unsigned int dlen = ntohl(*(unsigned int *)(bhs + 4)) & 0xFFFFFF; - unsigned int ttt = ntohl(*(unsigned int *)(bhs + 20)); - unsigned int offset = ntohl(*(unsigned int *)(bhs + 40)); - - if (dlen >= 2096) { - /* data_out and should be ddp'ed */ - if ((opcode & 0x3F) == 0x05 && ttt != 0xFFFFFFFF) { - printf("CPL_RX_DATA_DDP: tid 0x%x, data-out %s ddp'ed\ - (%u+%u), ttt 0x%x, seq 0x%x, ddpvld 0x%x.\n", - toep->tid, - (lcb->flags & SBUF_ULP_FLAG_DATA_DDPED) ? "IS" : "NOT", - offset, dlen, ttt, ntohl(cpl->seq), ntohl(cpl->ddpvld)); - } - if ((opcode & 0x3F) == 0x25) { - //if (!(lcb->flags & SBUF_ULP_FLAG_DATA_DDPED)) - printf("CPL_RX_DATA_DDP: tid 0x%x, data-in %s ddp'ed\ - (%u+%u), seq 0x%x, ddpvld 0x%x.\n", - toep->tid, - (lcb->flags & SBUF_ULP_FLAG_DATA_DDPED) ? "IS" : "NOT", - offset, dlen, ntohl(cpl->seq), ntohl(cpl->ddpvld)); - } - } - } +#if 0 + CTR4(KTR_CXGBE, "%s: tid %u, cpl->len hlen %u, m->m_len hlen %u", + __func__, tid, ntohs(cpl->len), m->m_len); #endif - iscsi_conn_receive_pdu(isock); - mtx_unlock(&isock->iscsi_rcvq_lock); - - /* update rx credits */ - INP_WLOCK(inp); - /* XXXNP: does this want the so_rcv lock? (happens to be the same) */ - SOCK_LOCK(inp->inp_socket); - toep->sb_cc += pdulen; - SOCK_UNLOCK(inp->inp_socket); - t4_rcvd(&toep->td->tod, intotcpcb(inp)); - INP_WUNLOCK(inp); - return; + m_freem(m); + return (0); } -static void -drop_fw_acked_ulp_data(struct toepcb *toep, int len) -{ - struct mbuf *m, *next; - struct ulp_mbuf_cb *cb; - struct icl_pdu *req; - struct iscsi_socket *isock = toep->ulpcb; - - MPASS(len > 0); - - mtx_lock(&isock->ulp2_wrq_lock); - while (len > 0) { - m = mbufq_dequeue(&isock->ulp2_wrq); - MPASS(m != NULL); /* excess credits */ - - for (next = m; next != NULL; next = next->m_next) { - MPASS(len >= next->m_len); /* excess credits */ - len -= next->m_len; - } - - cb = find_ulp_mbuf_cb(m); - if (cb && cb->pdu) { - req = (struct icl_pdu *)cb->pdu; - req->ip_bhs_mbuf = NULL; - icl_pdu_free(req); - } - m_freem(m); - } - mtx_unlock(&isock->ulp2_wrq_lock); -} - static int -do_rx_iscsi_hdr(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) +do_rx_iscsi_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; - struct cpl_iscsi_hdr *cpl = mtod(m, struct cpl_iscsi_hdr *); /* XXXNP */ + struct cpl_iscsi_data *cpl = mtod(m, struct cpl_iscsi_data *); u_int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); + struct icl_cxgbei_conn *icc = toep->ulpcb; + struct icl_cxgbei_pdu *icp = icc->icp; - process_rx_iscsi_hdr(toep, m); + MPASS(icc != NULL); + MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); + M_ASSERTPKTHDR(m); + /* Must already have received the header (but not the data). */ + MPASS(icp != NULL); + MPASS(icp->pdu_flags == SBUF_ULP_FLAG_HDR_RCVD); + MPASS(icp->ip.ip_data_mbuf == NULL); + MPASS(icp->ip.ip_data_len == 0); + + m_adj(m, sizeof(*cpl)); + + icp->pdu_flags |= SBUF_ULP_FLAG_DATA_RCVD; + icp->ip.ip_data_mbuf = m; + icp->ip.ip_data_len = m->m_pkthdr.len; /* XXXNP: round up to 4? */ + +#if 0 + CTR4(KTR_CXGBE, "%s: tid %u, cpl->len dlen %u, m->m_len dlen %u", + __func__, tid, ntohs(cpl->len), m->m_len); +#endif + return (0); } static int do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1); u_int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); + struct inpcb *inp = toep->inp; + struct tcpcb *tp; + struct icl_cxgbei_conn *icc = toep->ulpcb; + struct icl_conn *ic = &icc->ic; + struct icl_cxgbei_pdu *icp = icc->icp; + u_int pdu_len, val; - process_rx_data_ddp(toep, cpl); + MPASS(icc != NULL); + MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); + MPASS(m == NULL); - return (0); -} + /* Must already be assembling a PDU. */ + MPASS(icp != NULL); + MPASS(icp->pdu_flags & SBUF_ULP_FLAG_HDR_RCVD); /* Data is optional. */ -static int -t4_ulp_mbuf_push(struct iscsi_socket *isock, struct mbuf *m) -{ - struct toepcb *toep = isock->toep; + icp->pdu_flags |= SBUF_ULP_FLAG_STATUS_RCVD; - /* append mbuf to ULP queue */ - mtx_lock(&isock->ulp2_writeq_lock); - mbufq_enqueue(&isock->ulp2_writeq, m); - mtx_unlock(&isock->ulp2_writeq_lock); + pdu_len = ntohs(cpl->len); /* includes everything. */ - INP_WLOCK(toep->inp); - t4_ulp_push_frames(toep->td->tod.tod_softc, toep, 0); - INP_WUNLOCK(toep->inp); + INP_WLOCK(inp); + /* XXXNP: check inp for dropped etc., and toep for abort in progress. */ - return (0); -} + tp = intotcpcb(inp); + MPASS(icc->pdu_seq == tp->rcv_nxt); + MPASS(tp->rcv_wnd >= pdu_len); + tp->rcv_nxt += pdu_len; + tp->rcv_wnd -= pdu_len; + tp->t_rcvtime = ticks; -static struct mbuf * -get_writeq_len(struct toepcb *toep, int *qlen) -{ - struct iscsi_socket *isock = toep->ulpcb; + /* update rx credits */ + toep->rx_credits += pdu_len; + t4_rcvd(&toep->td->tod, tp); /* XXX: sc->tom_softc.tod */ + INP_WUNLOCK(inp); - *qlen = mbufq_len(&isock->ulp2_writeq); - return (mbufq_first(&isock->ulp2_writeq)); -} + val = ntohl(cpl->ddpvld); + if (val & F_DDP_PADDING_ERR) + icp->pdu_flags |= SBUF_ULP_FLAG_PAD_ERROR; + if (val & F_DDP_HDRCRC_ERR) + icp->pdu_flags |= SBUF_ULP_FLAG_HCRC_ERROR; + if (val & F_DDP_DATACRC_ERR) + icp->pdu_flags |= SBUF_ULP_FLAG_DCRC_ERROR; + if (icp->ip.ip_data_mbuf == NULL) + icp->pdu_flags |= SBUF_ULP_FLAG_DATA_DDPED; -static struct mbuf * -do_writeq_next(struct toepcb *toep) -{ - struct iscsi_socket *isock = toep->ulpcb; - struct mbuf *m; +#if 0 + CTR4(KTR_CXGBE, "%s: tid %u, pdu_len %u, pdu_flags 0x%x", + __func__, tid, pdu_len, icp->pdu_flags); +#endif - mtx_lock(&isock->ulp2_writeq_lock); - m = mbufq_dequeue(&isock->ulp2_writeq); - mtx_unlock(&isock->ulp2_writeq_lock); + icc->icp = NULL; + ic->ic_receive(&icp->ip); - mtx_lock(&isock->ulp2_wrq_lock); - mbufq_enqueue(&isock->ulp2_wrq, m); - mtx_unlock(&isock->ulp2_wrq_lock); - - return (mbufq_first(&isock->ulp2_writeq)); + return (0); } static void t4_register_cpl_handler_with_tom(struct adapter *sc) { t4_register_cpl_handler(sc, CPL_ISCSI_HDR, do_rx_iscsi_hdr); - t4_register_cpl_handler(sc, CPL_ISCSI_DATA, do_rx_iscsi_hdr); + t4_register_cpl_handler(sc, CPL_ISCSI_DATA, do_rx_iscsi_data); t4_register_cpl_handler(sc, CPL_RX_ISCSI_DDP, do_rx_iscsi_ddp); } static void t4_unregister_cpl_handler_with_tom(struct adapter *sc) { t4_register_cpl_handler(sc, CPL_ISCSI_HDR, NULL); t4_register_cpl_handler(sc, CPL_ISCSI_DATA, NULL); t4_register_cpl_handler(sc, CPL_RX_ISCSI_DDP, NULL); } -static int -send_set_tcb_field(struct toepcb * toep, uint16_t word, uint64_t mask, - uint64_t val, int no_reply) -{ - struct wrqe *wr; - struct cpl_set_tcb_field *req; - - wr = alloc_wrqe(sizeof(*req), toep->ctrlq); - if (wr == NULL) - return EINVAL; - req = wrtod(wr); - - INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid); - req->reply_ctrl = htobe16(V_NO_REPLY(no_reply) | - V_QUEUENO(toep->ofld_rxq->iq.abs_id)); - req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); - req->mask = htobe64(mask); - req->val = htobe64(val); - - t4_wrq_tx(toep->td->tod.tod_softc, wr); - - return (0); -} - -static int -cxgbei_set_ulp_mode(struct toepcb *toep, u_char hcrc, u_char dcrc) -{ - int val = 0; - - if (hcrc) - val |= ULP_CRC_HEADER; - if (dcrc) - val |= ULP_CRC_DATA; - val <<= 4; - val |= ULP_MODE_ISCSI; - - return (send_set_tcb_field(toep, 0, 0xfff, val, 1)); -} - /* initiator */ void cxgbei_conn_task_reserve_itt(void *conn, void **prv, void *scmd, unsigned int *itt) { unsigned int tag; tag = cxgbei_task_reserve_itt(conn, prv, scmd, itt); if (tag) *itt = htonl(tag); return; } /* target */ void cxgbei_conn_transfer_reserve_ttt(void *conn, void **prv, void *scmd, unsigned int *ttt) { unsigned int tag; tag = cxgbei_task_reserve_ttt(conn, prv, scmd, ttt); if (tag) *ttt = htonl(tag); return; } void cxgbei_cleanup_task(void *conn, void *ofld_priv) { struct icl_conn *ic = (struct icl_conn *)conn; + struct icl_cxgbei_conn *icc = ic_to_icc(ic); struct cxgbei_task_data *tdata = ofld_priv; - struct iscsi_socket *isock = ic->ic_ofld_prv0; - struct toepcb *toep = isock->toep; - struct adapter *sc = td_adapter(toep->td); + struct adapter *sc = icc->sc; struct cxgbei_data *ci = sc->iscsi_softc; - MPASS(isock != NULL); + MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); MPASS(tdata != NULL); if (cxgbei_ulp2_is_ddp_tag(&ci->tag_format, tdata->sc_ddp_tag)) - t4_sk_ddp_tag_release(isock, tdata->sc_ddp_tag); + t4_sk_ddp_tag_release(icc, tdata->sc_ddp_tag); memset(tdata, 0, sizeof(*tdata)); } -static void -t4_sk_tx_mbuf_setmode(struct icl_pdu *req, void *toep, void *mbuf, - unsigned char mode, unsigned char hcrc, unsigned char dcrc) -{ - struct mbuf *m = (struct mbuf *)mbuf; - struct ulp_mbuf_cb *cb; - - cb = get_ulp_mbuf_cb(m); - if (cb == NULL) - return; - cb->ulp_mode = ULP_MODE_ISCSI << 4; - if (hcrc) - cb->ulp_mode |= 1; - if (dcrc) - cb->ulp_mode |= 2; - cb->pdu = req; - return; -} - -int -cxgbei_conn_xmit_pdu(struct icl_conn *ic, struct icl_pdu *req) -{ - struct mbuf *m = req->ip_bhs_mbuf; - struct iscsi_socket *isock = ic->ic_ofld_prv0; - struct toepcb *toep = isock->toep; - - t4_sk_tx_mbuf_setmode(req, toep, m, 2, - ic->ic_header_crc32c ? ISCSI_HEADER_DIGEST_SIZE : 0, - (req->ip_data_len && ic->ic_data_crc32c) ? ISCSI_DATA_DIGEST_SIZE : 0); - - t4_ulp_mbuf_push(isock, m); - return (0); -} - -int -cxgbei_conn_handoff(struct icl_conn *ic) -{ - struct tcpcb *tp = so_sototcpcb(ic->ic_socket); - struct toepcb *toep; - struct iscsi_socket *isock; - - if (!(tp->t_flags & TF_TOE)) - return (ENOTSUP); /* Connection is not offloaded. */ - MPASS(tp->tod != NULL); - MPASS(tp->t_toe != NULL); - - /* - * XXXNP: Seems broken. How can we assume that the tod/toep is what we - * think it is? - */ - - toep = tp->t_toe; - if (toep->ulp_mode) - return (EBUSY); /* Stay away if ulp_mode is already set. */ - - isock = malloc(sizeof(struct iscsi_socket), M_CXGBE, M_NOWAIT | M_ZERO); - if (isock == NULL) - return (ENOMEM); - isock->s_conn = ic; - isock->toep = toep; - isock->s_dcrc_len = ic->ic_data_crc32c ? 4 : 0; - - mbufq_init(&isock->iscsi_rcvq, INT_MAX); - mtx_init(&isock->iscsi_rcvq_lock,"isock_lock" , NULL, MTX_DEF); - - mbufq_init(&isock->ulp2_wrq, INT_MAX); - mtx_init(&isock->ulp2_wrq_lock,"ulp2_wrq lock" , NULL, MTX_DEF); - - mbufq_init(&isock->ulp2_writeq, INT_MAX); - mtx_init(&isock->ulp2_writeq_lock,"ulp2_writeq lock" , NULL, MTX_DEF); - - /* Move connection to ULP mode. */ - ic->ic_socket->so_options |= SO_NO_DDP; - toep->ulp_mode = ULP_MODE_ISCSI; - toep->ulpcb = isock; - ic->ic_ofld_prv0 = isock; - - return (cxgbei_set_ulp_mode(toep, ic->ic_header_crc32c, ic->ic_data_crc32c)); -} - -int -cxgbei_conn_close(struct icl_conn *ic) -{ - struct iscsi_socket *isock = ic->ic_ofld_prv0; - struct toepcb *toep = isock->toep; - struct mbuf *m; - struct ulp_mbuf_cb *cb; - struct icl_pdu *req; - - MPASS(isock != NULL); - - /* free isock Qs */ - /* - * XXXNP: some drained with lock held, some without. And the test for - * whether the lock has even been initialized is after it has been - * grabbed and released already. - * - * An even larger issue is whether the TCP connection is going down - * gracefully or not. Can't simply throw away stuff in send/rcv buffers - * if the TCP shutdown is supposed to be graceful. - */ - mbufq_drain(&isock->iscsi_rcvq); - mbufq_drain(&isock->ulp2_writeq); - - mtx_lock(&isock->ulp2_wrq_lock); - while ((m = mbufq_dequeue(&isock->ulp2_wrq)) != NULL) { - cb = find_ulp_mbuf_cb(m); - if (cb && cb->pdu) { - req = (struct icl_pdu *)cb->pdu; - req->ip_bhs_mbuf = NULL; - icl_pdu_free(req); - } - m_freem(m); - } - mtx_unlock(&isock->ulp2_wrq_lock); - - if (mtx_initialized(&isock->iscsi_rcvq_lock)) - mtx_destroy(&isock->iscsi_rcvq_lock); - - if (mtx_initialized(&isock->ulp2_wrq_lock)) - mtx_destroy(&isock->ulp2_wrq_lock); - - if (mtx_initialized(&isock->ulp2_writeq_lock)) - mtx_destroy(&isock->ulp2_writeq_lock); - - /* XXXNP: Should the ulpcb and ulp_mode be cleared here? */ - toep->ulp_mode = ULP_MODE_NONE; /* dubious without inp lock */ - - free(isock, M_CXGBE); - - return (0); -} - static int cxgbei_activate(struct adapter *sc) { struct cxgbei_data *ci; int rc; ASSERT_SYNCHRONIZED_OP(sc); if (uld_active(sc, ULD_ISCSI)) { KASSERT(0, ("%s: iSCSI offload already enabled on adapter %p", __func__, sc)); return (0); } if (sc->iscsicaps == 0 || sc->vres.iscsi.size == 0) { device_printf(sc->dev, "not iSCSI offload capable, or capability disabled.\n"); return (ENOSYS); } /* per-adapter softc for iSCSI */ ci = malloc(sizeof(*ci), M_CXGBE, M_ZERO | M_NOWAIT); if (ci == NULL) return (ENOMEM); rc = cxgbei_ddp_init(sc, ci); if (rc != 0) { free(ci, M_CXGBE); return (rc); } t4_register_cpl_handler_with_tom(sc); sc->iscsi_softc = ci; return (0); } static int cxgbei_deactivate(struct adapter *sc) { ASSERT_SYNCHRONIZED_OP(sc); if (sc->iscsi_softc != NULL) { cxgbei_ddp_cleanup(sc->iscsi_softc); t4_unregister_cpl_handler_with_tom(sc); free(sc->iscsi_softc, M_CXGBE); sc->iscsi_softc = NULL; } return (0); } static void cxgbei_activate_all(struct adapter *sc, void *arg __unused) { if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isact") != 0) return; /* Activate iSCSI if any port on this adapter has IFCAP_TOE enabled. */ if (sc->offload_map && !uld_active(sc, ULD_ISCSI)) (void) t4_activate_uld(sc, ULD_ISCSI); end_synchronized_op(sc, 0); } static void cxgbei_deactivate_all(struct adapter *sc, void *arg __unused) { if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isdea") != 0) return; if (uld_active(sc, ULD_ISCSI)) (void) t4_deactivate_uld(sc, ULD_ISCSI); end_synchronized_op(sc, 0); } static struct uld_info cxgbei_uld_info = { .uld_id = ULD_ISCSI, .activate = cxgbei_activate, .deactivate = cxgbei_deactivate, }; enum { CWT_RUNNING = 1, CWT_STOP = 2, CWT_STOPPED = 3, }; struct cxgbei_worker_thread_softc { struct mtx cwt_lock; struct cv cwt_cv; volatile int cwt_state; } __aligned(CACHE_LINE_SIZE); int worker_thread_count; static struct cxgbei_worker_thread_softc *cwt_softc; static struct proc *cxgbei_proc; static void cwt_main(void *arg) { struct cxgbei_worker_thread_softc *cwt = arg; MPASS(cwt != NULL); mtx_lock(&cwt->cwt_lock); MPASS(cwt->cwt_state == 0); cwt->cwt_state = CWT_RUNNING; cv_signal(&cwt->cwt_cv); for (;;) { cv_wait(&cwt->cwt_cv, &cwt->cwt_lock); if (cwt->cwt_state == CWT_STOP) break; } mtx_assert(&cwt->cwt_lock, MA_OWNED); cwt->cwt_state = CWT_STOPPED; cv_signal(&cwt->cwt_cv); mtx_unlock(&cwt->cwt_lock); kthread_exit(); } static int start_worker_threads(void) { int i, rc; struct cxgbei_worker_thread_softc *cwt; worker_thread_count = min(mp_ncpus, 32); cwt_softc = malloc(worker_thread_count * sizeof(*cwt), M_CXGBE, M_WAITOK | M_ZERO); MPASS(cxgbei_proc == NULL); for (i = 0, cwt = &cwt_softc[0]; i < worker_thread_count; i++, cwt++) { mtx_init(&cwt->cwt_lock, "cwt lock", NULL, MTX_DEF); cv_init(&cwt->cwt_cv, "cwt cv"); rc = kproc_kthread_add(cwt_main, cwt, &cxgbei_proc, NULL, 0, 0, "cxgbei", "%d", i); if (rc != 0) { printf("cxgbei: failed to start thread #%d/%d (%d)\n", i + 1, worker_thread_count, rc); mtx_destroy(&cwt->cwt_lock); cv_destroy(&cwt->cwt_cv); bzero(&cwt, sizeof(*cwt)); if (i == 0) { free(cwt_softc, M_CXGBE); worker_thread_count = 0; return (rc); } /* Not fatal, carry on with fewer threads. */ worker_thread_count = i; rc = 0; break; } /* Wait for thread to start before moving on to the next one. */ mtx_lock(&cwt->cwt_lock); while (cwt->cwt_state != CWT_RUNNING) cv_wait(&cwt->cwt_cv, &cwt->cwt_lock); mtx_unlock(&cwt->cwt_lock); } MPASS(cwt_softc != NULL); MPASS(worker_thread_count > 0); return (0); } static void stop_worker_threads(void) { int i; struct cxgbei_worker_thread_softc *cwt = &cwt_softc[0]; MPASS(worker_thread_count >= 0); for (i = 0, cwt = &cwt_softc[0]; i < worker_thread_count; i++, cwt++) { mtx_lock(&cwt->cwt_lock); MPASS(cwt->cwt_state == CWT_RUNNING); cwt->cwt_state = CWT_STOP; cv_signal(&cwt->cwt_cv); do { cv_wait(&cwt->cwt_cv, &cwt->cwt_lock); } while (cwt->cwt_state != CWT_STOPPED); mtx_unlock(&cwt->cwt_lock); } free(cwt_softc, M_CXGBE); } -extern void (*cxgbei_fw4_ack)(struct toepcb *, int); -extern void (*cxgbei_rx_data_ddp)(struct toepcb *, - const struct cpl_rx_data_ddp *); -extern struct mbuf *(*cxgbei_writeq_len)(struct toepcb *, int *); -extern struct mbuf *(*cxgbei_writeq_next)(struct toepcb *); - static int cxgbei_mod_load(void) { int rc; - - cxgbei_fw4_ack = drop_fw_acked_ulp_data; - cxgbei_rx_data_ddp = process_rx_data_ddp; - cxgbei_writeq_len = get_writeq_len; - cxgbei_writeq_next = do_writeq_next; rc = start_worker_threads(); if (rc != 0) return (rc); rc = t4_register_uld(&cxgbei_uld_info); if (rc != 0) { stop_worker_threads(); return (rc); } t4_iterate(cxgbei_activate_all, NULL); return (rc); } static int cxgbei_mod_unload(void) { t4_iterate(cxgbei_deactivate_all, NULL); if (t4_unregister_uld(&cxgbei_uld_info) == EBUSY) return (EBUSY); stop_worker_threads(); return (0); } static int cxgbei_modevent(module_t mod, int cmd, void *arg) { int rc = 0; switch (cmd) { case MOD_LOAD: rc = cxgbei_mod_load(); break; case MOD_UNLOAD: rc = cxgbei_mod_unload(); break; default: rc = EINVAL; } return (rc); } static moduledata_t cxgbei_mod = { "cxgbei", cxgbei_modevent, NULL, }; MODULE_VERSION(cxgbei, 1); DECLARE_MODULE(cxgbei, cxgbei_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(cxgbei, t4_tom, 1, 1, 1); MODULE_DEPEND(cxgbei, cxgbe, 1, 1, 1); MODULE_DEPEND(cxgbei, icl, 1, 1, 1); Index: projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei.h =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei.h (revision 290971) +++ projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei.h (revision 290972) @@ -1,154 +1,142 @@ /*- * Copyright (c) 2012, 2015 Chelsio Communications, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef __CXGBEI_OFLD_H__ #define __CXGBEI_OFLD_H__ #include -struct iscsi_socket { - u_char s_dcrc_len; - void *s_conn; /* ic_conn pointer */ - struct toepcb *toep; +#define CXGBEI_CONN_SIGNATURE 0x56788765 - /* - * XXXNP: locks on the same line. - * XXXNP: are the locks even needed? Why not use so_snd/so_rcv mtx to - * guard the write and rcv queues? - */ - struct mbufq iscsi_rcvq; /* rx - ULP mbufs */ - struct mtx iscsi_rcvq_lock; - - struct mbufq ulp2_writeq; /* tx - ULP mbufs */ - struct mtx ulp2_writeq_lock; - - struct mbufq ulp2_wrq; /* tx wr- ULP mbufs */ - struct mtx ulp2_wrq_lock; - - struct mbuf *mbuf_ulp_lhdr; - struct mbuf *mbuf_ulp_ldata; -}; - struct icl_cxgbei_conn { struct icl_conn ic; /* cxgbei specific stuff goes here. */ uint32_t icc_signature; + int ulp_submode; + struct adapter *sc; + struct toepcb *toep; + + /* PDU currently being assembled. */ + /* XXXNP: maybe just use ic->ic_receive_pdu instead? */ + struct icl_cxgbei_pdu *icp; + uint32_t pdu_seq; /* For debug only */ }; +static inline struct icl_cxgbei_conn * +ic_to_icc(struct icl_conn *ic) +{ + + return (__containerof(ic, struct icl_cxgbei_conn, ic)); +} + +#define CXGBEI_PDU_SIGNATURE 0x12344321 + struct icl_cxgbei_pdu { struct icl_pdu ip; /* cxgbei specific stuff goes here. */ uint32_t icp_signature; + u_int pdu_flags; }; +static inline struct icl_cxgbei_pdu * +ip_to_icp(struct icl_pdu *ip) +{ + + return (__containerof(ip, struct icl_cxgbei_pdu, ip)); +} + struct cxgbei_sgl { int sg_flag; void *sg_addr; void *sg_dma_addr; size_t sg_offset; size_t sg_length; }; #define cxgbei_scsi_for_each_sg(_sgl, _sgel, _n, _i) \ for (_i = 0, _sgel = (cxgbei_sgl*) (_sgl); _i < _n; _i++, \ _sgel++) #define sg_dma_addr(_sgel) _sgel->sg_dma_addr #define sg_virt(_sgel) _sgel->sg_addr #define sg_len(_sgel) _sgel->sg_length #define sg_off(_sgel) _sgel->sg_offset #define sg_next(_sgel) _sgel + 1 #define SBUF_ULP_FLAG_HDR_RCVD 0x1 #define SBUF_ULP_FLAG_DATA_RCVD 0x2 #define SBUF_ULP_FLAG_STATUS_RCVD 0x4 #define SBUF_ULP_FLAG_HCRC_ERROR 0x10 #define SBUF_ULP_FLAG_DCRC_ERROR 0x20 #define SBUF_ULP_FLAG_PAD_ERROR 0x40 #define SBUF_ULP_FLAG_DATA_DDPED 0x80 -/* - * Similar to tcp_skb_cb but with ULP elements added to support DDP, iSCSI, - * etc. - */ -struct ulp_mbuf_cb { - uint8_t ulp_mode; /* ULP mode/submode of sk_buff */ - uint8_t flags; /* TCP-like flags */ - uint32_t ddigest; /* ULP rx_data_ddp selected field*/ - uint32_t pdulen; /* ULP rx_data_ddp selected field*/ - void *pdu; /* pdu pointer */ -}; - /* private data for each scsi task */ struct cxgbei_task_data { struct cxgbei_sgl sgl[256]; u_int nsge; u_int sc_ddp_tag; }; struct cxgbei_ulp2_tag_format { u_char sw_bits; u_char rsvd_bits; u_char rsvd_shift; u_char filler[1]; uint32_t rsvd_mask; }; struct cxgbei_data { u_int max_txsz; u_int max_rxsz; u_int llimit; u_int ulimit; u_int nppods; u_int idx_last; u_char idx_bits; uint32_t idx_mask; uint32_t rsvd_tag_mask; struct mtx map_lock; bus_dma_tag_t ulp_ddp_tag; unsigned char *colors; struct cxgbei_ulp2_gather_list **gl_map; struct cxgbei_ulp2_tag_format tag_format; }; -struct ulp_mbuf_cb *get_ulp_mbuf_cb(struct mbuf *); -int cxgbei_conn_handoff(struct icl_conn *); -int cxgbei_conn_close(struct icl_conn *); void cxgbei_conn_task_reserve_itt(void *, void **, void *, unsigned int *); void cxgbei_conn_transfer_reserve_ttt(void *, void **, void *, unsigned int *); void cxgbei_cleanup_task(void *, void *); -int cxgbei_conn_xmit_pdu(struct icl_conn *, struct icl_pdu *); struct cxgbei_ulp2_pagepod_hdr; int t4_ddp_set_map(struct cxgbei_data *, void *, struct cxgbei_ulp2_pagepod_hdr *, u_int, u_int, struct cxgbei_ulp2_gather_list *, int); void t4_ddp_clear_map(struct cxgbei_data *, struct cxgbei_ulp2_gather_list *, - u_int, u_int, u_int, struct iscsi_socket *); + u_int, u_int, u_int, struct icl_cxgbei_conn *); #endif Index: projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei_ulp2_ddp.c =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei_ulp2_ddp.c (revision 290971) +++ projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei_ulp2_ddp.c (revision 290972) @@ -1,413 +1,415 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * * Chelsio T5xx iSCSI driver * cxgbei_ulp2_ddp.c: Chelsio iSCSI DDP Manager. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include -#include -#include /* for PCIE_MEM_ACCESS */ -#include +#include +#include +#include "common/common.h" +#include "common/t4_msg.h" +#include "common/t4_regs.h" /* for PCIE_MEM_ACCESS */ +#include "tom/t4_tom.h" #include "cxgbei.h" #include "cxgbei_ulp2_ddp.h" /* * Map a single buffer address. */ static void ulp2_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *ba = arg; if (error) return; KASSERT(nseg == 1, ("%s: %d segments returned!", __func__, nseg)); *ba = segs->ds_addr; } /* * iSCSI Direct Data Placement * * T4/5 ulp2 h/w can directly place the iSCSI Data-In or Data-Out PDU's * payload into pre-posted final destination host-memory buffers based on the * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT) in Data-Out * PDUs. * * The host memory address is programmed into h/w in the format of pagepod * entries. * The location of the pagepod entry is encoded into ddp tag which is used or * is the base for ITT/TTT. */ static inline int ddp_find_unused_entries(struct cxgbei_data *ci, u_int start, u_int max, u_int count, u_int *idx, struct cxgbei_ulp2_gather_list *gl) { unsigned int i, j, k; /* not enough entries */ if (max - start < count) return (EBUSY); max -= count; mtx_lock(&ci->map_lock); for (i = start; i < max;) { for (j = 0, k = i; j < count; j++, k++) { if (ci->gl_map[k]) break; } if (j == count) { for (j = 0, k = i; j < count; j++, k++) ci->gl_map[k] = gl; mtx_unlock(&ci->map_lock); *idx = i; return (0); } i += j + 1; } mtx_unlock(&ci->map_lock); return (EBUSY); } static inline void ddp_unmark_entries(struct cxgbei_data *ci, u_int start, u_int count) { mtx_lock(&ci->map_lock); memset(&ci->gl_map[start], 0, count * sizeof(struct cxgbei_ulp2_gather_list *)); mtx_unlock(&ci->map_lock); } static inline void ddp_gl_unmap(struct cxgbei_data *ci, struct cxgbei_ulp2_gather_list *gl) { int i; if (!gl->pages[0]) return; for (i = 0; i < gl->nelem; i++) { bus_dmamap_unload(ci->ulp_ddp_tag, gl->dma_sg[i].bus_map); bus_dmamap_destroy(ci->ulp_ddp_tag, gl->dma_sg[i].bus_map); } } static inline int ddp_gl_map(struct cxgbei_data *ci, struct cxgbei_ulp2_gather_list *gl) { int i, rc; bus_addr_t pa; MPASS(ci != NULL); mtx_lock(&ci->map_lock); for (i = 0; i < gl->nelem; i++) { rc = bus_dmamap_create(ci->ulp_ddp_tag, 0, &gl->dma_sg[i].bus_map); if (rc != 0) goto unmap; rc = bus_dmamap_load(ci->ulp_ddp_tag, gl->dma_sg[i].bus_map, gl->pages[i], PAGE_SIZE, ulp2_dma_map_addr, &pa, BUS_DMA_NOWAIT); if (rc != 0) goto unmap; gl->dma_sg[i].phys_addr = pa; } mtx_unlock(&ci->map_lock); return (0); unmap: if (i) { u_int nelem = gl->nelem; gl->nelem = i; ddp_gl_unmap(ci, gl); gl->nelem = nelem; } return (ENOMEM); } /** * cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec - build ddp page buffer list * @xferlen: total buffer length * @sgl: page buffer scatter-gather list (struct cxgbei_sgl) * @sgcnt: # of page buffers * @gfp: allocation mode * * construct a ddp page buffer list from the scsi scattergather list. * coalesce buffers as much as possible, and obtain dma addresses for * each page. * * Return the cxgbei_ulp2_gather_list constructed from the page buffers if the * memory can be used for ddp. Return NULL otherwise. */ struct cxgbei_ulp2_gather_list * cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec(u_int xferlen, struct cxgbei_sgl *sgl, u_int sgcnt, struct cxgbei_data *ci, int gfp) { struct cxgbei_ulp2_gather_list *gl; struct cxgbei_sgl *sg = sgl; void *sgpage = (void *)((u64)sg->sg_addr & (~PAGE_MASK)); unsigned int sglen = sg->sg_length; unsigned int sgoffset = (u64)sg->sg_addr & PAGE_MASK; unsigned int npages = (xferlen + sgoffset + PAGE_SIZE - 1) >> PAGE_SHIFT; int i = 1, j = 0; if (xferlen <= DDP_THRESHOLD) { CTR2(KTR_CXGBE, "xfer %u < threshold %u, no ddp.", xferlen, DDP_THRESHOLD); return NULL; } gl = malloc(sizeof(struct cxgbei_ulp2_gather_list) + npages * (sizeof(struct dma_segments) + sizeof(void *)), M_DEVBUF, M_NOWAIT | M_ZERO); if (gl == NULL) return (NULL); gl->pages = (void **)&gl->dma_sg[npages]; gl->length = xferlen; gl->offset = sgoffset; gl->pages[0] = sgpage; CTR6(KTR_CXGBE, "%s: xferlen:0x%x len:0x%x off:0x%x sg_addr:%p npages:%d", __func__, xferlen, gl->length, gl->offset, sg->sg_addr, npages); for (i = 1, sg = sg_next(sg); i < sgcnt; i++, sg = sg_next(sg)) { void *page = sg->sg_addr; if (sgpage == page && sg->sg_offset == sgoffset + sglen) sglen += sg->sg_length; else { /* make sure the sgl is fit for ddp: * each has the same page size, and * all of the middle pages are used completely */ if ((j && sgoffset) || ((i != sgcnt - 1) && ((sglen + sgoffset) & ~CXGBEI_PAGE_MASK))){ goto error_out; } j++; if (j == gl->nelem || sg->sg_offset) { goto error_out; } gl->pages[j] = page; sglen = sg->sg_length; sgoffset = sg->sg_offset; sgpage = page; } } gl->nelem = ++j; if (ddp_gl_map(ci, gl) < 0) goto error_out; return gl; error_out: free(gl, M_DEVBUF); return NULL; } /** * cxgbei_ulp2_ddp_release_gl - release a page buffer list * @gl: a ddp page buffer list * @pdev: pci_dev used for pci_unmap * free a ddp page buffer list resulted from cxgbei_ulp2_ddp_make_gl(). */ void cxgbei_ulp2_ddp_release_gl(struct cxgbei_data *ci, struct cxgbei_ulp2_gather_list *gl) { ddp_gl_unmap(ci, gl); free(gl, M_DEVBUF); } /** * cxgbei_ulp2_ddp_tag_reserve - set up ddp for a data transfer * @ci: adapter's ddp info * @tid: connection id * @tformat: tag format * @tagp: contains s/w tag initially, will be updated with ddp/hw tag * @gl: the page momory list * @gfp: allocation mode * * ddp setup for a given page buffer list and construct the ddp tag. * return 0 if success, < 0 otherwise. */ int -cxgbei_ulp2_ddp_tag_reserve(struct cxgbei_data *ci, void *isock, u_int tid, +cxgbei_ulp2_ddp_tag_reserve(struct cxgbei_data *ci, void *icc, u_int tid, struct cxgbei_ulp2_tag_format *tformat, u32 *tagp, struct cxgbei_ulp2_gather_list *gl, int gfp, int reply) { struct cxgbei_ulp2_pagepod_hdr hdr; u_int npods, idx; int rc; u32 sw_tag = *tagp; u32 tag; MPASS(ci != NULL); if (!gl || !gl->nelem || gl->length < DDP_THRESHOLD) return (EINVAL); npods = (gl->nelem + IPPOD_PAGES_MAX - 1) >> IPPOD_PAGES_SHIFT; if (ci->idx_last == ci->nppods) rc = ddp_find_unused_entries(ci, 0, ci->nppods, npods, &idx, gl); else { rc = ddp_find_unused_entries(ci, ci->idx_last + 1, ci->nppods, npods, &idx, gl); if (rc && ci->idx_last >= npods) { rc = ddp_find_unused_entries(ci, 0, min(ci->idx_last + npods, ci->nppods), npods, &idx, gl); } } if (rc) { CTR3(KTR_CXGBE, "xferlen %u, gl %u, npods %u NO DDP.", gl->length, gl->nelem, npods); return (rc); } tag = cxgbei_ulp2_ddp_tag_base(idx, ci->colors, tformat, sw_tag); CTR4(KTR_CXGBE, "%s: sw_tag:0x%x idx:0x%x tag:0x%x", __func__, sw_tag, idx, tag); hdr.rsvd = 0; hdr.vld_tid = htonl(F_IPPOD_VALID | V_IPPOD_TID(tid)); hdr.pgsz_tag_clr = htonl(tag & ci->rsvd_tag_mask); hdr.maxoffset = htonl(gl->length); hdr.pgoffset = htonl(gl->offset); - rc = t4_ddp_set_map(ci, isock, &hdr, idx, npods, gl, reply); + rc = t4_ddp_set_map(ci, icc, &hdr, idx, npods, gl, reply); if (rc < 0) goto unmark_entries; ci->idx_last = idx; *tagp = tag; return (0); unmark_entries: ddp_unmark_entries(ci, idx, npods); return (rc); } /** * cxgbei_ulp2_ddp_tag_release - release a ddp tag * @ci: adapter's ddp info * @tag: ddp tag * ddp cleanup for a given ddp tag and release all the resources held */ void cxgbei_ulp2_ddp_tag_release(struct cxgbei_data *ci, uint32_t tag, - struct iscsi_socket *isock) + struct icl_cxgbei_conn *icc) { uint32_t idx; MPASS(ci != NULL); - MPASS(isock != NULL); + MPASS(icc != NULL); idx = (tag >> IPPOD_IDX_SHIFT) & ci->idx_mask; CTR3(KTR_CXGBE, "tag:0x%x idx:0x%x nppods:0x%x", tag, idx, ci->nppods); if (idx < ci->nppods) { struct cxgbei_ulp2_gather_list *gl = ci->gl_map[idx]; unsigned int npods; if (!gl || !gl->nelem) { CTR4(KTR_CXGBE, "release 0x%x, idx 0x%x, gl 0x%p, %u.", tag, idx, gl, gl ? gl->nelem : 0); return; } npods = (gl->nelem + IPPOD_PAGES_MAX - 1) >> IPPOD_PAGES_SHIFT; CTR3(KTR_CXGBE, "ddp tag 0x%x, release idx 0x%x, npods %u.", tag, idx, npods); - t4_ddp_clear_map(ci, gl, tag, idx, npods, isock); + t4_ddp_clear_map(ci, gl, tag, idx, npods, icc); ddp_unmark_entries(ci, idx, npods); cxgbei_ulp2_ddp_release_gl(ci, gl); } else CTR3(KTR_CXGBE, "ddp tag 0x%x, idx 0x%x > max 0x%x.", tag, idx, ci->nppods); } /** * cxgbei_ddp_cleanup - release the adapter's ddp resources */ void cxgbei_ddp_cleanup(struct cxgbei_data *ci) { int i = 0; while (i < ci->nppods) { struct cxgbei_ulp2_gather_list *gl = ci->gl_map[i]; if (gl) { int npods = (gl->nelem + IPPOD_PAGES_MAX - 1) >> IPPOD_PAGES_SHIFT; free(gl, M_DEVBUF); i += npods; } else i++; } free(ci->colors, M_CXGBE); free(ci->gl_map, M_CXGBE); } Index: projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei_ulp2_ddp.h =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei_ulp2_ddp.h (revision 290971) +++ projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei_ulp2_ddp.h (revision 290972) @@ -1,214 +1,214 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * * Chelsio T5xx iSCSI driver * cxgbei_ulp2_ddp.c: Chelsio iSCSI DDP Manager. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef __CXGBEI_ULP2_DDP_H__ #define __CXGBEI_ULP2_DDP_H__ #define CXGBEI_PAGE_MASK (~(PAGE_SIZE-1)) #define DDP_THRESHOLD 2048 /* * cxgbei ddp tag are 32 bits, it consists of reserved bits used by h/w and * non-reserved bits that can be used by the iscsi s/w. * The reserved bits are identified by the rsvd_bits and rsvd_shift fields * in struct cxgbei_ulp2_tag_format. * * The upper most reserved bit can be used to check if a tag is ddp tag or not: * if the bit is 0, the tag is a valid ddp tag */ /* * cxgbei_ulp2_is_ddp_tag - check if a given tag is a hw/ddp tag * @tformat: tag format information * @tag: tag to be checked * * return true if the tag is a ddp tag, false otherwise. */ static inline int cxgbei_ulp2_is_ddp_tag(struct cxgbei_ulp2_tag_format *tformat, uint32_t tag) { return (!(tag & (1 << (tformat->rsvd_bits + tformat->rsvd_shift - 1)))); } /* * cxgbei_ulp2_sw_tag_usable - check if s/w tag has enough bits left for hw bits * @tformat: tag format information * @sw_tag: s/w tag to be checked * * return true if the tag can be used for hw ddp tag, false otherwise. */ static inline int cxgbei_ulp2_sw_tag_usable(struct cxgbei_ulp2_tag_format *tformat, uint32_t sw_tag) { return (1); /* XXXNP: huh? */ sw_tag >>= (32 - tformat->rsvd_bits + tformat->rsvd_shift); return !sw_tag; } /* * cxgbei_ulp2_set_non_ddp_tag - mark a given s/w tag as an invalid ddp tag * @tformat: tag format information * @sw_tag: s/w tag to be checked * * insert 1 at the upper most reserved bit to mark it as an invalid ddp tag. */ static inline uint32_t cxgbei_ulp2_set_non_ddp_tag(struct cxgbei_ulp2_tag_format *tformat, uint32_t sw_tag) { uint32_t rsvd_bits = tformat->rsvd_bits + tformat->rsvd_shift; if (sw_tag) { u32 v1 = sw_tag & ((1 << (rsvd_bits - 1)) - 1); u32 v2 = (sw_tag >> (rsvd_bits - 1)) << rsvd_bits; return v2 | (1 << (rsvd_bits - 1)) | v1; } return sw_tag | (1 << (rsvd_bits - 1)) ; } struct dma_segments { bus_dmamap_t bus_map; bus_addr_t phys_addr; }; /* * struct cxgbei_ulp2_gather_list - cxgbei direct data placement memory * * @tag: ddp tag * @length: total data buffer length * @offset: initial offset to the 1st page * @nelem: # of pages * @pages: page pointers * @phys_addr: physical address */ struct cxgbei_ulp2_gather_list { uint32_t tag; uint32_t tid; uint32_t port_id; void *egress_dev; unsigned int length; unsigned int offset; unsigned int nelem; bus_size_t mapsize; bus_dmamap_t bus_map; bus_dma_segment_t *segments; void **pages; struct dma_segments dma_sg[0]; }; #define IPPOD_SIZE sizeof(struct cxgbei_ulp2_pagepod) /* 64 */ #define IPPOD_SIZE_SHIFT 6 #define IPPOD_COLOR_SHIFT 0 #define IPPOD_COLOR_SIZE 6 #define IPPOD_COLOR_MASK ((1 << IPPOD_COLOR_SIZE) - 1) #define IPPOD_IDX_SHIFT IPPOD_COLOR_SIZE #define IPPOD_IDX_MAX_SIZE 24 #define S_IPPOD_TID 0 #define M_IPPOD_TID 0xFFFFFF #define V_IPPOD_TID(x) ((x) << S_IPPOD_TID) #define S_IPPOD_VALID 24 #define V_IPPOD_VALID(x) ((x) << S_IPPOD_VALID) #define F_IPPOD_VALID V_IPPOD_VALID(1U) #define S_IPPOD_COLOR 0 #define M_IPPOD_COLOR 0x3F #define V_IPPOD_COLOR(x) ((x) << S_IPPOD_COLOR) #define S_IPPOD_TAG 6 #define M_IPPOD_TAG 0xFFFFFF #define V_IPPOD_TAG(x) ((x) << S_IPPOD_TAG) #define S_IPPOD_PGSZ 30 #define M_IPPOD_PGSZ 0x3 #define V_IPPOD_PGSZ(x) ((x) << S_IPPOD_PGSZ) static inline uint32_t cxgbei_ulp2_ddp_tag_base(u_int idx, u_char *colors, struct cxgbei_ulp2_tag_format *tformat, uint32_t sw_tag) { if (__predict_false(++colors[idx] == 1 << IPPOD_IDX_SHIFT)) colors[idx] = 0; sw_tag <<= tformat->rsvd_bits + tformat->rsvd_shift; return (sw_tag | idx << IPPOD_IDX_SHIFT | colors[idx]); } #define ISCSI_PDU_NONPAYLOAD_LEN 312 /* bhs(48) + ahs(256) + digest(8) */ /* * align pdu size to multiple of 512 for better performance */ #define cxgbei_align_pdu_size(n) do { n = (n) & (~511); } while (0) #define ULP2_MAX_PKT_SIZE 16224 #define ULP2_MAX_PDU_PAYLOAD (ULP2_MAX_PKT_SIZE - ISCSI_PDU_NONPAYLOAD_LEN) #define IPPOD_PAGES_MAX 4 #define IPPOD_PAGES_SHIFT 2 /* 4 pages per pod */ /* * struct pagepod_hdr, pagepod - pagepod format */ struct cxgbei_ulp2_pagepod_hdr { uint32_t vld_tid; uint32_t pgsz_tag_clr; uint32_t maxoffset; uint32_t pgoffset; uint64_t rsvd; }; struct cxgbei_ulp2_pagepod { struct cxgbei_ulp2_pagepod_hdr hdr; uint64_t addr[IPPOD_PAGES_MAX + 1]; }; int cxgbei_ulp2_ddp_tag_reserve(struct cxgbei_data *, void *, unsigned int, struct cxgbei_ulp2_tag_format *, uint32_t *, struct cxgbei_ulp2_gather_list *, int , int ); void cxgbei_ulp2_ddp_tag_release(struct cxgbei_data *, uint32_t, - struct iscsi_socket *); + struct icl_cxgbei_conn *); struct cxgbei_ulp2_gather_list *cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec(u_int, struct cxgbei_sgl *, u_int, struct cxgbei_data *, int); void cxgbei_ulp2_ddp_release_gl(struct cxgbei_data *, struct cxgbei_ulp2_gather_list *); int cxgbei_ulp2_ddp_find_page_index(u_long); int cxgbei_ulp2_adapter_ddp_info(struct cxgbei_data *, struct cxgbei_ulp2_tag_format *); void cxgbei_ddp_cleanup(struct cxgbei_data *); #endif Index: projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/icl_cxgbei.c =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/icl_cxgbei.c (revision 290971) +++ projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/icl_cxgbei.c (revision 290972) @@ -1,813 +1,798 @@ /*- * Copyright (c) 2012 The FreeBSD Foundation * Copyright (c) 2015 Chelsio Communications, Inc. * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * cxgbei implementation of iSCSI Common Layer kobj(9) interface. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include +#include +#include #include #include #include #include "common/common.h" +#include "tom/t4_tom.h" #include "cxgbei.h" SYSCTL_NODE(_kern_icl, OID_AUTO, cxgbei, CTLFLAG_RD, 0, "Chelsio iSCSI offload"); static int coalesce = 1; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, coalesce, CTLFLAG_RWTUN, &coalesce, 0, "Try to coalesce PDUs before sending"); static int partial_receive_len = 128 * 1024; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, &partial_receive_len, 0, "Minimum read size for partially received " "data segment"); static int sendspace = 1048576; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, sendspace, CTLFLAG_RWTUN, &sendspace, 0, "Default send socket buffer size"); static int recvspace = 1048576; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, recvspace, CTLFLAG_RWTUN, &recvspace, 0, "Default receive socket buffer size"); -static uma_zone_t icl_cxgbei_pdu_zone; static uma_zone_t icl_transfer_zone; static volatile u_int icl_cxgbei_ncons; #define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) #define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) #define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) #define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) static icl_conn_new_pdu_t icl_cxgbei_conn_new_pdu; static icl_conn_pdu_free_t icl_cxgbei_conn_pdu_free; static icl_conn_pdu_data_segment_length_t icl_cxgbei_conn_pdu_data_segment_length; static icl_conn_pdu_append_data_t icl_cxgbei_conn_pdu_append_data; static icl_conn_pdu_get_data_t icl_cxgbei_conn_pdu_get_data; static icl_conn_pdu_queue_t icl_cxgbei_conn_pdu_queue; static icl_conn_handoff_t icl_cxgbei_conn_handoff; static icl_conn_free_t icl_cxgbei_conn_free; static icl_conn_close_t icl_cxgbei_conn_close; static icl_conn_task_setup_t icl_cxgbei_conn_task_setup; static icl_conn_task_done_t icl_cxgbei_conn_task_done; static icl_conn_transfer_setup_t icl_cxgbei_conn_transfer_setup; static icl_conn_transfer_done_t icl_cxgbei_conn_transfer_done; static kobj_method_t icl_cxgbei_methods[] = { KOBJMETHOD(icl_conn_new_pdu, icl_cxgbei_conn_new_pdu), KOBJMETHOD(icl_conn_pdu_free, icl_cxgbei_conn_pdu_free), KOBJMETHOD(icl_conn_pdu_data_segment_length, icl_cxgbei_conn_pdu_data_segment_length), KOBJMETHOD(icl_conn_pdu_append_data, icl_cxgbei_conn_pdu_append_data), KOBJMETHOD(icl_conn_pdu_get_data, icl_cxgbei_conn_pdu_get_data), KOBJMETHOD(icl_conn_pdu_queue, icl_cxgbei_conn_pdu_queue), KOBJMETHOD(icl_conn_handoff, icl_cxgbei_conn_handoff), KOBJMETHOD(icl_conn_free, icl_cxgbei_conn_free), KOBJMETHOD(icl_conn_close, icl_cxgbei_conn_close), KOBJMETHOD(icl_conn_task_setup, icl_cxgbei_conn_task_setup), KOBJMETHOD(icl_conn_task_done, icl_cxgbei_conn_task_done), KOBJMETHOD(icl_conn_transfer_setup, icl_cxgbei_conn_transfer_setup), KOBJMETHOD(icl_conn_transfer_done, icl_cxgbei_conn_transfer_done), { 0, 0 } }; DEFINE_CLASS(icl_cxgbei, icl_cxgbei_methods, sizeof(struct icl_cxgbei_conn)); -struct icl_pdu * icl_pdu_new_empty(struct icl_conn *ic, int flags); -void icl_pdu_free(struct icl_pdu *ip); +/* + * Subtract another 256 for AHS from MAX_DSL if AHS could be used. + */ +#define CXGBEI_MAX_PDU 16224 +#define CXGBEI_MAX_DSL (CXGBEI_MAX_PDU - sizeof(struct iscsi_bhs) - 8) -#define CXGBEI_PDU_SIGNATURE 0x12344321 - -struct icl_pdu * -icl_pdu_new_empty(struct icl_conn *ic, int flags) +void +icl_cxgbei_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) { - struct icl_cxgbei_pdu *icp; - struct icl_pdu *ip; - -#ifdef DIAGNOSTIC - refcount_acquire(&ic->ic_outstanding_pdus); +#ifdef INVARIANTS + struct icl_cxgbei_pdu *icp = ip_to_icp(ip); #endif - icp = uma_zalloc(icl_cxgbei_pdu_zone, flags | M_ZERO); - if (icp == NULL) { -#ifdef DIAGNOSTIC - refcount_release(&ic->ic_outstanding_pdus); -#endif - return (NULL); - } - icp->icp_signature = CXGBEI_PDU_SIGNATURE; - ip = &icp->ip; - ip->ip_conn = ic; - - return (ip); -} - -void -icl_pdu_free(struct icl_pdu *ip) -{ - struct icl_conn *ic; - struct icl_cxgbei_pdu *icp; - - icp = (void *)ip; MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE); - ic = ip->ip_conn; + MPASS(ic == ip->ip_conn); + MPASS(ip->ip_bhs_mbuf != NULL); - m_freem(ip->ip_bhs_mbuf); m_freem(ip->ip_ahs_mbuf); m_freem(ip->ip_data_mbuf); - uma_zfree(icl_cxgbei_pdu_zone, ip); + m_freem(ip->ip_bhs_mbuf); /* storage for icl_cxgbei_pdu itself */ + #ifdef DIAGNOSTIC refcount_release(&ic->ic_outstanding_pdus); #endif } -void -icl_cxgbei_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) -{ - - icl_pdu_free(ip); -} - /* * Allocate icl_pdu with empty BHS to fill up by the caller. */ struct icl_pdu * icl_cxgbei_conn_new_pdu(struct icl_conn *ic, int flags) { + struct icl_cxgbei_pdu *icp; struct icl_pdu *ip; + struct mbuf *m; + uintptr_t a; - ip = icl_pdu_new_empty(ic, flags); - if (ip == NULL) + m = m_gethdr(flags, MT_DATA); + if (m == NULL) return (NULL); - ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs), - flags, MT_DATA, M_PKTHDR); - if (ip->ip_bhs_mbuf == NULL) { - ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); - icl_pdu_free(ip); - return (NULL); - } - ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); - memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); - ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); + a = roundup2(mtod(m, uintptr_t), _Alignof(struct icl_cxgbei_pdu)); + icp = (struct icl_cxgbei_pdu *)a; + bzero(icp, sizeof(*icp)); + icp->icp_signature = CXGBEI_PDU_SIGNATURE; + ip = &icp->ip; + ip->ip_conn = ic; + ip->ip_bhs_mbuf = m; + + a = roundup2((uintptr_t)(icp + 1), _Alignof(struct iscsi_bhs *)); + ip->ip_bhs = (struct iscsi_bhs *)a; +#ifdef INVARIANTS + /* Everything must fit entirely in the mbuf. */ + a = (uintptr_t)(ip->ip_bhs + 1); + MPASS(a <= (uintptr_t)m + MSIZE); +#endif + bzero(ip->ip_bhs, sizeof(*ip->ip_bhs)); + + m->m_data = (void *)ip->ip_bhs; + m->m_len = sizeof(struct iscsi_bhs); + m->m_pkthdr.len = m->m_len; + + +#ifdef DIAGNOSTIC + refcount_acquire(&ic->ic_outstanding_pdus); +#endif return (ip); } static size_t icl_pdu_data_segment_length(const struct icl_pdu *request) { uint32_t len = 0; len += request->ip_bhs->bhs_data_segment_len[0]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[1]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[2]; return (len); } size_t icl_cxgbei_conn_pdu_data_segment_length(struct icl_conn *ic, const struct icl_pdu *request) { return (icl_pdu_data_segment_length(request)); } -static void -icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) -{ - - response->ip_bhs->bhs_data_segment_len[2] = len; - response->ip_bhs->bhs_data_segment_len[1] = len >> 8; - response->ip_bhs->bhs_data_segment_len[0] = len >> 16; -} - -static size_t -icl_pdu_padding(const struct icl_pdu *ip) -{ - - if ((ip->ip_data_len % 4) != 0) - return (4 - (ip->ip_data_len % 4)); - - return (0); -} - -static size_t -icl_pdu_size(const struct icl_pdu *response) -{ - size_t len; - - KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); - - len = sizeof(struct iscsi_bhs) + response->ip_data_len + - icl_pdu_padding(response); - - return (len); -} - static uint32_t icl_conn_build_tasktag(struct icl_conn *ic, uint32_t tag) { return tag; } -static int -icl_soupcall_receive(struct socket *so, void *arg, int waitflag) +static struct mbuf * +finalize_pdu(struct icl_cxgbei_conn *icc, struct icl_cxgbei_pdu *icp) { - struct icl_conn *ic; + struct icl_pdu *ip = &icp->ip; + uint8_t ulp_submode, padding; + struct mbuf *m, *last; + struct iscsi_bhs *bhs; - if (!soreadable(so)) - return (SU_OK); + /* + * Fix up the data segment mbuf first. + */ + m = ip->ip_data_mbuf; + ulp_submode = icc->ulp_submode; + if (m) { + last = m_last(m); - ic = arg; - cv_signal(&ic->ic_receive_cv); - return (SU_OK); -} - -static int -icl_pdu_finalize(struct icl_pdu *request) -{ - size_t padding, pdu_len; - uint32_t zero = 0; - int ok; - struct icl_conn *ic; - - ic = request->ip_conn; - - icl_pdu_set_data_segment_length(request, request->ip_data_len); - - pdu_len = icl_pdu_size(request); - - if (request->ip_data_len != 0) { - padding = icl_pdu_padding(request); - if (padding > 0) { - ok = m_append(request->ip_data_mbuf, padding, - (void *)&zero); - if (ok != 1) { - ICL_WARN("failed to append padding"); - return (1); - } + /* + * Round up the data segment to a 4B boundary. Pad with 0 if + * necessary. There will definitely be room in the mbuf. + */ + padding = roundup2(ip->ip_data_len, 4) - ip->ip_data_len; + if (padding) { + bzero(mtod(last, uint8_t *) + last->m_len, padding); + last->m_len += padding; } - - m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); - request->ip_data_mbuf = NULL; + } else { + MPASS(ip->ip_data_len == 0); + ulp_submode &= ~ULP_CRC_DATA; + padding = 0; } - request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; + /* + * Now the header mbuf that has the BHS. + */ + m = ip->ip_bhs_mbuf; + MPASS(m->m_pkthdr.len == sizeof(struct iscsi_bhs)); + MPASS(m->m_len == sizeof(struct iscsi_bhs)); - return (0); -} + bhs = ip->ip_bhs; + bhs->bhs_data_segment_len[2] = ip->ip_data_len; + bhs->bhs_data_segment_len[1] = ip->ip_data_len >> 8; + bhs->bhs_data_segment_len[0] = ip->ip_data_len >> 16; -static int -icl_soupcall_send(struct socket *so, void *arg, int waitflag) -{ - struct icl_conn *ic; + /* "Convert" PDU to mbuf chain. Do not use icp/ip after this. */ + m->m_pkthdr.len = sizeof(struct iscsi_bhs) + ip->ip_data_len + padding; + m->m_next = ip->ip_data_mbuf; + set_mbuf_ulp_submode(m, ulp_submode); +#ifdef INVARIANTS + bzero(icp, sizeof(*icp)); +#endif +#ifdef DIAGNOSTIC + refcount_release(&icc->ic.ic_outstanding_pdus); +#endif - if (!sowriteable(so)) - return (SU_OK); - - ic = arg; - - ICL_CONN_LOCK(ic); - ic->ic_check_send_space = true; - ICL_CONN_UNLOCK(ic); - - cv_signal(&ic->ic_send_cv); - - return (SU_OK); + return (m); } -static int -icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len, - int flags) +int +icl_cxgbei_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *ip, + const void *addr, size_t len, int flags) { - struct mbuf *mb, *newmb; - size_t copylen, off = 0; + struct mbuf *m; +#ifdef INVARIANTS + struct icl_cxgbei_pdu *icp = ip_to_icp(ip); +#endif - KASSERT(len > 0, ("len == 0")); + MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE); + MPASS(ic == ip->ip_conn); + KASSERT(len > 0, ("%s: len is %jd", __func__, (intmax_t)len)); - newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR); - if (newmb == NULL) { - ICL_WARN("failed to allocate mbuf for %zd bytes", len); - return (ENOMEM); - } + /* + * XXXNP: add assertions here, after fixing the problems around + * max_data_segment_length: + * a) len should not cause the max_data_segment_length to be exceeded. + * b) all data should fit in a single jumbo16. The hardware limit just + * happens to be within jumbo16 so this is very convenient. + */ - for (mb = newmb; mb != NULL; mb = mb->m_next) { - copylen = min(M_TRAILINGSPACE(mb), len - off); - memcpy(mtod(mb, char *), (const char *)addr + off, copylen); - mb->m_len = copylen; - off += copylen; + m = ip->ip_data_mbuf; + if (m == NULL) { + m = m_getjcl(M_NOWAIT, MT_DATA, 0, MJUM16BYTES); + if (__predict_false(m == NULL)) + return (ENOMEM); + + ip->ip_data_mbuf = m; } - KASSERT(off == len, ("%s: off != len", __func__)); - if (request->ip_data_mbuf == NULL) { - request->ip_data_mbuf = newmb; - request->ip_data_len = len; + if (__predict_true(m_append(m, len, addr) != 0)) { + ip->ip_data_len += len; + MPASS(ip->ip_data_len <= CXGBEI_MAX_DSL); + return (0); } else { - m_cat(request->ip_data_mbuf, newmb); - request->ip_data_len += len; + if (flags & M_WAITOK) { + CXGBE_UNIMPLEMENTED("fail safe append"); + } + ip->ip_data_len = m_length(m, NULL); + return (1); } - - return (0); } -int -icl_cxgbei_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, - const void *addr, size_t len, int flags) -{ - - return (icl_pdu_append_data(request, addr, len, flags)); -} - -static void -icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len) -{ - /* data is DDP'ed, no need to copy */ - if (ip->ip_ofld_prv0) return; - m_copydata(ip->ip_data_mbuf, off, len, addr); -} - void icl_cxgbei_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, size_t off, void *addr, size_t len) { + struct icl_cxgbei_pdu *icp = ip_to_icp(ip); - return (icl_pdu_get_data(ip, off, addr, len)); + if (icp->pdu_flags & SBUF_ULP_FLAG_DATA_DDPED) + return; /* data is DDP'ed, no need to copy */ + m_copydata(ip->ip_data_mbuf, off, len, addr); } -static void -icl_pdu_queue(struct icl_pdu *ip) +void +icl_cxgbei_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) { - struct icl_conn *ic; + struct icl_cxgbei_conn *icc = ic_to_icc(ic); + struct icl_cxgbei_pdu *icp = ip_to_icp(ip); + struct socket *so = ic->ic_socket; + struct toepcb *toep = icc->toep; + struct inpcb *inp; + struct mbuf *m; - ic = ip->ip_conn; + MPASS(ic == ip->ip_conn); + MPASS(ip->ip_bhs_mbuf != NULL); + /* The kernel doesn't generate PDUs with AHS. */ + MPASS(ip->ip_ahs_mbuf == NULL && ip->ip_ahs_len == 0); ICL_CONN_LOCK_ASSERT(ic); - - if (ic->ic_disconnecting || ic->ic_socket == NULL) { - ICL_DEBUG("icl_pdu_queue on closed connection"); - icl_pdu_free(ip); + /* NOTE: sowriteable without so_snd lock is a mostly harmless race. */ + if (ic->ic_disconnecting || so == NULL || !sowriteable(so)) { + icl_cxgbei_conn_pdu_free(ic, ip); return; } - icl_pdu_finalize(ip); - cxgbei_conn_xmit_pdu(ic, ip); -} -void -icl_cxgbei_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) -{ + m = finalize_pdu(icc, icp); + M_ASSERTPKTHDR(m); + MPASS((m->m_pkthdr.len & 3) == 0); + MPASS(m->m_pkthdr.len + 8 <= CXGBEI_MAX_PDU); - icl_pdu_queue(ip); + /* + * Do not get inp from toep->inp as the toepcb might have detached + * already. + */ + inp = sotoinpcb(so); + INP_WLOCK(inp); + if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) || + __predict_false((toep->flags & TPF_ATTACHED) == 0)) + m_freem(m); + else { + mbufq_enqueue(&toep->ulp_pduq, m); + t4_push_pdus(icc->sc, toep, 0); + } + INP_WUNLOCK(inp); } -#define CXGBEI_CONN_SIGNATURE 0x56788765 - static struct icl_conn * icl_cxgbei_new_conn(const char *name, struct mtx *lock) { struct icl_cxgbei_conn *icc; struct icl_conn *ic; refcount_acquire(&icl_cxgbei_ncons); icc = (struct icl_cxgbei_conn *)kobj_create(&icl_cxgbei_class, M_CXGBE, M_WAITOK | M_ZERO); icc->icc_signature = CXGBEI_CONN_SIGNATURE; ic = &icc->ic; - STAILQ_INIT(&ic->ic_to_send); ic->ic_lock = lock; + + /* XXXNP: review. Most of these icl_conn fields aren't really used */ + STAILQ_INIT(&ic->ic_to_send); cv_init(&ic->ic_send_cv, "icl_cxgbei_tx"); cv_init(&ic->ic_receive_cv, "icl_cxgbei_rx"); #ifdef DIAGNOSTIC refcount_init(&ic->ic_outstanding_pdus, 0); #endif - ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH; + ic->ic_max_data_segment_length = CXGBEI_MAX_DSL; ic->ic_name = name; ic->ic_offload = "cxgbei"; + CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc); + return (ic); } void icl_cxgbei_conn_free(struct icl_conn *ic) { - struct icl_cxgbei_conn *icc = (void *)ic; + struct icl_cxgbei_conn *icc = ic_to_icc(ic); MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); + CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc); + cv_destroy(&ic->ic_send_cv); cv_destroy(&ic->ic_receive_cv); + kobj_delete((struct kobj *)icc, M_CXGBE); refcount_release(&icl_cxgbei_ncons); } -/* XXXNP: what is this for? There's no conn_start method. */ static int -icl_conn_start(struct icl_conn *ic) +icl_cxgbei_setsockopt(struct icl_conn *ic, struct socket *so) { size_t minspace; struct sockopt opt; int error, one = 1; - ICL_CONN_LOCK(ic); - /* - * XXX: Ugly hack. - */ - if (ic->ic_socket == NULL) { - ICL_CONN_UNLOCK(ic); - return (EINVAL); - } - - ic->ic_receive_state = ICL_CONN_STATE_BHS; - ic->ic_receive_len = sizeof(struct iscsi_bhs); - ic->ic_disconnecting = false; - - ICL_CONN_UNLOCK(ic); - - /* * For sendspace, this is required because the current code cannot * send a PDU in pieces; thus, the minimum buffer size is equal * to the maximum PDU size. "+4" is to account for possible padding. * * What we should actually do here is to use autoscaling, but set * some minimal buffer size to "minspace". I don't know a way to do * that, though. */ minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; - if (sendspace < minspace) { - ICL_WARN("kern.icl.sendspace too low; must be at least %zd", - minspace); + if (sendspace < minspace) sendspace = minspace; - } - if (recvspace < minspace) { - ICL_WARN("kern.icl.recvspace too low; must be at least %zd", - minspace); + if (recvspace < minspace) recvspace = minspace; - } - error = soreserve(ic->ic_socket, sendspace, recvspace); + error = soreserve(so, sendspace, recvspace); if (error != 0) { - ICL_WARN("soreserve failed with error %d", error); icl_cxgbei_conn_close(ic); return (error); } - ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; - ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; + SOCKBUF_LOCK(&so->so_snd); + so->so_snd.sb_flags |= SB_AUTOSIZE; + SOCKBUF_UNLOCK(&so->so_snd); + SOCKBUF_LOCK(&so->so_rcv); + so->so_rcv.sb_flags |= SB_AUTOSIZE; + SOCKBUF_UNLOCK(&so->so_rcv); /* * Disable Nagle. */ bzero(&opt, sizeof(opt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = IPPROTO_TCP; opt.sopt_name = TCP_NODELAY; opt.sopt_val = &one; opt.sopt_valsize = sizeof(one); - error = sosetopt(ic->ic_socket, &opt); + error = sosetopt(so, &opt); if (error != 0) { - ICL_WARN("disabling TCP_NODELAY failed with error %d", error); icl_cxgbei_conn_close(ic); return (error); } - /* - * Register socket upcall, to get notified about incoming PDUs - * and free space to send outgoing ones. - */ - SOCKBUF_LOCK(&ic->ic_socket->so_snd); - soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic); - SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); - SOCKBUF_LOCK(&ic->ic_socket->so_rcv); - soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic); - SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); - return (0); } +/* + * Request/response structure used to find out the adapter offloading a socket. + */ +struct find_ofld_adapter_rr { + struct socket *so; + struct adapter *sc; /* result */ +}; + +static void +find_offload_adapter(struct adapter *sc, void *arg) +{ + struct find_ofld_adapter_rr *fa = arg; + struct socket *so = fa->so; + struct tom_data *td = sc->tom_softc; + struct tcpcb *tp; + struct inpcb *inp; + + /* Non-TCP were filtered out earlier. */ + MPASS(so->so_proto->pr_protocol == IPPROTO_TCP); + + if (fa->sc != NULL) + return; /* Found already. */ + + if (td == NULL) + return; /* TOE not enabled on this adapter. */ + + inp = sotoinpcb(so); + INP_WLOCK(inp); + if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { + tp = intotcpcb(inp); + if (tp->t_flags & TF_TOE && tp->tod == &td->tod) + fa->sc = sc; /* Found. */ + } + INP_WUNLOCK(inp); +} + +static void +set_ulp_mode_iscsi(struct adapter *sc, struct toepcb *toep, int hcrc, int dcrc) +{ + uint64_t val = 0; + + if (hcrc) + val |= ULP_CRC_HEADER; + if (dcrc) + val |= ULP_CRC_DATA; + val <<= 4; + val |= ULP_MODE_ISCSI; + + CTR4(KTR_CXGBE, "%s: tid %u, ULP_MODE_ISCSI, CRC hdr=%d data=%d", + __func__, toep->tid, hcrc, dcrc); + + t4_set_tcb_field(sc, toep, 1, 0, 0xfff, val); +} + +/* + * XXXNP: Who is responsible for cleaning up the socket if this returns with an + * error? Review all error paths. + * + * XXXNP: What happens to the socket's fd reference if the operation is + * successful, and how does that affect the socket's life cycle? + */ int icl_cxgbei_conn_handoff(struct icl_conn *ic, int fd) { + struct icl_cxgbei_conn *icc = ic_to_icc(ic); + struct find_ofld_adapter_rr fa; struct file *fp; struct socket *so; + struct inpcb *inp; + struct tcpcb *tp; + struct toepcb *toep; cap_rights_t rights; int error; + MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); ICL_CONN_LOCK_ASSERT_NOT(ic); /* * Steal the socket from userland. */ error = fget(curthread, fd, cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); if (error != 0) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, curthread); return (EINVAL); } so = fp->f_data; - if (so->so_type != SOCK_STREAM) { + if (so->so_type != SOCK_STREAM || + so->so_proto->pr_protocol != IPPROTO_TCP) { fdrop(fp, curthread); return (EINVAL); } ICL_CONN_LOCK(ic); - if (ic->ic_socket != NULL) { ICL_CONN_UNLOCK(ic); fdrop(fp, curthread); return (EBUSY); } - - ic->ic_socket = fp->f_data; + ic->ic_disconnecting = false; + ic->ic_socket = so; fp->f_ops = &badfileops; fp->f_data = NULL; fdrop(fp, curthread); ICL_CONN_UNLOCK(ic); - error = icl_conn_start(ic); - if (!error) - cxgbei_conn_handoff(ic); + /* Find the adapter offloading this socket. */ + fa.sc = NULL; + fa.so = so; + t4_iterate(find_offload_adapter, &fa); + if (fa.sc == NULL) + return (EINVAL); + icc->sc = fa.sc; + error = icl_cxgbei_setsockopt(ic, so); + if (error) + return (error); + + inp = sotoinpcb(so); + INP_WLOCK(inp); + tp = intotcpcb(inp); + if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) + error = EBUSY; + else { + /* + * socket could not have been "unoffloaded" if here. + */ + MPASS(tp->t_flags & TF_TOE); + MPASS(tp->tod != NULL); + MPASS(tp->t_toe != NULL); + + toep = tp->t_toe; + icc->toep = toep; + icc->ulp_submode = 0; + if (ic->ic_header_crc32c) + icc->ulp_submode |= ULP_CRC_HEADER; + if (ic->ic_data_crc32c) + icc->ulp_submode |= ULP_CRC_DATA; + so->so_options |= SO_NO_DDP; + toep->ulp_mode = ULP_MODE_ISCSI; + toep->ulpcb = icc; + + set_ulp_mode_iscsi(icc->sc, toep, ic->ic_header_crc32c, + ic->ic_data_crc32c); + error = 0; + } + INP_WUNLOCK(inp); + return (error); } void icl_cxgbei_conn_close(struct icl_conn *ic) { - struct icl_pdu *pdu; + struct icl_cxgbei_conn *icc = ic_to_icc(ic); + struct socket *so; + struct toepcb *toep = icc->toep; + MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); ICL_CONN_LOCK_ASSERT_NOT(ic); + CTR3(KTR_CXGBE, "%s: tid %u, icc %p", __func__, toep->tid, icc); + ICL_CONN_LOCK(ic); - if (ic->ic_socket == NULL) { + so = ic->ic_socket; + if (so == NULL) { ICL_CONN_UNLOCK(ic); return; } - - /* - * Deregister socket upcalls. - */ - ICL_CONN_UNLOCK(ic); - SOCKBUF_LOCK(&ic->ic_socket->so_snd); - if (ic->ic_socket->so_snd.sb_upcall != NULL) - soupcall_clear(ic->ic_socket, SO_SND); - SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); - SOCKBUF_LOCK(&ic->ic_socket->so_rcv); - if (ic->ic_socket->so_rcv.sb_upcall != NULL) - soupcall_clear(ic->ic_socket, SO_RCV); - SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); - ICL_CONN_LOCK(ic); - + ic->ic_socket = NULL; ic->ic_disconnecting = true; - /* - * Wake up the threads, so they can properly terminate. - */ - while (ic->ic_receive_running || ic->ic_send_running) { - //ICL_DEBUG("waiting for send/receive threads to terminate"); - cv_signal(&ic->ic_receive_cv); - cv_signal(&ic->ic_send_cv); - cv_wait(&ic->ic_send_cv, ic->ic_lock); - } - //ICL_DEBUG("send/receive threads terminated"); + mbufq_drain(&toep->ulp_pduq); - ICL_CONN_UNLOCK(ic); - cxgbei_conn_close(ic); - soclose(ic->ic_socket); - ICL_CONN_LOCK(ic); - ic->ic_socket = NULL; + /* These are unused in this driver right now. */ + MPASS(STAILQ_EMPTY(&ic->ic_to_send)); + MPASS(ic->ic_receive_pdu == NULL); - if (ic->ic_receive_pdu != NULL) { - //ICL_DEBUG("freeing partially received PDU"); - icl_pdu_free(ic->ic_receive_pdu); - ic->ic_receive_pdu = NULL; - } - - /* - * Remove any outstanding PDUs from the send queue. - */ - while (!STAILQ_EMPTY(&ic->ic_to_send)) { - pdu = STAILQ_FIRST(&ic->ic_to_send); - STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next); - icl_pdu_free(pdu); - } - - KASSERT(STAILQ_EMPTY(&ic->ic_to_send), - ("destroying session with non-empty send queue")); #ifdef DIAGNOSTIC KASSERT(ic->ic_outstanding_pdus == 0, ("destroying session with %d outstanding PDUs", ic->ic_outstanding_pdus)); #endif ICL_CONN_UNLOCK(ic); + + /* + * XXXNP: we should send RST instead of FIN when PDUs held in various + * queues were purged instead of delivered reliably but soabort isn't + * really general purpose and wouldn't do the right thing here. + */ + soclose(so); } int icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) { void *prv; *task_tagp = icl_conn_build_tasktag(ic, *task_tagp); prv = uma_zalloc(icl_transfer_zone, M_NOWAIT | M_ZERO); if (prv == NULL) return (ENOMEM); *prvp = prv; cxgbei_conn_task_reserve_itt(ic, prvp, csio, task_tagp); return (0); } void icl_cxgbei_conn_task_done(struct icl_conn *ic, void *prv) { cxgbei_cleanup_task(ic, prv); uma_zfree(icl_transfer_zone, prv); } int icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io, uint32_t *transfer_tag, void **prvp) { void *prv; *transfer_tag = icl_conn_build_tasktag(ic, *transfer_tag); prv = uma_zalloc(icl_transfer_zone, M_NOWAIT | M_ZERO); if (prv == NULL) return (ENOMEM); *prvp = prv; cxgbei_conn_transfer_reserve_ttt(ic, prvp, io, transfer_tag); return (0); } void icl_cxgbei_conn_transfer_done(struct icl_conn *ic, void *prv) { cxgbei_cleanup_task(ic, prv); uma_zfree(icl_transfer_zone, prv); } static int icl_cxgbei_limits(size_t *limitp) { - *limitp = 8 * 1024; + *limitp = CXGBEI_MAX_DSL; return (0); } -#ifdef ICL_KERNEL_PROXY -int -icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so) -{ - int error; - - ICL_CONN_LOCK_ASSERT_NOT(ic); - - if (so->so_type != SOCK_STREAM) - return (EINVAL); - - ICL_CONN_LOCK(ic); - if (ic->ic_socket != NULL) { - ICL_CONN_UNLOCK(ic); - return (EBUSY); - } - ic->ic_socket = so; - ICL_CONN_UNLOCK(ic); - - error = icl_conn_start(ic); - - return (error); -} -#endif /* ICL_KERNEL_PROXY */ - static int icl_cxgbei_load(void) { int error; - icl_cxgbei_pdu_zone = uma_zcreate("icl_cxgbei_pdu", - sizeof(struct icl_cxgbei_pdu), NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, 0); icl_transfer_zone = uma_zcreate("icl_transfer", 16 * 1024, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); refcount_init(&icl_cxgbei_ncons, 0); error = icl_register("cxgbei", 100, icl_cxgbei_limits, icl_cxgbei_new_conn); KASSERT(error == 0, ("failed to register")); return (error); } static int icl_cxgbei_unload(void) { if (icl_cxgbei_ncons != 0) return (EBUSY); icl_unregister("cxgbei"); - uma_zdestroy(icl_cxgbei_pdu_zone); uma_zdestroy(icl_transfer_zone); return (0); } static int icl_cxgbei_modevent(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: return (icl_cxgbei_load()); case MOD_UNLOAD: return (icl_cxgbei_unload()); default: return (EINVAL); } } moduledata_t icl_cxgbei_data = { "icl_cxgbei", icl_cxgbei_modevent, 0 }; DECLARE_MODULE(icl_cxgbei, icl_cxgbei_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); MODULE_DEPEND(icl_cxgbei, icl, 1, 1, 1); MODULE_VERSION(icl_cxgbei, 1);