diff --git a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
--- a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
+++ b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
@@ -873,6 +873,28 @@
 	}
 }
 
+static inline bool
+ddp_sgl_check(struct ctl_sg_entry *sg, int entries, int xferlen)
+{
+	int total_len = 0;
+
+	MPASS(entries > 0);
+	if (((vm_offset_t)sg[--entries].addr & 3U) != 0)
+		return (false);
+
+	total_len += sg[entries].len;
+
+	while (--entries >= 0) {
+		if (((vm_offset_t)sg[entries].addr & PAGE_MASK) != 0 ||
+		    (sg[entries].len % PAGE_SIZE) != 0)
+			return (false);
+		total_len += sg[entries].len;
+	}
+
+	MPASS(total_len == xferlen);
+	return (true);
+}
+
 /* XXXNP: PDU should be passed in as parameter, like on the initiator. */
 #define io_to_request_pdu(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr)
 #define io_to_ppod_reservation(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr)
@@ -888,6 +910,8 @@
 	struct cxgbei_data *ci = sc->iscsi_ulp_softc;
 	struct ppod_region *pr = &ci->pr;
 	struct ppod_reservation *prsv;
+	struct ctl_sg_entry *sgl, sg_entry;
+	int sg_entries = ctsio->kern_sg_entries;
 	uint32_t ttt;
 	int xferlen, rc = 0, alias;
 
@@ -898,7 +922,6 @@
 	if (ctsio->ext_data_filled == 0) {
 		int first_burst;
 		struct icl_pdu *ip = io_to_request_pdu(io);
-		vm_offset_t buf;
 #ifdef INVARIANTS
 		struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
 
@@ -931,18 +954,16 @@
 			return (0);
 		}
 
-		if (ctsio->kern_sg_entries == 0)
-			buf = (vm_offset_t)ctsio->kern_data_ptr;
-		else if (ctsio->kern_sg_entries == 1) {
-			struct ctl_sg_entry *sgl = (void *)ctsio->kern_data_ptr;
+		if (sg_entries == 0) {
+			sgl = &sg_entry;
+			sgl->len = xferlen;
+			sgl->addr = (void *)ctsio->kern_data_ptr;
+			sg_entries = 1;
+		} else
+			sgl = (void *)ctsio->kern_data_ptr;
 
-			MPASS(sgl->len == xferlen);
-			buf = (vm_offset_t)sgl->addr;
-		} else {
-			rc = EAGAIN;	/* XXX implement */
+		if (!ddp_sgl_check(sgl, sg_entries, xferlen))
 			goto no_ddp;
-		}
-
 		/*
 		 * Reserve resources for DDP, update the ttt that should be used
@@ -956,14 +977,15 @@
 			goto no_ddp;
 		}
 
-		rc = t4_alloc_page_pods_for_buf(pr, buf, xferlen, prsv);
+		rc = t4_alloc_page_pods_for_sgl(pr, sgl, sg_entries, prsv);
 		if (rc != 0) {
 			uma_zfree(prsv_zone, prsv);
 			goto no_ddp;
 		}
 
-		rc = t4_write_page_pods_for_buf(sc, toep, prsv, buf, xferlen);
-		if (rc != 0) {
+		rc = t4_write_page_pods_for_sgl(sc, toep, prsv, sgl, sg_entries,
+		    xferlen);
+		if (__predict_false(rc != 0)) {
 			t4_free_page_pods(prsv);
 			uma_zfree(prsv_zone, prsv);
 			goto no_ddp;
diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c
--- a/sys/dev/cxgbe/tom/t4_ddp.c
+++ b/sys/dev/cxgbe/tom/t4_ddp.c
@@ -62,6 +62,9 @@
 #include 
 #include 
 
+#include 
+#include 
+
 #ifdef TCP_OFFLOAD
 #include "common/common.h"
 #include "common/t4_msg.h"
@@ -981,6 +984,76 @@
 	return (0);
 }
 
+int
+t4_alloc_page_pods_for_sgl(struct ppod_region *pr, struct ctl_sg_entry *sgl,
+    int entries, struct ppod_reservation *prsv)
+{
+	int hcf, seglen, idx = 0, npages, nppods, i, len;
+	uintptr_t start_pva, end_pva, pva, p1;
+	vm_offset_t buf;
+	struct ctl_sg_entry *sge;
+
+	MPASS(entries > 0);
+	MPASS(sgl);
+
+	/*
+	 * The DDP page size is unrelated to the VM page size.  We combine
+	 * contiguous physical pages into larger segments to get the best DDP
+	 * page size possible.  This is the largest of the four sizes in
+	 * A_ULP_RX_ISCSI_PSZ that evenly divides the HCF of the segment sizes
+	 * in the page list.
+	 */
+	hcf = 0;
+	for (i = entries - 1; i >= 0; i--) {
+		sge = sgl + i;
+		buf = (vm_offset_t)sge->addr;
+		len = sge->len;
+		start_pva = trunc_page(buf);
+		end_pva = trunc_page(buf + len - 1);
+		pva = start_pva;
+		while (pva <= end_pva) {
+			seglen = PAGE_SIZE;
+			p1 = pmap_kextract(pva);
+			pva += PAGE_SIZE;
+			while (pva <= end_pva && p1 + seglen ==
+			    pmap_kextract(pva)) {
+				seglen += PAGE_SIZE;
+				pva += PAGE_SIZE;
+			}
+
+			hcf = calculate_hcf(hcf, seglen);
+			if (hcf < (1 << pr->pr_page_shift[1])) {
+				idx = 0;
+				goto have_pgsz;	/* give up, short circuit */
+			}
+		}
+	}
+#define PR_PAGE_MASK(x) ((1 << pr->pr_page_shift[(x)]) - 1)
+	MPASS((hcf & PR_PAGE_MASK(0)) == 0); /* PAGE_SIZE is >= 4K everywhere */
+	for (idx = nitems(pr->pr_page_shift) - 1; idx > 0; idx--) {
+		if ((hcf & PR_PAGE_MASK(idx)) == 0)
+			break;
+	}
+#undef PR_PAGE_MASK
+
+have_pgsz:
+	MPASS(idx <= M_PPOD_PGSZ);
+
+	npages = 0;
+	while (entries--) {
+		npages++;
+		start_pva = trunc_page((vm_offset_t)sgl->addr);
+		end_pva = trunc_page((vm_offset_t)sgl->addr + sgl->len - 1);
+		npages += (end_pva - start_pva) >> pr->pr_page_shift[idx];
+		sgl = sgl + 1;
+	}
+	nppods = howmany(npages, PPOD_PAGES);
+	if (alloc_page_pods(pr, nppods, idx, prsv) != 0)
+		return (ENOMEM);
+	MPASS(prsv->prsv_nppods > 0);
+	return (0);
+}
+
 void
 t4_free_page_pods(struct ppod_reservation *prsv)
 {
@@ -1197,6 +1270,124 @@
 	return (0);
 }
 
+int
+t4_write_page_pods_for_sgl(struct adapter *sc, struct toepcb *toep,
+    struct ppod_reservation *prsv, struct ctl_sg_entry *sgl, int entries,
+    int xferlen)
+{
+	struct inpcb *inp = toep->inp;
+	struct ulp_mem_io *ulpmc;
+	struct ulptx_idata *ulpsc;
+	struct pagepod *ppod;
+	int i, j, k, n, chunk, len, ddp_pgsz;
+	u_int ppod_addr, offset, sg_offset = 0;
+	uint32_t cmd;
+	struct ppod_region *pr = prsv->prsv_pr;
+	uintptr_t pva, pa;
+	struct mbuf *m;
+	struct mbufq wrq;
+
+	MPASS(sgl != NULL);
+	MPASS(entries > 0);
+	cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE));
+	if (is_t4(sc))
+		cmd |= htobe32(F_ULP_MEMIO_ORDER);
+	else
+		cmd |= htobe32(F_T5_ULP_MEMIO_IMM);
+	ddp_pgsz = 1 << pr->pr_page_shift[G_PPOD_PGSZ(prsv->prsv_tag)];
+	offset = (vm_offset_t)sgl->addr & PAGE_MASK;
+	ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask);
+	pva = trunc_page((vm_offset_t)sgl->addr);
+	mbufq_init(&wrq, INT_MAX);
+	for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) {
+
+		/* How many page pods are we writing in this cycle */
+		n = min(prsv->prsv_nppods - i, NUM_ULP_TX_SC_IMM_PPODS);
+		MPASS(n > 0);
+		chunk = PPOD_SZ(n);
+		len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16);
+
+		m = alloc_raw_wr_mbuf(len);
+		if (m == NULL) {
+			mbufq_drain(&wrq);
+			return (ENOMEM);
+		}
+		ulpmc = mtod(m, struct ulp_mem_io *);
+
+		INIT_ULPTX_WR(ulpmc, len, 0, toep->tid);
+		ulpmc->cmd = cmd;
+		ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32));
+		ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16));
+		ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5));
+
+		ulpsc = (struct ulptx_idata *)(ulpmc + 1);
+		ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
+		ulpsc->len = htobe32(chunk);
+
+		ppod = (struct pagepod *)(ulpsc + 1);
+		for (j = 0; j < n; i++, j++, ppod++) {
+			ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID |
+			    V_PPOD_TID(toep->tid) |
+			    (prsv->prsv_tag & ~V_PPOD_PGSZ(M_PPOD_PGSZ)));
+			ppod->len_offset = htobe64(V_PPOD_LEN(xferlen) |
+			    V_PPOD_OFST(offset));
+			ppod->rsvd = 0;
+
+			for (k = 0; k < nitems(ppod->addr); k++) {
+				if (entries != 0) {
+					pa = pmap_kextract(pva + sg_offset);
+					ppod->addr[k] = htobe64(pa);
+				} else
+					ppod->addr[k] = 0;
+
+#if 0
+				CTR5(KTR_CXGBE,
+				    "%s: tid %d ppod[%d]->addr[%d] = %p",
+				    __func__, toep->tid, i, k,
+				    htobe64(ppod->addr[k]));
+#endif
+
+				/*
+				 * If this is the last entry in a pod,
+				 * reuse the same entry for first address
+				 * in the next pod.
+				 */
+				if (k + 1 == nitems(ppod->addr))
+					break;
+
+				/*
+				 * Don't move to the next DDP page if the
+				 * sgl is already finished.
+				 */
+				if (entries == 0)
+					continue;
+
+				sg_offset += ddp_pgsz;
+				if (sg_offset == sgl->len) {
+					/*
+					 * This sgl entry is done.  Go
+					 * to the next.
+					 */
+					entries--;
+					sgl++;
+					sg_offset = 0;
+					if (entries != 0)
+						pva = trunc_page(
+						    (vm_offset_t)sgl->addr);
+				}
+			}
+		}
+
+		mbufq_enqueue(&wrq, m);
+	}
+
+	INP_WLOCK(inp);
+	mbufq_concat(&toep->ulp_pduq, &wrq);
+	INP_WUNLOCK(inp);
+
+	return (0);
+}
+
 /*
  * Prepare a pageset for DDP.  This sets up page pods.
  */
diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h
--- a/sys/dev/cxgbe/tom/t4_tom.h
+++ b/sys/dev/cxgbe/tom/t4_tom.h
@@ -88,6 +88,7 @@
 	DDP_DEAD	= (1 << 6),	/* toepcb is shutting down */
 };
 
+struct ctl_sg_entry;
 struct sockopt;
 struct offload_settings;
 
@@ -437,10 +438,14 @@
 int t4_alloc_page_pods_for_ps(struct ppod_region *, struct pageset *);
 int t4_alloc_page_pods_for_buf(struct ppod_region *, vm_offset_t, int,
     struct ppod_reservation *);
+int t4_alloc_page_pods_for_sgl(struct ppod_region *, struct ctl_sg_entry *, int,
+    struct ppod_reservation *);
 int t4_write_page_pods_for_ps(struct adapter *, struct sge_wrq *, int,
    struct pageset *);
 int t4_write_page_pods_for_buf(struct adapter *, struct toepcb *,
    struct ppod_reservation *, vm_offset_t, int);
+int t4_write_page_pods_for_sgl(struct adapter *, struct toepcb *,
+    struct ppod_reservation *, struct ctl_sg_entry *, int, int);
 void t4_free_page_pods(struct ppod_reservation *);
 int t4_soreceive_ddp(struct socket *, struct sockaddr **, struct uio *,
     struct mbuf **, struct mbuf **, int *);
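Note (not part of the patch): the buffer shape that the new ddp_sgl_check() accepts is easiest to see in isolation. Below is a standalone, userland-only sketch of that rule, every SG entry except the last must start on a page boundary and span whole pages, and the last entry's address must be 4-byte aligned; the struct, page size, and names (demo_sg_entry, DEMO_PAGE_SIZE, demo_sgl_ok) are stand-ins, not the CTL definitions. The in-kernel check additionally asserts that the entry lengths sum to the transfer length.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_SIZE	4096u
#define DEMO_PAGE_MASK	(DEMO_PAGE_SIZE - 1)

struct demo_sg_entry {
	uintptr_t	addr;
	size_t		len;
};

static bool
demo_sgl_ok(const struct demo_sg_entry *sg, int entries)
{
	int i;

	/* Every entry but the last must be page aligned and whole pages. */
	for (i = 0; i < entries - 1; i++) {
		if ((sg[i].addr & DEMO_PAGE_MASK) != 0 ||
		    (sg[i].len % DEMO_PAGE_SIZE) != 0)
			return (false);
	}
	/* The tail entry may be short but must be 4-byte aligned. */
	return ((sg[entries - 1].addr & 3u) == 0);
}

int
main(void)
{
	struct demo_sg_entry ok[2] = {
		{ 0x10000, 2 * DEMO_PAGE_SIZE },	/* aligned, 2 pages */
		{ 0x30004, 100 },			/* short tail is fine */
	};
	struct demo_sg_entry bad[2] = {
		{ 0x10200, DEMO_PAGE_SIZE },		/* misaligned start */
		{ 0x30000, 100 },
	};

	printf("ok  -> %d\n", demo_sgl_ok(ok, 2));	/* prints 1 */
	printf("bad -> %d\n", demo_sgl_ok(bad, 2));	/* prints 0 */
	return (0);
}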
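Note (not part of the patch): t4_alloc_page_pods_for_sgl() picks the DDP page size by taking the HCF (GCD) of the physically contiguous segment lengths and then using the largest supported page size that divides it. A minimal standalone sketch of that selection follows; the page shifts and segment lengths here are illustrative only, not the values programmed into A_ULP_RX_ISCSI_PSZ or read from pmap_kextract().

#include <stdio.h>

static unsigned long
demo_hcf(unsigned long a, unsigned long b)
{
	/* Euclid's algorithm; demo_hcf(0, x) == x, as in the kernel helper. */
	while (b != 0) {
		unsigned long t = a % b;
		a = b;
		b = t;
	}
	return (a);
}

int
main(void)
{
	/* Illustrative DDP page shifts: 4K, 64K, 1M, 16M. */
	const int pr_page_shift[4] = { 12, 16, 20, 24 };
	/* Lengths of physically contiguous runs found in the buffer. */
	const unsigned long seglen[] = { 1ul << 20, 3ul << 16, 1ul << 16 };
	unsigned long hcf = 0;
	int i, idx;

	for (i = 0; i < (int)(sizeof(seglen) / sizeof(seglen[0])); i++)
		hcf = demo_hcf(hcf, seglen[i]);

	/* Largest page size whose mask divides the HCF evenly. */
	for (idx = 3; idx > 0; idx--) {
		if ((hcf & ((1ul << pr_page_shift[idx]) - 1)) == 0)
			break;
	}
	printf("hcf %#lx -> DDP page size %lu\n", hcf,
	    1ul << pr_page_shift[idx]);	/* 0x10000 -> 65536 */
	return (0);
}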
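Note (not part of the patch): in t4_write_page_pods_for_sgl(), each page pod carries five DDP page addresses and the fifth is repeated as the first address of the next pod, so consecutive pods overlap by one page; once the SGL is exhausted, the remaining slots are written as zero. A small standalone simulation of that slot layout (the page count is made up; -1 stands in for the zeroed addresses):

#include <stdio.h>

#define SLOTS_PER_POD	5	/* a cxgbe pagepod holds five addresses */

int
main(void)
{
	int npages = 9;		/* DDP pages backing the transfer */
	int page = 0, pod, k;

	for (pod = 0; page < npages; pod++) {
		for (k = 0; k < SLOTS_PER_POD; k++) {
			printf("pod %d slot %d -> page %d\n", pod, k,
			    page < npages ? page : -1);
			if (k + 1 == SLOTS_PER_POD)
				break;	/* last slot is reused by next pod */
			page++;
		}
	}
	/* 9 pages -> pods 0..2: pages 0-4, 4-8, then 8 plus zeroed slots. */
	return (0);
}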