Index: head/sys/dev/cxgbe/cxgbei/icl_cxgbei.c =================================================================== --- head/sys/dev/cxgbe/cxgbei/icl_cxgbei.c (revision 300039) +++ head/sys/dev/cxgbe/cxgbei/icl_cxgbei.c (revision 300040) @@ -1,896 +1,896 @@ /*- * Copyright (c) 2012 The FreeBSD Foundation * Copyright (c) 2015 Chelsio Communications, Inc. * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * cxgbei implementation of iSCSI Common Layer kobj(9) interface. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common/common.h" #include "tom/t4_tom.h" #include "cxgbei.h" SYSCTL_NODE(_kern_icl, OID_AUTO, cxgbei, CTLFLAG_RD, 0, "Chelsio iSCSI offload"); static int coalesce = 1; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, coalesce, CTLFLAG_RWTUN, &coalesce, 0, "Try to coalesce PDUs before sending"); static int partial_receive_len = 128 * 1024; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, &partial_receive_len, 0, "Minimum read size for partially received " "data segment"); static int sendspace = 1048576; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, sendspace, CTLFLAG_RWTUN, &sendspace, 0, "Default send socket buffer size"); static int recvspace = 1048576; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, recvspace, CTLFLAG_RWTUN, &recvspace, 0, "Default receive socket buffer size"); static uma_zone_t icl_transfer_zone; static volatile u_int icl_cxgbei_ncons; #define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) #define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) #define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) #define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) struct icl_pdu *icl_cxgbei_new_pdu(int); void icl_cxgbei_new_pdu_set_conn(struct icl_pdu *, struct icl_conn *); static icl_conn_new_pdu_t icl_cxgbei_conn_new_pdu; icl_conn_pdu_free_t icl_cxgbei_conn_pdu_free; static icl_conn_pdu_data_segment_length_t icl_cxgbei_conn_pdu_data_segment_length; static icl_conn_pdu_append_data_t icl_cxgbei_conn_pdu_append_data; static icl_conn_pdu_get_data_t icl_cxgbei_conn_pdu_get_data; static icl_conn_pdu_queue_t icl_cxgbei_conn_pdu_queue; static icl_conn_handoff_t icl_cxgbei_conn_handoff; static icl_conn_free_t icl_cxgbei_conn_free; static icl_conn_close_t icl_cxgbei_conn_close; static icl_conn_task_setup_t icl_cxgbei_conn_task_setup; static icl_conn_task_done_t icl_cxgbei_conn_task_done; static icl_conn_transfer_setup_t icl_cxgbei_conn_transfer_setup; static icl_conn_transfer_done_t icl_cxgbei_conn_transfer_done; static kobj_method_t icl_cxgbei_methods[] = { KOBJMETHOD(icl_conn_new_pdu, icl_cxgbei_conn_new_pdu), KOBJMETHOD(icl_conn_pdu_free, icl_cxgbei_conn_pdu_free), KOBJMETHOD(icl_conn_pdu_data_segment_length, icl_cxgbei_conn_pdu_data_segment_length), KOBJMETHOD(icl_conn_pdu_append_data, icl_cxgbei_conn_pdu_append_data), KOBJMETHOD(icl_conn_pdu_get_data, icl_cxgbei_conn_pdu_get_data), KOBJMETHOD(icl_conn_pdu_queue, icl_cxgbei_conn_pdu_queue), KOBJMETHOD(icl_conn_handoff, icl_cxgbei_conn_handoff), KOBJMETHOD(icl_conn_free, icl_cxgbei_conn_free), KOBJMETHOD(icl_conn_close, icl_cxgbei_conn_close), KOBJMETHOD(icl_conn_task_setup, icl_cxgbei_conn_task_setup), KOBJMETHOD(icl_conn_task_done, icl_cxgbei_conn_task_done), KOBJMETHOD(icl_conn_transfer_setup, icl_cxgbei_conn_transfer_setup), KOBJMETHOD(icl_conn_transfer_done, icl_cxgbei_conn_transfer_done), { 0, 0 } }; DEFINE_CLASS(icl_cxgbei, icl_cxgbei_methods, sizeof(struct icl_cxgbei_conn)); #if 0 /* * Subtract another 256 for AHS from MAX_DSL if AHS could be used. */ #define CXGBEI_MAX_PDU 16224 #define CXGBEI_MAX_DSL (CXGBEI_MAX_PDU - sizeof(struct iscsi_bhs) - 8) #endif #define CXGBEI_MAX_DSL 8192 #define CXGBEI_MAX_PDU (CXGBEI_MAX_DSL + sizeof(struct iscsi_bhs) + 8) void icl_cxgbei_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) { #ifdef INVARIANTS struct icl_cxgbei_pdu *icp = ip_to_icp(ip); #endif MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE); MPASS(ic == ip->ip_conn); MPASS(ip->ip_bhs_mbuf != NULL); m_freem(ip->ip_ahs_mbuf); m_freem(ip->ip_data_mbuf); m_freem(ip->ip_bhs_mbuf); /* storage for icl_cxgbei_pdu itself */ #ifdef DIAGNOSTIC if (__predict_true(ic != NULL)) refcount_release(&ic->ic_outstanding_pdus); #endif } struct icl_pdu * icl_cxgbei_new_pdu(int flags) { struct icl_cxgbei_pdu *icp; struct icl_pdu *ip; struct mbuf *m; uintptr_t a; m = m_gethdr(flags, MT_DATA); if (__predict_false(m == NULL)) return (NULL); a = roundup2(mtod(m, uintptr_t), _Alignof(struct icl_cxgbei_pdu)); icp = (struct icl_cxgbei_pdu *)a; bzero(icp, sizeof(*icp)); icp->icp_signature = CXGBEI_PDU_SIGNATURE; ip = &icp->ip; ip->ip_bhs_mbuf = m; a = roundup2((uintptr_t)(icp + 1), _Alignof(struct iscsi_bhs *)); ip->ip_bhs = (struct iscsi_bhs *)a; #ifdef INVARIANTS /* Everything must fit entirely in the mbuf. */ a = (uintptr_t)(ip->ip_bhs + 1); MPASS(a <= (uintptr_t)m + MSIZE); #endif bzero(ip->ip_bhs, sizeof(*ip->ip_bhs)); m->m_data = (void *)ip->ip_bhs; m->m_len = sizeof(struct iscsi_bhs); m->m_pkthdr.len = m->m_len; return (ip); } void icl_cxgbei_new_pdu_set_conn(struct icl_pdu *ip, struct icl_conn *ic) { ip->ip_conn = ic; #ifdef DIAGNOSTIC refcount_acquire(&ic->ic_outstanding_pdus); #endif } /* * Allocate icl_pdu with empty BHS to fill up by the caller. */ static struct icl_pdu * icl_cxgbei_conn_new_pdu(struct icl_conn *ic, int flags) { struct icl_pdu *ip; ip = icl_cxgbei_new_pdu(flags); if (__predict_false(ip == NULL)) return (NULL); icl_cxgbei_new_pdu_set_conn(ip, ic); return (ip); } static size_t icl_pdu_data_segment_length(const struct icl_pdu *request) { uint32_t len = 0; len += request->ip_bhs->bhs_data_segment_len[0]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[1]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[2]; return (len); } size_t icl_cxgbei_conn_pdu_data_segment_length(struct icl_conn *ic, const struct icl_pdu *request) { return (icl_pdu_data_segment_length(request)); } static uint32_t icl_conn_build_tasktag(struct icl_conn *ic, uint32_t tag) { return tag; } static struct mbuf * finalize_pdu(struct icl_cxgbei_conn *icc, struct icl_cxgbei_pdu *icp) { struct icl_pdu *ip = &icp->ip; uint8_t ulp_submode, padding; struct mbuf *m, *last; struct iscsi_bhs *bhs; /* * Fix up the data segment mbuf first. */ m = ip->ip_data_mbuf; ulp_submode = icc->ulp_submode; if (m) { last = m_last(m); /* * Round up the data segment to a 4B boundary. Pad with 0 if * necessary. There will definitely be room in the mbuf. */ padding = roundup2(ip->ip_data_len, 4) - ip->ip_data_len; if (padding) { bzero(mtod(last, uint8_t *) + last->m_len, padding); last->m_len += padding; } } else { MPASS(ip->ip_data_len == 0); ulp_submode &= ~ULP_CRC_DATA; padding = 0; } /* * Now the header mbuf that has the BHS. */ m = ip->ip_bhs_mbuf; MPASS(m->m_pkthdr.len == sizeof(struct iscsi_bhs)); MPASS(m->m_len == sizeof(struct iscsi_bhs)); bhs = ip->ip_bhs; bhs->bhs_data_segment_len[2] = ip->ip_data_len; bhs->bhs_data_segment_len[1] = ip->ip_data_len >> 8; bhs->bhs_data_segment_len[0] = ip->ip_data_len >> 16; /* "Convert" PDU to mbuf chain. Do not use icp/ip after this. */ m->m_pkthdr.len = sizeof(struct iscsi_bhs) + ip->ip_data_len + padding; m->m_next = ip->ip_data_mbuf; set_mbuf_ulp_submode(m, ulp_submode); #ifdef INVARIANTS bzero(icp, sizeof(*icp)); #endif #ifdef DIAGNOSTIC refcount_release(&icc->ic.ic_outstanding_pdus); #endif return (m); } int icl_cxgbei_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *ip, const void *addr, size_t len, int flags) { struct mbuf *m; #ifdef INVARIANTS struct icl_cxgbei_pdu *icp = ip_to_icp(ip); #endif MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE); MPASS(ic == ip->ip_conn); KASSERT(len > 0, ("%s: len is %jd", __func__, (intmax_t)len)); m = ip->ip_data_mbuf; if (m == NULL) { m = m_getjcl(M_NOWAIT, MT_DATA, 0, MJUM16BYTES); if (__predict_false(m == NULL)) return (ENOMEM); ip->ip_data_mbuf = m; } if (__predict_true(m_append(m, len, addr) != 0)) { ip->ip_data_len += len; MPASS(ip->ip_data_len <= CXGBEI_MAX_DSL); return (0); } else { if (flags & M_WAITOK) { CXGBE_UNIMPLEMENTED("fail safe append"); } ip->ip_data_len = m_length(m, NULL); return (1); } } void icl_cxgbei_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, size_t off, void *addr, size_t len) { struct icl_cxgbei_pdu *icp = ip_to_icp(ip); if (icp->pdu_flags & SBUF_ULP_FLAG_DATA_DDPED) return; /* data is DDP'ed, no need to copy */ m_copydata(ip->ip_data_mbuf, off, len, addr); } void icl_cxgbei_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) { struct icl_cxgbei_conn *icc = ic_to_icc(ic); struct icl_cxgbei_pdu *icp = ip_to_icp(ip); struct socket *so = ic->ic_socket; struct toepcb *toep = icc->toep; struct inpcb *inp; struct mbuf *m; MPASS(ic == ip->ip_conn); MPASS(ip->ip_bhs_mbuf != NULL); /* The kernel doesn't generate PDUs with AHS. */ MPASS(ip->ip_ahs_mbuf == NULL && ip->ip_ahs_len == 0); ICL_CONN_LOCK_ASSERT(ic); /* NOTE: sowriteable without so_snd lock is a mostly harmless race. */ if (ic->ic_disconnecting || so == NULL || !sowriteable(so)) { icl_cxgbei_conn_pdu_free(ic, ip); return; } m = finalize_pdu(icc, icp); M_ASSERTPKTHDR(m); MPASS((m->m_pkthdr.len & 3) == 0); MPASS(m->m_pkthdr.len + 8 <= CXGBEI_MAX_PDU); /* * Do not get inp from toep->inp as the toepcb might have detached * already. */ inp = sotoinpcb(so); INP_WLOCK(inp); if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) || __predict_false((toep->flags & TPF_ATTACHED) == 0)) m_freem(m); else { mbufq_enqueue(&toep->ulp_pduq, m); t4_push_pdus(icc->sc, toep, 0); } INP_WUNLOCK(inp); } static struct icl_conn * icl_cxgbei_new_conn(const char *name, struct mtx *lock) { struct icl_cxgbei_conn *icc; struct icl_conn *ic; refcount_acquire(&icl_cxgbei_ncons); icc = (struct icl_cxgbei_conn *)kobj_create(&icl_cxgbei_class, M_CXGBE, M_WAITOK | M_ZERO); icc->icc_signature = CXGBEI_CONN_SIGNATURE; STAILQ_INIT(&icc->rcvd_pdus); ic = &icc->ic; ic->ic_lock = lock; /* XXXNP: review. Most of these icl_conn fields aren't really used */ STAILQ_INIT(&ic->ic_to_send); cv_init(&ic->ic_send_cv, "icl_cxgbei_tx"); cv_init(&ic->ic_receive_cv, "icl_cxgbei_rx"); #ifdef DIAGNOSTIC refcount_init(&ic->ic_outstanding_pdus, 0); #endif ic->ic_max_data_segment_length = CXGBEI_MAX_DSL; ic->ic_name = name; ic->ic_offload = "cxgbei"; CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc); return (ic); } void icl_cxgbei_conn_free(struct icl_conn *ic) { struct icl_cxgbei_conn *icc = ic_to_icc(ic); MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc); cv_destroy(&ic->ic_send_cv); cv_destroy(&ic->ic_receive_cv); kobj_delete((struct kobj *)icc, M_CXGBE); refcount_release(&icl_cxgbei_ncons); } static int icl_cxgbei_setsockopt(struct icl_conn *ic, struct socket *so) { size_t minspace; struct sockopt opt; int error, one = 1; /* * For sendspace, this is required because the current code cannot * send a PDU in pieces; thus, the minimum buffer size is equal * to the maximum PDU size. "+4" is to account for possible padding. * * What we should actually do here is to use autoscaling, but set * some minimal buffer size to "minspace". I don't know a way to do * that, though. */ minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; if (sendspace < minspace) sendspace = minspace; if (recvspace < minspace) recvspace = minspace; error = soreserve(so, sendspace, recvspace); if (error != 0) { icl_cxgbei_conn_close(ic); return (error); } SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_flags |= SB_AUTOSIZE; SOCKBUF_UNLOCK(&so->so_snd); SOCKBUF_LOCK(&so->so_rcv); so->so_rcv.sb_flags |= SB_AUTOSIZE; SOCKBUF_UNLOCK(&so->so_rcv); /* * Disable Nagle. */ bzero(&opt, sizeof(opt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = IPPROTO_TCP; opt.sopt_name = TCP_NODELAY; opt.sopt_val = &one; opt.sopt_valsize = sizeof(one); error = sosetopt(so, &opt); if (error != 0) { icl_cxgbei_conn_close(ic); return (error); } return (0); } /* * Request/response structure used to find out the adapter offloading a socket. */ struct find_ofld_adapter_rr { struct socket *so; struct adapter *sc; /* result */ }; static void find_offload_adapter(struct adapter *sc, void *arg) { struct find_ofld_adapter_rr *fa = arg; struct socket *so = fa->so; struct tom_data *td = sc->tom_softc; struct tcpcb *tp; struct inpcb *inp; /* Non-TCP were filtered out earlier. */ MPASS(so->so_proto->pr_protocol == IPPROTO_TCP); if (fa->sc != NULL) return; /* Found already. */ if (td == NULL) return; /* TOE not enabled on this adapter. */ inp = sotoinpcb(so); INP_WLOCK(inp); if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { tp = intotcpcb(inp); if (tp->t_flags & TF_TOE && tp->tod == &td->tod) fa->sc = sc; /* Found. */ } INP_WUNLOCK(inp); } /* XXXNP: move this to t4_tom. */ static void send_iscsi_flowc_wr(struct adapter *sc, struct toepcb *toep, int maxlen) { struct wrqe *wr; struct fw_flowc_wr *flowc; const u_int nparams = 1; u_int flowclen; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } flowc = wrtod(wr); memset(flowc, 0, wr->wr_len); flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(nparams)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | V_FW_WR_FLOWID(toep->tid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_TXDATAPLEN_MAX; flowc->mnemval[0].val = htobe32(maxlen); txsd->tx_credits = howmany(flowclen, 16); txsd->plen = 0; KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, ("%s: not enough credits (%d)", __func__, toep->tx_credits)); toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; t4_wrq_tx(sc, wr); } static void set_ulp_mode_iscsi(struct adapter *sc, struct toepcb *toep, int hcrc, int dcrc) { uint64_t val = 0; if (hcrc) val |= ULP_CRC_HEADER; if (dcrc) val |= ULP_CRC_DATA; val <<= 4; val |= ULP_MODE_ISCSI; CTR4(KTR_CXGBE, "%s: tid %u, ULP_MODE_ISCSI, CRC hdr=%d data=%d", __func__, toep->tid, hcrc, dcrc); t4_set_tcb_field(sc, toep, 1, 0, 0xfff, val); } /* * XXXNP: Who is responsible for cleaning up the socket if this returns with an * error? Review all error paths. * * XXXNP: What happens to the socket's fd reference if the operation is * successful, and how does that affect the socket's life cycle? */ int icl_cxgbei_conn_handoff(struct icl_conn *ic, int fd) { struct icl_cxgbei_conn *icc = ic_to_icc(ic); struct find_ofld_adapter_rr fa; struct file *fp; struct socket *so; struct inpcb *inp; struct tcpcb *tp; struct toepcb *toep; cap_rights_t rights; int error; MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); ICL_CONN_LOCK_ASSERT_NOT(ic); /* * Steal the socket from userland. */ error = fget(curthread, fd, cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); if (error != 0) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, curthread); return (EINVAL); } so = fp->f_data; if (so->so_type != SOCK_STREAM || so->so_proto->pr_protocol != IPPROTO_TCP) { fdrop(fp, curthread); return (EINVAL); } ICL_CONN_LOCK(ic); if (ic->ic_socket != NULL) { ICL_CONN_UNLOCK(ic); fdrop(fp, curthread); return (EBUSY); } ic->ic_disconnecting = false; ic->ic_socket = so; fp->f_ops = &badfileops; fp->f_data = NULL; fdrop(fp, curthread); ICL_CONN_UNLOCK(ic); /* Find the adapter offloading this socket. */ fa.sc = NULL; fa.so = so; t4_iterate(find_offload_adapter, &fa); if (fa.sc == NULL) return (EINVAL); icc->sc = fa.sc; error = icl_cxgbei_setsockopt(ic, so); if (error) return (error); inp = sotoinpcb(so); INP_WLOCK(inp); tp = intotcpcb(inp); if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) error = EBUSY; else { /* * socket could not have been "unoffloaded" if here. */ MPASS(tp->t_flags & TF_TOE); MPASS(tp->tod != NULL); MPASS(tp->t_toe != NULL); toep = tp->t_toe; MPASS(toep->vi->pi->adapter == icc->sc); icc->toep = toep; icc->cwt = cxgbei_select_worker_thread(icc); icc->ulp_submode = 0; if (ic->ic_header_crc32c) icc->ulp_submode |= ULP_CRC_HEADER; if (ic->ic_data_crc32c) icc->ulp_submode |= ULP_CRC_DATA; so->so_options |= SO_NO_DDP; toep->ulp_mode = ULP_MODE_ISCSI; toep->ulpcb = icc; send_iscsi_flowc_wr(icc->sc, toep, CXGBEI_MAX_PDU); set_ulp_mode_iscsi(icc->sc, toep, ic->ic_header_crc32c, ic->ic_data_crc32c); error = 0; } INP_WUNLOCK(inp); return (error); } void icl_cxgbei_conn_close(struct icl_conn *ic) { struct icl_cxgbei_conn *icc = ic_to_icc(ic); struct icl_pdu *ip; struct socket *so; struct sockbuf *sb; struct inpcb *inp; struct toepcb *toep = icc->toep; MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); ICL_CONN_LOCK_ASSERT_NOT(ic); ICL_CONN_LOCK(ic); so = ic->ic_socket; if (ic->ic_disconnecting || so == NULL) { CTR4(KTR_CXGBE, "%s: icc %p (disconnecting = %d), so %p", __func__, icc, ic->ic_disconnecting, so); ICL_CONN_UNLOCK(ic); return; } ic->ic_disconnecting = true; /* These are unused in this driver right now. */ MPASS(STAILQ_EMPTY(&ic->ic_to_send)); MPASS(ic->ic_receive_pdu == NULL); #ifdef DIAGNOSTIC KASSERT(ic->ic_outstanding_pdus == 0, ("destroying session with %d outstanding PDUs", ic->ic_outstanding_pdus)); #endif ICL_CONN_UNLOCK(ic); CTR3(KTR_CXGBE, "%s: tid %d, icc %p", __func__, toep ? toep->tid : -1, icc); inp = sotoinpcb(so); sb = &so->so_rcv; INP_WLOCK(inp); if (toep != NULL) { /* NULL if connection was never offloaded. */ toep->ulpcb = NULL; mbufq_drain(&toep->ulp_pduq); SOCKBUF_LOCK(sb); if (icc->rx_flags & RXF_ACTIVE) { volatile u_int *p = &icc->rx_flags; SOCKBUF_UNLOCK(sb); INP_WUNLOCK(inp); while (*p & RXF_ACTIVE) pause("conclo", 1); INP_WLOCK(inp); SOCKBUF_LOCK(sb); } while (!STAILQ_EMPTY(&icc->rcvd_pdus)) { ip = STAILQ_FIRST(&icc->rcvd_pdus); STAILQ_REMOVE_HEAD(&icc->rcvd_pdus, ip_next); icl_cxgbei_conn_pdu_free(ic, ip); } SOCKBUF_UNLOCK(sb); } INP_WUNLOCK(inp); ICL_CONN_LOCK(ic); ic->ic_socket = NULL; ICL_CONN_UNLOCK(ic); /* * XXXNP: we should send RST instead of FIN when PDUs held in various * queues were purged instead of delivered reliably but soabort isn't * really general purpose and wouldn't do the right thing here. */ soclose(so); } int -icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct ccb_scsiio *csio, - uint32_t *task_tagp, void **prvp) +icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, + struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) { void *prv; *task_tagp = icl_conn_build_tasktag(ic, *task_tagp); prv = uma_zalloc(icl_transfer_zone, M_NOWAIT | M_ZERO); if (prv == NULL) return (ENOMEM); *prvp = prv; cxgbei_conn_task_reserve_itt(ic, prvp, csio, task_tagp); return (0); } void icl_cxgbei_conn_task_done(struct icl_conn *ic, void *prv) { cxgbei_cleanup_task(ic, prv); uma_zfree(icl_transfer_zone, prv); } int icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io, uint32_t *transfer_tag, void **prvp) { void *prv; *transfer_tag = icl_conn_build_tasktag(ic, *transfer_tag); prv = uma_zalloc(icl_transfer_zone, M_NOWAIT | M_ZERO); if (prv == NULL) return (ENOMEM); *prvp = prv; cxgbei_conn_transfer_reserve_ttt(ic, prvp, io, transfer_tag); return (0); } void icl_cxgbei_conn_transfer_done(struct icl_conn *ic, void *prv) { cxgbei_cleanup_task(ic, prv); uma_zfree(icl_transfer_zone, prv); } static int icl_cxgbei_limits(size_t *limitp) { *limitp = CXGBEI_MAX_DSL; return (0); } static int icl_cxgbei_load(void) { int error; icl_transfer_zone = uma_zcreate("icl_transfer", 16 * 1024, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); refcount_init(&icl_cxgbei_ncons, 0); error = icl_register("cxgbei", 100, icl_cxgbei_limits, icl_cxgbei_new_conn); KASSERT(error == 0, ("failed to register")); return (error); } static int icl_cxgbei_unload(void) { if (icl_cxgbei_ncons != 0) return (EBUSY); icl_unregister("cxgbei"); uma_zdestroy(icl_transfer_zone); return (0); } static int icl_cxgbei_modevent(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: return (icl_cxgbei_load()); case MOD_UNLOAD: return (icl_cxgbei_unload()); default: return (EINVAL); } } moduledata_t icl_cxgbei_data = { "icl_cxgbei", icl_cxgbei_modevent, 0 }; DECLARE_MODULE(icl_cxgbei, icl_cxgbei_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); MODULE_DEPEND(icl_cxgbei, icl, 1, 1, 1); MODULE_VERSION(icl_cxgbei, 1); #endif Index: head/sys/dev/iscsi/icl_conn_if.m =================================================================== --- head/sys/dev/iscsi/icl_conn_if.m (revision 300039) +++ head/sys/dev/iscsi/icl_conn_if.m (revision 300040) @@ -1,107 +1,108 @@ #- # Copyright (c) 2014 The FreeBSD Foundation # All rights reserved. # # This software was developed by Edward Tomasz Napierala under sponsorship # from the FreeBSD Foundation. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # #include INTERFACE icl_conn; METHOD size_t pdu_data_segment_length { struct icl_conn *_ic; const struct icl_pdu *_ip; }; METHOD int pdu_append_data { struct icl_conn *_ic; struct icl_pdu *_ip; const void *_addr; size_t _len; int _flags; }; METHOD void pdu_get_data { struct icl_conn *_ic; struct icl_pdu *_ip; size_t _off; void *_addr; size_t _len; }; METHOD void pdu_queue { struct icl_conn *_ic; struct icl_pdu *_ip; }; METHOD void pdu_free { struct icl_conn *_ic; struct icl_pdu *_ip; }; METHOD struct icl_pdu * new_pdu { struct icl_conn *_ic; int _flags; }; METHOD void free { struct icl_conn *_ic; }; METHOD int handoff { struct icl_conn *_ic; int _fd; }; METHOD void close { struct icl_conn *_ic; }; METHOD int task_setup { struct icl_conn *_ic; + struct icl_pdu *_ip; struct ccb_scsiio *_csio; uint32_t *_task_tag; void **_prvp; }; METHOD void task_done { struct icl_conn *_ic; void *_prv; }; METHOD int transfer_setup { struct icl_conn *_ic; union ctl_io *_io; uint32_t *_transfer_tag; void **_prvp; }; METHOD void transfer_done { struct icl_conn *_ic; void *_prv; }; Index: head/sys/dev/iscsi/icl_soft.c =================================================================== --- head/sys/dev/iscsi/icl_soft.c (revision 300039) +++ head/sys/dev/iscsi/icl_soft.c (revision 300040) @@ -1,1543 +1,1543 @@ /*- * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Software implementation of iSCSI Common Layer kobj(9) interface. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int coalesce = 1; SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN, &coalesce, 0, "Try to coalesce PDUs before sending"); static int partial_receive_len = 128 * 1024; SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, &partial_receive_len, 0, "Minimum read size for partially received " "data segment"); static int sendspace = 1048576; SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN, &sendspace, 0, "Default send socket buffer size"); static int recvspace = 1048576; SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN, &recvspace, 0, "Default receive socket buffer size"); static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend"); static uma_zone_t icl_pdu_zone; static volatile u_int icl_ncons; #define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) #define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) #define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) #define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) STAILQ_HEAD(icl_pdu_stailq, icl_pdu); static icl_conn_new_pdu_t icl_soft_conn_new_pdu; static icl_conn_pdu_free_t icl_soft_conn_pdu_free; static icl_conn_pdu_data_segment_length_t icl_soft_conn_pdu_data_segment_length; static icl_conn_pdu_append_data_t icl_soft_conn_pdu_append_data; static icl_conn_pdu_get_data_t icl_soft_conn_pdu_get_data; static icl_conn_pdu_queue_t icl_soft_conn_pdu_queue; static icl_conn_handoff_t icl_soft_conn_handoff; static icl_conn_free_t icl_soft_conn_free; static icl_conn_close_t icl_soft_conn_close; static icl_conn_task_setup_t icl_soft_conn_task_setup; static icl_conn_task_done_t icl_soft_conn_task_done; static icl_conn_transfer_setup_t icl_soft_conn_transfer_setup; static icl_conn_transfer_done_t icl_soft_conn_transfer_done; static kobj_method_t icl_soft_methods[] = { KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu), KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free), KOBJMETHOD(icl_conn_pdu_data_segment_length, icl_soft_conn_pdu_data_segment_length), KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data), KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data), KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue), KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff), KOBJMETHOD(icl_conn_free, icl_soft_conn_free), KOBJMETHOD(icl_conn_close, icl_soft_conn_close), KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup), KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done), KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup), KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done), { 0, 0 } }; DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn)); static void icl_conn_fail(struct icl_conn *ic) { if (ic->ic_socket == NULL) return; /* * XXX */ ic->ic_socket->so_error = EDOOFUS; (ic->ic_error)(ic); } static struct mbuf * icl_conn_receive(struct icl_conn *ic, size_t len) { struct uio uio; struct socket *so; struct mbuf *m; int error, flags; so = ic->ic_socket; memset(&uio, 0, sizeof(uio)); uio.uio_resid = len; flags = MSG_DONTWAIT; error = soreceive(so, NULL, &uio, &m, NULL, &flags); if (error != 0) { ICL_DEBUG("soreceive error %d", error); return (NULL); } if (uio.uio_resid != 0) { m_freem(m); ICL_DEBUG("short read"); return (NULL); } return (m); } static struct icl_pdu * icl_pdu_new_empty(struct icl_conn *ic, int flags) { struct icl_pdu *ip; #ifdef DIAGNOSTIC refcount_acquire(&ic->ic_outstanding_pdus); #endif ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO); if (ip == NULL) { ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); #ifdef DIAGNOSTIC refcount_release(&ic->ic_outstanding_pdus); #endif return (NULL); } ip->ip_conn = ic; return (ip); } static void icl_pdu_free(struct icl_pdu *ip) { struct icl_conn *ic; ic = ip->ip_conn; m_freem(ip->ip_bhs_mbuf); m_freem(ip->ip_ahs_mbuf); m_freem(ip->ip_data_mbuf); uma_zfree(icl_pdu_zone, ip); #ifdef DIAGNOSTIC refcount_release(&ic->ic_outstanding_pdus); #endif } void icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) { icl_pdu_free(ip); } /* * Allocate icl_pdu with empty BHS to fill up by the caller. */ struct icl_pdu * icl_soft_conn_new_pdu(struct icl_conn *ic, int flags) { struct icl_pdu *ip; ip = icl_pdu_new_empty(ic, flags); if (ip == NULL) return (NULL); ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs), flags, MT_DATA, M_PKTHDR); if (ip->ip_bhs_mbuf == NULL) { ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); icl_pdu_free(ip); return (NULL); } ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); return (ip); } static int icl_pdu_ahs_length(const struct icl_pdu *request) { return (request->ip_bhs->bhs_total_ahs_len * 4); } static size_t icl_pdu_data_segment_length(const struct icl_pdu *request) { uint32_t len = 0; len += request->ip_bhs->bhs_data_segment_len[0]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[1]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[2]; return (len); } size_t icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic, const struct icl_pdu *request) { return (icl_pdu_data_segment_length(request)); } static void icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) { response->ip_bhs->bhs_data_segment_len[2] = len; response->ip_bhs->bhs_data_segment_len[1] = len >> 8; response->ip_bhs->bhs_data_segment_len[0] = len >> 16; } static size_t icl_pdu_padding(const struct icl_pdu *ip) { if ((ip->ip_data_len % 4) != 0) return (4 - (ip->ip_data_len % 4)); return (0); } static size_t icl_pdu_size(const struct icl_pdu *response) { size_t len; KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); len = sizeof(struct iscsi_bhs) + response->ip_data_len + icl_pdu_padding(response); if (response->ip_conn->ic_header_crc32c) len += ISCSI_HEADER_DIGEST_SIZE; if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) len += ISCSI_DATA_DIGEST_SIZE; return (len); } static int icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep) { struct mbuf *m; m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs)); if (m == NULL) { ICL_DEBUG("failed to receive BHS"); return (-1); } request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs)); if (request->ip_bhs_mbuf == NULL) { ICL_WARN("m_pullup failed"); return (-1); } request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *); /* * XXX: For architectures with strict alignment requirements * we may need to allocate ip_bhs and copy the data into it. * For some reason, though, not doing this doesn't seem * to cause problems; tested on sparc64. */ *availablep -= sizeof(struct iscsi_bhs); return (0); } static int icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep) { request->ip_ahs_len = icl_pdu_ahs_length(request); if (request->ip_ahs_len == 0) return (0); request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn, request->ip_ahs_len); if (request->ip_ahs_mbuf == NULL) { ICL_DEBUG("failed to receive AHS"); return (-1); } *availablep -= request->ip_ahs_len; return (0); } static uint32_t icl_mbuf_to_crc32c(const struct mbuf *m0) { uint32_t digest = 0xffffffff; const struct mbuf *m; for (m = m0; m != NULL; m = m->m_next) digest = calculate_crc32c(digest, mtod(m, const void *), m->m_len); digest = digest ^ 0xffffffff; return (digest); } static int icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep) { struct mbuf *m; uint32_t received_digest, valid_digest; if (request->ip_conn->ic_header_crc32c == false) return (0); m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE); if (m == NULL) { ICL_DEBUG("failed to receive header digest"); return (-1); } CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); m_copydata(m, 0, ISCSI_HEADER_DIGEST_SIZE, (void *)&received_digest); m_freem(m); *availablep -= ISCSI_HEADER_DIGEST_SIZE; /* * XXX: Handle AHS. */ valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); if (received_digest != valid_digest) { ICL_WARN("header digest check failed; got 0x%x, " "should be 0x%x", received_digest, valid_digest); return (-1); } return (0); } /* * Return the number of bytes that should be waiting in the receive socket * before icl_pdu_receive_data_segment() gets called. */ static size_t icl_pdu_data_segment_receive_len(const struct icl_pdu *request) { size_t len; len = icl_pdu_data_segment_length(request); if (len == 0) return (0); /* * Account for the parts of data segment already read from * the socket buffer. */ KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); len -= request->ip_data_len; /* * Don't always wait for the full data segment to be delivered * to the socket; this might badly affect performance due to * TCP window scaling. */ if (len > partial_receive_len) { #if 0 ICL_DEBUG("need %zd bytes of data, limiting to %zd", len, partial_receive_len)); #endif len = partial_receive_len; return (len); } /* * Account for padding. Note that due to the way code is written, * the icl_pdu_receive_data_segment() must always receive padding * along with the last part of data segment, because it would be * impossible to tell whether we've already received the full data * segment including padding, or without it. */ if ((len % 4) != 0) len += 4 - (len % 4); #if 0 ICL_DEBUG("need %zd bytes of data", len)); #endif return (len); } static int icl_pdu_receive_data_segment(struct icl_pdu *request, size_t *availablep, bool *more_neededp) { struct icl_conn *ic; size_t len, padding = 0; struct mbuf *m; ic = request->ip_conn; *more_neededp = false; ic->ic_receive_len = 0; len = icl_pdu_data_segment_length(request); if (len == 0) return (0); if ((len % 4) != 0) padding = 4 - (len % 4); /* * Account for already received parts of data segment. */ KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); len -= request->ip_data_len; if (len + padding > *availablep) { /* * Not enough data in the socket buffer. Receive as much * as we can. Don't receive padding, since, obviously, it's * not the end of data segment yet. */ #if 0 ICL_DEBUG("limited from %zd to %zd", len + padding, *availablep - padding)); #endif len = *availablep - padding; *more_neededp = true; padding = 0; } /* * Must not try to receive padding without at least one byte * of actual data segment. */ if (len > 0) { m = icl_conn_receive(request->ip_conn, len + padding); if (m == NULL) { ICL_DEBUG("failed to receive data segment"); return (-1); } if (request->ip_data_mbuf == NULL) request->ip_data_mbuf = m; else m_cat(request->ip_data_mbuf, m); request->ip_data_len += len; *availablep -= len + padding; } else ICL_DEBUG("len 0"); if (*more_neededp) ic->ic_receive_len = icl_pdu_data_segment_receive_len(request); return (0); } static int icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep) { struct mbuf *m; uint32_t received_digest, valid_digest; if (request->ip_conn->ic_data_crc32c == false) return (0); if (request->ip_data_len == 0) return (0); m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE); if (m == NULL) { ICL_DEBUG("failed to receive data digest"); return (-1); } CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE); m_copydata(m, 0, ISCSI_DATA_DIGEST_SIZE, (void *)&received_digest); m_freem(m); *availablep -= ISCSI_DATA_DIGEST_SIZE; /* * Note that ip_data_mbuf also contains padding; since digest * calculation is supposed to include that, we iterate over * the entire ip_data_mbuf chain, not just ip_data_len bytes of it. */ valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); if (received_digest != valid_digest) { ICL_WARN("data digest check failed; got 0x%x, " "should be 0x%x", received_digest, valid_digest); return (-1); } return (0); } /* * Somewhat contrary to the name, this attempts to receive only one * "part" of PDU at a time; call it repeatedly until it returns non-NULL. */ static struct icl_pdu * icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep) { struct icl_pdu *request; struct socket *so; size_t len; int error; bool more_needed; so = ic->ic_socket; if (ic->ic_receive_state == ICL_CONN_STATE_BHS) { KASSERT(ic->ic_receive_pdu == NULL, ("ic->ic_receive_pdu != NULL")); request = icl_pdu_new_empty(ic, M_NOWAIT); if (request == NULL) { ICL_DEBUG("failed to allocate PDU; " "dropping connection"); icl_conn_fail(ic); return (NULL); } ic->ic_receive_pdu = request; } else { KASSERT(ic->ic_receive_pdu != NULL, ("ic->ic_receive_pdu == NULL")); request = ic->ic_receive_pdu; } if (*availablep < ic->ic_receive_len) { #if 0 ICL_DEBUG("not enough data; need %zd, " "have %zd", ic->ic_receive_len, *availablep); #endif return (NULL); } switch (ic->ic_receive_state) { case ICL_CONN_STATE_BHS: //ICL_DEBUG("receiving BHS"); error = icl_pdu_receive_bhs(request, availablep); if (error != 0) { ICL_DEBUG("failed to receive BHS; " "dropping connection"); break; } /* * We don't enforce any limit for AHS length; * its length is stored in 8 bit field. */ len = icl_pdu_data_segment_length(request); if (len > ic->ic_max_data_segment_length) { ICL_WARN("received data segment " "length %zd is larger than negotiated " "MaxDataSegmentLength %zd; " "dropping connection", len, ic->ic_max_data_segment_length); error = EINVAL; break; } ic->ic_receive_state = ICL_CONN_STATE_AHS; ic->ic_receive_len = icl_pdu_ahs_length(request); break; case ICL_CONN_STATE_AHS: //ICL_DEBUG("receiving AHS"); error = icl_pdu_receive_ahs(request, availablep); if (error != 0) { ICL_DEBUG("failed to receive AHS; " "dropping connection"); break; } ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST; if (ic->ic_header_crc32c == false) ic->ic_receive_len = 0; else ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE; break; case ICL_CONN_STATE_HEADER_DIGEST: //ICL_DEBUG("receiving header digest"); error = icl_pdu_check_header_digest(request, availablep); if (error != 0) { ICL_DEBUG("header digest failed; " "dropping connection"); break; } ic->ic_receive_state = ICL_CONN_STATE_DATA; ic->ic_receive_len = icl_pdu_data_segment_receive_len(request); break; case ICL_CONN_STATE_DATA: //ICL_DEBUG("receiving data segment"); error = icl_pdu_receive_data_segment(request, availablep, &more_needed); if (error != 0) { ICL_DEBUG("failed to receive data segment;" "dropping connection"); break; } if (more_needed) break; ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST; if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) ic->ic_receive_len = 0; else ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE; break; case ICL_CONN_STATE_DATA_DIGEST: //ICL_DEBUG("receiving data digest"); error = icl_pdu_check_data_digest(request, availablep); if (error != 0) { ICL_DEBUG("data digest failed; " "dropping connection"); break; } /* * We've received complete PDU; reset the receive state machine * and return the PDU. */ ic->ic_receive_state = ICL_CONN_STATE_BHS; ic->ic_receive_len = sizeof(struct iscsi_bhs); ic->ic_receive_pdu = NULL; return (request); default: panic("invalid ic_receive_state %d\n", ic->ic_receive_state); } if (error != 0) { /* * Don't free the PDU; it's pointed to by ic->ic_receive_pdu * and will get freed in icl_soft_conn_close(). */ icl_conn_fail(ic); } return (NULL); } static void icl_conn_receive_pdus(struct icl_conn *ic, size_t available) { struct icl_pdu *response; struct socket *so; so = ic->ic_socket; /* * This can never happen; we're careful to only mess with ic->ic_socket * pointer when the send/receive threads are not running. */ KASSERT(so != NULL, ("NULL socket")); for (;;) { if (ic->ic_disconnecting) return; if (so->so_error != 0) { ICL_DEBUG("connection error %d; " "dropping connection", so->so_error); icl_conn_fail(ic); return; } /* * Loop until we have a complete PDU or there is not enough * data in the socket buffer. */ if (available < ic->ic_receive_len) { #if 0 ICL_DEBUG("not enough data; have %zd, " "need %zd", available, ic->ic_receive_len); #endif return; } response = icl_conn_receive_pdu(ic, &available); if (response == NULL) continue; if (response->ip_ahs_len > 0) { ICL_WARN("received PDU with unsupported " "AHS; opcode 0x%x; dropping connection", response->ip_bhs->bhs_opcode); icl_pdu_free(response); icl_conn_fail(ic); return; } (ic->ic_receive)(response); } } static void icl_receive_thread(void *arg) { struct icl_conn *ic; size_t available; struct socket *so; ic = arg; so = ic->ic_socket; for (;;) { if (ic->ic_disconnecting) { //ICL_DEBUG("terminating"); break; } /* * Set the low watermark, to be checked by * soreadable() in icl_soupcall_receive() * to avoid unnecessary wakeups until there * is enough data received to read the PDU. */ SOCKBUF_LOCK(&so->so_rcv); available = sbavail(&so->so_rcv); if (available < ic->ic_receive_len) { so->so_rcv.sb_lowat = ic->ic_receive_len; cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx); } else so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1; SOCKBUF_UNLOCK(&so->so_rcv); icl_conn_receive_pdus(ic, available); } ICL_CONN_LOCK(ic); ic->ic_receive_running = false; cv_signal(&ic->ic_send_cv); ICL_CONN_UNLOCK(ic); kthread_exit(); } static int icl_soupcall_receive(struct socket *so, void *arg, int waitflag) { struct icl_conn *ic; if (!soreadable(so)) return (SU_OK); ic = arg; cv_signal(&ic->ic_receive_cv); return (SU_OK); } static int icl_pdu_finalize(struct icl_pdu *request) { size_t padding, pdu_len; uint32_t digest, zero = 0; int ok; struct icl_conn *ic; ic = request->ip_conn; icl_pdu_set_data_segment_length(request, request->ip_data_len); pdu_len = icl_pdu_size(request); if (ic->ic_header_crc32c) { digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); ok = m_append(request->ip_bhs_mbuf, sizeof(digest), (void *)&digest); if (ok != 1) { ICL_WARN("failed to append header digest"); return (1); } } if (request->ip_data_len != 0) { padding = icl_pdu_padding(request); if (padding > 0) { ok = m_append(request->ip_data_mbuf, padding, (void *)&zero); if (ok != 1) { ICL_WARN("failed to append padding"); return (1); } } if (ic->ic_data_crc32c) { digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); ok = m_append(request->ip_data_mbuf, sizeof(digest), (void *)&digest); if (ok != 1) { ICL_WARN("failed to append data digest"); return (1); } } m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); request->ip_data_mbuf = NULL; } request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; return (0); } static void icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue) { struct icl_pdu *request, *request2; struct socket *so; size_t available, size, size2; int coalesced, error; ICL_CONN_LOCK_ASSERT_NOT(ic); so = ic->ic_socket; SOCKBUF_LOCK(&so->so_snd); /* * Check how much space do we have for transmit. We can't just * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE, * as it always frees the mbuf chain passed to it, even in case * of error. */ available = sbspace(&so->so_snd); /* * Notify the socket upcall that we don't need wakeups * for the time being. */ so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1; SOCKBUF_UNLOCK(&so->so_snd); while (!STAILQ_EMPTY(queue)) { request = STAILQ_FIRST(queue); size = icl_pdu_size(request); if (available < size) { /* * Set the low watermark, to be checked by * sowriteable() in icl_soupcall_send() * to avoid unnecessary wakeups until there * is enough space for the PDU to fit. */ SOCKBUF_LOCK(&so->so_snd); available = sbspace(&so->so_snd); if (available < size) { #if 1 ICL_DEBUG("no space to send; " "have %zd, need %zd", available, size); #endif so->so_snd.sb_lowat = size; SOCKBUF_UNLOCK(&so->so_snd); return; } SOCKBUF_UNLOCK(&so->so_snd); } STAILQ_REMOVE_HEAD(queue, ip_next); error = icl_pdu_finalize(request); if (error != 0) { ICL_DEBUG("failed to finalize PDU; " "dropping connection"); icl_conn_fail(ic); icl_pdu_free(request); return; } if (coalesce) { coalesced = 1; for (;;) { request2 = STAILQ_FIRST(queue); if (request2 == NULL) break; size2 = icl_pdu_size(request2); if (available < size + size2) break; STAILQ_REMOVE_HEAD(queue, ip_next); error = icl_pdu_finalize(request2); if (error != 0) { ICL_DEBUG("failed to finalize PDU; " "dropping connection"); icl_conn_fail(ic); icl_pdu_free(request); icl_pdu_free(request2); return; } m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf); request2->ip_bhs_mbuf = NULL; request->ip_bhs_mbuf->m_pkthdr.len += size2; size += size2; STAILQ_REMOVE_AFTER(queue, request, ip_next); icl_pdu_free(request2); coalesced++; } #if 0 if (coalesced > 1) { ICL_DEBUG("coalesced %d PDUs into %zd bytes", coalesced, size); } #endif } available -= size; error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, NULL, MSG_DONTWAIT, curthread); request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */ if (error != 0) { ICL_DEBUG("failed to send PDU, error %d; " "dropping connection", error); icl_conn_fail(ic); icl_pdu_free(request); return; } icl_pdu_free(request); } } static void icl_send_thread(void *arg) { struct icl_conn *ic; struct icl_pdu_stailq queue; ic = arg; STAILQ_INIT(&queue); ICL_CONN_LOCK(ic); for (;;) { for (;;) { /* * If the local queue is empty, populate it from * the main one. This way the icl_conn_send_pdus() * can go through all the queued PDUs without holding * any locks. */ if (STAILQ_EMPTY(&queue)) STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu); ic->ic_check_send_space = false; ICL_CONN_UNLOCK(ic); icl_conn_send_pdus(ic, &queue); ICL_CONN_LOCK(ic); /* * The icl_soupcall_send() was called since the last * call to sbspace(); go around; */ if (ic->ic_check_send_space) continue; /* * Local queue is empty, but we still have PDUs * in the main one; go around. */ if (STAILQ_EMPTY(&queue) && !STAILQ_EMPTY(&ic->ic_to_send)) continue; /* * There might be some stuff in the local queue, * which didn't get sent due to not having enough send * space. Wait for socket upcall. */ break; } if (ic->ic_disconnecting) { //ICL_DEBUG("terminating"); break; } cv_wait(&ic->ic_send_cv, ic->ic_lock); } /* * We're exiting; move PDUs back to the main queue, so they can * get freed properly. At this point ordering doesn't matter. */ STAILQ_CONCAT(&ic->ic_to_send, &queue); ic->ic_send_running = false; cv_signal(&ic->ic_send_cv); ICL_CONN_UNLOCK(ic); kthread_exit(); } static int icl_soupcall_send(struct socket *so, void *arg, int waitflag) { struct icl_conn *ic; if (!sowriteable(so)) return (SU_OK); ic = arg; ICL_CONN_LOCK(ic); ic->ic_check_send_space = true; ICL_CONN_UNLOCK(ic); cv_signal(&ic->ic_send_cv); return (SU_OK); } static int icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len, int flags) { struct mbuf *mb, *newmb; size_t copylen, off = 0; KASSERT(len > 0, ("len == 0")); newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR); if (newmb == NULL) { ICL_WARN("failed to allocate mbuf for %zd bytes", len); return (ENOMEM); } for (mb = newmb; mb != NULL; mb = mb->m_next) { copylen = min(M_TRAILINGSPACE(mb), len - off); memcpy(mtod(mb, char *), (const char *)addr + off, copylen); mb->m_len = copylen; off += copylen; } KASSERT(off == len, ("%s: off != len", __func__)); if (request->ip_data_mbuf == NULL) { request->ip_data_mbuf = newmb; request->ip_data_len = len; } else { m_cat(request->ip_data_mbuf, newmb); request->ip_data_len += len; } return (0); } int icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, const void *addr, size_t len, int flags) { return (icl_pdu_append_data(request, addr, len, flags)); } static void icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len) { m_copydata(ip->ip_data_mbuf, off, len, addr); } void icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, size_t off, void *addr, size_t len) { return (icl_pdu_get_data(ip, off, addr, len)); } static void icl_pdu_queue(struct icl_pdu *ip) { struct icl_conn *ic; ic = ip->ip_conn; ICL_CONN_LOCK_ASSERT(ic); if (ic->ic_disconnecting || ic->ic_socket == NULL) { ICL_DEBUG("icl_pdu_queue on closed connection"); icl_pdu_free(ip); return; } if (!STAILQ_EMPTY(&ic->ic_to_send)) { STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); /* * If the queue is not empty, someone else had already * signaled the send thread; no need to do that again, * just return. */ return; } STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); cv_signal(&ic->ic_send_cv); } void icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) { icl_pdu_queue(ip); } static struct icl_conn * icl_soft_new_conn(const char *name, struct mtx *lock) { struct icl_conn *ic; refcount_acquire(&icl_ncons); ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO); STAILQ_INIT(&ic->ic_to_send); ic->ic_lock = lock; cv_init(&ic->ic_send_cv, "icl_tx"); cv_init(&ic->ic_receive_cv, "icl_rx"); #ifdef DIAGNOSTIC refcount_init(&ic->ic_outstanding_pdus, 0); #endif ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH; ic->ic_name = name; ic->ic_offload = "None"; return (ic); } void icl_soft_conn_free(struct icl_conn *ic) { cv_destroy(&ic->ic_send_cv); cv_destroy(&ic->ic_receive_cv); kobj_delete((struct kobj *)ic, M_ICL_SOFT); refcount_release(&icl_ncons); } static int icl_conn_start(struct icl_conn *ic) { size_t minspace; struct sockopt opt; int error, one = 1; ICL_CONN_LOCK(ic); /* * XXX: Ugly hack. */ if (ic->ic_socket == NULL) { ICL_CONN_UNLOCK(ic); return (EINVAL); } ic->ic_receive_state = ICL_CONN_STATE_BHS; ic->ic_receive_len = sizeof(struct iscsi_bhs); ic->ic_disconnecting = false; ICL_CONN_UNLOCK(ic); /* * For sendspace, this is required because the current code cannot * send a PDU in pieces; thus, the minimum buffer size is equal * to the maximum PDU size. "+4" is to account for possible padding. * * What we should actually do here is to use autoscaling, but set * some minimal buffer size to "minspace". I don't know a way to do * that, though. */ minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; if (sendspace < minspace) { ICL_WARN("kern.icl.sendspace too low; must be at least %zd", minspace); sendspace = minspace; } if (recvspace < minspace) { ICL_WARN("kern.icl.recvspace too low; must be at least %zd", minspace); recvspace = minspace; } error = soreserve(ic->ic_socket, sendspace, recvspace); if (error != 0) { ICL_WARN("soreserve failed with error %d", error); icl_soft_conn_close(ic); return (error); } ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; /* * Disable Nagle. */ bzero(&opt, sizeof(opt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = IPPROTO_TCP; opt.sopt_name = TCP_NODELAY; opt.sopt_val = &one; opt.sopt_valsize = sizeof(one); error = sosetopt(ic->ic_socket, &opt); if (error != 0) { ICL_WARN("disabling TCP_NODELAY failed with error %d", error); icl_soft_conn_close(ic); return (error); } /* * Register socket upcall, to get notified about incoming PDUs * and free space to send outgoing ones. */ SOCKBUF_LOCK(&ic->ic_socket->so_snd); soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic); SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); SOCKBUF_LOCK(&ic->ic_socket->so_rcv); soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic); SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); /* * Start threads. */ ICL_CONN_LOCK(ic); ic->ic_send_running = ic->ic_receive_running = true; ICL_CONN_UNLOCK(ic); error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx", ic->ic_name); if (error != 0) { ICL_WARN("kthread_add(9) failed with error %d", error); ICL_CONN_LOCK(ic); ic->ic_send_running = ic->ic_receive_running = false; cv_signal(&ic->ic_send_cv); ICL_CONN_UNLOCK(ic); icl_soft_conn_close(ic); return (error); } error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx", ic->ic_name); if (error != 0) { ICL_WARN("kthread_add(9) failed with error %d", error); ICL_CONN_LOCK(ic); ic->ic_receive_running = false; cv_signal(&ic->ic_send_cv); ICL_CONN_UNLOCK(ic); icl_soft_conn_close(ic); return (error); } return (0); } int icl_soft_conn_handoff(struct icl_conn *ic, int fd) { struct file *fp; struct socket *so; cap_rights_t rights; int error; ICL_CONN_LOCK_ASSERT_NOT(ic); /* * Steal the socket from userland. */ error = fget(curthread, fd, cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); if (error != 0) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, curthread); return (EINVAL); } so = fp->f_data; if (so->so_type != SOCK_STREAM) { fdrop(fp, curthread); return (EINVAL); } ICL_CONN_LOCK(ic); if (ic->ic_socket != NULL) { ICL_CONN_UNLOCK(ic); fdrop(fp, curthread); return (EBUSY); } ic->ic_socket = fp->f_data; fp->f_ops = &badfileops; fp->f_data = NULL; fdrop(fp, curthread); ICL_CONN_UNLOCK(ic); error = icl_conn_start(ic); return (error); } void icl_soft_conn_close(struct icl_conn *ic) { struct icl_pdu *pdu; struct socket *so; ICL_CONN_LOCK(ic); /* * Wake up the threads, so they can properly terminate. */ ic->ic_disconnecting = true; while (ic->ic_receive_running || ic->ic_send_running) { cv_signal(&ic->ic_receive_cv); cv_signal(&ic->ic_send_cv); cv_wait(&ic->ic_send_cv, ic->ic_lock); } /* Some other thread could close the connection same time. */ so = ic->ic_socket; if (so == NULL) { ICL_CONN_UNLOCK(ic); return; } ic->ic_socket = NULL; /* * Deregister socket upcalls. */ ICL_CONN_UNLOCK(ic); SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_upcall != NULL) soupcall_clear(so, SO_SND); SOCKBUF_UNLOCK(&so->so_snd); SOCKBUF_LOCK(&so->so_rcv); if (so->so_rcv.sb_upcall != NULL) soupcall_clear(so, SO_RCV); SOCKBUF_UNLOCK(&so->so_rcv); soclose(so); ICL_CONN_LOCK(ic); if (ic->ic_receive_pdu != NULL) { //ICL_DEBUG("freeing partially received PDU"); icl_pdu_free(ic->ic_receive_pdu); ic->ic_receive_pdu = NULL; } /* * Remove any outstanding PDUs from the send queue. */ while (!STAILQ_EMPTY(&ic->ic_to_send)) { pdu = STAILQ_FIRST(&ic->ic_to_send); STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next); icl_pdu_free(pdu); } KASSERT(STAILQ_EMPTY(&ic->ic_to_send), ("destroying session with non-empty send queue")); #ifdef DIAGNOSTIC KASSERT(ic->ic_outstanding_pdus == 0, ("destroying session with %d outstanding PDUs", ic->ic_outstanding_pdus)); #endif ICL_CONN_UNLOCK(ic); } int -icl_soft_conn_task_setup(struct icl_conn *ic, struct ccb_scsiio *csio, - uint32_t *task_tagp, void **prvp) +icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, + struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) { return (0); } void icl_soft_conn_task_done(struct icl_conn *ic, void *prv) { } int icl_soft_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io, uint32_t *transfer_tag, void **prvp) { return (0); } void icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv) { } static int icl_soft_limits(size_t *limitp) { *limitp = 128 * 1024; return (0); } #ifdef ICL_KERNEL_PROXY int icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so) { int error; ICL_CONN_LOCK_ASSERT_NOT(ic); if (so->so_type != SOCK_STREAM) return (EINVAL); ICL_CONN_LOCK(ic); if (ic->ic_socket != NULL) { ICL_CONN_UNLOCK(ic); return (EBUSY); } ic->ic_socket = so; ICL_CONN_UNLOCK(ic); error = icl_conn_start(ic); return (error); } #endif /* ICL_KERNEL_PROXY */ static int icl_soft_load(void) { int error; icl_pdu_zone = uma_zcreate("icl_pdu", sizeof(struct icl_pdu), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); refcount_init(&icl_ncons, 0); /* * The reason we call this "none" is that to the user, * it's known as "offload driver"; "offload driver: soft" * doesn't make much sense. */ error = icl_register("none", 0, icl_soft_limits, icl_soft_new_conn); KASSERT(error == 0, ("failed to register")); return (error); } static int icl_soft_unload(void) { if (icl_ncons != 0) return (EBUSY); icl_unregister("none"); uma_zdestroy(icl_pdu_zone); return (0); } static int icl_soft_modevent(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: return (icl_soft_load()); case MOD_UNLOAD: return (icl_soft_unload()); default: return (EINVAL); } } moduledata_t icl_soft_data = { "icl_soft", icl_soft_modevent, 0 }; DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); MODULE_DEPEND(icl_soft, icl, 1, 1, 1); MODULE_VERSION(icl_soft, 1); Index: head/sys/dev/iscsi/icl_wrappers.h =================================================================== --- head/sys/dev/iscsi/icl_wrappers.h (revision 300039) +++ head/sys/dev/iscsi/icl_wrappers.h (revision 300040) @@ -1,138 +1,138 @@ /*- * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * This file is used to provide the initiator and target with a prettier * interface. It must not be included by ICL modules, such as icl_soft.c. */ #ifndef ICL_WRAPPERS_H #define ICL_WRAPPERS_H #include #include #include static inline struct icl_pdu * icl_pdu_new(struct icl_conn *ic, int flags) { return (ICL_CONN_NEW_PDU(ic, flags)); } static inline size_t icl_pdu_data_segment_length(const struct icl_pdu *ip) { return (ICL_CONN_PDU_DATA_SEGMENT_LENGTH(ip->ip_conn, ip)); } static inline int icl_pdu_append_data(struct icl_pdu *ip, const void *addr, size_t len, int flags) { return (ICL_CONN_PDU_APPEND_DATA(ip->ip_conn, ip, addr, len, flags)); } static inline void icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len) { ICL_CONN_PDU_GET_DATA(ip->ip_conn, ip, off, addr, len); } static inline void icl_pdu_queue(struct icl_pdu *ip) { ICL_CONN_PDU_QUEUE(ip->ip_conn, ip); } static inline void icl_pdu_free(struct icl_pdu *ip) { ICL_CONN_PDU_FREE(ip->ip_conn, ip); } static inline void icl_conn_free(struct icl_conn *ic) { ICL_CONN_FREE(ic); } static inline int icl_conn_handoff(struct icl_conn *ic, int fd) { return (ICL_CONN_HANDOFF(ic, fd)); } static inline void icl_conn_close(struct icl_conn *ic) { ICL_CONN_CLOSE(ic); } static inline int -icl_conn_task_setup(struct icl_conn *ic, struct ccb_scsiio *csio, - uint32_t *task_tagp, void **prvp) +icl_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, + struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) { - return (ICL_CONN_TASK_SETUP(ic, csio, task_tagp, prvp)); + return (ICL_CONN_TASK_SETUP(ic, ip, csio, task_tagp, prvp)); } static inline void icl_conn_task_done(struct icl_conn *ic, void *prv) { ICL_CONN_TASK_DONE(ic, prv); } static inline int icl_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io, uint32_t *transfer_tagp, void **prvp) { return (ICL_CONN_TRANSFER_SETUP(ic, io, transfer_tagp, prvp)); } static inline void icl_conn_transfer_done(struct icl_conn *ic, void *prv) { ICL_CONN_TRANSFER_DONE(ic, prv); } #endif /* !ICL_WRAPPERS_H */ Index: head/sys/dev/iscsi/iscsi.c =================================================================== --- head/sys/dev/iscsi/iscsi.c (revision 300039) +++ head/sys/dev/iscsi/iscsi.c (revision 300040) @@ -1,2500 +1,2501 @@ /*- * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ICL_KERNEL_PROXY #include #endif #ifdef ICL_KERNEL_PROXY FEATURE(iscsi_kernel_proxy, "iSCSI initiator built with ICL_KERNEL_PROXY"); #endif /* * XXX: This is global so the iscsi_unload() can access it. * Think about how to do this properly. */ static struct iscsi_softc *sc; SYSCTL_NODE(_kern, OID_AUTO, iscsi, CTLFLAG_RD, 0, "iSCSI initiator"); static int debug = 1; SYSCTL_INT(_kern_iscsi, OID_AUTO, debug, CTLFLAG_RWTUN, &debug, 0, "Enable debug messages"); static int ping_timeout = 5; SYSCTL_INT(_kern_iscsi, OID_AUTO, ping_timeout, CTLFLAG_RWTUN, &ping_timeout, 0, "Timeout for ping (NOP-Out) requests, in seconds"); static int iscsid_timeout = 60; SYSCTL_INT(_kern_iscsi, OID_AUTO, iscsid_timeout, CTLFLAG_RWTUN, &iscsid_timeout, 0, "Time to wait for iscsid(8) to handle reconnection, in seconds"); static int login_timeout = 60; SYSCTL_INT(_kern_iscsi, OID_AUTO, login_timeout, CTLFLAG_RWTUN, &login_timeout, 0, "Time to wait for iscsid(8) to finish Login Phase, in seconds"); static int maxtags = 255; SYSCTL_INT(_kern_iscsi, OID_AUTO, maxtags, CTLFLAG_RWTUN, &maxtags, 0, "Max number of IO requests queued"); static int fail_on_disconnection = 0; SYSCTL_INT(_kern_iscsi, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN, &fail_on_disconnection, 0, "Destroy CAM SIM on connection failure"); static int fail_on_shutdown = 1; SYSCTL_INT(_kern_iscsi, OID_AUTO, fail_on_shutdown, CTLFLAG_RWTUN, &fail_on_shutdown, 0, "Fail disconnected sessions on shutdown"); static MALLOC_DEFINE(M_ISCSI, "iSCSI", "iSCSI initiator"); static uma_zone_t iscsi_outstanding_zone; #define CONN_SESSION(X) ((struct iscsi_session *)X->ic_prv0) #define PDU_SESSION(X) (CONN_SESSION(X->ip_conn)) #define ISCSI_DEBUG(X, ...) \ do { \ if (debug > 1) \ printf("%s: " X "\n", __func__, ## __VA_ARGS__);\ } while (0) #define ISCSI_WARN(X, ...) \ do { \ if (debug > 0) { \ printf("WARNING: %s: " X "\n", \ __func__, ## __VA_ARGS__); \ } \ } while (0) #define ISCSI_SESSION_DEBUG(S, X, ...) \ do { \ if (debug > 1) { \ printf("%s: %s (%s): " X "\n", \ __func__, S->is_conf.isc_target_addr, \ S->is_conf.isc_target, ## __VA_ARGS__); \ } \ } while (0) #define ISCSI_SESSION_WARN(S, X, ...) \ do { \ if (debug > 0) { \ printf("WARNING: %s (%s): " X "\n", \ S->is_conf.isc_target_addr, \ S->is_conf.isc_target, ## __VA_ARGS__); \ } \ } while (0) #define ISCSI_SESSION_LOCK(X) mtx_lock(&X->is_lock) #define ISCSI_SESSION_UNLOCK(X) mtx_unlock(&X->is_lock) #define ISCSI_SESSION_LOCK_ASSERT(X) mtx_assert(&X->is_lock, MA_OWNED) #define ISCSI_SESSION_LOCK_ASSERT_NOT(X) mtx_assert(&X->is_lock, MA_NOTOWNED) static int iscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int mode, struct thread *td); static struct cdevsw iscsi_cdevsw = { .d_version = D_VERSION, .d_ioctl = iscsi_ioctl, .d_name = "iscsi", }; static void iscsi_pdu_queue_locked(struct icl_pdu *request); static void iscsi_pdu_queue(struct icl_pdu *request); static void iscsi_pdu_update_statsn(const struct icl_pdu *response); static void iscsi_pdu_handle_nop_in(struct icl_pdu *response); static void iscsi_pdu_handle_scsi_response(struct icl_pdu *response); static void iscsi_pdu_handle_task_response(struct icl_pdu *response); static void iscsi_pdu_handle_data_in(struct icl_pdu *response); static void iscsi_pdu_handle_logout_response(struct icl_pdu *response); static void iscsi_pdu_handle_r2t(struct icl_pdu *response); static void iscsi_pdu_handle_async_message(struct icl_pdu *response); static void iscsi_pdu_handle_reject(struct icl_pdu *response); static void iscsi_session_reconnect(struct iscsi_session *is); static void iscsi_session_terminate(struct iscsi_session *is); static void iscsi_action(struct cam_sim *sim, union ccb *ccb); static void iscsi_poll(struct cam_sim *sim); static struct iscsi_outstanding *iscsi_outstanding_find(struct iscsi_session *is, uint32_t initiator_task_tag); static struct iscsi_outstanding *iscsi_outstanding_add(struct iscsi_session *is, - union ccb *ccb, uint32_t *initiator_task_tagp); + struct icl_pdu *request, union ccb *ccb, + uint32_t *initiator_task_tagp); static void iscsi_outstanding_remove(struct iscsi_session *is, struct iscsi_outstanding *io); static bool iscsi_pdu_prepare(struct icl_pdu *request) { struct iscsi_session *is; struct iscsi_bhs_scsi_command *bhssc; is = PDU_SESSION(request); ISCSI_SESSION_LOCK_ASSERT(is); /* * We're only using fields common for all the request * (initiator -> target) PDUs. */ bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs; /* * Data-Out PDU does not contain CmdSN. */ if (bhssc->bhssc_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_OUT) { if (ISCSI_SNGT(is->is_cmdsn, is->is_maxcmdsn) && (bhssc->bhssc_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) == 0) { /* * Current MaxCmdSN prevents us from sending any more * SCSI Command PDUs to the target; postpone the PDU. * It will get resent by either iscsi_pdu_queue(), * or by maintenance thread. */ #if 0 ISCSI_SESSION_DEBUG(is, "postponing send, CmdSN %u, " "ExpCmdSN %u, MaxCmdSN %u, opcode 0x%x", is->is_cmdsn, is->is_expcmdsn, is->is_maxcmdsn, bhssc->bhssc_opcode); #endif return (true); } bhssc->bhssc_cmdsn = htonl(is->is_cmdsn); if ((bhssc->bhssc_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) == 0) is->is_cmdsn++; } bhssc->bhssc_expstatsn = htonl(is->is_statsn + 1); return (false); } static void iscsi_session_send_postponed(struct iscsi_session *is) { struct icl_pdu *request; bool postpone; ISCSI_SESSION_LOCK_ASSERT(is); while (!STAILQ_EMPTY(&is->is_postponed)) { request = STAILQ_FIRST(&is->is_postponed); postpone = iscsi_pdu_prepare(request); if (postpone) break; STAILQ_REMOVE_HEAD(&is->is_postponed, ip_next); icl_pdu_queue(request); } } static void iscsi_pdu_queue_locked(struct icl_pdu *request) { struct iscsi_session *is; bool postpone; is = PDU_SESSION(request); ISCSI_SESSION_LOCK_ASSERT(is); iscsi_session_send_postponed(is); postpone = iscsi_pdu_prepare(request); if (postpone) { STAILQ_INSERT_TAIL(&is->is_postponed, request, ip_next); return; } icl_pdu_queue(request); } static void iscsi_pdu_queue(struct icl_pdu *request) { struct iscsi_session *is; is = PDU_SESSION(request); ISCSI_SESSION_LOCK(is); iscsi_pdu_queue_locked(request); ISCSI_SESSION_UNLOCK(is); } static void iscsi_session_logout(struct iscsi_session *is) { struct icl_pdu *request; struct iscsi_bhs_logout_request *bhslr; request = icl_pdu_new(is->is_conn, M_NOWAIT); if (request == NULL) return; bhslr = (struct iscsi_bhs_logout_request *)request->ip_bhs; bhslr->bhslr_opcode = ISCSI_BHS_OPCODE_LOGOUT_REQUEST; bhslr->bhslr_reason = BHSLR_REASON_CLOSE_SESSION; iscsi_pdu_queue_locked(request); } static void iscsi_session_terminate_task(struct iscsi_session *is, struct iscsi_outstanding *io, bool requeue) { ISCSI_SESSION_LOCK_ASSERT(is); if (io->io_ccb != NULL) { io->io_ccb->ccb_h.status &= ~(CAM_SIM_QUEUED | CAM_STATUS_MASK); if (requeue) io->io_ccb->ccb_h.status |= CAM_REQUEUE_REQ; else io->io_ccb->ccb_h.status |= CAM_REQ_ABORTED; if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { io->io_ccb->ccb_h.status |= CAM_DEV_QFRZN; xpt_freeze_devq(io->io_ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } xpt_done(io->io_ccb); } iscsi_outstanding_remove(is, io); } static void iscsi_session_terminate_tasks(struct iscsi_session *is, bool requeue) { struct iscsi_outstanding *io, *tmp; ISCSI_SESSION_LOCK_ASSERT(is); TAILQ_FOREACH_SAFE(io, &is->is_outstanding, io_next, tmp) { iscsi_session_terminate_task(is, io, requeue); } } static void iscsi_session_cleanup(struct iscsi_session *is, bool destroy_sim) { struct icl_pdu *pdu; ISCSI_SESSION_LOCK_ASSERT(is); /* * Don't queue any new PDUs. */ if (is->is_sim != NULL && is->is_simq_frozen == false) { ISCSI_SESSION_DEBUG(is, "freezing"); xpt_freeze_simq(is->is_sim, 1); is->is_simq_frozen = true; } /* * Remove postponed PDUs. */ while (!STAILQ_EMPTY(&is->is_postponed)) { pdu = STAILQ_FIRST(&is->is_postponed); STAILQ_REMOVE_HEAD(&is->is_postponed, ip_next); icl_pdu_free(pdu); } if (destroy_sim == false) { /* * Terminate SCSI tasks, asking CAM to requeue them. */ iscsi_session_terminate_tasks(is, true); return; } iscsi_session_terminate_tasks(is, false); if (is->is_sim == NULL) return; ISCSI_SESSION_DEBUG(is, "deregistering SIM"); xpt_async(AC_LOST_DEVICE, is->is_path, NULL); if (is->is_simq_frozen) { xpt_release_simq(is->is_sim, 1); is->is_simq_frozen = false; } xpt_free_path(is->is_path); is->is_path = NULL; xpt_bus_deregister(cam_sim_path(is->is_sim)); cam_sim_free(is->is_sim, TRUE /*free_devq*/); is->is_sim = NULL; is->is_devq = NULL; } static void iscsi_maintenance_thread_reconnect(struct iscsi_session *is) { icl_conn_close(is->is_conn); ISCSI_SESSION_LOCK(is); is->is_connected = false; is->is_reconnecting = false; is->is_login_phase = false; #ifdef ICL_KERNEL_PROXY if (is->is_login_pdu != NULL) { icl_pdu_free(is->is_login_pdu); is->is_login_pdu = NULL; } cv_signal(&is->is_login_cv); #endif if (fail_on_disconnection) { ISCSI_SESSION_DEBUG(is, "connection failed, destroying devices"); iscsi_session_cleanup(is, true); } else { iscsi_session_cleanup(is, false); } KASSERT(TAILQ_EMPTY(&is->is_outstanding), ("destroying session with active tasks")); KASSERT(STAILQ_EMPTY(&is->is_postponed), ("destroying session with postponed PDUs")); /* * Request immediate reconnection from iscsid(8). */ //ISCSI_SESSION_DEBUG(is, "waking up iscsid(8)"); is->is_waiting_for_iscsid = true; strlcpy(is->is_reason, "Waiting for iscsid(8)", sizeof(is->is_reason)); is->is_timeout = 0; ISCSI_SESSION_UNLOCK(is); cv_signal(&is->is_softc->sc_cv); } static void iscsi_maintenance_thread_terminate(struct iscsi_session *is) { struct iscsi_softc *sc; sc = is->is_softc; sx_xlock(&sc->sc_lock); icl_conn_close(is->is_conn); callout_drain(&is->is_callout); ISCSI_SESSION_LOCK(is); KASSERT(is->is_terminating, ("is_terminating == false")); #ifdef ICL_KERNEL_PROXY if (is->is_login_pdu != NULL) { icl_pdu_free(is->is_login_pdu); is->is_login_pdu = NULL; } cv_signal(&is->is_login_cv); #endif iscsi_session_cleanup(is, true); KASSERT(TAILQ_EMPTY(&is->is_outstanding), ("destroying session with active tasks")); KASSERT(STAILQ_EMPTY(&is->is_postponed), ("destroying session with postponed PDUs")); ISCSI_SESSION_UNLOCK(is); icl_conn_free(is->is_conn); mtx_destroy(&is->is_lock); cv_destroy(&is->is_maintenance_cv); #ifdef ICL_KERNEL_PROXY cv_destroy(&is->is_login_cv); #endif TAILQ_REMOVE(&sc->sc_sessions, is, is_next); sx_xunlock(&sc->sc_lock); ISCSI_SESSION_DEBUG(is, "terminated"); free(is, M_ISCSI); /* * The iscsi_unload() routine might be waiting. */ cv_signal(&sc->sc_cv); } static void iscsi_maintenance_thread(void *arg) { struct iscsi_session *is; is = arg; for (;;) { ISCSI_SESSION_LOCK(is); if (is->is_reconnecting == false && is->is_terminating == false && STAILQ_EMPTY(&is->is_postponed)) cv_wait(&is->is_maintenance_cv, &is->is_lock); /* Terminate supersedes reconnect. */ if (is->is_terminating) { ISCSI_SESSION_UNLOCK(is); iscsi_maintenance_thread_terminate(is); kthread_exit(); return; } if (is->is_reconnecting) { ISCSI_SESSION_UNLOCK(is); iscsi_maintenance_thread_reconnect(is); continue; } iscsi_session_send_postponed(is); ISCSI_SESSION_UNLOCK(is); } } static void iscsi_session_reconnect(struct iscsi_session *is) { /* * XXX: We can't use locking here, because * it's being called from various contexts. * Hope it doesn't break anything. */ if (is->is_reconnecting) return; is->is_reconnecting = true; cv_signal(&is->is_maintenance_cv); } static void iscsi_session_terminate(struct iscsi_session *is) { if (is->is_terminating) return; is->is_terminating = true; #if 0 iscsi_session_logout(is); #endif cv_signal(&is->is_maintenance_cv); } static void iscsi_callout(void *context) { struct icl_pdu *request; struct iscsi_bhs_nop_out *bhsno; struct iscsi_session *is; bool reconnect_needed = false; is = context; ISCSI_SESSION_LOCK(is); if (is->is_terminating) { ISCSI_SESSION_UNLOCK(is); return; } callout_schedule(&is->is_callout, 1 * hz); is->is_timeout++; if (is->is_waiting_for_iscsid) { if (iscsid_timeout > 0 && is->is_timeout > iscsid_timeout) { ISCSI_SESSION_WARN(is, "timed out waiting for iscsid(8) " "for %d seconds; reconnecting", is->is_timeout); reconnect_needed = true; } goto out; } if (is->is_login_phase) { if (login_timeout > 0 && is->is_timeout > login_timeout) { ISCSI_SESSION_WARN(is, "login timed out after %d seconds; " "reconnecting", is->is_timeout); reconnect_needed = true; } goto out; } if (ping_timeout <= 0) { /* * Pings are disabled. Don't send NOP-Out in this case. * Reset the timeout, to avoid triggering reconnection, * should the user decide to reenable them. */ is->is_timeout = 0; goto out; } if (is->is_timeout >= ping_timeout) { ISCSI_SESSION_WARN(is, "no ping reply (NOP-In) after %d seconds; " "reconnecting", ping_timeout); reconnect_needed = true; goto out; } ISCSI_SESSION_UNLOCK(is); /* * If the ping was reset less than one second ago - which means * that we've received some PDU during the last second - assume * the traffic flows correctly and don't bother sending a NOP-Out. * * (It's 2 - one for one second, and one for incrementing is_timeout * earlier in this routine.) */ if (is->is_timeout < 2) return; request = icl_pdu_new(is->is_conn, M_NOWAIT); if (request == NULL) { ISCSI_SESSION_WARN(is, "failed to allocate PDU"); return; } bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs; bhsno->bhsno_opcode = ISCSI_BHS_OPCODE_NOP_OUT | ISCSI_BHS_OPCODE_IMMEDIATE; bhsno->bhsno_flags = 0x80; bhsno->bhsno_target_transfer_tag = 0xffffffff; iscsi_pdu_queue(request); return; out: if (is->is_terminating) { ISCSI_SESSION_UNLOCK(is); return; } ISCSI_SESSION_UNLOCK(is); if (reconnect_needed) iscsi_session_reconnect(is); } static void iscsi_pdu_update_statsn(const struct icl_pdu *response) { const struct iscsi_bhs_data_in *bhsdi; struct iscsi_session *is; uint32_t expcmdsn, maxcmdsn, statsn; is = PDU_SESSION(response); ISCSI_SESSION_LOCK_ASSERT(is); /* * We're only using fields common for all the response * (target -> initiator) PDUs. */ bhsdi = (const struct iscsi_bhs_data_in *)response->ip_bhs; /* * Ok, I lied. In case of Data-In, "The fields StatSN, Status, * and Residual Count only have meaningful content if the S bit * is set to 1", so we also need to check the bit specific for * Data-In PDU. */ if (bhsdi->bhsdi_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_IN || (bhsdi->bhsdi_flags & BHSDI_FLAGS_S) != 0) { statsn = ntohl(bhsdi->bhsdi_statsn); if (statsn != is->is_statsn && statsn != (is->is_statsn + 1)) { /* XXX: This is normal situation for MCS */ ISCSI_SESSION_WARN(is, "PDU 0x%x StatSN %u != " "session ExpStatSN %u (or + 1); reconnecting", bhsdi->bhsdi_opcode, statsn, is->is_statsn); iscsi_session_reconnect(is); } if (ISCSI_SNGT(statsn, is->is_statsn)) is->is_statsn = statsn; } expcmdsn = ntohl(bhsdi->bhsdi_expcmdsn); maxcmdsn = ntohl(bhsdi->bhsdi_maxcmdsn); if (ISCSI_SNLT(maxcmdsn + 1, expcmdsn)) { ISCSI_SESSION_DEBUG(is, "PDU MaxCmdSN %u + 1 < PDU ExpCmdSN %u; ignoring", maxcmdsn, expcmdsn); } else { if (ISCSI_SNGT(maxcmdsn, is->is_maxcmdsn)) { is->is_maxcmdsn = maxcmdsn; /* * Command window increased; kick the maintanance thread * to send out postponed commands. */ if (!STAILQ_EMPTY(&is->is_postponed)) cv_signal(&is->is_maintenance_cv); } else if (ISCSI_SNLT(maxcmdsn, is->is_maxcmdsn)) { /* XXX: This is normal situation for MCS */ ISCSI_SESSION_DEBUG(is, "PDU MaxCmdSN %u < session MaxCmdSN %u; ignoring", maxcmdsn, is->is_maxcmdsn); } if (ISCSI_SNGT(expcmdsn, is->is_expcmdsn)) { is->is_expcmdsn = expcmdsn; } else if (ISCSI_SNLT(expcmdsn, is->is_expcmdsn)) { /* XXX: This is normal situation for MCS */ ISCSI_SESSION_DEBUG(is, "PDU ExpCmdSN %u < session ExpCmdSN %u; ignoring", expcmdsn, is->is_expcmdsn); } } /* * Every incoming PDU - not just NOP-In - resets the ping timer. * The purpose of the timeout is to reset the connection when it stalls; * we don't want this to happen when NOP-In or NOP-Out ends up delayed * in some queue. */ is->is_timeout = 0; } static void iscsi_receive_callback(struct icl_pdu *response) { struct iscsi_session *is; is = PDU_SESSION(response); ISCSI_SESSION_LOCK(is); #ifdef ICL_KERNEL_PROXY if (is->is_login_phase) { if (is->is_login_pdu == NULL) is->is_login_pdu = response; else icl_pdu_free(response); ISCSI_SESSION_UNLOCK(is); cv_signal(&is->is_login_cv); return; } #endif iscsi_pdu_update_statsn(response); /* * The handling routine is responsible for freeing the PDU * when it's no longer needed. */ switch (response->ip_bhs->bhs_opcode) { case ISCSI_BHS_OPCODE_NOP_IN: iscsi_pdu_handle_nop_in(response); ISCSI_SESSION_UNLOCK(is); break; case ISCSI_BHS_OPCODE_SCSI_RESPONSE: iscsi_pdu_handle_scsi_response(response); /* Session lock dropped inside. */ ISCSI_SESSION_LOCK_ASSERT_NOT(is); break; case ISCSI_BHS_OPCODE_TASK_RESPONSE: iscsi_pdu_handle_task_response(response); ISCSI_SESSION_UNLOCK(is); break; case ISCSI_BHS_OPCODE_SCSI_DATA_IN: iscsi_pdu_handle_data_in(response); /* Session lock dropped inside. */ ISCSI_SESSION_LOCK_ASSERT_NOT(is); break; case ISCSI_BHS_OPCODE_LOGOUT_RESPONSE: iscsi_pdu_handle_logout_response(response); ISCSI_SESSION_UNLOCK(is); break; case ISCSI_BHS_OPCODE_R2T: iscsi_pdu_handle_r2t(response); ISCSI_SESSION_UNLOCK(is); break; case ISCSI_BHS_OPCODE_ASYNC_MESSAGE: iscsi_pdu_handle_async_message(response); ISCSI_SESSION_UNLOCK(is); break; case ISCSI_BHS_OPCODE_REJECT: iscsi_pdu_handle_reject(response); ISCSI_SESSION_UNLOCK(is); break; default: ISCSI_SESSION_WARN(is, "received PDU with unsupported " "opcode 0x%x; reconnecting", response->ip_bhs->bhs_opcode); iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); icl_pdu_free(response); } } static void iscsi_error_callback(struct icl_conn *ic) { struct iscsi_session *is; is = CONN_SESSION(ic); ISCSI_SESSION_WARN(is, "connection error; reconnecting"); iscsi_session_reconnect(is); } static void iscsi_pdu_handle_nop_in(struct icl_pdu *response) { struct iscsi_session *is; struct iscsi_bhs_nop_out *bhsno; struct iscsi_bhs_nop_in *bhsni; struct icl_pdu *request; void *data = NULL; size_t datasize; int error; is = PDU_SESSION(response); bhsni = (struct iscsi_bhs_nop_in *)response->ip_bhs; if (bhsni->bhsni_target_transfer_tag == 0xffffffff) { /* * Nothing to do; iscsi_pdu_update_statsn() already * zeroed the timeout. */ icl_pdu_free(response); return; } datasize = icl_pdu_data_segment_length(response); if (datasize > 0) { data = malloc(datasize, M_ISCSI, M_NOWAIT | M_ZERO); if (data == NULL) { ISCSI_SESSION_WARN(is, "failed to allocate memory; " "reconnecting"); icl_pdu_free(response); iscsi_session_reconnect(is); return; } icl_pdu_get_data(response, 0, data, datasize); } request = icl_pdu_new(response->ip_conn, M_NOWAIT); if (request == NULL) { ISCSI_SESSION_WARN(is, "failed to allocate memory; " "reconnecting"); free(data, M_ISCSI); icl_pdu_free(response); iscsi_session_reconnect(is); return; } bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs; bhsno->bhsno_opcode = ISCSI_BHS_OPCODE_NOP_OUT | ISCSI_BHS_OPCODE_IMMEDIATE; bhsno->bhsno_flags = 0x80; bhsno->bhsno_initiator_task_tag = 0xffffffff; bhsno->bhsno_target_transfer_tag = bhsni->bhsni_target_transfer_tag; if (datasize > 0) { error = icl_pdu_append_data(request, data, datasize, M_NOWAIT); if (error != 0) { ISCSI_SESSION_WARN(is, "failed to allocate memory; " "reconnecting"); free(data, M_ISCSI); icl_pdu_free(request); icl_pdu_free(response); iscsi_session_reconnect(is); return; } free(data, M_ISCSI); } icl_pdu_free(response); iscsi_pdu_queue_locked(request); } static void iscsi_pdu_handle_scsi_response(struct icl_pdu *response) { struct iscsi_bhs_scsi_response *bhssr; struct iscsi_outstanding *io; struct iscsi_session *is; union ccb *ccb; struct ccb_scsiio *csio; size_t data_segment_len, received; uint16_t sense_len; is = PDU_SESSION(response); bhssr = (struct iscsi_bhs_scsi_response *)response->ip_bhs; io = iscsi_outstanding_find(is, bhssr->bhssr_initiator_task_tag); if (io == NULL || io->io_ccb == NULL) { ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhssr->bhssr_initiator_task_tag); icl_pdu_free(response); iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); return; } ccb = io->io_ccb; received = io->io_received; iscsi_outstanding_remove(is, io); ISCSI_SESSION_UNLOCK(is); if (bhssr->bhssr_response != BHSSR_RESPONSE_COMMAND_COMPLETED) { ISCSI_SESSION_WARN(is, "service response 0x%x", bhssr->bhssr_response); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN; } else if (bhssr->bhssr_status == 0) { ccb->ccb_h.status = CAM_REQ_CMP; } else { if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_DEV_QFRZN; ccb->csio.scsi_status = bhssr->bhssr_status; } csio = &ccb->csio; data_segment_len = icl_pdu_data_segment_length(response); if (data_segment_len > 0) { if (data_segment_len < sizeof(sense_len)) { ISCSI_SESSION_WARN(is, "truncated data segment (%zd bytes)", data_segment_len); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN; goto out; } icl_pdu_get_data(response, 0, &sense_len, sizeof(sense_len)); sense_len = ntohs(sense_len); #if 0 ISCSI_SESSION_DEBUG(is, "sense_len %d, data len %zd", sense_len, data_segment_len); #endif if (sizeof(sense_len) + sense_len > data_segment_len) { ISCSI_SESSION_WARN(is, "truncated data segment " "(%zd bytes, should be %zd)", data_segment_len, sizeof(sense_len) + sense_len); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN; goto out; } else if (sizeof(sense_len) + sense_len < data_segment_len) ISCSI_SESSION_WARN(is, "oversize data segment " "(%zd bytes, should be %zd)", data_segment_len, sizeof(sense_len) + sense_len); if (sense_len > csio->sense_len) { ISCSI_SESSION_DEBUG(is, "truncating sense from %d to %d", sense_len, csio->sense_len); sense_len = csio->sense_len; } icl_pdu_get_data(response, sizeof(sense_len), &csio->sense_data, sense_len); csio->sense_resid = csio->sense_len - sense_len; ccb->ccb_h.status |= CAM_AUTOSNS_VALID; } out: if (bhssr->bhssr_flags & BHSSR_FLAGS_RESIDUAL_UNDERFLOW) csio->resid = ntohl(bhssr->bhssr_residual_count); if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { KASSERT(received <= csio->dxfer_len, ("received > csio->dxfer_len")); if (received < csio->dxfer_len) { if (csio->resid != csio->dxfer_len - received) { ISCSI_SESSION_WARN(is, "underflow mismatch: " "target indicates %d, we calculated %zd", csio->resid, csio->dxfer_len - received); } csio->resid = csio->dxfer_len - received; } } xpt_done(ccb); icl_pdu_free(response); } static void iscsi_pdu_handle_task_response(struct icl_pdu *response) { struct iscsi_bhs_task_management_response *bhstmr; struct iscsi_outstanding *io, *aio; struct iscsi_session *is; is = PDU_SESSION(response); bhstmr = (struct iscsi_bhs_task_management_response *)response->ip_bhs; io = iscsi_outstanding_find(is, bhstmr->bhstmr_initiator_task_tag); if (io == NULL || io->io_ccb != NULL) { ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhstmr->bhstmr_initiator_task_tag); icl_pdu_free(response); iscsi_session_reconnect(is); return; } if (bhstmr->bhstmr_response != BHSTMR_RESPONSE_FUNCTION_COMPLETE) { ISCSI_SESSION_WARN(is, "task response 0x%x", bhstmr->bhstmr_response); } else { aio = iscsi_outstanding_find(is, io->io_datasn); if (aio != NULL && aio->io_ccb != NULL) iscsi_session_terminate_task(is, aio, false); } iscsi_outstanding_remove(is, io); icl_pdu_free(response); } static void iscsi_pdu_handle_data_in(struct icl_pdu *response) { struct iscsi_bhs_data_in *bhsdi; struct iscsi_outstanding *io; struct iscsi_session *is; union ccb *ccb; struct ccb_scsiio *csio; size_t data_segment_len, received, oreceived; is = PDU_SESSION(response); bhsdi = (struct iscsi_bhs_data_in *)response->ip_bhs; io = iscsi_outstanding_find(is, bhsdi->bhsdi_initiator_task_tag); if (io == NULL || io->io_ccb == NULL) { ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhsdi->bhsdi_initiator_task_tag); icl_pdu_free(response); iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); return; } data_segment_len = icl_pdu_data_segment_length(response); if (data_segment_len == 0) { /* * "The sending of 0 length data segments should be avoided, * but initiators and targets MUST be able to properly receive * 0 length data segments." */ ISCSI_SESSION_UNLOCK(is); icl_pdu_free(response); return; } /* * We need to track this for security reasons - without it, malicious target * could respond to SCSI READ without sending Data-In PDUs, which would result * in read operation on the initiator side returning random kernel data. */ if (ntohl(bhsdi->bhsdi_buffer_offset) != io->io_received) { ISCSI_SESSION_WARN(is, "data out of order; expected offset %zd, got %zd", io->io_received, (size_t)ntohl(bhsdi->bhsdi_buffer_offset)); icl_pdu_free(response); iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); return; } ccb = io->io_ccb; csio = &ccb->csio; if (io->io_received + data_segment_len > csio->dxfer_len) { ISCSI_SESSION_WARN(is, "oversize data segment (%zd bytes " "at offset %zd, buffer is %d)", data_segment_len, io->io_received, csio->dxfer_len); icl_pdu_free(response); iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); return; } oreceived = io->io_received; io->io_received += data_segment_len; received = io->io_received; if ((bhsdi->bhsdi_flags & BHSDI_FLAGS_S) != 0) iscsi_outstanding_remove(is, io); ISCSI_SESSION_UNLOCK(is); icl_pdu_get_data(response, 0, csio->data_ptr + oreceived, data_segment_len); /* * XXX: Check DataSN. * XXX: Check F. */ if ((bhsdi->bhsdi_flags & BHSDI_FLAGS_S) == 0) { /* * Nothing more to do. */ icl_pdu_free(response); return; } //ISCSI_SESSION_DEBUG(is, "got S flag; status 0x%x", bhsdi->bhsdi_status); if (bhsdi->bhsdi_status == 0) { ccb->ccb_h.status = CAM_REQ_CMP; } else { if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_DEV_QFRZN; csio->scsi_status = bhsdi->bhsdi_status; } if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { KASSERT(received <= csio->dxfer_len, ("received > csio->dxfer_len")); if (received < csio->dxfer_len) { csio->resid = ntohl(bhsdi->bhsdi_residual_count); if (csio->resid != csio->dxfer_len - received) { ISCSI_SESSION_WARN(is, "underflow mismatch: " "target indicates %d, we calculated %zd", csio->resid, csio->dxfer_len - received); } csio->resid = csio->dxfer_len - received; } } xpt_done(ccb); icl_pdu_free(response); } static void iscsi_pdu_handle_logout_response(struct icl_pdu *response) { ISCSI_SESSION_DEBUG(PDU_SESSION(response), "logout response"); icl_pdu_free(response); } static void iscsi_pdu_handle_r2t(struct icl_pdu *response) { struct icl_pdu *request; struct iscsi_session *is; struct iscsi_bhs_r2t *bhsr2t; struct iscsi_bhs_data_out *bhsdo; struct iscsi_outstanding *io; struct ccb_scsiio *csio; size_t off, len, total_len; int error; is = PDU_SESSION(response); bhsr2t = (struct iscsi_bhs_r2t *)response->ip_bhs; io = iscsi_outstanding_find(is, bhsr2t->bhsr2t_initiator_task_tag); if (io == NULL || io->io_ccb == NULL) { ISCSI_SESSION_WARN(is, "bad itt 0x%x; reconnecting", bhsr2t->bhsr2t_initiator_task_tag); icl_pdu_free(response); iscsi_session_reconnect(is); return; } csio = &io->io_ccb->csio; if ((csio->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_OUT) { ISCSI_SESSION_WARN(is, "received R2T for read command; reconnecting"); icl_pdu_free(response); iscsi_session_reconnect(is); return; } /* * XXX: Verify R2TSN. */ io->io_datasn = 0; off = ntohl(bhsr2t->bhsr2t_buffer_offset); if (off > csio->dxfer_len) { ISCSI_SESSION_WARN(is, "target requested invalid offset " "%zd, buffer is is %d; reconnecting", off, csio->dxfer_len); icl_pdu_free(response); iscsi_session_reconnect(is); return; } total_len = ntohl(bhsr2t->bhsr2t_desired_data_transfer_length); if (total_len == 0 || total_len > csio->dxfer_len) { ISCSI_SESSION_WARN(is, "target requested invalid length " "%zd, buffer is %d; reconnecting", total_len, csio->dxfer_len); icl_pdu_free(response); iscsi_session_reconnect(is); return; } //ISCSI_SESSION_DEBUG(is, "r2t; off %zd, len %zd", off, total_len); for (;;) { len = total_len; if (len > is->is_max_data_segment_length) len = is->is_max_data_segment_length; if (off + len > csio->dxfer_len) { ISCSI_SESSION_WARN(is, "target requested invalid " "length/offset %zd, buffer is %d; reconnecting", off + len, csio->dxfer_len); icl_pdu_free(response); iscsi_session_reconnect(is); return; } request = icl_pdu_new(response->ip_conn, M_NOWAIT); if (request == NULL) { icl_pdu_free(response); iscsi_session_reconnect(is); return; } bhsdo = (struct iscsi_bhs_data_out *)request->ip_bhs; bhsdo->bhsdo_opcode = ISCSI_BHS_OPCODE_SCSI_DATA_OUT; bhsdo->bhsdo_lun = bhsr2t->bhsr2t_lun; bhsdo->bhsdo_initiator_task_tag = bhsr2t->bhsr2t_initiator_task_tag; bhsdo->bhsdo_target_transfer_tag = bhsr2t->bhsr2t_target_transfer_tag; bhsdo->bhsdo_datasn = htonl(io->io_datasn++); bhsdo->bhsdo_buffer_offset = htonl(off); error = icl_pdu_append_data(request, csio->data_ptr + off, len, M_NOWAIT); if (error != 0) { ISCSI_SESSION_WARN(is, "failed to allocate memory; " "reconnecting"); icl_pdu_free(request); icl_pdu_free(response); iscsi_session_reconnect(is); return; } off += len; total_len -= len; if (total_len == 0) { bhsdo->bhsdo_flags |= BHSDO_FLAGS_F; //ISCSI_SESSION_DEBUG(is, "setting F, off %zd", off); } else { //ISCSI_SESSION_DEBUG(is, "not finished, off %zd", off); } iscsi_pdu_queue_locked(request); if (total_len == 0) break; } icl_pdu_free(response); } static void iscsi_pdu_handle_async_message(struct icl_pdu *response) { struct iscsi_bhs_asynchronous_message *bhsam; struct iscsi_session *is; is = PDU_SESSION(response); bhsam = (struct iscsi_bhs_asynchronous_message *)response->ip_bhs; switch (bhsam->bhsam_async_event) { case BHSAM_EVENT_TARGET_REQUESTS_LOGOUT: ISCSI_SESSION_WARN(is, "target requests logout; removing session"); iscsi_session_logout(is); iscsi_session_terminate(is); break; case BHSAM_EVENT_TARGET_TERMINATES_CONNECTION: ISCSI_SESSION_WARN(is, "target indicates it will drop drop the connection"); break; case BHSAM_EVENT_TARGET_TERMINATES_SESSION: ISCSI_SESSION_WARN(is, "target indicates it will drop drop the session"); break; default: /* * XXX: Technically, we're obligated to also handle * parameter renegotiation. */ ISCSI_SESSION_WARN(is, "ignoring AsyncEvent %d", bhsam->bhsam_async_event); break; } icl_pdu_free(response); } static void iscsi_pdu_handle_reject(struct icl_pdu *response) { struct iscsi_bhs_reject *bhsr; struct iscsi_session *is; is = PDU_SESSION(response); bhsr = (struct iscsi_bhs_reject *)response->ip_bhs; ISCSI_SESSION_WARN(is, "received Reject PDU, reason 0x%x; protocol error?", bhsr->bhsr_reason); icl_pdu_free(response); } static int iscsi_ioctl_daemon_wait(struct iscsi_softc *sc, struct iscsi_daemon_request *request) { struct iscsi_session *is; int error; sx_slock(&sc->sc_lock); for (;;) { TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { ISCSI_SESSION_LOCK(is); if (is->is_waiting_for_iscsid) break; ISCSI_SESSION_UNLOCK(is); } if (is == NULL) { /* * No session requires attention from iscsid(8); wait. */ error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock); if (error != 0) { sx_sunlock(&sc->sc_lock); return (error); } continue; } is->is_waiting_for_iscsid = false; is->is_login_phase = true; is->is_reason[0] = '\0'; ISCSI_SESSION_UNLOCK(is); request->idr_session_id = is->is_id; memcpy(&request->idr_isid, &is->is_isid, sizeof(request->idr_isid)); request->idr_tsih = 0; /* New or reinstated session. */ memcpy(&request->idr_conf, &is->is_conf, sizeof(request->idr_conf)); error = icl_limits(is->is_conf.isc_offload, &request->idr_limits.isl_max_data_segment_length); if (error != 0) { ISCSI_SESSION_WARN(is, "icl_limits for offload \"%s\" " "failed with error %d", is->is_conf.isc_offload, error); sx_sunlock(&sc->sc_lock); return (error); } sx_sunlock(&sc->sc_lock); return (0); } } static int iscsi_ioctl_daemon_handoff(struct iscsi_softc *sc, struct iscsi_daemon_handoff *handoff) { struct iscsi_session *is; struct icl_conn *ic; int error; sx_slock(&sc->sc_lock); /* * Find the session to hand off socket to. */ TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == handoff->idh_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } ISCSI_SESSION_LOCK(is); ic = is->is_conn; if (is->is_conf.isc_discovery || is->is_terminating) { ISCSI_SESSION_UNLOCK(is); sx_sunlock(&sc->sc_lock); return (EINVAL); } if (is->is_connected) { /* * This might have happened because another iscsid(8) * instance handed off the connection in the meantime. * Just return. */ ISCSI_SESSION_WARN(is, "handoff on already connected " "session"); ISCSI_SESSION_UNLOCK(is); sx_sunlock(&sc->sc_lock); return (EBUSY); } strlcpy(is->is_target_alias, handoff->idh_target_alias, sizeof(is->is_target_alias)); is->is_tsih = handoff->idh_tsih; is->is_statsn = handoff->idh_statsn; is->is_initial_r2t = handoff->idh_initial_r2t; is->is_immediate_data = handoff->idh_immediate_data; /* * Cap MaxRecvDataSegmentLength obtained from the target to the maximum * size supported by our ICL module. */ is->is_max_data_segment_length = min(ic->ic_max_data_segment_length, handoff->idh_max_data_segment_length); is->is_max_burst_length = handoff->idh_max_burst_length; is->is_first_burst_length = handoff->idh_first_burst_length; if (handoff->idh_header_digest == ISCSI_DIGEST_CRC32C) ic->ic_header_crc32c = true; else ic->ic_header_crc32c = false; if (handoff->idh_data_digest == ISCSI_DIGEST_CRC32C) ic->ic_data_crc32c = true; else ic->ic_data_crc32c = false; is->is_cmdsn = 0; is->is_expcmdsn = 0; is->is_maxcmdsn = 0; is->is_waiting_for_iscsid = false; is->is_login_phase = false; is->is_timeout = 0; is->is_connected = true; is->is_reason[0] = '\0'; ISCSI_SESSION_UNLOCK(is); #ifdef ICL_KERNEL_PROXY if (handoff->idh_socket != 0) { #endif /* * Handoff without using ICL proxy. */ error = icl_conn_handoff(ic, handoff->idh_socket); if (error != 0) { sx_sunlock(&sc->sc_lock); iscsi_session_terminate(is); return (error); } #ifdef ICL_KERNEL_PROXY } #endif sx_sunlock(&sc->sc_lock); if (is->is_sim != NULL) { /* * When reconnecting, there already is SIM allocated for the session. */ KASSERT(is->is_simq_frozen, ("reconnect without frozen simq")); ISCSI_SESSION_LOCK(is); ISCSI_SESSION_DEBUG(is, "releasing"); xpt_release_simq(is->is_sim, 1); is->is_simq_frozen = false; ISCSI_SESSION_UNLOCK(is); } else { ISCSI_SESSION_LOCK(is); is->is_devq = cam_simq_alloc(maxtags); if (is->is_devq == NULL) { ISCSI_SESSION_WARN(is, "failed to allocate simq"); iscsi_session_terminate(is); return (ENOMEM); } is->is_sim = cam_sim_alloc(iscsi_action, iscsi_poll, "iscsi", is, is->is_id /* unit */, &is->is_lock, 1, maxtags, is->is_devq); if (is->is_sim == NULL) { ISCSI_SESSION_UNLOCK(is); ISCSI_SESSION_WARN(is, "failed to allocate SIM"); cam_simq_free(is->is_devq); iscsi_session_terminate(is); return (ENOMEM); } error = xpt_bus_register(is->is_sim, NULL, 0); if (error != 0) { ISCSI_SESSION_UNLOCK(is); ISCSI_SESSION_WARN(is, "failed to register bus"); iscsi_session_terminate(is); return (ENOMEM); } error = xpt_create_path(&is->is_path, /*periph*/NULL, cam_sim_path(is->is_sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD); if (error != CAM_REQ_CMP) { ISCSI_SESSION_UNLOCK(is); ISCSI_SESSION_WARN(is, "failed to create path"); iscsi_session_terminate(is); return (ENOMEM); } ISCSI_SESSION_UNLOCK(is); } return (0); } static int iscsi_ioctl_daemon_fail(struct iscsi_softc *sc, struct iscsi_daemon_fail *fail) { struct iscsi_session *is; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == fail->idf_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } ISCSI_SESSION_LOCK(is); ISCSI_SESSION_DEBUG(is, "iscsid(8) failed: %s", fail->idf_reason); strlcpy(is->is_reason, fail->idf_reason, sizeof(is->is_reason)); //is->is_waiting_for_iscsid = false; //is->is_login_phase = true; //iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); sx_sunlock(&sc->sc_lock); return (0); } #ifdef ICL_KERNEL_PROXY static int iscsi_ioctl_daemon_connect(struct iscsi_softc *sc, struct iscsi_daemon_connect *idc) { struct iscsi_session *is; struct sockaddr *from_sa, *to_sa; int error; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == idc->idc_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } sx_sunlock(&sc->sc_lock); if (idc->idc_from_addrlen > 0) { error = getsockaddr(&from_sa, (void *)idc->idc_from_addr, idc->idc_from_addrlen); if (error != 0) { ISCSI_SESSION_WARN(is, "getsockaddr failed with error %d", error); return (error); } } else { from_sa = NULL; } error = getsockaddr(&to_sa, (void *)idc->idc_to_addr, idc->idc_to_addrlen); if (error != 0) { ISCSI_SESSION_WARN(is, "getsockaddr failed with error %d", error); free(from_sa, M_SONAME); return (error); } ISCSI_SESSION_LOCK(is); is->is_waiting_for_iscsid = false; is->is_login_phase = true; is->is_timeout = 0; ISCSI_SESSION_UNLOCK(is); error = icl_conn_connect(is->is_conn, idc->idc_iser, idc->idc_domain, idc->idc_socktype, idc->idc_protocol, from_sa, to_sa); free(from_sa, M_SONAME); free(to_sa, M_SONAME); /* * Digests are always disabled during login phase. */ is->is_conn->ic_header_crc32c = false; is->is_conn->ic_data_crc32c = false; return (error); } static int iscsi_ioctl_daemon_send(struct iscsi_softc *sc, struct iscsi_daemon_send *ids) { struct iscsi_session *is; struct icl_pdu *ip; size_t datalen; void *data; int error; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == ids->ids_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } sx_sunlock(&sc->sc_lock); if (is->is_login_phase == false) return (EBUSY); if (is->is_terminating || is->is_reconnecting) return (EIO); datalen = ids->ids_data_segment_len; if (datalen > ISCSI_MAX_DATA_SEGMENT_LENGTH) return (EINVAL); if (datalen > 0) { data = malloc(datalen, M_ISCSI, M_WAITOK); error = copyin(ids->ids_data_segment, data, datalen); if (error != 0) { free(data, M_ISCSI); return (error); } } ip = icl_pdu_new(is->is_conn, M_WAITOK); memcpy(ip->ip_bhs, ids->ids_bhs, sizeof(*ip->ip_bhs)); if (datalen > 0) { error = icl_pdu_append_data(ip, data, datalen, M_WAITOK); KASSERT(error == 0, ("icl_pdu_append_data(..., M_WAITOK) failed")); free(data, M_ISCSI); } icl_pdu_queue(ip); return (0); } static int iscsi_ioctl_daemon_receive(struct iscsi_softc *sc, struct iscsi_daemon_receive *idr) { struct iscsi_session *is; struct icl_pdu *ip; void *data; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == idr->idr_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } sx_sunlock(&sc->sc_lock); if (is->is_login_phase == false) return (EBUSY); ISCSI_SESSION_LOCK(is); while (is->is_login_pdu == NULL && is->is_terminating == false && is->is_reconnecting == false) cv_wait(&is->is_login_cv, &is->is_lock); if (is->is_terminating || is->is_reconnecting) { ISCSI_SESSION_UNLOCK(is); return (EIO); } ip = is->is_login_pdu; is->is_login_pdu = NULL; ISCSI_SESSION_UNLOCK(is); if (ip->ip_data_len > idr->idr_data_segment_len) { icl_pdu_free(ip); return (EMSGSIZE); } copyout(ip->ip_bhs, idr->idr_bhs, sizeof(*ip->ip_bhs)); if (ip->ip_data_len > 0) { data = malloc(ip->ip_data_len, M_ISCSI, M_WAITOK); icl_pdu_get_data(ip, 0, data, ip->ip_data_len); copyout(data, idr->idr_data_segment, ip->ip_data_len); free(data, M_ISCSI); } icl_pdu_free(ip); return (0); } #endif /* ICL_KERNEL_PROXY */ static void iscsi_sanitize_session_conf(struct iscsi_session_conf *isc) { /* * Just make sure all the fields are null-terminated. * * XXX: This is not particularly secure. We should * create our own conf and then copy in relevant * fields. */ isc->isc_initiator[ISCSI_NAME_LEN - 1] = '\0'; isc->isc_initiator_addr[ISCSI_ADDR_LEN - 1] = '\0'; isc->isc_initiator_alias[ISCSI_ALIAS_LEN - 1] = '\0'; isc->isc_target[ISCSI_NAME_LEN - 1] = '\0'; isc->isc_target_addr[ISCSI_ADDR_LEN - 1] = '\0'; isc->isc_user[ISCSI_NAME_LEN - 1] = '\0'; isc->isc_secret[ISCSI_SECRET_LEN - 1] = '\0'; isc->isc_mutual_user[ISCSI_NAME_LEN - 1] = '\0'; isc->isc_mutual_secret[ISCSI_SECRET_LEN - 1] = '\0'; } static bool iscsi_valid_session_conf(const struct iscsi_session_conf *isc) { if (isc->isc_initiator[0] == '\0') { ISCSI_DEBUG("empty isc_initiator"); return (false); } if (isc->isc_target_addr[0] == '\0') { ISCSI_DEBUG("empty isc_target_addr"); return (false); } if (isc->isc_discovery != 0 && isc->isc_target[0] != 0) { ISCSI_DEBUG("non-empty isc_target for discovery session"); return (false); } if (isc->isc_discovery == 0 && isc->isc_target[0] == 0) { ISCSI_DEBUG("empty isc_target for non-discovery session"); return (false); } return (true); } static int iscsi_ioctl_session_add(struct iscsi_softc *sc, struct iscsi_session_add *isa) { struct iscsi_session *is; const struct iscsi_session *is2; int error; iscsi_sanitize_session_conf(&isa->isa_conf); if (iscsi_valid_session_conf(&isa->isa_conf) == false) return (EINVAL); is = malloc(sizeof(*is), M_ISCSI, M_ZERO | M_WAITOK); memcpy(&is->is_conf, &isa->isa_conf, sizeof(is->is_conf)); sx_xlock(&sc->sc_lock); /* * Prevent duplicates. */ TAILQ_FOREACH(is2, &sc->sc_sessions, is_next) { if (!!is->is_conf.isc_discovery != !!is2->is_conf.isc_discovery) continue; if (strcmp(is->is_conf.isc_target_addr, is2->is_conf.isc_target_addr) != 0) continue; if (is->is_conf.isc_discovery == 0 && strcmp(is->is_conf.isc_target, is2->is_conf.isc_target) != 0) continue; sx_xunlock(&sc->sc_lock); free(is, M_ISCSI); return (EBUSY); } is->is_conn = icl_new_conn(is->is_conf.isc_offload, "iscsi", &is->is_lock); if (is->is_conn == NULL) { sx_xunlock(&sc->sc_lock); free(is, M_ISCSI); return (EINVAL); } is->is_conn->ic_receive = iscsi_receive_callback; is->is_conn->ic_error = iscsi_error_callback; is->is_conn->ic_prv0 = is; TAILQ_INIT(&is->is_outstanding); STAILQ_INIT(&is->is_postponed); mtx_init(&is->is_lock, "iscsi_lock", NULL, MTX_DEF); cv_init(&is->is_maintenance_cv, "iscsi_mt"); #ifdef ICL_KERNEL_PROXY cv_init(&is->is_login_cv, "iscsi_login"); #endif is->is_softc = sc; sc->sc_last_session_id++; is->is_id = sc->sc_last_session_id; is->is_isid[0] = 0x80; /* RFC 3720, 10.12.5: 10b, "Random" ISID. */ arc4rand(&is->is_isid[1], 5, 0); is->is_tsih = 0; callout_init(&is->is_callout, 1); error = kthread_add(iscsi_maintenance_thread, is, NULL, NULL, 0, 0, "iscsimt"); if (error != 0) { ISCSI_SESSION_WARN(is, "kthread_add(9) failed with error %d", error); sx_xunlock(&sc->sc_lock); return (error); } callout_reset(&is->is_callout, 1 * hz, iscsi_callout, is); TAILQ_INSERT_TAIL(&sc->sc_sessions, is, is_next); /* * Trigger immediate reconnection. */ ISCSI_SESSION_LOCK(is); is->is_waiting_for_iscsid = true; strlcpy(is->is_reason, "Waiting for iscsid(8)", sizeof(is->is_reason)); ISCSI_SESSION_UNLOCK(is); cv_signal(&sc->sc_cv); sx_xunlock(&sc->sc_lock); return (0); } static bool iscsi_session_conf_matches(unsigned int id1, const struct iscsi_session_conf *c1, unsigned int id2, const struct iscsi_session_conf *c2) { if (id2 != 0 && id2 != id1) return (false); if (c2->isc_target[0] != '\0' && strcmp(c1->isc_target, c2->isc_target) != 0) return (false); if (c2->isc_target_addr[0] != '\0' && strcmp(c1->isc_target_addr, c2->isc_target_addr) != 0) return (false); return (true); } static int iscsi_ioctl_session_remove(struct iscsi_softc *sc, struct iscsi_session_remove *isr) { struct iscsi_session *is, *tmp; bool found = false; iscsi_sanitize_session_conf(&isr->isr_conf); sx_xlock(&sc->sc_lock); TAILQ_FOREACH_SAFE(is, &sc->sc_sessions, is_next, tmp) { ISCSI_SESSION_LOCK(is); if (iscsi_session_conf_matches(is->is_id, &is->is_conf, isr->isr_session_id, &isr->isr_conf)) { found = true; iscsi_session_logout(is); iscsi_session_terminate(is); } ISCSI_SESSION_UNLOCK(is); } sx_xunlock(&sc->sc_lock); if (!found) return (ESRCH); return (0); } static int iscsi_ioctl_session_list(struct iscsi_softc *sc, struct iscsi_session_list *isl) { int error; unsigned int i = 0; struct iscsi_session *is; struct iscsi_session_state iss; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (i >= isl->isl_nentries) { sx_sunlock(&sc->sc_lock); return (EMSGSIZE); } memset(&iss, 0, sizeof(iss)); memcpy(&iss.iss_conf, &is->is_conf, sizeof(iss.iss_conf)); iss.iss_id = is->is_id; strlcpy(iss.iss_target_alias, is->is_target_alias, sizeof(iss.iss_target_alias)); strlcpy(iss.iss_reason, is->is_reason, sizeof(iss.iss_reason)); strlcpy(iss.iss_offload, is->is_conn->ic_offload, sizeof(iss.iss_offload)); if (is->is_conn->ic_header_crc32c) iss.iss_header_digest = ISCSI_DIGEST_CRC32C; else iss.iss_header_digest = ISCSI_DIGEST_NONE; if (is->is_conn->ic_data_crc32c) iss.iss_data_digest = ISCSI_DIGEST_CRC32C; else iss.iss_data_digest = ISCSI_DIGEST_NONE; iss.iss_max_data_segment_length = is->is_max_data_segment_length; iss.iss_immediate_data = is->is_immediate_data; iss.iss_connected = is->is_connected; error = copyout(&iss, isl->isl_pstates + i, sizeof(iss)); if (error != 0) { sx_sunlock(&sc->sc_lock); return (error); } i++; } sx_sunlock(&sc->sc_lock); isl->isl_nentries = i; return (0); } static int iscsi_ioctl_session_modify(struct iscsi_softc *sc, struct iscsi_session_modify *ism) { struct iscsi_session *is; iscsi_sanitize_session_conf(&ism->ism_conf); if (iscsi_valid_session_conf(&ism->ism_conf) == false) return (EINVAL); sx_xlock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { ISCSI_SESSION_LOCK(is); if (is->is_id == ism->ism_session_id) break; ISCSI_SESSION_UNLOCK(is); } if (is == NULL) { sx_xunlock(&sc->sc_lock); return (ESRCH); } sx_xunlock(&sc->sc_lock); memcpy(&is->is_conf, &ism->ism_conf, sizeof(is->is_conf)); ISCSI_SESSION_UNLOCK(is); iscsi_session_reconnect(is); return (0); } static int iscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int mode, struct thread *td) { struct iscsi_softc *sc; sc = dev->si_drv1; switch (cmd) { case ISCSIDWAIT: return (iscsi_ioctl_daemon_wait(sc, (struct iscsi_daemon_request *)arg)); case ISCSIDHANDOFF: return (iscsi_ioctl_daemon_handoff(sc, (struct iscsi_daemon_handoff *)arg)); case ISCSIDFAIL: return (iscsi_ioctl_daemon_fail(sc, (struct iscsi_daemon_fail *)arg)); #ifdef ICL_KERNEL_PROXY case ISCSIDCONNECT: return (iscsi_ioctl_daemon_connect(sc, (struct iscsi_daemon_connect *)arg)); case ISCSIDSEND: return (iscsi_ioctl_daemon_send(sc, (struct iscsi_daemon_send *)arg)); case ISCSIDRECEIVE: return (iscsi_ioctl_daemon_receive(sc, (struct iscsi_daemon_receive *)arg)); #endif /* ICL_KERNEL_PROXY */ case ISCSISADD: return (iscsi_ioctl_session_add(sc, (struct iscsi_session_add *)arg)); case ISCSISREMOVE: return (iscsi_ioctl_session_remove(sc, (struct iscsi_session_remove *)arg)); case ISCSISLIST: return (iscsi_ioctl_session_list(sc, (struct iscsi_session_list *)arg)); case ISCSISMODIFY: return (iscsi_ioctl_session_modify(sc, (struct iscsi_session_modify *)arg)); default: return (EINVAL); } } static struct iscsi_outstanding * iscsi_outstanding_find(struct iscsi_session *is, uint32_t initiator_task_tag) { struct iscsi_outstanding *io; ISCSI_SESSION_LOCK_ASSERT(is); TAILQ_FOREACH(io, &is->is_outstanding, io_next) { if (io->io_initiator_task_tag == initiator_task_tag) return (io); } return (NULL); } static struct iscsi_outstanding * iscsi_outstanding_find_ccb(struct iscsi_session *is, union ccb *ccb) { struct iscsi_outstanding *io; ISCSI_SESSION_LOCK_ASSERT(is); TAILQ_FOREACH(io, &is->is_outstanding, io_next) { if (io->io_ccb == ccb) return (io); } return (NULL); } static struct iscsi_outstanding * -iscsi_outstanding_add(struct iscsi_session *is, +iscsi_outstanding_add(struct iscsi_session *is, struct icl_pdu *request, union ccb *ccb, uint32_t *initiator_task_tagp) { struct iscsi_outstanding *io; int error; ISCSI_SESSION_LOCK_ASSERT(is); io = uma_zalloc(iscsi_outstanding_zone, M_NOWAIT | M_ZERO); if (io == NULL) { ISCSI_SESSION_WARN(is, "failed to allocate %zd bytes", sizeof(*io)); return (NULL); } - error = icl_conn_task_setup(is->is_conn, &ccb->csio, + error = icl_conn_task_setup(is->is_conn, request, &ccb->csio, initiator_task_tagp, &io->io_icl_prv); if (error != 0) { ISCSI_SESSION_WARN(is, "icl_conn_task_setup() failed with error %d", error); uma_zfree(iscsi_outstanding_zone, io); return (NULL); } KASSERT(iscsi_outstanding_find(is, *initiator_task_tagp) == NULL, ("initiator_task_tag 0x%x already added", *initiator_task_tagp)); io->io_initiator_task_tag = *initiator_task_tagp; io->io_ccb = ccb; TAILQ_INSERT_TAIL(&is->is_outstanding, io, io_next); return (io); } static void iscsi_outstanding_remove(struct iscsi_session *is, struct iscsi_outstanding *io) { ISCSI_SESSION_LOCK_ASSERT(is); icl_conn_task_done(is->is_conn, io->io_icl_prv); TAILQ_REMOVE(&is->is_outstanding, io, io_next); uma_zfree(iscsi_outstanding_zone, io); } static void iscsi_action_abort(struct iscsi_session *is, union ccb *ccb) { struct icl_pdu *request; struct iscsi_bhs_task_management_request *bhstmr; struct ccb_abort *cab = &ccb->cab; struct iscsi_outstanding *io, *aio; uint32_t initiator_task_tag; ISCSI_SESSION_LOCK_ASSERT(is); #if 0 KASSERT(is->is_login_phase == false, ("%s called during Login Phase", __func__)); #else if (is->is_login_phase) { ccb->ccb_h.status = CAM_REQ_ABORTED; xpt_done(ccb); return; } #endif aio = iscsi_outstanding_find_ccb(is, cab->abort_ccb); if (aio == NULL) { ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } request = icl_pdu_new(is->is_conn, M_NOWAIT); if (request == NULL) { ccb->ccb_h.status = CAM_RESRC_UNAVAIL; xpt_done(ccb); return; } initiator_task_tag = is->is_initiator_task_tag++; - io = iscsi_outstanding_add(is, NULL, &initiator_task_tag); + io = iscsi_outstanding_add(is, request, NULL, &initiator_task_tag); if (io == NULL) { icl_pdu_free(request); ccb->ccb_h.status = CAM_RESRC_UNAVAIL; xpt_done(ccb); return; } io->io_datasn = aio->io_initiator_task_tag; bhstmr = (struct iscsi_bhs_task_management_request *)request->ip_bhs; bhstmr->bhstmr_opcode = ISCSI_BHS_OPCODE_TASK_REQUEST; bhstmr->bhstmr_function = 0x80 | BHSTMR_FUNCTION_ABORT_TASK; bhstmr->bhstmr_lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun)); bhstmr->bhstmr_initiator_task_tag = initiator_task_tag; bhstmr->bhstmr_referenced_task_tag = aio->io_initiator_task_tag; iscsi_pdu_queue_locked(request); } static void iscsi_action_scsiio(struct iscsi_session *is, union ccb *ccb) { struct icl_pdu *request; struct iscsi_bhs_scsi_command *bhssc; struct ccb_scsiio *csio; struct iscsi_outstanding *io; size_t len; uint32_t initiator_task_tag; int error; ISCSI_SESSION_LOCK_ASSERT(is); #if 0 KASSERT(is->is_login_phase == false, ("%s called during Login Phase", __func__)); #else if (is->is_login_phase) { ISCSI_SESSION_DEBUG(is, "called during login phase"); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_REQ_ABORTED | CAM_DEV_QFRZN; xpt_done(ccb); return; } #endif request = icl_pdu_new(is->is_conn, M_NOWAIT); if (request == NULL) { if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN; xpt_done(ccb); return; } initiator_task_tag = is->is_initiator_task_tag++; - io = iscsi_outstanding_add(is, ccb, &initiator_task_tag); + io = iscsi_outstanding_add(is, request, ccb, &initiator_task_tag); if (io == NULL) { icl_pdu_free(request); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN; xpt_done(ccb); return; } csio = &ccb->csio; bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs; bhssc->bhssc_opcode = ISCSI_BHS_OPCODE_SCSI_COMMAND; bhssc->bhssc_flags |= BHSSC_FLAGS_F; switch (csio->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_IN: bhssc->bhssc_flags |= BHSSC_FLAGS_R; break; case CAM_DIR_OUT: bhssc->bhssc_flags |= BHSSC_FLAGS_W; break; } if ((ccb->ccb_h.flags & CAM_TAG_ACTION_VALID) != 0) { switch (csio->tag_action) { case MSG_HEAD_OF_Q_TAG: bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_HOQ; break; case MSG_ORDERED_Q_TAG: bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_ORDERED; break; case MSG_ACA_TASK: bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_ACA; break; case MSG_SIMPLE_Q_TAG: default: bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_SIMPLE; break; } } else bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_UNTAGGED; bhssc->bhssc_lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun)); bhssc->bhssc_initiator_task_tag = initiator_task_tag; bhssc->bhssc_expected_data_transfer_length = htonl(csio->dxfer_len); KASSERT(csio->cdb_len <= sizeof(bhssc->bhssc_cdb), ("unsupported CDB size %zd", (size_t)csio->cdb_len)); if (csio->ccb_h.flags & CAM_CDB_POINTER) memcpy(&bhssc->bhssc_cdb, csio->cdb_io.cdb_ptr, csio->cdb_len); else memcpy(&bhssc->bhssc_cdb, csio->cdb_io.cdb_bytes, csio->cdb_len); if (is->is_immediate_data && (csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_OUT) { len = csio->dxfer_len; //ISCSI_SESSION_DEBUG(is, "adding %zd of immediate data", len); if (len > is->is_first_burst_length) { ISCSI_SESSION_DEBUG(is, "len %zd -> %zd", len, is->is_first_burst_length); len = is->is_first_burst_length; } if (len > is->is_max_data_segment_length) { ISCSI_SESSION_DEBUG(is, "len %zd -> %zd", len, is->is_max_data_segment_length); len = is->is_max_data_segment_length; } error = icl_pdu_append_data(request, csio->data_ptr, len, M_NOWAIT); if (error != 0) { iscsi_outstanding_remove(is, io); icl_pdu_free(request); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN; xpt_done(ccb); return; } } iscsi_pdu_queue_locked(request); } static void iscsi_action(struct cam_sim *sim, union ccb *ccb) { struct iscsi_session *is; is = cam_sim_softc(sim); ISCSI_SESSION_LOCK_ASSERT(is); if (is->is_terminating || (is->is_connected == false && fail_on_disconnection)) { ccb->ccb_h.status = CAM_DEV_NOT_THERE; xpt_done(ccb); return; } switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_TAG_ABLE; cpi->target_sprt = 0; cpi->hba_misc = PIM_EXTLUNS; cpi->hba_eng_cnt = 0; cpi->max_target = 0; /* * Note that the variable below is only relevant for targets * that don't claim compliance with anything above SPC2, which * means they don't support REPORT_LUNS. */ cpi->max_lun = 255; cpi->initiator_id = ~0; strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strlcpy(cpi->hba_vid, "iSCSI", HBA_IDLEN); strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 150000; /* XXX */ cpi->transport = XPORT_ISCSI; cpi->transport_version = 0; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_SPC3; cpi->maxio = MAXPHYS; cpi->ccb_h.status = CAM_REQ_CMP; break; } case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings *cts; struct ccb_trans_settings_scsi *scsi; cts = &ccb->cts; scsi = &cts->proto_specific.scsi; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_SPC3; cts->transport = XPORT_ISCSI; cts->transport_version = 0; scsi->valid = CTS_SCSI_VALID_TQ; scsi->flags = CTS_SCSI_FLAGS_TAG_ENB; cts->ccb_h.status = CAM_REQ_CMP; break; } case XPT_CALC_GEOMETRY: cam_calc_geometry(&ccb->ccg, /*extended*/1); ccb->ccb_h.status = CAM_REQ_CMP; break; #if 0 /* * XXX: What's the point? */ case XPT_RESET_BUS: case XPT_TERM_IO: ISCSI_SESSION_DEBUG(is, "faking success for reset, abort, or term_io"); ccb->ccb_h.status = CAM_REQ_CMP; break; #endif case XPT_ABORT: iscsi_action_abort(is, ccb); return; case XPT_SCSI_IO: iscsi_action_scsiio(is, ccb); return; default: #if 0 ISCSI_SESSION_DEBUG(is, "got unsupported code 0x%x", ccb->ccb_h.func_code); #endif ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; break; } xpt_done(ccb); } static void iscsi_poll(struct cam_sim *sim) { KASSERT(0, ("%s: you're not supposed to be here", __func__)); } static void iscsi_terminate_sessions(struct iscsi_softc *sc) { struct iscsi_session *is; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) iscsi_session_terminate(is); while(!TAILQ_EMPTY(&sc->sc_sessions)) { ISCSI_DEBUG("waiting for sessions to terminate"); cv_wait(&sc->sc_cv, &sc->sc_lock); } ISCSI_DEBUG("all sessions terminated"); sx_sunlock(&sc->sc_lock); } static void iscsi_shutdown_pre(struct iscsi_softc *sc) { struct iscsi_session *is; if (!fail_on_shutdown) return; /* * If we have any sessions waiting for reconnection, request * maintenance thread to fail them immediately instead of waiting * for reconnect timeout. * * This prevents LUNs with mounted filesystems that are supported * by disconnected iSCSI sessions from hanging, however it will * fail all queued BIOs. */ ISCSI_DEBUG("forcing failing all disconnected sessions due to shutdown"); fail_on_disconnection = 1; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { ISCSI_SESSION_LOCK(is); if (!is->is_connected) { ISCSI_SESSION_DEBUG(is, "force failing disconnected session early"); iscsi_session_reconnect(is); } ISCSI_SESSION_UNLOCK(is); } sx_sunlock(&sc->sc_lock); } static void iscsi_shutdown_post(struct iscsi_softc *sc) { ISCSI_DEBUG("removing all sessions due to shutdown"); iscsi_terminate_sessions(sc); } static int iscsi_load(void) { int error; sc = malloc(sizeof(*sc), M_ISCSI, M_ZERO | M_WAITOK); sx_init(&sc->sc_lock, "iscsi"); TAILQ_INIT(&sc->sc_sessions); cv_init(&sc->sc_cv, "iscsi_cv"); iscsi_outstanding_zone = uma_zcreate("iscsi_outstanding", sizeof(struct iscsi_outstanding), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); error = make_dev_p(MAKEDEV_CHECKNAME, &sc->sc_cdev, &iscsi_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "iscsi"); if (error != 0) { ISCSI_WARN("failed to create device node, error %d", error); return (error); } sc->sc_cdev->si_drv1 = sc; sc->sc_shutdown_pre_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync, iscsi_shutdown_pre, sc, SHUTDOWN_PRI_FIRST); /* * shutdown_post_sync needs to run after filesystem shutdown and before * CAM shutdown - otherwise when rebooting with an iSCSI session that is * disconnected but has outstanding requests, dashutdown() will hang on * cam_periph_runccb(). */ sc->sc_shutdown_post_eh = EVENTHANDLER_REGISTER(shutdown_post_sync, iscsi_shutdown_post, sc, SHUTDOWN_PRI_DEFAULT - 1); return (0); } static int iscsi_unload(void) { if (sc->sc_cdev != NULL) { ISCSI_DEBUG("removing device node"); destroy_dev(sc->sc_cdev); ISCSI_DEBUG("device node removed"); } if (sc->sc_shutdown_pre_eh != NULL) EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->sc_shutdown_pre_eh); if (sc->sc_shutdown_post_eh != NULL) EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->sc_shutdown_post_eh); iscsi_terminate_sessions(sc); uma_zdestroy(iscsi_outstanding_zone); sx_destroy(&sc->sc_lock); cv_destroy(&sc->sc_cv); free(sc, M_ISCSI); return (0); } static int iscsi_quiesce(void) { sx_slock(&sc->sc_lock); if (!TAILQ_EMPTY(&sc->sc_sessions)) { sx_sunlock(&sc->sc_lock); return (EBUSY); } sx_sunlock(&sc->sc_lock); return (0); } static int iscsi_modevent(module_t mod, int what, void *arg) { int error; switch (what) { case MOD_LOAD: error = iscsi_load(); break; case MOD_UNLOAD: error = iscsi_unload(); break; case MOD_QUIESCE: error = iscsi_quiesce(); break; default: error = EINVAL; break; } return (error); } moduledata_t iscsi_data = { "iscsi", iscsi_modevent, 0 }; DECLARE_MODULE(iscsi, iscsi_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); MODULE_DEPEND(iscsi, cam, 1, 1, 1); MODULE_DEPEND(iscsi, icl, 1, 1, 1);