diff --git a/sys/cam/ctl/ctl_frontend_iscsi.c b/sys/cam/ctl/ctl_frontend_iscsi.c index a5a80848c763..b3cd8ab79d76 100644 --- a/sys/cam/ctl/ctl_frontend_iscsi.c +++ b/sys/cam/ctl/ctl_frontend_iscsi.c @@ -1,3048 +1,3053 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * CTL frontend for the iSCSI protocol. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ICL_KERNEL_PROXY #include #endif #ifdef ICL_KERNEL_PROXY FEATURE(cfiscsi_kernel_proxy, "iSCSI target built with ICL_KERNEL_PROXY"); #endif static MALLOC_DEFINE(M_CFISCSI, "cfiscsi", "Memory used for CTL iSCSI frontend"); static uma_zone_t cfiscsi_data_wait_zone; SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, iscsi, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "CAM Target Layer iSCSI Frontend"); static int debug = 1; SYSCTL_INT(_kern_cam_ctl_iscsi, OID_AUTO, debug, CTLFLAG_RWTUN, &debug, 1, "Enable debug messages"); static int ping_timeout = 5; SYSCTL_INT(_kern_cam_ctl_iscsi, OID_AUTO, ping_timeout, CTLFLAG_RWTUN, &ping_timeout, 5, "Interval between ping (NOP-Out) requests, in seconds"); static int login_timeout = 60; SYSCTL_INT(_kern_cam_ctl_iscsi, OID_AUTO, login_timeout, CTLFLAG_RWTUN, &login_timeout, 60, "Time to wait for ctld(8) to finish Login Phase, in seconds"); static int maxtags = 256; SYSCTL_INT(_kern_cam_ctl_iscsi, OID_AUTO, maxtags, CTLFLAG_RWTUN, &maxtags, 0, "Max number of requests queued by initiator"); #define CFISCSI_DEBUG(X, ...) \ do { \ if (debug > 1) { \ printf("%s: " X "\n", \ __func__, ## __VA_ARGS__); \ } \ } while (0) #define CFISCSI_WARN(X, ...) \ do { \ if (debug > 0) { \ printf("WARNING: %s: " X "\n", \ __func__, ## __VA_ARGS__); \ } \ } while (0) #define CFISCSI_SESSION_DEBUG(S, X, ...) \ do { \ if (debug > 1) { \ printf("%s: %s (%s): " X "\n", \ __func__, S->cs_initiator_addr, \ S->cs_initiator_name, ## __VA_ARGS__); \ } \ } while (0) #define CFISCSI_SESSION_WARN(S, X, ...) 
\ do { \ if (debug > 0) { \ printf("WARNING: %s (%s): " X "\n", \ S->cs_initiator_addr, \ S->cs_initiator_name, ## __VA_ARGS__); \ } \ } while (0) #define CFISCSI_SESSION_LOCK(X) mtx_lock(&X->cs_lock) #define CFISCSI_SESSION_UNLOCK(X) mtx_unlock(&X->cs_lock) #define CFISCSI_SESSION_LOCK_ASSERT(X) mtx_assert(&X->cs_lock, MA_OWNED) #define CONN_SESSION(X) ((struct cfiscsi_session *)(X)->ic_prv0) #define PDU_SESSION(X) CONN_SESSION((X)->ip_conn) struct cfiscsi_priv { void *request; uint32_t expdatasn; uint32_t r2tsn; }; #define PRIV(io) \ ((struct cfiscsi_priv *)&(io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND]) #define PRIV_REQUEST(io) PRIV(io)->request #define PRIV_EXPDATASN(io) PRIV(io)->expdatasn #define PRIV_R2TSN(io) PRIV(io)->r2tsn static int cfiscsi_init(void); static int cfiscsi_shutdown(void); static void cfiscsi_online(void *arg); static void cfiscsi_offline(void *arg); static int cfiscsi_info(void *arg, struct sbuf *sb); static int cfiscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td); static void cfiscsi_datamove(union ctl_io *io); static void cfiscsi_datamove_in(union ctl_io *io); static void cfiscsi_datamove_out(union ctl_io *io); static void cfiscsi_done(union ctl_io *io); static bool cfiscsi_pdu_update_cmdsn(const struct icl_pdu *request); static void cfiscsi_pdu_handle_nop_out(struct icl_pdu *request); static void cfiscsi_pdu_handle_scsi_command(struct icl_pdu *request); static void cfiscsi_pdu_handle_task_request(struct icl_pdu *request); static void cfiscsi_pdu_handle_data_out(struct icl_pdu *request); static void cfiscsi_pdu_handle_logout_request(struct icl_pdu *request); static void cfiscsi_session_terminate(struct cfiscsi_session *cs); static struct cfiscsi_data_wait *cfiscsi_data_wait_new( struct cfiscsi_session *cs, union ctl_io *io, uint32_t initiator_task_tag, uint32_t *target_transfer_tagp); static void cfiscsi_data_wait_free(struct cfiscsi_session *cs, struct cfiscsi_data_wait *cdw); static struct cfiscsi_target 
*cfiscsi_target_find(struct cfiscsi_softc *softc,
		    const char *name, uint16_t tag);
static struct cfiscsi_target	*cfiscsi_target_find_or_create(
    struct cfiscsi_softc *softc, const char *name,
    const char *alias, uint16_t tag);
static void	cfiscsi_target_release(struct cfiscsi_target *ct);
static void	cfiscsi_session_delete(struct cfiscsi_session *cs);

/* File-scope state shared by all sessions and targets of this frontend. */
static struct cfiscsi_softc cfiscsi_softc;

/* Entry points registered with CTL under the frontend name "iscsi". */
static struct ctl_frontend cfiscsi_frontend =
{
	.name = "iscsi",
	.init = cfiscsi_init,
	.ioctl = cfiscsi_ioctl,
	.shutdown = cfiscsi_shutdown,
};
CTL_FRONTEND_DECLARE(cfiscsi, cfiscsi_frontend);
MODULE_DEPEND(cfiscsi, icl, 1, 1, 1);

/*
 * Allocate a new PDU on the connection that "request" arrived on.
 * "flags" is passed straight through to icl_pdu_new(); callers in this
 * file pass M_NOWAIT and check the result against NULL.
 */
static struct icl_pdu *
cfiscsi_pdu_new_response(struct icl_pdu *request, int flags)
{

	return (icl_pdu_new(request->ip_conn, flags));
}

/*
 * Account for the CmdSN of an incoming PDU and reset the ping timer.
 *
 * Returns false when the PDU should be processed and true when the caller
 * must discard it.  In the "true" case the PDU is not freed here; that is
 * left to the caller.  A CmdSN beyond the accepted window additionally
 * requests session termination.
 */
static bool
cfiscsi_pdu_update_cmdsn(const struct icl_pdu *request)
{
	const struct iscsi_bhs_scsi_command *bhssc;
	struct cfiscsi_session *cs;
	uint32_t cmdsn, curcmdsn;

	cs = PDU_SESSION(request);

	/*
	 * Every incoming PDU - not just NOP-Out - resets the ping timer.
	 * The purpose of the timeout is to reset the connection when it stalls;
	 * we don't want this to happen when NOP-In or NOP-Out ends up delayed
	 * in some queue.
	 */
	cs->cs_timeout = 0;

	/*
	 * Immediate commands carry cmdsn, but it is neither incremented nor
	 * verified.
	 */
	if (request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_IMMEDIATE)
		return (false);

	/*
	 * Data-Out PDUs don't contain CmdSN.
	 */
	if (request->ip_bhs->bhs_opcode == ISCSI_BHS_OPCODE_SCSI_DATA_OUT)
		return (false);

	/*
	 * We're only using fields common for all the request
	 * (initiator -> target) PDUs.
	 */
	bhssc = (const struct iscsi_bhs_scsi_command *)request->ip_bhs;
	curcmdsn = cmdsn = ntohl(bhssc->bhssc_cmdsn);

	/*
	 * Increment session cmdsn and exit if we received the expected value.
	 * On failure atomic_fcmpset_32() stores the value it observed into
	 * curcmdsn, so the loop only retries while that value still equals
	 * the received CmdSN.
	 */
	do {
		if (atomic_fcmpset_32(&cs->cs_cmdsn, &curcmdsn, cmdsn + 1))
			return (false);
	} while (curcmdsn == cmdsn);

	/*
	 * The target MUST silently ignore any non-immediate command outside
	 * of this range.
	 */
	if (ISCSI_SNLT(cmdsn, curcmdsn) ||
	    ISCSI_SNGT(cmdsn, curcmdsn - 1 + maxtags)) {
		CFISCSI_SESSION_WARN(cs, "received PDU with CmdSN %u, "
		    "while expected %u", cmdsn, curcmdsn);
		return (true);
	}

	/*
	 * We don't support multiple connections now, so any discontinuity in
	 * CmdSN means lost PDUs.  Since we don't support PDU retransmission --
	 * terminate the connection.
	 */
	CFISCSI_SESSION_WARN(cs, "received PDU with CmdSN %u, "
	    "while expected %u; dropping connection",
	    cmdsn, curcmdsn);
	cfiscsi_session_terminate(cs);
	return (true);
}

/*
 * Dispatch a full-feature-phase PDU to its opcode-specific handler.
 * PDUs rejected by cfiscsi_pdu_update_cmdsn() are freed here; an
 * unsupported opcode frees the PDU and terminates the session.
 */
static void
cfiscsi_pdu_handle(struct icl_pdu *request)
{
	struct cfiscsi_session *cs;
	bool ignore;

	cs = PDU_SESSION(request);

	ignore = cfiscsi_pdu_update_cmdsn(request);
	if (ignore) {
		icl_pdu_free(request);
		return;
	}

	/*
	 * Handle the PDU; this includes e.g. receiving the remaining
	 * part of PDU and submitting the SCSI command to CTL
	 * or queueing a reply.  The handling routine is responsible
	 * for freeing the PDU when it's no longer needed.
	 */
	switch (request->ip_bhs->bhs_opcode &
	    ~ISCSI_BHS_OPCODE_IMMEDIATE) {
	case ISCSI_BHS_OPCODE_NOP_OUT:
		cfiscsi_pdu_handle_nop_out(request);
		break;
	case ISCSI_BHS_OPCODE_SCSI_COMMAND:
		cfiscsi_pdu_handle_scsi_command(request);
		break;
	case ISCSI_BHS_OPCODE_TASK_REQUEST:
		cfiscsi_pdu_handle_task_request(request);
		break;
	case ISCSI_BHS_OPCODE_SCSI_DATA_OUT:
		cfiscsi_pdu_handle_data_out(request);
		break;
	case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
		cfiscsi_pdu_handle_logout_request(request);
		break;
	default:
		CFISCSI_SESSION_WARN(cs, "received PDU with unsupported "
		    "opcode 0x%x; dropping connection",
		    request->ip_bhs->bhs_opcode);
		icl_pdu_free(request);
		cfiscsi_session_terminate(cs);
	}

}

/*
 * Receive hook for the connection.  With ICL_KERNEL_PROXY, PDUs that
 * arrive while the session is waiting for ctld or is in the login phase
 * are parked in cs_login_pdu (or freed if one is already parked) and the
 * login waiter is signalled; everything else goes to cfiscsi_pdu_handle().
 */
static void
cfiscsi_receive_callback(struct icl_pdu *request)
{
#ifdef ICL_KERNEL_PROXY
	struct cfiscsi_session *cs;

	cs = PDU_SESSION(request);
	if (cs->cs_waiting_for_ctld || cs->cs_login_phase) {
		if (cs->cs_login_pdu == NULL)
			cs->cs_login_pdu = request;
		else
			icl_pdu_free(request);
		cv_signal(&cs->cs_login_cv);
		return;
	}
#endif

	cfiscsi_pdu_handle(request);
}

/*
 * Error hook for the connection: log the failure and request session
 * termination.
 */
static void
cfiscsi_error_callback(struct icl_conn *ic)
{
	struct cfiscsi_session *cs;

	cs = CONN_SESSION(ic);

	CFISCSI_SESSION_WARN(cs, "connection error; dropping connection");
	cfiscsi_session_terminate(cs);
}

/*
 * Fill in the sequence-number fields (StatSN, ExpCmdSN, MaxCmdSN) of an
 * outgoing PDU and advance the session StatSN where appropriate.
 * Must be called with the session lock held (asserted below).
 * Always returns 0.
 */
static int
cfiscsi_pdu_prepare(struct icl_pdu *response)
{
	struct cfiscsi_session *cs;
	struct iscsi_bhs_scsi_response *bhssr;
	bool advance_statsn = true;
	uint32_t cmdsn;

	cs = PDU_SESSION(response);

	CFISCSI_SESSION_LOCK_ASSERT(cs);

	/*
	 * We're only using fields common for all the response
	 * (target -> initiator) PDUs.
	 */
	bhssr = (struct iscsi_bhs_scsi_response *)response->ip_bhs;

	/*
	 * 10.8.3: "The StatSN for this connection is not advanced
	 * after this PDU is sent."
	 */
	if (bhssr->bhssr_opcode == ISCSI_BHS_OPCODE_R2T)
		advance_statsn = false;

	/*
	 * 10.19.2: "However, when the Initiator Task Tag is set to 0xffffffff,
	 * StatSN for the connection is not advanced after this PDU is sent."
	 */
	if (bhssr->bhssr_opcode == ISCSI_BHS_OPCODE_NOP_IN &&
	    bhssr->bhssr_initiator_task_tag == 0xffffffff)
		advance_statsn = false;

	/*
	 * See the comment below - StatSN is not meaningful and must
	 * not be advanced.
	 */
	if (bhssr->bhssr_opcode == ISCSI_BHS_OPCODE_SCSI_DATA_IN &&
	    (bhssr->bhssr_flags & BHSDI_FLAGS_S) == 0)
		advance_statsn = false;

	/*
	 * 10.7.3: "The fields StatSN, Status, and Residual Count
	 * only have meaningful content if the S bit is set to 1."
*/ if (bhssr->bhssr_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_IN || (bhssr->bhssr_flags & BHSDI_FLAGS_S)) bhssr->bhssr_statsn = htonl(cs->cs_statsn); cmdsn = cs->cs_cmdsn; bhssr->bhssr_expcmdsn = htonl(cmdsn); bhssr->bhssr_maxcmdsn = htonl(cmdsn - 1 + imax(0, maxtags - cs->cs_outstanding_ctl_pdus)); if (advance_statsn) cs->cs_statsn++; return (0); } static void cfiscsi_pdu_queue(struct icl_pdu *response) { struct cfiscsi_session *cs; cs = PDU_SESSION(response); CFISCSI_SESSION_LOCK(cs); cfiscsi_pdu_prepare(response); icl_pdu_queue(response); CFISCSI_SESSION_UNLOCK(cs); } static void cfiscsi_pdu_queue_cb(struct icl_pdu *response, icl_pdu_cb cb) { struct cfiscsi_session *cs = PDU_SESSION(response); CFISCSI_SESSION_LOCK(cs); cfiscsi_pdu_prepare(response); icl_pdu_queue_cb(response, cb); CFISCSI_SESSION_UNLOCK(cs); } static void cfiscsi_pdu_handle_nop_out(struct icl_pdu *request) { struct cfiscsi_session *cs; struct iscsi_bhs_nop_out *bhsno; struct iscsi_bhs_nop_in *bhsni; struct icl_pdu *response; void *data = NULL; size_t datasize; int error; cs = PDU_SESSION(request); bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs; if (bhsno->bhsno_initiator_task_tag == 0xffffffff) { /* * Nothing to do, iscsi_pdu_update_statsn() already * zeroed the timeout. 
*/ icl_pdu_free(request); return; } datasize = icl_pdu_data_segment_length(request); if (datasize > 0) { data = malloc(datasize, M_CFISCSI, M_NOWAIT | M_ZERO); if (data == NULL) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory; " "dropping connection"); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } icl_pdu_get_data(request, 0, data, datasize); } response = cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory; " "droppping connection"); free(data, M_CFISCSI); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } bhsni = (struct iscsi_bhs_nop_in *)response->ip_bhs; bhsni->bhsni_opcode = ISCSI_BHS_OPCODE_NOP_IN; bhsni->bhsni_flags = 0x80; bhsni->bhsni_initiator_task_tag = bhsno->bhsno_initiator_task_tag; bhsni->bhsni_target_transfer_tag = 0xffffffff; if (datasize > 0) { error = icl_pdu_append_data(response, data, datasize, M_NOWAIT); if (error != 0) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory; " "dropping connection"); free(data, M_CFISCSI); icl_pdu_free(request); icl_pdu_free(response); cfiscsi_session_terminate(cs); return; } free(data, M_CFISCSI); } icl_pdu_free(request); cfiscsi_pdu_queue(response); } static void cfiscsi_pdu_handle_scsi_command(struct icl_pdu *request) { struct iscsi_bhs_scsi_command *bhssc; struct cfiscsi_session *cs; union ctl_io *io; int error; cs = PDU_SESSION(request); bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs; //CFISCSI_SESSION_DEBUG(cs, "initiator task tag 0x%x", // bhssc->bhssc_initiator_task_tag); if (request->ip_data_len > 0 && cs->cs_immediate_data == false) { CFISCSI_SESSION_WARN(cs, "unsolicited data with " "ImmediateData=No; dropping connection"); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } io = ctl_alloc_io(cs->cs_target->ct_port.ctl_pool_ref); ctl_zero_io(io); PRIV_REQUEST(io) = request; io->io_hdr.io_type = CTL_IO_SCSI; io->io_hdr.nexus.initid = cs->cs_ctl_initid; 
io->io_hdr.nexus.targ_port = cs->cs_target->ct_port.targ_port;
	io->io_hdr.nexus.targ_lun = ctl_decode_lun(be64toh(bhssc->bhssc_lun));
	io->scsiio.priority = (bhssc->bhssc_pri & BHSSC_PRI_MASK) >>
	    BHSSC_PRI_SHIFT;
	io->scsiio.tag_num = bhssc->bhssc_initiator_task_tag;
	/*
	 * Translate the iSCSI task attribute into the CTL tag type.
	 * Unknown attributes are logged and treated as untagged.
	 */
	switch ((bhssc->bhssc_flags & BHSSC_FLAGS_ATTR)) {
	case BHSSC_FLAGS_ATTR_UNTAGGED:
		io->scsiio.tag_type = CTL_TAG_UNTAGGED;
		break;
	case BHSSC_FLAGS_ATTR_SIMPLE:
		io->scsiio.tag_type = CTL_TAG_SIMPLE;
		break;
	case BHSSC_FLAGS_ATTR_ORDERED:
		io->scsiio.tag_type = CTL_TAG_ORDERED;
		break;
	case BHSSC_FLAGS_ATTR_HOQ:
		io->scsiio.tag_type = CTL_TAG_HEAD_OF_QUEUE;
		break;
	case BHSSC_FLAGS_ATTR_ACA:
		io->scsiio.tag_type = CTL_TAG_ACA;
		break;
	default:
		io->scsiio.tag_type = CTL_TAG_UNTAGGED;
		CFISCSI_SESSION_WARN(cs, "unhandled tag type %d",
		    bhssc->bhssc_flags & BHSSC_FLAGS_ATTR);
		break;
	}
	io->scsiio.cdb_len = sizeof(bhssc->bhssc_cdb); /* Which is 16. */
	memcpy(io->scsiio.cdb, bhssc->bhssc_cdb, sizeof(bhssc->bhssc_cdb));
	/* Count the request as outstanding before handing it to CTL. */
	refcount_acquire(&cs->cs_outstanding_ctl_pdus);
	error = ctl_run(io);
	if (error != CTL_RETVAL_COMPLETE) {
		CFISCSI_SESSION_WARN(cs, "ctl_run() failed; error %d; "
		    "dropping connection", error);
		ctl_free_io(io);
		refcount_release(&cs->cs_outstanding_ctl_pdus);
		icl_pdu_free(request);
		cfiscsi_session_terminate(cs);
	}
}

/*
 * Handle a Task Management Function Request PDU.
 *
 * Supported function codes are translated into a CTL_IO_TASK request and
 * submitted via ctl_run().  Unsupported codes are answered directly with
 * a "function not supported" Task Management Response.
 */
static void
cfiscsi_pdu_handle_task_request(struct icl_pdu *request)
{
	struct iscsi_bhs_task_management_request *bhstmr;
	struct iscsi_bhs_task_management_response *bhstmr2;
	struct icl_pdu *response;
	struct cfiscsi_session *cs;
	union ctl_io *io;
	int error;

	cs = PDU_SESSION(request);
	bhstmr = (struct iscsi_bhs_task_management_request *)request->ip_bhs;
	io = ctl_alloc_io(cs->cs_target->ct_port.ctl_pool_ref);
	ctl_zero_io(io);
	PRIV_REQUEST(io) = request;
	io->io_hdr.io_type = CTL_IO_TASK;
	io->io_hdr.nexus.initid = cs->cs_ctl_initid;
	io->io_hdr.nexus.targ_port = cs->cs_target->ct_port.targ_port;
	io->io_hdr.nexus.targ_lun = ctl_decode_lun(be64toh(bhstmr->bhstmr_lun));
	io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX */

	/* The top bit of the function byte is masked off before dispatch. */
	switch (bhstmr->bhstmr_function & ~0x80) {
	case BHSTMR_FUNCTION_ABORT_TASK:
#if 0
		CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_ABORT_TASK");
#endif
		io->taskio.task_action = CTL_TASK_ABORT_TASK;
		io->taskio.tag_num = bhstmr->bhstmr_referenced_task_tag;
		break;
	case BHSTMR_FUNCTION_ABORT_TASK_SET:
#if 0
		CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_ABORT_TASK_SET");
#endif
		io->taskio.task_action = CTL_TASK_ABORT_TASK_SET;
		break;
	case BHSTMR_FUNCTION_CLEAR_TASK_SET:
#if 0
		CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_CLEAR_TASK_SET");
#endif
		io->taskio.task_action = CTL_TASK_CLEAR_TASK_SET;
		break;
	case BHSTMR_FUNCTION_LOGICAL_UNIT_RESET:
#if 0
		CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_LOGICAL_UNIT_RESET");
#endif
		io->taskio.task_action = CTL_TASK_LUN_RESET;
		break;
	case BHSTMR_FUNCTION_TARGET_WARM_RESET:
#if 0
		CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_TARGET_WARM_RESET");
#endif
		io->taskio.task_action = CTL_TASK_TARGET_RESET;
		break;
	case BHSTMR_FUNCTION_TARGET_COLD_RESET:
		/* Cold reset maps to the same CTL action as warm reset. */
#if 0
		CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_TARGET_COLD_RESET");
#endif
		io->taskio.task_action = CTL_TASK_TARGET_RESET;
		break;
	case BHSTMR_FUNCTION_QUERY_TASK:
#if 0
		CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_QUERY_TASK");
#endif
		io->taskio.task_action = CTL_TASK_QUERY_TASK;
		io->taskio.tag_num = bhstmr->bhstmr_referenced_task_tag;
		break;
	case BHSTMR_FUNCTION_QUERY_TASK_SET:
#if 0
		CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_QUERY_TASK_SET");
#endif
		io->taskio.task_action = CTL_TASK_QUERY_TASK_SET;
		break;
	case BHSTMR_FUNCTION_I_T_NEXUS_RESET:
#if 0
		CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_I_T_NEXUS_RESET");
#endif
		io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET;
		break;
	case BHSTMR_FUNCTION_QUERY_ASYNC_EVENT:
#if 0
		CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_QUERY_ASYNC_EVENT");
#endif
		io->taskio.task_action = CTL_TASK_QUERY_ASYNC_EVENT;
		break;
	default:
		/* Not handed to CTL: release the io and answer directly. */
		CFISCSI_SESSION_DEBUG(cs, "unsupported function 0x%x",
		    bhstmr->bhstmr_function & ~0x80);
		ctl_free_io(io);

		response =
cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory; " "dropping connection"); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } bhstmr2 = (struct iscsi_bhs_task_management_response *) response->ip_bhs; bhstmr2->bhstmr_opcode = ISCSI_BHS_OPCODE_TASK_RESPONSE; bhstmr2->bhstmr_flags = 0x80; bhstmr2->bhstmr_response = BHSTMR_RESPONSE_FUNCTION_NOT_SUPPORTED; bhstmr2->bhstmr_initiator_task_tag = bhstmr->bhstmr_initiator_task_tag; icl_pdu_free(request); cfiscsi_pdu_queue(response); return; } refcount_acquire(&cs->cs_outstanding_ctl_pdus); error = ctl_run(io); if (error != CTL_RETVAL_COMPLETE) { CFISCSI_SESSION_WARN(cs, "ctl_run() failed; error %d; " "dropping connection", error); ctl_free_io(io); refcount_release(&cs->cs_outstanding_ctl_pdus); icl_pdu_free(request); cfiscsi_session_terminate(cs); } } static bool cfiscsi_handle_data_segment(struct icl_pdu *request, struct cfiscsi_data_wait *cdw) { struct iscsi_bhs_data_out *bhsdo; struct cfiscsi_session *cs; struct ctl_sg_entry ctl_sg_entry, *ctl_sglist; size_t copy_len, len, off, buffer_offset; int ctl_sg_count; union ctl_io *io; cs = PDU_SESSION(request); KASSERT((request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_DATA_OUT || (request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_COMMAND, ("bad opcode 0x%x", request->ip_bhs->bhs_opcode)); /* * We're only using fields common for Data-Out and SCSI Command PDUs. 
*/ bhsdo = (struct iscsi_bhs_data_out *)request->ip_bhs; io = cdw->cdw_ctl_io; KASSERT((io->io_hdr.flags & CTL_FLAG_DATA_MASK) != CTL_FLAG_DATA_IN, ("CTL_FLAG_DATA_IN")); #if 0 CFISCSI_SESSION_DEBUG(cs, "received %zd bytes out of %d", request->ip_data_len, io->scsiio.kern_total_len); #endif if (io->scsiio.kern_sg_entries > 0) { ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr; ctl_sg_count = io->scsiio.kern_sg_entries; } else { ctl_sglist = &ctl_sg_entry; ctl_sglist->addr = io->scsiio.kern_data_ptr; ctl_sglist->len = io->scsiio.kern_data_len; ctl_sg_count = 1; } if ((request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_DATA_OUT) buffer_offset = ntohl(bhsdo->bhsdo_buffer_offset); else buffer_offset = 0; len = icl_pdu_data_segment_length(request); /* * Make sure the offset, as sent by the initiator, matches the offset * we're supposed to be at in the scatter-gather list. */ if (buffer_offset > io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled || buffer_offset + len <= io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled) { CFISCSI_SESSION_WARN(cs, "received bad buffer offset %zd, " "expected %zd; dropping connection", buffer_offset, (size_t)io->scsiio.kern_rel_offset + (size_t)io->scsiio.ext_data_filled); ctl_set_data_phase_error(&io->scsiio); cfiscsi_session_terminate(cs); return (true); } /* * This is the offset within the PDU data segment, as opposed * to buffer_offset, which is the offset within the task (SCSI * command). */ off = io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled - buffer_offset; /* * Iterate over the scatter/gather segments, filling them with data * from the PDU data segment. Note that this can get called multiple * times for one SCSI command; the cdw structure holds state for the * scatter/gather list. 
*/ for (;;) { KASSERT(cdw->cdw_sg_index < ctl_sg_count, ("cdw->cdw_sg_index >= ctl_sg_count")); if (cdw->cdw_sg_len == 0) { cdw->cdw_sg_addr = ctl_sglist[cdw->cdw_sg_index].addr; cdw->cdw_sg_len = ctl_sglist[cdw->cdw_sg_index].len; } KASSERT(off <= len, ("len > off")); copy_len = len - off; if (copy_len > cdw->cdw_sg_len) copy_len = cdw->cdw_sg_len; icl_pdu_get_data(request, off, cdw->cdw_sg_addr, copy_len); cdw->cdw_sg_addr += copy_len; cdw->cdw_sg_len -= copy_len; off += copy_len; io->scsiio.ext_data_filled += copy_len; io->scsiio.kern_data_resid -= copy_len; if (cdw->cdw_sg_len == 0) { /* * End of current segment. */ if (cdw->cdw_sg_index == ctl_sg_count - 1) { /* * Last segment in scatter/gather list. */ break; } cdw->cdw_sg_index++; } if (off == len) { /* * End of PDU payload. */ break; } } if (len > off) { /* * In case of unsolicited data, it's possible that the buffer * provided by CTL is smaller than negotiated FirstBurstLength. * Just ignore the superfluous data; will ask for them with R2T * on next call to cfiscsi_datamove(). * * This obviously can only happen with SCSI Command PDU. 
*/ if ((request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_COMMAND) return (true); CFISCSI_SESSION_WARN(cs, "received too much data: got %zd bytes, " "expected %zd; dropping connection", icl_pdu_data_segment_length(request), off); ctl_set_data_phase_error(&io->scsiio); cfiscsi_session_terminate(cs); return (true); } if (io->scsiio.ext_data_filled == cdw->cdw_r2t_end && (bhsdo->bhsdo_flags & BHSDO_FLAGS_F) == 0) { CFISCSI_SESSION_WARN(cs, "got the final packet without " "the F flag; flags = 0x%x; dropping connection", bhsdo->bhsdo_flags); ctl_set_data_phase_error(&io->scsiio); cfiscsi_session_terminate(cs); return (true); } if (io->scsiio.ext_data_filled != cdw->cdw_r2t_end && (bhsdo->bhsdo_flags & BHSDO_FLAGS_F) != 0) { if ((request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_DATA_OUT) { CFISCSI_SESSION_WARN(cs, "got the final packet, but the " "transmitted size was %zd bytes instead of %d; " "dropping connection", (size_t)io->scsiio.ext_data_filled, cdw->cdw_r2t_end); ctl_set_data_phase_error(&io->scsiio); cfiscsi_session_terminate(cs); return (true); } else { /* * For SCSI Command PDU, this just means we need to * solicit more data by sending R2T. 
*/ return (false); } } if (io->scsiio.ext_data_filled == cdw->cdw_r2t_end) { #if 0 CFISCSI_SESSION_DEBUG(cs, "no longer expecting Data-Out with target " "transfer tag 0x%x", cdw->cdw_target_transfer_tag); #endif return (true); } return (false); } static void cfiscsi_pdu_handle_data_out(struct icl_pdu *request) { struct iscsi_bhs_data_out *bhsdo; struct cfiscsi_session *cs; struct cfiscsi_data_wait *cdw = NULL; union ctl_io *io; bool done; cs = PDU_SESSION(request); bhsdo = (struct iscsi_bhs_data_out *)request->ip_bhs; CFISCSI_SESSION_LOCK(cs); TAILQ_FOREACH(cdw, &cs->cs_waiting_for_data_out, cdw_next) { #if 0 CFISCSI_SESSION_DEBUG(cs, "have ttt 0x%x, itt 0x%x; looking for " "ttt 0x%x, itt 0x%x", bhsdo->bhsdo_target_transfer_tag, bhsdo->bhsdo_initiator_task_tag, cdw->cdw_target_transfer_tag, cdw->cdw_initiator_task_tag)); #endif if (bhsdo->bhsdo_target_transfer_tag == cdw->cdw_target_transfer_tag) break; } CFISCSI_SESSION_UNLOCK(cs); if (cdw == NULL) { CFISCSI_SESSION_WARN(cs, "data transfer tag 0x%x, initiator task tag " "0x%x, not found; dropping connection", bhsdo->bhsdo_target_transfer_tag, bhsdo->bhsdo_initiator_task_tag); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } if (cdw->cdw_datasn != ntohl(bhsdo->bhsdo_datasn)) { CFISCSI_SESSION_WARN(cs, "received Data-Out PDU with " "DataSN %u, while expected %u; dropping connection", ntohl(bhsdo->bhsdo_datasn), cdw->cdw_datasn); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } cdw->cdw_datasn++; io = cdw->cdw_ctl_io; KASSERT((io->io_hdr.flags & CTL_FLAG_DATA_MASK) != CTL_FLAG_DATA_IN, ("CTL_FLAG_DATA_IN")); done = cfiscsi_handle_data_segment(request, cdw); if (done) { CFISCSI_SESSION_LOCK(cs); TAILQ_REMOVE(&cs->cs_waiting_for_data_out, cdw, cdw_next); CFISCSI_SESSION_UNLOCK(cs); done = (io->scsiio.ext_data_filled != cdw->cdw_r2t_end || io->scsiio.ext_data_filled == io->scsiio.kern_data_len); cfiscsi_data_wait_free(cs, cdw); io->io_hdr.flags &= ~CTL_FLAG_DMA_INPROG; if (done) 
ctl_datamove_done(io, false); else cfiscsi_datamove_out(io); } icl_pdu_free(request); } static void cfiscsi_pdu_handle_logout_request(struct icl_pdu *request) { struct iscsi_bhs_logout_request *bhslr; struct iscsi_bhs_logout_response *bhslr2; struct icl_pdu *response; struct cfiscsi_session *cs; cs = PDU_SESSION(request); bhslr = (struct iscsi_bhs_logout_request *)request->ip_bhs; switch (bhslr->bhslr_reason & 0x7f) { case BHSLR_REASON_CLOSE_SESSION: case BHSLR_REASON_CLOSE_CONNECTION: response = cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_DEBUG(cs, "failed to allocate memory"); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } bhslr2 = (struct iscsi_bhs_logout_response *)response->ip_bhs; bhslr2->bhslr_opcode = ISCSI_BHS_OPCODE_LOGOUT_RESPONSE; bhslr2->bhslr_flags = 0x80; bhslr2->bhslr_response = BHSLR_RESPONSE_CLOSED_SUCCESSFULLY; bhslr2->bhslr_initiator_task_tag = bhslr->bhslr_initiator_task_tag; icl_pdu_free(request); cfiscsi_pdu_queue(response); cfiscsi_session_terminate(cs); break; case BHSLR_REASON_REMOVE_FOR_RECOVERY: response = cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory; dropping connection"); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } bhslr2 = (struct iscsi_bhs_logout_response *)response->ip_bhs; bhslr2->bhslr_opcode = ISCSI_BHS_OPCODE_LOGOUT_RESPONSE; bhslr2->bhslr_flags = 0x80; bhslr2->bhslr_response = BHSLR_RESPONSE_RECOVERY_NOT_SUPPORTED; bhslr2->bhslr_initiator_task_tag = bhslr->bhslr_initiator_task_tag; icl_pdu_free(request); cfiscsi_pdu_queue(response); break; default: CFISCSI_SESSION_WARN(cs, "invalid reason 0%x; dropping connection", bhslr->bhslr_reason); icl_pdu_free(request); cfiscsi_session_terminate(cs); break; } } static void cfiscsi_callout(void *context) { struct icl_pdu *cp; struct iscsi_bhs_nop_in *bhsni; struct cfiscsi_session *cs; cs = context; if (cs->cs_terminating) return; 
callout_schedule(&cs->cs_callout, 1 * hz);

	/* One more tick without the timer having been reset. */
	atomic_add_int(&cs->cs_timeout, 1);

#ifdef ICL_KERNEL_PROXY
	if (cs->cs_waiting_for_ctld || cs->cs_login_phase) {
		if (login_timeout > 0 && cs->cs_timeout > login_timeout) {
			CFISCSI_SESSION_WARN(cs, "login timed out after "
			    "%d seconds; dropping connection", cs->cs_timeout);
			cfiscsi_session_terminate(cs);
		}
		return;
	}
#endif

	if (ping_timeout <= 0) {
		/*
		 * Pings are disabled.  Don't send NOP-In in this case;
		 * user might have disabled pings to work around problems
		 * with certain initiators that can't properly handle
		 * NOP-In, such as iPXE.  Reset the timeout, to avoid
		 * triggering reconnection, should the user decide to
		 * reenable them.
		 */
		cs->cs_timeout = 0;
		return;
	}

	if (cs->cs_timeout >= ping_timeout) {
		CFISCSI_SESSION_WARN(cs, "no ping reply (NOP-Out) after %d seconds; "
		    "dropping connection",  ping_timeout);
		cfiscsi_session_terminate(cs);
		return;
	}

	/*
	 * If the ping was reset less than one second ago - which means
	 * that we've received some PDU during the last second - assume
	 * the traffic flows correctly and don't bother sending a NOP-In.
	 *
	 * (It's 2 - one for one second, and one for incrementing cs_timeout
	 * earlier in this routine.)
	 */
	if (cs->cs_timeout < 2)
		return;

	cp = icl_pdu_new(cs->cs_conn, M_NOWAIT);
	if (cp == NULL) {
		CFISCSI_SESSION_WARN(cs, "failed to allocate memory");
		return;
	}
	/* Target-originated ping: NOP-In with ITT 0xffffffff. */
	bhsni = (struct iscsi_bhs_nop_in *)cp->ip_bhs;
	bhsni->bhsni_opcode = ISCSI_BHS_OPCODE_NOP_IN;
	bhsni->bhsni_flags = 0x80;
	bhsni->bhsni_initiator_task_tag = 0xffffffff;

	cfiscsi_pdu_queue(cp);
}

/*
 * Allocate a zeroed data-wait record for "io" and set up the ICL transfer.
 *
 * icl_conn_transfer_setup() is handed "target_transfer_tagp"; the value
 * found there afterwards is recorded in the new record together with
 * "initiator_task_tag".  Returns NULL - after logging a warning - if the
 * zone allocation or the transfer setup fails.
 */
static struct cfiscsi_data_wait *
cfiscsi_data_wait_new(struct cfiscsi_session *cs, union ctl_io *io,
    uint32_t initiator_task_tag, uint32_t *target_transfer_tagp)
{
	struct cfiscsi_data_wait *cdw;
	int error;

	cdw = uma_zalloc(cfiscsi_data_wait_zone, M_NOWAIT | M_ZERO);
	if (cdw == NULL) {
		CFISCSI_SESSION_WARN(cs,
		    "failed to allocate %zd bytes", sizeof(*cdw));
		return (NULL);
	}

	error = icl_conn_transfer_setup(cs->cs_conn, io, target_transfer_tagp,
	    &cdw->cdw_icl_prv);
	if (error != 0) {
		CFISCSI_SESSION_WARN(cs,
		    "icl_conn_transfer_setup() failed with error %d",
		    error);
		uma_zfree(cfiscsi_data_wait_zone, cdw);
		return (NULL);
	}

	cdw->cdw_ctl_io = io;
	cdw->cdw_target_transfer_tag = *target_transfer_tagp;
	cdw->cdw_initiator_task_tag = initiator_task_tag;

	return (cdw);
}

/*
 * Undo cfiscsi_data_wait_new(): finish the ICL transfer and return the
 * record to its zone.  The caller is expected to have unlinked "cdw"
 * from any queue beforehand; nothing here touches the queue.
 */
static void
cfiscsi_data_wait_free(struct cfiscsi_session *cs,
    struct cfiscsi_data_wait *cdw)
{

	icl_conn_transfer_done(cs->cs_conn, cdw->cdw_icl_prv);
	uma_zfree(cfiscsi_data_wait_zone, cdw);
}

/*
 * Abort everything the session still has in flight: submit an I_T nexus
 * reset to CTL, fail all pending Data-Out waits, and sleep until the
 * count of outstanding CTL requests drops to zero.
 */
static void
cfiscsi_session_terminate_tasks(struct cfiscsi_session *cs)
{
	struct cfiscsi_data_wait *cdw;
	union ctl_io *io, *cdw_io;
	int error, last, wait;

	if (cs->cs_target == NULL)
		return;		/* No target yet, so nothing to do.
*/
	io = ctl_alloc_io(cs->cs_target->ct_port.ctl_pool_ref);
	ctl_zero_io(io);
	/* Unlike the PDU handlers, the private slot holds the session here. */
	PRIV_REQUEST(io) = cs;
	io->io_hdr.io_type = CTL_IO_TASK;
	io->io_hdr.nexus.initid = cs->cs_ctl_initid;
	io->io_hdr.nexus.targ_port = cs->cs_target->ct_port.targ_port;
	io->io_hdr.nexus.targ_lun = 0;
	io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX */
	io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET;
	/* Snapshot taken before the reset itself is counted; used for logs. */
	wait = cs->cs_outstanding_ctl_pdus;
	refcount_acquire(&cs->cs_outstanding_ctl_pdus);
	error = ctl_run(io);
	if (error != CTL_RETVAL_COMPLETE) {
		CFISCSI_SESSION_WARN(cs, "ctl_run() failed; error %d", error);
		refcount_release(&cs->cs_outstanding_ctl_pdus);
		ctl_free_io(io);
	}

	/*
	 * Drain the Data-Out wait queue.  The session lock is dropped
	 * around the per-entry work and retaken before the next lookup.
	 */
	CFISCSI_SESSION_LOCK(cs);
	while ((cdw = TAILQ_FIRST(&cs->cs_waiting_for_data_out)) != NULL) {
		TAILQ_REMOVE(&cs->cs_waiting_for_data_out, cdw, cdw_next);
		CFISCSI_SESSION_UNLOCK(cs);
		/*
		 * Set nonzero port status; this prevents backends from
		 * assuming that the data transfer actually succeeded
		 * and writing uninitialized data to disk.
		 */
		cdw_io = cdw->cdw_ctl_io;
		cdw_io->io_hdr.flags &= ~CTL_FLAG_DMA_INPROG;
		cdw_io->scsiio.io_hdr.port_status = 42;
		cfiscsi_data_wait_free(cs, cdw);
		ctl_datamove_done(cdw_io, false);
		CFISCSI_SESSION_LOCK(cs);
	}
	CFISCSI_SESSION_UNLOCK(cs);

	/*
	 * Wait for CTL to terminate all the tasks.
	 */
	if (wait > 0)
		CFISCSI_SESSION_WARN(cs,
		    "waiting for CTL to terminate %d tasks", wait);
	for (;;) {
		/*
		 * Acquire-then-release probes the counter: the release
		 * result is nonzero once no other references remain.
		 */
		refcount_acquire(&cs->cs_outstanding_ctl_pdus);
		last = refcount_release(&cs->cs_outstanding_ctl_pdus);
		if (last != 0)
			break;
		tsleep(__DEVOLATILE(void *, &cs->cs_outstanding_ctl_pdus),
		    0, "cfiscsi_terminate", hz / 100);
	}
	if (wait > 0)
		CFISCSI_SESSION_WARN(cs, "tasks terminated");
}

/*
 * Per-session kernel thread.  It sleeps on cs_maintenance_cv until the
 * session is marked as terminating and no handoff is in progress, then
 * tears the session down and exits.
 */
static void
cfiscsi_maintenance_thread(void *arg)
{
	struct cfiscsi_session *cs;

	cs = arg;

	for (;;) {
		CFISCSI_SESSION_LOCK(cs);
		if (cs->cs_terminating == false || cs->cs_handoff_in_progress)
			cv_wait(&cs->cs_maintenance_cv, &cs->cs_lock);
		CFISCSI_SESSION_UNLOCK(cs);

		if (cs->cs_terminating && cs->cs_handoff_in_progress == false) {
			/*
			 * We used to wait up to 30 seconds to deliver queued
			 * PDUs to the initiator.  We also tried hard to deliver
			 * SCSI Responses for the aborted PDUs.  We don't do
			 * that anymore.  We might need to revisit that.
			 */
			callout_drain(&cs->cs_callout);
			icl_conn_close(cs->cs_conn);

			/*
			 * At this point ICL receive thread is no longer
			 * running; no new tasks can be queued.
*/
			cfiscsi_session_terminate_tasks(cs);
			cfiscsi_session_delete(cs);
			kthread_exit();
			return;
		}
		CFISCSI_SESSION_DEBUG(cs, "nothing to do");
	}
}

/*
 * Flag the session as terminating and signal cs_maintenance_cv, the
 * condition variable cfiscsi_maintenance_thread() sleeps on.  When built
 * with ICL_KERNEL_PROXY, cs_login_cv is signalled as well.
 */
static void
cfiscsi_session_terminate(struct cfiscsi_session *cs)
{

	cs->cs_terminating = true;
	cv_signal(&cs->cs_maintenance_cv);
#ifdef ICL_KERNEL_PROXY
	cv_signal(&cs->cs_login_cv);
#endif
}

/*
 * Register this session's initiator with CTL on the target's port.
 *
 * A copy of cs_initiator_id is passed to ctl_add_initiator(); a negative
 * return value from it is treated as failure.  On success the returned
 * value is stored in cs_ctl_initid.
 *
 * Returns 0 on success, 1 on failure.
 *
 * NOTE(review): the strdup()ed name is never freed in this function;
 * presumably ctl_add_initiator() takes ownership of it -- confirm.
 */
static int
cfiscsi_session_register_initiator(struct cfiscsi_session *cs)
{
	struct cfiscsi_target *ct;
	char *name;
	int i;

	KASSERT(cs->cs_ctl_initid == -1, ("already registered"));

	ct = cs->cs_target;
	name = strdup(cs->cs_initiator_id, M_CTL);
	i = ctl_add_initiator(&ct->ct_port, -1, 0, name);
	if (i < 0) {
		CFISCSI_SESSION_WARN(cs, "ctl_add_initiator failed with error %d",
		    i);
		cs->cs_ctl_initid = -1;
		return (1);
	}
	cs->cs_ctl_initid = i;
#if 0
	CFISCSI_SESSION_DEBUG(cs, "added initiator id %d", i);
#endif

	return (0);
}

/*
 * Remove this session's initiator from CTL.  Does nothing when no
 * initiator is registered (cs_ctl_initid == -1).  A failure of
 * ctl_remove_initiator() is only logged; cs_ctl_initid is reset to -1
 * either way.
 */
static void
cfiscsi_session_unregister_initiator(struct cfiscsi_session *cs)
{
	int error;

	if (cs->cs_ctl_initid == -1)
		return;

	error = ctl_remove_initiator(&cs->cs_target->ct_port, cs->cs_ctl_initid);
	if (error != 0) {
		CFISCSI_SESSION_WARN(cs, "ctl_remove_initiator failed with error %d",
		    error);
	}
	cs->cs_ctl_initid = -1;
}

/*
 * Allocate a zeroed session (M_NOWAIT, so this can fail and return NULL)
 * and initialize its refcount, Data-Out wait queue, lock and condition
 * variables before creating the ICL connection.
 */
static struct cfiscsi_session *
cfiscsi_session_new(struct cfiscsi_softc *softc, const char *offload)
{
	struct cfiscsi_session *cs;
	int error;

	cs = malloc(sizeof(*cs), M_CFISCSI, M_NOWAIT | M_ZERO);
	if (cs == NULL) {
		CFISCSI_WARN("malloc failed");
		return (NULL);
	}
	/* -1 means "no initiator registered with CTL yet". */
	cs->cs_ctl_initid = -1;

	refcount_init(&cs->cs_outstanding_ctl_pdus, 0);
	TAILQ_INIT(&cs->cs_waiting_for_data_out);
	mtx_init(&cs->cs_lock, "cfiscsi_lock", NULL, MTX_DEF);
	cv_init(&cs->cs_maintenance_cv, "cfiscsi_mt");
#ifdef ICL_KERNEL_PROXY
	cv_init(&cs->cs_login_cv, "cfiscsi_login");
#endif

	/*
	 * The purpose of this is to avoid racing with session shutdown.
	 * Otherwise we could have the maintenance thread call icl_conn_close()
	 * before we call icl_conn_handoff().
*/ cs->cs_handoff_in_progress = true; cs->cs_conn = icl_new_conn(offload, false, "cfiscsi", &cs->cs_lock); if (cs->cs_conn == NULL) { free(cs, M_CFISCSI); return (NULL); } cs->cs_conn->ic_receive = cfiscsi_receive_callback; cs->cs_conn->ic_error = cfiscsi_error_callback; cs->cs_conn->ic_prv0 = cs; error = kthread_add(cfiscsi_maintenance_thread, cs, NULL, NULL, 0, 0, "cfiscsimt"); if (error != 0) { CFISCSI_SESSION_WARN(cs, "kthread_add(9) failed with error %d", error); free(cs, M_CFISCSI); return (NULL); } mtx_lock(&softc->lock); cs->cs_id = ++softc->last_session_id; TAILQ_INSERT_TAIL(&softc->sessions, cs, cs_next); mtx_unlock(&softc->lock); /* * Start pinging the initiator. */ callout_init(&cs->cs_callout, 1); callout_reset(&cs->cs_callout, 1 * hz, cfiscsi_callout, cs); return (cs); } static void cfiscsi_session_delete(struct cfiscsi_session *cs) { struct cfiscsi_softc *softc; softc = &cfiscsi_softc; KASSERT(cs->cs_outstanding_ctl_pdus == 0, ("destroying session with outstanding CTL pdus")); KASSERT(TAILQ_EMPTY(&cs->cs_waiting_for_data_out), ("destroying session with non-empty queue")); mtx_lock(&softc->lock); TAILQ_REMOVE(&softc->sessions, cs, cs_next); mtx_unlock(&softc->lock); cfiscsi_session_unregister_initiator(cs); if (cs->cs_target != NULL) cfiscsi_target_release(cs->cs_target); icl_conn_close(cs->cs_conn); icl_conn_free(cs->cs_conn); free(cs, M_CFISCSI); cv_signal(&softc->sessions_cv); } static int cfiscsi_init(void) { struct cfiscsi_softc *softc; softc = &cfiscsi_softc; bzero(softc, sizeof(*softc)); mtx_init(&softc->lock, "cfiscsi", NULL, MTX_DEF); cv_init(&softc->sessions_cv, "cfiscsi_sessions"); #ifdef ICL_KERNEL_PROXY cv_init(&softc->accept_cv, "cfiscsi_accept"); #endif TAILQ_INIT(&softc->sessions); TAILQ_INIT(&softc->targets); cfiscsi_data_wait_zone = uma_zcreate("cfiscsi_data_wait", sizeof(struct cfiscsi_data_wait), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); return (0); } static int cfiscsi_shutdown(void) { struct cfiscsi_softc *softc = 
&cfiscsi_softc;

	/* Refuse to unload while anything is still using the frontend. */
	if (!TAILQ_EMPTY(&softc->sessions) || !TAILQ_EMPTY(&softc->targets))
		return (EBUSY);

	uma_zdestroy(cfiscsi_data_wait_zone);
#ifdef ICL_KERNEL_PROXY
	cv_destroy(&softc->accept_cv);
#endif
	cv_destroy(&softc->sessions_cv);
	mtx_destroy(&softc->lock);
	return (0);
}

#ifdef ICL_KERNEL_PROXY
/*
 * Callback handed to icl_listen_new() by cfiscsi_online().  Creates a new
 * session for socket 'so', hands the socket to ICL, records the peer
 * address and portal id, marks the session as waiting for ctld and
 * signals accept_cv (which cfiscsi_ioctl_accept() sleeps on).
 */
static void
cfiscsi_accept(struct socket *so, struct sockaddr *sa, int portal_id)
{
	struct cfiscsi_session *cs;

	cs = cfiscsi_session_new(&cfiscsi_softc, NULL);
	if (cs == NULL) {
		CFISCSI_WARN("failed to create session");
		return;
	}

	icl_conn_handoff_sock(cs->cs_conn, so);
	cs->cs_initiator_sa = sa;
	cs->cs_portal_id = portal_id;
	cs->cs_handoff_in_progress = false;
	cs->cs_waiting_for_ctld = true;
	cv_signal(&cfiscsi_softc.accept_cv);

	CFISCSI_SESSION_LOCK(cs);
	/*
	 * Wake up the maintenance thread if we got scheduled for termination
	 * somewhere between cfiscsi_session_new() and icl_conn_handoff_sock().
	 */
	if (cs->cs_terminating)
		cfiscsi_session_terminate(cs);
	CFISCSI_SESSION_UNLOCK(cs);
}
#endif

/*
 * Port "online" callback; 'arg' is the struct cfiscsi_target.  Marks the
 * target online and bumps the count of online targets.  Does nothing if
 * the target is already online.  When this is the first online target
 * (the previous count was 0) and ICL_KERNEL_PROXY is enabled, any old
 * listener is freed and a new one is created.
 */
static void
cfiscsi_online(void *arg)
{
	struct cfiscsi_softc *softc;
	struct cfiscsi_target *ct;
	int online;

	ct = (struct cfiscsi_target *)arg;
	softc = ct->ct_softc;

	mtx_lock(&softc->lock);
	if (ct->ct_online) {
		mtx_unlock(&softc->lock);
		return;
	}
	ct->ct_online = 1;
	/* Post-increment: 'online' holds the count before this target. */
	online = softc->online++;
	mtx_unlock(&softc->lock);
	if (online > 0)
		return;

#ifdef ICL_KERNEL_PROXY
	if (softc->listener != NULL)
		icl_listen_free(softc->listener);
	softc->listener = icl_listen_new(cfiscsi_accept);
#endif
}

/*
 * Port "offline" callback; 'arg' is the struct cfiscsi_target.  Marks the
 * target offline, decrements the count of online targets, then keeps
 * terminating every session attached to this target and sleeping on
 * sessions_cv until no such session is left.
 */
static void
cfiscsi_offline(void *arg)
{
	struct cfiscsi_softc *softc;
	struct cfiscsi_target *ct;
	struct cfiscsi_session *cs;
	int error, online;

	ct = (struct cfiscsi_target *)arg;
	softc = ct->ct_softc;

	mtx_lock(&softc->lock);
	if (!ct->ct_online) {
		mtx_unlock(&softc->lock);
		return;
	}
	ct->ct_online = 0;
	/* Pre-decrement: 'online' holds the count after this target left. */
	online = --softc->online;

	do {
		/* Ask every session of this target to terminate... */
		TAILQ_FOREACH(cs, &softc->sessions, cs_next) {
			if (cs->cs_target == ct)
				cfiscsi_session_terminate(cs);
		}
		/* ...then look for one that is still on the list. */
		TAILQ_FOREACH(cs, &softc->sessions, cs_next) {
			if (cs->cs_target == ct)
				break;
		}
if (cs != NULL) { error = cv_wait_sig(&softc->sessions_cv, &softc->lock); if (error != 0) { CFISCSI_SESSION_DEBUG(cs, "cv_wait failed with error %d\n", error); break; } } } while (cs != NULL && ct->ct_online == 0); mtx_unlock(&softc->lock); if (online > 0) return; #ifdef ICL_KERNEL_PROXY icl_listen_free(softc->listener); softc->listener = NULL; #endif } static int cfiscsi_info(void *arg, struct sbuf *sb) { struct cfiscsi_target *ct = (struct cfiscsi_target *)arg; int retval; retval = sbuf_printf(sb, "\t%d\n", ct->ct_state); return (retval); } static void cfiscsi_ioctl_handoff(struct ctl_iscsi *ci) { struct cfiscsi_softc *softc; struct cfiscsi_session *cs, *cs2; struct cfiscsi_target *ct; struct ctl_iscsi_handoff_params *cihp; int error; cihp = (struct ctl_iscsi_handoff_params *)&(ci->data); softc = &cfiscsi_softc; CFISCSI_DEBUG("new connection from %s (%s) to %s", cihp->initiator_name, cihp->initiator_addr, cihp->target_name); ct = cfiscsi_target_find(softc, cihp->target_name, cihp->portal_group_tag); if (ct == NULL) { ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "%s: target not found", __func__); return; } #ifdef ICL_KERNEL_PROXY if (cihp->socket > 0 && cihp->connection_id > 0) { snprintf(ci->error_str, sizeof(ci->error_str), "both socket and connection_id set"); ci->status = CTL_ISCSI_ERROR; cfiscsi_target_release(ct); return; } if (cihp->socket == 0) { mtx_lock(&cfiscsi_softc.lock); TAILQ_FOREACH(cs, &cfiscsi_softc.sessions, cs_next) { if (cs->cs_id == cihp->connection_id) break; } if (cs == NULL) { mtx_unlock(&cfiscsi_softc.lock); snprintf(ci->error_str, sizeof(ci->error_str), "connection not found"); ci->status = CTL_ISCSI_ERROR; cfiscsi_target_release(ct); return; } mtx_unlock(&cfiscsi_softc.lock); } else { #endif cs = cfiscsi_session_new(softc, cihp->offload); if (cs == NULL) { ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "%s: cfiscsi_session_new failed", __func__); cfiscsi_target_release(ct); 
return; } #ifdef ICL_KERNEL_PROXY } #endif /* * First PDU of Full Feature phase has the same CmdSN as the last * PDU from the Login Phase received from the initiator. Thus, * the -1 below. */ cs->cs_cmdsn = cihp->cmdsn; cs->cs_statsn = cihp->statsn; - cs->cs_max_recv_data_segment_length = cihp->max_recv_data_segment_length; - cs->cs_max_send_data_segment_length = cihp->max_send_data_segment_length; + cs->cs_conn->ic_max_recv_data_segment_length = + cihp->max_recv_data_segment_length; + cs->cs_conn->ic_max_send_data_segment_length = + cihp->max_send_data_segment_length; cs->cs_max_burst_length = cihp->max_burst_length; cs->cs_first_burst_length = cihp->first_burst_length; cs->cs_immediate_data = !!cihp->immediate_data; if (cihp->header_digest == CTL_ISCSI_DIGEST_CRC32C) cs->cs_conn->ic_header_crc32c = true; if (cihp->data_digest == CTL_ISCSI_DIGEST_CRC32C) cs->cs_conn->ic_data_crc32c = true; strlcpy(cs->cs_initiator_name, cihp->initiator_name, sizeof(cs->cs_initiator_name)); strlcpy(cs->cs_initiator_addr, cihp->initiator_addr, sizeof(cs->cs_initiator_addr)); strlcpy(cs->cs_initiator_alias, cihp->initiator_alias, sizeof(cs->cs_initiator_alias)); memcpy(cs->cs_initiator_isid, cihp->initiator_isid, sizeof(cs->cs_initiator_isid)); snprintf(cs->cs_initiator_id, sizeof(cs->cs_initiator_id), "%s,i,0x%02x%02x%02x%02x%02x%02x", cs->cs_initiator_name, cihp->initiator_isid[0], cihp->initiator_isid[1], cihp->initiator_isid[2], cihp->initiator_isid[3], cihp->initiator_isid[4], cihp->initiator_isid[5]); mtx_lock(&softc->lock); if (ct->ct_online == 0) { mtx_unlock(&softc->lock); CFISCSI_SESSION_LOCK(cs); cs->cs_handoff_in_progress = false; cfiscsi_session_terminate(cs); CFISCSI_SESSION_UNLOCK(cs); cfiscsi_target_release(ct); ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "%s: port offline", __func__); return; } cs->cs_target = ct; mtx_unlock(&softc->lock); restart: if (!cs->cs_terminating) { mtx_lock(&softc->lock); TAILQ_FOREACH(cs2, &softc->sessions, 
cs_next) { if (cs2 != cs && cs2->cs_tasks_aborted == false && cs->cs_target == cs2->cs_target && strcmp(cs->cs_initiator_id, cs2->cs_initiator_id) == 0) { if (strcmp(cs->cs_initiator_addr, cs2->cs_initiator_addr) != 0) { CFISCSI_SESSION_WARN(cs2, "session reinstatement from " "different address %s", cs->cs_initiator_addr); } else { CFISCSI_SESSION_DEBUG(cs2, "session reinstatement"); } cfiscsi_session_terminate(cs2); mtx_unlock(&softc->lock); pause("cfiscsi_reinstate", 1); goto restart; } } mtx_unlock(&softc->lock); } /* * Register initiator with CTL. */ cfiscsi_session_register_initiator(cs); #ifdef ICL_KERNEL_PROXY if (cihp->socket > 0) { #endif error = icl_conn_handoff(cs->cs_conn, cihp->socket); if (error != 0) { CFISCSI_SESSION_LOCK(cs); cs->cs_handoff_in_progress = false; cfiscsi_session_terminate(cs); CFISCSI_SESSION_UNLOCK(cs); ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "%s: icl_conn_handoff failed with error %d", __func__, error); return; } #ifdef ICL_KERNEL_PROXY } #endif #ifdef ICL_KERNEL_PROXY cs->cs_login_phase = false; /* * First PDU of the Full Feature phase has likely already arrived. * We have to pick it up and execute properly. */ if (cs->cs_login_pdu != NULL) { CFISCSI_SESSION_DEBUG(cs, "picking up first PDU"); cfiscsi_pdu_handle(cs->cs_login_pdu); cs->cs_login_pdu = NULL; } #endif CFISCSI_SESSION_LOCK(cs); cs->cs_handoff_in_progress = false; /* * Wake up the maintenance thread if we got scheduled for termination. 
*/ if (cs->cs_terminating) cfiscsi_session_terminate(cs); CFISCSI_SESSION_UNLOCK(cs); ci->status = CTL_ISCSI_OK; } static void cfiscsi_ioctl_list(struct ctl_iscsi *ci) { struct ctl_iscsi_list_params *cilp; struct cfiscsi_session *cs; struct cfiscsi_softc *softc; struct sbuf *sb; int error; cilp = (struct ctl_iscsi_list_params *)&(ci->data); softc = &cfiscsi_softc; sb = sbuf_new(NULL, NULL, cilp->alloc_len, SBUF_FIXEDLEN); if (sb == NULL) { ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "Unable to allocate %d bytes for iSCSI session list", cilp->alloc_len); return; } sbuf_printf(sb, "\n"); mtx_lock(&softc->lock); TAILQ_FOREACH(cs, &softc->sessions, cs_next) { if (cs->cs_target == NULL) continue; error = sbuf_printf(sb, "" "%s" "%s" "%s" "%s" "%s" "%u" "%s" "%s" "%d" "%d" "%d" "%d" "%d" "%d" "%s" "\n", cs->cs_id, cs->cs_initiator_name, cs->cs_initiator_addr, cs->cs_initiator_alias, cs->cs_target->ct_name, cs->cs_target->ct_alias, cs->cs_target->ct_tag, cs->cs_conn->ic_header_crc32c ? "CRC32C" : "None", cs->cs_conn->ic_data_crc32c ? 
"CRC32C" : "None", - cs->cs_max_recv_data_segment_length, - cs->cs_max_send_data_segment_length, + cs->cs_conn->ic_max_recv_data_segment_length, + cs->cs_conn->ic_max_send_data_segment_length, cs->cs_max_burst_length, cs->cs_first_burst_length, cs->cs_immediate_data, cs->cs_conn->ic_iser, cs->cs_conn->ic_offload); if (error != 0) break; } mtx_unlock(&softc->lock); error = sbuf_printf(sb, "\n"); if (error != 0) { sbuf_delete(sb); ci->status = CTL_ISCSI_LIST_NEED_MORE_SPACE; snprintf(ci->error_str, sizeof(ci->error_str), "Out of space, %d bytes is too small", cilp->alloc_len); return; } sbuf_finish(sb); error = copyout(sbuf_data(sb), cilp->conn_xml, sbuf_len(sb) + 1); if (error != 0) { sbuf_delete(sb); snprintf(ci->error_str, sizeof(ci->error_str), "copyout failed with error %d", error); ci->status = CTL_ISCSI_ERROR; return; } cilp->fill_len = sbuf_len(sb) + 1; ci->status = CTL_ISCSI_OK; sbuf_delete(sb); } static void cfiscsi_ioctl_logout(struct ctl_iscsi *ci) { struct icl_pdu *response; struct iscsi_bhs_asynchronous_message *bhsam; struct ctl_iscsi_logout_params *cilp; struct cfiscsi_session *cs; struct cfiscsi_softc *softc; int found = 0; cilp = (struct ctl_iscsi_logout_params *)&(ci->data); softc = &cfiscsi_softc; mtx_lock(&softc->lock); TAILQ_FOREACH(cs, &softc->sessions, cs_next) { if (cilp->all == 0 && cs->cs_id != cilp->connection_id && strcmp(cs->cs_initiator_name, cilp->initiator_name) != 0 && strcmp(cs->cs_initiator_addr, cilp->initiator_addr) != 0) continue; response = icl_pdu_new(cs->cs_conn, M_NOWAIT); if (response == NULL) { ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "Unable to allocate memory"); mtx_unlock(&softc->lock); return; } bhsam = (struct iscsi_bhs_asynchronous_message *)response->ip_bhs; bhsam->bhsam_opcode = ISCSI_BHS_OPCODE_ASYNC_MESSAGE; bhsam->bhsam_flags = 0x80; bhsam->bhsam_async_event = BHSAM_EVENT_TARGET_REQUESTS_LOGOUT; bhsam->bhsam_parameter3 = htons(10); cfiscsi_pdu_queue(response); found++; } 
mtx_unlock(&softc->lock);

	if (found == 0) {
		ci->status = CTL_ISCSI_SESSION_NOT_FOUND;
		snprintf(ci->error_str, sizeof(ci->error_str),
		    "No matching connections found");
		return;
	}

	ci->status = CTL_ISCSI_OK;
}

/*
 * CTL_ISCSI_TERMINATE handler.  Selects every session when citp->all is
 * nonzero; otherwise every session whose id, initiator name or initiator
 * address matches the request.  For each selected session an Asynchronous
 * Message PDU announcing session termination is queued (best effort: it
 * is skipped when the PDU cannot be allocated) and the session is
 * terminated.  Reports CTL_ISCSI_SESSION_NOT_FOUND when nothing matched.
 */
static void
cfiscsi_ioctl_terminate(struct ctl_iscsi *ci)
{
	struct icl_pdu *response;
	struct iscsi_bhs_asynchronous_message *bhsam;
	struct ctl_iscsi_terminate_params *citp;
	struct cfiscsi_session *cs;
	struct cfiscsi_softc *softc;
	int found = 0;

	citp = (struct ctl_iscsi_terminate_params *)&(ci->data);
	softc = &cfiscsi_softc;

	mtx_lock(&softc->lock);
	TAILQ_FOREACH(cs, &softc->sessions, cs_next) {
		/* Skip only if none of the three selectors matches. */
		if (citp->all == 0 && cs->cs_id != citp->connection_id &&
		    strcmp(cs->cs_initiator_name, citp->initiator_name) != 0 &&
		    strcmp(cs->cs_initiator_addr, citp->initiator_addr) != 0)
			continue;

		/* M_NOWAIT: softc->lock is held, so this may return NULL. */
		response = icl_pdu_new(cs->cs_conn, M_NOWAIT);
		if (response == NULL) {
			/*
			 * Oh well.  Just terminate the connection.
			 */
		} else {
			bhsam = (struct iscsi_bhs_asynchronous_message *)
			    response->ip_bhs;
			bhsam->bhsam_opcode = ISCSI_BHS_OPCODE_ASYNC_MESSAGE;
			bhsam->bhsam_flags = 0x80;
			bhsam->bhsam_0xffffffff = 0xffffffff;
			bhsam->bhsam_async_event =
			    BHSAM_EVENT_TARGET_TERMINATES_SESSION;
			cfiscsi_pdu_queue(response);
		}
		cfiscsi_session_terminate(cs);
		found++;
	}
	mtx_unlock(&softc->lock);

	if (found == 0) {
		ci->status = CTL_ISCSI_SESSION_NOT_FOUND;
		snprintf(ci->error_str, sizeof(ci->error_str),
		    "No matching connections found");
		return;
	}

	ci->status = CTL_ISCSI_OK;
}

/*
 * CTL_ISCSI_LIMITS handler.  Queries icl_limits() for the offload named
 * in the request and copies the returned limits into the request
 * structure.  An icl_limits() failure is reported through ci->status and
 * ci->error_str.
 */
static void
cfiscsi_ioctl_limits(struct ctl_iscsi *ci)
{
	struct ctl_iscsi_limits_params *cilp;
	struct icl_drv_limits idl;
	int error;

	cilp = (struct ctl_iscsi_limits_params *)&(ci->data);

	error = icl_limits(cilp->offload, false, &idl);
	if (error != 0) {
		ci->status = CTL_ISCSI_ERROR;
		snprintf(ci->error_str, sizeof(ci->error_str),
			"%s: icl_limits failed with error %d",
			__func__, error);
		return;
	}

	cilp->max_recv_data_segment_length =
	    idl.idl_max_recv_data_segment_length;
	cilp->max_send_data_segment_length =
	    idl.idl_max_send_data_segment_length;
cilp->max_burst_length = idl.idl_max_burst_length;
	cilp->first_burst_length = idl.idl_first_burst_length;

	ci->status = CTL_ISCSI_OK;
}

#ifdef ICL_KERNEL_PROXY
/*
 * CTL_ISCSI_LISTEN handler.  Fails when no listener exists.  Otherwise
 * fetches the socket address described by cilp->addr/cilp->addrlen with
 * getsockaddr() and adds it to the listener.  If icl_listen_add() fails
 * the address is freed here; on success this function does not free it.
 */
static void
cfiscsi_ioctl_listen(struct ctl_iscsi *ci)
{
	struct ctl_iscsi_listen_params *cilp;
	struct sockaddr *sa;
	int error;

	cilp = (struct ctl_iscsi_listen_params *)&(ci->data);

	if (cfiscsi_softc.listener == NULL) {
		CFISCSI_DEBUG("no listener");
		snprintf(ci->error_str, sizeof(ci->error_str), "no listener");
		ci->status = CTL_ISCSI_ERROR;
		return;
	}

	error = getsockaddr(&sa, (void *)cilp->addr, cilp->addrlen);
	if (error != 0) {
		CFISCSI_DEBUG("getsockaddr, error %d", error);
		snprintf(ci->error_str, sizeof(ci->error_str), "getsockaddr failed");
		ci->status = CTL_ISCSI_ERROR;
		return;
	}

	error = icl_listen_add(cfiscsi_softc.listener, cilp->iser, cilp->domain,
	    cilp->socktype, cilp->protocol, sa, cilp->portal_id);
	if (error != 0) {
		free(sa, M_SONAME);
		CFISCSI_DEBUG("icl_listen_add, error %d", error);
		snprintf(ci->error_str, sizeof(ci->error_str),
		    "icl_listen_add failed, error %d", error);
		ci->status = CTL_ISCSI_ERROR;
		return;
	}

	ci->status = CTL_ISCSI_OK;
}

/*
 * CTL_ISCSI_ACCEPT handler.  Sleeps interruptibly on accept_cv until some
 * session has cs_waiting_for_ctld set, then moves that session into the
 * login phase and reports its connection id, portal id and initiator
 * address to the caller.
 *
 * NOTE(review): the session pointer is used after softc->lock has been
 * dropped and no reference on it is visible here -- confirm this cannot
 * race with session deletion.
 */
static void
cfiscsi_ioctl_accept(struct ctl_iscsi *ci)
{
	struct ctl_iscsi_accept_params *ciap;
	struct cfiscsi_session *cs;
	int error;

	ciap = (struct ctl_iscsi_accept_params *)&(ci->data);

	mtx_lock(&cfiscsi_softc.lock);
	for (;;) {
		TAILQ_FOREACH(cs, &cfiscsi_softc.sessions, cs_next) {
			if (cs->cs_waiting_for_ctld)
				break;
		}
		if (cs != NULL)
			break;
		error = cv_wait_sig(&cfiscsi_softc.accept_cv, &cfiscsi_softc.lock);
		if (error != 0) {
			mtx_unlock(&cfiscsi_softc.lock);
			snprintf(ci->error_str, sizeof(ci->error_str), "interrupted");
			ci->status = CTL_ISCSI_ERROR;
			return;
		}
	}
	mtx_unlock(&cfiscsi_softc.lock);

	cs->cs_waiting_for_ctld = false;
	cs->cs_login_phase = true;

	ciap->connection_id = cs->cs_id;
	ciap->portal_id = cs->cs_portal_id;
	ciap->initiator_addrlen = cs->cs_initiator_sa->sa_len;
	error = copyout(cs->cs_initiator_sa, ciap->initiator_addr,
	    cs->cs_initiator_sa->sa_len);
	if
(error != 0) { snprintf(ci->error_str, sizeof(ci->error_str), "copyout failed with error %d", error); ci->status = CTL_ISCSI_ERROR; return; } ci->status = CTL_ISCSI_OK; } static void cfiscsi_ioctl_send(struct ctl_iscsi *ci) { struct ctl_iscsi_send_params *cisp; struct cfiscsi_session *cs; struct icl_pdu *ip; size_t datalen; void *data; int error; cisp = (struct ctl_iscsi_send_params *)&(ci->data); mtx_lock(&cfiscsi_softc.lock); TAILQ_FOREACH(cs, &cfiscsi_softc.sessions, cs_next) { if (cs->cs_id == cisp->connection_id) break; } if (cs == NULL) { mtx_unlock(&cfiscsi_softc.lock); snprintf(ci->error_str, sizeof(ci->error_str), "connection not found"); ci->status = CTL_ISCSI_ERROR; return; } mtx_unlock(&cfiscsi_softc.lock); #if 0 if (cs->cs_login_phase == false) return (EBUSY); #endif if (cs->cs_terminating) { snprintf(ci->error_str, sizeof(ci->error_str), "connection is terminating"); ci->status = CTL_ISCSI_ERROR; return; } datalen = cisp->data_segment_len; /* * XXX */ //if (datalen > CFISCSI_MAX_DATA_SEGMENT_LENGTH) { if (datalen > 65535) { snprintf(ci->error_str, sizeof(ci->error_str), "data segment too big"); ci->status = CTL_ISCSI_ERROR; return; } if (datalen > 0) { data = malloc(datalen, M_CFISCSI, M_WAITOK); error = copyin(cisp->data_segment, data, datalen); if (error != 0) { free(data, M_CFISCSI); snprintf(ci->error_str, sizeof(ci->error_str), "copyin error %d", error); ci->status = CTL_ISCSI_ERROR; return; } } ip = icl_pdu_new(cs->cs_conn, M_WAITOK); memcpy(ip->ip_bhs, cisp->bhs, sizeof(*ip->ip_bhs)); if (datalen > 0) { icl_pdu_append_data(ip, data, datalen, M_WAITOK); free(data, M_CFISCSI); } CFISCSI_SESSION_LOCK(cs); icl_pdu_queue(ip); CFISCSI_SESSION_UNLOCK(cs); ci->status = CTL_ISCSI_OK; } static void cfiscsi_ioctl_receive(struct ctl_iscsi *ci) { struct ctl_iscsi_receive_params *cirp; struct cfiscsi_session *cs; struct icl_pdu *ip; void *data; int error; cirp = (struct ctl_iscsi_receive_params *)&(ci->data); mtx_lock(&cfiscsi_softc.lock); TAILQ_FOREACH(cs, 
&cfiscsi_softc.sessions, cs_next) { if (cs->cs_id == cirp->connection_id) break; } if (cs == NULL) { mtx_unlock(&cfiscsi_softc.lock); snprintf(ci->error_str, sizeof(ci->error_str), "connection not found"); ci->status = CTL_ISCSI_ERROR; return; } mtx_unlock(&cfiscsi_softc.lock); #if 0 if (is->is_login_phase == false) return (EBUSY); #endif CFISCSI_SESSION_LOCK(cs); while (cs->cs_login_pdu == NULL && cs->cs_terminating == false) { error = cv_wait_sig(&cs->cs_login_cv, &cs->cs_lock); if (error != 0) { CFISCSI_SESSION_UNLOCK(cs); snprintf(ci->error_str, sizeof(ci->error_str), "interrupted by signal"); ci->status = CTL_ISCSI_ERROR; return; } } if (cs->cs_terminating) { CFISCSI_SESSION_UNLOCK(cs); snprintf(ci->error_str, sizeof(ci->error_str), "connection terminating"); ci->status = CTL_ISCSI_ERROR; return; } ip = cs->cs_login_pdu; cs->cs_login_pdu = NULL; CFISCSI_SESSION_UNLOCK(cs); if (ip->ip_data_len > cirp->data_segment_len) { icl_pdu_free(ip); snprintf(ci->error_str, sizeof(ci->error_str), "data segment too big"); ci->status = CTL_ISCSI_ERROR; return; } copyout(ip->ip_bhs, cirp->bhs, sizeof(*ip->ip_bhs)); if (ip->ip_data_len > 0) { data = malloc(ip->ip_data_len, M_CFISCSI, M_WAITOK); icl_pdu_get_data(ip, 0, data, ip->ip_data_len); copyout(data, cirp->data_segment, ip->ip_data_len); free(data, M_CFISCSI); } icl_pdu_free(ip); ci->status = CTL_ISCSI_OK; } #endif /* !ICL_KERNEL_PROXY */ static void cfiscsi_ioctl_port_create(struct ctl_req *req) { struct cfiscsi_target *ct; struct ctl_port *port; const char *target, *alias, *val; struct scsi_vpd_id_descriptor *desc; int retval, len, idlen; uint16_t tag; target = dnvlist_get_string(req->args_nvl, "cfiscsi_target", NULL); alias = dnvlist_get_string(req->args_nvl, "cfiscsi_target_alias", NULL); val = dnvlist_get_string(req->args_nvl, "cfiscsi_portal_group_tag", NULL); if (target == NULL || val == NULL) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "Missing required argument"); return; } tag 
= strtoul(val, NULL, 0); ct = cfiscsi_target_find_or_create(&cfiscsi_softc, target, alias, tag); if (ct == NULL) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "failed to create target \"%s\"", target); return; } if (ct->ct_state == CFISCSI_TARGET_STATE_ACTIVE) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "target \"%s\" for portal group tag %u already exists", target, tag); cfiscsi_target_release(ct); return; } port = &ct->ct_port; // WAT if (ct->ct_state == CFISCSI_TARGET_STATE_DYING) goto done; port->frontend = &cfiscsi_frontend; port->port_type = CTL_PORT_ISCSI; /* XXX KDM what should the real number be here? */ port->num_requested_ctl_io = 4096; port->port_name = "iscsi"; port->physical_port = (int)tag; port->virtual_port = ct->ct_target_id; port->port_online = cfiscsi_online; port->port_offline = cfiscsi_offline; port->port_info = cfiscsi_info; port->onoff_arg = ct; port->fe_datamove = cfiscsi_datamove; port->fe_done = cfiscsi_done; port->targ_port = -1; port->options = nvlist_clone(req->args_nvl); /* Generate Port ID. */ idlen = strlen(target) + strlen(",t,0x0001") + 1; idlen = roundup2(idlen, 4); len = sizeof(struct scsi_vpd_device_id) + idlen; port->port_devid = malloc(sizeof(struct ctl_devid) + len, M_CTL, M_WAITOK | M_ZERO); port->port_devid->len = len; desc = (struct scsi_vpd_id_descriptor *)port->port_devid->data; desc->proto_codeset = (SCSI_PROTO_ISCSI << 4) | SVPD_ID_CODESET_UTF8; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_PORT | SVPD_ID_TYPE_SCSI_NAME; desc->length = idlen; snprintf(desc->identifier, idlen, "%s,t,0x%4.4x", target, tag); /* Generate Target ID. 
*/ idlen = strlen(target) + 1; idlen = roundup2(idlen, 4); len = sizeof(struct scsi_vpd_device_id) + idlen; port->target_devid = malloc(sizeof(struct ctl_devid) + len, M_CTL, M_WAITOK | M_ZERO); port->target_devid->len = len; desc = (struct scsi_vpd_id_descriptor *)port->target_devid->data; desc->proto_codeset = (SCSI_PROTO_ISCSI << 4) | SVPD_ID_CODESET_UTF8; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_TARGET | SVPD_ID_TYPE_SCSI_NAME; desc->length = idlen; strlcpy(desc->identifier, target, idlen); retval = ctl_port_register(port); if (retval != 0) { free(port->port_devid, M_CFISCSI); free(port->target_devid, M_CFISCSI); cfiscsi_target_release(ct); req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "ctl_port_register() failed with error %d", retval); return; } done: ct->ct_state = CFISCSI_TARGET_STATE_ACTIVE; req->status = CTL_LUN_OK; req->result_nvl = nvlist_create(0); nvlist_add_number(req->result_nvl, "port_id", port->targ_port); } static void cfiscsi_ioctl_port_remove(struct ctl_req *req) { struct cfiscsi_target *ct; const char *target, *val; uint16_t tag; target = dnvlist_get_string(req->args_nvl, "cfiscsi_target", NULL); val = dnvlist_get_string(req->args_nvl, "cfiscsi_portal_group_tag", NULL); if (target == NULL || val == NULL) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "Missing required argument"); return; } tag = strtoul(val, NULL, 0); ct = cfiscsi_target_find(&cfiscsi_softc, target, tag); if (ct == NULL) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "can't find target \"%s\"", target); return; } ct->ct_state = CFISCSI_TARGET_STATE_DYING; ctl_port_offline(&ct->ct_port); cfiscsi_target_release(ct); cfiscsi_target_release(ct); req->status = CTL_LUN_OK; } static int cfiscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td) { struct ctl_iscsi *ci; struct ctl_req *req; if (cmd == CTL_PORT_REQ) { req = (struct ctl_req *)addr; switch 
(req->reqtype) {
		case CTL_REQ_CREATE:
			cfiscsi_ioctl_port_create(req);
			break;
		case CTL_REQ_REMOVE:
			cfiscsi_ioctl_port_remove(req);
			break;
		default:
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Unsupported request type %d", req->reqtype);
		}
		return (0);
	}

	if (cmd != CTL_ISCSI)
		return (ENOTTY);

	/* Each handler reports its outcome in ci->status / ci->error_str. */
	ci = (struct ctl_iscsi *)addr;
	switch (ci->type) {
	case CTL_ISCSI_HANDOFF:
		cfiscsi_ioctl_handoff(ci);
		break;
	case CTL_ISCSI_LIST:
		cfiscsi_ioctl_list(ci);
		break;
	case CTL_ISCSI_LOGOUT:
		cfiscsi_ioctl_logout(ci);
		break;
	case CTL_ISCSI_TERMINATE:
		cfiscsi_ioctl_terminate(ci);
		break;
	case CTL_ISCSI_LIMITS:
		cfiscsi_ioctl_limits(ci);
		break;
#ifdef ICL_KERNEL_PROXY
	case CTL_ISCSI_LISTEN:
		cfiscsi_ioctl_listen(ci);
		break;
	case CTL_ISCSI_ACCEPT:
		cfiscsi_ioctl_accept(ci);
		break;
	case CTL_ISCSI_SEND:
		cfiscsi_ioctl_send(ci);
		break;
	case CTL_ISCSI_RECEIVE:
		cfiscsi_ioctl_receive(ci);
		break;
#else
	/* Known request types that need the kernel proxy. */
	case CTL_ISCSI_LISTEN:
	case CTL_ISCSI_ACCEPT:
	case CTL_ISCSI_SEND:
	case CTL_ISCSI_RECEIVE:
		ci->status = CTL_ISCSI_ERROR;
		snprintf(ci->error_str, sizeof(ci->error_str),
		    "%s: CTL compiled without ICL_KERNEL_PROXY",
		    __func__);
		break;
#endif /* !ICL_KERNEL_PROXY */
	default:
		ci->status = CTL_ISCSI_ERROR;
		snprintf(ci->error_str, sizeof(ci->error_str),
		    "%s: invalid iSCSI request type %d", __func__, ci->type);
		break;
	}

	return (0);
}

/*
 * Take an additional reference on the target.
 */
static void
cfiscsi_target_hold(struct cfiscsi_target *ct)
{

	refcount_acquire(&ct->ct_refcount);
}

/*
 * Drop a reference on the target.  When the last reference goes away the
 * target is unlinked from softc->targets under softc->lock; then, with
 * the lock released, its CTL port is deregistered (unless the target is
 * already in the INVALID state) and the target is freed.
 */
static void
cfiscsi_target_release(struct cfiscsi_target *ct)
{
	struct cfiscsi_softc *softc;

	softc = ct->ct_softc;
	mtx_lock(&softc->lock);
	if (refcount_release(&ct->ct_refcount)) {
		TAILQ_REMOVE(&softc->targets, ct, ct_next);
		mtx_unlock(&softc->lock);
		if (ct->ct_state != CFISCSI_TARGET_STATE_INVALID) {
			ct->ct_state = CFISCSI_TARGET_STATE_INVALID;
			if (ctl_port_deregister(&ct->ct_port) != 0)
				printf("%s: ctl_port_deregister() failed\n",
				    __func__);
		}
		free(ct, M_CFISCSI);

		return;
	}
	mtx_unlock(&softc->lock);
}

/*
 * Look up the target with the given name and portal group tag that is in
 * the ACTIVE state.  Returns it with an extra reference held, or NULL if
 * there is none.
 */
static struct cfiscsi_target *
cfiscsi_target_find(struct cfiscsi_softc *softc, const char *name, uint16_t tag)
{
	struct cfiscsi_target *ct;

	mtx_lock(&softc->lock);
	TAILQ_FOREACH(ct, &softc->targets, ct_next) {
		/* Only targets in the ACTIVE state are eligible. */
		if (ct->ct_tag != tag ||
		    strcmp(name, ct->ct_name) != 0 ||
		    ct->ct_state != CFISCSI_TARGET_STATE_ACTIVE)
			continue;
		/* The caller owns this reference and must release it. */
		cfiscsi_target_hold(ct);
		mtx_unlock(&softc->lock);
		return (ct);
	}
	mtx_unlock(&softc->lock);

	return (NULL);
}

/*
 * Return the target with the given name and portal group tag, creating
 * it if necessary.
 *
 * An existing target in any state other than INVALID is returned with an
 * extra reference held.  Otherwise a new target is appended to
 * softc->targets with a reference count of 1 and the next target id;
 * target ids restart from 1 whenever the list is empty.
 *
 * Returns NULL if 'name' is empty or CTL_ISCSI_NAME_LEN characters or
 * longer.  'alias' may be NULL.
 */
static struct cfiscsi_target *
cfiscsi_target_find_or_create(struct cfiscsi_softc *softc, const char *name,
    const char *alias, uint16_t tag)
{
	struct cfiscsi_target *ct, *newct;

	if (name[0] == '\0' || strlen(name) >= CTL_ISCSI_NAME_LEN)
		return (NULL);

	/*
	 * Allocated before the lock is taken, presumably because an
	 * M_WAITOK allocation may sleep; it is thrown away below if the
	 * target turns out to exist already.
	 */
	newct = malloc(sizeof(*newct), M_CFISCSI, M_WAITOK | M_ZERO);

	mtx_lock(&softc->lock);
	TAILQ_FOREACH(ct, &softc->targets, ct_next) {
		if (ct->ct_tag != tag ||
		    strcmp(name, ct->ct_name) != 0 ||
		    ct->ct_state == CFISCSI_TARGET_STATE_INVALID)
			continue;
		cfiscsi_target_hold(ct);
		mtx_unlock(&softc->lock);
		free(newct, M_CFISCSI);
		return (ct);
	}

	strlcpy(newct->ct_name, name, sizeof(newct->ct_name));
	if (alias != NULL)
		strlcpy(newct->ct_alias, alias, sizeof(newct->ct_alias));
	newct->ct_tag = tag;
	refcount_init(&newct->ct_refcount, 1);
	newct->ct_softc = softc;
	if (TAILQ_EMPTY(&softc->targets))
		softc->last_target_id = 0;
	newct->ct_target_id = ++softc->last_target_id;
	TAILQ_INSERT_TAIL(&softc->targets, newct, ct_next);
	mtx_unlock(&softc->lock);

	return (newct);
}

/*
 * PDU completion callback (see cfiscsi_datamove_in(), which passes it to
 * cfiscsi_pdu_queue_cb()).  A nonzero 'error' is currently ignored.
 */
static void
cfiscsi_pdu_done(struct icl_pdu *ip, int error)
{

	if (error != 0)
		; // XXX: Do something on error?
((ctl_ref)ip->ip_prv0)(ip->ip_prv1, -1); } static void cfiscsi_datamove_in(union ctl_io *io) { struct cfiscsi_session *cs; struct icl_pdu *request, *response; const struct iscsi_bhs_scsi_command *bhssc; struct iscsi_bhs_data_in *bhsdi; struct ctl_sg_entry ctl_sg_entry, *ctl_sglist; size_t len, expected_len, sg_len, buffer_offset; const char *sg_addr; icl_pdu_cb cb; int ctl_sg_count, error, i; request = PRIV_REQUEST(io); cs = PDU_SESSION(request); bhssc = (const struct iscsi_bhs_scsi_command *)request->ip_bhs; KASSERT((bhssc->bhssc_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_COMMAND, ("bhssc->bhssc_opcode != ISCSI_BHS_OPCODE_SCSI_COMMAND")); if (io->scsiio.kern_sg_entries > 0) { ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr; ctl_sg_count = io->scsiio.kern_sg_entries; } else { ctl_sglist = &ctl_sg_entry; ctl_sglist->addr = io->scsiio.kern_data_ptr; ctl_sglist->len = io->scsiio.kern_data_len; ctl_sg_count = 1; } /* * This is the offset within the current SCSI command; for the first * call to cfiscsi_datamove() it will be 0, and for subsequent ones * it will be the sum of lengths of previous ones. */ buffer_offset = io->scsiio.kern_rel_offset; /* * This is the transfer length expected by the initiator. It can be * different from the amount of data from the SCSI point of view. */ expected_len = ntohl(bhssc->bhssc_expected_data_transfer_length); /* * If the transfer is outside of expected length -- we are done. 
*/ if (buffer_offset >= expected_len) { #if 0 CFISCSI_SESSION_DEBUG(cs, "buffer_offset = %zd, " "already sent the expected len", buffer_offset); #endif ctl_datamove_done(io, true); return; } if (io->scsiio.kern_data_ref != NULL) cb = cfiscsi_pdu_done; else cb = NULL; i = 0; sg_addr = NULL; sg_len = 0; response = NULL; bhsdi = NULL; for (;;) { if (response == NULL) { response = cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_WARN(cs, "failed to " "allocate memory; dropping connection"); ctl_set_busy(&io->scsiio); ctl_datamove_done(io, true); cfiscsi_session_terminate(cs); return; } bhsdi = (struct iscsi_bhs_data_in *)response->ip_bhs; bhsdi->bhsdi_opcode = ISCSI_BHS_OPCODE_SCSI_DATA_IN; bhsdi->bhsdi_initiator_task_tag = bhssc->bhssc_initiator_task_tag; bhsdi->bhsdi_target_transfer_tag = 0xffffffff; bhsdi->bhsdi_datasn = htonl(PRIV_EXPDATASN(io)++); bhsdi->bhsdi_buffer_offset = htonl(buffer_offset); } KASSERT(i < ctl_sg_count, ("i >= ctl_sg_count")); if (sg_len == 0) { sg_addr = ctl_sglist[i].addr; sg_len = ctl_sglist[i].len; KASSERT(sg_len > 0, ("sg_len <= 0")); } len = sg_len; /* * Truncate to maximum data segment length. */ - KASSERT(response->ip_data_len < cs->cs_max_send_data_segment_length, + KASSERT(response->ip_data_len < + cs->cs_conn->ic_max_send_data_segment_length, ("ip_data_len %zd >= max_send_data_segment_length %d", - response->ip_data_len, cs->cs_max_send_data_segment_length)); + response->ip_data_len, + cs->cs_conn->ic_max_send_data_segment_length)); if (response->ip_data_len + len > - cs->cs_max_send_data_segment_length) { - len = cs->cs_max_send_data_segment_length - + cs->cs_conn->ic_max_send_data_segment_length) { + len = cs->cs_conn->ic_max_send_data_segment_length - response->ip_data_len; KASSERT(len <= sg_len, ("len %zd > sg_len %zd", len, sg_len)); } /* * Truncate to expected data transfer length. 
*/ KASSERT(buffer_offset + response->ip_data_len < expected_len, ("buffer_offset %zd + ip_data_len %zd >= expected_len %zd", buffer_offset, response->ip_data_len, expected_len)); if (buffer_offset + response->ip_data_len + len > expected_len) { CFISCSI_SESSION_DEBUG(cs, "truncating from %zd " "to expected data transfer length %zd", buffer_offset + response->ip_data_len + len, expected_len); len = expected_len - (buffer_offset + response->ip_data_len); KASSERT(len <= sg_len, ("len %zd > sg_len %zd", len, sg_len)); } /* Append the chunk; ICL_NOCOPY is requested only when a callback (and thus a buffer reference function) is in use. */ error = icl_pdu_append_data(response, sg_addr, len, M_NOWAIT | (cb ? ICL_NOCOPY : 0)); if (error != 0) { CFISCSI_SESSION_WARN(cs, "failed to " "allocate memory; dropping connection"); icl_pdu_free(response); ctl_set_busy(&io->scsiio); ctl_datamove_done(io, true); cfiscsi_session_terminate(cs); return; } /* Advance within the current S/G entry and account for the bytes just appended. */ sg_addr += len; sg_len -= len; io->scsiio.kern_data_resid -= len; KASSERT(buffer_offset + response->ip_data_len <= expected_len, ("buffer_offset %zd + ip_data_len %zd > expected_len %zd", buffer_offset, response->ip_data_len, expected_len)); if (buffer_offset + response->ip_data_len == expected_len) { /* * Already have the amount of data the initiator wanted. */ break; } if (sg_len == 0) { /* * End of scatter-gather segment; * proceed to the next one... */ if (i == ctl_sg_count - 1) { /* * ... unless this was the last one. */ break; } i++; } - if (response->ip_data_len == cs->cs_max_send_data_segment_length) { + if (response->ip_data_len == + cs->cs_conn->ic_max_send_data_segment_length) { /* * Can't stuff more data into the current PDU; * queue it. Note that's not enough to check * for kern_data_resid == 0 instead; there * may be several Data-In PDUs for the final * call to cfiscsi_datamove(), and we want * to set the F flag only on the last of them.
*/ buffer_offset += response->ip_data_len; if (buffer_offset == io->scsiio.kern_total_len || buffer_offset == expected_len) { buffer_offset -= response->ip_data_len; break; } /* Take a reference on the data buffer for this PDU; ip_prv0/ip_prv1 record the reference function and its argument. */ if (cb != NULL) { response->ip_prv0 = io->scsiio.kern_data_ref; response->ip_prv1 = io->scsiio.kern_data_arg; io->scsiio.kern_data_ref(io->scsiio.kern_data_arg, 1); } cfiscsi_pdu_queue_cb(response, cb); response = NULL; bhsdi = NULL; } } /* Flush the PDU still being built. If it ends the transfer it gets the F flag and, when the command succeeded, also carries the SCSI status (S flag) and residual counts; CTL_FLAG_STATUS_SENT records that status went out here. */ if (response != NULL) { buffer_offset += response->ip_data_len; if (buffer_offset == io->scsiio.kern_total_len || buffer_offset == expected_len) { bhsdi->bhsdi_flags |= BHSDI_FLAGS_F; if (io->io_hdr.status == CTL_SUCCESS) { bhsdi->bhsdi_flags |= BHSDI_FLAGS_S; if (io->scsiio.kern_total_len < ntohl(bhssc->bhssc_expected_data_transfer_length)) { bhsdi->bhsdi_flags |= BHSSR_FLAGS_RESIDUAL_UNDERFLOW; bhsdi->bhsdi_residual_count = htonl(ntohl(bhssc->bhssc_expected_data_transfer_length) - io->scsiio.kern_total_len); } else if (io->scsiio.kern_total_len > ntohl(bhssc->bhssc_expected_data_transfer_length)) { bhsdi->bhsdi_flags |= BHSSR_FLAGS_RESIDUAL_OVERFLOW; bhsdi->bhsdi_residual_count = htonl(io->scsiio.kern_total_len - ntohl(bhssc->bhssc_expected_data_transfer_length)); } bhsdi->bhsdi_status = io->scsiio.scsi_status; io->io_hdr.flags |= CTL_FLAG_STATUS_SENT; } } KASSERT(response->ip_data_len > 0, ("sending empty Data-In")); if (cb != NULL) { response->ip_prv0 = io->scsiio.kern_data_ref; response->ip_prv1 = io->scsiio.kern_data_arg; io->scsiio.kern_data_ref(io->scsiio.kern_data_arg, 1); } cfiscsi_pdu_queue_cb(response, cb); } ctl_datamove_done(io, true); } /* Receive the data of a write-type command: consume any immediate data carried by the command PDU, then register a cfiscsi_data_wait for the remainder and request it from the initiator with an R2T PDU. */ static void cfiscsi_datamove_out(union ctl_io *io) { struct cfiscsi_session *cs; struct icl_pdu *request, *response; const struct iscsi_bhs_scsi_command *bhssc; struct iscsi_bhs_r2t *bhsr2t; struct cfiscsi_data_wait *cdw; struct ctl_sg_entry ctl_sg_entry, *ctl_sglist; uint32_t expected_len, datamove_len, r2t_off, r2t_len; uint32_t target_transfer_tag; bool done; request = PRIV_REQUEST(io); cs = 
PDU_SESSION(request); bhssc = (const struct iscsi_bhs_scsi_command *)request->ip_bhs; KASSERT((bhssc->bhssc_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_COMMAND, ("bhssc->bhssc_opcode != ISCSI_BHS_OPCODE_SCSI_COMMAND")); /* * Complete write underflow. Not a single byte to read. Return. */ expected_len = ntohl(bhssc->bhssc_expected_data_transfer_length); if (io->scsiio.kern_rel_offset >= expected_len) { ctl_datamove_done(io, true); return; } /* Never wait for more than the initiator said it would send. */ datamove_len = MIN(io->scsiio.kern_data_len, expected_len - io->scsiio.kern_rel_offset); /* Allocate the next target transfer tag, skipping the value 0xffffffff. */ target_transfer_tag = atomic_fetchadd_32(&cs->cs_target_transfer_tag, 1); if (target_transfer_tag == 0xffffffff) { target_transfer_tag = atomic_fetchadd_32(&cs->cs_target_transfer_tag, 1); } cdw = cfiscsi_data_wait_new(cs, io, bhssc->bhssc_initiator_task_tag, &target_transfer_tag); if (cdw == NULL) { CFISCSI_SESSION_WARN(cs, "failed to " "allocate memory; dropping connection"); ctl_set_busy(&io->scsiio); ctl_datamove_done(io, true); cfiscsi_session_terminate(cs); return; } #if 0 CFISCSI_SESSION_DEBUG(cs, "expecting Data-Out with initiator " "task tag 0x%x, target transfer tag 0x%x", bhssc->bhssc_initiator_task_tag, target_transfer_tag); #endif /* Record which I/O and tags this wait belongs to; cdw_r2t_end is provisionally the whole datamove and is narrowed before the R2T is sent. */ cdw->cdw_ctl_io = io; cdw->cdw_target_transfer_tag = target_transfer_tag; cdw->cdw_initiator_task_tag = bhssc->bhssc_initiator_task_tag; cdw->cdw_r2t_end = datamove_len; cdw->cdw_datasn = 0; /* Set initial data pointer for the CDW respecting ext_data_filled.
*/ if (io->scsiio.kern_sg_entries > 0) { ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr; } else { ctl_sglist = &ctl_sg_entry; ctl_sglist->addr = io->scsiio.kern_data_ptr; ctl_sglist->len = datamove_len; } cdw->cdw_sg_index = 0; cdw->cdw_sg_addr = ctl_sglist[cdw->cdw_sg_index].addr; cdw->cdw_sg_len = ctl_sglist[cdw->cdw_sg_index].len; /* Skip over the ext_data_filled bytes already received, stepping across S/G entries as needed. */ r2t_off = io->scsiio.ext_data_filled; while (r2t_off > 0) { if (r2t_off >= cdw->cdw_sg_len) { r2t_off -= cdw->cdw_sg_len; cdw->cdw_sg_index++; cdw->cdw_sg_addr = ctl_sglist[cdw->cdw_sg_index].addr; cdw->cdw_sg_len = ctl_sglist[cdw->cdw_sg_index].len; continue; } cdw->cdw_sg_addr += r2t_off; cdw->cdw_sg_len -= r2t_off; r2t_off = 0; } /* Consume immediate data from the command PDU first; if that satisfies the whole datamove no R2T is needed. */ if (cs->cs_immediate_data && io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled < icl_pdu_data_segment_length(request)) { done = cfiscsi_handle_data_segment(request, cdw); if (done) { cfiscsi_data_wait_free(cs, cdw); ctl_datamove_done(io, true); return; } } /* Request what is still missing, at most MaxBurstLength bytes per R2T. */ r2t_off = io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled; r2t_len = MIN(datamove_len - io->scsiio.ext_data_filled, cs->cs_max_burst_length); cdw->cdw_r2t_end = io->scsiio.ext_data_filled + r2t_len; CFISCSI_SESSION_LOCK(cs); TAILQ_INSERT_TAIL(&cs->cs_waiting_for_data_out, cdw, cdw_next); CFISCSI_SESSION_UNLOCK(cs); /* * XXX: We should limit the number of outstanding R2T PDUs * per task to MaxOutstandingR2T.
*/ response = cfiscsi_pdu_new_response(request, M_NOWAIT); /* NOTE(review): on this failure path the cdw inserted into cs_waiting_for_data_out above is still on the list; presumably session termination cleans it up -- confirm. */ if (response == NULL) { CFISCSI_SESSION_WARN(cs, "failed to " "allocate memory; dropping connection"); ctl_set_busy(&io->scsiio); ctl_datamove_done(io, true); cfiscsi_session_terminate(cs); return; } io->io_hdr.flags |= CTL_FLAG_DMA_INPROG; bhsr2t = (struct iscsi_bhs_r2t *)response->ip_bhs; bhsr2t->bhsr2t_opcode = ISCSI_BHS_OPCODE_R2T; bhsr2t->bhsr2t_flags = 0x80; bhsr2t->bhsr2t_lun = bhssc->bhssc_lun; bhsr2t->bhsr2t_initiator_task_tag = bhssc->bhssc_initiator_task_tag; bhsr2t->bhsr2t_target_transfer_tag = target_transfer_tag; /* * XXX: Here we assume that cfiscsi_datamove() won't ever * be running concurrently on several CPUs for a given * command. */ bhsr2t->bhsr2t_r2tsn = htonl(PRIV_R2TSN(io)++); /* * This is the offset within the current SCSI command; * i.e. for the first call of datamove(), it will be 0, * and for subsequent ones it will be the sum of lengths * of previous ones. * * The ext_data_filled is to account for unsolicited * (immediate) data that might have already arrived. */ bhsr2t->bhsr2t_buffer_offset = htonl(r2t_off); /* * This is the total length (sum of S/G lengths) this call * to cfiscsi_datamove() is supposed to handle, limited by * MaxBurstLength. */ bhsr2t->bhsr2t_desired_data_transfer_length = htonl(r2t_len); cfiscsi_pdu_queue(response); } /* Dispatch a datamove to the Data-In path or the Data-Out path according to the direction bits in io_hdr.flags (CTL_FLAG_DATA_MASK). */ static void cfiscsi_datamove(union ctl_io *io) { if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) cfiscsi_datamove_in(io); else { /* We hadn't received anything during this datamove yet.
*/ io->scsiio.ext_data_filled = 0; cfiscsi_datamove_out(io); } } /* Complete a SCSI command. Unless the command was aborted without status or status has already been sent (CTL_FLAG_STATUS_SENT), build and queue a SCSI Response PDU with residual counts and sense data. The CTL I/O and the request PDU are freed on every path. */ static void cfiscsi_scsi_command_done(union ctl_io *io) { struct icl_pdu *request, *response; struct iscsi_bhs_scsi_command *bhssc; struct iscsi_bhs_scsi_response *bhssr; #ifdef DIAGNOSTIC struct cfiscsi_data_wait *cdw; #endif struct cfiscsi_session *cs; uint16_t sense_length; request = PRIV_REQUEST(io); cs = PDU_SESSION(request); bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs; KASSERT((bhssc->bhssc_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_COMMAND, ("replying to wrong opcode 0x%x", bhssc->bhssc_opcode)); //CFISCSI_SESSION_DEBUG(cs, "initiator task tag 0x%x", // bhssc->bhssc_initiator_task_tag); #ifdef DIAGNOSTIC CFISCSI_SESSION_LOCK(cs); TAILQ_FOREACH(cdw, &cs->cs_waiting_for_data_out, cdw_next) KASSERT(bhssc->bhssc_initiator_task_tag != cdw->cdw_initiator_task_tag, ("dangling cdw")); CFISCSI_SESSION_UNLOCK(cs); #endif /* * Do not return status for aborted commands. * There are exceptions, but none supported by CTL yet. */ if (((io->io_hdr.flags & CTL_FLAG_ABORT) && (io->io_hdr.flags & CTL_FLAG_ABORT_STATUS) == 0) || (io->io_hdr.flags & CTL_FLAG_STATUS_SENT)) { ctl_free_io(io); icl_pdu_free(request); return; } response = cfiscsi_pdu_new_response(request, M_WAITOK); bhssr = (struct iscsi_bhs_scsi_response *)response->ip_bhs; bhssr->bhssr_opcode = ISCSI_BHS_OPCODE_SCSI_RESPONSE; bhssr->bhssr_flags = 0x80; /* * XXX: We don't deal with bidirectional under/overflows; * does anything actually support those?
*/ if (io->scsiio.kern_total_len < ntohl(bhssc->bhssc_expected_data_transfer_length)) { bhssr->bhssr_flags |= BHSSR_FLAGS_RESIDUAL_UNDERFLOW; bhssr->bhssr_residual_count = htonl(ntohl(bhssc->bhssc_expected_data_transfer_length) - io->scsiio.kern_total_len); //CFISCSI_SESSION_DEBUG(cs, "underflow; residual count %d", // ntohl(bhssr->bhssr_residual_count)); } else if (io->scsiio.kern_total_len > ntohl(bhssc->bhssc_expected_data_transfer_length)) { bhssr->bhssr_flags |= BHSSR_FLAGS_RESIDUAL_OVERFLOW; bhssr->bhssr_residual_count = htonl(io->scsiio.kern_total_len - ntohl(bhssc->bhssc_expected_data_transfer_length)); //CFISCSI_SESSION_DEBUG(cs, "overflow; residual count %d", // ntohl(bhssr->bhssr_residual_count)); } bhssr->bhssr_response = BHSSR_RESPONSE_COMMAND_COMPLETED; bhssr->bhssr_status = io->scsiio.scsi_status; bhssr->bhssr_initiator_task_tag = bhssc->bhssc_initiator_task_tag; bhssr->bhssr_expdatasn = htonl(PRIV_EXPDATASN(io)); /* Sense data goes into the data segment, prefixed by its length as a 16-bit network-order value. */ if (io->scsiio.sense_len > 0) { #if 0 CFISCSI_SESSION_DEBUG(cs, "returning %d bytes of sense data", io->scsiio.sense_len); #endif sense_length = htons(io->scsiio.sense_len); icl_pdu_append_data(response, &sense_length, sizeof(sense_length), M_WAITOK); icl_pdu_append_data(response, &io->scsiio.sense_data, io->scsiio.sense_len, M_WAITOK); } ctl_free_io(io); icl_pdu_free(request); cfiscsi_pdu_queue(response); } /* Complete a task management request: for ABORT TASK, stop waiting for Data-Out on the referenced task; queue the Task Management Response; after a successful TARGET COLD RESET, terminate every session of this target. */ static void cfiscsi_task_management_done(union ctl_io *io) { struct icl_pdu *request, *response; struct iscsi_bhs_task_management_request *bhstmr; struct iscsi_bhs_task_management_response *bhstmr2; struct cfiscsi_data_wait *cdw, *tmpcdw; struct cfiscsi_session *cs, *tcs; struct cfiscsi_softc *softc; union ctl_io *cdw_io; int cold_reset = 0; request = PRIV_REQUEST(io); cs = PDU_SESSION(request); bhstmr = (struct iscsi_bhs_task_management_request *)request->ip_bhs; KASSERT((bhstmr->bhstmr_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_TASK_REQUEST, ("replying to wrong opcode 0x%x", bhstmr->bhstmr_opcode)); #if 0 
CFISCSI_SESSION_DEBUG(cs, "initiator task tag 0x%x; referenced task tag 0x%x", bhstmr->bhstmr_initiator_task_tag, bhstmr->bhstmr_referenced_task_tag); #endif if ((bhstmr->bhstmr_function & ~0x80) == BHSTMR_FUNCTION_ABORT_TASK) { /* * Make sure we no longer wait for Data-Out for this command. */ CFISCSI_SESSION_LOCK(cs); TAILQ_FOREACH_SAFE(cdw, &cs->cs_waiting_for_data_out, cdw_next, tmpcdw) { if (bhstmr->bhstmr_referenced_task_tag != cdw->cdw_initiator_task_tag) continue; #if 0 CFISCSI_SESSION_DEBUG(cs, "removing csw for initiator task " "tag 0x%x", bhstmr->bhstmr_initiator_task_tag); #endif TAILQ_REMOVE(&cs->cs_waiting_for_data_out, cdw, cdw_next); /* Finish the aborted command's datamove with a non-zero port_status; the cdw is freed before the I/O is handed back. */ cdw_io = cdw->cdw_ctl_io; cdw_io->io_hdr.flags &= ~CTL_FLAG_DMA_INPROG; cdw_io->scsiio.io_hdr.port_status = 43; cfiscsi_data_wait_free(cs, cdw); ctl_datamove_done(cdw_io, false); } CFISCSI_SESSION_UNLOCK(cs); } /* Remember a successful cold reset; the sessions are terminated only after the response has been queued. */ if ((bhstmr->bhstmr_function & ~0x80) == BHSTMR_FUNCTION_TARGET_COLD_RESET && io->io_hdr.status == CTL_SUCCESS) cold_reset = 1; response = cfiscsi_pdu_new_response(request, M_WAITOK); bhstmr2 = (struct iscsi_bhs_task_management_response *) response->ip_bhs; bhstmr2->bhstmr_opcode = ISCSI_BHS_OPCODE_TASK_RESPONSE; bhstmr2->bhstmr_flags = 0x80; /* Translate the CTL task status into the iSCSI response code; anything unrecognized is reported as not supported. */ switch (io->taskio.task_status) { case CTL_TASK_FUNCTION_COMPLETE: bhstmr2->bhstmr_response = BHSTMR_RESPONSE_FUNCTION_COMPLETE; break; case CTL_TASK_FUNCTION_SUCCEEDED: bhstmr2->bhstmr_response = BHSTMR_RESPONSE_FUNCTION_SUCCEEDED; break; case CTL_TASK_LUN_DOES_NOT_EXIST: bhstmr2->bhstmr_response = BHSTMR_RESPONSE_LUN_DOES_NOT_EXIST; break; case CTL_TASK_FUNCTION_NOT_SUPPORTED: default: bhstmr2->bhstmr_response = BHSTMR_RESPONSE_FUNCTION_NOT_SUPPORTED; break; } memcpy(bhstmr2->bhstmr_additional_reponse_information, io->taskio.task_resp, sizeof(io->taskio.task_resp)); bhstmr2->bhstmr_initiator_task_tag = bhstmr->bhstmr_initiator_task_tag; ctl_free_io(io); icl_pdu_free(request); cfiscsi_pdu_queue(response); if (cold_reset) { softc = cs->cs_target->ct_softc; 
mtx_lock(&softc->lock); TAILQ_FOREACH(tcs, &softc->sessions, cs_next) { if (tcs->cs_target == cs->cs_target) cfiscsi_session_terminate(tcs); } mtx_unlock(&softc->lock); } } /* Handle completion of a CTL I/O. An I_T nexus reset task marks the session's tasks as aborted and wakes up waiters on cs_outstanding_ctl_pdus; any other I/O is dispatched by the opcode of its request PDU. In both cases the session's outstanding CTL PDU count is released. */ static void cfiscsi_done(union ctl_io *io) { struct icl_pdu *request; struct cfiscsi_session *cs; KASSERT(((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE), ("invalid CTL status %#x", io->io_hdr.status)); if (io->io_hdr.io_type == CTL_IO_TASK && io->taskio.task_action == CTL_TASK_I_T_NEXUS_RESET) { /* * Implicit task termination has just completed; nothing to do. */ /* For this task type PRIV_REQUEST(io) is used as the session pointer, not a request PDU. */ cs = PRIV_REQUEST(io); cs->cs_tasks_aborted = true; refcount_release(&cs->cs_outstanding_ctl_pdus); wakeup(__DEVOLATILE(void *, &cs->cs_outstanding_ctl_pdus)); ctl_free_io(io); return; } request = PRIV_REQUEST(io); cs = PDU_SESSION(request); switch (request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) { case ISCSI_BHS_OPCODE_SCSI_COMMAND: cfiscsi_scsi_command_done(io); break; case ISCSI_BHS_OPCODE_TASK_REQUEST: cfiscsi_task_management_done(io); break; default: panic("cfiscsi_done called with wrong opcode 0x%x", request->ip_bhs->bhs_opcode); } refcount_release(&cs->cs_outstanding_ctl_pdus); } diff --git a/sys/cam/ctl/ctl_frontend_iscsi.h b/sys/cam/ctl/ctl_frontend_iscsi.h index a1c857231428..7c7f422a8d1f 100644 --- a/sys/cam/ctl/ctl_frontend_iscsi.h +++ b/sys/cam/ctl/ctl_frontend_iscsi.h @@ -1,130 +1,128 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef CTL_FRONTEND_ISCSI_H #define CTL_FRONTEND_ISCSI_H #define CFISCSI_TARGET_STATE_INVALID 0 #define CFISCSI_TARGET_STATE_ACTIVE 1 #define CFISCSI_TARGET_STATE_DYING 2 /* Per-target state: name, alias and tag, a reference count, one of the CFISCSI_TARGET_STATE_* values in ct_state, the embedded CTL port, and a back pointer to the frontend softc. */ struct cfiscsi_target { TAILQ_ENTRY(cfiscsi_target) ct_next; struct cfiscsi_softc *ct_softc; volatile u_int ct_refcount; char ct_name[CTL_ISCSI_NAME_LEN]; char ct_alias[CTL_ISCSI_ALIAS_LEN]; uint16_t ct_tag; int ct_state; int ct_online; int ct_target_id; struct ctl_port ct_port; }; /* One outstanding Data-Out transfer: the CTL I/O it belongs to, the tags that identify it, the current position in the S/G list (index, address, remaining length), the end offset of the current R2T and the expected DataSN. cdw_icl_prv is presumably private to the ICL backend -- TODO confirm. */ struct cfiscsi_data_wait { TAILQ_ENTRY(cfiscsi_data_wait) cdw_next; union ctl_io *cdw_ctl_io; uint32_t cdw_target_transfer_tag; uint32_t cdw_initiator_task_tag; int cdw_sg_index; char *cdw_sg_addr; size_t cdw_sg_len; uint32_t cdw_r2t_end; uint32_t cdw_datasn; void *cdw_icl_prv; }; #define CFISCSI_SESSION_STATE_INVALID 0 #define CFISCSI_SESSION_STATE_BHS 1 #define CFISCSI_SESSION_STATE_AHS 2 #define CFISCSI_SESSION_STATE_HEADER_DIGEST 3 #define CFISCSI_SESSION_STATE_DATA 4 #define CFISCSI_SESSION_STATE_DATA_DIGEST 5 struct cfiscsi_session { 
TAILQ_ENTRY(cfiscsi_session) cs_next; struct mtx cs_lock; struct icl_conn *cs_conn; uint32_t cs_cmdsn; uint32_t cs_statsn; uint32_t cs_target_transfer_tag; volatile u_int cs_outstanding_ctl_pdus; TAILQ_HEAD(, cfiscsi_data_wait) cs_waiting_for_data_out; struct cfiscsi_target *cs_target; struct callout cs_callout; int cs_timeout; struct cv cs_maintenance_cv; bool cs_terminating; bool cs_handoff_in_progress; bool cs_tasks_aborted; - int cs_max_recv_data_segment_length; - int cs_max_send_data_segment_length; int cs_max_burst_length; int cs_first_burst_length; bool cs_immediate_data; char cs_initiator_name[CTL_ISCSI_NAME_LEN]; char cs_initiator_addr[CTL_ISCSI_ADDR_LEN]; char cs_initiator_alias[CTL_ISCSI_ALIAS_LEN]; char cs_initiator_isid[6]; char cs_initiator_id[CTL_ISCSI_NAME_LEN + 5 + 6 + 1]; unsigned int cs_id; int cs_ctl_initid; #ifdef ICL_KERNEL_PROXY struct sockaddr *cs_initiator_sa; int cs_portal_id; bool cs_login_phase; bool cs_waiting_for_ctld; struct cv cs_login_cv; struct icl_pdu *cs_login_pdu; #endif }; #ifdef ICL_KERNEL_PROXY struct icl_listen; #endif /* Frontend-wide state: the lists of targets and sessions, the counters used to number them, and the mutex taken while walking the session list. */ struct cfiscsi_softc { struct mtx lock; char port_name[32]; int online; int last_target_id; unsigned int last_session_id; TAILQ_HEAD(, cfiscsi_target) targets; TAILQ_HEAD(, cfiscsi_session) sessions; struct cv sessions_cv; #ifdef ICL_KERNEL_PROXY struct icl_listen *listener; struct cv accept_cv; #endif }; #endif /* !CTL_FRONTEND_ISCSI_H */ diff --git a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c index fce593b54032..17d5685f1c1a 100644 --- a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c +++ b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c @@ -1,1256 +1,1239 @@ /*- * Copyright (c) 2012 The FreeBSD Foundation * Copyright (c) 2015 Chelsio Communications, Inc. * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * cxgbei implementation of iSCSI Common Layer kobj(9) interface. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common/common.h" #include "common/t4_tcb.h" #include "tom/t4_tom.h" #include "cxgbei.h" static MALLOC_DEFINE(M_CXGBEI, "cxgbei", "cxgbei(4)"); SYSCTL_NODE(_kern_icl, OID_AUTO, cxgbei, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Chelsio iSCSI offload"); static int first_burst_length = 8192; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, first_burst_length, CTLFLAG_RWTUN, &first_burst_length, 0, "First burst length"); static int max_burst_length = 2 * 1024 * 1024; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, max_burst_length, CTLFLAG_RWTUN, &max_burst_length, 0, "Maximum burst length"); static int sendspace = 1048576; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, sendspace, CTLFLAG_RWTUN, &sendspace, 0, "Default send socket buffer size"); static int recvspace = 1048576; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, recvspace, CTLFLAG_RWTUN, &recvspace, 0, "Default receive socket buffer size"); static uma_zone_t prsv_zone; static volatile u_int icl_cxgbei_ncons; #define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) #define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) #define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) #define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) static icl_conn_new_pdu_t icl_cxgbei_conn_new_pdu; static icl_conn_pdu_data_segment_length_t icl_cxgbei_conn_pdu_data_segment_length; static icl_conn_pdu_append_data_t icl_cxgbei_conn_pdu_append_data; static icl_conn_pdu_get_data_t icl_cxgbei_conn_pdu_get_data; 
static icl_conn_pdu_queue_t icl_cxgbei_conn_pdu_queue; static icl_conn_pdu_queue_cb_t icl_cxgbei_conn_pdu_queue_cb; static icl_conn_handoff_t icl_cxgbei_conn_handoff; static icl_conn_free_t icl_cxgbei_conn_free; static icl_conn_close_t icl_cxgbei_conn_close; static icl_conn_task_setup_t icl_cxgbei_conn_task_setup; static icl_conn_task_done_t icl_cxgbei_conn_task_done; static icl_conn_transfer_setup_t icl_cxgbei_conn_transfer_setup; static icl_conn_transfer_done_t icl_cxgbei_conn_transfer_done; /* kobj method table that maps the ICL connection interface onto the cxgbei implementations. */ static kobj_method_t icl_cxgbei_methods[] = { KOBJMETHOD(icl_conn_new_pdu, icl_cxgbei_conn_new_pdu), KOBJMETHOD(icl_conn_pdu_free, icl_cxgbei_conn_pdu_free), KOBJMETHOD(icl_conn_pdu_data_segment_length, icl_cxgbei_conn_pdu_data_segment_length), KOBJMETHOD(icl_conn_pdu_append_data, icl_cxgbei_conn_pdu_append_data), KOBJMETHOD(icl_conn_pdu_get_data, icl_cxgbei_conn_pdu_get_data), KOBJMETHOD(icl_conn_pdu_queue, icl_cxgbei_conn_pdu_queue), KOBJMETHOD(icl_conn_pdu_queue_cb, icl_cxgbei_conn_pdu_queue_cb), KOBJMETHOD(icl_conn_handoff, icl_cxgbei_conn_handoff), KOBJMETHOD(icl_conn_free, icl_cxgbei_conn_free), KOBJMETHOD(icl_conn_close, icl_cxgbei_conn_close), KOBJMETHOD(icl_conn_task_setup, icl_cxgbei_conn_task_setup), KOBJMETHOD(icl_conn_task_done, icl_cxgbei_conn_task_done), KOBJMETHOD(icl_conn_transfer_setup, icl_cxgbei_conn_transfer_setup), KOBJMETHOD(icl_conn_transfer_done, icl_cxgbei_conn_transfer_done), { 0, 0 } }; DEFINE_CLASS(icl_cxgbei, icl_cxgbei_methods, sizeof(struct icl_cxgbei_conn)); /* Free a PDU's AHS, data and BHS mbufs and drop one reference; the icl_cxgbei_pdu itself is freed only when that was the last reference. */ void icl_cxgbei_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) { struct icl_cxgbei_pdu *icp = ip_to_icp(ip); KASSERT(icp->ref_cnt != 0, ("freeing deleted PDU")); MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE); MPASS(ic == ip->ip_conn); m_freem(ip->ip_ahs_mbuf); m_freem(ip->ip_data_mbuf); m_freem(ip->ip_bhs_mbuf); KASSERT(ic != NULL || icp->ref_cnt == 1, ("orphaned PDU has oustanding references")); if (atomic_fetchadd_int(&icp->ref_cnt, -1) != 1) return; free(icp, M_CXGBEI); 
#ifdef DIAGNOSTIC if (__predict_true(ic != NULL)) refcount_release(&ic->ic_outstanding_pdus); #endif } /* Final teardown of a PDU: invoke the completion callback, if one was set, with the recorded error, then free the icl_cxgbei_pdu. */ static void icl_cxgbei_pdu_call_cb(struct icl_pdu *ip) { struct icl_cxgbei_pdu *icp = ip_to_icp(ip); MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE); if (icp->cb != NULL) icp->cb(ip, icp->error); #ifdef DIAGNOSTIC if (__predict_true(ip->ip_conn != NULL)) refcount_release(&ip->ip_conn->ic_outstanding_pdus); #endif free(icp, M_CXGBEI); } /* Complete a PDU with the given error: record a non-zero error, free all of its mbufs and, once the reference count reaches zero, run the callback path. */ static void icl_cxgbei_pdu_done(struct icl_pdu *ip, int error) { struct icl_cxgbei_pdu *icp = ip_to_icp(ip); if (error != 0) icp->error = error; m_freem(ip->ip_ahs_mbuf); ip->ip_ahs_mbuf = NULL; m_freem(ip->ip_data_mbuf); ip->ip_data_mbuf = NULL; m_freem(ip->ip_bhs_mbuf); ip->ip_bhs_mbuf = NULL; /* * All other references to this PDU should have been dropped * by the m_freem() of ip_data_mbuf. */ if (atomic_fetchadd_int(&icp->ref_cnt, -1) == 1) icl_cxgbei_pdu_call_cb(ip); else __assert_unreachable(); } /* Free routine registered for zero-copy (ICL_NOCOPY) data mbufs; ext_arg1 carries the owning icl_cxgbei_pdu. */ static void icl_cxgbei_mbuf_done(struct mbuf *mb) { struct icl_cxgbei_pdu *icp = (struct icl_cxgbei_pdu *)mb->m_ext.ext_arg1; /* * NB: mb_free_mext() might leave ref_cnt as 1 without * decrementing it if it hits the fast path in the ref_cnt * check. 
*/ icl_cxgbei_pdu_call_cb(&icp->ip); } /* Allocate a PDU with a reference count of 1 and a zeroed BHS stored in a newly allocated header mbuf. Returns NULL if either allocation fails. */ struct icl_pdu * icl_cxgbei_new_pdu(int flags) { struct icl_cxgbei_pdu *icp; struct icl_pdu *ip; struct mbuf *m; icp = malloc(sizeof(*icp), M_CXGBEI, flags | M_ZERO); if (__predict_false(icp == NULL)) return (NULL); icp->icp_signature = CXGBEI_PDU_SIGNATURE; icp->ref_cnt = 1; ip = &icp->ip; m = m_gethdr(flags, MT_DATA); if (__predict_false(m == NULL)) { free(icp, M_CXGBEI); return (NULL); } ip->ip_bhs_mbuf = m; ip->ip_bhs = mtod(m, struct iscsi_bhs *); memset(ip->ip_bhs, 0, sizeof(*ip->ip_bhs)); m->m_len = sizeof(struct iscsi_bhs); m->m_pkthdr.len = m->m_len; return (ip); } /* Associate a PDU with its connection; under DIAGNOSTIC the connection's outstanding PDU count is bumped as well. */ void icl_cxgbei_new_pdu_set_conn(struct icl_pdu *ip, struct icl_conn *ic) { ip->ip_conn = ic; #ifdef DIAGNOSTIC refcount_acquire(&ic->ic_outstanding_pdus); #endif } /* * Allocate icl_pdu with empty BHS to fill up by the caller. */ static struct icl_pdu * icl_cxgbei_conn_new_pdu(struct icl_conn *ic, int flags) { struct icl_pdu *ip; ip = icl_cxgbei_new_pdu(flags); if (__predict_false(ip == NULL)) return (NULL); icl_cxgbei_new_pdu_set_conn(ip, ic); return (ip); } /* Decode the 24-bit data segment length stored most-significant byte first in the BHS. */ static size_t icl_pdu_data_segment_length(const struct icl_pdu *request) { uint32_t len = 0; len += request->ip_bhs->bhs_data_segment_len[0]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[1]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[2]; return (len); } /* kobj method wrapper around icl_pdu_data_segment_length(); ic is unused. */ size_t icl_cxgbei_conn_pdu_data_segment_length(struct icl_conn *ic, const struct icl_pdu *request) { return (icl_pdu_data_segment_length(request)); } /* Prepare a PDU for transmission: pad the data segment to a multiple of 4 bytes, store the data segment length in the BHS, chain the header and data mbufs into one packet tagged with the ULP submode, and drop the PDU's own reference. Returns the resulting mbuf chain. */ static struct mbuf * finalize_pdu(struct icl_cxgbei_conn *icc, struct icl_cxgbei_pdu *icp) { struct icl_pdu *ip = &icp->ip; uint8_t ulp_submode, padding; struct mbuf *m, *last; struct iscsi_bhs *bhs; /* * Fix up the data segment mbuf first. */ m = ip->ip_data_mbuf; ulp_submode = icc->ulp_submode; if (m) { last = m_last(m); /* * Round up the data segment to a 4B boundary. Pad with 0 if * necessary. There will definitely be room in the mbuf. 
*/ padding = roundup2(ip->ip_data_len, 4) - ip->ip_data_len; if (padding) { bzero(mtod(last, uint8_t *) + last->m_len, padding); last->m_len += padding; } } else { MPASS(ip->ip_data_len == 0); ulp_submode &= ~ULP_CRC_DATA; padding = 0; } /* * Now the header mbuf that has the BHS. */ m = ip->ip_bhs_mbuf; MPASS(m->m_pkthdr.len == sizeof(struct iscsi_bhs)); MPASS(m->m_len == sizeof(struct iscsi_bhs)); bhs = ip->ip_bhs; bhs->bhs_data_segment_len[2] = ip->ip_data_len; bhs->bhs_data_segment_len[1] = ip->ip_data_len >> 8; bhs->bhs_data_segment_len[0] = ip->ip_data_len >> 16; /* * Extract mbuf chain from PDU. */ m->m_pkthdr.len += ip->ip_data_len + padding; m->m_next = ip->ip_data_mbuf; set_mbuf_ulp_submode(m, ulp_submode); ip->ip_bhs_mbuf = NULL; ip->ip_data_mbuf = NULL; ip->ip_bhs = NULL; /* * Drop PDU reference on icp. Additional references might * still be held by zero-copy PDU buffers (ICL_NOCOPY). */ if (atomic_fetchadd_int(&icp->ref_cnt, -1) == 1) icl_cxgbei_pdu_call_cb(ip); return (m); } /* Append len bytes at addr to the PDU's data segment. With ICL_NOCOPY the caller's buffer is attached to a read-only mbuf as external storage tied to the PDU's reference count; otherwise the data is copied into newly allocated mbufs. Returns 0 on success or ENOMEM. */ int icl_cxgbei_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *ip, const void *addr, size_t len, int flags) { struct icl_cxgbei_pdu *icp = ip_to_icp(ip); struct mbuf *m, *m_tail; const char *src; MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE); MPASS(ic == ip->ip_conn); KASSERT(len > 0, ("%s: len is %jd", __func__, (intmax_t)len)); /* Find the current tail of the data chain so that new mbufs can be linked after it. */ m_tail = ip->ip_data_mbuf; if (m_tail != NULL) for (; m_tail->m_next != NULL; m_tail = m_tail->m_next) ; if (flags & ICL_NOCOPY) { m = m_get(flags & ~ICL_NOCOPY, MT_DATA); if (m == NULL) { ICL_WARN("failed to allocate mbuf"); return (ENOMEM); } m->m_flags |= M_RDONLY; m_extaddref(m, __DECONST(char *, addr), len, &icp->ref_cnt, icl_cxgbei_mbuf_done, icp, NULL); m->m_len = len; if (ip->ip_data_mbuf == NULL) { ip->ip_data_mbuf = m; ip->ip_data_len = len; } else { m_tail->m_next = m; m_tail = m_tail->m_next; ip->ip_data_len += len; } return (0); } src = (const char *)addr; /* Allocate as jumbo mbufs of size MJUM16BYTES. 
*/ while (len >= MJUM16BYTES) { m = m_getjcl(M_NOWAIT, MT_DATA, 0, MJUM16BYTES); if (__predict_false(m == NULL)) { if ((flags & M_WAITOK) != 0) { /* Fall back to non-jumbo mbufs. */ break; } return (ENOMEM); } memcpy(mtod(m, void *), src, MJUM16BYTES); m->m_len = MJUM16BYTES; if (ip->ip_data_mbuf == NULL) { ip->ip_data_mbuf = m_tail = m; ip->ip_data_len = MJUM16BYTES; } else { m_tail->m_next = m; m_tail = m_tail->m_next; ip->ip_data_len += MJUM16BYTES; } src += MJUM16BYTES; len -= MJUM16BYTES; } /* Allocate mbuf chain for the remaining data. */ if (len != 0) { m = m_getm2(NULL, len, flags, MT_DATA, 0); if (__predict_false(m == NULL)) return (ENOMEM); if (ip->ip_data_mbuf == NULL) { ip->ip_data_mbuf = m; ip->ip_data_len = len; } else { m_tail->m_next = m; ip->ip_data_len += len; } /* Fill each mbuf of the new chain up to its capacity. */ for (; m != NULL; m = m->m_next) { m->m_len = min(len, M_SIZE(m)); memcpy(mtod(m, void *), src, m->m_len); src += m->m_len; len -= m->m_len; } MPASS(len == 0); } - MPASS(ip->ip_data_len <= ic->ic_max_data_segment_length); + MPASS(ip->ip_data_len <= ic->ic_max_send_data_segment_length); return (0); } /* Copy len bytes starting at offset off of the PDU's data segment into addr, unless the PDU is flagged ICPF_RX_DDP, in which case nothing is copied. */ void icl_cxgbei_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, size_t off, void *addr, size_t len) { struct icl_cxgbei_pdu *icp = ip_to_icp(ip); if (icp->icp_flags & ICPF_RX_DDP) return; /* data is DDP'ed, no need to copy */ m_copydata(ip->ip_data_mbuf, off, len, addr); } /* Queue a PDU for transmission without a completion callback. */ void icl_cxgbei_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) { icl_cxgbei_conn_pdu_queue_cb(ic, ip, NULL); } /* Queue a PDU for transmission with the connection lock held; cb, if not NULL, is stored in the PDU and invoked when the PDU is finally released. If the connection is disconnecting or the socket is not writeable the PDU is completed with ENOTCONN instead. */ void icl_cxgbei_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip, icl_pdu_cb cb) { struct epoch_tracker et; struct icl_cxgbei_conn *icc = ic_to_icc(ic); struct icl_cxgbei_pdu *icp = ip_to_icp(ip); struct socket *so = ic->ic_socket; struct toepcb *toep = icc->toep; struct inpcb *inp; struct mbuf *m; MPASS(ic == ip->ip_conn); MPASS(ip->ip_bhs_mbuf != NULL); /* The kernel doesn't generate PDUs with AHS. 
*/ MPASS(ip->ip_ahs_mbuf == NULL && ip->ip_ahs_len == 0); ICL_CONN_LOCK_ASSERT(ic); icp->cb = cb; /* NOTE: sowriteable without so_snd lock is a mostly harmless race. */ if (ic->ic_disconnecting || so == NULL || !sowriteable(so)) { icl_cxgbei_pdu_done(ip, ENOTCONN); return; } m = finalize_pdu(icc, icp); M_ASSERTPKTHDR(m); MPASS((m->m_pkthdr.len & 3) == 0); /* * Do not get inp from toep->inp as the toepcb might have detached * already. */ inp = sotoinpcb(so); /* Hand the packet to the TOE only if the inpcb has not been dropped and the toepcb is still attached; otherwise free it. */ CURVNET_SET(toep->vnet); NET_EPOCH_ENTER(et); INP_WLOCK(inp); if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) || __predict_false((toep->flags & TPF_ATTACHED) == 0)) m_freem(m); else { mbufq_enqueue(&toep->ulp_pduq, m); t4_push_pdus(icc->sc, toep, 0); } INP_WUNLOCK(inp); NET_EPOCH_EXIT(et); CURVNET_RESTORE(); } /* Create and initialize a cxgbei ICL connection object that uses the caller-supplied lock; the global connection count is incremented. */ static struct icl_conn * icl_cxgbei_new_conn(const char *name, struct mtx *lock) { struct icl_cxgbei_conn *icc; struct icl_conn *ic; refcount_acquire(&icl_cxgbei_ncons); icc = (struct icl_cxgbei_conn *)kobj_create(&icl_cxgbei_class, M_CXGBE, M_WAITOK | M_ZERO); icc->icc_signature = CXGBEI_CONN_SIGNATURE; STAILQ_INIT(&icc->rcvd_pdus); ic = &icc->ic; ic->ic_lock = lock; /* XXXNP: review. Most of these icl_conn fields aren't really used */ STAILQ_INIT(&ic->ic_to_send); cv_init(&ic->ic_send_cv, "icl_cxgbei_tx"); cv_init(&ic->ic_receive_cv, "icl_cxgbei_rx"); #ifdef DIAGNOSTIC refcount_init(&ic->ic_outstanding_pdus, 0); #endif - /* This is a stop-gap value that will be corrected during handoff. 
*/ - ic->ic_max_data_segment_length = 16384; ic->ic_name = name; ic->ic_offload = "cxgbei"; ic->ic_unmapped = false; CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc); return (ic); } void icl_cxgbei_conn_free(struct icl_conn *ic) { struct icl_cxgbei_conn *icc = ic_to_icc(ic); MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc); cv_destroy(&ic->ic_send_cv); cv_destroy(&ic->ic_receive_cv); kobj_delete((struct kobj *)icc, M_CXGBE); refcount_release(&icl_cxgbei_ncons); } static int icl_cxgbei_setsockopt(struct icl_conn *ic, struct socket *so, int sspace, int rspace) { struct sockopt opt; int error, one = 1, ss, rs; ss = max(sendspace, sspace); rs = max(recvspace, rspace); error = soreserve(so, ss, rs); if (error != 0) { icl_cxgbei_conn_close(ic); return (error); } SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_flags |= SB_AUTOSIZE; SOCKBUF_UNLOCK(&so->so_snd); SOCKBUF_LOCK(&so->so_rcv); so->so_rcv.sb_flags |= SB_AUTOSIZE; SOCKBUF_UNLOCK(&so->so_rcv); /* * Disable Nagle. */ bzero(&opt, sizeof(opt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = IPPROTO_TCP; opt.sopt_name = TCP_NODELAY; opt.sopt_val = &one; opt.sopt_valsize = sizeof(one); error = sosetopt(so, &opt); if (error != 0) { icl_cxgbei_conn_close(ic); return (error); } return (0); } /* * Request/response structure used to find out the adapter offloading a socket. */ struct find_ofld_adapter_rr { struct socket *so; struct adapter *sc; /* result */ }; static void find_offload_adapter(struct adapter *sc, void *arg) { struct find_ofld_adapter_rr *fa = arg; struct socket *so = fa->so; struct tom_data *td = sc->tom_softc; struct tcpcb *tp; struct inpcb *inp; /* Non-TCP were filtered out earlier. */ MPASS(so->so_proto->pr_protocol == IPPROTO_TCP); if (fa->sc != NULL) return; /* Found already. */ if (td == NULL) return; /* TOE not enabled on this adapter. 
*/ inp = sotoinpcb(so); INP_WLOCK(inp); if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { tp = intotcpcb(inp); if (tp->t_flags & TF_TOE && tp->tod == &td->tod) fa->sc = sc; /* Found. */ } INP_WUNLOCK(inp); } /* XXXNP: move this to t4_tom. */ static void send_iscsi_flowc_wr(struct adapter *sc, struct toepcb *toep, int maxlen) { struct wrqe *wr; struct fw_flowc_wr *flowc; const u_int nparams = 1; u_int flowclen; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); wr = alloc_wrqe(roundup2(flowclen, 16), &toep->ofld_txq->wrq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } flowc = wrtod(wr); memset(flowc, 0, wr->wr_len); flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(nparams)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | V_FW_WR_FLOWID(toep->tid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_TXDATAPLEN_MAX; flowc->mnemval[0].val = htobe32(maxlen); txsd->tx_credits = howmany(flowclen, 16); txsd->plen = 0; KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, ("%s: not enough credits (%d)", __func__, toep->tx_credits)); toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; t4_wrq_tx(sc, wr); } static void set_ulp_mode_iscsi(struct adapter *sc, struct toepcb *toep, u_int ulp_submode) { uint64_t val; CTR3(KTR_CXGBE, "%s: tid %u, ULP_MODE_ISCSI, submode=%#x", __func__, toep->tid, ulp_submode); val = V_TCB_ULP_TYPE(ULP_MODE_ISCSI) | V_TCB_ULP_RAW(ulp_submode); t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_ULP_TYPE, V_TCB_ULP_TYPE(M_TCB_ULP_TYPE) | V_TCB_ULP_RAW(M_TCB_ULP_RAW), val, 0, 0); val = V_TF_RX_FLOW_CONTROL_DISABLE(1ULL); t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_T_FLAGS, val, val, 0, 0); } /* * XXXNP: Who is responsible for cleaning up the socket if this returns with an * error? 
Review all error paths. * * XXXNP: What happens to the socket's fd reference if the operation is * successful, and how does that affect the socket's life cycle? */ int icl_cxgbei_conn_handoff(struct icl_conn *ic, int fd) { struct icl_cxgbei_conn *icc = ic_to_icc(ic); struct cxgbei_data *ci; struct find_ofld_adapter_rr fa; struct file *fp; struct socket *so; struct inpcb *inp; struct tcpcb *tp; struct toepcb *toep; cap_rights_t rights; int error; MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); ICL_CONN_LOCK_ASSERT_NOT(ic); /* * Steal the socket from userland. */ error = fget(curthread, fd, cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp); if (error != 0) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, curthread); return (EINVAL); } so = fp->f_data; if (so->so_type != SOCK_STREAM || so->so_proto->pr_protocol != IPPROTO_TCP) { fdrop(fp, curthread); return (EINVAL); } ICL_CONN_LOCK(ic); if (ic->ic_socket != NULL) { ICL_CONN_UNLOCK(ic); fdrop(fp, curthread); return (EBUSY); } ic->ic_disconnecting = false; ic->ic_socket = so; fp->f_ops = &badfileops; fp->f_data = NULL; fdrop(fp, curthread); ICL_CONN_UNLOCK(ic); /* Find the adapter offloading this socket. */ fa.sc = NULL; fa.so = so; t4_iterate(find_offload_adapter, &fa); if (fa.sc == NULL) return (EINVAL); icc->sc = fa.sc; ci = icc->sc->iscsi_ulp_softc; inp = sotoinpcb(so); INP_WLOCK(inp); tp = intotcpcb(inp); if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) error = EBUSY; else { /* * socket could not have been "unoffloaded" if here. */ MPASS(tp->t_flags & TF_TOE); MPASS(tp->tod != NULL); MPASS(tp->t_toe != NULL); toep = tp->t_toe; MPASS(toep->vi->adapter == icc->sc); icc->toep = toep; icc->cwt = cxgbei_select_worker_thread(icc); - /* - * We maintain the _send_ DSL in this field just to have a - * convenient way to assert that the kernel never sends - * oversized PDUs. This field is otherwise unused in the driver - * or the kernel. 
- */ - ic->ic_max_data_segment_length = ci->max_tx_pdu_len - - ISCSI_BHS_SIZE; - icc->ulp_submode = 0; - if (ic->ic_header_crc32c) { + if (ic->ic_header_crc32c) icc->ulp_submode |= ULP_CRC_HEADER; - ic->ic_max_data_segment_length -= - ISCSI_HEADER_DIGEST_SIZE; - } - if (ic->ic_data_crc32c) { + if (ic->ic_data_crc32c) icc->ulp_submode |= ULP_CRC_DATA; - ic->ic_max_data_segment_length -= - ISCSI_DATA_DIGEST_SIZE; - } so->so_options |= SO_NO_DDP; toep->params.ulp_mode = ULP_MODE_ISCSI; toep->ulpcb = icc; send_iscsi_flowc_wr(icc->sc, toep, ci->max_tx_pdu_len); set_ulp_mode_iscsi(icc->sc, toep, icc->ulp_submode); error = 0; } INP_WUNLOCK(inp); if (error == 0) { error = icl_cxgbei_setsockopt(ic, so, ci->max_tx_pdu_len, ci->max_rx_pdu_len); } return (error); } void icl_cxgbei_conn_close(struct icl_conn *ic) { struct icl_cxgbei_conn *icc = ic_to_icc(ic); struct icl_pdu *ip; struct socket *so; struct sockbuf *sb; struct inpcb *inp; struct toepcb *toep = icc->toep; MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); ICL_CONN_LOCK_ASSERT_NOT(ic); ICL_CONN_LOCK(ic); so = ic->ic_socket; if (ic->ic_disconnecting || so == NULL) { CTR4(KTR_CXGBE, "%s: icc %p (disconnecting = %d), so %p", __func__, icc, ic->ic_disconnecting, so); ICL_CONN_UNLOCK(ic); return; } ic->ic_disconnecting = true; /* These are unused in this driver right now. */ MPASS(STAILQ_EMPTY(&ic->ic_to_send)); MPASS(ic->ic_receive_pdu == NULL); #ifdef DIAGNOSTIC KASSERT(ic->ic_outstanding_pdus == 0, ("destroying session with %d outstanding PDUs", ic->ic_outstanding_pdus)); #endif ICL_CONN_UNLOCK(ic); CTR3(KTR_CXGBE, "%s: tid %d, icc %p", __func__, toep ? toep->tid : -1, icc); inp = sotoinpcb(so); sb = &so->so_rcv; INP_WLOCK(inp); if (toep != NULL) { /* NULL if connection was never offloaded. 
*/ toep->ulpcb = NULL; mbufq_drain(&toep->ulp_pduq); SOCKBUF_LOCK(sb); if (icc->rx_flags & RXF_ACTIVE) { volatile u_int *p = &icc->rx_flags; SOCKBUF_UNLOCK(sb); INP_WUNLOCK(inp); while (*p & RXF_ACTIVE) pause("conclo", 1); INP_WLOCK(inp); SOCKBUF_LOCK(sb); } while (!STAILQ_EMPTY(&icc->rcvd_pdus)) { ip = STAILQ_FIRST(&icc->rcvd_pdus); STAILQ_REMOVE_HEAD(&icc->rcvd_pdus, ip_next); icl_cxgbei_pdu_done(ip, ENOTCONN); } SOCKBUF_UNLOCK(sb); } INP_WUNLOCK(inp); ICL_CONN_LOCK(ic); ic->ic_socket = NULL; ICL_CONN_UNLOCK(ic); /* * XXXNP: we should send RST instead of FIN when PDUs held in various * queues were purged instead of delivered reliably but soabort isn't * really general purpose and wouldn't do the right thing here. */ soclose(so); } int icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, struct ccb_scsiio *csio, uint32_t *ittp, void **arg) { struct icl_cxgbei_conn *icc = ic_to_icc(ic); struct toepcb *toep = icc->toep; struct adapter *sc = icc->sc; struct cxgbei_data *ci = sc->iscsi_ulp_softc; struct ppod_region *pr = &ci->pr; struct ppod_reservation *prsv; uint32_t itt; int rc = 0; /* This is for the offload driver's state. Must not be set already. */ MPASS(arg != NULL); MPASS(*arg == NULL); if ((csio->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_IN || csio->dxfer_len < ci->ddp_threshold) { no_ddp: /* * No DDP for this I/O. Allocate an ITT (based on the one * passed in) that cannot be a valid hardware DDP tag in the * iSCSI region. */ itt = *ittp & M_PPOD_TAG; itt = V_PPOD_TAG(itt) | pr->pr_invalid_bit; *ittp = htobe32(itt); MPASS(*arg == NULL); /* State is maintained for DDP only. */ if (rc != 0) counter_u64_add( toep->ofld_rxq->rx_iscsi_ddp_setup_error, 1); return (0); } /* * Reserve resources for DDP, update the itt that should be used in the * PDU, and save DDP specific state for this I/O in *arg. 
*/ prsv = uma_zalloc(prsv_zone, M_NOWAIT); if (prsv == NULL) { rc = ENOMEM; goto no_ddp; } /* XXX add support for all CAM_DATA_ types */ MPASS((csio->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_VADDR); rc = t4_alloc_page_pods_for_buf(pr, (vm_offset_t)csio->data_ptr, csio->dxfer_len, prsv); if (rc != 0) { uma_zfree(prsv_zone, prsv); goto no_ddp; } rc = t4_write_page_pods_for_buf(sc, toep, prsv, (vm_offset_t)csio->data_ptr, csio->dxfer_len); if (rc != 0) { t4_free_page_pods(prsv); uma_zfree(prsv_zone, prsv); goto no_ddp; } *ittp = htobe32(prsv->prsv_tag); *arg = prsv; counter_u64_add(toep->ofld_rxq->rx_iscsi_ddp_setup_ok, 1); return (0); } void icl_cxgbei_conn_task_done(struct icl_conn *ic, void *arg) { if (arg != NULL) { struct ppod_reservation *prsv = arg; t4_free_page_pods(prsv); uma_zfree(prsv_zone, prsv); } } static inline bool ddp_sgl_check(struct ctl_sg_entry *sg, int entries, int xferlen) { int total_len = 0; MPASS(entries > 0); if (((vm_offset_t)sg[--entries].addr & 3U) != 0) return (false); total_len += sg[entries].len; while (--entries >= 0) { if (((vm_offset_t)sg[entries].addr & PAGE_MASK) != 0 || (sg[entries].len % PAGE_SIZE) != 0) return (false); total_len += sg[entries].len; } MPASS(total_len == xferlen); return (true); } /* XXXNP: PDU should be passed in as parameter, like on the initiator. 
*/ #define io_to_request_pdu(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr) #define io_to_ppod_reservation(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr) int icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io, uint32_t *tttp, void **arg) { struct icl_cxgbei_conn *icc = ic_to_icc(ic); struct toepcb *toep = icc->toep; struct ctl_scsiio *ctsio = &io->scsiio; struct adapter *sc = icc->sc; struct cxgbei_data *ci = sc->iscsi_ulp_softc; struct ppod_region *pr = &ci->pr; struct ppod_reservation *prsv; struct ctl_sg_entry *sgl, sg_entry; int sg_entries = ctsio->kern_sg_entries; uint32_t ttt; int xferlen, rc = 0, alias; /* This is for the offload driver's state. Must not be set already. */ MPASS(arg != NULL); MPASS(*arg == NULL); if (ctsio->ext_data_filled == 0) { int first_burst; struct icl_pdu *ip = io_to_request_pdu(io); #ifdef INVARIANTS struct icl_cxgbei_pdu *icp = ip_to_icp(ip); MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE); MPASS(ic == ip->ip_conn); MPASS(ip->ip_bhs_mbuf != NULL); #endif first_burst = icl_pdu_data_segment_length(ip); /* * Note that ICL calls conn_transfer_setup even if the first * burst had everything and there's nothing left to transfer. * * NB: The CTL frontend might have provided a buffer * whose length (kern_data_len) is smaller than the * FirstBurstLength of unsolicited data. Treat those * as an empty transfer. */ xferlen = ctsio->kern_data_len; if (xferlen < first_burst || xferlen - first_burst < ci->ddp_threshold) { no_ddp: /* * No DDP for this transfer. Allocate a TTT (based on * the one passed in) that cannot be a valid hardware * DDP tag in the iSCSI region. 
*/ ttt = *tttp & M_PPOD_TAG; ttt = V_PPOD_TAG(ttt) | pr->pr_invalid_bit; *tttp = htobe32(ttt); MPASS(io_to_ppod_reservation(io) == NULL); if (rc != 0) counter_u64_add( toep->ofld_rxq->rx_iscsi_ddp_setup_error, 1); return (0); } if (sg_entries == 0) { sgl = &sg_entry; sgl->len = xferlen; sgl->addr = (void *)ctsio->kern_data_ptr; sg_entries = 1; } else sgl = (void *)ctsio->kern_data_ptr; if (!ddp_sgl_check(sgl, sg_entries, xferlen)) goto no_ddp; /* * Reserve resources for DDP, update the ttt that should be used * in the PDU, and save DDP specific state for this I/O. */ MPASS(io_to_ppod_reservation(io) == NULL); prsv = uma_zalloc(prsv_zone, M_NOWAIT); if (prsv == NULL) { rc = ENOMEM; goto no_ddp; } rc = t4_alloc_page_pods_for_sgl(pr, sgl, sg_entries, prsv); if (rc != 0) { uma_zfree(prsv_zone, prsv); goto no_ddp; } rc = t4_write_page_pods_for_sgl(sc, toep, prsv, sgl, sg_entries, xferlen); if (__predict_false(rc != 0)) { t4_free_page_pods(prsv); uma_zfree(prsv_zone, prsv); goto no_ddp; } *tttp = htobe32(prsv->prsv_tag); io_to_ppod_reservation(io) = prsv; *arg = ctsio; counter_u64_add(toep->ofld_rxq->rx_iscsi_ddp_setup_ok, 1); return (0); } /* * In the middle of an I/O. A non-NULL page pod reservation indicates * that a DDP buffer is being used for the I/O. 
*/ prsv = io_to_ppod_reservation(ctsio); if (prsv == NULL) goto no_ddp; alias = (prsv->prsv_tag & pr->pr_alias_mask) >> pr->pr_alias_shift; alias++; prsv->prsv_tag &= ~pr->pr_alias_mask; prsv->prsv_tag |= alias << pr->pr_alias_shift & pr->pr_alias_mask; *tttp = htobe32(prsv->prsv_tag); *arg = ctsio; return (0); } void icl_cxgbei_conn_transfer_done(struct icl_conn *ic, void *arg) { struct ctl_scsiio *ctsio = arg; if (ctsio != NULL && (ctsio->kern_data_len == ctsio->ext_data_filled || ic->ic_disconnecting)) { struct ppod_reservation *prsv; prsv = io_to_ppod_reservation(ctsio); MPASS(prsv != NULL); t4_free_page_pods(prsv); uma_zfree(prsv_zone, prsv); io_to_ppod_reservation(ctsio) = NULL; } } static void cxgbei_limits(struct adapter *sc, void *arg) { struct icl_drv_limits *idl = arg; struct cxgbei_data *ci; int max_dsl; if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4lims") != 0) return; if (uld_active(sc, ULD_ISCSI)) { ci = sc->iscsi_ulp_softc; MPASS(ci != NULL); /* * AHS is not supported by the kernel so we'll not account for * it either in our PDU len -> data segment len conversions. */ max_dsl = ci->max_rx_pdu_len - ISCSI_BHS_SIZE - ISCSI_HEADER_DIGEST_SIZE - ISCSI_DATA_DIGEST_SIZE; if (idl->idl_max_recv_data_segment_length > max_dsl) idl->idl_max_recv_data_segment_length = max_dsl; max_dsl = ci->max_tx_pdu_len - ISCSI_BHS_SIZE - ISCSI_HEADER_DIGEST_SIZE - ISCSI_DATA_DIGEST_SIZE; if (idl->idl_max_send_data_segment_length > max_dsl) idl->idl_max_send_data_segment_length = max_dsl; } end_synchronized_op(sc, LOCK_HELD); } static int icl_cxgbei_limits(struct icl_drv_limits *idl) { /* Maximum allowed by the RFC. cxgbei_limits will clip them. */ idl->idl_max_recv_data_segment_length = (1 << 24) - 1; idl->idl_max_send_data_segment_length = (1 << 24) - 1; /* These are somewhat arbitrary. 
*/ idl->idl_max_burst_length = max_burst_length; idl->idl_first_burst_length = first_burst_length; t4_iterate(cxgbei_limits, idl); return (0); } int icl_cxgbei_mod_load(void) { int rc; /* * Space to track pagepod reservations. */ prsv_zone = uma_zcreate("Pagepod reservations", sizeof(struct ppod_reservation), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); refcount_init(&icl_cxgbei_ncons, 0); rc = icl_register("cxgbei", false, -100, icl_cxgbei_limits, icl_cxgbei_new_conn); return (rc); } int icl_cxgbei_mod_unload(void) { if (icl_cxgbei_ncons != 0) return (EBUSY); icl_unregister("cxgbei", false); uma_zdestroy(prsv_zone); return (0); } #endif diff --git a/sys/dev/iscsi/icl.h b/sys/dev/iscsi/icl.h index 0b897a50302a..94600c0edad1 100644 --- a/sys/dev/iscsi/icl.h +++ b/sys/dev/iscsi/icl.h @@ -1,173 +1,174 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef ICL_H #define ICL_H /* * iSCSI Common Layer. It's used by both the initiator and target to send * and receive iSCSI PDUs. */ #include #include #include #include SYSCTL_DECL(_kern_icl); extern int icl_debug; #define ICL_DEBUG(X, ...) \ do { \ if (icl_debug > 1) \ printf("%s: " X "\n", __func__, ## __VA_ARGS__);\ } while (0) #define ICL_WARN(X, ...) \ do { \ if (icl_debug > 0) { \ printf("WARNING: %s: " X "\n", \ __func__, ## __VA_ARGS__); \ } \ } while (0) struct icl_conn; struct ccb_scsiio; union ctl_io; struct icl_pdu { STAILQ_ENTRY(icl_pdu) ip_next; struct icl_conn *ip_conn; struct iscsi_bhs *ip_bhs; struct mbuf *ip_bhs_mbuf; size_t ip_ahs_len; struct mbuf *ip_ahs_mbuf; size_t ip_data_len; struct mbuf *ip_data_mbuf; /* * User (initiator or provider) private fields. 
*/ void *ip_prv0; void *ip_prv1; }; #define ICL_CONN_STATE_INVALID 0 #define ICL_CONN_STATE_BHS 1 #define ICL_CONN_STATE_AHS 2 #define ICL_CONN_STATE_HEADER_DIGEST 3 #define ICL_CONN_STATE_DATA 4 #define ICL_CONN_STATE_DATA_DIGEST 5 #define ICL_NOCOPY (1 << 30) struct icl_conn { KOBJ_FIELDS; struct mtx *ic_lock; struct socket *ic_socket; #ifdef DIAGNOSTIC volatile u_int ic_outstanding_pdus; #endif STAILQ_HEAD(, icl_pdu) ic_to_send; bool ic_check_send_space; size_t ic_receive_len; int ic_receive_state; struct icl_pdu *ic_receive_pdu; struct cv ic_send_cv; struct cv ic_receive_cv; bool ic_header_crc32c; bool ic_data_crc32c; bool ic_send_running; bool ic_receive_running; - size_t ic_max_data_segment_length; + uint32_t ic_max_recv_data_segment_length; + uint32_t ic_max_send_data_segment_length; size_t ic_maxtags; bool ic_disconnecting; bool ic_iser; bool ic_unmapped; const char *ic_name; const char *ic_offload; void (*ic_receive)(struct icl_pdu *); void (*ic_error)(struct icl_conn *); /* * User (initiator or provider) private fields. */ void *ic_prv0; }; struct icl_drv_limits { int idl_max_recv_data_segment_length; int idl_max_send_data_segment_length; int idl_max_burst_length; int idl_first_burst_length; int spare[4]; }; typedef void (*icl_pdu_cb)(struct icl_pdu *, int error); struct icl_conn *icl_new_conn(const char *offload, bool iser, const char *name, struct mtx *lock); int icl_limits(const char *offload, bool iser, struct icl_drv_limits *idl); int icl_register(const char *offload, bool iser, int priority, int (*limits)(struct icl_drv_limits *), struct icl_conn *(*new_conn)(const char *, struct mtx *)); int icl_unregister(const char *offload, bool rdma); #ifdef ICL_KERNEL_PROXY struct sockaddr; struct icl_listen; /* * Target part. 
*/ struct icl_listen *icl_listen_new(void (*accept_cb)(struct socket *, struct sockaddr *, int)); void icl_listen_free(struct icl_listen *il); int icl_listen_add(struct icl_listen *il, bool rdma, int domain, int socktype, int protocol, struct sockaddr *sa, int portal_id); int icl_listen_remove(struct icl_listen *il, struct sockaddr *sa); /* * Those two are not a public API; only to be used between icl_soft.c * and icl_soft_proxy.c. */ int icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so); int icl_soft_proxy_connect(struct icl_conn *ic, int domain, int socktype, int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa); #endif /* ICL_KERNEL_PROXY */ #endif /* !ICL_H */ diff --git a/sys/dev/iscsi/icl_soft.c b/sys/dev/iscsi/icl_soft.c index a48ad53d70d7..001c595af7ec 100644 --- a/sys/dev/iscsi/icl_soft.c +++ b/sys/dev/iscsi/icl_soft.c @@ -1,1562 +1,1565 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Software implementation of iSCSI Common Layer kobj(9) interface. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct icl_soft_pdu { struct icl_pdu ip; /* soft specific stuff goes here. */ u_int ref_cnt; icl_pdu_cb cb; int error; }; SYSCTL_NODE(_kern_icl, OID_AUTO, soft, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Software iSCSI"); static int coalesce = 1; SYSCTL_INT(_kern_icl_soft, OID_AUTO, coalesce, CTLFLAG_RWTUN, &coalesce, 0, "Try to coalesce PDUs before sending"); static int partial_receive_len = 256 * 1024; SYSCTL_INT(_kern_icl_soft, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, &partial_receive_len, 0, "Minimum read size for partially received " "data segment"); static int max_data_segment_length = 256 * 1024; SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_data_segment_length, CTLFLAG_RWTUN, &max_data_segment_length, 0, "Maximum data segment length"); static int first_burst_length = 1024 * 1024; SYSCTL_INT(_kern_icl_soft, OID_AUTO, first_burst_length, CTLFLAG_RWTUN, &first_burst_length, 0, "First burst length"); static int max_burst_length = 1024 * 1024; SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_burst_length, CTLFLAG_RWTUN, &max_burst_length, 0, "Maximum burst length"); static int sendspace = 1536 * 1024; SYSCTL_INT(_kern_icl_soft, OID_AUTO, 
sendspace, CTLFLAG_RWTUN, &sendspace, 0, "Default send socket buffer size"); static int recvspace = 1536 * 1024; SYSCTL_INT(_kern_icl_soft, OID_AUTO, recvspace, CTLFLAG_RWTUN, &recvspace, 0, "Default receive socket buffer size"); static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend"); static uma_zone_t icl_soft_pdu_zone; static volatile u_int icl_ncons; #define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) #define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) #define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) #define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) STAILQ_HEAD(icl_pdu_stailq, icl_pdu); static icl_conn_new_pdu_t icl_soft_conn_new_pdu; static icl_conn_pdu_free_t icl_soft_conn_pdu_free; static icl_conn_pdu_data_segment_length_t icl_soft_conn_pdu_data_segment_length; static icl_conn_pdu_append_data_t icl_soft_conn_pdu_append_data; static icl_conn_pdu_get_data_t icl_soft_conn_pdu_get_data; static icl_conn_pdu_queue_t icl_soft_conn_pdu_queue; static icl_conn_pdu_queue_cb_t icl_soft_conn_pdu_queue_cb; static icl_conn_handoff_t icl_soft_conn_handoff; static icl_conn_free_t icl_soft_conn_free; static icl_conn_close_t icl_soft_conn_close; static icl_conn_task_setup_t icl_soft_conn_task_setup; static icl_conn_task_done_t icl_soft_conn_task_done; static icl_conn_transfer_setup_t icl_soft_conn_transfer_setup; static icl_conn_transfer_done_t icl_soft_conn_transfer_done; #ifdef ICL_KERNEL_PROXY static icl_conn_connect_t icl_soft_conn_connect; #endif static kobj_method_t icl_soft_methods[] = { KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu), KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free), KOBJMETHOD(icl_conn_pdu_data_segment_length, icl_soft_conn_pdu_data_segment_length), KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data), KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data), KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue), KOBJMETHOD(icl_conn_pdu_queue_cb, icl_soft_conn_pdu_queue_cb), 
KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff), KOBJMETHOD(icl_conn_free, icl_soft_conn_free), KOBJMETHOD(icl_conn_close, icl_soft_conn_close), KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup), KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done), KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup), KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done), #ifdef ICL_KERNEL_PROXY KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect), #endif { 0, 0 } }; DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn)); static void icl_conn_fail(struct icl_conn *ic) { if (ic->ic_socket == NULL) return; /* * XXX */ ic->ic_socket->so_error = EDOOFUS; (ic->ic_error)(ic); } static void icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) { struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; KASSERT(isp->ref_cnt == 0, ("freeing active PDU")); m_freem(ip->ip_bhs_mbuf); m_freem(ip->ip_ahs_mbuf); m_freem(ip->ip_data_mbuf); uma_zfree(icl_soft_pdu_zone, isp); #ifdef DIAGNOSTIC refcount_release(&ic->ic_outstanding_pdus); #endif } static void icl_soft_pdu_call_cb(struct icl_pdu *ip) { struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; if (isp->cb != NULL) isp->cb(ip, isp->error); #ifdef DIAGNOSTIC refcount_release(&ip->ip_conn->ic_outstanding_pdus); #endif uma_zfree(icl_soft_pdu_zone, isp); } static void icl_soft_pdu_done(struct icl_pdu *ip, int error) { struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; if (error != 0) isp->error = error; m_freem(ip->ip_bhs_mbuf); ip->ip_bhs_mbuf = NULL; m_freem(ip->ip_ahs_mbuf); ip->ip_ahs_mbuf = NULL; m_freem(ip->ip_data_mbuf); ip->ip_data_mbuf = NULL; if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1) icl_soft_pdu_call_cb(ip); } static void icl_soft_mbuf_done(struct mbuf *mb) { struct icl_soft_pdu *isp = (struct icl_soft_pdu *)mb->m_ext.ext_arg1; icl_soft_pdu_call_cb(&isp->ip); } /* * Allocate icl_pdu with empty BHS to fill up by the caller. 
*/ struct icl_pdu * icl_soft_conn_new_pdu(struct icl_conn *ic, int flags) { struct icl_soft_pdu *isp; struct icl_pdu *ip; #ifdef DIAGNOSTIC refcount_acquire(&ic->ic_outstanding_pdus); #endif isp = uma_zalloc(icl_soft_pdu_zone, flags | M_ZERO); if (isp == NULL) { ICL_WARN("failed to allocate soft PDU"); #ifdef DIAGNOSTIC refcount_release(&ic->ic_outstanding_pdus); #endif return (NULL); } ip = &isp->ip; ip->ip_conn = ic; CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN); ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA); if (ip->ip_bhs_mbuf == NULL) { ICL_WARN("failed to allocate BHS mbuf"); icl_soft_conn_pdu_free(ic, ip); return (NULL); } ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); return (ip); } static int icl_pdu_ahs_length(const struct icl_pdu *request) { return (request->ip_bhs->bhs_total_ahs_len * 4); } static size_t icl_pdu_data_segment_length(const struct icl_pdu *request) { uint32_t len = 0; len += request->ip_bhs->bhs_data_segment_len[0]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[1]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[2]; return (len); } size_t icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic, const struct icl_pdu *request) { return (icl_pdu_data_segment_length(request)); } static void icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) { response->ip_bhs->bhs_data_segment_len[2] = len; response->ip_bhs->bhs_data_segment_len[1] = len >> 8; response->ip_bhs->bhs_data_segment_len[0] = len >> 16; } static size_t icl_pdu_padding(const struct icl_pdu *ip) { if ((ip->ip_data_len % 4) != 0) return (4 - (ip->ip_data_len % 4)); return (0); } static size_t icl_pdu_size(const struct icl_pdu *response) { size_t len; KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); len = sizeof(struct iscsi_bhs) + response->ip_data_len + icl_pdu_padding(response); if (response->ip_conn->ic_header_crc32c) len += 
ISCSI_HEADER_DIGEST_SIZE; if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) len += ISCSI_DATA_DIGEST_SIZE; return (len); } static void icl_soft_receive_buf(struct mbuf **r, size_t *rs, void *buf, size_t s) { m_copydata(*r, 0, s, buf); m_adj(*r, s); while ((*r) != NULL && (*r)->m_len == 0) *r = m_free(*r); *rs -= s; } static void icl_pdu_receive_ahs(struct icl_pdu *request, struct mbuf **r, size_t *rs) { request->ip_ahs_len = icl_pdu_ahs_length(request); if (request->ip_ahs_len == 0) return; request->ip_ahs_mbuf = *r; *r = m_split(request->ip_ahs_mbuf, request->ip_ahs_len, M_WAITOK); *rs -= request->ip_ahs_len; } static uint32_t icl_mbuf_to_crc32c(const struct mbuf *m0) { uint32_t digest = 0xffffffff; const struct mbuf *m; for (m = m0; m != NULL; m = m->m_next) digest = calculate_crc32c(digest, mtod(m, const void *), m->m_len); digest = digest ^ 0xffffffff; return (digest); } static int icl_pdu_check_header_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs) { uint32_t received_digest, valid_digest; if (request->ip_conn->ic_header_crc32c == false) return (0); CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); icl_soft_receive_buf(r, rs, &received_digest, ISCSI_HEADER_DIGEST_SIZE); /* Temporary attach AHS to BHS to calculate header digest. */ request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf; valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); request->ip_bhs_mbuf->m_next = NULL; if (received_digest != valid_digest) { ICL_WARN("header digest check failed; got 0x%x, " "should be 0x%x", received_digest, valid_digest); return (-1); } return (0); } /* * Return the number of bytes that should be waiting in the receive socket * before icl_pdu_receive_data_segment() gets called. */ static size_t icl_pdu_data_segment_receive_len(const struct icl_pdu *request) { size_t len; len = icl_pdu_data_segment_length(request); if (len == 0) return (0); /* * Account for the parts of data segment already read from * the socket buffer. 
*/
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	/*
	 * Don't always wait for the full data segment to be delivered
	 * to the socket; this might badly affect performance due to
	 * TCP window scaling.
	 */
	if (len > partial_receive_len) {
#if 0
		ICL_DEBUG("need %zd bytes of data, limiting to %zd",
		    len, partial_receive_len));
#endif
		len = partial_receive_len;

		return (len);
	}

	/*
	 * Account for padding.  Note that due to the way code is written,
	 * the icl_pdu_receive_data_segment() must always receive padding
	 * along with the last part of data segment, because it would be
	 * impossible to tell whether we've already received the full data
	 * segment including padding, or without it.
	 */
	if ((len % 4) != 0)
		len += 4 - (len % 4);

#if 0
	ICL_DEBUG("need %zd bytes of data", len));
#endif

	return (len);
}

/*
 * Move as much of the data segment as is available from *r into
 * request->ip_data_mbuf.  *more_neededp is set to true when the segment
 * is still incomplete; in that case ic_receive_len is updated with the
 * amount to wait for next.  Always returns 0.
 */
static int
icl_pdu_receive_data_segment(struct icl_pdu *request,
    struct mbuf **r, size_t *rs, bool *more_neededp)
{
	struct icl_conn *ic;
	size_t len, padding = 0;
	struct mbuf *m;

	ic = request->ip_conn;

	*more_neededp = false;
	ic->ic_receive_len = 0;

	len = icl_pdu_data_segment_length(request);
	if (len == 0)
		return (0);

	/* Padding is derived from the full segment length in the BHS. */
	if ((len % 4) != 0)
		padding = 4 - (len % 4);

	/*
	 * Account for already received parts of data segment.
	 */
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	if (len + padding > *rs) {
		/*
		 * Not enough data in the socket buffer.  Receive as much
		 * as we can.  Don't receive padding, since, obviously, it's
		 * not the end of data segment yet.
		 */
#if 0
		ICL_DEBUG("limited from %zd to %zd",
		    len + padding, *rs - padding));
#endif
		/*
		 * NOTE(review): this is unsigned arithmetic; it presumably
		 * relies on *rs >= padding being guaranteed by the caller's
		 * ic_receive_len check -- confirm.
		 */
		len = *rs - padding;
		*more_neededp = true;
		padding = 0;
	}

	/*
	 * Must not try to receive padding without at least one byte
	 * of actual data segment.
*/
	if (len > 0) {
		/* Split the part we consume off the front of the chain. */
		m = *r;
		*r = m_split(m, len + padding, M_WAITOK);
		*rs -= len + padding;

		if (request->ip_data_mbuf == NULL)
			request->ip_data_mbuf = m;
		else
			m_cat(request->ip_data_mbuf, m);

		/* ip_data_len counts data only; padding is not included. */
		request->ip_data_len += len;
	} else
		ICL_DEBUG("len 0");

	if (*more_neededp)
		ic->ic_receive_len =
		    icl_pdu_data_segment_receive_len(request);

	return (0);
}

/*
 * If data digests are enabled and the PDU carries data, pull the received
 * digest off *r and compare it with one computed over ip_data_mbuf.
 * Returns 0 on success (or when there is nothing to check), -1 on mismatch.
 */
static int
icl_pdu_check_data_digest(struct icl_pdu *request,
    struct mbuf **r, size_t *rs)
{
	uint32_t received_digest, valid_digest;

	if (request->ip_conn->ic_data_crc32c == false)
		return (0);

	if (request->ip_data_len == 0)
		return (0);

	CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
	icl_soft_receive_buf(r, rs, &received_digest, ISCSI_DATA_DIGEST_SIZE);

	/*
	 * Note that ip_data_mbuf also contains padding; since digest
	 * calculation is supposed to include that, we iterate over
	 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
	 */
	valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);
	if (received_digest != valid_digest) {
		ICL_WARN("data digest check failed; got 0x%x, "
		    "should be 0x%x", received_digest, valid_digest);
		return (-1);
	}

	return (0);
}

/*
 * Somewhat contrary to the name, this attempts to receive only one
 * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
*/
static struct icl_pdu *
icl_conn_receive_pdu(struct icl_conn *ic, struct mbuf **r, size_t *rs)
{
	struct icl_pdu *request;
	size_t len;
	int error = 0;
	bool more_needed;

	/*
	 * A new PDU is allocated only at the start of the state machine;
	 * in every other state the partially received one is continued.
	 */
	if (ic->ic_receive_state == ICL_CONN_STATE_BHS) {
		KASSERT(ic->ic_receive_pdu == NULL,
		    ("ic->ic_receive_pdu != NULL"));
		request = icl_soft_conn_new_pdu(ic, M_NOWAIT);
		if (request == NULL) {
			ICL_DEBUG("failed to allocate PDU; "
			    "dropping connection");
			icl_conn_fail(ic);
			return (NULL);
		}
		ic->ic_receive_pdu = request;
	} else {
		KASSERT(ic->ic_receive_pdu != NULL,
		    ("ic->ic_receive_pdu == NULL"));
		request = ic->ic_receive_pdu;
	}

	/*
	 * Each case consumes one part of the PDU, then records the next
	 * state and how many bytes that state needs in ic_receive_len.
	 */
	switch (ic->ic_receive_state) {
	case ICL_CONN_STATE_BHS:
		//ICL_DEBUG("receiving BHS");
		icl_soft_receive_buf(r, rs, request->ip_bhs,
		    sizeof(struct iscsi_bhs));

		/*
		 * We don't enforce any limit for AHS length;
		 * its length is stored in 8 bit field.
		 */

		len = icl_pdu_data_segment_length(request);
-		if (len > ic->ic_max_data_segment_length) {
+		if (len > ic->ic_max_recv_data_segment_length) {
			ICL_WARN("received data segment "
			    "length %zd is larger than negotiated; "
			    "dropping connection", len);
			error = EINVAL;
			break;
		}

		ic->ic_receive_state = ICL_CONN_STATE_AHS;
		ic->ic_receive_len = icl_pdu_ahs_length(request);
		break;

	case ICL_CONN_STATE_AHS:
		//ICL_DEBUG("receiving AHS");
		icl_pdu_receive_ahs(request, r, rs);

		ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST;
		if (ic->ic_header_crc32c == false)
			ic->ic_receive_len = 0;
		else
			ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE;
		break;

	case ICL_CONN_STATE_HEADER_DIGEST:
		//ICL_DEBUG("receiving header digest");
		error = icl_pdu_check_header_digest(request, r, rs);
		if (error != 0) {
			ICL_DEBUG("header digest failed; "
			    "dropping connection");
			break;
		}

		ic->ic_receive_state = ICL_CONN_STATE_DATA;
		ic->ic_receive_len =
		    icl_pdu_data_segment_receive_len(request);
		break;

	case ICL_CONN_STATE_DATA:
		//ICL_DEBUG("receiving data segment");
		error = icl_pdu_receive_data_segment(request, r, rs,
		    &more_needed);
		if (error != 0) {
			ICL_DEBUG("failed to receive data 
segment;" "dropping connection"); break; } if (more_needed) break; ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST; if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) ic->ic_receive_len = 0; else ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE; break; case ICL_CONN_STATE_DATA_DIGEST: //ICL_DEBUG("receiving data digest"); error = icl_pdu_check_data_digest(request, r, rs); if (error != 0) { ICL_DEBUG("data digest failed; " "dropping connection"); break; } /* * We've received complete PDU; reset the receive state machine * and return the PDU. */ ic->ic_receive_state = ICL_CONN_STATE_BHS; ic->ic_receive_len = sizeof(struct iscsi_bhs); ic->ic_receive_pdu = NULL; return (request); default: panic("invalid ic_receive_state %d\n", ic->ic_receive_state); } if (error != 0) { /* * Don't free the PDU; it's pointed to by ic->ic_receive_pdu * and will get freed in icl_soft_conn_close(). */ icl_conn_fail(ic); } return (NULL); } static void icl_conn_receive_pdus(struct icl_conn *ic, struct mbuf **r, size_t *rs) { struct icl_pdu *response; for (;;) { if (ic->ic_disconnecting) return; /* * Loop until we have a complete PDU or there is not enough * data in the socket buffer. 
*/
		if (*rs < ic->ic_receive_len) {
#if 0
			ICL_DEBUG("not enough data; have %zd, need %zd",
			    *rs, ic->ic_receive_len);
#endif
			return;
		}

		response = icl_conn_receive_pdu(ic, r, rs);
		if (response == NULL)
			continue;

		/* PDUs carrying an AHS are rejected by failing the connection. */
		if (response->ip_ahs_len > 0) {
			ICL_WARN("received PDU with unsupported "
			    "AHS; opcode 0x%x; dropping connection",
			    response->ip_bhs->bhs_opcode);
			icl_soft_conn_pdu_free(ic, response);
			icl_conn_fail(ic);
			return;
		}

		(ic->ic_receive)(response);
	}
}

/*
 * Receive kthread body.  Waits until enough bytes for the next PDU part
 * are available, pulls them from the socket with soreceive(), appends
 * them to the local chain "r" and runs icl_conn_receive_pdus() on it.
 * On exit it clears ic_receive_running and signals ic_send_cv.
 */
static void
icl_receive_thread(void *arg)
{
	struct icl_conn *ic;
	size_t available, read = 0;
	struct socket *so;
	struct mbuf *m, *r = NULL;
	struct uio uio;
	int error, flags;

	ic = arg;
	so = ic->ic_socket;

	for (;;) {
		SOCKBUF_LOCK(&so->so_rcv);
		if (ic->ic_disconnecting) {
			SOCKBUF_UNLOCK(&so->so_rcv);
			break;
		}

		/*
		 * Set the low watermark, to be checked by
		 * soreadable() in icl_soupcall_receive()
		 * to avoid unnecessary wakeups until there
		 * is enough data received to read the PDU.
		 */
		available = sbavail(&so->so_rcv);
		if (read + available < ic->ic_receive_len) {
			so->so_rcv.sb_lowat = ic->ic_receive_len - read;
			cv_wait(&ic->ic_receive_cv, SOCKBUF_MTX(&so->so_rcv));
			so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
			available = sbavail(&so->so_rcv);
		}
		SOCKBUF_UNLOCK(&so->so_rcv);

		if (available == 0) {
			if (so->so_error != 0) {
				ICL_DEBUG("connection error %d; "
				    "dropping connection", so->so_error);
				icl_conn_fail(ic);
				break;
			}
			continue;
		}

		/* Take everything that is available, without blocking. */
		memset(&uio, 0, sizeof(uio));
		uio.uio_resid = available;
		flags = MSG_DONTWAIT;
		error = soreceive(so, NULL, &uio, &m, NULL, &flags);
		if (error != 0) {
			ICL_DEBUG("soreceive error %d", error);
			break;
		}
		if (uio.uio_resid != 0) {
			m_freem(m);
			ICL_DEBUG("short read");
			break;
		}
		if (r)
			m_cat(r, m);
		else
			r = m;
		read += available;

		icl_conn_receive_pdus(ic, &r, &read);
	}

	/* Drop whatever was buffered but never turned into a PDU. */
	if (r)
		m_freem(r);

	ICL_CONN_LOCK(ic);
	ic->ic_receive_running = false;
	cv_signal(&ic->ic_send_cv);
	ICL_CONN_UNLOCK(ic);
	kthread_exit();
}

/*
 * Receive-side socket upcall: wake the receive thread once the socket
 * is readable.
 */
static int
icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
{
	struct icl_conn *ic;

	if 
(!soreadable(so))
		return (SU_OK);

	ic = arg;
	cv_signal(&ic->ic_receive_cv);
	return (SU_OK);
}

/*
 * Turn a PDU into a single mbuf chain ready for sosend(): write the data
 * segment length into the BHS, append the header digest, padding and
 * data digest as configured, and concatenate the data chain to the BHS
 * mbuf.  Returns 0 on success, 1 when m_append() fails.
 */
static int
icl_pdu_finalize(struct icl_pdu *request)
{
	size_t padding, pdu_len;
	uint32_t digest, zero = 0;
	int ok;
	struct icl_conn *ic;

	ic = request->ip_conn;

	icl_pdu_set_data_segment_length(request, request->ip_data_len);

	/* Computed up front, while ip_data_len still describes the data. */
	pdu_len = icl_pdu_size(request);

	if (ic->ic_header_crc32c) {
		digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf);
		ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
		    (void *)&digest);
		if (ok != 1) {
			ICL_WARN("failed to append header digest");
			return (1);
		}
	}

	if (request->ip_data_len != 0) {
		padding = icl_pdu_padding(request);
		if (padding > 0) {
			/* Padding bytes are taken from the zeroed "zero". */
			ok = m_append(request->ip_data_mbuf, padding,
			    (void *)&zero);
			if (ok != 1) {
				ICL_WARN("failed to append padding");
				return (1);
			}
		}

		if (ic->ic_data_crc32c) {
			/* The digest is computed after padding was appended. */
			digest = icl_mbuf_to_crc32c(request->ip_data_mbuf);

			ok = m_append(request->ip_data_mbuf, sizeof(digest),
			    (void *)&digest);
			if (ok != 1) {
				ICL_WARN("failed to append data digest");
				return (1);
			}
		}

		m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
		request->ip_data_mbuf = NULL;
	}

	request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;

	return (0);
}

/*
 * Send PDUs from "queue" for as long as the socket send buffer has room
 * for them.  Must be called without the connection lock held.
 */
static void
icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue)
{
	struct icl_pdu *request, *request2;
	struct mbuf *m;
	struct socket *so;
	long available, size, size2;
	int coalesced, error;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	so = ic->ic_socket;

	SOCKBUF_LOCK(&so->so_snd);
	/*
	 * Check how much space do we have for transmit.  We can't just
	 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
	 * as it always frees the mbuf chain passed to it, even in case
	 * of error.
	 */
	available = sbspace(&so->so_snd);
	ic->ic_check_send_space = false;

	/*
	 * Notify the socket upcall that we don't need wakeups
	 * for the time being.
*/
	so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
	SOCKBUF_UNLOCK(&so->so_snd);

	while (!STAILQ_EMPTY(queue)) {
		request = STAILQ_FIRST(queue);
		size = icl_pdu_size(request);
		if (available < size) {
			/*
			 * Set the low watermark, to be checked by
			 * sowriteable() in icl_soupcall_send()
			 * to avoid unnecessary wakeups until there
			 * is enough space for the PDU to fit.
			 */
			SOCKBUF_LOCK(&so->so_snd);
			/* Re-read the space under the lock before giving up. */
			available = sbspace(&so->so_snd);
			if (available < size) {
#if 1
				ICL_DEBUG("no space to send; "
				    "have %ld, need %ld",
				    available, size);
#endif
				so->so_snd.sb_lowat = max(size,
				    so->so_snd.sb_hiwat / 8);
				SOCKBUF_UNLOCK(&so->so_snd);
				return;
			}
			SOCKBUF_UNLOCK(&so->so_snd);
		}
		STAILQ_REMOVE_HEAD(queue, ip_next);
		error = icl_pdu_finalize(request);
		if (error != 0) {
			ICL_DEBUG("failed to finalize PDU; "
			    "dropping connection");
			icl_soft_pdu_done(request, EIO);
			icl_conn_fail(ic);
			return;
		}
		if (coalesce) {
			/*
			 * Append following PDUs to the first one's mbuf
			 * chain for as long as the combined size still fits
			 * into the available send space.
			 */
			m = request->ip_bhs_mbuf;
			for (coalesced = 1; ; coalesced++) {
				request2 = STAILQ_FIRST(queue);
				if (request2 == NULL)
					break;
				size2 = icl_pdu_size(request2);
				if (available < size + size2)
					break;
				STAILQ_REMOVE_HEAD(queue, ip_next);
				error = icl_pdu_finalize(request2);
				if (error != 0) {
					ICL_DEBUG("failed to finalize PDU; "
					    "dropping connection");
					icl_soft_pdu_done(request, EIO);
					icl_soft_pdu_done(request2, EIO);
					icl_conn_fail(ic);
					return;
				}
				/* Walk to the current tail before appending. */
				while (m->m_next)
					m = m->m_next;
				m_cat(m, request2->ip_bhs_mbuf);
				request2->ip_bhs_mbuf = NULL;
				request->ip_bhs_mbuf->m_pkthdr.len += size2;
				size += size2;
				icl_soft_pdu_done(request2, 0);
			}
#if 0
			if (coalesced > 1) {
				ICL_DEBUG("coalesced %d PDUs into %ld bytes",
				    coalesced, size);
			}
#endif
		}
		available -= size;
		error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
		    NULL, MSG_DONTWAIT, curthread);
		request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. 
*/
		if (error != 0) {
			ICL_DEBUG("failed to send PDU, error %d; "
			    "dropping connection", error);
			icl_soft_pdu_done(request, error);
			icl_conn_fail(ic);
			return;
		}
		icl_soft_pdu_done(request, 0);
	}
}

/*
 * Send kthread body.  Moves PDUs from ic_to_send to a private queue,
 * sends them with the connection lock dropped, and sleeps on ic_send_cv
 * when there is nothing more it can do.  Exits once ic_disconnecting
 * is set.
 */
static void
icl_send_thread(void *arg)
{
	struct icl_conn *ic;
	struct icl_pdu_stailq queue;

	ic = arg;

	STAILQ_INIT(&queue);

	ICL_CONN_LOCK(ic);
	for (;;) {
		for (;;) {
			/*
			 * Populate the local queue from the main one.
			 * This way the icl_conn_send_pdus() can go through
			 * all the queued PDUs without holding any locks.
			 */
			if (STAILQ_EMPTY(&queue) || ic->ic_check_send_space)
				STAILQ_CONCAT(&queue, &ic->ic_to_send);

			ICL_CONN_UNLOCK(ic);
			icl_conn_send_pdus(ic, &queue);
			ICL_CONN_LOCK(ic);

			/*
			 * The icl_soupcall_send() was called since the last
			 * call to sbspace(); go around;
			 */
			if (ic->ic_check_send_space)
				continue;

			/*
			 * Local queue is empty, but we still have PDUs
			 * in the main one; go around.
			 */
			if (STAILQ_EMPTY(&queue) &&
			    !STAILQ_EMPTY(&ic->ic_to_send))
				continue;

			/*
			 * There might be some stuff in the local queue,
			 * which didn't get sent due to not having enough send
			 * space.  Wait for socket upcall.
			 */
			break;
		}

		if (ic->ic_disconnecting) {
			//ICL_DEBUG("terminating");
			break;
		}

		cv_wait(&ic->ic_send_cv, ic->ic_lock);
	}

	/*
	 * We're exiting; move PDUs back to the main queue, so they can
	 * get freed properly.  At this point ordering doesn't matter.
*/
	STAILQ_CONCAT(&ic->ic_to_send, &queue);

	ic->ic_send_running = false;
	cv_signal(&ic->ic_send_cv);
	ICL_CONN_UNLOCK(ic);
	kthread_exit();
}

/*
 * Send-side socket upcall: once the socket is writeable, flag that the
 * send space should be re-checked and wake the send thread.
 */
static int
icl_soupcall_send(struct socket *so, void *arg, int waitflag)
{
	struct icl_conn *ic;

	if (!sowriteable(so))
		return (SU_OK);

	ic = arg;

	ICL_CONN_LOCK(ic);
	ic->ic_check_send_space = true;
	ICL_CONN_UNLOCK(ic);

	cv_signal(&ic->ic_send_cv);

	return (SU_OK);
}

/*
 * Append "len" bytes at "addr" to the PDU's data segment.  With
 * ICL_NOCOPY in "flags" the buffer is referenced as read-only external
 * mbuf storage; otherwise it is copied into a freshly allocated chain.
 * The remaining flag bits are passed to the mbuf allocator.
 * Returns 0 on success or ENOMEM when the allocation fails.
 */
static int
icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
    const void *addr, size_t len, int flags)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request;
	struct mbuf *mb, *newmb;
	size_t copylen, off = 0;

	KASSERT(len > 0, ("len == 0"));

	if (flags & ICL_NOCOPY) {
		newmb = m_get(flags & ~ICL_NOCOPY, MT_DATA);
		if (newmb == NULL) {
			ICL_WARN("failed to allocate mbuf");
			return (ENOMEM);
		}

		/*
		 * The external storage is tied to isp->ref_cnt, with
		 * icl_soft_mbuf_done registered as the free callback.
		 */
		newmb->m_flags |= M_RDONLY;
		m_extaddref(newmb, __DECONST(char *, addr), len, &isp->ref_cnt,
		    icl_soft_mbuf_done, isp, NULL);
		newmb->m_len = len;
	} else {
		newmb = m_getm2(NULL, len, flags, MT_DATA, 0);
		if (newmb == NULL) {
			ICL_WARN("failed to allocate mbuf for %zd bytes", len);
			return (ENOMEM);
		}

		/* Fill each mbuf of the new chain in turn. */
		for (mb = newmb; mb != NULL; mb = mb->m_next) {
			copylen = min(M_TRAILINGSPACE(mb), len - off);
			memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
			mb->m_len = copylen;
			off += copylen;
		}
		KASSERT(off == len, ("%s: off != len", __func__));
	}

	if (request->ip_data_mbuf == NULL) {
		request->ip_data_mbuf = newmb;
		request->ip_data_len = len;
	} else {
		m_cat(request->ip_data_mbuf, newmb);
		request->ip_data_len += len;
	}

	return (0);
}

/*
 * Copy "len" bytes starting at offset "off" of the PDU's data segment
 * into "addr".
 */
void
icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{

	m_copydata(ip->ip_data_mbuf, off, len, addr);
}

/*
 * Queue a PDU for sending without a completion callback.
 */
static void
icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{

	icl_soft_conn_pdu_queue_cb(ic, ip, NULL);
}

/*
 * Queue a PDU for sending and record "cb" in the soft PDU.  The
 * connection lock must be held.
 */
static void
icl_soft_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip,
    icl_pdu_cb cb)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip;
	
ICL_CONN_LOCK_ASSERT(ic);
	isp->ref_cnt++;
	isp->cb = cb;

	/* A closed connection completes the PDU at once with ENOTCONN. */
	if (ic->ic_disconnecting || ic->ic_socket == NULL) {
		ICL_DEBUG("icl_pdu_queue on closed connection");
		icl_soft_pdu_done(ip, ENOTCONN);
		return;
	}

	if (!STAILQ_EMPTY(&ic->ic_to_send)) {
		STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
		/*
		 * If the queue is not empty, someone else had already
		 * signaled the send thread; no need to do that again,
		 * just return.
		 */
		return;
	}

	STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
	cv_signal(&ic->ic_send_cv);
}

/*
 * Create a connection object of class icl_soft_class, initialise its
 * send queue and condition variables, and account for it in icl_ncons.
 * "lock" becomes the connection lock; "name" is stored, not copied.
 */
static struct icl_conn *
icl_soft_new_conn(const char *name, struct mtx *lock)
{
	struct icl_conn *ic;

	refcount_acquire(&icl_ncons);

	ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT,
	    M_WAITOK | M_ZERO);

	STAILQ_INIT(&ic->ic_to_send);
	ic->ic_lock = lock;
	cv_init(&ic->ic_send_cv, "icl_tx");
	cv_init(&ic->ic_receive_cv, "icl_rx");
#ifdef DIAGNOSTIC
	refcount_init(&ic->ic_outstanding_pdus, 0);
#endif
-	ic->ic_max_data_segment_length = max_data_segment_length;
	ic->ic_name = name;
	ic->ic_offload = "None";
	ic->ic_unmapped = false;

	return (ic);
}

/*
 * Destroy a connection object created by icl_soft_new_conn() and drop
 * its icl_ncons reference.
 */
void
icl_soft_conn_free(struct icl_conn *ic)
{

#ifdef DIAGNOSTIC
	KASSERT(ic->ic_outstanding_pdus == 0,
	    ("destroying session with %d outstanding PDUs",
	     ic->ic_outstanding_pdus));
#endif
	cv_destroy(&ic->ic_send_cv);
	cv_destroy(&ic->ic_receive_cv);
	kobj_delete((struct kobj *)ic, M_ICL_SOFT);
	refcount_release(&icl_ncons);
}

/*
 * Prepare the already attached socket (buffer reservation, socket
 * options, upcalls) and start the send and receive threads.
 * Returns 0 or an errno value.
 */
static int
icl_conn_start(struct icl_conn *ic)
{
	size_t minspace;
	struct sockopt opt;
	int error, one = 1;

	ICL_CONN_LOCK(ic);

	/*
	 * XXX: Ugly hack.
	 */
	if (ic->ic_socket == NULL) {
		ICL_CONN_UNLOCK(ic);
		return (EINVAL);
	}

	/* Reset the receive state machine to expect a BHS first. */
	ic->ic_receive_state = ICL_CONN_STATE_BHS;
	ic->ic_receive_len = sizeof(struct iscsi_bhs);
	ic->ic_disconnecting = false;

	ICL_CONN_UNLOCK(ic);

	/*
	 * For sendspace, this is required because the current code cannot
	 * send a PDU in pieces; thus, the minimum buffer size is equal
	 * to the maximum PDU size.  "+4" is to account for possible padding.
*/ - minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + + minspace = sizeof(struct iscsi_bhs) + + ic->ic_max_send_data_segment_length + ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; if (sendspace < minspace) { ICL_WARN("kern.icl.sendspace too low; must be at least %zd", minspace); sendspace = minspace; } + minspace = sizeof(struct iscsi_bhs) + + ic->ic_max_recv_data_segment_length + + ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; if (recvspace < minspace) { ICL_WARN("kern.icl.recvspace too low; must be at least %zd", minspace); recvspace = minspace; } error = soreserve(ic->ic_socket, sendspace, recvspace); if (error != 0) { ICL_WARN("soreserve failed with error %d", error); icl_soft_conn_close(ic); return (error); } ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; /* * Disable Nagle. */ bzero(&opt, sizeof(opt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = IPPROTO_TCP; opt.sopt_name = TCP_NODELAY; opt.sopt_val = &one; opt.sopt_valsize = sizeof(one); error = sosetopt(ic->ic_socket, &opt); if (error != 0) { ICL_WARN("disabling TCP_NODELAY failed with error %d", error); icl_soft_conn_close(ic); return (error); } /* * Register socket upcall, to get notified about incoming PDUs * and free space to send outgoing ones. */ SOCKBUF_LOCK(&ic->ic_socket->so_snd); soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic); SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); SOCKBUF_LOCK(&ic->ic_socket->so_rcv); soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic); SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); /* * Start threads. 
*/
	ICL_CONN_LOCK(ic);
	/* Both flags are set before either thread is created. */
	ic->ic_send_running = ic->ic_receive_running = true;
	ICL_CONN_UNLOCK(ic);
	error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
	    ic->ic_name);
	if (error != 0) {
		ICL_WARN("kthread_add(9) failed with error %d", error);
		ICL_CONN_LOCK(ic);
		ic->ic_send_running = ic->ic_receive_running = false;
		cv_signal(&ic->ic_send_cv);
		ICL_CONN_UNLOCK(ic);
		icl_soft_conn_close(ic);
		return (error);
	}
	error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
	    ic->ic_name);
	if (error != 0) {
		ICL_WARN("kthread_add(9) failed with error %d", error);
		ICL_CONN_LOCK(ic);
		/* Only the receive flag is cleared on this path. */
		ic->ic_receive_running = false;
		cv_signal(&ic->ic_send_cv);
		ICL_CONN_UNLOCK(ic);
		icl_soft_conn_close(ic);
		return (error);
	}

	return (0);
}

/*
 * Take over the socket behind descriptor "fd" of the current thread and
 * start the connection on it.  Returns EINVAL when the descriptor is not
 * a stream socket, EBUSY when the connection already has a socket, or
 * the result of fget()/icl_conn_start().
 */
int
icl_soft_conn_handoff(struct icl_conn *ic, int fd)
{
	struct file *fp;
	struct socket *so;
	cap_rights_t rights;
	int error;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

#ifdef ICL_KERNEL_PROXY
	/*
	 * We're transitioning to Full Feature phase, and we don't
	 * really care.
	 */
	if (fd == 0) {
		ICL_CONN_LOCK(ic);
		if (ic->ic_socket == NULL) {
			ICL_CONN_UNLOCK(ic);
			ICL_WARN("proxy handoff without connect");
			return (EINVAL);
		}
		ICL_CONN_UNLOCK(ic);
		return (0);
	}
#endif

	/*
	 * Steal the socket from userland.
	 */
	error = fget(curthread, fd,
	    cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp);
	if (error != 0)
		return (error);
	if (fp->f_type != DTYPE_SOCKET) {
		fdrop(fp, curthread);
		return (EINVAL);
	}
	so = fp->f_data;
	if (so->so_type != SOCK_STREAM) {
		fdrop(fp, curthread);
		return (EINVAL);
	}

	ICL_CONN_LOCK(ic);

	if (ic->ic_socket != NULL) {
		ICL_CONN_UNLOCK(ic);
		fdrop(fp, curthread);
		return (EBUSY);
	}

	/* Detach the socket from the file before dropping the reference. */
	ic->ic_socket = fp->f_data;
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	fdrop(fp, curthread);
	ICL_CONN_UNLOCK(ic);

	error = icl_conn_start(ic);

	return (error);
}

/*
 * Shut the connection down: stop both threads, close the socket and
 * complete every PDU still queued with ENOTCONN.
 */
void
icl_soft_conn_close(struct icl_conn *ic)
{
	struct icl_pdu *pdu;
	struct socket *so;

	/*
	 * Wake up the threads, so they can properly terminate.
	 * Receive thread sleeps on so->so_rcv lock, send on ic->ic_lock.
*/
	ICL_CONN_LOCK(ic);
	if (!ic->ic_disconnecting) {
		so = ic->ic_socket;
		/*
		 * The flag is flipped with the receive sockbuf lock held,
		 * when there is a socket.
		 */
		if (so)
			SOCKBUF_LOCK(&so->so_rcv);
		ic->ic_disconnecting = true;
		if (so)
			SOCKBUF_UNLOCK(&so->so_rcv);
	}
	/* Both threads signal ic_send_cv when they exit. */
	while (ic->ic_receive_running || ic->ic_send_running) {
		cv_signal(&ic->ic_receive_cv);
		cv_signal(&ic->ic_send_cv);
		cv_wait(&ic->ic_send_cv, ic->ic_lock);
	}

	/* Some other thread could close the connection at the same time. */
	so = ic->ic_socket;
	if (so == NULL) {
		ICL_CONN_UNLOCK(ic);
		return;
	}
	ic->ic_socket = NULL;

	/*
	 * Deregister socket upcalls.
	 */
	ICL_CONN_UNLOCK(ic);
	SOCKBUF_LOCK(&so->so_snd);
	if (so->so_snd.sb_upcall != NULL)
		soupcall_clear(so, SO_SND);
	SOCKBUF_UNLOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	if (so->so_rcv.sb_upcall != NULL)
		soupcall_clear(so, SO_RCV);
	SOCKBUF_UNLOCK(&so->so_rcv);
	soclose(so);
	ICL_CONN_LOCK(ic);

	if (ic->ic_receive_pdu != NULL) {
		//ICL_DEBUG("freeing partially received PDU");
		icl_soft_conn_pdu_free(ic, ic->ic_receive_pdu);
		ic->ic_receive_pdu = NULL;
	}

	/*
	 * Remove any outstanding PDUs from the send queue.
*/
	while (!STAILQ_EMPTY(&ic->ic_to_send)) {
		pdu = STAILQ_FIRST(&ic->ic_to_send);
		STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
		icl_soft_pdu_done(pdu, ENOTCONN);
	}

	KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
	    ("destroying session with non-empty send queue"));
	ICL_CONN_UNLOCK(ic);
}

/*
 * Per-task setup hook; this implementation does nothing and returns 0.
 */
int
icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
    struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp)
{

	return (0);
}

/*
 * Per-task teardown hook; intentionally empty.
 */
void
icl_soft_conn_task_done(struct icl_conn *ic, void *prv)
{
}

/*
 * Per-transfer setup hook; this implementation does nothing and
 * returns 0.
 */
int
icl_soft_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
    uint32_t *transfer_tag, void **prvp)
{

	return (0);
}

/*
 * Per-transfer teardown hook; intentionally empty.
 */
void
icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv)
{
}

/*
 * Report this driver's limits.  The same max_data_segment_length value
 * is used for both the receive and the send direction.
 */
static int
icl_soft_limits(struct icl_drv_limits *idl)
{

	idl->idl_max_recv_data_segment_length = max_data_segment_length;
	idl->idl_max_send_data_segment_length = max_data_segment_length;
	idl->idl_max_burst_length = max_burst_length;
	idl->idl_first_burst_length = first_burst_length;

	return (0);
}

#ifdef ICL_KERNEL_PROXY
/*
 * Kernel-proxy connect; forwards to icl_soft_proxy_connect().
 */
int
icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{

	return (icl_soft_proxy_connect(ic, domain, socktype, protocol,
	    from_sa, to_sa));
}

/*
 * Attach an in-kernel stream socket to the connection and start it.
 * Returns EINVAL for a non-stream socket and EBUSY when the connection
 * already has one.
 */
int
icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so)
{
	int error;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	if (so->so_type != SOCK_STREAM)
		return (EINVAL);

	ICL_CONN_LOCK(ic);
	if (ic->ic_socket != NULL) {
		ICL_CONN_UNLOCK(ic);
		return (EBUSY);
	}
	ic->ic_socket = so;
	ICL_CONN_UNLOCK(ic);

	error = icl_conn_start(ic);

	return (error);
}
#endif /* ICL_KERNEL_PROXY */

/*
 * Module load: create the PDU zone, reset the connection counter and
 * register the driver with ICL.
 */
static int
icl_soft_load(void)
{
	int error;

	icl_soft_pdu_zone = uma_zcreate("icl_soft_pdu",
	    sizeof(struct icl_soft_pdu), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	refcount_init(&icl_ncons, 0);

	/*
	 * The reason we call this "none" is that to the user,
	 * it's known as "offload driver"; "offload driver: soft"
	 * doesn't make much sense.
*/
	error = icl_register("none", false, 0,
	    icl_soft_limits, icl_soft_new_conn);
	KASSERT(error == 0, ("failed to register"));

#if defined(ICL_KERNEL_PROXY) && 0
	/*
	 * Debugging aid for kernel proxy functionality.
	 */
	error = icl_register("proxytest", true, 0,
	    icl_soft_limits, icl_soft_new_conn);
	KASSERT(error == 0, ("failed to register"));
#endif

	return (error);
}

/*
 * Module unload: refuse with EBUSY while connections exist, otherwise
 * unregister the driver and destroy the PDU zone.
 */
static int
icl_soft_unload(void)
{

	if (icl_ncons != 0)
		return (EBUSY);

	icl_unregister("none", false);
#if defined(ICL_KERNEL_PROXY) && 0
	icl_unregister("proxytest", true);
#endif

	uma_zdestroy(icl_soft_pdu_zone);

	return (0);
}

/*
 * Module event handler; only MOD_LOAD and MOD_UNLOAD are supported,
 * anything else yields EINVAL.
 */
static int
icl_soft_modevent(module_t mod, int what, void *arg)
{

	switch (what) {
	case MOD_LOAD:
		return (icl_soft_load());
	case MOD_UNLOAD:
		return (icl_soft_unload());
	default:
		return (EINVAL);
	}
}

moduledata_t icl_soft_data = {
	"icl_soft",
	icl_soft_modevent,
	0
};

DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_soft, icl, 1, 1, 1);
MODULE_VERSION(icl_soft, 1);
diff --git a/sys/dev/iscsi/iscsi.c b/sys/dev/iscsi/iscsi.c
index 13a35c371c40..7ddb5a9ce1ec 100644
--- a/sys/dev/iscsi/iscsi.c
+++ b/sys/dev/iscsi/iscsi.c
@@ -1,2636 +1,2636 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2012 The FreeBSD Foundation
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ICL_KERNEL_PROXY #include #endif #ifdef ICL_KERNEL_PROXY FEATURE(iscsi_kernel_proxy, "iSCSI initiator built with ICL_KERNEL_PROXY"); #endif /* * XXX: This is global so the iscsi_unload() can access it. * Think about how to do this properly. 
*/
static struct iscsi_softc	*sc;

/* Tunables exported under the kern.iscsi sysctl node. */
SYSCTL_NODE(_kern, OID_AUTO, iscsi, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "iSCSI initiator");
/* Verbosity: >0 enables the *_WARN macros, >1 the *_DEBUG macros below. */
static int debug = 1;
SYSCTL_INT(_kern_iscsi, OID_AUTO, debug, CTLFLAG_RWTUN,
    &debug, 0, "Enable debug messages");
static int ping_timeout = 5;
SYSCTL_INT(_kern_iscsi, OID_AUTO, ping_timeout, CTLFLAG_RWTUN, &ping_timeout,
    0, "Timeout for ping (NOP-Out) requests, in seconds");
static int iscsid_timeout = 60;
SYSCTL_INT(_kern_iscsi, OID_AUTO, iscsid_timeout, CTLFLAG_RWTUN, &iscsid_timeout,
    0, "Time to wait for iscsid(8) to handle reconnection, in seconds");
static int login_timeout = 60;
SYSCTL_INT(_kern_iscsi, OID_AUTO, login_timeout, CTLFLAG_RWTUN, &login_timeout,
    0, "Time to wait for iscsid(8) to finish Login Phase, in seconds");
static int maxtags = 255;
SYSCTL_INT(_kern_iscsi, OID_AUTO, maxtags, CTLFLAG_RWTUN, &maxtags,
    0, "Max number of IO requests queued");
static int fail_on_disconnection = 0;
SYSCTL_INT(_kern_iscsi, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
    &fail_on_disconnection, 0, "Destroy CAM SIM on connection failure");
static int fail_on_shutdown = 1;
SYSCTL_INT(_kern_iscsi, OID_AUTO, fail_on_shutdown, CTLFLAG_RWTUN,
    &fail_on_shutdown, 0, "Fail disconnected sessions on shutdown");

static MALLOC_DEFINE(M_ISCSI, "iSCSI", "iSCSI initiator");
static uma_zone_t iscsi_outstanding_zone;

/* Map a connection, or a PDU via its connection, to the owning session. */
#define	CONN_SESSION(X)	((struct iscsi_session *)X->ic_prv0)
#define	PDU_SESSION(X)	(CONN_SESSION(X->ip_conn))

/*
 * Logging helpers.  The format string X is pasted between string
 * literals, so it has to be a literal itself.  The SESSION variants
 * also print the session's target address and target name.
 */
#define	ISCSI_DEBUG(X, ...)						\
	do {								\
		if (debug > 1) 						\
			printf("%s: " X "\n", __func__, ## __VA_ARGS__);\
	} while (0)

#define	ISCSI_WARN(X, ...)						\
	do {								\
		if (debug > 0) {					\
			printf("WARNING: %s: " X "\n",			\
			    __func__, ## __VA_ARGS__);			\
		}							\
	} while (0)

#define	ISCSI_SESSION_DEBUG(S, X, ...)					\
	do {								\
		if (debug > 1) {					\
			printf("%s: %s (%s): " X "\n",			\
			    __func__, S->is_conf.isc_target_addr,	\
			    S->is_conf.isc_target, ## __VA_ARGS__);	\
		}							\
	} while (0)

#define	ISCSI_SESSION_WARN(S, X, ...)	
\
	do {								\
		if (debug > 0) {					\
			printf("WARNING: %s (%s): " X "\n",		\
			    S->is_conf.isc_target_addr,			\
			    S->is_conf.isc_target, ## __VA_ARGS__);	\
		}							\
	} while (0)

/* Wrappers around the per-session mutex is_lock. */
#define ISCSI_SESSION_LOCK(X)		mtx_lock(&X->is_lock)
#define ISCSI_SESSION_UNLOCK(X)		mtx_unlock(&X->is_lock)
#define ISCSI_SESSION_LOCK_ASSERT(X)	mtx_assert(&X->is_lock, MA_OWNED)
#define ISCSI_SESSION_LOCK_ASSERT_NOT(X)	mtx_assert(&X->is_lock, MA_NOTOWNED)

static int	iscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg,
		    int mode, struct thread *td);

/* Character device switch; ioctl is the only entry point provided. */
static struct cdevsw iscsi_cdevsw = {
     .d_version = D_VERSION,
     .d_ioctl   = iscsi_ioctl,
     .d_name    = "iscsi",
};

/* Forward declarations for functions defined later in the file. */
static void	iscsi_pdu_queue_locked(struct icl_pdu *request);
static void	iscsi_pdu_queue(struct icl_pdu *request);
static void	iscsi_pdu_update_statsn(const struct icl_pdu *response);
static void	iscsi_pdu_handle_nop_in(struct icl_pdu *response);
static void	iscsi_pdu_handle_scsi_response(struct icl_pdu *response);
static void	iscsi_pdu_handle_task_response(struct icl_pdu *response);
static void	iscsi_pdu_handle_data_in(struct icl_pdu *response);
static void	iscsi_pdu_handle_logout_response(struct icl_pdu *response);
static void	iscsi_pdu_handle_r2t(struct icl_pdu *response);
static void	iscsi_pdu_handle_async_message(struct icl_pdu *response);
static void	iscsi_pdu_handle_reject(struct icl_pdu *response);
static void	iscsi_session_reconnect(struct iscsi_session *is);
static void	iscsi_session_terminate(struct iscsi_session *is);
static void	iscsi_action(struct cam_sim *sim, union ccb *ccb);
static struct iscsi_outstanding	*iscsi_outstanding_find(struct iscsi_session *is,
		    uint32_t initiator_task_tag);
static struct iscsi_outstanding	*iscsi_outstanding_add(struct iscsi_session *is,
		    struct icl_pdu *request, union ccb *ccb,
		    uint32_t *initiator_task_tagp);
static void	iscsi_outstanding_remove(struct iscsi_session *is,
		    struct iscsi_outstanding *io);

/*
 * Fill in CmdSN and ExpStatSN of an outgoing request.  Returns true when
 * the PDU has to be postponed because of the current MaxCmdSN, false
 * when it is ready to be sent.  The session lock must be held.
 */
static bool
iscsi_pdu_prepare(struct icl_pdu *request)
{
	struct iscsi_session *is;
	struct iscsi_bhs_scsi_command 
*bhssc;

	is = PDU_SESSION(request);

	ISCSI_SESSION_LOCK_ASSERT(is);

	/*
	 * We're only using fields common for all the request
	 * (initiator -> target) PDUs.
	 */
	bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs;

	/*
	 * Data-Out PDU does not contain CmdSN.
	 */
	if (bhssc->bhssc_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_OUT) {
		if (ISCSI_SNGT(is->is_cmdsn, is->is_maxcmdsn) &&
		    (bhssc->bhssc_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) == 0) {
			/*
			 * Current MaxCmdSN prevents us from sending any more
			 * SCSI Command PDUs to the target; postpone the PDU.
			 * It will get resent by either iscsi_pdu_queue(),
			 * or by maintenance thread.
			 */
#if 0
			ISCSI_SESSION_DEBUG(is, "postponing send, CmdSN %u, "
			    "ExpCmdSN %u, MaxCmdSN %u, opcode 0x%x",
			    is->is_cmdsn, is->is_expcmdsn, is->is_maxcmdsn,
			    bhssc->bhssc_opcode);
#endif
			return (true);
		}
		bhssc->bhssc_cmdsn = htonl(is->is_cmdsn);
		/* Immediate PDUs carry the CmdSN but do not advance it. */
		if ((bhssc->bhssc_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) == 0)
			is->is_cmdsn++;
	}
	bhssc->bhssc_expstatsn = htonl(is->is_statsn + 1);

	return (false);
}

/*
 * Try to send the PDUs parked on is_postponed, in order.  Stops at the
 * first one that still has to wait; releases the SIM queue once the
 * list has been drained.  The session lock must be held.
 */
static void
iscsi_session_send_postponed(struct iscsi_session *is)
{
	struct icl_pdu *request;
	bool postpone;

	ISCSI_SESSION_LOCK_ASSERT(is);

	if (STAILQ_EMPTY(&is->is_postponed))
		return;
	while ((request = STAILQ_FIRST(&is->is_postponed)) != NULL) {
		postpone = iscsi_pdu_prepare(request);
		if (postpone)
			return;
		STAILQ_REMOVE_HEAD(&is->is_postponed, ip_next);
		icl_pdu_queue(request);
	}
	xpt_release_simq(is->is_sim, 1);
}

/*
 * Queue a request for sending, or park it on is_postponed when
 * iscsi_pdu_prepare() says it cannot go out yet.  The SIM queue is
 * frozen when the first PDU gets postponed.  The session lock must
 * be held.
 */
static void
iscsi_pdu_queue_locked(struct icl_pdu *request)
{
	struct iscsi_session *is;
	bool postpone;

	is = PDU_SESSION(request);
	ISCSI_SESSION_LOCK_ASSERT(is);
	/* Older postponed PDUs get their chance first. */
	iscsi_session_send_postponed(is);
	postpone = iscsi_pdu_prepare(request);
	if (postpone) {
		if (STAILQ_EMPTY(&is->is_postponed))
			xpt_freeze_simq(is->is_sim, 1);
		STAILQ_INSERT_TAIL(&is->is_postponed, request, ip_next);
		return;
	}
	icl_pdu_queue(request);
}

/*
 * Locking wrapper around iscsi_pdu_queue_locked().
 */
static void
iscsi_pdu_queue(struct icl_pdu *request)
{
	struct iscsi_session *is;

	is = PDU_SESSION(request);
	ISCSI_SESSION_LOCK(is);
	iscsi_pdu_queue_locked(request);
	
ISCSI_SESSION_UNLOCK(is); } static void iscsi_session_logout(struct iscsi_session *is) { struct icl_pdu *request; struct iscsi_bhs_logout_request *bhslr; request = icl_pdu_new(is->is_conn, M_NOWAIT); if (request == NULL) return; bhslr = (struct iscsi_bhs_logout_request *)request->ip_bhs; bhslr->bhslr_opcode = ISCSI_BHS_OPCODE_LOGOUT_REQUEST; bhslr->bhslr_reason = BHSLR_REASON_CLOSE_SESSION; iscsi_pdu_queue_locked(request); } static void iscsi_session_terminate_task(struct iscsi_session *is, struct iscsi_outstanding *io, cam_status status) { ISCSI_SESSION_LOCK_ASSERT(is); if (io->io_ccb != NULL) { io->io_ccb->ccb_h.status &= ~(CAM_SIM_QUEUED | CAM_STATUS_MASK); io->io_ccb->ccb_h.status |= status; if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { io->io_ccb->ccb_h.status |= CAM_DEV_QFRZN; xpt_freeze_devq(io->io_ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } xpt_done(io->io_ccb); } iscsi_outstanding_remove(is, io); } static void iscsi_session_terminate_tasks(struct iscsi_session *is, cam_status status) { struct iscsi_outstanding *io, *tmp; ISCSI_SESSION_LOCK_ASSERT(is); TAILQ_FOREACH_SAFE(io, &is->is_outstanding, io_next, tmp) { iscsi_session_terminate_task(is, io, status); } } static void iscsi_session_cleanup(struct iscsi_session *is, bool destroy_sim) { struct icl_pdu *pdu; ISCSI_SESSION_LOCK_ASSERT(is); /* * Don't queue any new PDUs. */ if (is->is_sim != NULL && is->is_simq_frozen == false) { ISCSI_SESSION_DEBUG(is, "freezing"); xpt_freeze_simq(is->is_sim, 1); is->is_simq_frozen = true; } /* * Remove postponed PDUs. */ if (!STAILQ_EMPTY(&is->is_postponed)) xpt_release_simq(is->is_sim, 1); while ((pdu = STAILQ_FIRST(&is->is_postponed)) != NULL) { STAILQ_REMOVE_HEAD(&is->is_postponed, ip_next); icl_pdu_free(pdu); } if (destroy_sim == false) { /* * Terminate SCSI tasks, asking CAM to requeue them. 
*/ iscsi_session_terminate_tasks(is, CAM_REQUEUE_REQ); return; } iscsi_session_terminate_tasks(is, CAM_DEV_NOT_THERE); if (is->is_sim == NULL) return; ISCSI_SESSION_DEBUG(is, "deregistering SIM"); xpt_async(AC_LOST_DEVICE, is->is_path, NULL); if (is->is_simq_frozen) { is->is_simq_frozen = false; xpt_release_simq(is->is_sim, 1); } xpt_free_path(is->is_path); is->is_path = NULL; xpt_bus_deregister(cam_sim_path(is->is_sim)); cam_sim_free(is->is_sim, TRUE /*free_devq*/); is->is_sim = NULL; is->is_devq = NULL; } static void iscsi_maintenance_thread_reconnect(struct iscsi_session *is) { icl_conn_close(is->is_conn); ISCSI_SESSION_LOCK(is); is->is_connected = false; is->is_reconnecting = false; is->is_login_phase = false; #ifdef ICL_KERNEL_PROXY if (is->is_login_pdu != NULL) { icl_pdu_free(is->is_login_pdu); is->is_login_pdu = NULL; } cv_signal(&is->is_login_cv); #endif if (fail_on_disconnection) { ISCSI_SESSION_DEBUG(is, "connection failed, destroying devices"); iscsi_session_cleanup(is, true); } else { iscsi_session_cleanup(is, false); } KASSERT(TAILQ_EMPTY(&is->is_outstanding), ("destroying session with active tasks")); KASSERT(STAILQ_EMPTY(&is->is_postponed), ("destroying session with postponed PDUs")); if (is->is_conf.isc_enable == 0 && is->is_conf.isc_discovery == 0) { ISCSI_SESSION_UNLOCK(is); return; } /* * Request immediate reconnection from iscsid(8). 
*/ //ISCSI_SESSION_DEBUG(is, "waking up iscsid(8)"); is->is_waiting_for_iscsid = true; strlcpy(is->is_reason, "Waiting for iscsid(8)", sizeof(is->is_reason)); is->is_timeout = 0; ISCSI_SESSION_UNLOCK(is); cv_signal(&is->is_softc->sc_cv); } static void iscsi_maintenance_thread_terminate(struct iscsi_session *is) { struct iscsi_softc *sc; sc = is->is_softc; sx_xlock(&sc->sc_lock); TAILQ_REMOVE(&sc->sc_sessions, is, is_next); sx_xunlock(&sc->sc_lock); icl_conn_close(is->is_conn); callout_drain(&is->is_callout); ISCSI_SESSION_LOCK(is); KASSERT(is->is_terminating, ("is_terminating == false")); #ifdef ICL_KERNEL_PROXY if (is->is_login_pdu != NULL) { icl_pdu_free(is->is_login_pdu); is->is_login_pdu = NULL; } cv_signal(&is->is_login_cv); #endif iscsi_session_cleanup(is, true); KASSERT(TAILQ_EMPTY(&is->is_outstanding), ("destroying session with active tasks")); KASSERT(STAILQ_EMPTY(&is->is_postponed), ("destroying session with postponed PDUs")); ISCSI_SESSION_UNLOCK(is); icl_conn_free(is->is_conn); mtx_destroy(&is->is_lock); cv_destroy(&is->is_maintenance_cv); #ifdef ICL_KERNEL_PROXY cv_destroy(&is->is_login_cv); #endif ISCSI_SESSION_DEBUG(is, "terminated"); free(is, M_ISCSI); /* * The iscsi_unload() routine might be waiting. */ cv_signal(&sc->sc_cv); } static void iscsi_maintenance_thread(void *arg) { struct iscsi_session *is = arg; ISCSI_SESSION_LOCK(is); for (;;) { if (is->is_reconnecting == false && is->is_terminating == false && (STAILQ_EMPTY(&is->is_postponed) || ISCSI_SNGT(is->is_cmdsn, is->is_maxcmdsn))) cv_wait(&is->is_maintenance_cv, &is->is_lock); /* Terminate supersedes reconnect. 
*/ if (is->is_terminating) { ISCSI_SESSION_UNLOCK(is); iscsi_maintenance_thread_terminate(is); kthread_exit(); return; } if (is->is_reconnecting) { ISCSI_SESSION_UNLOCK(is); iscsi_maintenance_thread_reconnect(is); ISCSI_SESSION_LOCK(is); continue; } iscsi_session_send_postponed(is); } ISCSI_SESSION_UNLOCK(is); } static void iscsi_session_reconnect(struct iscsi_session *is) { /* * XXX: We can't use locking here, because * it's being called from various contexts. * Hope it doesn't break anything. */ if (is->is_reconnecting) return; is->is_reconnecting = true; cv_signal(&is->is_maintenance_cv); } static void iscsi_session_terminate(struct iscsi_session *is) { if (is->is_terminating) return; is->is_terminating = true; #if 0 iscsi_session_logout(is); #endif cv_signal(&is->is_maintenance_cv); } static void iscsi_callout(void *context) { struct icl_pdu *request; struct iscsi_bhs_nop_out *bhsno; struct iscsi_session *is; bool reconnect_needed = false; is = context; ISCSI_SESSION_LOCK(is); if (is->is_terminating) { ISCSI_SESSION_UNLOCK(is); return; } callout_schedule(&is->is_callout, 1 * hz); if (is->is_conf.isc_enable == 0) goto out; is->is_timeout++; if (is->is_waiting_for_iscsid) { if (iscsid_timeout > 0 && is->is_timeout > iscsid_timeout) { ISCSI_SESSION_WARN(is, "timed out waiting for iscsid(8) " "for %d seconds; reconnecting", is->is_timeout); reconnect_needed = true; } goto out; } if (is->is_login_phase) { if (login_timeout > 0 && is->is_timeout > login_timeout) { ISCSI_SESSION_WARN(is, "login timed out after %d seconds; " "reconnecting", is->is_timeout); reconnect_needed = true; } goto out; } if (ping_timeout <= 0) { /* * Pings are disabled. Don't send NOP-Out in this case. * Reset the timeout, to avoid triggering reconnection, * should the user decide to reenable them. 
*/ is->is_timeout = 0; goto out; } if (is->is_timeout >= ping_timeout) { ISCSI_SESSION_WARN(is, "no ping reply (NOP-In) after %d seconds; " "reconnecting", ping_timeout); reconnect_needed = true; goto out; } ISCSI_SESSION_UNLOCK(is); /* * If the ping was reset less than one second ago - which means * that we've received some PDU during the last second - assume * the traffic flows correctly and don't bother sending a NOP-Out. * * (It's 2 - one for one second, and one for incrementing is_timeout * earlier in this routine.) */ if (is->is_timeout < 2) return; request = icl_pdu_new(is->is_conn, M_NOWAIT); if (request == NULL) { ISCSI_SESSION_WARN(is, "failed to allocate PDU"); return; } bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs; bhsno->bhsno_opcode = ISCSI_BHS_OPCODE_NOP_OUT | ISCSI_BHS_OPCODE_IMMEDIATE; bhsno->bhsno_flags = 0x80; bhsno->bhsno_target_transfer_tag = 0xffffffff; iscsi_pdu_queue(request); return; out: if (is->is_terminating) { ISCSI_SESSION_UNLOCK(is); return; } ISCSI_SESSION_UNLOCK(is); if (reconnect_needed) iscsi_session_reconnect(is); } static void iscsi_pdu_update_statsn(const struct icl_pdu *response) { const struct iscsi_bhs_data_in *bhsdi; struct iscsi_session *is; uint32_t expcmdsn, maxcmdsn, statsn; is = PDU_SESSION(response); ISCSI_SESSION_LOCK_ASSERT(is); /* * We're only using fields common for all the response * (target -> initiator) PDUs. */ bhsdi = (const struct iscsi_bhs_data_in *)response->ip_bhs; /* * Ok, I lied. In case of Data-In, "The fields StatSN, Status, * and Residual Count only have meaningful content if the S bit * is set to 1", so we also need to check the bit specific for * Data-In PDU. 
*/ if (bhsdi->bhsdi_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_IN || (bhsdi->bhsdi_flags & BHSDI_FLAGS_S) != 0) { statsn = ntohl(bhsdi->bhsdi_statsn); if (statsn != is->is_statsn && statsn != (is->is_statsn + 1)) { /* XXX: This is normal situation for MCS */ ISCSI_SESSION_WARN(is, "PDU 0x%x StatSN %u != " "session ExpStatSN %u (or + 1); reconnecting", bhsdi->bhsdi_opcode, statsn, is->is_statsn); iscsi_session_reconnect(is); } if (ISCSI_SNGT(statsn, is->is_statsn)) is->is_statsn = statsn; } expcmdsn = ntohl(bhsdi->bhsdi_expcmdsn); maxcmdsn = ntohl(bhsdi->bhsdi_maxcmdsn); if (ISCSI_SNLT(maxcmdsn + 1, expcmdsn)) { ISCSI_SESSION_DEBUG(is, "PDU MaxCmdSN %u + 1 < PDU ExpCmdSN %u; ignoring", maxcmdsn, expcmdsn); } else { if (ISCSI_SNGT(maxcmdsn, is->is_maxcmdsn)) { is->is_maxcmdsn = maxcmdsn; /* * Command window increased; kick the maintenance thread * to send out postponed commands. */ if (!STAILQ_EMPTY(&is->is_postponed)) cv_signal(&is->is_maintenance_cv); } else if (ISCSI_SNLT(maxcmdsn, is->is_maxcmdsn)) { /* XXX: This is normal situation for MCS */ ISCSI_SESSION_DEBUG(is, "PDU MaxCmdSN %u < session MaxCmdSN %u; ignoring", maxcmdsn, is->is_maxcmdsn); } if (ISCSI_SNGT(expcmdsn, is->is_expcmdsn)) { is->is_expcmdsn = expcmdsn; } else if (ISCSI_SNLT(expcmdsn, is->is_expcmdsn)) { /* XXX: This is normal situation for MCS */ ISCSI_SESSION_DEBUG(is, "PDU ExpCmdSN %u < session ExpCmdSN %u; ignoring", expcmdsn, is->is_expcmdsn); } } /* * Every incoming PDU - not just NOP-In - resets the ping timer. * The purpose of the timeout is to reset the connection when it stalls; * we don't want this to happen when NOP-In or NOP-Out ends up delayed * in some queue.
*/ is->is_timeout = 0; } static void iscsi_receive_callback(struct icl_pdu *response) { struct iscsi_session *is; is = PDU_SESSION(response); ISCSI_SESSION_LOCK(is); iscsi_pdu_update_statsn(response); #ifdef ICL_KERNEL_PROXY if (is->is_login_phase) { if (is->is_login_pdu == NULL) is->is_login_pdu = response; else icl_pdu_free(response); ISCSI_SESSION_UNLOCK(is); cv_signal(&is->is_login_cv); return; } #endif /* * The handling routine is responsible for freeing the PDU * when it's no longer needed. */ switch (response->ip_bhs->bhs_opcode) { case ISCSI_BHS_OPCODE_NOP_IN: iscsi_pdu_handle_nop_in(response); ISCSI_SESSION_UNLOCK(is); break; case ISCSI_BHS_OPCODE_SCSI_RESPONSE: iscsi_pdu_handle_scsi_response(response); /* Session lock dropped inside. */ ISCSI_SESSION_LOCK_ASSERT_NOT(is); break; case ISCSI_BHS_OPCODE_TASK_RESPONSE: iscsi_pdu_handle_task_response(response); ISCSI_SESSION_UNLOCK(is); break; case ISCSI_BHS_OPCODE_SCSI_DATA_IN: iscsi_pdu_handle_data_in(response); /* Session lock dropped inside. 
*/ ISCSI_SESSION_LOCK_ASSERT_NOT(is); break; case ISCSI_BHS_OPCODE_LOGOUT_RESPONSE: iscsi_pdu_handle_logout_response(response); ISCSI_SESSION_UNLOCK(is); break; case ISCSI_BHS_OPCODE_R2T: iscsi_pdu_handle_r2t(response); ISCSI_SESSION_UNLOCK(is); break; case ISCSI_BHS_OPCODE_ASYNC_MESSAGE: iscsi_pdu_handle_async_message(response); ISCSI_SESSION_UNLOCK(is); break; case ISCSI_BHS_OPCODE_REJECT: iscsi_pdu_handle_reject(response); ISCSI_SESSION_UNLOCK(is); break; default: ISCSI_SESSION_WARN(is, "received PDU with unsupported " "opcode 0x%x; reconnecting", response->ip_bhs->bhs_opcode); iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); icl_pdu_free(response); } } static void iscsi_error_callback(struct icl_conn *ic) { struct iscsi_session *is; is = CONN_SESSION(ic); ISCSI_SESSION_WARN(is, "connection error; reconnecting"); iscsi_session_reconnect(is); } static void iscsi_pdu_handle_nop_in(struct icl_pdu *response) { struct iscsi_session *is; struct iscsi_bhs_nop_out *bhsno; struct iscsi_bhs_nop_in *bhsni; struct icl_pdu *request; void *data = NULL; size_t datasize; int error; is = PDU_SESSION(response); bhsni = (struct iscsi_bhs_nop_in *)response->ip_bhs; if (bhsni->bhsni_target_transfer_tag == 0xffffffff) { /* * Nothing to do; iscsi_pdu_update_statsn() already * zeroed the timeout. 
*/ icl_pdu_free(response); return; } datasize = icl_pdu_data_segment_length(response); if (datasize > 0) { data = malloc(datasize, M_ISCSI, M_NOWAIT | M_ZERO); if (data == NULL) { ISCSI_SESSION_WARN(is, "failed to allocate memory; " "reconnecting"); icl_pdu_free(response); iscsi_session_reconnect(is); return; } icl_pdu_get_data(response, 0, data, datasize); } request = icl_pdu_new(response->ip_conn, M_NOWAIT); if (request == NULL) { ISCSI_SESSION_WARN(is, "failed to allocate memory; " "reconnecting"); free(data, M_ISCSI); icl_pdu_free(response); iscsi_session_reconnect(is); return; } bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs; bhsno->bhsno_opcode = ISCSI_BHS_OPCODE_NOP_OUT | ISCSI_BHS_OPCODE_IMMEDIATE; bhsno->bhsno_flags = 0x80; bhsno->bhsno_initiator_task_tag = 0xffffffff; bhsno->bhsno_target_transfer_tag = bhsni->bhsni_target_transfer_tag; if (datasize > 0) { error = icl_pdu_append_data(request, data, datasize, M_NOWAIT); if (error != 0) { ISCSI_SESSION_WARN(is, "failed to allocate memory; " "reconnecting"); free(data, M_ISCSI); icl_pdu_free(request); icl_pdu_free(response); iscsi_session_reconnect(is); return; } free(data, M_ISCSI); } icl_pdu_free(response); iscsi_pdu_queue_locked(request); } static void iscsi_pdu_handle_scsi_response(struct icl_pdu *response) { struct iscsi_bhs_scsi_response *bhssr; struct iscsi_outstanding *io; struct iscsi_session *is; union ccb *ccb; struct ccb_scsiio *csio; size_t data_segment_len, received; uint16_t sense_len; uint32_t resid; is = PDU_SESSION(response); bhssr = (struct iscsi_bhs_scsi_response *)response->ip_bhs; io = iscsi_outstanding_find(is, bhssr->bhssr_initiator_task_tag); if (io == NULL || io->io_ccb == NULL) { ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhssr->bhssr_initiator_task_tag); icl_pdu_free(response); iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); return; } ccb = io->io_ccb; /* * With iSER, after getting good response we can be sure * that all the data has been successfully transferred. 
*/ if (is->is_conn->ic_iser) { resid = ntohl(bhssr->bhssr_residual_count); if (bhssr->bhssr_flags & BHSSR_FLAGS_RESIDUAL_UNDERFLOW) { io->io_received = ccb->csio.dxfer_len - resid; } else if (bhssr->bhssr_flags & BHSSR_FLAGS_RESIDUAL_OVERFLOW) { ISCSI_SESSION_WARN(is, "overflow: target indicates %d", resid); } else { io->io_received = ccb->csio.dxfer_len; } } received = io->io_received; iscsi_outstanding_remove(is, io); ISCSI_SESSION_UNLOCK(is); if (bhssr->bhssr_response != BHSSR_RESPONSE_COMMAND_COMPLETED) { ISCSI_SESSION_WARN(is, "service response 0x%x", bhssr->bhssr_response); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN; } else if (bhssr->bhssr_status == 0) { ccb->ccb_h.status = CAM_REQ_CMP; } else { if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_DEV_QFRZN; ccb->csio.scsi_status = bhssr->bhssr_status; } csio = &ccb->csio; data_segment_len = icl_pdu_data_segment_length(response); if (data_segment_len > 0) { if (data_segment_len < sizeof(sense_len)) { ISCSI_SESSION_WARN(is, "truncated data segment (%zd bytes)", data_segment_len); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN; goto out; } icl_pdu_get_data(response, 0, &sense_len, sizeof(sense_len)); sense_len = ntohs(sense_len); #if 0 ISCSI_SESSION_DEBUG(is, "sense_len %d, data len %zd", sense_len, data_segment_len); #endif if (sizeof(sense_len) + sense_len > data_segment_len) { ISCSI_SESSION_WARN(is, "truncated data segment " "(%zd bytes, should be %zd)", data_segment_len, sizeof(sense_len) + sense_len); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, 
"freezing devq"); } ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN; goto out; } else if (sizeof(sense_len) + sense_len < data_segment_len) ISCSI_SESSION_WARN(is, "oversize data segment " "(%zd bytes, should be %zd)", data_segment_len, sizeof(sense_len) + sense_len); if (sense_len > csio->sense_len) { ISCSI_SESSION_DEBUG(is, "truncating sense from %d to %d", sense_len, csio->sense_len); sense_len = csio->sense_len; } icl_pdu_get_data(response, sizeof(sense_len), &csio->sense_data, sense_len); csio->sense_resid = csio->sense_len - sense_len; ccb->ccb_h.status |= CAM_AUTOSNS_VALID; } out: if (bhssr->bhssr_flags & BHSSR_FLAGS_RESIDUAL_UNDERFLOW) csio->resid = ntohl(bhssr->bhssr_residual_count); if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { KASSERT(received <= csio->dxfer_len, ("received > csio->dxfer_len")); if (received < csio->dxfer_len) { if (csio->resid != csio->dxfer_len - received) { ISCSI_SESSION_WARN(is, "underflow mismatch: " "target indicates %d, we calculated %zd", csio->resid, csio->dxfer_len - received); } csio->resid = csio->dxfer_len - received; } } xpt_done(ccb); icl_pdu_free(response); } static void iscsi_pdu_handle_task_response(struct icl_pdu *response) { struct iscsi_bhs_task_management_response *bhstmr; struct iscsi_outstanding *io, *aio; struct iscsi_session *is; is = PDU_SESSION(response); bhstmr = (struct iscsi_bhs_task_management_response *)response->ip_bhs; io = iscsi_outstanding_find(is, bhstmr->bhstmr_initiator_task_tag); if (io == NULL || io->io_ccb != NULL) { ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhstmr->bhstmr_initiator_task_tag); icl_pdu_free(response); iscsi_session_reconnect(is); return; } if (bhstmr->bhstmr_response != BHSTMR_RESPONSE_FUNCTION_COMPLETE) { ISCSI_SESSION_WARN(is, "task response 0x%x", bhstmr->bhstmr_response); } else { aio = iscsi_outstanding_find(is, io->io_referenced_task_tag); if (aio != NULL && aio->io_ccb != NULL) iscsi_session_terminate_task(is, aio, CAM_REQ_ABORTED); } iscsi_outstanding_remove(is, 
io); icl_pdu_free(response); } static void iscsi_pdu_handle_data_in(struct icl_pdu *response) { struct iscsi_bhs_data_in *bhsdi; struct iscsi_outstanding *io; struct iscsi_session *is; union ccb *ccb; struct ccb_scsiio *csio; size_t data_segment_len, received, oreceived; is = PDU_SESSION(response); bhsdi = (struct iscsi_bhs_data_in *)response->ip_bhs; io = iscsi_outstanding_find(is, bhsdi->bhsdi_initiator_task_tag); if (io == NULL || io->io_ccb == NULL) { ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhsdi->bhsdi_initiator_task_tag); icl_pdu_free(response); iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); return; } data_segment_len = icl_pdu_data_segment_length(response); if (data_segment_len == 0) { /* * "The sending of 0 length data segments should be avoided, * but initiators and targets MUST be able to properly receive * 0 length data segments." */ ISCSI_SESSION_UNLOCK(is); icl_pdu_free(response); return; } /* * We need to track this for security reasons - without it, malicious target * could respond to SCSI READ without sending Data-In PDUs, which would result * in read operation on the initiator side returning random kernel data. 
*/ if (ntohl(bhsdi->bhsdi_buffer_offset) != io->io_received) { ISCSI_SESSION_WARN(is, "data out of order; expected offset %zd, got %zd", io->io_received, (size_t)ntohl(bhsdi->bhsdi_buffer_offset)); icl_pdu_free(response); iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); return; } ccb = io->io_ccb; csio = &ccb->csio; if (io->io_received + data_segment_len > csio->dxfer_len) { ISCSI_SESSION_WARN(is, "oversize data segment (%zd bytes " "at offset %zd, buffer is %d)", data_segment_len, io->io_received, csio->dxfer_len); icl_pdu_free(response); iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); return; } oreceived = io->io_received; io->io_received += data_segment_len; received = io->io_received; if ((bhsdi->bhsdi_flags & BHSDI_FLAGS_S) != 0) iscsi_outstanding_remove(is, io); ISCSI_SESSION_UNLOCK(is); icl_pdu_get_data(response, 0, csio->data_ptr + oreceived, data_segment_len); /* * XXX: Check DataSN. * XXX: Check F. */ if ((bhsdi->bhsdi_flags & BHSDI_FLAGS_S) == 0) { /* * Nothing more to do. 
*/ icl_pdu_free(response); return; } //ISCSI_SESSION_DEBUG(is, "got S flag; status 0x%x", bhsdi->bhsdi_status); if (bhsdi->bhsdi_status == 0) { ccb->ccb_h.status = CAM_REQ_CMP; } else { if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_DEV_QFRZN; csio->scsi_status = bhsdi->bhsdi_status; } if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { KASSERT(received <= csio->dxfer_len, ("received > csio->dxfer_len")); if (received < csio->dxfer_len) { csio->resid = ntohl(bhsdi->bhsdi_residual_count); if (csio->resid != csio->dxfer_len - received) { ISCSI_SESSION_WARN(is, "underflow mismatch: " "target indicates %d, we calculated %zd", csio->resid, csio->dxfer_len - received); } csio->resid = csio->dxfer_len - received; } } xpt_done(ccb); icl_pdu_free(response); } static void iscsi_pdu_handle_logout_response(struct icl_pdu *response) { ISCSI_SESSION_DEBUG(PDU_SESSION(response), "logout response"); icl_pdu_free(response); } static void iscsi_pdu_handle_r2t(struct icl_pdu *response) { struct icl_pdu *request; struct iscsi_session *is; struct iscsi_bhs_r2t *bhsr2t; struct iscsi_bhs_data_out *bhsdo; struct iscsi_outstanding *io; struct ccb_scsiio *csio; size_t off, len, total_len; int error; uint32_t datasn = 0; is = PDU_SESSION(response); bhsr2t = (struct iscsi_bhs_r2t *)response->ip_bhs; io = iscsi_outstanding_find(is, bhsr2t->bhsr2t_initiator_task_tag); if (io == NULL || io->io_ccb == NULL) { ISCSI_SESSION_WARN(is, "bad itt 0x%x; reconnecting", bhsr2t->bhsr2t_initiator_task_tag); icl_pdu_free(response); iscsi_session_reconnect(is); return; } csio = &io->io_ccb->csio; if ((csio->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_OUT) { ISCSI_SESSION_WARN(is, "received R2T for read command; reconnecting"); icl_pdu_free(response); iscsi_session_reconnect(is); return; } /* * XXX: Verify R2TSN. 
*/ off = ntohl(bhsr2t->bhsr2t_buffer_offset); if (off > csio->dxfer_len) { ISCSI_SESSION_WARN(is, "target requested invalid offset " "%zd, buffer is is %d; reconnecting", off, csio->dxfer_len); icl_pdu_free(response); iscsi_session_reconnect(is); return; } total_len = ntohl(bhsr2t->bhsr2t_desired_data_transfer_length); if (total_len == 0 || total_len > csio->dxfer_len) { ISCSI_SESSION_WARN(is, "target requested invalid length " "%zd, buffer is %d; reconnecting", total_len, csio->dxfer_len); icl_pdu_free(response); iscsi_session_reconnect(is); return; } //ISCSI_SESSION_DEBUG(is, "r2t; off %zd, len %zd", off, total_len); for (;;) { len = total_len; - if (len > is->is_max_send_data_segment_length) - len = is->is_max_send_data_segment_length; + if (len > is->is_conn->ic_max_send_data_segment_length) + len = is->is_conn->ic_max_send_data_segment_length; if (off + len > csio->dxfer_len) { ISCSI_SESSION_WARN(is, "target requested invalid " "length/offset %zd, buffer is %d; reconnecting", off + len, csio->dxfer_len); icl_pdu_free(response); iscsi_session_reconnect(is); return; } request = icl_pdu_new(response->ip_conn, M_NOWAIT); if (request == NULL) { icl_pdu_free(response); iscsi_session_reconnect(is); return; } bhsdo = (struct iscsi_bhs_data_out *)request->ip_bhs; bhsdo->bhsdo_opcode = ISCSI_BHS_OPCODE_SCSI_DATA_OUT; bhsdo->bhsdo_lun = bhsr2t->bhsr2t_lun; bhsdo->bhsdo_initiator_task_tag = bhsr2t->bhsr2t_initiator_task_tag; bhsdo->bhsdo_target_transfer_tag = bhsr2t->bhsr2t_target_transfer_tag; bhsdo->bhsdo_datasn = htonl(datasn++); bhsdo->bhsdo_buffer_offset = htonl(off); error = icl_pdu_append_data(request, csio->data_ptr + off, len, M_NOWAIT); if (error != 0) { ISCSI_SESSION_WARN(is, "failed to allocate memory; " "reconnecting"); icl_pdu_free(request); icl_pdu_free(response); iscsi_session_reconnect(is); return; } off += len; total_len -= len; if (total_len == 0) { bhsdo->bhsdo_flags |= BHSDO_FLAGS_F; //ISCSI_SESSION_DEBUG(is, "setting F, off %zd", off); } else { 
//ISCSI_SESSION_DEBUG(is, "not finished, off %zd", off); } iscsi_pdu_queue_locked(request); if (total_len == 0) break; } icl_pdu_free(response); } static void iscsi_pdu_handle_async_message(struct icl_pdu *response) { struct iscsi_bhs_asynchronous_message *bhsam; struct iscsi_session *is; is = PDU_SESSION(response); bhsam = (struct iscsi_bhs_asynchronous_message *)response->ip_bhs; switch (bhsam->bhsam_async_event) { case BHSAM_EVENT_TARGET_REQUESTS_LOGOUT: ISCSI_SESSION_WARN(is, "target requests logout; removing session"); iscsi_session_logout(is); iscsi_session_terminate(is); break; case BHSAM_EVENT_TARGET_TERMINATES_CONNECTION: ISCSI_SESSION_WARN(is, "target indicates it will drop the connection"); break; case BHSAM_EVENT_TARGET_TERMINATES_SESSION: ISCSI_SESSION_WARN(is, "target indicates it will drop the session"); break; default: /* * XXX: Technically, we're obligated to also handle * parameter renegotiation. */ ISCSI_SESSION_WARN(is, "ignoring AsyncEvent %d", bhsam->bhsam_async_event); break; } icl_pdu_free(response); } static void iscsi_pdu_handle_reject(struct icl_pdu *response) { struct iscsi_bhs_reject *bhsr; struct iscsi_session *is; is = PDU_SESSION(response); bhsr = (struct iscsi_bhs_reject *)response->ip_bhs; ISCSI_SESSION_WARN(is, "received Reject PDU, reason 0x%x; protocol error?", bhsr->bhsr_reason); icl_pdu_free(response); } static int iscsi_ioctl_daemon_wait(struct iscsi_softc *sc, struct iscsi_daemon_request *request) { struct iscsi_session *is; struct icl_drv_limits idl; int error; sx_slock(&sc->sc_lock); for (;;) { TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { ISCSI_SESSION_LOCK(is); if (is->is_conf.isc_enable == 0 && is->is_conf.isc_discovery == 0) { ISCSI_SESSION_UNLOCK(is); continue; } if (is->is_waiting_for_iscsid) break; ISCSI_SESSION_UNLOCK(is); } if (is == NULL) { if (sc->sc_unloading) { sx_sunlock(&sc->sc_lock); return (ENXIO); } /* * No session requires attention from iscsid(8); wait. 
*/ error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock); if (error != 0) { sx_sunlock(&sc->sc_lock); return (error); } continue; } is->is_waiting_for_iscsid = false; is->is_login_phase = true; is->is_reason[0] = '\0'; ISCSI_SESSION_UNLOCK(is); request->idr_session_id = is->is_id; memcpy(&request->idr_isid, &is->is_isid, sizeof(request->idr_isid)); request->idr_tsih = 0; /* New or reinstated session. */ memcpy(&request->idr_conf, &is->is_conf, sizeof(request->idr_conf)); error = icl_limits(is->is_conf.isc_offload, is->is_conf.isc_iser, &idl); if (error != 0) { ISCSI_SESSION_WARN(is, "icl_limits for offload \"%s\" " "failed with error %d", is->is_conf.isc_offload, error); sx_sunlock(&sc->sc_lock); return (error); } request->idr_limits.isl_max_recv_data_segment_length = idl.idl_max_recv_data_segment_length; request->idr_limits.isl_max_send_data_segment_length = idl.idl_max_send_data_segment_length; request->idr_limits.isl_max_burst_length = idl.idl_max_burst_length; request->idr_limits.isl_first_burst_length = idl.idl_first_burst_length; sx_sunlock(&sc->sc_lock); return (0); } } static int iscsi_ioctl_daemon_handoff(struct iscsi_softc *sc, struct iscsi_daemon_handoff *handoff) { struct iscsi_session *is; struct icl_conn *ic; int error; sx_slock(&sc->sc_lock); /* * Find the session to hand off socket to. */ TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == handoff->idh_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } ISCSI_SESSION_LOCK(is); ic = is->is_conn; if (is->is_conf.isc_discovery || is->is_terminating) { ISCSI_SESSION_UNLOCK(is); sx_sunlock(&sc->sc_lock); return (EINVAL); } if (is->is_connected) { /* * This might have happened because another iscsid(8) * instance handed off the connection in the meantime. * Just return. 
*/ ISCSI_SESSION_WARN(is, "handoff on already connected " "session"); ISCSI_SESSION_UNLOCK(is); sx_sunlock(&sc->sc_lock); return (EBUSY); } strlcpy(is->is_target_alias, handoff->idh_target_alias, sizeof(is->is_target_alias)); is->is_tsih = handoff->idh_tsih; is->is_statsn = handoff->idh_statsn; is->is_protocol_level = handoff->idh_protocol_level; is->is_initial_r2t = handoff->idh_initial_r2t; is->is_immediate_data = handoff->idh_immediate_data; - is->is_max_recv_data_segment_length = + ic->ic_max_recv_data_segment_length = handoff->idh_max_recv_data_segment_length; - is->is_max_send_data_segment_length = + ic->ic_max_send_data_segment_length = handoff->idh_max_send_data_segment_length; is->is_max_burst_length = handoff->idh_max_burst_length; is->is_first_burst_length = handoff->idh_first_burst_length; if (handoff->idh_header_digest == ISCSI_DIGEST_CRC32C) ic->ic_header_crc32c = true; else ic->ic_header_crc32c = false; if (handoff->idh_data_digest == ISCSI_DIGEST_CRC32C) ic->ic_data_crc32c = true; else ic->ic_data_crc32c = false; ic->ic_maxtags = maxtags; is->is_cmdsn = 0; is->is_expcmdsn = 0; is->is_maxcmdsn = 0; is->is_waiting_for_iscsid = false; is->is_login_phase = false; is->is_timeout = 0; is->is_connected = true; is->is_reason[0] = '\0'; ISCSI_SESSION_UNLOCK(is); /* * If we're going through the proxy, the idh_socket will be 0, * and the ICL module can simply ignore this call. It can also * use it to determine it's no longer in the Login phase. */ error = icl_conn_handoff(ic, handoff->idh_socket); if (error != 0) { sx_sunlock(&sc->sc_lock); iscsi_session_terminate(is); return (error); } sx_sunlock(&sc->sc_lock); if (is->is_sim != NULL) { /* * When reconnecting, there already is SIM allocated for the session. 
*/ KASSERT(is->is_simq_frozen, ("reconnect without frozen simq")); ISCSI_SESSION_LOCK(is); ISCSI_SESSION_DEBUG(is, "releasing"); is->is_simq_frozen = false; xpt_release_simq(is->is_sim, 1); ISCSI_SESSION_UNLOCK(is); } else { ISCSI_SESSION_LOCK(is); is->is_devq = cam_simq_alloc(ic->ic_maxtags); if (is->is_devq == NULL) { ISCSI_SESSION_WARN(is, "failed to allocate simq"); iscsi_session_terminate(is); return (ENOMEM); } is->is_sim = cam_sim_alloc(iscsi_action, NULL, "iscsi", is, is->is_id /* unit */, &is->is_lock, 1, ic->ic_maxtags, is->is_devq); if (is->is_sim == NULL) { ISCSI_SESSION_UNLOCK(is); ISCSI_SESSION_WARN(is, "failed to allocate SIM"); cam_simq_free(is->is_devq); iscsi_session_terminate(is); return (ENOMEM); } error = xpt_bus_register(is->is_sim, NULL, 0); if (error != 0) { ISCSI_SESSION_UNLOCK(is); ISCSI_SESSION_WARN(is, "failed to register bus"); iscsi_session_terminate(is); return (ENOMEM); } error = xpt_create_path(&is->is_path, /*periph*/NULL, cam_sim_path(is->is_sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD); if (error != CAM_REQ_CMP) { ISCSI_SESSION_UNLOCK(is); ISCSI_SESSION_WARN(is, "failed to create path"); iscsi_session_terminate(is); return (ENOMEM); } ISCSI_SESSION_UNLOCK(is); } return (0); } static int iscsi_ioctl_daemon_fail(struct iscsi_softc *sc, struct iscsi_daemon_fail *fail) { struct iscsi_session *is; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == fail->idf_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } ISCSI_SESSION_LOCK(is); ISCSI_SESSION_DEBUG(is, "iscsid(8) failed: %s", fail->idf_reason); strlcpy(is->is_reason, fail->idf_reason, sizeof(is->is_reason)); //is->is_waiting_for_iscsid = false; //is->is_login_phase = true; //iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); sx_sunlock(&sc->sc_lock); return (0); } #ifdef ICL_KERNEL_PROXY static int iscsi_ioctl_daemon_connect(struct iscsi_softc *sc, struct iscsi_daemon_connect *idc) { struct iscsi_session *is; 
struct sockaddr *from_sa, *to_sa; int error; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == idc->idc_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } sx_sunlock(&sc->sc_lock); if (idc->idc_from_addrlen > 0) { error = getsockaddr(&from_sa, (void *)idc->idc_from_addr, idc->idc_from_addrlen); if (error != 0) { ISCSI_SESSION_WARN(is, "getsockaddr failed with error %d", error); return (error); } } else { from_sa = NULL; } error = getsockaddr(&to_sa, (void *)idc->idc_to_addr, idc->idc_to_addrlen); if (error != 0) { ISCSI_SESSION_WARN(is, "getsockaddr failed with error %d", error); free(from_sa, M_SONAME); return (error); } ISCSI_SESSION_LOCK(is); is->is_statsn = 0; is->is_cmdsn = 0; is->is_expcmdsn = 0; is->is_maxcmdsn = 0; is->is_waiting_for_iscsid = false; is->is_login_phase = true; is->is_timeout = 0; ISCSI_SESSION_UNLOCK(is); error = icl_conn_connect(is->is_conn, idc->idc_domain, idc->idc_socktype, idc->idc_protocol, from_sa, to_sa); free(from_sa, M_SONAME); free(to_sa, M_SONAME); /* * Digests are always disabled during login phase. 
*/ is->is_conn->ic_header_crc32c = false; is->is_conn->ic_data_crc32c = false; return (error); } static int iscsi_ioctl_daemon_send(struct iscsi_softc *sc, struct iscsi_daemon_send *ids) { struct iscsi_session *is; struct icl_pdu *ip; size_t datalen; void *data; int error; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == ids->ids_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } sx_sunlock(&sc->sc_lock); if (is->is_login_phase == false) return (EBUSY); if (is->is_terminating || is->is_reconnecting) return (EIO); datalen = ids->ids_data_segment_len; - if (datalen > is->is_max_send_data_segment_length) + if (datalen > is->is_conn->ic_max_send_data_segment_length) return (EINVAL); if (datalen > 0) { data = malloc(datalen, M_ISCSI, M_WAITOK); error = copyin(ids->ids_data_segment, data, datalen); if (error != 0) { free(data, M_ISCSI); return (error); } } ip = icl_pdu_new(is->is_conn, M_WAITOK); memcpy(ip->ip_bhs, ids->ids_bhs, sizeof(*ip->ip_bhs)); if (datalen > 0) { error = icl_pdu_append_data(ip, data, datalen, M_WAITOK); KASSERT(error == 0, ("icl_pdu_append_data(..., M_WAITOK) failed")); free(data, M_ISCSI); } iscsi_pdu_queue(ip); return (0); } static int iscsi_ioctl_daemon_receive(struct iscsi_softc *sc, struct iscsi_daemon_receive *idr) { struct iscsi_session *is; struct icl_pdu *ip; void *data; int error; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == idr->idr_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } sx_sunlock(&sc->sc_lock); if (is->is_login_phase == false) return (EBUSY); ISCSI_SESSION_LOCK(is); while (is->is_login_pdu == NULL && is->is_terminating == false && is->is_reconnecting == false) { error = cv_wait_sig(&is->is_login_cv, &is->is_lock); if (error != 0) { ISCSI_SESSION_UNLOCK(is); return (error); } } if (is->is_terminating || is->is_reconnecting) { ISCSI_SESSION_UNLOCK(is); return (EIO); } ip = 
is->is_login_pdu; is->is_login_pdu = NULL; ISCSI_SESSION_UNLOCK(is); if (ip->ip_data_len > idr->idr_data_segment_len) { icl_pdu_free(ip); return (EMSGSIZE); } copyout(ip->ip_bhs, idr->idr_bhs, sizeof(*ip->ip_bhs)); if (ip->ip_data_len > 0) { data = malloc(ip->ip_data_len, M_ISCSI, M_WAITOK); icl_pdu_get_data(ip, 0, data, ip->ip_data_len); copyout(data, idr->idr_data_segment, ip->ip_data_len); free(data, M_ISCSI); } icl_pdu_free(ip); return (0); } #endif /* ICL_KERNEL_PROXY */ static void iscsi_sanitize_session_conf(struct iscsi_session_conf *isc) { /* * Just make sure all the fields are null-terminated. * * XXX: This is not particularly secure. We should * create our own conf and then copy in relevant * fields. */ isc->isc_initiator[ISCSI_NAME_LEN - 1] = '\0'; isc->isc_initiator_addr[ISCSI_ADDR_LEN - 1] = '\0'; isc->isc_initiator_alias[ISCSI_ALIAS_LEN - 1] = '\0'; isc->isc_target[ISCSI_NAME_LEN - 1] = '\0'; isc->isc_target_addr[ISCSI_ADDR_LEN - 1] = '\0'; isc->isc_user[ISCSI_NAME_LEN - 1] = '\0'; isc->isc_secret[ISCSI_SECRET_LEN - 1] = '\0'; isc->isc_mutual_user[ISCSI_NAME_LEN - 1] = '\0'; isc->isc_mutual_secret[ISCSI_SECRET_LEN - 1] = '\0'; } static bool iscsi_valid_session_conf(const struct iscsi_session_conf *isc) { if (isc->isc_initiator[0] == '\0') { ISCSI_DEBUG("empty isc_initiator"); return (false); } if (isc->isc_target_addr[0] == '\0') { ISCSI_DEBUG("empty isc_target_addr"); return (false); } if (isc->isc_discovery != 0 && isc->isc_target[0] != 0) { ISCSI_DEBUG("non-empty isc_target for discovery session"); return (false); } if (isc->isc_discovery == 0 && isc->isc_target[0] == 0) { ISCSI_DEBUG("empty isc_target for non-discovery session"); return (false); } return (true); } static int iscsi_ioctl_session_add(struct iscsi_softc *sc, struct iscsi_session_add *isa) { struct iscsi_session *is; const struct iscsi_session *is2; int error; iscsi_sanitize_session_conf(&isa->isa_conf); if (iscsi_valid_session_conf(&isa->isa_conf) == false) return (EINVAL); is = 
malloc(sizeof(*is), M_ISCSI, M_ZERO | M_WAITOK); memcpy(&is->is_conf, &isa->isa_conf, sizeof(is->is_conf)); - /* - * Set some default values, from RFC 3720, section 12. - * - * These values are updated by the handoff IOCTL, but are - * needed prior to the handoff to support sending the ISER - * login PDU. - */ - is->is_max_recv_data_segment_length = 8192; - is->is_max_send_data_segment_length = 8192; - is->is_max_burst_length = 262144; - is->is_first_burst_length = 65536; - sx_xlock(&sc->sc_lock); /* * Prevent duplicates. */ TAILQ_FOREACH(is2, &sc->sc_sessions, is_next) { if (!!is->is_conf.isc_discovery != !!is2->is_conf.isc_discovery) continue; if (strcmp(is->is_conf.isc_target_addr, is2->is_conf.isc_target_addr) != 0) continue; if (is->is_conf.isc_discovery == 0 && strcmp(is->is_conf.isc_target, is2->is_conf.isc_target) != 0) continue; sx_xunlock(&sc->sc_lock); free(is, M_ISCSI); return (EBUSY); } is->is_conn = icl_new_conn(is->is_conf.isc_offload, is->is_conf.isc_iser, "iscsi", &is->is_lock); if (is->is_conn == NULL) { sx_xunlock(&sc->sc_lock); free(is, M_ISCSI); return (EINVAL); } is->is_conn->ic_receive = iscsi_receive_callback; is->is_conn->ic_error = iscsi_error_callback; is->is_conn->ic_prv0 = is; TAILQ_INIT(&is->is_outstanding); STAILQ_INIT(&is->is_postponed); mtx_init(&is->is_lock, "iscsi_lock", NULL, MTX_DEF); cv_init(&is->is_maintenance_cv, "iscsi_mt"); #ifdef ICL_KERNEL_PROXY cv_init(&is->is_login_cv, "iscsi_login"); #endif + /* + * Set some default values, from RFC 3720, section 12. + * + * These values are updated by the handoff IOCTL, but are + * needed prior to the handoff to support sending the ISER + * login PDU. + */ + is->is_conn->ic_max_recv_data_segment_length = 8192; + is->is_conn->ic_max_send_data_segment_length = 8192; + is->is_max_burst_length = 262144; + is->is_first_burst_length = 65536; + is->is_softc = sc; sc->sc_last_session_id++; is->is_id = sc->sc_last_session_id; is->is_isid[0] = 0x80; /* RFC 3720, 10.12.5: 10b, "Random" ISID. 
*/ arc4rand(&is->is_isid[1], 5, 0); is->is_tsih = 0; callout_init(&is->is_callout, 1); error = kthread_add(iscsi_maintenance_thread, is, NULL, NULL, 0, 0, "iscsimt"); if (error != 0) { ISCSI_SESSION_WARN(is, "kthread_add(9) failed with error %d", error); sx_xunlock(&sc->sc_lock); return (error); } callout_reset(&is->is_callout, 1 * hz, iscsi_callout, is); TAILQ_INSERT_TAIL(&sc->sc_sessions, is, is_next); ISCSI_SESSION_LOCK(is); /* * Don't notify iscsid(8) if the session is disabled and it's not * a discovery session, */ if (is->is_conf.isc_enable == 0 && is->is_conf.isc_discovery == 0) { ISCSI_SESSION_UNLOCK(is); sx_xunlock(&sc->sc_lock); return (0); } is->is_waiting_for_iscsid = true; strlcpy(is->is_reason, "Waiting for iscsid(8)", sizeof(is->is_reason)); ISCSI_SESSION_UNLOCK(is); cv_signal(&sc->sc_cv); sx_xunlock(&sc->sc_lock); return (0); } static bool iscsi_session_conf_matches(unsigned int id1, const struct iscsi_session_conf *c1, unsigned int id2, const struct iscsi_session_conf *c2) { if (id2 != 0 && id2 != id1) return (false); if (c2->isc_target[0] != '\0' && strcmp(c1->isc_target, c2->isc_target) != 0) return (false); if (c2->isc_target_addr[0] != '\0' && strcmp(c1->isc_target_addr, c2->isc_target_addr) != 0) return (false); return (true); } static int iscsi_ioctl_session_remove(struct iscsi_softc *sc, struct iscsi_session_remove *isr) { struct iscsi_session *is, *tmp; bool found = false; iscsi_sanitize_session_conf(&isr->isr_conf); sx_xlock(&sc->sc_lock); TAILQ_FOREACH_SAFE(is, &sc->sc_sessions, is_next, tmp) { ISCSI_SESSION_LOCK(is); if (iscsi_session_conf_matches(is->is_id, &is->is_conf, isr->isr_session_id, &isr->isr_conf)) { found = true; iscsi_session_logout(is); iscsi_session_terminate(is); } ISCSI_SESSION_UNLOCK(is); } sx_xunlock(&sc->sc_lock); if (!found) return (ESRCH); return (0); } static int iscsi_ioctl_session_list(struct iscsi_softc *sc, struct iscsi_session_list *isl) { int error; unsigned int i = 0; struct iscsi_session *is; struct 
iscsi_session_state iss; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (i >= isl->isl_nentries) { sx_sunlock(&sc->sc_lock); return (EMSGSIZE); } memset(&iss, 0, sizeof(iss)); memcpy(&iss.iss_conf, &is->is_conf, sizeof(iss.iss_conf)); iss.iss_id = is->is_id; strlcpy(iss.iss_target_alias, is->is_target_alias, sizeof(iss.iss_target_alias)); strlcpy(iss.iss_reason, is->is_reason, sizeof(iss.iss_reason)); strlcpy(iss.iss_offload, is->is_conn->ic_offload, sizeof(iss.iss_offload)); if (is->is_conn->ic_header_crc32c) iss.iss_header_digest = ISCSI_DIGEST_CRC32C; else iss.iss_header_digest = ISCSI_DIGEST_NONE; if (is->is_conn->ic_data_crc32c) iss.iss_data_digest = ISCSI_DIGEST_CRC32C; else iss.iss_data_digest = ISCSI_DIGEST_NONE; iss.iss_max_send_data_segment_length = - is->is_max_send_data_segment_length; + is->is_conn->ic_max_send_data_segment_length; iss.iss_max_recv_data_segment_length = - is->is_max_recv_data_segment_length; + is->is_conn->ic_max_recv_data_segment_length; iss.iss_max_burst_length = is->is_max_burst_length; iss.iss_first_burst_length = is->is_first_burst_length; iss.iss_immediate_data = is->is_immediate_data; iss.iss_connected = is->is_connected; error = copyout(&iss, isl->isl_pstates + i, sizeof(iss)); if (error != 0) { sx_sunlock(&sc->sc_lock); return (error); } i++; } sx_sunlock(&sc->sc_lock); isl->isl_nentries = i; return (0); } static int iscsi_ioctl_session_modify(struct iscsi_softc *sc, struct iscsi_session_modify *ism) { struct iscsi_session *is; const struct iscsi_session *is2; iscsi_sanitize_session_conf(&ism->ism_conf); if (iscsi_valid_session_conf(&ism->ism_conf) == false) return (EINVAL); sx_xlock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { ISCSI_SESSION_LOCK(is); if (is->is_id == ism->ism_session_id) { /* Note that the session remains locked. */ break; } ISCSI_SESSION_UNLOCK(is); } if (is == NULL) { sx_xunlock(&sc->sc_lock); return (ESRCH); } /* * Prevent duplicates. 
*/ TAILQ_FOREACH(is2, &sc->sc_sessions, is_next) { if (is == is2) continue; if (!!ism->ism_conf.isc_discovery != !!is2->is_conf.isc_discovery) continue; if (strcmp(ism->ism_conf.isc_target_addr, is2->is_conf.isc_target_addr) != 0) continue; if (ism->ism_conf.isc_discovery == 0 && strcmp(ism->ism_conf.isc_target, is2->is_conf.isc_target) != 0) continue; ISCSI_SESSION_UNLOCK(is); sx_xunlock(&sc->sc_lock); return (EBUSY); } sx_xunlock(&sc->sc_lock); memcpy(&is->is_conf, &ism->ism_conf, sizeof(is->is_conf)); ISCSI_SESSION_UNLOCK(is); iscsi_session_reconnect(is); return (0); } static int iscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int mode, struct thread *td) { struct iscsi_softc *sc; sc = dev->si_drv1; switch (cmd) { case ISCSIDWAIT: return (iscsi_ioctl_daemon_wait(sc, (struct iscsi_daemon_request *)arg)); case ISCSIDHANDOFF: return (iscsi_ioctl_daemon_handoff(sc, (struct iscsi_daemon_handoff *)arg)); case ISCSIDFAIL: return (iscsi_ioctl_daemon_fail(sc, (struct iscsi_daemon_fail *)arg)); #ifdef ICL_KERNEL_PROXY case ISCSIDCONNECT: return (iscsi_ioctl_daemon_connect(sc, (struct iscsi_daemon_connect *)arg)); case ISCSIDSEND: return (iscsi_ioctl_daemon_send(sc, (struct iscsi_daemon_send *)arg)); case ISCSIDRECEIVE: return (iscsi_ioctl_daemon_receive(sc, (struct iscsi_daemon_receive *)arg)); #endif /* ICL_KERNEL_PROXY */ case ISCSISADD: return (iscsi_ioctl_session_add(sc, (struct iscsi_session_add *)arg)); case ISCSISREMOVE: return (iscsi_ioctl_session_remove(sc, (struct iscsi_session_remove *)arg)); case ISCSISLIST: return (iscsi_ioctl_session_list(sc, (struct iscsi_session_list *)arg)); case ISCSISMODIFY: return (iscsi_ioctl_session_modify(sc, (struct iscsi_session_modify *)arg)); default: return (EINVAL); } } static struct iscsi_outstanding * iscsi_outstanding_find(struct iscsi_session *is, uint32_t initiator_task_tag) { struct iscsi_outstanding *io; ISCSI_SESSION_LOCK_ASSERT(is); TAILQ_FOREACH(io, &is->is_outstanding, io_next) { if (io->io_initiator_task_tag 
== initiator_task_tag) return (io); } return (NULL); } static struct iscsi_outstanding * iscsi_outstanding_find_ccb(struct iscsi_session *is, union ccb *ccb) { struct iscsi_outstanding *io; ISCSI_SESSION_LOCK_ASSERT(is); TAILQ_FOREACH(io, &is->is_outstanding, io_next) { if (io->io_ccb == ccb) return (io); } return (NULL); } static struct iscsi_outstanding * iscsi_outstanding_add(struct iscsi_session *is, struct icl_pdu *request, union ccb *ccb, uint32_t *initiator_task_tagp) { struct iscsi_outstanding *io; int error; ISCSI_SESSION_LOCK_ASSERT(is); io = uma_zalloc(iscsi_outstanding_zone, M_NOWAIT | M_ZERO); if (io == NULL) { ISCSI_SESSION_WARN(is, "failed to allocate %zd bytes", sizeof(*io)); return (NULL); } error = icl_conn_task_setup(is->is_conn, request, &ccb->csio, initiator_task_tagp, &io->io_icl_prv); if (error != 0) { ISCSI_SESSION_WARN(is, "icl_conn_task_setup() failed with error %d", error); uma_zfree(iscsi_outstanding_zone, io); return (NULL); } KASSERT(iscsi_outstanding_find(is, *initiator_task_tagp) == NULL, ("initiator_task_tag 0x%x already added", *initiator_task_tagp)); io->io_initiator_task_tag = *initiator_task_tagp; io->io_ccb = ccb; TAILQ_INSERT_TAIL(&is->is_outstanding, io, io_next); return (io); } static void iscsi_outstanding_remove(struct iscsi_session *is, struct iscsi_outstanding *io) { ISCSI_SESSION_LOCK_ASSERT(is); icl_conn_task_done(is->is_conn, io->io_icl_prv); TAILQ_REMOVE(&is->is_outstanding, io, io_next); uma_zfree(iscsi_outstanding_zone, io); } static void iscsi_action_abort(struct iscsi_session *is, union ccb *ccb) { struct icl_pdu *request; struct iscsi_bhs_task_management_request *bhstmr; struct ccb_abort *cab = &ccb->cab; struct iscsi_outstanding *io, *aio; uint32_t initiator_task_tag; ISCSI_SESSION_LOCK_ASSERT(is); #if 0 KASSERT(is->is_login_phase == false, ("%s called during Login Phase", __func__)); #else if (is->is_login_phase) { ccb->ccb_h.status = CAM_REQ_ABORTED; xpt_done(ccb); return; } #endif aio = 
iscsi_outstanding_find_ccb(is, cab->abort_ccb); if (aio == NULL) { ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } request = icl_pdu_new(is->is_conn, M_NOWAIT); if (request == NULL) { ccb->ccb_h.status = CAM_RESRC_UNAVAIL; xpt_done(ccb); return; } initiator_task_tag = is->is_initiator_task_tag++; if (initiator_task_tag == 0xffffffff) initiator_task_tag = is->is_initiator_task_tag++; io = iscsi_outstanding_add(is, request, NULL, &initiator_task_tag); if (io == NULL) { icl_pdu_free(request); ccb->ccb_h.status = CAM_RESRC_UNAVAIL; xpt_done(ccb); return; } io->io_referenced_task_tag = aio->io_initiator_task_tag; bhstmr = (struct iscsi_bhs_task_management_request *)request->ip_bhs; bhstmr->bhstmr_opcode = ISCSI_BHS_OPCODE_TASK_REQUEST; bhstmr->bhstmr_function = 0x80 | BHSTMR_FUNCTION_ABORT_TASK; bhstmr->bhstmr_lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun)); bhstmr->bhstmr_initiator_task_tag = initiator_task_tag; bhstmr->bhstmr_referenced_task_tag = aio->io_initiator_task_tag; iscsi_pdu_queue_locked(request); } static void iscsi_action_scsiio(struct iscsi_session *is, union ccb *ccb) { struct icl_pdu *request; struct iscsi_bhs_scsi_command *bhssc; struct ccb_scsiio *csio; struct iscsi_outstanding *io; size_t len; uint32_t initiator_task_tag; int error; ISCSI_SESSION_LOCK_ASSERT(is); #if 0 KASSERT(is->is_login_phase == false, ("%s called during Login Phase", __func__)); #else if (is->is_login_phase) { ISCSI_SESSION_DEBUG(is, "called during login phase"); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_REQ_ABORTED | CAM_DEV_QFRZN; xpt_done(ccb); return; } #endif request = icl_pdu_new(is->is_conn, M_NOWAIT); if (request == NULL) { if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN; xpt_done(ccb); return; } 
initiator_task_tag = is->is_initiator_task_tag++; if (initiator_task_tag == 0xffffffff) initiator_task_tag = is->is_initiator_task_tag++; io = iscsi_outstanding_add(is, request, ccb, &initiator_task_tag); if (io == NULL) { icl_pdu_free(request); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN; xpt_done(ccb); return; } csio = &ccb->csio; bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs; bhssc->bhssc_opcode = ISCSI_BHS_OPCODE_SCSI_COMMAND; bhssc->bhssc_flags |= BHSSC_FLAGS_F; switch (csio->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_IN: bhssc->bhssc_flags |= BHSSC_FLAGS_R; break; case CAM_DIR_OUT: bhssc->bhssc_flags |= BHSSC_FLAGS_W; break; } if ((ccb->ccb_h.flags & CAM_TAG_ACTION_VALID) != 0) { switch (csio->tag_action) { case MSG_HEAD_OF_Q_TAG: bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_HOQ; break; case MSG_ORDERED_Q_TAG: bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_ORDERED; break; case MSG_ACA_TASK: bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_ACA; break; case MSG_SIMPLE_Q_TAG: default: bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_SIMPLE; break; } } else bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_UNTAGGED; if (is->is_protocol_level >= 2) { bhssc->bhssc_pri = (csio->priority << BHSSC_PRI_SHIFT) & BHSSC_PRI_MASK; } bhssc->bhssc_lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun)); bhssc->bhssc_initiator_task_tag = initiator_task_tag; bhssc->bhssc_expected_data_transfer_length = htonl(csio->dxfer_len); KASSERT(csio->cdb_len <= sizeof(bhssc->bhssc_cdb), ("unsupported CDB size %zd", (size_t)csio->cdb_len)); if (csio->ccb_h.flags & CAM_CDB_POINTER) memcpy(&bhssc->bhssc_cdb, csio->cdb_io.cdb_ptr, csio->cdb_len); else memcpy(&bhssc->bhssc_cdb, csio->cdb_io.cdb_bytes, csio->cdb_len); if (is->is_immediate_data && (csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_OUT) { len = csio->dxfer_len; //ISCSI_SESSION_DEBUG(is, "adding %zd of immediate data", len); 
if (len > is->is_first_burst_length) { ISCSI_SESSION_DEBUG(is, "len %zd -> %d", len, is->is_first_burst_length); len = is->is_first_burst_length; } - if (len > is->is_max_send_data_segment_length) { + if (len > is->is_conn->ic_max_send_data_segment_length) { ISCSI_SESSION_DEBUG(is, "len %zd -> %d", len, - is->is_max_send_data_segment_length); - len = is->is_max_send_data_segment_length; + is->is_conn->ic_max_send_data_segment_length); + len = is->is_conn->ic_max_send_data_segment_length; } error = icl_pdu_append_data(request, csio->data_ptr, len, M_NOWAIT); if (error != 0) { iscsi_outstanding_remove(is, io); icl_pdu_free(request); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN; xpt_done(ccb); return; } } iscsi_pdu_queue_locked(request); } static void iscsi_action(struct cam_sim *sim, union ccb *ccb) { struct iscsi_session *is; is = cam_sim_softc(sim); ISCSI_SESSION_LOCK_ASSERT(is); if (is->is_terminating || (is->is_connected == false && fail_on_disconnection)) { ccb->ccb_h.status = CAM_DEV_NOT_THERE; xpt_done(ccb); return; } /* * Make sure CAM doesn't sneak in a CCB just after freezing the queue. */ if (is->is_simq_frozen == true) { ccb->ccb_h.status &= ~(CAM_SIM_QUEUED | CAM_STATUS_MASK); ccb->ccb_h.status |= CAM_REQUEUE_REQ; /* Don't freeze the devq - the SIM queue is already frozen. */ xpt_done(ccb); return; } switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_TAG_ABLE; cpi->target_sprt = 0; cpi->hba_misc = PIM_EXTLUNS; /* * XXX: It shouldn't ever be NULL; this could be turned * into a KASSERT eventually. 
*/ if (is->is_conn == NULL) ISCSI_WARN("NULL conn"); else if (is->is_conn->ic_unmapped) cpi->hba_misc |= PIM_UNMAPPED; cpi->hba_eng_cnt = 0; cpi->max_target = 0; /* * Note that the variable below is only relevant for targets * that don't claim compliance with anything above SPC2, which * means they don't support REPORT_LUNS. */ cpi->max_lun = 255; cpi->initiator_id = ~0; strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strlcpy(cpi->hba_vid, "iSCSI", HBA_IDLEN); strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 150000; /* XXX */ cpi->transport = XPORT_ISCSI; cpi->transport_version = 0; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_SPC3; cpi->maxio = maxphys; cpi->ccb_h.status = CAM_REQ_CMP; break; } case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings *cts; struct ccb_trans_settings_scsi *scsi; cts = &ccb->cts; scsi = &cts->proto_specific.scsi; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_SPC3; cts->transport = XPORT_ISCSI; cts->transport_version = 0; scsi->valid = CTS_SCSI_VALID_TQ; scsi->flags = CTS_SCSI_FLAGS_TAG_ENB; cts->ccb_h.status = CAM_REQ_CMP; break; } case XPT_CALC_GEOMETRY: cam_calc_geometry(&ccb->ccg, /*extended*/1); ccb->ccb_h.status = CAM_REQ_CMP; break; #if 0 /* * XXX: What's the point? 
*/ case XPT_RESET_BUS: case XPT_TERM_IO: ISCSI_SESSION_DEBUG(is, "faking success for reset, abort, or term_io"); ccb->ccb_h.status = CAM_REQ_CMP; break; #endif case XPT_ABORT: iscsi_action_abort(is, ccb); return; case XPT_SCSI_IO: iscsi_action_scsiio(is, ccb); return; default: #if 0 ISCSI_SESSION_DEBUG(is, "got unsupported code 0x%x", ccb->ccb_h.func_code); #endif ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; break; } xpt_done(ccb); } static void iscsi_terminate_sessions(struct iscsi_softc *sc) { struct iscsi_session *is; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) iscsi_session_terminate(is); while(!TAILQ_EMPTY(&sc->sc_sessions)) { ISCSI_DEBUG("waiting for sessions to terminate"); cv_wait(&sc->sc_cv, &sc->sc_lock); } ISCSI_DEBUG("all sessions terminated"); sx_sunlock(&sc->sc_lock); } static void iscsi_shutdown_pre(struct iscsi_softc *sc) { struct iscsi_session *is; if (!fail_on_shutdown) return; /* * If we have any sessions waiting for reconnection, request * maintenance thread to fail them immediately instead of waiting * for reconnect timeout. * * This prevents LUNs with mounted filesystems that are supported * by disconnected iSCSI sessions from hanging, however it will * fail all queued BIOs. 
*/ ISCSI_DEBUG("forcing failing all disconnected sessions due to shutdown"); fail_on_disconnection = 1; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { ISCSI_SESSION_LOCK(is); if (!is->is_connected) { ISCSI_SESSION_DEBUG(is, "force failing disconnected session early"); iscsi_session_reconnect(is); } ISCSI_SESSION_UNLOCK(is); } sx_sunlock(&sc->sc_lock); } static void iscsi_shutdown_post(struct iscsi_softc *sc) { if (!KERNEL_PANICKED()) { ISCSI_DEBUG("removing all sessions due to shutdown"); iscsi_terminate_sessions(sc); } } static int iscsi_load(void) { int error; sc = malloc(sizeof(*sc), M_ISCSI, M_ZERO | M_WAITOK); sx_init(&sc->sc_lock, "iscsi"); TAILQ_INIT(&sc->sc_sessions); cv_init(&sc->sc_cv, "iscsi_cv"); iscsi_outstanding_zone = uma_zcreate("iscsi_outstanding", sizeof(struct iscsi_outstanding), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); error = make_dev_p(MAKEDEV_CHECKNAME, &sc->sc_cdev, &iscsi_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "iscsi"); if (error != 0) { ISCSI_WARN("failed to create device node, error %d", error); return (error); } sc->sc_cdev->si_drv1 = sc; sc->sc_shutdown_pre_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync, iscsi_shutdown_pre, sc, SHUTDOWN_PRI_FIRST); /* * shutdown_post_sync needs to run after filesystem shutdown and before * CAM shutdown - otherwise when rebooting with an iSCSI session that is * disconnected but has outstanding requests, dashutdown() will hang on * cam_periph_runccb(). */ sc->sc_shutdown_post_eh = EVENTHANDLER_REGISTER(shutdown_post_sync, iscsi_shutdown_post, sc, SHUTDOWN_PRI_DEFAULT - 1); return (0); } static int iscsi_unload(void) { /* Awaken any threads asleep in iscsi_ioctl(). 
*/ sx_xlock(&sc->sc_lock); sc->sc_unloading = true; cv_signal(&sc->sc_cv); sx_xunlock(&sc->sc_lock); if (sc->sc_cdev != NULL) { ISCSI_DEBUG("removing device node"); destroy_dev(sc->sc_cdev); ISCSI_DEBUG("device node removed"); } if (sc->sc_shutdown_pre_eh != NULL) EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->sc_shutdown_pre_eh); if (sc->sc_shutdown_post_eh != NULL) EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->sc_shutdown_post_eh); iscsi_terminate_sessions(sc); uma_zdestroy(iscsi_outstanding_zone); sx_destroy(&sc->sc_lock); cv_destroy(&sc->sc_cv); free(sc, M_ISCSI); return (0); } static int iscsi_quiesce(void) { sx_slock(&sc->sc_lock); if (!TAILQ_EMPTY(&sc->sc_sessions)) { sx_sunlock(&sc->sc_lock); return (EBUSY); } sx_sunlock(&sc->sc_lock); return (0); } static int iscsi_modevent(module_t mod, int what, void *arg) { int error; switch (what) { case MOD_LOAD: error = iscsi_load(); break; case MOD_UNLOAD: error = iscsi_unload(); break; case MOD_QUIESCE: error = iscsi_quiesce(); break; default: error = EINVAL; break; } return (error); } moduledata_t iscsi_data = { "iscsi", iscsi_modevent, 0 }; DECLARE_MODULE(iscsi, iscsi_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); MODULE_DEPEND(iscsi, cam, 1, 1, 1); MODULE_DEPEND(iscsi, icl, 1, 1, 1); diff --git a/sys/dev/iscsi/iscsi.h b/sys/dev/iscsi/iscsi.h index 793b7529c7c0..fe1cc64f88db 100644 --- a/sys/dev/iscsi/iscsi.h +++ b/sys/dev/iscsi/iscsi.h @@ -1,140 +1,138 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef ISCSI_H #define ISCSI_H struct iscsi_softc; struct icl_conn; #define ISCSI_NAME_LEN 224 /* 223 bytes, by RFC 3720, + '\0' */ #define ISCSI_ADDR_LEN 47 /* INET6_ADDRSTRLEN + '\0' */ #define ISCSI_SECRET_LEN 17 /* 16 + '\0' */ struct iscsi_outstanding { TAILQ_ENTRY(iscsi_outstanding) io_next; union ccb *io_ccb; size_t io_received; uint32_t io_initiator_task_tag; uint32_t io_referenced_task_tag; void *io_icl_prv; }; struct iscsi_session { TAILQ_ENTRY(iscsi_session) is_next; struct icl_conn *is_conn; struct mtx is_lock; uint32_t is_statsn; uint32_t is_cmdsn; uint32_t is_expcmdsn; uint32_t is_maxcmdsn; uint32_t is_initiator_task_tag; int is_protocol_level; int is_initial_r2t; int is_max_burst_length; int is_first_burst_length; uint8_t is_isid[6]; uint16_t is_tsih; bool is_immediate_data; - int is_max_recv_data_segment_length; - int is_max_send_data_segment_length; char is_target_alias[ISCSI_ALIAS_LEN]; TAILQ_HEAD(, iscsi_outstanding) is_outstanding; STAILQ_HEAD(, icl_pdu) is_postponed; struct 
callout is_callout; unsigned int is_timeout; /* * XXX: This could be rewritten using a single variable, * but somehow it results in uglier code. */ /* * We're waiting for iscsid(8); after iscsid_timeout * expires, kernel will wake up an iscsid(8) to handle * the session. */ bool is_waiting_for_iscsid; /* * Some iscsid(8) instance is handling the session; * after login_timeout expires, kernel will wake up * another iscsid(8) to handle the session. */ bool is_login_phase; /* * We're in the process of removing the iSCSI session. */ bool is_terminating; /* * We're waiting for the maintenance thread to do some * reconnection tasks. */ bool is_reconnecting; bool is_connected; struct cam_devq *is_devq; struct cam_sim *is_sim; struct cam_path *is_path; struct cv is_maintenance_cv; struct iscsi_softc *is_softc; unsigned int is_id; struct iscsi_session_conf is_conf; bool is_simq_frozen; char is_reason[ISCSI_REASON_LEN]; #ifdef ICL_KERNEL_PROXY struct cv is_login_cv; struct icl_pdu *is_login_pdu; #endif }; struct iscsi_softc { device_t sc_dev; struct sx sc_lock; struct cdev *sc_cdev; TAILQ_HEAD(, iscsi_session) sc_sessions; struct cv sc_cv; unsigned int sc_last_session_id; bool sc_unloading; eventhandler_tag sc_shutdown_pre_eh; eventhandler_tag sc_shutdown_post_eh; }; #endif /* !ISCSI_H */