diff --git a/sys/cam/ctl/ctl_frontend_iscsi.c b/sys/cam/ctl/ctl_frontend_iscsi.c index cccb3fd90317..b78c6a7d8441 100644 --- a/sys/cam/ctl/ctl_frontend_iscsi.c +++ b/sys/cam/ctl/ctl_frontend_iscsi.c @@ -1,3077 +1,3077 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * CTL frontend for the iSCSI protocol. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ICL_KERNEL_PROXY #include #endif #ifdef ICL_KERNEL_PROXY FEATURE(cfiscsi_kernel_proxy, "iSCSI target built with ICL_KERNEL_PROXY"); #endif /* Used for internal nexus reset task. */ #define ISCSI_BHS_OPCODE_INTERNAL 0x3e static MALLOC_DEFINE(M_CFISCSI, "cfiscsi", "Memory used for CTL iSCSI frontend"); static uma_zone_t cfiscsi_data_wait_zone; SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, iscsi, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "CAM Target Layer iSCSI Frontend"); static int debug = 1; SYSCTL_INT(_kern_cam_ctl_iscsi, OID_AUTO, debug, CTLFLAG_RWTUN, &debug, 1, "Enable debug messages"); static int ping_timeout = 5; SYSCTL_INT(_kern_cam_ctl_iscsi, OID_AUTO, ping_timeout, CTLFLAG_RWTUN, &ping_timeout, 5, "Interval between ping (NOP-Out) requests, in seconds"); static int login_timeout = 60; SYSCTL_INT(_kern_cam_ctl_iscsi, OID_AUTO, login_timeout, CTLFLAG_RWTUN, &login_timeout, 60, "Time to wait for ctld(8) to finish Login Phase, in seconds"); static int maxtags = 256; SYSCTL_INT(_kern_cam_ctl_iscsi, OID_AUTO, maxtags, CTLFLAG_RWTUN, &maxtags, 0, "Max number of requests queued by initiator"); #define CFISCSI_DEBUG(X, ...) \ do { \ if (debug > 1) { \ printf("%s: " X "\n", \ __func__, ## __VA_ARGS__); \ } \ } while (0) #define CFISCSI_WARN(X, ...) \ do { \ if (debug > 0) { \ printf("WARNING: %s: " X "\n", \ __func__, ## __VA_ARGS__); \ } \ } while (0) #define CFISCSI_SESSION_DEBUG(S, X, ...) \ do { \ if (debug > 1) { \ printf("%s: %s (%s): " X "\n", \ __func__, S->cs_initiator_addr, \ S->cs_initiator_name, ## __VA_ARGS__); \ } \ } while (0) #define CFISCSI_SESSION_WARN(S, X, ...) 
\ do { \ if (debug > 0) { \ printf("WARNING: %s (%s): " X "\n", \ S->cs_initiator_addr, \ S->cs_initiator_name, ## __VA_ARGS__); \ } \ } while (0) #define CFISCSI_SESSION_LOCK(X) mtx_lock(&X->cs_lock) #define CFISCSI_SESSION_UNLOCK(X) mtx_unlock(&X->cs_lock) #define CFISCSI_SESSION_LOCK_ASSERT(X) mtx_assert(&X->cs_lock, MA_OWNED) #define CONN_SESSION(X) ((struct cfiscsi_session *)(X)->ic_prv0) #define PDU_SESSION(X) CONN_SESSION((X)->ip_conn) struct cfiscsi_priv { void *request; uint32_t expdatasn; uint32_t r2tsn; }; #define PRIV(io) \ ((struct cfiscsi_priv *)&(io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND]) #define PRIV_REQUEST(io) PRIV(io)->request #define PRIV_EXPDATASN(io) PRIV(io)->expdatasn #define PRIV_R2TSN(io) PRIV(io)->r2tsn static int cfiscsi_init(void); static int cfiscsi_shutdown(void); static void cfiscsi_online(void *arg); static void cfiscsi_offline(void *arg); static int cfiscsi_info(void *arg, struct sbuf *sb); static int cfiscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td); static void cfiscsi_datamove(union ctl_io *io); static void cfiscsi_datamove_in(union ctl_io *io); static void cfiscsi_datamove_out(union ctl_io *io); static void cfiscsi_done(union ctl_io *io); static bool cfiscsi_pdu_update_cmdsn(const struct icl_pdu *request); static void cfiscsi_pdu_handle_nop_out(struct icl_pdu *request); static void cfiscsi_pdu_handle_scsi_command(struct icl_pdu *request); static void cfiscsi_pdu_handle_task_request(struct icl_pdu *request); static void cfiscsi_pdu_handle_data_out(struct icl_pdu *request); static void cfiscsi_pdu_handle_logout_request(struct icl_pdu *request); static void cfiscsi_session_terminate(struct cfiscsi_session *cs); static struct cfiscsi_data_wait *cfiscsi_data_wait_new( struct cfiscsi_session *cs, union ctl_io *io, uint32_t initiator_task_tag, uint32_t *target_transfer_tagp); static void cfiscsi_data_wait_free(struct cfiscsi_session *cs, struct cfiscsi_data_wait *cdw); static struct cfiscsi_target 
*cfiscsi_target_find(struct cfiscsi_softc *softc,
		    const char *name, uint16_t tag);
static struct cfiscsi_target	*cfiscsi_target_find_or_create(
    struct cfiscsi_softc *softc, const char *name,
    const char *alias, uint16_t tag);
static void	cfiscsi_target_release(struct cfiscsi_target *ct);
static void	cfiscsi_session_delete(struct cfiscsi_session *cs);

static struct cfiscsi_softc cfiscsi_softc;

/* Entry points registered with CTL for the "iscsi" frontend. */
static struct ctl_frontend cfiscsi_frontend =
{
	.name = "iscsi",
	.init = cfiscsi_init,
	.ioctl = cfiscsi_ioctl,
	.shutdown = cfiscsi_shutdown,
};
CTL_FRONTEND_DECLARE(cfiscsi, cfiscsi_frontend);
MODULE_DEPEND(cfiscsi, icl, 1, 1, 1);

/*
 * Allocate a new PDU on the same connection as 'request'; 'flags' is handed
 * to icl_pdu_new() unchanged.
 */
static struct icl_pdu *
cfiscsi_pdu_new_response(struct icl_pdu *request, int flags)
{

	return (icl_pdu_new(request->ip_conn, flags));
}

/*
 * Reset the session's ping timer and validate the CmdSN of an incoming PDU.
 *
 * Returns true when the caller must discard the PDU without handling it,
 * false when the PDU should be processed.  A CmdSN that is neither the
 * expected one nor silently ignorable also requests session termination.
 */
static bool
cfiscsi_pdu_update_cmdsn(const struct icl_pdu *request)
{
	const struct iscsi_bhs_scsi_command *bhssc;
	struct cfiscsi_session *cs;
	uint32_t cmdsn, curcmdsn;

	cs = PDU_SESSION(request);

	/*
	 * Every incoming PDU - not just NOP-Out - resets the ping timer.
	 * The purpose of the timeout is to reset the connection when it stalls;
	 * we don't want this to happen when NOP-In or NOP-Out ends up delayed
	 * in some queue.
	 */
	cs->cs_timeout = 0;

	/*
	 * Immediate commands carry cmdsn, but it is neither incremented nor
	 * verified.
	 */
	if (request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_IMMEDIATE)
		return (false);

	/*
	 * Data-Out PDUs don't contain CmdSN.
	 */
	if (request->ip_bhs->bhs_opcode == ISCSI_BHS_OPCODE_SCSI_DATA_OUT)
		return (false);

	/*
	 * We're only using fields common for all the request
	 * (initiator -> target) PDUs.
	 */
	bhssc = (const struct iscsi_bhs_scsi_command *)request->ip_bhs;
	curcmdsn = cmdsn = ntohl(bhssc->bhssc_cmdsn);

	/*
	 * Increment session cmdsn and exit if we received the expected value.
	 *
	 * NOTE(review): the retry condition presumes that a failed
	 * atomic_fcmpset_32() stores the value it observed into curcmdsn, so
	 * the loop only repeats on a spurious failure - confirm in atomic(9).
	 */
	do {
		if (atomic_fcmpset_32(&cs->cs_cmdsn, &curcmdsn, cmdsn + 1))
			return (false);
	} while (curcmdsn == cmdsn);

	/*
	 * The target MUST silently ignore any non-immediate command outside
	 * of this range.
	 */
	if (ISCSI_SNLT(cmdsn, curcmdsn) ||
	    ISCSI_SNGT(cmdsn, curcmdsn - 1 + maxtags)) {
		CFISCSI_SESSION_WARN(cs, "received PDU with CmdSN %u, "
		    "while expected %u", cmdsn, curcmdsn);
		return (true);
	}

	/*
	 * We don't support multiple connections now, so any discontinuity in
	 * CmdSN means lost PDUs.  Since we don't support PDU retransmission --
	 * terminate the connection.
	 */
	CFISCSI_SESSION_WARN(cs, "received PDU with CmdSN %u, "
	    "while expected %u; dropping connection",
	    cmdsn, curcmdsn);
	cfiscsi_session_terminate(cs);
	return (true);
}

/*
 * Dispatch an incoming PDU to the handler for its opcode, after the CmdSN
 * check.  PDUs the check rejects are freed here; an unsupported opcode
 * frees the PDU and terminates the session.
 */
static void
cfiscsi_pdu_handle(struct icl_pdu *request)
{
	struct cfiscsi_session *cs;
	bool ignore;

	cs = PDU_SESSION(request);

	ignore = cfiscsi_pdu_update_cmdsn(request);
	if (ignore) {
		icl_pdu_free(request);
		return;
	}

	/*
	 * Handle the PDU; this includes e.g. receiving the remaining
	 * part of PDU and submitting the SCSI command to CTL
	 * or queueing a reply.  The handling routine is responsible
	 * for freeing the PDU when it's no longer needed.
	 */
	/* The immediate-delivery bit is masked off before matching. */
	switch (request->ip_bhs->bhs_opcode &
	    ~ISCSI_BHS_OPCODE_IMMEDIATE) {
	case ISCSI_BHS_OPCODE_NOP_OUT:
		cfiscsi_pdu_handle_nop_out(request);
		break;
	case ISCSI_BHS_OPCODE_SCSI_COMMAND:
		cfiscsi_pdu_handle_scsi_command(request);
		break;
	case ISCSI_BHS_OPCODE_TASK_REQUEST:
		cfiscsi_pdu_handle_task_request(request);
		break;
	case ISCSI_BHS_OPCODE_SCSI_DATA_OUT:
		cfiscsi_pdu_handle_data_out(request);
		break;
	case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
		cfiscsi_pdu_handle_logout_request(request);
		break;
	default:
		CFISCSI_SESSION_WARN(cs, "received PDU with unsupported "
		    "opcode 0x%x; dropping connection",
		    request->ip_bhs->bhs_opcode);
		icl_pdu_free(request);
		cfiscsi_session_terminate(cs);
	}

}

/*
 * Receive entry point for a PDU.  With ICL_KERNEL_PROXY, a PDU arriving
 * while the session waits for ctld or is in the login phase is stashed in
 * cs_login_pdu (or freed if one is already stashed) and the login waiter is
 * signalled; otherwise the PDU goes to cfiscsi_pdu_handle().
 */
static void
cfiscsi_receive_callback(struct icl_pdu *request)
{
#ifdef ICL_KERNEL_PROXY
	struct cfiscsi_session *cs;

	cs = PDU_SESSION(request);
	if (cs->cs_waiting_for_ctld || cs->cs_login_phase) {
		if (cs->cs_login_pdu == NULL)
			cs->cs_login_pdu = request;
		else
			icl_pdu_free(request);
		cv_signal(&cs->cs_login_cv);
		return;
	}
#endif
cfiscsi_pdu_handle(request); } static void cfiscsi_error_callback(struct icl_conn *ic) { struct cfiscsi_session *cs; cs = CONN_SESSION(ic); CFISCSI_SESSION_WARN(cs, "connection error; dropping connection"); cfiscsi_session_terminate(cs); } static int cfiscsi_pdu_prepare(struct icl_pdu *response) { struct cfiscsi_session *cs; struct iscsi_bhs_scsi_response *bhssr; bool advance_statsn = true; uint32_t cmdsn; cs = PDU_SESSION(response); CFISCSI_SESSION_LOCK_ASSERT(cs); /* * We're only using fields common for all the response * (target -> initiator) PDUs. */ bhssr = (struct iscsi_bhs_scsi_response *)response->ip_bhs; /* * 10.8.3: "The StatSN for this connection is not advanced * after this PDU is sent." */ if (bhssr->bhssr_opcode == ISCSI_BHS_OPCODE_R2T) advance_statsn = false; /* * 10.19.2: "However, when the Initiator Task Tag is set to 0xffffffff, * StatSN for the connection is not advanced after this PDU is sent." */ if (bhssr->bhssr_opcode == ISCSI_BHS_OPCODE_NOP_IN && bhssr->bhssr_initiator_task_tag == 0xffffffff) advance_statsn = false; /* * See the comment below - StatSN is not meaningful and must * not be advanced. */ if (bhssr->bhssr_opcode == ISCSI_BHS_OPCODE_SCSI_DATA_IN && (bhssr->bhssr_flags & BHSDI_FLAGS_S) == 0) advance_statsn = false; /* * 10.7.3: "The fields StatSN, Status, and Residual Count * only have meaningful content if the S bit is set to 1." 
*/ if (bhssr->bhssr_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_IN || (bhssr->bhssr_flags & BHSDI_FLAGS_S)) bhssr->bhssr_statsn = htonl(cs->cs_statsn); cmdsn = cs->cs_cmdsn; bhssr->bhssr_expcmdsn = htonl(cmdsn); bhssr->bhssr_maxcmdsn = htonl(cmdsn - 1 + imax(0, maxtags - cs->cs_outstanding_ctl_pdus)); if (advance_statsn) cs->cs_statsn++; return (0); } static void cfiscsi_pdu_queue(struct icl_pdu *response) { struct cfiscsi_session *cs; cs = PDU_SESSION(response); CFISCSI_SESSION_LOCK(cs); cfiscsi_pdu_prepare(response); icl_pdu_queue(response); CFISCSI_SESSION_UNLOCK(cs); } static void cfiscsi_pdu_queue_cb(struct icl_pdu *response, icl_pdu_cb cb) { struct cfiscsi_session *cs = PDU_SESSION(response); CFISCSI_SESSION_LOCK(cs); cfiscsi_pdu_prepare(response); icl_pdu_queue_cb(response, cb); CFISCSI_SESSION_UNLOCK(cs); } static void cfiscsi_pdu_handle_nop_out(struct icl_pdu *request) { struct cfiscsi_session *cs; struct iscsi_bhs_nop_out *bhsno; struct iscsi_bhs_nop_in *bhsni; struct icl_pdu *response; void *data = NULL; size_t datasize; int error; cs = PDU_SESSION(request); bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs; if (bhsno->bhsno_initiator_task_tag == 0xffffffff) { /* * Nothing to do, iscsi_pdu_update_statsn() already * zeroed the timeout. 
*/ icl_pdu_free(request); return; } datasize = icl_pdu_data_segment_length(request); if (datasize > 0) { data = malloc(datasize, M_CFISCSI, M_NOWAIT | M_ZERO); if (data == NULL) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory; " "dropping connection"); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } icl_pdu_get_data(request, 0, data, datasize); } response = cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory; " "droppping connection"); free(data, M_CFISCSI); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } bhsni = (struct iscsi_bhs_nop_in *)response->ip_bhs; bhsni->bhsni_opcode = ISCSI_BHS_OPCODE_NOP_IN; bhsni->bhsni_flags = 0x80; bhsni->bhsni_initiator_task_tag = bhsno->bhsno_initiator_task_tag; bhsni->bhsni_target_transfer_tag = 0xffffffff; if (datasize > 0) { error = icl_pdu_append_data(response, data, datasize, M_NOWAIT); if (error != 0) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory; " "dropping connection"); free(data, M_CFISCSI); icl_pdu_free(request); icl_pdu_free(response); cfiscsi_session_terminate(cs); return; } free(data, M_CFISCSI); } icl_pdu_free(request); cfiscsi_pdu_queue(response); } static void cfiscsi_pdu_handle_scsi_command(struct icl_pdu *request) { struct iscsi_bhs_scsi_command *bhssc; struct cfiscsi_session *cs; union ctl_io *io; int error; cs = PDU_SESSION(request); bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs; //CFISCSI_SESSION_DEBUG(cs, "initiator task tag 0x%x", // bhssc->bhssc_initiator_task_tag); if (request->ip_data_len > 0 && cs->cs_immediate_data == false) { CFISCSI_SESSION_WARN(cs, "unsolicited data with " "ImmediateData=No; dropping connection"); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } io = ctl_alloc_io(cs->cs_target->ct_port.ctl_pool_ref); ctl_zero_io(io); PRIV_REQUEST(io) = request; io->io_hdr.io_type = CTL_IO_SCSI; io->io_hdr.nexus.initid = cs->cs_ctl_initid; 
io->io_hdr.nexus.targ_port = cs->cs_target->ct_port.targ_port; io->io_hdr.nexus.targ_lun = ctl_decode_lun(be64toh(bhssc->bhssc_lun)); io->scsiio.priority = (bhssc->bhssc_pri & BHSSC_PRI_MASK) >> BHSSC_PRI_SHIFT; io->scsiio.tag_num = bhssc->bhssc_initiator_task_tag; switch ((bhssc->bhssc_flags & BHSSC_FLAGS_ATTR)) { case BHSSC_FLAGS_ATTR_UNTAGGED: io->scsiio.tag_type = CTL_TAG_UNTAGGED; break; case BHSSC_FLAGS_ATTR_SIMPLE: io->scsiio.tag_type = CTL_TAG_SIMPLE; break; case BHSSC_FLAGS_ATTR_ORDERED: io->scsiio.tag_type = CTL_TAG_ORDERED; break; case BHSSC_FLAGS_ATTR_HOQ: io->scsiio.tag_type = CTL_TAG_HEAD_OF_QUEUE; break; case BHSSC_FLAGS_ATTR_ACA: io->scsiio.tag_type = CTL_TAG_ACA; break; default: io->scsiio.tag_type = CTL_TAG_UNTAGGED; CFISCSI_SESSION_WARN(cs, "unhandled tag type %d", bhssc->bhssc_flags & BHSSC_FLAGS_ATTR); break; } io->scsiio.cdb_len = sizeof(bhssc->bhssc_cdb); /* Which is 16. */ memcpy(io->scsiio.cdb, bhssc->bhssc_cdb, sizeof(bhssc->bhssc_cdb)); refcount_acquire(&cs->cs_outstanding_ctl_pdus); error = ctl_run(io); if (error != CTL_RETVAL_COMPLETE) { CFISCSI_SESSION_WARN(cs, "ctl_run() failed; error %d; " "dropping connection", error); ctl_free_io(io); refcount_release(&cs->cs_outstanding_ctl_pdus); icl_pdu_free(request); cfiscsi_session_terminate(cs); } } static void cfiscsi_pdu_handle_task_request(struct icl_pdu *request) { struct iscsi_bhs_task_management_request *bhstmr; struct iscsi_bhs_task_management_response *bhstmr2; struct icl_pdu *response; struct cfiscsi_session *cs; union ctl_io *io; int error; cs = PDU_SESSION(request); bhstmr = (struct iscsi_bhs_task_management_request *)request->ip_bhs; io = ctl_alloc_io(cs->cs_target->ct_port.ctl_pool_ref); ctl_zero_io(io); PRIV_REQUEST(io) = request; io->io_hdr.io_type = CTL_IO_TASK; io->io_hdr.nexus.initid = cs->cs_ctl_initid; io->io_hdr.nexus.targ_port = cs->cs_target->ct_port.targ_port; io->io_hdr.nexus.targ_lun = ctl_decode_lun(be64toh(bhstmr->bhstmr_lun)); io->taskio.tag_type = CTL_TAG_SIMPLE; 
/* XXX */ switch (bhstmr->bhstmr_function & ~0x80) { case BHSTMR_FUNCTION_ABORT_TASK: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_ABORT_TASK"); #endif io->taskio.task_action = CTL_TASK_ABORT_TASK; io->taskio.tag_num = bhstmr->bhstmr_referenced_task_tag; break; case BHSTMR_FUNCTION_ABORT_TASK_SET: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_ABORT_TASK_SET"); #endif io->taskio.task_action = CTL_TASK_ABORT_TASK_SET; break; case BHSTMR_FUNCTION_CLEAR_TASK_SET: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_CLEAR_TASK_SET"); #endif io->taskio.task_action = CTL_TASK_CLEAR_TASK_SET; break; case BHSTMR_FUNCTION_LOGICAL_UNIT_RESET: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_LOGICAL_UNIT_RESET"); #endif io->taskio.task_action = CTL_TASK_LUN_RESET; break; case BHSTMR_FUNCTION_TARGET_WARM_RESET: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_TARGET_WARM_RESET"); #endif io->taskio.task_action = CTL_TASK_TARGET_RESET; break; case BHSTMR_FUNCTION_TARGET_COLD_RESET: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_TARGET_COLD_RESET"); #endif io->taskio.task_action = CTL_TASK_TARGET_RESET; break; case BHSTMR_FUNCTION_QUERY_TASK: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_QUERY_TASK"); #endif io->taskio.task_action = CTL_TASK_QUERY_TASK; io->taskio.tag_num = bhstmr->bhstmr_referenced_task_tag; break; case BHSTMR_FUNCTION_QUERY_TASK_SET: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_QUERY_TASK_SET"); #endif io->taskio.task_action = CTL_TASK_QUERY_TASK_SET; break; case BHSTMR_FUNCTION_I_T_NEXUS_RESET: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_I_T_NEXUS_RESET"); #endif io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET; break; case BHSTMR_FUNCTION_QUERY_ASYNC_EVENT: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_QUERY_ASYNC_EVENT"); #endif io->taskio.task_action = CTL_TASK_QUERY_ASYNC_EVENT; break; default: CFISCSI_SESSION_DEBUG(cs, "unsupported function 0x%x", bhstmr->bhstmr_function & ~0x80); ctl_free_io(io); response = 
cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory; " "dropping connection"); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } bhstmr2 = (struct iscsi_bhs_task_management_response *) response->ip_bhs; bhstmr2->bhstmr_opcode = ISCSI_BHS_OPCODE_TASK_RESPONSE; bhstmr2->bhstmr_flags = 0x80; bhstmr2->bhstmr_response = BHSTMR_RESPONSE_FUNCTION_NOT_SUPPORTED; bhstmr2->bhstmr_initiator_task_tag = bhstmr->bhstmr_initiator_task_tag; icl_pdu_free(request); cfiscsi_pdu_queue(response); return; } refcount_acquire(&cs->cs_outstanding_ctl_pdus); error = ctl_run(io); if (error != CTL_RETVAL_COMPLETE) { CFISCSI_SESSION_WARN(cs, "ctl_run() failed; error %d; " "dropping connection", error); ctl_free_io(io); refcount_release(&cs->cs_outstanding_ctl_pdus); icl_pdu_free(request); cfiscsi_session_terminate(cs); } } static bool cfiscsi_handle_data_segment(struct icl_pdu *request, struct cfiscsi_data_wait *cdw) { struct iscsi_bhs_data_out *bhsdo; struct cfiscsi_session *cs; struct ctl_sg_entry ctl_sg_entry, *ctl_sglist; size_t copy_len, len, off, buffer_offset; int ctl_sg_count; union ctl_io *io; cs = PDU_SESSION(request); KASSERT((request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_DATA_OUT || (request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_COMMAND, ("bad opcode 0x%x", request->ip_bhs->bhs_opcode)); /* * We're only using fields common for Data-Out and SCSI Command PDUs. 
*/ bhsdo = (struct iscsi_bhs_data_out *)request->ip_bhs; io = cdw->cdw_ctl_io; KASSERT((io->io_hdr.flags & CTL_FLAG_DATA_MASK) != CTL_FLAG_DATA_IN, ("CTL_FLAG_DATA_IN")); #if 0 CFISCSI_SESSION_DEBUG(cs, "received %zd bytes out of %d", request->ip_data_len, io->scsiio.kern_total_len); #endif if (io->scsiio.kern_sg_entries > 0) { ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr; ctl_sg_count = io->scsiio.kern_sg_entries; } else { ctl_sglist = &ctl_sg_entry; ctl_sglist->addr = io->scsiio.kern_data_ptr; ctl_sglist->len = io->scsiio.kern_data_len; ctl_sg_count = 1; } if ((request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_DATA_OUT) buffer_offset = ntohl(bhsdo->bhsdo_buffer_offset); else buffer_offset = 0; len = icl_pdu_data_segment_length(request); /* * Make sure the offset, as sent by the initiator, matches the offset * we're supposed to be at in the scatter-gather list. */ if (buffer_offset > io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled || buffer_offset + len <= io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled) { CFISCSI_SESSION_WARN(cs, "received bad buffer offset %zd, " "expected %zd; dropping connection", buffer_offset, (size_t)io->scsiio.kern_rel_offset + (size_t)io->scsiio.ext_data_filled); ctl_set_data_phase_error(&io->scsiio); cfiscsi_session_terminate(cs); return (true); } /* * This is the offset within the PDU data segment, as opposed * to buffer_offset, which is the offset within the task (SCSI * command). */ off = io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled - buffer_offset; /* * Iterate over the scatter/gather segments, filling them with data * from the PDU data segment. Note that this can get called multiple * times for one SCSI command; the cdw structure holds state for the * scatter/gather list. 
*/ for (;;) { KASSERT(cdw->cdw_sg_index < ctl_sg_count, ("cdw->cdw_sg_index >= ctl_sg_count")); if (cdw->cdw_sg_len == 0) { cdw->cdw_sg_addr = ctl_sglist[cdw->cdw_sg_index].addr; cdw->cdw_sg_len = ctl_sglist[cdw->cdw_sg_index].len; } KASSERT(off <= len, ("len > off")); copy_len = len - off; if (copy_len > cdw->cdw_sg_len) copy_len = cdw->cdw_sg_len; icl_pdu_get_data(request, off, cdw->cdw_sg_addr, copy_len); cdw->cdw_sg_addr += copy_len; cdw->cdw_sg_len -= copy_len; off += copy_len; io->scsiio.ext_data_filled += copy_len; io->scsiio.kern_data_resid -= copy_len; if (cdw->cdw_sg_len == 0) { /* * End of current segment. */ if (cdw->cdw_sg_index == ctl_sg_count - 1) { /* * Last segment in scatter/gather list. */ break; } cdw->cdw_sg_index++; } if (off == len) { /* * End of PDU payload. */ break; } } if (len > off) { /* * In case of unsolicited data, it's possible that the buffer * provided by CTL is smaller than negotiated FirstBurstLength. * Just ignore the superfluous data; will ask for them with R2T * on next call to cfiscsi_datamove(). * * This obviously can only happen with SCSI Command PDU. 
*/ if ((request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_COMMAND) return (true); CFISCSI_SESSION_WARN(cs, "received too much data: got %zd bytes, " "expected %zd; dropping connection", icl_pdu_data_segment_length(request), off); ctl_set_data_phase_error(&io->scsiio); cfiscsi_session_terminate(cs); return (true); } if (io->scsiio.ext_data_filled == cdw->cdw_r2t_end && (bhsdo->bhsdo_flags & BHSDO_FLAGS_F) == 0) { CFISCSI_SESSION_WARN(cs, "got the final packet without " "the F flag; flags = 0x%x; dropping connection", bhsdo->bhsdo_flags); ctl_set_data_phase_error(&io->scsiio); cfiscsi_session_terminate(cs); return (true); } if (io->scsiio.ext_data_filled != cdw->cdw_r2t_end && (bhsdo->bhsdo_flags & BHSDO_FLAGS_F) != 0) { if ((request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_DATA_OUT) { CFISCSI_SESSION_WARN(cs, "got the final packet, but the " "transmitted size was %zd bytes instead of %d; " "dropping connection", (size_t)io->scsiio.ext_data_filled, cdw->cdw_r2t_end); ctl_set_data_phase_error(&io->scsiio); cfiscsi_session_terminate(cs); return (true); } else { /* * For SCSI Command PDU, this just means we need to * solicit more data by sending R2T. 
*/ return (false); } } if (io->scsiio.ext_data_filled == cdw->cdw_r2t_end) { #if 0 CFISCSI_SESSION_DEBUG(cs, "no longer expecting Data-Out with target " "transfer tag 0x%x", cdw->cdw_target_transfer_tag); #endif return (true); } return (false); } static void cfiscsi_pdu_handle_data_out(struct icl_pdu *request) { struct iscsi_bhs_data_out *bhsdo; struct cfiscsi_session *cs; struct cfiscsi_data_wait *cdw = NULL; union ctl_io *io; bool done; cs = PDU_SESSION(request); bhsdo = (struct iscsi_bhs_data_out *)request->ip_bhs; CFISCSI_SESSION_LOCK(cs); TAILQ_FOREACH(cdw, &cs->cs_waiting_for_data_out, cdw_next) { #if 0 CFISCSI_SESSION_DEBUG(cs, "have ttt 0x%x, itt 0x%x; looking for " "ttt 0x%x, itt 0x%x", bhsdo->bhsdo_target_transfer_tag, bhsdo->bhsdo_initiator_task_tag, cdw->cdw_target_transfer_tag, cdw->cdw_initiator_task_tag)); #endif if (bhsdo->bhsdo_target_transfer_tag == cdw->cdw_target_transfer_tag) break; } CFISCSI_SESSION_UNLOCK(cs); if (cdw == NULL) { CFISCSI_SESSION_WARN(cs, "data transfer tag 0x%x, initiator task tag " "0x%x, not found; dropping connection", bhsdo->bhsdo_target_transfer_tag, bhsdo->bhsdo_initiator_task_tag); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } if (cdw->cdw_datasn != ntohl(bhsdo->bhsdo_datasn)) { CFISCSI_SESSION_WARN(cs, "received Data-Out PDU with " "DataSN %u, while expected %u; dropping connection", ntohl(bhsdo->bhsdo_datasn), cdw->cdw_datasn); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } cdw->cdw_datasn += request->ip_additional_pdus + 1; io = cdw->cdw_ctl_io; KASSERT((io->io_hdr.flags & CTL_FLAG_DATA_MASK) != CTL_FLAG_DATA_IN, ("CTL_FLAG_DATA_IN")); done = cfiscsi_handle_data_segment(request, cdw); if (done) { CFISCSI_SESSION_LOCK(cs); TAILQ_REMOVE(&cs->cs_waiting_for_data_out, cdw, cdw_next); CFISCSI_SESSION_UNLOCK(cs); done = (io->scsiio.ext_data_filled != cdw->cdw_r2t_end || io->scsiio.ext_data_filled == io->scsiio.kern_data_len); cfiscsi_data_wait_free(cs, cdw); io->io_hdr.flags &= 
~CTL_FLAG_DMA_INPROG; if (done) ctl_datamove_done(io, false); else cfiscsi_datamove_out(io); } icl_pdu_free(request); } static void cfiscsi_pdu_handle_logout_request(struct icl_pdu *request) { struct iscsi_bhs_logout_request *bhslr; struct iscsi_bhs_logout_response *bhslr2; struct icl_pdu *response; struct cfiscsi_session *cs; cs = PDU_SESSION(request); bhslr = (struct iscsi_bhs_logout_request *)request->ip_bhs; switch (bhslr->bhslr_reason & 0x7f) { case BHSLR_REASON_CLOSE_SESSION: case BHSLR_REASON_CLOSE_CONNECTION: response = cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_DEBUG(cs, "failed to allocate memory"); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } bhslr2 = (struct iscsi_bhs_logout_response *)response->ip_bhs; bhslr2->bhslr_opcode = ISCSI_BHS_OPCODE_LOGOUT_RESPONSE; bhslr2->bhslr_flags = 0x80; bhslr2->bhslr_response = BHSLR_RESPONSE_CLOSED_SUCCESSFULLY; bhslr2->bhslr_initiator_task_tag = bhslr->bhslr_initiator_task_tag; icl_pdu_free(request); cfiscsi_pdu_queue(response); cfiscsi_session_terminate(cs); break; case BHSLR_REASON_REMOVE_FOR_RECOVERY: response = cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory; dropping connection"); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } bhslr2 = (struct iscsi_bhs_logout_response *)response->ip_bhs; bhslr2->bhslr_opcode = ISCSI_BHS_OPCODE_LOGOUT_RESPONSE; bhslr2->bhslr_flags = 0x80; bhslr2->bhslr_response = BHSLR_RESPONSE_RECOVERY_NOT_SUPPORTED; bhslr2->bhslr_initiator_task_tag = bhslr->bhslr_initiator_task_tag; icl_pdu_free(request); cfiscsi_pdu_queue(response); break; default: CFISCSI_SESSION_WARN(cs, "invalid reason 0%x; dropping connection", bhslr->bhslr_reason); icl_pdu_free(request); cfiscsi_session_terminate(cs); break; } } static void cfiscsi_callout(void *context) { struct icl_pdu *cp; struct iscsi_bhs_nop_in *bhsni; struct cfiscsi_session *cs; cs = context; if 
(cs->cs_terminating) return; callout_schedule(&cs->cs_callout, 1 * hz); atomic_add_int(&cs->cs_timeout, 1); #ifdef ICL_KERNEL_PROXY if (cs->cs_waiting_for_ctld || cs->cs_login_phase) { if (login_timeout > 0 && cs->cs_timeout > login_timeout) { CFISCSI_SESSION_WARN(cs, "login timed out after " "%d seconds; dropping connection", cs->cs_timeout); cfiscsi_session_terminate(cs); } return; } #endif if (ping_timeout <= 0) { /* * Pings are disabled. Don't send NOP-In in this case; * user might have disabled pings to work around problems * with certain initiators that can't properly handle * NOP-In, such as iPXE. Reset the timeout, to avoid * triggering reconnection, should the user decide to * reenable them. */ cs->cs_timeout = 0; return; } if (cs->cs_timeout >= ping_timeout) { CFISCSI_SESSION_WARN(cs, "no ping reply (NOP-Out) after %d seconds; " "dropping connection", ping_timeout); cfiscsi_session_terminate(cs); return; } /* * If the ping was reset less than one second ago - which means * that we've received some PDU during the last second - assume * the traffic flows correctly and don't bother sending a NOP-Out. * * (It's 2 - one for one second, and one for incrementing is_timeout * earlier in this routine.) 
*/ if (cs->cs_timeout < 2) return; cp = icl_pdu_new(cs->cs_conn, M_NOWAIT); if (cp == NULL) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory"); return; } bhsni = (struct iscsi_bhs_nop_in *)cp->ip_bhs; bhsni->bhsni_opcode = ISCSI_BHS_OPCODE_NOP_IN; bhsni->bhsni_flags = 0x80; bhsni->bhsni_initiator_task_tag = 0xffffffff; cfiscsi_pdu_queue(cp); } static struct cfiscsi_data_wait * cfiscsi_data_wait_new(struct cfiscsi_session *cs, union ctl_io *io, uint32_t initiator_task_tag, uint32_t *target_transfer_tagp) { struct cfiscsi_data_wait *cdw; int error; cdw = uma_zalloc(cfiscsi_data_wait_zone, M_NOWAIT | M_ZERO); if (cdw == NULL) { CFISCSI_SESSION_WARN(cs, "failed to allocate %zd bytes", sizeof(*cdw)); return (NULL); } error = icl_conn_transfer_setup(cs->cs_conn, PRIV_REQUEST(io), io, target_transfer_tagp, &cdw->cdw_icl_prv); if (error != 0) { CFISCSI_SESSION_WARN(cs, "icl_conn_transfer_setup() failed with error %d", error); uma_zfree(cfiscsi_data_wait_zone, cdw); return (NULL); } cdw->cdw_ctl_io = io; cdw->cdw_target_transfer_tag = *target_transfer_tagp; cdw->cdw_initiator_task_tag = initiator_task_tag; return (cdw); } static void cfiscsi_data_wait_free(struct cfiscsi_session *cs, struct cfiscsi_data_wait *cdw) { icl_conn_transfer_done(cs->cs_conn, cdw->cdw_icl_prv); uma_zfree(cfiscsi_data_wait_zone, cdw); } static void cfiscsi_data_wait_abort(struct cfiscsi_session *cs, struct cfiscsi_data_wait *cdw, int status) { union ctl_io *cdw_io; /* * Set nonzero port status; this prevents backends from * assuming that the data transfer actually succeeded * and writing uninitialized data to disk. 
*/
	MPASS(status != 0);
	cdw_io = cdw->cdw_ctl_io;
	cdw_io->io_hdr.flags &= ~CTL_FLAG_DMA_INPROG;
	cdw_io->scsiio.io_hdr.port_status = status;
	cfiscsi_data_wait_free(cs, cdw);
	ctl_datamove_done(cdw_io, false);
}

/*
 * Abort everything the session still has in flight: submit an internal
 * I_T nexus reset to CTL, abort every pending Data-Out wait, then sleep
 * until the outstanding-PDU count drops to zero.  Does nothing if the
 * session has no target yet.
 */
static void
cfiscsi_session_terminate_tasks(struct cfiscsi_session *cs)
{
	struct cfiscsi_data_wait *cdw;
	struct icl_pdu *ip;
	union ctl_io *io;
	int error, last, wait;

	if (cs->cs_target == NULL)
		return;		/* No target yet, so nothing to do. */
	/* A placeholder PDU marked with the internal opcode carries the reset. */
	ip = icl_pdu_new(cs->cs_conn, M_WAITOK);
	ip->ip_bhs->bhs_opcode = ISCSI_BHS_OPCODE_INTERNAL;
	io = ctl_alloc_io(cs->cs_target->ct_port.ctl_pool_ref);
	ctl_zero_io(io);
	PRIV_REQUEST(io) = ip;
	io->io_hdr.io_type = CTL_IO_TASK;
	io->io_hdr.nexus.initid = cs->cs_ctl_initid;
	io->io_hdr.nexus.targ_port = cs->cs_target->ct_port.targ_port;
	io->io_hdr.nexus.targ_lun = 0;
	io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX */
	io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET;
	/* Sampled before the reset is counted; used only for the log lines. */
	wait = cs->cs_outstanding_ctl_pdus;
	refcount_acquire(&cs->cs_outstanding_ctl_pdus);
	error = ctl_run(io);
	if (error != CTL_RETVAL_COMPLETE) {
		CFISCSI_SESSION_WARN(cs, "ctl_run() failed; error %d", error);
		refcount_release(&cs->cs_outstanding_ctl_pdus);
		ctl_free_io(io);
		icl_pdu_free(ip);
	}

	CFISCSI_SESSION_LOCK(cs);
	cs->cs_terminating_tasks = true;
	/* The lock is dropped around each abort and retaken for the next. */
	while ((cdw = TAILQ_FIRST(&cs->cs_waiting_for_data_out)) != NULL) {
		TAILQ_REMOVE(&cs->cs_waiting_for_data_out, cdw, cdw_next);
		CFISCSI_SESSION_UNLOCK(cs);
		/* Any nonzero status works here; see cfiscsi_data_wait_abort(). */
		cfiscsi_data_wait_abort(cs, cdw, 42);
		CFISCSI_SESSION_LOCK(cs);
	}
	CFISCSI_SESSION_UNLOCK(cs);

	/*
	 * Wait for CTL to terminate all the tasks.
	 */
	if (wait > 0)
		CFISCSI_SESSION_WARN(cs,
		    "waiting for CTL to terminate %d tasks", wait);
	/*
	 * Take and immediately drop a reference; the loop ends when
	 * refcount_release() reports a nonzero result, and otherwise sleeps
	 * for hz / 100 ticks before trying again.
	 */
	for (;;) {
		refcount_acquire(&cs->cs_outstanding_ctl_pdus);
		last = refcount_release(&cs->cs_outstanding_ctl_pdus);
		if (last != 0)
			break;
		tsleep(__DEVOLATILE(void *, &cs->cs_outstanding_ctl_pdus),
		    0, "cfiscsi_terminate", hz / 100);
	}
	if (wait > 0)
		CFISCSI_SESSION_WARN(cs, "tasks terminated");
}

/*
 * Per-session kernel thread.  It sleeps on cs_maintenance_cv until the
 * session is terminating with no handoff in progress, then tears the
 * session down and exits.
 */
static void
cfiscsi_maintenance_thread(void *arg)
{
	struct cfiscsi_session *cs;

	cs = arg;

	for (;;) {
		CFISCSI_SESSION_LOCK(cs);
		if (cs->cs_terminating == false || cs->cs_handoff_in_progress)
			cv_wait(&cs->cs_maintenance_cv, &cs->cs_lock);
		CFISCSI_SESSION_UNLOCK(cs);

		if (cs->cs_terminating && cs->cs_handoff_in_progress == false) {
			/*
			 * We used to wait up to 30 seconds to deliver queued
			 * PDUs to the initiator.  We also tried hard to deliver
			 * SCSI Responses for the aborted PDUs.  We don't do
			 * that anymore.  We might need to revisit that.
			 */
			callout_drain(&cs->cs_callout);
			icl_conn_close(cs->cs_conn);

			/*
			 * At this point ICL receive thread is no longer
			 * running; no new tasks can be queued.
			 */
			cfiscsi_session_terminate_tasks(cs);
			cfiscsi_session_delete(cs);
			kthread_exit();
			return;
		}
		CFISCSI_SESSION_DEBUG(cs, "nothing to do");
	}
}

/*
 * Mark the session as terminating and wake the maintenance thread (and,
 * with ICL_KERNEL_PROXY, any login waiter).  The teardown itself happens in
 * cfiscsi_maintenance_thread().
 */
static void
cfiscsi_session_terminate(struct cfiscsi_session *cs)
{

	cs->cs_terminating = true;
	cv_signal(&cs->cs_maintenance_cv);
#ifdef ICL_KERNEL_PROXY
	cv_signal(&cs->cs_login_cv);
#endif
}

/*
 * Register the session's initiator with the target's CTL port and record
 * the assigned id in cs_ctl_initid.  Returns 0 on success, 1 on failure.
 */
static int
cfiscsi_session_register_initiator(struct cfiscsi_session *cs)
{
	struct cfiscsi_target *ct;
	char *name;
	int i;

	KASSERT(cs->cs_ctl_initid == -1, ("already registered"));

	ct = cs->cs_target;
	/*
	 * NOTE(review): the duplicated name is not freed here on either
	 * path; presumably ctl_add_initiator() takes ownership - confirm.
	 */
	name = strdup(cs->cs_initiator_id, M_CTL);
	i = ctl_add_initiator(&ct->ct_port, -1, 0, name);
	if (i < 0) {
		CFISCSI_SESSION_WARN(cs, "ctl_add_initiator failed with error %d",
		    i);
		cs->cs_ctl_initid = -1;
		return (1);
	}
	cs->cs_ctl_initid = i;
#if 0
	CFISCSI_SESSION_DEBUG(cs, "added initiator id %d", i);
#endif

	return (0);
}

/*
 * Undo cfiscsi_session_register_initiator().  Does nothing when no
 * initiator id is recorded; a removal failure is logged and the id is
 * cleared regardless.
 */
static void
cfiscsi_session_unregister_initiator(struct cfiscsi_session *cs)
{
	int error;

	if (cs->cs_ctl_initid == -1)
		return;

	error = ctl_remove_initiator(&cs->cs_target->ct_port, cs->cs_ctl_initid);
	if (error != 0) {
		CFISCSI_SESSION_WARN(cs, "ctl_remove_initiator failed with error %d",
		    error);
	}
	cs->cs_ctl_initid = -1;
}

static struct cfiscsi_session *
cfiscsi_session_new(struct cfiscsi_softc *softc, const char *offload)
{
	struct cfiscsi_session *cs;
	int error;

	cs = malloc(sizeof(*cs), M_CFISCSI, M_NOWAIT | M_ZERO);
	if (cs == NULL) {
		CFISCSI_WARN("malloc failed");
		return (NULL);
	}
	cs->cs_ctl_initid = -1;

	refcount_init(&cs->cs_outstanding_ctl_pdus, 0);
	TAILQ_INIT(&cs->cs_waiting_for_data_out);
	mtx_init(&cs->cs_lock, "cfiscsi_lock", NULL, MTX_DEF);
	cv_init(&cs->cs_maintenance_cv, "cfiscsi_mt");
#ifdef ICL_KERNEL_PROXY
	cv_init(&cs->cs_login_cv, "cfiscsi_login");
#endif

	/*
	 * The purpose of this is to avoid racing with session shutdown.
	 * Otherwise we could have the maintenance thread call icl_conn_close()
	 * before we call icl_conn_handoff().
*/ cs->cs_handoff_in_progress = true; cs->cs_conn = icl_new_conn(offload, false, "cfiscsi", &cs->cs_lock); if (cs->cs_conn == NULL) { free(cs, M_CFISCSI); return (NULL); } cs->cs_conn->ic_receive = cfiscsi_receive_callback; cs->cs_conn->ic_error = cfiscsi_error_callback; cs->cs_conn->ic_prv0 = cs; error = kthread_add(cfiscsi_maintenance_thread, cs, NULL, NULL, 0, 0, "cfiscsimt"); if (error != 0) { CFISCSI_SESSION_WARN(cs, "kthread_add(9) failed with error %d", error); free(cs, M_CFISCSI); return (NULL); } mtx_lock(&softc->lock); cs->cs_id = ++softc->last_session_id; TAILQ_INSERT_TAIL(&softc->sessions, cs, cs_next); mtx_unlock(&softc->lock); /* * Start pinging the initiator. */ callout_init(&cs->cs_callout, 1); callout_reset(&cs->cs_callout, 1 * hz, cfiscsi_callout, cs); return (cs); } static void cfiscsi_session_delete(struct cfiscsi_session *cs) { struct cfiscsi_softc *softc; softc = &cfiscsi_softc; KASSERT(cs->cs_outstanding_ctl_pdus == 0, ("destroying session with outstanding CTL pdus")); KASSERT(TAILQ_EMPTY(&cs->cs_waiting_for_data_out), ("destroying session with non-empty queue")); mtx_lock(&softc->lock); TAILQ_REMOVE(&softc->sessions, cs, cs_next); mtx_unlock(&softc->lock); cfiscsi_session_unregister_initiator(cs); if (cs->cs_target != NULL) cfiscsi_target_release(cs->cs_target); icl_conn_close(cs->cs_conn); icl_conn_free(cs->cs_conn); free(cs, M_CFISCSI); cv_signal(&softc->sessions_cv); } static int cfiscsi_init(void) { struct cfiscsi_softc *softc; softc = &cfiscsi_softc; bzero(softc, sizeof(*softc)); mtx_init(&softc->lock, "cfiscsi", NULL, MTX_DEF); cv_init(&softc->sessions_cv, "cfiscsi_sessions"); #ifdef ICL_KERNEL_PROXY cv_init(&softc->accept_cv, "cfiscsi_accept"); #endif TAILQ_INIT(&softc->sessions); TAILQ_INIT(&softc->targets); cfiscsi_data_wait_zone = uma_zcreate("cfiscsi_data_wait", sizeof(struct cfiscsi_data_wait), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); return (0); } static int cfiscsi_shutdown(void) { struct cfiscsi_softc *softc = 
&cfiscsi_softc;

	/* Refuse to shut down while any session or target still exists. */
	if (!TAILQ_EMPTY(&softc->sessions) || !TAILQ_EMPTY(&softc->targets))
		return (EBUSY);

	uma_zdestroy(cfiscsi_data_wait_zone);
#ifdef ICL_KERNEL_PROXY
	cv_destroy(&softc->accept_cv);
#endif
	cv_destroy(&softc->sessions_cv);
	mtx_destroy(&softc->lock);
	return (0);
}

#ifdef ICL_KERNEL_PROXY
/*
 * Listener callback passed to icl_listen_new(): create a session for the
 * accepted socket, hand the socket to ICL, mark the session as waiting for
 * ctld and signal accept_cv.
 */
static void
cfiscsi_accept(struct socket *so, struct sockaddr *sa, int portal_id)
{
	struct cfiscsi_session *cs;

	cs = cfiscsi_session_new(&cfiscsi_softc, NULL);
	if (cs == NULL) {
		CFISCSI_WARN("failed to create session");
		return;
	}

	icl_conn_handoff_sock(cs->cs_conn, so);
	cs->cs_initiator_sa = sa;
	cs->cs_portal_id = portal_id;
	cs->cs_handoff_in_progress = false;
	cs->cs_waiting_for_ctld = true;
	cv_signal(&cfiscsi_softc.accept_cv);

	CFISCSI_SESSION_LOCK(cs);
	/*
	 * Wake up the maintenance thread if we got scheduled for termination
	 * somewhere between cfiscsi_session_new() and icl_conn_handoff_sock().
	 */
	if (cs->cs_terminating)
		cfiscsi_session_terminate(cs);
	CFISCSI_SESSION_UNLOCK(cs);
}
#endif

/*
 * CTL port_online callback; "arg" is the target.  Marks the target online
 * and bumps the softc-wide online count.  With ICL_KERNEL_PROXY, the first
 * target to come online (re)creates the listener.
 */
static void
cfiscsi_online(void *arg)
{
	struct cfiscsi_softc *softc;
	struct cfiscsi_target *ct;
	int online;

	ct = (struct cfiscsi_target *)arg;
	softc = ct->ct_softc;

	mtx_lock(&softc->lock);
	if (ct->ct_online) {
		mtx_unlock(&softc->lock);
		return;
	}
	ct->ct_online = 1;
	/* Post-increment: "online" holds the count before this target. */
	online = softc->online++;
	mtx_unlock(&softc->lock);
	if (online > 0)
		return;

#ifdef ICL_KERNEL_PROXY
	if (softc->listener != NULL)
		icl_listen_free(softc->listener);
	softc->listener = icl_listen_new(cfiscsi_accept);
#endif
}

/*
 * CTL port_offline callback; "arg" is the target.  Marks the target
 * offline, requests termination of every session bound to it and waits on
 * sessions_cv until none are left, the wait is interrupted, or the target
 * comes back online.  With ICL_KERNEL_PROXY, the listener is freed when
 * the online count reaches zero.
 */
static void
cfiscsi_offline(void *arg)
{
	struct cfiscsi_softc *softc;
	struct cfiscsi_target *ct;
	struct cfiscsi_session *cs;
	int error, online;

	ct = (struct cfiscsi_target *)arg;
	softc = ct->ct_softc;

	mtx_lock(&softc->lock);
	if (!ct->ct_online) {
		mtx_unlock(&softc->lock);
		return;
	}
	ct->ct_online = 0;
	online = --softc->online;

	do {
		TAILQ_FOREACH(cs, &softc->sessions, cs_next) {
			if (cs->cs_target == ct)
				cfiscsi_session_terminate(cs);
		}
		/* Second pass: is any session for this target still listed? */
		TAILQ_FOREACH(cs, &softc->sessions, cs_next) {
			if (cs->cs_target == ct)
				break;
		}
		if (cs != NULL) {
			error = cv_wait_sig(&softc->sessions_cv, &softc->lock);
			if (error != 0) {
				CFISCSI_SESSION_DEBUG(cs,
				    "cv_wait failed with error %d\n", error);
				break;
			}
		}
	} while (cs != NULL && ct->ct_online == 0);
	mtx_unlock(&softc->lock);
	if (online > 0)
		return;

#ifdef ICL_KERNEL_PROXY
	icl_listen_free(softc->listener);
	softc->listener = NULL;
#endif
}

/*
 * CTL port_info callback: append the target's ct_state to "sb" and return
 * the sbuf_printf() result.
 */
static int
cfiscsi_info(void *arg, struct sbuf *sb)
{
	struct cfiscsi_target *ct = (struct cfiscsi_target *)arg;
	int retval;

	retval = sbuf_printf(sb, "\t%d\n",
	    ct->ct_state);
	return (retval);
}

/*
 * CTL_ISCSI_HANDOFF: move a connection that finished the Login Phase into
 * Full Feature phase.  Looks up the target, obtains the session (a new one,
 * or with ICL_KERNEL_PROXY possibly an existing one found by connection
 * id), copies the negotiated parameters into it, terminates any older
 * session with the same initiator ID on the same target, registers the
 * initiator with CTL and hands the socket to ICL.  The outcome is reported
 * through ci->status and ci->error_str.
 */
static void
cfiscsi_ioctl_handoff(struct ctl_iscsi *ci)
{
	struct cfiscsi_softc *softc;
	struct cfiscsi_session *cs, *cs2;
	struct cfiscsi_target *ct;
	struct ctl_iscsi_handoff_params *cihp;
	int error;

	cihp = (struct ctl_iscsi_handoff_params *)&(ci->data);
	softc = &cfiscsi_softc;

	CFISCSI_DEBUG("new connection from %s (%s) to %s",
	    cihp->initiator_name, cihp->initiator_addr,
	    cihp->target_name);

	/* On success this holds a reference on the target. */
	ct = cfiscsi_target_find(softc, cihp->target_name,
	    cihp->portal_group_tag);
	if (ct == NULL) {
		ci->status = CTL_ISCSI_ERROR;
		snprintf(ci->error_str, sizeof(ci->error_str),
		    "%s: target not found", __func__);
		return;
	}

#ifdef ICL_KERNEL_PROXY
	if (cihp->socket > 0 && cihp->connection_id > 0) {
		snprintf(ci->error_str, sizeof(ci->error_str),
		    "both socket and connection_id set");
		ci->status = CTL_ISCSI_ERROR;
		cfiscsi_target_release(ct);
		return;
	}
	if (cihp->socket == 0) {
		mtx_lock(&cfiscsi_softc.lock);
		TAILQ_FOREACH(cs, &cfiscsi_softc.sessions, cs_next) {
			if (cs->cs_id == cihp->connection_id)
				break;
		}
		if (cs == NULL) {
			mtx_unlock(&cfiscsi_softc.lock);
			snprintf(ci->error_str, sizeof(ci->error_str),
			    "connection not found");
			ci->status = CTL_ISCSI_ERROR;
			cfiscsi_target_release(ct);
			return;
		}
		mtx_unlock(&cfiscsi_softc.lock);
	} else {
#endif
		cs = cfiscsi_session_new(softc, cihp->offload);
		if (cs == NULL) {
			ci->status = CTL_ISCSI_ERROR;
			snprintf(ci->error_str, sizeof(ci->error_str),
			    "%s: cfiscsi_session_new failed", __func__);
			cfiscsi_target_release(ct);
			return;
		}
#ifdef ICL_KERNEL_PROXY
	}
#endif

	/*
	 * First PDU of Full Feature phase has the same CmdSN as the last
	 * PDU from the Login Phase received from the initiator.
	 *
	 * NOTE(review): this comment used to end with "Thus, the -1
	 * below", but the assignment below stores cihp->cmdsn unchanged;
	 * presumably any adjustment now happens elsewhere -- confirm.
	 */
	cs->cs_cmdsn = cihp->cmdsn;
	cs->cs_statsn = cihp->statsn;
	cs->cs_conn->ic_max_recv_data_segment_length =
	    cihp->max_recv_data_segment_length;
	cs->cs_conn->ic_max_send_data_segment_length =
	    cihp->max_send_data_segment_length;
	cs->cs_max_burst_length = cihp->max_burst_length;
	cs->cs_first_burst_length = cihp->first_burst_length;
	cs->cs_immediate_data = !!cihp->immediate_data;
	if (cihp->header_digest == CTL_ISCSI_DIGEST_CRC32C)
		cs->cs_conn->ic_header_crc32c = true;
	if (cihp->data_digest == CTL_ISCSI_DIGEST_CRC32C)
		cs->cs_conn->ic_data_crc32c = true;

	strlcpy(cs->cs_initiator_name,
	    cihp->initiator_name, sizeof(cs->cs_initiator_name));
	strlcpy(cs->cs_initiator_addr,
	    cihp->initiator_addr, sizeof(cs->cs_initiator_addr));
	strlcpy(cs->cs_initiator_alias,
	    cihp->initiator_alias, sizeof(cs->cs_initiator_alias));
	memcpy(cs->cs_initiator_isid,
	    cihp->initiator_isid, sizeof(cs->cs_initiator_isid));
	/* Initiator ID: "<initiator name>,i,0x<six ISID bytes in hex>". */
	snprintf(cs->cs_initiator_id, sizeof(cs->cs_initiator_id),
	    "%s,i,0x%02x%02x%02x%02x%02x%02x", cs->cs_initiator_name,
	    cihp->initiator_isid[0], cihp->initiator_isid[1],
	    cihp->initiator_isid[2], cihp->initiator_isid[3],
	    cihp->initiator_isid[4], cihp->initiator_isid[5]);

	mtx_lock(&softc->lock);
	if (ct->ct_online == 0) {
		mtx_unlock(&softc->lock);
		CFISCSI_SESSION_LOCK(cs);
		cs->cs_handoff_in_progress = false;
		cfiscsi_session_terminate(cs);
		CFISCSI_SESSION_UNLOCK(cs);
		cfiscsi_target_release(ct);
		ci->status = CTL_ISCSI_ERROR;
		snprintf(ci->error_str, sizeof(ci->error_str),
		    "%s: port offline", __func__);
		return;
	}
	/* From here on the target reference belongs to the session. */
	cs->cs_target = ct;
	mtx_unlock(&softc->lock);

restart:
	/*
	 * Session reinstatement: terminate any other session on the same
	 * target that carries the same initiator ID, then rescan from the
	 * start after a one-tick pause.
	 */
	if (!cs->cs_terminating) {
		mtx_lock(&softc->lock);
		TAILQ_FOREACH(cs2, &softc->sessions, cs_next) {
			if (cs2 != cs && cs2->cs_tasks_aborted == false &&
			    cs->cs_target == cs2->cs_target &&
			    strcmp(cs->cs_initiator_id,
			    cs2->cs_initiator_id) == 0) {
				if (strcmp(cs->cs_initiator_addr,
				    cs2->cs_initiator_addr) != 0) {
					CFISCSI_SESSION_WARN(cs2,
					    "session reinstatement from "
					    "different address %s",
					    cs->cs_initiator_addr);
				} else {
					CFISCSI_SESSION_DEBUG(cs2,
					    "session reinstatement");
				}
				cfiscsi_session_terminate(cs2);
				mtx_unlock(&softc->lock);
				pause("cfiscsi_reinstate", 1);
				goto restart;
			}
		}
		mtx_unlock(&softc->lock);
	}

	/*
	 * Register initiator with CTL.
	 */
	cfiscsi_session_register_initiator(cs);

#ifdef ICL_KERNEL_PROXY
	if (cihp->socket > 0) {
#endif
		error = icl_conn_handoff(cs->cs_conn, cihp->socket);
		if (error != 0) {
			CFISCSI_SESSION_LOCK(cs);
			cs->cs_handoff_in_progress = false;
			cfiscsi_session_terminate(cs);
			CFISCSI_SESSION_UNLOCK(cs);
			ci->status = CTL_ISCSI_ERROR;
			snprintf(ci->error_str, sizeof(ci->error_str),
			    "%s: icl_conn_handoff failed with error %d",
			    __func__, error);
			return;
		}
#ifdef ICL_KERNEL_PROXY
	}
#endif

#ifdef ICL_KERNEL_PROXY
	cs->cs_login_phase = false;

	/*
	 * First PDU of the Full Feature phase has likely already arrived.
	 * We have to pick it up and execute properly.
	 */
	if (cs->cs_login_pdu != NULL) {
		CFISCSI_SESSION_DEBUG(cs, "picking up first PDU");
		cfiscsi_pdu_handle(cs->cs_login_pdu);
		cs->cs_login_pdu = NULL;
	}
#endif

	CFISCSI_SESSION_LOCK(cs);
	cs->cs_handoff_in_progress = false;

	/*
	 * Wake up the maintenance thread if we got scheduled for termination.
*/ if (cs->cs_terminating) cfiscsi_session_terminate(cs); CFISCSI_SESSION_UNLOCK(cs); ci->status = CTL_ISCSI_OK; } static void cfiscsi_ioctl_list(struct ctl_iscsi *ci) { struct ctl_iscsi_list_params *cilp; struct cfiscsi_session *cs; struct cfiscsi_softc *softc; struct sbuf *sb; int error; cilp = (struct ctl_iscsi_list_params *)&(ci->data); softc = &cfiscsi_softc; sb = sbuf_new(NULL, NULL, cilp->alloc_len, SBUF_FIXEDLEN); if (sb == NULL) { ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "Unable to allocate %d bytes for iSCSI session list", cilp->alloc_len); return; } sbuf_printf(sb, "\n"); mtx_lock(&softc->lock); TAILQ_FOREACH(cs, &softc->sessions, cs_next) { if (cs->cs_target == NULL) continue; error = sbuf_printf(sb, "" "%s" "%s" "%s" "%s" "%s" "%u" "%s" "%s" "%d" "%d" "%d" "%d" "%d" "%d" "%s" "\n", cs->cs_id, cs->cs_initiator_name, cs->cs_initiator_addr, cs->cs_initiator_alias, cs->cs_target->ct_name, cs->cs_target->ct_alias, cs->cs_target->ct_tag, cs->cs_conn->ic_header_crc32c ? "CRC32C" : "None", cs->cs_conn->ic_data_crc32c ? 
"CRC32C" : "None", cs->cs_conn->ic_max_recv_data_segment_length, cs->cs_conn->ic_max_send_data_segment_length, cs->cs_max_burst_length, cs->cs_first_burst_length, cs->cs_immediate_data, cs->cs_conn->ic_iser, cs->cs_conn->ic_offload); if (error != 0) break; } mtx_unlock(&softc->lock); error = sbuf_printf(sb, "\n"); if (error != 0) { sbuf_delete(sb); ci->status = CTL_ISCSI_LIST_NEED_MORE_SPACE; snprintf(ci->error_str, sizeof(ci->error_str), "Out of space, %d bytes is too small", cilp->alloc_len); return; } sbuf_finish(sb); error = copyout(sbuf_data(sb), cilp->conn_xml, sbuf_len(sb) + 1); if (error != 0) { sbuf_delete(sb); snprintf(ci->error_str, sizeof(ci->error_str), "copyout failed with error %d", error); ci->status = CTL_ISCSI_ERROR; return; } cilp->fill_len = sbuf_len(sb) + 1; ci->status = CTL_ISCSI_OK; sbuf_delete(sb); } static void cfiscsi_ioctl_logout(struct ctl_iscsi *ci) { struct icl_pdu *response; struct iscsi_bhs_asynchronous_message *bhsam; struct ctl_iscsi_logout_params *cilp; struct cfiscsi_session *cs; struct cfiscsi_softc *softc; int found = 0; cilp = (struct ctl_iscsi_logout_params *)&(ci->data); softc = &cfiscsi_softc; mtx_lock(&softc->lock); TAILQ_FOREACH(cs, &softc->sessions, cs_next) { if (cilp->all == 0 && cs->cs_id != cilp->connection_id && strcmp(cs->cs_initiator_name, cilp->initiator_name) != 0 && strcmp(cs->cs_initiator_addr, cilp->initiator_addr) != 0) continue; response = icl_pdu_new(cs->cs_conn, M_NOWAIT); if (response == NULL) { ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "Unable to allocate memory"); mtx_unlock(&softc->lock); return; } bhsam = (struct iscsi_bhs_asynchronous_message *)response->ip_bhs; bhsam->bhsam_opcode = ISCSI_BHS_OPCODE_ASYNC_MESSAGE; bhsam->bhsam_flags = 0x80; bhsam->bhsam_async_event = BHSAM_EVENT_TARGET_REQUESTS_LOGOUT; bhsam->bhsam_parameter3 = htons(10); cfiscsi_pdu_queue(response); found++; } mtx_unlock(&softc->lock); if (found == 0) { ci->status = CTL_ISCSI_SESSION_NOT_FOUND; 
snprintf(ci->error_str, sizeof(ci->error_str), "No matching connections found"); return; } ci->status = CTL_ISCSI_OK; } static void cfiscsi_ioctl_terminate(struct ctl_iscsi *ci) { struct icl_pdu *response; struct iscsi_bhs_asynchronous_message *bhsam; struct ctl_iscsi_terminate_params *citp; struct cfiscsi_session *cs; struct cfiscsi_softc *softc; int found = 0; citp = (struct ctl_iscsi_terminate_params *)&(ci->data); softc = &cfiscsi_softc; mtx_lock(&softc->lock); TAILQ_FOREACH(cs, &softc->sessions, cs_next) { if (citp->all == 0 && cs->cs_id != citp->connection_id && strcmp(cs->cs_initiator_name, citp->initiator_name) != 0 && strcmp(cs->cs_initiator_addr, citp->initiator_addr) != 0) continue; response = icl_pdu_new(cs->cs_conn, M_NOWAIT); if (response == NULL) { /* * Oh well. Just terminate the connection. */ } else { bhsam = (struct iscsi_bhs_asynchronous_message *) response->ip_bhs; bhsam->bhsam_opcode = ISCSI_BHS_OPCODE_ASYNC_MESSAGE; bhsam->bhsam_flags = 0x80; bhsam->bhsam_0xffffffff = 0xffffffff; bhsam->bhsam_async_event = BHSAM_EVENT_TARGET_TERMINATES_SESSION; cfiscsi_pdu_queue(response); } cfiscsi_session_terminate(cs); found++; } mtx_unlock(&softc->lock); if (found == 0) { ci->status = CTL_ISCSI_SESSION_NOT_FOUND; snprintf(ci->error_str, sizeof(ci->error_str), "No matching connections found"); return; } ci->status = CTL_ISCSI_OK; } static void cfiscsi_ioctl_limits(struct ctl_iscsi *ci) { struct ctl_iscsi_limits_params *cilp; struct icl_drv_limits idl; int error; cilp = (struct ctl_iscsi_limits_params *)&(ci->data); - error = icl_limits(cilp->offload, false, &idl); + error = icl_limits(cilp->offload, false, cilp->socket, &idl); if (error != 0) { ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "%s: icl_limits failed with error %d", __func__, error); return; } cilp->max_recv_data_segment_length = idl.idl_max_recv_data_segment_length; cilp->max_send_data_segment_length = idl.idl_max_send_data_segment_length; cilp->max_burst_length 
= idl.idl_max_burst_length; cilp->first_burst_length = idl.idl_first_burst_length; ci->status = CTL_ISCSI_OK; } #ifdef ICL_KERNEL_PROXY static void cfiscsi_ioctl_listen(struct ctl_iscsi *ci) { struct ctl_iscsi_listen_params *cilp; struct sockaddr *sa; int error; cilp = (struct ctl_iscsi_listen_params *)&(ci->data); if (cfiscsi_softc.listener == NULL) { CFISCSI_DEBUG("no listener"); snprintf(ci->error_str, sizeof(ci->error_str), "no listener"); ci->status = CTL_ISCSI_ERROR; return; } error = getsockaddr(&sa, (void *)cilp->addr, cilp->addrlen); if (error != 0) { CFISCSI_DEBUG("getsockaddr, error %d", error); snprintf(ci->error_str, sizeof(ci->error_str), "getsockaddr failed"); ci->status = CTL_ISCSI_ERROR; return; } error = icl_listen_add(cfiscsi_softc.listener, cilp->iser, cilp->domain, cilp->socktype, cilp->protocol, sa, cilp->portal_id); if (error != 0) { free(sa, M_SONAME); CFISCSI_DEBUG("icl_listen_add, error %d", error); snprintf(ci->error_str, sizeof(ci->error_str), "icl_listen_add failed, error %d", error); ci->status = CTL_ISCSI_ERROR; return; } ci->status = CTL_ISCSI_OK; } static void cfiscsi_ioctl_accept(struct ctl_iscsi *ci) { struct ctl_iscsi_accept_params *ciap; struct cfiscsi_session *cs; int error; ciap = (struct ctl_iscsi_accept_params *)&(ci->data); mtx_lock(&cfiscsi_softc.lock); for (;;) { TAILQ_FOREACH(cs, &cfiscsi_softc.sessions, cs_next) { if (cs->cs_waiting_for_ctld) break; } if (cs != NULL) break; error = cv_wait_sig(&cfiscsi_softc.accept_cv, &cfiscsi_softc.lock); if (error != 0) { mtx_unlock(&cfiscsi_softc.lock); snprintf(ci->error_str, sizeof(ci->error_str), "interrupted"); ci->status = CTL_ISCSI_ERROR; return; } } mtx_unlock(&cfiscsi_softc.lock); cs->cs_waiting_for_ctld = false; cs->cs_login_phase = true; ciap->connection_id = cs->cs_id; ciap->portal_id = cs->cs_portal_id; ciap->initiator_addrlen = cs->cs_initiator_sa->sa_len; error = copyout(cs->cs_initiator_sa, ciap->initiator_addr, cs->cs_initiator_sa->sa_len); if (error != 0) { 
snprintf(ci->error_str, sizeof(ci->error_str), "copyout failed with error %d", error); ci->status = CTL_ISCSI_ERROR; return; } ci->status = CTL_ISCSI_OK; } static void cfiscsi_ioctl_send(struct ctl_iscsi *ci) { struct ctl_iscsi_send_params *cisp; struct cfiscsi_session *cs; struct icl_pdu *ip; size_t datalen; void *data; int error; cisp = (struct ctl_iscsi_send_params *)&(ci->data); mtx_lock(&cfiscsi_softc.lock); TAILQ_FOREACH(cs, &cfiscsi_softc.sessions, cs_next) { if (cs->cs_id == cisp->connection_id) break; } if (cs == NULL) { mtx_unlock(&cfiscsi_softc.lock); snprintf(ci->error_str, sizeof(ci->error_str), "connection not found"); ci->status = CTL_ISCSI_ERROR; return; } mtx_unlock(&cfiscsi_softc.lock); #if 0 if (cs->cs_login_phase == false) return (EBUSY); #endif if (cs->cs_terminating) { snprintf(ci->error_str, sizeof(ci->error_str), "connection is terminating"); ci->status = CTL_ISCSI_ERROR; return; } datalen = cisp->data_segment_len; /* * XXX */ //if (datalen > CFISCSI_MAX_DATA_SEGMENT_LENGTH) { if (datalen > 65535) { snprintf(ci->error_str, sizeof(ci->error_str), "data segment too big"); ci->status = CTL_ISCSI_ERROR; return; } if (datalen > 0) { data = malloc(datalen, M_CFISCSI, M_WAITOK); error = copyin(cisp->data_segment, data, datalen); if (error != 0) { free(data, M_CFISCSI); snprintf(ci->error_str, sizeof(ci->error_str), "copyin error %d", error); ci->status = CTL_ISCSI_ERROR; return; } } ip = icl_pdu_new(cs->cs_conn, M_WAITOK); memcpy(ip->ip_bhs, cisp->bhs, sizeof(*ip->ip_bhs)); if (datalen > 0) { icl_pdu_append_data(ip, data, datalen, M_WAITOK); free(data, M_CFISCSI); } CFISCSI_SESSION_LOCK(cs); icl_pdu_queue(ip); CFISCSI_SESSION_UNLOCK(cs); ci->status = CTL_ISCSI_OK; } static void cfiscsi_ioctl_receive(struct ctl_iscsi *ci) { struct ctl_iscsi_receive_params *cirp; struct cfiscsi_session *cs; struct icl_pdu *ip; void *data; int error; cirp = (struct ctl_iscsi_receive_params *)&(ci->data); mtx_lock(&cfiscsi_softc.lock); TAILQ_FOREACH(cs, 
&cfiscsi_softc.sessions, cs_next) { if (cs->cs_id == cirp->connection_id) break; } if (cs == NULL) { mtx_unlock(&cfiscsi_softc.lock); snprintf(ci->error_str, sizeof(ci->error_str), "connection not found"); ci->status = CTL_ISCSI_ERROR; return; } mtx_unlock(&cfiscsi_softc.lock); #if 0 if (is->is_login_phase == false) return (EBUSY); #endif CFISCSI_SESSION_LOCK(cs); while (cs->cs_login_pdu == NULL && cs->cs_terminating == false) { error = cv_wait_sig(&cs->cs_login_cv, &cs->cs_lock); if (error != 0) { CFISCSI_SESSION_UNLOCK(cs); snprintf(ci->error_str, sizeof(ci->error_str), "interrupted by signal"); ci->status = CTL_ISCSI_ERROR; return; } } if (cs->cs_terminating) { CFISCSI_SESSION_UNLOCK(cs); snprintf(ci->error_str, sizeof(ci->error_str), "connection terminating"); ci->status = CTL_ISCSI_ERROR; return; } ip = cs->cs_login_pdu; cs->cs_login_pdu = NULL; CFISCSI_SESSION_UNLOCK(cs); if (ip->ip_data_len > cirp->data_segment_len) { icl_pdu_free(ip); snprintf(ci->error_str, sizeof(ci->error_str), "data segment too big"); ci->status = CTL_ISCSI_ERROR; return; } copyout(ip->ip_bhs, cirp->bhs, sizeof(*ip->ip_bhs)); if (ip->ip_data_len > 0) { data = malloc(ip->ip_data_len, M_CFISCSI, M_WAITOK); icl_pdu_get_data(ip, 0, data, ip->ip_data_len); copyout(data, cirp->data_segment, ip->ip_data_len); free(data, M_CFISCSI); } icl_pdu_free(ip); ci->status = CTL_ISCSI_OK; } #endif /* !ICL_KERNEL_PROXY */ static void cfiscsi_ioctl_port_create(struct ctl_req *req) { struct cfiscsi_target *ct; struct ctl_port *port; const char *target, *alias, *val; struct scsi_vpd_id_descriptor *desc; int retval, len, idlen; uint16_t tag; target = dnvlist_get_string(req->args_nvl, "cfiscsi_target", NULL); alias = dnvlist_get_string(req->args_nvl, "cfiscsi_target_alias", NULL); val = dnvlist_get_string(req->args_nvl, "cfiscsi_portal_group_tag", NULL); if (target == NULL || val == NULL) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "Missing required argument"); return; } tag 
= strtoul(val, NULL, 0); ct = cfiscsi_target_find_or_create(&cfiscsi_softc, target, alias, tag); if (ct == NULL) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "failed to create target \"%s\"", target); return; } if (ct->ct_state == CFISCSI_TARGET_STATE_ACTIVE) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "target \"%s\" for portal group tag %u already exists", target, tag); cfiscsi_target_release(ct); return; } port = &ct->ct_port; // WAT if (ct->ct_state == CFISCSI_TARGET_STATE_DYING) goto done; port->frontend = &cfiscsi_frontend; port->port_type = CTL_PORT_ISCSI; /* XXX KDM what should the real number be here? */ port->num_requested_ctl_io = 4096; port->port_name = "iscsi"; port->physical_port = (int)tag; port->virtual_port = ct->ct_target_id; port->port_online = cfiscsi_online; port->port_offline = cfiscsi_offline; port->port_info = cfiscsi_info; port->onoff_arg = ct; port->fe_datamove = cfiscsi_datamove; port->fe_done = cfiscsi_done; port->targ_port = -1; port->options = nvlist_clone(req->args_nvl); /* Generate Port ID. */ idlen = strlen(target) + strlen(",t,0x0001") + 1; idlen = roundup2(idlen, 4); len = sizeof(struct scsi_vpd_device_id) + idlen; port->port_devid = malloc(sizeof(struct ctl_devid) + len, M_CTL, M_WAITOK | M_ZERO); port->port_devid->len = len; desc = (struct scsi_vpd_id_descriptor *)port->port_devid->data; desc->proto_codeset = (SCSI_PROTO_ISCSI << 4) | SVPD_ID_CODESET_UTF8; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_PORT | SVPD_ID_TYPE_SCSI_NAME; desc->length = idlen; snprintf(desc->identifier, idlen, "%s,t,0x%4.4x", target, tag); /* Generate Target ID. 
*/ idlen = strlen(target) + 1; idlen = roundup2(idlen, 4); len = sizeof(struct scsi_vpd_device_id) + idlen; port->target_devid = malloc(sizeof(struct ctl_devid) + len, M_CTL, M_WAITOK | M_ZERO); port->target_devid->len = len; desc = (struct scsi_vpd_id_descriptor *)port->target_devid->data; desc->proto_codeset = (SCSI_PROTO_ISCSI << 4) | SVPD_ID_CODESET_UTF8; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_TARGET | SVPD_ID_TYPE_SCSI_NAME; desc->length = idlen; strlcpy(desc->identifier, target, idlen); retval = ctl_port_register(port); if (retval != 0) { free(port->port_devid, M_CFISCSI); free(port->target_devid, M_CFISCSI); cfiscsi_target_release(ct); req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "ctl_port_register() failed with error %d", retval); return; } done: ct->ct_state = CFISCSI_TARGET_STATE_ACTIVE; req->status = CTL_LUN_OK; req->result_nvl = nvlist_create(0); nvlist_add_number(req->result_nvl, "port_id", port->targ_port); } static void cfiscsi_ioctl_port_remove(struct ctl_req *req) { struct cfiscsi_target *ct; const char *target, *val; uint16_t tag; target = dnvlist_get_string(req->args_nvl, "cfiscsi_target", NULL); val = dnvlist_get_string(req->args_nvl, "cfiscsi_portal_group_tag", NULL); if (target == NULL || val == NULL) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "Missing required argument"); return; } tag = strtoul(val, NULL, 0); ct = cfiscsi_target_find(&cfiscsi_softc, target, tag); if (ct == NULL) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "can't find target \"%s\"", target); return; } ct->ct_state = CFISCSI_TARGET_STATE_DYING; ctl_port_offline(&ct->ct_port); cfiscsi_target_release(ct); cfiscsi_target_release(ct); req->status = CTL_LUN_OK; } static int cfiscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td) { struct ctl_iscsi *ci; struct ctl_req *req; if (cmd == CTL_PORT_REQ) { req = (struct ctl_req *)addr; switch 
(req->reqtype) {
		case CTL_REQ_CREATE:
			cfiscsi_ioctl_port_create(req);
			break;
		case CTL_REQ_REMOVE:
			cfiscsi_ioctl_port_remove(req);
			break;
		default:
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Unsupported request type %d", req->reqtype);
		}
		return (0);
	}

	if (cmd != CTL_ISCSI)
		return (ENOTTY);

	/*
	 * Per-request failures are reported through ci->status and
	 * ci->error_str; the ioctl itself returns 0.
	 */
	ci = (struct ctl_iscsi *)addr;
	switch (ci->type) {
	case CTL_ISCSI_HANDOFF:
		cfiscsi_ioctl_handoff(ci);
		break;
	case CTL_ISCSI_LIST:
		cfiscsi_ioctl_list(ci);
		break;
	case CTL_ISCSI_LOGOUT:
		cfiscsi_ioctl_logout(ci);
		break;
	case CTL_ISCSI_TERMINATE:
		cfiscsi_ioctl_terminate(ci);
		break;
	case CTL_ISCSI_LIMITS:
		cfiscsi_ioctl_limits(ci);
		break;
#ifdef ICL_KERNEL_PROXY
	case CTL_ISCSI_LISTEN:
		cfiscsi_ioctl_listen(ci);
		break;
	case CTL_ISCSI_ACCEPT:
		cfiscsi_ioctl_accept(ci);
		break;
	case CTL_ISCSI_SEND:
		cfiscsi_ioctl_send(ci);
		break;
	case CTL_ISCSI_RECEIVE:
		cfiscsi_ioctl_receive(ci);
		break;
#else
	case CTL_ISCSI_LISTEN:
	case CTL_ISCSI_ACCEPT:
	case CTL_ISCSI_SEND:
	case CTL_ISCSI_RECEIVE:
		ci->status = CTL_ISCSI_ERROR;
		snprintf(ci->error_str, sizeof(ci->error_str),
		    "%s: CTL compiled without ICL_KERNEL_PROXY",
		    __func__);
		break;
#endif /* !ICL_KERNEL_PROXY */
	default:
		ci->status = CTL_ISCSI_ERROR;
		snprintf(ci->error_str, sizeof(ci->error_str),
		    "%s: invalid iSCSI request type %d", __func__, ci->type);
		break;
	}

	return (0);
}

/*
 * Take an additional reference on the target.
 */
static void
cfiscsi_target_hold(struct cfiscsi_target *ct)
{

	refcount_acquire(&ct->ct_refcount);
}

/*
 * Drop a reference on the target.  When the last reference goes away the
 * target is unlinked from softc->targets, its CTL port is deregistered
 * (unless the target is already in the INVALID state) and the structure is
 * freed.
 */
static void
cfiscsi_target_release(struct cfiscsi_target *ct)
{
	struct cfiscsi_softc *softc;

	softc = ct->ct_softc;
	mtx_lock(&softc->lock);
	if (refcount_release(&ct->ct_refcount)) {
		TAILQ_REMOVE(&softc->targets, ct, ct_next);
		mtx_unlock(&softc->lock);
		/* The softc lock is dropped before the port is deregistered. */
		if (ct->ct_state != CFISCSI_TARGET_STATE_INVALID) {
			ct->ct_state = CFISCSI_TARGET_STATE_INVALID;
			if (ctl_port_deregister(&ct->ct_port) != 0)
				printf("%s: ctl_port_deregister() failed\n",
				    __func__);
		}
		free(ct, M_CFISCSI);

		return;
	}
	mtx_unlock(&softc->lock);
}

/*
 * Look up an ACTIVE target by name and portal group tag.  Returns the
 * target with an extra reference held, or NULL if there is no match.
 */
static struct cfiscsi_target *
cfiscsi_target_find(struct cfiscsi_softc *softc, const char *name, uint16_t tag)
{
	struct cfiscsi_target *ct;

	mtx_lock(&softc->lock);
	TAILQ_FOREACH(ct, &softc->targets, ct_next) {
		if (ct->ct_tag != tag ||
		    strcmp(name, ct->ct_name) != 0 ||
		    ct->ct_state != CFISCSI_TARGET_STATE_ACTIVE)
			continue;
		cfiscsi_target_hold(ct);
		mtx_unlock(&softc->lock);
		return (ct);
	}
	mtx_unlock(&softc->lock);

	return (NULL);
}

/*
 * Return the target with the given name and portal group tag, creating it
 * if no target in a state other than INVALID matches.  An existing target
 * is returned with an extra reference; a new one starts with a reference
 * count of 1.  Returns NULL if the name is empty or does not fit in
 * CTL_ISCSI_NAME_LEN.
 */
static struct cfiscsi_target *
cfiscsi_target_find_or_create(struct cfiscsi_softc *softc, const char *name,
    const char *alias, uint16_t tag)
{
	struct cfiscsi_target *ct, *newct;

	if (name[0] == '\0' || strlen(name) >= CTL_ISCSI_NAME_LEN)
		return (NULL);

	/*
	 * Allocate before taking the lock: M_WAITOK may sleep.  The
	 * allocation is discarded if an existing target is found.
	 */
	newct = malloc(sizeof(*newct), M_CFISCSI, M_WAITOK | M_ZERO);

	mtx_lock(&softc->lock);
	TAILQ_FOREACH(ct, &softc->targets, ct_next) {
		if (ct->ct_tag != tag ||
		    strcmp(name, ct->ct_name) != 0 ||
		    ct->ct_state == CFISCSI_TARGET_STATE_INVALID)
			continue;
		cfiscsi_target_hold(ct);
		mtx_unlock(&softc->lock);
		free(newct, M_CFISCSI);
		return (ct);
	}

	strlcpy(newct->ct_name, name, sizeof(newct->ct_name));
	if (alias != NULL)
		strlcpy(newct->ct_alias, alias, sizeof(newct->ct_alias));
	newct->ct_tag = tag;
	refcount_init(&newct->ct_refcount, 1);
	newct->ct_softc = softc;
	/* Target IDs restart from 1 whenever the target list is empty. */
	if (TAILQ_EMPTY(&softc->targets))
		softc->last_target_id = 0;
	newct->ct_target_id = ++softc->last_target_id;
	TAILQ_INSERT_TAIL(&softc->targets, newct, ct_next);
	mtx_unlock(&softc->lock);

	return (newct);
}

/*
 * PDU completion callback: invokes the function stored in ip_prv0 with
 * ip_prv1 and -1.  The "error" argument is currently ignored.
 */
static void
cfiscsi_pdu_done(struct icl_pdu *ip, int error)
{

	if (error != 0)
		; // XXX: Do something on error?
	((ctl_ref)ip->ip_prv0)(ip->ip_prv1, -1);
}

static void
cfiscsi_datamove_in(union ctl_io *io)
{
	struct cfiscsi_session *cs;
	struct icl_pdu *request, *response;
	const struct iscsi_bhs_scsi_command *bhssc;
	struct iscsi_bhs_data_in *bhsdi;
	struct ctl_sg_entry ctl_sg_entry, *ctl_sglist;
	size_t len, expected_len, sg_len, buffer_offset;
	size_t max_send_data_segment_length;
	const char *sg_addr;
	icl_pdu_cb cb;
	int ctl_sg_count, error, i;

	request = PRIV_REQUEST(io);
	cs = PDU_SESSION(request);

	bhssc = (const struct iscsi_bhs_scsi_command *)request->ip_bhs;
	KASSERT((bhssc->bhssc_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) ==
	    ISCSI_BHS_OPCODE_SCSI_COMMAND,
	    ("bhssc->bhssc_opcode != ISCSI_BHS_OPCODE_SCSI_COMMAND"));

	if (io->scsiio.kern_sg_entries > 0) {
		ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr;
		ctl_sg_count = io->scsiio.kern_sg_entries;
	} else {
		ctl_sglist = &ctl_sg_entry;
		ctl_sglist->addr = io->scsiio.kern_data_ptr;
		ctl_sglist->len = io->scsiio.kern_data_len;
		ctl_sg_count = 1;
	}

	/*
	 * This is the offset within the current SCSI command; for the first
	 * call to cfiscsi_datamove() it will be 0, and for subsequent ones
	 * it will be the sum of lengths of previous ones.
	 */
	buffer_offset = io->scsiio.kern_rel_offset;

	/*
	 * This is the transfer length expected by the initiator.  It can be
	 * different from the amount of data from the SCSI point of view.
	 */
	expected_len = ntohl(bhssc->bhssc_expected_data_transfer_length);

	/*
	 * If the transfer is outside of expected length -- we are done.
*/ if (buffer_offset >= expected_len) { #if 0 CFISCSI_SESSION_DEBUG(cs, "buffer_offset = %zd, " "already sent the expected len", buffer_offset); #endif ctl_datamove_done(io, true); return; } if (io->scsiio.kern_data_ref != NULL) cb = cfiscsi_pdu_done; else cb = NULL; i = 0; sg_addr = NULL; sg_len = 0; response = NULL; bhsdi = NULL; if (cs->cs_conn->ic_hw_isomax != 0) max_send_data_segment_length = cs->cs_conn->ic_hw_isomax; else max_send_data_segment_length = cs->cs_conn->ic_max_send_data_segment_length; for (;;) { if (response == NULL) { response = cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_WARN(cs, "failed to " "allocate memory; dropping connection"); ctl_set_busy(&io->scsiio); ctl_datamove_done(io, true); cfiscsi_session_terminate(cs); return; } bhsdi = (struct iscsi_bhs_data_in *)response->ip_bhs; bhsdi->bhsdi_opcode = ISCSI_BHS_OPCODE_SCSI_DATA_IN; bhsdi->bhsdi_initiator_task_tag = bhssc->bhssc_initiator_task_tag; bhsdi->bhsdi_target_transfer_tag = 0xffffffff; bhsdi->bhsdi_datasn = htonl(PRIV_EXPDATASN(io)); bhsdi->bhsdi_buffer_offset = htonl(buffer_offset); } KASSERT(i < ctl_sg_count, ("i >= ctl_sg_count")); if (sg_len == 0) { sg_addr = ctl_sglist[i].addr; sg_len = ctl_sglist[i].len; KASSERT(sg_len > 0, ("sg_len <= 0")); } len = sg_len; /* * Truncate to maximum data segment length. */ KASSERT(response->ip_data_len < max_send_data_segment_length, ("ip_data_len %zd >= max_send_data_segment_length %zd", response->ip_data_len, max_send_data_segment_length)); if (response->ip_data_len + len > max_send_data_segment_length) { len = max_send_data_segment_length - response->ip_data_len; KASSERT(len <= sg_len, ("len %zd > sg_len %zd", len, sg_len)); } /* * Truncate to expected data transfer length. 
*/ KASSERT(buffer_offset + response->ip_data_len < expected_len, ("buffer_offset %zd + ip_data_len %zd >= expected_len %zd", buffer_offset, response->ip_data_len, expected_len)); if (buffer_offset + response->ip_data_len + len > expected_len) { CFISCSI_SESSION_DEBUG(cs, "truncating from %zd " "to expected data transfer length %zd", buffer_offset + response->ip_data_len + len, expected_len); len = expected_len - (buffer_offset + response->ip_data_len); KASSERT(len <= sg_len, ("len %zd > sg_len %zd", len, sg_len)); } error = icl_pdu_append_data(response, sg_addr, len, M_NOWAIT | (cb ? ICL_NOCOPY : 0)); if (error != 0) { CFISCSI_SESSION_WARN(cs, "failed to " "allocate memory; dropping connection"); icl_pdu_free(response); ctl_set_busy(&io->scsiio); ctl_datamove_done(io, true); cfiscsi_session_terminate(cs); return; } sg_addr += len; sg_len -= len; io->scsiio.kern_data_resid -= len; KASSERT(buffer_offset + response->ip_data_len <= expected_len, ("buffer_offset %zd + ip_data_len %zd > expected_len %zd", buffer_offset, response->ip_data_len, expected_len)); if (buffer_offset + response->ip_data_len == expected_len) { /* * Already have the amount of data the initiator wanted. */ break; } if (sg_len == 0) { /* * End of scatter-gather segment; * proceed to the next one... */ if (i == ctl_sg_count - 1) { /* * ... unless this was the last one. */ break; } i++; } if (response->ip_data_len == max_send_data_segment_length) { /* * Can't stuff more data into the current PDU; * queue it. Note that's not enough to check * for kern_data_resid == 0 instead; there * may be several Data-In PDUs for the final * call to cfiscsi_datamove(), and we want * to set the F flag only on the last of them. 
*/ buffer_offset += response->ip_data_len; if (buffer_offset == io->scsiio.kern_total_len || buffer_offset == expected_len) { buffer_offset -= response->ip_data_len; break; } PRIV_EXPDATASN(io) += howmany(response->ip_data_len, cs->cs_conn->ic_max_send_data_segment_length); if (cb != NULL) { response->ip_prv0 = io->scsiio.kern_data_ref; response->ip_prv1 = io->scsiio.kern_data_arg; io->scsiio.kern_data_ref(io->scsiio.kern_data_arg, 1); } cfiscsi_pdu_queue_cb(response, cb); response = NULL; bhsdi = NULL; } } if (response != NULL) { buffer_offset += response->ip_data_len; if (buffer_offset == io->scsiio.kern_total_len || buffer_offset == expected_len) { bhsdi->bhsdi_flags |= BHSDI_FLAGS_F; if (io->io_hdr.status == CTL_SUCCESS) { bhsdi->bhsdi_flags |= BHSDI_FLAGS_S; if (io->scsiio.kern_total_len < ntohl(bhssc->bhssc_expected_data_transfer_length)) { bhsdi->bhsdi_flags |= BHSSR_FLAGS_RESIDUAL_UNDERFLOW; bhsdi->bhsdi_residual_count = htonl(ntohl(bhssc->bhssc_expected_data_transfer_length) - io->scsiio.kern_total_len); } else if (io->scsiio.kern_total_len > ntohl(bhssc->bhssc_expected_data_transfer_length)) { bhsdi->bhsdi_flags |= BHSSR_FLAGS_RESIDUAL_OVERFLOW; bhsdi->bhsdi_residual_count = htonl(io->scsiio.kern_total_len - ntohl(bhssc->bhssc_expected_data_transfer_length)); } bhsdi->bhsdi_status = io->scsiio.scsi_status; io->io_hdr.flags |= CTL_FLAG_STATUS_SENT; } } KASSERT(response->ip_data_len > 0, ("sending empty Data-In")); PRIV_EXPDATASN(io) += howmany(response->ip_data_len, cs->cs_conn->ic_max_send_data_segment_length); if (cb != NULL) { response->ip_prv0 = io->scsiio.kern_data_ref; response->ip_prv1 = io->scsiio.kern_data_arg; io->scsiio.kern_data_ref(io->scsiio.kern_data_arg, 1); } cfiscsi_pdu_queue_cb(response, cb); } ctl_datamove_done(io, true); } static void cfiscsi_datamove_out(union ctl_io *io) { struct cfiscsi_session *cs; struct icl_pdu *request, *response; const struct iscsi_bhs_scsi_command *bhssc; struct iscsi_bhs_r2t *bhsr2t; struct cfiscsi_data_wait 
*cdw; struct ctl_sg_entry ctl_sg_entry, *ctl_sglist; uint32_t expected_len, datamove_len, r2t_off, r2t_len; uint32_t target_transfer_tag; bool done; request = PRIV_REQUEST(io); cs = PDU_SESSION(request); bhssc = (const struct iscsi_bhs_scsi_command *)request->ip_bhs; KASSERT((bhssc->bhssc_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_COMMAND, ("bhssc->bhssc_opcode != ISCSI_BHS_OPCODE_SCSI_COMMAND")); /* * Complete write underflow. Not a single byte to read. Return. */ expected_len = ntohl(bhssc->bhssc_expected_data_transfer_length); if (io->scsiio.kern_rel_offset >= expected_len) { ctl_datamove_done(io, true); return; } datamove_len = MIN(io->scsiio.kern_data_len, expected_len - io->scsiio.kern_rel_offset); target_transfer_tag = atomic_fetchadd_32(&cs->cs_target_transfer_tag, 1); if (target_transfer_tag == 0xffffffff) { target_transfer_tag = atomic_fetchadd_32(&cs->cs_target_transfer_tag, 1); } cdw = cfiscsi_data_wait_new(cs, io, bhssc->bhssc_initiator_task_tag, &target_transfer_tag); if (cdw == NULL) { CFISCSI_SESSION_WARN(cs, "failed to " "allocate memory; dropping connection"); ctl_set_busy(&io->scsiio); ctl_datamove_done(io, true); cfiscsi_session_terminate(cs); return; } #if 0 CFISCSI_SESSION_DEBUG(cs, "expecting Data-Out with initiator " "task tag 0x%x, target transfer tag 0x%x", bhssc->bhssc_initiator_task_tag, target_transfer_tag); #endif cdw->cdw_ctl_io = io; cdw->cdw_target_transfer_tag = target_transfer_tag; cdw->cdw_initiator_task_tag = bhssc->bhssc_initiator_task_tag; cdw->cdw_r2t_end = datamove_len; cdw->cdw_datasn = 0; /* Set initial data pointer for the CDW respecting ext_data_filled. 
*/ if (io->scsiio.kern_sg_entries > 0) { ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr; } else { ctl_sglist = &ctl_sg_entry; ctl_sglist->addr = io->scsiio.kern_data_ptr; ctl_sglist->len = datamove_len; } cdw->cdw_sg_index = 0; cdw->cdw_sg_addr = ctl_sglist[cdw->cdw_sg_index].addr; cdw->cdw_sg_len = ctl_sglist[cdw->cdw_sg_index].len; r2t_off = io->scsiio.ext_data_filled; while (r2t_off > 0) { if (r2t_off >= cdw->cdw_sg_len) { r2t_off -= cdw->cdw_sg_len; cdw->cdw_sg_index++; cdw->cdw_sg_addr = ctl_sglist[cdw->cdw_sg_index].addr; cdw->cdw_sg_len = ctl_sglist[cdw->cdw_sg_index].len; continue; } cdw->cdw_sg_addr += r2t_off; cdw->cdw_sg_len -= r2t_off; r2t_off = 0; } if (cs->cs_immediate_data && io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled < icl_pdu_data_segment_length(request)) { done = cfiscsi_handle_data_segment(request, cdw); if (done) { cfiscsi_data_wait_free(cs, cdw); ctl_datamove_done(io, true); return; } } r2t_off = io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled; r2t_len = MIN(datamove_len - io->scsiio.ext_data_filled, cs->cs_max_burst_length); cdw->cdw_r2t_end = io->scsiio.ext_data_filled + r2t_len; CFISCSI_SESSION_LOCK(cs); if (cs->cs_terminating_tasks) { CFISCSI_SESSION_UNLOCK(cs); KASSERT((io->io_hdr.flags & CTL_FLAG_ABORT) != 0, ("%s: I/O request %p on termating session %p not aborted", __func__, io, cs)); CFISCSI_SESSION_WARN(cs, "aborting data_wait for aborted I/O"); cfiscsi_data_wait_abort(cs, cdw, 44); return; } TAILQ_INSERT_TAIL(&cs->cs_waiting_for_data_out, cdw, cdw_next); CFISCSI_SESSION_UNLOCK(cs); /* * XXX: We should limit the number of outstanding R2T PDUs * per task to MaxOutstandingR2T. 
*/ response = cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_WARN(cs, "failed to " "allocate memory; dropping connection"); ctl_set_busy(&io->scsiio); ctl_datamove_done(io, true); cfiscsi_session_terminate(cs); return; } io->io_hdr.flags |= CTL_FLAG_DMA_INPROG; bhsr2t = (struct iscsi_bhs_r2t *)response->ip_bhs; bhsr2t->bhsr2t_opcode = ISCSI_BHS_OPCODE_R2T; bhsr2t->bhsr2t_flags = 0x80; bhsr2t->bhsr2t_lun = bhssc->bhssc_lun; bhsr2t->bhsr2t_initiator_task_tag = bhssc->bhssc_initiator_task_tag; bhsr2t->bhsr2t_target_transfer_tag = target_transfer_tag; /* * XXX: Here we assume that cfiscsi_datamove() won't ever * be running concurrently on several CPUs for a given * command. */ bhsr2t->bhsr2t_r2tsn = htonl(PRIV_R2TSN(io)++); /* * This is the offset within the current SCSI command; * i.e. for the first call of datamove(), it will be 0, * and for subsequent ones it will be the sum of lengths * of previous ones. * * The ext_data_filled is to account for unsolicited * (immediate) data that might have already arrived. */ bhsr2t->bhsr2t_buffer_offset = htonl(r2t_off); /* * This is the total length (sum of S/G lengths) this call * to cfiscsi_datamove() is supposed to handle, limited by * MaxBurstLength. */ bhsr2t->bhsr2t_desired_data_transfer_length = htonl(r2t_len); cfiscsi_pdu_queue(response); } static void cfiscsi_datamove(union ctl_io *io) { if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) cfiscsi_datamove_in(io); else { /* We hadn't received anything during this datamove yet. 
*/ io->scsiio.ext_data_filled = 0; cfiscsi_datamove_out(io); } } static void cfiscsi_scsi_command_done(union ctl_io *io) { struct icl_pdu *request, *response; struct iscsi_bhs_scsi_command *bhssc; struct iscsi_bhs_scsi_response *bhssr; #ifdef DIAGNOSTIC struct cfiscsi_data_wait *cdw; struct cfiscsi_session *cs; #endif uint16_t sense_length; request = PRIV_REQUEST(io); bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs; KASSERT((bhssc->bhssc_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_COMMAND, ("replying to wrong opcode 0x%x", bhssc->bhssc_opcode)); //CFISCSI_SESSION_DEBUG(cs, "initiator task tag 0x%x", // bhssc->bhssc_initiator_task_tag); #ifdef DIAGNOSTIC cs = PDU_SESSION(request); CFISCSI_SESSION_LOCK(cs); TAILQ_FOREACH(cdw, &cs->cs_waiting_for_data_out, cdw_next) KASSERT(bhssc->bhssc_initiator_task_tag != cdw->cdw_initiator_task_tag, ("dangling cdw")); CFISCSI_SESSION_UNLOCK(cs); #endif /* * Do not return status for aborted commands. * There are exceptions, but none supported by CTL yet. */ if (((io->io_hdr.flags & CTL_FLAG_ABORT) && (io->io_hdr.flags & CTL_FLAG_ABORT_STATUS) == 0) || (io->io_hdr.flags & CTL_FLAG_STATUS_SENT)) { ctl_free_io(io); icl_pdu_free(request); return; } response = cfiscsi_pdu_new_response(request, M_WAITOK); bhssr = (struct iscsi_bhs_scsi_response *)response->ip_bhs; bhssr->bhssr_opcode = ISCSI_BHS_OPCODE_SCSI_RESPONSE; bhssr->bhssr_flags = 0x80; /* * XXX: We don't deal with bidirectional under/overflows; * does anything actually support those? 
*/ if (io->scsiio.kern_total_len < ntohl(bhssc->bhssc_expected_data_transfer_length)) { bhssr->bhssr_flags |= BHSSR_FLAGS_RESIDUAL_UNDERFLOW; bhssr->bhssr_residual_count = htonl(ntohl(bhssc->bhssc_expected_data_transfer_length) - io->scsiio.kern_total_len); //CFISCSI_SESSION_DEBUG(cs, "underflow; residual count %d", // ntohl(bhssr->bhssr_residual_count)); } else if (io->scsiio.kern_total_len > ntohl(bhssc->bhssc_expected_data_transfer_length)) { bhssr->bhssr_flags |= BHSSR_FLAGS_RESIDUAL_OVERFLOW; bhssr->bhssr_residual_count = htonl(io->scsiio.kern_total_len - ntohl(bhssc->bhssc_expected_data_transfer_length)); //CFISCSI_SESSION_DEBUG(cs, "overflow; residual count %d", // ntohl(bhssr->bhssr_residual_count)); } bhssr->bhssr_response = BHSSR_RESPONSE_COMMAND_COMPLETED; bhssr->bhssr_status = io->scsiio.scsi_status; bhssr->bhssr_initiator_task_tag = bhssc->bhssc_initiator_task_tag; bhssr->bhssr_expdatasn = htonl(PRIV_EXPDATASN(io)); if (io->scsiio.sense_len > 0) { #if 0 CFISCSI_SESSION_DEBUG(cs, "returning %d bytes of sense data", io->scsiio.sense_len); #endif sense_length = htons(io->scsiio.sense_len); icl_pdu_append_data(response, &sense_length, sizeof(sense_length), M_WAITOK); icl_pdu_append_data(response, &io->scsiio.sense_data, io->scsiio.sense_len, M_WAITOK); } ctl_free_io(io); icl_pdu_free(request); cfiscsi_pdu_queue(response); } static void cfiscsi_task_management_done(union ctl_io *io) { struct icl_pdu *request, *response; struct iscsi_bhs_task_management_request *bhstmr; struct iscsi_bhs_task_management_response *bhstmr2; struct cfiscsi_data_wait *cdw, *tmpcdw; struct cfiscsi_session *cs, *tcs; struct cfiscsi_softc *softc; int cold_reset = 0; request = PRIV_REQUEST(io); cs = PDU_SESSION(request); bhstmr = (struct iscsi_bhs_task_management_request *)request->ip_bhs; KASSERT((bhstmr->bhstmr_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_TASK_REQUEST, ("replying to wrong opcode 0x%x", bhstmr->bhstmr_opcode)); #if 0 CFISCSI_SESSION_DEBUG(cs, "initiator 
task tag 0x%x; referenced task tag 0x%x", bhstmr->bhstmr_initiator_task_tag, bhstmr->bhstmr_referenced_task_tag); #endif if ((bhstmr->bhstmr_function & ~0x80) == BHSTMR_FUNCTION_ABORT_TASK) { /* * Make sure we no longer wait for Data-Out for this command. */ CFISCSI_SESSION_LOCK(cs); TAILQ_FOREACH_SAFE(cdw, &cs->cs_waiting_for_data_out, cdw_next, tmpcdw) { if (bhstmr->bhstmr_referenced_task_tag != cdw->cdw_initiator_task_tag) continue; #if 0 CFISCSI_SESSION_DEBUG(cs, "removing csw for initiator task " "tag 0x%x", bhstmr->bhstmr_initiator_task_tag); #endif TAILQ_REMOVE(&cs->cs_waiting_for_data_out, cdw, cdw_next); cfiscsi_data_wait_abort(cs, cdw, 43); } CFISCSI_SESSION_UNLOCK(cs); } if ((bhstmr->bhstmr_function & ~0x80) == BHSTMR_FUNCTION_TARGET_COLD_RESET && io->io_hdr.status == CTL_SUCCESS) cold_reset = 1; response = cfiscsi_pdu_new_response(request, M_WAITOK); bhstmr2 = (struct iscsi_bhs_task_management_response *) response->ip_bhs; bhstmr2->bhstmr_opcode = ISCSI_BHS_OPCODE_TASK_RESPONSE; bhstmr2->bhstmr_flags = 0x80; switch (io->taskio.task_status) { case CTL_TASK_FUNCTION_COMPLETE: bhstmr2->bhstmr_response = BHSTMR_RESPONSE_FUNCTION_COMPLETE; break; case CTL_TASK_FUNCTION_SUCCEEDED: bhstmr2->bhstmr_response = BHSTMR_RESPONSE_FUNCTION_SUCCEEDED; break; case CTL_TASK_LUN_DOES_NOT_EXIST: bhstmr2->bhstmr_response = BHSTMR_RESPONSE_LUN_DOES_NOT_EXIST; break; case CTL_TASK_FUNCTION_NOT_SUPPORTED: default: bhstmr2->bhstmr_response = BHSTMR_RESPONSE_FUNCTION_NOT_SUPPORTED; break; } memcpy(bhstmr2->bhstmr_additional_reponse_information, io->taskio.task_resp, sizeof(io->taskio.task_resp)); bhstmr2->bhstmr_initiator_task_tag = bhstmr->bhstmr_initiator_task_tag; ctl_free_io(io); icl_pdu_free(request); cfiscsi_pdu_queue(response); if (cold_reset) { softc = cs->cs_target->ct_softc; mtx_lock(&softc->lock); TAILQ_FOREACH(tcs, &softc->sessions, cs_next) { if (tcs->cs_target == cs->cs_target) cfiscsi_session_terminate(tcs); } mtx_unlock(&softc->lock); } } static void 
cfiscsi_done(union ctl_io *io) { struct icl_pdu *request; struct cfiscsi_session *cs; KASSERT(((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE), ("invalid CTL status %#x", io->io_hdr.status)); request = PRIV_REQUEST(io); cs = PDU_SESSION(request); switch (request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) { case ISCSI_BHS_OPCODE_SCSI_COMMAND: cfiscsi_scsi_command_done(io); break; case ISCSI_BHS_OPCODE_TASK_REQUEST: cfiscsi_task_management_done(io); break; case ISCSI_BHS_OPCODE_INTERNAL: /* * Implicit task termination has just completed; nothing to do. */ icl_pdu_free(request); cs->cs_tasks_aborted = true; refcount_release(&cs->cs_outstanding_ctl_pdus); wakeup(__DEVOLATILE(void *, &cs->cs_outstanding_ctl_pdus)); ctl_free_io(io); return; default: panic("cfiscsi_done called with wrong opcode 0x%x", request->ip_bhs->bhs_opcode); } refcount_release(&cs->cs_outstanding_ctl_pdus); } diff --git a/sys/cam/ctl/ctl_ioctl.h b/sys/cam/ctl/ctl_ioctl.h index 49c48afbd766..5cb7f29d82a7 100644 --- a/sys/cam/ctl/ctl_ioctl.h +++ b/sys/cam/ctl/ctl_ioctl.h @@ -1,782 +1,785 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 Silicon Graphics International Corp. * Copyright (c) 2011 Spectra Logic Corporation * Copyright (c) 2014-2017 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. 
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_ioctl.h#4 $
 * $FreeBSD$
 */
/*
 * CAM Target Layer ioctl interface.
 *
 * Author: Ken Merry
 */

#ifndef	_CTL_IOCTL_H_
#define	_CTL_IOCTL_H_

/* NOTE(review): the header names of the #include lines are missing here. */
#ifdef ICL_KERNEL_PROXY
#include
#endif

#include
#include

/* Path of the CTL control device node. */
#define	CTL_DEFAULT_DEV		"/dev/cam/ctl"
/*
 * Maximum number of targets we support.
 */
#define	CTL_MAX_TARGETS		1

/*
 * Maximum target ID we support.
 */
#define	CTL_MAX_TARGID		15

/*
 * Maximum number of initiators per port.
 */
#define	CTL_MAX_INIT_PER_PORT	2048

/* Hopefully this won't conflict with new misc devices that pop up */
#define	CTL_MINOR	225

/* Kind of delay requested in struct ctl_io_delay_info. */
typedef enum {
	CTL_DELAY_TYPE_NONE,
	CTL_DELAY_TYPE_CONT,
	CTL_DELAY_TYPE_ONESHOT
} ctl_delay_type;

/* Point in I/O processing at which the delay applies. */
typedef enum {
	CTL_DELAY_LOC_NONE,
	CTL_DELAY_LOC_DATAMOVE,
	CTL_DELAY_LOC_DONE,
} ctl_delay_location;

/* Result of a delay request, stored in ctl_io_delay_info.status. */
typedef enum {
	CTL_DELAY_STATUS_NONE,
	CTL_DELAY_STATUS_OK,
	CTL_DELAY_STATUS_INVALID_LUN,
	CTL_DELAY_STATUS_INVALID_TYPE,
	CTL_DELAY_STATUS_INVALID_LOC,
	CTL_DELAY_STATUS_NOT_IMPLEMENTED
} ctl_delay_status;

/* Delay request for one LUN: type, location, duration and result. */
struct ctl_io_delay_info {
	uint32_t		lun_id;
	ctl_delay_type		delay_type;
	ctl_delay_location	delay_loc;
	uint32_t		delay_secs;
	ctl_delay_status	status;
};

/* Index into the per-type arrays of struct ctl_io_stats. */
typedef enum {
	CTL_STATS_NO_IO,
	CTL_STATS_READ,
	CTL_STATS_WRITE
} ctl_stat_types;
/* Number of values in ctl_stat_types; sizes the arrays below. */
#define	CTL_STATS_NUM_TYPES	3

/* Result of a statistics request (ctl_get_io_stats.status). */
typedef enum {
	CTL_SS_OK,
	CTL_SS_NEED_MORE_SPACE,
	CTL_SS_ERROR
} ctl_stats_status;

/* Flags returned in ctl_get_io_stats.flags. */
typedef enum {
	CTL_STATS_FLAG_NONE		= 0x00,
	CTL_STATS_FLAG_TIME_VALID	= 0x01
} ctl_stats_flags;

/* Counters for one item, each array indexed by ctl_stat_types. */
struct ctl_io_stats {
	uint32_t			item;
	uint64_t			bytes[CTL_STATS_NUM_TYPES];
	uint64_t			operations[CTL_STATS_NUM_TYPES];
	uint64_t			dmas[CTL_STATS_NUM_TYPES];
	struct bintime			time[CTL_STATS_NUM_TYPES];
	struct bintime			dma_time[CTL_STATS_NUM_TYPES];
};

/* Request/response structure used to fetch an array of ctl_io_stats. */
struct ctl_get_io_stats {
	struct ctl_io_stats	*stats;		/* passed to/from kernel */
	size_t			alloc_len;	/* passed to kernel */
	size_t			fill_len;	/* passed to userland */
	int			first_item;	/* passed to kernel */
	int			num_items;	/* passed to userland */
	ctl_stats_status	status;		/* passed to userland */
	ctl_stats_flags		flags;		/* passed to userland */
	struct timespec		timestamp;	/* passed to userland */
};

/*
 * The types of errors that can be injected:
 *
 * NONE:	No error specified.
 * ABORTED:	SSD_KEY_ABORTED_COMMAND, 0x45, 0x00
 * MEDIUM_ERR:	Medium error, different asc/ascq depending on read/write.
 * UA:		Unit attention.
 * CUSTOM:	User specifies the sense data.
 * TYPE:	Mask to use with error types.
 *
 * Flags that affect injection behavior:
 * CONTINUOUS:	This error will stay around until explicitly cleared.
 * DESCRIPTOR:	Use descriptor sense instead of fixed sense.
 */
typedef enum {
	CTL_LUN_INJ_NONE		= 0x000,
	CTL_LUN_INJ_ABORTED		= 0x001,
	CTL_LUN_INJ_MEDIUM_ERR		= 0x002,
	CTL_LUN_INJ_UA			= 0x003,
	CTL_LUN_INJ_CUSTOM		= 0x004,
	CTL_LUN_INJ_TYPE		= 0x0ff,
	CTL_LUN_INJ_CONTINUOUS		= 0x100,
	CTL_LUN_INJ_DESCRIPTOR		= 0x200
} ctl_lun_error;

/*
 * Flags to specify what type of command the given error pattern will
 * execute on.  The first group of types can be ORed together.
 *
 * READ:	Any read command.
 * WRITE:	Any write command.
 * READWRITE:	Any read or write command.
 * READCAP:	Any read capacity command.
 * TUR:		Test Unit Ready.
 * ANY:		Any command.
 * MASK:	Mask for basic command patterns.
 *
 * Special types:
 *
 * CMD:		The CDB to act on is specified in struct ctl_error_desc_cmd.
 * RANGE:	For read/write commands, act when the LBA is in the
 *		specified range.
 */
typedef enum {
	CTL_LUN_PAT_NONE	= 0x000,
	CTL_LUN_PAT_READ	= 0x001,
	CTL_LUN_PAT_WRITE	= 0x002,
	CTL_LUN_PAT_READWRITE	= CTL_LUN_PAT_READ | CTL_LUN_PAT_WRITE,
	CTL_LUN_PAT_READCAP	= 0x004,
	CTL_LUN_PAT_TUR		= 0x008,
	CTL_LUN_PAT_ANY		= 0x0ff,	/* same value as the mask */
	CTL_LUN_PAT_MASK	= 0x0ff,
	CTL_LUN_PAT_CMD		= 0x100,
	CTL_LUN_PAT_RANGE	= 0x200
} ctl_lun_error_pattern;

/*
 * This structure allows the user to specify a particular CDB pattern to
 * look for.
 *
 * cdb_pattern:		Fill in the relevant bytes to look for in the CDB.
 * cdb_valid_bytes:	Bitmask specifying valid bytes in the cdb_pattern.
 * flags:		Specify any command flags (see ctl_io_flags) that
 *			should be set.
 */
struct ctl_error_desc_cmd {
	uint8_t		cdb_pattern[CTL_MAX_CDBLEN];
	uint32_t	cdb_valid_bytes;
	uint32_t	flags;
};

/*
 * Error injection descriptor.
 *
 * lun_id:	   LUN to act on.
 * lun_error:	   The type of error to inject.  See above for descriptions.
 * error_pattern:  What kind of command to act on.  See above.
 * cmd_desc:	   For CTL_LUN_PAT_CMD only.
 * lba_range:	   For CTL_LUN_PAT_RANGE only.
 * custom_sense:   Specify sense.  For CTL_LUN_INJ_CUSTOM only.
 * serial:	   Serial number returned by the kernel.  Use for deletion.
 * links:	   Kernel use only.
 */
struct ctl_error_desc {
	uint32_t			lun_id;		/* To kernel */
	ctl_lun_error			lun_error;	/* To kernel */
	ctl_lun_error_pattern		error_pattern;	/* To kernel */
	struct ctl_error_desc_cmd	cmd_desc;	/* To kernel */
	struct ctl_lba_len		lba_range;	/* To kernel */
	struct scsi_sense_data		custom_sense;	/* To kernel */
	uint64_t			serial;		/* From kernel */
	STAILQ_ENTRY(ctl_error_desc)	links;		/* Kernel use only */
};

/* Request flags for struct ctl_ooa. */
typedef enum {
	CTL_OOA_FLAG_NONE	= 0x00,
	CTL_OOA_FLAG_ALL_LUNS	= 0x01
} ctl_ooa_flags;

/* Result of an OOA request (ctl_ooa.status). */
typedef enum {
	CTL_OOA_OK,
	CTL_OOA_NEED_MORE_SPACE,
	CTL_OOA_ERROR
} ctl_get_ooa_status;

/* Per-command state bits reported in ctl_ooa_entry.cmd_flags. */
typedef enum {
	CTL_OOACMD_FLAG_NONE		= 0x00,
	CTL_OOACMD_FLAG_DMA		= 0x01,
	CTL_OOACMD_FLAG_BLOCKED		= 0x02,
	CTL_OOACMD_FLAG_ABORT		= 0x04,
	CTL_OOACMD_FLAG_RTR		= 0x08,
	CTL_OOACMD_FLAG_DMA_QUEUED	= 0x10,
	CTL_OOACMD_FLAG_STATUS_QUEUED	= 0x20,
	CTL_OOACMD_FLAG_STATUS_SENT	= 0x40
} ctl_ooa_cmd_flags;

/* One command as reported in the 'entries' array of struct ctl_ooa. */
struct ctl_ooa_entry {
	ctl_ooa_cmd_flags	cmd_flags;
	uint8_t			cdb[CTL_MAX_CDBLEN];
	uint8_t			cdb_len;
	uint32_t		tag_num;
	uint32_t		lun_num;
	struct bintime		start_bt;
};

/* Request/response structure used to fetch an array of ctl_ooa_entry. */
struct ctl_ooa {
	ctl_ooa_flags		flags;		/* passed to kernel */
	uint64_t		lun_num;	/* passed to kernel */
	uint32_t		alloc_len;	/* passed to kernel */
	uint32_t		alloc_num;	/* passed to kernel */
	struct ctl_ooa_entry	*entries;	/* filled in kernel */
	uint32_t		fill_len;	/* passed to userland */
	uint32_t		fill_num;	/* passed to userland */
	uint32_t		dropped_num;	/* passed to userland */
	struct bintime		cur_bt;		/* passed to userland */
	ctl_get_ooa_status	status;		/* passed to userland */
};

/* Status of a LUN or port request (struct ctl_lun_req, struct ctl_req). */
typedef enum {
	CTL_LUN_NOSTATUS,
	CTL_LUN_OK,
	CTL_LUN_ERROR,
	CTL_LUN_WARNING
} ctl_lun_status;

/* Size of the error_str buffers in the request structures. */
#define	CTL_ERROR_STR_LEN	160

/* Operation selected by ctl_lun_req.reqtype. */
typedef enum {
	CTL_LUNREQ_CREATE,
	CTL_LUNREQ_RM,
	CTL_LUNREQ_MODIFY,
} ctl_lunreq_type;

/*
 * The ID_REQ flag is used to say that the caller has requested a
 * particular LUN ID in the req_lun_id field.
 * If we cannot allocate that
 * LUN ID, the ctl_add_lun() call will fail.
 *
 * The STOPPED flag tells us that the LUN should default to the powered
 * off state.  It will return 0x04,0x02 until it is powered up.  ("Logical
 * unit not ready, initializing command required.")
 *
 * The NO_MEDIA flag tells us that the LUN has no media inserted.
 *
 * The PRIMARY flag tells us that this LUN is registered as a Primary LUN
 * which is accessible via the Master shelf controller in an HA. This flag
 * being set indicates a Primary LUN. This flag being reset represents a
 * Secondary LUN controlled by the Secondary controller in an HA
 * configuration. Flag is applicable at this time to T_DIRECT types.
 *
 * The SERIAL_NUM flag tells us that the serial_num field is filled in and
 * valid for use in SCSI INQUIRY VPD page 0x80.
 *
 * The DEVID flag tells us that the device_id field is filled in and
 * valid for use in SCSI INQUIRY VPD page 0x83.
 *
 * The DEV_TYPE flag tells us that the device_type field is filled in.
 *
 * The EJECTED flag tells us that the removable LUN has tray open.
 *
 * The UNMAP flag tells us that this LUN supports UNMAP.
 *
 * The OFFLINE flag tells us that this LUN can not access backing store.
 *
 * NOTE(review): the enum below defines no CTL_LUN_FLAG_OFFLINE, and
 * CTL_LUN_FLAG_READONLY is defined but not described above; presumably
 * it marks the LUN as read-only -- confirm and update this comment.
 */
typedef enum {
	CTL_LUN_FLAG_ID_REQ		= 0x01,
	CTL_LUN_FLAG_STOPPED		= 0x02,
	CTL_LUN_FLAG_NO_MEDIA		= 0x04,
	CTL_LUN_FLAG_PRIMARY		= 0x08,
	CTL_LUN_FLAG_SERIAL_NUM		= 0x10,
	CTL_LUN_FLAG_DEVID		= 0x20,
	CTL_LUN_FLAG_DEV_TYPE		= 0x40,
	CTL_LUN_FLAG_UNMAP		= 0x80,
	CTL_LUN_FLAG_EJECTED		= 0x100,
	CTL_LUN_FLAG_READONLY		= 0x200
} ctl_backend_lun_flags;

/*
 * LUN creation parameters:
 *
 * flags:		Various LUN flags, see above.
 *
 * device_type:		The SCSI device type.  e.g. 0 for Direct Access,
 *			3 for Processor, etc.  Only certain backends may
 *			support setting this field.  The CTL_LUN_FLAG_DEV_TYPE
 *			flag should be set in the flags field if the device
 *			type is set.
 *
 * lun_size_bytes:	The size of the LUN in bytes.  For some backends
 *			this is relevant (e.g.
 *			ramdisk), for others, it may
 *			be ignored in favor of using the properties of the
 *			backing store.  If specified, this should be a
 *			multiple of the blocksize.
 *
 *			The actual size of the LUN is returned in this
 *			field.
 *
 * blocksize_bytes:	The LUN blocksize in bytes.  For some backends this
 *			is relevant, for others it may be ignored in
 *			favor of using the properties of the backing store.
 *
 *			The actual blocksize of the LUN is returned in this
 *			field.
 *
 * req_lun_id:		The requested LUN ID.  The CTL_LUN_FLAG_ID_REQ flag
 *			should be set if this is set.  The request will be
 *			granted if the LUN number is available, otherwise
 *			the LUN addition request will fail.
 *
 *			The allocated LUN number is returned in this field.
 *
 * serial_num:		This is the value returned in SCSI INQUIRY VPD page
 *			0x80.  If it is specified, the CTL_LUN_FLAG_SERIAL_NUM
 *			flag should be set.
 *
 *			The serial number value used is returned in this
 *			field.
 *
 * device_id:		This is the value returned in the T10 vendor ID
 *			based DESIGNATOR field in the SCSI INQUIRY VPD page
 *			0x83 data.  If it is specified, the CTL_LUN_FLAG_DEVID
 *			flag should be set.
 *
 *			The device id value used is returned in this field.
 */
struct ctl_lun_create_params {
	ctl_backend_lun_flags	flags;
	uint8_t			device_type;
	uint64_t		lun_size_bytes;
	uint32_t		blocksize_bytes;
	uint32_t		req_lun_id;
	uint8_t			serial_num[CTL_SN_LEN];
	uint8_t			device_id[CTL_DEVID_LEN];
};

/*
 * LUN removal parameters:
 *
 * lun_id:		The number of the LUN to delete.  This must be set.
 *			The LUN must be backed by the given backend.
 */
struct ctl_lun_rm_params {
	uint32_t		lun_id;
};

/*
 * LUN modification parameters:
 *
 * lun_id:		The number of the LUN to modify.  This must be set.
 *			The LUN must be backed by the given backend.
 *
 * lun_size_bytes:	The size of the LUN in bytes.  If zero, update
 *			the size using the backing file size, if possible.
 */
struct ctl_lun_modify_params {
	uint32_t		lun_id;
	uint64_t		lun_size_bytes;
};

/*
 * Union of request type data.
 * Fill in the appropriate union member for
 * the request type.
 */
union ctl_lunreq_data {
	struct ctl_lun_create_params	create;
	struct ctl_lun_rm_params	rm;
	struct ctl_lun_modify_params	modify;
};

/*
 * LUN request interface:
 *
 * backend:		This is required, and is a NUL-terminated string
 *			that is the name of the backend, like "ramdisk" or
 *			"block".
 *
 * reqtype:		The type of request, CTL_LUNREQ_CREATE to create a
 *			LUN, CTL_LUNREQ_RM to delete a LUN,
 *			CTL_LUNREQ_MODIFY to modify a LUN.
 *
 * reqdata:		Request type-specific information.  See the
 *			description of the individual union members above
 *			for more information.
 *
 * num_be_args:		This is the number of backend-specific arguments
 *			in the be_args array.
 *
 * be_args:		This is an array of backend-specific arguments.
 *			See above for a description of the fields in this
 *			structure.
 *
 * status:		Status of the LUN request.
 *
 * error_str:		If the status is CTL_LUN_ERROR, this will
 *			contain a string describing the error.
 *
 * kern_be_args:	For kernel use only.
 *
 * NOTE(review): the structure below has no num_be_args, be_args or
 * kern_be_args members; it carries args/args_nvl/args_len and
 * result/result_nvl/result_len instead, which are not described here.
 * Presumably the arguments are now passed as packed nvlists -- confirm
 * and update this comment.
 */
struct ctl_lun_req {
#define	CTL_BE_NAME_LEN		32
	char			backend[CTL_BE_NAME_LEN];
	ctl_lunreq_type		reqtype;
	union ctl_lunreq_data	reqdata;
	void *			args;
	nvlist_t *		args_nvl;
	size_t			args_len;
	void *			result;
	nvlist_t *		result_nvl;
	size_t			result_len;
	ctl_lun_status		status;
	char			error_str[CTL_ERROR_STR_LEN];
};

/*
 * LUN list status:
 *
 * NONE:		No status.
 *
 * OK:			Request completed successfully.
 *
 * NEED_MORE_SPACE:	The allocated length of the entries field is too
 *			small for the available data.
 *
 * ERROR:		An error occurred, look at the error string for a
 *			description of the error.
 */
typedef enum {
	CTL_LUN_LIST_NONE,
	CTL_LUN_LIST_OK,
	CTL_LUN_LIST_NEED_MORE_SPACE,
	CTL_LUN_LIST_ERROR
} ctl_lun_list_status;

/*
 * LUN list interface
 *
 * backend_name:	This is a NUL-terminated string.  If the string
 *			length is 0, then all LUNs on all backends will
 *			be enumerated.  Otherwise this is the name of the
 *			backend to be enumerated, like "ramdisk" or "block".
 *
 * alloc_len:		The length of the data buffer allocated for entries.
 *			In order to properly size the buffer, make one call
 *			with alloc_len set to 0, and then use the returned
 *			dropped_len as the buffer length to allocate and
 *			pass in on a subsequent call.
 *
 * lun_xml:		XML-formatted information on the requested LUNs.
 *
 * fill_len:		The amount of data filled in the storage for entries.
 *
 * status:		The status of the request.  See above for the
 *			description of the values of this field.
 *
 * error_str:		If the status indicates an error, this string will
 *			be filled in to describe the error.
 *
 * NOTE(review): the structure below names the backend field "backend",
 * not "backend_name", and has no dropped_len member; the sizing
 * procedure described for alloc_len needs to be re-checked.
 */
struct ctl_lun_list {
	char			backend[CTL_BE_NAME_LEN]; /* passed to kernel */
	uint32_t		alloc_len;	/* passed to kernel */
	char			*lun_xml;	/* filled in kernel */
	uint32_t		fill_len;	/* passed to userland */
	ctl_lun_list_status	status;		/* passed to userland */
	char			error_str[CTL_ERROR_STR_LEN];
						/* passed to userland */
};

/*
 * Port request interface:
 *
 * driver:		This is required, and is a NUL-terminated string
 *			that is the name of the frontend, like "iscsi".
 *
 * reqtype:		The type of request, CTL_REQ_CREATE to create a
 *			port, CTL_REQ_REMOVE to delete a port,
 *			CTL_REQ_MODIFY to modify a port.
 *
 * num_be_args:		This is the number of frontend-specific arguments
 *			in the be_args array.
 *
 * be_args:		This is an array of frontend-specific arguments.
 *			See above for a description of the fields in this
 *			structure.
 *
 * status:		Status of the request.
 *
 * error_str:		If the status is CTL_LUN_ERROR, this will
 *			contain a string describing the error.
 *
 * kern_be_args:	For kernel use only.
 *
 * NOTE(review): as with struct ctl_lun_req, struct ctl_req below has
 * args/args_nvl/args_len and result/result_nvl/result_len rather than
 * the num_be_args, be_args and kern_be_args described here.
 */
typedef enum {
	CTL_REQ_CREATE,
	CTL_REQ_REMOVE,
	CTL_REQ_MODIFY,
} ctl_req_type;

struct ctl_req {
	char			driver[CTL_DRIVER_NAME_LEN];
	ctl_req_type		reqtype;
	void *			args;
	nvlist_t *		args_nvl;
	size_t			args_len;
	void *			result;
	nvlist_t *		result_nvl;
	size_t			result_len;
	ctl_lun_status		status;
	char			error_str[CTL_ERROR_STR_LEN];
};

/*
 * iSCSI status
 *
 * OK:			Request completed successfully.
 *
 * ERROR:		An error occurred, look at the error string for a
 *			description of the error.
* * CTL_ISCSI_LIST_NEED_MORE_SPACE: * User has to pass larger buffer for CTL_ISCSI_LIST ioctl. */ typedef enum { CTL_ISCSI_OK, CTL_ISCSI_ERROR, CTL_ISCSI_LIST_NEED_MORE_SPACE, CTL_ISCSI_SESSION_NOT_FOUND } ctl_iscsi_status; typedef enum { CTL_ISCSI_HANDOFF, CTL_ISCSI_LIST, CTL_ISCSI_LOGOUT, CTL_ISCSI_TERMINATE, CTL_ISCSI_LIMITS, #if defined(ICL_KERNEL_PROXY) || 1 /* * We actually need those in all cases, but leave the ICL_KERNEL_PROXY, * to remember to remove them along with rest of proxy code, eventually. */ CTL_ISCSI_LISTEN, CTL_ISCSI_ACCEPT, CTL_ISCSI_SEND, CTL_ISCSI_RECEIVE, #endif } ctl_iscsi_type; typedef enum { CTL_ISCSI_DIGEST_NONE, CTL_ISCSI_DIGEST_CRC32C } ctl_iscsi_digest; #define CTL_ISCSI_NAME_LEN 224 /* 223 bytes, by RFC 3720, + '\0' */ #define CTL_ISCSI_ADDR_LEN 47 /* INET6_ADDRSTRLEN + '\0' */ #define CTL_ISCSI_ALIAS_LEN 128 /* Arbitrary. */ #define CTL_ISCSI_OFFLOAD_LEN 8 /* Arbitrary. */ struct ctl_iscsi_handoff_params { char initiator_name[CTL_ISCSI_NAME_LEN]; char initiator_addr[CTL_ISCSI_ADDR_LEN]; char initiator_alias[CTL_ISCSI_ALIAS_LEN]; uint8_t initiator_isid[6]; char target_name[CTL_ISCSI_NAME_LEN]; int socket; int portal_group_tag; /* * Connection parameters negotiated by ctld(8). 
*/ ctl_iscsi_digest header_digest; ctl_iscsi_digest data_digest; uint32_t cmdsn; uint32_t statsn; int max_recv_data_segment_length; int max_burst_length; int first_burst_length; uint32_t immediate_data; char offload[CTL_ISCSI_OFFLOAD_LEN]; #ifdef ICL_KERNEL_PROXY int connection_id; #else int spare; #endif int max_send_data_segment_length; }; struct ctl_iscsi_list_params { uint32_t alloc_len; /* passed to kernel */ char *conn_xml; /* filled in kernel */ uint32_t fill_len; /* passed to userland */ int spare[4]; }; struct ctl_iscsi_logout_params { int connection_id; /* passed to kernel */ char initiator_name[CTL_ISCSI_NAME_LEN]; /* passed to kernel */ char initiator_addr[CTL_ISCSI_ADDR_LEN]; /* passed to kernel */ int all; /* passed to kernel */ int spare[4]; }; struct ctl_iscsi_terminate_params { int connection_id; /* passed to kernel */ char initiator_name[CTL_ISCSI_NAME_LEN]; /* passed to kernel */ char initiator_addr[CTL_ISCSI_NAME_LEN]; /* passed to kernel */ int all; /* passed to kernel */ int spare[4]; }; struct ctl_iscsi_limits_params { /* passed to kernel */ char offload[CTL_ISCSI_OFFLOAD_LEN]; + int socket; /* passed to userland */ - size_t spare; +#ifdef __LP64__ + int spare; +#endif int max_recv_data_segment_length; int max_send_data_segment_length; int max_burst_length; int first_burst_length; }; #ifdef ICL_KERNEL_PROXY struct ctl_iscsi_listen_params { int iser; int domain; int socktype; int protocol; struct sockaddr *addr; socklen_t addrlen; int portal_id; int spare[4]; }; struct ctl_iscsi_accept_params { int connection_id; int portal_id; struct sockaddr *initiator_addr; socklen_t initiator_addrlen; int spare[4]; }; struct ctl_iscsi_send_params { int connection_id; void *bhs; size_t spare; void *spare2; size_t data_segment_len; void *data_segment; int spare3[4]; }; struct ctl_iscsi_receive_params { int connection_id; void *bhs; size_t spare; void *spare2; size_t data_segment_len; void *data_segment; int spare3[4]; }; #endif /* ICL_KERNEL_PROXY */ union 
ctl_iscsi_data { struct ctl_iscsi_handoff_params handoff; struct ctl_iscsi_list_params list; struct ctl_iscsi_logout_params logout; struct ctl_iscsi_terminate_params terminate; struct ctl_iscsi_limits_params limits; #ifdef ICL_KERNEL_PROXY struct ctl_iscsi_listen_params listen; struct ctl_iscsi_accept_params accept; struct ctl_iscsi_send_params send; struct ctl_iscsi_receive_params receive; #endif }; /* * iSCSI interface * * status: The status of the request. See above for the * description of the values of this field. * * error_str: If the status indicates an error, this string will * be filled in to describe the error. */ struct ctl_iscsi { ctl_iscsi_type type; /* passed to kernel */ union ctl_iscsi_data data; /* passed to kernel */ ctl_iscsi_status status; /* passed to userland */ char error_str[CTL_ERROR_STR_LEN]; /* passed to userland */ }; struct ctl_lun_map { uint32_t port; uint32_t plun; uint32_t lun; }; #define CTL_IO _IOWR(CTL_MINOR, 0x00, union ctl_io) #define CTL_ENABLE_PORT _IOW(CTL_MINOR, 0x04, struct ctl_port_entry) #define CTL_DISABLE_PORT _IOW(CTL_MINOR, 0x05, struct ctl_port_entry) #define CTL_DELAY_IO _IOWR(CTL_MINOR, 0x10, struct ctl_io_delay_info) #define CTL_ERROR_INJECT _IOWR(CTL_MINOR, 0x16, struct ctl_error_desc) #define CTL_GET_OOA _IOWR(CTL_MINOR, 0x18, struct ctl_ooa) #define CTL_DUMP_STRUCTS _IO(CTL_MINOR, 0x19) #define CTL_LUN_REQ _IOWR(CTL_MINOR, 0x21, struct ctl_lun_req) #define CTL_LUN_LIST _IOWR(CTL_MINOR, 0x22, struct ctl_lun_list) #define CTL_ERROR_INJECT_DELETE _IOW(CTL_MINOR, 0x23, struct ctl_error_desc) #define CTL_SET_PORT_WWNS _IOW(CTL_MINOR, 0x24, struct ctl_port_entry) #define CTL_ISCSI _IOWR(CTL_MINOR, 0x25, struct ctl_iscsi) #define CTL_PORT_REQ _IOWR(CTL_MINOR, 0x26, struct ctl_req) #define CTL_PORT_LIST _IOWR(CTL_MINOR, 0x27, struct ctl_lun_list) #define CTL_LUN_MAP _IOW(CTL_MINOR, 0x28, struct ctl_lun_map) #define CTL_GET_LUN_STATS _IOWR(CTL_MINOR, 0x29, struct ctl_get_io_stats) #define CTL_GET_PORT_STATS 
_IOWR(CTL_MINOR, 0x2a, struct ctl_get_io_stats) #endif /* _CTL_IOCTL_H_ */ /* * vim: ts=8 */ diff --git a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c index a3dbf4cb88a2..f0189372bd03 100644 --- a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c +++ b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c @@ -1,1816 +1,1816 @@ /*- * Copyright (c) 2012 The FreeBSD Foundation * Copyright (c) 2015 Chelsio Communications, Inc. * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * cxgbei implementation of iSCSI Common Layer kobj(9) interface. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common/common.h" #include "common/t4_regs.h" #include "common/t4_tcb.h" #include "tom/t4_tom.h" #include "cxgbei.h" /* * Use the page pod tag for the TT hash. */ #define TT_HASH(icc, tt) (G_PPOD_TAG(tt) & (icc)->cmp_hash_mask) struct cxgbei_ddp_state { struct ppod_reservation prsv; struct cxgbei_cmp cmp; }; static MALLOC_DEFINE(M_CXGBEI, "cxgbei", "cxgbei(4)"); SYSCTL_NODE(_kern_icl, OID_AUTO, cxgbei, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Chelsio iSCSI offload"); static int first_burst_length = 8192; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, first_burst_length, CTLFLAG_RWTUN, &first_burst_length, 0, "First burst length"); static int max_burst_length = 2 * 1024 * 1024; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, max_burst_length, CTLFLAG_RWTUN, &max_burst_length, 0, "Maximum burst length"); static int sendspace = 1048576; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, sendspace, CTLFLAG_RWTUN, &sendspace, 0, "Default send socket buffer size"); static int recvspace = 1048576; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, recvspace, CTLFLAG_RWTUN, &recvspace, 0, "Default receive socket buffer size"); static volatile u_int icl_cxgbei_ncons; static icl_conn_new_pdu_t icl_cxgbei_conn_new_pdu; static icl_conn_pdu_data_segment_length_t icl_cxgbei_conn_pdu_data_segment_length; static icl_conn_pdu_append_bio_t icl_cxgbei_conn_pdu_append_bio; static icl_conn_pdu_append_data_t icl_cxgbei_conn_pdu_append_data; static icl_conn_pdu_get_bio_t 
icl_cxgbei_conn_pdu_get_bio; static icl_conn_pdu_get_data_t icl_cxgbei_conn_pdu_get_data; static icl_conn_pdu_queue_t icl_cxgbei_conn_pdu_queue; static icl_conn_pdu_queue_cb_t icl_cxgbei_conn_pdu_queue_cb; static icl_conn_handoff_t icl_cxgbei_conn_handoff; static icl_conn_free_t icl_cxgbei_conn_free; static icl_conn_close_t icl_cxgbei_conn_close; static icl_conn_task_setup_t icl_cxgbei_conn_task_setup; static icl_conn_task_done_t icl_cxgbei_conn_task_done; static icl_conn_transfer_setup_t icl_cxgbei_conn_transfer_setup; static icl_conn_transfer_done_t icl_cxgbei_conn_transfer_done; static kobj_method_t icl_cxgbei_methods[] = { KOBJMETHOD(icl_conn_new_pdu, icl_cxgbei_conn_new_pdu), KOBJMETHOD(icl_conn_pdu_free, icl_cxgbei_conn_pdu_free), KOBJMETHOD(icl_conn_pdu_data_segment_length, icl_cxgbei_conn_pdu_data_segment_length), KOBJMETHOD(icl_conn_pdu_append_bio, icl_cxgbei_conn_pdu_append_bio), KOBJMETHOD(icl_conn_pdu_append_data, icl_cxgbei_conn_pdu_append_data), KOBJMETHOD(icl_conn_pdu_get_bio, icl_cxgbei_conn_pdu_get_bio), KOBJMETHOD(icl_conn_pdu_get_data, icl_cxgbei_conn_pdu_get_data), KOBJMETHOD(icl_conn_pdu_queue, icl_cxgbei_conn_pdu_queue), KOBJMETHOD(icl_conn_pdu_queue_cb, icl_cxgbei_conn_pdu_queue_cb), KOBJMETHOD(icl_conn_handoff, icl_cxgbei_conn_handoff), KOBJMETHOD(icl_conn_free, icl_cxgbei_conn_free), KOBJMETHOD(icl_conn_close, icl_cxgbei_conn_close), KOBJMETHOD(icl_conn_task_setup, icl_cxgbei_conn_task_setup), KOBJMETHOD(icl_conn_task_done, icl_cxgbei_conn_task_done), KOBJMETHOD(icl_conn_transfer_setup, icl_cxgbei_conn_transfer_setup), KOBJMETHOD(icl_conn_transfer_done, icl_cxgbei_conn_transfer_done), { 0, 0 } }; DEFINE_CLASS(icl_cxgbei, icl_cxgbei_methods, sizeof(struct icl_cxgbei_conn)); void icl_cxgbei_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) { struct icl_cxgbei_pdu *icp = ip_to_icp(ip); KASSERT(icp->ref_cnt != 0, ("freeing deleted PDU")); MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE); MPASS(ic == ip->ip_conn); m_freem(ip->ip_ahs_mbuf); 
m_freem(ip->ip_data_mbuf); m_freem(ip->ip_bhs_mbuf); KASSERT(ic != NULL || icp->ref_cnt == 1, ("orphaned PDU has oustanding references")); if (atomic_fetchadd_int(&icp->ref_cnt, -1) != 1) return; free(icp, M_CXGBEI); #ifdef DIAGNOSTIC if (__predict_true(ic != NULL)) refcount_release(&ic->ic_outstanding_pdus); #endif } static void icl_cxgbei_pdu_call_cb(struct icl_pdu *ip) { struct icl_cxgbei_pdu *icp = ip_to_icp(ip); MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE); if (icp->cb != NULL) icp->cb(ip, icp->error); #ifdef DIAGNOSTIC if (__predict_true(ip->ip_conn != NULL)) refcount_release(&ip->ip_conn->ic_outstanding_pdus); #endif free(icp, M_CXGBEI); } static void icl_cxgbei_pdu_done(struct icl_pdu *ip, int error) { struct icl_cxgbei_pdu *icp = ip_to_icp(ip); if (error != 0) icp->error = error; m_freem(ip->ip_ahs_mbuf); ip->ip_ahs_mbuf = NULL; m_freem(ip->ip_data_mbuf); ip->ip_data_mbuf = NULL; m_freem(ip->ip_bhs_mbuf); ip->ip_bhs_mbuf = NULL; /* * All other references to this PDU should have been dropped * by the m_freem() of ip_data_mbuf. */ if (atomic_fetchadd_int(&icp->ref_cnt, -1) == 1) icl_cxgbei_pdu_call_cb(ip); else __assert_unreachable(); } static void icl_cxgbei_mbuf_done(struct mbuf *mb) { struct icl_cxgbei_pdu *icp = (struct icl_cxgbei_pdu *)mb->m_ext.ext_arg1; /* * NB: mb_free_mext() might leave ref_cnt as 1 without * decrementing it if it hits the fast path in the ref_cnt * check. 
*/ icl_cxgbei_pdu_call_cb(&icp->ip); } struct icl_pdu * icl_cxgbei_new_pdu(int flags) { struct icl_cxgbei_pdu *icp; struct icl_pdu *ip; struct mbuf *m; icp = malloc(sizeof(*icp), M_CXGBEI, flags | M_ZERO); if (__predict_false(icp == NULL)) return (NULL); icp->icp_signature = CXGBEI_PDU_SIGNATURE; icp->ref_cnt = 1; ip = &icp->ip; m = m_gethdr(flags, MT_DATA); if (__predict_false(m == NULL)) { free(icp, M_CXGBEI); return (NULL); } ip->ip_bhs_mbuf = m; ip->ip_bhs = mtod(m, struct iscsi_bhs *); memset(ip->ip_bhs, 0, sizeof(*ip->ip_bhs)); m->m_len = sizeof(struct iscsi_bhs); m->m_pkthdr.len = m->m_len; return (ip); } void icl_cxgbei_new_pdu_set_conn(struct icl_pdu *ip, struct icl_conn *ic) { ip->ip_conn = ic; #ifdef DIAGNOSTIC refcount_acquire(&ic->ic_outstanding_pdus); #endif } /* * Allocate icl_pdu with empty BHS to fill up by the caller. */ static struct icl_pdu * icl_cxgbei_conn_new_pdu(struct icl_conn *ic, int flags) { struct icl_pdu *ip; ip = icl_cxgbei_new_pdu(flags); if (__predict_false(ip == NULL)) return (NULL); icl_cxgbei_new_pdu_set_conn(ip, ic); return (ip); } static size_t icl_pdu_data_segment_length(const struct icl_pdu *request) { uint32_t len = 0; len += request->ip_bhs->bhs_data_segment_len[0]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[1]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[2]; return (len); } size_t icl_cxgbei_conn_pdu_data_segment_length(struct icl_conn *ic, const struct icl_pdu *request) { return (icl_pdu_data_segment_length(request)); } static struct mbuf * finalize_pdu(struct icl_cxgbei_conn *icc, struct icl_cxgbei_pdu *icp) { struct icl_pdu *ip = &icp->ip; uint8_t ulp_submode, padding; struct mbuf *m, *last; struct iscsi_bhs *bhs; int data_len; /* * Fix up the data segment mbuf first. */ m = ip->ip_data_mbuf; ulp_submode = icc->ulp_submode; if (m != NULL) { last = m_last(m); /* * Round up the data segment to a 4B boundary. Pad with 0 if * necessary. There will definitely be room in the mbuf. 
*/ padding = roundup2(ip->ip_data_len, 4) - ip->ip_data_len; if (padding != 0) { MPASS(padding <= M_TRAILINGSPACE(last)); bzero(mtod(last, uint8_t *) + last->m_len, padding); last->m_len += padding; } } else { MPASS(ip->ip_data_len == 0); ulp_submode &= ~ULP_CRC_DATA; padding = 0; } /* * Now the header mbuf that has the BHS. */ m = ip->ip_bhs_mbuf; MPASS(m->m_pkthdr.len == sizeof(struct iscsi_bhs)); MPASS(m->m_len == sizeof(struct iscsi_bhs)); bhs = ip->ip_bhs; data_len = ip->ip_data_len; if (data_len > icc->ic.ic_max_send_data_segment_length) { struct iscsi_bhs_data_in *bhsdi; int flags; KASSERT(padding == 0, ("%s: ISO with padding %d for icp %p", __func__, padding, icp)); switch (bhs->bhs_opcode) { case ISCSI_BHS_OPCODE_SCSI_DATA_OUT: flags = 1; break; case ISCSI_BHS_OPCODE_SCSI_DATA_IN: flags = 2; break; default: panic("invalid opcode %#x for ISO", bhs->bhs_opcode); } data_len = icc->ic.ic_max_send_data_segment_length; bhsdi = (struct iscsi_bhs_data_in *)bhs; if (bhsdi->bhsdi_flags & BHSDI_FLAGS_F) { /* * Firmware will set F on the final PDU in the * burst. */ flags |= CXGBE_ISO_F; bhsdi->bhsdi_flags &= ~BHSDI_FLAGS_F; } set_mbuf_iscsi_iso(m, true); set_mbuf_iscsi_iso_flags(m, flags); set_mbuf_iscsi_iso_mss(m, data_len); } bhs->bhs_data_segment_len[2] = data_len; bhs->bhs_data_segment_len[1] = data_len >> 8; bhs->bhs_data_segment_len[0] = data_len >> 16; /* * Extract mbuf chain from PDU. */ m->m_pkthdr.len += ip->ip_data_len + padding; m->m_next = ip->ip_data_mbuf; set_mbuf_ulp_submode(m, ulp_submode); ip->ip_bhs_mbuf = NULL; ip->ip_data_mbuf = NULL; ip->ip_bhs = NULL; /* * Drop PDU reference on icp. Additional references might * still be held by zero-copy PDU buffers (ICL_NOCOPY). 
*/ if (atomic_fetchadd_int(&icp->ref_cnt, -1) == 1) icl_cxgbei_pdu_call_cb(ip); return (m); } static void icl_cxgbei_tx_main(void *arg) { struct epoch_tracker et; struct icl_cxgbei_conn *icc = arg; struct icl_conn *ic = &icc->ic; struct toepcb *toep = icc->toep; struct socket *so = ic->ic_socket; struct inpcb *inp = sotoinpcb(so); struct icl_pdu *ip; struct mbuf *m; struct mbufq mq; STAILQ_HEAD(, icl_pdu) tx_pdus = STAILQ_HEAD_INITIALIZER(tx_pdus); mbufq_init(&mq, INT_MAX); ICL_CONN_LOCK(ic); while (__predict_true(!ic->ic_disconnecting)) { while (STAILQ_EMPTY(&icc->sent_pdus)) { icc->tx_active = false; mtx_sleep(&icc->tx_active, ic->ic_lock, 0, "-", 0); if (__predict_false(ic->ic_disconnecting)) goto out; MPASS(icc->tx_active); } STAILQ_SWAP(&icc->sent_pdus, &tx_pdus, icl_pdu); ICL_CONN_UNLOCK(ic); while ((ip = STAILQ_FIRST(&tx_pdus)) != NULL) { STAILQ_REMOVE_HEAD(&tx_pdus, ip_next); m = finalize_pdu(icc, ip_to_icp(ip)); M_ASSERTPKTHDR(m); MPASS((m->m_pkthdr.len & 3) == 0); mbufq_enqueue(&mq, m); } ICL_CONN_LOCK(ic); if (__predict_false(ic->ic_disconnecting) || __predict_false(ic->ic_socket == NULL)) { mbufq_drain(&mq); break; } CURVNET_SET(toep->vnet); NET_EPOCH_ENTER(et); INP_WLOCK(inp); ICL_CONN_UNLOCK(ic); if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) || __predict_false((toep->flags & TPF_ATTACHED) == 0)) { mbufq_drain(&mq); } else { mbufq_concat(&toep->ulp_pduq, &mq); t4_push_pdus(icc->sc, toep, 0); } INP_WUNLOCK(inp); NET_EPOCH_EXIT(et); CURVNET_RESTORE(); ICL_CONN_LOCK(ic); } out: ICL_CONN_UNLOCK(ic); kthread_exit(); } static void icl_cxgbei_rx_main(void *arg) { struct icl_cxgbei_conn *icc = arg; struct icl_conn *ic = &icc->ic; struct icl_pdu *ip; struct sockbuf *sb; STAILQ_HEAD(, icl_pdu) rx_pdus = STAILQ_HEAD_INITIALIZER(rx_pdus); bool cantrcvmore; sb = &ic->ic_socket->so_rcv; SOCKBUF_LOCK(sb); while (__predict_true(!ic->ic_disconnecting)) { while (STAILQ_EMPTY(&icc->rcvd_pdus)) { icc->rx_active = false; mtx_sleep(&icc->rx_active, 
SOCKBUF_MTX(sb), 0, "-", 0); if (__predict_false(ic->ic_disconnecting)) goto out; MPASS(icc->rx_active); } if (__predict_false(sbused(sb)) != 0) { /* * PDUs were received before the tid * transitioned to ULP mode. Convert * them to icl_cxgbei_pdus and insert * them into the head of rcvd_pdus. */ parse_pdus(icc, sb); } cantrcvmore = (sb->sb_state & SBS_CANTRCVMORE) != 0; MPASS(STAILQ_EMPTY(&rx_pdus)); STAILQ_SWAP(&icc->rcvd_pdus, &rx_pdus, icl_pdu); SOCKBUF_UNLOCK(sb); /* Hand over PDUs to ICL. */ while ((ip = STAILQ_FIRST(&rx_pdus)) != NULL) { STAILQ_REMOVE_HEAD(&rx_pdus, ip_next); if (cantrcvmore) icl_cxgbei_pdu_done(ip, ENOTCONN); else ic->ic_receive(ip); } SOCKBUF_LOCK(sb); } out: /* * Since ic_disconnecting is set before the SOCKBUF_MTX is * locked in icl_cxgbei_conn_close, the loop above can exit * before icl_cxgbei_conn_close can lock SOCKBUF_MTX and block * waiting for the thread exit. */ while (!icc->rx_exiting) mtx_sleep(&icc->rx_active, SOCKBUF_MTX(sb), 0, "-", 0); SOCKBUF_UNLOCK(sb); kthread_exit(); } static void cxgbei_free_mext_pg(struct mbuf *m) { struct icl_cxgbei_pdu *icp; M_ASSERTEXTPG(m); /* * Nothing to do for the pages; they are owned by the PDU / * I/O request. */ /* Drop reference on the PDU. */ icp = m->m_ext.ext_arg1; if (atomic_fetchadd_int(&icp->ref_cnt, -1) == 1) icl_cxgbei_pdu_call_cb(&icp->ip); } static struct mbuf * cxgbei_getm(size_t len, int flags) { struct mbuf *m, *m0, *m_tail; m_tail = m0 = NULL; /* Allocate as jumbo mbufs of size MJUM16BYTES. */ while (len >= MJUM16BYTES) { m = m_getjcl(M_NOWAIT, MT_DATA, 0, MJUM16BYTES); if (__predict_false(m == NULL)) { if ((flags & M_WAITOK) != 0) { /* Fall back to non-jumbo mbufs. */ break; } return (NULL); } if (m0 == NULL) { m0 = m_tail = m; } else { m_tail->m_next = m; m_tail = m; } len -= MJUM16BYTES; } /* Allocate mbuf chain for the remaining data. 
*/ if (len != 0) { m = m_getm2(NULL, len, flags, MT_DATA, 0); if (__predict_false(m == NULL)) { m_freem(m0); return (NULL); } if (m0 == NULL) m0 = m; else m_tail->m_next = m; } return (m0); } int icl_cxgbei_conn_pdu_append_bio(struct icl_conn *ic, struct icl_pdu *ip, struct bio *bp, size_t offset, size_t len, int flags) { struct icl_cxgbei_pdu *icp = ip_to_icp(ip); struct mbuf *m, *m_tail; vm_offset_t vaddr; size_t page_offset, todo, mtodo; boolean_t mapped; int i; MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE); MPASS(ic == ip->ip_conn); KASSERT(len > 0, ("%s: len is %jd", __func__, (intmax_t)len)); m_tail = ip->ip_data_mbuf; if (m_tail != NULL) for (; m_tail->m_next != NULL; m_tail = m_tail->m_next) ; MPASS(bp->bio_flags & BIO_UNMAPPED); if (offset < PAGE_SIZE - bp->bio_ma_offset) { page_offset = bp->bio_ma_offset + offset; i = 0; } else { offset -= PAGE_SIZE - bp->bio_ma_offset; for (i = 1; offset >= PAGE_SIZE; i++) offset -= PAGE_SIZE; page_offset = offset; } if (flags & ICL_NOCOPY) { m = NULL; while (len > 0) { if (m == NULL) { m = mb_alloc_ext_pgs(flags & ~ICL_NOCOPY, cxgbei_free_mext_pg); if (__predict_false(m == NULL)) return (ENOMEM); atomic_add_int(&icp->ref_cnt, 1); m->m_ext.ext_arg1 = icp; m->m_epg_1st_off = page_offset; } todo = MIN(len, PAGE_SIZE - page_offset); m->m_epg_pa[m->m_epg_npgs] = VM_PAGE_TO_PHYS(bp->bio_ma[i]); m->m_epg_npgs++; m->m_epg_last_len = todo; m->m_len += todo; m->m_ext.ext_size += PAGE_SIZE; MBUF_EXT_PGS_ASSERT_SANITY(m); if (m->m_epg_npgs == MBUF_PEXT_MAX_PGS) { if (m_tail != NULL) m_tail->m_next = m; else ip->ip_data_mbuf = m; m_tail = m; ip->ip_data_len += m->m_len; m = NULL; } page_offset = 0; len -= todo; i++; } if (m != NULL) { if (m_tail != NULL) m_tail->m_next = m; else ip->ip_data_mbuf = m; ip->ip_data_len += m->m_len; } return (0); } m = cxgbei_getm(len, flags); if (__predict_false(m == NULL)) return (ENOMEM); if (ip->ip_data_mbuf == NULL) { ip->ip_data_mbuf = m; ip->ip_data_len = len; } else { m_tail->m_next = m; 
ip->ip_data_len += len; } while (len > 0) { todo = MIN(len, PAGE_SIZE - page_offset); mapped = pmap_map_io_transient(bp->bio_ma + i, &vaddr, 1, FALSE); do { mtodo = min(todo, M_SIZE(m) - m->m_len); memcpy(mtod(m, char *) + m->m_len, (char *)vaddr + page_offset, mtodo); m->m_len += mtodo; if (m->m_len == M_SIZE(m)) m = m->m_next; page_offset += mtodo; todo -= mtodo; } while (todo > 0); if (__predict_false(mapped)) pmap_unmap_io_transient(bp->bio_ma + 1, &vaddr, 1, FALSE); page_offset = 0; len -= todo; i++; } MPASS(ip->ip_data_len <= max(ic->ic_max_send_data_segment_length, ic->ic_hw_isomax)); return (0); } int icl_cxgbei_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *ip, const void *addr, size_t len, int flags) { struct icl_cxgbei_pdu *icp = ip_to_icp(ip); struct mbuf *m, *m_tail; const char *src; MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE); MPASS(ic == ip->ip_conn); KASSERT(len > 0, ("%s: len is %jd", __func__, (intmax_t)len)); m_tail = ip->ip_data_mbuf; if (m_tail != NULL) for (; m_tail->m_next != NULL; m_tail = m_tail->m_next) ; if (flags & ICL_NOCOPY) { m = m_get(flags & ~ICL_NOCOPY, MT_DATA); if (m == NULL) { ICL_WARN("failed to allocate mbuf"); return (ENOMEM); } m->m_flags |= M_RDONLY; m_extaddref(m, __DECONST(char *, addr), len, &icp->ref_cnt, icl_cxgbei_mbuf_done, icp, NULL); m->m_len = len; if (ip->ip_data_mbuf == NULL) { ip->ip_data_mbuf = m; ip->ip_data_len = len; } else { m_tail->m_next = m; m_tail = m_tail->m_next; ip->ip_data_len += len; } return (0); } m = cxgbei_getm(len, flags); if (__predict_false(m == NULL)) return (ENOMEM); if (ip->ip_data_mbuf == NULL) { ip->ip_data_mbuf = m; ip->ip_data_len = len; } else { m_tail->m_next = m; ip->ip_data_len += len; } src = (const char *)addr; for (; m != NULL; m = m->m_next) { m->m_len = min(len, M_SIZE(m)); memcpy(mtod(m, void *), src, m->m_len); src += m->m_len; len -= m->m_len; } MPASS(len == 0); MPASS(ip->ip_data_len <= max(ic->ic_max_send_data_segment_length, ic->ic_hw_isomax)); return 
(0); } void icl_cxgbei_conn_pdu_get_bio(struct icl_conn *ic, struct icl_pdu *ip, size_t pdu_off, struct bio *bp, size_t bio_off, size_t len) { struct icl_cxgbei_pdu *icp = ip_to_icp(ip); vm_offset_t vaddr; size_t page_offset, todo; boolean_t mapped; int i; if (icp->icp_flags & ICPF_RX_DDP) return; /* data is DDP'ed, no need to copy */ MPASS(bp->bio_flags & BIO_UNMAPPED); if (bio_off < PAGE_SIZE - bp->bio_ma_offset) { page_offset = bp->bio_ma_offset + bio_off; i = 0; } else { bio_off -= PAGE_SIZE - bp->bio_ma_offset; for (i = 1; bio_off >= PAGE_SIZE; i++) bio_off -= PAGE_SIZE; page_offset = bio_off; } while (len > 0) { todo = MIN(len, PAGE_SIZE - page_offset); mapped = pmap_map_io_transient(bp->bio_ma + i, &vaddr, 1, FALSE); m_copydata(ip->ip_data_mbuf, pdu_off, todo, (char *)vaddr + page_offset); if (__predict_false(mapped)) pmap_unmap_io_transient(bp->bio_ma + 1, &vaddr, 1, FALSE); page_offset = 0; pdu_off += todo; len -= todo; i++; } } void icl_cxgbei_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, size_t off, void *addr, size_t len) { struct icl_cxgbei_pdu *icp = ip_to_icp(ip); if (icp->icp_flags & ICPF_RX_DDP) return; /* data is DDP'ed, no need to copy */ m_copydata(ip->ip_data_mbuf, off, len, addr); } void icl_cxgbei_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) { icl_cxgbei_conn_pdu_queue_cb(ic, ip, NULL); } void icl_cxgbei_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip, icl_pdu_cb cb) { struct icl_cxgbei_conn *icc = ic_to_icc(ic); struct icl_cxgbei_pdu *icp = ip_to_icp(ip); struct socket *so = ic->ic_socket; MPASS(ic == ip->ip_conn); MPASS(ip->ip_bhs_mbuf != NULL); /* The kernel doesn't generate PDUs with AHS. */ MPASS(ip->ip_ahs_mbuf == NULL && ip->ip_ahs_len == 0); ICL_CONN_LOCK_ASSERT(ic); icp->cb = cb; /* NOTE: sowriteable without so_snd lock is a mostly harmless race. 
*/ if (ic->ic_disconnecting || so == NULL || !sowriteable(so)) { icl_cxgbei_pdu_done(ip, ENOTCONN); return; } STAILQ_INSERT_TAIL(&icc->sent_pdus, ip, ip_next); if (!icc->tx_active) { icc->tx_active = true; wakeup(&icc->tx_active); } } static struct icl_conn * icl_cxgbei_new_conn(const char *name, struct mtx *lock) { struct icl_cxgbei_conn *icc; struct icl_conn *ic; refcount_acquire(&icl_cxgbei_ncons); icc = (struct icl_cxgbei_conn *)kobj_create(&icl_cxgbei_class, M_CXGBE, M_WAITOK | M_ZERO); icc->icc_signature = CXGBEI_CONN_SIGNATURE; STAILQ_INIT(&icc->rcvd_pdus); STAILQ_INIT(&icc->sent_pdus); icc->cmp_table = hashinit(64, M_CXGBEI, &icc->cmp_hash_mask); mtx_init(&icc->cmp_lock, "cxgbei_cmp", NULL, MTX_DEF); ic = &icc->ic; ic->ic_lock = lock; #ifdef DIAGNOSTIC refcount_init(&ic->ic_outstanding_pdus, 0); #endif ic->ic_name = name; ic->ic_offload = "cxgbei"; ic->ic_unmapped = true; CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc); return (ic); } void icl_cxgbei_conn_free(struct icl_conn *ic) { struct icl_cxgbei_conn *icc = ic_to_icc(ic); MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc); mtx_destroy(&icc->cmp_lock); hashdestroy(icc->cmp_table, M_CXGBEI, icc->cmp_hash_mask); kobj_delete((struct kobj *)icc, M_CXGBE); refcount_release(&icl_cxgbei_ncons); } static int icl_cxgbei_setsockopt(struct icl_conn *ic, struct socket *so, int sspace, int rspace) { struct sockopt opt; int error, one = 1, ss, rs; ss = max(sendspace, sspace); rs = max(recvspace, rspace); error = soreserve(so, ss, rs); if (error != 0) return (error); SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_flags |= SB_AUTOSIZE; SOCKBUF_UNLOCK(&so->so_snd); SOCKBUF_LOCK(&so->so_rcv); so->so_rcv.sb_flags |= SB_AUTOSIZE; SOCKBUF_UNLOCK(&so->so_rcv); /* * Disable Nagle. 
*/ bzero(&opt, sizeof(opt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = IPPROTO_TCP; opt.sopt_name = TCP_NODELAY; opt.sopt_val = &one; opt.sopt_valsize = sizeof(one); error = sosetopt(so, &opt); if (error != 0) return (error); return (0); } /* * Request/response structure used to find out the adapter offloading a socket. */ struct find_ofld_adapter_rr { struct socket *so; struct adapter *sc; /* result */ }; static void find_offload_adapter(struct adapter *sc, void *arg) { struct find_ofld_adapter_rr *fa = arg; struct socket *so = fa->so; struct tom_data *td = sc->tom_softc; struct tcpcb *tp; struct inpcb *inp; /* Non-TCP were filtered out earlier. */ MPASS(so->so_proto->pr_protocol == IPPROTO_TCP); if (fa->sc != NULL) return; /* Found already. */ if (td == NULL) return; /* TOE not enabled on this adapter. */ inp = sotoinpcb(so); INP_WLOCK(inp); if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { tp = intotcpcb(inp); if (tp->t_flags & TF_TOE && tp->tod == &td->tod) fa->sc = sc; /* Found. */ } INP_WUNLOCK(inp); } static bool is_memfree(struct adapter *sc) { uint32_t em; em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); if ((em & F_EXT_MEM_ENABLE) != 0) return (false); if (is_t5(sc) && (em & F_EXT_MEM1_ENABLE) != 0) return (false); return (true); } /* XXXNP: move this to t4_tom. 
*/ static void send_iscsi_flowc_wr(struct adapter *sc, struct toepcb *toep, int maxlen) { struct wrqe *wr; struct fw_flowc_wr *flowc; const u_int nparams = 1; u_int flowclen; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); wr = alloc_wrqe(roundup2(flowclen, 16), &toep->ofld_txq->wrq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } flowc = wrtod(wr); memset(flowc, 0, wr->wr_len); flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(nparams)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | V_FW_WR_FLOWID(toep->tid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_TXDATAPLEN_MAX; flowc->mnemval[0].val = htobe32(maxlen); txsd->tx_credits = howmany(flowclen, 16); txsd->plen = 0; KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, ("%s: not enough credits (%d)", __func__, toep->tx_credits)); toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; t4_wrq_tx(sc, wr); } static void set_ulp_mode_iscsi(struct adapter *sc, struct toepcb *toep, u_int ulp_submode) { uint64_t val; CTR3(KTR_CXGBE, "%s: tid %u, ULP_MODE_ISCSI, submode=%#x", __func__, toep->tid, ulp_submode); val = V_TCB_ULP_TYPE(ULP_MODE_ISCSI) | V_TCB_ULP_RAW(ulp_submode); t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_ULP_TYPE, V_TCB_ULP_TYPE(M_TCB_ULP_TYPE) | V_TCB_ULP_RAW(M_TCB_ULP_RAW), val, 0, 0); val = V_TF_RX_FLOW_CONTROL_DISABLE(1ULL); t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_T_FLAGS, val, val, 0, 0); } /* * XXXNP: Who is responsible for cleaning up the socket if this returns with an * error? Review all error paths. * * XXXNP: What happens to the socket's fd reference if the operation is * successful, and how does that affect the socket's life cycle? 
*/ int icl_cxgbei_conn_handoff(struct icl_conn *ic, int fd) { struct icl_cxgbei_conn *icc = ic_to_icc(ic); struct find_ofld_adapter_rr fa; struct file *fp; struct socket *so; struct inpcb *inp; struct tcpcb *tp; struct toepcb *toep; cap_rights_t rights; u_int max_rx_pdu_len, max_tx_pdu_len; int error, max_iso_pdus; MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); ICL_CONN_LOCK_ASSERT_NOT(ic); /* * Steal the socket from userland. */ error = fget(curthread, fd, cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp); if (error != 0) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, curthread); return (EINVAL); } so = fp->f_data; if (so->so_type != SOCK_STREAM || so->so_proto->pr_protocol != IPPROTO_TCP) { fdrop(fp, curthread); return (EINVAL); } ICL_CONN_LOCK(ic); if (ic->ic_socket != NULL) { ICL_CONN_UNLOCK(ic); fdrop(fp, curthread); return (EBUSY); } ic->ic_disconnecting = false; ic->ic_socket = so; fp->f_ops = &badfileops; fp->f_data = NULL; fdrop(fp, curthread); ICL_CONN_UNLOCK(ic); /* Find the adapter offloading this socket. */ fa.sc = NULL; fa.so = so; t4_iterate(find_offload_adapter, &fa); if (fa.sc == NULL) { error = EINVAL; goto out; } icc->sc = fa.sc; max_rx_pdu_len = ISCSI_BHS_SIZE + ic->ic_max_recv_data_segment_length; max_tx_pdu_len = ISCSI_BHS_SIZE + ic->ic_max_send_data_segment_length; if (ic->ic_header_crc32c) { max_rx_pdu_len += ISCSI_HEADER_DIGEST_SIZE; max_tx_pdu_len += ISCSI_HEADER_DIGEST_SIZE; } if (ic->ic_data_crc32c) { max_rx_pdu_len += ISCSI_DATA_DIGEST_SIZE; max_tx_pdu_len += ISCSI_DATA_DIGEST_SIZE; } inp = sotoinpcb(so); INP_WLOCK(inp); tp = intotcpcb(inp); if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { INP_WUNLOCK(inp); error = ENOTCONN; goto out; } /* * socket could not have been "unoffloaded" if here. 
*/ MPASS(tp->t_flags & TF_TOE); MPASS(tp->tod != NULL); MPASS(tp->t_toe != NULL); toep = tp->t_toe; MPASS(toep->vi->adapter == icc->sc); if (ulp_mode(toep) != ULP_MODE_NONE) { INP_WUNLOCK(inp); error = EINVAL; goto out; } icc->toep = toep; icc->ulp_submode = 0; if (ic->ic_header_crc32c) icc->ulp_submode |= ULP_CRC_HEADER; if (ic->ic_data_crc32c) icc->ulp_submode |= ULP_CRC_DATA; if (icc->sc->tt.iso && chip_id(icc->sc) >= CHELSIO_T5 && !is_memfree(icc->sc)) { max_iso_pdus = CXGBEI_MAX_ISO_PAYLOAD / max_tx_pdu_len; ic->ic_hw_isomax = max_iso_pdus * ic->ic_max_send_data_segment_length; } else max_iso_pdus = 1; toep->params.ulp_mode = ULP_MODE_ISCSI; toep->ulpcb = icc; send_iscsi_flowc_wr(icc->sc, toep, roundup(max_iso_pdus * max_tx_pdu_len, tp->t_maxseg)); set_ulp_mode_iscsi(icc->sc, toep, icc->ulp_submode); INP_WUNLOCK(inp); error = kthread_add(icl_cxgbei_tx_main, icc, NULL, &icc->tx_thread, 0, 0, "%stx (cxgbei)", ic->ic_name); if (error != 0) goto out; error = kthread_add(icl_cxgbei_rx_main, icc, NULL, &icc->rx_thread, 0, 0, "%srx (cxgbei)", ic->ic_name); if (error != 0) goto out; error = icl_cxgbei_setsockopt(ic, so, max_tx_pdu_len, max_rx_pdu_len); out: if (error != 0) icl_cxgbei_conn_close(ic); return (error); } void icl_cxgbei_conn_close(struct icl_conn *ic) { struct icl_cxgbei_conn *icc = ic_to_icc(ic); struct icl_pdu *ip; struct socket *so; struct sockbuf *sb; struct inpcb *inp; struct toepcb *toep = icc->toep; MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); ICL_CONN_LOCK_ASSERT_NOT(ic); ICL_CONN_LOCK(ic); so = ic->ic_socket; if (ic->ic_disconnecting || so == NULL) { CTR4(KTR_CXGBE, "%s: icc %p (disconnecting = %d), so %p", __func__, icc, ic->ic_disconnecting, so); ICL_CONN_UNLOCK(ic); return; } ic->ic_disconnecting = true; #ifdef DIAGNOSTIC KASSERT(ic->ic_outstanding_pdus == 0, ("destroying session with %d outstanding PDUs", ic->ic_outstanding_pdus)); #endif CTR3(KTR_CXGBE, "%s: tid %d, icc %p", __func__, toep ? 
toep->tid : -1, icc); /* * Wait for the transmit thread to stop processing * this connection. */ if (icc->tx_thread != NULL) { wakeup(&icc->tx_active); mtx_sleep(icc->tx_thread, ic->ic_lock, 0, "conclo", 0); } /* Discard PDUs queued for TX. */ while (!STAILQ_EMPTY(&icc->sent_pdus)) { ip = STAILQ_FIRST(&icc->sent_pdus); STAILQ_REMOVE_HEAD(&icc->sent_pdus, ip_next); icl_cxgbei_pdu_done(ip, ENOTCONN); } ICL_CONN_UNLOCK(ic); inp = sotoinpcb(so); sb = &so->so_rcv; /* * Wait for the receive thread to stop processing this * connection. */ SOCKBUF_LOCK(sb); if (icc->rx_thread != NULL) { icc->rx_exiting = true; wakeup(&icc->rx_active); mtx_sleep(icc->rx_thread, SOCKBUF_MTX(sb), 0, "conclo", 0); } /* * Discard received PDUs not passed to the iSCSI layer. */ while (!STAILQ_EMPTY(&icc->rcvd_pdus)) { ip = STAILQ_FIRST(&icc->rcvd_pdus); STAILQ_REMOVE_HEAD(&icc->rcvd_pdus, ip_next); icl_cxgbei_pdu_done(ip, ENOTCONN); } SOCKBUF_UNLOCK(sb); INP_WLOCK(inp); if (toep != NULL) { /* NULL if connection was never offloaded. */ toep->ulpcb = NULL; /* Discard mbufs queued for TX. */ mbufq_drain(&toep->ulp_pduq); /* * Grab a reference to use when waiting for the final * CPL to be received. If toep->inp is NULL, then * final_cpl_received() has already been called (e.g. * due to the peer sending a RST). */ if (toep->inp != NULL) { toep = hold_toepcb(toep); toep->flags |= TPF_WAITING_FOR_FINAL; } else toep = NULL; } INP_WUNLOCK(inp); ICL_CONN_LOCK(ic); ic->ic_socket = NULL; ICL_CONN_UNLOCK(ic); /* * XXXNP: we should send RST instead of FIN when PDUs held in various * queues were purged instead of delivered reliably but soabort isn't * really general purpose and wouldn't do the right thing here. */ soclose(so); /* * Wait for the socket to fully close. This ensures any * pending received data has been received (and in particular, * any data that would be received by DDP has been handled). * Callers assume that it is safe to free buffers for tasks * and transfers after this function returns. 
*/ if (toep != NULL) { struct mtx *lock = mtx_pool_find(mtxpool_sleep, toep); mtx_lock(lock); while ((toep->flags & TPF_WAITING_FOR_FINAL) != 0) mtx_sleep(toep, lock, PSOCK, "conclo2", 0); mtx_unlock(lock); free_toepcb(toep); } } static void cxgbei_insert_cmp(struct icl_cxgbei_conn *icc, struct cxgbei_cmp *cmp, uint32_t tt) { #ifdef INVARIANTS struct cxgbei_cmp *cmp2; #endif cmp->tt = tt; mtx_lock(&icc->cmp_lock); #ifdef INVARIANTS LIST_FOREACH(cmp2, &icc->cmp_table[TT_HASH(icc, tt)], link) { KASSERT(cmp2->tt != tt, ("%s: duplicate cmp", __func__)); } #endif LIST_INSERT_HEAD(&icc->cmp_table[TT_HASH(icc, tt)], cmp, link); mtx_unlock(&icc->cmp_lock); } struct cxgbei_cmp * cxgbei_find_cmp(struct icl_cxgbei_conn *icc, uint32_t tt) { struct cxgbei_cmp *cmp; mtx_lock(&icc->cmp_lock); LIST_FOREACH(cmp, &icc->cmp_table[TT_HASH(icc, tt)], link) { if (cmp->tt == tt) break; } mtx_unlock(&icc->cmp_lock); return (cmp); } static void cxgbei_rm_cmp(struct icl_cxgbei_conn *icc, struct cxgbei_cmp *cmp) { #ifdef INVARIANTS struct cxgbei_cmp *cmp2; #endif mtx_lock(&icc->cmp_lock); #ifdef INVARIANTS LIST_FOREACH(cmp2, &icc->cmp_table[TT_HASH(icc, cmp->tt)], link) { if (cmp2 == cmp) goto found; } panic("%s: could not find cmp", __func__); found: #endif LIST_REMOVE(cmp, link); mtx_unlock(&icc->cmp_lock); } int icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, struct ccb_scsiio *csio, uint32_t *ittp, void **arg) { struct icl_cxgbei_conn *icc = ic_to_icc(ic); struct toepcb *toep = icc->toep; struct adapter *sc = icc->sc; struct cxgbei_data *ci = sc->iscsi_ulp_softc; struct ppod_region *pr = &ci->pr; struct cxgbei_ddp_state *ddp; struct ppod_reservation *prsv; struct inpcb *inp; struct mbufq mq; uint32_t itt; int rc = 0; ICL_CONN_LOCK_ASSERT(ic); /* This is for the offload driver's state. Must not be set already. 
*/ MPASS(arg != NULL); MPASS(*arg == NULL); if ((csio->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_IN || csio->dxfer_len < ci->ddp_threshold || ic->ic_disconnecting || ic->ic_socket == NULL) { no_ddp: /* * No DDP for this I/O. Allocate an ITT (based on the one * passed in) that cannot be a valid hardware DDP tag in the * iSCSI region. */ itt = *ittp & M_PPOD_TAG; itt = V_PPOD_TAG(itt) | pr->pr_invalid_bit; *ittp = htobe32(itt); MPASS(*arg == NULL); /* State is maintained for DDP only. */ if (rc != 0) counter_u64_add( toep->ofld_rxq->rx_iscsi_ddp_setup_error, 1); return (0); } /* * Reserve resources for DDP, update the itt that should be used in the * PDU, and save DDP specific state for this I/O in *arg. */ ddp = malloc(sizeof(*ddp), M_CXGBEI, M_NOWAIT | M_ZERO); if (ddp == NULL) { rc = ENOMEM; goto no_ddp; } prsv = &ddp->prsv; mbufq_init(&mq, INT_MAX); switch (csio->ccb_h.flags & CAM_DATA_MASK) { case CAM_DATA_BIO: rc = t4_alloc_page_pods_for_bio(pr, (struct bio *)csio->data_ptr, prsv); if (rc != 0) { free(ddp, M_CXGBEI); goto no_ddp; } rc = t4_write_page_pods_for_bio(sc, toep, prsv, (struct bio *)csio->data_ptr, &mq); if (__predict_false(rc != 0)) { mbufq_drain(&mq); t4_free_page_pods(prsv); free(ddp, M_CXGBEI); goto no_ddp; } break; case CAM_DATA_VADDR: rc = t4_alloc_page_pods_for_buf(pr, (vm_offset_t)csio->data_ptr, csio->dxfer_len, prsv); if (rc != 0) { free(ddp, M_CXGBEI); goto no_ddp; } rc = t4_write_page_pods_for_buf(sc, toep, prsv, (vm_offset_t)csio->data_ptr, csio->dxfer_len, &mq); if (__predict_false(rc != 0)) { mbufq_drain(&mq); t4_free_page_pods(prsv); free(ddp, M_CXGBEI); goto no_ddp; } break; default: free(ddp, M_CXGBEI); rc = EINVAL; goto no_ddp; } /* * Do not get inp from toep->inp as the toepcb might have * detached already. 
*/ inp = sotoinpcb(ic->ic_socket); INP_WLOCK(inp); if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) != 0) { INP_WUNLOCK(inp); mbufq_drain(&mq); t4_free_page_pods(prsv); free(ddp, M_CXGBEI); goto no_ddp; } mbufq_concat(&toep->ulp_pduq, &mq); INP_WUNLOCK(inp); ddp->cmp.last_datasn = -1; cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag); *ittp = htobe32(prsv->prsv_tag); *arg = prsv; counter_u64_add(toep->ofld_rxq->rx_iscsi_ddp_setup_ok, 1); return (0); } void icl_cxgbei_conn_task_done(struct icl_conn *ic, void *arg) { if (arg != NULL) { struct cxgbei_ddp_state *ddp = arg; cxgbei_rm_cmp(ic_to_icc(ic), &ddp->cmp); t4_free_page_pods(&ddp->prsv); free(ddp, M_CXGBEI); } } static inline bool ddp_sgl_check(struct ctl_sg_entry *sg, int entries, int xferlen) { #ifdef INVARIANTS int total_len = 0; #endif MPASS(entries > 0); if (((vm_offset_t)sg[--entries].addr & 3U) != 0) return (false); #ifdef INVARIANTS total_len += sg[entries].len; #endif while (--entries >= 0) { if (((vm_offset_t)sg[entries].addr & PAGE_MASK) != 0 || (sg[entries].len % PAGE_SIZE) != 0) return (false); #ifdef INVARIANTS total_len += sg[entries].len; #endif } MPASS(total_len == xferlen); return (true); } #define io_to_ddp_state(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr) int icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, struct icl_pdu *ip, union ctl_io *io, uint32_t *tttp, void **arg) { struct icl_cxgbei_conn *icc = ic_to_icc(ic); struct toepcb *toep = icc->toep; struct ctl_scsiio *ctsio = &io->scsiio; struct adapter *sc = icc->sc; struct cxgbei_data *ci = sc->iscsi_ulp_softc; struct ppod_region *pr = &ci->pr; struct cxgbei_ddp_state *ddp; struct ppod_reservation *prsv; struct ctl_sg_entry *sgl, sg_entry; struct inpcb *inp; struct mbufq mq; int sg_entries = ctsio->kern_sg_entries; uint32_t ttt; int xferlen, rc = 0, alias; /* This is for the offload driver's state. Must not be set already. 
*/ MPASS(arg != NULL); MPASS(*arg == NULL); if (ctsio->ext_data_filled == 0) { int first_burst; #ifdef INVARIANTS struct icl_cxgbei_pdu *icp = ip_to_icp(ip); MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE); MPASS(ic == ip->ip_conn); MPASS(ip->ip_bhs_mbuf != NULL); #endif first_burst = icl_pdu_data_segment_length(ip); /* * Note that ICL calls conn_transfer_setup even if the first * burst had everything and there's nothing left to transfer. * * NB: The CTL frontend might have provided a buffer * whose length (kern_data_len) is smaller than the * FirstBurstLength of unsolicited data. Treat those * as an empty transfer. */ xferlen = ctsio->kern_data_len; if (xferlen < first_burst || xferlen - first_burst < ci->ddp_threshold) { no_ddp: /* * No DDP for this transfer. Allocate a TTT (based on * the one passed in) that cannot be a valid hardware * DDP tag in the iSCSI region. */ ttt = *tttp & M_PPOD_TAG; ttt = V_PPOD_TAG(ttt) | pr->pr_invalid_bit; *tttp = htobe32(ttt); MPASS(io_to_ddp_state(io) == NULL); if (rc != 0) counter_u64_add( toep->ofld_rxq->rx_iscsi_ddp_setup_error, 1); return (0); } if (sg_entries == 0) { sgl = &sg_entry; sgl->len = xferlen; sgl->addr = (void *)ctsio->kern_data_ptr; sg_entries = 1; } else sgl = (void *)ctsio->kern_data_ptr; if (!ddp_sgl_check(sgl, sg_entries, xferlen)) goto no_ddp; /* * Reserve resources for DDP, update the ttt that should be used * in the PDU, and save DDP specific state for this I/O. 
*/ MPASS(io_to_ddp_state(io) == NULL); ddp = malloc(sizeof(*ddp), M_CXGBEI, M_NOWAIT | M_ZERO); if (ddp == NULL) { rc = ENOMEM; goto no_ddp; } prsv = &ddp->prsv; rc = t4_alloc_page_pods_for_sgl(pr, sgl, sg_entries, prsv); if (rc != 0) { free(ddp, M_CXGBEI); goto no_ddp; } mbufq_init(&mq, INT_MAX); rc = t4_write_page_pods_for_sgl(sc, toep, prsv, sgl, sg_entries, xferlen, &mq); if (__predict_false(rc != 0)) { mbufq_drain(&mq); t4_free_page_pods(prsv); free(ddp, M_CXGBEI); goto no_ddp; } /* * Do not get inp from toep->inp as the toepcb might * have detached already. */ ICL_CONN_LOCK(ic); if (ic->ic_disconnecting || ic->ic_socket == NULL) { ICL_CONN_UNLOCK(ic); mbufq_drain(&mq); t4_free_page_pods(prsv); free(ddp, M_CXGBEI); return (ECONNRESET); } inp = sotoinpcb(ic->ic_socket); INP_WLOCK(inp); ICL_CONN_UNLOCK(ic); if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) != 0) { INP_WUNLOCK(inp); mbufq_drain(&mq); t4_free_page_pods(prsv); free(ddp, M_CXGBEI); return (ECONNRESET); } mbufq_concat(&toep->ulp_pduq, &mq); INP_WUNLOCK(inp); ddp->cmp.next_buffer_offset = ctsio->kern_rel_offset + first_burst; ddp->cmp.last_datasn = -1; cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag); *tttp = htobe32(prsv->prsv_tag); io_to_ddp_state(io) = ddp; *arg = ctsio; counter_u64_add(toep->ofld_rxq->rx_iscsi_ddp_setup_ok, 1); return (0); } /* * In the middle of an I/O. A non-NULL page pod reservation indicates * that a DDP buffer is being used for the I/O. 
*/ ddp = io_to_ddp_state(ctsio); if (ddp == NULL) goto no_ddp; prsv = &ddp->prsv; alias = (prsv->prsv_tag & pr->pr_alias_mask) >> pr->pr_alias_shift; alias++; prsv->prsv_tag &= ~pr->pr_alias_mask; prsv->prsv_tag |= alias << pr->pr_alias_shift & pr->pr_alias_mask; ddp->cmp.last_datasn = -1; cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag); *tttp = htobe32(prsv->prsv_tag); *arg = ctsio; return (0); } void icl_cxgbei_conn_transfer_done(struct icl_conn *ic, void *arg) { struct ctl_scsiio *ctsio = arg; if (ctsio != NULL) { struct cxgbei_ddp_state *ddp; ddp = io_to_ddp_state(ctsio); MPASS(ddp != NULL); cxgbei_rm_cmp(ic_to_icc(ic), &ddp->cmp); if (ctsio->kern_data_len == ctsio->ext_data_filled || ic->ic_disconnecting) { t4_free_page_pods(&ddp->prsv); free(ddp, M_CXGBEI); io_to_ddp_state(ctsio) = NULL; } } } static void cxgbei_limits(struct adapter *sc, void *arg) { struct icl_drv_limits *idl = arg; struct cxgbei_data *ci; int max_dsl; if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4lims") != 0) return; if (uld_active(sc, ULD_ISCSI)) { ci = sc->iscsi_ulp_softc; MPASS(ci != NULL); max_dsl = ci->max_rx_data_len; if (idl->idl_max_recv_data_segment_length > max_dsl) idl->idl_max_recv_data_segment_length = max_dsl; max_dsl = ci->max_tx_data_len; if (idl->idl_max_send_data_segment_length > max_dsl) idl->idl_max_send_data_segment_length = max_dsl; } end_synchronized_op(sc, LOCK_HELD); } static int -icl_cxgbei_limits(struct icl_drv_limits *idl) +icl_cxgbei_limits(struct icl_drv_limits *idl, int socket) { /* Maximum allowed by the RFC. cxgbei_limits will clip them. */ idl->idl_max_recv_data_segment_length = (1 << 24) - 1; idl->idl_max_send_data_segment_length = (1 << 24) - 1; /* These are somewhat arbitrary. 
*/ idl->idl_max_burst_length = max_burst_length; idl->idl_first_burst_length = first_burst_length; t4_iterate(cxgbei_limits, idl); return (0); } int icl_cxgbei_mod_load(void) { int rc; refcount_init(&icl_cxgbei_ncons, 0); rc = icl_register("cxgbei", false, -100, icl_cxgbei_limits, icl_cxgbei_new_conn); return (rc); } int icl_cxgbei_mod_unload(void) { if (icl_cxgbei_ncons != 0) return (EBUSY); icl_unregister("cxgbei", false); return (0); } #endif diff --git a/sys/dev/iscsi/icl.c b/sys/dev/iscsi/icl.c index 1e1f1bef91bb..1a86474a5033 100644 --- a/sys/dev/iscsi/icl.c +++ b/sys/dev/iscsi/icl.c @@ -1,331 +1,333 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * iSCSI Common Layer. It's used by both the initiator and target to send * and receive iSCSI PDUs. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct icl_module { TAILQ_ENTRY(icl_module) im_next; char *im_name; bool im_iser; int im_priority; - int (*im_limits)(struct icl_drv_limits *idl); + int (*im_limits)(struct icl_drv_limits *idl, + int socket); struct icl_conn *(*im_new_conn)(const char *name, struct mtx *lock); }; struct icl_softc { struct sx sc_lock; TAILQ_HEAD(, icl_module) sc_modules; }; static int sysctl_kern_icl_offloads(SYSCTL_HANDLER_ARGS); static MALLOC_DEFINE(M_ICL, "icl", "iSCSI Common Layer"); static struct icl_softc *sc; SYSCTL_NODE(_kern, OID_AUTO, icl, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "iSCSI Common Layer"); int icl_debug = 1; SYSCTL_INT(_kern_icl, OID_AUTO, debug, CTLFLAG_RWTUN, &icl_debug, 0, "Enable debug messages"); SYSCTL_PROC(_kern_icl, OID_AUTO, offloads, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, false, sysctl_kern_icl_offloads, "A", "List of ICL modules"); SYSCTL_PROC(_kern_icl, OID_AUTO, iser_offloads, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, true, sysctl_kern_icl_offloads, "A", "List of iSER ICL modules"); static int sysctl_kern_icl_offloads(SYSCTL_HANDLER_ARGS) { const struct icl_module *im; struct sbuf sb; bool iser = arg2; int error; sbuf_new(&sb, NULL, 256, 
SBUF_AUTOEXTEND | SBUF_INCLUDENUL); sx_slock(&sc->sc_lock); TAILQ_FOREACH(im, &sc->sc_modules, im_next) { if (im->im_iser != iser) continue; if (im != TAILQ_FIRST(&sc->sc_modules)) sbuf_putc(&sb, ' '); sbuf_printf(&sb, "%s", im->im_name); } sx_sunlock(&sc->sc_lock); error = sbuf_finish(&sb); if (error == 0) error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb)); sbuf_delete(&sb); return (error); } static struct icl_module * icl_find(const char *name, bool iser, bool quiet) { struct icl_module *im, *im_max; sx_assert(&sc->sc_lock, SA_LOCKED); /* * If the name was not specified, pick a module with highest * priority. */ if (name == NULL || name[0] == '\0') { im_max = NULL; TAILQ_FOREACH(im, &sc->sc_modules, im_next) { if (im->im_iser != iser) continue; if (im_max == NULL || im->im_priority > im_max->im_priority) im_max = im; } if (iser && im_max == NULL && !quiet) ICL_WARN("no iSER-capable offload found"); return (im_max); } TAILQ_FOREACH(im, &sc->sc_modules, im_next) { if (strcasecmp(im->im_name, name) != 0) continue; if (!im->im_iser && iser && !quiet) { ICL_WARN("offload \"%s\" is not iSER-capable", name); return (NULL); } if (im->im_iser && !iser && !quiet) { ICL_WARN("offload \"%s\" is iSER-only", name); return (NULL); } return (im); } if (!quiet) ICL_WARN("offload \"%s\" not found", name); return (NULL); } struct icl_conn * icl_new_conn(const char *offload, bool iser, const char *name, struct mtx *lock) { struct icl_module *im; struct icl_conn *ic; sx_slock(&sc->sc_lock); im = icl_find(offload, iser, false); if (im == NULL) { sx_sunlock(&sc->sc_lock); return (NULL); } ic = im->im_new_conn(name, lock); sx_sunlock(&sc->sc_lock); return (ic); } int -icl_limits(const char *offload, bool iser, struct icl_drv_limits *idl) +icl_limits(const char *offload, bool iser, int socket, + struct icl_drv_limits *idl) { struct icl_module *im; int error; bzero(idl, sizeof(*idl)); sx_slock(&sc->sc_lock); im = icl_find(offload, iser, false); if (im == NULL) { 
sx_sunlock(&sc->sc_lock); return (ENXIO); } - error = im->im_limits(idl); + error = im->im_limits(idl, socket); sx_sunlock(&sc->sc_lock); /* * Validate the limits provided by the driver against values allowed by * the iSCSI RFC. 0 means iscsid/ctld should pick a reasonable value. * * Note that max_send_dsl is an internal implementation detail and not * part of the RFC. */ #define OUT_OF_RANGE(x, lo, hi) ((x) != 0 && ((x) < (lo) || (x) > (hi))) if (error == 0 && (OUT_OF_RANGE(idl->idl_max_recv_data_segment_length, 512, 16777215) || OUT_OF_RANGE(idl->idl_max_send_data_segment_length, 512, 16777215) || OUT_OF_RANGE(idl->idl_max_burst_length, 512, 16777215) || OUT_OF_RANGE(idl->idl_first_burst_length, 512, 16777215))) { error = EINVAL; } #undef OUT_OF_RANGE /* * If both first_burst and max_burst are provided then first_burst must * not exceed max_burst. */ if (error == 0 && idl->idl_first_burst_length > 0 && idl->idl_max_burst_length > 0 && idl->idl_first_burst_length > idl->idl_max_burst_length) { error = EINVAL; } return (error); } int icl_register(const char *offload, bool iser, int priority, - int (*limits)(struct icl_drv_limits *), + int (*limits)(struct icl_drv_limits *, int), struct icl_conn *(*new_conn)(const char *, struct mtx *)) { struct icl_module *im; sx_xlock(&sc->sc_lock); im = icl_find(offload, iser, true); if (im != NULL) { ICL_WARN("offload \"%s\" already registered", offload); sx_xunlock(&sc->sc_lock); return (EBUSY); } im = malloc(sizeof(*im), M_ICL, M_ZERO | M_WAITOK); im->im_name = strdup(offload, M_ICL); im->im_iser = iser; im->im_priority = priority; im->im_limits = limits; im->im_new_conn = new_conn; TAILQ_INSERT_HEAD(&sc->sc_modules, im, im_next); sx_xunlock(&sc->sc_lock); ICL_DEBUG("offload \"%s\" registered", offload); return (0); } int icl_unregister(const char *offload, bool rdma) { struct icl_module *im; sx_xlock(&sc->sc_lock); im = icl_find(offload, rdma, true); if (im == NULL) { ICL_WARN("offload \"%s\" not registered", offload); 
sx_xunlock(&sc->sc_lock); return (ENXIO); } TAILQ_REMOVE(&sc->sc_modules, im, im_next); sx_xunlock(&sc->sc_lock); free(im->im_name, M_ICL); free(im, M_ICL); ICL_DEBUG("offload \"%s\" unregistered", offload); return (0); } static int icl_load(void) { sc = malloc(sizeof(*sc), M_ICL, M_ZERO | M_WAITOK); sx_init(&sc->sc_lock, "icl"); TAILQ_INIT(&sc->sc_modules); return (0); } static int icl_unload(void) { sx_slock(&sc->sc_lock); KASSERT(TAILQ_EMPTY(&sc->sc_modules), ("still have modules")); sx_sunlock(&sc->sc_lock); sx_destroy(&sc->sc_lock); free(sc, M_ICL); return (0); } static int icl_modevent(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: return (icl_load()); case MOD_UNLOAD: return (icl_unload()); default: return (EINVAL); } } moduledata_t icl_data = { "icl", icl_modevent, 0 }; DECLARE_MODULE(icl, icl_data, SI_SUB_DRIVERS, SI_ORDER_FIRST); MODULE_VERSION(icl, 1); diff --git a/sys/dev/iscsi/icl.h b/sys/dev/iscsi/icl.h index edd43a45ba2e..59b1e2aacc96 100644 --- a/sys/dev/iscsi/icl.h +++ b/sys/dev/iscsi/icl.h @@ -1,172 +1,172 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef ICL_H #define ICL_H /* * iSCSI Common Layer. It's used by both the initiator and target to send * and receive iSCSI PDUs. */ #include #include #include #include SYSCTL_DECL(_kern_icl); extern int icl_debug; #define ICL_DEBUG(X, ...) \ do { \ if (icl_debug > 1) \ printf("%s: " X "\n", __func__, ## __VA_ARGS__);\ } while (0) #define ICL_WARN(X, ...) \ do { \ if (icl_debug > 0) { \ printf("WARNING: %s: " X "\n", \ __func__, ## __VA_ARGS__); \ } \ } while (0) struct icl_conn; struct ccb_scsiio; union ctl_io; struct icl_pdu { STAILQ_ENTRY(icl_pdu) ip_next; struct icl_conn *ip_conn; struct iscsi_bhs *ip_bhs; struct mbuf *ip_bhs_mbuf; size_t ip_ahs_len; struct mbuf *ip_ahs_mbuf; size_t ip_data_len; struct mbuf *ip_data_mbuf; /* * When a "large" received PDU represents multiple on-the-wire * PDUs, this is the count of additional on-the-wire PDUs. * For PDUs that match on-the-wire PDUs, this should be set to * zero. */ u_int ip_additional_pdus; /* * User (initiator or provider) private fields. 
*/ void *ip_prv0; void *ip_prv1; }; #define ICL_NOCOPY (1 << 30) struct icl_conn { KOBJ_FIELDS; struct mtx *ic_lock; struct socket *ic_socket; #ifdef DIAGNOSTIC volatile u_int ic_outstanding_pdus; #endif uint32_t ic_max_recv_data_segment_length; uint32_t ic_max_send_data_segment_length; size_t ic_hw_isomax; size_t ic_maxtags; bool ic_header_crc32c; bool ic_data_crc32c; bool ic_disconnecting; bool ic_iser; bool ic_unmapped; const char *ic_name; const char *ic_offload; void (*ic_receive)(struct icl_pdu *); void (*ic_error)(struct icl_conn *); /* * User (initiator or provider) private fields. */ void *ic_prv0; }; #define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) #define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) #define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) #define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) struct icl_drv_limits { int idl_max_recv_data_segment_length; int idl_max_send_data_segment_length; int idl_max_burst_length; int idl_first_burst_length; int spare[4]; }; typedef void (*icl_pdu_cb)(struct icl_pdu *, int error); struct icl_conn *icl_new_conn(const char *offload, bool iser, const char *name, struct mtx *lock); -int icl_limits(const char *offload, bool iser, +int icl_limits(const char *offload, bool iser, int socket, struct icl_drv_limits *idl); int icl_register(const char *offload, bool iser, int priority, - int (*limits)(struct icl_drv_limits *), + int (*limits)(struct icl_drv_limits *, int), struct icl_conn *(*new_conn)(const char *, struct mtx *)); int icl_unregister(const char *offload, bool rdma); #ifdef ICL_KERNEL_PROXY struct sockaddr; struct icl_listen; /* * Target part. 
*/ struct icl_listen *icl_listen_new(void (*accept_cb)(struct socket *, struct sockaddr *, int)); void icl_listen_free(struct icl_listen *il); int icl_listen_add(struct icl_listen *il, bool rdma, int domain, int socktype, int protocol, struct sockaddr *sa, int portal_id); int icl_listen_remove(struct icl_listen *il, struct sockaddr *sa); /* * Those two are not a public API; only to be used between icl_soft.c * and icl_soft_proxy.c. */ int icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so); int icl_soft_proxy_connect(struct icl_conn *ic, int domain, int socktype, int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa); #endif /* ICL_KERNEL_PROXY */ #endif /* !ICL_H */ diff --git a/sys/dev/iscsi/icl_soft.c b/sys/dev/iscsi/icl_soft.c index c8adad7e9d03..669cb9618d3a 100644 --- a/sys/dev/iscsi/icl_soft.c +++ b/sys/dev/iscsi/icl_soft.c @@ -1,1770 +1,1770 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Software implementation of iSCSI Common Layer kobj(9) interface.
 */

/*
 * NOTE(review): the header names of the #include directives below are
 * missing from this copy of the file; restore them from the original.
 */
#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*
 * Receive state identifiers, named after the parts of a PDU.
 * Presumably the values held in icl_soft_conn.receive_state -- confirm
 * against the receive path, which is outside this span.
 */
#define	ICL_CONN_STATE_BHS		1
#define	ICL_CONN_STATE_AHS		2
#define	ICL_CONN_STATE_HEADER_DIGEST	3
#define	ICL_CONN_STATE_DATA		4
#define	ICL_CONN_STATE_DATA_DIGEST	5

/*
 * Per-connection state of the software backend.  The generic icl_conn is
 * embedded as the first member; this structure's size is what is passed to
 * DEFINE_CLASS() for the icl_soft class below.
 */
struct icl_soft_conn {
	struct icl_conn	 ic;

	/* soft specific stuff goes here. */
	STAILQ_HEAD(, icl_pdu) to_send;
	struct cv	 send_cv;
	struct cv	 receive_cv;
	struct icl_pdu	*receive_pdu;
	size_t		 receive_len;
	int		 receive_state;
	bool		 receive_running;
	bool		 check_send_space;
	bool		 send_running;
};

/*
 * Software backend PDU.  The generic icl_pdu is the first member, which is
 * what allows the functions below to cast a struct icl_pdu pointer to a
 * struct icl_soft_pdu pointer.
 */
struct icl_soft_pdu {
	struct icl_pdu	 ip;

	/* soft specific stuff goes here.
*/
	u_int		 ref_cnt;	/* dropped by icl_soft_pdu_done() */
	icl_pdu_cb	 cb;		/* optional completion callback */
	int		 error;		/* value handed to cb */
};

SYSCTL_NODE(_kern_icl, OID_AUTO, soft, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "Software iSCSI");

/* Tunables exported under kern.icl.soft, each with its built-in default. */
static int coalesce = 1;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, coalesce, CTLFLAG_RWTUN,
    &coalesce, 0, "Try to coalesce PDUs before sending");
static int partial_receive_len = 256 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
    &partial_receive_len, 0, "Minimum read size for partially received "
    "data segment");
static int max_data_segment_length = 256 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_data_segment_length, CTLFLAG_RWTUN,
    &max_data_segment_length, 0, "Maximum data segment length");
static int first_burst_length = 1024 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, first_burst_length, CTLFLAG_RWTUN,
    &first_burst_length, 0, "First burst length");
static int max_burst_length = 1024 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_burst_length, CTLFLAG_RWTUN,
    &max_burst_length, 0, "Maximum burst length");
static int sendspace = 1536 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, sendspace, CTLFLAG_RWTUN,
    &sendspace, 0, "Default send socket buffer size");
static int recvspace = 1536 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, recvspace, CTLFLAG_RWTUN,
    &recvspace, 0, "Default receive socket buffer size");

static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend");
/* UMA zone that struct icl_soft_pdu objects are returned to. */
static uma_zone_t icl_soft_pdu_zone;

static volatile u_int	icl_ncons;

STAILQ_HEAD(icl_pdu_stailq, icl_pdu);

/*
 * Forward declarations of the functions bound to the icl_conn interface
 * in icl_soft_methods[] below.
 */
static icl_conn_new_pdu_t	icl_soft_conn_new_pdu;
static icl_conn_pdu_free_t	icl_soft_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t
				    icl_soft_conn_pdu_data_segment_length;
static icl_conn_pdu_append_bio_t	icl_soft_conn_pdu_append_bio;
static icl_conn_pdu_append_data_t	icl_soft_conn_pdu_append_data;
static icl_conn_pdu_get_bio_t	icl_soft_conn_pdu_get_bio;
static icl_conn_pdu_get_data_t	icl_soft_conn_pdu_get_data;
static icl_conn_pdu_queue_t	icl_soft_conn_pdu_queue;
static icl_conn_pdu_queue_cb_t
icl_soft_conn_pdu_queue_cb;
static icl_conn_handoff_t	icl_soft_conn_handoff;
static icl_conn_free_t		icl_soft_conn_free;
static icl_conn_close_t		icl_soft_conn_close;
static icl_conn_task_setup_t	icl_soft_conn_task_setup;
static icl_conn_task_done_t	icl_soft_conn_task_done;
static icl_conn_transfer_setup_t	icl_soft_conn_transfer_setup;
static icl_conn_transfer_done_t	icl_soft_conn_transfer_done;
#ifdef ICL_KERNEL_PROXY
static icl_conn_connect_t	icl_soft_conn_connect;
#endif

/* Binds each icl_conn interface method to its icl_soft_* function. */
static kobj_method_t icl_soft_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length,
	    icl_soft_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_bio, icl_soft_conn_pdu_append_bio),
	KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_get_bio, icl_soft_conn_pdu_get_bio),
	KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data),
	KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue),
	KOBJMETHOD(icl_conn_pdu_queue_cb, icl_soft_conn_pdu_queue_cb),
	KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff),
	KOBJMETHOD(icl_conn_free, icl_soft_conn_free),
	KOBJMETHOD(icl_conn_close, icl_soft_conn_close),
	KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done),
	KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup),
	KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done),
#ifdef ICL_KERNEL_PROXY
	KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect),
#endif
	{ 0, 0 }
};

DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_soft_conn));

/*
 * Mark the connection as failed: store EDOOFUS in the socket's so_error
 * and invoke the connection's ic_error callback.  Does nothing when the
 * connection has no socket.
 */
static void
icl_conn_fail(struct icl_conn *ic)
{
	if (ic->ic_socket == NULL)
		return;

	/*
	 * XXX
	 */
	ic->ic_socket->so_error = EDOOFUS;
	(ic->ic_error)(ic);
}

/*
 * Free a PDU that has no references left: release its BHS, AHS and data
 * mbuf chains and return the PDU to icl_soft_pdu_zone.  The completion
 * callback is not invoked.  Under DIAGNOSTIC the connection's
 * outstanding-PDU count is dropped as well.
 */
static void
icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip;

	KASSERT(isp->ref_cnt == 0, ("freeing active PDU"));
	m_freem(ip->ip_bhs_mbuf);
	m_freem(ip->ip_ahs_mbuf);
	m_freem(ip->ip_data_mbuf);
	uma_zfree(icl_soft_pdu_zone, isp);
#ifdef DIAGNOSTIC
	refcount_release(&ic->ic_outstanding_pdus);
#endif
}

/*
 * Final step of PDU completion: call the completion callback, if one is
 * set, with the recorded error, then return the PDU to
 * icl_soft_pdu_zone.  The PDU's mbuf pointers are not touched here.
 */
static void
icl_soft_pdu_call_cb(struct icl_pdu *ip)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip;

	if (isp->cb != NULL)
		isp->cb(ip, isp->error);
#ifdef DIAGNOSTIC
	refcount_release(&ip->ip_conn->ic_outstanding_pdus);
#endif
	uma_zfree(icl_soft_pdu_zone, isp);
}

/*
 * Drop one reference on a PDU.  A non-zero error is recorded first; a
 * zero error leaves any previously recorded value alone.  The mbuf
 * chains are freed and their pointers cleared, and when this was the
 * last reference the PDU is completed via icl_soft_pdu_call_cb().
 */
static void
icl_soft_pdu_done(struct icl_pdu *ip, int error)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip;

	if (error != 0)
		isp->error = error;

	m_freem(ip->ip_bhs_mbuf);
	ip->ip_bhs_mbuf = NULL;
	m_freem(ip->ip_ahs_mbuf);
	ip->ip_ahs_mbuf = NULL;
	m_freem(ip->ip_data_mbuf);
	ip->ip_data_mbuf = NULL;

	/* atomic_fetchadd_int() returns the old value: 1 means last ref. */
	if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1)
		icl_soft_pdu_call_cb(ip);
}

/*
 * Complete the PDU stored in the mbuf's m_ext.ext_arg1.  ref_cnt is not
 * modified here.
 *
 * NOTE(review): presumably installed as the free routine of external
 * mbufs that reference the PDU -- confirm where ext_arg1 is set.
 */
static void
icl_soft_mbuf_done(struct mbuf *mb)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)mb->m_ext.ext_arg1;

	icl_soft_pdu_call_cb(&isp->ip);
}

/*
 * Allocate icl_pdu with empty BHS to fill up by the caller.
*/ struct icl_pdu * icl_soft_conn_new_pdu(struct icl_conn *ic, int flags) { struct icl_soft_pdu *isp; struct icl_pdu *ip; #ifdef DIAGNOSTIC refcount_acquire(&ic->ic_outstanding_pdus); #endif isp = uma_zalloc(icl_soft_pdu_zone, flags | M_ZERO); if (isp == NULL) { ICL_WARN("failed to allocate soft PDU"); #ifdef DIAGNOSTIC refcount_release(&ic->ic_outstanding_pdus); #endif return (NULL); } ip = &isp->ip; ip->ip_conn = ic; CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN); ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA); if (ip->ip_bhs_mbuf == NULL) { ICL_WARN("failed to allocate BHS mbuf"); icl_soft_conn_pdu_free(ic, ip); return (NULL); } ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); return (ip); } static int icl_pdu_ahs_length(const struct icl_pdu *request) { return (request->ip_bhs->bhs_total_ahs_len * 4); } static size_t icl_pdu_data_segment_length(const struct icl_pdu *request) { uint32_t len = 0; len += request->ip_bhs->bhs_data_segment_len[0]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[1]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[2]; return (len); } size_t icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic, const struct icl_pdu *request) { return (icl_pdu_data_segment_length(request)); } static void icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) { response->ip_bhs->bhs_data_segment_len[2] = len; response->ip_bhs->bhs_data_segment_len[1] = len >> 8; response->ip_bhs->bhs_data_segment_len[0] = len >> 16; } static size_t icl_pdu_padding(const struct icl_pdu *ip) { if ((ip->ip_data_len % 4) != 0) return (4 - (ip->ip_data_len % 4)); return (0); } static size_t icl_pdu_size(const struct icl_pdu *response) { size_t len; KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); len = sizeof(struct iscsi_bhs) + response->ip_data_len + icl_pdu_padding(response); if (response->ip_conn->ic_header_crc32c) len += 
ISCSI_HEADER_DIGEST_SIZE; if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) len += ISCSI_DATA_DIGEST_SIZE; return (len); } static void icl_soft_receive_buf(struct mbuf **r, size_t *rs, void *buf, size_t s) { m_copydata(*r, 0, s, buf); m_adj(*r, s); while ((*r) != NULL && (*r)->m_len == 0) *r = m_free(*r); *rs -= s; } static void icl_pdu_receive_ahs(struct icl_pdu *request, struct mbuf **r, size_t *rs) { request->ip_ahs_len = icl_pdu_ahs_length(request); if (request->ip_ahs_len == 0) return; request->ip_ahs_mbuf = *r; *r = m_split(request->ip_ahs_mbuf, request->ip_ahs_len, M_WAITOK); *rs -= request->ip_ahs_len; } static int mbuf_crc32c_helper(void *arg, void *data, u_int len) { uint32_t *digestp = arg; *digestp = calculate_crc32c(*digestp, data, len); return (0); } static uint32_t icl_mbuf_to_crc32c(struct mbuf *m0, size_t len) { uint32_t digest = 0xffffffff; m_apply(m0, 0, len, mbuf_crc32c_helper, &digest); digest = digest ^ 0xffffffff; return (digest); } static int icl_pdu_check_header_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs) { uint32_t received_digest, valid_digest; if (request->ip_conn->ic_header_crc32c == false) return (0); CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); icl_soft_receive_buf(r, rs, &received_digest, ISCSI_HEADER_DIGEST_SIZE); /* Temporary attach AHS to BHS to calculate header digest. */ request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf; valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf, ISCSI_BHS_SIZE); request->ip_bhs_mbuf->m_next = NULL; if (received_digest != valid_digest) { ICL_WARN("header digest check failed; got 0x%x, " "should be 0x%x", received_digest, valid_digest); return (-1); } return (0); } /* * Return the number of bytes that should be waiting in the receive socket * before icl_pdu_receive_data_segment() gets called. 
*/
/*
 * The result is the still-missing part of the data segment (BHS length
 * minus request->ip_data_len).  It is either capped at partial_receive_len,
 * in which case no padding is added, or rounded up to a multiple of four.
 * Returns 0 when the PDU carries no data segment.
 */
static size_t
icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
{
	size_t len;

	len = icl_pdu_data_segment_length(request);
	if (len == 0)
		return (0);

	/*
	 * Account for the parts of data segment already read from
	 * the socket buffer.
	 */
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	/*
	 * Don't always wait for the full data segment to be delivered
	 * to the socket; this might badly affect performance due to
	 * TCP window scaling.
	 */
	if (len > partial_receive_len) {
#if 0
		ICL_DEBUG("need %zd bytes of data, limiting to %zd",
		    len, partial_receive_len));
#endif
		len = partial_receive_len;

		return (len);
	}

	/*
	 * Account for padding. Note that due to the way code is written,
	 * the icl_pdu_receive_data_segment() must always receive padding
	 * along with the last part of data segment, because it would be
	 * impossible to tell whether we've already received the full data
	 * segment including padding, or without it.
	 */
	if ((len % 4) != 0)
		len += 4 - (len % 4);

#if 0
	ICL_DEBUG("need %zd bytes of data", len));
#endif

	return (len);
}

/*
 * Move the next part of the data segment from the received chain at *r
 * onto request->ip_data_mbuf.
 *
 * request:      PDU being assembled; ip_data_mbuf and ip_data_len are
 *               updated (ip_data_len grows by the payload only, not by
 *               the padding that may be appended to the chain).
 * r, rs:        received mbuf chain and its byte count; both are advanced
 *               past whatever is consumed.
 * more_neededp: set to true when *rs does not cover the rest of the
 *               segment plus padding, false otherwise.
 *
 * The connection's receive_len is reset to 0 on entry and, when more data
 * is needed, set to the size of the next read.  Every path visible here
 * returns 0.
 */
static int
icl_pdu_receive_data_segment(struct icl_pdu *request, struct mbuf **r,
    size_t *rs, bool *more_neededp)
{
	struct icl_soft_conn *isc;
	size_t len, padding = 0;
	struct mbuf *m;

	isc = (struct icl_soft_conn *)request->ip_conn;

	*more_neededp = false;
	isc->receive_len = 0;

	len = icl_pdu_data_segment_length(request);
	if (len == 0)
		return (0);

	/* Padding is derived from the full segment length in the BHS. */
	if ((len % 4) != 0)
		padding = 4 - (len % 4);

	/*
	 * Account for already received parts of data segment.
	 */
	KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
	len -= request->ip_data_len;

	if (len + padding > *rs) {
		/*
		 * Not enough data in the socket buffer. Receive as much
		 * as we can. Don't receive padding, since, obviously, it's
		 * not the end of data segment yet.
		 */
#if 0
		ICL_DEBUG("limited from %zd to %zd",
		    len + padding, *rs - padding));
#endif
		/*
		 * NOTE(review): this subtraction assumes *rs >= padding;
		 * presumably guaranteed because the caller waits for
		 * receive_len bytes first -- confirm for very small
		 * partial_receive_len settings.
		 */
		len = *rs - padding;
		*more_neededp = true;
		padding = 0;
	}

	/*
	 * Must not try to receive padding without at least one byte
	 * of actual data segment.
	 */
	if (len > 0) {
		/* Keep the first len + padding bytes; the rest stays in *r. */
		m = *r;
		*r = m_split(m, len + padding, M_WAITOK);
		*rs -= len + padding;

		if (request->ip_data_mbuf == NULL)
			request->ip_data_mbuf = m;
		else
			m_cat(request->ip_data_mbuf, m);

		request->ip_data_len += len;
	} else
		ICL_DEBUG("len 0");

	/* Tell the receive loop how much to wait for before the next call. */
	if (*more_neededp)
		isc->receive_len = icl_pdu_data_segment_receive_len(request);

	return (0);
}

/*
 * When data digests are enabled and the PDU has a data segment, pull the
 * received digest off the chain at *r and compare it with a CRC32C computed
 * over the padded data segment.  Returns 0 on match or when there is
 * nothing to check, -1 on mismatch.
 */
static int
icl_pdu_check_data_digest(struct icl_pdu *request,
    struct mbuf **r, size_t *rs)
{
	uint32_t received_digest, valid_digest;

	if (request->ip_conn->ic_data_crc32c == false)
		return (0);

	if (request->ip_data_len == 0)
		return (0);

	CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
	icl_soft_receive_buf(r, rs, &received_digest, ISCSI_DATA_DIGEST_SIZE);

	/*
	 * Note that ip_data_mbuf also contains padding; since digest
	 * calculation is supposed to include that, we iterate over
	 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
	 */
	valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf,
	    roundup2(request->ip_data_len, 4));

	if (received_digest != valid_digest) {
		ICL_WARN("data digest check failed; got 0x%x, "
		    "should be 0x%x", received_digest, valid_digest);
		return (-1);
	}

	return (0);
}

/*
 * Somewhat contrary to the name, this attempts to receive only one
 * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
*/ static struct icl_pdu * icl_conn_receive_pdu(struct icl_soft_conn *isc, struct mbuf **r, size_t *rs) { struct icl_conn *ic = &isc->ic; struct icl_pdu *request; size_t len; int error = 0; bool more_needed; if (isc->receive_state == ICL_CONN_STATE_BHS) { KASSERT(isc->receive_pdu == NULL, ("isc->receive_pdu != NULL")); request = icl_soft_conn_new_pdu(ic, M_NOWAIT); if (request == NULL) { ICL_DEBUG("failed to allocate PDU; " "dropping connection"); icl_conn_fail(ic); return (NULL); } isc->receive_pdu = request; } else { KASSERT(isc->receive_pdu != NULL, ("isc->receive_pdu == NULL")); request = isc->receive_pdu; } switch (isc->receive_state) { case ICL_CONN_STATE_BHS: //ICL_DEBUG("receiving BHS"); icl_soft_receive_buf(r, rs, request->ip_bhs, sizeof(struct iscsi_bhs)); /* * We don't enforce any limit for AHS length; * its length is stored in 8 bit field. */ len = icl_pdu_data_segment_length(request); if (len > ic->ic_max_recv_data_segment_length) { ICL_WARN("received data segment " "length %zd is larger than negotiated; " "dropping connection", len); error = EINVAL; break; } isc->receive_state = ICL_CONN_STATE_AHS; isc->receive_len = icl_pdu_ahs_length(request); break; case ICL_CONN_STATE_AHS: //ICL_DEBUG("receiving AHS"); icl_pdu_receive_ahs(request, r, rs); isc->receive_state = ICL_CONN_STATE_HEADER_DIGEST; if (ic->ic_header_crc32c == false) isc->receive_len = 0; else isc->receive_len = ISCSI_HEADER_DIGEST_SIZE; break; case ICL_CONN_STATE_HEADER_DIGEST: //ICL_DEBUG("receiving header digest"); error = icl_pdu_check_header_digest(request, r, rs); if (error != 0) { ICL_DEBUG("header digest failed; " "dropping connection"); break; } isc->receive_state = ICL_CONN_STATE_DATA; isc->receive_len = icl_pdu_data_segment_receive_len(request); break; case ICL_CONN_STATE_DATA: //ICL_DEBUG("receiving data segment"); error = icl_pdu_receive_data_segment(request, r, rs, &more_needed); if (error != 0) { ICL_DEBUG("failed to receive data segment;" "dropping connection"); break; } if 
(more_needed) break; isc->receive_state = ICL_CONN_STATE_DATA_DIGEST; if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) isc->receive_len = 0; else isc->receive_len = ISCSI_DATA_DIGEST_SIZE; break; case ICL_CONN_STATE_DATA_DIGEST: //ICL_DEBUG("receiving data digest"); error = icl_pdu_check_data_digest(request, r, rs); if (error != 0) { ICL_DEBUG("data digest failed; " "dropping connection"); break; } /* * We've received complete PDU; reset the receive state machine * and return the PDU. */ isc->receive_state = ICL_CONN_STATE_BHS; isc->receive_len = sizeof(struct iscsi_bhs); isc->receive_pdu = NULL; return (request); default: panic("invalid receive_state %d\n", isc->receive_state); } if (error != 0) { /* * Don't free the PDU; it's pointed to by isc->receive_pdu * and will get freed in icl_soft_conn_close(). */ icl_conn_fail(ic); } return (NULL); } static void icl_conn_receive_pdus(struct icl_soft_conn *isc, struct mbuf **r, size_t *rs) { struct icl_conn *ic = &isc->ic; struct icl_pdu *response; for (;;) { if (ic->ic_disconnecting) return; /* * Loop until we have a complete PDU or there is not enough * data in the socket buffer. 
*/ if (*rs < isc->receive_len) { #if 0 ICL_DEBUG("not enough data; have %zd, need %zd", *rs, isc->receive_len); #endif return; } response = icl_conn_receive_pdu(isc, r, rs); if (response == NULL) continue; if (response->ip_ahs_len > 0) { ICL_WARN("received PDU with unsupported " "AHS; opcode 0x%x; dropping connection", response->ip_bhs->bhs_opcode); icl_soft_conn_pdu_free(ic, response); icl_conn_fail(ic); return; } (ic->ic_receive)(response); } } static void icl_receive_thread(void *arg) { struct icl_soft_conn *isc = arg; struct icl_conn *ic = &isc->ic; size_t available, read = 0; struct socket *so; struct mbuf *m, *r = NULL; struct uio uio; int error, flags; so = ic->ic_socket; for (;;) { SOCKBUF_LOCK(&so->so_rcv); if (ic->ic_disconnecting) { SOCKBUF_UNLOCK(&so->so_rcv); break; } /* * Set the low watermark, to be checked by * soreadable() in icl_soupcall_receive() * to avoid unnecessary wakeups until there * is enough data received to read the PDU. */ available = sbavail(&so->so_rcv); if (read + available < isc->receive_len) { so->so_rcv.sb_lowat = isc->receive_len - read; cv_wait(&isc->receive_cv, SOCKBUF_MTX(&so->so_rcv)); so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1; available = sbavail(&so->so_rcv); } SOCKBUF_UNLOCK(&so->so_rcv); if (available == 0) { if (so->so_error != 0) { ICL_DEBUG("connection error %d; " "dropping connection", so->so_error); icl_conn_fail(ic); break; } continue; } memset(&uio, 0, sizeof(uio)); uio.uio_resid = available; flags = MSG_DONTWAIT; error = soreceive(so, NULL, &uio, &m, NULL, &flags); if (error != 0) { ICL_DEBUG("soreceive error %d", error); break; } if (uio.uio_resid != 0) { m_freem(m); ICL_DEBUG("short read"); break; } if (r) m_cat(r, m); else r = m; read += available; icl_conn_receive_pdus(isc, &r, &read); } if (r) m_freem(r); ICL_CONN_LOCK(ic); isc->receive_running = false; cv_signal(&isc->send_cv); ICL_CONN_UNLOCK(ic); kthread_exit(); } static int icl_soupcall_receive(struct socket *so, void *arg, int waitflag) { struct 
icl_soft_conn *isc; if (!soreadable(so)) return (SU_OK); isc = arg; cv_signal(&isc->receive_cv); return (SU_OK); } static int icl_pdu_finalize(struct icl_pdu *request) { size_t padding, pdu_len; uint32_t digest, zero = 0; int ok; struct icl_conn *ic; ic = request->ip_conn; icl_pdu_set_data_segment_length(request, request->ip_data_len); pdu_len = icl_pdu_size(request); if (ic->ic_header_crc32c) { digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf, ISCSI_BHS_SIZE); ok = m_append(request->ip_bhs_mbuf, sizeof(digest), (void *)&digest); if (ok != 1) { ICL_WARN("failed to append header digest"); return (1); } } if (request->ip_data_len != 0) { padding = icl_pdu_padding(request); if (padding > 0) { ok = m_append(request->ip_data_mbuf, padding, (void *)&zero); if (ok != 1) { ICL_WARN("failed to append padding"); return (1); } } if (ic->ic_data_crc32c) { digest = icl_mbuf_to_crc32c(request->ip_data_mbuf, roundup2(request->ip_data_len, 4)); ok = m_append(request->ip_data_mbuf, sizeof(digest), (void *)&digest); if (ok != 1) { ICL_WARN("failed to append data digest"); return (1); } } m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); request->ip_data_mbuf = NULL; } request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; return (0); } static void icl_conn_send_pdus(struct icl_soft_conn *isc, struct icl_pdu_stailq *queue) { struct icl_conn *ic = &isc->ic; struct icl_pdu *request, *request2; struct mbuf *m; struct socket *so; long available, size, size2; int coalesced, error; ICL_CONN_LOCK_ASSERT_NOT(ic); so = ic->ic_socket; SOCKBUF_LOCK(&so->so_snd); /* * Check how much space do we have for transmit. We can't just * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE, * as it always frees the mbuf chain passed to it, even in case * of error. */ available = sbspace(&so->so_snd); isc->check_send_space = false; /* * Notify the socket upcall that we don't need wakeups * for the time being. 
*/ so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1; SOCKBUF_UNLOCK(&so->so_snd); while (!STAILQ_EMPTY(queue)) { request = STAILQ_FIRST(queue); size = icl_pdu_size(request); if (available < size) { /* * Set the low watermark, to be checked by * sowriteable() in icl_soupcall_send() * to avoid unnecessary wakeups until there * is enough space for the PDU to fit. */ SOCKBUF_LOCK(&so->so_snd); available = sbspace(&so->so_snd); if (available < size) { #if 1 ICL_DEBUG("no space to send; " "have %ld, need %ld", available, size); #endif so->so_snd.sb_lowat = max(size, so->so_snd.sb_hiwat / 8); SOCKBUF_UNLOCK(&so->so_snd); return; } SOCKBUF_UNLOCK(&so->so_snd); } STAILQ_REMOVE_HEAD(queue, ip_next); error = icl_pdu_finalize(request); if (error != 0) { ICL_DEBUG("failed to finalize PDU; " "dropping connection"); icl_soft_pdu_done(request, EIO); icl_conn_fail(ic); return; } if (coalesce) { m = request->ip_bhs_mbuf; for (coalesced = 1; ; coalesced++) { request2 = STAILQ_FIRST(queue); if (request2 == NULL) break; size2 = icl_pdu_size(request2); if (available < size + size2) break; STAILQ_REMOVE_HEAD(queue, ip_next); error = icl_pdu_finalize(request2); if (error != 0) { ICL_DEBUG("failed to finalize PDU; " "dropping connection"); icl_soft_pdu_done(request, EIO); icl_soft_pdu_done(request2, EIO); icl_conn_fail(ic); return; } while (m->m_next) m = m->m_next; m_cat(m, request2->ip_bhs_mbuf); request2->ip_bhs_mbuf = NULL; request->ip_bhs_mbuf->m_pkthdr.len += size2; size += size2; icl_soft_pdu_done(request2, 0); } #if 0 if (coalesced > 1) { ICL_DEBUG("coalesced %d PDUs into %ld bytes", coalesced, size); } #endif } available -= size; error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, NULL, MSG_DONTWAIT, curthread); request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. 
*/ if (error != 0) { ICL_DEBUG("failed to send PDU, error %d; " "dropping connection", error); icl_soft_pdu_done(request, error); icl_conn_fail(ic); return; } icl_soft_pdu_done(request, 0); } } static void icl_send_thread(void *arg) { struct icl_soft_conn *isc; struct icl_conn *ic; struct icl_pdu_stailq queue; isc = arg; ic = &isc->ic; STAILQ_INIT(&queue); ICL_CONN_LOCK(ic); for (;;) { for (;;) { /* * Populate the local queue from the main one. * This way the icl_conn_send_pdus() can go through * all the queued PDUs without holding any locks. */ if (STAILQ_EMPTY(&queue) || isc->check_send_space) STAILQ_CONCAT(&queue, &isc->to_send); ICL_CONN_UNLOCK(ic); icl_conn_send_pdus(isc, &queue); ICL_CONN_LOCK(ic); /* * The icl_soupcall_send() was called since the last * call to sbspace(); go around; */ if (isc->check_send_space) continue; /* * Local queue is empty, but we still have PDUs * in the main one; go around. */ if (STAILQ_EMPTY(&queue) && !STAILQ_EMPTY(&isc->to_send)) continue; /* * There might be some stuff in the local queue, * which didn't get sent due to not having enough send * space. Wait for socket upcall. */ break; } if (ic->ic_disconnecting) { //ICL_DEBUG("terminating"); break; } cv_wait(&isc->send_cv, ic->ic_lock); } /* * We're exiting; move PDUs back to the main queue, so they can * get freed properly. At this point ordering doesn't matter. */ STAILQ_CONCAT(&isc->to_send, &queue); isc->send_running = false; cv_signal(&isc->send_cv); ICL_CONN_UNLOCK(ic); kthread_exit(); } static int icl_soupcall_send(struct socket *so, void *arg, int waitflag) { struct icl_soft_conn *isc; struct icl_conn *ic; if (!sowriteable(so)) return (SU_OK); isc = arg; ic = &isc->ic; ICL_CONN_LOCK(ic); isc->check_send_space = true; ICL_CONN_UNLOCK(ic); cv_signal(&isc->send_cv); return (SU_OK); } static void icl_soft_free_mext_pg(struct mbuf *m) { struct icl_soft_pdu *isp; M_ASSERTEXTPG(m); /* * Nothing to do for the pages; they are owned by the PDU / * I/O request. 
*/

	/* Drop reference on the PDU. */
	isp = m->m_ext.ext_arg1;
	if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1)
		icl_soft_pdu_call_cb(&isp->ip);
}

/*
 * Append 'len' bytes of an unmapped bio, starting 'offset' bytes into its
 * data, to the data segment of 'request'.
 *
 * With ICL_NOCOPY set in 'flags' the bio pages are referenced from
 * unmapped (ext_pgs) mbufs; each such mbuf takes a reference on the PDU
 * that icl_soft_free_mext_pg() drops.  Otherwise the bytes are copied
 * through the direct map into a freshly allocated mbuf chain.  The
 * remaining bits of 'flags' are passed to the mbuf allocator.
 *
 * Returns 0 on success or ENOMEM when an mbuf cannot be allocated.
 */
static int
icl_soft_conn_pdu_append_bio(struct icl_conn *ic, struct icl_pdu *request,
    struct bio *bp, size_t offset, size_t len, int flags)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request;
	struct mbuf *m, *m_tail;
	vm_offset_t vaddr;
	size_t mtodo, page_offset, todo;
	int i;

	KASSERT(len > 0, ("len == 0"));

	/* Find the current end of the data chain, if there is one. */
	m_tail = request->ip_data_mbuf;
	if (m_tail != NULL)
		for (; m_tail->m_next != NULL; m_tail = m_tail->m_next)
			;

	/*
	 * Translate 'offset' into an index into bio_ma[] and an offset
	 * within that page; the first page starts at bio_ma_offset.
	 */
	MPASS(bp->bio_flags & BIO_UNMAPPED);
	if (offset < PAGE_SIZE - bp->bio_ma_offset) {
		page_offset = bp->bio_ma_offset + offset;
		i = 0;
	} else {
		offset -= PAGE_SIZE - bp->bio_ma_offset;
		for (i = 1; offset >= PAGE_SIZE; i++)
			offset -= PAGE_SIZE;
		page_offset = offset;
	}

	if (flags & ICL_NOCOPY) {
		m = NULL;
		while (len > 0) {
			if (m == NULL) {
				m = mb_alloc_ext_pgs(flags & ~ICL_NOCOPY,
				    icl_soft_free_mext_pg);
				if (__predict_false(m == NULL))
					return (ENOMEM);
				atomic_add_int(&isp->ref_cnt, 1);
				m->m_ext.ext_arg1 = isp;
				m->m_epg_1st_off = page_offset;
			}

			todo = MIN(len, PAGE_SIZE - page_offset);

			m->m_epg_pa[m->m_epg_npgs] =
			    VM_PAGE_TO_PHYS(bp->bio_ma[i]);
			m->m_epg_npgs++;
			m->m_epg_last_len = todo;
			m->m_len += todo;
			m->m_ext.ext_size += PAGE_SIZE;
			MBUF_EXT_PGS_ASSERT_SANITY(m);

			/* Link a full mbuf into the PDU and start a new one. */
			if (m->m_epg_npgs == MBUF_PEXT_MAX_PGS) {
				if (m_tail != NULL)
					m_tail->m_next = m;
				else
					request->ip_data_mbuf = m;
				m_tail = m;
				request->ip_data_len += m->m_len;
				m = NULL;
			}

			page_offset = 0;
			len -= todo;
			i++;
		}

		/* Link the last, partially filled mbuf. */
		if (m != NULL) {
			if (m_tail != NULL)
				m_tail->m_next = m;
			else
				request->ip_data_mbuf = m;
			request->ip_data_len += m->m_len;
		}
		return (0);
	}

	m = m_getm2(NULL, len, flags, MT_DATA, 0);
	if (__predict_false(m == NULL))
		return (ENOMEM);

	if (request->ip_data_mbuf == NULL) {
		request->ip_data_mbuf = m;
		request->ip_data_len = len;
	} else {
		m_tail->m_next = m;
		request->ip_data_len += len;
	}

	while (len > 0) {
		todo = MIN(len, PAGE_SIZE - page_offset);
		vaddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(bp->bio_ma[i]));

		/*
		 * Charge this page's share against 'len' now: the inner
		 * loop counts 'todo' down to zero, so subtracting it
		 * afterwards would never shrink 'len' and the outer loop
		 * would run off the end of bio_ma[] and of the mbuf chain.
		 */
		len -= todo;

		/* Spread the page's bytes over as many mbufs as needed. */
		do {
			mtodo = min(todo, M_SIZE(m) - m->m_len);
			memcpy(mtod(m, char *) + m->m_len, (char *)vaddr +
			    page_offset, mtodo);
			m->m_len += mtodo;
			if (m->m_len == M_SIZE(m))
				m = m->m_next;
			page_offset += mtodo;
			todo -= mtodo;
		} while (todo > 0);

		page_offset = 0;
		i++;
	}

	return (0);
}

/*
 * Append 'len' bytes at 'addr' to the data segment of 'request'.
 *
 * With ICL_NOCOPY set in 'flags' the buffer is attached by reference as
 * read-only external storage counted by the PDU's ref_cnt, with
 * icl_soft_mbuf_done() as the free routine.  Otherwise the bytes are
 * copied into a new mbuf chain.
 *
 * Returns 0 on success or ENOMEM when an mbuf cannot be allocated.
 */
static int
icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
    const void *addr, size_t len, int flags)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request;
	struct mbuf *mb, *newmb;
	size_t copylen, off = 0;

	KASSERT(len > 0, ("len == 0"));

	if (flags & ICL_NOCOPY) {
		newmb = m_get(flags & ~ICL_NOCOPY, MT_DATA);
		if (newmb == NULL) {
			ICL_WARN("failed to allocate mbuf");
			return (ENOMEM);
		}

		newmb->m_flags |= M_RDONLY;
		m_extaddref(newmb, __DECONST(char *, addr), len, &isp->ref_cnt,
		    icl_soft_mbuf_done, isp, NULL);
		newmb->m_len = len;
	} else {
		newmb = m_getm2(NULL, len, flags, MT_DATA, 0);
		if (newmb == NULL) {
			ICL_WARN("failed to allocate mbuf for %zd bytes", len);
			return (ENOMEM);
		}

		for (mb = newmb; mb != NULL; mb = mb->m_next) {
			copylen = min(M_TRAILINGSPACE(mb), len - off);
			memcpy(mtod(mb, char *), (const char *)addr + off,
			    copylen);
			mb->m_len = copylen;
			off += copylen;
		}
		KASSERT(off == len, ("%s: off != len", __func__));
	}

	if (request->ip_data_mbuf == NULL) {
		request->ip_data_mbuf = newmb;
		request->ip_data_len = len;
	} else {
		m_cat(request->ip_data_mbuf, newmb);
		request->ip_data_len += len;
	}

	return (0);
}

void
icl_soft_conn_pdu_get_bio(struct icl_conn *ic, struct icl_pdu *ip,
    size_t pdu_off, struct bio *bp, size_t bio_off, size_t len)
{
	vm_offset_t vaddr;
	size_t page_offset, todo;
	int i;

	MPASS(bp->bio_flags & BIO_UNMAPPED);
	if (bio_off < PAGE_SIZE - bp->bio_ma_offset) {
		page_offset = bp->bio_ma_offset + bio_off;
		i = 0;
	} else {
		bio_off -= PAGE_SIZE - bp->bio_ma_offset;
		for (i = 1; bio_off >= PAGE_SIZE; i++)
			bio_off -= PAGE_SIZE;
		page_offset = bio_off;
	}

	while (len > 0) {
		todo = MIN(len, PAGE_SIZE -
page_offset); vaddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(bp->bio_ma[i])); m_copydata(ip->ip_data_mbuf, pdu_off, todo, (char *)vaddr + page_offset); page_offset = 0; pdu_off += todo; len -= todo; i++; } } void icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, size_t off, void *addr, size_t len) { m_copydata(ip->ip_data_mbuf, off, len, addr); } static void icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) { icl_soft_conn_pdu_queue_cb(ic, ip, NULL); } static void icl_soft_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip, icl_pdu_cb cb) { struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; ICL_CONN_LOCK_ASSERT(ic); isp->ref_cnt++; isp->cb = cb; if (ic->ic_disconnecting || ic->ic_socket == NULL) { ICL_DEBUG("icl_pdu_queue on closed connection"); icl_soft_pdu_done(ip, ENOTCONN); return; } if (!STAILQ_EMPTY(&isc->to_send)) { STAILQ_INSERT_TAIL(&isc->to_send, ip, ip_next); /* * If the queue is not empty, someone else had already * signaled the send thread; no need to do that again, * just return. 
*/ return; } STAILQ_INSERT_TAIL(&isc->to_send, ip, ip_next); cv_signal(&isc->send_cv); } static struct icl_conn * icl_soft_new_conn(const char *name, struct mtx *lock) { struct icl_soft_conn *isc; struct icl_conn *ic; refcount_acquire(&icl_ncons); isc = (struct icl_soft_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO); STAILQ_INIT(&isc->to_send); cv_init(&isc->send_cv, "icl_tx"); cv_init(&isc->receive_cv, "icl_rx"); ic = &isc->ic; ic->ic_lock = lock; #ifdef DIAGNOSTIC refcount_init(&ic->ic_outstanding_pdus, 0); #endif ic->ic_name = name; ic->ic_offload = "None"; ic->ic_unmapped = PMAP_HAS_DMAP; return (ic); } void icl_soft_conn_free(struct icl_conn *ic) { struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; #ifdef DIAGNOSTIC KASSERT(ic->ic_outstanding_pdus == 0, ("destroying session with %d outstanding PDUs", ic->ic_outstanding_pdus)); #endif cv_destroy(&isc->send_cv); cv_destroy(&isc->receive_cv); kobj_delete((struct kobj *)isc, M_ICL_SOFT); refcount_release(&icl_ncons); } static int icl_conn_start(struct icl_conn *ic) { struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; size_t minspace; struct sockopt opt; int error, one = 1; ICL_CONN_LOCK(ic); /* * XXX: Ugly hack. */ if (ic->ic_socket == NULL) { ICL_CONN_UNLOCK(ic); return (EINVAL); } isc->receive_state = ICL_CONN_STATE_BHS; isc->receive_len = sizeof(struct iscsi_bhs); ic->ic_disconnecting = false; ICL_CONN_UNLOCK(ic); /* * For sendspace, this is required because the current code cannot * send a PDU in pieces; thus, the minimum buffer size is equal * to the maximum PDU size. "+4" is to account for possible padding. 
*/ minspace = sizeof(struct iscsi_bhs) + ic->ic_max_send_data_segment_length + ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; if (sendspace < minspace) { ICL_WARN("kern.icl.sendspace too low; must be at least %zd", minspace); sendspace = minspace; } minspace = sizeof(struct iscsi_bhs) + ic->ic_max_recv_data_segment_length + ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; if (recvspace < minspace) { ICL_WARN("kern.icl.recvspace too low; must be at least %zd", minspace); recvspace = minspace; } error = soreserve(ic->ic_socket, sendspace, recvspace); if (error != 0) { ICL_WARN("soreserve failed with error %d", error); icl_soft_conn_close(ic); return (error); } ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; /* * Disable Nagle. */ bzero(&opt, sizeof(opt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = IPPROTO_TCP; opt.sopt_name = TCP_NODELAY; opt.sopt_val = &one; opt.sopt_valsize = sizeof(one); error = sosetopt(ic->ic_socket, &opt); if (error != 0) { ICL_WARN("disabling TCP_NODELAY failed with error %d", error); icl_soft_conn_close(ic); return (error); } /* * Register socket upcall, to get notified about incoming PDUs * and free space to send outgoing ones. */ SOCKBUF_LOCK(&ic->ic_socket->so_snd); soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, isc); SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); SOCKBUF_LOCK(&ic->ic_socket->so_rcv); soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, isc); SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); /* * Start threads. 
*/ ICL_CONN_LOCK(ic); isc->send_running = isc->receive_running = true; ICL_CONN_UNLOCK(ic); error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx", ic->ic_name); if (error != 0) { ICL_WARN("kthread_add(9) failed with error %d", error); ICL_CONN_LOCK(ic); isc->send_running = isc->receive_running = false; cv_signal(&isc->send_cv); ICL_CONN_UNLOCK(ic); icl_soft_conn_close(ic); return (error); } error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx", ic->ic_name); if (error != 0) { ICL_WARN("kthread_add(9) failed with error %d", error); ICL_CONN_LOCK(ic); isc->receive_running = false; cv_signal(&isc->send_cv); ICL_CONN_UNLOCK(ic); icl_soft_conn_close(ic); return (error); } return (0); } int icl_soft_conn_handoff(struct icl_conn *ic, int fd) { struct file *fp; struct socket *so; cap_rights_t rights; int error; ICL_CONN_LOCK_ASSERT_NOT(ic); #ifdef ICL_KERNEL_PROXY /* * We're transitioning to Full Feature phase, and we don't * really care. */ if (fd == 0) { ICL_CONN_LOCK(ic); if (ic->ic_socket == NULL) { ICL_CONN_UNLOCK(ic); ICL_WARN("proxy handoff without connect"); return (EINVAL); } ICL_CONN_UNLOCK(ic); return (0); } #endif /* * Steal the socket from userland. */ error = fget(curthread, fd, cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp); if (error != 0) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, curthread); return (EINVAL); } so = fp->f_data; if (so->so_type != SOCK_STREAM) { fdrop(fp, curthread); return (EINVAL); } ICL_CONN_LOCK(ic); if (ic->ic_socket != NULL) { ICL_CONN_UNLOCK(ic); fdrop(fp, curthread); return (EBUSY); } ic->ic_socket = fp->f_data; fp->f_ops = &badfileops; fp->f_data = NULL; fdrop(fp, curthread); ICL_CONN_UNLOCK(ic); error = icl_conn_start(ic); return (error); } void icl_soft_conn_close(struct icl_conn *ic) { struct icl_soft_conn *isc = (struct icl_soft_conn *)ic; struct icl_pdu *pdu; struct socket *so; /* * Wake up the threads, so they can properly terminate. 
* Receive thread sleeps on so->so_rcv lock, send on ic->ic_lock. */ ICL_CONN_LOCK(ic); if (!ic->ic_disconnecting) { so = ic->ic_socket; if (so) SOCKBUF_LOCK(&so->so_rcv); ic->ic_disconnecting = true; if (so) SOCKBUF_UNLOCK(&so->so_rcv); } while (isc->receive_running || isc->send_running) { cv_signal(&isc->receive_cv); cv_signal(&isc->send_cv); cv_wait(&isc->send_cv, ic->ic_lock); } /* Some other thread could close the connection same time. */ so = ic->ic_socket; if (so == NULL) { ICL_CONN_UNLOCK(ic); return; } ic->ic_socket = NULL; /* * Deregister socket upcalls. */ ICL_CONN_UNLOCK(ic); SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_upcall != NULL) soupcall_clear(so, SO_SND); SOCKBUF_UNLOCK(&so->so_snd); SOCKBUF_LOCK(&so->so_rcv); if (so->so_rcv.sb_upcall != NULL) soupcall_clear(so, SO_RCV); SOCKBUF_UNLOCK(&so->so_rcv); soclose(so); ICL_CONN_LOCK(ic); if (isc->receive_pdu != NULL) { //ICL_DEBUG("freeing partially received PDU"); icl_soft_conn_pdu_free(ic, isc->receive_pdu); isc->receive_pdu = NULL; } /* * Remove any outstanding PDUs from the send queue. 
*/ while (!STAILQ_EMPTY(&isc->to_send)) { pdu = STAILQ_FIRST(&isc->to_send); STAILQ_REMOVE_HEAD(&isc->to_send, ip_next); icl_soft_pdu_done(pdu, ENOTCONN); } KASSERT(STAILQ_EMPTY(&isc->to_send), ("destroying session with non-empty send queue")); ICL_CONN_UNLOCK(ic); } int icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) { return (0); } void icl_soft_conn_task_done(struct icl_conn *ic, void *prv) { } int icl_soft_conn_transfer_setup(struct icl_conn *ic, struct icl_pdu *ip, union ctl_io *io, uint32_t *transfer_tag, void **prvp) { return (0); } void icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv) { } static int -icl_soft_limits(struct icl_drv_limits *idl) +icl_soft_limits(struct icl_drv_limits *idl, int socket) { idl->idl_max_recv_data_segment_length = max_data_segment_length; idl->idl_max_send_data_segment_length = max_data_segment_length; idl->idl_max_burst_length = max_burst_length; idl->idl_first_burst_length = first_burst_length; return (0); } #ifdef ICL_KERNEL_PROXY int icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype, int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa) { return (icl_soft_proxy_connect(ic, domain, socktype, protocol, from_sa, to_sa)); } int icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so) { int error; ICL_CONN_LOCK_ASSERT_NOT(ic); if (so->so_type != SOCK_STREAM) return (EINVAL); ICL_CONN_LOCK(ic); if (ic->ic_socket != NULL) { ICL_CONN_UNLOCK(ic); return (EBUSY); } ic->ic_socket = so; ICL_CONN_UNLOCK(ic); error = icl_conn_start(ic); return (error); } #endif /* ICL_KERNEL_PROXY */ static int icl_soft_load(void) { int error; icl_soft_pdu_zone = uma_zcreate("icl_soft_pdu", sizeof(struct icl_soft_pdu), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); refcount_init(&icl_ncons, 0); /* * The reason we call this "none" is that to the user, * it's known as "offload driver"; "offload driver: soft" * doesn't make much sense. 
*/ error = icl_register("none", false, 0, icl_soft_limits, icl_soft_new_conn); KASSERT(error == 0, ("failed to register")); #if defined(ICL_KERNEL_PROXY) && 0 /* * Debugging aid for kernel proxy functionality. */ error = icl_register("proxytest", true, 0, icl_soft_limits, icl_soft_new_conn); KASSERT(error == 0, ("failed to register")); #endif return (error); } static int icl_soft_unload(void) { if (icl_ncons != 0) return (EBUSY); icl_unregister("none", false); #if defined(ICL_KERNEL_PROXY) && 0 icl_unregister("proxytest", true); #endif uma_zdestroy(icl_soft_pdu_zone); return (0); } static int icl_soft_modevent(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: return (icl_soft_load()); case MOD_UNLOAD: return (icl_soft_unload()); default: return (EINVAL); } } moduledata_t icl_soft_data = { "icl_soft", icl_soft_modevent, 0 }; DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); MODULE_DEPEND(icl_soft, icl, 1, 1, 1); MODULE_VERSION(icl_soft, 1); diff --git a/sys/dev/iscsi/iscsi.c b/sys/dev/iscsi/iscsi.c index 9535aa9b9e2a..f7a585c02545 100644 --- a/sys/dev/iscsi/iscsi.c +++ b/sys/dev/iscsi/iscsi.c @@ -1,2763 +1,2833 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ICL_KERNEL_PROXY #include #endif #ifdef ICL_KERNEL_PROXY FEATURE(iscsi_kernel_proxy, "iSCSI initiator built with ICL_KERNEL_PROXY"); #endif +#ifdef COMPAT_FREEBSD13 +struct iscsi_daemon_request13 { + unsigned int idr_session_id; + struct iscsi_session_conf idr_conf; + uint8_t idr_isid[6]; + uint16_t idr_tsih; + uint16_t idr_spare_cid; + struct iscsi_session_limits idr_limits; + int idr_spare[4]; +}; + +#define ISCSIDWAIT13 _IOR('I', 0x01, struct iscsi_daemon_request13) +#endif + /* * XXX: This is global so the iscsi_unload() can access it. * Think about how to do this properly. 
*/ static struct iscsi_softc *sc; SYSCTL_NODE(_kern, OID_AUTO, iscsi, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "iSCSI initiator"); static int debug = 1; SYSCTL_INT(_kern_iscsi, OID_AUTO, debug, CTLFLAG_RWTUN, &debug, 0, "Enable debug messages"); static int ping_timeout = 5; SYSCTL_INT(_kern_iscsi, OID_AUTO, ping_timeout, CTLFLAG_RWTUN, &ping_timeout, 0, "Timeout for ping (NOP-Out) requests, in seconds"); static int iscsid_timeout = 60; SYSCTL_INT(_kern_iscsi, OID_AUTO, iscsid_timeout, CTLFLAG_RWTUN, &iscsid_timeout, 0, "Time to wait for iscsid(8) to handle reconnection, in seconds"); static int login_timeout = 60; SYSCTL_INT(_kern_iscsi, OID_AUTO, login_timeout, CTLFLAG_RWTUN, &login_timeout, 0, "Time to wait for iscsid(8) to finish Login Phase, in seconds"); static int maxtags = 255; SYSCTL_INT(_kern_iscsi, OID_AUTO, maxtags, CTLFLAG_RWTUN, &maxtags, 0, "Max number of IO requests queued"); static int fail_on_disconnection = 0; SYSCTL_INT(_kern_iscsi, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN, &fail_on_disconnection, 0, "Destroy CAM SIM on connection failure"); static int fail_on_shutdown = 1; SYSCTL_INT(_kern_iscsi, OID_AUTO, fail_on_shutdown, CTLFLAG_RWTUN, &fail_on_shutdown, 0, "Fail disconnected sessions on shutdown"); static MALLOC_DEFINE(M_ISCSI, "iSCSI", "iSCSI initiator"); static uma_zone_t iscsi_outstanding_zone; #define CONN_SESSION(X) ((struct iscsi_session *)X->ic_prv0) #define PDU_SESSION(X) (CONN_SESSION(X->ip_conn)) #define ISCSI_DEBUG(X, ...) \ do { \ if (debug > 1) \ printf("%s: " X "\n", __func__, ## __VA_ARGS__);\ } while (0) #define ISCSI_WARN(X, ...) \ do { \ if (debug > 0) { \ printf("WARNING: %s: " X "\n", \ __func__, ## __VA_ARGS__); \ } \ } while (0) #define ISCSI_SESSION_DEBUG(S, X, ...) \ do { \ if (debug > 1) { \ printf("%s: %s (%s): " X "\n", \ __func__, S->is_conf.isc_target_addr, \ S->is_conf.isc_target, ## __VA_ARGS__); \ } \ } while (0) #define ISCSI_SESSION_WARN(S, X, ...) 
\ do { \ if (debug > 0) { \ printf("WARNING: %s (%s): " X "\n", \ S->is_conf.isc_target_addr, \ S->is_conf.isc_target, ## __VA_ARGS__); \ } \ } while (0) #define ISCSI_SESSION_LOCK(X) mtx_lock(&X->is_lock) #define ISCSI_SESSION_UNLOCK(X) mtx_unlock(&X->is_lock) #define ISCSI_SESSION_LOCK_ASSERT(X) mtx_assert(&X->is_lock, MA_OWNED) #define ISCSI_SESSION_LOCK_ASSERT_NOT(X) mtx_assert(&X->is_lock, MA_NOTOWNED) static int iscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int mode, struct thread *td); static struct cdevsw iscsi_cdevsw = { .d_version = D_VERSION, .d_ioctl = iscsi_ioctl, .d_name = "iscsi", }; static void iscsi_pdu_queue_locked(struct icl_pdu *request); static void iscsi_pdu_queue(struct icl_pdu *request); static void iscsi_pdu_update_statsn(const struct icl_pdu *response); static void iscsi_pdu_handle_nop_in(struct icl_pdu *response); static void iscsi_pdu_handle_scsi_response(struct icl_pdu *response); static void iscsi_pdu_handle_task_response(struct icl_pdu *response); static void iscsi_pdu_handle_data_in(struct icl_pdu *response); static void iscsi_pdu_handle_logout_response(struct icl_pdu *response); static void iscsi_pdu_handle_r2t(struct icl_pdu *response); static void iscsi_pdu_handle_async_message(struct icl_pdu *response); static void iscsi_pdu_handle_reject(struct icl_pdu *response); static void iscsi_session_reconnect(struct iscsi_session *is); static void iscsi_session_terminate(struct iscsi_session *is); static void iscsi_action(struct cam_sim *sim, union ccb *ccb); static struct iscsi_outstanding *iscsi_outstanding_find(struct iscsi_session *is, uint32_t initiator_task_tag); static struct iscsi_outstanding *iscsi_outstanding_add(struct iscsi_session *is, struct icl_pdu *request, union ccb *ccb, uint32_t *initiator_task_tagp); static void iscsi_outstanding_remove(struct iscsi_session *is, struct iscsi_outstanding *io); static bool iscsi_pdu_prepare(struct icl_pdu *request) { struct iscsi_session *is; struct iscsi_bhs_scsi_command 
*bhssc;

	is = PDU_SESSION(request);
	ISCSI_SESSION_LOCK_ASSERT(is);

	/*
	 * We're only using fields common for all the request
	 * (initiator -> target) PDUs.
	 */
	bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs;

	/*
	 * Data-Out PDU does not contain CmdSN.
	 */
	if (bhssc->bhssc_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_OUT) {
		if (ISCSI_SNGT(is->is_cmdsn, is->is_maxcmdsn) &&
		    (bhssc->bhssc_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) == 0) {
			/*
			 * Current MaxCmdSN prevents us from sending any more
			 * SCSI Command PDUs to the target; postpone the PDU.
			 * It will get resent by either iscsi_pdu_queue(),
			 * or by maintenance thread.
			 */
#if 0
			ISCSI_SESSION_DEBUG(is, "postponing send, CmdSN %u, "
			    "ExpCmdSN %u, MaxCmdSN %u, opcode 0x%x",
			    is->is_cmdsn, is->is_expcmdsn, is->is_maxcmdsn,
			    bhssc->bhssc_opcode);
#endif
			/* true: the caller must postpone this PDU. */
			return (true);
		}
		bhssc->bhssc_cmdsn = htonl(is->is_cmdsn);
		/* Immediate PDUs carry the current CmdSN without advancing it. */
		if ((bhssc->bhssc_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) == 0)
			is->is_cmdsn++;
	}
	bhssc->bhssc_expstatsn = htonl(is->is_statsn + 1);

	/* false: the PDU is stamped and can be sent now. */
	return (false);
}

/*
 * Send PDUs from the head of is_postponed, in order, stopping at the first
 * one iscsi_pdu_prepare() still refuses.  Once the list has been drained
 * completely the SIM queue is released.  Session lock must be held.
 */
static void
iscsi_session_send_postponed(struct iscsi_session *is)
{
	struct icl_pdu *request;
	bool postpone;

	ISCSI_SESSION_LOCK_ASSERT(is);

	if (STAILQ_EMPTY(&is->is_postponed))
		return;
	while ((request = STAILQ_FIRST(&is->is_postponed)) != NULL) {
		postpone = iscsi_pdu_prepare(request);
		if (postpone)
			return;
		STAILQ_REMOVE_HEAD(&is->is_postponed, ip_next);
		icl_pdu_queue(request);
	}
	/* Pairs with the xpt_freeze_simq() in iscsi_pdu_queue_locked(). */
	xpt_release_simq(is->is_sim, 1);
}

/*
 * Queue 'request' for sending, or append it to is_postponed when
 * iscsi_pdu_prepare() says it must wait.  Already postponed PDUs are tried
 * first.  Session lock must be held.
 */
static void
iscsi_pdu_queue_locked(struct icl_pdu *request)
{
	struct iscsi_session *is;
	bool postpone;

	is = PDU_SESSION(request);
	ISCSI_SESSION_LOCK_ASSERT(is);
	iscsi_session_send_postponed(is);
	postpone = iscsi_pdu_prepare(request);
	if (postpone) {
		/* Freeze the SIM queue only when the list goes non-empty. */
		if (STAILQ_EMPTY(&is->is_postponed))
			xpt_freeze_simq(is->is_sim, 1);
		STAILQ_INSERT_TAIL(&is->is_postponed, request, ip_next);
		return;
	}
	icl_pdu_queue(request);
}

/*
 * Same as iscsi_pdu_queue_locked(), but acquires and releases the session
 * lock itself.
 */
static void
iscsi_pdu_queue(struct icl_pdu *request)
{
	struct iscsi_session *is;

	is = PDU_SESSION(request);
	ISCSI_SESSION_LOCK(is);
	iscsi_pdu_queue_locked(request);
	ISCSI_SESSION_UNLOCK(is);
}

/*
 * Build and queue a Logout Request with reason "close session".  Does
 * nothing if the PDU cannot be allocated.  The PDU is queued with
 * iscsi_pdu_queue_locked(), which asserts that the session lock is held.
 */
static void
iscsi_session_logout(struct iscsi_session *is)
{
	struct icl_pdu *request;
	struct iscsi_bhs_logout_request *bhslr;

	request = icl_pdu_new(is->is_conn, M_NOWAIT);
	if (request == NULL)
		return;

	bhslr = (struct iscsi_bhs_logout_request *)request->ip_bhs;
	bhslr->bhslr_opcode = ISCSI_BHS_OPCODE_LOGOUT_REQUEST;
	bhslr->bhslr_reason = BHSLR_REASON_CLOSE_SESSION;
	iscsi_pdu_queue_locked(request);
}

/*
 * Finish one outstanding task.  If it has a CCB, the CCB is completed with
 * 'status' and the device queue is frozen unless CAM_DEV_QFRZN was already
 * set.  The task is then passed to iscsi_outstanding_remove().  Session
 * lock must be held.
 */
static void
iscsi_session_terminate_task(struct iscsi_session *is,
    struct iscsi_outstanding *io, cam_status status)
{

	ISCSI_SESSION_LOCK_ASSERT(is);

	if (io->io_ccb != NULL) {
		/* Replace the status code, keeping the other flag bits. */
		io->io_ccb->ccb_h.status &= ~(CAM_SIM_QUEUED | CAM_STATUS_MASK);
		io->io_ccb->ccb_h.status |= status;
		if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
			io->io_ccb->ccb_h.status |= CAM_DEV_QFRZN;
			xpt_freeze_devq(io->io_ccb->ccb_h.path, 1);
			ISCSI_SESSION_DEBUG(is, "freezing devq");
		}
		xpt_done(io->io_ccb);
	}
	iscsi_outstanding_remove(is, io);
}

/*
 * Apply iscsi_session_terminate_task() with 'status' to every entry on
 * is_outstanding.  Session lock must be held.
 */
static void
iscsi_session_terminate_tasks(struct iscsi_session *is, cam_status status)
{
	struct iscsi_outstanding *io, *tmp;

	ISCSI_SESSION_LOCK_ASSERT(is);

	/* _SAFE variant: each entry is handed to iscsi_outstanding_remove(). */
	TAILQ_FOREACH_SAFE(io, &is->is_outstanding, io_next, tmp) {
		iscsi_session_terminate_task(is, io, status);
	}
}

/*
 * Quiesce the session: freeze the SIM queue, free postponed PDUs and
 * terminate all outstanding tasks.  With 'destroy_sim' false the tasks are
 * handed back for requeueing; the 'destroy_sim' true path follows the
 * early return below.  Session lock must be held.
 */
static void
iscsi_session_cleanup(struct iscsi_session *is, bool destroy_sim)
{
	struct icl_pdu *pdu;

	ISCSI_SESSION_LOCK_ASSERT(is);

	/*
	 * Don't queue any new PDUs.
	 */
	if (is->is_sim != NULL && is->is_simq_frozen == false) {
		ISCSI_SESSION_DEBUG(is, "freezing");
		xpt_freeze_simq(is->is_sim, 1);
		is->is_simq_frozen = true;
	}

	/*
	 * Remove postponed PDUs.
	 */
	/* Undo the freeze iscsi_pdu_queue_locked() took for this list. */
	if (!STAILQ_EMPTY(&is->is_postponed))
		xpt_release_simq(is->is_sim, 1);
	while ((pdu = STAILQ_FIRST(&is->is_postponed)) != NULL) {
		STAILQ_REMOVE_HEAD(&is->is_postponed, ip_next);
		icl_pdu_free(pdu);
	}

	if (destroy_sim == false) {
		/*
		 * Terminate SCSI tasks, asking CAM to requeue them.
*/ iscsi_session_terminate_tasks(is, CAM_REQUEUE_REQ); return; } iscsi_session_terminate_tasks(is, CAM_DEV_NOT_THERE); if (is->is_sim == NULL) return; ISCSI_SESSION_DEBUG(is, "deregistering SIM"); xpt_async(AC_LOST_DEVICE, is->is_path, NULL); if (is->is_simq_frozen) { is->is_simq_frozen = false; xpt_release_simq(is->is_sim, 1); } xpt_free_path(is->is_path); is->is_path = NULL; xpt_bus_deregister(cam_sim_path(is->is_sim)); cam_sim_free(is->is_sim, TRUE /*free_devq*/); is->is_sim = NULL; is->is_devq = NULL; } static void iscsi_maintenance_thread_reconnect(struct iscsi_session *is) { /* * As we will be reconnecting shortly, * discard outstanding data immediately on * close(), also notify peer via RST if * any packets come in. */ struct socket *so; so = is->is_conn->ic_socket; if (so != NULL) { struct sockopt sopt; struct linger sl; sopt.sopt_dir = SOPT_SET; sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_LINGER; sopt.sopt_val = &sl; sopt.sopt_valsize = sizeof(sl); sl.l_onoff = 1; /* non-zero value enables linger option in kernel */ sl.l_linger = 0; /* timeout interval in seconds */ sosetopt(is->is_conn->ic_socket, &sopt); } icl_conn_close(is->is_conn); ISCSI_SESSION_LOCK(is); is->is_connected = false; is->is_reconnecting = false; is->is_login_phase = false; #ifdef ICL_KERNEL_PROXY if (is->is_login_pdu != NULL) { icl_pdu_free(is->is_login_pdu); is->is_login_pdu = NULL; } cv_signal(&is->is_login_cv); #endif if (fail_on_disconnection) { ISCSI_SESSION_DEBUG(is, "connection failed, destroying devices"); iscsi_session_cleanup(is, true); } else { iscsi_session_cleanup(is, false); } KASSERT(TAILQ_EMPTY(&is->is_outstanding), ("destroying session with active tasks")); KASSERT(STAILQ_EMPTY(&is->is_postponed), ("destroying session with postponed PDUs")); if (is->is_conf.isc_enable == 0 && is->is_conf.isc_discovery == 0) { ISCSI_SESSION_UNLOCK(is); return; } /* * Request immediate reconnection from iscsid(8). 
*/ //ISCSI_SESSION_DEBUG(is, "waking up iscsid(8)"); is->is_waiting_for_iscsid = true; strlcpy(is->is_reason, "Waiting for iscsid(8)", sizeof(is->is_reason)); is->is_timeout = 0; ISCSI_SESSION_UNLOCK(is); cv_signal(&is->is_softc->sc_cv); } static void iscsi_maintenance_thread_terminate(struct iscsi_session *is) { struct iscsi_softc *sc; sc = is->is_softc; sx_xlock(&sc->sc_lock); TAILQ_REMOVE(&sc->sc_sessions, is, is_next); sx_xunlock(&sc->sc_lock); icl_conn_close(is->is_conn); callout_drain(&is->is_callout); ISCSI_SESSION_LOCK(is); KASSERT(is->is_terminating, ("is_terminating == false")); #ifdef ICL_KERNEL_PROXY if (is->is_login_pdu != NULL) { icl_pdu_free(is->is_login_pdu); is->is_login_pdu = NULL; } cv_signal(&is->is_login_cv); #endif iscsi_session_cleanup(is, true); KASSERT(TAILQ_EMPTY(&is->is_outstanding), ("destroying session with active tasks")); KASSERT(STAILQ_EMPTY(&is->is_postponed), ("destroying session with postponed PDUs")); ISCSI_SESSION_UNLOCK(is); icl_conn_free(is->is_conn); mtx_destroy(&is->is_lock); cv_destroy(&is->is_maintenance_cv); #ifdef ICL_KERNEL_PROXY cv_destroy(&is->is_login_cv); #endif ISCSI_SESSION_DEBUG(is, "terminated"); free(is, M_ISCSI); /* * The iscsi_unload() routine might be waiting. */ cv_signal(&sc->sc_cv); } static void iscsi_maintenance_thread(void *arg) { struct iscsi_session *is = arg; ISCSI_SESSION_LOCK(is); for (;;) { if (is->is_reconnecting == false && is->is_terminating == false && (STAILQ_EMPTY(&is->is_postponed) || ISCSI_SNGT(is->is_cmdsn, is->is_maxcmdsn))) cv_wait(&is->is_maintenance_cv, &is->is_lock); /* Terminate supersedes reconnect. 
*/ if (is->is_terminating) { ISCSI_SESSION_UNLOCK(is); iscsi_maintenance_thread_terminate(is); kthread_exit(); return; } if (is->is_reconnecting) { ISCSI_SESSION_UNLOCK(is); iscsi_maintenance_thread_reconnect(is); ISCSI_SESSION_LOCK(is); continue; } iscsi_session_send_postponed(is); } ISCSI_SESSION_UNLOCK(is); } static void iscsi_session_reconnect(struct iscsi_session *is) { /* * XXX: We can't use locking here, because * it's being called from various contexts. * Hope it doesn't break anything. */ if (is->is_reconnecting) return; is->is_reconnecting = true; cv_signal(&is->is_maintenance_cv); } static void iscsi_session_terminate(struct iscsi_session *is) { if (is->is_terminating) return; is->is_terminating = true; #if 0 iscsi_session_logout(is); #endif cv_signal(&is->is_maintenance_cv); } static void iscsi_callout(void *context) { struct icl_pdu *request; struct iscsi_bhs_nop_out *bhsno; struct iscsi_session *is; bool reconnect_needed = false; sbintime_t sbt, pr; is = context; ISCSI_SESSION_LOCK(is); if (is->is_terminating) { ISCSI_SESSION_UNLOCK(is); return; } sbt = mstosbt(995); pr = mstosbt(10); callout_schedule_sbt(&is->is_callout, sbt, pr, 0); if (is->is_conf.isc_enable == 0) goto out; is->is_timeout++; if (is->is_waiting_for_iscsid) { if (iscsid_timeout > 0 && is->is_timeout > iscsid_timeout) { ISCSI_SESSION_WARN(is, "timed out waiting for iscsid(8) " "for %d seconds; reconnecting", is->is_timeout); reconnect_needed = true; } goto out; } if (is->is_login_phase) { if (is->is_login_timeout > 0 && is->is_timeout > is->is_login_timeout) { ISCSI_SESSION_WARN(is, "login timed out after %d seconds; " "reconnecting", is->is_timeout); reconnect_needed = true; } goto out; } if (is->is_ping_timeout <= 0) { /* * Pings are disabled. Don't send NOP-Out in this case. * Reset the timeout, to avoid triggering reconnection, * should the user decide to reenable them. 
*/ is->is_timeout = 0; goto out; } if (is->is_timeout >= is->is_ping_timeout) { ISCSI_SESSION_WARN(is, "no ping reply (NOP-In) after %d seconds; " "reconnecting", is->is_ping_timeout); reconnect_needed = true; goto out; } ISCSI_SESSION_UNLOCK(is); /* * If the ping was reset less than one second ago - which means * that we've received some PDU during the last second - assume * the traffic flows correctly and don't bother sending a NOP-Out. * * (It's 2 - one for one second, and one for incrementing is_timeout * earlier in this routine.) */ if (is->is_timeout < 2) return; request = icl_pdu_new(is->is_conn, M_NOWAIT); if (request == NULL) { ISCSI_SESSION_WARN(is, "failed to allocate PDU"); return; } bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs; bhsno->bhsno_opcode = ISCSI_BHS_OPCODE_NOP_OUT | ISCSI_BHS_OPCODE_IMMEDIATE; bhsno->bhsno_flags = 0x80; bhsno->bhsno_target_transfer_tag = 0xffffffff; iscsi_pdu_queue(request); return; out: if (is->is_terminating) { ISCSI_SESSION_UNLOCK(is); return; } ISCSI_SESSION_UNLOCK(is); if (reconnect_needed) iscsi_session_reconnect(is); } static void iscsi_pdu_update_statsn(const struct icl_pdu *response) { const struct iscsi_bhs_data_in *bhsdi; struct iscsi_session *is; uint32_t expcmdsn, maxcmdsn, statsn; is = PDU_SESSION(response); ISCSI_SESSION_LOCK_ASSERT(is); /* * We're only using fields common for all the response * (target -> initiator) PDUs. */ bhsdi = (const struct iscsi_bhs_data_in *)response->ip_bhs; /* * Ok, I lied. In case of Data-In, "The fields StatSN, Status, * and Residual Count only have meaningful content if the S bit * is set to 1", so we also need to check the bit specific for * Data-In PDU. 
*/
	if (bhsdi->bhsdi_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_IN ||
	    (bhsdi->bhsdi_flags & BHSDI_FLAGS_S) != 0) {
		statsn = ntohl(bhsdi->bhsdi_statsn);
		/* Accept only the current StatSN or the one right after it. */
		if (statsn != is->is_statsn && statsn != (is->is_statsn + 1)) {
			/* XXX: This is normal situation for MCS */
			ISCSI_SESSION_WARN(is, "PDU 0x%x StatSN %u != "
			    "session ExpStatSN %u (or + 1); reconnecting",
			    bhsdi->bhsdi_opcode, statsn, is->is_statsn);
			iscsi_session_reconnect(is);
		}
		if (ISCSI_SNGT(statsn, is->is_statsn))
			is->is_statsn = statsn;
	}

	expcmdsn = ntohl(bhsdi->bhsdi_expcmdsn);
	maxcmdsn = ntohl(bhsdi->bhsdi_maxcmdsn);

	if (ISCSI_SNLT(maxcmdsn + 1, expcmdsn)) {
		ISCSI_SESSION_DEBUG(is,
		    "PDU MaxCmdSN %u + 1 < PDU ExpCmdSN %u; ignoring",
		    maxcmdsn, expcmdsn);
	} else {
		if (ISCSI_SNGT(maxcmdsn, is->is_maxcmdsn)) {
			is->is_maxcmdsn = maxcmdsn;

			/*
			 * Command window increased; kick the maintenance thread
			 * to send out postponed commands.
			 */
			if (!STAILQ_EMPTY(&is->is_postponed))
				cv_signal(&is->is_maintenance_cv);
		} else if (ISCSI_SNLT(maxcmdsn, is->is_maxcmdsn)) {
			/* XXX: This is normal situation for MCS */
			ISCSI_SESSION_DEBUG(is,
			    "PDU MaxCmdSN %u < session MaxCmdSN %u; ignoring",
			    maxcmdsn, is->is_maxcmdsn);
		}

		if (ISCSI_SNGT(expcmdsn, is->is_expcmdsn)) {
			is->is_expcmdsn = expcmdsn;
		} else if (ISCSI_SNLT(expcmdsn, is->is_expcmdsn)) {
			/* XXX: This is normal situation for MCS */
			ISCSI_SESSION_DEBUG(is,
			    "PDU ExpCmdSN %u < session ExpCmdSN %u; ignoring",
			    expcmdsn, is->is_expcmdsn);
		}
	}

	/*
	 * Every incoming PDU - not just NOP-In - resets the ping timer.
	 * The purpose of the timeout is to reset the connection when it stalls;
	 * we don't want this to happen when NOP-In or NOP-Out ends up delayed
	 * in some queue.
	 */
	is->is_timeout = 0;
}

/*
 * Handle one received PDU: take the session lock, update the sequence
 * numbers and dispatch on the opcode.  The SCSI Response and Data-In
 * handlers drop the session lock themselves; for the other opcodes the
 * lock is released here after the handler returns.
 */
static void
iscsi_receive_callback(struct icl_pdu *response)
{
	struct iscsi_session *is;

	is = PDU_SESSION(response);

	ISCSI_SESSION_LOCK(is);

	iscsi_pdu_update_statsn(response);

#ifdef ICL_KERNEL_PROXY
	if (is->is_login_phase) {
		/* Keep one login PDU; free any that arrive while one is held. */
		if (is->is_login_pdu == NULL)
			is->is_login_pdu = response;
		else
			icl_pdu_free(response);
		ISCSI_SESSION_UNLOCK(is);
		cv_signal(&is->is_login_cv);
		return;
	}
#endif

	/*
	 * The handling routine is responsible for freeing the PDU
	 * when it's no longer needed.
	 */
	switch (response->ip_bhs->bhs_opcode) {
	case ISCSI_BHS_OPCODE_NOP_IN:
		iscsi_pdu_handle_nop_in(response);
		ISCSI_SESSION_UNLOCK(is);
		break;
	case ISCSI_BHS_OPCODE_SCSI_RESPONSE:
		iscsi_pdu_handle_scsi_response(response);
		/* Session lock dropped inside. */
		ISCSI_SESSION_LOCK_ASSERT_NOT(is);
		break;
	case ISCSI_BHS_OPCODE_TASK_RESPONSE:
		iscsi_pdu_handle_task_response(response);
		ISCSI_SESSION_UNLOCK(is);
		break;
	case ISCSI_BHS_OPCODE_SCSI_DATA_IN:
		iscsi_pdu_handle_data_in(response);
		/* Session lock dropped inside.
*/
		ISCSI_SESSION_LOCK_ASSERT_NOT(is);
		break;
	case ISCSI_BHS_OPCODE_LOGOUT_RESPONSE:
		iscsi_pdu_handle_logout_response(response);
		ISCSI_SESSION_UNLOCK(is);
		break;
	case ISCSI_BHS_OPCODE_R2T:
		iscsi_pdu_handle_r2t(response);
		ISCSI_SESSION_UNLOCK(is);
		break;
	case ISCSI_BHS_OPCODE_ASYNC_MESSAGE:
		iscsi_pdu_handle_async_message(response);
		ISCSI_SESSION_UNLOCK(is);
		break;
	case ISCSI_BHS_OPCODE_REJECT:
		iscsi_pdu_handle_reject(response);
		ISCSI_SESSION_UNLOCK(is);
		break;
	default:
		/* No handler ran, so the PDU is freed here. */
		ISCSI_SESSION_WARN(is, "received PDU with unsupported "
		    "opcode 0x%x; reconnecting",
		    response->ip_bhs->bhs_opcode);
		iscsi_session_reconnect(is);
		ISCSI_SESSION_UNLOCK(is);
		icl_pdu_free(response);
	}
}

/*
 * Connection error handler: log a warning and request a reconnect.
 */
static void
iscsi_error_callback(struct icl_conn *ic)
{
	struct iscsi_session *is;

	is = CONN_SESSION(ic);

	ISCSI_SESSION_WARN(is, "connection error; reconnecting");
	iscsi_session_reconnect(is);
}

/*
 * Handle a NOP-In.  A NOP-In whose Target Transfer Tag is 0xffffffff needs
 * no answer.  Any other is answered with a NOP-Out that carries the same
 * Target Transfer Tag and a copy of the received data segment.  Always
 * frees 'response'.  Allocation failures trigger a reconnect.  The reply is
 * queued with iscsi_pdu_queue_locked(), which asserts that the session
 * lock is held.
 */
static void
iscsi_pdu_handle_nop_in(struct icl_pdu *response)
{
	struct iscsi_session *is;
	struct iscsi_bhs_nop_out *bhsno;
	struct iscsi_bhs_nop_in *bhsni;
	struct icl_pdu *request;
	void *data = NULL;
	size_t datasize;
	int error;

	is = PDU_SESSION(response);
	bhsni = (struct iscsi_bhs_nop_in *)response->ip_bhs;

	if (bhsni->bhsni_target_transfer_tag == 0xffffffff) {
		/*
		 * Nothing to do; iscsi_pdu_update_statsn() already
		 * zeroed the timeout.
		 */
		icl_pdu_free(response);
		return;
	}

	/* Copy the data segment out so it can be echoed back. */
	datasize = icl_pdu_data_segment_length(response);
	if (datasize > 0) {
		data = malloc(datasize, M_ISCSI, M_NOWAIT | M_ZERO);
		if (data == NULL) {
			ISCSI_SESSION_WARN(is, "failed to allocate memory; "
			    "reconnecting");
			icl_pdu_free(response);
			iscsi_session_reconnect(is);
			return;
		}
		icl_pdu_get_data(response, 0, data, datasize);
	}

	request = icl_pdu_new(response->ip_conn, M_NOWAIT);
	if (request == NULL) {
		ISCSI_SESSION_WARN(is, "failed to allocate memory; "
		    "reconnecting");
		/* 'data' is still NULL here when there was no data segment. */
		free(data, M_ISCSI);
		icl_pdu_free(response);
		iscsi_session_reconnect(is);
		return;
	}
	bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs;
	bhsno->bhsno_opcode = ISCSI_BHS_OPCODE_NOP_OUT |
	    ISCSI_BHS_OPCODE_IMMEDIATE;
	bhsno->bhsno_flags = 0x80;
	bhsno->bhsno_initiator_task_tag = 0xffffffff;
	bhsno->bhsno_target_transfer_tag = bhsni->bhsni_target_transfer_tag;
	if (datasize > 0) {
		error = icl_pdu_append_data(request, data, datasize, M_NOWAIT);
		if (error != 0) {
			ISCSI_SESSION_WARN(is, "failed to allocate memory; "
			    "reconnecting");
			free(data, M_ISCSI);
			icl_pdu_free(request);
			icl_pdu_free(response);
			iscsi_session_reconnect(is);
			return;
		}
		/* The local copy is released once it has been appended. */
		free(data, M_ISCSI);
	}

	icl_pdu_free(response);
	iscsi_pdu_queue_locked(request);
}

/*
 * Handle a SCSI Response PDU: find the outstanding task by its Initiator
 * Task Tag, validate ExpDataSN, translate the iSCSI response and SCSI
 * status into a CAM status, copy out sense data and complete the CCB.
 * Drops the session lock on every path and frees 'response'.
 */
static void
iscsi_pdu_handle_scsi_response(struct icl_pdu *response)
{
	struct iscsi_bhs_scsi_response *bhssr;
	struct iscsi_outstanding *io;
	struct iscsi_session *is;
	union ccb *ccb;
	struct ccb_scsiio *csio;
	size_t data_segment_len, received;
	uint16_t sense_len;
	uint32_t resid;

	is = PDU_SESSION(response);

	bhssr = (struct iscsi_bhs_scsi_response *)response->ip_bhs;
	io = iscsi_outstanding_find(is, bhssr->bhssr_initiator_task_tag);
	/* A task without a CCB is not a SCSI command. */
	if (io == NULL || io->io_ccb == NULL) {
		ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhssr->bhssr_initiator_task_tag);
		icl_pdu_free(response);
		iscsi_session_reconnect(is);
		ISCSI_SESSION_UNLOCK(is);
		return;
	}

	ccb = io->io_ccb;
	if (bhssr->bhssr_response == BHSSR_RESPONSE_COMMAND_COMPLETED) {
		if (ntohl(bhssr->bhssr_expdatasn) != io->io_datasn) {
ISCSI_SESSION_WARN(is,
			    "ExpDataSN mismatch in SCSI Response (%u vs %u)",
			    ntohl(bhssr->bhssr_expdatasn), io->io_datasn);

			/*
			 * XXX: Permit an ExpDataSN of zero for errors.
			 *
			 * This doesn't conform to RFC 7143, but some
			 * targets seem to do this.
			 */
			if (bhssr->bhssr_status != 0 &&
			    bhssr->bhssr_expdatasn == htonl(0))
				goto skip_expdatasn;

			icl_pdu_free(response);
			iscsi_session_reconnect(is);
			ISCSI_SESSION_UNLOCK(is);
			return;
		}
	} else {
		/* Any other response value must carry an ExpDataSN of zero. */
		if (bhssr->bhssr_expdatasn != htonl(0)) {
			ISCSI_SESSION_WARN(is,
			    "ExpDataSN mismatch in SCSI Response (%u vs 0)",
			    ntohl(bhssr->bhssr_expdatasn));
			icl_pdu_free(response);
			iscsi_session_reconnect(is);
			ISCSI_SESSION_UNLOCK(is);
			return;
		}
	}
skip_expdatasn:

	/*
	 * With iSER, after getting good response we can be sure
	 * that all the data has been successfully transferred.
	 */
	if (is->is_conn->ic_iser) {
		resid = ntohl(bhssr->bhssr_residual_count);
		if (bhssr->bhssr_flags & BHSSR_FLAGS_RESIDUAL_UNDERFLOW) {
			io->io_received = ccb->csio.dxfer_len - resid;
		} else if (bhssr->bhssr_flags & BHSSR_FLAGS_RESIDUAL_OVERFLOW) {
			/* io_received is left unchanged on overflow. */
			ISCSI_SESSION_WARN(is, "overflow: target indicates %d", resid);
		} else {
			io->io_received = ccb->csio.dxfer_len;
		}
	}

	/* Saved before 'io' is handed to iscsi_outstanding_remove(). */
	received = io->io_received;
	iscsi_outstanding_remove(is, io);
	ISCSI_SESSION_UNLOCK(is);

	/* Map the iSCSI response and SCSI status to a CAM status. */
	if (bhssr->bhssr_response != BHSSR_RESPONSE_COMMAND_COMPLETED) {
		ISCSI_SESSION_WARN(is, "service response 0x%x", bhssr->bhssr_response);
		if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
			xpt_freeze_devq(ccb->ccb_h.path, 1);
			ISCSI_SESSION_DEBUG(is, "freezing devq");
		}
		ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN;
	} else if (bhssr->bhssr_status == 0) {
		ccb->ccb_h.status = CAM_REQ_CMP;
	} else {
		if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
			xpt_freeze_devq(ccb->ccb_h.path, 1);
			ISCSI_SESSION_DEBUG(is, "freezing devq");
		}
		ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_DEV_QFRZN;
		ccb->csio.scsi_status = bhssr->bhssr_status;
	}

	/*
	 * A non-empty data segment starts with a 16-bit big-endian length,
	 * followed by that many bytes which are copied to csio->sense_data.
	 */
	csio = &ccb->csio;
	data_segment_len = icl_pdu_data_segment_length(response);
	if (data_segment_len > 0) {
		if (data_segment_len < sizeof(sense_len)) {
			ISCSI_SESSION_WARN(is, "truncated data segment (%zd bytes)",
			    data_segment_len);
			if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
				xpt_freeze_devq(ccb->ccb_h.path, 1);
				ISCSI_SESSION_DEBUG(is, "freezing devq");
			}
			ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN;
			goto out;
		}
		icl_pdu_get_data(response, 0, &sense_len, sizeof(sense_len));
		sense_len = ntohs(sense_len);
#if 0
		ISCSI_SESSION_DEBUG(is, "sense_len %d, data len %zd",
		    sense_len, data_segment_len);
#endif
		if (sizeof(sense_len) + sense_len > data_segment_len) {
			ISCSI_SESSION_WARN(is, "truncated data segment "
			    "(%zd bytes, should be %zd)",
			    data_segment_len, sizeof(sense_len) + sense_len);
			if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
				xpt_freeze_devq(ccb->ccb_h.path, 1);
				ISCSI_SESSION_DEBUG(is, "freezing devq");
			}
			ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN;
			goto out;
		} else if (sizeof(sense_len) + sense_len < data_segment_len)
			ISCSI_SESSION_WARN(is, "oversize data segment "
			    "(%zd bytes, should be %zd)",
			    data_segment_len, sizeof(sense_len) + sense_len);
		/* Never copy more than the CCB's sense buffer can take. */
		if (sense_len > csio->sense_len) {
			ISCSI_SESSION_DEBUG(is, "truncating sense from %d to %d",
			    sense_len, csio->sense_len);
			sense_len = csio->sense_len;
		}
		icl_pdu_get_data(response, sizeof(sense_len), &csio->sense_data, sense_len);
		csio->sense_resid = csio->sense_len - sense_len;
		ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
	}

out:
	if (bhssr->bhssr_flags & BHSSR_FLAGS_RESIDUAL_UNDERFLOW)
		csio->resid = ntohl(bhssr->bhssr_residual_count);

	if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
		KASSERT(received <= csio->dxfer_len,
		    ("received > csio->dxfer_len"));
		if (received < csio->dxfer_len) {
			/*
			 * The locally counted residual wins; a differing
			 * value from the target is only reported.
			 */
			if (csio->resid != csio->dxfer_len - received) {
				ISCSI_SESSION_WARN(is, "underflow mismatch: "
				    "target indicates %d, we calculated %zd",
				    csio->resid, csio->dxfer_len - received);
			}
			csio->resid = csio->dxfer_len - received;
		}
	}

	xpt_done(ccb);
	icl_pdu_free(response);
}

/*
 * Handle a Task Management Response: look up the outstanding task
 * management request by its Initiator Task Tag and, if the function
 * completed, abort the task it referenced.  Always frees 'response'.
 */
static void
iscsi_pdu_handle_task_response(struct icl_pdu *response)
{
	struct iscsi_bhs_task_management_response *bhstmr;
	struct iscsi_outstanding *io, *aio;
	struct iscsi_session *is;

	is = PDU_SESSION(response);

	bhstmr = (struct iscsi_bhs_task_management_response *)response->ip_bhs;
	io = iscsi_outstanding_find(is, bhstmr->bhstmr_initiator_task_tag);
	/* Task management requests are the entries without a CCB. */
	if (io == NULL || io->io_ccb != NULL) {
		ISCSI_SESSION_WARN(is, "bad itt 0x%x",
		    bhstmr->bhstmr_initiator_task_tag);
		icl_pdu_free(response);
		iscsi_session_reconnect(is);
		return;
	}

	if (bhstmr->bhstmr_response != BHSTMR_RESPONSE_FUNCTION_COMPLETE) {
		ISCSI_SESSION_WARN(is, "task response 0x%x",
		    bhstmr->bhstmr_response);
	} else {
		/* Abort the referenced task if it is still outstanding. */
		aio = iscsi_outstanding_find(is, io->io_referenced_task_tag);
		if (aio != NULL && aio->io_ccb != NULL)
			iscsi_session_terminate_task(is, aio, CAM_REQ_ABORTED);
	}

	iscsi_outstanding_remove(is, io);
	icl_pdu_free(response);
}

/*
 * Copy 'data_segment_len' bytes, starting at 'pdu_offset' in the PDU, into
 * the CCB's data buffer at offset 'oreceived'.  CAM_DATA_BIO and
 * CAM_DATA_VADDR are the only buffer kinds handled.
 */
static void
iscsi_pdu_get_data_csio(struct icl_pdu *response, size_t pdu_offset,
    struct ccb_scsiio *csio, size_t oreceived, size_t data_segment_len)
{
	switch (csio->ccb_h.flags & CAM_DATA_MASK) {
	case CAM_DATA_BIO:
		icl_pdu_get_bio(response, pdu_offset,
		    (struct bio *)csio->data_ptr, oreceived, data_segment_len);
		break;
	case CAM_DATA_VADDR:
		icl_pdu_get_data(response, pdu_offset,
		    csio->data_ptr + oreceived, data_segment_len);
		break;
	default:
		__assert_unreachable();
	}
}

/*
 * Handle a Data-In PDU: validate the task, DataSN, buffer offset and
 * length, then copy the payload into the CCB's buffer.  Drops the session
 * lock on every path visible here.
 */
static void
iscsi_pdu_handle_data_in(struct icl_pdu *response)
{
	struct iscsi_bhs_data_in *bhsdi;
	struct iscsi_outstanding *io;
	struct iscsi_session *is;
	union ccb *ccb;
	struct ccb_scsiio *csio;
	size_t data_segment_len, received, oreceived;

	is = PDU_SESSION(response);
	bhsdi = (struct iscsi_bhs_data_in *)response->ip_bhs;
	io = iscsi_outstanding_find(is, bhsdi->bhsdi_initiator_task_tag);
	if (io == NULL || io->io_ccb == NULL) {
		ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhsdi->bhsdi_initiator_task_tag);
		icl_pdu_free(response);
		iscsi_session_reconnect(is);
		ISCSI_SESSION_UNLOCK(is);
		return;
	}

	if (io->io_datasn != ntohl(bhsdi->bhsdi_datasn)) {
		ISCSI_SESSION_WARN(is, "received Data-In PDU with "
		    "DataSN %u, while expected %u; dropping connection",
		    ntohl(bhsdi->bhsdi_datasn), io->io_datasn);
		icl_pdu_free(response);
		iscsi_session_reconnect(is);
		ISCSI_SESSION_UNLOCK(is);
		return;
	}
	/* One DataSN for this PDU plus one per additional PDU it carries. */
	io->io_datasn += response->ip_additional_pdus + 1;

	data_segment_len = icl_pdu_data_segment_length(response);
	if (data_segment_len == 0) {
		/*
		 * "The sending of 0 length data segments should be avoided,
		 * but initiators and targets MUST be able to properly receive
		 * 0 length data segments."
		 */
		ISCSI_SESSION_UNLOCK(is);
		icl_pdu_free(response);
		return;
	}

	/*
	 * We need to track this for security reasons - without it, malicious target
	 * could respond to SCSI READ without sending Data-In PDUs, which would result
	 * in read operation on the initiator side returning random kernel data.
	 */
	if (ntohl(bhsdi->bhsdi_buffer_offset) != io->io_received) {
		ISCSI_SESSION_WARN(is, "data out of order; expected offset %zd, got %zd",
		    io->io_received, (size_t)ntohl(bhsdi->bhsdi_buffer_offset));
		icl_pdu_free(response);
		iscsi_session_reconnect(is);
		ISCSI_SESSION_UNLOCK(is);
		return;
	}

	ccb = io->io_ccb;
	csio = &ccb->csio;

	/* Refuse data that would run past the end of the CCB's buffer. */
	if (io->io_received + data_segment_len > csio->dxfer_len) {
		ISCSI_SESSION_WARN(is, "oversize data segment (%zd bytes "
		    "at offset %zd, buffer is %d)",
		    data_segment_len, io->io_received, csio->dxfer_len);
		icl_pdu_free(response);
		iscsi_session_reconnect(is);
		ISCSI_SESSION_UNLOCK(is);
		return;
	}

	/* Snapshot the offsets; 'io' is not used after the unlock below. */
	oreceived = io->io_received;
	io->io_received += data_segment_len;
	received = io->io_received;
	if ((bhsdi->bhsdi_flags & BHSDI_FLAGS_S) != 0)
		iscsi_outstanding_remove(is, io);
	ISCSI_SESSION_UNLOCK(is);

	iscsi_pdu_get_data_csio(response, 0, csio, oreceived, data_segment_len);

	/*
	 * XXX: Check F.
	 */
	if ((bhsdi->bhsdi_flags & BHSDI_FLAGS_S) == 0) {
		/*
		 * Nothing more to do.
*/ icl_pdu_free(response); return; } //ISCSI_SESSION_DEBUG(is, "got S flag; status 0x%x", bhsdi->bhsdi_status); if (bhsdi->bhsdi_status == 0) { ccb->ccb_h.status = CAM_REQ_CMP; } else { if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_DEV_QFRZN; csio->scsi_status = bhsdi->bhsdi_status; } if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { KASSERT(received <= csio->dxfer_len, ("received > csio->dxfer_len")); if (received < csio->dxfer_len) { csio->resid = ntohl(bhsdi->bhsdi_residual_count); if (csio->resid != csio->dxfer_len - received) { ISCSI_SESSION_WARN(is, "underflow mismatch: " "target indicates %d, we calculated %zd", csio->resid, csio->dxfer_len - received); } csio->resid = csio->dxfer_len - received; } } xpt_done(ccb); icl_pdu_free(response); } static void iscsi_pdu_handle_logout_response(struct icl_pdu *response) { ISCSI_SESSION_DEBUG(PDU_SESSION(response), "logout response"); icl_pdu_free(response); } static int iscsi_pdu_append_data_csio(struct icl_pdu *request, struct ccb_scsiio *csio, size_t off, size_t len, int how) { switch (csio->ccb_h.flags & CAM_DATA_MASK) { case CAM_DATA_BIO: return (icl_pdu_append_bio(request, (struct bio *)csio->data_ptr, off, len, how)); case CAM_DATA_VADDR: return (icl_pdu_append_data(request, csio->data_ptr + off, len, how)); default: __assert_unreachable(); } } static void iscsi_pdu_handle_r2t(struct icl_pdu *response) { struct icl_pdu *request; struct iscsi_session *is; struct iscsi_bhs_r2t *bhsr2t; struct iscsi_bhs_data_out *bhsdo; struct iscsi_outstanding *io; struct ccb_scsiio *csio; size_t off, len, max_send_data_segment_length, total_len; int error; uint32_t datasn = 0; is = PDU_SESSION(response); bhsr2t = (struct iscsi_bhs_r2t *)response->ip_bhs; io = iscsi_outstanding_find(is, bhsr2t->bhsr2t_initiator_task_tag); if (io == NULL || io->io_ccb == NULL) { ISCSI_SESSION_WARN(is, "bad itt 
0x%x; reconnecting", bhsr2t->bhsr2t_initiator_task_tag); icl_pdu_free(response); iscsi_session_reconnect(is); return; } csio = &io->io_ccb->csio; if ((csio->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_OUT) { ISCSI_SESSION_WARN(is, "received R2T for read command; reconnecting"); icl_pdu_free(response); iscsi_session_reconnect(is); return; } /* * XXX: Verify R2TSN. */ off = ntohl(bhsr2t->bhsr2t_buffer_offset); if (off > csio->dxfer_len) { ISCSI_SESSION_WARN(is, "target requested invalid offset " "%zd, buffer is %d; reconnecting", off, csio->dxfer_len); icl_pdu_free(response); iscsi_session_reconnect(is); return; } total_len = ntohl(bhsr2t->bhsr2t_desired_data_transfer_length); if (total_len == 0 || total_len > csio->dxfer_len) { ISCSI_SESSION_WARN(is, "target requested invalid length " "%zd, buffer is %d; reconnecting", total_len, csio->dxfer_len); icl_pdu_free(response); iscsi_session_reconnect(is); return; } //ISCSI_SESSION_DEBUG(is, "r2t; off %zd, len %zd", off, total_len); if (is->is_conn->ic_hw_isomax != 0) max_send_data_segment_length = is->is_conn->ic_hw_isomax; else max_send_data_segment_length = is->is_conn->ic_max_send_data_segment_length; for (;;) { len = total_len; if (len > max_send_data_segment_length) len = max_send_data_segment_length; if (off + len > csio->dxfer_len) { ISCSI_SESSION_WARN(is, "target requested invalid " "length/offset %zd, buffer is %d; reconnecting", off + len, csio->dxfer_len); icl_pdu_free(response); iscsi_session_reconnect(is); return; } request = icl_pdu_new(response->ip_conn, M_NOWAIT); if (request == NULL) { icl_pdu_free(response); iscsi_session_reconnect(is); return; } bhsdo = (struct iscsi_bhs_data_out *)request->ip_bhs; bhsdo->bhsdo_opcode = ISCSI_BHS_OPCODE_SCSI_DATA_OUT; bhsdo->bhsdo_lun = bhsr2t->bhsr2t_lun; bhsdo->bhsdo_initiator_task_tag = bhsr2t->bhsr2t_initiator_task_tag; bhsdo->bhsdo_target_transfer_tag = bhsr2t->bhsr2t_target_transfer_tag; bhsdo->bhsdo_datasn = htonl(datasn); bhsdo->bhsdo_buffer_offset = htonl(off); 
error = iscsi_pdu_append_data_csio(request, csio, off, len, M_NOWAIT | ICL_NOCOPY); if (error != 0) { ISCSI_SESSION_WARN(is, "failed to allocate memory; " "reconnecting"); icl_pdu_free(request); icl_pdu_free(response); iscsi_session_reconnect(is); return; } datasn += howmany(len, is->is_conn->ic_max_send_data_segment_length); off += len; total_len -= len; if (total_len == 0) { bhsdo->bhsdo_flags |= BHSDO_FLAGS_F; //ISCSI_SESSION_DEBUG(is, "setting F, off %zd", off); } else { //ISCSI_SESSION_DEBUG(is, "not finished, off %zd", off); } iscsi_pdu_queue_locked(request); if (total_len == 0) break; } icl_pdu_free(response); } static void iscsi_pdu_handle_async_message(struct icl_pdu *response) { struct iscsi_bhs_asynchronous_message *bhsam; struct iscsi_session *is; is = PDU_SESSION(response); bhsam = (struct iscsi_bhs_asynchronous_message *)response->ip_bhs; switch (bhsam->bhsam_async_event) { case BHSAM_EVENT_TARGET_REQUESTS_LOGOUT: ISCSI_SESSION_WARN(is, "target requests logout; removing session"); iscsi_session_logout(is); iscsi_session_terminate(is); break; case BHSAM_EVENT_TARGET_TERMINATES_CONNECTION: ISCSI_SESSION_WARN(is, "target indicates it will drop the connection"); break; case BHSAM_EVENT_TARGET_TERMINATES_SESSION: ISCSI_SESSION_WARN(is, "target indicates it will drop the session"); break; default: /* * XXX: Technically, we're obligated to also handle * parameter renegotiation. 
*/ ISCSI_SESSION_WARN(is, "ignoring AsyncEvent %d", bhsam->bhsam_async_event); break; } icl_pdu_free(response); } static void iscsi_pdu_handle_reject(struct icl_pdu *response) { struct iscsi_bhs_reject *bhsr; struct iscsi_session *is; is = PDU_SESSION(response); bhsr = (struct iscsi_bhs_reject *)response->ip_bhs; ISCSI_SESSION_WARN(is, "received Reject PDU, reason 0x%x; protocol error?", bhsr->bhsr_reason); icl_pdu_free(response); } static int iscsi_ioctl_daemon_wait(struct iscsi_softc *sc, - struct iscsi_daemon_request *request) + struct iscsi_daemon_request *request, bool freebsd13) { struct iscsi_session *is; - struct icl_drv_limits idl; int error; sx_slock(&sc->sc_lock); for (;;) { TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { ISCSI_SESSION_LOCK(is); if (is->is_conf.isc_enable == 0 && is->is_conf.isc_discovery == 0) { ISCSI_SESSION_UNLOCK(is); continue; } if (is->is_waiting_for_iscsid) break; ISCSI_SESSION_UNLOCK(is); } if (is == NULL) { if (sc->sc_unloading) { sx_sunlock(&sc->sc_lock); return (ENXIO); } /* * No session requires attention from iscsid(8); wait. */ error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock); if (error != 0) { sx_sunlock(&sc->sc_lock); return (error); } continue; } is->is_waiting_for_iscsid = false; is->is_login_phase = true; is->is_reason[0] = '\0'; ISCSI_SESSION_UNLOCK(is); request->idr_session_id = is->is_id; memcpy(&request->idr_isid, &is->is_isid, sizeof(request->idr_isid)); request->idr_tsih = 0; /* New or reinstated session. 
*/ memcpy(&request->idr_conf, &is->is_conf, sizeof(request->idr_conf)); - error = icl_limits(is->is_conf.isc_offload, - is->is_conf.isc_iser, &idl); - if (error != 0) { - ISCSI_SESSION_WARN(is, "icl_limits for offload \"%s\" " - "failed with error %d", is->is_conf.isc_offload, - error); - sx_sunlock(&sc->sc_lock); - return (error); - } - request->idr_limits.isl_max_recv_data_segment_length = - idl.idl_max_recv_data_segment_length; - request->idr_limits.isl_max_send_data_segment_length = - idl.idl_max_send_data_segment_length; - request->idr_limits.isl_max_burst_length = - idl.idl_max_burst_length; - request->idr_limits.isl_first_burst_length = - idl.idl_first_burst_length; +#ifdef COMPAT_FREEBSD13 + if (freebsd13) { + struct icl_drv_limits idl; + struct iscsi_daemon_request13 *request13; + error = icl_limits(is->is_conf.isc_offload, + is->is_conf.isc_iser, 0, &idl); + if (error != 0) { + ISCSI_SESSION_WARN(is, "icl_limits for " + "offload \"%s\" failed with error %d", + is->is_conf.isc_offload, error); + sx_sunlock(&sc->sc_lock); + return (error); + } + request13 = (struct iscsi_daemon_request13 *)request; + request13->idr_limits.isl_max_recv_data_segment_length = + idl.idl_max_recv_data_segment_length; + request13->idr_limits.isl_max_send_data_segment_length = + idl.idl_max_send_data_segment_length; + request13->idr_limits.isl_max_burst_length = + idl.idl_max_burst_length; + request13->idr_limits.isl_first_burst_length = + idl.idl_first_burst_length; + } +#endif sx_sunlock(&sc->sc_lock); return (0); } } +static int +iscsi_ioctl_daemon_limits(struct iscsi_softc *sc, + struct iscsi_daemon_limits *limits) +{ + struct icl_drv_limits idl; + struct iscsi_session *is; + int error; + + sx_slock(&sc->sc_lock); + + /* + * Find the session to fetch limits for. 
+ */ + TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { + if (is->is_id == limits->idl_session_id) + break; + } + if (is == NULL) { + sx_sunlock(&sc->sc_lock); + return (ESRCH); + } + + error = icl_limits(is->is_conf.isc_offload, is->is_conf.isc_iser, + limits->idl_socket, &idl); + sx_sunlock(&sc->sc_lock); + if (error != 0) { + ISCSI_SESSION_WARN(is, "icl_limits for offload \"%s\" " + "failed with error %d", is->is_conf.isc_offload, error); + return (error); + } + limits->idl_limits.isl_max_recv_data_segment_length = + idl.idl_max_recv_data_segment_length; + limits->idl_limits.isl_max_send_data_segment_length = + idl.idl_max_send_data_segment_length; + limits->idl_limits.isl_max_burst_length = + idl.idl_max_burst_length; + limits->idl_limits.isl_first_burst_length = + idl.idl_first_burst_length; + + return (0); +} + static int iscsi_ioctl_daemon_handoff(struct iscsi_softc *sc, struct iscsi_daemon_handoff *handoff) { struct iscsi_session *is; struct icl_conn *ic; int error; sx_slock(&sc->sc_lock); /* * Find the session to hand off socket to. */ TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == handoff->idh_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } ISCSI_SESSION_LOCK(is); ic = is->is_conn; if (is->is_conf.isc_discovery || is->is_terminating) { ISCSI_SESSION_UNLOCK(is); sx_sunlock(&sc->sc_lock); return (EINVAL); } if (is->is_connected) { /* * This might have happened because another iscsid(8) * instance handed off the connection in the meantime. * Just return. 
*/ ISCSI_SESSION_WARN(is, "handoff on already connected " "session"); ISCSI_SESSION_UNLOCK(is); sx_sunlock(&sc->sc_lock); return (EBUSY); } strlcpy(is->is_target_alias, handoff->idh_target_alias, sizeof(is->is_target_alias)); is->is_tsih = handoff->idh_tsih; is->is_statsn = handoff->idh_statsn; is->is_protocol_level = handoff->idh_protocol_level; is->is_initial_r2t = handoff->idh_initial_r2t; is->is_immediate_data = handoff->idh_immediate_data; ic->ic_max_recv_data_segment_length = handoff->idh_max_recv_data_segment_length; ic->ic_max_send_data_segment_length = handoff->idh_max_send_data_segment_length; is->is_max_burst_length = handoff->idh_max_burst_length; is->is_first_burst_length = handoff->idh_first_burst_length; if (handoff->idh_header_digest == ISCSI_DIGEST_CRC32C) ic->ic_header_crc32c = true; else ic->ic_header_crc32c = false; if (handoff->idh_data_digest == ISCSI_DIGEST_CRC32C) ic->ic_data_crc32c = true; else ic->ic_data_crc32c = false; ic->ic_maxtags = maxtags; is->is_cmdsn = 0; is->is_expcmdsn = 0; is->is_maxcmdsn = 0; is->is_waiting_for_iscsid = false; is->is_login_phase = false; is->is_timeout = 0; is->is_ping_timeout = is->is_conf.isc_ping_timeout; if (is->is_ping_timeout < 0) is->is_ping_timeout = ping_timeout; is->is_login_timeout = is->is_conf.isc_login_timeout; if (is->is_login_timeout < 0) is->is_login_timeout = login_timeout; is->is_connected = true; is->is_reason[0] = '\0'; ISCSI_SESSION_UNLOCK(is); /* * If we're going through the proxy, the idh_socket will be 0, * and the ICL module can simply ignore this call. It can also * use it to determine it's no longer in the Login phase. */ error = icl_conn_handoff(ic, handoff->idh_socket); if (error != 0) { sx_sunlock(&sc->sc_lock); iscsi_session_terminate(is); return (error); } sx_sunlock(&sc->sc_lock); if (is->is_sim != NULL) { /* * When reconnecting, there already is SIM allocated for the session. 
*/ KASSERT(is->is_simq_frozen, ("reconnect without frozen simq")); ISCSI_SESSION_LOCK(is); ISCSI_SESSION_DEBUG(is, "releasing"); is->is_simq_frozen = false; xpt_release_simq(is->is_sim, 1); ISCSI_SESSION_UNLOCK(is); } else { ISCSI_SESSION_LOCK(is); is->is_devq = cam_simq_alloc(ic->ic_maxtags); if (is->is_devq == NULL) { ISCSI_SESSION_UNLOCK(is); ISCSI_SESSION_WARN(is, "failed to allocate simq"); iscsi_session_terminate(is); return (ENOMEM); } is->is_sim = cam_sim_alloc(iscsi_action, NULL, "iscsi", is, is->is_id /* unit */, &is->is_lock, 1, ic->ic_maxtags, is->is_devq); if (is->is_sim == NULL) { ISCSI_SESSION_UNLOCK(is); ISCSI_SESSION_WARN(is, "failed to allocate SIM"); cam_simq_free(is->is_devq); iscsi_session_terminate(is); return (ENOMEM); } if (xpt_bus_register(is->is_sim, NULL, 0) != 0) { ISCSI_SESSION_UNLOCK(is); ISCSI_SESSION_WARN(is, "failed to register bus"); iscsi_session_terminate(is); return (ENOMEM); } error = xpt_create_path(&is->is_path, /*periph*/NULL, cam_sim_path(is->is_sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD); if (error != CAM_REQ_CMP) { ISCSI_SESSION_UNLOCK(is); ISCSI_SESSION_WARN(is, "failed to create path"); iscsi_session_terminate(is); return (ENOMEM); } ISCSI_SESSION_UNLOCK(is); } return (0); } static int iscsi_ioctl_daemon_fail(struct iscsi_softc *sc, struct iscsi_daemon_fail *fail) { struct iscsi_session *is; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == fail->idf_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } ISCSI_SESSION_LOCK(is); ISCSI_SESSION_DEBUG(is, "iscsid(8) failed: %s", fail->idf_reason); strlcpy(is->is_reason, fail->idf_reason, sizeof(is->is_reason)); //is->is_waiting_for_iscsid = false; //is->is_login_phase = true; //iscsi_session_reconnect(is); ISCSI_SESSION_UNLOCK(is); sx_sunlock(&sc->sc_lock); return (0); } #ifdef ICL_KERNEL_PROXY static int iscsi_ioctl_daemon_connect(struct iscsi_softc *sc, struct iscsi_daemon_connect *idc) { struct 
iscsi_session *is; struct sockaddr *from_sa, *to_sa; int error; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == idc->idc_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } sx_sunlock(&sc->sc_lock); if (idc->idc_from_addrlen > 0) { error = getsockaddr(&from_sa, (void *)idc->idc_from_addr, idc->idc_from_addrlen); if (error != 0) { ISCSI_SESSION_WARN(is, "getsockaddr failed with error %d", error); return (error); } } else { from_sa = NULL; } error = getsockaddr(&to_sa, (void *)idc->idc_to_addr, idc->idc_to_addrlen); if (error != 0) { ISCSI_SESSION_WARN(is, "getsockaddr failed with error %d", error); free(from_sa, M_SONAME); return (error); } ISCSI_SESSION_LOCK(is); is->is_statsn = 0; is->is_cmdsn = 0; is->is_expcmdsn = 0; is->is_maxcmdsn = 0; is->is_waiting_for_iscsid = false; is->is_login_phase = true; is->is_timeout = 0; ISCSI_SESSION_UNLOCK(is); error = icl_conn_connect(is->is_conn, idc->idc_domain, idc->idc_socktype, idc->idc_protocol, from_sa, to_sa); free(from_sa, M_SONAME); free(to_sa, M_SONAME); /* * Digests are always disabled during login phase. 
*/ is->is_conn->ic_header_crc32c = false; is->is_conn->ic_data_crc32c = false; return (error); } static int iscsi_ioctl_daemon_send(struct iscsi_softc *sc, struct iscsi_daemon_send *ids) { struct iscsi_session *is; struct icl_pdu *ip; size_t datalen; void *data; int error; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == ids->ids_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } sx_sunlock(&sc->sc_lock); if (is->is_login_phase == false) return (EBUSY); if (is->is_terminating || is->is_reconnecting) return (EIO); datalen = ids->ids_data_segment_len; if (datalen > is->is_conn->ic_max_send_data_segment_length) return (EINVAL); if (datalen > 0) { data = malloc(datalen, M_ISCSI, M_WAITOK); error = copyin(ids->ids_data_segment, data, datalen); if (error != 0) { free(data, M_ISCSI); return (error); } } ip = icl_pdu_new(is->is_conn, M_WAITOK); memcpy(ip->ip_bhs, ids->ids_bhs, sizeof(*ip->ip_bhs)); if (datalen > 0) { error = icl_pdu_append_data(ip, data, datalen, M_WAITOK); KASSERT(error == 0, ("icl_pdu_append_data(..., M_WAITOK) failed")); free(data, M_ISCSI); } iscsi_pdu_queue(ip); return (0); } static int iscsi_ioctl_daemon_receive(struct iscsi_softc *sc, struct iscsi_daemon_receive *idr) { struct iscsi_session *is; struct icl_pdu *ip; void *data; int error; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { if (is->is_id == idr->idr_session_id) break; } if (is == NULL) { sx_sunlock(&sc->sc_lock); return (ESRCH); } sx_sunlock(&sc->sc_lock); if (is->is_login_phase == false) return (EBUSY); ISCSI_SESSION_LOCK(is); while (is->is_login_pdu == NULL && is->is_terminating == false && is->is_reconnecting == false) { error = cv_wait_sig(&is->is_login_cv, &is->is_lock); if (error != 0) { ISCSI_SESSION_UNLOCK(is); return (error); } } if (is->is_terminating || is->is_reconnecting) { ISCSI_SESSION_UNLOCK(is); return (EIO); } ip = is->is_login_pdu; is->is_login_pdu = NULL; 
ISCSI_SESSION_UNLOCK(is); if (ip->ip_data_len > idr->idr_data_segment_len) { icl_pdu_free(ip); return (EMSGSIZE); } copyout(ip->ip_bhs, idr->idr_bhs, sizeof(*ip->ip_bhs)); if (ip->ip_data_len > 0) { data = malloc(ip->ip_data_len, M_ISCSI, M_WAITOK); icl_pdu_get_data(ip, 0, data, ip->ip_data_len); copyout(data, idr->idr_data_segment, ip->ip_data_len); free(data, M_ISCSI); } icl_pdu_free(ip); return (0); } #endif /* ICL_KERNEL_PROXY */ static void iscsi_sanitize_session_conf(struct iscsi_session_conf *isc) { /* * Just make sure all the fields are null-terminated. * * XXX: This is not particularly secure. We should * create our own conf and then copy in relevant * fields. */ isc->isc_initiator[ISCSI_NAME_LEN - 1] = '\0'; isc->isc_initiator_addr[ISCSI_ADDR_LEN - 1] = '\0'; isc->isc_initiator_alias[ISCSI_ALIAS_LEN - 1] = '\0'; isc->isc_target[ISCSI_NAME_LEN - 1] = '\0'; isc->isc_target_addr[ISCSI_ADDR_LEN - 1] = '\0'; isc->isc_user[ISCSI_NAME_LEN - 1] = '\0'; isc->isc_secret[ISCSI_SECRET_LEN - 1] = '\0'; isc->isc_mutual_user[ISCSI_NAME_LEN - 1] = '\0'; isc->isc_mutual_secret[ISCSI_SECRET_LEN - 1] = '\0'; } static bool iscsi_valid_session_conf(const struct iscsi_session_conf *isc) { if (isc->isc_initiator[0] == '\0') { ISCSI_DEBUG("empty isc_initiator"); return (false); } if (isc->isc_target_addr[0] == '\0') { ISCSI_DEBUG("empty isc_target_addr"); return (false); } if (isc->isc_discovery != 0 && isc->isc_target[0] != 0) { ISCSI_DEBUG("non-empty isc_target for discovery session"); return (false); } if (isc->isc_discovery == 0 && isc->isc_target[0] == 0) { ISCSI_DEBUG("empty isc_target for non-discovery session"); return (false); } return (true); } static int iscsi_ioctl_session_add(struct iscsi_softc *sc, struct iscsi_session_add *isa) { struct iscsi_session *is; const struct iscsi_session *is2; int error; sbintime_t sbt, pr; iscsi_sanitize_session_conf(&isa->isa_conf); if (iscsi_valid_session_conf(&isa->isa_conf) == false) return (EINVAL); is = malloc(sizeof(*is), 
M_ISCSI, M_ZERO | M_WAITOK); memcpy(&is->is_conf, &isa->isa_conf, sizeof(is->is_conf)); sx_xlock(&sc->sc_lock); /* * Prevent duplicates. */ TAILQ_FOREACH(is2, &sc->sc_sessions, is_next) { if (!!is->is_conf.isc_discovery != !!is2->is_conf.isc_discovery) continue; if (strcmp(is->is_conf.isc_target_addr, is2->is_conf.isc_target_addr) != 0) continue; if (is->is_conf.isc_discovery == 0 && strcmp(is->is_conf.isc_target, is2->is_conf.isc_target) != 0) continue; sx_xunlock(&sc->sc_lock); free(is, M_ISCSI); return (EBUSY); } is->is_conn = icl_new_conn(is->is_conf.isc_offload, is->is_conf.isc_iser, "iscsi", &is->is_lock); if (is->is_conn == NULL) { sx_xunlock(&sc->sc_lock); free(is, M_ISCSI); return (EINVAL); } is->is_conn->ic_receive = iscsi_receive_callback; is->is_conn->ic_error = iscsi_error_callback; is->is_conn->ic_prv0 = is; TAILQ_INIT(&is->is_outstanding); STAILQ_INIT(&is->is_postponed); mtx_init(&is->is_lock, "iscsi_lock", NULL, MTX_DEF); cv_init(&is->is_maintenance_cv, "iscsi_mt"); #ifdef ICL_KERNEL_PROXY cv_init(&is->is_login_cv, "iscsi_login"); #endif /* * Set some default values, from RFC 3720, section 12. * * These values are updated by the handoff IOCTL, but are * needed prior to the handoff to support sending the ISER * login PDU. */ is->is_conn->ic_max_recv_data_segment_length = 8192; is->is_conn->ic_max_send_data_segment_length = 8192; is->is_max_burst_length = 262144; is->is_first_burst_length = 65536; is->is_softc = sc; sc->sc_last_session_id++; is->is_id = sc->sc_last_session_id; is->is_isid[0] = 0x80; /* RFC 3720, 10.12.5: 10b, "Random" ISID. 
*/ arc4rand(&is->is_isid[1], 5, 0); is->is_tsih = 0; callout_init(&is->is_callout, 1); error = kthread_add(iscsi_maintenance_thread, is, NULL, NULL, 0, 0, "iscsimt"); if (error != 0) { ISCSI_SESSION_WARN(is, "kthread_add(9) failed with error %d", error); sx_xunlock(&sc->sc_lock); return (error); } is->is_ping_timeout = is->is_conf.isc_ping_timeout; if (is->is_ping_timeout < 0) is->is_ping_timeout = ping_timeout; is->is_login_timeout = is->is_conf.isc_login_timeout; if (is->is_login_timeout < 0) is->is_login_timeout = login_timeout; sbt = mstosbt(995); pr = mstosbt(10); callout_reset_sbt(&is->is_callout, sbt, pr, iscsi_callout, is, 0); TAILQ_INSERT_TAIL(&sc->sc_sessions, is, is_next); ISCSI_SESSION_LOCK(is); /* * Don't notify iscsid(8) if the session is disabled and it's not * a discovery session, */ if (is->is_conf.isc_enable == 0 && is->is_conf.isc_discovery == 0) { ISCSI_SESSION_UNLOCK(is); sx_xunlock(&sc->sc_lock); return (0); } is->is_waiting_for_iscsid = true; strlcpy(is->is_reason, "Waiting for iscsid(8)", sizeof(is->is_reason)); ISCSI_SESSION_UNLOCK(is); cv_signal(&sc->sc_cv); sx_xunlock(&sc->sc_lock); return (0); } static bool iscsi_session_conf_matches(unsigned int id1, const struct iscsi_session_conf *c1, unsigned int id2, const struct iscsi_session_conf *c2) { if (id2 != 0 && id2 != id1) return (false); if (c2->isc_target[0] != '\0' && strcmp(c1->isc_target, c2->isc_target) != 0) return (false); if (c2->isc_target_addr[0] != '\0' && strcmp(c1->isc_target_addr, c2->isc_target_addr) != 0) return (false); return (true); } static int iscsi_ioctl_session_remove(struct iscsi_softc *sc, struct iscsi_session_remove *isr) { struct iscsi_session *is, *tmp; bool found = false; iscsi_sanitize_session_conf(&isr->isr_conf); sx_xlock(&sc->sc_lock); TAILQ_FOREACH_SAFE(is, &sc->sc_sessions, is_next, tmp) { ISCSI_SESSION_LOCK(is); if (iscsi_session_conf_matches(is->is_id, &is->is_conf, isr->isr_session_id, &isr->isr_conf)) { found = true; iscsi_session_logout(is); 
iscsi_session_terminate(is);
		}
		ISCSI_SESSION_UNLOCK(is);
	}
	sx_xunlock(&sc->sc_lock);

	/* Nothing matched the requested id/target/address. */
	if (!found)
		return (ESRCH);

	return (0);
}

/*
 * ISCSISLIST: copy one struct iscsi_session_state per session out to the
 * user array isl->isl_pstates.
 *
 * Returns EMSGSIZE when there are more sessions than isl->isl_nentries,
 * or the copyout() error if a copy fails.  On success isl->isl_nentries
 * is overwritten with the number of entries written.
 */
static int
iscsi_ioctl_session_list(struct iscsi_softc *sc, struct iscsi_session_list *isl)
{
	int error;
	unsigned int i = 0;
	struct iscsi_session *is;
	struct iscsi_session_state iss;

	sx_slock(&sc->sc_lock);
	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
		/* The user-supplied array is full but sessions remain. */
		if (i >= isl->isl_nentries) {
			sx_sunlock(&sc->sc_lock);
			return (EMSGSIZE);
		}
		/* Start from zero so fields not set below are copied out as 0. */
		memset(&iss, 0, sizeof(iss));
		memcpy(&iss.iss_conf, &is->is_conf, sizeof(iss.iss_conf));
		iss.iss_id = is->is_id;
		strlcpy(iss.iss_target_alias, is->is_target_alias, sizeof(iss.iss_target_alias));
		strlcpy(iss.iss_reason, is->is_reason, sizeof(iss.iss_reason));
		strlcpy(iss.iss_offload, is->is_conn->ic_offload, sizeof(iss.iss_offload));

		/* Translate the connection's CRC32C booleans to digest codes. */
		if (is->is_conn->ic_header_crc32c)
			iss.iss_header_digest = ISCSI_DIGEST_CRC32C;
		else
			iss.iss_header_digest = ISCSI_DIGEST_NONE;

		if (is->is_conn->ic_data_crc32c)
			iss.iss_data_digest = ISCSI_DIGEST_CRC32C;
		else
			iss.iss_data_digest = ISCSI_DIGEST_NONE;

		iss.iss_max_send_data_segment_length =
		    is->is_conn->ic_max_send_data_segment_length;
		iss.iss_max_recv_data_segment_length =
		    is->is_conn->ic_max_recv_data_segment_length;
		iss.iss_max_burst_length = is->is_max_burst_length;
		iss.iss_first_burst_length = is->is_first_burst_length;
		iss.iss_immediate_data = is->is_immediate_data;
		iss.iss_connected = is->is_connected;

		error = copyout(&iss, isl->isl_pstates + i, sizeof(iss));
		if (error != 0) {
			sx_sunlock(&sc->sc_lock);
			return (error);
		}
		i++;
	}
	sx_sunlock(&sc->sc_lock);

	/* Tell the caller how many entries were filled in. */
	isl->isl_nentries = i;

	return (0);
}

/*
 * ISCSISMODIFY: replace the configuration of the session whose id is
 * ism->ism_session_id, after sanitizing and validating the new one.
 */
static int
iscsi_ioctl_session_modify(struct iscsi_softc *sc,
    struct iscsi_session_modify *ism)
{
	struct iscsi_session *is;
	const struct iscsi_session *is2;

	iscsi_sanitize_session_conf(&ism->ism_conf);
	if (iscsi_valid_session_conf(&ism->ism_conf) == false)
		return (EINVAL);

	sx_xlock(&sc->sc_lock);
	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
		ISCSI_SESSION_LOCK(is);
		if (is->is_id ==
ism->ism_session_id) { /* Note that the session remains locked. */ break; } ISCSI_SESSION_UNLOCK(is); } if (is == NULL) { sx_xunlock(&sc->sc_lock); return (ESRCH); } /* * Prevent duplicates. */ TAILQ_FOREACH(is2, &sc->sc_sessions, is_next) { if (is == is2) continue; if (!!ism->ism_conf.isc_discovery != !!is2->is_conf.isc_discovery) continue; if (strcmp(ism->ism_conf.isc_target_addr, is2->is_conf.isc_target_addr) != 0) continue; if (ism->ism_conf.isc_discovery == 0 && strcmp(ism->ism_conf.isc_target, is2->is_conf.isc_target) != 0) continue; ISCSI_SESSION_UNLOCK(is); sx_xunlock(&sc->sc_lock); return (EBUSY); } sx_xunlock(&sc->sc_lock); memcpy(&is->is_conf, &ism->ism_conf, sizeof(is->is_conf)); ISCSI_SESSION_UNLOCK(is); iscsi_session_reconnect(is); return (0); } static int iscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int mode, struct thread *td) { struct iscsi_softc *sc; sc = dev->si_drv1; switch (cmd) { case ISCSIDWAIT: return (iscsi_ioctl_daemon_wait(sc, - (struct iscsi_daemon_request *)arg)); + (struct iscsi_daemon_request *)arg, false)); +#ifdef COMPAT_FREEBSD13 + case ISCSIDWAIT13: + return (iscsi_ioctl_daemon_wait(sc, + (struct iscsi_daemon_request *)arg, true)); +#endif + case ISCSIDLIMITS: + return (iscsi_ioctl_daemon_limits(sc, + (struct iscsi_daemon_limits *)arg)); case ISCSIDHANDOFF: return (iscsi_ioctl_daemon_handoff(sc, (struct iscsi_daemon_handoff *)arg)); case ISCSIDFAIL: return (iscsi_ioctl_daemon_fail(sc, (struct iscsi_daemon_fail *)arg)); #ifdef ICL_KERNEL_PROXY case ISCSIDCONNECT: return (iscsi_ioctl_daemon_connect(sc, (struct iscsi_daemon_connect *)arg)); case ISCSIDSEND: return (iscsi_ioctl_daemon_send(sc, (struct iscsi_daemon_send *)arg)); case ISCSIDRECEIVE: return (iscsi_ioctl_daemon_receive(sc, (struct iscsi_daemon_receive *)arg)); #endif /* ICL_KERNEL_PROXY */ case ISCSISADD: return (iscsi_ioctl_session_add(sc, (struct iscsi_session_add *)arg)); case ISCSISREMOVE: return (iscsi_ioctl_session_remove(sc, (struct iscsi_session_remove 
*)arg));
	case ISCSISLIST:
		return (iscsi_ioctl_session_list(sc,
		    (struct iscsi_session_list *)arg));
	case ISCSISMODIFY:
		return (iscsi_ioctl_session_modify(sc,
		    (struct iscsi_session_modify *)arg));
	default:
		/* Command not handled by this switch. */
		return (EINVAL);
	}
}

/*
 * Return the session's outstanding entry whose initiator task tag equals
 * initiator_task_tag, or NULL if there is none.  Runs
 * ISCSI_SESSION_LOCK_ASSERT() on the session first.
 */
static struct iscsi_outstanding *
iscsi_outstanding_find(struct iscsi_session *is, uint32_t initiator_task_tag)
{
	struct iscsi_outstanding *io;

	ISCSI_SESSION_LOCK_ASSERT(is);

	TAILQ_FOREACH(io, &is->is_outstanding, io_next) {
		if (io->io_initiator_task_tag == initiator_task_tag)
			return (io);
	}
	return (NULL);
}

/*
 * Return the session's outstanding entry that refers to the given CCB,
 * or NULL if there is none.
 */
static struct iscsi_outstanding *
iscsi_outstanding_find_ccb(struct iscsi_session *is, union ccb *ccb)
{
	struct iscsi_outstanding *io;

	ISCSI_SESSION_LOCK_ASSERT(is);

	TAILQ_FOREACH(io, &is->is_outstanding, io_next) {
		if (io->io_ccb == ccb)
			return (io);
	}
	return (NULL);
}

/*
 * Allocate an outstanding entry for a request and append it to the
 * session's is_outstanding list.
 *
 * The entry records *initiator_task_tagp as it stands after
 * icl_conn_task_setup() returns; that call receives the tag by pointer,
 * so presumably it may rewrite it -- confirm against the ICL backend.
 *
 * Returns the new entry, or NULL when the non-sleeping allocation or the
 * ICL task setup fails (the entry is freed again in the latter case).
 */
static struct iscsi_outstanding *
iscsi_outstanding_add(struct iscsi_session *is, struct icl_pdu *request,
    union ccb *ccb, uint32_t *initiator_task_tagp)
{
	struct iscsi_outstanding *io;
	int error;

	ISCSI_SESSION_LOCK_ASSERT(is);

	io = uma_zalloc(iscsi_outstanding_zone, M_NOWAIT | M_ZERO);
	if (io == NULL) {
		ISCSI_SESSION_WARN(is, "failed to allocate %zd bytes",
		    sizeof(*io));
		return (NULL);
	}

	/*
	 * NOTE(review): iscsi_action_abort() passes ccb == NULL, so
	 * &ccb->csio below is formed from a null pointer in that case.
	 */
	error = icl_conn_task_setup(is->is_conn, request, &ccb->csio,
	    initiator_task_tagp, &io->io_icl_prv);
	if (error != 0) {
		ISCSI_SESSION_WARN(is,
		    "icl_conn_task_setup() failed with error %d", error);
		uma_zfree(iscsi_outstanding_zone, io);
		return (NULL);
	}

	/* A tag may identify at most one outstanding entry. */
	KASSERT(iscsi_outstanding_find(is, *initiator_task_tagp) == NULL,
	    ("initiator_task_tag 0x%x already added", *initiator_task_tagp));

	io->io_initiator_task_tag = *initiator_task_tagp;
	io->io_ccb = ccb;
	TAILQ_INSERT_TAIL(&is->is_outstanding, io, io_next);
	return (io);
}

/*
 * Undo iscsi_outstanding_add(): hand the entry's ICL private data to
 * icl_conn_task_done(), unlink the entry from the session and free it.
 */
static void
iscsi_outstanding_remove(struct iscsi_session *is, struct iscsi_outstanding *io)
{

	ISCSI_SESSION_LOCK_ASSERT(is);

	icl_conn_task_done(is->is_conn, io->io_icl_prv);
	TAILQ_REMOVE(&is->is_outstanding, io, io_next);
	uma_zfree(iscsi_outstanding_zone, io);
}

static void
iscsi_action_abort(struct iscsi_session *is, union ccb *ccb)
{
	struct icl_pdu *request;
	struct iscsi_bhs_task_management_request *bhstmr;
	struct ccb_abort *cab = &ccb->cab;
	struct iscsi_outstanding *io, *aio;
	uint32_t initiator_task_tag;

	ISCSI_SESSION_LOCK_ASSERT(is);

#if 0
	KASSERT(is->is_login_phase == false,
	    ("%s called during Login Phase", __func__));
#else
	/* During the login phase the abort CCB is completed as aborted. */
	if (is->is_login_phase) {
		ccb->ccb_h.status = CAM_REQ_ABORTED;
		xpt_done(ccb);
		return;
	}
#endif

	/*
	 * If the CCB to abort is not outstanding there is nothing to send;
	 * the abort is completed with CAM_REQ_CMP.
	 */
	aio = iscsi_outstanding_find_ccb(is, cab->abort_ccb);
	if (aio == NULL) {
		ccb->ccb_h.status = CAM_REQ_CMP;
		xpt_done(ccb);
		return;
	}

	request = icl_pdu_new(is->is_conn, M_NOWAIT);
	if (request == NULL) {
		ccb->ccb_h.status = CAM_RESRC_UNAVAIL;
		xpt_done(ccb);
		return;
	}

	/* Take the next task tag, skipping the value 0xffffffff. */
	initiator_task_tag = is->is_initiator_task_tag++;
	if (initiator_task_tag == 0xffffffff)
		initiator_task_tag = is->is_initiator_task_tag++;

	/*
	 * The management request is tracked without a CCB (NULL); the
	 * task response handler only accepts entries whose io_ccb is NULL.
	 */
	io = iscsi_outstanding_add(is, request, NULL, &initiator_task_tag);
	if (io == NULL) {
		icl_pdu_free(request);
		ccb->ccb_h.status = CAM_RESRC_UNAVAIL;
		xpt_done(ccb);
		return;
	}
	/* Remember which task the response will refer to. */
	io->io_referenced_task_tag = aio->io_initiator_task_tag;

	bhstmr = (struct iscsi_bhs_task_management_request *)request->ip_bhs;
	bhstmr->bhstmr_opcode = ISCSI_BHS_OPCODE_TASK_REQUEST;
	bhstmr->bhstmr_function = 0x80 | BHSTMR_FUNCTION_ABORT_TASK;
	bhstmr->bhstmr_lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun));
	bhstmr->bhstmr_initiator_task_tag = initiator_task_tag;
	bhstmr->bhstmr_referenced_task_tag = aio->io_initiator_task_tag;

	iscsi_pdu_queue_locked(request);
}

/*
 * Build and queue a SCSI Command PDU for a SCSI I/O CCB.  Runs
 * ISCSI_SESSION_LOCK_ASSERT() on the session first.
 */
static void
iscsi_action_scsiio(struct iscsi_session *is, union ccb *ccb)
{
	struct icl_pdu *request;
	struct iscsi_bhs_scsi_command *bhssc;
	struct ccb_scsiio *csio;
	struct iscsi_outstanding *io;
	size_t len;
	uint32_t initiator_task_tag;
	int error;

	ISCSI_SESSION_LOCK_ASSERT(is);

#if 0
	KASSERT(is->is_login_phase == false,
	    ("%s called during Login Phase", __func__));
#else
	if (is->is_login_phase) {
		ISCSI_SESSION_DEBUG(is, "called during login phase");
		if ((ccb->ccb_h.status &
CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_REQ_ABORTED | CAM_DEV_QFRZN; xpt_done(ccb); return; } #endif request = icl_pdu_new(is->is_conn, M_NOWAIT); if (request == NULL) { if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN; xpt_done(ccb); return; } initiator_task_tag = is->is_initiator_task_tag++; if (initiator_task_tag == 0xffffffff) initiator_task_tag = is->is_initiator_task_tag++; io = iscsi_outstanding_add(is, request, ccb, &initiator_task_tag); if (io == NULL) { icl_pdu_free(request); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN; xpt_done(ccb); return; } csio = &ccb->csio; bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs; bhssc->bhssc_opcode = ISCSI_BHS_OPCODE_SCSI_COMMAND; bhssc->bhssc_flags |= BHSSC_FLAGS_F; switch (csio->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_IN: bhssc->bhssc_flags |= BHSSC_FLAGS_R; break; case CAM_DIR_OUT: bhssc->bhssc_flags |= BHSSC_FLAGS_W; break; } if ((ccb->ccb_h.flags & CAM_TAG_ACTION_VALID) != 0) { switch (csio->tag_action) { case MSG_HEAD_OF_Q_TAG: bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_HOQ; break; case MSG_ORDERED_Q_TAG: bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_ORDERED; break; case MSG_ACA_TASK: bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_ACA; break; case MSG_SIMPLE_Q_TAG: default: bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_SIMPLE; break; } } else bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_UNTAGGED; if (is->is_protocol_level >= 2) { bhssc->bhssc_pri = (csio->priority << BHSSC_PRI_SHIFT) & BHSSC_PRI_MASK; } bhssc->bhssc_lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun)); bhssc->bhssc_initiator_task_tag = initiator_task_tag; bhssc->bhssc_expected_data_transfer_length = 
htonl(csio->dxfer_len); KASSERT(csio->cdb_len <= sizeof(bhssc->bhssc_cdb), ("unsupported CDB size %zd", (size_t)csio->cdb_len)); if (csio->ccb_h.flags & CAM_CDB_POINTER) memcpy(&bhssc->bhssc_cdb, csio->cdb_io.cdb_ptr, csio->cdb_len); else memcpy(&bhssc->bhssc_cdb, csio->cdb_io.cdb_bytes, csio->cdb_len); if (is->is_immediate_data && (csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_OUT) { len = csio->dxfer_len; //ISCSI_SESSION_DEBUG(is, "adding %zd of immediate data", len); if (len > is->is_first_burst_length) { ISCSI_SESSION_DEBUG(is, "len %zd -> %d", len, is->is_first_burst_length); len = is->is_first_burst_length; } if (len > is->is_conn->ic_max_send_data_segment_length) { ISCSI_SESSION_DEBUG(is, "len %zd -> %d", len, is->is_conn->ic_max_send_data_segment_length); len = is->is_conn->ic_max_send_data_segment_length; } error = iscsi_pdu_append_data_csio(request, csio, 0, len, M_NOWAIT | ICL_NOCOPY); if (error != 0) { iscsi_outstanding_remove(is, io); icl_pdu_free(request); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { xpt_freeze_devq(ccb->ccb_h.path, 1); ISCSI_SESSION_DEBUG(is, "freezing devq"); } ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN; xpt_done(ccb); return; } } iscsi_pdu_queue_locked(request); } static void iscsi_action(struct cam_sim *sim, union ccb *ccb) { struct iscsi_session *is; is = cam_sim_softc(sim); ISCSI_SESSION_LOCK_ASSERT(is); if (is->is_terminating || (is->is_connected == false && fail_on_disconnection)) { ccb->ccb_h.status = CAM_DEV_NOT_THERE; xpt_done(ccb); return; } /* * Make sure CAM doesn't sneak in a CCB just after freezing the queue. */ if (is->is_simq_frozen == true) { ccb->ccb_h.status &= ~(CAM_SIM_QUEUED | CAM_STATUS_MASK); ccb->ccb_h.status |= CAM_REQUEUE_REQ; /* Don't freeze the devq - the SIM queue is already frozen. 
*/ xpt_done(ccb); return; } switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_TAG_ABLE; cpi->target_sprt = 0; cpi->hba_misc = PIM_EXTLUNS; /* * XXX: It shouldn't ever be NULL; this could be turned * into a KASSERT eventually. */ if (is->is_conn == NULL) ISCSI_WARN("NULL conn"); else if (is->is_conn->ic_unmapped) cpi->hba_misc |= PIM_UNMAPPED; cpi->hba_eng_cnt = 0; cpi->max_target = 0; /* * Note that the variable below is only relevant for targets * that don't claim compliance with anything above SPC2, which * means they don't support REPORT_LUNS. */ cpi->max_lun = 255; cpi->initiator_id = ~0; strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strlcpy(cpi->hba_vid, "iSCSI", HBA_IDLEN); strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 150000; /* XXX */ cpi->transport = XPORT_ISCSI; cpi->transport_version = 0; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_SPC3; cpi->maxio = maxphys; cpi->ccb_h.status = CAM_REQ_CMP; break; } case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings *cts; struct ccb_trans_settings_scsi *scsi; cts = &ccb->cts; scsi = &cts->proto_specific.scsi; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_SPC3; cts->transport = XPORT_ISCSI; cts->transport_version = 0; scsi->valid = CTS_SCSI_VALID_TQ; scsi->flags = CTS_SCSI_FLAGS_TAG_ENB; cts->ccb_h.status = CAM_REQ_CMP; break; } case XPT_CALC_GEOMETRY: cam_calc_geometry(&ccb->ccg, /*extended*/1); ccb->ccb_h.status = CAM_REQ_CMP; break; #if 0 /* * XXX: What's the point? 
*/ case XPT_RESET_BUS: case XPT_TERM_IO: ISCSI_SESSION_DEBUG(is, "faking success for reset, abort, or term_io"); ccb->ccb_h.status = CAM_REQ_CMP; break; #endif case XPT_ABORT: iscsi_action_abort(is, ccb); return; case XPT_SCSI_IO: iscsi_action_scsiio(is, ccb); return; default: #if 0 ISCSI_SESSION_DEBUG(is, "got unsupported code 0x%x", ccb->ccb_h.func_code); #endif ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; break; } xpt_done(ccb); } static void iscsi_terminate_sessions(struct iscsi_softc *sc) { struct iscsi_session *is; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) iscsi_session_terminate(is); while(!TAILQ_EMPTY(&sc->sc_sessions)) { ISCSI_DEBUG("waiting for sessions to terminate"); cv_wait(&sc->sc_cv, &sc->sc_lock); } ISCSI_DEBUG("all sessions terminated"); sx_sunlock(&sc->sc_lock); } static void iscsi_shutdown_pre(struct iscsi_softc *sc) { struct iscsi_session *is; if (!fail_on_shutdown) return; /* * If we have any sessions waiting for reconnection, request * maintenance thread to fail them immediately instead of waiting * for reconnect timeout. * * This prevents LUNs with mounted filesystems that are supported * by disconnected iSCSI sessions from hanging, however it will * fail all queued BIOs. 
*/ ISCSI_DEBUG("forcing failing all disconnected sessions due to shutdown"); fail_on_disconnection = 1; sx_slock(&sc->sc_lock); TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { ISCSI_SESSION_LOCK(is); if (!is->is_connected) { ISCSI_SESSION_DEBUG(is, "force failing disconnected session early"); iscsi_session_reconnect(is); } ISCSI_SESSION_UNLOCK(is); } sx_sunlock(&sc->sc_lock); } static void iscsi_shutdown_post(struct iscsi_softc *sc) { if (!KERNEL_PANICKED()) { ISCSI_DEBUG("removing all sessions due to shutdown"); iscsi_terminate_sessions(sc); } } static int iscsi_load(void) { int error; sc = malloc(sizeof(*sc), M_ISCSI, M_ZERO | M_WAITOK); sx_init(&sc->sc_lock, "iscsi"); TAILQ_INIT(&sc->sc_sessions); cv_init(&sc->sc_cv, "iscsi_cv"); iscsi_outstanding_zone = uma_zcreate("iscsi_outstanding", sizeof(struct iscsi_outstanding), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); error = make_dev_p(MAKEDEV_CHECKNAME, &sc->sc_cdev, &iscsi_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "iscsi"); if (error != 0) { ISCSI_WARN("failed to create device node, error %d", error); return (error); } sc->sc_cdev->si_drv1 = sc; sc->sc_shutdown_pre_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync, iscsi_shutdown_pre, sc, SHUTDOWN_PRI_FIRST); /* * shutdown_post_sync needs to run after filesystem shutdown and before * CAM shutdown - otherwise when rebooting with an iSCSI session that is * disconnected but has outstanding requests, dashutdown() will hang on * cam_periph_runccb(). */ sc->sc_shutdown_post_eh = EVENTHANDLER_REGISTER(shutdown_post_sync, iscsi_shutdown_post, sc, SHUTDOWN_PRI_DEFAULT - 1); return (0); } static int iscsi_unload(void) { /* Awaken any threads asleep in iscsi_ioctl(). 
*/ sx_xlock(&sc->sc_lock); sc->sc_unloading = true; cv_signal(&sc->sc_cv); sx_xunlock(&sc->sc_lock); if (sc->sc_cdev != NULL) { ISCSI_DEBUG("removing device node"); destroy_dev(sc->sc_cdev); ISCSI_DEBUG("device node removed"); } if (sc->sc_shutdown_pre_eh != NULL) EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->sc_shutdown_pre_eh); if (sc->sc_shutdown_post_eh != NULL) EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->sc_shutdown_post_eh); iscsi_terminate_sessions(sc); uma_zdestroy(iscsi_outstanding_zone); sx_destroy(&sc->sc_lock); cv_destroy(&sc->sc_cv); free(sc, M_ISCSI); return (0); } static int iscsi_quiesce(void) { sx_slock(&sc->sc_lock); if (!TAILQ_EMPTY(&sc->sc_sessions)) { sx_sunlock(&sc->sc_lock); return (EBUSY); } sx_sunlock(&sc->sc_lock); return (0); } static int iscsi_modevent(module_t mod, int what, void *arg) { int error; switch (what) { case MOD_LOAD: error = iscsi_load(); break; case MOD_UNLOAD: error = iscsi_unload(); break; case MOD_QUIESCE: error = iscsi_quiesce(); break; default: error = EINVAL; break; } return (error); } moduledata_t iscsi_data = { "iscsi", iscsi_modevent, 0 }; DECLARE_MODULE(iscsi, iscsi_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); MODULE_DEPEND(iscsi, cam, 1, 1, 1); MODULE_DEPEND(iscsi, icl, 1, 1, 1); diff --git a/sys/dev/iscsi/iscsi_ioctl.h b/sys/dev/iscsi/iscsi_ioctl.h index c1de089c9d3f..caf403b55970 100644 --- a/sys/dev/iscsi/iscsi_ioctl.h +++ b/sys/dev/iscsi/iscsi_ioctl.h @@ -1,241 +1,247 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef ISCSI_IOCTL_H #define ISCSI_IOCTL_H #ifdef ICL_KERNEL_PROXY #include #endif #define ISCSI_PATH "/dev/iscsi" #define ISCSI_MAX_DATA_SEGMENT_LENGTH (128 * 1024) #define ISCSI_NAME_LEN 224 /* 223 bytes, by RFC 3720, + '\0' */ #define ISCSI_ADDR_LEN 47 /* INET6_ADDRSTRLEN + '\0' */ #define ISCSI_ALIAS_LEN 256 /* XXX: Where did it come from? */ #define ISCSI_SECRET_LEN 17 /* 16 + '\0' */ #define ISCSI_OFFLOAD_LEN 8 #define ISCSI_REASON_LEN 64 #define ISCSI_DIGEST_NONE 0 #define ISCSI_DIGEST_CRC32C 1 /* * Session configuration, set when adding the session. 
*/ struct iscsi_session_conf { char isc_initiator[ISCSI_NAME_LEN]; char isc_initiator_addr[ISCSI_ADDR_LEN]; char isc_initiator_alias[ISCSI_ALIAS_LEN]; char isc_target[ISCSI_NAME_LEN]; char isc_target_addr[ISCSI_ADDR_LEN]; char isc_user[ISCSI_NAME_LEN]; char isc_secret[ISCSI_SECRET_LEN]; char isc_mutual_user[ISCSI_NAME_LEN]; char isc_mutual_secret[ISCSI_SECRET_LEN]; int isc_discovery; int isc_header_digest; int isc_data_digest; int isc_iser; char isc_offload[ISCSI_OFFLOAD_LEN]; int isc_enable; int isc_dscp; int isc_pcp; int isc_ping_timeout; int isc_login_timeout; }; /* * Additional constraints imposed by chosen ICL offload module; * iscsid(8) must obey those when negotiating operational parameters. */ struct iscsi_session_limits { size_t isl_spare0; int isl_max_recv_data_segment_length; int isl_max_send_data_segment_length; int isl_max_burst_length; int isl_first_burst_length; int isl_spare[4]; }; /* * Session state, negotiated by iscsid(8) and queried by iscsictl(8). */ struct iscsi_session_state { struct iscsi_session_conf iss_conf; unsigned int iss_id; char iss_target_alias[ISCSI_ALIAS_LEN]; int iss_header_digest; int iss_data_digest; int iss_max_recv_data_segment_length; int iss_max_burst_length; int iss_first_burst_length; int iss_immediate_data; int iss_connected; char iss_reason[ISCSI_REASON_LEN]; char iss_offload[ISCSI_OFFLOAD_LEN]; int iss_max_send_data_segment_length; int iss_spare[3]; }; /* * The following ioctls are used by iscsid(8). 
*/ struct iscsi_daemon_request { unsigned int idr_session_id; struct iscsi_session_conf idr_conf; uint8_t idr_isid[6]; uint16_t idr_tsih; uint16_t idr_spare_cid; - struct iscsi_session_limits idr_limits; int idr_spare[4]; }; +struct iscsi_daemon_limits { + unsigned int idl_session_id; + int idl_socket; + struct iscsi_session_limits idl_limits; +}; + struct iscsi_daemon_handoff { unsigned int idh_session_id; int idh_socket; char idh_target_alias[ISCSI_ALIAS_LEN]; int idh_protocol_level; uint16_t idh_spare; uint16_t idh_tsih; uint16_t idh_spare_cid; uint32_t idh_statsn; int idh_header_digest; int idh_data_digest; size_t spare[3]; int idh_immediate_data; int idh_initial_r2t; int idh_max_recv_data_segment_length; int idh_max_send_data_segment_length; int idh_max_burst_length; int idh_first_burst_length; }; struct iscsi_daemon_fail { unsigned int idf_session_id; char idf_reason[ISCSI_REASON_LEN]; int idf_spare[4]; }; #define ISCSIDWAIT _IOR('I', 0x01, struct iscsi_daemon_request) #define ISCSIDHANDOFF _IOW('I', 0x02, struct iscsi_daemon_handoff) #define ISCSIDFAIL _IOW('I', 0x03, struct iscsi_daemon_fail) +#define ISCSIDLIMITS _IOWR('I', 0x07, struct iscsi_daemon_limits) #ifdef ICL_KERNEL_PROXY /* * When ICL_KERNEL_PROXY is not defined, the iscsid(8) is responsible * for creating the socket, connecting, and performing Login Phase using * the socket in the usual userspace way, and then passing the socket * file descriptor to the kernel part using ISCSIDHANDOFF. * * When ICL_KERNEL_PROXY is defined, the iscsid(8) creates the session * using ISCSICONNECT, performs Login Phase using ISCSISEND/ISCSIRECEIVE * instead of read(2)/write(2), and then calls ISCSIDHANDOFF with * idh_socket set to 0. * * The purpose of ICL_KERNEL_PROXY is to workaround the fact that, * at this time, it's not possible to do iWARP (RDMA) in userspace. 
*/ struct iscsi_daemon_connect { unsigned int idc_session_id; int idc_iser; int idc_domain; int idc_socktype; int idc_protocol; struct sockaddr *idc_from_addr; socklen_t idc_from_addrlen; struct sockaddr *idc_to_addr; socklen_t idc_to_addrlen; int idc_spare[4]; }; struct iscsi_daemon_send { unsigned int ids_session_id; void *ids_bhs; size_t ids_spare; void *ids_spare2; size_t ids_data_segment_len; void *ids_data_segment; int ids_spare3[4]; }; struct iscsi_daemon_receive { unsigned int idr_session_id; void *idr_bhs; size_t idr_spare; void *idr_spare2; size_t idr_data_segment_len; void *idr_data_segment; int idr_spare3[4]; }; #define ISCSIDCONNECT _IOWR('I', 0x04, struct iscsi_daemon_connect) #define ISCSIDSEND _IOWR('I', 0x05, struct iscsi_daemon_send) #define ISCSIDRECEIVE _IOWR('I', 0x06, struct iscsi_daemon_receive) #endif /* ICL_KERNEL_PROXY */ /* * The following ioctls are used by iscsictl(8). */ struct iscsi_session_add { struct iscsi_session_conf isa_conf; int isa_spare[4]; }; struct iscsi_session_remove { unsigned int isr_session_id; struct iscsi_session_conf isr_conf; int isr_spare[4]; }; struct iscsi_session_list { unsigned int isl_nentries; struct iscsi_session_state *isl_pstates; int isl_spare[4]; }; struct iscsi_session_modify { unsigned int ism_session_id; struct iscsi_session_conf ism_conf; int ism_spare[4]; }; #define ISCSISADD _IOW('I', 0x11, struct iscsi_session_add) #define ISCSISREMOVE _IOW('I', 0x12, struct iscsi_session_remove) #define ISCSISLIST _IOWR('I', 0x13, struct iscsi_session_list) #define ISCSISMODIFY _IOWR('I', 0x14, struct iscsi_session_modify) #endif /* !ISCSI_IOCTL_H */ diff --git a/sys/dev/iser/icl_iser.c b/sys/dev/iser/icl_iser.c index 140b5622385d..d43c1dc3b885 100644 --- a/sys/dev/iser/icl_iser.c +++ b/sys/dev/iser/icl_iser.c @@ -1,595 +1,595 @@ /* $FreeBSD$ */ /*- * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "icl_iser.h" SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "iSER module"); int iser_debug = 0; SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN, &iser_debug, 0, "Enable iser debug messages"); static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend"); static uma_zone_t icl_pdu_zone; static volatile u_int icl_iser_ncons; struct iser_global ig; static void iser_conn_release(struct icl_conn *ic); static icl_conn_new_pdu_t iser_conn_new_pdu; static icl_conn_pdu_free_t iser_conn_pdu_free; static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length; static icl_conn_pdu_append_bio_t iser_conn_pdu_append_bio; static icl_conn_pdu_append_data_t iser_conn_pdu_append_data; static icl_conn_pdu_queue_t iser_conn_pdu_queue; static icl_conn_handoff_t iser_conn_handoff; static icl_conn_free_t iser_conn_free; static icl_conn_close_t iser_conn_close; static icl_conn_connect_t iser_conn_connect; static icl_conn_task_setup_t iser_conn_task_setup; static icl_conn_task_done_t iser_conn_task_done; static icl_conn_pdu_get_bio_t iser_conn_pdu_get_bio; static icl_conn_pdu_get_data_t iser_conn_pdu_get_data; static kobj_method_t icl_iser_methods[] = { KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu), KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free), KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length), KOBJMETHOD(icl_conn_pdu_append_bio, iser_conn_pdu_append_bio), KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data), KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue), KOBJMETHOD(icl_conn_handoff, iser_conn_handoff), KOBJMETHOD(icl_conn_free, iser_conn_free), KOBJMETHOD(icl_conn_close, iser_conn_close), KOBJMETHOD(icl_conn_connect, iser_conn_connect), KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup), KOBJMETHOD(icl_conn_task_done, iser_conn_task_done), KOBJMETHOD(icl_conn_pdu_get_bio, iser_conn_pdu_get_bio), KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data), { 0, 0 } }; 
DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn)); /** * iser_initialize_headers() - Initialize task headers * @pdu: iser pdu * @iser_conn: iser connection * * Notes: * This routine may race with iser teardown flow for scsi * error handling TMFs. So for TMF we should acquire the * state mutex to avoid dereferencing the IB device which * may have already been terminated (racing teardown sequence). */ int iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn) { struct iser_tx_desc *tx_desc = &pdu->desc; struct iser_device *device = iser_conn->ib_conn.device; u64 dma_addr; int ret = 0; dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc, ISER_HEADERS_LEN, DMA_TO_DEVICE); if (ib_dma_mapping_error(device->ib_device, dma_addr)) { ret = -ENOMEM; goto out; } tx_desc->mapped = true; tx_desc->dma_addr = dma_addr; tx_desc->tx_sg[0].addr = tx_desc->dma_addr; tx_desc->tx_sg[0].length = ISER_HEADERS_LEN; tx_desc->tx_sg[0].lkey = device->mr->lkey; out: return (ret); } int iser_conn_pdu_append_bio(struct icl_conn *ic, struct icl_pdu *request, struct bio *bp, size_t offset, size_t len, int flags) { MPASS(!((request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK) == ISCSI_BHS_OPCODE_LOGIN_REQUEST || (request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK) == ISCSI_BHS_OPCODE_TEXT_REQUEST)); return (0); } int iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, const void *addr, size_t len, int flags) { struct iser_conn *iser_conn = icl_to_iser_conn(ic); switch (request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK) { case ISCSI_BHS_OPCODE_LOGIN_REQUEST: case ISCSI_BHS_OPCODE_TEXT_REQUEST: ISER_DBG("copy to login buff"); memcpy(iser_conn->login_req_buf, addr, len); request->ip_data_len = len; break; } return (0); } void iser_conn_pdu_get_bio(struct icl_conn *ic, struct icl_pdu *ip, size_t pdu_off, struct bio *bp, size_t bio_off, size_t len) { MPASS(ip->ip_data_mbuf == NULL); } void iser_conn_pdu_get_data(struct icl_conn *ic, struct 
icl_pdu *ip, size_t off, void *addr, size_t len) { /* If we have a receive data, copy it to upper layer buffer */ if (ip->ip_data_mbuf) memcpy(addr, ip->ip_data_mbuf + off, len); } /* * Allocate icl_pdu with empty BHS to fill up by the caller. */ struct icl_pdu * iser_new_pdu(struct icl_conn *ic, int flags) { struct icl_iser_pdu *iser_pdu; struct icl_pdu *ip; struct iser_conn *iser_conn = icl_to_iser_conn(ic); iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO); if (iser_pdu == NULL) { ISER_WARN("failed to allocate %zd bytes", sizeof(*iser_pdu)); return (NULL); } iser_pdu->iser_conn = iser_conn; ip = &iser_pdu->icl_pdu; ip->ip_conn = ic; ip->ip_bhs = &iser_pdu->desc.iscsi_header; return (ip); } struct icl_pdu * iser_conn_new_pdu(struct icl_conn *ic, int flags) { return (iser_new_pdu(ic, flags)); } void iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) { struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip); uma_zfree(icl_pdu_zone, iser_pdu); } size_t iser_conn_pdu_data_segment_length(struct icl_conn *ic, const struct icl_pdu *request) { uint32_t len = 0; len += request->ip_bhs->bhs_data_segment_len[0]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[1]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[2]; return (len); } void iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) { iser_pdu_free(ic, ip); } static bool is_control_opcode(uint8_t opcode) { bool is_control = false; switch (opcode & ISCSI_OPCODE_MASK) { case ISCSI_BHS_OPCODE_NOP_OUT: case ISCSI_BHS_OPCODE_LOGIN_REQUEST: case ISCSI_BHS_OPCODE_LOGOUT_REQUEST: case ISCSI_BHS_OPCODE_TEXT_REQUEST: is_control = true; break; case ISCSI_BHS_OPCODE_SCSI_COMMAND: is_control = false; break; default: ISER_ERR("unknown opcode %d", opcode); } return (is_control); } void iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) { struct iser_conn *iser_conn = icl_to_iser_conn(ic); struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip); int ret; if (iser_conn->state != ISER_CONN_UP) return; ret = 
iser_initialize_headers(iser_pdu, iser_conn); if (ret) { ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu); return; } if (is_control_opcode(ip->ip_bhs->bhs_opcode)) { ret = iser_send_control(iser_conn, iser_pdu); if (unlikely(ret)) ISER_ERR("Failed to send control pdu %p", iser_pdu); } else { ret = iser_send_command(iser_conn, iser_pdu); if (unlikely(ret)) ISER_ERR("Failed to send command pdu %p", iser_pdu); } } static struct icl_conn * iser_new_conn(const char *name, struct mtx *lock) { struct iser_conn *iser_conn; struct icl_conn *ic; refcount_acquire(&icl_iser_ncons); iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class, M_ICL_ISER, M_WAITOK | M_ZERO); if (!iser_conn) { ISER_ERR("failed to allocate iser conn"); refcount_release(&icl_iser_ncons); return (NULL); } cv_init(&iser_conn->up_cv, "iser_cv"); sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex"); mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "iser_flush_lock", NULL, MTX_DEF); cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv"); mtx_init(&iser_conn->ib_conn.lock, "iser_lock", NULL, MTX_DEF); ic = &iser_conn->icl_conn; ic->ic_lock = lock; ic->ic_name = name; ic->ic_offload = strdup("iser", M_TEMP); ic->ic_iser = true; ic->ic_unmapped = true; return (ic); } void iser_conn_free(struct icl_conn *ic) { struct iser_conn *iser_conn = icl_to_iser_conn(ic); iser_conn_release(ic); mtx_destroy(&iser_conn->ib_conn.lock); cv_destroy(&iser_conn->ib_conn.beacon.flush_cv); mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock); sx_destroy(&iser_conn->state_mutex); cv_destroy(&iser_conn->up_cv); kobj_delete((struct kobj *)iser_conn, M_ICL_ISER); refcount_release(&icl_iser_ncons); } int iser_conn_handoff(struct icl_conn *ic, int fd) { struct iser_conn *iser_conn = icl_to_iser_conn(ic); int error = 0; sx_xlock(&iser_conn->state_mutex); if (iser_conn->state != ISER_CONN_UP) { error = EINVAL; ISER_ERR("iser_conn %p state is %d, teardown started\n", iser_conn, iser_conn->state); goto out; } error = 
iser_alloc_rx_descriptors(iser_conn, ic->ic_maxtags); if (error) goto out; error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx); if (error) goto post_error; iser_conn->handoff_done = true; sx_xunlock(&iser_conn->state_mutex); return (error); post_error: iser_free_rx_descriptors(iser_conn); out: sx_xunlock(&iser_conn->state_mutex); return (error); } /** * Frees all conn objects */ static void iser_conn_release(struct icl_conn *ic) { struct iser_conn *iser_conn = icl_to_iser_conn(ic); struct ib_conn *ib_conn = &iser_conn->ib_conn; struct iser_conn *curr, *tmp; mtx_lock(&ig.connlist_mutex); /* * Search for iser connection in global list. * It may not be there in case of failure in connection establishment * stage. */ list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) { if (iser_conn == curr) { ISER_WARN("found iser_conn %p", iser_conn); list_del(&iser_conn->conn_list); } } mtx_unlock(&ig.connlist_mutex); /* * In case we reconnecting or removing session, we need to * release IB resources (which is safe to call more than once). */ sx_xlock(&iser_conn->state_mutex); iser_free_ib_conn_res(iser_conn, true); sx_xunlock(&iser_conn->state_mutex); if (ib_conn->cma_id != NULL) { rdma_destroy_id(ib_conn->cma_id); ib_conn->cma_id = NULL; } } void iser_conn_close(struct icl_conn *ic) { struct iser_conn *iser_conn = icl_to_iser_conn(ic); ISER_INFO("closing conn %p", iser_conn); sx_xlock(&iser_conn->state_mutex); /* * In case iser connection is waiting on conditional variable * (state PENDING) and we try to close it before connection establishment, * we need to signal it to continue releasing connection properly. 
*/ if (!iser_conn_terminate(iser_conn) && iser_conn->state == ISER_CONN_PENDING) cv_signal(&iser_conn->up_cv); sx_xunlock(&iser_conn->state_mutex); } int iser_conn_connect(struct icl_conn *ic, int domain, int socktype, int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa) { struct iser_conn *iser_conn = icl_to_iser_conn(ic); struct ib_conn *ib_conn = &iser_conn->ib_conn; int err = 0; iser_conn_release(ic); sx_xlock(&iser_conn->state_mutex); /* the device is known only --after-- address resolution */ ib_conn->device = NULL; iser_conn->handoff_done = false; iser_conn->state = ISER_CONN_PENDING; ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler, (void *)iser_conn, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ib_conn->cma_id)) { err = -PTR_ERR(ib_conn->cma_id); ISER_ERR("rdma_create_id failed: %d", err); goto id_failure; } err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000); if (err) { ISER_ERR("rdma_resolve_addr failed: %d", err); if (err < 0) err = -err; goto addr_failure; } ISER_DBG("before cv_wait: %p", iser_conn); cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex); ISER_DBG("after cv_wait: %p", iser_conn); if (iser_conn->state != ISER_CONN_UP) { err = EIO; goto addr_failure; } err = iser_alloc_login_buf(iser_conn); if (err) goto addr_failure; sx_xunlock(&iser_conn->state_mutex); mtx_lock(&ig.connlist_mutex); list_add(&iser_conn->conn_list, &ig.connlist); mtx_unlock(&ig.connlist_mutex); return (0); id_failure: ib_conn->cma_id = NULL; addr_failure: sx_xunlock(&iser_conn->state_mutex); return (err); } int iser_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) { struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip); *prvp = ip; iser_pdu->csio = csio; return (0); } void iser_conn_task_done(struct icl_conn *ic, void *prv) { struct icl_pdu *ip = prv; struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip); struct iser_device *device = iser_pdu->iser_conn->ib_conn.device; struct 
iser_tx_desc *tx_desc = &iser_pdu->desc; if (iser_pdu->dir[ISER_DIR_IN]) { iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN); iser_dma_unmap_task_data(iser_pdu, &iser_pdu->data[ISER_DIR_IN], DMA_FROM_DEVICE); } if (iser_pdu->dir[ISER_DIR_OUT]) { iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT); iser_dma_unmap_task_data(iser_pdu, &iser_pdu->data[ISER_DIR_OUT], DMA_TO_DEVICE); } if (likely(tx_desc->mapped)) { ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); tx_desc->mapped = false; } iser_pdu_free(ic, ip); } static int -iser_limits(struct icl_drv_limits *idl) +iser_limits(struct icl_drv_limits *idl, int socket) { idl->idl_max_recv_data_segment_length = 128 * 1024; idl->idl_max_send_data_segment_length = 128 * 1024; idl->idl_max_burst_length = 262144; idl->idl_first_burst_length = 65536; return (0); } static int icl_iser_load(void) { int error; ISER_DBG("Starting iSER datamover..."); icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* FIXME: Check rc */ refcount_init(&icl_iser_ncons, 0); error = icl_register("iser", true, 0, iser_limits, iser_new_conn); KASSERT(error == 0, ("failed to register iser")); memset(&ig, 0, sizeof(struct iser_global)); /* device init is called only after the first addr resolution */ sx_init(&ig.device_list_mutex, "global_device_lock"); INIT_LIST_HEAD(&ig.device_list); mtx_init(&ig.connlist_mutex, "iser_global_conn_lock", NULL, MTX_DEF); INIT_LIST_HEAD(&ig.connlist); sx_init(&ig.close_conns_mutex, "global_close_conns_lock"); return (error); } static int icl_iser_unload(void) { ISER_DBG("Removing iSER datamover..."); if (icl_iser_ncons != 0) return (EBUSY); sx_destroy(&ig.close_conns_mutex); mtx_destroy(&ig.connlist_mutex); sx_destroy(&ig.device_list_mutex); icl_unregister("iser", true); uma_zdestroy(icl_pdu_zone); return (0); } static int icl_iser_modevent(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: return 
(icl_iser_load()); case MOD_UNLOAD: return (icl_iser_unload()); default: return (EINVAL); } } moduledata_t icl_iser_data = { .name = "icl_iser", .evhand = icl_iser_modevent, .priv = 0 }; DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); MODULE_DEPEND(icl_iser, icl, 1, 1, 1); MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1); MODULE_VERSION(icl_iser, 1); diff --git a/usr.sbin/ctld/ctld.h b/usr.sbin/ctld/ctld.h index 293f5378592f..de2f480af30b 100644 --- a/usr.sbin/ctld/ctld.h +++ b/usr.sbin/ctld/ctld.h @@ -1,386 +1,386 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
/* Compile-time defaults and limits used by ctld. */
#define	DEFAULT_CONFIG_PATH		"/etc/ctl.conf"
#define	DEFAULT_PIDFILE			"/var/run/ctld.pid"
#define	DEFAULT_BLOCKSIZE		512
#define	DEFAULT_CD_BLOCKSIZE		2048

#define	MAX_LUNS			1024
#define	MAX_NAME_LEN			223
#define	MAX_DATA_SEGMENT_LENGTH		(128 * 1024)
#define	SOCKBUF_SIZE			1048576

/*
 * One credential entry of an auth-group (linked through a_next into
 * auth_group.ag_auths).  The mutual_* pair is presumably only set for
 * entries created with auth_new_chap_mutual() -- confirm in ctld.c.
 */
struct auth {
	TAILQ_ENTRY(auth)		a_next;
	struct auth_group		*a_auth_group;
	char				*a_user;
	char				*a_secret;
	char				*a_mutual_user;
	char				*a_mutual_secret;
};

/* One initiator-name entry of an auth-group (auth_group.ag_names). */
struct auth_name {
	TAILQ_ENTRY(auth_name)		an_next;
	struct auth_group		*an_auth_group;
	char				*an_initiator_name;
};

/*
 * One initiator-portal entry of an auth-group (auth_group.ag_portals):
 * the textual form, a socket address and a mask.
 */
struct auth_portal {
	TAILQ_ENTRY(auth_portal)	ap_next;
	struct auth_group		*ap_auth_group;
	char				*ap_initiator_portal;
	struct sockaddr_storage		ap_sa;
	int				ap_mask;
};

/* Values for auth_group.ag_type. */
#define	AG_TYPE_UNKNOWN			0
#define	AG_TYPE_DENY			1
#define	AG_TYPE_NO_AUTHENTICATION	2
#define	AG_TYPE_CHAP			3
#define	AG_TYPE_CHAP_MUTUAL		4

/*
 * Named set of authentication rules: credentials, permitted initiator names
 * and permitted initiator portals.
 */
struct auth_group {
	TAILQ_ENTRY(auth_group)		ag_next;
	struct conf			*ag_conf;
	char				*ag_name;
	struct target			*ag_target;
	int				ag_type;	/* AG_TYPE_* */
	TAILQ_HEAD(, auth)		ag_auths;
	TAILQ_HEAD(, auth_name)		ag_names;
	TAILQ_HEAD(, auth_portal)	ag_portals;
};

/* One listening address belonging to a portal-group. */
struct portal {
	TAILQ_ENTRY(portal)		p_next;
	struct portal_group		*p_portal_group;
	bool				p_iser;
	char				*p_listen;
	struct addrinfo			*p_ai;
#ifdef ICL_KERNEL_PROXY
	int				p_id;
#endif

	TAILQ_HEAD(, target)		p_targets;
	int				p_socket;
};

TAILQ_HEAD(options, option);

/* Values for portal_group.pg_discovery_filter. */
#define	PG_FILTER_UNKNOWN		0
#define	PG_FILTER_NONE			1
#define	PG_FILTER_PORTAL		2
#define	PG_FILTER_PORTAL_NAME		3
#define	PG_FILTER_PORTAL_NAME_AUTH	4

/*
 * Named group of portals together with the ports exported through it and
 * its per-group settings.
 */
struct portal_group {
	TAILQ_ENTRY(portal_group)	pg_next;
	struct conf			*pg_conf;
	struct options			pg_options;
	char				*pg_name;
	struct auth_group		*pg_discovery_auth_group;
	int				pg_discovery_filter;	/* PG_FILTER_* */
	int				pg_foreign;
	bool				pg_unassigned;
	TAILQ_HEAD(, portal)		pg_portals;
	TAILQ_HEAD(, port)		pg_ports;
	char				*pg_offload;
	char				*pg_redirection;
	int				pg_dscp;
	int				pg_pcp;

	uint16_t			pg_tag;
};

/* Named port identified by a CTL port number, with the ports bound to it. */
struct pport {
	TAILQ_ENTRY(pport)		pp_next;
	TAILQ_HEAD(, port)		pp_ports;
	struct conf			*pp_conf;
	char				*pp_name;

	uint32_t			pp_ctl_port;
};

/*
 * Binding of a target to a portal-group or a pport.  It carries four list
 * linkages; p_pgs, p_pps and p_ts presumably link it into pg_ports,
 * pp_ports and t_ports respectively -- confirm in ctld.c.
 */
struct port {
	TAILQ_ENTRY(port)		p_next;
	TAILQ_ENTRY(port)		p_pgs;
	TAILQ_ENTRY(port)		p_pps;
	TAILQ_ENTRY(port)		p_ts;
	struct conf			*p_conf;
	char				*p_name;
	struct auth_group		*p_auth_group;
	struct portal_group		*p_portal_group;
	struct pport			*p_pport;
	struct target			*p_target;

	int				p_ioctl_port;
	int				p_ioctl_pp;
	int				p_ioctl_vp;
	uint32_t			p_ctl_port;
};

/* Name/value pair stored in a "struct options" list. */
struct option {
	TAILQ_ENTRY(option)		o_next;
	char				*o_name;
	char				*o_value;
};

/* Configured LUN and the CTL LUN number it maps to. */
struct lun {
	TAILQ_ENTRY(lun)		l_next;
	struct conf			*l_conf;
	struct options			l_options;
	char				*l_name;
	char				*l_backend;
	uint8_t				l_device_type;
	int				l_blocksize;
	char				*l_device_id;
	char				*l_path;
	char				*l_scsiname;
	char				*l_serial;
	int64_t				l_size;		/* handed to CTL as lun_size_bytes */

	int				l_ctl_lun;	/* a value >= 0 is requested from CTL on creation */
};

/* Configured target with a fixed table of MAX_LUNS LUN pointers. */
struct target {
	TAILQ_ENTRY(target)		t_next;
	struct conf			*t_conf;
	struct lun			*t_luns[MAX_LUNS];
	struct auth_group		*t_auth_group;
	TAILQ_HEAD(, port)		t_ports;
	char				*t_name;
	char				*t_alias;
	char				*t_redirection;
};

/* Configured iSNS server: textual address plus an addrinfo pointer. */
struct isns {
	TAILQ_ENTRY(isns)		i_next;
	struct conf			*i_conf;
	char				*i_addr;
	struct addrinfo			*i_ai;
};

/* Complete configuration: heads of all object lists plus global settings. */
struct conf {
	char				*conf_pidfile_path;
	TAILQ_HEAD(, lun)		conf_luns;
	TAILQ_HEAD(, target)		conf_targets;
	TAILQ_HEAD(, auth_group)	conf_auth_groups;
	TAILQ_HEAD(, port)		conf_ports;
	TAILQ_HEAD(, portal_group)	conf_portal_groups;
	TAILQ_HEAD(, pport)		conf_pports;
	TAILQ_HEAD(, isns)		conf_isns;
	int				conf_isns_period;
	int				conf_isns_timeout;
	int				conf_debug;
	int				conf_timeout;
	int				conf_maxproc;

#ifdef ICL_KERNEL_PROXY
	int				conf_portal_id;
#endif
	struct pidfh			*conf_pidfh;

	bool				conf_default_pg_defined;
	bool				conf_default_ag_defined;
	bool				conf_kernel_port_on;
};

/* Values for ctld_connection.conn_session_type. */
#define	CONN_SESSION_TYPE_NONE		0
#define	CONN_SESSION_TYPE_DISCOVERY	1
#define	CONN_SESSION_TYPE_NORMAL	2

/*
 * Per-connection state kept by ctld: the embedded generic connection, the
 * portal/port/target the connection resolved to, the initiator's identity,
 * the negotiation limits and the authentication state.
 */
struct ctld_connection {
	struct connection	conn;
	struct portal		*conn_portal;
	struct port		*conn_port;
	struct target		*conn_target;
	int			conn_session_type;	/* CONN_SESSION_TYPE_* */
	char			*conn_initiator_name;
	char			*conn_initiator_addr;
	char			*conn_initiator_alias;
	uint8_t			conn_initiator_isid[6];
	struct sockaddr_storage	conn_initiator_sa;
	int			conn_max_recv_data_segment_limit;
	int			conn_max_send_data_segment_limit;
	int			conn_max_burst_limit;
	int			conn_first_burst_limit;
	const char		*conn_user;
	struct chap		*conn_chap;
};
*initiator_portal); bool auth_portal_defined(const struct auth_group *ag); const struct auth_portal *auth_portal_find(const struct auth_group *ag, const struct sockaddr_storage *sa); int auth_portal_check(const struct auth_group *ag, const struct sockaddr_storage *sa); struct portal_group *portal_group_new(struct conf *conf, const char *name); void portal_group_delete(struct portal_group *pg); struct portal_group *portal_group_find(const struct conf *conf, const char *name); int portal_group_add_listen(struct portal_group *pg, const char *listen, bool iser); int portal_group_set_filter(struct portal_group *pg, const char *filter); int portal_group_set_offload(struct portal_group *pg, const char *offload); int portal_group_set_redirection(struct portal_group *pg, const char *addr); int isns_new(struct conf *conf, const char *addr); void isns_delete(struct isns *is); void isns_register(struct isns *isns, struct isns *oldisns); void isns_check(struct isns *isns); void isns_deregister(struct isns *isns); struct pport *pport_new(struct conf *conf, const char *name, uint32_t ctl_port); struct pport *pport_find(const struct conf *conf, const char *name); struct pport *pport_copy(struct pport *pport, struct conf *conf); void pport_delete(struct pport *pport); struct port *port_new(struct conf *conf, struct target *target, struct portal_group *pg); struct port *port_new_ioctl(struct conf *conf, struct target *target, int pp, int vp); struct port *port_new_pp(struct conf *conf, struct target *target, struct pport *pp); struct port *port_find(const struct conf *conf, const char *name); struct port *port_find_in_pg(const struct portal_group *pg, const char *target); void port_delete(struct port *port); int port_is_dummy(struct port *port); struct target *target_new(struct conf *conf, const char *name); void target_delete(struct target *target); struct target *target_find(struct conf *conf, const char *name); int target_set_redirection(struct target *target, const char *addr); 
struct lun *lun_new(struct conf *conf, const char *name); void lun_delete(struct lun *lun); struct lun *lun_find(const struct conf *conf, const char *name); void lun_set_backend(struct lun *lun, const char *value); void lun_set_device_type(struct lun *lun, uint8_t value); void lun_set_blocksize(struct lun *lun, size_t value); void lun_set_device_id(struct lun *lun, const char *value); void lun_set_path(struct lun *lun, const char *value); void lun_set_scsiname(struct lun *lun, const char *value); void lun_set_serial(struct lun *lun, const char *value); void lun_set_size(struct lun *lun, size_t value); void lun_set_ctl_lun(struct lun *lun, uint32_t value); struct option *option_new(struct options *os, const char *name, const char *value); void option_delete(struct options *os, struct option *co); struct option *option_find(const struct options *os, const char *name); void option_set(struct option *o, const char *value); void kernel_init(void); int kernel_lun_add(struct lun *lun); int kernel_lun_modify(struct lun *lun); int kernel_lun_remove(struct lun *lun); void kernel_handoff(struct ctld_connection *conn); -void kernel_limits(const char *offload, +void kernel_limits(const char *offload, int s, int *max_recv_data_segment_length, int *max_send_data_segment_length, int *max_burst_length, int *first_burst_length); int kernel_port_add(struct port *port); int kernel_port_update(struct port *port, struct port *old); int kernel_port_remove(struct port *port); void kernel_capsicate(void); #ifdef ICL_KERNEL_PROXY void kernel_listen(struct addrinfo *ai, bool iser, int portal_id); void kernel_accept(int *connection_id, int *portal_id, struct sockaddr *client_sa, socklen_t *client_salen); void kernel_send(struct pdu *pdu); void kernel_receive(struct pdu *pdu); #endif void login(struct ctld_connection *conn); void discovery(struct ctld_connection *conn); bool valid_iscsi_name(const char *name); void set_timeout(int timeout, int fatal); #endif /* !CTLD_H */ diff --git 
a/usr.sbin/ctld/kernel.c b/usr.sbin/ctld/kernel.c index dbbd7c35f2d9..7e5d2a386878 100644 --- a/usr.sbin/ctld/kernel.c +++ b/usr.sbin/ctld/kernel.c @@ -1,1374 +1,1375 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003, 2004 Silicon Graphics International Corp. * Copyright (c) 1997-2007 Kenneth D. Merry * Copyright (c) 2012 The FreeBSD Foundation * Copyright (c) 2017 Jakub Wojciech Klama * All rights reserved. * * Portions of this software were developed by Edward Tomasz Napierala * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. 
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ctld.h" #ifdef ICL_KERNEL_PROXY #include #endif #define NVLIST_BUFSIZE 1024 extern bool proxy_mode; static int ctl_fd = 0; void kernel_init(void) { int retval, saved_errno; ctl_fd = open(CTL_DEFAULT_DEV, O_RDWR); if (ctl_fd < 0 && errno == ENOENT) { saved_errno = errno; retval = kldload("ctl"); if (retval != -1) ctl_fd = open(CTL_DEFAULT_DEV, O_RDWR); else errno = saved_errno; } if (ctl_fd < 0) log_err(1, "failed to open %s", CTL_DEFAULT_DEV); #ifdef WANT_ISCSI else { saved_errno = errno; if (modfind("cfiscsi") == -1 && kldload("cfiscsi") == -1) log_warn("couldn't load cfiscsi"); errno = saved_errno; } #endif } /* * Name/value pair used for per-LUN attributes. */ struct cctl_lun_nv { char *name; char *value; STAILQ_ENTRY(cctl_lun_nv) links; }; /* * Backend LUN information. 
*/ struct cctl_lun { uint64_t lun_id; char *backend_type; uint8_t device_type; uint64_t size_blocks; uint32_t blocksize; char *serial_number; char *device_id; char *ctld_name; STAILQ_HEAD(,cctl_lun_nv) attr_list; STAILQ_ENTRY(cctl_lun) links; }; struct cctl_port { uint32_t port_id; char *port_frontend; char *port_name; int pp; int vp; int cfiscsi_state; char *cfiscsi_target; uint16_t cfiscsi_portal_group_tag; char *ctld_portal_group_name; STAILQ_HEAD(,cctl_lun_nv) attr_list; STAILQ_ENTRY(cctl_port) links; }; struct cctl_devlist_data { int num_luns; STAILQ_HEAD(,cctl_lun) lun_list; struct cctl_lun *cur_lun; int num_ports; STAILQ_HEAD(,cctl_port) port_list; struct cctl_port *cur_port; int level; struct sbuf *cur_sb[32]; }; static void cctl_start_element(void *user_data, const char *name, const char **attr) { int i; struct cctl_devlist_data *devlist; struct cctl_lun *cur_lun; devlist = (struct cctl_devlist_data *)user_data; cur_lun = devlist->cur_lun; devlist->level++; if ((u_int)devlist->level >= (sizeof(devlist->cur_sb) / sizeof(devlist->cur_sb[0]))) log_errx(1, "%s: too many nesting levels, %zd max", __func__, sizeof(devlist->cur_sb) / sizeof(devlist->cur_sb[0])); devlist->cur_sb[devlist->level] = sbuf_new_auto(); if (devlist->cur_sb[devlist->level] == NULL) log_err(1, "%s: unable to allocate sbuf", __func__); if (strcmp(name, "lun") == 0) { if (cur_lun != NULL) log_errx(1, "%s: improper lun element nesting", __func__); cur_lun = calloc(1, sizeof(*cur_lun)); if (cur_lun == NULL) log_err(1, "%s: cannot allocate %zd bytes", __func__, sizeof(*cur_lun)); devlist->num_luns++; devlist->cur_lun = cur_lun; STAILQ_INIT(&cur_lun->attr_list); STAILQ_INSERT_TAIL(&devlist->lun_list, cur_lun, links); for (i = 0; attr[i] != NULL; i += 2) { if (strcmp(attr[i], "id") == 0) { cur_lun->lun_id = strtoull(attr[i+1], NULL, 0); } else { log_errx(1, "%s: invalid LUN attribute %s = %s", __func__, attr[i], attr[i+1]); } } } } static void cctl_end_element(void *user_data, const char *name) { 
struct cctl_devlist_data *devlist; struct cctl_lun *cur_lun; char *str; devlist = (struct cctl_devlist_data *)user_data; cur_lun = devlist->cur_lun; if ((cur_lun == NULL) && (strcmp(name, "ctllunlist") != 0)) log_errx(1, "%s: cur_lun == NULL! (name = %s)", __func__, name); if (devlist->cur_sb[devlist->level] == NULL) log_errx(1, "%s: no valid sbuf at level %d (name %s)", __func__, devlist->level, name); sbuf_finish(devlist->cur_sb[devlist->level]); str = checked_strdup(sbuf_data(devlist->cur_sb[devlist->level])); if (strlen(str) == 0) { free(str); str = NULL; } sbuf_delete(devlist->cur_sb[devlist->level]); devlist->cur_sb[devlist->level] = NULL; devlist->level--; if (strcmp(name, "backend_type") == 0) { cur_lun->backend_type = str; str = NULL; } else if (strcmp(name, "lun_type") == 0) { if (str == NULL) log_errx(1, "%s: %s missing its argument", __func__, name); cur_lun->device_type = strtoull(str, NULL, 0); } else if (strcmp(name, "size") == 0) { if (str == NULL) log_errx(1, "%s: %s missing its argument", __func__, name); cur_lun->size_blocks = strtoull(str, NULL, 0); } else if (strcmp(name, "blocksize") == 0) { if (str == NULL) log_errx(1, "%s: %s missing its argument", __func__, name); cur_lun->blocksize = strtoul(str, NULL, 0); } else if (strcmp(name, "serial_number") == 0) { cur_lun->serial_number = str; str = NULL; } else if (strcmp(name, "device_id") == 0) { cur_lun->device_id = str; str = NULL; } else if (strcmp(name, "ctld_name") == 0) { cur_lun->ctld_name = str; str = NULL; } else if (strcmp(name, "lun") == 0) { devlist->cur_lun = NULL; } else if (strcmp(name, "ctllunlist") == 0) { /* Nothing. 
*/ } else { struct cctl_lun_nv *nv; nv = calloc(1, sizeof(*nv)); if (nv == NULL) log_err(1, "%s: can't allocate %zd bytes for nv pair", __func__, sizeof(*nv)); nv->name = checked_strdup(name); nv->value = str; str = NULL; STAILQ_INSERT_TAIL(&cur_lun->attr_list, nv, links); } free(str); } static void cctl_start_pelement(void *user_data, const char *name, const char **attr) { int i; struct cctl_devlist_data *devlist; struct cctl_port *cur_port; devlist = (struct cctl_devlist_data *)user_data; cur_port = devlist->cur_port; devlist->level++; if ((u_int)devlist->level >= (sizeof(devlist->cur_sb) / sizeof(devlist->cur_sb[0]))) log_errx(1, "%s: too many nesting levels, %zd max", __func__, sizeof(devlist->cur_sb) / sizeof(devlist->cur_sb[0])); devlist->cur_sb[devlist->level] = sbuf_new_auto(); if (devlist->cur_sb[devlist->level] == NULL) log_err(1, "%s: unable to allocate sbuf", __func__); if (strcmp(name, "targ_port") == 0) { if (cur_port != NULL) log_errx(1, "%s: improper port element nesting (%s)", __func__, name); cur_port = calloc(1, sizeof(*cur_port)); if (cur_port == NULL) log_err(1, "%s: cannot allocate %zd bytes", __func__, sizeof(*cur_port)); devlist->num_ports++; devlist->cur_port = cur_port; STAILQ_INIT(&cur_port->attr_list); STAILQ_INSERT_TAIL(&devlist->port_list, cur_port, links); for (i = 0; attr[i] != NULL; i += 2) { if (strcmp(attr[i], "id") == 0) { cur_port->port_id = strtoul(attr[i+1], NULL, 0); } else { log_errx(1, "%s: invalid LUN attribute %s = %s", __func__, attr[i], attr[i+1]); } } } } static void cctl_end_pelement(void *user_data, const char *name) { struct cctl_devlist_data *devlist; struct cctl_port *cur_port; char *str; devlist = (struct cctl_devlist_data *)user_data; cur_port = devlist->cur_port; if ((cur_port == NULL) && (strcmp(name, "ctlportlist") != 0)) log_errx(1, "%s: cur_port == NULL! 
(name = %s)", __func__, name); if (devlist->cur_sb[devlist->level] == NULL) log_errx(1, "%s: no valid sbuf at level %d (name %s)", __func__, devlist->level, name); sbuf_finish(devlist->cur_sb[devlist->level]); str = checked_strdup(sbuf_data(devlist->cur_sb[devlist->level])); if (strlen(str) == 0) { free(str); str = NULL; } sbuf_delete(devlist->cur_sb[devlist->level]); devlist->cur_sb[devlist->level] = NULL; devlist->level--; if (strcmp(name, "frontend_type") == 0) { cur_port->port_frontend = str; str = NULL; } else if (strcmp(name, "port_name") == 0) { cur_port->port_name = str; str = NULL; } else if (strcmp(name, "physical_port") == 0) { if (str == NULL) log_errx(1, "%s: %s missing its argument", __func__, name); cur_port->pp = strtoul(str, NULL, 0); } else if (strcmp(name, "virtual_port") == 0) { if (str == NULL) log_errx(1, "%s: %s missing its argument", __func__, name); cur_port->vp = strtoul(str, NULL, 0); } else if (strcmp(name, "cfiscsi_target") == 0) { cur_port->cfiscsi_target = str; str = NULL; } else if (strcmp(name, "cfiscsi_state") == 0) { if (str == NULL) log_errx(1, "%s: %s missing its argument", __func__, name); cur_port->cfiscsi_state = strtoul(str, NULL, 0); } else if (strcmp(name, "cfiscsi_portal_group_tag") == 0) { if (str == NULL) log_errx(1, "%s: %s missing its argument", __func__, name); cur_port->cfiscsi_portal_group_tag = strtoul(str, NULL, 0); } else if (strcmp(name, "ctld_portal_group_name") == 0) { cur_port->ctld_portal_group_name = str; str = NULL; } else if (strcmp(name, "targ_port") == 0) { devlist->cur_port = NULL; } else if (strcmp(name, "ctlportlist") == 0) { /* Nothing. 
*/ } else { struct cctl_lun_nv *nv; nv = calloc(1, sizeof(*nv)); if (nv == NULL) log_err(1, "%s: can't allocate %zd bytes for nv pair", __func__, sizeof(*nv)); nv->name = checked_strdup(name); nv->value = str; str = NULL; STAILQ_INSERT_TAIL(&cur_port->attr_list, nv, links); } free(str); } static void cctl_char_handler(void *user_data, const XML_Char *str, int len) { struct cctl_devlist_data *devlist; devlist = (struct cctl_devlist_data *)user_data; sbuf_bcat(devlist->cur_sb[devlist->level], str, len); } struct conf * conf_new_from_kernel(void) { struct conf *conf = NULL; struct target *targ; struct portal_group *pg; struct pport *pp; struct port *cp; struct lun *cl; struct option *o; struct ctl_lun_list list; struct cctl_devlist_data devlist; struct cctl_lun *lun; struct cctl_port *port; XML_Parser parser; char *str, *name; int len, retval; bzero(&devlist, sizeof(devlist)); STAILQ_INIT(&devlist.lun_list); STAILQ_INIT(&devlist.port_list); log_debugx("obtaining previously configured CTL luns from the kernel"); str = NULL; len = 4096; retry: str = realloc(str, len); if (str == NULL) log_err(1, "realloc"); bzero(&list, sizeof(list)); list.alloc_len = len; list.status = CTL_LUN_LIST_NONE; list.lun_xml = str; if (ioctl(ctl_fd, CTL_LUN_LIST, &list) == -1) { log_warn("error issuing CTL_LUN_LIST ioctl"); free(str); return (NULL); } if (list.status == CTL_LUN_LIST_ERROR) { log_warnx("error returned from CTL_LUN_LIST ioctl: %s", list.error_str); free(str); return (NULL); } if (list.status == CTL_LUN_LIST_NEED_MORE_SPACE) { len = len << 1; goto retry; } parser = XML_ParserCreate(NULL); if (parser == NULL) { log_warnx("unable to create XML parser"); free(str); return (NULL); } XML_SetUserData(parser, &devlist); XML_SetElementHandler(parser, cctl_start_element, cctl_end_element); XML_SetCharacterDataHandler(parser, cctl_char_handler); retval = XML_Parse(parser, str, strlen(str), 1); XML_ParserFree(parser); free(str); if (retval != 1) { log_warnx("XML_Parse failed"); return 
(NULL); } str = NULL; len = 4096; retry_port: str = realloc(str, len); if (str == NULL) log_err(1, "realloc"); bzero(&list, sizeof(list)); list.alloc_len = len; list.status = CTL_LUN_LIST_NONE; list.lun_xml = str; if (ioctl(ctl_fd, CTL_PORT_LIST, &list) == -1) { log_warn("error issuing CTL_PORT_LIST ioctl"); free(str); return (NULL); } if (list.status == CTL_LUN_LIST_ERROR) { log_warnx("error returned from CTL_PORT_LIST ioctl: %s", list.error_str); free(str); return (NULL); } if (list.status == CTL_LUN_LIST_NEED_MORE_SPACE) { len = len << 1; goto retry_port; } parser = XML_ParserCreate(NULL); if (parser == NULL) { log_warnx("unable to create XML parser"); free(str); return (NULL); } XML_SetUserData(parser, &devlist); XML_SetElementHandler(parser, cctl_start_pelement, cctl_end_pelement); XML_SetCharacterDataHandler(parser, cctl_char_handler); retval = XML_Parse(parser, str, strlen(str), 1); XML_ParserFree(parser); free(str); if (retval != 1) { log_warnx("XML_Parse failed"); return (NULL); } conf = conf_new(); name = NULL; STAILQ_FOREACH(port, &devlist.port_list, links) { if (strcmp(port->port_frontend, "ha") == 0) continue; free(name); if (port->pp == 0 && port->vp == 0) { name = checked_strdup(port->port_name); } else if (port->vp == 0) { retval = asprintf(&name, "%s/%d", port->port_name, port->pp); if (retval <= 0) log_err(1, "asprintf"); } else { retval = asprintf(&name, "%s/%d/%d", port->port_name, port->pp, port->vp); if (retval <= 0) log_err(1, "asprintf"); } if (port->cfiscsi_target == NULL) { log_debugx("CTL port %u \"%s\" wasn't managed by ctld; ", port->port_id, name); pp = pport_find(conf, name); if (pp == NULL) { #if 0 log_debugx("found new kernel port %u \"%s\"", port->port_id, name); #endif pp = pport_new(conf, name, port->port_id); if (pp == NULL) { log_warnx("pport_new failed"); continue; } } continue; } if (port->cfiscsi_state != 1) { log_debugx("CTL port %ju is not active (%d); ignoring", (uintmax_t)port->port_id, port->cfiscsi_state); continue; } 
targ = target_find(conf, port->cfiscsi_target); if (targ == NULL) { #if 0 log_debugx("found new kernel target %s for CTL port %ld", port->cfiscsi_target, port->port_id); #endif targ = target_new(conf, port->cfiscsi_target); if (targ == NULL) { log_warnx("target_new failed"); continue; } } if (port->ctld_portal_group_name == NULL) continue; pg = portal_group_find(conf, port->ctld_portal_group_name); if (pg == NULL) { #if 0 log_debugx("found new kernel portal group %s for CTL port %ld", port->ctld_portal_group_name, port->port_id); #endif pg = portal_group_new(conf, port->ctld_portal_group_name); if (pg == NULL) { log_warnx("portal_group_new failed"); continue; } } pg->pg_tag = port->cfiscsi_portal_group_tag; cp = port_new(conf, targ, pg); if (cp == NULL) { log_warnx("port_new failed"); continue; } cp->p_ctl_port = port->port_id; } free(name); STAILQ_FOREACH(lun, &devlist.lun_list, links) { struct cctl_lun_nv *nv; if (lun->ctld_name == NULL) { log_debugx("CTL lun %ju wasn't managed by ctld; " "ignoring", (uintmax_t)lun->lun_id); continue; } cl = lun_find(conf, lun->ctld_name); if (cl != NULL) { log_warnx("found CTL lun %ju \"%s\", " "also backed by CTL lun %d; ignoring", (uintmax_t)lun->lun_id, lun->ctld_name, cl->l_ctl_lun); continue; } log_debugx("found CTL lun %ju \"%s\"", (uintmax_t)lun->lun_id, lun->ctld_name); cl = lun_new(conf, lun->ctld_name); if (cl == NULL) { log_warnx("lun_new failed"); continue; } lun_set_backend(cl, lun->backend_type); lun_set_device_type(cl, lun->device_type); lun_set_blocksize(cl, lun->blocksize); lun_set_device_id(cl, lun->device_id); lun_set_serial(cl, lun->serial_number); lun_set_size(cl, lun->size_blocks * cl->l_blocksize); lun_set_ctl_lun(cl, lun->lun_id); STAILQ_FOREACH(nv, &lun->attr_list, links) { if (strcmp(nv->name, "file") == 0 || strcmp(nv->name, "dev") == 0) { lun_set_path(cl, nv->value); continue; } o = option_new(&cl->l_options, nv->name, nv->value); if (o == NULL) log_warnx("unable to add CTL lun option %s " "for CTL 
lun %ju \"%s\"", nv->name, (uintmax_t) lun->lun_id, cl->l_name); } } return (conf); } int kernel_lun_add(struct lun *lun) { struct option *o; struct ctl_lun_req req; int error; bzero(&req, sizeof(req)); strlcpy(req.backend, lun->l_backend, sizeof(req.backend)); req.reqtype = CTL_LUNREQ_CREATE; req.reqdata.create.blocksize_bytes = lun->l_blocksize; if (lun->l_size != 0) req.reqdata.create.lun_size_bytes = lun->l_size; if (lun->l_ctl_lun >= 0) { req.reqdata.create.req_lun_id = lun->l_ctl_lun; req.reqdata.create.flags |= CTL_LUN_FLAG_ID_REQ; } req.reqdata.create.flags |= CTL_LUN_FLAG_DEV_TYPE; req.reqdata.create.device_type = lun->l_device_type; if (lun->l_serial != NULL) { strncpy(req.reqdata.create.serial_num, lun->l_serial, sizeof(req.reqdata.create.serial_num)); req.reqdata.create.flags |= CTL_LUN_FLAG_SERIAL_NUM; } if (lun->l_device_id != NULL) { strncpy(req.reqdata.create.device_id, lun->l_device_id, sizeof(req.reqdata.create.device_id)); req.reqdata.create.flags |= CTL_LUN_FLAG_DEVID; } if (lun->l_path != NULL) { o = option_find(&lun->l_options, "file"); if (o != NULL) { option_set(o, lun->l_path); } else { o = option_new(&lun->l_options, "file", lun->l_path); assert(o != NULL); } } o = option_find(&lun->l_options, "ctld_name"); if (o != NULL) { option_set(o, lun->l_name); } else { o = option_new(&lun->l_options, "ctld_name", lun->l_name); assert(o != NULL); } o = option_find(&lun->l_options, "scsiname"); if (o == NULL && lun->l_scsiname != NULL) { o = option_new(&lun->l_options, "scsiname", lun->l_scsiname); assert(o != NULL); } if (!TAILQ_EMPTY(&lun->l_options)) { req.args_nvl = nvlist_create(0); if (req.args_nvl == NULL) { log_warn("error allocating nvlist"); return (1); } TAILQ_FOREACH(o, &lun->l_options, o_next) nvlist_add_string(req.args_nvl, o->o_name, o->o_value); req.args = nvlist_pack(req.args_nvl, &req.args_len); if (req.args == NULL) { log_warn("error packing nvlist"); return (1); } } error = ioctl(ctl_fd, CTL_LUN_REQ, &req); 
nvlist_destroy(req.args_nvl); if (error != 0) { log_warn("error issuing CTL_LUN_REQ ioctl"); return (1); } switch (req.status) { case CTL_LUN_ERROR: log_warnx("LUN creation error: %s", req.error_str); return (1); case CTL_LUN_WARNING: log_warnx("LUN creation warning: %s", req.error_str); break; case CTL_LUN_OK: break; default: log_warnx("unknown LUN creation status: %d", req.status); return (1); } lun_set_ctl_lun(lun, req.reqdata.create.req_lun_id); return (0); } int kernel_lun_modify(struct lun *lun) { struct option *o; struct ctl_lun_req req; int error; bzero(&req, sizeof(req)); strlcpy(req.backend, lun->l_backend, sizeof(req.backend)); req.reqtype = CTL_LUNREQ_MODIFY; req.reqdata.modify.lun_id = lun->l_ctl_lun; req.reqdata.modify.lun_size_bytes = lun->l_size; if (lun->l_path != NULL) { o = option_find(&lun->l_options, "file"); if (o != NULL) { option_set(o, lun->l_path); } else { o = option_new(&lun->l_options, "file", lun->l_path); assert(o != NULL); } } o = option_find(&lun->l_options, "ctld_name"); if (o != NULL) { option_set(o, lun->l_name); } else { o = option_new(&lun->l_options, "ctld_name", lun->l_name); assert(o != NULL); } o = option_find(&lun->l_options, "scsiname"); if (o == NULL && lun->l_scsiname != NULL) { o = option_new(&lun->l_options, "scsiname", lun->l_scsiname); assert(o != NULL); } if (!TAILQ_EMPTY(&lun->l_options)) { req.args_nvl = nvlist_create(0); if (req.args_nvl == NULL) { log_warn("error allocating nvlist"); return (1); } TAILQ_FOREACH(o, &lun->l_options, o_next) nvlist_add_string(req.args_nvl, o->o_name, o->o_value); req.args = nvlist_pack(req.args_nvl, &req.args_len); if (req.args == NULL) { log_warn("error packing nvlist"); return (1); } } error = ioctl(ctl_fd, CTL_LUN_REQ, &req); nvlist_destroy(req.args_nvl); if (error != 0) { log_warn("error issuing CTL_LUN_REQ ioctl"); return (1); } switch (req.status) { case CTL_LUN_ERROR: log_warnx("LUN modification error: %s", req.error_str); return (1); case CTL_LUN_WARNING: log_warnx("LUN 
modification warning: %s", req.error_str); break; case CTL_LUN_OK: break; default: log_warnx("unknown LUN modification status: %d", req.status); return (1); } return (0); } int kernel_lun_remove(struct lun *lun) { struct ctl_lun_req req; bzero(&req, sizeof(req)); strlcpy(req.backend, lun->l_backend, sizeof(req.backend)); req.reqtype = CTL_LUNREQ_RM; req.reqdata.rm.lun_id = lun->l_ctl_lun; if (ioctl(ctl_fd, CTL_LUN_REQ, &req) == -1) { log_warn("error issuing CTL_LUN_REQ ioctl"); return (1); } switch (req.status) { case CTL_LUN_ERROR: log_warnx("LUN removal error: %s", req.error_str); return (1); case CTL_LUN_WARNING: log_warnx("LUN removal warning: %s", req.error_str); break; case CTL_LUN_OK: break; default: log_warnx("unknown LUN removal status: %d", req.status); return (1); } return (0); } void kernel_handoff(struct ctld_connection *conn) { struct ctl_iscsi req; bzero(&req, sizeof(req)); req.type = CTL_ISCSI_HANDOFF; strlcpy(req.data.handoff.initiator_name, conn->conn_initiator_name, sizeof(req.data.handoff.initiator_name)); strlcpy(req.data.handoff.initiator_addr, conn->conn_initiator_addr, sizeof(req.data.handoff.initiator_addr)); if (conn->conn_initiator_alias != NULL) { strlcpy(req.data.handoff.initiator_alias, conn->conn_initiator_alias, sizeof(req.data.handoff.initiator_alias)); } memcpy(req.data.handoff.initiator_isid, conn->conn_initiator_isid, sizeof(req.data.handoff.initiator_isid)); strlcpy(req.data.handoff.target_name, conn->conn_target->t_name, sizeof(req.data.handoff.target_name)); if (conn->conn_portal->p_portal_group->pg_offload != NULL) { strlcpy(req.data.handoff.offload, conn->conn_portal->p_portal_group->pg_offload, sizeof(req.data.handoff.offload)); } #ifdef ICL_KERNEL_PROXY if (proxy_mode) req.data.handoff.connection_id = conn->conn.conn_socket; else req.data.handoff.socket = conn->conn.conn_socket; #else req.data.handoff.socket = conn->conn.conn_socket; #endif req.data.handoff.portal_group_tag = conn->conn_portal->p_portal_group->pg_tag; if 
(conn->conn.conn_header_digest == CONN_DIGEST_CRC32C) req.data.handoff.header_digest = CTL_ISCSI_DIGEST_CRC32C; if (conn->conn.conn_data_digest == CONN_DIGEST_CRC32C) req.data.handoff.data_digest = CTL_ISCSI_DIGEST_CRC32C; req.data.handoff.cmdsn = conn->conn.conn_cmdsn; req.data.handoff.statsn = conn->conn.conn_statsn; req.data.handoff.max_recv_data_segment_length = conn->conn.conn_max_recv_data_segment_length; req.data.handoff.max_send_data_segment_length = conn->conn.conn_max_send_data_segment_length; req.data.handoff.max_burst_length = conn->conn.conn_max_burst_length; req.data.handoff.first_burst_length = conn->conn.conn_first_burst_length; req.data.handoff.immediate_data = conn->conn.conn_immediate_data; if (ioctl(ctl_fd, CTL_ISCSI, &req) == -1) { log_err(1, "error issuing CTL_ISCSI ioctl; " "dropping connection"); } if (req.status != CTL_ISCSI_OK) { log_errx(1, "error returned from CTL iSCSI handoff request: " "%s; dropping connection", req.error_str); } } void -kernel_limits(const char *offload, int *max_recv_dsl, int *max_send_dsl, +kernel_limits(const char *offload, int s, int *max_recv_dsl, int *max_send_dsl, int *max_burst_length, int *first_burst_length) { struct ctl_iscsi req; struct ctl_iscsi_limits_params *cilp; bzero(&req, sizeof(req)); req.type = CTL_ISCSI_LIMITS; cilp = (struct ctl_iscsi_limits_params *)&(req.data.limits); if (offload != NULL) { strlcpy(cilp->offload, offload, sizeof(cilp->offload)); } + cilp->socket = s; if (ioctl(ctl_fd, CTL_ISCSI, &req) == -1) { log_err(1, "error issuing CTL_ISCSI ioctl; " "dropping connection"); } if (req.status != CTL_ISCSI_OK) { log_errx(1, "error returned from CTL iSCSI limits request: " "%s; dropping connection", req.error_str); } if (cilp->max_recv_data_segment_length != 0) { *max_recv_dsl = cilp->max_recv_data_segment_length; *max_send_dsl = cilp->max_recv_data_segment_length; } if (cilp->max_send_data_segment_length != 0) *max_send_dsl = cilp->max_send_data_segment_length; if (cilp->max_burst_length != 
0) *max_burst_length = cilp->max_burst_length; if (cilp->first_burst_length != 0) *first_burst_length = cilp->first_burst_length; if (*max_burst_length < *first_burst_length) *first_burst_length = *max_burst_length; if (offload != NULL) { log_debugx("Kernel limits for offload \"%s\" are " "MaxRecvDataSegment=%d, max_send_dsl=%d, " "MaxBurstLength=%d, FirstBurstLength=%d", offload, *max_recv_dsl, *max_send_dsl, *max_burst_length, *first_burst_length); } else { log_debugx("Kernel limits are " "MaxRecvDataSegment=%d, max_send_dsl=%d, " "MaxBurstLength=%d, FirstBurstLength=%d", *max_recv_dsl, *max_send_dsl, *max_burst_length, *first_burst_length); } } int kernel_port_add(struct port *port) { struct option *o; struct ctl_port_entry entry; struct ctl_req req; struct ctl_lun_map lm; struct target *targ = port->p_target; struct portal_group *pg = port->p_portal_group; char result_buf[NVLIST_BUFSIZE]; int error, i; /* Create iSCSI port. */ if (port->p_portal_group || port->p_ioctl_port) { bzero(&req, sizeof(req)); req.reqtype = CTL_REQ_CREATE; if (port->p_portal_group) { strlcpy(req.driver, "iscsi", sizeof(req.driver)); req.args_nvl = nvlist_create(0); nvlist_add_string(req.args_nvl, "cfiscsi_target", targ->t_name); nvlist_add_string(req.args_nvl, "ctld_portal_group_name", pg->pg_name); nvlist_add_stringf(req.args_nvl, "cfiscsi_portal_group_tag", "%u", pg->pg_tag); if (targ->t_alias) { nvlist_add_string(req.args_nvl, "cfiscsi_target_alias", targ->t_alias); } TAILQ_FOREACH(o, &pg->pg_options, o_next) nvlist_add_string(req.args_nvl, o->o_name, o->o_value); } if (port->p_ioctl_port) { strlcpy(req.driver, "ioctl", sizeof(req.driver)); req.args_nvl = nvlist_create(0); nvlist_add_stringf(req.args_nvl, "pp", "%d", port->p_ioctl_pp); nvlist_add_stringf(req.args_nvl, "vp", "%d", port->p_ioctl_vp); } req.args = nvlist_pack(req.args_nvl, &req.args_len); if (req.args == NULL) { log_warn("error packing nvlist"); return (1); } req.result = result_buf; req.result_len = sizeof(result_buf); 
error = ioctl(ctl_fd, CTL_PORT_REQ, &req); nvlist_destroy(req.args_nvl); if (error != 0) { log_warn("error issuing CTL_PORT_REQ ioctl"); return (1); } if (req.status == CTL_LUN_ERROR) { log_warnx("error returned from port creation request: %s", req.error_str); return (1); } if (req.status != CTL_LUN_OK) { log_warnx("unknown port creation request status %d", req.status); return (1); } req.result_nvl = nvlist_unpack(result_buf, req.result_len, 0); if (req.result_nvl == NULL) { log_warnx("error unpacking result nvlist"); return (1); } port->p_ctl_port = nvlist_get_number(req.result_nvl, "port_id"); nvlist_destroy(req.result_nvl); } else if (port->p_pport) { port->p_ctl_port = port->p_pport->pp_ctl_port; if (strncmp(targ->t_name, "naa.", 4) == 0 && strlen(targ->t_name) == 20) { bzero(&entry, sizeof(entry)); entry.port_type = CTL_PORT_NONE; entry.targ_port = port->p_ctl_port; entry.flags |= CTL_PORT_WWNN_VALID; entry.wwnn = strtoull(targ->t_name + 4, NULL, 16); if (ioctl(ctl_fd, CTL_SET_PORT_WWNS, &entry) == -1) log_warn("CTL_SET_PORT_WWNS ioctl failed"); } } /* Explicitly enable mapping to block any access except allowed. 
*/ lm.port = port->p_ctl_port; lm.plun = UINT32_MAX; lm.lun = 0; error = ioctl(ctl_fd, CTL_LUN_MAP, &lm); if (error != 0) log_warn("CTL_LUN_MAP ioctl failed"); /* Map configured LUNs */ for (i = 0; i < MAX_LUNS; i++) { if (targ->t_luns[i] == NULL) continue; lm.port = port->p_ctl_port; lm.plun = i; lm.lun = targ->t_luns[i]->l_ctl_lun; error = ioctl(ctl_fd, CTL_LUN_MAP, &lm); if (error != 0) log_warn("CTL_LUN_MAP ioctl failed"); } /* Enable port */ bzero(&entry, sizeof(entry)); entry.targ_port = port->p_ctl_port; error = ioctl(ctl_fd, CTL_ENABLE_PORT, &entry); if (error != 0) { log_warn("CTL_ENABLE_PORT ioctl failed"); return (-1); } return (0); } int kernel_port_update(struct port *port, struct port *oport) { struct ctl_lun_map lm; struct target *targ = port->p_target; struct target *otarg = oport->p_target; int error, i; uint32_t olun; /* Map configured LUNs and unmap others */ for (i = 0; i < MAX_LUNS; i++) { lm.port = port->p_ctl_port; lm.plun = i; if (targ->t_luns[i] == NULL) lm.lun = UINT32_MAX; else lm.lun = targ->t_luns[i]->l_ctl_lun; if (otarg->t_luns[i] == NULL) olun = UINT32_MAX; else olun = otarg->t_luns[i]->l_ctl_lun; if (lm.lun == olun) continue; error = ioctl(ctl_fd, CTL_LUN_MAP, &lm); if (error != 0) log_warn("CTL_LUN_MAP ioctl failed"); } return (0); } int kernel_port_remove(struct port *port) { struct ctl_port_entry entry; struct ctl_lun_map lm; struct ctl_req req; struct target *targ = port->p_target; struct portal_group *pg = port->p_portal_group; int error; /* Disable port */ bzero(&entry, sizeof(entry)); entry.targ_port = port->p_ctl_port; error = ioctl(ctl_fd, CTL_DISABLE_PORT, &entry); if (error != 0) { log_warn("CTL_DISABLE_PORT ioctl failed"); return (-1); } /* Remove iSCSI or ioctl port. */ if (port->p_portal_group || port->p_ioctl_port) { bzero(&req, sizeof(req)); strlcpy(req.driver, port->p_ioctl_port ? 
"ioctl" : "iscsi", sizeof(req.driver)); req.reqtype = CTL_REQ_REMOVE; req.args_nvl = nvlist_create(0); if (req.args_nvl == NULL) log_err(1, "nvlist_create"); if (port->p_ioctl_port) nvlist_add_stringf(req.args_nvl, "port_id", "%d", port->p_ctl_port); else { nvlist_add_string(req.args_nvl, "cfiscsi_target", targ->t_name); nvlist_add_stringf(req.args_nvl, "cfiscsi_portal_group_tag", "%u", pg->pg_tag); } req.args = nvlist_pack(req.args_nvl, &req.args_len); if (req.args == NULL) { log_warn("error packing nvlist"); return (1); } error = ioctl(ctl_fd, CTL_PORT_REQ, &req); nvlist_destroy(req.args_nvl); if (error != 0) { log_warn("error issuing CTL_PORT_REQ ioctl"); return (1); } if (req.status == CTL_LUN_ERROR) { log_warnx("error returned from port removal request: %s", req.error_str); return (1); } if (req.status != CTL_LUN_OK) { log_warnx("unknown port removal request status %d", req.status); return (1); } } else { /* Disable LUN mapping. */ lm.port = port->p_ctl_port; lm.plun = UINT32_MAX; lm.lun = UINT32_MAX; error = ioctl(ctl_fd, CTL_LUN_MAP, &lm); if (error != 0) log_warn("CTL_LUN_MAP ioctl failed"); } return (0); } #ifdef ICL_KERNEL_PROXY void kernel_listen(struct addrinfo *ai, bool iser, int portal_id) { struct ctl_iscsi req; bzero(&req, sizeof(req)); req.type = CTL_ISCSI_LISTEN; req.data.listen.iser = iser; req.data.listen.domain = ai->ai_family; req.data.listen.socktype = ai->ai_socktype; req.data.listen.protocol = ai->ai_protocol; req.data.listen.addr = ai->ai_addr; req.data.listen.addrlen = ai->ai_addrlen; req.data.listen.portal_id = portal_id; if (ioctl(ctl_fd, CTL_ISCSI, &req) == -1) log_err(1, "error issuing CTL_ISCSI ioctl"); if (req.status != CTL_ISCSI_OK) { log_errx(1, "error returned from CTL iSCSI listen: %s", req.error_str); } } void kernel_accept(int *connection_id, int *portal_id, struct sockaddr *client_sa, socklen_t *client_salen) { struct ctl_iscsi req; struct sockaddr_storage ss; bzero(&req, sizeof(req)); req.type = CTL_ISCSI_ACCEPT; 
req.data.accept.initiator_addr = (struct sockaddr *)&ss; if (ioctl(ctl_fd, CTL_ISCSI, &req) == -1) log_err(1, "error issuing CTL_ISCSI ioctl"); if (req.status != CTL_ISCSI_OK) { log_errx(1, "error returned from CTL iSCSI accept: %s", req.error_str); } *connection_id = req.data.accept.connection_id; *portal_id = req.data.accept.portal_id; *client_salen = req.data.accept.initiator_addrlen; memcpy(client_sa, &ss, *client_salen); } void kernel_send(struct pdu *pdu) { struct ctl_iscsi req; bzero(&req, sizeof(req)); req.type = CTL_ISCSI_SEND; req.data.send.connection_id = pdu->pdu_connection->conn_socket; req.data.send.bhs = pdu->pdu_bhs; req.data.send.data_segment_len = pdu->pdu_data_len; req.data.send.data_segment = pdu->pdu_data; if (ioctl(ctl_fd, CTL_ISCSI, &req) == -1) { log_err(1, "error issuing CTL_ISCSI ioctl; " "dropping connection"); } if (req.status != CTL_ISCSI_OK) { log_errx(1, "error returned from CTL iSCSI send: " "%s; dropping connection", req.error_str); } } void kernel_receive(struct pdu *pdu) { struct connection *conn; struct ctl_iscsi req; conn = pdu->pdu_connection; pdu->pdu_data = malloc(conn->conn_max_recv_data_segment_length); if (pdu->pdu_data == NULL) log_err(1, "malloc"); bzero(&req, sizeof(req)); req.type = CTL_ISCSI_RECEIVE; req.data.receive.connection_id = conn->conn_socket; req.data.receive.bhs = pdu->pdu_bhs; req.data.receive.data_segment_len = conn->conn_max_recv_data_segment_length; req.data.receive.data_segment = pdu->pdu_data; if (ioctl(ctl_fd, CTL_ISCSI, &req) == -1) { log_err(1, "error issuing CTL_ISCSI ioctl; " "dropping connection"); } if (req.status != CTL_ISCSI_OK) { log_errx(1, "error returned from CTL iSCSI receive: " "%s; dropping connection", req.error_str); } } #endif /* ICL_KERNEL_PROXY */ /* * XXX: I CANT INTO LATIN */ void kernel_capsicate(void) { cap_rights_t rights; const unsigned long cmds[] = { CTL_ISCSI }; cap_rights_init(&rights, CAP_IOCTL); if (caph_rights_limit(ctl_fd, &rights) < 0) log_err(1, "cap_rights_limit"); 
if (caph_ioctls_limit(ctl_fd, cmds, nitems(cmds)) < 0) log_err(1, "cap_ioctls_limit"); if (caph_enter() < 0) log_err(1, "cap_enter"); if (cap_sandboxed()) log_debugx("Capsicum capability mode enabled"); else log_warnx("Capsicum capability mode not supported"); } diff --git a/usr.sbin/ctld/login.c b/usr.sbin/ctld/login.c index 19fab3bc494c..c52733f7894a 100644 --- a/usr.sbin/ctld/login.c +++ b/usr.sbin/ctld/login.c @@ -1,1060 +1,1061 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "ctld.h" #include "iscsi_proto.h" static void login_send_error(struct pdu *request, char class, char detail); static void login_set_nsg(struct pdu *response, int nsg) { struct iscsi_bhs_login_response *bhslr; assert(nsg == BHSLR_STAGE_SECURITY_NEGOTIATION || nsg == BHSLR_STAGE_OPERATIONAL_NEGOTIATION || nsg == BHSLR_STAGE_FULL_FEATURE_PHASE); bhslr = (struct iscsi_bhs_login_response *)response->pdu_bhs; bhslr->bhslr_flags &= 0xFC; bhslr->bhslr_flags |= nsg; bhslr->bhslr_flags |= BHSLR_FLAGS_TRANSIT; } static int login_csg(const struct pdu *request) { struct iscsi_bhs_login_request *bhslr; bhslr = (struct iscsi_bhs_login_request *)request->pdu_bhs; return ((bhslr->bhslr_flags & 0x0C) >> 2); } static void login_set_csg(struct pdu *response, int csg) { struct iscsi_bhs_login_response *bhslr; assert(csg == BHSLR_STAGE_SECURITY_NEGOTIATION || csg == BHSLR_STAGE_OPERATIONAL_NEGOTIATION || csg == BHSLR_STAGE_FULL_FEATURE_PHASE); bhslr = (struct iscsi_bhs_login_response *)response->pdu_bhs; bhslr->bhslr_flags &= 0xF3; bhslr->bhslr_flags |= csg << 2; } static struct pdu * login_receive(struct connection *conn, bool initial) { struct pdu *request; struct iscsi_bhs_login_request *bhslr; request = pdu_new(conn); pdu_receive(request); if ((request->pdu_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) != ISCSI_BHS_OPCODE_LOGIN_REQUEST) { /* * The first PDU in session is special - if we receive any PDU * different than login request, we have to drop the connection * without sending response ("A target receiving any PDU * except a Login request before the Login Phase is started MUST * immediately terminate the connection on which the PDU * was received.") */ if (initial == false) login_send_error(request, 0x02, 0x0b); log_errx(1, "protocol error: received invalid opcode 0x%x", request->pdu_bhs->bhs_opcode); } bhslr = (struct iscsi_bhs_login_request *)request->pdu_bhs; /* * XXX: 
Implement the C flag some day. */ if ((bhslr->bhslr_flags & BHSLR_FLAGS_CONTINUE) != 0) { login_send_error(request, 0x03, 0x00); log_errx(1, "received Login PDU with unsupported \"C\" flag"); } if (bhslr->bhslr_version_max != 0x00) { login_send_error(request, 0x02, 0x05); log_errx(1, "received Login PDU with unsupported " "Version-max 0x%x", bhslr->bhslr_version_max); } if (bhslr->bhslr_version_min != 0x00) { login_send_error(request, 0x02, 0x05); log_errx(1, "received Login PDU with unsupported " "Version-min 0x%x", bhslr->bhslr_version_min); } if (initial == false && ISCSI_SNLT(ntohl(bhslr->bhslr_cmdsn), conn->conn_cmdsn)) { login_send_error(request, 0x02, 0x00); log_errx(1, "received Login PDU with decreasing CmdSN: " "was %u, is %u", conn->conn_cmdsn, ntohl(bhslr->bhslr_cmdsn)); } if (initial == false && ntohl(bhslr->bhslr_expstatsn) != conn->conn_statsn) { login_send_error(request, 0x02, 0x00); log_errx(1, "received Login PDU with wrong ExpStatSN: " "is %u, should be %u", ntohl(bhslr->bhslr_expstatsn), conn->conn_statsn); } conn->conn_cmdsn = ntohl(bhslr->bhslr_cmdsn); return (request); } static struct pdu * login_new_response(struct pdu *request) { struct pdu *response; struct connection *conn; struct iscsi_bhs_login_request *bhslr; struct iscsi_bhs_login_response *bhslr2; bhslr = (struct iscsi_bhs_login_request *)request->pdu_bhs; conn = request->pdu_connection; response = pdu_new_response(request); bhslr2 = (struct iscsi_bhs_login_response *)response->pdu_bhs; bhslr2->bhslr_opcode = ISCSI_BHS_OPCODE_LOGIN_RESPONSE; login_set_csg(response, BHSLR_STAGE_SECURITY_NEGOTIATION); memcpy(bhslr2->bhslr_isid, bhslr->bhslr_isid, sizeof(bhslr2->bhslr_isid)); bhslr2->bhslr_initiator_task_tag = bhslr->bhslr_initiator_task_tag; bhslr2->bhslr_statsn = htonl(conn->conn_statsn++); bhslr2->bhslr_expcmdsn = htonl(conn->conn_cmdsn); bhslr2->bhslr_maxcmdsn = htonl(conn->conn_cmdsn); return (response); } static void login_send_error(struct pdu *request, char class, char detail) { 
struct pdu *response; struct iscsi_bhs_login_response *bhslr2; log_debugx("sending Login Response PDU with failure class 0x%x/0x%x; " "see next line for reason", class, detail); response = login_new_response(request); bhslr2 = (struct iscsi_bhs_login_response *)response->pdu_bhs; bhslr2->bhslr_status_class = class; bhslr2->bhslr_status_detail = detail; pdu_send(response); pdu_delete(response); } static int login_list_contains(const char *list, const char *what) { char *tofree, *str, *token; tofree = str = checked_strdup(list); while ((token = strsep(&str, ",")) != NULL) { if (strcmp(token, what) == 0) { free(tofree); return (1); } } free(tofree); return (0); } static int login_list_prefers(const char *list, const char *choice1, const char *choice2) { char *tofree, *str, *token; tofree = str = checked_strdup(list); while ((token = strsep(&str, ",")) != NULL) { if (strcmp(token, choice1) == 0) { free(tofree); return (1); } if (strcmp(token, choice2) == 0) { free(tofree); return (2); } } free(tofree); return (-1); } static struct pdu * login_receive_chap_a(struct connection *conn) { struct pdu *request; struct keys *request_keys; const char *chap_a; request = login_receive(conn, false); request_keys = keys_new(); keys_load_pdu(request_keys, request); chap_a = keys_find(request_keys, "CHAP_A"); if (chap_a == NULL) { login_send_error(request, 0x02, 0x07); log_errx(1, "received CHAP Login PDU without CHAP_A"); } if (login_list_contains(chap_a, "5") == 0) { login_send_error(request, 0x02, 0x01); log_errx(1, "received CHAP Login PDU with unsupported CHAP_A " "\"%s\"", chap_a); } keys_delete(request_keys); return (request); } static void login_send_chap_c(struct pdu *request, struct chap *chap) { struct pdu *response; struct keys *response_keys; char *chap_c, *chap_i; chap_c = chap_get_challenge(chap); chap_i = chap_get_id(chap); response = login_new_response(request); response_keys = keys_new(); keys_add(response_keys, "CHAP_A", "5"); keys_add(response_keys, "CHAP_I", 
chap_i); keys_add(response_keys, "CHAP_C", chap_c); free(chap_i); free(chap_c); keys_save_pdu(response_keys, response); pdu_send(response); pdu_delete(response); keys_delete(response_keys); } static struct pdu * login_receive_chap_r(struct connection *conn, struct auth_group *ag, struct chap *chap, const struct auth **authp) { struct pdu *request; struct keys *request_keys; const char *chap_n, *chap_r; const struct auth *auth; int error; request = login_receive(conn, false); request_keys = keys_new(); keys_load_pdu(request_keys, request); chap_n = keys_find(request_keys, "CHAP_N"); if (chap_n == NULL) { login_send_error(request, 0x02, 0x07); log_errx(1, "received CHAP Login PDU without CHAP_N"); } chap_r = keys_find(request_keys, "CHAP_R"); if (chap_r == NULL) { login_send_error(request, 0x02, 0x07); log_errx(1, "received CHAP Login PDU without CHAP_R"); } error = chap_receive(chap, chap_r); if (error != 0) { login_send_error(request, 0x02, 0x07); log_errx(1, "received CHAP Login PDU with malformed CHAP_R"); } /* * Verify the response. */ assert(ag->ag_type == AG_TYPE_CHAP || ag->ag_type == AG_TYPE_CHAP_MUTUAL); auth = auth_find(ag, chap_n); if (auth == NULL) { login_send_error(request, 0x02, 0x01); log_errx(1, "received CHAP Login with invalid user \"%s\"", chap_n); } assert(auth->a_secret != NULL); assert(strlen(auth->a_secret) > 0); error = chap_authenticate(chap, auth->a_secret); if (error != 0) { login_send_error(request, 0x02, 0x01); log_errx(1, "CHAP authentication failed for user \"%s\"", auth->a_user); } keys_delete(request_keys); *authp = auth; return (request); } static void login_send_chap_success(struct pdu *request, const struct auth *auth) { struct pdu *response; struct keys *request_keys, *response_keys; struct rchap *rchap; const char *chap_i, *chap_c; char *chap_r; int error; response = login_new_response(request); login_set_nsg(response, BHSLR_STAGE_OPERATIONAL_NEGOTIATION); /* * Actually, one more thing: mutual authentication. 
*/ request_keys = keys_new(); keys_load_pdu(request_keys, request); chap_i = keys_find(request_keys, "CHAP_I"); chap_c = keys_find(request_keys, "CHAP_C"); if (chap_i != NULL || chap_c != NULL) { if (chap_i == NULL) { login_send_error(request, 0x02, 0x07); log_errx(1, "initiator requested target " "authentication, but didn't send CHAP_I"); } if (chap_c == NULL) { login_send_error(request, 0x02, 0x07); log_errx(1, "initiator requested target " "authentication, but didn't send CHAP_C"); } if (auth->a_auth_group->ag_type != AG_TYPE_CHAP_MUTUAL) { login_send_error(request, 0x02, 0x01); log_errx(1, "initiator requests target authentication " "for user \"%s\", but mutual user/secret " "is not set", auth->a_user); } log_debugx("performing mutual authentication as user \"%s\"", auth->a_mutual_user); rchap = rchap_new(auth->a_mutual_secret); error = rchap_receive(rchap, chap_i, chap_c); if (error != 0) { login_send_error(request, 0x02, 0x07); log_errx(1, "received CHAP Login PDU with malformed " "CHAP_I or CHAP_C"); } chap_r = rchap_get_response(rchap); rchap_delete(rchap); response_keys = keys_new(); keys_add(response_keys, "CHAP_N", auth->a_mutual_user); keys_add(response_keys, "CHAP_R", chap_r); free(chap_r); keys_save_pdu(response_keys, response); keys_delete(response_keys); } else { log_debugx("initiator did not request target authentication"); } keys_delete(request_keys); pdu_send(response); pdu_delete(response); } static void login_chap(struct ctld_connection *conn, struct auth_group *ag) { const struct auth *auth; struct chap *chap; struct pdu *request; /* * Receive CHAP_A PDU. */ log_debugx("beginning CHAP authentication; waiting for CHAP_A"); request = login_receive_chap_a(&conn->conn); /* * Generate the challenge. */ chap = chap_new(); /* * Send the challenge. */ log_debugx("sending CHAP_C, binary challenge size is %zd bytes", sizeof(chap->chap_challenge)); login_send_chap_c(request, chap); pdu_delete(request); /* * Receive CHAP_N/CHAP_R PDU and authenticate. 
*/ log_debugx("waiting for CHAP_N/CHAP_R"); request = login_receive_chap_r(&conn->conn, ag, chap, &auth); /* * Yay, authentication succeeded! */ log_debugx("authentication succeeded for user \"%s\"; " "transitioning to operational parameter negotiation", auth->a_user); login_send_chap_success(request, auth); pdu_delete(request); /* * Leave username and CHAP information for discovery(). */ conn->conn_user = auth->a_user; conn->conn_chap = chap; } static void login_negotiate_key(struct pdu *request, const char *name, const char *value, bool skipped_security, struct keys *response_keys) { int which; size_t tmp; struct ctld_connection *conn; conn = (struct ctld_connection *)request->pdu_connection; if (strcmp(name, "InitiatorName") == 0) { if (!skipped_security) log_errx(1, "initiator resent InitiatorName"); } else if (strcmp(name, "SessionType") == 0) { if (!skipped_security) log_errx(1, "initiator resent SessionType"); } else if (strcmp(name, "TargetName") == 0) { if (!skipped_security) log_errx(1, "initiator resent TargetName"); } else if (strcmp(name, "InitiatorAlias") == 0) { if (conn->conn_initiator_alias != NULL) free(conn->conn_initiator_alias); conn->conn_initiator_alias = checked_strdup(value); } else if (strcmp(value, "Irrelevant") == 0) { /* Ignore. */ } else if (strcmp(name, "HeaderDigest") == 0) { /* * We don't handle digests for discovery sessions. 
*/ if (conn->conn_session_type == CONN_SESSION_TYPE_DISCOVERY) { log_debugx("discovery session; digests disabled"); keys_add(response_keys, name, "None"); return; } which = login_list_prefers(value, "CRC32C", "None"); switch (which) { case 1: log_debugx("initiator prefers CRC32C " "for header digest; we'll use it"); conn->conn.conn_header_digest = CONN_DIGEST_CRC32C; keys_add(response_keys, name, "CRC32C"); break; case 2: log_debugx("initiator prefers not to do " "header digest; we'll comply"); keys_add(response_keys, name, "None"); break; default: log_warnx("initiator sent unrecognized " "HeaderDigest value \"%s\"; will use None", value); keys_add(response_keys, name, "None"); break; } } else if (strcmp(name, "DataDigest") == 0) { if (conn->conn_session_type == CONN_SESSION_TYPE_DISCOVERY) { log_debugx("discovery session; digests disabled"); keys_add(response_keys, name, "None"); return; } which = login_list_prefers(value, "CRC32C", "None"); switch (which) { case 1: log_debugx("initiator prefers CRC32C " "for data digest; we'll use it"); conn->conn.conn_data_digest = CONN_DIGEST_CRC32C; keys_add(response_keys, name, "CRC32C"); break; case 2: log_debugx("initiator prefers not to do " "data digest; we'll comply"); keys_add(response_keys, name, "None"); break; default: log_warnx("initiator sent unrecognized " "DataDigest value \"%s\"; will use None", value); keys_add(response_keys, name, "None"); break; } } else if (strcmp(name, "MaxConnections") == 0) { keys_add(response_keys, name, "1"); } else if (strcmp(name, "InitialR2T") == 0) { keys_add(response_keys, name, "Yes"); } else if (strcmp(name, "ImmediateData") == 0) { if (conn->conn_session_type == CONN_SESSION_TYPE_DISCOVERY) { log_debugx("discovery session; ImmediateData irrelevant"); keys_add(response_keys, name, "Irrelevant"); } else { if (strcmp(value, "Yes") == 0) { conn->conn.conn_immediate_data = true; keys_add(response_keys, name, "Yes"); } else { conn->conn.conn_immediate_data = false; 
keys_add(response_keys, name, "No"); } } } else if (strcmp(name, "MaxRecvDataSegmentLength") == 0) { tmp = strtoul(value, NULL, 10); if (tmp <= 0) { login_send_error(request, 0x02, 0x00); log_errx(1, "received invalid " "MaxRecvDataSegmentLength"); } /* * MaxRecvDataSegmentLength is a direction-specific parameter. * We'll limit our _send_ to what the initiator can handle but * our MaxRecvDataSegmentLength is not influenced by the * initiator in any way. */ if ((int)tmp > conn->conn_max_send_data_segment_limit) { log_debugx("capping MaxRecvDataSegmentLength " "from %zd to %d", tmp, conn->conn_max_send_data_segment_limit); tmp = conn->conn_max_send_data_segment_limit; } conn->conn.conn_max_send_data_segment_length = tmp; } else if (strcmp(name, "MaxBurstLength") == 0) { tmp = strtoul(value, NULL, 10); if (tmp <= 0) { login_send_error(request, 0x02, 0x00); log_errx(1, "received invalid MaxBurstLength"); } if ((int)tmp > conn->conn_max_burst_limit) { log_debugx("capping MaxBurstLength from %zd to %d", tmp, conn->conn_max_burst_limit); tmp = conn->conn_max_burst_limit; } conn->conn.conn_max_burst_length = tmp; keys_add_int(response_keys, name, tmp); } else if (strcmp(name, "FirstBurstLength") == 0) { tmp = strtoul(value, NULL, 10); if (tmp <= 0) { login_send_error(request, 0x02, 0x00); log_errx(1, "received invalid FirstBurstLength"); } if ((int)tmp > conn->conn_first_burst_limit) { log_debugx("capping FirstBurstLength from %zd to %d", tmp, conn->conn_first_burst_limit); tmp = conn->conn_first_burst_limit; } conn->conn.conn_first_burst_length = tmp; keys_add_int(response_keys, name, tmp); } else if (strcmp(name, "DefaultTime2Wait") == 0) { keys_add(response_keys, name, value); } else if (strcmp(name, "DefaultTime2Retain") == 0) { keys_add(response_keys, name, "0"); } else if (strcmp(name, "MaxOutstandingR2T") == 0) { keys_add(response_keys, name, "1"); } else if (strcmp(name, "DataPDUInOrder") == 0) { keys_add(response_keys, name, "Yes"); } else if (strcmp(name, 
"DataSequenceInOrder") == 0) { keys_add(response_keys, name, "Yes"); } else if (strcmp(name, "ErrorRecoveryLevel") == 0) { keys_add(response_keys, name, "0"); } else if (strcmp(name, "OFMarker") == 0) { keys_add(response_keys, name, "No"); } else if (strcmp(name, "IFMarker") == 0) { keys_add(response_keys, name, "No"); } else if (strcmp(name, "iSCSIProtocolLevel") == 0) { tmp = strtoul(value, NULL, 10); if (tmp > 2) tmp = 2; keys_add_int(response_keys, name, tmp); } else { log_debugx("unknown key \"%s\"; responding " "with NotUnderstood", name); keys_add(response_keys, name, "NotUnderstood"); } } static void login_redirect(struct pdu *request, const char *target_address) { struct pdu *response; struct iscsi_bhs_login_response *bhslr2; struct keys *response_keys; response = login_new_response(request); login_set_csg(response, login_csg(request)); bhslr2 = (struct iscsi_bhs_login_response *)response->pdu_bhs; bhslr2->bhslr_status_class = 0x01; bhslr2->bhslr_status_detail = 0x01; response_keys = keys_new(); keys_add(response_keys, "TargetAddress", target_address); keys_save_pdu(response_keys, response); pdu_send(response); pdu_delete(response); keys_delete(response_keys); } static bool login_portal_redirect(struct ctld_connection *conn, struct pdu *request) { const struct portal_group *pg; pg = conn->conn_portal->p_portal_group; if (pg->pg_redirection == NULL) return (false); log_debugx("portal-group \"%s\" configured to redirect to %s", pg->pg_name, pg->pg_redirection); login_redirect(request, pg->pg_redirection); return (true); } static bool login_target_redirect(struct ctld_connection *conn, struct pdu *request) { const char *target_address; assert(conn->conn_portal->p_portal_group->pg_redirection == NULL); if (conn->conn_target == NULL) return (false); target_address = conn->conn_target->t_redirection; if (target_address == NULL) return (false); log_debugx("target \"%s\" configured to redirect to %s", conn->conn_target->t_name, target_address); 
login_redirect(request, target_address); return (true); } /* Run the operational parameter negotiation stage of login. When request is NULL a fresh Login PDU is received; otherwise the PDU passed in by login() is used and the security stage is treated as skipped. The request PDU is deleted before returning. */ static void login_negotiate(struct ctld_connection *conn, struct pdu *request) { struct pdu *response; struct iscsi_bhs_login_response *bhslr2; struct keys *request_keys, *response_keys; int i; bool redirected, skipped_security; if (conn->conn_session_type == CONN_SESSION_TYPE_NORMAL) { /* * Query the kernel for various size limits. In case of * offload, it depends on hardware capabilities. */ assert(conn->conn_target != NULL); conn->conn_max_recv_data_segment_limit = (1 << 24) - 1; conn->conn_max_send_data_segment_limit = (1 << 24) - 1; conn->conn_max_burst_limit = (1 << 24) - 1; conn->conn_first_burst_limit = (1 << 24) - 1; kernel_limits(conn->conn_portal->p_portal_group->pg_offload, + conn->conn.conn_socket, &conn->conn_max_recv_data_segment_limit, &conn->conn_max_send_data_segment_limit, &conn->conn_max_burst_limit, &conn->conn_first_burst_limit); /* We expect legal, usable values at this point. */ assert(conn->conn_max_recv_data_segment_limit >= 512); assert(conn->conn_max_recv_data_segment_limit < (1 << 24)); assert(conn->conn_max_send_data_segment_limit >= 512); assert(conn->conn_max_send_data_segment_limit < (1 << 24)); assert(conn->conn_max_burst_limit >= 512); assert(conn->conn_max_burst_limit < (1 << 24)); assert(conn->conn_first_burst_limit >= 512); assert(conn->conn_first_burst_limit < (1 << 24)); assert(conn->conn_first_burst_limit <= conn->conn_max_burst_limit); /* * Limit default send length in case it won't be negotiated. * We can't do it for other limits, since they may affect both * sender and receiver operation, and we must obey defaults.
*/ if (conn->conn_max_send_data_segment_limit < conn->conn.conn_max_send_data_segment_length) { conn->conn.conn_max_send_data_segment_length = conn->conn_max_send_data_segment_limit; } } else { conn->conn_max_recv_data_segment_limit = MAX_DATA_SEGMENT_LENGTH; conn->conn_max_send_data_segment_limit = MAX_DATA_SEGMENT_LENGTH; } if (request == NULL) { log_debugx("beginning operational parameter negotiation; " "waiting for Login PDU"); request = login_receive(&conn->conn, false); skipped_security = false; } else skipped_security = true; /* * RFC 3720, 10.13.5. Status-Class and Status-Detail, says * the redirection SHOULD be accepted by the initiator before * authentication, but MUST be accepted afterwards; that's * why we're doing it here and not earlier. */ redirected = login_target_redirect(conn, request); if (redirected) { log_debugx("initiator redirected; exiting"); exit(0); } request_keys = keys_new(); keys_load_pdu(request_keys, request); response = login_new_response(request); bhslr2 = (struct iscsi_bhs_login_response *)response->pdu_bhs; bhslr2->bhslr_tsih = htons(0xbadd); login_set_csg(response, BHSLR_STAGE_OPERATIONAL_NEGOTIATION); login_set_nsg(response, BHSLR_STAGE_FULL_FEATURE_PHASE); response_keys = keys_new(); /* If the security stage was skipped, login() returned without sending a response of its own, so TargetAlias and TargetPortalGroupTag are added to this one. */ if (skipped_security && conn->conn_session_type == CONN_SESSION_TYPE_NORMAL) { if (conn->conn_target->t_alias != NULL) keys_add(response_keys, "TargetAlias", conn->conn_target->t_alias); keys_add_int(response_keys, "TargetPortalGroupTag", conn->conn_portal->p_portal_group->pg_tag); } for (i = 0; i < KEYS_MAX; i++) { if (request_keys->keys_names[i] == NULL) break; login_negotiate_key(request, request_keys->keys_names[i], request_keys->keys_values[i], skipped_security, response_keys); } /* * We'd started with usable values at our end.
But a bad initiator * could have presented a large FirstBurstLength and then a smaller * MaxBurstLength (in that order) and because we process the key/value * pairs in the order they are in the request we might have ended up * with illegal values here. */ if (conn->conn_session_type == CONN_SESSION_TYPE_NORMAL && conn->conn.conn_first_burst_length > conn->conn.conn_max_burst_length) { log_errx(1, "initiator sent FirstBurstLength > MaxBurstLength"); } /* MaxRecvDataSegmentLength is always declared in the response, set to our own receive limit. */ conn->conn.conn_max_recv_data_segment_length = conn->conn_max_recv_data_segment_limit; keys_add_int(response_keys, "MaxRecvDataSegmentLength", conn->conn.conn_max_recv_data_segment_length); log_debugx("operational parameter negotiation done; " "transitioning to Full Feature Phase"); keys_save_pdu(response_keys, response); pdu_send(response); pdu_delete(response); keys_delete(response_keys); pdu_delete(request); keys_delete(request_keys); } /* Wait for the Login PDU that carries the T (transit) flag, answer it with NSG set to operational negotiation, then continue with login_negotiate(). */ static void login_wait_transition(struct ctld_connection *conn) { struct pdu *request, *response; struct iscsi_bhs_login_request *bhslr; log_debugx("waiting for state transition request"); request = login_receive(&conn->conn, false); bhslr = (struct iscsi_bhs_login_request *)request->pdu_bhs; if ((bhslr->bhslr_flags & BHSLR_FLAGS_TRANSIT) == 0) { login_send_error(request, 0x02, 0x00); log_errx(1, "got no \"T\" flag after answering AuthMethod"); } log_debugx("got state transition request"); response = login_new_response(request); pdu_delete(request); login_set_nsg(response, BHSLR_STAGE_OPERATIONAL_NEGOTIATION); pdu_send(response); pdu_delete(response); login_negotiate(conn, NULL); } /* Entry point for the Login Phase of a new connection: validates the initial Login Request, selects the auth-group, answers AuthMethod and then continues with CHAP and/or operational negotiation. */ void login(struct ctld_connection *conn) { struct pdu *request, *response; struct iscsi_bhs_login_request *bhslr; struct keys *request_keys, *response_keys; struct auth_group *ag; struct portal_group *pg; const char *initiator_name, *initiator_alias, *session_type, *target_name, *auth_method; bool redirected, fail, trans; /* * Handle the initial Login Request - figure out required authentication * method
and either transition to the next phase, if no authentication * is required, or call appropriate authentication code. */ log_debugx("beginning Login Phase; waiting for Login PDU"); request = login_receive(&conn->conn, true); bhslr = (struct iscsi_bhs_login_request *)request->pdu_bhs; if (bhslr->bhslr_tsih != 0) { login_send_error(request, 0x02, 0x0a); log_errx(1, "received Login PDU with non-zero TSIH"); } pg = conn->conn_portal->p_portal_group; memcpy(conn->conn_initiator_isid, bhslr->bhslr_isid, sizeof(conn->conn_initiator_isid)); /* * XXX: Implement the C flag some day. */ request_keys = keys_new(); keys_load_pdu(request_keys, request); assert(conn->conn_initiator_name == NULL); initiator_name = keys_find(request_keys, "InitiatorName"); if (initiator_name == NULL) { login_send_error(request, 0x02, 0x07); log_errx(1, "received Login PDU without InitiatorName"); } if (valid_iscsi_name(initiator_name) == false) { login_send_error(request, 0x02, 0x00); log_errx(1, "received Login PDU with invalid InitiatorName"); } conn->conn_initiator_name = checked_strdup(initiator_name); log_set_peer_name(conn->conn_initiator_name); setproctitle("%s (%s)", conn->conn_initiator_addr, conn->conn_initiator_name); redirected = login_portal_redirect(conn, request); if (redirected) { log_debugx("initiator redirected; exiting"); exit(0); } initiator_alias = keys_find(request_keys, "InitiatorAlias"); if (initiator_alias != NULL) conn->conn_initiator_alias = checked_strdup(initiator_alias); /* A missing SessionType key is treated as a Normal session. */ assert(conn->conn_session_type == CONN_SESSION_TYPE_NONE); session_type = keys_find(request_keys, "SessionType"); if (session_type != NULL) { if (strcmp(session_type, "Normal") == 0) { conn->conn_session_type = CONN_SESSION_TYPE_NORMAL; } else if (strcmp(session_type, "Discovery") == 0) { conn->conn_session_type = CONN_SESSION_TYPE_DISCOVERY; } else { login_send_error(request, 0x02, 0x00); log_errx(1, "received Login PDU with invalid " "SessionType \"%s\"", session_type); } } else
conn->conn_session_type = CONN_SESSION_TYPE_NORMAL; assert(conn->conn_target == NULL); if (conn->conn_session_type == CONN_SESSION_TYPE_NORMAL) { target_name = keys_find(request_keys, "TargetName"); if (target_name == NULL) { login_send_error(request, 0x02, 0x07); log_errx(1, "received Login PDU without TargetName"); } conn->conn_port = port_find_in_pg(pg, target_name); if (conn->conn_port == NULL) { login_send_error(request, 0x02, 0x03); log_errx(1, "requested target \"%s\" not found", target_name); } conn->conn_target = conn->conn_port->p_target; } /* * At this point we know what kind of authentication we need. */ if (conn->conn_session_type == CONN_SESSION_TYPE_NORMAL) { /* The auth-group set on the port, when present, takes precedence over the one set on the target. */ ag = conn->conn_port->p_auth_group; if (ag == NULL) ag = conn->conn_target->t_auth_group; if (ag->ag_name != NULL) { log_debugx("initiator requests to connect " "to target \"%s\"; auth-group \"%s\"", conn->conn_target->t_name, ag->ag_name); } else { log_debugx("initiator requests to connect " "to target \"%s\"", conn->conn_target->t_name); } } else { assert(conn->conn_session_type == CONN_SESSION_TYPE_DISCOVERY); ag = pg->pg_discovery_auth_group; if (ag->ag_name != NULL) { log_debugx("initiator requests " "discovery session; auth-group \"%s\"", ag->ag_name); } else { log_debugx("initiator requests discovery session"); } } if (ag->ag_type == AG_TYPE_DENY) { login_send_error(request, 0x02, 0x01); log_errx(1, "auth-type is \"deny\""); } if (ag->ag_type == AG_TYPE_UNKNOWN) { /* * This can happen with empty auth-group. */ login_send_error(request, 0x02, 0x01); log_errx(1, "auth-type not set, denying access"); } /* * Enforce initiator-name and initiator-portal.
*/ if (auth_name_check(ag, initiator_name) != 0) { login_send_error(request, 0x02, 0x02); log_errx(1, "initiator does not match allowed initiator names"); } if (auth_portal_check(ag, &conn->conn_initiator_sa) != 0) { login_send_error(request, 0x02, 0x02); log_errx(1, "initiator does not match allowed " "initiator portals"); } /* * Let's see if the initiator intends to do any kind of authentication * at all. */ if (login_csg(request) == BHSLR_STAGE_OPERATIONAL_NEGOTIATION) { if (ag->ag_type != AG_TYPE_NO_AUTHENTICATION) { login_send_error(request, 0x02, 0x01); log_errx(1, "initiator skipped the authentication, " "but authentication is required"); } /* Only the parsed keys are released here; the request PDU is passed on to login_negotiate(), which deletes it. */ keys_delete(request_keys); log_debugx("initiator skipped the authentication, " "and we don't need it; proceeding with negotiation"); login_negotiate(conn, request); return; } fail = false; response = login_new_response(request); response_keys = keys_new(); trans = (bhslr->bhslr_flags & BHSLR_FLAGS_TRANSIT) != 0; auth_method = keys_find(request_keys, "AuthMethod"); if (ag->ag_type == AG_TYPE_NO_AUTHENTICATION) { log_debugx("authentication not required"); if (auth_method == NULL || login_list_contains(auth_method, "None")) { keys_add(response_keys, "AuthMethod", "None"); } else { /* NOTE(review): unlike the CHAP branch below, sending Reject here does not set fail, so the login carries on - confirm this is intended. */ log_warnx("initiator requests " "AuthMethod \"%s\" instead of \"None\"", auth_method); keys_add(response_keys, "AuthMethod", "Reject"); } if (trans) login_set_nsg(response, BHSLR_STAGE_OPERATIONAL_NEGOTIATION); } else { log_debugx("CHAP authentication required"); if (auth_method == NULL || login_list_contains(auth_method, "CHAP")) { keys_add(response_keys, "AuthMethod", "CHAP"); } else { log_warnx("initiator requests unsupported " "AuthMethod \"%s\" instead of \"CHAP\"", auth_method); keys_add(response_keys, "AuthMethod", "Reject"); fail = true; } } if (conn->conn_session_type == CONN_SESSION_TYPE_NORMAL) { if (conn->conn_target->t_alias != NULL) keys_add(response_keys, "TargetAlias", conn->conn_target->t_alias); keys_add_int(response_keys,
"TargetPortalGroupTag", pg->pg_tag); } keys_save_pdu(response_keys, response); pdu_send(response); pdu_delete(response); keys_delete(response_keys); pdu_delete(request); keys_delete(request_keys); if (fail) { log_debugx("sent reject for AuthMethod; exiting"); exit(1); } /* With authentication required, CHAP runs first and negotiation follows. Without it, negotiation starts right away only if the initiator already set the T flag; otherwise we wait for the transition request. */ if (ag->ag_type != AG_TYPE_NO_AUTHENTICATION) { login_chap(conn, ag); login_negotiate(conn, NULL); } else if (trans) { login_negotiate(conn, NULL); } else { login_wait_transition(conn); } } diff --git a/usr.sbin/iscsid/iscsid.c b/usr.sbin/iscsid/iscsid.c index 5555ffccbf7c..c81692948c1a 100644 --- a/usr.sbin/iscsid/iscsid.c +++ b/usr.sbin/iscsid/iscsid.c @@ -1,790 +1,809 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "iscsid.h" static bool timed_out(void); #ifdef ICL_KERNEL_PROXY static void pdu_receive_proxy(struct pdu *pdu); static void pdu_send_proxy(struct pdu *pdu); #endif /* ICL_KERNEL_PROXY */ static volatile bool sigalrm_received = false; static int nchildren = 0; /* Callbacks passed to connection_init() by connection_new(). */ static struct connection_ops conn_ops = { .timed_out = timed_out, #ifdef ICL_KERNEL_PROXY .pdu_receive_proxy = pdu_receive_proxy, .pdu_send_proxy = pdu_send_proxy, #endif .fail = fail, }; /* Print the command-line synopsis to stderr and exit with status 1. */ static void usage(void) { fprintf(stderr, "usage: iscsid [-P pidfile][-d][-m maxproc][-t timeout]\n"); exit(1); } #ifdef ICL_KERNEL_PROXY /* Receive one PDU through the ISCSIDRECEIVE ioctl; only valid for iSER connections (see the assert). The data buffer is allocated at conn_max_recv_data_segment_length bytes and a non-empty AHS is treated as a protocol error. */ static void pdu_receive_proxy(struct pdu *pdu) { struct iscsid_connection *conn; struct iscsi_daemon_receive idr; size_t len; int error; conn = (struct iscsid_connection *)pdu->pdu_connection; assert(conn->conn_conf.isc_iser != 0); pdu->pdu_data = malloc(conn->conn.conn_max_recv_data_segment_length); if (pdu->pdu_data == NULL) log_err(1, "malloc"); memset(&idr, 0, sizeof(idr)); idr.idr_session_id = conn->conn_session_id; idr.idr_bhs = pdu->pdu_bhs; idr.idr_data_segment_len = conn->conn.conn_max_recv_data_segment_length; idr.idr_data_segment = pdu->pdu_data; error = ioctl(conn->conn_iscsi_fd, ISCSIDRECEIVE, &idr); if (error
!= 0) log_err(1, "ISCSIDRECEIVE"); len = pdu_ahs_length(pdu); if (len > 0) log_errx(1, "protocol error: non-empty AHS"); len = pdu_data_segment_length(pdu); assert(len <= (size_t)conn->conn.conn_max_recv_data_segment_length); pdu->pdu_data_len = len; } /* Send one PDU through the ISCSIDSEND ioctl, after calling pdu_set_data_segment_length() with pdu_data_len; only valid for iSER connections (see the assert). */ static void pdu_send_proxy(struct pdu *pdu) { struct iscsid_connection *conn; struct iscsi_daemon_send ids; int error; conn = (struct iscsid_connection *)pdu->pdu_connection; assert(conn->conn_conf.isc_iser != 0); pdu_set_data_segment_length(pdu, pdu->pdu_data_len); memset(&ids, 0, sizeof(ids)); ids.ids_session_id = conn->conn_session_id; ids.ids_bhs = pdu->pdu_bhs; ids.ids_data_segment_len = pdu->pdu_data_len; ids.ids_data_segment = pdu->pdu_data; error = ioctl(conn->conn_iscsi_fd, ISCSIDSEND, &ids); if (error != 0) log_err(1, "ISCSIDSEND"); } #endif /* ICL_KERNEL_PROXY */ /* Split address into host and optional port (bracketed IPv6 with optional port, bare IPv6, or host with optional port) and resolve it with getaddrinfo(), storing the result in ai. Each failure is reported through fail() and then log_errx(1, ...), which is presumably fatal - confirm. */ static void resolve_addr(const struct connection *conn, const char *address, struct addrinfo **ai, bool initiator_side) { struct addrinfo hints; char *arg, *addr, *ch, *tofree; const char *port; int error, colons = 0; tofree = arg = checked_strdup(address); if (arg[0] == '\0') { fail(conn, "empty address"); log_errx(1, "empty address"); } if (arg[0] == '[') { /* * IPv6 address in square brackets, perhaps with port. */ arg++; addr = strsep(&arg, "]"); if (arg == NULL) { fail(conn, "malformed address"); log_errx(1, "malformed address %s", address); } if (arg[0] == '\0') { port = NULL; } else if (arg[0] == ':') { port = arg + 1; } else { fail(conn, "malformed address"); log_errx(1, "malformed address %s", address); } } else { /* * Either IPv6 address without brackets - and without * a port - or IPv4 address. Just count the colons.
*/ for (ch = arg; *ch != '\0'; ch++) { if (*ch == ':') colons++; } if (colons > 1) { addr = arg; port = NULL; } else { addr = strsep(&arg, ":"); if (arg == NULL) port = NULL; else port = arg; } } /* Only the non-initiator-side address falls back to port 3260. */ if (port == NULL && !initiator_side) port = "3260"; memset(&hints, 0, sizeof(hints)); hints.ai_family = PF_UNSPEC; hints.ai_socktype = SOCK_STREAM; hints.ai_flags = AI_ADDRCONFIG | AI_NUMERICSERV; if (initiator_side) hints.ai_flags |= AI_PASSIVE; error = getaddrinfo(addr, port, &hints, ai); if (error != 0) { fail(conn, gai_strerror(error)); log_errx(1, "getaddrinfo for %s failed: %s", address, gai_strerror(error)); } free(tofree); } /* Allocate a connection for the kernel request, resolve the initiator and target addresses and connect to the target: through the ISCSIDCONNECT ioctl for iSER when built with ICL_KERNEL_PROXY, otherwise through a regular socket with buffer-size, DSCP, PCP and TCP_KEEPINIT options applied. */ static struct iscsid_connection * connection_new(int iscsi_fd, const struct iscsi_daemon_request *request) { struct iscsid_connection *conn; - struct iscsi_session_limits *isl; struct addrinfo *from_ai, *to_ai; const char *from_addr, *to_addr; #ifdef ICL_KERNEL_PROXY struct iscsi_daemon_connect idc; #endif int error, optval; conn = calloc(1, sizeof(*conn)); if (conn == NULL) log_err(1, "calloc"); connection_init(&conn->conn, &conn_ops, request->idr_conf.isc_iser != 0); conn->conn_protocol_level = 0; conn->conn_initial_r2t = true; conn->conn_iscsi_fd = iscsi_fd; conn->conn_session_id = request->idr_session_id; memcpy(&conn->conn_conf, &request->idr_conf, sizeof(conn->conn_conf)); memcpy(&conn->conn.conn_isid, &request->idr_isid, sizeof(conn->conn.conn_isid)); conn->conn.conn_tsih = request->idr_tsih; - /* - * Read the driver limits and provide reasonable defaults for the ones - * the driver doesn't care about. If a max_snd_dsl is not explicitly - * provided by the driver then we'll make sure both conn->max_snd_dsl - * and isl->max_snd_dsl are set to the rcv_dsl. This preserves historic - * behavior.
- */ - isl = &conn->conn_limits; - memcpy(isl, &request->idr_limits, sizeof(*isl)); - if (isl->isl_max_recv_data_segment_length == 0) - isl->isl_max_recv_data_segment_length = (1 << 24) - 1; - if (isl->isl_max_send_data_segment_length == 0) - isl->isl_max_send_data_segment_length = - isl->isl_max_recv_data_segment_length; - if (isl->isl_max_burst_length == 0) - isl->isl_max_burst_length = (1 << 24) - 1; - if (isl->isl_first_burst_length == 0) - isl->isl_first_burst_length = (1 << 24) - 1; - if (isl->isl_first_burst_length > isl->isl_max_burst_length) - isl->isl_first_burst_length = isl->isl_max_burst_length; - - /* - * Limit default send length in case it won't be negotiated. - * We can't do it for other limits, since they may affect both - * sender and receiver operation, and we must obey defaults. - */ - if (conn->conn.conn_max_send_data_segment_length > - isl->isl_max_send_data_segment_length) { - conn->conn.conn_max_send_data_segment_length = - isl->isl_max_send_data_segment_length; - } - from_addr = conn->conn_conf.isc_initiator_addr; to_addr = conn->conn_conf.isc_target_addr; /* An empty initiator address means no local address to bind to. */ if (from_addr[0] != '\0') resolve_addr(&conn->conn, from_addr, &from_ai, true); else from_ai = NULL; resolve_addr(&conn->conn, to_addr, &to_ai, false); #ifdef ICL_KERNEL_PROXY if (conn->conn_conf.isc_iser) { memset(&idc, 0, sizeof(idc)); idc.idc_session_id = conn->conn_session_id; if (conn->conn_conf.isc_iser) idc.idc_iser = 1; idc.idc_domain = to_ai->ai_family; idc.idc_socktype = to_ai->ai_socktype; idc.idc_protocol = to_ai->ai_protocol; if (from_ai != NULL) { idc.idc_from_addr = from_ai->ai_addr; idc.idc_from_addrlen = from_ai->ai_addrlen; } idc.idc_to_addr = to_ai->ai_addr; idc.idc_to_addrlen = to_ai->ai_addrlen; log_debugx("connecting to %s using ICL kernel proxy", to_addr); error = ioctl(iscsi_fd, ISCSIDCONNECT, &idc); if (error != 0) { fail(&conn->conn, strerror(errno)); log_err(1, "failed to connect to %s " "using ICL kernel proxy: ISCSIDCONNECT", to_addr); } if (from_ai != NULL)
freeaddrinfo(from_ai); freeaddrinfo(to_ai); return (conn); } #endif /* ICL_KERNEL_PROXY */ if (conn->conn_conf.isc_iser) { fail(&conn->conn, "iSER not supported"); log_errx(1, "iscsid(8) compiled without ICL_KERNEL_PROXY " "does not support iSER"); } conn->conn.conn_socket = socket(to_ai->ai_family, to_ai->ai_socktype, to_ai->ai_protocol); /* NOTE(review): the socket is created from to_ai, yet this message (and the setsockopt warnings below) print from_addr, which may be an empty string - confirm which address is meant. */ if (conn->conn.conn_socket < 0) { fail(&conn->conn, strerror(errno)); log_err(1, "failed to create socket for %s", from_addr); } optval = SOCKBUF_SIZE; if (setsockopt(conn->conn.conn_socket, SOL_SOCKET, SO_RCVBUF, &optval, sizeof(optval)) == -1) log_warn("setsockopt(SO_RCVBUF) failed"); optval = SOCKBUF_SIZE; if (setsockopt(conn->conn.conn_socket, SOL_SOCKET, SO_SNDBUF, &optval, sizeof(optval)) == -1) log_warn("setsockopt(SO_SNDBUF) failed"); optval = 1; if (setsockopt(conn->conn.conn_socket, SOL_SOCKET, SO_NO_DDP, &optval, sizeof(optval)) == -1) log_warn("setsockopt(SO_NO_DDP) failed"); /* A value of -1 means the option was not configured; failures to apply it are only warned about. */ if (conn->conn_conf.isc_dscp != -1) { int tos = conn->conn_conf.isc_dscp << 2; if (to_ai->ai_family == AF_INET) { if (setsockopt(conn->conn.conn_socket, IPPROTO_IP, IP_TOS, &tos, sizeof(tos)) == -1) log_warn("setsockopt(IP_TOS) " "failed for %s", from_addr); } else if (to_ai->ai_family == AF_INET6) { if (setsockopt(conn->conn.conn_socket, IPPROTO_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) == -1) log_warn("setsockopt(IPV6_TCLASS) " "failed for %s", from_addr); } } if (conn->conn_conf.isc_pcp != -1) { int pcp = conn->conn_conf.isc_pcp; if (to_ai->ai_family == AF_INET) { if (setsockopt(conn->conn.conn_socket, IPPROTO_IP, IP_VLAN_PCP, &pcp, sizeof(pcp)) == -1) log_warn("setsockopt(IP_VLAN_PCP) " "failed for %s", from_addr); } else if (to_ai->ai_family == AF_INET6) { if (setsockopt(conn->conn.conn_socket, IPPROTO_IPV6, IPV6_VLAN_PCP, &pcp, sizeof(pcp)) == -1) log_warn("setsockopt(IPV6_VLAN_PCP) " "failed for %s", from_addr); } } /* * Reduce TCP SYN_SENT timeout while * no connectivity exists, to allow * rapid reuse of the available slots.
*/ int keepinit = 0; /* A positive per-session login timeout wins; -1 selects the kern.iscsi.login_timeout sysctl; with any other value keepinit stays 0 and TCP_KEEPINIT is left untouched. */ if (conn->conn_conf.isc_login_timeout > 0) { keepinit = conn->conn_conf.isc_login_timeout; log_debugx("session specific LoginTimeout at %d sec", keepinit); } if (conn->conn_conf.isc_login_timeout == -1) { int value; size_t size = sizeof(value); if (sysctlbyname("kern.iscsi.login_timeout", &value, &size, NULL, 0) == 0) { keepinit = value; log_debugx("global login_timeout at %d sec", keepinit); } } if (keepinit > 0) { if (setsockopt(conn->conn.conn_socket, IPPROTO_TCP, TCP_KEEPINIT, &keepinit, sizeof(keepinit)) == -1) log_warnx("setsockopt(TCP_KEEPINIT) " "failed for %s", to_addr); } if (from_ai != NULL) { error = bind(conn->conn.conn_socket, from_ai->ai_addr, from_ai->ai_addrlen); if (error != 0) { fail(&conn->conn, strerror(errno)); log_err(1, "failed to bind to %s", from_addr); } } log_debugx("connecting to %s", to_addr); error = connect(conn->conn.conn_socket, to_ai->ai_addr, to_ai->ai_addrlen); if (error != 0) { fail(&conn->conn, strerror(errno)); log_err(1, "failed to connect to %s", to_addr); } if (from_ai != NULL) freeaddrinfo(from_ai); freeaddrinfo(to_ai); return (conn); } /* Fetch the session limits from the kernel with the ISCSIDLIMITS ioctl, substitute defaults for fields left at zero, and cap the default send data segment length. */ +static void +limits(struct iscsid_connection *conn) +{ + struct iscsi_daemon_limits idl; + struct iscsi_session_limits *isl; + int error; + + log_debugx("fetching limits from the kernel"); + + memset(&idl, 0, sizeof(idl)); + idl.idl_session_id = conn->conn_session_id; /* NOTE(review): on the iSER kernel-proxy path connection_new() returns before socket() is called, so conn_socket holds whatever connection_init() left there - confirm the kernel copes with that. */ + idl.idl_socket = conn->conn.conn_socket; + + error = ioctl(conn->conn_iscsi_fd, ISCSIDLIMITS, &idl); + if (error != 0) + log_err(1, "ISCSIDLIMITS"); + + /* + * Read the driver limits and provide reasonable defaults for the ones + * the driver doesn't care about. If a max_snd_dsl is not explicitly + * provided by the driver then we'll make sure both conn->max_snd_dsl + * and isl->max_snd_dsl are set to the rcv_dsl. This preserves historic + * behavior.
+ */ + isl = &conn->conn_limits; + memcpy(isl, &idl.idl_limits, sizeof(*isl)); + if (isl->isl_max_recv_data_segment_length == 0) + isl->isl_max_recv_data_segment_length = (1 << 24) - 1; + if (isl->isl_max_send_data_segment_length == 0) + isl->isl_max_send_data_segment_length = + isl->isl_max_recv_data_segment_length; + if (isl->isl_max_burst_length == 0) + isl->isl_max_burst_length = (1 << 24) - 1; + if (isl->isl_first_burst_length == 0) + isl->isl_first_burst_length = (1 << 24) - 1; /* Keep the first burst length within the max burst length. */ + if (isl->isl_first_burst_length > isl->isl_max_burst_length) + isl->isl_first_burst_length = isl->isl_max_burst_length; + + /* + * Limit default send length in case it won't be negotiated. + * We can't do it for other limits, since they may affect both + * sender and receiver operation, and we must obey defaults. + */ + if (conn->conn.conn_max_send_data_segment_length > + isl->isl_max_send_data_segment_length) { + conn->conn.conn_max_send_data_segment_length = + isl->isl_max_send_data_segment_length; + } +} + /* Pass the session, its socket and the connection parameters stored in conn to the kernel through the ISCSIDHANDOFF ioctl. */ static void handoff(struct iscsid_connection *conn) { struct iscsi_daemon_handoff idh; int error; log_debugx("handing off connection to the kernel"); memset(&idh, 0, sizeof(idh)); idh.idh_session_id = conn->conn_session_id; idh.idh_socket = conn->conn.conn_socket; strlcpy(idh.idh_target_alias, conn->conn_target_alias, sizeof(idh.idh_target_alias)); idh.idh_tsih = conn->conn.conn_tsih; idh.idh_statsn = conn->conn.conn_statsn; idh.idh_protocol_level = conn->conn_protocol_level; idh.idh_header_digest = conn->conn.conn_header_digest; idh.idh_data_digest = conn->conn.conn_data_digest; idh.idh_initial_r2t = conn->conn_initial_r2t; idh.idh_immediate_data = conn->conn.conn_immediate_data; idh.idh_max_recv_data_segment_length = conn->conn.conn_max_recv_data_segment_length; idh.idh_max_send_data_segment_length = conn->conn.conn_max_send_data_segment_length; idh.idh_max_burst_length = conn->conn.conn_max_burst_length; idh.idh_first_burst_length = conn->conn.conn_first_burst_length; error =
ioctl(conn->conn_iscsi_fd, ISCSIDHANDOFF, &idh); if (error != 0) log_err(1, "ISCSIDHANDOFF"); } /* Report a failure reason for this session to the kernel through the ISCSIDFAIL ioctl. errno is saved on entry and restored afterwards so callers can still log the original error. */ void fail(const struct connection *base_conn, const char *reason) { const struct iscsid_connection *conn; struct iscsi_daemon_fail idf; int error, saved_errno; conn = (const struct iscsid_connection *)base_conn; saved_errno = errno; memset(&idf, 0, sizeof(idf)); idf.idf_session_id = conn->conn_session_id; strlcpy(idf.idf_reason, reason, sizeof(idf.idf_reason)); error = ioctl(conn->conn_iscsi_fd, ISCSIDFAIL, &idf); if (error != 0) log_err(1, "ISCSIDFAIL"); errno = saved_errno; } /* * XXX: I CANT INTO LATIN */ /* Restrict the iscsi descriptor to CAP_IOCTL with only the commands listed in cmds, then enter capability mode with caph_enter(). */ static void capsicate(struct iscsid_connection *conn) { cap_rights_t rights; const unsigned long cmds[] = { #ifdef ICL_KERNEL_PROXY ISCSIDCONNECT, ISCSIDSEND, ISCSIDRECEIVE, #endif + ISCSIDLIMITS, ISCSIDHANDOFF, ISCSIDFAIL, ISCSISADD, ISCSISREMOVE, ISCSISMODIFY }; cap_rights_init(&rights, CAP_IOCTL); if (caph_rights_limit(conn->conn_iscsi_fd, &rights) < 0) log_err(1, "cap_rights_limit"); if (caph_ioctls_limit(conn->conn_iscsi_fd, cmds, nitems(cmds)) < 0) log_err(1, "cap_ioctls_limit"); if (caph_enter() != 0) log_err(1, "cap_enter"); if (cap_sandboxed()) log_debugx("Capsicum capability mode enabled"); else log_warnx("Capsicum capability mode not supported"); } /* Connection callback (conn_ops.timed_out): true once sigalrm_handler() has run. */ static bool timed_out(void) { return (sigalrm_received); } /* SIGALRM handler installed by set_timeout(). */ static void sigalrm_handler(int dummy __unused) { /* * It would be easiest to just log an error and exit. We can't * do this, though, because log_errx() is not signal safe, since * it calls syslog(3). Instead, set a flag checked by pdu_send() * and pdu_receive(), to call log_errx() there. Should they fail * to notice, we'll exit here one second later. */ if (sigalrm_received) { /* * Oh well. Just give up and quit.
*/ _exit(2); } sigalrm_received = true; } /* Install sigalrm_handler() for SIGALRM and arm ITIMER_REAL. A timeout of zero or less leaves the session without a timeout. */ static void set_timeout(int timeout) { struct sigaction sa; struct itimerval itv; int error; if (timeout <= 0) { log_debugx("session timeout disabled"); return; } bzero(&sa, sizeof(sa)); sa.sa_handler = sigalrm_handler; sigfillset(&sa.sa_mask); error = sigaction(SIGALRM, &sa, NULL); if (error != 0) log_err(1, "sigaction"); /* * First SIGALRM will arrive after timeout seconds. * If we do nothing, another one will arrive a second later. */ bzero(&itv, sizeof(itv)); itv.it_interval.tv_sec = 1; itv.it_value.tv_sec = timeout; log_debugx("setting session timeout to %d seconds", timeout); error = setitimer(ITIMER_REAL, &itv, NULL); if (error != 0) log_err(1, "setitimer"); } static void sigchld_handler(int dummy __unused) { /* * The only purpose of this handler is to make SIGCHLD * interrupt the ISCSIDWAIT ioctl(2), so we can call * wait_for_children(). */ } /* Install sigchld_handler() for SIGCHLD. */ static void register_sigchld(void) { struct sigaction sa; int error; bzero(&sa, sizeof(sa)); sa.sa_handler = sigchld_handler; sigfillset(&sa.sa_mask); error = sigaction(SIGCHLD, &sa, NULL); if (error != 0) log_err(1, "sigaction"); } /* Service one request from the kernel: connect to the target, enter the sandbox, fetch limits, arm the timeout, log in, then either run discovery or hand the session off to the kernel. Ends with exit(0) rather than returning. */ static void handle_request(int iscsi_fd, const struct iscsi_daemon_request *request, int timeout) { struct iscsid_connection *conn; log_set_peer_addr(request->idr_conf.isc_target_addr); if (request->idr_conf.isc_target[0] != '\0') { log_set_peer_name(request->idr_conf.isc_target); setproctitle("%s (%s)", request->idr_conf.isc_target_addr, request->idr_conf.isc_target); } else { setproctitle("%s", request->idr_conf.isc_target_addr); } conn = connection_new(iscsi_fd, request); - set_timeout(timeout); capsicate(conn); + limits(conn); + set_timeout(timeout); login(conn); if (conn->conn_conf.isc_discovery != 0) discovery(conn); else handoff(conn); log_debugx("nothing more to do; exiting"); exit (0); } /* Reap terminated child processes with wait4() and log how each one ended. Returns the number of children reaped. */ static int wait_for_children(bool block) { pid_t pid; int status; int num = 0; for (;;) { /* * If "block" is true, wait for at least one process.
*/ if (block && num == 0) pid = wait4(-1, &status, 0, NULL); else pid = wait4(-1, &status, WNOHANG, NULL); if (pid <= 0) break; if (WIFSIGNALED(status)) { log_warnx("child process %d terminated with signal %d", pid, WTERMSIG(status)); } else if (WEXITSTATUS(status) != 0) { log_warnx("child process %d terminated with exit status %d", pid, WEXITSTATUS(status)); } else { log_debugx("child process %d terminated gracefully", pid); } num++; } return (num); } /* Daemon entry point: parse options, take the pidfile, open ISCSI_PATH (trying kldload("iscsi") once on ENOENT), daemonize unless -d was given, then loop on the ISCSIDWAIT ioctl and fork one child per request, keeping at most maxproc children when maxproc is positive. */ int main(int argc, char **argv) { int ch, debug = 0, error, iscsi_fd, maxproc = 30, retval, saved_errno, timeout = 60; bool dont_daemonize = false; struct pidfh *pidfh; pid_t pid, otherpid; const char *pidfile_path = DEFAULT_PIDFILE; struct iscsi_daemon_request request; /* NOTE(review): -l is accepted here but is not listed by usage(). The atoi() calls do no error checking; a value of 0 for -m lifts the child limit (see the maxproc > 0 test below) and 0 for -t disables the timeout in set_timeout(). */ while ((ch = getopt(argc, argv, "P:dl:m:t:")) != -1) { switch (ch) { case 'P': pidfile_path = optarg; break; case 'd': dont_daemonize = true; debug++; break; case 'l': debug = atoi(optarg); break; case 'm': maxproc = atoi(optarg); break; case 't': timeout = atoi(optarg); break; case '?': default: usage(); } } argc -= optind; if (argc != 0) usage(); log_init(debug); pidfh = pidfile_open(pidfile_path, 0600, &otherpid); if (pidfh == NULL) { if (errno == EEXIST) log_errx(1, "daemon already running, pid: %jd.", (intmax_t)otherpid); log_err(1, "cannot open or create pidfile \"%s\"", pidfile_path); } iscsi_fd = open(ISCSI_PATH, O_RDWR); /* errno from the failed open() is put back if kldload() fails, so the log_err() below reports the open failure. */ if (iscsi_fd < 0 && errno == ENOENT) { saved_errno = errno; retval = kldload("iscsi"); if (retval != -1) iscsi_fd = open(ISCSI_PATH, O_RDWR); else errno = saved_errno; } if (iscsi_fd < 0) log_err(1, "failed to open %s", ISCSI_PATH); if (dont_daemonize == false) { if (daemon(0, 0) == -1) { log_warn("cannot daemonize"); pidfile_remove(pidfh); exit(1); } } pidfile_write(pidfh); register_sigchld(); for (;;) { log_debugx("waiting for request from the kernel"); memset(&request, 0, sizeof(request)); error = ioctl(iscsi_fd, ISCSIDWAIT, &request); if (error != 0) { if (errno == EINTR) { nchildren -= wait_for_children(false);
assert(nchildren >= 0); continue; } log_err(1, "ISCSIDWAIT"); } if (dont_daemonize) { log_debugx("not forking due to -d flag; " "will exit after servicing a single request"); } else { nchildren -= wait_for_children(false); assert(nchildren >= 0); while (maxproc > 0 && nchildren >= maxproc) { log_debugx("maxproc limit of %d child processes hit; " "waiting for child process to exit", maxproc); nchildren -= wait_for_children(true); assert(nchildren >= 0); } log_debugx("incoming connection; forking child process #%d", nchildren); nchildren++; pid = fork(); if (pid < 0) log_err(1, "fork"); if (pid > 0) continue; } /* Reached in the forked child, or in the parent itself when running with -d. handle_request() ends with exit(0), so control does not come back to the loop. */ pidfile_close(pidfh); handle_request(iscsi_fd, &request, timeout); } return (0); }