Index: head/sys/cam/ctl/ctl_backend_block.c =================================================================== --- head/sys/cam/ctl/ctl_backend_block.c (revision 361938) +++ head/sys/cam/ctl/ctl_backend_block.c (revision 361939) @@ -1,2792 +1,2812 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 Silicon Graphics International Corp. * Copyright (c) 2009-2011 Spectra Logic Corporation * Copyright (c) 2012 The FreeBSD Foundation * Copyright (c) 2014-2015 Alexander Motin * All rights reserved. * * Portions of this software were developed by Edward Tomasz Napierala * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $ */ /* * CAM Target Layer driver backend for block devices. * * Author: Ken Merry */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * The idea here is that we'll allocate enough S/G space to hold a 1MB * I/O. If we get an I/O larger than that, we'll split it. */ #define CTLBLK_HALF_IO_SIZE (512 * 1024) #define CTLBLK_MAX_IO_SIZE (CTLBLK_HALF_IO_SIZE * 2) #define CTLBLK_MAX_SEG MAXPHYS #define CTLBLK_HALF_SEGS MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1) #define CTLBLK_MAX_SEGS (CTLBLK_HALF_SEGS * 2) #ifdef CTLBLK_DEBUG #define DPRINTF(fmt, args...) \ printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) #else #define DPRINTF(fmt, args...) do {} while(0) #endif #define PRIV(io) \ ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND]) #define ARGS(io) \ ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]) SDT_PROVIDER_DEFINE(cbb); typedef enum { CTL_BE_BLOCK_LUN_UNCONFIGURED = 0x01, CTL_BE_BLOCK_LUN_WAITING = 0x04, } ctl_be_block_lun_flags; typedef enum { CTL_BE_BLOCK_NONE, CTL_BE_BLOCK_DEV, CTL_BE_BLOCK_FILE } ctl_be_block_type; struct ctl_be_block_filedata { struct ucred *cred; }; union ctl_be_block_bedata { struct ctl_be_block_filedata file; }; struct ctl_be_block_io; struct ctl_be_block_lun; typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun, const char *attrname); /* * Backend LUN structure. There is a 1:1 mapping between a block device * and a backend block LUN, and between a backend block LUN and a CTL LUN. */ struct ctl_be_block_lun { struct ctl_be_lun cbe_lun; /* Must be first element. */ struct ctl_lun_create_params params; char *dev_path; ctl_be_block_type dev_type; struct vnode *vn; union ctl_be_block_bedata backend; cbb_dispatch_t dispatch; cbb_dispatch_t lun_flush; cbb_dispatch_t unmap; cbb_dispatch_t get_lba_status; cbb_getattr_t getattr; uint64_t size_blocks; uint64_t size_bytes; struct ctl_be_block_softc *softc; struct devstat *disk_stats; ctl_be_block_lun_flags flags; SLIST_ENTRY(ctl_be_block_lun) links; struct taskqueue *io_taskqueue; struct task io_task; int num_threads; STAILQ_HEAD(, ctl_io_hdr) input_queue; STAILQ_HEAD(, ctl_io_hdr) config_read_queue; STAILQ_HEAD(, ctl_io_hdr) config_write_queue; STAILQ_HEAD(, ctl_io_hdr) datamove_queue; struct mtx_padalign io_lock; struct mtx_padalign queue_lock; }; /* * Overall softc structure for the block backend module. */ struct ctl_be_block_softc { struct sx modify_lock; struct mtx lock; int num_luns; SLIST_HEAD(, ctl_be_block_lun) lun_list; uma_zone_t beio_zone; uma_zone_t buf_zone; }; static struct ctl_be_block_softc backend_block_softc; /* * Per-I/O information. */ struct ctl_be_block_io { union ctl_io *io; struct ctl_sg_entry sg_segs[CTLBLK_MAX_SEGS]; struct iovec xiovecs[CTLBLK_MAX_SEGS]; + int refcnt; int bio_cmd; int two_sglists; int num_segs; int num_bios_sent; int num_bios_done; int send_complete; int first_error; uint64_t first_error_offset; struct bintime ds_t0; devstat_tag_type ds_tag_type; devstat_trans_flags ds_trans_type; uint64_t io_len; uint64_t io_offset; int io_arg; struct ctl_be_block_softc *softc; struct ctl_be_block_lun *lun; void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */ }; extern struct ctl_softc *control_softc; static int cbb_num_threads = 14; SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "CAM Target Layer Block Backend"); SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN, &cbb_num_threads, 0, "Number of threads per backing file"); static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc); static void ctl_free_beio(struct ctl_be_block_io *beio); static void ctl_complete_beio(struct ctl_be_block_io *beio); static int ctl_be_block_move_done(union ctl_io *io); static void ctl_be_block_biodone(struct bio *bio); static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname); static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname); static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io); static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io); static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io); static void ctl_be_block_worker(void *context, int pending); static int ctl_be_block_submit(union ctl_io *io); static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td); static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req); static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req); static int ctl_be_block_close(struct ctl_be_block_lun *be_lun); static int ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req); static int ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req); static int ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req); static int ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req); static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun); static int ctl_be_block_config_write(union ctl_io *io); static int ctl_be_block_config_read(union ctl_io *io); static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb); static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname); static int ctl_be_block_init(void); static int ctl_be_block_shutdown(void); static struct ctl_backend_driver ctl_be_block_driver = { .name = "block", .flags = CTL_BE_FLAG_HAS_CONFIG, .init = ctl_be_block_init, .shutdown = ctl_be_block_shutdown, .data_submit = ctl_be_block_submit, .data_move_done = ctl_be_block_move_done, .config_read = ctl_be_block_config_read, .config_write = ctl_be_block_config_write, .ioctl = ctl_be_block_ioctl, .lun_info = ctl_be_block_lun_info, .lun_attr = ctl_be_block_lun_attr }; MALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend"); CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver); static struct ctl_be_block_io * ctl_alloc_beio(struct ctl_be_block_softc *softc) { struct ctl_be_block_io *beio; beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO); beio->softc = softc; + beio->refcnt = 1; return (beio); } static void -ctl_free_beio(struct ctl_be_block_io *beio) +ctl_real_free_beio(struct ctl_be_block_io *beio) { struct ctl_be_block_softc *softc = beio->softc; int i; for (i = 0; i < beio->num_segs; i++) { uma_zfree(softc->buf_zone, beio->sg_segs[i].addr); /* For compare we had two equal S/G lists. */ if (beio->two_sglists) { uma_zfree(softc->buf_zone, beio->sg_segs[i + CTLBLK_HALF_SEGS].addr); } } uma_zfree(softc->beio_zone, beio); } static void +ctl_refcnt_beio(void *arg, int diff) +{ + struct ctl_be_block_io *beio = arg; + + if (atomic_fetchadd_int(&beio->refcnt, diff) + diff == 0) + ctl_real_free_beio(beio); +} + +static void +ctl_free_beio(struct ctl_be_block_io *beio) +{ + + ctl_refcnt_beio(beio, -1); +} + +static void ctl_complete_beio(struct ctl_be_block_io *beio) { union ctl_io *io = beio->io; if (beio->beio_cont != NULL) { beio->beio_cont(beio); } else { ctl_free_beio(beio); ctl_data_submit_done(io); } } static size_t cmp(uint8_t *a, uint8_t *b, size_t size) { size_t i; for (i = 0; i < size; i++) { if (a[i] != b[i]) break; } return (i); } static void ctl_be_block_compare(union ctl_io *io) { struct ctl_be_block_io *beio; uint64_t off, res; int i; uint8_t info[8]; beio = (struct ctl_be_block_io *)PRIV(io)->ptr; off = 0; for (i = 0; i < beio->num_segs; i++) { res = cmp(beio->sg_segs[i].addr, beio->sg_segs[i + CTLBLK_HALF_SEGS].addr, beio->sg_segs[i].len); off += res; if (res < beio->sg_segs[i].len) break; } if (i < beio->num_segs) { scsi_u64to8b(off, info); ctl_set_sense(&io->scsiio, /*current_error*/ 1, /*sense_key*/ SSD_KEY_MISCOMPARE, /*asc*/ 0x1D, /*ascq*/ 0x00, /*type*/ SSD_ELEM_INFO, /*size*/ sizeof(info), /*data*/ &info, /*type*/ SSD_ELEM_NONE); } else ctl_set_success(&io->scsiio); } static int ctl_be_block_move_done(union ctl_io *io) { struct ctl_be_block_io *beio; struct ctl_be_block_lun *be_lun; struct ctl_lba_len_flags *lbalen; #ifdef CTL_TIME_IO struct bintime cur_bt; #endif beio = (struct ctl_be_block_io *)PRIV(io)->ptr; be_lun = beio->lun; DPRINTF("entered\n"); #ifdef CTL_TIME_IO getbinuptime(&cur_bt); bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt); bintime_add(&io->io_hdr.dma_bt, &cur_bt); #endif io->io_hdr.num_dmas++; io->scsiio.kern_rel_offset += io->scsiio.kern_data_len; /* * We set status at this point for read commands, and write * commands with errors. */ if (io->io_hdr.flags & CTL_FLAG_ABORT) { ; } else if ((io->io_hdr.port_status != 0) && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ io->io_hdr.port_status); } else if (io->scsiio.kern_data_resid != 0 && (io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_OUT && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { ctl_set_invalid_field_ciu(&io->scsiio); } else if ((io->io_hdr.port_status == 0) && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) { lbalen = ARGS(beio->io); if (lbalen->flags & CTL_LLF_READ) { ctl_set_success(&io->scsiio); } else if (lbalen->flags & CTL_LLF_COMPARE) { /* We have two data blocks ready for comparison. */ ctl_be_block_compare(io); } } /* * If this is a read, or a write with errors, it is done. */ if ((beio->bio_cmd == BIO_READ) || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0) || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) { ctl_complete_beio(beio); return (0); } /* * At this point, we have a write and the DMA completed * successfully. We now have to queue it to the task queue to * execute the backend I/O. That is because we do blocking * memory allocations, and in the file backing case, blocking I/O. * This move done routine is generally called in the SIM's * interrupt context, and therefore we cannot block. */ mtx_lock(&be_lun->queue_lock); STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links); mtx_unlock(&be_lun->queue_lock); taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); return (0); } static void ctl_be_block_biodone(struct bio *bio) { struct ctl_be_block_io *beio; struct ctl_be_block_lun *be_lun; union ctl_io *io; int error; beio = bio->bio_caller1; be_lun = beio->lun; io = beio->io; DPRINTF("entered\n"); error = bio->bio_error; mtx_lock(&be_lun->io_lock); if (error != 0 && (beio->first_error == 0 || bio->bio_offset < beio->first_error_offset)) { beio->first_error = error; beio->first_error_offset = bio->bio_offset; } beio->num_bios_done++; /* * XXX KDM will this cause WITNESS to complain? Holding a lock * during the free might cause it to complain. */ g_destroy_bio(bio); /* * If the send complete bit isn't set, or we aren't the last I/O to * complete, then we're done. */ if ((beio->send_complete == 0) || (beio->num_bios_done < beio->num_bios_sent)) { mtx_unlock(&be_lun->io_lock); return; } /* * At this point, we've verified that we are the last I/O to * complete, so it's safe to drop the lock. */ devstat_end_transaction(beio->lun->disk_stats, beio->io_len, beio->ds_tag_type, beio->ds_trans_type, /*now*/ NULL, /*then*/&beio->ds_t0); mtx_unlock(&be_lun->io_lock); /* * If there are any errors from the backing device, we fail the * entire I/O with a medium error. */ error = beio->first_error; if (error != 0) { if (error == EOPNOTSUPP) { ctl_set_invalid_opcode(&io->scsiio); } else if (error == ENOSPC || error == EDQUOT) { ctl_set_space_alloc_fail(&io->scsiio); } else if (error == EROFS || error == EACCES) { ctl_set_hw_write_protected(&io->scsiio); } else if (beio->bio_cmd == BIO_FLUSH) { /* XXX KDM is there is a better error here? */ ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ 0xbad2); } else { ctl_set_medium_error(&io->scsiio, beio->bio_cmd == BIO_READ); } ctl_complete_beio(beio); return; } /* * If this is a write, a flush, a delete or verify, we're all done. * If this is a read, we can now send the data to the user. */ if ((beio->bio_cmd == BIO_WRITE) || (beio->bio_cmd == BIO_FLUSH) || (beio->bio_cmd == BIO_DELETE) || (ARGS(io)->flags & CTL_LLF_VERIFY)) { ctl_set_success(&io->scsiio); ctl_complete_beio(beio); } else { if ((ARGS(io)->flags & CTL_LLF_READ) && beio->beio_cont == NULL) { ctl_set_success(&io->scsiio); ctl_serseq_done(io); } #ifdef CTL_TIME_IO getbinuptime(&io->io_hdr.dma_start_bt); #endif ctl_datamove(io); } } static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { union ctl_io *io = beio->io; struct mount *mountpoint; int error, lock_flags; DPRINTF("entered\n"); binuptime(&beio->ds_t0); devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT); if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount))) lock_flags = LK_SHARED; else lock_flags = LK_EXCLUSIVE; vn_lock(be_lun->vn, lock_flags | LK_RETRY); error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT, curthread); VOP_UNLOCK(be_lun->vn); vn_finished_write(mountpoint); mtx_lock(&be_lun->io_lock); devstat_end_transaction(beio->lun->disk_stats, beio->io_len, beio->ds_tag_type, beio->ds_trans_type, /*now*/ NULL, /*then*/&beio->ds_t0); mtx_unlock(&be_lun->io_lock); if (error == 0) ctl_set_success(&io->scsiio); else { /* XXX KDM is there is a better error here? */ ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ 0xbad1); } ctl_complete_beio(beio); } SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t"); SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t"); SDT_PROBE_DEFINE1(cbb, , read, file_done,"uint64_t"); SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t"); static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { struct ctl_be_block_filedata *file_data; union ctl_io *io; struct uio xuio; struct iovec *xiovec; size_t s; int error, flags, i; DPRINTF("entered\n"); file_data = &be_lun->backend.file; io = beio->io; flags = 0; if (ARGS(io)->flags & CTL_LLF_DPO) flags |= IO_DIRECT; if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) flags |= IO_SYNC; bzero(&xuio, sizeof(xuio)); if (beio->bio_cmd == BIO_READ) { SDT_PROBE0(cbb, , read, file_start); xuio.uio_rw = UIO_READ; } else { SDT_PROBE0(cbb, , write, file_start); xuio.uio_rw = UIO_WRITE; } xuio.uio_offset = beio->io_offset; xuio.uio_resid = beio->io_len; xuio.uio_segflg = UIO_SYSSPACE; xuio.uio_iov = beio->xiovecs; xuio.uio_iovcnt = beio->num_segs; xuio.uio_td = curthread; for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { xiovec->iov_base = beio->sg_segs[i].addr; xiovec->iov_len = beio->sg_segs[i].len; } binuptime(&beio->ds_t0); devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); if (beio->bio_cmd == BIO_READ) { vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); /* * UFS pays attention to IO_DIRECT for reads. If the * DIRECTIO option is configured into the kernel, it calls * ffs_rawread(). But that only works for single-segment * uios with user space addresses. In our case, with a * kernel uio, it still reads into the buffer cache, but it * will just try to release the buffer from the cache later * on in ffs_read(). * * ZFS does not pay attention to IO_DIRECT for reads. * * UFS does not pay attention to IO_SYNC for reads. * * ZFS pays attention to IO_SYNC (which translates into the * Solaris define FRSYNC for zfs_read()) for reads. It * attempts to sync the file before reading. */ error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred); VOP_UNLOCK(be_lun->vn); SDT_PROBE0(cbb, , read, file_done); if (error == 0 && xuio.uio_resid > 0) { /* * If we red less then requested (EOF), then * we should clean the rest of the buffer. */ s = beio->io_len - xuio.uio_resid; for (i = 0; i < beio->num_segs; i++) { if (s >= beio->sg_segs[i].len) { s -= beio->sg_segs[i].len; continue; } bzero((uint8_t *)beio->sg_segs[i].addr + s, beio->sg_segs[i].len - s); s = 0; } } } else { struct mount *mountpoint; int lock_flags; (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT); if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount))) lock_flags = LK_SHARED; else lock_flags = LK_EXCLUSIVE; vn_lock(be_lun->vn, lock_flags | LK_RETRY); /* * UFS pays attention to IO_DIRECT for writes. The write * is done asynchronously. (Normally the write would just * get put into cache. * * UFS pays attention to IO_SYNC for writes. It will * attempt to write the buffer out synchronously if that * flag is set. * * ZFS does not pay attention to IO_DIRECT for writes. * * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) * for writes. It will flush the transaction from the * cache before returning. */ error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred); VOP_UNLOCK(be_lun->vn); vn_finished_write(mountpoint); SDT_PROBE0(cbb, , write, file_done); } mtx_lock(&be_lun->io_lock); devstat_end_transaction(beio->lun->disk_stats, beio->io_len, beio->ds_tag_type, beio->ds_trans_type, /*now*/ NULL, /*then*/&beio->ds_t0); mtx_unlock(&be_lun->io_lock); /* * If we got an error, set the sense data to "MEDIUM ERROR" and * return the I/O to the user. */ if (error != 0) { if (error == ENOSPC || error == EDQUOT) { ctl_set_space_alloc_fail(&io->scsiio); } else if (error == EROFS || error == EACCES) { ctl_set_hw_write_protected(&io->scsiio); } else { ctl_set_medium_error(&io->scsiio, beio->bio_cmd == BIO_READ); } ctl_complete_beio(beio); return; } /* * If this is a write or a verify, we're all done. * If this is a read, we can now send the data to the user. */ if ((beio->bio_cmd == BIO_WRITE) || (ARGS(io)->flags & CTL_LLF_VERIFY)) { ctl_set_success(&io->scsiio); ctl_complete_beio(beio); } else { if ((ARGS(io)->flags & CTL_LLF_READ) && beio->beio_cont == NULL) { ctl_set_success(&io->scsiio); ctl_serseq_done(io); } #ifdef CTL_TIME_IO getbinuptime(&io->io_hdr.dma_start_bt); #endif ctl_datamove(io); } } static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { union ctl_io *io = beio->io; struct ctl_lba_len_flags *lbalen = ARGS(io); struct scsi_get_lba_status_data *data; off_t roff, off; int error, status; DPRINTF("entered\n"); off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize; vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off, 0, curthread->td_ucred, curthread); if (error == 0 && off > roff) status = 0; /* mapped up to off */ else { error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off, 0, curthread->td_ucred, curthread); if (error == 0 && off > roff) status = 1; /* deallocated up to off */ else { status = 0; /* unknown up to the end */ off = be_lun->size_bytes; } } VOP_UNLOCK(be_lun->vn); data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; scsi_u64to8b(lbalen->lba, data->descr[0].addr); scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize - lbalen->lba), data->descr[0].length); data->descr[0].status = status; ctl_complete_beio(beio); } static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname) { struct vattr vattr; struct statfs statfs; uint64_t val; int error; val = UINT64_MAX; if (be_lun->vn == NULL) return (val); vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); if (strcmp(attrname, "blocksused") == 0) { error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); if (error == 0) val = vattr.va_bytes / be_lun->cbe_lun.blocksize; } if (strcmp(attrname, "blocksavail") == 0 && !VN_IS_DOOMED(be_lun->vn)) { error = VFS_STATFS(be_lun->vn->v_mount, &statfs); if (error == 0) val = statfs.f_bavail * statfs.f_bsize / be_lun->cbe_lun.blocksize; } VOP_UNLOCK(be_lun->vn); return (val); } static void ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { union ctl_io *io; struct cdevsw *csw; struct cdev *dev; struct uio xuio; struct iovec *xiovec; int error, flags, i, ref; DPRINTF("entered\n"); io = beio->io; flags = 0; if (ARGS(io)->flags & CTL_LLF_DPO) flags |= IO_DIRECT; if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) flags |= IO_SYNC; bzero(&xuio, sizeof(xuio)); if (beio->bio_cmd == BIO_READ) { SDT_PROBE0(cbb, , read, file_start); xuio.uio_rw = UIO_READ; } else { SDT_PROBE0(cbb, , write, file_start); xuio.uio_rw = UIO_WRITE; } xuio.uio_offset = beio->io_offset; xuio.uio_resid = beio->io_len; xuio.uio_segflg = UIO_SYSSPACE; xuio.uio_iov = beio->xiovecs; xuio.uio_iovcnt = beio->num_segs; xuio.uio_td = curthread; for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { xiovec->iov_base = beio->sg_segs[i].addr; xiovec->iov_len = beio->sg_segs[i].len; } binuptime(&beio->ds_t0); devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); csw = devvn_refthread(be_lun->vn, &dev, &ref); if (csw) { if (beio->bio_cmd == BIO_READ) error = csw->d_read(dev, &xuio, flags); else error = csw->d_write(dev, &xuio, flags); dev_relthread(dev, ref); } else error = ENXIO; if (beio->bio_cmd == BIO_READ) SDT_PROBE0(cbb, , read, file_done); else SDT_PROBE0(cbb, , write, file_done); mtx_lock(&be_lun->io_lock); devstat_end_transaction(beio->lun->disk_stats, beio->io_len, beio->ds_tag_type, beio->ds_trans_type, /*now*/ NULL, /*then*/&beio->ds_t0); mtx_unlock(&be_lun->io_lock); /* * If we got an error, set the sense data to "MEDIUM ERROR" and * return the I/O to the user. */ if (error != 0) { if (error == ENOSPC || error == EDQUOT) { ctl_set_space_alloc_fail(&io->scsiio); } else if (error == EROFS || error == EACCES) { ctl_set_hw_write_protected(&io->scsiio); } else { ctl_set_medium_error(&io->scsiio, beio->bio_cmd == BIO_READ); } ctl_complete_beio(beio); return; } /* * If this is a write or a verify, we're all done. * If this is a read, we can now send the data to the user. */ if ((beio->bio_cmd == BIO_WRITE) || (ARGS(io)->flags & CTL_LLF_VERIFY)) { ctl_set_success(&io->scsiio); ctl_complete_beio(beio); } else { if ((ARGS(io)->flags & CTL_LLF_READ) && beio->beio_cont == NULL) { ctl_set_success(&io->scsiio); ctl_serseq_done(io); } #ifdef CTL_TIME_IO getbinuptime(&io->io_hdr.dma_start_bt); #endif ctl_datamove(io); } } static void ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { union ctl_io *io = beio->io; struct cdevsw *csw; struct cdev *dev; struct ctl_lba_len_flags *lbalen = ARGS(io); struct scsi_get_lba_status_data *data; off_t roff, off; int error, ref, status; DPRINTF("entered\n"); csw = devvn_refthread(be_lun->vn, &dev, &ref); if (csw == NULL) { status = 0; /* unknown up to the end */ off = be_lun->size_bytes; goto done; } off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize; error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD, curthread); if (error == 0 && off > roff) status = 0; /* mapped up to off */ else { error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD, curthread); if (error == 0 && off > roff) status = 1; /* deallocated up to off */ else { status = 0; /* unknown up to the end */ off = be_lun->size_bytes; } } dev_relthread(dev, ref); done: data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; scsi_u64to8b(lbalen->lba, data->descr[0].addr); scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize - lbalen->lba), data->descr[0].length); data->descr[0].status = status; ctl_complete_beio(beio); } static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { struct bio *bio; struct cdevsw *csw; struct cdev *dev; int ref; DPRINTF("entered\n"); /* This can't fail, it's a blocking allocation. */ bio = g_alloc_bio(); bio->bio_cmd = BIO_FLUSH; bio->bio_offset = 0; bio->bio_data = 0; bio->bio_done = ctl_be_block_biodone; bio->bio_caller1 = beio; bio->bio_pblkno = 0; /* * We don't need to acquire the LUN lock here, because we are only * sending one bio, and so there is no other context to synchronize * with. */ beio->num_bios_sent = 1; beio->send_complete = 1; binuptime(&beio->ds_t0); devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); csw = devvn_refthread(be_lun->vn, &dev, &ref); if (csw) { bio->bio_dev = dev; csw->d_strategy(bio); dev_relthread(dev, ref); } else { bio->bio_error = ENXIO; ctl_be_block_biodone(bio); } } static void ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio, uint64_t off, uint64_t len, int last) { struct bio *bio; uint64_t maxlen; struct cdevsw *csw; struct cdev *dev; int ref; csw = devvn_refthread(be_lun->vn, &dev, &ref); maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize); while (len > 0) { bio = g_alloc_bio(); bio->bio_cmd = BIO_DELETE; bio->bio_dev = dev; bio->bio_offset = off; bio->bio_length = MIN(len, maxlen); bio->bio_data = 0; bio->bio_done = ctl_be_block_biodone; bio->bio_caller1 = beio; bio->bio_pblkno = off / be_lun->cbe_lun.blocksize; off += bio->bio_length; len -= bio->bio_length; mtx_lock(&be_lun->io_lock); beio->num_bios_sent++; if (last && len == 0) beio->send_complete = 1; mtx_unlock(&be_lun->io_lock); if (csw) { csw->d_strategy(bio); } else { bio->bio_error = ENXIO; ctl_be_block_biodone(bio); } } if (csw) dev_relthread(dev, ref); } static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { union ctl_io *io; struct ctl_ptr_len_flags *ptrlen; struct scsi_unmap_desc *buf, *end; uint64_t len; io = beio->io; DPRINTF("entered\n"); binuptime(&beio->ds_t0); devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); if (beio->io_offset == -1) { beio->io_len = 0; ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; buf = (struct scsi_unmap_desc *)ptrlen->ptr; end = buf + ptrlen->len / sizeof(*buf); for (; buf < end; buf++) { len = (uint64_t)scsi_4btoul(buf->length) * be_lun->cbe_lun.blocksize; beio->io_len += len; ctl_be_block_unmap_dev_range(be_lun, beio, scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize, len, (end - buf < 2) ? TRUE : FALSE); } } else ctl_be_block_unmap_dev_range(be_lun, beio, beio->io_offset, beio->io_len, TRUE); } static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); struct bio *bio; struct cdevsw *csw; struct cdev *dev; off_t cur_offset; int i, max_iosize, ref; DPRINTF("entered\n"); csw = devvn_refthread(be_lun->vn, &dev, &ref); /* * We have to limit our I/O size to the maximum supported by the * backend device. Hopefully it is MAXPHYS. If the driver doesn't * set it properly, use DFLTPHYS. */ if (csw) { max_iosize = dev->si_iosize_max; if (max_iosize < PAGE_SIZE) max_iosize = DFLTPHYS; } else max_iosize = DFLTPHYS; cur_offset = beio->io_offset; for (i = 0; i < beio->num_segs; i++) { size_t cur_size; uint8_t *cur_ptr; cur_size = beio->sg_segs[i].len; cur_ptr = beio->sg_segs[i].addr; while (cur_size > 0) { /* This can't fail, it's a blocking allocation. */ bio = g_alloc_bio(); KASSERT(bio != NULL, ("g_alloc_bio() failed!\n")); bio->bio_cmd = beio->bio_cmd; bio->bio_dev = dev; bio->bio_caller1 = beio; bio->bio_length = min(cur_size, max_iosize); bio->bio_offset = cur_offset; bio->bio_data = cur_ptr; bio->bio_done = ctl_be_block_biodone; bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize; cur_offset += bio->bio_length; cur_ptr += bio->bio_length; cur_size -= bio->bio_length; TAILQ_INSERT_TAIL(&queue, bio, bio_queue); beio->num_bios_sent++; } } beio->send_complete = 1; binuptime(&beio->ds_t0); devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); /* * Fire off all allocated requests! */ while ((bio = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, bio, bio_queue); if (csw) csw->d_strategy(bio); else { bio->bio_error = ENXIO; ctl_be_block_biodone(bio); } } if (csw) dev_relthread(dev, ref); } static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname) { struct diocgattr_arg arg; struct cdevsw *csw; struct cdev *dev; int error, ref; csw = devvn_refthread(be_lun->vn, &dev, &ref); if (csw == NULL) return (UINT64_MAX); strlcpy(arg.name, attrname, sizeof(arg.name)); arg.len = sizeof(arg.value.off); if (csw->d_ioctl) { error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, curthread); } else error = ENODEV; dev_relthread(dev, ref); if (error != 0) return (UINT64_MAX); return (arg.value.off); } static void ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun, union ctl_io *io) { struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct ctl_be_block_io *beio; struct ctl_lba_len_flags *lbalen; DPRINTF("entered\n"); beio = (struct ctl_be_block_io *)PRIV(io)->ptr; lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; beio->io_len = lbalen->len * cbe_lun->blocksize; beio->io_offset = lbalen->lba * cbe_lun->blocksize; beio->io_arg = (lbalen->flags & SSC_IMMED) != 0; beio->bio_cmd = BIO_FLUSH; beio->ds_trans_type = DEVSTAT_NO_DATA; DPRINTF("SYNC\n"); be_lun->lun_flush(be_lun, beio); } static void ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio) { union ctl_io *io; io = beio->io; ctl_free_beio(beio); if ((io->io_hdr.flags & CTL_FLAG_ABORT) || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { ctl_config_write_done(io); return; } ctl_be_block_config_write(io); } static void ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun, union ctl_io *io) { struct ctl_be_block_softc *softc = be_lun->softc; struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct ctl_be_block_io *beio; struct ctl_lba_len_flags *lbalen; uint64_t len_left, lba; uint32_t pb, pbo, adj; int i, seglen; uint8_t *buf, *end; DPRINTF("entered\n"); beio = (struct ctl_be_block_io *)PRIV(io)->ptr; lbalen = ARGS(beio->io); if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) || (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) { ctl_free_beio(beio); ctl_set_invalid_field(&io->scsiio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 0, /*bit*/ 0); ctl_config_write_done(io); return; } if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) { beio->io_offset = lbalen->lba * cbe_lun->blocksize; beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize; beio->bio_cmd = BIO_DELETE; beio->ds_trans_type = DEVSTAT_FREE; be_lun->unmap(be_lun, beio); return; } beio->bio_cmd = BIO_WRITE; beio->ds_trans_type = DEVSTAT_WRITE; DPRINTF("WRITE SAME at LBA %jx len %u\n", (uintmax_t)lbalen->lba, lbalen->len); pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp; if (be_lun->cbe_lun.pblockoff > 0) pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff; else pbo = 0; len_left = (uint64_t)lbalen->len * cbe_lun->blocksize; for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) { /* * Setup the S/G entry for this chunk. */ seglen = MIN(CTLBLK_MAX_SEG, len_left); if (pb > cbe_lun->blocksize) { adj = ((lbalen->lba + lba) * cbe_lun->blocksize + seglen - pbo) % pb; if (seglen > adj) seglen -= adj; else seglen -= seglen % cbe_lun->blocksize; } else seglen -= seglen % cbe_lun->blocksize; beio->sg_segs[i].len = seglen; beio->sg_segs[i].addr = uma_zalloc(softc->buf_zone, M_WAITOK); DPRINTF("segment %d addr %p len %zd\n", i, beio->sg_segs[i].addr, beio->sg_segs[i].len); beio->num_segs++; len_left -= seglen; buf = beio->sg_segs[i].addr; end = buf + seglen; for (; buf < end; buf += cbe_lun->blocksize) { if (lbalen->flags & SWS_NDOB) { memset(buf, 0, cbe_lun->blocksize); } else { memcpy(buf, io->scsiio.kern_data_ptr, cbe_lun->blocksize); } if (lbalen->flags & SWS_LBDATA) scsi_ulto4b(lbalen->lba + lba, buf); lba++; } } beio->io_offset = lbalen->lba * cbe_lun->blocksize; beio->io_len = lba * cbe_lun->blocksize; /* We can not do all in one run. Correct and schedule rerun. */ if (len_left > 0) { lbalen->lba += lba; lbalen->len -= lba; beio->beio_cont = ctl_be_block_cw_done_ws; } be_lun->dispatch(be_lun, beio); } static void ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun, union ctl_io *io) { struct ctl_be_block_io *beio; struct ctl_ptr_len_flags *ptrlen; DPRINTF("entered\n"); beio = (struct ctl_be_block_io *)PRIV(io)->ptr; ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) { ctl_free_beio(beio); ctl_set_invalid_field(&io->scsiio, /*sks_valid*/ 0, /*command*/ 1, /*field*/ 0, /*bit_valid*/ 0, /*bit*/ 0); ctl_config_write_done(io); return; } beio->io_len = 0; beio->io_offset = -1; beio->bio_cmd = BIO_DELETE; beio->ds_trans_type = DEVSTAT_FREE; DPRINTF("UNMAP\n"); be_lun->unmap(be_lun, beio); } static void ctl_be_block_cr_done(struct ctl_be_block_io *beio) { union ctl_io *io; io = beio->io; ctl_free_beio(beio); ctl_config_read_done(io); } static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io) { struct ctl_be_block_io *beio; struct ctl_be_block_softc *softc; DPRINTF("entered\n"); softc = be_lun->softc; beio = ctl_alloc_beio(softc); beio->io = io; beio->lun = be_lun; beio->beio_cont = ctl_be_block_cr_done; PRIV(io)->ptr = (void *)beio; switch (io->scsiio.cdb[0]) { case SERVICE_ACTION_IN: /* GET LBA STATUS */ beio->bio_cmd = -1; beio->ds_trans_type = DEVSTAT_NO_DATA; beio->ds_tag_type = DEVSTAT_TAG_ORDERED; beio->io_len = 0; if (be_lun->get_lba_status) be_lun->get_lba_status(be_lun, beio); else ctl_be_block_cr_done(beio); break; default: panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); break; } } static void ctl_be_block_cw_done(struct ctl_be_block_io *beio) { union ctl_io *io; io = beio->io; ctl_free_beio(beio); ctl_config_write_done(io); } static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io) { struct ctl_be_block_io *beio; struct ctl_be_block_softc *softc; DPRINTF("entered\n"); softc = be_lun->softc; beio = ctl_alloc_beio(softc); beio->io = io; beio->lun = be_lun; beio->beio_cont = ctl_be_block_cw_done; switch (io->scsiio.tag_type) { case CTL_TAG_ORDERED: beio->ds_tag_type = DEVSTAT_TAG_ORDERED; break; case CTL_TAG_HEAD_OF_QUEUE: beio->ds_tag_type = DEVSTAT_TAG_HEAD; break; case CTL_TAG_UNTAGGED: case CTL_TAG_SIMPLE: case CTL_TAG_ACA: default: beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; break; } PRIV(io)->ptr = (void *)beio; switch (io->scsiio.cdb[0]) { case SYNCHRONIZE_CACHE: case SYNCHRONIZE_CACHE_16: ctl_be_block_cw_dispatch_sync(be_lun, io); break; case WRITE_SAME_10: case WRITE_SAME_16: ctl_be_block_cw_dispatch_ws(be_lun, io); break; case UNMAP: ctl_be_block_cw_dispatch_unmap(be_lun, io); break; default: panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); break; } } SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t"); SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t"); SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t"); SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t"); static void ctl_be_block_next(struct ctl_be_block_io *beio) { struct ctl_be_block_lun *be_lun; union ctl_io *io; io = beio->io; be_lun = beio->lun; ctl_free_beio(beio); if ((io->io_hdr.flags & CTL_FLAG_ABORT) || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { ctl_data_submit_done(io); return; } io->io_hdr.status &= ~CTL_STATUS_MASK; io->io_hdr.status |= CTL_STATUS_NONE; mtx_lock(&be_lun->queue_lock); STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); mtx_unlock(&be_lun->queue_lock); taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); } static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io) { struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct ctl_be_block_io *beio; struct ctl_be_block_softc *softc; struct ctl_lba_len_flags *lbalen; struct ctl_ptr_len_flags *bptrlen; uint64_t len_left, lbas; int i; softc = be_lun->softc; DPRINTF("entered\n"); lbalen = ARGS(io); if (lbalen->flags & CTL_LLF_WRITE) { SDT_PROBE0(cbb, , write, start); } else { SDT_PROBE0(cbb, , read, start); } beio = ctl_alloc_beio(softc); beio->io = io; beio->lun = be_lun; bptrlen = PRIV(io); bptrlen->ptr = (void *)beio; switch (io->scsiio.tag_type) { case CTL_TAG_ORDERED: beio->ds_tag_type = DEVSTAT_TAG_ORDERED; break; case CTL_TAG_HEAD_OF_QUEUE: beio->ds_tag_type = DEVSTAT_TAG_HEAD; break; case CTL_TAG_UNTAGGED: case CTL_TAG_SIMPLE: case CTL_TAG_ACA: default: beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; break; } if (lbalen->flags & CTL_LLF_WRITE) { beio->bio_cmd = BIO_WRITE; beio->ds_trans_type = DEVSTAT_WRITE; } else { beio->bio_cmd = BIO_READ; beio->ds_trans_type = DEVSTAT_READ; } DPRINTF("%s at LBA %jx len %u @%ju\n", (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len); if (lbalen->flags & CTL_LLF_COMPARE) { beio->two_sglists = 1; lbas = CTLBLK_HALF_IO_SIZE; } else { lbas = CTLBLK_MAX_IO_SIZE; } lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize); beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize; beio->io_len = lbas * cbe_lun->blocksize; bptrlen->len += lbas; for (i = 0, len_left = beio->io_len; len_left > 0; i++) { KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)", i, CTLBLK_MAX_SEGS)); /* * Setup the S/G entry for this chunk. */ beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left); beio->sg_segs[i].addr = uma_zalloc(softc->buf_zone, M_WAITOK); DPRINTF("segment %d addr %p len %zd\n", i, beio->sg_segs[i].addr, beio->sg_segs[i].len); /* Set up second segment for compare operation. */ if (beio->two_sglists) { beio->sg_segs[i + CTLBLK_HALF_SEGS].len = beio->sg_segs[i].len; beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = uma_zalloc(softc->buf_zone, M_WAITOK); } beio->num_segs++; len_left -= beio->sg_segs[i].len; } if (bptrlen->len < lbalen->len) beio->beio_cont = ctl_be_block_next; io->scsiio.be_move_done = ctl_be_block_move_done; /* For compare we have separate S/G lists for read and datamove. */ if (beio->two_sglists) io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS]; else io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs; io->scsiio.kern_data_len = beio->io_len; io->scsiio.kern_sg_entries = beio->num_segs; + io->scsiio.kern_data_ref = ctl_refcnt_beio; + io->scsiio.kern_data_arg = beio; io->io_hdr.flags |= CTL_FLAG_ALLOCATED; /* * For the read case, we need to read the data into our buffers and * then we can send it back to the user. For the write case, we * need to get the data from the user first. */ if (beio->bio_cmd == BIO_READ) { SDT_PROBE0(cbb, , read, alloc_done); be_lun->dispatch(be_lun, beio); } else { SDT_PROBE0(cbb, , write, alloc_done); #ifdef CTL_TIME_IO getbinuptime(&io->io_hdr.dma_start_bt); #endif ctl_datamove(io); } } static void ctl_be_block_worker(void *context, int pending) { struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context; struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; union ctl_io *io; struct ctl_be_block_io *beio; DPRINTF("entered\n"); /* * Fetch and process I/Os from all queues. If we detect LUN * CTL_LUN_FLAG_NO_MEDIA status here -- it is result of a race, * so make response maximally opaque to not confuse initiator. */ for (;;) { mtx_lock(&be_lun->queue_lock); io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue); if (io != NULL) { DPRINTF("datamove queue\n"); STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr, ctl_io_hdr, links); mtx_unlock(&be_lun->queue_lock); beio = (struct ctl_be_block_io *)PRIV(io)->ptr; if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { ctl_set_busy(&io->scsiio); ctl_complete_beio(beio); return; } be_lun->dispatch(be_lun, beio); continue; } io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue); if (io != NULL) { DPRINTF("config write queue\n"); STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr, ctl_io_hdr, links); mtx_unlock(&be_lun->queue_lock); if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { ctl_set_busy(&io->scsiio); ctl_config_write_done(io); return; } ctl_be_block_cw_dispatch(be_lun, io); continue; } io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue); if (io != NULL) { DPRINTF("config read queue\n"); STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr, ctl_io_hdr, links); mtx_unlock(&be_lun->queue_lock); if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { ctl_set_busy(&io->scsiio); ctl_config_read_done(io); return; } ctl_be_block_cr_dispatch(be_lun, io); continue; } io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue); if (io != NULL) { DPRINTF("input queue\n"); STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr, ctl_io_hdr, links); mtx_unlock(&be_lun->queue_lock); if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) { ctl_set_busy(&io->scsiio); ctl_data_submit_done(io); return; } ctl_be_block_dispatch(be_lun, io); continue; } /* * If we get here, there is no work left in the queues, so * just break out and let the task queue go to sleep. */ mtx_unlock(&be_lun->queue_lock); break; } } /* * Entry point from CTL to the backend for I/O. We queue everything to a * work thread, so this just puts the I/O on a queue and wakes up the * thread. */ static int ctl_be_block_submit(union ctl_io *io) { struct ctl_be_block_lun *be_lun; DPRINTF("entered\n"); be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io); /* * Make sure we only get SCSI I/O. */ KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type " "%#x) encountered", io->io_hdr.io_type)); PRIV(io)->len = 0; mtx_lock(&be_lun->queue_lock); STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); mtx_unlock(&be_lun->queue_lock); taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); return (CTL_RETVAL_COMPLETE); } static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td) { struct ctl_be_block_softc *softc = &backend_block_softc; int error; error = 0; switch (cmd) { case CTL_LUN_REQ: { struct ctl_lun_req *lun_req; lun_req = (struct ctl_lun_req *)addr; switch (lun_req->reqtype) { case CTL_LUNREQ_CREATE: error = ctl_be_block_create(softc, lun_req); break; case CTL_LUNREQ_RM: error = ctl_be_block_rm(softc, lun_req); break; case CTL_LUNREQ_MODIFY: error = ctl_be_block_modify(softc, lun_req); break; default: lun_req->status = CTL_LUN_ERROR; snprintf(lun_req->error_str, sizeof(lun_req->error_str), "invalid LUN request type %d", lun_req->reqtype); break; } break; } default: error = ENOTTY; break; } return (error); } static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) { struct ctl_be_lun *cbe_lun; struct ctl_be_block_filedata *file_data; struct ctl_lun_create_params *params; const char *value; struct vattr vattr; off_t ps, pss, po, pos, us, uss, uo, uos; int error; cbe_lun = &be_lun->cbe_lun; file_data = &be_lun->backend.file; params = &be_lun->params; be_lun->dev_type = CTL_BE_BLOCK_FILE; be_lun->dispatch = ctl_be_block_dispatch_file; be_lun->lun_flush = ctl_be_block_flush_file; be_lun->get_lba_status = ctl_be_block_gls_file; be_lun->getattr = ctl_be_block_getattr_file; be_lun->unmap = NULL; cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); if (error != 0) { snprintf(req->error_str, sizeof(req->error_str), "error calling VOP_GETATTR() for file %s", be_lun->dev_path); return (error); } file_data->cred = crhold(curthread->td_ucred); if (params->lun_size_bytes != 0) be_lun->size_bytes = params->lun_size_bytes; else be_lun->size_bytes = vattr.va_size; /* * For files we can use any logical block size. Prefer 512 bytes * for compatibility reasons. If file's vattr.va_blocksize * (preferred I/O block size) is bigger and multiple to chosen * logical block size -- report it as physical block size. */ if (params->blocksize_bytes != 0) cbe_lun->blocksize = params->blocksize_bytes; else if (cbe_lun->lun_type == T_CDROM) cbe_lun->blocksize = 2048; else cbe_lun->blocksize = 512; be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 0 : (be_lun->size_blocks - 1); us = ps = vattr.va_blocksize; uo = po = 0; value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL); if (value != NULL) ctl_expand_number(value, &ps); value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL); if (value != NULL) ctl_expand_number(value, &po); pss = ps / cbe_lun->blocksize; pos = po / cbe_lun->blocksize; if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { cbe_lun->pblockexp = fls(pss) - 1; cbe_lun->pblockoff = (pss - pos) % pss; } value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL); if (value != NULL) ctl_expand_number(value, &us); value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL); if (value != NULL) ctl_expand_number(value, &uo); uss = us / cbe_lun->blocksize; uos = uo / cbe_lun->blocksize; if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { cbe_lun->ublockexp = fls(uss) - 1; cbe_lun->ublockoff = (uss - uos) % uss; } /* * Sanity check. The media size has to be at least one * sector long. */ if (be_lun->size_bytes < cbe_lun->blocksize) { error = EINVAL; snprintf(req->error_str, sizeof(req->error_str), "file %s size %ju < block size %u", be_lun->dev_path, (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize); } cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize; return (error); } static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) { struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct ctl_lun_create_params *params; struct cdevsw *csw; struct cdev *dev; const char *value; int error, atomic, maxio, ref, unmap, tmp; off_t ps, pss, po, pos, us, uss, uo, uos, otmp; params = &be_lun->params; be_lun->dev_type = CTL_BE_BLOCK_DEV; csw = devvn_refthread(be_lun->vn, &dev, &ref); if (csw == NULL) return (ENXIO); if (strcmp(csw->d_name, "zvol") == 0) { be_lun->dispatch = ctl_be_block_dispatch_zvol; be_lun->get_lba_status = ctl_be_block_gls_zvol; atomic = maxio = CTLBLK_MAX_IO_SIZE; } else { be_lun->dispatch = ctl_be_block_dispatch_dev; be_lun->get_lba_status = NULL; atomic = 0; maxio = dev->si_iosize_max; if (maxio <= 0) maxio = DFLTPHYS; if (maxio > CTLBLK_MAX_IO_SIZE) maxio = CTLBLK_MAX_IO_SIZE; } be_lun->lun_flush = ctl_be_block_flush_dev; be_lun->getattr = ctl_be_block_getattr_dev; be_lun->unmap = ctl_be_block_unmap_dev; if (!csw->d_ioctl) { dev_relthread(dev, ref); snprintf(req->error_str, sizeof(req->error_str), "no d_ioctl for device %s!", be_lun->dev_path); return (ENODEV); } error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD, curthread); if (error) { dev_relthread(dev, ref); snprintf(req->error_str, sizeof(req->error_str), "error %d returned for DIOCGSECTORSIZE ioctl " "on %s!", error, be_lun->dev_path); return (error); } /* * If the user has asked for a blocksize that is greater than the * backing device's blocksize, we can do it only if the blocksize * the user is asking for is an even multiple of the underlying * device's blocksize. */ if ((params->blocksize_bytes != 0) && (params->blocksize_bytes >= tmp)) { if (params->blocksize_bytes % tmp == 0) { cbe_lun->blocksize = params->blocksize_bytes; } else { dev_relthread(dev, ref); snprintf(req->error_str, sizeof(req->error_str), "requested blocksize %u is not an even " "multiple of backing device blocksize %u", params->blocksize_bytes, tmp); return (EINVAL); } } else if (params->blocksize_bytes != 0) { dev_relthread(dev, ref); snprintf(req->error_str, sizeof(req->error_str), "requested blocksize %u < backing device " "blocksize %u", params->blocksize_bytes, tmp); return (EINVAL); } else if (cbe_lun->lun_type == T_CDROM) cbe_lun->blocksize = MAX(tmp, 2048); else cbe_lun->blocksize = tmp; error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD, curthread); if (error) { dev_relthread(dev, ref); snprintf(req->error_str, sizeof(req->error_str), "error %d returned for DIOCGMEDIASIZE " " ioctl on %s!", error, be_lun->dev_path); return (error); } if (params->lun_size_bytes != 0) { if (params->lun_size_bytes > otmp) { dev_relthread(dev, ref); snprintf(req->error_str, sizeof(req->error_str), "requested LUN size %ju > backing device " "size %ju", (uintmax_t)params->lun_size_bytes, (uintmax_t)otmp); return (EINVAL); } be_lun->size_bytes = params->lun_size_bytes; } else be_lun->size_bytes = otmp; be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 0 : (be_lun->size_blocks - 1); error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD, curthread); if (error) ps = po = 0; else { error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po, FREAD, curthread); if (error) po = 0; } us = ps; uo = po; value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL); if (value != NULL) ctl_expand_number(value, &ps); value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL); if (value != NULL) ctl_expand_number(value, &po); pss = ps / cbe_lun->blocksize; pos = po / cbe_lun->blocksize; if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { cbe_lun->pblockexp = fls(pss) - 1; cbe_lun->pblockoff = (pss - pos) % pss; } value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL); if (value != NULL) ctl_expand_number(value, &us); value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL); if (value != NULL) ctl_expand_number(value, &uo); uss = us / cbe_lun->blocksize; uos = uo / cbe_lun->blocksize; if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { cbe_lun->ublockexp = fls(uss) - 1; cbe_lun->ublockoff = (uss - uos) % uss; } cbe_lun->atomicblock = atomic / cbe_lun->blocksize; cbe_lun->opttxferlen = maxio / cbe_lun->blocksize; if (be_lun->dispatch == ctl_be_block_dispatch_zvol) { unmap = 1; } else { struct diocgattr_arg arg; strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); arg.len = sizeof(arg.value.i); error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, curthread); unmap = (error == 0) ? arg.value.i : 0; } value = dnvlist_get_string(cbe_lun->options, "unmap", NULL); if (value != NULL) unmap = (strcmp(value, "on") == 0); if (unmap) cbe_lun->flags |= CTL_LUN_FLAG_UNMAP; else cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; dev_relthread(dev, ref); return (0); } static int ctl_be_block_close(struct ctl_be_block_lun *be_lun) { struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; int flags; if (be_lun->vn) { flags = FREAD; if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0) flags |= FWRITE; (void)vn_close(be_lun->vn, flags, NOCRED, curthread); be_lun->vn = NULL; switch (be_lun->dev_type) { case CTL_BE_BLOCK_DEV: break; case CTL_BE_BLOCK_FILE: if (be_lun->backend.file.cred != NULL) { crfree(be_lun->backend.file.cred); be_lun->backend.file.cred = NULL; } break; case CTL_BE_BLOCK_NONE: break; default: panic("Unexpected backend type %d", be_lun->dev_type); break; } be_lun->dev_type = CTL_BE_BLOCK_NONE; } return (0); } static int ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) { struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct nameidata nd; const char *value; int error, flags; error = 0; if (rootvnode == NULL) { snprintf(req->error_str, sizeof(req->error_str), "Root filesystem is not mounted"); return (1); } pwd_ensure_dirs(); value = dnvlist_get_string(cbe_lun->options, "file", NULL); if (value == NULL) { snprintf(req->error_str, sizeof(req->error_str), "no file argument specified"); return (1); } free(be_lun->dev_path, M_CTLBLK); be_lun->dev_path = strdup(value, M_CTLBLK); flags = FREAD; value = dnvlist_get_string(cbe_lun->options, "readonly", NULL); if (value != NULL) { if (strcmp(value, "on") != 0) flags |= FWRITE; } else if (cbe_lun->lun_type == T_DIRECT) flags |= FWRITE; again: NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread); error = vn_open(&nd, &flags, 0, NULL); if ((error == EROFS || error == EACCES) && (flags & FWRITE)) { flags &= ~FWRITE; goto again; } if (error) { /* * This is the only reasonable guess we can make as far as * path if the user doesn't give us a fully qualified path. * If they want to specify a file, they need to specify the * full path. */ if (be_lun->dev_path[0] != '/') { char *dev_name; asprintf(&dev_name, M_CTLBLK, "/dev/%s", be_lun->dev_path); free(be_lun->dev_path, M_CTLBLK); be_lun->dev_path = dev_name; goto again; } snprintf(req->error_str, sizeof(req->error_str), "error opening %s: %d", be_lun->dev_path, error); return (error); } if (flags & FWRITE) cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY; else cbe_lun->flags |= CTL_LUN_FLAG_READONLY; NDFREE(&nd, NDF_ONLY_PNBUF); be_lun->vn = nd.ni_vp; /* We only support disks and files. */ if (vn_isdisk(be_lun->vn, &error)) { error = ctl_be_block_open_dev(be_lun, req); } else if (be_lun->vn->v_type == VREG) { error = ctl_be_block_open_file(be_lun, req); } else { error = EINVAL; snprintf(req->error_str, sizeof(req->error_str), "%s is not a disk or plain file", be_lun->dev_path); } VOP_UNLOCK(be_lun->vn); if (error != 0) ctl_be_block_close(be_lun); cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; if (be_lun->dispatch != ctl_be_block_dispatch_dev) cbe_lun->serseq = CTL_LUN_SERSEQ_READ; value = dnvlist_get_string(cbe_lun->options, "serseq", NULL); if (value != NULL && strcmp(value, "on") == 0) cbe_lun->serseq = CTL_LUN_SERSEQ_ON; else if (value != NULL && strcmp(value, "read") == 0) cbe_lun->serseq = CTL_LUN_SERSEQ_READ; else if (value != NULL && strcmp(value, "off") == 0) cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; return (0); } static int ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) { struct ctl_be_lun *cbe_lun; struct ctl_be_block_lun *be_lun; struct ctl_lun_create_params *params; char num_thread_str[16]; char tmpstr[32]; const char *value; int retval, num_threads; int tmp_num_threads; params = &req->reqdata.create; retval = 0; req->status = CTL_LUN_OK; be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK); cbe_lun = &be_lun->cbe_lun; be_lun->params = req->reqdata.create; be_lun->softc = softc; STAILQ_INIT(&be_lun->input_queue); STAILQ_INIT(&be_lun->config_read_queue); STAILQ_INIT(&be_lun->config_write_queue); STAILQ_INIT(&be_lun->datamove_queue); mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF); mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, MTX_DEF); cbe_lun->options = nvlist_clone(req->args_nvl); if (params->flags & CTL_LUN_FLAG_DEV_TYPE) cbe_lun->lun_type = params->device_type; else cbe_lun->lun_type = T_DIRECT; be_lun->flags = 0; cbe_lun->flags = 0; value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL); if (value != NULL) { if (strcmp(value, "primary") == 0) cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; if (cbe_lun->lun_type == T_DIRECT || cbe_lun->lun_type == T_CDROM) { be_lun->size_bytes = params->lun_size_bytes; if (params->blocksize_bytes != 0) cbe_lun->blocksize = params->blocksize_bytes; else if (cbe_lun->lun_type == T_CDROM) cbe_lun->blocksize = 2048; else cbe_lun->blocksize = 512; be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 0 : (be_lun->size_blocks - 1); if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { retval = ctl_be_block_open(be_lun, req); if (retval != 0) { retval = 0; req->status = CTL_LUN_WARNING; } } num_threads = cbb_num_threads; } else { num_threads = 1; } value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL); if (value != NULL) { tmp_num_threads = strtol(value, NULL, 0); /* * We don't let the user specify less than one * thread, but hope he's clueful enough not to * specify 1000 threads. */ if (tmp_num_threads < 1) { snprintf(req->error_str, sizeof(req->error_str), "invalid number of threads %s", num_thread_str); goto bailout_error; } num_threads = tmp_num_threads; } if (be_lun->vn == NULL) cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; /* Tell the user the blocksize we ended up using */ params->lun_size_bytes = be_lun->size_bytes; params->blocksize_bytes = cbe_lun->blocksize; if (params->flags & CTL_LUN_FLAG_ID_REQ) { cbe_lun->req_lun_id = params->req_lun_id; cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ; } else cbe_lun->req_lun_id = 0; cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown; cbe_lun->be = &ctl_be_block_driver; if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) { snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d", softc->num_luns); strncpy((char *)cbe_lun->serial_num, tmpstr, MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr))); /* Tell the user what we used for a serial number */ strncpy((char *)params->serial_num, tmpstr, MIN(sizeof(params->serial_num), sizeof(tmpstr))); } else { strncpy((char *)cbe_lun->serial_num, params->serial_num, MIN(sizeof(cbe_lun->serial_num), sizeof(params->serial_num))); } if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) { snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns); strncpy((char *)cbe_lun->device_id, tmpstr, MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr))); /* Tell the user what we used for a device ID */ strncpy((char *)params->device_id, tmpstr, MIN(sizeof(params->device_id), sizeof(tmpstr))); } else { strncpy((char *)cbe_lun->device_id, params->device_id, MIN(sizeof(cbe_lun->device_id), sizeof(params->device_id))); } TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun); be_lun->io_taskqueue = taskqueue_create("ctlblocktq", M_WAITOK, taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue); if (be_lun->io_taskqueue == NULL) { snprintf(req->error_str, sizeof(req->error_str), "unable to create taskqueue"); goto bailout_error; } /* * Note that we start the same number of threads by default for * both the file case and the block device case. For the file * case, we need multiple threads to allow concurrency, because the * vnode interface is designed to be a blocking interface. For the * block device case, ZFS zvols at least will block the caller's * context in many instances, and so we need multiple threads to * overcome that problem. Other block devices don't need as many * threads, but they shouldn't cause too many problems. * * If the user wants to just have a single thread for a block * device, he can specify that when the LUN is created, or change * the tunable/sysctl to alter the default number of threads. */ retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue, /*num threads*/num_threads, /*priority*/PUSER, /*proc*/control_softc->ctl_proc, /*thread name*/"block"); if (retval != 0) goto bailout_error; be_lun->num_threads = num_threads; retval = ctl_add_lun(&be_lun->cbe_lun); if (retval != 0) { snprintf(req->error_str, sizeof(req->error_str), "ctl_add_lun() returned error %d, see dmesg for " "details", retval); retval = 0; goto bailout_error; } be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id, cbe_lun->blocksize, DEVSTAT_ALL_SUPPORTED, cbe_lun->lun_type | DEVSTAT_TYPE_IF_OTHER, DEVSTAT_PRIORITY_OTHER); mtx_lock(&softc->lock); softc->num_luns++; SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links); mtx_unlock(&softc->lock); params->req_lun_id = cbe_lun->lun_id; return (retval); bailout_error: req->status = CTL_LUN_ERROR; if (be_lun->io_taskqueue != NULL) taskqueue_free(be_lun->io_taskqueue); ctl_be_block_close(be_lun); if (be_lun->dev_path != NULL) free(be_lun->dev_path, M_CTLBLK); nvlist_destroy(cbe_lun->options); mtx_destroy(&be_lun->queue_lock); mtx_destroy(&be_lun->io_lock); free(be_lun, M_CTLBLK); return (retval); } static int ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) { struct ctl_lun_rm_params *params; struct ctl_be_block_lun *be_lun; struct ctl_be_lun *cbe_lun; int retval; params = &req->reqdata.rm; sx_xlock(&softc->modify_lock); mtx_lock(&softc->lock); SLIST_FOREACH(be_lun, &softc->lun_list, links) { if (be_lun->cbe_lun.lun_id == params->lun_id) { SLIST_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links); softc->num_luns--; break; } } mtx_unlock(&softc->lock); sx_xunlock(&softc->modify_lock); if (be_lun == NULL) { snprintf(req->error_str, sizeof(req->error_str), "LUN %u is not managed by the block backend", params->lun_id); goto bailout_error; } cbe_lun = &be_lun->cbe_lun; if (be_lun->vn != NULL) { cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; ctl_lun_no_media(cbe_lun); taskqueue_drain_all(be_lun->io_taskqueue); ctl_be_block_close(be_lun); } mtx_lock(&softc->lock); be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; mtx_unlock(&softc->lock); retval = ctl_remove_lun(cbe_lun); if (retval != 0) { snprintf(req->error_str, sizeof(req->error_str), "error %d returned from ctl_remove_lun() for " "LUN %d", retval, params->lun_id); mtx_lock(&softc->lock); be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; mtx_unlock(&softc->lock); goto bailout_error; } mtx_lock(&softc->lock); while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0); if (retval == EINTR) break; } be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) { mtx_unlock(&softc->lock); free(be_lun, M_CTLBLK); } else { mtx_unlock(&softc->lock); return (EINTR); } req->status = CTL_LUN_OK; return (0); bailout_error: req->status = CTL_LUN_ERROR; return (0); } static int ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) { struct ctl_lun_modify_params *params; struct ctl_be_block_lun *be_lun; struct ctl_be_lun *cbe_lun; const char *value; uint64_t oldsize; int error, wasprim; params = &req->reqdata.modify; sx_xlock(&softc->modify_lock); mtx_lock(&softc->lock); SLIST_FOREACH(be_lun, &softc->lun_list, links) { if (be_lun->cbe_lun.lun_id == params->lun_id) break; } mtx_unlock(&softc->lock); if (be_lun == NULL) { snprintf(req->error_str, sizeof(req->error_str), "LUN %u is not managed by the block backend", params->lun_id); goto bailout_error; } cbe_lun = &be_lun->cbe_lun; if (params->lun_size_bytes != 0) be_lun->params.lun_size_bytes = params->lun_size_bytes; if (req->args_nvl != NULL) { nvlist_destroy(cbe_lun->options); cbe_lun->options = nvlist_clone(req->args_nvl); } wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY); value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL); if (value != NULL) { if (strcmp(value, "primary") == 0) cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; else cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; else cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) { if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ctl_lun_primary(cbe_lun); else ctl_lun_secondary(cbe_lun); } oldsize = be_lun->size_blocks; if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { if (be_lun->vn == NULL) error = ctl_be_block_open(be_lun, req); else if (vn_isdisk(be_lun->vn, &error)) error = ctl_be_block_open_dev(be_lun, req); else if (be_lun->vn->v_type == VREG) { vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); error = ctl_be_block_open_file(be_lun, req); VOP_UNLOCK(be_lun->vn); } else error = EINVAL; if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) && be_lun->vn != NULL) { cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA; ctl_lun_has_media(cbe_lun); } else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 && be_lun->vn == NULL) { cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; ctl_lun_no_media(cbe_lun); } cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED; } else { if (be_lun->vn != NULL) { cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; ctl_lun_no_media(cbe_lun); taskqueue_drain_all(be_lun->io_taskqueue); error = ctl_be_block_close(be_lun); } else error = 0; } if (be_lun->size_blocks != oldsize) ctl_lun_capacity_changed(cbe_lun); /* Tell the user the exact size we ended up using */ params->lun_size_bytes = be_lun->size_bytes; sx_xunlock(&softc->modify_lock); req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK; return (0); bailout_error: sx_xunlock(&softc->modify_lock); req->status = CTL_LUN_ERROR; return (0); } static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun) { struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun; struct ctl_be_block_softc *softc = be_lun->softc; taskqueue_drain_all(be_lun->io_taskqueue); taskqueue_free(be_lun->io_taskqueue); if (be_lun->disk_stats != NULL) devstat_remove_entry(be_lun->disk_stats); nvlist_destroy(be_lun->cbe_lun.options); free(be_lun->dev_path, M_CTLBLK); mtx_destroy(&be_lun->queue_lock); mtx_destroy(&be_lun->io_lock); mtx_lock(&softc->lock); be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED; if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING) wakeup(be_lun); else free(be_lun, M_CTLBLK); mtx_unlock(&softc->lock); } static int ctl_be_block_config_write(union ctl_io *io) { struct ctl_be_block_lun *be_lun; struct ctl_be_lun *cbe_lun; int retval; DPRINTF("entered\n"); cbe_lun = CTL_BACKEND_LUN(io); be_lun = (struct ctl_be_block_lun *)cbe_lun; retval = 0; switch (io->scsiio.cdb[0]) { case SYNCHRONIZE_CACHE: case SYNCHRONIZE_CACHE_16: case WRITE_SAME_10: case WRITE_SAME_16: case UNMAP: /* * The upper level CTL code will filter out any CDBs with * the immediate bit set and return the proper error. * * We don't really need to worry about what LBA range the * user asked to be synced out. When they issue a sync * cache command, we'll sync out the whole thing. */ mtx_lock(&be_lun->queue_lock); STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr, links); mtx_unlock(&be_lun->queue_lock); taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); break; case START_STOP_UNIT: { struct scsi_start_stop_unit *cdb; struct ctl_lun_req req; cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb; if ((cdb->how & SSS_PC_MASK) != 0) { ctl_set_success(&io->scsiio); ctl_config_write_done(io); break; } if (cdb->how & SSS_START) { if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) { retval = ctl_be_block_open(be_lun, &req); cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED; if (retval == 0) { cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA; ctl_lun_has_media(cbe_lun); } else { cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; ctl_lun_no_media(cbe_lun); } } ctl_start_lun(cbe_lun); } else { ctl_stop_lun(cbe_lun); if (cdb->how & SSS_LOEJ) { cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA; cbe_lun->flags |= CTL_LUN_FLAG_EJECTED; ctl_lun_ejected(cbe_lun); if (be_lun->vn != NULL) ctl_be_block_close(be_lun); } } ctl_set_success(&io->scsiio); ctl_config_write_done(io); break; } case PREVENT_ALLOW: ctl_set_success(&io->scsiio); ctl_config_write_done(io); break; default: ctl_set_invalid_opcode(&io->scsiio); ctl_config_write_done(io); retval = CTL_RETVAL_COMPLETE; break; } return (retval); } static int ctl_be_block_config_read(union ctl_io *io) { struct ctl_be_block_lun *be_lun; int retval = 0; DPRINTF("entered\n"); be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io); switch (io->scsiio.cdb[0]) { case SERVICE_ACTION_IN: if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) { mtx_lock(&be_lun->queue_lock); STAILQ_INSERT_TAIL(&be_lun->config_read_queue, &io->io_hdr, links); mtx_unlock(&be_lun->queue_lock); taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); retval = CTL_RETVAL_QUEUED; break; } ctl_set_invalid_field(&io->scsiio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 4); ctl_config_read_done(io); retval = CTL_RETVAL_COMPLETE; break; default: ctl_set_invalid_opcode(&io->scsiio); ctl_config_read_done(io); retval = CTL_RETVAL_COMPLETE; break; } return (retval); } static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb) { struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun; int retval; retval = sbuf_printf(sb, "\t"); if (retval != 0) goto bailout; retval = sbuf_printf(sb, "%d", lun->num_threads); if (retval != 0) goto bailout; retval = sbuf_printf(sb, "\n"); bailout: return (retval); } static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname) { struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun; if (lun->getattr == NULL) return (UINT64_MAX); return (lun->getattr(lun, attrname)); } static int ctl_be_block_init(void) { struct ctl_be_block_softc *softc = &backend_block_softc; sx_init(&softc->modify_lock, "ctlblock modify"); mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF); softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); softc->buf_zone = uma_zcreate("ctlblock", CTLBLK_MAX_SEG, NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); SLIST_INIT(&softc->lun_list); return (0); } static int ctl_be_block_shutdown(void) { struct ctl_be_block_softc *softc = &backend_block_softc; struct ctl_be_block_lun *lun; mtx_lock(&softc->lock); while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) { SLIST_REMOVE_HEAD(&softc->lun_list, links); softc->num_luns--; /* * Drop our lock here. Since ctl_remove_lun() can call * back into us, this could potentially lead to a recursive * lock of the same mutex, which would cause a hang. */ mtx_unlock(&softc->lock); ctl_remove_lun(&lun->cbe_lun); mtx_lock(&softc->lock); } mtx_unlock(&softc->lock); uma_zdestroy(softc->buf_zone); uma_zdestroy(softc->beio_zone); mtx_destroy(&softc->lock); sx_destroy(&softc->modify_lock); return (0); } Index: head/sys/cam/ctl/ctl_frontend_iscsi.c =================================================================== --- head/sys/cam/ctl/ctl_frontend_iscsi.c (revision 361938) +++ head/sys/cam/ctl/ctl_frontend_iscsi.c (revision 361939) @@ -1,3004 +1,3041 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * CTL frontend for the iSCSI protocol. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ICL_KERNEL_PROXY #include #endif #ifdef ICL_KERNEL_PROXY FEATURE(cfiscsi_kernel_proxy, "iSCSI target built with ICL_KERNEL_PROXY"); #endif static MALLOC_DEFINE(M_CFISCSI, "cfiscsi", "Memory used for CTL iSCSI frontend"); static uma_zone_t cfiscsi_data_wait_zone; SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, iscsi, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "CAM Target Layer iSCSI Frontend"); static int debug = 1; SYSCTL_INT(_kern_cam_ctl_iscsi, OID_AUTO, debug, CTLFLAG_RWTUN, &debug, 1, "Enable debug messages"); static int ping_timeout = 5; SYSCTL_INT(_kern_cam_ctl_iscsi, OID_AUTO, ping_timeout, CTLFLAG_RWTUN, &ping_timeout, 5, "Interval between ping (NOP-Out) requests, in seconds"); static int login_timeout = 60; SYSCTL_INT(_kern_cam_ctl_iscsi, OID_AUTO, login_timeout, CTLFLAG_RWTUN, &login_timeout, 60, "Time to wait for ctld(8) to finish Login Phase, in seconds"); static int maxtags = 256; SYSCTL_INT(_kern_cam_ctl_iscsi, OID_AUTO, maxtags, CTLFLAG_RWTUN, &maxtags, 0, "Max number of requests queued by initiator"); #define CFISCSI_DEBUG(X, ...) \ do { \ if (debug > 1) { \ printf("%s: " X "\n", \ __func__, ## __VA_ARGS__); \ } \ } while (0) #define CFISCSI_WARN(X, ...) \ do { \ if (debug > 0) { \ printf("WARNING: %s: " X "\n", \ __func__, ## __VA_ARGS__); \ } \ } while (0) #define CFISCSI_SESSION_DEBUG(S, X, ...) \ do { \ if (debug > 1) { \ printf("%s: %s (%s): " X "\n", \ __func__, S->cs_initiator_addr, \ S->cs_initiator_name, ## __VA_ARGS__); \ } \ } while (0) #define CFISCSI_SESSION_WARN(S, X, ...) \ do { \ if (debug > 0) { \ printf("WARNING: %s (%s): " X "\n", \ S->cs_initiator_addr, \ S->cs_initiator_name, ## __VA_ARGS__); \ } \ } while (0) #define CFISCSI_SESSION_LOCK(X) mtx_lock(&X->cs_lock) #define CFISCSI_SESSION_UNLOCK(X) mtx_unlock(&X->cs_lock) #define CFISCSI_SESSION_LOCK_ASSERT(X) mtx_assert(&X->cs_lock, MA_OWNED) #define CONN_SESSION(X) ((struct cfiscsi_session *)(X)->ic_prv0) #define PDU_SESSION(X) CONN_SESSION((X)->ip_conn) struct cfiscsi_priv { void *request; uint32_t expdatasn; uint32_t r2tsn; }; #define PRIV(io) \ ((struct cfiscsi_priv *)&(io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND]) #define PRIV_REQUEST(io) PRIV(io)->request #define PRIV_EXPDATASN(io) PRIV(io)->expdatasn #define PRIV_R2TSN(io) PRIV(io)->r2tsn static int cfiscsi_init(void); static int cfiscsi_shutdown(void); static void cfiscsi_online(void *arg); static void cfiscsi_offline(void *arg); static int cfiscsi_info(void *arg, struct sbuf *sb); static int cfiscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td); static void cfiscsi_datamove(union ctl_io *io); static void cfiscsi_datamove_in(union ctl_io *io); static void cfiscsi_datamove_out(union ctl_io *io); static void cfiscsi_done(union ctl_io *io); static bool cfiscsi_pdu_update_cmdsn(const struct icl_pdu *request); static void cfiscsi_pdu_handle_nop_out(struct icl_pdu *request); static void cfiscsi_pdu_handle_scsi_command(struct icl_pdu *request); static void cfiscsi_pdu_handle_task_request(struct icl_pdu *request); static void cfiscsi_pdu_handle_data_out(struct icl_pdu *request); static void cfiscsi_pdu_handle_logout_request(struct icl_pdu *request); static void cfiscsi_session_terminate(struct cfiscsi_session *cs); static struct cfiscsi_data_wait *cfiscsi_data_wait_new( struct cfiscsi_session *cs, union ctl_io *io, uint32_t initiator_task_tag, uint32_t *target_transfer_tagp); static void cfiscsi_data_wait_free(struct cfiscsi_session *cs, struct cfiscsi_data_wait *cdw); static struct cfiscsi_target *cfiscsi_target_find(struct cfiscsi_softc *softc, const char *name, uint16_t tag); static struct cfiscsi_target *cfiscsi_target_find_or_create( struct cfiscsi_softc *softc, const char *name, const char *alias, uint16_t tag); static void cfiscsi_target_release(struct cfiscsi_target *ct); static void cfiscsi_session_delete(struct cfiscsi_session *cs); static struct cfiscsi_softc cfiscsi_softc; static struct ctl_frontend cfiscsi_frontend = { .name = "iscsi", .init = cfiscsi_init, .ioctl = cfiscsi_ioctl, .shutdown = cfiscsi_shutdown, }; CTL_FRONTEND_DECLARE(cfiscsi, cfiscsi_frontend); MODULE_DEPEND(cfiscsi, icl, 1, 1, 1); static struct icl_pdu * cfiscsi_pdu_new_response(struct icl_pdu *request, int flags) { return (icl_pdu_new(request->ip_conn, flags)); } static bool cfiscsi_pdu_update_cmdsn(const struct icl_pdu *request) { const struct iscsi_bhs_scsi_command *bhssc; struct cfiscsi_session *cs; uint32_t cmdsn, curcmdsn; cs = PDU_SESSION(request); /* * Every incoming PDU - not just NOP-Out - resets the ping timer. * The purpose of the timeout is to reset the connection when it stalls; * we don't want this to happen when NOP-In or NOP-Out ends up delayed * in some queue. */ cs->cs_timeout = 0; /* * Immediate commands carry cmdsn, but it is neither incremented nor * verified. */ if (request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) return (false); /* * Data-Out PDUs don't contain CmdSN. */ if (request->ip_bhs->bhs_opcode == ISCSI_BHS_OPCODE_SCSI_DATA_OUT) return (false); /* * We're only using fields common for all the request * (initiator -> target) PDUs. */ bhssc = (const struct iscsi_bhs_scsi_command *)request->ip_bhs; curcmdsn = cmdsn = ntohl(bhssc->bhssc_cmdsn); /* * Increment session cmdsn and exit if we received the expected value. */ do { if (atomic_fcmpset_32(&cs->cs_cmdsn, &curcmdsn, cmdsn + 1)) return (false); } while (curcmdsn == cmdsn); /* * The target MUST silently ignore any non-immediate command outside * of this range. */ if (ISCSI_SNLT(cmdsn, curcmdsn) || ISCSI_SNGT(cmdsn, curcmdsn - 1 + maxtags)) { CFISCSI_SESSION_WARN(cs, "received PDU with CmdSN %u, " "while expected %u", cmdsn, curcmdsn); return (true); } /* * We don't support multiple connections now, so any discontinuity in * CmdSN means lost PDUs. Since we don't support PDU retransmission -- * terminate the connection. */ CFISCSI_SESSION_WARN(cs, "received PDU with CmdSN %u, " "while expected %u; dropping connection", cmdsn, curcmdsn); cfiscsi_session_terminate(cs); return (true); } static void cfiscsi_pdu_handle(struct icl_pdu *request) { struct cfiscsi_session *cs; bool ignore; cs = PDU_SESSION(request); ignore = cfiscsi_pdu_update_cmdsn(request); if (ignore) { icl_pdu_free(request); return; } /* * Handle the PDU; this includes e.g. receiving the remaining * part of PDU and submitting the SCSI command to CTL * or queueing a reply. The handling routine is responsible * for freeing the PDU when it's no longer needed. */ switch (request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) { case ISCSI_BHS_OPCODE_NOP_OUT: cfiscsi_pdu_handle_nop_out(request); break; case ISCSI_BHS_OPCODE_SCSI_COMMAND: cfiscsi_pdu_handle_scsi_command(request); break; case ISCSI_BHS_OPCODE_TASK_REQUEST: cfiscsi_pdu_handle_task_request(request); break; case ISCSI_BHS_OPCODE_SCSI_DATA_OUT: cfiscsi_pdu_handle_data_out(request); break; case ISCSI_BHS_OPCODE_LOGOUT_REQUEST: cfiscsi_pdu_handle_logout_request(request); break; default: CFISCSI_SESSION_WARN(cs, "received PDU with unsupported " "opcode 0x%x; dropping connection", request->ip_bhs->bhs_opcode); icl_pdu_free(request); cfiscsi_session_terminate(cs); } } static void cfiscsi_receive_callback(struct icl_pdu *request) { #ifdef ICL_KERNEL_PROXY struct cfiscsi_session *cs; cs = PDU_SESSION(request); if (cs->cs_waiting_for_ctld || cs->cs_login_phase) { if (cs->cs_login_pdu == NULL) cs->cs_login_pdu = request; else icl_pdu_free(request); cv_signal(&cs->cs_login_cv); return; } #endif cfiscsi_pdu_handle(request); } static void cfiscsi_error_callback(struct icl_conn *ic) { struct cfiscsi_session *cs; cs = CONN_SESSION(ic); CFISCSI_SESSION_WARN(cs, "connection error; dropping connection"); cfiscsi_session_terminate(cs); } static int cfiscsi_pdu_prepare(struct icl_pdu *response) { struct cfiscsi_session *cs; struct iscsi_bhs_scsi_response *bhssr; bool advance_statsn = true; uint32_t cmdsn; cs = PDU_SESSION(response); CFISCSI_SESSION_LOCK_ASSERT(cs); /* * We're only using fields common for all the response * (target -> initiator) PDUs. */ bhssr = (struct iscsi_bhs_scsi_response *)response->ip_bhs; /* * 10.8.3: "The StatSN for this connection is not advanced * after this PDU is sent." */ if (bhssr->bhssr_opcode == ISCSI_BHS_OPCODE_R2T) advance_statsn = false; /* * 10.19.2: "However, when the Initiator Task Tag is set to 0xffffffff, * StatSN for the connection is not advanced after this PDU is sent." */ if (bhssr->bhssr_opcode == ISCSI_BHS_OPCODE_NOP_IN && bhssr->bhssr_initiator_task_tag == 0xffffffff) advance_statsn = false; /* * See the comment below - StatSN is not meaningful and must * not be advanced. */ if (bhssr->bhssr_opcode == ISCSI_BHS_OPCODE_SCSI_DATA_IN && (bhssr->bhssr_flags & BHSDI_FLAGS_S) == 0) advance_statsn = false; /* * 10.7.3: "The fields StatSN, Status, and Residual Count * only have meaningful content if the S bit is set to 1." */ if (bhssr->bhssr_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_IN || (bhssr->bhssr_flags & BHSDI_FLAGS_S)) bhssr->bhssr_statsn = htonl(cs->cs_statsn); cmdsn = cs->cs_cmdsn; bhssr->bhssr_expcmdsn = htonl(cmdsn); bhssr->bhssr_maxcmdsn = htonl(cmdsn - 1 + imax(0, maxtags - cs->cs_outstanding_ctl_pdus)); if (advance_statsn) cs->cs_statsn++; return (0); } static void cfiscsi_pdu_queue(struct icl_pdu *response) { struct cfiscsi_session *cs; cs = PDU_SESSION(response); CFISCSI_SESSION_LOCK(cs); cfiscsi_pdu_prepare(response); icl_pdu_queue(response); CFISCSI_SESSION_UNLOCK(cs); } + static void +cfiscsi_pdu_queue_cb(struct icl_pdu *response, icl_pdu_cb cb) +{ + struct cfiscsi_session *cs = PDU_SESSION(response); + + CFISCSI_SESSION_LOCK(cs); + cfiscsi_pdu_prepare(response); + icl_pdu_queue_cb(response, cb); + CFISCSI_SESSION_UNLOCK(cs); +} + static void cfiscsi_pdu_handle_nop_out(struct icl_pdu *request) { struct cfiscsi_session *cs; struct iscsi_bhs_nop_out *bhsno; struct iscsi_bhs_nop_in *bhsni; struct icl_pdu *response; void *data = NULL; size_t datasize; int error; cs = PDU_SESSION(request); bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs; if (bhsno->bhsno_initiator_task_tag == 0xffffffff) { /* * Nothing to do, iscsi_pdu_update_statsn() already * zeroed the timeout. */ icl_pdu_free(request); return; } datasize = icl_pdu_data_segment_length(request); if (datasize > 0) { data = malloc(datasize, M_CFISCSI, M_NOWAIT | M_ZERO); if (data == NULL) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory; " "dropping connection"); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } icl_pdu_get_data(request, 0, data, datasize); } response = cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory; " "droppping connection"); free(data, M_CFISCSI); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } bhsni = (struct iscsi_bhs_nop_in *)response->ip_bhs; bhsni->bhsni_opcode = ISCSI_BHS_OPCODE_NOP_IN; bhsni->bhsni_flags = 0x80; bhsni->bhsni_initiator_task_tag = bhsno->bhsno_initiator_task_tag; bhsni->bhsni_target_transfer_tag = 0xffffffff; if (datasize > 0) { error = icl_pdu_append_data(response, data, datasize, M_NOWAIT); if (error != 0) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory; " "dropping connection"); free(data, M_CFISCSI); icl_pdu_free(request); icl_pdu_free(response); cfiscsi_session_terminate(cs); return; } free(data, M_CFISCSI); } icl_pdu_free(request); cfiscsi_pdu_queue(response); } static void cfiscsi_pdu_handle_scsi_command(struct icl_pdu *request) { struct iscsi_bhs_scsi_command *bhssc; struct cfiscsi_session *cs; union ctl_io *io; int error; cs = PDU_SESSION(request); bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs; //CFISCSI_SESSION_DEBUG(cs, "initiator task tag 0x%x", // bhssc->bhssc_initiator_task_tag); if (request->ip_data_len > 0 && cs->cs_immediate_data == false) { CFISCSI_SESSION_WARN(cs, "unsolicited data with " "ImmediateData=No; dropping connection"); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } io = ctl_alloc_io(cs->cs_target->ct_port.ctl_pool_ref); ctl_zero_io(io); PRIV_REQUEST(io) = request; io->io_hdr.io_type = CTL_IO_SCSI; io->io_hdr.nexus.initid = cs->cs_ctl_initid; io->io_hdr.nexus.targ_port = cs->cs_target->ct_port.targ_port; io->io_hdr.nexus.targ_lun = ctl_decode_lun(be64toh(bhssc->bhssc_lun)); io->scsiio.tag_num = bhssc->bhssc_initiator_task_tag; switch ((bhssc->bhssc_flags & BHSSC_FLAGS_ATTR)) { case BHSSC_FLAGS_ATTR_UNTAGGED: io->scsiio.tag_type = CTL_TAG_UNTAGGED; break; case BHSSC_FLAGS_ATTR_SIMPLE: io->scsiio.tag_type = CTL_TAG_SIMPLE; break; case BHSSC_FLAGS_ATTR_ORDERED: io->scsiio.tag_type = CTL_TAG_ORDERED; break; case BHSSC_FLAGS_ATTR_HOQ: io->scsiio.tag_type = CTL_TAG_HEAD_OF_QUEUE; break; case BHSSC_FLAGS_ATTR_ACA: io->scsiio.tag_type = CTL_TAG_ACA; break; default: io->scsiio.tag_type = CTL_TAG_UNTAGGED; CFISCSI_SESSION_WARN(cs, "unhandled tag type %d", bhssc->bhssc_flags & BHSSC_FLAGS_ATTR); break; } io->scsiio.cdb_len = sizeof(bhssc->bhssc_cdb); /* Which is 16. */ memcpy(io->scsiio.cdb, bhssc->bhssc_cdb, sizeof(bhssc->bhssc_cdb)); refcount_acquire(&cs->cs_outstanding_ctl_pdus); error = ctl_queue(io); if (error != CTL_RETVAL_COMPLETE) { CFISCSI_SESSION_WARN(cs, "ctl_queue() failed; error %d; " "dropping connection", error); ctl_free_io(io); refcount_release(&cs->cs_outstanding_ctl_pdus); icl_pdu_free(request); cfiscsi_session_terminate(cs); } } static void cfiscsi_pdu_handle_task_request(struct icl_pdu *request) { struct iscsi_bhs_task_management_request *bhstmr; struct iscsi_bhs_task_management_response *bhstmr2; struct icl_pdu *response; struct cfiscsi_session *cs; union ctl_io *io; int error; cs = PDU_SESSION(request); bhstmr = (struct iscsi_bhs_task_management_request *)request->ip_bhs; io = ctl_alloc_io(cs->cs_target->ct_port.ctl_pool_ref); ctl_zero_io(io); PRIV_REQUEST(io) = request; io->io_hdr.io_type = CTL_IO_TASK; io->io_hdr.nexus.initid = cs->cs_ctl_initid; io->io_hdr.nexus.targ_port = cs->cs_target->ct_port.targ_port; io->io_hdr.nexus.targ_lun = ctl_decode_lun(be64toh(bhstmr->bhstmr_lun)); io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX */ switch (bhstmr->bhstmr_function & ~0x80) { case BHSTMR_FUNCTION_ABORT_TASK: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_ABORT_TASK"); #endif io->taskio.task_action = CTL_TASK_ABORT_TASK; io->taskio.tag_num = bhstmr->bhstmr_referenced_task_tag; break; case BHSTMR_FUNCTION_ABORT_TASK_SET: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_ABORT_TASK_SET"); #endif io->taskio.task_action = CTL_TASK_ABORT_TASK_SET; break; case BHSTMR_FUNCTION_CLEAR_TASK_SET: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_CLEAR_TASK_SET"); #endif io->taskio.task_action = CTL_TASK_CLEAR_TASK_SET; break; case BHSTMR_FUNCTION_LOGICAL_UNIT_RESET: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_LOGICAL_UNIT_RESET"); #endif io->taskio.task_action = CTL_TASK_LUN_RESET; break; case BHSTMR_FUNCTION_TARGET_WARM_RESET: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_TARGET_WARM_RESET"); #endif io->taskio.task_action = CTL_TASK_TARGET_RESET; break; case BHSTMR_FUNCTION_TARGET_COLD_RESET: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_TARGET_COLD_RESET"); #endif io->taskio.task_action = CTL_TASK_TARGET_RESET; break; case BHSTMR_FUNCTION_QUERY_TASK: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_QUERY_TASK"); #endif io->taskio.task_action = CTL_TASK_QUERY_TASK; io->taskio.tag_num = bhstmr->bhstmr_referenced_task_tag; break; case BHSTMR_FUNCTION_QUERY_TASK_SET: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_QUERY_TASK_SET"); #endif io->taskio.task_action = CTL_TASK_QUERY_TASK_SET; break; case BHSTMR_FUNCTION_I_T_NEXUS_RESET: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_I_T_NEXUS_RESET"); #endif io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET; break; case BHSTMR_FUNCTION_QUERY_ASYNC_EVENT: #if 0 CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_QUERY_ASYNC_EVENT"); #endif io->taskio.task_action = CTL_TASK_QUERY_ASYNC_EVENT; break; default: CFISCSI_SESSION_DEBUG(cs, "unsupported function 0x%x", bhstmr->bhstmr_function & ~0x80); ctl_free_io(io); response = cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory; " "dropping connection"); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } bhstmr2 = (struct iscsi_bhs_task_management_response *) response->ip_bhs; bhstmr2->bhstmr_opcode = ISCSI_BHS_OPCODE_TASK_RESPONSE; bhstmr2->bhstmr_flags = 0x80; bhstmr2->bhstmr_response = BHSTMR_RESPONSE_FUNCTION_NOT_SUPPORTED; bhstmr2->bhstmr_initiator_task_tag = bhstmr->bhstmr_initiator_task_tag; icl_pdu_free(request); cfiscsi_pdu_queue(response); return; } refcount_acquire(&cs->cs_outstanding_ctl_pdus); error = ctl_queue(io); if (error != CTL_RETVAL_COMPLETE) { CFISCSI_SESSION_WARN(cs, "ctl_queue() failed; error %d; " "dropping connection", error); ctl_free_io(io); refcount_release(&cs->cs_outstanding_ctl_pdus); icl_pdu_free(request); cfiscsi_session_terminate(cs); } } static bool cfiscsi_handle_data_segment(struct icl_pdu *request, struct cfiscsi_data_wait *cdw) { struct iscsi_bhs_data_out *bhsdo; struct cfiscsi_session *cs; struct ctl_sg_entry ctl_sg_entry, *ctl_sglist; size_t copy_len, len, off, buffer_offset; int ctl_sg_count; union ctl_io *io; cs = PDU_SESSION(request); KASSERT((request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_DATA_OUT || (request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_COMMAND, ("bad opcode 0x%x", request->ip_bhs->bhs_opcode)); /* * We're only using fields common for Data-Out and SCSI Command PDUs. */ bhsdo = (struct iscsi_bhs_data_out *)request->ip_bhs; io = cdw->cdw_ctl_io; KASSERT((io->io_hdr.flags & CTL_FLAG_DATA_MASK) != CTL_FLAG_DATA_IN, ("CTL_FLAG_DATA_IN")); #if 0 CFISCSI_SESSION_DEBUG(cs, "received %zd bytes out of %d", request->ip_data_len, io->scsiio.kern_total_len); #endif if (io->scsiio.kern_sg_entries > 0) { ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr; ctl_sg_count = io->scsiio.kern_sg_entries; } else { ctl_sglist = &ctl_sg_entry; ctl_sglist->addr = io->scsiio.kern_data_ptr; ctl_sglist->len = io->scsiio.kern_data_len; ctl_sg_count = 1; } if ((request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_DATA_OUT) buffer_offset = ntohl(bhsdo->bhsdo_buffer_offset); else buffer_offset = 0; len = icl_pdu_data_segment_length(request); /* * Make sure the offset, as sent by the initiator, matches the offset * we're supposed to be at in the scatter-gather list. */ if (buffer_offset > io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled || buffer_offset + len <= io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled) { CFISCSI_SESSION_WARN(cs, "received bad buffer offset %zd, " "expected %zd; dropping connection", buffer_offset, (size_t)io->scsiio.kern_rel_offset + (size_t)io->scsiio.ext_data_filled); ctl_set_data_phase_error(&io->scsiio); cfiscsi_session_terminate(cs); return (true); } /* * This is the offset within the PDU data segment, as opposed * to buffer_offset, which is the offset within the task (SCSI * command). */ off = io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled - buffer_offset; /* * Iterate over the scatter/gather segments, filling them with data * from the PDU data segment. Note that this can get called multiple * times for one SCSI command; the cdw structure holds state for the * scatter/gather list. */ for (;;) { KASSERT(cdw->cdw_sg_index < ctl_sg_count, ("cdw->cdw_sg_index >= ctl_sg_count")); if (cdw->cdw_sg_len == 0) { cdw->cdw_sg_addr = ctl_sglist[cdw->cdw_sg_index].addr; cdw->cdw_sg_len = ctl_sglist[cdw->cdw_sg_index].len; } KASSERT(off <= len, ("len > off")); copy_len = len - off; if (copy_len > cdw->cdw_sg_len) copy_len = cdw->cdw_sg_len; icl_pdu_get_data(request, off, cdw->cdw_sg_addr, copy_len); cdw->cdw_sg_addr += copy_len; cdw->cdw_sg_len -= copy_len; off += copy_len; io->scsiio.ext_data_filled += copy_len; io->scsiio.kern_data_resid -= copy_len; if (cdw->cdw_sg_len == 0) { /* * End of current segment. */ if (cdw->cdw_sg_index == ctl_sg_count - 1) { /* * Last segment in scatter/gather list. */ break; } cdw->cdw_sg_index++; } if (off == len) { /* * End of PDU payload. */ break; } } if (len > off) { /* * In case of unsolicited data, it's possible that the buffer * provided by CTL is smaller than negotiated FirstBurstLength. * Just ignore the superfluous data; will ask for them with R2T * on next call to cfiscsi_datamove(). * * This obviously can only happen with SCSI Command PDU. */ if ((request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_COMMAND) return (true); CFISCSI_SESSION_WARN(cs, "received too much data: got %zd bytes, " "expected %zd; dropping connection", icl_pdu_data_segment_length(request), off); ctl_set_data_phase_error(&io->scsiio); cfiscsi_session_terminate(cs); return (true); } if (io->scsiio.ext_data_filled == cdw->cdw_r2t_end && (bhsdo->bhsdo_flags & BHSDO_FLAGS_F) == 0) { CFISCSI_SESSION_WARN(cs, "got the final packet without " "the F flag; flags = 0x%x; dropping connection", bhsdo->bhsdo_flags); ctl_set_data_phase_error(&io->scsiio); cfiscsi_session_terminate(cs); return (true); } if (io->scsiio.ext_data_filled != cdw->cdw_r2t_end && (bhsdo->bhsdo_flags & BHSDO_FLAGS_F) != 0) { if ((request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_DATA_OUT) { CFISCSI_SESSION_WARN(cs, "got the final packet, but the " "transmitted size was %zd bytes instead of %d; " "dropping connection", (size_t)io->scsiio.ext_data_filled, cdw->cdw_r2t_end); ctl_set_data_phase_error(&io->scsiio); cfiscsi_session_terminate(cs); return (true); } else { /* * For SCSI Command PDU, this just means we need to * solicit more data by sending R2T. */ return (false); } } if (io->scsiio.ext_data_filled == cdw->cdw_r2t_end) { #if 0 CFISCSI_SESSION_DEBUG(cs, "no longer expecting Data-Out with target " "transfer tag 0x%x", cdw->cdw_target_transfer_tag); #endif return (true); } return (false); } static void cfiscsi_pdu_handle_data_out(struct icl_pdu *request) { struct iscsi_bhs_data_out *bhsdo; struct cfiscsi_session *cs; struct cfiscsi_data_wait *cdw = NULL; union ctl_io *io; bool done; cs = PDU_SESSION(request); bhsdo = (struct iscsi_bhs_data_out *)request->ip_bhs; CFISCSI_SESSION_LOCK(cs); TAILQ_FOREACH(cdw, &cs->cs_waiting_for_data_out, cdw_next) { #if 0 CFISCSI_SESSION_DEBUG(cs, "have ttt 0x%x, itt 0x%x; looking for " "ttt 0x%x, itt 0x%x", bhsdo->bhsdo_target_transfer_tag, bhsdo->bhsdo_initiator_task_tag, cdw->cdw_target_transfer_tag, cdw->cdw_initiator_task_tag)); #endif if (bhsdo->bhsdo_target_transfer_tag == cdw->cdw_target_transfer_tag) break; } CFISCSI_SESSION_UNLOCK(cs); if (cdw == NULL) { CFISCSI_SESSION_WARN(cs, "data transfer tag 0x%x, initiator task tag " "0x%x, not found; dropping connection", bhsdo->bhsdo_target_transfer_tag, bhsdo->bhsdo_initiator_task_tag); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } if (cdw->cdw_datasn != ntohl(bhsdo->bhsdo_datasn)) { CFISCSI_SESSION_WARN(cs, "received Data-Out PDU with " "DataSN %u, while expected %u; dropping connection", ntohl(bhsdo->bhsdo_datasn), cdw->cdw_datasn); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } cdw->cdw_datasn++; io = cdw->cdw_ctl_io; KASSERT((io->io_hdr.flags & CTL_FLAG_DATA_MASK) != CTL_FLAG_DATA_IN, ("CTL_FLAG_DATA_IN")); done = cfiscsi_handle_data_segment(request, cdw); if (done) { CFISCSI_SESSION_LOCK(cs); TAILQ_REMOVE(&cs->cs_waiting_for_data_out, cdw, cdw_next); CFISCSI_SESSION_UNLOCK(cs); done = (io->scsiio.ext_data_filled != cdw->cdw_r2t_end || io->scsiio.ext_data_filled == io->scsiio.kern_data_len); cfiscsi_data_wait_free(cs, cdw); io->io_hdr.flags &= ~CTL_FLAG_DMA_INPROG; if (done) io->scsiio.be_move_done(io); else cfiscsi_datamove_out(io); } icl_pdu_free(request); } static void cfiscsi_pdu_handle_logout_request(struct icl_pdu *request) { struct iscsi_bhs_logout_request *bhslr; struct iscsi_bhs_logout_response *bhslr2; struct icl_pdu *response; struct cfiscsi_session *cs; cs = PDU_SESSION(request); bhslr = (struct iscsi_bhs_logout_request *)request->ip_bhs; switch (bhslr->bhslr_reason & 0x7f) { case BHSLR_REASON_CLOSE_SESSION: case BHSLR_REASON_CLOSE_CONNECTION: response = cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_DEBUG(cs, "failed to allocate memory"); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } bhslr2 = (struct iscsi_bhs_logout_response *)response->ip_bhs; bhslr2->bhslr_opcode = ISCSI_BHS_OPCODE_LOGOUT_RESPONSE; bhslr2->bhslr_flags = 0x80; bhslr2->bhslr_response = BHSLR_RESPONSE_CLOSED_SUCCESSFULLY; bhslr2->bhslr_initiator_task_tag = bhslr->bhslr_initiator_task_tag; icl_pdu_free(request); cfiscsi_pdu_queue(response); cfiscsi_session_terminate(cs); break; case BHSLR_REASON_REMOVE_FOR_RECOVERY: response = cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory; dropping connection"); icl_pdu_free(request); cfiscsi_session_terminate(cs); return; } bhslr2 = (struct iscsi_bhs_logout_response *)response->ip_bhs; bhslr2->bhslr_opcode = ISCSI_BHS_OPCODE_LOGOUT_RESPONSE; bhslr2->bhslr_flags = 0x80; bhslr2->bhslr_response = BHSLR_RESPONSE_RECOVERY_NOT_SUPPORTED; bhslr2->bhslr_initiator_task_tag = bhslr->bhslr_initiator_task_tag; icl_pdu_free(request); cfiscsi_pdu_queue(response); break; default: CFISCSI_SESSION_WARN(cs, "invalid reason 0%x; dropping connection", bhslr->bhslr_reason); icl_pdu_free(request); cfiscsi_session_terminate(cs); break; } } static void cfiscsi_callout(void *context) { struct icl_pdu *cp; struct iscsi_bhs_nop_in *bhsni; struct cfiscsi_session *cs; cs = context; if (cs->cs_terminating) return; callout_schedule(&cs->cs_callout, 1 * hz); atomic_add_int(&cs->cs_timeout, 1); #ifdef ICL_KERNEL_PROXY if (cs->cs_waiting_for_ctld || cs->cs_login_phase) { if (login_timeout > 0 && cs->cs_timeout > login_timeout) { CFISCSI_SESSION_WARN(cs, "login timed out after " "%d seconds; dropping connection", cs->cs_timeout); cfiscsi_session_terminate(cs); } return; } #endif if (ping_timeout <= 0) { /* * Pings are disabled. Don't send NOP-In in this case; * user might have disabled pings to work around problems * with certain initiators that can't properly handle * NOP-In, such as iPXE. Reset the timeout, to avoid * triggering reconnection, should the user decide to * reenable them. */ cs->cs_timeout = 0; return; } if (cs->cs_timeout >= ping_timeout) { CFISCSI_SESSION_WARN(cs, "no ping reply (NOP-Out) after %d seconds; " "dropping connection", ping_timeout); cfiscsi_session_terminate(cs); return; } /* * If the ping was reset less than one second ago - which means * that we've received some PDU during the last second - assume * the traffic flows correctly and don't bother sending a NOP-Out. * * (It's 2 - one for one second, and one for incrementing is_timeout * earlier in this routine.) */ if (cs->cs_timeout < 2) return; cp = icl_pdu_new(cs->cs_conn, M_NOWAIT); if (cp == NULL) { CFISCSI_SESSION_WARN(cs, "failed to allocate memory"); return; } bhsni = (struct iscsi_bhs_nop_in *)cp->ip_bhs; bhsni->bhsni_opcode = ISCSI_BHS_OPCODE_NOP_IN; bhsni->bhsni_flags = 0x80; bhsni->bhsni_initiator_task_tag = 0xffffffff; cfiscsi_pdu_queue(cp); } static struct cfiscsi_data_wait * cfiscsi_data_wait_new(struct cfiscsi_session *cs, union ctl_io *io, uint32_t initiator_task_tag, uint32_t *target_transfer_tagp) { struct cfiscsi_data_wait *cdw; int error; cdw = uma_zalloc(cfiscsi_data_wait_zone, M_NOWAIT | M_ZERO); if (cdw == NULL) { CFISCSI_SESSION_WARN(cs, "failed to allocate %zd bytes", sizeof(*cdw)); return (NULL); } error = icl_conn_transfer_setup(cs->cs_conn, io, target_transfer_tagp, &cdw->cdw_icl_prv); if (error != 0) { CFISCSI_SESSION_WARN(cs, "icl_conn_transfer_setup() failed with error %d", error); uma_zfree(cfiscsi_data_wait_zone, cdw); return (NULL); } cdw->cdw_ctl_io = io; cdw->cdw_target_transfer_tag = *target_transfer_tagp; cdw->cdw_initiator_task_tag = initiator_task_tag; return (cdw); } static void cfiscsi_data_wait_free(struct cfiscsi_session *cs, struct cfiscsi_data_wait *cdw) { icl_conn_transfer_done(cs->cs_conn, cdw->cdw_icl_prv); uma_zfree(cfiscsi_data_wait_zone, cdw); } static void cfiscsi_session_terminate_tasks(struct cfiscsi_session *cs) { struct cfiscsi_data_wait *cdw; union ctl_io *io; int error, last, wait; if (cs->cs_target == NULL) return; /* No target yet, so nothing to do. */ io = ctl_alloc_io(cs->cs_target->ct_port.ctl_pool_ref); ctl_zero_io(io); PRIV_REQUEST(io) = cs; io->io_hdr.io_type = CTL_IO_TASK; io->io_hdr.nexus.initid = cs->cs_ctl_initid; io->io_hdr.nexus.targ_port = cs->cs_target->ct_port.targ_port; io->io_hdr.nexus.targ_lun = 0; io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX */ io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET; wait = cs->cs_outstanding_ctl_pdus; refcount_acquire(&cs->cs_outstanding_ctl_pdus); error = ctl_queue(io); if (error != CTL_RETVAL_COMPLETE) { CFISCSI_SESSION_WARN(cs, "ctl_queue() failed; error %d", error); refcount_release(&cs->cs_outstanding_ctl_pdus); ctl_free_io(io); } CFISCSI_SESSION_LOCK(cs); while ((cdw = TAILQ_FIRST(&cs->cs_waiting_for_data_out)) != NULL) { TAILQ_REMOVE(&cs->cs_waiting_for_data_out, cdw, cdw_next); CFISCSI_SESSION_UNLOCK(cs); /* * Set nonzero port status; this prevents backends from * assuming that the data transfer actually succeeded * and writing uninitialized data to disk. */ cdw->cdw_ctl_io->io_hdr.flags &= ~CTL_FLAG_DMA_INPROG; cdw->cdw_ctl_io->scsiio.io_hdr.port_status = 42; cdw->cdw_ctl_io->scsiio.be_move_done(cdw->cdw_ctl_io); cfiscsi_data_wait_free(cs, cdw); CFISCSI_SESSION_LOCK(cs); } CFISCSI_SESSION_UNLOCK(cs); /* * Wait for CTL to terminate all the tasks. */ if (wait > 0) CFISCSI_SESSION_WARN(cs, "waiting for CTL to terminate %d tasks", wait); for (;;) { refcount_acquire(&cs->cs_outstanding_ctl_pdus); last = refcount_release(&cs->cs_outstanding_ctl_pdus); if (last != 0) break; tsleep(__DEVOLATILE(void *, &cs->cs_outstanding_ctl_pdus), 0, "cfiscsi_terminate", hz / 100); } if (wait > 0) CFISCSI_SESSION_WARN(cs, "tasks terminated"); } static void cfiscsi_maintenance_thread(void *arg) { struct cfiscsi_session *cs; cs = arg; for (;;) { CFISCSI_SESSION_LOCK(cs); if (cs->cs_terminating == false || cs->cs_handoff_in_progress) cv_wait(&cs->cs_maintenance_cv, &cs->cs_lock); CFISCSI_SESSION_UNLOCK(cs); if (cs->cs_terminating && cs->cs_handoff_in_progress == false) { /* * We used to wait up to 30 seconds to deliver queued * PDUs to the initiator. We also tried hard to deliver * SCSI Responses for the aborted PDUs. We don't do * that anymore. We might need to revisit that. */ callout_drain(&cs->cs_callout); icl_conn_close(cs->cs_conn); /* * At this point ICL receive thread is no longer * running; no new tasks can be queued. */ cfiscsi_session_terminate_tasks(cs); cfiscsi_session_delete(cs); kthread_exit(); return; } CFISCSI_SESSION_DEBUG(cs, "nothing to do"); } } static void cfiscsi_session_terminate(struct cfiscsi_session *cs) { cs->cs_terminating = true; cv_signal(&cs->cs_maintenance_cv); #ifdef ICL_KERNEL_PROXY cv_signal(&cs->cs_login_cv); #endif } static int cfiscsi_session_register_initiator(struct cfiscsi_session *cs) { struct cfiscsi_target *ct; char *name; int i; KASSERT(cs->cs_ctl_initid == -1, ("already registered")); ct = cs->cs_target; name = strdup(cs->cs_initiator_id, M_CTL); i = ctl_add_initiator(&ct->ct_port, -1, 0, name); if (i < 0) { CFISCSI_SESSION_WARN(cs, "ctl_add_initiator failed with error %d", i); cs->cs_ctl_initid = -1; return (1); } cs->cs_ctl_initid = i; #if 0 CFISCSI_SESSION_DEBUG(cs, "added initiator id %d", i); #endif return (0); } static void cfiscsi_session_unregister_initiator(struct cfiscsi_session *cs) { int error; if (cs->cs_ctl_initid == -1) return; error = ctl_remove_initiator(&cs->cs_target->ct_port, cs->cs_ctl_initid); if (error != 0) { CFISCSI_SESSION_WARN(cs, "ctl_remove_initiator failed with error %d", error); } cs->cs_ctl_initid = -1; } static struct cfiscsi_session * cfiscsi_session_new(struct cfiscsi_softc *softc, const char *offload) { struct cfiscsi_session *cs; int error; cs = malloc(sizeof(*cs), M_CFISCSI, M_NOWAIT | M_ZERO); if (cs == NULL) { CFISCSI_WARN("malloc failed"); return (NULL); } cs->cs_ctl_initid = -1; refcount_init(&cs->cs_outstanding_ctl_pdus, 0); TAILQ_INIT(&cs->cs_waiting_for_data_out); mtx_init(&cs->cs_lock, "cfiscsi_lock", NULL, MTX_DEF); cv_init(&cs->cs_maintenance_cv, "cfiscsi_mt"); #ifdef ICL_KERNEL_PROXY cv_init(&cs->cs_login_cv, "cfiscsi_login"); #endif /* * The purpose of this is to avoid racing with session shutdown. * Otherwise we could have the maintenance thread call icl_conn_close() * before we call icl_conn_handoff(). */ cs->cs_handoff_in_progress = true; cs->cs_conn = icl_new_conn(offload, false, "cfiscsi", &cs->cs_lock); if (cs->cs_conn == NULL) { free(cs, M_CFISCSI); return (NULL); } cs->cs_conn->ic_receive = cfiscsi_receive_callback; cs->cs_conn->ic_error = cfiscsi_error_callback; cs->cs_conn->ic_prv0 = cs; error = kthread_add(cfiscsi_maintenance_thread, cs, NULL, NULL, 0, 0, "cfiscsimt"); if (error != 0) { CFISCSI_SESSION_WARN(cs, "kthread_add(9) failed with error %d", error); free(cs, M_CFISCSI); return (NULL); } mtx_lock(&softc->lock); cs->cs_id = ++softc->last_session_id; TAILQ_INSERT_TAIL(&softc->sessions, cs, cs_next); mtx_unlock(&softc->lock); /* * Start pinging the initiator. */ callout_init(&cs->cs_callout, 1); callout_reset(&cs->cs_callout, 1 * hz, cfiscsi_callout, cs); return (cs); } static void cfiscsi_session_delete(struct cfiscsi_session *cs) { struct cfiscsi_softc *softc; softc = &cfiscsi_softc; KASSERT(cs->cs_outstanding_ctl_pdus == 0, ("destroying session with outstanding CTL pdus")); KASSERT(TAILQ_EMPTY(&cs->cs_waiting_for_data_out), ("destroying session with non-empty queue")); mtx_lock(&softc->lock); TAILQ_REMOVE(&softc->sessions, cs, cs_next); mtx_unlock(&softc->lock); cfiscsi_session_unregister_initiator(cs); if (cs->cs_target != NULL) cfiscsi_target_release(cs->cs_target); icl_conn_close(cs->cs_conn); icl_conn_free(cs->cs_conn); free(cs, M_CFISCSI); cv_signal(&softc->sessions_cv); } static int cfiscsi_init(void) { struct cfiscsi_softc *softc; softc = &cfiscsi_softc; bzero(softc, sizeof(*softc)); mtx_init(&softc->lock, "cfiscsi", NULL, MTX_DEF); cv_init(&softc->sessions_cv, "cfiscsi_sessions"); #ifdef ICL_KERNEL_PROXY cv_init(&softc->accept_cv, "cfiscsi_accept"); #endif TAILQ_INIT(&softc->sessions); TAILQ_INIT(&softc->targets); cfiscsi_data_wait_zone = uma_zcreate("cfiscsi_data_wait", sizeof(struct cfiscsi_data_wait), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); return (0); } static int cfiscsi_shutdown(void) { struct cfiscsi_softc *softc = &cfiscsi_softc; if (!TAILQ_EMPTY(&softc->sessions) || !TAILQ_EMPTY(&softc->targets)) return (EBUSY); uma_zdestroy(cfiscsi_data_wait_zone); #ifdef ICL_KERNEL_PROXY cv_destroy(&softc->accept_cv); #endif cv_destroy(&softc->sessions_cv); mtx_destroy(&softc->lock); return (0); } #ifdef ICL_KERNEL_PROXY static void cfiscsi_accept(struct socket *so, struct sockaddr *sa, int portal_id) { struct cfiscsi_session *cs; cs = cfiscsi_session_new(&cfiscsi_softc, NULL); if (cs == NULL) { CFISCSI_WARN("failed to create session"); return; } icl_conn_handoff_sock(cs->cs_conn, so); cs->cs_initiator_sa = sa; cs->cs_portal_id = portal_id; cs->cs_handoff_in_progress = false; cs->cs_waiting_for_ctld = true; cv_signal(&cfiscsi_softc.accept_cv); CFISCSI_SESSION_LOCK(cs); /* * Wake up the maintenance thread if we got scheduled for termination * somewhere between cfiscsi_session_new() and icl_conn_handoff_sock(). */ if (cs->cs_terminating) cfiscsi_session_terminate(cs); CFISCSI_SESSION_UNLOCK(cs); } #endif static void cfiscsi_online(void *arg) { struct cfiscsi_softc *softc; struct cfiscsi_target *ct; int online; ct = (struct cfiscsi_target *)arg; softc = ct->ct_softc; mtx_lock(&softc->lock); if (ct->ct_online) { mtx_unlock(&softc->lock); return; } ct->ct_online = 1; online = softc->online++; mtx_unlock(&softc->lock); if (online > 0) return; #ifdef ICL_KERNEL_PROXY if (softc->listener != NULL) icl_listen_free(softc->listener); softc->listener = icl_listen_new(cfiscsi_accept); #endif } static void cfiscsi_offline(void *arg) { struct cfiscsi_softc *softc; struct cfiscsi_target *ct; struct cfiscsi_session *cs; int error, online; ct = (struct cfiscsi_target *)arg; softc = ct->ct_softc; mtx_lock(&softc->lock); if (!ct->ct_online) { mtx_unlock(&softc->lock); return; } ct->ct_online = 0; online = --softc->online; do { TAILQ_FOREACH(cs, &softc->sessions, cs_next) { if (cs->cs_target == ct) cfiscsi_session_terminate(cs); } TAILQ_FOREACH(cs, &softc->sessions, cs_next) { if (cs->cs_target == ct) break; } if (cs != NULL) { error = cv_wait_sig(&softc->sessions_cv, &softc->lock); if (error != 0) { CFISCSI_SESSION_DEBUG(cs, "cv_wait failed with error %d\n", error); break; } } } while (cs != NULL && ct->ct_online == 0); mtx_unlock(&softc->lock); if (online > 0) return; #ifdef ICL_KERNEL_PROXY icl_listen_free(softc->listener); softc->listener = NULL; #endif } static int cfiscsi_info(void *arg, struct sbuf *sb) { struct cfiscsi_target *ct = (struct cfiscsi_target *)arg; int retval; retval = sbuf_printf(sb, "\t%d\n", ct->ct_state); return (retval); } static void cfiscsi_ioctl_handoff(struct ctl_iscsi *ci) { struct cfiscsi_softc *softc; struct cfiscsi_session *cs, *cs2; struct cfiscsi_target *ct; struct ctl_iscsi_handoff_params *cihp; int error; cihp = (struct ctl_iscsi_handoff_params *)&(ci->data); softc = &cfiscsi_softc; CFISCSI_DEBUG("new connection from %s (%s) to %s", cihp->initiator_name, cihp->initiator_addr, cihp->target_name); ct = cfiscsi_target_find(softc, cihp->target_name, cihp->portal_group_tag); if (ct == NULL) { ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "%s: target not found", __func__); return; } #ifdef ICL_KERNEL_PROXY if (cihp->socket > 0 && cihp->connection_id > 0) { snprintf(ci->error_str, sizeof(ci->error_str), "both socket and connection_id set"); ci->status = CTL_ISCSI_ERROR; cfiscsi_target_release(ct); return; } if (cihp->socket == 0) { mtx_lock(&cfiscsi_softc.lock); TAILQ_FOREACH(cs, &cfiscsi_softc.sessions, cs_next) { if (cs->cs_id == cihp->connection_id) break; } if (cs == NULL) { mtx_unlock(&cfiscsi_softc.lock); snprintf(ci->error_str, sizeof(ci->error_str), "connection not found"); ci->status = CTL_ISCSI_ERROR; cfiscsi_target_release(ct); return; } mtx_unlock(&cfiscsi_softc.lock); } else { #endif cs = cfiscsi_session_new(softc, cihp->offload); if (cs == NULL) { ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "%s: cfiscsi_session_new failed", __func__); cfiscsi_target_release(ct); return; } #ifdef ICL_KERNEL_PROXY } #endif /* * First PDU of Full Feature phase has the same CmdSN as the last * PDU from the Login Phase received from the initiator. Thus, * the -1 below. */ cs->cs_cmdsn = cihp->cmdsn; cs->cs_statsn = cihp->statsn; cs->cs_max_recv_data_segment_length = cihp->max_recv_data_segment_length; cs->cs_max_send_data_segment_length = cihp->max_send_data_segment_length; cs->cs_max_burst_length = cihp->max_burst_length; cs->cs_first_burst_length = cihp->first_burst_length; cs->cs_immediate_data = !!cihp->immediate_data; if (cihp->header_digest == CTL_ISCSI_DIGEST_CRC32C) cs->cs_conn->ic_header_crc32c = true; if (cihp->data_digest == CTL_ISCSI_DIGEST_CRC32C) cs->cs_conn->ic_data_crc32c = true; strlcpy(cs->cs_initiator_name, cihp->initiator_name, sizeof(cs->cs_initiator_name)); strlcpy(cs->cs_initiator_addr, cihp->initiator_addr, sizeof(cs->cs_initiator_addr)); strlcpy(cs->cs_initiator_alias, cihp->initiator_alias, sizeof(cs->cs_initiator_alias)); memcpy(cs->cs_initiator_isid, cihp->initiator_isid, sizeof(cs->cs_initiator_isid)); snprintf(cs->cs_initiator_id, sizeof(cs->cs_initiator_id), "%s,i,0x%02x%02x%02x%02x%02x%02x", cs->cs_initiator_name, cihp->initiator_isid[0], cihp->initiator_isid[1], cihp->initiator_isid[2], cihp->initiator_isid[3], cihp->initiator_isid[4], cihp->initiator_isid[5]); mtx_lock(&softc->lock); if (ct->ct_online == 0) { mtx_unlock(&softc->lock); CFISCSI_SESSION_LOCK(cs); cs->cs_handoff_in_progress = false; cfiscsi_session_terminate(cs); CFISCSI_SESSION_UNLOCK(cs); cfiscsi_target_release(ct); ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "%s: port offline", __func__); return; } cs->cs_target = ct; mtx_unlock(&softc->lock); restart: if (!cs->cs_terminating) { mtx_lock(&softc->lock); TAILQ_FOREACH(cs2, &softc->sessions, cs_next) { if (cs2 != cs && cs2->cs_tasks_aborted == false && cs->cs_target == cs2->cs_target && strcmp(cs->cs_initiator_id, cs2->cs_initiator_id) == 0) { if (strcmp(cs->cs_initiator_addr, cs2->cs_initiator_addr) != 0) { CFISCSI_SESSION_WARN(cs2, "session reinstatement from " "different address %s", cs->cs_initiator_addr); } else { CFISCSI_SESSION_DEBUG(cs2, "session reinstatement"); } cfiscsi_session_terminate(cs2); mtx_unlock(&softc->lock); pause("cfiscsi_reinstate", 1); goto restart; } } mtx_unlock(&softc->lock); } /* * Register initiator with CTL. */ cfiscsi_session_register_initiator(cs); #ifdef ICL_KERNEL_PROXY if (cihp->socket > 0) { #endif error = icl_conn_handoff(cs->cs_conn, cihp->socket); if (error != 0) { CFISCSI_SESSION_LOCK(cs); cs->cs_handoff_in_progress = false; cfiscsi_session_terminate(cs); CFISCSI_SESSION_UNLOCK(cs); ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "%s: icl_conn_handoff failed with error %d", __func__, error); return; } #ifdef ICL_KERNEL_PROXY } #endif #ifdef ICL_KERNEL_PROXY cs->cs_login_phase = false; /* * First PDU of the Full Feature phase has likely already arrived. * We have to pick it up and execute properly. */ if (cs->cs_login_pdu != NULL) { CFISCSI_SESSION_DEBUG(cs, "picking up first PDU"); cfiscsi_pdu_handle(cs->cs_login_pdu); cs->cs_login_pdu = NULL; } #endif CFISCSI_SESSION_LOCK(cs); cs->cs_handoff_in_progress = false; /* * Wake up the maintenance thread if we got scheduled for termination. */ if (cs->cs_terminating) cfiscsi_session_terminate(cs); CFISCSI_SESSION_UNLOCK(cs); ci->status = CTL_ISCSI_OK; } static void cfiscsi_ioctl_list(struct ctl_iscsi *ci) { struct ctl_iscsi_list_params *cilp; struct cfiscsi_session *cs; struct cfiscsi_softc *softc; struct sbuf *sb; int error; cilp = (struct ctl_iscsi_list_params *)&(ci->data); softc = &cfiscsi_softc; sb = sbuf_new(NULL, NULL, cilp->alloc_len, SBUF_FIXEDLEN); if (sb == NULL) { ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "Unable to allocate %d bytes for iSCSI session list", cilp->alloc_len); return; } sbuf_printf(sb, "\n"); mtx_lock(&softc->lock); TAILQ_FOREACH(cs, &softc->sessions, cs_next) { if (cs->cs_target == NULL) continue; error = sbuf_printf(sb, "" "%s" "%s" "%s" "%s" "%s" "%u" "%s" "%s" "%d" "%d" "%d" "%d" "%d" "%d" "%s" "\n", cs->cs_id, cs->cs_initiator_name, cs->cs_initiator_addr, cs->cs_initiator_alias, cs->cs_target->ct_name, cs->cs_target->ct_alias, cs->cs_target->ct_tag, cs->cs_conn->ic_header_crc32c ? "CRC32C" : "None", cs->cs_conn->ic_data_crc32c ? "CRC32C" : "None", cs->cs_max_recv_data_segment_length, cs->cs_max_send_data_segment_length, cs->cs_max_burst_length, cs->cs_first_burst_length, cs->cs_immediate_data, cs->cs_conn->ic_iser, cs->cs_conn->ic_offload); if (error != 0) break; } mtx_unlock(&softc->lock); error = sbuf_printf(sb, "\n"); if (error != 0) { sbuf_delete(sb); ci->status = CTL_ISCSI_LIST_NEED_MORE_SPACE; snprintf(ci->error_str, sizeof(ci->error_str), "Out of space, %d bytes is too small", cilp->alloc_len); return; } sbuf_finish(sb); error = copyout(sbuf_data(sb), cilp->conn_xml, sbuf_len(sb) + 1); if (error != 0) { sbuf_delete(sb); snprintf(ci->error_str, sizeof(ci->error_str), "copyout failed with error %d", error); ci->status = CTL_ISCSI_ERROR; return; } cilp->fill_len = sbuf_len(sb) + 1; ci->status = CTL_ISCSI_OK; sbuf_delete(sb); } static void cfiscsi_ioctl_logout(struct ctl_iscsi *ci) { struct icl_pdu *response; struct iscsi_bhs_asynchronous_message *bhsam; struct ctl_iscsi_logout_params *cilp; struct cfiscsi_session *cs; struct cfiscsi_softc *softc; int found = 0; cilp = (struct ctl_iscsi_logout_params *)&(ci->data); softc = &cfiscsi_softc; mtx_lock(&softc->lock); TAILQ_FOREACH(cs, &softc->sessions, cs_next) { if (cilp->all == 0 && cs->cs_id != cilp->connection_id && strcmp(cs->cs_initiator_name, cilp->initiator_name) != 0 && strcmp(cs->cs_initiator_addr, cilp->initiator_addr) != 0) continue; response = icl_pdu_new(cs->cs_conn, M_NOWAIT); if (response == NULL) { ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "Unable to allocate memory"); mtx_unlock(&softc->lock); return; } bhsam = (struct iscsi_bhs_asynchronous_message *)response->ip_bhs; bhsam->bhsam_opcode = ISCSI_BHS_OPCODE_ASYNC_MESSAGE; bhsam->bhsam_flags = 0x80; bhsam->bhsam_async_event = BHSAM_EVENT_TARGET_REQUESTS_LOGOUT; bhsam->bhsam_parameter3 = htons(10); cfiscsi_pdu_queue(response); found++; } mtx_unlock(&softc->lock); if (found == 0) { ci->status = CTL_ISCSI_SESSION_NOT_FOUND; snprintf(ci->error_str, sizeof(ci->error_str), "No matching connections found"); return; } ci->status = CTL_ISCSI_OK; } static void cfiscsi_ioctl_terminate(struct ctl_iscsi *ci) { struct icl_pdu *response; struct iscsi_bhs_asynchronous_message *bhsam; struct ctl_iscsi_terminate_params *citp; struct cfiscsi_session *cs; struct cfiscsi_softc *softc; int found = 0; citp = (struct ctl_iscsi_terminate_params *)&(ci->data); softc = &cfiscsi_softc; mtx_lock(&softc->lock); TAILQ_FOREACH(cs, &softc->sessions, cs_next) { if (citp->all == 0 && cs->cs_id != citp->connection_id && strcmp(cs->cs_initiator_name, citp->initiator_name) != 0 && strcmp(cs->cs_initiator_addr, citp->initiator_addr) != 0) continue; response = icl_pdu_new(cs->cs_conn, M_NOWAIT); if (response == NULL) { /* * Oh well. Just terminate the connection. */ } else { bhsam = (struct iscsi_bhs_asynchronous_message *) response->ip_bhs; bhsam->bhsam_opcode = ISCSI_BHS_OPCODE_ASYNC_MESSAGE; bhsam->bhsam_flags = 0x80; bhsam->bhsam_0xffffffff = 0xffffffff; bhsam->bhsam_async_event = BHSAM_EVENT_TARGET_TERMINATES_SESSION; cfiscsi_pdu_queue(response); } cfiscsi_session_terminate(cs); found++; } mtx_unlock(&softc->lock); if (found == 0) { ci->status = CTL_ISCSI_SESSION_NOT_FOUND; snprintf(ci->error_str, sizeof(ci->error_str), "No matching connections found"); return; } ci->status = CTL_ISCSI_OK; } static void cfiscsi_ioctl_limits(struct ctl_iscsi *ci) { struct ctl_iscsi_limits_params *cilp; struct icl_drv_limits idl; int error; cilp = (struct ctl_iscsi_limits_params *)&(ci->data); error = icl_limits(cilp->offload, false, &idl); if (error != 0) { ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "%s: icl_limits failed with error %d", __func__, error); return; } cilp->max_recv_data_segment_length = idl.idl_max_recv_data_segment_length; cilp->max_send_data_segment_length = idl.idl_max_send_data_segment_length; cilp->max_burst_length = idl.idl_max_burst_length; cilp->first_burst_length = idl.idl_first_burst_length; ci->status = CTL_ISCSI_OK; } #ifdef ICL_KERNEL_PROXY static void cfiscsi_ioctl_listen(struct ctl_iscsi *ci) { struct ctl_iscsi_listen_params *cilp; struct sockaddr *sa; int error; cilp = (struct ctl_iscsi_listen_params *)&(ci->data); if (cfiscsi_softc.listener == NULL) { CFISCSI_DEBUG("no listener"); snprintf(ci->error_str, sizeof(ci->error_str), "no listener"); ci->status = CTL_ISCSI_ERROR; return; } error = getsockaddr(&sa, (void *)cilp->addr, cilp->addrlen); if (error != 0) { CFISCSI_DEBUG("getsockaddr, error %d", error); snprintf(ci->error_str, sizeof(ci->error_str), "getsockaddr failed"); ci->status = CTL_ISCSI_ERROR; return; } error = icl_listen_add(cfiscsi_softc.listener, cilp->iser, cilp->domain, cilp->socktype, cilp->protocol, sa, cilp->portal_id); if (error != 0) { free(sa, M_SONAME); CFISCSI_DEBUG("icl_listen_add, error %d", error); snprintf(ci->error_str, sizeof(ci->error_str), "icl_listen_add failed, error %d", error); ci->status = CTL_ISCSI_ERROR; return; } ci->status = CTL_ISCSI_OK; } static void cfiscsi_ioctl_accept(struct ctl_iscsi *ci) { struct ctl_iscsi_accept_params *ciap; struct cfiscsi_session *cs; int error; ciap = (struct ctl_iscsi_accept_params *)&(ci->data); mtx_lock(&cfiscsi_softc.lock); for (;;) { TAILQ_FOREACH(cs, &cfiscsi_softc.sessions, cs_next) { if (cs->cs_waiting_for_ctld) break; } if (cs != NULL) break; error = cv_wait_sig(&cfiscsi_softc.accept_cv, &cfiscsi_softc.lock); if (error != 0) { mtx_unlock(&cfiscsi_softc.lock); snprintf(ci->error_str, sizeof(ci->error_str), "interrupted"); ci->status = CTL_ISCSI_ERROR; return; } } mtx_unlock(&cfiscsi_softc.lock); cs->cs_waiting_for_ctld = false; cs->cs_login_phase = true; ciap->connection_id = cs->cs_id; ciap->portal_id = cs->cs_portal_id; ciap->initiator_addrlen = cs->cs_initiator_sa->sa_len; error = copyout(cs->cs_initiator_sa, ciap->initiator_addr, cs->cs_initiator_sa->sa_len); if (error != 0) { snprintf(ci->error_str, sizeof(ci->error_str), "copyout failed with error %d", error); ci->status = CTL_ISCSI_ERROR; return; } ci->status = CTL_ISCSI_OK; } static void cfiscsi_ioctl_send(struct ctl_iscsi *ci) { struct ctl_iscsi_send_params *cisp; struct cfiscsi_session *cs; struct icl_pdu *ip; size_t datalen; void *data; int error; cisp = (struct ctl_iscsi_send_params *)&(ci->data); mtx_lock(&cfiscsi_softc.lock); TAILQ_FOREACH(cs, &cfiscsi_softc.sessions, cs_next) { if (cs->cs_id == cisp->connection_id) break; } if (cs == NULL) { mtx_unlock(&cfiscsi_softc.lock); snprintf(ci->error_str, sizeof(ci->error_str), "connection not found"); ci->status = CTL_ISCSI_ERROR; return; } mtx_unlock(&cfiscsi_softc.lock); #if 0 if (cs->cs_login_phase == false) return (EBUSY); #endif if (cs->cs_terminating) { snprintf(ci->error_str, sizeof(ci->error_str), "connection is terminating"); ci->status = CTL_ISCSI_ERROR; return; } datalen = cisp->data_segment_len; /* * XXX */ //if (datalen > CFISCSI_MAX_DATA_SEGMENT_LENGTH) { if (datalen > 65535) { snprintf(ci->error_str, sizeof(ci->error_str), "data segment too big"); ci->status = CTL_ISCSI_ERROR; return; } if (datalen > 0) { data = malloc(datalen, M_CFISCSI, M_WAITOK); error = copyin(cisp->data_segment, data, datalen); if (error != 0) { free(data, M_CFISCSI); snprintf(ci->error_str, sizeof(ci->error_str), "copyin error %d", error); ci->status = CTL_ISCSI_ERROR; return; } } ip = icl_pdu_new(cs->cs_conn, M_WAITOK); memcpy(ip->ip_bhs, cisp->bhs, sizeof(*ip->ip_bhs)); if (datalen > 0) { icl_pdu_append_data(ip, data, datalen, M_WAITOK); free(data, M_CFISCSI); } CFISCSI_SESSION_LOCK(cs); icl_pdu_queue(ip); CFISCSI_SESSION_UNLOCK(cs); ci->status = CTL_ISCSI_OK; } static void cfiscsi_ioctl_receive(struct ctl_iscsi *ci) { struct ctl_iscsi_receive_params *cirp; struct cfiscsi_session *cs; struct icl_pdu *ip; void *data; int error; cirp = (struct ctl_iscsi_receive_params *)&(ci->data); mtx_lock(&cfiscsi_softc.lock); TAILQ_FOREACH(cs, &cfiscsi_softc.sessions, cs_next) { if (cs->cs_id == cirp->connection_id) break; } if (cs == NULL) { mtx_unlock(&cfiscsi_softc.lock); snprintf(ci->error_str, sizeof(ci->error_str), "connection not found"); ci->status = CTL_ISCSI_ERROR; return; } mtx_unlock(&cfiscsi_softc.lock); #if 0 if (is->is_login_phase == false) return (EBUSY); #endif CFISCSI_SESSION_LOCK(cs); while (cs->cs_login_pdu == NULL && cs->cs_terminating == false) { error = cv_wait_sig(&cs->cs_login_cv, &cs->cs_lock); if (error != 0) { CFISCSI_SESSION_UNLOCK(cs); snprintf(ci->error_str, sizeof(ci->error_str), "interrupted by signal"); ci->status = CTL_ISCSI_ERROR; return; } } if (cs->cs_terminating) { CFISCSI_SESSION_UNLOCK(cs); snprintf(ci->error_str, sizeof(ci->error_str), "connection terminating"); ci->status = CTL_ISCSI_ERROR; return; } ip = cs->cs_login_pdu; cs->cs_login_pdu = NULL; CFISCSI_SESSION_UNLOCK(cs); if (ip->ip_data_len > cirp->data_segment_len) { icl_pdu_free(ip); snprintf(ci->error_str, sizeof(ci->error_str), "data segment too big"); ci->status = CTL_ISCSI_ERROR; return; } copyout(ip->ip_bhs, cirp->bhs, sizeof(*ip->ip_bhs)); if (ip->ip_data_len > 0) { data = malloc(ip->ip_data_len, M_CFISCSI, M_WAITOK); icl_pdu_get_data(ip, 0, data, ip->ip_data_len); copyout(data, cirp->data_segment, ip->ip_data_len); free(data, M_CFISCSI); } icl_pdu_free(ip); ci->status = CTL_ISCSI_OK; } #endif /* !ICL_KERNEL_PROXY */ static void cfiscsi_ioctl_port_create(struct ctl_req *req) { struct cfiscsi_target *ct; struct ctl_port *port; const char *target, *alias, *val; struct scsi_vpd_id_descriptor *desc; int retval, len, idlen; uint16_t tag; target = dnvlist_get_string(req->args_nvl, "cfiscsi_target", NULL); alias = dnvlist_get_string(req->args_nvl, "cfiscsi_target_alias", NULL); val = dnvlist_get_string(req->args_nvl, "cfiscsi_portal_group_tag", NULL); if (target == NULL || val == NULL) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "Missing required argument"); return; } tag = strtoul(val, NULL, 0); ct = cfiscsi_target_find_or_create(&cfiscsi_softc, target, alias, tag); if (ct == NULL) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "failed to create target \"%s\"", target); return; } if (ct->ct_state == CFISCSI_TARGET_STATE_ACTIVE) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "target \"%s\" for portal group tag %u already exists", target, tag); cfiscsi_target_release(ct); return; } port = &ct->ct_port; // WAT if (ct->ct_state == CFISCSI_TARGET_STATE_DYING) goto done; port->frontend = &cfiscsi_frontend; port->port_type = CTL_PORT_ISCSI; /* XXX KDM what should the real number be here? */ port->num_requested_ctl_io = 4096; port->port_name = "iscsi"; port->physical_port = (int)tag; port->virtual_port = ct->ct_target_id; port->port_online = cfiscsi_online; port->port_offline = cfiscsi_offline; port->port_info = cfiscsi_info; port->onoff_arg = ct; port->fe_datamove = cfiscsi_datamove; port->fe_done = cfiscsi_done; port->targ_port = -1; port->options = nvlist_clone(req->args_nvl); /* Generate Port ID. */ idlen = strlen(target) + strlen(",t,0x0001") + 1; idlen = roundup2(idlen, 4); len = sizeof(struct scsi_vpd_device_id) + idlen; port->port_devid = malloc(sizeof(struct ctl_devid) + len, M_CTL, M_WAITOK | M_ZERO); port->port_devid->len = len; desc = (struct scsi_vpd_id_descriptor *)port->port_devid->data; desc->proto_codeset = (SCSI_PROTO_ISCSI << 4) | SVPD_ID_CODESET_UTF8; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_PORT | SVPD_ID_TYPE_SCSI_NAME; desc->length = idlen; snprintf(desc->identifier, idlen, "%s,t,0x%4.4x", target, tag); /* Generate Target ID. */ idlen = strlen(target) + 1; idlen = roundup2(idlen, 4); len = sizeof(struct scsi_vpd_device_id) + idlen; port->target_devid = malloc(sizeof(struct ctl_devid) + len, M_CTL, M_WAITOK | M_ZERO); port->target_devid->len = len; desc = (struct scsi_vpd_id_descriptor *)port->target_devid->data; desc->proto_codeset = (SCSI_PROTO_ISCSI << 4) | SVPD_ID_CODESET_UTF8; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_TARGET | SVPD_ID_TYPE_SCSI_NAME; desc->length = idlen; strlcpy(desc->identifier, target, idlen); retval = ctl_port_register(port); if (retval != 0) { free(port->port_devid, M_CFISCSI); free(port->target_devid, M_CFISCSI); cfiscsi_target_release(ct); req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "ctl_port_register() failed with error %d", retval); return; } done: ct->ct_state = CFISCSI_TARGET_STATE_ACTIVE; req->status = CTL_LUN_OK; req->result_nvl = nvlist_create(0); nvlist_add_number(req->result_nvl, "port_id", port->targ_port); } static void cfiscsi_ioctl_port_remove(struct ctl_req *req) { struct cfiscsi_target *ct; const char *target, *val; uint16_t tag; target = dnvlist_get_string(req->args_nvl, "cfiscsi_target", NULL); val = dnvlist_get_string(req->args_nvl, "cfiscsi_portal_group_tag", NULL); if (target == NULL || val == NULL) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "Missing required argument"); return; } tag = strtoul(val, NULL, 0); ct = cfiscsi_target_find(&cfiscsi_softc, target, tag); if (ct == NULL) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "can't find target \"%s\"", target); return; } ct->ct_state = CFISCSI_TARGET_STATE_DYING; ctl_port_offline(&ct->ct_port); cfiscsi_target_release(ct); cfiscsi_target_release(ct); req->status = CTL_LUN_OK; } static int cfiscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td) { struct ctl_iscsi *ci; struct ctl_req *req; if (cmd == CTL_PORT_REQ) { req = (struct ctl_req *)addr; switch (req->reqtype) { case CTL_REQ_CREATE: cfiscsi_ioctl_port_create(req); break; case CTL_REQ_REMOVE: cfiscsi_ioctl_port_remove(req); break; default: req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "Unsupported request type %d", req->reqtype); } return (0); } if (cmd != CTL_ISCSI) return (ENOTTY); ci = (struct ctl_iscsi *)addr; switch (ci->type) { case CTL_ISCSI_HANDOFF: cfiscsi_ioctl_handoff(ci); break; case CTL_ISCSI_LIST: cfiscsi_ioctl_list(ci); break; case CTL_ISCSI_LOGOUT: cfiscsi_ioctl_logout(ci); break; case CTL_ISCSI_TERMINATE: cfiscsi_ioctl_terminate(ci); break; case CTL_ISCSI_LIMITS: cfiscsi_ioctl_limits(ci); break; #ifdef ICL_KERNEL_PROXY case CTL_ISCSI_LISTEN: cfiscsi_ioctl_listen(ci); break; case CTL_ISCSI_ACCEPT: cfiscsi_ioctl_accept(ci); break; case CTL_ISCSI_SEND: cfiscsi_ioctl_send(ci); break; case CTL_ISCSI_RECEIVE: cfiscsi_ioctl_receive(ci); break; #else case CTL_ISCSI_LISTEN: case CTL_ISCSI_ACCEPT: case CTL_ISCSI_SEND: case CTL_ISCSI_RECEIVE: ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "%s: CTL compiled without ICL_KERNEL_PROXY", __func__); break; #endif /* !ICL_KERNEL_PROXY */ default: ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "%s: invalid iSCSI request type %d", __func__, ci->type); break; } return (0); } static void cfiscsi_target_hold(struct cfiscsi_target *ct) { refcount_acquire(&ct->ct_refcount); } static void cfiscsi_target_release(struct cfiscsi_target *ct) { struct cfiscsi_softc *softc; softc = ct->ct_softc; mtx_lock(&softc->lock); if (refcount_release(&ct->ct_refcount)) { TAILQ_REMOVE(&softc->targets, ct, ct_next); mtx_unlock(&softc->lock); if (ct->ct_state != CFISCSI_TARGET_STATE_INVALID) { ct->ct_state = CFISCSI_TARGET_STATE_INVALID; if (ctl_port_deregister(&ct->ct_port) != 0) printf("%s: ctl_port_deregister() failed\n", __func__); } free(ct, M_CFISCSI); return; } mtx_unlock(&softc->lock); } static struct cfiscsi_target * cfiscsi_target_find(struct cfiscsi_softc *softc, const char *name, uint16_t tag) { struct cfiscsi_target *ct; mtx_lock(&softc->lock); TAILQ_FOREACH(ct, &softc->targets, ct_next) { if (ct->ct_tag != tag || strcmp(name, ct->ct_name) != 0 || ct->ct_state != CFISCSI_TARGET_STATE_ACTIVE) continue; cfiscsi_target_hold(ct); mtx_unlock(&softc->lock); return (ct); } mtx_unlock(&softc->lock); return (NULL); } static struct cfiscsi_target * cfiscsi_target_find_or_create(struct cfiscsi_softc *softc, const char *name, const char *alias, uint16_t tag) { struct cfiscsi_target *ct, *newct; if (name[0] == '\0' || strlen(name) >= CTL_ISCSI_NAME_LEN) return (NULL); newct = malloc(sizeof(*newct), M_CFISCSI, M_WAITOK | M_ZERO); mtx_lock(&softc->lock); TAILQ_FOREACH(ct, &softc->targets, ct_next) { if (ct->ct_tag != tag || strcmp(name, ct->ct_name) != 0 || ct->ct_state == CFISCSI_TARGET_STATE_INVALID) continue; cfiscsi_target_hold(ct); mtx_unlock(&softc->lock); free(newct, M_CFISCSI); return (ct); } strlcpy(newct->ct_name, name, sizeof(newct->ct_name)); if (alias != NULL) strlcpy(newct->ct_alias, alias, sizeof(newct->ct_alias)); newct->ct_tag = tag; refcount_init(&newct->ct_refcount, 1); newct->ct_softc = softc; if (TAILQ_EMPTY(&softc->targets)) softc->last_target_id = 0; newct->ct_target_id = ++softc->last_target_id; TAILQ_INSERT_TAIL(&softc->targets, newct, ct_next); mtx_unlock(&softc->lock); return (newct); } static void +cfiscsi_pdu_done(struct icl_pdu *ip, int error) +{ + + if (error != 0) + ; // XXX: Do something on error? + ((ctl_ref)ip->ip_prv0)(ip->ip_prv1, -1); +} + +static void cfiscsi_datamove_in(union ctl_io *io) { struct cfiscsi_session *cs; struct icl_pdu *request, *response; const struct iscsi_bhs_scsi_command *bhssc; struct iscsi_bhs_data_in *bhsdi; struct ctl_sg_entry ctl_sg_entry, *ctl_sglist; size_t len, expected_len, sg_len, buffer_offset; const char *sg_addr; + icl_pdu_cb cb; int ctl_sg_count, error, i; request = PRIV_REQUEST(io); cs = PDU_SESSION(request); bhssc = (const struct iscsi_bhs_scsi_command *)request->ip_bhs; KASSERT((bhssc->bhssc_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_COMMAND, ("bhssc->bhssc_opcode != ISCSI_BHS_OPCODE_SCSI_COMMAND")); if (io->scsiio.kern_sg_entries > 0) { ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr; ctl_sg_count = io->scsiio.kern_sg_entries; } else { ctl_sglist = &ctl_sg_entry; ctl_sglist->addr = io->scsiio.kern_data_ptr; ctl_sglist->len = io->scsiio.kern_data_len; ctl_sg_count = 1; } /* * This is the offset within the current SCSI command; for the first * call to cfiscsi_datamove() it will be 0, and for subsequent ones * it will be the sum of lengths of previous ones. */ buffer_offset = io->scsiio.kern_rel_offset; /* * This is the transfer length expected by the initiator. It can be * different from the amount of data from the SCSI point of view. */ expected_len = ntohl(bhssc->bhssc_expected_data_transfer_length); /* * If the transfer is outside of expected length -- we are done. */ if (buffer_offset >= expected_len) { #if 0 CFISCSI_SESSION_DEBUG(cs, "buffer_offset = %zd, " "already sent the expected len", buffer_offset); #endif io->scsiio.be_move_done(io); return; } + if (io->scsiio.kern_data_ref != NULL) + cb = cfiscsi_pdu_done; + else + cb = NULL; + i = 0; sg_addr = NULL; sg_len = 0; response = NULL; bhsdi = NULL; for (;;) { if (response == NULL) { response = cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_WARN(cs, "failed to " "allocate memory; dropping connection"); ctl_set_busy(&io->scsiio); io->scsiio.be_move_done(io); cfiscsi_session_terminate(cs); return; } bhsdi = (struct iscsi_bhs_data_in *)response->ip_bhs; bhsdi->bhsdi_opcode = ISCSI_BHS_OPCODE_SCSI_DATA_IN; bhsdi->bhsdi_initiator_task_tag = bhssc->bhssc_initiator_task_tag; bhsdi->bhsdi_target_transfer_tag = 0xffffffff; bhsdi->bhsdi_datasn = htonl(PRIV_EXPDATASN(io)++); bhsdi->bhsdi_buffer_offset = htonl(buffer_offset); } KASSERT(i < ctl_sg_count, ("i >= ctl_sg_count")); if (sg_len == 0) { sg_addr = ctl_sglist[i].addr; sg_len = ctl_sglist[i].len; KASSERT(sg_len > 0, ("sg_len <= 0")); } len = sg_len; /* * Truncate to maximum data segment length. */ KASSERT(response->ip_data_len < cs->cs_max_send_data_segment_length, ("ip_data_len %zd >= max_send_data_segment_length %d", response->ip_data_len, cs->cs_max_send_data_segment_length)); if (response->ip_data_len + len > cs->cs_max_send_data_segment_length) { len = cs->cs_max_send_data_segment_length - response->ip_data_len; KASSERT(len <= sg_len, ("len %zd > sg_len %zd", len, sg_len)); } /* * Truncate to expected data transfer length. */ KASSERT(buffer_offset + response->ip_data_len < expected_len, ("buffer_offset %zd + ip_data_len %zd >= expected_len %zd", buffer_offset, response->ip_data_len, expected_len)); if (buffer_offset + response->ip_data_len + len > expected_len) { CFISCSI_SESSION_DEBUG(cs, "truncating from %zd " "to expected data transfer length %zd", buffer_offset + response->ip_data_len + len, expected_len); len = expected_len - (buffer_offset + response->ip_data_len); KASSERT(len <= sg_len, ("len %zd > sg_len %zd", len, sg_len)); } - error = icl_pdu_append_data(response, sg_addr, len, M_NOWAIT); + error = icl_pdu_append_data(response, sg_addr, len, + M_NOWAIT | (cb ? ICL_NOCOPY : 0)); if (error != 0) { CFISCSI_SESSION_WARN(cs, "failed to " "allocate memory; dropping connection"); icl_pdu_free(response); ctl_set_busy(&io->scsiio); io->scsiio.be_move_done(io); cfiscsi_session_terminate(cs); return; } sg_addr += len; sg_len -= len; io->scsiio.kern_data_resid -= len; KASSERT(buffer_offset + response->ip_data_len <= expected_len, ("buffer_offset %zd + ip_data_len %zd > expected_len %zd", buffer_offset, response->ip_data_len, expected_len)); if (buffer_offset + response->ip_data_len == expected_len) { /* * Already have the amount of data the initiator wanted. */ break; } if (sg_len == 0) { /* * End of scatter-gather segment; * proceed to the next one... */ if (i == ctl_sg_count - 1) { /* * ... unless this was the last one. */ break; } i++; } if (response->ip_data_len == cs->cs_max_send_data_segment_length) { /* * Can't stuff more data into the current PDU; * queue it. Note that's not enough to check * for kern_data_resid == 0 instead; there * may be several Data-In PDUs for the final * call to cfiscsi_datamove(), and we want * to set the F flag only on the last of them. */ buffer_offset += response->ip_data_len; if (buffer_offset == io->scsiio.kern_total_len || buffer_offset == expected_len) { buffer_offset -= response->ip_data_len; break; } - cfiscsi_pdu_queue(response); + if (cb != NULL) { + response->ip_prv0 = io->scsiio.kern_data_ref; + response->ip_prv1 = io->scsiio.kern_data_arg; + io->scsiio.kern_data_ref(io->scsiio.kern_data_arg, 1); + } + cfiscsi_pdu_queue_cb(response, cb); response = NULL; bhsdi = NULL; } } if (response != NULL) { buffer_offset += response->ip_data_len; if (buffer_offset == io->scsiio.kern_total_len || buffer_offset == expected_len) { bhsdi->bhsdi_flags |= BHSDI_FLAGS_F; if (io->io_hdr.status == CTL_SUCCESS) { bhsdi->bhsdi_flags |= BHSDI_FLAGS_S; if (io->scsiio.kern_total_len < ntohl(bhssc->bhssc_expected_data_transfer_length)) { bhsdi->bhsdi_flags |= BHSSR_FLAGS_RESIDUAL_UNDERFLOW; bhsdi->bhsdi_residual_count = htonl(ntohl(bhssc->bhssc_expected_data_transfer_length) - io->scsiio.kern_total_len); } else if (io->scsiio.kern_total_len > ntohl(bhssc->bhssc_expected_data_transfer_length)) { bhsdi->bhsdi_flags |= BHSSR_FLAGS_RESIDUAL_OVERFLOW; bhsdi->bhsdi_residual_count = htonl(io->scsiio.kern_total_len - ntohl(bhssc->bhssc_expected_data_transfer_length)); } bhsdi->bhsdi_status = io->scsiio.scsi_status; io->io_hdr.flags |= CTL_FLAG_STATUS_SENT; } } KASSERT(response->ip_data_len > 0, ("sending empty Data-In")); - cfiscsi_pdu_queue(response); + if (cb != NULL) { + response->ip_prv0 = io->scsiio.kern_data_ref; + response->ip_prv1 = io->scsiio.kern_data_arg; + io->scsiio.kern_data_ref(io->scsiio.kern_data_arg, 1); + } + cfiscsi_pdu_queue_cb(response, cb); } io->scsiio.be_move_done(io); } static void cfiscsi_datamove_out(union ctl_io *io) { struct cfiscsi_session *cs; struct icl_pdu *request, *response; const struct iscsi_bhs_scsi_command *bhssc; struct iscsi_bhs_r2t *bhsr2t; struct cfiscsi_data_wait *cdw; struct ctl_sg_entry ctl_sg_entry, *ctl_sglist; uint32_t expected_len, datamove_len, r2t_off, r2t_len; uint32_t target_transfer_tag; bool done; request = PRIV_REQUEST(io); cs = PDU_SESSION(request); bhssc = (const struct iscsi_bhs_scsi_command *)request->ip_bhs; KASSERT((bhssc->bhssc_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_COMMAND, ("bhssc->bhssc_opcode != ISCSI_BHS_OPCODE_SCSI_COMMAND")); /* * Complete write underflow. Not a single byte to read. Return. */ expected_len = ntohl(bhssc->bhssc_expected_data_transfer_length); if (io->scsiio.kern_rel_offset >= expected_len) { io->scsiio.be_move_done(io); return; } datamove_len = MIN(io->scsiio.kern_data_len, expected_len - io->scsiio.kern_rel_offset); target_transfer_tag = atomic_fetchadd_32(&cs->cs_target_transfer_tag, 1); cdw = cfiscsi_data_wait_new(cs, io, bhssc->bhssc_initiator_task_tag, &target_transfer_tag); if (cdw == NULL) { CFISCSI_SESSION_WARN(cs, "failed to " "allocate memory; dropping connection"); ctl_set_busy(&io->scsiio); io->scsiio.be_move_done(io); cfiscsi_session_terminate(cs); return; } #if 0 CFISCSI_SESSION_DEBUG(cs, "expecting Data-Out with initiator " "task tag 0x%x, target transfer tag 0x%x", bhssc->bhssc_initiator_task_tag, target_transfer_tag); #endif cdw->cdw_ctl_io = io; cdw->cdw_target_transfer_tag = target_transfer_tag; cdw->cdw_initiator_task_tag = bhssc->bhssc_initiator_task_tag; cdw->cdw_r2t_end = datamove_len; cdw->cdw_datasn = 0; /* Set initial data pointer for the CDW respecting ext_data_filled. */ if (io->scsiio.kern_sg_entries > 0) { ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr; } else { ctl_sglist = &ctl_sg_entry; ctl_sglist->addr = io->scsiio.kern_data_ptr; ctl_sglist->len = datamove_len; } cdw->cdw_sg_index = 0; cdw->cdw_sg_addr = ctl_sglist[cdw->cdw_sg_index].addr; cdw->cdw_sg_len = ctl_sglist[cdw->cdw_sg_index].len; r2t_off = io->scsiio.ext_data_filled; while (r2t_off > 0) { if (r2t_off >= cdw->cdw_sg_len) { r2t_off -= cdw->cdw_sg_len; cdw->cdw_sg_index++; cdw->cdw_sg_addr = ctl_sglist[cdw->cdw_sg_index].addr; cdw->cdw_sg_len = ctl_sglist[cdw->cdw_sg_index].len; continue; } cdw->cdw_sg_addr += r2t_off; cdw->cdw_sg_len -= r2t_off; r2t_off = 0; } if (cs->cs_immediate_data && io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled < icl_pdu_data_segment_length(request)) { done = cfiscsi_handle_data_segment(request, cdw); if (done) { cfiscsi_data_wait_free(cs, cdw); io->scsiio.be_move_done(io); return; } } r2t_off = io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled; r2t_len = MIN(datamove_len - io->scsiio.ext_data_filled, cs->cs_max_burst_length); cdw->cdw_r2t_end = io->scsiio.ext_data_filled + r2t_len; CFISCSI_SESSION_LOCK(cs); TAILQ_INSERT_TAIL(&cs->cs_waiting_for_data_out, cdw, cdw_next); CFISCSI_SESSION_UNLOCK(cs); /* * XXX: We should limit the number of outstanding R2T PDUs * per task to MaxOutstandingR2T. */ response = cfiscsi_pdu_new_response(request, M_NOWAIT); if (response == NULL) { CFISCSI_SESSION_WARN(cs, "failed to " "allocate memory; dropping connection"); ctl_set_busy(&io->scsiio); io->scsiio.be_move_done(io); cfiscsi_session_terminate(cs); return; } io->io_hdr.flags |= CTL_FLAG_DMA_INPROG; bhsr2t = (struct iscsi_bhs_r2t *)response->ip_bhs; bhsr2t->bhsr2t_opcode = ISCSI_BHS_OPCODE_R2T; bhsr2t->bhsr2t_flags = 0x80; bhsr2t->bhsr2t_lun = bhssc->bhssc_lun; bhsr2t->bhsr2t_initiator_task_tag = bhssc->bhssc_initiator_task_tag; bhsr2t->bhsr2t_target_transfer_tag = target_transfer_tag; /* * XXX: Here we assume that cfiscsi_datamove() won't ever * be running concurrently on several CPUs for a given * command. */ bhsr2t->bhsr2t_r2tsn = htonl(PRIV_R2TSN(io)++); /* * This is the offset within the current SCSI command; * i.e. for the first call of datamove(), it will be 0, * and for subsequent ones it will be the sum of lengths * of previous ones. * * The ext_data_filled is to account for unsolicited * (immediate) data that might have already arrived. */ bhsr2t->bhsr2t_buffer_offset = htonl(r2t_off); /* * This is the total length (sum of S/G lengths) this call * to cfiscsi_datamove() is supposed to handle, limited by * MaxBurstLength. */ bhsr2t->bhsr2t_desired_data_transfer_length = htonl(r2t_len); cfiscsi_pdu_queue(response); } static void cfiscsi_datamove(union ctl_io *io) { if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) cfiscsi_datamove_in(io); else { /* We hadn't received anything during this datamove yet. */ io->scsiio.ext_data_filled = 0; cfiscsi_datamove_out(io); } } static void cfiscsi_scsi_command_done(union ctl_io *io) { struct icl_pdu *request, *response; struct iscsi_bhs_scsi_command *bhssc; struct iscsi_bhs_scsi_response *bhssr; #ifdef DIAGNOSTIC struct cfiscsi_data_wait *cdw; #endif struct cfiscsi_session *cs; uint16_t sense_length; request = PRIV_REQUEST(io); cs = PDU_SESSION(request); bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs; KASSERT((bhssc->bhssc_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_SCSI_COMMAND, ("replying to wrong opcode 0x%x", bhssc->bhssc_opcode)); //CFISCSI_SESSION_DEBUG(cs, "initiator task tag 0x%x", // bhssc->bhssc_initiator_task_tag); #ifdef DIAGNOSTIC CFISCSI_SESSION_LOCK(cs); TAILQ_FOREACH(cdw, &cs->cs_waiting_for_data_out, cdw_next) KASSERT(bhssc->bhssc_initiator_task_tag != cdw->cdw_initiator_task_tag, ("dangling cdw")); CFISCSI_SESSION_UNLOCK(cs); #endif /* * Do not return status for aborted commands. * There are exceptions, but none supported by CTL yet. */ if (((io->io_hdr.flags & CTL_FLAG_ABORT) && (io->io_hdr.flags & CTL_FLAG_ABORT_STATUS) == 0) || (io->io_hdr.flags & CTL_FLAG_STATUS_SENT)) { ctl_free_io(io); icl_pdu_free(request); return; } response = cfiscsi_pdu_new_response(request, M_WAITOK); bhssr = (struct iscsi_bhs_scsi_response *)response->ip_bhs; bhssr->bhssr_opcode = ISCSI_BHS_OPCODE_SCSI_RESPONSE; bhssr->bhssr_flags = 0x80; /* * XXX: We don't deal with bidirectional under/overflows; * does anything actually support those? */ if (io->scsiio.kern_total_len < ntohl(bhssc->bhssc_expected_data_transfer_length)) { bhssr->bhssr_flags |= BHSSR_FLAGS_RESIDUAL_UNDERFLOW; bhssr->bhssr_residual_count = htonl(ntohl(bhssc->bhssc_expected_data_transfer_length) - io->scsiio.kern_total_len); //CFISCSI_SESSION_DEBUG(cs, "underflow; residual count %d", // ntohl(bhssr->bhssr_residual_count)); } else if (io->scsiio.kern_total_len > ntohl(bhssc->bhssc_expected_data_transfer_length)) { bhssr->bhssr_flags |= BHSSR_FLAGS_RESIDUAL_OVERFLOW; bhssr->bhssr_residual_count = htonl(io->scsiio.kern_total_len - ntohl(bhssc->bhssc_expected_data_transfer_length)); //CFISCSI_SESSION_DEBUG(cs, "overflow; residual count %d", // ntohl(bhssr->bhssr_residual_count)); } bhssr->bhssr_response = BHSSR_RESPONSE_COMMAND_COMPLETED; bhssr->bhssr_status = io->scsiio.scsi_status; bhssr->bhssr_initiator_task_tag = bhssc->bhssc_initiator_task_tag; bhssr->bhssr_expdatasn = htonl(PRIV_EXPDATASN(io)); if (io->scsiio.sense_len > 0) { #if 0 CFISCSI_SESSION_DEBUG(cs, "returning %d bytes of sense data", io->scsiio.sense_len); #endif sense_length = htons(io->scsiio.sense_len); icl_pdu_append_data(response, &sense_length, sizeof(sense_length), M_WAITOK); icl_pdu_append_data(response, &io->scsiio.sense_data, io->scsiio.sense_len, M_WAITOK); } ctl_free_io(io); icl_pdu_free(request); cfiscsi_pdu_queue(response); } static void cfiscsi_task_management_done(union ctl_io *io) { struct icl_pdu *request, *response; struct iscsi_bhs_task_management_request *bhstmr; struct iscsi_bhs_task_management_response *bhstmr2; struct cfiscsi_data_wait *cdw, *tmpcdw; struct cfiscsi_session *cs, *tcs; struct cfiscsi_softc *softc; int cold_reset = 0; request = PRIV_REQUEST(io); cs = PDU_SESSION(request); bhstmr = (struct iscsi_bhs_task_management_request *)request->ip_bhs; KASSERT((bhstmr->bhstmr_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == ISCSI_BHS_OPCODE_TASK_REQUEST, ("replying to wrong opcode 0x%x", bhstmr->bhstmr_opcode)); #if 0 CFISCSI_SESSION_DEBUG(cs, "initiator task tag 0x%x; referenced task tag 0x%x", bhstmr->bhstmr_initiator_task_tag, bhstmr->bhstmr_referenced_task_tag); #endif if ((bhstmr->bhstmr_function & ~0x80) == BHSTMR_FUNCTION_ABORT_TASK) { /* * Make sure we no longer wait for Data-Out for this command. */ CFISCSI_SESSION_LOCK(cs); TAILQ_FOREACH_SAFE(cdw, &cs->cs_waiting_for_data_out, cdw_next, tmpcdw) { if (bhstmr->bhstmr_referenced_task_tag != cdw->cdw_initiator_task_tag) continue; #if 0 CFISCSI_SESSION_DEBUG(cs, "removing csw for initiator task " "tag 0x%x", bhstmr->bhstmr_initiator_task_tag); #endif TAILQ_REMOVE(&cs->cs_waiting_for_data_out, cdw, cdw_next); io->io_hdr.flags &= ~CTL_FLAG_DMA_INPROG; cdw->cdw_ctl_io->scsiio.io_hdr.port_status = 43; cdw->cdw_ctl_io->scsiio.be_move_done(cdw->cdw_ctl_io); cfiscsi_data_wait_free(cs, cdw); } CFISCSI_SESSION_UNLOCK(cs); } if ((bhstmr->bhstmr_function & ~0x80) == BHSTMR_FUNCTION_TARGET_COLD_RESET && io->io_hdr.status == CTL_SUCCESS) cold_reset = 1; response = cfiscsi_pdu_new_response(request, M_WAITOK); bhstmr2 = (struct iscsi_bhs_task_management_response *) response->ip_bhs; bhstmr2->bhstmr_opcode = ISCSI_BHS_OPCODE_TASK_RESPONSE; bhstmr2->bhstmr_flags = 0x80; switch (io->taskio.task_status) { case CTL_TASK_FUNCTION_COMPLETE: bhstmr2->bhstmr_response = BHSTMR_RESPONSE_FUNCTION_COMPLETE; break; case CTL_TASK_FUNCTION_SUCCEEDED: bhstmr2->bhstmr_response = BHSTMR_RESPONSE_FUNCTION_SUCCEEDED; break; case CTL_TASK_LUN_DOES_NOT_EXIST: bhstmr2->bhstmr_response = BHSTMR_RESPONSE_LUN_DOES_NOT_EXIST; break; case CTL_TASK_FUNCTION_NOT_SUPPORTED: default: bhstmr2->bhstmr_response = BHSTMR_RESPONSE_FUNCTION_NOT_SUPPORTED; break; } memcpy(bhstmr2->bhstmr_additional_reponse_information, io->taskio.task_resp, sizeof(io->taskio.task_resp)); bhstmr2->bhstmr_initiator_task_tag = bhstmr->bhstmr_initiator_task_tag; ctl_free_io(io); icl_pdu_free(request); cfiscsi_pdu_queue(response); if (cold_reset) { softc = cs->cs_target->ct_softc; mtx_lock(&softc->lock); TAILQ_FOREACH(tcs, &softc->sessions, cs_next) { if (tcs->cs_target == cs->cs_target) cfiscsi_session_terminate(tcs); } mtx_unlock(&softc->lock); } } static void cfiscsi_done(union ctl_io *io) { struct icl_pdu *request; struct cfiscsi_session *cs; KASSERT(((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE), ("invalid CTL status %#x", io->io_hdr.status)); if (io->io_hdr.io_type == CTL_IO_TASK && io->taskio.task_action == CTL_TASK_I_T_NEXUS_RESET) { /* * Implicit task termination has just completed; nothing to do. */ cs = PRIV_REQUEST(io); cs->cs_tasks_aborted = true; refcount_release(&cs->cs_outstanding_ctl_pdus); wakeup(__DEVOLATILE(void *, &cs->cs_outstanding_ctl_pdus)); ctl_free_io(io); return; } request = PRIV_REQUEST(io); cs = PDU_SESSION(request); switch (request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) { case ISCSI_BHS_OPCODE_SCSI_COMMAND: cfiscsi_scsi_command_done(io); break; case ISCSI_BHS_OPCODE_TASK_REQUEST: cfiscsi_task_management_done(io); break; default: panic("cfiscsi_done called with wrong opcode 0x%x", request->ip_bhs->bhs_opcode); } refcount_release(&cs->cs_outstanding_ctl_pdus); } Index: head/sys/cam/ctl/ctl_io.h =================================================================== --- head/sys/cam/ctl/ctl_io.h (revision 361938) +++ head/sys/cam/ctl/ctl_io.h (revision 361939) @@ -1,598 +1,602 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2003 Silicon Graphics International Corp. * Copyright (c) 2014-2015 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_io.h#5 $ * $FreeBSD$ */ /* * CAM Target Layer data movement structures/interface. * * Author: Ken Merry */ #ifndef _CTL_IO_H_ #define _CTL_IO_H_ #define CTL_MAX_CDBLEN 32 /* * Uncomment this next line to enable printing out times for I/Os * that take longer than CTL_TIME_IO_SECS seconds to get to the datamove * and/or done stage. */ #define CTL_TIME_IO #ifdef CTL_TIME_IO #define CTL_TIME_IO_DEFAULT_SECS 90 #endif /* * Uncomment this next line to enable the CTL I/O delay feature. You * can delay I/O at two different points -- datamove and done. This is * useful for diagnosing abort conditions (for hosts that send an abort on a * timeout), and for determining how long a host's timeout is. */ //#define CTL_IO_DELAY typedef enum { CTL_STATUS_NONE, /* No status */ CTL_SUCCESS, /* Transaction completed successfully */ CTL_CMD_TIMEOUT, /* Command timed out, shouldn't happen here */ CTL_SEL_TIMEOUT, /* Selection timeout, shouldn't happen here */ CTL_ERROR, /* General CTL error XXX expand on this? */ CTL_SCSI_ERROR, /* SCSI error, look at status byte/sense data */ CTL_CMD_ABORTED, /* Command aborted, don't return status */ CTL_STATUS_MASK = 0xfff,/* Mask off any status flags */ CTL_AUTOSENSE = 0x1000 /* Autosense performed */ } ctl_io_status; /* * WARNING: Keep the data in/out/none flags where they are. They're used * in conjunction with ctl_cmd_flags. See comment above ctl_cmd_flags * definition in ctl_private.h. */ typedef enum { CTL_FLAG_NONE = 0x00000000, /* no flags */ CTL_FLAG_DATA_IN = 0x00000001, /* DATA IN */ CTL_FLAG_DATA_OUT = 0x00000002, /* DATA OUT */ CTL_FLAG_DATA_NONE = 0x00000003, /* no data */ CTL_FLAG_DATA_MASK = 0x00000003, CTL_FLAG_DO_AUTOSENSE = 0x00000020, /* grab sense info */ CTL_FLAG_USER_REQ = 0x00000040, /* request came from userland */ CTL_FLAG_ALLOCATED = 0x00000100, /* data space allocated */ CTL_FLAG_ABORT_STATUS = 0x00000400, /* return TASK ABORTED status */ CTL_FLAG_ABORT = 0x00000800, /* this I/O should be aborted */ CTL_FLAG_DMA_INPROG = 0x00001000, /* DMA in progress */ CTL_FLAG_DELAY_DONE = 0x00004000, /* delay injection done */ CTL_FLAG_INT_COPY = 0x00008000, /* internal copy, no done call*/ CTL_FLAG_SENT_2OTHER_SC = 0x00010000, CTL_FLAG_FROM_OTHER_SC = 0x00020000, CTL_FLAG_IS_WAS_ON_RTR = 0x00040000, /* Don't rerun cmd on failover*/ CTL_FLAG_BUS_ADDR = 0x00080000, /* ctl_sglist contains BUS addresses, not virtual ones*/ CTL_FLAG_IO_CONT = 0x00100000, /* Continue I/O instead of completing */ #if 0 CTL_FLAG_ALREADY_DONE = 0x00200000 /* I/O already completed */ #endif CTL_FLAG_NO_DATAMOVE = 0x00400000, CTL_FLAG_DMA_QUEUED = 0x00800000, /* DMA queued but not started*/ CTL_FLAG_STATUS_QUEUED = 0x01000000, /* Status queued but not sent*/ CTL_FLAG_FAILOVER = 0x04000000, /* Killed by a failover */ CTL_FLAG_IO_ACTIVE = 0x08000000, /* I/O active on this SC */ CTL_FLAG_STATUS_SENT = 0x10000000, /* Status sent by datamove */ CTL_FLAG_SERSEQ_DONE = 0x20000000 /* All storage I/O started */ } ctl_io_flags; struct ctl_lba_len { uint64_t lba; uint32_t len; }; struct ctl_lba_len_flags { uint64_t lba; uint32_t len; uint32_t flags; #define CTL_LLF_FUA 0x04000000 #define CTL_LLF_DPO 0x08000000 #define CTL_LLF_READ 0x10000000 #define CTL_LLF_WRITE 0x20000000 #define CTL_LLF_VERIFY 0x40000000 #define CTL_LLF_COMPARE 0x80000000 }; struct ctl_ptr_len_flags { uint8_t *ptr; uint32_t len; uint32_t flags; }; union ctl_priv { uint8_t bytes[sizeof(uint64_t) * 2]; uint64_t integer; uint64_t integers[2]; void *ptr; void *ptrs[2]; }; /* * Number of CTL private areas. */ #define CTL_NUM_PRIV 6 /* * Which private area are we using for a particular piece of data? */ #define CTL_PRIV_LUN 0 /* CTL LUN pointer goes here */ #define CTL_PRIV_LBA_LEN 1 /* Decoded LBA/len for read/write*/ #define CTL_PRIV_MODEPAGE 1 /* Modepage info for config write */ #define CTL_PRIV_BACKEND 2 /* Reserved for block, RAIDCore */ #define CTL_PRIV_BACKEND_LUN 3 /* Backend LUN pointer */ #define CTL_PRIV_FRONTEND 4 /* Frontend storage */ #define CTL_PRIV_FRONTEND2 5 /* Another frontend storage */ #define CTL_LUN(io) ((io)->io_hdr.ctl_private[CTL_PRIV_LUN].ptrs[0]) #define CTL_SOFTC(io) ((io)->io_hdr.ctl_private[CTL_PRIV_LUN].ptrs[1]) #define CTL_BACKEND_LUN(io) ((io)->io_hdr.ctl_private[CTL_PRIV_BACKEND_LUN].ptrs[0]) #define CTL_PORT(io) (((struct ctl_softc *)CTL_SOFTC(io))-> \ ctl_ports[(io)->io_hdr.nexus.targ_port]) /* * These are used only on Originating SC in XFER mode, where requests don't * ever reach backends, so we can reuse backend's private storage. */ #define CTL_RSGL(io) ((io)->io_hdr.ctl_private[CTL_PRIV_BACKEND].ptrs[0]) #define CTL_LSGL(io) ((io)->io_hdr.ctl_private[CTL_PRIV_BACKEND].ptrs[1]) #define CTL_RSGLT(io) ((struct ctl_sg_entry *)CTL_RSGL(io)) #define CTL_LSGLT(io) ((struct ctl_sg_entry *)CTL_LSGL(io)) #define CTL_INVALID_PORTNAME 0xFF #define CTL_UNMAPPED_IID 0xFF struct ctl_sg_entry { void *addr; size_t len; }; typedef enum { CTL_IO_NONE, CTL_IO_SCSI, CTL_IO_TASK, } ctl_io_type; struct ctl_nexus { uint32_t initid; /* Initiator ID */ uint32_t targ_port; /* Target port, filled in by PORT */ uint32_t targ_lun; /* Destination lun */ uint32_t targ_mapped_lun; /* Destination lun CTL-wide */ }; typedef enum { CTL_MSG_SERIALIZE, CTL_MSG_R2R, CTL_MSG_FINISH_IO, CTL_MSG_BAD_JUJU, CTL_MSG_MANAGE_TASKS, CTL_MSG_PERS_ACTION, CTL_MSG_DATAMOVE, CTL_MSG_DATAMOVE_DONE, CTL_MSG_UA, /* Set/clear UA on secondary. */ CTL_MSG_PORT_SYNC, /* Information about port. */ CTL_MSG_LUN_SYNC, /* Information about LUN. */ CTL_MSG_IID_SYNC, /* Information about initiator. */ CTL_MSG_LOGIN, /* Information about HA peer. */ CTL_MSG_MODE_SYNC, /* Mode page current content. */ CTL_MSG_FAILOVER /* Fake, never sent though the wire */ } ctl_msg_type; struct ctl_scsiio; struct ctl_io_hdr { uint32_t version; /* interface version XXX */ ctl_io_type io_type; /* task I/O, SCSI I/O, etc. */ ctl_msg_type msg_type; struct ctl_nexus nexus; /* Initiator, port, target, lun */ uint32_t iid_indx; /* the index into the iid mapping */ uint32_t flags; /* transaction flags */ uint32_t status; /* transaction status */ uint32_t port_status; /* trans status, set by PORT, 0 = good*/ uint32_t timeout; /* timeout in ms */ uint32_t retries; /* retry count */ #ifdef CTL_IO_DELAY struct callout delay_callout; #endif /* CTL_IO_DELAY */ #ifdef CTL_TIME_IO time_t start_time; /* I/O start time */ struct bintime start_bt; /* Timer start ticks */ struct bintime dma_start_bt; /* DMA start ticks */ struct bintime dma_bt; /* DMA total ticks */ #endif /* CTL_TIME_IO */ uint32_t num_dmas; /* Number of DMAs */ union ctl_io *remote_io; /* I/O counterpart on remote HA side */ union ctl_io *blocker; /* I/O blocking this one */ void *pool; /* I/O pool */ union ctl_priv ctl_private[CTL_NUM_PRIV];/* CTL private area */ TAILQ_HEAD(, ctl_io_hdr) blocked_queue; /* I/Os blocked by this one */ STAILQ_ENTRY(ctl_io_hdr) links; /* linked list pointer */ TAILQ_ENTRY(ctl_io_hdr) ooa_links; /* ooa_queue links */ TAILQ_ENTRY(ctl_io_hdr) blocked_links; /* blocked_queue links */ }; typedef enum { CTL_TAG_UNTAGGED, CTL_TAG_SIMPLE, CTL_TAG_ORDERED, CTL_TAG_HEAD_OF_QUEUE, CTL_TAG_ACA } ctl_tag_type; union ctl_io; +typedef void (*ctl_ref)(void *arg, int diff); + /* * SCSI passthrough I/O structure for the CAM Target Layer. Note * that some of these fields are here for completeness, but they aren't * used in the CTL implementation. e.g., timeout and retries won't be * used. * * Note: Make sure the io_hdr is *always* the first element in this * structure. */ struct ctl_scsiio { struct ctl_io_hdr io_hdr; /* common to all I/O types */ /* * The ext_* fields are generally intended for frontend use; CTL itself * doesn't modify or use them. */ uint32_t ext_sg_entries; /* 0 = no S/G list, > 0 = num entries */ uint8_t *ext_data_ptr; /* data buffer or S/G list */ uint32_t ext_data_len; /* Data transfer length */ uint32_t ext_data_filled; /* Amount of data filled so far */ /* * The number of scatter/gather entries in the list pointed to * by kern_data_ptr. 0 means there is no list, just a data pointer. */ uint32_t kern_sg_entries; uint32_t rem_sg_entries; /* Unused. */ /* * The data pointer or a pointer to the scatter/gather list. */ uint8_t *kern_data_ptr; /* * Length of the data buffer or scatter/gather list. It's also * the length of this particular piece of the data transfer, * ie. number of bytes expected to be transferred by the current * invocation of frontend's datamove() callback. It's always * less than or equal to kern_total_len. */ uint32_t kern_data_len; /* * Total length of data to be transferred during this particular * SCSI command, as decoded from SCSI CDB. */ uint32_t kern_total_len; /* * Amount of data left after the current data transfer. */ uint32_t kern_data_resid; /* * Byte offset of this transfer, equal to the amount of data * already transferred for this SCSI command during previous * datamove() invocations. */ uint32_t kern_rel_offset; struct scsi_sense_data sense_data; /* sense data */ uint8_t sense_len; /* Returned sense length */ uint8_t scsi_status; /* SCSI status byte */ uint8_t sense_residual; /* Unused. */ uint32_t residual; /* Unused */ uint32_t tag_num; /* tag number */ ctl_tag_type tag_type; /* simple, ordered, head of queue,etc.*/ uint8_t cdb_len; /* CDB length */ uint8_t cdb[CTL_MAX_CDBLEN]; /* CDB */ int (*be_move_done)(union ctl_io *io); /* called by fe */ int (*io_cont)(union ctl_io *io); /* to continue processing */ + ctl_ref kern_data_ref; /* Method to reference/release data */ + void *kern_data_arg; /* Opaque argument for kern_data_ref() */ }; typedef enum { CTL_TASK_ABORT_TASK, CTL_TASK_ABORT_TASK_SET, CTL_TASK_CLEAR_ACA, CTL_TASK_CLEAR_TASK_SET, CTL_TASK_I_T_NEXUS_RESET, CTL_TASK_LUN_RESET, CTL_TASK_TARGET_RESET, CTL_TASK_BUS_RESET, CTL_TASK_PORT_LOGIN, CTL_TASK_PORT_LOGOUT, CTL_TASK_QUERY_TASK, CTL_TASK_QUERY_TASK_SET, CTL_TASK_QUERY_ASYNC_EVENT } ctl_task_type; typedef enum { CTL_TASK_FUNCTION_COMPLETE, CTL_TASK_FUNCTION_SUCCEEDED, CTL_TASK_FUNCTION_REJECTED, CTL_TASK_LUN_DOES_NOT_EXIST, CTL_TASK_FUNCTION_NOT_SUPPORTED } ctl_task_status; /* * Task management I/O structure. Aborts, bus resets, etc., are sent using * this structure. * * Note: Make sure the io_hdr is *always* the first element in this * structure. */ struct ctl_taskio { struct ctl_io_hdr io_hdr; /* common to all I/O types */ ctl_task_type task_action; /* Target Reset, Abort, etc. */ uint32_t tag_num; /* tag number */ ctl_tag_type tag_type; /* simple, ordered, etc. */ uint8_t task_status; /* Complete, Succeeded, etc. */ uint8_t task_resp[3];/* Response information */ }; /* * HA link messages. */ #define CTL_HA_VERSION 3 /* * Used for CTL_MSG_LOGIN. */ struct ctl_ha_msg_login { ctl_msg_type msg_type; int version; int ha_mode; int ha_id; int max_luns; int max_ports; int max_init_per_port; }; typedef enum { CTL_PR_REG_KEY, CTL_PR_UNREG_KEY, CTL_PR_PREEMPT, CTL_PR_CLEAR, CTL_PR_RESERVE, CTL_PR_RELEASE } ctl_pr_action; /* * The PR info is specifically for sending Persistent Reserve actions * to the other SC which it must also act on. * * Note: Make sure the io_hdr is *always* the first element in this * structure. */ struct ctl_pr_info { ctl_pr_action action; uint8_t sa_res_key[8]; uint8_t res_type; uint32_t residx; }; struct ctl_ha_msg_hdr { ctl_msg_type msg_type; uint32_t status; /* transaction status */ union ctl_io *original_sc; union ctl_io *serializing_sc; struct ctl_nexus nexus; /* Initiator, port, target, lun */ }; #define CTL_HA_MAX_SG_ENTRIES 16 #define CTL_HA_DATAMOVE_SEGMENT 131072 /* * Used for CTL_MSG_PERS_ACTION. */ struct ctl_ha_msg_pr { struct ctl_ha_msg_hdr hdr; struct ctl_pr_info pr_info; }; /* * Used for CTL_MSG_UA. */ struct ctl_ha_msg_ua { struct ctl_ha_msg_hdr hdr; int ua_all; int ua_set; int ua_type; uint8_t ua_info[8]; }; /* * The S/G handling here is a little different than the standard ctl_scsiio * structure, because we can't pass data by reference in between controllers. * The S/G list in the ctl_scsiio struct is normally passed in the * kern_data_ptr field. So kern_sg_entries here will always be non-zero, * even if there is only one entry. * * Used for CTL_MSG_DATAMOVE. */ struct ctl_ha_msg_dt { struct ctl_ha_msg_hdr hdr; ctl_io_flags flags; /* Only I/O flags are used here */ uint32_t sg_sequence; /* S/G portion number */ uint8_t sg_last; /* last S/G batch = 1 */ uint32_t sent_sg_entries; /* previous S/G count */ uint32_t cur_sg_entries; /* current S/G entries */ uint32_t kern_sg_entries; /* total S/G entries */ uint32_t kern_data_len; /* Length of this S/G list */ uint32_t kern_total_len; /* Total length of this transaction */ uint32_t kern_data_resid; /* Length left to transfer after this*/ uint32_t kern_rel_offset; /* Byte Offset of this transfer */ struct ctl_sg_entry sg_list[CTL_HA_MAX_SG_ENTRIES]; }; /* * Used for CTL_MSG_SERIALIZE, CTL_MSG_FINISH_IO, CTL_MSG_BAD_JUJU, * and CTL_MSG_DATAMOVE_DONE. */ struct ctl_ha_msg_scsi { struct ctl_ha_msg_hdr hdr; uint32_t tag_num; /* tag number */ ctl_tag_type tag_type; /* simple, ordered, etc. */ uint8_t cdb[CTL_MAX_CDBLEN]; /* CDB */ uint8_t cdb_len; /* CDB length */ uint8_t scsi_status; /* SCSI status byte */ uint8_t sense_len; /* Returned sense length */ uint32_t port_status; /* trans status, set by FETD, 0 = good*/ uint32_t kern_data_resid; /* for DATAMOVE_DONE */ struct scsi_sense_data sense_data; /* sense data */ }; /* * Used for CTL_MSG_MANAGE_TASKS. */ struct ctl_ha_msg_task { struct ctl_ha_msg_hdr hdr; ctl_task_type task_action; /* Target Reset, Abort, etc. */ uint32_t tag_num; /* tag number */ ctl_tag_type tag_type; /* simple, ordered, etc. */ }; /* * Used for CTL_MSG_PORT_SYNC. */ struct ctl_ha_msg_port { struct ctl_ha_msg_hdr hdr; int port_type; int physical_port; int virtual_port; int status; int name_len; int lun_map_len; int port_devid_len; int target_devid_len; int init_devid_len; uint8_t data[]; }; /* * Used for CTL_MSG_LUN_SYNC. */ struct ctl_ha_msg_lun { struct ctl_ha_msg_hdr hdr; int flags; unsigned int pr_generation; uint32_t pr_res_idx; uint8_t pr_res_type; int lun_devid_len; int pr_key_count; uint8_t data[]; }; struct ctl_ha_msg_lun_pr_key { uint32_t pr_iid; uint64_t pr_key; }; /* * Used for CTL_MSG_IID_SYNC. */ struct ctl_ha_msg_iid { struct ctl_ha_msg_hdr hdr; int in_use; int name_len; uint64_t wwpn; uint8_t data[]; }; /* * Used for CTL_MSG_MODE_SYNC. */ struct ctl_ha_msg_mode { struct ctl_ha_msg_hdr hdr; uint8_t page_code; uint8_t subpage; uint16_t page_len; uint8_t data[]; }; union ctl_ha_msg { struct ctl_ha_msg_hdr hdr; struct ctl_ha_msg_task task; struct ctl_ha_msg_scsi scsi; struct ctl_ha_msg_dt dt; struct ctl_ha_msg_pr pr; struct ctl_ha_msg_ua ua; struct ctl_ha_msg_port port; struct ctl_ha_msg_lun lun; struct ctl_ha_msg_iid iid; struct ctl_ha_msg_login login; struct ctl_ha_msg_mode mode; }; struct ctl_prio { struct ctl_io_hdr io_hdr; struct ctl_ha_msg_pr pr_msg; }; union ctl_io { struct ctl_io_hdr io_hdr; /* common to all I/O types */ struct ctl_scsiio scsiio; /* Normal SCSI commands */ struct ctl_taskio taskio; /* SCSI task management/reset */ struct ctl_prio presio; /* update per. res info on other SC */ }; #ifdef _KERNEL union ctl_io *ctl_alloc_io(void *pool_ref); union ctl_io *ctl_alloc_io_nowait(void *pool_ref); void ctl_free_io(union ctl_io *io); void ctl_zero_io(union ctl_io *io); #endif /* _KERNEL */ #endif /* _CTL_IO_H_ */ /* * vim: ts=8 */ Index: head/sys/dev/iscsi/icl.h =================================================================== --- head/sys/dev/iscsi/icl.h (revision 361938) +++ head/sys/dev/iscsi/icl.h (revision 361939) @@ -1,173 +1,176 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef ICL_H #define ICL_H /* * iSCSI Common Layer. It's used by both the initiator and target to send * and receive iSCSI PDUs. */ #include #include #include #include SYSCTL_DECL(_kern_icl); extern int icl_debug; #define ICL_DEBUG(X, ...) \ do { \ if (icl_debug > 1) \ printf("%s: " X "\n", __func__, ## __VA_ARGS__);\ } while (0) #define ICL_WARN(X, ...) \ do { \ if (icl_debug > 0) { \ printf("WARNING: %s: " X "\n", \ __func__, ## __VA_ARGS__); \ } \ } while (0) struct icl_conn; struct ccb_scsiio; union ctl_io; struct icl_pdu { STAILQ_ENTRY(icl_pdu) ip_next; struct icl_conn *ip_conn; struct iscsi_bhs *ip_bhs; struct mbuf *ip_bhs_mbuf; size_t ip_ahs_len; struct mbuf *ip_ahs_mbuf; size_t ip_data_len; struct mbuf *ip_data_mbuf; /* * User (initiator or provider) private fields. */ - uint32_t ip_prv0; - uint32_t ip_prv1; - uint32_t ip_prv2; + void *ip_prv0; + void *ip_prv1; }; #define ICL_CONN_STATE_INVALID 0 #define ICL_CONN_STATE_BHS 1 #define ICL_CONN_STATE_AHS 2 #define ICL_CONN_STATE_HEADER_DIGEST 3 #define ICL_CONN_STATE_DATA 4 #define ICL_CONN_STATE_DATA_DIGEST 5 #define ICL_MAX_DATA_SEGMENT_LENGTH (128 * 1024) +#define ICL_NOCOPY (1 << 30) + struct icl_conn { KOBJ_FIELDS; struct mtx *ic_lock; struct socket *ic_socket; #ifdef DIAGNOSTIC volatile u_int ic_outstanding_pdus; #endif STAILQ_HEAD(, icl_pdu) ic_to_send; bool ic_check_send_space; size_t ic_receive_len; int ic_receive_state; struct icl_pdu *ic_receive_pdu; struct cv ic_send_cv; struct cv ic_receive_cv; bool ic_header_crc32c; bool ic_data_crc32c; bool ic_send_running; bool ic_receive_running; size_t ic_max_data_segment_length; size_t ic_maxtags; bool ic_disconnecting; bool ic_iser; bool ic_unmapped; const char *ic_name; const char *ic_offload; void (*ic_receive)(struct icl_pdu *); void (*ic_error)(struct icl_conn *); /* * User (initiator or provider) private fields. */ void *ic_prv0; }; struct icl_drv_limits { int idl_max_recv_data_segment_length; int idl_max_send_data_segment_length; int idl_max_burst_length; int idl_first_burst_length; int spare[4]; }; + +typedef void (*icl_pdu_cb)(struct icl_pdu *, int error); struct icl_conn *icl_new_conn(const char *offload, bool iser, const char *name, struct mtx *lock); int icl_limits(const char *offload, bool iser, struct icl_drv_limits *idl); int icl_register(const char *offload, bool iser, int priority, int (*limits)(struct icl_drv_limits *), struct icl_conn *(*new_conn)(const char *, struct mtx *)); int icl_unregister(const char *offload, bool rdma); #ifdef ICL_KERNEL_PROXY struct sockaddr; struct icl_listen; /* * Target part. */ struct icl_listen *icl_listen_new(void (*accept_cb)(struct socket *, struct sockaddr *, int)); void icl_listen_free(struct icl_listen *il); int icl_listen_add(struct icl_listen *il, bool rdma, int domain, int socktype, int protocol, struct sockaddr *sa, int portal_id); int icl_listen_remove(struct icl_listen *il, struct sockaddr *sa); /* * Those two are not a public API; only to be used between icl_soft.c * and icl_soft_proxy.c. */ int icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so); int icl_soft_proxy_connect(struct icl_conn *ic, int domain, int socktype, int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa); #endif /* ICL_KERNEL_PROXY */ #endif /* !ICL_H */ Index: head/sys/dev/iscsi/icl_conn_if.m =================================================================== --- head/sys/dev/iscsi/icl_conn_if.m (revision 361938) +++ head/sys/dev/iscsi/icl_conn_if.m (revision 361939) @@ -1,123 +1,139 @@ #- # SPDX-License-Identifier: BSD-2-Clause-FreeBSD # # Copyright (c) 2014 The FreeBSD Foundation # All rights reserved. # # This software was developed by Edward Tomasz Napierala under sponsorship # from the FreeBSD Foundation. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # #include #include INTERFACE icl_conn; +CODE { + static void null_pdu_queue_cb(struct icl_conn *ic, + struct icl_pdu *ip, icl_pdu_cb cb) + { + ICL_CONN_PDU_QUEUE(ic, ip); + if (cb) + cb(ip, 0); + } +}; + METHOD size_t pdu_data_segment_length { struct icl_conn *_ic; const struct icl_pdu *_ip; }; METHOD int pdu_append_data { struct icl_conn *_ic; struct icl_pdu *_ip; const void *_addr; size_t _len; int _flags; }; METHOD void pdu_get_data { struct icl_conn *_ic; struct icl_pdu *_ip; size_t _off; void *_addr; size_t _len; }; METHOD void pdu_queue { struct icl_conn *_ic; struct icl_pdu *_ip; }; + +METHOD void pdu_queue_cb { + struct icl_conn *_ic; + struct icl_pdu *_ip; + icl_pdu_cb cb; +} DEFAULT null_pdu_queue_cb; METHOD void pdu_free { struct icl_conn *_ic; struct icl_pdu *_ip; }; METHOD struct icl_pdu * new_pdu { struct icl_conn *_ic; int _flags; }; METHOD void free { struct icl_conn *_ic; }; METHOD int handoff { struct icl_conn *_ic; int _fd; }; METHOD void close { struct icl_conn *_ic; }; METHOD int task_setup { struct icl_conn *_ic; struct icl_pdu *_ip; struct ccb_scsiio *_csio; uint32_t *_task_tag; void **_prvp; }; METHOD void task_done { struct icl_conn *_ic; void *_prv; }; METHOD int transfer_setup { struct icl_conn *_ic; union ctl_io *_io; uint32_t *_transfer_tag; void **_prvp; }; METHOD void transfer_done { struct icl_conn *_ic; void *_prv; }; # # The function below is only used with ICL_KERNEL_PROXY. # METHOD int connect { struct icl_conn *_ic; int _domain; int _socktype; int _protocol; struct sockaddr *_from_sa; struct sockaddr *_to_sa; }; Index: head/sys/dev/iscsi/icl_soft.c =================================================================== --- head/sys/dev/iscsi/icl_soft.c (revision 361938) +++ head/sys/dev/iscsi/icl_soft.c (revision 361939) @@ -1,1567 +1,1637 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Software implementation of iSCSI Common Layer kobj(9) interface. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +struct icl_soft_pdu { + struct icl_pdu ip; + + /* soft specific stuff goes here. */ + u_int ref_cnt; + icl_pdu_cb cb; + int error; +}; + static int coalesce = 1; SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN, &coalesce, 0, "Try to coalesce PDUs before sending"); static int partial_receive_len = 128 * 1024; SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, &partial_receive_len, 0, "Minimum read size for partially received " "data segment"); static int sendspace = 1048576; SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN, &sendspace, 0, "Default send socket buffer size"); static int recvspace = 1048576; SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN, &recvspace, 0, "Default receive socket buffer size"); static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend"); -static uma_zone_t icl_pdu_zone; +static uma_zone_t icl_soft_pdu_zone; static volatile u_int icl_ncons; #define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) #define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) #define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) #define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) STAILQ_HEAD(icl_pdu_stailq, icl_pdu); static icl_conn_new_pdu_t icl_soft_conn_new_pdu; static icl_conn_pdu_free_t icl_soft_conn_pdu_free; static icl_conn_pdu_data_segment_length_t icl_soft_conn_pdu_data_segment_length; static icl_conn_pdu_append_data_t icl_soft_conn_pdu_append_data; static icl_conn_pdu_get_data_t icl_soft_conn_pdu_get_data; static icl_conn_pdu_queue_t icl_soft_conn_pdu_queue; +static icl_conn_pdu_queue_cb_t icl_soft_conn_pdu_queue_cb; static icl_conn_handoff_t icl_soft_conn_handoff; static icl_conn_free_t icl_soft_conn_free; static icl_conn_close_t icl_soft_conn_close; static icl_conn_task_setup_t icl_soft_conn_task_setup; static icl_conn_task_done_t icl_soft_conn_task_done; static icl_conn_transfer_setup_t icl_soft_conn_transfer_setup; static icl_conn_transfer_done_t icl_soft_conn_transfer_done; #ifdef ICL_KERNEL_PROXY static icl_conn_connect_t icl_soft_conn_connect; #endif static kobj_method_t icl_soft_methods[] = { KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu), KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free), KOBJMETHOD(icl_conn_pdu_data_segment_length, icl_soft_conn_pdu_data_segment_length), KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data), KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data), KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue), + KOBJMETHOD(icl_conn_pdu_queue_cb, icl_soft_conn_pdu_queue_cb), KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff), KOBJMETHOD(icl_conn_free, icl_soft_conn_free), KOBJMETHOD(icl_conn_close, icl_soft_conn_close), KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup), KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done), KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup), KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done), #ifdef ICL_KERNEL_PROXY KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect), #endif { 0, 0 } }; DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn)); static void icl_conn_fail(struct icl_conn *ic) { if (ic->ic_socket == NULL) return; /* * XXX */ ic->ic_socket->so_error = EDOOFUS; (ic->ic_error)(ic); } static struct mbuf * icl_conn_receive(struct icl_conn *ic, size_t len) { struct uio uio; struct socket *so; struct mbuf *m; int error, flags; so = ic->ic_socket; memset(&uio, 0, sizeof(uio)); uio.uio_resid = len; flags = MSG_DONTWAIT; error = soreceive(so, NULL, &uio, &m, NULL, &flags); if (error != 0) { ICL_DEBUG("soreceive error %d", error); return (NULL); } if (uio.uio_resid != 0) { m_freem(m); ICL_DEBUG("short read"); return (NULL); } return (m); } static int icl_conn_receive_buf(struct icl_conn *ic, void *buf, size_t len) { struct iovec iov[1]; struct uio uio; struct socket *so; int error, flags; so = ic->ic_socket; memset(&uio, 0, sizeof(uio)); iov[0].iov_base = buf; iov[0].iov_len = len; uio.uio_iov = iov; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_resid = len; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; flags = MSG_DONTWAIT; error = soreceive(so, NULL, &uio, NULL, NULL, &flags); if (error != 0) { ICL_DEBUG("soreceive error %d", error); return (-1); } if (uio.uio_resid != 0) { ICL_DEBUG("short read"); return (-1); } return (0); } static void icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) { + struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; + KASSERT(isp->ref_cnt == 0, ("freeing active PDU")); m_freem(ip->ip_bhs_mbuf); m_freem(ip->ip_ahs_mbuf); m_freem(ip->ip_data_mbuf); - uma_zfree(icl_pdu_zone, ip); + uma_zfree(icl_soft_pdu_zone, isp); #ifdef DIAGNOSTIC refcount_release(&ic->ic_outstanding_pdus); #endif } +static void +icl_soft_pdu_call_cb(struct icl_pdu *ip) +{ + struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; + + if (isp->cb != NULL) + isp->cb(ip, isp->error); +#ifdef DIAGNOSTIC + refcount_release(&ip->ip_conn->ic_outstanding_pdus); +#endif + uma_zfree(icl_soft_pdu_zone, isp); +} + +static void +icl_soft_pdu_done(struct icl_pdu *ip, int error) +{ + struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; + + if (error != 0) + isp->error = error; + + m_freem(ip->ip_bhs_mbuf); + ip->ip_bhs_mbuf = NULL; + m_freem(ip->ip_ahs_mbuf); + ip->ip_ahs_mbuf = NULL; + m_freem(ip->ip_data_mbuf); + ip->ip_data_mbuf = NULL; + + if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1) + icl_soft_pdu_call_cb(ip); +} + +static void +icl_soft_mbuf_done(struct mbuf *mb) +{ + struct icl_soft_pdu *isp = (struct icl_soft_pdu *)mb->m_ext.ext_arg1; + + icl_soft_pdu_call_cb(&isp->ip); +} + /* * Allocate icl_pdu with empty BHS to fill up by the caller. */ struct icl_pdu * icl_soft_conn_new_pdu(struct icl_conn *ic, int flags) { + struct icl_soft_pdu *isp; struct icl_pdu *ip; #ifdef DIAGNOSTIC refcount_acquire(&ic->ic_outstanding_pdus); #endif - ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO); - if (ip == NULL) { - ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); + isp = uma_zalloc(icl_soft_pdu_zone, flags | M_ZERO); + if (isp == NULL) { + ICL_WARN("failed to allocate soft PDU"); #ifdef DIAGNOSTIC refcount_release(&ic->ic_outstanding_pdus); #endif return (NULL); } + ip = &isp->ip; ip->ip_conn = ic; CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN); ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA); if (ip->ip_bhs_mbuf == NULL) { ICL_WARN("failed to allocate BHS mbuf"); icl_soft_conn_pdu_free(ic, ip); return (NULL); } ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); return (ip); } static int icl_pdu_ahs_length(const struct icl_pdu *request) { return (request->ip_bhs->bhs_total_ahs_len * 4); } static size_t icl_pdu_data_segment_length(const struct icl_pdu *request) { uint32_t len = 0; len += request->ip_bhs->bhs_data_segment_len[0]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[1]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[2]; return (len); } size_t icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic, const struct icl_pdu *request) { return (icl_pdu_data_segment_length(request)); } static void icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) { response->ip_bhs->bhs_data_segment_len[2] = len; response->ip_bhs->bhs_data_segment_len[1] = len >> 8; response->ip_bhs->bhs_data_segment_len[0] = len >> 16; } static size_t icl_pdu_padding(const struct icl_pdu *ip) { if ((ip->ip_data_len % 4) != 0) return (4 - (ip->ip_data_len % 4)); return (0); } static size_t icl_pdu_size(const struct icl_pdu *response) { size_t len; KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); len = sizeof(struct iscsi_bhs) + response->ip_data_len + icl_pdu_padding(response); if (response->ip_conn->ic_header_crc32c) len += ISCSI_HEADER_DIGEST_SIZE; if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) len += ISCSI_DATA_DIGEST_SIZE; return (len); } static int icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep) { if (icl_conn_receive_buf(request->ip_conn, request->ip_bhs, sizeof(struct iscsi_bhs))) { ICL_DEBUG("failed to receive BHS"); return (-1); } *availablep -= sizeof(struct iscsi_bhs); return (0); } static int icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep) { request->ip_ahs_len = icl_pdu_ahs_length(request); if (request->ip_ahs_len == 0) return (0); request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn, request->ip_ahs_len); if (request->ip_ahs_mbuf == NULL) { ICL_DEBUG("failed to receive AHS"); return (-1); } *availablep -= request->ip_ahs_len; return (0); } static uint32_t icl_mbuf_to_crc32c(const struct mbuf *m0) { uint32_t digest = 0xffffffff; const struct mbuf *m; for (m = m0; m != NULL; m = m->m_next) digest = calculate_crc32c(digest, mtod(m, const void *), m->m_len); digest = digest ^ 0xffffffff; return (digest); } static int icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep) { uint32_t received_digest, valid_digest; if (request->ip_conn->ic_header_crc32c == false) return (0); CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); if (icl_conn_receive_buf(request->ip_conn, &received_digest, ISCSI_HEADER_DIGEST_SIZE)) { ICL_DEBUG("failed to receive header digest"); return (-1); } *availablep -= ISCSI_HEADER_DIGEST_SIZE; /* Temporary attach AHS to BHS to calculate header digest. */ request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf; valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); request->ip_bhs_mbuf->m_next = NULL; if (received_digest != valid_digest) { ICL_WARN("header digest check failed; got 0x%x, " "should be 0x%x", received_digest, valid_digest); return (-1); } return (0); } /* * Return the number of bytes that should be waiting in the receive socket * before icl_pdu_receive_data_segment() gets called. */ static size_t icl_pdu_data_segment_receive_len(const struct icl_pdu *request) { size_t len; len = icl_pdu_data_segment_length(request); if (len == 0) return (0); /* * Account for the parts of data segment already read from * the socket buffer. */ KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); len -= request->ip_data_len; /* * Don't always wait for the full data segment to be delivered * to the socket; this might badly affect performance due to * TCP window scaling. */ if (len > partial_receive_len) { #if 0 ICL_DEBUG("need %zd bytes of data, limiting to %zd", len, partial_receive_len)); #endif len = partial_receive_len; return (len); } /* * Account for padding. Note that due to the way code is written, * the icl_pdu_receive_data_segment() must always receive padding * along with the last part of data segment, because it would be * impossible to tell whether we've already received the full data * segment including padding, or without it. */ if ((len % 4) != 0) len += 4 - (len % 4); #if 0 ICL_DEBUG("need %zd bytes of data", len)); #endif return (len); } static int icl_pdu_receive_data_segment(struct icl_pdu *request, size_t *availablep, bool *more_neededp) { struct icl_conn *ic; size_t len, padding = 0; struct mbuf *m; ic = request->ip_conn; *more_neededp = false; ic->ic_receive_len = 0; len = icl_pdu_data_segment_length(request); if (len == 0) return (0); if ((len % 4) != 0) padding = 4 - (len % 4); /* * Account for already received parts of data segment. */ KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); len -= request->ip_data_len; if (len + padding > *availablep) { /* * Not enough data in the socket buffer. Receive as much * as we can. Don't receive padding, since, obviously, it's * not the end of data segment yet. */ #if 0 ICL_DEBUG("limited from %zd to %zd", len + padding, *availablep - padding)); #endif len = *availablep - padding; *more_neededp = true; padding = 0; } /* * Must not try to receive padding without at least one byte * of actual data segment. */ if (len > 0) { m = icl_conn_receive(request->ip_conn, len + padding); if (m == NULL) { ICL_DEBUG("failed to receive data segment"); return (-1); } if (request->ip_data_mbuf == NULL) request->ip_data_mbuf = m; else m_cat(request->ip_data_mbuf, m); request->ip_data_len += len; *availablep -= len + padding; } else ICL_DEBUG("len 0"); if (*more_neededp) ic->ic_receive_len = icl_pdu_data_segment_receive_len(request); return (0); } static int icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep) { uint32_t received_digest, valid_digest; if (request->ip_conn->ic_data_crc32c == false) return (0); if (request->ip_data_len == 0) return (0); CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE); if (icl_conn_receive_buf(request->ip_conn, &received_digest, ISCSI_DATA_DIGEST_SIZE)) { ICL_DEBUG("failed to receive data digest"); return (-1); } *availablep -= ISCSI_DATA_DIGEST_SIZE; /* * Note that ip_data_mbuf also contains padding; since digest * calculation is supposed to include that, we iterate over * the entire ip_data_mbuf chain, not just ip_data_len bytes of it. */ valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); if (received_digest != valid_digest) { ICL_WARN("data digest check failed; got 0x%x, " "should be 0x%x", received_digest, valid_digest); return (-1); } return (0); } /* * Somewhat contrary to the name, this attempts to receive only one * "part" of PDU at a time; call it repeatedly until it returns non-NULL. */ static struct icl_pdu * icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep) { struct icl_pdu *request; struct socket *so; size_t len; int error; bool more_needed; so = ic->ic_socket; if (ic->ic_receive_state == ICL_CONN_STATE_BHS) { KASSERT(ic->ic_receive_pdu == NULL, ("ic->ic_receive_pdu != NULL")); request = icl_soft_conn_new_pdu(ic, M_NOWAIT); if (request == NULL) { ICL_DEBUG("failed to allocate PDU; " "dropping connection"); icl_conn_fail(ic); return (NULL); } ic->ic_receive_pdu = request; } else { KASSERT(ic->ic_receive_pdu != NULL, ("ic->ic_receive_pdu == NULL")); request = ic->ic_receive_pdu; } if (*availablep < ic->ic_receive_len) { #if 0 ICL_DEBUG("not enough data; need %zd, " "have %zd", ic->ic_receive_len, *availablep); #endif return (NULL); } switch (ic->ic_receive_state) { case ICL_CONN_STATE_BHS: //ICL_DEBUG("receiving BHS"); error = icl_pdu_receive_bhs(request, availablep); if (error != 0) { ICL_DEBUG("failed to receive BHS; " "dropping connection"); break; } /* * We don't enforce any limit for AHS length; * its length is stored in 8 bit field. */ len = icl_pdu_data_segment_length(request); if (len > ic->ic_max_data_segment_length) { ICL_WARN("received data segment " "length %zd is larger than negotiated " "MaxDataSegmentLength %zd; " "dropping connection", len, ic->ic_max_data_segment_length); error = EINVAL; break; } ic->ic_receive_state = ICL_CONN_STATE_AHS; ic->ic_receive_len = icl_pdu_ahs_length(request); break; case ICL_CONN_STATE_AHS: //ICL_DEBUG("receiving AHS"); error = icl_pdu_receive_ahs(request, availablep); if (error != 0) { ICL_DEBUG("failed to receive AHS; " "dropping connection"); break; } ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST; if (ic->ic_header_crc32c == false) ic->ic_receive_len = 0; else ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE; break; case ICL_CONN_STATE_HEADER_DIGEST: //ICL_DEBUG("receiving header digest"); error = icl_pdu_check_header_digest(request, availablep); if (error != 0) { ICL_DEBUG("header digest failed; " "dropping connection"); break; } ic->ic_receive_state = ICL_CONN_STATE_DATA; ic->ic_receive_len = icl_pdu_data_segment_receive_len(request); break; case ICL_CONN_STATE_DATA: //ICL_DEBUG("receiving data segment"); error = icl_pdu_receive_data_segment(request, availablep, &more_needed); if (error != 0) { ICL_DEBUG("failed to receive data segment;" "dropping connection"); break; } if (more_needed) break; ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST; if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) ic->ic_receive_len = 0; else ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE; break; case ICL_CONN_STATE_DATA_DIGEST: //ICL_DEBUG("receiving data digest"); error = icl_pdu_check_data_digest(request, availablep); if (error != 0) { ICL_DEBUG("data digest failed; " "dropping connection"); break; } /* * We've received complete PDU; reset the receive state machine * and return the PDU. */ ic->ic_receive_state = ICL_CONN_STATE_BHS; ic->ic_receive_len = sizeof(struct iscsi_bhs); ic->ic_receive_pdu = NULL; return (request); default: panic("invalid ic_receive_state %d\n", ic->ic_receive_state); } if (error != 0) { /* * Don't free the PDU; it's pointed to by ic->ic_receive_pdu * and will get freed in icl_soft_conn_close(). */ icl_conn_fail(ic); } return (NULL); } static void icl_conn_receive_pdus(struct icl_conn *ic, size_t available) { struct icl_pdu *response; struct socket *so; so = ic->ic_socket; /* * This can never happen; we're careful to only mess with ic->ic_socket * pointer when the send/receive threads are not running. */ KASSERT(so != NULL, ("NULL socket")); for (;;) { if (ic->ic_disconnecting) return; if (so->so_error != 0) { ICL_DEBUG("connection error %d; " "dropping connection", so->so_error); icl_conn_fail(ic); return; } /* * Loop until we have a complete PDU or there is not enough * data in the socket buffer. */ if (available < ic->ic_receive_len) { #if 0 ICL_DEBUG("not enough data; have %zd, " "need %zd", available, ic->ic_receive_len); #endif return; } response = icl_conn_receive_pdu(ic, &available); if (response == NULL) continue; if (response->ip_ahs_len > 0) { ICL_WARN("received PDU with unsupported " "AHS; opcode 0x%x; dropping connection", response->ip_bhs->bhs_opcode); icl_soft_conn_pdu_free(ic, response); icl_conn_fail(ic); return; } (ic->ic_receive)(response); } } static void icl_receive_thread(void *arg) { struct icl_conn *ic; size_t available; struct socket *so; ic = arg; so = ic->ic_socket; for (;;) { if (ic->ic_disconnecting) { //ICL_DEBUG("terminating"); break; } /* * Set the low watermark, to be checked by * soreadable() in icl_soupcall_receive() * to avoid unnecessary wakeups until there * is enough data received to read the PDU. */ SOCKBUF_LOCK(&so->so_rcv); available = sbavail(&so->so_rcv); if (available < ic->ic_receive_len) { so->so_rcv.sb_lowat = ic->ic_receive_len; cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx); } else so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1; SOCKBUF_UNLOCK(&so->so_rcv); icl_conn_receive_pdus(ic, available); } ICL_CONN_LOCK(ic); ic->ic_receive_running = false; cv_signal(&ic->ic_send_cv); ICL_CONN_UNLOCK(ic); kthread_exit(); } static int icl_soupcall_receive(struct socket *so, void *arg, int waitflag) { struct icl_conn *ic; if (!soreadable(so)) return (SU_OK); ic = arg; cv_signal(&ic->ic_receive_cv); return (SU_OK); } static int icl_pdu_finalize(struct icl_pdu *request) { size_t padding, pdu_len; uint32_t digest, zero = 0; int ok; struct icl_conn *ic; ic = request->ip_conn; icl_pdu_set_data_segment_length(request, request->ip_data_len); pdu_len = icl_pdu_size(request); if (ic->ic_header_crc32c) { digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); ok = m_append(request->ip_bhs_mbuf, sizeof(digest), (void *)&digest); if (ok != 1) { ICL_WARN("failed to append header digest"); return (1); } } if (request->ip_data_len != 0) { padding = icl_pdu_padding(request); if (padding > 0) { ok = m_append(request->ip_data_mbuf, padding, (void *)&zero); if (ok != 1) { ICL_WARN("failed to append padding"); return (1); } } if (ic->ic_data_crc32c) { digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); ok = m_append(request->ip_data_mbuf, sizeof(digest), (void *)&digest); if (ok != 1) { ICL_WARN("failed to append data digest"); return (1); } } m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); request->ip_data_mbuf = NULL; } request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; return (0); } static void icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue) { struct icl_pdu *request, *request2; struct socket *so; long available, size, size2; int coalesced, error; ICL_CONN_LOCK_ASSERT_NOT(ic); so = ic->ic_socket; SOCKBUF_LOCK(&so->so_snd); /* * Check how much space do we have for transmit. We can't just * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE, * as it always frees the mbuf chain passed to it, even in case * of error. */ available = sbspace(&so->so_snd); /* * Notify the socket upcall that we don't need wakeups * for the time being. */ so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1; SOCKBUF_UNLOCK(&so->so_snd); while (!STAILQ_EMPTY(queue)) { request = STAILQ_FIRST(queue); size = icl_pdu_size(request); if (available < size) { /* * Set the low watermark, to be checked by * sowriteable() in icl_soupcall_send() * to avoid unnecessary wakeups until there * is enough space for the PDU to fit. */ SOCKBUF_LOCK(&so->so_snd); available = sbspace(&so->so_snd); if (available < size) { #if 1 ICL_DEBUG("no space to send; " "have %ld, need %ld", available, size); #endif so->so_snd.sb_lowat = max(size, so->so_snd.sb_hiwat / 8); SOCKBUF_UNLOCK(&so->so_snd); return; } SOCKBUF_UNLOCK(&so->so_snd); } STAILQ_REMOVE_HEAD(queue, ip_next); error = icl_pdu_finalize(request); if (error != 0) { ICL_DEBUG("failed to finalize PDU; " "dropping connection"); - icl_soft_conn_pdu_free(ic, request); + icl_soft_pdu_done(request, EIO); icl_conn_fail(ic); return; } if (coalesce) { coalesced = 1; for (;;) { request2 = STAILQ_FIRST(queue); if (request2 == NULL) break; size2 = icl_pdu_size(request2); if (available < size + size2) break; STAILQ_REMOVE_HEAD(queue, ip_next); error = icl_pdu_finalize(request2); if (error != 0) { ICL_DEBUG("failed to finalize PDU; " "dropping connection"); - icl_soft_conn_pdu_free(ic, request); - icl_soft_conn_pdu_free(ic, request2); + icl_soft_pdu_done(request, EIO); + icl_soft_pdu_done(request2, EIO); icl_conn_fail(ic); return; } m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf); request2->ip_bhs_mbuf = NULL; request->ip_bhs_mbuf->m_pkthdr.len += size2; size += size2; STAILQ_REMOVE_AFTER(queue, request, ip_next); - icl_soft_conn_pdu_free(ic, request2); + icl_soft_pdu_done(request2, 0); coalesced++; } #if 0 if (coalesced > 1) { ICL_DEBUG("coalesced %d PDUs into %ld bytes", coalesced, size); } #endif } available -= size; error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, NULL, MSG_DONTWAIT, curthread); request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */ if (error != 0) { ICL_DEBUG("failed to send PDU, error %d; " "dropping connection", error); - icl_soft_conn_pdu_free(ic, request); + icl_soft_pdu_done(request, error); icl_conn_fail(ic); return; } - icl_soft_conn_pdu_free(ic, request); + icl_soft_pdu_done(request, 0); } } static void icl_send_thread(void *arg) { struct icl_conn *ic; struct icl_pdu_stailq queue; ic = arg; STAILQ_INIT(&queue); ICL_CONN_LOCK(ic); for (;;) { for (;;) { /* * If the local queue is empty, populate it from * the main one. This way the icl_conn_send_pdus() * can go through all the queued PDUs without holding * any locks. */ if (STAILQ_EMPTY(&queue)) STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu); ic->ic_check_send_space = false; ICL_CONN_UNLOCK(ic); icl_conn_send_pdus(ic, &queue); ICL_CONN_LOCK(ic); /* * The icl_soupcall_send() was called since the last * call to sbspace(); go around; */ if (ic->ic_check_send_space) continue; /* * Local queue is empty, but we still have PDUs * in the main one; go around. */ if (STAILQ_EMPTY(&queue) && !STAILQ_EMPTY(&ic->ic_to_send)) continue; /* * There might be some stuff in the local queue, * which didn't get sent due to not having enough send * space. Wait for socket upcall. */ break; } if (ic->ic_disconnecting) { //ICL_DEBUG("terminating"); break; } cv_wait(&ic->ic_send_cv, ic->ic_lock); } /* * We're exiting; move PDUs back to the main queue, so they can * get freed properly. At this point ordering doesn't matter. */ STAILQ_CONCAT(&ic->ic_to_send, &queue); ic->ic_send_running = false; cv_signal(&ic->ic_send_cv); ICL_CONN_UNLOCK(ic); kthread_exit(); } static int icl_soupcall_send(struct socket *so, void *arg, int waitflag) { struct icl_conn *ic; if (!sowriteable(so)) return (SU_OK); ic = arg; ICL_CONN_LOCK(ic); ic->ic_check_send_space = true; ICL_CONN_UNLOCK(ic); cv_signal(&ic->ic_send_cv); return (SU_OK); } static int icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, const void *addr, size_t len, int flags) { + struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request; struct mbuf *mb, *newmb; size_t copylen, off = 0; KASSERT(len > 0, ("len == 0")); - newmb = m_getm2(NULL, len, flags, MT_DATA, 0); - if (newmb == NULL) { - ICL_WARN("failed to allocate mbuf for %zd bytes", len); - return (ENOMEM); - } + if (flags & ICL_NOCOPY) { + newmb = m_get(flags & ~ICL_NOCOPY, MT_DATA); + if (newmb == NULL) { + ICL_WARN("failed to allocate mbuf"); + return (ENOMEM); + } - for (mb = newmb; mb != NULL; mb = mb->m_next) { - copylen = min(M_TRAILINGSPACE(mb), len - off); - memcpy(mtod(mb, char *), (const char *)addr + off, copylen); - mb->m_len = copylen; - off += copylen; + newmb->m_flags |= M_RDONLY; + m_extaddref(newmb, __DECONST(char *, addr), len, &isp->ref_cnt, + icl_soft_mbuf_done, isp, NULL); + newmb->m_len = len; + } else { + newmb = m_getm2(NULL, len, flags, MT_DATA, 0); + if (newmb == NULL) { + ICL_WARN("failed to allocate mbuf for %zd bytes", len); + return (ENOMEM); + } + + for (mb = newmb; mb != NULL; mb = mb->m_next) { + copylen = min(M_TRAILINGSPACE(mb), len - off); + memcpy(mtod(mb, char *), (const char *)addr + off, copylen); + mb->m_len = copylen; + off += copylen; + } + KASSERT(off == len, ("%s: off != len", __func__)); } - KASSERT(off == len, ("%s: off != len", __func__)); if (request->ip_data_mbuf == NULL) { request->ip_data_mbuf = newmb; request->ip_data_len = len; } else { m_cat(request->ip_data_mbuf, newmb); request->ip_data_len += len; } return (0); } void icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, size_t off, void *addr, size_t len) { m_copydata(ip->ip_data_mbuf, off, len, addr); } static void -icl_pdu_queue(struct icl_pdu *ip) +icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) { - struct icl_conn *ic; - ic = ip->ip_conn; + icl_soft_conn_pdu_queue_cb(ic, ip, NULL); +} +static void +icl_soft_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip, + icl_pdu_cb cb) +{ + struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip; + ICL_CONN_LOCK_ASSERT(ic); + isp->ref_cnt++; + isp->cb = cb; if (ic->ic_disconnecting || ic->ic_socket == NULL) { ICL_DEBUG("icl_pdu_queue on closed connection"); - icl_soft_conn_pdu_free(ic, ip); + icl_soft_pdu_done(ip, ENOTCONN); return; } if (!STAILQ_EMPTY(&ic->ic_to_send)) { STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); /* * If the queue is not empty, someone else had already * signaled the send thread; no need to do that again, * just return. */ return; } STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); cv_signal(&ic->ic_send_cv); } -void -icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) -{ - - icl_pdu_queue(ip); -} - static struct icl_conn * icl_soft_new_conn(const char *name, struct mtx *lock) { struct icl_conn *ic; refcount_acquire(&icl_ncons); ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO); STAILQ_INIT(&ic->ic_to_send); ic->ic_lock = lock; cv_init(&ic->ic_send_cv, "icl_tx"); cv_init(&ic->ic_receive_cv, "icl_rx"); #ifdef DIAGNOSTIC refcount_init(&ic->ic_outstanding_pdus, 0); #endif ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH; ic->ic_name = name; ic->ic_offload = "None"; ic->ic_unmapped = false; return (ic); } void icl_soft_conn_free(struct icl_conn *ic) { #ifdef DIAGNOSTIC KASSERT(ic->ic_outstanding_pdus == 0, ("destroying session with %d outstanding PDUs", ic->ic_outstanding_pdus)); #endif cv_destroy(&ic->ic_send_cv); cv_destroy(&ic->ic_receive_cv); kobj_delete((struct kobj *)ic, M_ICL_SOFT); refcount_release(&icl_ncons); } static int icl_conn_start(struct icl_conn *ic) { size_t minspace; struct sockopt opt; int error, one = 1; ICL_CONN_LOCK(ic); /* * XXX: Ugly hack. */ if (ic->ic_socket == NULL) { ICL_CONN_UNLOCK(ic); return (EINVAL); } ic->ic_receive_state = ICL_CONN_STATE_BHS; ic->ic_receive_len = sizeof(struct iscsi_bhs); ic->ic_disconnecting = false; ICL_CONN_UNLOCK(ic); /* * For sendspace, this is required because the current code cannot * send a PDU in pieces; thus, the minimum buffer size is equal * to the maximum PDU size. "+4" is to account for possible padding. * * What we should actually do here is to use autoscaling, but set * some minimal buffer size to "minspace". I don't know a way to do * that, though. */ minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; if (sendspace < minspace) { ICL_WARN("kern.icl.sendspace too low; must be at least %zd", minspace); sendspace = minspace; } if (recvspace < minspace) { ICL_WARN("kern.icl.recvspace too low; must be at least %zd", minspace); recvspace = minspace; } error = soreserve(ic->ic_socket, sendspace, recvspace); if (error != 0) { ICL_WARN("soreserve failed with error %d", error); icl_soft_conn_close(ic); return (error); } ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; /* * Disable Nagle. */ bzero(&opt, sizeof(opt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = IPPROTO_TCP; opt.sopt_name = TCP_NODELAY; opt.sopt_val = &one; opt.sopt_valsize = sizeof(one); error = sosetopt(ic->ic_socket, &opt); if (error != 0) { ICL_WARN("disabling TCP_NODELAY failed with error %d", error); icl_soft_conn_close(ic); return (error); } /* * Register socket upcall, to get notified about incoming PDUs * and free space to send outgoing ones. */ SOCKBUF_LOCK(&ic->ic_socket->so_snd); soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic); SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); SOCKBUF_LOCK(&ic->ic_socket->so_rcv); soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic); SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); /* * Start threads. */ ICL_CONN_LOCK(ic); ic->ic_send_running = ic->ic_receive_running = true; ICL_CONN_UNLOCK(ic); error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx", ic->ic_name); if (error != 0) { ICL_WARN("kthread_add(9) failed with error %d", error); ICL_CONN_LOCK(ic); ic->ic_send_running = ic->ic_receive_running = false; cv_signal(&ic->ic_send_cv); ICL_CONN_UNLOCK(ic); icl_soft_conn_close(ic); return (error); } error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx", ic->ic_name); if (error != 0) { ICL_WARN("kthread_add(9) failed with error %d", error); ICL_CONN_LOCK(ic); ic->ic_receive_running = false; cv_signal(&ic->ic_send_cv); ICL_CONN_UNLOCK(ic); icl_soft_conn_close(ic); return (error); } return (0); } int icl_soft_conn_handoff(struct icl_conn *ic, int fd) { struct file *fp; struct socket *so; cap_rights_t rights; int error; ICL_CONN_LOCK_ASSERT_NOT(ic); #ifdef ICL_KERNEL_PROXY /* * We're transitioning to Full Feature phase, and we don't * really care. */ if (fd == 0) { ICL_CONN_LOCK(ic); if (ic->ic_socket == NULL) { ICL_CONN_UNLOCK(ic); ICL_WARN("proxy handoff without connect"); return (EINVAL); } ICL_CONN_UNLOCK(ic); return (0); } #endif /* * Steal the socket from userland. */ error = fget(curthread, fd, cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); if (error != 0) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, curthread); return (EINVAL); } so = fp->f_data; if (so->so_type != SOCK_STREAM) { fdrop(fp, curthread); return (EINVAL); } ICL_CONN_LOCK(ic); if (ic->ic_socket != NULL) { ICL_CONN_UNLOCK(ic); fdrop(fp, curthread); return (EBUSY); } ic->ic_socket = fp->f_data; fp->f_ops = &badfileops; fp->f_data = NULL; fdrop(fp, curthread); ICL_CONN_UNLOCK(ic); error = icl_conn_start(ic); return (error); } void icl_soft_conn_close(struct icl_conn *ic) { struct icl_pdu *pdu; struct socket *so; ICL_CONN_LOCK(ic); /* * Wake up the threads, so they can properly terminate. */ ic->ic_disconnecting = true; while (ic->ic_receive_running || ic->ic_send_running) { cv_signal(&ic->ic_receive_cv); cv_signal(&ic->ic_send_cv); cv_wait(&ic->ic_send_cv, ic->ic_lock); } /* Some other thread could close the connection same time. */ so = ic->ic_socket; if (so == NULL) { ICL_CONN_UNLOCK(ic); return; } ic->ic_socket = NULL; /* * Deregister socket upcalls. */ ICL_CONN_UNLOCK(ic); SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_upcall != NULL) soupcall_clear(so, SO_SND); SOCKBUF_UNLOCK(&so->so_snd); SOCKBUF_LOCK(&so->so_rcv); if (so->so_rcv.sb_upcall != NULL) soupcall_clear(so, SO_RCV); SOCKBUF_UNLOCK(&so->so_rcv); soclose(so); ICL_CONN_LOCK(ic); if (ic->ic_receive_pdu != NULL) { //ICL_DEBUG("freeing partially received PDU"); icl_soft_conn_pdu_free(ic, ic->ic_receive_pdu); ic->ic_receive_pdu = NULL; } /* * Remove any outstanding PDUs from the send queue. */ while (!STAILQ_EMPTY(&ic->ic_to_send)) { pdu = STAILQ_FIRST(&ic->ic_to_send); STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next); - icl_soft_conn_pdu_free(ic, pdu); + icl_soft_pdu_done(pdu, ENOTCONN); } KASSERT(STAILQ_EMPTY(&ic->ic_to_send), ("destroying session with non-empty send queue")); ICL_CONN_UNLOCK(ic); } int icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) { return (0); } void icl_soft_conn_task_done(struct icl_conn *ic, void *prv) { } int icl_soft_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io, uint32_t *transfer_tag, void **prvp) { return (0); } void icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv) { } static int icl_soft_limits(struct icl_drv_limits *idl) { idl->idl_max_recv_data_segment_length = 128 * 1024; idl->idl_max_send_data_segment_length = 128 * 1024; idl->idl_max_burst_length = 262144; idl->idl_first_burst_length = 65536; return (0); } #ifdef ICL_KERNEL_PROXY int icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype, int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa) { return (icl_soft_proxy_connect(ic, domain, socktype, protocol, from_sa, to_sa)); } int icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so) { int error; ICL_CONN_LOCK_ASSERT_NOT(ic); if (so->so_type != SOCK_STREAM) return (EINVAL); ICL_CONN_LOCK(ic); if (ic->ic_socket != NULL) { ICL_CONN_UNLOCK(ic); return (EBUSY); } ic->ic_socket = so; ICL_CONN_UNLOCK(ic); error = icl_conn_start(ic); return (error); } #endif /* ICL_KERNEL_PROXY */ static int icl_soft_load(void) { int error; - icl_pdu_zone = uma_zcreate("icl_pdu", - sizeof(struct icl_pdu), NULL, NULL, NULL, NULL, + icl_soft_pdu_zone = uma_zcreate("icl_soft_pdu", + sizeof(struct icl_soft_pdu), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); refcount_init(&icl_ncons, 0); /* * The reason we call this "none" is that to the user, * it's known as "offload driver"; "offload driver: soft" * doesn't make much sense. */ error = icl_register("none", false, 0, icl_soft_limits, icl_soft_new_conn); KASSERT(error == 0, ("failed to register")); #if defined(ICL_KERNEL_PROXY) && 0 /* * Debugging aid for kernel proxy functionality. */ error = icl_register("proxytest", true, 0, icl_soft_limits, icl_soft_new_conn); KASSERT(error == 0, ("failed to register")); #endif return (error); } static int icl_soft_unload(void) { if (icl_ncons != 0) return (EBUSY); icl_unregister("none", false); #if defined(ICL_KERNEL_PROXY) && 0 icl_unregister("proxytest", true); #endif - uma_zdestroy(icl_pdu_zone); + uma_zdestroy(icl_soft_pdu_zone); return (0); } static int icl_soft_modevent(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: return (icl_soft_load()); case MOD_UNLOAD: return (icl_soft_unload()); default: return (EINVAL); } } moduledata_t icl_soft_data = { "icl_soft", icl_soft_modevent, 0 }; DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); MODULE_DEPEND(icl_soft, icl, 1, 1, 1); MODULE_VERSION(icl_soft, 1); Index: head/sys/dev/iscsi/icl_wrappers.h =================================================================== --- head/sys/dev/iscsi/icl_wrappers.h (revision 361938) +++ head/sys/dev/iscsi/icl_wrappers.h (revision 361939) @@ -1,152 +1,159 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * This file is used to provide the initiator and target with a prettier * interface. It must not be included by ICL modules, such as icl_soft.c. */ #ifndef ICL_WRAPPERS_H #define ICL_WRAPPERS_H #include #include #include static inline struct icl_pdu * icl_pdu_new(struct icl_conn *ic, int flags) { return (ICL_CONN_NEW_PDU(ic, flags)); } static inline size_t icl_pdu_data_segment_length(const struct icl_pdu *ip) { return (ICL_CONN_PDU_DATA_SEGMENT_LENGTH(ip->ip_conn, ip)); } static inline int icl_pdu_append_data(struct icl_pdu *ip, const void *addr, size_t len, int flags) { return (ICL_CONN_PDU_APPEND_DATA(ip->ip_conn, ip, addr, len, flags)); } static inline void icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len) { ICL_CONN_PDU_GET_DATA(ip->ip_conn, ip, off, addr, len); } static inline void icl_pdu_queue(struct icl_pdu *ip) { ICL_CONN_PDU_QUEUE(ip->ip_conn, ip); } static inline void +icl_pdu_queue_cb(struct icl_pdu *ip, icl_pdu_cb cb) +{ + + ICL_CONN_PDU_QUEUE_CB(ip->ip_conn, ip, cb); +} + +static inline void icl_pdu_free(struct icl_pdu *ip) { ICL_CONN_PDU_FREE(ip->ip_conn, ip); } static inline void icl_conn_free(struct icl_conn *ic) { ICL_CONN_FREE(ic); } static inline int icl_conn_handoff(struct icl_conn *ic, int fd) { return (ICL_CONN_HANDOFF(ic, fd)); } static inline void icl_conn_close(struct icl_conn *ic) { ICL_CONN_CLOSE(ic); } static inline int icl_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) { return (ICL_CONN_TASK_SETUP(ic, ip, csio, task_tagp, prvp)); } static inline void icl_conn_task_done(struct icl_conn *ic, void *prv) { ICL_CONN_TASK_DONE(ic, prv); } static inline int icl_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io, uint32_t *transfer_tagp, void **prvp) { return (ICL_CONN_TRANSFER_SETUP(ic, io, transfer_tagp, prvp)); } static inline void icl_conn_transfer_done(struct icl_conn *ic, void *prv) { ICL_CONN_TRANSFER_DONE(ic, prv); } /* * The function below is only used with ICL_KERNEL_PROXY. */ static inline int icl_conn_connect(struct icl_conn *ic, int domain, int socktype, int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa) { return (ICL_CONN_CONNECT(ic, domain, socktype, protocol, from_sa, to_sa)); } #endif /* !ICL_WRAPPERS_H */