Index: head/sys/cam/cam_ccb.h =================================================================== --- head/sys/cam/cam_ccb.h (revision 322996) +++ head/sys/cam/cam_ccb.h (revision 322997) @@ -1,1538 +1,1539 @@ /*- * Data structures and definitions for CAM Control Blocks (CCBs). * * Copyright (c) 1997, 1998 Justin T. Gibbs. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _CAM_CAM_CCB_H #define _CAM_CAM_CCB_H 1 #include #include #include #include #ifndef _KERNEL #include #endif #include #include #include #include #include /* General allocation length definitions for CCB structures */ #define IOCDBLEN CAM_MAX_CDBLEN /* Space for CDB bytes/pointer */ #define VUHBALEN 14 /* Vendor Unique HBA length */ #define SIM_IDLEN 16 /* ASCII string len for SIM ID */ #define HBA_IDLEN 16 /* ASCII string len for HBA ID */ #define DEV_IDLEN 16 /* ASCII string len for device names */ #define CCB_PERIPH_PRIV_SIZE 2 /* size of peripheral private area */ #define CCB_SIM_PRIV_SIZE 2 /* size of sim private area */ /* Struct definitions for CAM control blocks */ /* Common CCB header */ /* CAM CCB flags */ typedef enum { CAM_CDB_POINTER = 0x00000001,/* The CDB field is a pointer */ CAM_QUEUE_ENABLE = 0x00000002,/* SIM queue actions are enabled */ CAM_CDB_LINKED = 0x00000004,/* CCB contains a linked CDB */ CAM_NEGOTIATE = 0x00000008,/* * Perform transport negotiation * with this command. */ CAM_DATA_ISPHYS = 0x00000010,/* Data type with physical addrs */ CAM_DIS_AUTOSENSE = 0x00000020,/* Disable autosense feature */ CAM_DIR_BOTH = 0x00000000,/* Data direction (00:IN/OUT) */ CAM_DIR_IN = 0x00000040,/* Data direction (01:DATA IN) */ CAM_DIR_OUT = 0x00000080,/* Data direction (10:DATA OUT) */ CAM_DIR_NONE = 0x000000C0,/* Data direction (11:no data) */ CAM_DIR_MASK = 0x000000C0,/* Data direction Mask */ CAM_DATA_VADDR = 0x00000000,/* Data type (000:Virtual) */ CAM_DATA_PADDR = 0x00000010,/* Data type (001:Physical) */ CAM_DATA_SG = 0x00040000,/* Data type (010:sglist) */ CAM_DATA_SG_PADDR = 0x00040010,/* Data type (011:sglist phys) */ CAM_DATA_BIO = 0x00200000,/* Data type (100:bio) */ CAM_DATA_MASK = 0x00240010,/* Data type mask */ CAM_SOFT_RST_OP = 0x00000100,/* Use Soft reset alternative */ CAM_ENG_SYNC = 0x00000200,/* Flush resid bytes on complete */ CAM_DEV_QFRZDIS = 0x00000400,/* Disable DEV Q freezing */ CAM_DEV_QFREEZE = 0x00000800,/* Freeze DEV Q on execution */ CAM_HIGH_POWER = 0x00001000,/* Command takes a lot of power */ CAM_SENSE_PTR = 0x00002000,/* Sense data is a pointer */ CAM_SENSE_PHYS = 0x00004000,/* Sense pointer is physical addr*/ CAM_TAG_ACTION_VALID = 0x00008000,/* Use the tag action in this ccb*/ CAM_PASS_ERR_RECOVER = 0x00010000,/* Pass driver does err. recovery*/ CAM_DIS_DISCONNECT = 0x00020000,/* Disable disconnect */ CAM_MSG_BUF_PHYS = 0x00080000,/* Message buffer ptr is physical*/ CAM_SNS_BUF_PHYS = 0x00100000,/* Autosense data ptr is physical*/ CAM_CDB_PHYS = 0x00400000,/* CDB poiner is physical */ CAM_ENG_SGLIST = 0x00800000,/* SG list is for the HBA engine */ /* Phase cognizant mode flags */ CAM_DIS_AUTOSRP = 0x01000000,/* Disable autosave/restore ptrs */ CAM_DIS_AUTODISC = 0x02000000,/* Disable auto disconnect */ CAM_TGT_CCB_AVAIL = 0x04000000,/* Target CCB available */ CAM_TGT_PHASE_MODE = 0x08000000,/* The SIM runs in phase mode */ CAM_MSGB_VALID = 0x10000000,/* Message buffer valid */ CAM_STATUS_VALID = 0x20000000,/* Status buffer valid */ CAM_DATAB_VALID = 0x40000000,/* Data buffer valid */ /* Host target Mode flags */ CAM_SEND_SENSE = 0x08000000,/* Send sense data with status */ CAM_TERM_IO = 0x10000000,/* Terminate I/O Message sup. */ CAM_DISCONNECT = 0x20000000,/* Disconnects are mandatory */ CAM_SEND_STATUS = 0x40000000,/* Send status after data phase */ CAM_UNLOCKED = 0x80000000 /* Call callback without lock. */ } ccb_flags; typedef enum { CAM_USER_DATA_ADDR = 0x00000002,/* Userspace data pointers */ CAM_SG_FORMAT_IOVEC = 0x00000004,/* iovec instead of busdma S/G*/ CAM_UNMAPPED_BUF = 0x00000008 /* use unmapped I/O */ } ccb_xflags; /* XPT Opcodes for xpt_action */ typedef enum { /* Function code flags are bits greater than 0xff */ XPT_FC_QUEUED = 0x100, /* Non-immediate function code */ XPT_FC_USER_CCB = 0x200, XPT_FC_XPT_ONLY = 0x400, /* Only for the transport layer device */ XPT_FC_DEV_QUEUED = 0x800 | XPT_FC_QUEUED, /* Passes through the device queues */ /* Common function commands: 0x00->0x0F */ XPT_NOOP = 0x00, /* Execute Nothing */ XPT_SCSI_IO = 0x01 | XPT_FC_DEV_QUEUED, /* Execute the requested I/O operation */ XPT_GDEV_TYPE = 0x02, /* Get type information for specified device */ XPT_GDEVLIST = 0x03, /* Get a list of peripheral devices */ XPT_PATH_INQ = 0x04, /* Path routing inquiry */ XPT_REL_SIMQ = 0x05, /* Release a frozen device queue */ XPT_SASYNC_CB = 0x06, /* Set Asynchronous Callback Parameters */ XPT_SDEV_TYPE = 0x07, /* Set device type information */ XPT_SCAN_BUS = 0x08 | XPT_FC_QUEUED | XPT_FC_USER_CCB | XPT_FC_XPT_ONLY, /* (Re)Scan the SCSI Bus */ XPT_DEV_MATCH = 0x09 | XPT_FC_XPT_ONLY, /* Get EDT entries matching the given pattern */ XPT_DEBUG = 0x0a, /* Turn on debugging for a bus, target or lun */ XPT_PATH_STATS = 0x0b, /* Path statistics (error counts, etc.) */ XPT_GDEV_STATS = 0x0c, /* Device statistics (error counts, etc.) */ XPT_DEV_ADVINFO = 0x0e, /* Get/Set Device advanced information */ XPT_ASYNC = 0x0f | XPT_FC_QUEUED | XPT_FC_USER_CCB | XPT_FC_XPT_ONLY, /* Asynchronous event */ /* SCSI Control Functions: 0x10->0x1F */ XPT_ABORT = 0x10, /* Abort the specified CCB */ XPT_RESET_BUS = 0x11 | XPT_FC_XPT_ONLY, /* Reset the specified SCSI bus */ XPT_RESET_DEV = 0x12 | XPT_FC_DEV_QUEUED, /* Bus Device Reset the specified SCSI device */ XPT_TERM_IO = 0x13, /* Terminate the I/O process */ XPT_SCAN_LUN = 0x14 | XPT_FC_QUEUED | XPT_FC_USER_CCB | XPT_FC_XPT_ONLY, /* Scan Logical Unit */ XPT_GET_TRAN_SETTINGS = 0x15, /* * Get default/user transfer settings * for the target */ XPT_SET_TRAN_SETTINGS = 0x16, /* * Set transfer rate/width * negotiation settings */ XPT_CALC_GEOMETRY = 0x17, /* * Calculate the geometry parameters for * a device give the sector size and * volume size. */ XPT_ATA_IO = 0x18 | XPT_FC_DEV_QUEUED, /* Execute the requested ATA I/O operation */ XPT_GET_SIM_KNOB_OLD = 0x18, /* Compat only */ XPT_SET_SIM_KNOB = 0x19, /* * Set SIM specific knob values. */ XPT_GET_SIM_KNOB = 0x1a, /* * Get SIM specific knob values. */ XPT_SMP_IO = 0x1b | XPT_FC_DEV_QUEUED, /* Serial Management Protocol */ XPT_NVME_IO = 0x1c | XPT_FC_DEV_QUEUED, /* Execute the requested NVMe I/O operation */ XPT_MMC_IO = 0x1d | XPT_FC_DEV_QUEUED, /* Placeholder for MMC / SD / SDIO I/O stuff */ XPT_SCAN_TGT = 0x1e | XPT_FC_QUEUED | XPT_FC_USER_CCB | XPT_FC_XPT_ONLY, /* Scan Target */ XPT_NVME_ADMIN = 0x1f | XPT_FC_DEV_QUEUED, /* Execute the requested NVMe Admin operation */ /* HBA engine commands 0x20->0x2F */ XPT_ENG_INQ = 0x20 | XPT_FC_XPT_ONLY, /* HBA engine feature inquiry */ XPT_ENG_EXEC = 0x21 | XPT_FC_DEV_QUEUED, /* HBA execute engine request */ /* Target mode commands: 0x30->0x3F */ XPT_EN_LUN = 0x30, /* Enable LUN as a target */ XPT_TARGET_IO = 0x31 | XPT_FC_DEV_QUEUED, /* Execute target I/O request */ XPT_ACCEPT_TARGET_IO = 0x32 | XPT_FC_QUEUED | XPT_FC_USER_CCB, /* Accept Host Target Mode CDB */ XPT_CONT_TARGET_IO = 0x33 | XPT_FC_DEV_QUEUED, /* Continue Host Target I/O Connection */ XPT_IMMED_NOTIFY = 0x34 | XPT_FC_QUEUED | XPT_FC_USER_CCB, /* Notify Host Target driver of event (obsolete) */ XPT_NOTIFY_ACK = 0x35, /* Acknowledgement of event (obsolete) */ XPT_IMMEDIATE_NOTIFY = 0x36 | XPT_FC_QUEUED | XPT_FC_USER_CCB, /* Notify Host Target driver of event */ XPT_NOTIFY_ACKNOWLEDGE = 0x37 | XPT_FC_QUEUED | XPT_FC_USER_CCB, /* Acknowledgement of event */ XPT_REPROBE_LUN = 0x38 | XPT_FC_QUEUED | XPT_FC_USER_CCB, /* Query device capacity and notify GEOM */ /* Vendor Unique codes: 0x80->0x8F */ XPT_VUNIQUE = 0x80 } xpt_opcode; #define XPT_FC_GROUP_MASK 0xF0 #define XPT_FC_GROUP(op) ((op) & XPT_FC_GROUP_MASK) #define XPT_FC_GROUP_COMMON 0x00 #define XPT_FC_GROUP_SCSI_CONTROL 0x10 #define XPT_FC_GROUP_HBA_ENGINE 0x20 #define XPT_FC_GROUP_TMODE 0x30 #define XPT_FC_GROUP_VENDOR_UNIQUE 0x80 #define XPT_FC_IS_DEV_QUEUED(ccb) \ (((ccb)->ccb_h.func_code & XPT_FC_DEV_QUEUED) == XPT_FC_DEV_QUEUED) #define XPT_FC_IS_QUEUED(ccb) \ (((ccb)->ccb_h.func_code & XPT_FC_QUEUED) != 0) typedef enum { PROTO_UNKNOWN, PROTO_UNSPECIFIED, PROTO_SCSI, /* Small Computer System Interface */ PROTO_ATA, /* AT Attachment */ PROTO_ATAPI, /* AT Attachment Packetized Interface */ PROTO_SATAPM, /* SATA Port Multiplier */ PROTO_SEMB, /* SATA Enclosure Management Bridge */ PROTO_NVME, /* NVME */ PROTO_MMCSD, /* MMC, SD, SDIO */ } cam_proto; typedef enum { XPORT_UNKNOWN, XPORT_UNSPECIFIED, XPORT_SPI, /* SCSI Parallel Interface */ XPORT_FC, /* Fiber Channel */ XPORT_SSA, /* Serial Storage Architecture */ XPORT_USB, /* Universal Serial Bus */ XPORT_PPB, /* Parallel Port Bus */ XPORT_ATA, /* AT Attachment */ XPORT_SAS, /* Serial Attached SCSI */ XPORT_SATA, /* Serial AT Attachment */ XPORT_ISCSI, /* iSCSI */ XPORT_SRP, /* SCSI RDMA Protocol */ XPORT_NVME, /* NVMe over PCIe */ XPORT_MMCSD, /* MMC, SD, SDIO card */ } cam_xport; #define XPORT_IS_NVME(t) ((t) == XPORT_NVME) #define XPORT_IS_ATA(t) ((t) == XPORT_ATA || (t) == XPORT_SATA) #define XPORT_IS_SCSI(t) ((t) != XPORT_UNKNOWN && \ (t) != XPORT_UNSPECIFIED && \ !XPORT_IS_ATA(t) && !XPORT_IS_NVME(t)) #define XPORT_DEVSTAT_TYPE(t) (XPORT_IS_ATA(t) ? DEVSTAT_TYPE_IF_IDE : \ XPORT_IS_SCSI(t) ? DEVSTAT_TYPE_IF_SCSI : \ DEVSTAT_TYPE_IF_OTHER) #define PROTO_VERSION_UNKNOWN (UINT_MAX - 1) #define PROTO_VERSION_UNSPECIFIED UINT_MAX #define XPORT_VERSION_UNKNOWN (UINT_MAX - 1) #define XPORT_VERSION_UNSPECIFIED UINT_MAX typedef union { LIST_ENTRY(ccb_hdr) le; SLIST_ENTRY(ccb_hdr) sle; TAILQ_ENTRY(ccb_hdr) tqe; STAILQ_ENTRY(ccb_hdr) stqe; } camq_entry; typedef union { void *ptr; u_long field; u_int8_t bytes[sizeof(uintptr_t)]; } ccb_priv_entry; typedef union { ccb_priv_entry entries[CCB_PERIPH_PRIV_SIZE]; u_int8_t bytes[CCB_PERIPH_PRIV_SIZE * sizeof(ccb_priv_entry)]; } ccb_ppriv_area; typedef union { ccb_priv_entry entries[CCB_SIM_PRIV_SIZE]; u_int8_t bytes[CCB_SIM_PRIV_SIZE * sizeof(ccb_priv_entry)]; } ccb_spriv_area; typedef struct { struct timeval *etime; uintptr_t sim_data; uintptr_t periph_data; } ccb_qos_area; struct ccb_hdr { cam_pinfo pinfo; /* Info for priority scheduling */ camq_entry xpt_links; /* For chaining in the XPT layer */ camq_entry sim_links; /* For chaining in the SIM layer */ camq_entry periph_links; /* For chaining in the type driver */ u_int32_t retry_count; void (*cbfcnp)(struct cam_periph *, union ccb *); /* Callback on completion function */ xpt_opcode func_code; /* XPT function code */ u_int32_t status; /* Status returned by CAM subsystem */ struct cam_path *path; /* Compiled path for this ccb */ path_id_t path_id; /* Path ID for the request */ target_id_t target_id; /* Target device ID */ lun_id_t target_lun; /* Target LUN number */ u_int32_t flags; /* ccb_flags */ u_int32_t xflags; /* Extended flags */ ccb_ppriv_area periph_priv; ccb_spriv_area sim_priv; ccb_qos_area qos; u_int32_t timeout; /* Hard timeout value in mseconds */ struct timeval softtimeout; /* Soft timeout value in sec + usec */ }; /* Get Device Information CCB */ struct ccb_getdev { struct ccb_hdr ccb_h; cam_proto protocol; struct scsi_inquiry_data inq_data; struct ata_params ident_data; u_int8_t serial_num[252]; u_int8_t inq_flags; u_int8_t serial_num_len; const struct nvme_controller_data *nvme_cdata; const struct nvme_namespace_data *nvme_data; }; /* Device Statistics CCB */ struct ccb_getdevstats { struct ccb_hdr ccb_h; int dev_openings; /* Space left for more work on device*/ int dev_active; /* Transactions running on the device */ int allocated; /* CCBs allocated for the device */ int queued; /* CCBs queued to be sent to the device */ int held; /* * CCBs held by peripheral drivers * for this device */ int maxtags; /* * Boundary conditions for number of * tagged operations */ int mintags; struct timeval last_reset; /* Time of last bus reset/loop init */ }; typedef enum { CAM_GDEVLIST_LAST_DEVICE, CAM_GDEVLIST_LIST_CHANGED, CAM_GDEVLIST_MORE_DEVS, CAM_GDEVLIST_ERROR } ccb_getdevlist_status_e; struct ccb_getdevlist { struct ccb_hdr ccb_h; char periph_name[DEV_IDLEN]; u_int32_t unit_number; unsigned int generation; u_int32_t index; ccb_getdevlist_status_e status; }; typedef enum { PERIPH_MATCH_NONE = 0x000, PERIPH_MATCH_PATH = 0x001, PERIPH_MATCH_TARGET = 0x002, PERIPH_MATCH_LUN = 0x004, PERIPH_MATCH_NAME = 0x008, PERIPH_MATCH_UNIT = 0x010, PERIPH_MATCH_ANY = 0x01f } periph_pattern_flags; struct periph_match_pattern { char periph_name[DEV_IDLEN]; u_int32_t unit_number; path_id_t path_id; target_id_t target_id; lun_id_t target_lun; periph_pattern_flags flags; }; typedef enum { DEV_MATCH_NONE = 0x000, DEV_MATCH_PATH = 0x001, DEV_MATCH_TARGET = 0x002, DEV_MATCH_LUN = 0x004, DEV_MATCH_INQUIRY = 0x008, DEV_MATCH_DEVID = 0x010, DEV_MATCH_ANY = 0x00f } dev_pattern_flags; struct device_id_match_pattern { uint8_t id_len; uint8_t id[256]; }; struct device_match_pattern { path_id_t path_id; target_id_t target_id; lun_id_t target_lun; dev_pattern_flags flags; union { struct scsi_static_inquiry_pattern inq_pat; struct device_id_match_pattern devid_pat; } data; }; typedef enum { BUS_MATCH_NONE = 0x000, BUS_MATCH_PATH = 0x001, BUS_MATCH_NAME = 0x002, BUS_MATCH_UNIT = 0x004, BUS_MATCH_BUS_ID = 0x008, BUS_MATCH_ANY = 0x00f } bus_pattern_flags; struct bus_match_pattern { path_id_t path_id; char dev_name[DEV_IDLEN]; u_int32_t unit_number; u_int32_t bus_id; bus_pattern_flags flags; }; union match_pattern { struct periph_match_pattern periph_pattern; struct device_match_pattern device_pattern; struct bus_match_pattern bus_pattern; }; typedef enum { DEV_MATCH_PERIPH, DEV_MATCH_DEVICE, DEV_MATCH_BUS } dev_match_type; struct dev_match_pattern { dev_match_type type; union match_pattern pattern; }; struct periph_match_result { char periph_name[DEV_IDLEN]; u_int32_t unit_number; path_id_t path_id; target_id_t target_id; lun_id_t target_lun; }; typedef enum { DEV_RESULT_NOFLAG = 0x00, DEV_RESULT_UNCONFIGURED = 0x01 } dev_result_flags; struct device_match_result { path_id_t path_id; target_id_t target_id; lun_id_t target_lun; cam_proto protocol; struct scsi_inquiry_data inq_data; struct ata_params ident_data; dev_result_flags flags; struct mmc_params mmc_ident_data; }; struct bus_match_result { path_id_t path_id; char dev_name[DEV_IDLEN]; u_int32_t unit_number; u_int32_t bus_id; }; union match_result { struct periph_match_result periph_result; struct device_match_result device_result; struct bus_match_result bus_result; }; struct dev_match_result { dev_match_type type; union match_result result; }; typedef enum { CAM_DEV_MATCH_LAST, CAM_DEV_MATCH_MORE, CAM_DEV_MATCH_LIST_CHANGED, CAM_DEV_MATCH_SIZE_ERROR, CAM_DEV_MATCH_ERROR } ccb_dev_match_status; typedef enum { CAM_DEV_POS_NONE = 0x000, CAM_DEV_POS_BUS = 0x001, CAM_DEV_POS_TARGET = 0x002, CAM_DEV_POS_DEVICE = 0x004, CAM_DEV_POS_PERIPH = 0x008, CAM_DEV_POS_PDPTR = 0x010, CAM_DEV_POS_TYPEMASK = 0xf00, CAM_DEV_POS_EDT = 0x100, CAM_DEV_POS_PDRV = 0x200 } dev_pos_type; struct ccb_dm_cookie { void *bus; void *target; void *device; void *periph; void *pdrv; }; struct ccb_dev_position { u_int generations[4]; #define CAM_BUS_GENERATION 0x00 #define CAM_TARGET_GENERATION 0x01 #define CAM_DEV_GENERATION 0x02 #define CAM_PERIPH_GENERATION 0x03 dev_pos_type position_type; struct ccb_dm_cookie cookie; }; struct ccb_dev_match { struct ccb_hdr ccb_h; ccb_dev_match_status status; u_int32_t num_patterns; u_int32_t pattern_buf_len; struct dev_match_pattern *patterns; u_int32_t num_matches; u_int32_t match_buf_len; struct dev_match_result *matches; struct ccb_dev_position pos; }; /* * Definitions for the path inquiry CCB fields. */ #define CAM_VERSION 0x19 /* Hex value for current version */ typedef enum { PI_MDP_ABLE = 0x80, /* Supports MDP message */ PI_WIDE_32 = 0x40, /* Supports 32 bit wide SCSI */ PI_WIDE_16 = 0x20, /* Supports 16 bit wide SCSI */ PI_SDTR_ABLE = 0x10, /* Supports SDTR message */ PI_LINKED_CDB = 0x08, /* Supports linked CDBs */ PI_SATAPM = 0x04, /* Supports SATA PM */ PI_TAG_ABLE = 0x02, /* Supports tag queue messages */ PI_SOFT_RST = 0x01 /* Supports soft reset alternative */ } pi_inqflag; typedef enum { PIT_PROCESSOR = 0x80, /* Target mode processor mode */ PIT_PHASE = 0x40, /* Target mode phase cog. mode */ PIT_DISCONNECT = 0x20, /* Disconnects supported in target mode */ PIT_TERM_IO = 0x10, /* Terminate I/O message supported in TM */ PIT_GRP_6 = 0x08, /* Group 6 commands supported */ PIT_GRP_7 = 0x04 /* Group 7 commands supported */ } pi_tmflag; typedef enum { PIM_ATA_EXT = 0x200,/* ATA requests can understand ata_ext requests */ PIM_EXTLUNS = 0x100,/* 64bit extended LUNs supported */ PIM_SCANHILO = 0x80, /* Bus scans from high ID to low ID */ PIM_NOREMOVE = 0x40, /* Removeable devices not included in scan */ PIM_NOINITIATOR = 0x20, /* Initiator role not supported. */ PIM_NOBUSRESET = 0x10, /* User has disabled initial BUS RESET */ PIM_NO_6_BYTE = 0x08, /* Do not send 6-byte commands */ PIM_SEQSCAN = 0x04, /* Do bus scans sequentially, not in parallel */ PIM_UNMAPPED = 0x02, PIM_NOSCAN = 0x01 /* SIM does its own scanning */ } pi_miscflag; /* Path Inquiry CCB */ struct ccb_pathinq_settings_spi { u_int8_t ppr_options; }; struct ccb_pathinq_settings_fc { u_int64_t wwnn; /* world wide node name */ u_int64_t wwpn; /* world wide port name */ u_int32_t port; /* 24 bit port id, if known */ u_int32_t bitrate; /* Mbps */ }; struct ccb_pathinq_settings_sas { u_int32_t bitrate; /* Mbps */ }; struct ccb_pathinq_settings_nvme { uint16_t nsid; /* Namespace ID for this path */ }; #define PATHINQ_SETTINGS_SIZE 128 struct ccb_pathinq { struct ccb_hdr ccb_h; u_int8_t version_num; /* Version number for the SIM/HBA */ u_int8_t hba_inquiry; /* Mimic of INQ byte 7 for the HBA */ u_int16_t target_sprt; /* Flags for target mode support */ u_int32_t hba_misc; /* Misc HBA features */ u_int16_t hba_eng_cnt; /* HBA engine count */ /* Vendor Unique capabilities */ u_int8_t vuhba_flags[VUHBALEN]; u_int32_t max_target; /* Maximum supported Target */ u_int32_t max_lun; /* Maximum supported Lun */ u_int32_t async_flags; /* Installed Async handlers */ path_id_t hpath_id; /* Highest Path ID in the subsystem */ target_id_t initiator_id; /* ID of the HBA on the SCSI bus */ char sim_vid[SIM_IDLEN]; /* Vendor ID of the SIM */ char hba_vid[HBA_IDLEN]; /* Vendor ID of the HBA */ char dev_name[DEV_IDLEN];/* Device name for SIM */ u_int32_t unit_number; /* Unit number for SIM */ u_int32_t bus_id; /* Bus ID for SIM */ u_int32_t base_transfer_speed;/* Base bus speed in KB/sec */ cam_proto protocol; u_int protocol_version; cam_xport transport; u_int transport_version; union { struct ccb_pathinq_settings_spi spi; struct ccb_pathinq_settings_fc fc; struct ccb_pathinq_settings_sas sas; struct ccb_pathinq_settings_nvme nvme; char ccb_pathinq_settings_opaque[PATHINQ_SETTINGS_SIZE]; } xport_specific; u_int maxio; /* Max supported I/O size, in bytes. */ u_int16_t hba_vendor; /* HBA vendor ID */ u_int16_t hba_device; /* HBA device ID */ u_int16_t hba_subvendor; /* HBA subvendor ID */ u_int16_t hba_subdevice; /* HBA subdevice ID */ }; /* Path Statistics CCB */ struct ccb_pathstats { struct ccb_hdr ccb_h; struct timeval last_reset; /* Time of last bus reset/loop init */ }; typedef enum { SMP_FLAG_NONE = 0x00, SMP_FLAG_REQ_SG = 0x01, SMP_FLAG_RSP_SG = 0x02 } ccb_smp_pass_flags; /* * Serial Management Protocol CCB * XXX Currently the semantics for this CCB are that it is executed either * by the addressed device, or that device's parent (i.e. an expander for * any device on an expander) if the addressed device doesn't support SMP. * Later, once we have the ability to probe SMP-only devices and put them * in CAM's topology, the CCB will only be executed by the addressed device * if possible. */ struct ccb_smpio { struct ccb_hdr ccb_h; uint8_t *smp_request; int smp_request_len; uint16_t smp_request_sglist_cnt; uint8_t *smp_response; int smp_response_len; uint16_t smp_response_sglist_cnt; ccb_smp_pass_flags flags; }; typedef union { u_int8_t *sense_ptr; /* * Pointer to storage * for sense information */ /* Storage Area for sense information */ struct scsi_sense_data sense_buf; } sense_t; typedef union { u_int8_t *cdb_ptr; /* Pointer to the CDB bytes to send */ /* Area for the CDB send */ u_int8_t cdb_bytes[IOCDBLEN]; } cdb_t; /* * SCSI I/O Request CCB used for the XPT_SCSI_IO and XPT_CONT_TARGET_IO * function codes. */ struct ccb_scsiio { struct ccb_hdr ccb_h; union ccb *next_ccb; /* Ptr for next CCB for action */ u_int8_t *req_map; /* Ptr to mapping info */ u_int8_t *data_ptr; /* Ptr to the data buf/SG list */ u_int32_t dxfer_len; /* Data transfer length */ /* Autosense storage */ struct scsi_sense_data sense_data; u_int8_t sense_len; /* Number of bytes to autosense */ u_int8_t cdb_len; /* Number of bytes for the CDB */ u_int16_t sglist_cnt; /* Number of SG list entries */ u_int8_t scsi_status; /* Returned SCSI status */ u_int8_t sense_resid; /* Autosense resid length: 2's comp */ u_int32_t resid; /* Transfer residual length: 2's comp */ cdb_t cdb_io; /* Union for CDB bytes/pointer */ u_int8_t *msg_ptr; /* Pointer to the message buffer */ u_int16_t msg_len; /* Number of bytes for the Message */ u_int8_t tag_action; /* What to do for tag queueing */ /* * The tag action should be either the define below (to send a * non-tagged transaction) or one of the defined scsi tag messages * from scsi_message.h. */ #define CAM_TAG_ACTION_NONE 0x00 u_int tag_id; /* tag id from initator (target mode) */ u_int init_id; /* initiator id of who selected */ #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) struct bio *bio; /* Associated bio */ #endif }; static __inline uint8_t * scsiio_cdb_ptr(struct ccb_scsiio *ccb) { return ((ccb->ccb_h.flags & CAM_CDB_POINTER) ? ccb->cdb_io.cdb_ptr : ccb->cdb_io.cdb_bytes); } /* * ATA I/O Request CCB used for the XPT_ATA_IO function code. */ struct ccb_ataio { struct ccb_hdr ccb_h; union ccb *next_ccb; /* Ptr for next CCB for action */ struct ata_cmd cmd; /* ATA command register set */ struct ata_res res; /* ATA result register set */ u_int8_t *data_ptr; /* Ptr to the data buf/SG list */ u_int32_t dxfer_len; /* Data transfer length */ u_int32_t resid; /* Transfer residual length: 2's comp */ u_int8_t ata_flags; /* Flags for the rest of the buffer */ #define ATA_FLAG_AUX 0x1 uint32_t aux; uint32_t unused; }; /* * MMC I/O Request CCB used for the XPT_MMC_IO function code. */ struct ccb_mmcio { struct ccb_hdr ccb_h; union ccb *next_ccb; /* Ptr for next CCB for action */ struct mmc_command cmd; struct mmc_command stop; }; struct ccb_accept_tio { struct ccb_hdr ccb_h; cdb_t cdb_io; /* Union for CDB bytes/pointer */ u_int8_t cdb_len; /* Number of bytes for the CDB */ u_int8_t tag_action; /* What to do for tag queueing */ u_int8_t sense_len; /* Number of bytes of Sense Data */ u_int tag_id; /* tag id from initator (target mode) */ u_int init_id; /* initiator id of who selected */ struct scsi_sense_data sense_data; }; static __inline uint8_t * atio_cdb_ptr(struct ccb_accept_tio *ccb) { return ((ccb->ccb_h.flags & CAM_CDB_POINTER) ? ccb->cdb_io.cdb_ptr : ccb->cdb_io.cdb_bytes); } /* Release SIM Queue */ struct ccb_relsim { struct ccb_hdr ccb_h; u_int32_t release_flags; #define RELSIM_ADJUST_OPENINGS 0x01 #define RELSIM_RELEASE_AFTER_TIMEOUT 0x02 #define RELSIM_RELEASE_AFTER_CMDCMPLT 0x04 #define RELSIM_RELEASE_AFTER_QEMPTY 0x08 u_int32_t openings; u_int32_t release_timeout; /* Abstract argument. */ u_int32_t qfrozen_cnt; }; /* * NVMe I/O Request CCB used for the XPT_NVME_IO and XPT_NVME_ADMIN function codes. */ struct ccb_nvmeio { struct ccb_hdr ccb_h; union ccb *next_ccb; /* Ptr for next CCB for action */ struct nvme_command cmd; /* NVME command, per NVME standard */ struct nvme_completion cpl; /* NVME completion, per NVME standard */ uint8_t *data_ptr; /* Ptr to the data buf/SG list */ uint32_t dxfer_len; /* Data transfer length */ - uint32_t resid; /* Transfer residual length: 2's comp unused ?*/ + uint16_t sglist_cnt; /* Number of SG list entries */ + uint16_t unused; /* padding for removed uint32_t */ }; /* * Definitions for the asynchronous callback CCB fields. */ typedef enum { AC_UNIT_ATTENTION = 0x4000,/* Device reported UNIT ATTENTION */ AC_ADVINFO_CHANGED = 0x2000,/* Advance info might have changes */ AC_CONTRACT = 0x1000,/* A contractual callback */ AC_GETDEV_CHANGED = 0x800,/* Getdev info might have changed */ AC_INQ_CHANGED = 0x400,/* Inquiry info might have changed */ AC_TRANSFER_NEG = 0x200,/* New transfer settings in effect */ AC_LOST_DEVICE = 0x100,/* A device went away */ AC_FOUND_DEVICE = 0x080,/* A new device was found */ AC_PATH_DEREGISTERED = 0x040,/* A path has de-registered */ AC_PATH_REGISTERED = 0x020,/* A new path has been registered */ AC_SENT_BDR = 0x010,/* A BDR message was sent to target */ AC_SCSI_AEN = 0x008,/* A SCSI AEN has been received */ AC_UNSOL_RESEL = 0x002,/* Unsolicited reselection occurred */ AC_BUS_RESET = 0x001 /* A SCSI bus reset occurred */ } ac_code; typedef void ac_callback_t (void *softc, u_int32_t code, struct cam_path *path, void *args); /* * Generic Asynchronous callbacks. * * Generic arguments passed bac which are then interpreted between a per-system * contract number. */ #define AC_CONTRACT_DATA_MAX (128 - sizeof (u_int64_t)) struct ac_contract { u_int64_t contract_number; u_int8_t contract_data[AC_CONTRACT_DATA_MAX]; }; #define AC_CONTRACT_DEV_CHG 1 struct ac_device_changed { u_int64_t wwpn; u_int32_t port; target_id_t target; u_int8_t arrived; }; /* Set Asynchronous Callback CCB */ struct ccb_setasync { struct ccb_hdr ccb_h; u_int32_t event_enable; /* Async Event enables */ ac_callback_t *callback; void *callback_arg; }; /* Set Device Type CCB */ struct ccb_setdev { struct ccb_hdr ccb_h; u_int8_t dev_type; /* Value for dev type field in EDT */ }; /* SCSI Control Functions */ /* Abort XPT request CCB */ struct ccb_abort { struct ccb_hdr ccb_h; union ccb *abort_ccb; /* Pointer to CCB to abort */ }; /* Reset SCSI Bus CCB */ struct ccb_resetbus { struct ccb_hdr ccb_h; }; /* Reset SCSI Device CCB */ struct ccb_resetdev { struct ccb_hdr ccb_h; }; /* Terminate I/O Process Request CCB */ struct ccb_termio { struct ccb_hdr ccb_h; union ccb *termio_ccb; /* Pointer to CCB to terminate */ }; typedef enum { CTS_TYPE_CURRENT_SETTINGS, CTS_TYPE_USER_SETTINGS } cts_type; struct ccb_trans_settings_scsi { u_int valid; /* Which fields to honor */ #define CTS_SCSI_VALID_TQ 0x01 u_int flags; #define CTS_SCSI_FLAGS_TAG_ENB 0x01 }; struct ccb_trans_settings_ata { u_int valid; /* Which fields to honor */ #define CTS_ATA_VALID_TQ 0x01 u_int flags; #define CTS_ATA_FLAGS_TAG_ENB 0x01 }; struct ccb_trans_settings_spi { u_int valid; /* Which fields to honor */ #define CTS_SPI_VALID_SYNC_RATE 0x01 #define CTS_SPI_VALID_SYNC_OFFSET 0x02 #define CTS_SPI_VALID_BUS_WIDTH 0x04 #define CTS_SPI_VALID_DISC 0x08 #define CTS_SPI_VALID_PPR_OPTIONS 0x10 u_int flags; #define CTS_SPI_FLAGS_DISC_ENB 0x01 u_int sync_period; u_int sync_offset; u_int bus_width; u_int ppr_options; }; struct ccb_trans_settings_fc { u_int valid; /* Which fields to honor */ #define CTS_FC_VALID_WWNN 0x8000 #define CTS_FC_VALID_WWPN 0x4000 #define CTS_FC_VALID_PORT 0x2000 #define CTS_FC_VALID_SPEED 0x1000 u_int64_t wwnn; /* world wide node name */ u_int64_t wwpn; /* world wide port name */ u_int32_t port; /* 24 bit port id, if known */ u_int32_t bitrate; /* Mbps */ }; struct ccb_trans_settings_sas { u_int valid; /* Which fields to honor */ #define CTS_SAS_VALID_SPEED 0x1000 u_int32_t bitrate; /* Mbps */ }; struct ccb_trans_settings_pata { u_int valid; /* Which fields to honor */ #define CTS_ATA_VALID_MODE 0x01 #define CTS_ATA_VALID_BYTECOUNT 0x02 #define CTS_ATA_VALID_ATAPI 0x20 #define CTS_ATA_VALID_CAPS 0x40 int mode; /* Mode */ u_int bytecount; /* Length of PIO transaction */ u_int atapi; /* Length of ATAPI CDB */ u_int caps; /* Device and host SATA caps. */ #define CTS_ATA_CAPS_H 0x0000ffff #define CTS_ATA_CAPS_H_DMA48 0x00000001 /* 48-bit DMA */ #define CTS_ATA_CAPS_D 0xffff0000 }; struct ccb_trans_settings_sata { u_int valid; /* Which fields to honor */ #define CTS_SATA_VALID_MODE 0x01 #define CTS_SATA_VALID_BYTECOUNT 0x02 #define CTS_SATA_VALID_REVISION 0x04 #define CTS_SATA_VALID_PM 0x08 #define CTS_SATA_VALID_TAGS 0x10 #define CTS_SATA_VALID_ATAPI 0x20 #define CTS_SATA_VALID_CAPS 0x40 int mode; /* Legacy PATA mode */ u_int bytecount; /* Length of PIO transaction */ int revision; /* SATA revision */ u_int pm_present; /* PM is present (XPT->SIM) */ u_int tags; /* Number of allowed tags */ u_int atapi; /* Length of ATAPI CDB */ u_int caps; /* Device and host SATA caps. */ #define CTS_SATA_CAPS_H 0x0000ffff #define CTS_SATA_CAPS_H_PMREQ 0x00000001 #define CTS_SATA_CAPS_H_APST 0x00000002 #define CTS_SATA_CAPS_H_DMAAA 0x00000010 /* Auto-activation */ #define CTS_SATA_CAPS_H_AN 0x00000020 /* Async. notification */ #define CTS_SATA_CAPS_D 0xffff0000 #define CTS_SATA_CAPS_D_PMREQ 0x00010000 #define CTS_SATA_CAPS_D_APST 0x00020000 }; struct ccb_trans_settings_nvme { u_int valid; /* Which fields to honor */ #define CTS_NVME_VALID_SPEC 0x01 #define CTS_NVME_VALID_CAPS 0x02 u_int spec_major; /* Major version of spec supported */ u_int spec_minor; /* Minor verison of spec supported */ u_int spec_tiny; /* Tiny version of spec supported */ u_int max_xfer; /* Max transfer size (0 -> unlimited */ u_int caps; }; #include struct ccb_trans_settings_mmc { struct mmc_ios ios; #define MMC_CLK (1 << 1) #define MMC_VDD (1 << 2) #define MMC_CS (1 << 3) #define MMC_BW (1 << 4) #define MMC_PM (1 << 5) #define MMC_BT (1 << 6) #define MMC_BM (1 << 7) uint32_t ios_valid; /* The folowing is used only for GET_TRAN_SETTINGS */ uint32_t host_ocr; int host_f_min; int host_f_max; #define MMC_CAP_4_BIT_DATA (1 << 0) /* Can do 4-bit data transfers */ #define MMC_CAP_8_BIT_DATA (1 << 1) /* Can do 8-bit data transfers */ #define MMC_CAP_HSPEED (1 << 2) /* Can do High Speed transfers */ uint32_t host_caps; }; /* Get/Set transfer rate/width/disconnection/tag queueing settings */ struct ccb_trans_settings { struct ccb_hdr ccb_h; cts_type type; /* Current or User settings */ cam_proto protocol; u_int protocol_version; cam_xport transport; u_int transport_version; union { u_int valid; /* Which fields to honor */ struct ccb_trans_settings_ata ata; struct ccb_trans_settings_scsi scsi; struct ccb_trans_settings_nvme nvme; struct ccb_trans_settings_mmc mmc; } proto_specific; union { u_int valid; /* Which fields to honor */ struct ccb_trans_settings_spi spi; struct ccb_trans_settings_fc fc; struct ccb_trans_settings_sas sas; struct ccb_trans_settings_pata ata; struct ccb_trans_settings_sata sata; struct ccb_trans_settings_nvme nvme; } xport_specific; }; /* * Calculate the geometry parameters for a device * give the block size and volume size in blocks. */ struct ccb_calc_geometry { struct ccb_hdr ccb_h; u_int32_t block_size; u_int64_t volume_size; u_int32_t cylinders; u_int8_t heads; u_int8_t secs_per_track; }; /* * Set or get SIM (and transport) specific knobs */ #define KNOB_VALID_ADDRESS 0x1 #define KNOB_VALID_ROLE 0x2 #define KNOB_ROLE_NONE 0x0 #define KNOB_ROLE_INITIATOR 0x1 #define KNOB_ROLE_TARGET 0x2 #define KNOB_ROLE_BOTH 0x3 struct ccb_sim_knob_settings_spi { u_int valid; u_int initiator_id; u_int role; }; struct ccb_sim_knob_settings_fc { u_int valid; u_int64_t wwnn; /* world wide node name */ u_int64_t wwpn; /* world wide port name */ u_int role; }; struct ccb_sim_knob_settings_sas { u_int valid; u_int64_t wwnn; /* world wide node name */ u_int role; }; #define KNOB_SETTINGS_SIZE 128 struct ccb_sim_knob { struct ccb_hdr ccb_h; union { u_int valid; /* Which fields to honor */ struct ccb_sim_knob_settings_spi spi; struct ccb_sim_knob_settings_fc fc; struct ccb_sim_knob_settings_sas sas; char pad[KNOB_SETTINGS_SIZE]; } xport_specific; }; /* * Rescan the given bus, or bus/target/lun */ struct ccb_rescan { struct ccb_hdr ccb_h; cam_flags flags; }; /* * Turn on debugging for the given bus, bus/target, or bus/target/lun. */ struct ccb_debug { struct ccb_hdr ccb_h; cam_debug_flags flags; }; /* Target mode structures. */ struct ccb_en_lun { struct ccb_hdr ccb_h; u_int16_t grp6_len; /* Group 6 VU CDB length */ u_int16_t grp7_len; /* Group 7 VU CDB length */ u_int8_t enable; }; /* old, barely used immediate notify, binary compatibility */ struct ccb_immed_notify { struct ccb_hdr ccb_h; struct scsi_sense_data sense_data; u_int8_t sense_len; /* Number of bytes in sense buffer */ u_int8_t initiator_id; /* Id of initiator that selected */ u_int8_t message_args[7]; /* Message Arguments */ }; struct ccb_notify_ack { struct ccb_hdr ccb_h; u_int16_t seq_id; /* Sequence identifier */ u_int8_t event; /* Event flags */ }; struct ccb_immediate_notify { struct ccb_hdr ccb_h; u_int tag_id; /* Tag for immediate notify */ u_int seq_id; /* Tag for target of notify */ u_int initiator_id; /* Initiator Identifier */ u_int arg; /* Function specific */ }; struct ccb_notify_acknowledge { struct ccb_hdr ccb_h; u_int tag_id; /* Tag for immediate notify */ u_int seq_id; /* Tar for target of notify */ u_int initiator_id; /* Initiator Identifier */ u_int arg; /* Response information */ /* * Lower byte of arg is one of RESPONSE CODE values defined below * (subset of response codes from SPL-4 and FCP-4 specifications), * upper 3 bytes is code-specific ADDITIONAL RESPONSE INFORMATION. */ #define CAM_RSP_TMF_COMPLETE 0x00 #define CAM_RSP_TMF_REJECTED 0x04 #define CAM_RSP_TMF_FAILED 0x05 #define CAM_RSP_TMF_SUCCEEDED 0x08 #define CAM_RSP_TMF_INCORRECT_LUN 0x09 }; /* HBA engine structures. */ typedef enum { EIT_BUFFER, /* Engine type: buffer memory */ EIT_LOSSLESS, /* Engine type: lossless compression */ EIT_LOSSY, /* Engine type: lossy compression */ EIT_ENCRYPT /* Engine type: encryption */ } ei_type; typedef enum { EAD_VUNIQUE, /* Engine algorithm ID: vendor unique */ EAD_LZ1V1, /* Engine algorithm ID: LZ1 var.1 */ EAD_LZ2V1, /* Engine algorithm ID: LZ2 var.1 */ EAD_LZ2V2 /* Engine algorithm ID: LZ2 var.2 */ } ei_algo; struct ccb_eng_inq { struct ccb_hdr ccb_h; u_int16_t eng_num; /* The engine number for this inquiry */ ei_type eng_type; /* Returned engine type */ ei_algo eng_algo; /* Returned engine algorithm type */ u_int32_t eng_memeory; /* Returned engine memory size */ }; struct ccb_eng_exec { /* This structure must match SCSIIO size */ struct ccb_hdr ccb_h; u_int8_t *pdrv_ptr; /* Ptr used by the peripheral driver */ u_int8_t *req_map; /* Ptr for mapping info on the req. */ u_int8_t *data_ptr; /* Pointer to the data buf/SG list */ u_int32_t dxfer_len; /* Data transfer length */ u_int8_t *engdata_ptr; /* Pointer to the engine buffer data */ u_int16_t sglist_cnt; /* Num of scatter gather list entries */ u_int32_t dmax_len; /* Destination data maximum length */ u_int32_t dest_len; /* Destination data length */ int32_t src_resid; /* Source residual length: 2's comp */ u_int32_t timeout; /* Timeout value */ u_int16_t eng_num; /* Engine number for this request */ u_int16_t vu_flags; /* Vendor Unique flags */ }; /* * Definitions for the timeout field in the SCSI I/O CCB. */ #define CAM_TIME_DEFAULT 0x00000000 /* Use SIM default value */ #define CAM_TIME_INFINITY 0xFFFFFFFF /* Infinite timeout */ #define CAM_SUCCESS 0 /* For signaling general success */ #define CAM_FAILURE 1 /* For signaling general failure */ #define CAM_FALSE 0 #define CAM_TRUE 1 #define XPT_CCB_INVALID -1 /* for signaling a bad CCB to free */ /* * CCB for working with advanced device information. This operates in a fashion * similar to XPT_GDEV_TYPE. Specify the target in ccb_h, the buffer * type requested, and provide a buffer size/buffer to write to. If the * buffer is too small, provsiz will be larger than bufsiz. */ struct ccb_dev_advinfo { struct ccb_hdr ccb_h; uint32_t flags; #define CDAI_FLAG_NONE 0x0 /* No flags set */ #define CDAI_FLAG_STORE 0x1 /* If set, action becomes store */ uint32_t buftype; /* IN: Type of data being requested */ /* NB: buftype is interpreted on a per-transport basis */ #define CDAI_TYPE_SCSI_DEVID 1 #define CDAI_TYPE_SERIAL_NUM 2 #define CDAI_TYPE_PHYS_PATH 3 #define CDAI_TYPE_RCAPLONG 4 #define CDAI_TYPE_EXT_INQ 5 off_t bufsiz; /* IN: Size of external buffer */ #define CAM_SCSI_DEVID_MAXLEN 65536 /* length in buffer is an uint16_t */ off_t provsiz; /* OUT: Size required/used */ uint8_t *buf; /* IN/OUT: Buffer for requested data */ }; /* * CCB for sending async events */ struct ccb_async { struct ccb_hdr ccb_h; uint32_t async_code; off_t async_arg_size; void *async_arg_ptr; }; /* * Union of all CCB types for kernel space allocation. This union should * never be used for manipulating CCBs - its only use is for the allocation * and deallocation of raw CCB space and is the return type of xpt_ccb_alloc * and the argument to xpt_ccb_free. */ union ccb { struct ccb_hdr ccb_h; /* For convenience */ struct ccb_scsiio csio; struct ccb_getdev cgd; struct ccb_getdevlist cgdl; struct ccb_pathinq cpi; struct ccb_relsim crs; struct ccb_setasync csa; struct ccb_setdev csd; struct ccb_pathstats cpis; struct ccb_getdevstats cgds; struct ccb_dev_match cdm; struct ccb_trans_settings cts; struct ccb_calc_geometry ccg; struct ccb_sim_knob knob; struct ccb_abort cab; struct ccb_resetbus crb; struct ccb_resetdev crd; struct ccb_termio tio; struct ccb_accept_tio atio; struct ccb_scsiio ctio; struct ccb_en_lun cel; struct ccb_immed_notify cin; struct ccb_notify_ack cna; struct ccb_immediate_notify cin1; struct ccb_notify_acknowledge cna2; struct ccb_eng_inq cei; struct ccb_eng_exec cee; struct ccb_smpio smpio; struct ccb_rescan crcn; struct ccb_debug cdbg; struct ccb_ataio ataio; struct ccb_dev_advinfo cdai; struct ccb_async casync; struct ccb_nvmeio nvmeio; struct ccb_mmcio mmcio; }; #define CCB_CLEAR_ALL_EXCEPT_HDR(ccbp) \ bzero((char *)(ccbp) + sizeof((ccbp)->ccb_h), \ sizeof(*(ccbp)) - sizeof((ccbp)->ccb_h)) __BEGIN_DECLS static __inline void cam_fill_csio(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int32_t flags, u_int8_t tag_action, u_int8_t *data_ptr, u_int32_t dxfer_len, u_int8_t sense_len, u_int8_t cdb_len, u_int32_t timeout); static __inline void cam_fill_nvmeio(struct ccb_nvmeio *nvmeio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int32_t flags, u_int8_t *data_ptr, u_int32_t dxfer_len, u_int32_t timeout); static __inline void cam_fill_ctio(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int32_t flags, u_int tag_action, u_int tag_id, u_int init_id, u_int scsi_status, u_int8_t *data_ptr, u_int32_t dxfer_len, u_int32_t timeout); static __inline void cam_fill_ataio(struct ccb_ataio *ataio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int32_t flags, u_int tag_action, u_int8_t *data_ptr, u_int32_t dxfer_len, u_int32_t timeout); static __inline void cam_fill_smpio(struct ccb_smpio *smpio, uint32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), uint32_t flags, uint8_t *smp_request, int smp_request_len, uint8_t *smp_response, int smp_response_len, uint32_t timeout); static __inline void cam_fill_mmcio(struct ccb_mmcio *mmcio, uint32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), uint32_t flags, uint32_t mmc_opcode, uint32_t mmc_arg, uint32_t mmc_flags, struct mmc_data *mmc_d, uint32_t timeout); static __inline void cam_fill_csio(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int32_t flags, u_int8_t tag_action, u_int8_t *data_ptr, u_int32_t dxfer_len, u_int8_t sense_len, u_int8_t cdb_len, u_int32_t timeout) { csio->ccb_h.func_code = XPT_SCSI_IO; csio->ccb_h.flags = flags; csio->ccb_h.xflags = 0; csio->ccb_h.retry_count = retries; csio->ccb_h.cbfcnp = cbfcnp; csio->ccb_h.timeout = timeout; csio->data_ptr = data_ptr; csio->dxfer_len = dxfer_len; csio->sense_len = sense_len; csio->cdb_len = cdb_len; csio->tag_action = tag_action; #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) csio->bio = NULL; #endif } static __inline void cam_fill_ctio(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int32_t flags, u_int tag_action, u_int tag_id, u_int init_id, u_int scsi_status, u_int8_t *data_ptr, u_int32_t dxfer_len, u_int32_t timeout) { csio->ccb_h.func_code = XPT_CONT_TARGET_IO; csio->ccb_h.flags = flags; csio->ccb_h.xflags = 0; csio->ccb_h.retry_count = retries; csio->ccb_h.cbfcnp = cbfcnp; csio->ccb_h.timeout = timeout; csio->data_ptr = data_ptr; csio->dxfer_len = dxfer_len; csio->scsi_status = scsi_status; csio->tag_action = tag_action; csio->tag_id = tag_id; csio->init_id = init_id; } static __inline void cam_fill_ataio(struct ccb_ataio *ataio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int32_t flags, u_int tag_action __unused, u_int8_t *data_ptr, u_int32_t dxfer_len, u_int32_t timeout) { ataio->ccb_h.func_code = XPT_ATA_IO; ataio->ccb_h.flags = flags; ataio->ccb_h.retry_count = retries; ataio->ccb_h.cbfcnp = cbfcnp; ataio->ccb_h.timeout = timeout; ataio->data_ptr = data_ptr; ataio->dxfer_len = dxfer_len; ataio->ata_flags = 0; } static __inline void cam_fill_smpio(struct ccb_smpio *smpio, uint32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), uint32_t flags, uint8_t *smp_request, int smp_request_len, uint8_t *smp_response, int smp_response_len, uint32_t timeout) { #ifdef _KERNEL KASSERT((flags & CAM_DIR_MASK) == CAM_DIR_BOTH, ("direction != CAM_DIR_BOTH")); KASSERT((smp_request != NULL) && (smp_response != NULL), ("need valid request and response buffers")); KASSERT((smp_request_len != 0) && (smp_response_len != 0), ("need non-zero request and response lengths")); #endif /*_KERNEL*/ smpio->ccb_h.func_code = XPT_SMP_IO; smpio->ccb_h.flags = flags; smpio->ccb_h.retry_count = retries; smpio->ccb_h.cbfcnp = cbfcnp; smpio->ccb_h.timeout = timeout; smpio->smp_request = smp_request; smpio->smp_request_len = smp_request_len; smpio->smp_response = smp_response; smpio->smp_response_len = smp_response_len; } static __inline void cam_fill_mmcio(struct ccb_mmcio *mmcio, uint32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), uint32_t flags, uint32_t mmc_opcode, uint32_t mmc_arg, uint32_t mmc_flags, struct mmc_data *mmc_d, uint32_t timeout) { mmcio->ccb_h.func_code = XPT_MMC_IO; mmcio->ccb_h.flags = flags; mmcio->ccb_h.retry_count = retries; mmcio->ccb_h.cbfcnp = cbfcnp; mmcio->ccb_h.timeout = timeout; mmcio->cmd.opcode = mmc_opcode; mmcio->cmd.arg = mmc_arg; mmcio->cmd.flags = mmc_flags; mmcio->stop.opcode = 0; mmcio->stop.arg = 0; mmcio->stop.flags = 0; if (mmc_d != NULL) { mmcio->cmd.data = mmc_d; } else mmcio->cmd.data = NULL; mmcio->cmd.resp[0] = 0; mmcio->cmd.resp[1] = 0; mmcio->cmd.resp[2] = 0; mmcio->cmd.resp[3] = 0; } static __inline void cam_set_ccbstatus(union ccb *ccb, cam_status status) { ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= status; } static __inline cam_status cam_ccb_status(union ccb *ccb) { return ((cam_status)(ccb->ccb_h.status & CAM_STATUS_MASK)); } void cam_calc_geometry(struct ccb_calc_geometry *ccg, int extended); static __inline void cam_fill_nvmeio(struct ccb_nvmeio *nvmeio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int32_t flags, u_int8_t *data_ptr, u_int32_t dxfer_len, u_int32_t timeout) { nvmeio->ccb_h.func_code = XPT_NVME_IO; nvmeio->ccb_h.flags = flags; nvmeio->ccb_h.retry_count = retries; nvmeio->ccb_h.cbfcnp = cbfcnp; nvmeio->ccb_h.timeout = timeout; nvmeio->data_ptr = data_ptr; nvmeio->dxfer_len = dxfer_len; } static __inline void cam_fill_nvmeadmin(struct ccb_nvmeio *nvmeio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int32_t flags, u_int8_t *data_ptr, u_int32_t dxfer_len, u_int32_t timeout) { nvmeio->ccb_h.func_code = XPT_NVME_ADMIN; nvmeio->ccb_h.flags = flags; nvmeio->ccb_h.retry_count = retries; nvmeio->ccb_h.cbfcnp = cbfcnp; nvmeio->ccb_h.timeout = timeout; nvmeio->data_ptr = data_ptr; nvmeio->dxfer_len = dxfer_len; } __END_DECLS #endif /* _CAM_CAM_CCB_H */ Index: head/sys/cam/nvme/nvme_da.c =================================================================== --- head/sys/cam/nvme/nvme_da.c (revision 322996) +++ head/sys/cam/nvme/nvme_da.c (revision 322997) @@ -1,1162 +1,1157 @@ /*- * Copyright (c) 2015 Netflix, Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Derived from ata_da.c: * Copyright (c) 2009 Alexander Motin */ #include __FBSDID("$FreeBSD$"); #include #ifdef _KERNEL #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif /* _KERNEL */ #ifndef _KERNEL #include #include #endif /* _KERNEL */ #include #include #include #include #include #include #include typedef enum { NDA_STATE_NORMAL } nda_state; typedef enum { NDA_FLAG_OPEN = 0x0001, NDA_FLAG_DIRTY = 0x0002, NDA_FLAG_SCTX_INIT = 0x0004, } nda_flags; typedef enum { NDA_Q_4K = 0x01, NDA_Q_NONE = 0x00, } nda_quirks; #define NDA_Q_BIT_STRING \ "\020" \ "\001Bit 0" typedef enum { NDA_CCB_BUFFER_IO = 0x01, NDA_CCB_DUMP = 0x02, NDA_CCB_TRIM = 0x03, NDA_CCB_TYPE_MASK = 0x0F, } nda_ccb_state; /* Offsets into our private area for storing information */ #define ccb_state ppriv_field0 #define ccb_bp ppriv_ptr1 struct trim_request { TAILQ_HEAD(, bio) bps; }; struct nda_softc { struct cam_iosched_softc *cam_iosched; int outstanding_cmds; /* Number of active commands */ int refcount; /* Active xpt_action() calls */ nda_state state; nda_flags flags; nda_quirks quirks; int unmappedio; uint32_t nsid; /* Namespace ID for this nda device */ struct disk *disk; struct task sysctl_task; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; struct trim_request trim_req; #ifdef CAM_IO_STATS struct sysctl_ctx_list sysctl_stats_ctx; struct sysctl_oid *sysctl_stats_tree; u_int timeouts; u_int errors; u_int invalidations; #endif }; /* Need quirk table */ static disk_strategy_t ndastrategy; static dumper_t ndadump; static periph_init_t ndainit; static void ndaasync(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg); static void ndasysctlinit(void *context, int pending); static periph_ctor_t ndaregister; static periph_dtor_t ndacleanup; static periph_start_t ndastart; static periph_oninv_t ndaoninvalidate; static void ndadone(struct cam_periph *periph, union ccb *done_ccb); static int ndaerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags); static void ndashutdown(void *arg, int howto); static void ndasuspend(void *arg); #ifndef NDA_DEFAULT_SEND_ORDERED #define NDA_DEFAULT_SEND_ORDERED 1 #endif #ifndef NDA_DEFAULT_TIMEOUT #define NDA_DEFAULT_TIMEOUT 30 /* Timeout in seconds */ #endif #ifndef NDA_DEFAULT_RETRY #define NDA_DEFAULT_RETRY 4 #endif //static int nda_retry_count = NDA_DEFAULT_RETRY; static int nda_send_ordered = NDA_DEFAULT_SEND_ORDERED; static int nda_default_timeout = NDA_DEFAULT_TIMEOUT; /* * All NVMe media is non-rotational, so all nvme device instances * share this to implement the sysctl. */ static int nda_rotating_media = 0; static SYSCTL_NODE(_kern_cam, OID_AUTO, nda, CTLFLAG_RD, 0, "CAM Direct Access Disk driver"); static struct periph_driver ndadriver = { ndainit, "nda", TAILQ_HEAD_INITIALIZER(ndadriver.units), /* generation */ 0 }; PERIPHDRIVER_DECLARE(nda, ndadriver); static MALLOC_DEFINE(M_NVMEDA, "nvme_da", "nvme_da buffers"); /* * nice wrappers. Maybe these belong in nvme_all.c instead of * here, but this is the only place that uses these. Should * we ever grow another NVME periph, we should move them * all there wholesale. */ static void nda_nvme_flush(struct nda_softc *softc, struct ccb_nvmeio *nvmeio) { cam_fill_nvmeio(nvmeio, 0, /* retries */ ndadone, /* cbfcnp */ CAM_DIR_NONE, /* flags */ NULL, /* data_ptr */ 0, /* dxfer_len */ nda_default_timeout * 1000); /* timeout 5s */ nvme_ns_flush_cmd(&nvmeio->cmd, softc->nsid); } static void nda_nvme_trim(struct nda_softc *softc, struct ccb_nvmeio *nvmeio, void *payload, uint32_t num_ranges) { cam_fill_nvmeio(nvmeio, 0, /* retries */ ndadone, /* cbfcnp */ CAM_DIR_OUT, /* flags */ payload, /* data_ptr */ num_ranges * sizeof(struct nvme_dsm_range), /* dxfer_len */ nda_default_timeout * 1000); /* timeout 5s */ nvme_ns_trim_cmd(&nvmeio->cmd, softc->nsid, num_ranges); } static void nda_nvme_write(struct nda_softc *softc, struct ccb_nvmeio *nvmeio, void *payload, uint64_t lba, uint32_t len, uint32_t count) { cam_fill_nvmeio(nvmeio, 0, /* retries */ ndadone, /* cbfcnp */ CAM_DIR_OUT, /* flags */ payload, /* data_ptr */ len, /* dxfer_len */ nda_default_timeout * 1000); /* timeout 5s */ nvme_ns_write_cmd(&nvmeio->cmd, softc->nsid, lba, count); } static void nda_nvme_rw_bio(struct nda_softc *softc, struct ccb_nvmeio *nvmeio, struct bio *bp, uint32_t rwcmd) { int flags = rwcmd == NVME_OPC_READ ? CAM_DIR_IN : CAM_DIR_OUT; void *payload; uint64_t lba; uint32_t count; if (bp->bio_flags & BIO_UNMAPPED) { flags |= CAM_DATA_BIO; payload = bp; } else { payload = bp->bio_data; } lba = bp->bio_pblkno; count = bp->bio_bcount / softc->disk->d_sectorsize; cam_fill_nvmeio(nvmeio, 0, /* retries */ ndadone, /* cbfcnp */ flags, /* flags */ payload, /* data_ptr */ bp->bio_bcount, /* dxfer_len */ nda_default_timeout * 1000); /* timeout 5s */ nvme_ns_rw_cmd(&nvmeio->cmd, rwcmd, softc->nsid, lba, count); } static int ndaopen(struct disk *dp) { struct cam_periph *periph; struct nda_softc *softc; int error; periph = (struct cam_periph *)dp->d_drv1; if (cam_periph_acquire(periph) != CAM_REQ_CMP) { return(ENXIO); } cam_periph_lock(periph); if ((error = cam_periph_hold(periph, PRIBIO|PCATCH)) != 0) { cam_periph_unlock(periph); cam_periph_release(periph); return (error); } CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH, ("ndaopen\n")); softc = (struct nda_softc *)periph->softc; softc->flags |= NDA_FLAG_OPEN; cam_periph_unhold(periph); cam_periph_unlock(periph); return (0); } static int ndaclose(struct disk *dp) { struct cam_periph *periph; struct nda_softc *softc; union ccb *ccb; int error; periph = (struct cam_periph *)dp->d_drv1; softc = (struct nda_softc *)periph->softc; cam_periph_lock(periph); CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH, ("ndaclose\n")); if ((softc->flags & NDA_FLAG_DIRTY) != 0 && (periph->flags & CAM_PERIPH_INVALID) == 0 && cam_periph_hold(periph, PRIBIO) == 0) { ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); nda_nvme_flush(softc, &ccb->nvmeio); error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0, /*sense_flags*/0, softc->disk->d_devstat); if (error != 0) xpt_print(periph->path, "Synchronize cache failed\n"); else softc->flags &= ~NDA_FLAG_DIRTY; xpt_release_ccb(ccb); cam_periph_unhold(periph); } softc->flags &= ~NDA_FLAG_OPEN; while (softc->refcount != 0) cam_periph_sleep(periph, &softc->refcount, PRIBIO, "ndaclose", 1); cam_periph_unlock(periph); cam_periph_release(periph); return (0); } static void ndaschedule(struct cam_periph *periph) { struct nda_softc *softc = (struct nda_softc *)periph->softc; if (softc->state != NDA_STATE_NORMAL) return; cam_iosched_schedule(softc->cam_iosched, periph); } /* * Actually translate the requested transfer into one the physical driver * can understand. The transfer is described by a buf and will include * only one physical transfer. */ static void ndastrategy(struct bio *bp) { struct cam_periph *periph; struct nda_softc *softc; periph = (struct cam_periph *)bp->bio_disk->d_drv1; softc = (struct nda_softc *)periph->softc; cam_periph_lock(periph); CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastrategy(%p)\n", bp)); /* * If the device has been made invalid, error out */ if ((periph->flags & CAM_PERIPH_INVALID) != 0) { cam_periph_unlock(periph); biofinish(bp, NULL, ENXIO); return; } /* * Place it in the queue of disk activities for this disk */ cam_iosched_queue_work(softc->cam_iosched, bp); /* * Schedule ourselves for performing the work. */ ndaschedule(periph); cam_periph_unlock(periph); return; } static int ndadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length) { struct cam_periph *periph; struct nda_softc *softc; u_int secsize; union ccb ccb; struct disk *dp; uint64_t lba; uint32_t count; int error = 0; dp = arg; periph = dp->d_drv1; softc = (struct nda_softc *)periph->softc; cam_periph_lock(periph); secsize = softc->disk->d_sectorsize; lba = offset / secsize; count = length / secsize; if ((periph->flags & CAM_PERIPH_INVALID) != 0) { cam_periph_unlock(periph); return (ENXIO); } if (length > 0) { xpt_setup_ccb(&ccb.ccb_h, periph->path, CAM_PRIORITY_NORMAL); ccb.ccb_h.ccb_state = NDA_CCB_DUMP; nda_nvme_write(softc, &ccb.nvmeio, virtual, lba, length, count); xpt_polled_action(&ccb); error = cam_periph_error(&ccb, 0, SF_NO_RECOVERY | SF_NO_RETRY, NULL); if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0) cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); if (error != 0) printf("Aborting dump due to I/O error.\n"); cam_periph_unlock(periph); return (error); } /* Flush */ xpt_setup_ccb(&ccb.ccb_h, periph->path, CAM_PRIORITY_NORMAL); ccb.ccb_h.ccb_state = NDA_CCB_DUMP; nda_nvme_flush(softc, &ccb.nvmeio); xpt_polled_action(&ccb); error = cam_periph_error(&ccb, 0, SF_NO_RECOVERY | SF_NO_RETRY, NULL); if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0) cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); if (error != 0) xpt_print(periph->path, "flush cmd failed\n"); cam_periph_unlock(periph); return (error); } static void ndainit(void) { cam_status status; /* * Install a global async callback. This callback will * receive async callbacks like "new device found". */ status = xpt_register_async(AC_FOUND_DEVICE, ndaasync, NULL, NULL); if (status != CAM_REQ_CMP) { printf("nda: Failed to attach master async callback " "due to status 0x%x!\n", status); } else if (nda_send_ordered) { /* Register our event handlers */ if ((EVENTHANDLER_REGISTER(power_suspend, ndasuspend, NULL, EVENTHANDLER_PRI_LAST)) == NULL) printf("ndainit: power event registration failed!\n"); if ((EVENTHANDLER_REGISTER(shutdown_post_sync, ndashutdown, NULL, SHUTDOWN_PRI_DEFAULT)) == NULL) printf("ndainit: shutdown event registration failed!\n"); } } /* * Callback from GEOM, called when it has finished cleaning up its * resources. */ static void ndadiskgonecb(struct disk *dp) { struct cam_periph *periph; periph = (struct cam_periph *)dp->d_drv1; cam_periph_release(periph); } static void ndaoninvalidate(struct cam_periph *periph) { struct nda_softc *softc; softc = (struct nda_softc *)periph->softc; /* * De-register any async callbacks. */ xpt_register_async(0, ndaasync, periph, periph->path); #ifdef CAM_IO_STATS softc->invalidations++; #endif /* * Return all queued I/O with ENXIO. * XXX Handle any transactions queued to the card * with XPT_ABORT_CCB. */ cam_iosched_flush(softc->cam_iosched, NULL, ENXIO); disk_gone(softc->disk); } static void ndacleanup(struct cam_periph *periph) { struct nda_softc *softc; softc = (struct nda_softc *)periph->softc; cam_periph_unlock(periph); cam_iosched_fini(softc->cam_iosched); /* * If we can't free the sysctl tree, oh well... */ if ((softc->flags & NDA_FLAG_SCTX_INIT) != 0) { #ifdef CAM_IO_STATS if (sysctl_ctx_free(&softc->sysctl_stats_ctx) != 0) xpt_print(periph->path, "can't remove sysctl stats context\n"); #endif if (sysctl_ctx_free(&softc->sysctl_ctx) != 0) xpt_print(periph->path, "can't remove sysctl context\n"); } disk_destroy(softc->disk); free(softc, M_DEVBUF); cam_periph_lock(periph); } static void ndaasync(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg) { struct cam_periph *periph; periph = (struct cam_periph *)callback_arg; switch (code) { case AC_FOUND_DEVICE: { struct ccb_getdev *cgd; cam_status status; cgd = (struct ccb_getdev *)arg; if (cgd == NULL) break; if (cgd->protocol != PROTO_NVME) break; /* * Allocate a peripheral instance for * this device and start the probe * process. */ status = cam_periph_alloc(ndaregister, ndaoninvalidate, ndacleanup, ndastart, "nda", CAM_PERIPH_BIO, path, ndaasync, AC_FOUND_DEVICE, cgd); if (status != CAM_REQ_CMP && status != CAM_REQ_INPROG) printf("ndaasync: Unable to attach to new device " "due to status 0x%x\n", status); break; } case AC_ADVINFO_CHANGED: { uintptr_t buftype; buftype = (uintptr_t)arg; if (buftype == CDAI_TYPE_PHYS_PATH) { struct nda_softc *softc; softc = periph->softc; disk_attr_changed(softc->disk, "GEOM::physpath", M_NOWAIT); } break; } case AC_LOST_DEVICE: default: cam_periph_async(periph, code, path, arg); break; } } static void ndasysctlinit(void *context, int pending) { struct cam_periph *periph; struct nda_softc *softc; char tmpstr[80], tmpstr2[80]; periph = (struct cam_periph *)context; /* periph was held for us when this task was enqueued */ if ((periph->flags & CAM_PERIPH_INVALID) != 0) { cam_periph_release(periph); return; } softc = (struct nda_softc *)periph->softc; snprintf(tmpstr, sizeof(tmpstr), "CAM NDA unit %d", periph->unit_number); snprintf(tmpstr2, sizeof(tmpstr2), "%d", periph->unit_number); sysctl_ctx_init(&softc->sysctl_ctx); softc->flags |= NDA_FLAG_SCTX_INIT; softc->sysctl_tree = SYSCTL_ADD_NODE_WITH_LABEL(&softc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_kern_cam_nda), OID_AUTO, tmpstr2, CTLFLAG_RD, 0, tmpstr, "device_index"); if (softc->sysctl_tree == NULL) { printf("ndasysctlinit: unable to allocate sysctl tree\n"); cam_periph_release(periph); return; } SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "unmapped_io", CTLFLAG_RD | CTLFLAG_MPSAFE, &softc->unmappedio, 0, "Unmapped I/O leaf"); SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "rotating", CTLFLAG_RD | CTLFLAG_MPSAFE, &nda_rotating_media, 0, "Rotating media"); #ifdef CAM_IO_STATS softc->sysctl_stats_tree = SYSCTL_ADD_NODE(&softc->sysctl_stats_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "stats", CTLFLAG_RD, 0, "Statistics"); if (softc->sysctl_stats_tree == NULL) { printf("ndasysctlinit: unable to allocate sysctl tree for stats\n"); cam_periph_release(periph); return; } SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, SYSCTL_CHILDREN(softc->sysctl_stats_tree), OID_AUTO, "timeouts", CTLFLAG_RD | CTLFLAG_MPSAFE, &softc->timeouts, 0, "Device timeouts reported by the SIM"); SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, SYSCTL_CHILDREN(softc->sysctl_stats_tree), OID_AUTO, "errors", CTLFLAG_RD | CTLFLAG_MPSAFE, &softc->errors, 0, "Transport errors reported by the SIM."); SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, SYSCTL_CHILDREN(softc->sysctl_stats_tree), OID_AUTO, "pack_invalidations", CTLFLAG_RD | CTLFLAG_MPSAFE, &softc->invalidations, 0, "Device pack invalidations."); #endif cam_iosched_sysctl_init(softc->cam_iosched, &softc->sysctl_ctx, softc->sysctl_tree); cam_periph_release(periph); } static int ndagetattr(struct bio *bp) { int ret; struct cam_periph *periph; periph = (struct cam_periph *)bp->bio_disk->d_drv1; cam_periph_lock(periph); ret = xpt_getattr(bp->bio_data, bp->bio_length, bp->bio_attribute, periph->path); cam_periph_unlock(periph); if (ret == 0) bp->bio_completed = bp->bio_length; return ret; } static cam_status ndaregister(struct cam_periph *periph, void *arg) { struct nda_softc *softc; struct disk *disk; struct ccb_pathinq cpi; struct ccb_getdev *cgd; const struct nvme_namespace_data *nsd; const struct nvme_controller_data *cd; char announce_buf[80]; // caddr_t match; u_int maxio; int quirks; cgd = (struct ccb_getdev *)arg; if (cgd == NULL) { printf("ndaregister: no getdev CCB, can't register device\n"); return(CAM_REQ_CMP_ERR); } nsd = cgd->nvme_data; cd = cgd->nvme_cdata; softc = (struct nda_softc *)malloc(sizeof(*softc), M_DEVBUF, M_NOWAIT | M_ZERO); if (softc == NULL) { printf("ndaregister: Unable to probe new device. " "Unable to allocate softc\n"); return(CAM_REQ_CMP_ERR); } if (cam_iosched_init(&softc->cam_iosched, periph) != 0) { printf("ndaregister: Unable to probe new device. " "Unable to allocate iosched memory\n"); return(CAM_REQ_CMP_ERR); } /* ident_data parsing */ periph->softc = softc; #if 0 /* * See if this device has any quirks. */ match = cam_quirkmatch((caddr_t)&cgd->ident_data, (caddr_t)nda_quirk_table, sizeof(nda_quirk_table)/sizeof(*nda_quirk_table), sizeof(*nda_quirk_table), ata_identify_match); if (match != NULL) softc->quirks = ((struct nda_quirk_entry *)match)->quirks; else #endif softc->quirks = NDA_Q_NONE; bzero(&cpi, sizeof(cpi)); xpt_setup_ccb(&cpi.ccb_h, periph->path, CAM_PRIORITY_NONE); cpi.ccb_h.func_code = XPT_PATH_INQ; xpt_action((union ccb *)&cpi); TASK_INIT(&softc->sysctl_task, 0, ndasysctlinit, periph); /* * The name space ID is the lun, save it for later I/O */ softc->nsid = (uint32_t)xpt_path_lun_id(periph->path); /* * Register this media as a disk */ (void)cam_periph_hold(periph, PRIBIO); cam_periph_unlock(periph); snprintf(announce_buf, sizeof(announce_buf), "kern.cam.nda.%d.quirks", periph->unit_number); quirks = softc->quirks; TUNABLE_INT_FETCH(announce_buf, &quirks); softc->quirks = quirks; cam_iosched_set_sort_queue(softc->cam_iosched, 0); softc->disk = disk = disk_alloc(); strlcpy(softc->disk->d_descr, cd->mn, MIN(sizeof(softc->disk->d_descr), sizeof(cd->mn))); strlcpy(softc->disk->d_ident, cd->sn, MIN(sizeof(softc->disk->d_ident), sizeof(cd->sn))); disk->d_rotation_rate = DISK_RR_NON_ROTATING; disk->d_open = ndaopen; disk->d_close = ndaclose; disk->d_strategy = ndastrategy; disk->d_getattr = ndagetattr; disk->d_dump = ndadump; disk->d_gone = ndadiskgonecb; disk->d_name = "nda"; disk->d_drv1 = periph; disk->d_unit = periph->unit_number; maxio = cpi.maxio; /* Honor max I/O size of SIM */ if (maxio == 0) maxio = DFLTPHYS; /* traditional default */ else if (maxio > MAXPHYS) maxio = MAXPHYS; /* for safety */ disk->d_maxsize = maxio; disk->d_sectorsize = 1 << nsd->lbaf[nsd->flbas.format].lbads; disk->d_mediasize = (off_t)(disk->d_sectorsize * nsd->nsze); disk->d_delmaxsize = disk->d_mediasize; disk->d_flags = DISKFLAG_DIRECT_COMPLETION; // if (cd->oncs.dsm) // XXX broken? disk->d_flags |= DISKFLAG_CANDELETE; if (cd->vwc.present) disk->d_flags |= DISKFLAG_CANFLUSHCACHE; if ((cpi.hba_misc & PIM_UNMAPPED) != 0) { disk->d_flags |= DISKFLAG_UNMAPPED_BIO; softc->unmappedio = 1; } /* * d_ident and d_descr are both far bigger than the length of either * the serial or model number strings. */ nvme_strvis(disk->d_descr, cd->mn, sizeof(disk->d_descr), NVME_MODEL_NUMBER_LENGTH); nvme_strvis(disk->d_ident, cd->sn, sizeof(disk->d_ident), NVME_SERIAL_NUMBER_LENGTH); disk->d_hba_vendor = cpi.hba_vendor; disk->d_hba_device = cpi.hba_device; disk->d_hba_subvendor = cpi.hba_subvendor; disk->d_hba_subdevice = cpi.hba_subdevice; disk->d_stripesize = disk->d_sectorsize; disk->d_stripeoffset = 0; disk->d_devstat = devstat_new_entry(periph->periph_name, periph->unit_number, disk->d_sectorsize, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT | XPORT_DEVSTAT_TYPE(cpi.transport), DEVSTAT_PRIORITY_DISK); /* * Add alias for older nvd drives to ease transition. */ disk_add_alias(disk, "nvd"); /* * Acquire a reference to the periph before we register with GEOM. * We'll release this reference once GEOM calls us back (via * ndadiskgonecb()) telling us that our provider has been freed. */ if (cam_periph_acquire(periph) != CAM_REQ_CMP) { xpt_print(periph->path, "%s: lost periph during " "registration!\n", __func__); cam_periph_lock(periph); return (CAM_REQ_CMP_ERR); } disk_create(softc->disk, DISK_VERSION); cam_periph_lock(periph); cam_periph_unhold(periph); snprintf(announce_buf, sizeof(announce_buf), "%juMB (%ju %u byte sectors)", (uintmax_t)((uintmax_t)disk->d_mediasize / (1024*1024)), (uintmax_t)disk->d_mediasize / disk->d_sectorsize, disk->d_sectorsize); xpt_announce_periph(periph, announce_buf); xpt_announce_quirks(periph, softc->quirks, NDA_Q_BIT_STRING); /* * Create our sysctl variables, now that we know * we have successfully attached. */ if (cam_periph_acquire(periph) == CAM_REQ_CMP) taskqueue_enqueue(taskqueue_thread, &softc->sysctl_task); /* * Register for device going away and info about the drive * changing (though with NVMe, it can't) */ xpt_register_async(AC_LOST_DEVICE | AC_ADVINFO_CHANGED, ndaasync, periph, periph->path); softc->state = NDA_STATE_NORMAL; return(CAM_REQ_CMP); } static void ndastart(struct cam_periph *periph, union ccb *start_ccb) { struct nda_softc *softc = (struct nda_softc *)periph->softc; struct ccb_nvmeio *nvmeio = &start_ccb->nvmeio; CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart\n")); switch (softc->state) { case NDA_STATE_NORMAL: { struct bio *bp; bp = cam_iosched_next_bio(softc->cam_iosched); CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart: bio %p\n", bp)); if (bp == NULL) { xpt_release_ccb(start_ccb); break; } switch (bp->bio_cmd) { case BIO_WRITE: softc->flags |= NDA_FLAG_DIRTY; /* FALLTHROUGH */ case BIO_READ: { #ifdef NDA_TEST_FAILURE int fail = 0; /* * Support the failure ioctls. If the command is a * read, and there are pending forced read errors, or * if a write and pending write errors, then fail this * operation with EIO. This is useful for testing * purposes. Also, support having every Nth read fail. * * This is a rather blunt tool. */ if (bp->bio_cmd == BIO_READ) { if (softc->force_read_error) { softc->force_read_error--; fail = 1; } if (softc->periodic_read_error > 0) { if (++softc->periodic_read_count >= softc->periodic_read_error) { softc->periodic_read_count = 0; fail = 1; } } } else { if (softc->force_write_error) { softc->force_write_error--; fail = 1; } } if (fail) { biofinish(bp, NULL, EIO); xpt_release_ccb(start_ccb); ndaschedule(periph); return; } #endif KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 || round_page(bp->bio_bcount + bp->bio_ma_offset) / PAGE_SIZE == bp->bio_ma_n, ("Short bio %p", bp)); nda_nvme_rw_bio(softc, &start_ccb->nvmeio, bp, bp->bio_cmd == BIO_READ ? NVME_OPC_READ : NVME_OPC_WRITE); break; } case BIO_DELETE: { struct nvme_dsm_range *dsm_range; dsm_range = malloc(sizeof(*dsm_range), M_NVMEDA, M_ZERO | M_WAITOK); dsm_range->length = bp->bio_bcount / softc->disk->d_sectorsize; dsm_range->starting_lba = bp->bio_offset / softc->disk->d_sectorsize; bp->bio_driver2 = dsm_range; nda_nvme_trim(softc, &start_ccb->nvmeio, dsm_range, 1); start_ccb->ccb_h.ccb_state = NDA_CCB_TRIM; start_ccb->ccb_h.flags |= CAM_UNLOCKED; cam_iosched_submit_trim(softc->cam_iosched); /* XXX */ goto out; } case BIO_FLUSH: nda_nvme_flush(softc, nvmeio); break; } start_ccb->ccb_h.ccb_state = NDA_CCB_BUFFER_IO; start_ccb->ccb_h.flags |= CAM_UNLOCKED; out: start_ccb->ccb_h.ccb_bp = bp; softc->outstanding_cmds++; softc->refcount++; cam_periph_unlock(periph); xpt_action(start_ccb); cam_periph_lock(periph); softc->refcount--; /* May have more work to do, so ensure we stay scheduled */ ndaschedule(periph); break; } } } static void ndadone(struct cam_periph *periph, union ccb *done_ccb) { struct nda_softc *softc; struct ccb_nvmeio *nvmeio = &done_ccb->nvmeio; struct cam_path *path; int state; softc = (struct nda_softc *)periph->softc; path = done_ccb->ccb_h.path; CAM_DEBUG(path, CAM_DEBUG_TRACE, ("ndadone\n")); state = nvmeio->ccb_h.ccb_state & NDA_CCB_TYPE_MASK; switch (state) { case NDA_CCB_BUFFER_IO: case NDA_CCB_TRIM: { struct bio *bp; int error; cam_periph_lock(periph); bp = (struct bio *)done_ccb->ccb_h.ccb_bp; if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { error = ndaerror(done_ccb, 0, 0); if (error == ERESTART) { /* A retry was scheduled, so just return. */ cam_periph_unlock(periph); return; } if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) cam_release_devq(path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); } else { if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) panic("REQ_CMP with QFRZN"); error = 0; } bp->bio_error = error; if (error != 0) { bp->bio_resid = bp->bio_bcount; bp->bio_flags |= BIO_ERROR; } else { - if (state == NDA_CCB_TRIM) - bp->bio_resid = 0; - else - bp->bio_resid = nvmeio->resid; - if (bp->bio_resid > 0) - bp->bio_flags |= BIO_ERROR; + bp->bio_resid = 0; } if (state == NDA_CCB_TRIM) free(bp->bio_driver2, M_NVMEDA); softc->outstanding_cmds--; /* * We need to call cam_iosched before we call biodone so that we * don't measure any activity that happens in the completion * routine, which in the case of sendfile can be quite * extensive. */ cam_iosched_bio_complete(softc->cam_iosched, bp, done_ccb); xpt_release_ccb(done_ccb); if (state == NDA_CCB_TRIM) { #ifdef notyet TAILQ_HEAD(, bio) queue; struct bio *bp1; TAILQ_INIT(&queue); TAILQ_CONCAT(&queue, &softc->trim_req.bps, bio_queue); #endif cam_iosched_trim_done(softc->cam_iosched); ndaschedule(periph); cam_periph_unlock(periph); #ifdef notyet /* Not yet collapsing several BIO_DELETE requests into one TRIM */ while ((bp1 = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, bp1, bio_queue); bp1->bio_error = error; if (error != 0) { bp1->bio_flags |= BIO_ERROR; bp1->bio_resid = bp1->bio_bcount; } else bp1->bio_resid = 0; biodone(bp1); } #else biodone(bp); #endif } else { ndaschedule(periph); cam_periph_unlock(periph); biodone(bp); } return; } case NDA_CCB_DUMP: /* No-op. We're polling */ return; default: break; } xpt_release_ccb(done_ccb); } static int ndaerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags) { struct nda_softc *softc; struct cam_periph *periph; periph = xpt_path_periph(ccb->ccb_h.path); softc = (struct nda_softc *)periph->softc; switch (ccb->ccb_h.status & CAM_STATUS_MASK) { case CAM_CMD_TIMEOUT: #ifdef CAM_IO_STATS softc->timeouts++; #endif break; case CAM_REQ_ABORTED: case CAM_REQ_CMP_ERR: case CAM_REQ_TERMIO: case CAM_UNREC_HBA_ERROR: case CAM_DATA_RUN_ERR: case CAM_ATA_STATUS_ERROR: #ifdef CAM_IO_STATS softc->errors++; #endif break; default: break; } return(cam_periph_error(ccb, cam_flags, sense_flags, NULL)); } /* * Step through all NDA peripheral drivers, and if the device is still open, * sync the disk cache to physical media. */ static void ndaflush(void) { struct cam_periph *periph; struct nda_softc *softc; union ccb *ccb; int error; CAM_PERIPH_FOREACH(periph, &ndadriver) { softc = (struct nda_softc *)periph->softc; if (SCHEDULER_STOPPED()) { /* If we paniced with the lock held, do not recurse. */ if (!cam_periph_owned(periph) && (softc->flags & NDA_FLAG_OPEN)) { ndadump(softc->disk, NULL, 0, 0, 0); } continue; } cam_periph_lock(periph); /* * We only sync the cache if the drive is still open, and * if the drive is capable of it.. */ if ((softc->flags & NDA_FLAG_OPEN) == 0) { cam_periph_unlock(periph); continue; } ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); nda_nvme_flush(softc, &ccb->nvmeio); error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0, /*sense_flags*/ SF_NO_RECOVERY | SF_NO_RETRY, softc->disk->d_devstat); if (error != 0) xpt_print(periph->path, "Synchronize cache failed\n"); xpt_release_ccb(ccb); cam_periph_unlock(periph); } } static void ndashutdown(void *arg, int howto) { ndaflush(); } static void ndasuspend(void *arg) { ndaflush(); } Index: head/sys/cam/scsi/scsi_pass.c =================================================================== --- head/sys/cam/scsi/scsi_pass.c (revision 322996) +++ head/sys/cam/scsi/scsi_pass.c (revision 322997) @@ -1,2282 +1,2278 @@ /*- * Copyright (c) 1997, 1998, 2000 Justin T. Gibbs. * Copyright (c) 1997, 1998, 1999 Kenneth D. Merry. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef enum { PASS_FLAG_OPEN = 0x01, PASS_FLAG_LOCKED = 0x02, PASS_FLAG_INVALID = 0x04, PASS_FLAG_INITIAL_PHYSPATH = 0x08, PASS_FLAG_ZONE_INPROG = 0x10, PASS_FLAG_ZONE_VALID = 0x20, PASS_FLAG_UNMAPPED_CAPABLE = 0x40, PASS_FLAG_ABANDONED_REF_SET = 0x80 } pass_flags; typedef enum { PASS_STATE_NORMAL } pass_state; typedef enum { PASS_CCB_BUFFER_IO, PASS_CCB_QUEUED_IO } pass_ccb_types; #define ccb_type ppriv_field0 #define ccb_ioreq ppriv_ptr1 /* * The maximum number of memory segments we preallocate. */ #define PASS_MAX_SEGS 16 typedef enum { PASS_IO_NONE = 0x00, PASS_IO_USER_SEG_MALLOC = 0x01, PASS_IO_KERN_SEG_MALLOC = 0x02, PASS_IO_ABANDONED = 0x04 } pass_io_flags; struct pass_io_req { union ccb ccb; union ccb *alloced_ccb; union ccb *user_ccb_ptr; camq_entry user_periph_links; ccb_ppriv_area user_periph_priv; struct cam_periph_map_info mapinfo; pass_io_flags flags; ccb_flags data_flags; int num_user_segs; bus_dma_segment_t user_segs[PASS_MAX_SEGS]; int num_kern_segs; bus_dma_segment_t kern_segs[PASS_MAX_SEGS]; bus_dma_segment_t *user_segptr; bus_dma_segment_t *kern_segptr; int num_bufs; uint32_t dirs[CAM_PERIPH_MAXMAPS]; uint32_t lengths[CAM_PERIPH_MAXMAPS]; uint8_t *user_bufs[CAM_PERIPH_MAXMAPS]; uint8_t *kern_bufs[CAM_PERIPH_MAXMAPS]; struct bintime start_time; TAILQ_ENTRY(pass_io_req) links; }; struct pass_softc { pass_state state; pass_flags flags; u_int8_t pd_type; union ccb saved_ccb; int open_count; u_int maxio; struct devstat *device_stats; struct cdev *dev; struct cdev *alias_dev; struct task add_physpath_task; struct task shutdown_kqueue_task; struct selinfo read_select; TAILQ_HEAD(, pass_io_req) incoming_queue; TAILQ_HEAD(, pass_io_req) active_queue; TAILQ_HEAD(, pass_io_req) abandoned_queue; TAILQ_HEAD(, pass_io_req) done_queue; struct cam_periph *periph; char zone_name[12]; char io_zone_name[12]; uma_zone_t pass_zone; uma_zone_t pass_io_zone; size_t io_zone_size; }; static d_open_t passopen; static d_close_t passclose; static d_ioctl_t passioctl; static d_ioctl_t passdoioctl; static d_poll_t passpoll; static d_kqfilter_t passkqfilter; static void passreadfiltdetach(struct knote *kn); static int passreadfilt(struct knote *kn, long hint); static periph_init_t passinit; static periph_ctor_t passregister; static periph_oninv_t passoninvalidate; static periph_dtor_t passcleanup; static periph_start_t passstart; static void pass_shutdown_kqueue(void *context, int pending); static void pass_add_physpath(void *context, int pending); static void passasync(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg); static void passdone(struct cam_periph *periph, union ccb *done_ccb); static int passcreatezone(struct cam_periph *periph); static void passiocleanup(struct pass_softc *softc, struct pass_io_req *io_req); static int passcopysglist(struct cam_periph *periph, struct pass_io_req *io_req, ccb_flags direction); static int passmemsetup(struct cam_periph *periph, struct pass_io_req *io_req); static int passmemdone(struct cam_periph *periph, struct pass_io_req *io_req); static int passerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags); static int passsendccb(struct cam_periph *periph, union ccb *ccb, union ccb *inccb); static struct periph_driver passdriver = { passinit, "pass", TAILQ_HEAD_INITIALIZER(passdriver.units), /* generation */ 0 }; PERIPHDRIVER_DECLARE(pass, passdriver); static struct cdevsw pass_cdevsw = { .d_version = D_VERSION, .d_flags = D_TRACKCLOSE, .d_open = passopen, .d_close = passclose, .d_ioctl = passioctl, .d_poll = passpoll, .d_kqfilter = passkqfilter, .d_name = "pass", }; static struct filterops passread_filtops = { .f_isfd = 1, .f_detach = passreadfiltdetach, .f_event = passreadfilt }; static MALLOC_DEFINE(M_SCSIPASS, "scsi_pass", "scsi passthrough buffers"); static void passinit(void) { cam_status status; /* * Install a global async callback. This callback will * receive async callbacks like "new device found". */ status = xpt_register_async(AC_FOUND_DEVICE, passasync, NULL, NULL); if (status != CAM_REQ_CMP) { printf("pass: Failed to attach master async callback " "due to status 0x%x!\n", status); } } static void passrejectios(struct cam_periph *periph) { struct pass_io_req *io_req, *io_req2; struct pass_softc *softc; softc = (struct pass_softc *)periph->softc; /* * The user can no longer get status for I/O on the done queue, so * clean up all outstanding I/O on the done queue. */ TAILQ_FOREACH_SAFE(io_req, &softc->done_queue, links, io_req2) { TAILQ_REMOVE(&softc->done_queue, io_req, links); passiocleanup(softc, io_req); uma_zfree(softc->pass_zone, io_req); } /* * The underlying device is gone, so we can't issue these I/Os. * The devfs node has been shut down, so we can't return status to * the user. Free any I/O left on the incoming queue. */ TAILQ_FOREACH_SAFE(io_req, &softc->incoming_queue, links, io_req2) { TAILQ_REMOVE(&softc->incoming_queue, io_req, links); passiocleanup(softc, io_req); uma_zfree(softc->pass_zone, io_req); } /* * Normally we would put I/Os on the abandoned queue and acquire a * reference when we saw the final close. But, the device went * away and devfs may have moved everything off to deadfs by the * time the I/O done callback is called; as a result, we won't see * any more closes. So, if we have any active I/Os, we need to put * them on the abandoned queue. When the abandoned queue is empty, * we'll release the remaining reference (see below) to the peripheral. */ TAILQ_FOREACH_SAFE(io_req, &softc->active_queue, links, io_req2) { TAILQ_REMOVE(&softc->active_queue, io_req, links); io_req->flags |= PASS_IO_ABANDONED; TAILQ_INSERT_TAIL(&softc->abandoned_queue, io_req, links); } /* * If we put any I/O on the abandoned queue, acquire a reference. */ if ((!TAILQ_EMPTY(&softc->abandoned_queue)) && ((softc->flags & PASS_FLAG_ABANDONED_REF_SET) == 0)) { cam_periph_doacquire(periph); softc->flags |= PASS_FLAG_ABANDONED_REF_SET; } } static void passdevgonecb(void *arg) { struct cam_periph *periph; struct mtx *mtx; struct pass_softc *softc; int i; periph = (struct cam_periph *)arg; mtx = cam_periph_mtx(periph); mtx_lock(mtx); softc = (struct pass_softc *)periph->softc; KASSERT(softc->open_count >= 0, ("Negative open count %d", softc->open_count)); /* * When we get this callback, we will get no more close calls from * devfs. So if we have any dangling opens, we need to release the * reference held for that particular context. */ for (i = 0; i < softc->open_count; i++) cam_periph_release_locked(periph); softc->open_count = 0; /* * Release the reference held for the device node, it is gone now. * Accordingly, inform all queued I/Os of their fate. */ cam_periph_release_locked(periph); passrejectios(periph); /* * We reference the SIM lock directly here, instead of using * cam_periph_unlock(). The reason is that the final call to * cam_periph_release_locked() above could result in the periph * getting freed. If that is the case, dereferencing the periph * with a cam_periph_unlock() call would cause a page fault. */ mtx_unlock(mtx); /* * We have to remove our kqueue context from a thread because it * may sleep. It would be nice if we could get a callback from * kqueue when it is done cleaning up resources. */ taskqueue_enqueue(taskqueue_thread, &softc->shutdown_kqueue_task); } static void passoninvalidate(struct cam_periph *periph) { struct pass_softc *softc; softc = (struct pass_softc *)periph->softc; /* * De-register any async callbacks. */ xpt_register_async(0, passasync, periph, periph->path); softc->flags |= PASS_FLAG_INVALID; /* * Tell devfs this device has gone away, and ask for a callback * when it has cleaned up its state. */ destroy_dev_sched_cb(softc->dev, passdevgonecb, periph); } static void passcleanup(struct cam_periph *periph) { struct pass_softc *softc; softc = (struct pass_softc *)periph->softc; cam_periph_assert(periph, MA_OWNED); KASSERT(TAILQ_EMPTY(&softc->active_queue), ("%s called when there are commands on the active queue!\n", __func__)); KASSERT(TAILQ_EMPTY(&softc->abandoned_queue), ("%s called when there are commands on the abandoned queue!\n", __func__)); KASSERT(TAILQ_EMPTY(&softc->incoming_queue), ("%s called when there are commands on the incoming queue!\n", __func__)); KASSERT(TAILQ_EMPTY(&softc->done_queue), ("%s called when there are commands on the done queue!\n", __func__)); devstat_remove_entry(softc->device_stats); cam_periph_unlock(periph); /* * We call taskqueue_drain() for the physpath task to make sure it * is complete. We drop the lock because this can potentially * sleep. XXX KDM that is bad. Need a way to get a callback when * a taskqueue is drained. * * Note that we don't drain the kqueue shutdown task queue. This * is because we hold a reference on the periph for kqueue, and * release that reference from the kqueue shutdown task queue. So * we cannot come into this routine unless we've released that * reference. Also, because that could be the last reference, we * could be called from the cam_periph_release() call in * pass_shutdown_kqueue(). In that case, the taskqueue_drain() * would deadlock. It would be preferable if we had a way to * get a callback when a taskqueue is done. */ taskqueue_drain(taskqueue_thread, &softc->add_physpath_task); cam_periph_lock(periph); free(softc, M_DEVBUF); } static void pass_shutdown_kqueue(void *context, int pending) { struct cam_periph *periph; struct pass_softc *softc; periph = context; softc = periph->softc; knlist_clear(&softc->read_select.si_note, /*is_locked*/ 0); knlist_destroy(&softc->read_select.si_note); /* * Release the reference we held for kqueue. */ cam_periph_release(periph); } static void pass_add_physpath(void *context, int pending) { struct cam_periph *periph; struct pass_softc *softc; struct mtx *mtx; char *physpath; /* * If we have one, create a devfs alias for our * physical path. */ periph = context; softc = periph->softc; physpath = malloc(MAXPATHLEN, M_DEVBUF, M_WAITOK); mtx = cam_periph_mtx(periph); mtx_lock(mtx); if (periph->flags & CAM_PERIPH_INVALID) goto out; if (xpt_getattr(physpath, MAXPATHLEN, "GEOM::physpath", periph->path) == 0 && strlen(physpath) != 0) { mtx_unlock(mtx); make_dev_physpath_alias(MAKEDEV_WAITOK, &softc->alias_dev, softc->dev, softc->alias_dev, physpath); mtx_lock(mtx); } out: /* * Now that we've made our alias, we no longer have to have a * reference to the device. */ if ((softc->flags & PASS_FLAG_INITIAL_PHYSPATH) == 0) softc->flags |= PASS_FLAG_INITIAL_PHYSPATH; /* * We always acquire a reference to the periph before queueing this * task queue function, so it won't go away before we run. */ while (pending-- > 0) cam_periph_release_locked(periph); mtx_unlock(mtx); free(physpath, M_DEVBUF); } static void passasync(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg) { struct cam_periph *periph; periph = (struct cam_periph *)callback_arg; switch (code) { case AC_FOUND_DEVICE: { struct ccb_getdev *cgd; cam_status status; cgd = (struct ccb_getdev *)arg; if (cgd == NULL) break; /* * Allocate a peripheral instance for * this device and start the probe * process. */ status = cam_periph_alloc(passregister, passoninvalidate, passcleanup, passstart, "pass", CAM_PERIPH_BIO, path, passasync, AC_FOUND_DEVICE, cgd); if (status != CAM_REQ_CMP && status != CAM_REQ_INPROG) { const struct cam_status_entry *entry; entry = cam_fetch_status_entry(status); printf("passasync: Unable to attach new device " "due to status %#x: %s\n", status, entry ? entry->status_text : "Unknown"); } break; } case AC_ADVINFO_CHANGED: { uintptr_t buftype; buftype = (uintptr_t)arg; if (buftype == CDAI_TYPE_PHYS_PATH) { struct pass_softc *softc; cam_status status; softc = (struct pass_softc *)periph->softc; /* * Acquire a reference to the periph before we * start the taskqueue, so that we don't run into * a situation where the periph goes away before * the task queue has a chance to run. */ status = cam_periph_acquire(periph); if (status != CAM_REQ_CMP) break; taskqueue_enqueue(taskqueue_thread, &softc->add_physpath_task); } break; } default: cam_periph_async(periph, code, path, arg); break; } } static cam_status passregister(struct cam_periph *periph, void *arg) { struct pass_softc *softc; struct ccb_getdev *cgd; struct ccb_pathinq cpi; struct make_dev_args args; int error, no_tags; cgd = (struct ccb_getdev *)arg; if (cgd == NULL) { printf("%s: no getdev CCB, can't register device\n", __func__); return(CAM_REQ_CMP_ERR); } softc = (struct pass_softc *)malloc(sizeof(*softc), M_DEVBUF, M_NOWAIT); if (softc == NULL) { printf("%s: Unable to probe new device. " "Unable to allocate softc\n", __func__); return(CAM_REQ_CMP_ERR); } bzero(softc, sizeof(*softc)); softc->state = PASS_STATE_NORMAL; if (cgd->protocol == PROTO_SCSI || cgd->protocol == PROTO_ATAPI) softc->pd_type = SID_TYPE(&cgd->inq_data); else if (cgd->protocol == PROTO_SATAPM) softc->pd_type = T_ENCLOSURE; else softc->pd_type = T_DIRECT; periph->softc = softc; softc->periph = periph; TAILQ_INIT(&softc->incoming_queue); TAILQ_INIT(&softc->active_queue); TAILQ_INIT(&softc->abandoned_queue); TAILQ_INIT(&softc->done_queue); snprintf(softc->zone_name, sizeof(softc->zone_name), "%s%d", periph->periph_name, periph->unit_number); snprintf(softc->io_zone_name, sizeof(softc->io_zone_name), "%s%dIO", periph->periph_name, periph->unit_number); softc->io_zone_size = MAXPHYS; knlist_init_mtx(&softc->read_select.si_note, cam_periph_mtx(periph)); bzero(&cpi, sizeof(cpi)); xpt_setup_ccb(&cpi.ccb_h, periph->path, CAM_PRIORITY_NORMAL); cpi.ccb_h.func_code = XPT_PATH_INQ; xpt_action((union ccb *)&cpi); if (cpi.maxio == 0) softc->maxio = DFLTPHYS; /* traditional default */ else if (cpi.maxio > MAXPHYS) softc->maxio = MAXPHYS; /* for safety */ else softc->maxio = cpi.maxio; /* real value */ if (cpi.hba_misc & PIM_UNMAPPED) softc->flags |= PASS_FLAG_UNMAPPED_CAPABLE; /* * We pass in 0 for a blocksize, since we don't * know what the blocksize of this device is, if * it even has a blocksize. */ cam_periph_unlock(periph); no_tags = (cgd->inq_data.flags & SID_CmdQue) == 0; softc->device_stats = devstat_new_entry("pass", periph->unit_number, 0, DEVSTAT_NO_BLOCKSIZE | (no_tags ? DEVSTAT_NO_ORDERED_TAGS : 0), softc->pd_type | XPORT_DEVSTAT_TYPE(cpi.transport) | DEVSTAT_TYPE_PASS, DEVSTAT_PRIORITY_PASS); /* * Initialize the taskqueue handler for shutting down kqueue. */ TASK_INIT(&softc->shutdown_kqueue_task, /*priority*/ 0, pass_shutdown_kqueue, periph); /* * Acquire a reference to the periph that we can release once we've * cleaned up the kqueue. */ if (cam_periph_acquire(periph) != CAM_REQ_CMP) { xpt_print(periph->path, "%s: lost periph during " "registration!\n", __func__); cam_periph_lock(periph); return (CAM_REQ_CMP_ERR); } /* * Acquire a reference to the periph before we create the devfs * instance for it. We'll release this reference once the devfs * instance has been freed. */ if (cam_periph_acquire(periph) != CAM_REQ_CMP) { xpt_print(periph->path, "%s: lost periph during " "registration!\n", __func__); cam_periph_lock(periph); return (CAM_REQ_CMP_ERR); } /* Register the device */ make_dev_args_init(&args); args.mda_devsw = &pass_cdevsw; args.mda_unit = periph->unit_number; args.mda_uid = UID_ROOT; args.mda_gid = GID_OPERATOR; args.mda_mode = 0600; args.mda_si_drv1 = periph; error = make_dev_s(&args, &softc->dev, "%s%d", periph->periph_name, periph->unit_number); if (error != 0) { cam_periph_lock(periph); cam_periph_release_locked(periph); return (CAM_REQ_CMP_ERR); } /* * Hold a reference to the periph before we create the physical * path alias so it can't go away. */ if (cam_periph_acquire(periph) != CAM_REQ_CMP) { xpt_print(periph->path, "%s: lost periph during " "registration!\n", __func__); cam_periph_lock(periph); return (CAM_REQ_CMP_ERR); } cam_periph_lock(periph); TASK_INIT(&softc->add_physpath_task, /*priority*/0, pass_add_physpath, periph); /* * See if physical path information is already available. */ taskqueue_enqueue(taskqueue_thread, &softc->add_physpath_task); /* * Add an async callback so that we get notified if * this device goes away or its physical path * (stored in the advanced info data of the EDT) has * changed. */ xpt_register_async(AC_LOST_DEVICE | AC_ADVINFO_CHANGED, passasync, periph, periph->path); if (bootverbose) xpt_announce_periph(periph, NULL); return(CAM_REQ_CMP); } static int passopen(struct cdev *dev, int flags, int fmt, struct thread *td) { struct cam_periph *periph; struct pass_softc *softc; int error; periph = (struct cam_periph *)dev->si_drv1; if (cam_periph_acquire(periph) != CAM_REQ_CMP) return (ENXIO); cam_periph_lock(periph); softc = (struct pass_softc *)periph->softc; if (softc->flags & PASS_FLAG_INVALID) { cam_periph_release_locked(periph); cam_periph_unlock(periph); return(ENXIO); } /* * Don't allow access when we're running at a high securelevel. */ error = securelevel_gt(td->td_ucred, 1); if (error) { cam_periph_release_locked(periph); cam_periph_unlock(periph); return(error); } /* * Only allow read-write access. */ if (((flags & FWRITE) == 0) || ((flags & FREAD) == 0)) { cam_periph_release_locked(periph); cam_periph_unlock(periph); return(EPERM); } /* * We don't allow nonblocking access. */ if ((flags & O_NONBLOCK) != 0) { xpt_print(periph->path, "can't do nonblocking access\n"); cam_periph_release_locked(periph); cam_periph_unlock(periph); return(EINVAL); } softc->open_count++; cam_periph_unlock(periph); return (error); } static int passclose(struct cdev *dev, int flag, int fmt, struct thread *td) { struct cam_periph *periph; struct pass_softc *softc; struct mtx *mtx; periph = (struct cam_periph *)dev->si_drv1; mtx = cam_periph_mtx(periph); mtx_lock(mtx); softc = periph->softc; softc->open_count--; if (softc->open_count == 0) { struct pass_io_req *io_req, *io_req2; TAILQ_FOREACH_SAFE(io_req, &softc->done_queue, links, io_req2) { TAILQ_REMOVE(&softc->done_queue, io_req, links); passiocleanup(softc, io_req); uma_zfree(softc->pass_zone, io_req); } TAILQ_FOREACH_SAFE(io_req, &softc->incoming_queue, links, io_req2) { TAILQ_REMOVE(&softc->incoming_queue, io_req, links); passiocleanup(softc, io_req); uma_zfree(softc->pass_zone, io_req); } /* * If there are any active I/Os, we need to forcibly acquire a * reference to the peripheral so that we don't go away * before they complete. We'll release the reference when * the abandoned queue is empty. */ io_req = TAILQ_FIRST(&softc->active_queue); if ((io_req != NULL) && (softc->flags & PASS_FLAG_ABANDONED_REF_SET) == 0) { cam_periph_doacquire(periph); softc->flags |= PASS_FLAG_ABANDONED_REF_SET; } /* * Since the I/O in the active queue is not under our * control, just set a flag so that we can clean it up when * it completes and put it on the abandoned queue. This * will prevent our sending spurious completions in the * event that the device is opened again before these I/Os * complete. */ TAILQ_FOREACH_SAFE(io_req, &softc->active_queue, links, io_req2) { TAILQ_REMOVE(&softc->active_queue, io_req, links); io_req->flags |= PASS_IO_ABANDONED; TAILQ_INSERT_TAIL(&softc->abandoned_queue, io_req, links); } } cam_periph_release_locked(periph); /* * We reference the lock directly here, instead of using * cam_periph_unlock(). The reason is that the call to * cam_periph_release_locked() above could result in the periph * getting freed. If that is the case, dereferencing the periph * with a cam_periph_unlock() call would cause a page fault. * * cam_periph_release() avoids this problem using the same method, * but we're manually acquiring and dropping the lock here to * protect the open count and avoid another lock acquisition and * release. */ mtx_unlock(mtx); return (0); } static void passstart(struct cam_periph *periph, union ccb *start_ccb) { struct pass_softc *softc; softc = (struct pass_softc *)periph->softc; switch (softc->state) { case PASS_STATE_NORMAL: { struct pass_io_req *io_req; /* * Check for any queued I/O requests that require an * allocated slot. */ io_req = TAILQ_FIRST(&softc->incoming_queue); if (io_req == NULL) { xpt_release_ccb(start_ccb); break; } TAILQ_REMOVE(&softc->incoming_queue, io_req, links); TAILQ_INSERT_TAIL(&softc->active_queue, io_req, links); /* * Merge the user's CCB into the allocated CCB. */ xpt_merge_ccb(start_ccb, &io_req->ccb); start_ccb->ccb_h.ccb_type = PASS_CCB_QUEUED_IO; start_ccb->ccb_h.ccb_ioreq = io_req; start_ccb->ccb_h.cbfcnp = passdone; io_req->alloced_ccb = start_ccb; binuptime(&io_req->start_time); devstat_start_transaction(softc->device_stats, &io_req->start_time); xpt_action(start_ccb); /* * If we have any more I/O waiting, schedule ourselves again. */ if (!TAILQ_EMPTY(&softc->incoming_queue)) xpt_schedule(periph, CAM_PRIORITY_NORMAL); break; } default: break; } } static void passdone(struct cam_periph *periph, union ccb *done_ccb) { struct pass_softc *softc; struct ccb_scsiio *csio; softc = (struct pass_softc *)periph->softc; cam_periph_assert(periph, MA_OWNED); csio = &done_ccb->csio; switch (csio->ccb_h.ccb_type) { case PASS_CCB_QUEUED_IO: { struct pass_io_req *io_req; io_req = done_ccb->ccb_h.ccb_ioreq; #if 0 xpt_print(periph->path, "%s: called for user CCB %p\n", __func__, io_req->user_ccb_ptr); #endif if (((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) && (done_ccb->ccb_h.flags & CAM_PASS_ERR_RECOVER) && ((io_req->flags & PASS_IO_ABANDONED) == 0)) { int error; error = passerror(done_ccb, CAM_RETRY_SELTO, SF_RETRY_UA | SF_NO_PRINT); if (error == ERESTART) { /* * A retry was scheduled, so * just return. */ return; } } /* * Copy the allocated CCB contents back to the malloced CCB * so we can give status back to the user when he requests it. */ bcopy(done_ccb, &io_req->ccb, sizeof(*done_ccb)); /* * Log data/transaction completion with devstat(9). */ switch (done_ccb->ccb_h.func_code) { case XPT_SCSI_IO: devstat_end_transaction(softc->device_stats, done_ccb->csio.dxfer_len - done_ccb->csio.resid, done_ccb->csio.tag_action & 0x3, ((done_ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE) ? DEVSTAT_NO_DATA : (done_ccb->ccb_h.flags & CAM_DIR_OUT) ? DEVSTAT_WRITE : DEVSTAT_READ, NULL, &io_req->start_time); break; case XPT_ATA_IO: devstat_end_transaction(softc->device_stats, done_ccb->ataio.dxfer_len - done_ccb->ataio.resid, 0, /* Not used in ATA */ ((done_ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE) ? DEVSTAT_NO_DATA : (done_ccb->ccb_h.flags & CAM_DIR_OUT) ? DEVSTAT_WRITE : DEVSTAT_READ, NULL, &io_req->start_time); break; case XPT_SMP_IO: /* * XXX KDM this isn't quite right, but there isn't * currently an easy way to represent a bidirectional * transfer in devstat. The only way to do it * and have the byte counts come out right would * mean that we would have to record two * transactions, one for the request and one for the * response. For now, so that we report something, * just treat the entire thing as a read. */ devstat_end_transaction(softc->device_stats, done_ccb->smpio.smp_request_len + done_ccb->smpio.smp_response_len, DEVSTAT_TAG_SIMPLE, DEVSTAT_READ, NULL, &io_req->start_time); break; default: devstat_end_transaction(softc->device_stats, 0, DEVSTAT_TAG_NONE, DEVSTAT_NO_DATA, NULL, &io_req->start_time); break; } /* * In the normal case, take the completed I/O off of the * active queue and put it on the done queue. Notitfy the * user that we have a completed I/O. */ if ((io_req->flags & PASS_IO_ABANDONED) == 0) { TAILQ_REMOVE(&softc->active_queue, io_req, links); TAILQ_INSERT_TAIL(&softc->done_queue, io_req, links); selwakeuppri(&softc->read_select, PRIBIO); KNOTE_LOCKED(&softc->read_select.si_note, 0); } else { /* * In the case of an abandoned I/O (final close * without fetching the I/O), take it off of the * abandoned queue and free it. */ TAILQ_REMOVE(&softc->abandoned_queue, io_req, links); passiocleanup(softc, io_req); uma_zfree(softc->pass_zone, io_req); /* * Release the done_ccb here, since we may wind up * freeing the peripheral when we decrement the * reference count below. */ xpt_release_ccb(done_ccb); /* * If the abandoned queue is empty, we can release * our reference to the periph since we won't have * any more completions coming. */ if ((TAILQ_EMPTY(&softc->abandoned_queue)) && (softc->flags & PASS_FLAG_ABANDONED_REF_SET)) { softc->flags &= ~PASS_FLAG_ABANDONED_REF_SET; cam_periph_release_locked(periph); } /* * We have already released the CCB, so we can * return. */ return; } break; } } xpt_release_ccb(done_ccb); } static int passcreatezone(struct cam_periph *periph) { struct pass_softc *softc; int error; error = 0; softc = (struct pass_softc *)periph->softc; cam_periph_assert(periph, MA_OWNED); KASSERT(((softc->flags & PASS_FLAG_ZONE_VALID) == 0), ("%s called when the pass(4) zone is valid!\n", __func__)); KASSERT((softc->pass_zone == NULL), ("%s called when the pass(4) zone is allocated!\n", __func__)); if ((softc->flags & PASS_FLAG_ZONE_INPROG) == 0) { /* * We're the first context through, so we need to create * the pass(4) UMA zone for I/O requests. */ softc->flags |= PASS_FLAG_ZONE_INPROG; /* * uma_zcreate() does a blocking (M_WAITOK) allocation, * so we cannot hold a mutex while we call it. */ cam_periph_unlock(periph); softc->pass_zone = uma_zcreate(softc->zone_name, sizeof(struct pass_io_req), NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/ 0); softc->pass_io_zone = uma_zcreate(softc->io_zone_name, softc->io_zone_size, NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/ 0); cam_periph_lock(periph); if ((softc->pass_zone == NULL) || (softc->pass_io_zone == NULL)) { if (softc->pass_zone == NULL) xpt_print(periph->path, "unable to allocate " "IO Req UMA zone\n"); else xpt_print(periph->path, "unable to allocate " "IO UMA zone\n"); softc->flags &= ~PASS_FLAG_ZONE_INPROG; goto bailout; } /* * Set the flags appropriately and notify any other waiters. */ softc->flags &= PASS_FLAG_ZONE_INPROG; softc->flags |= PASS_FLAG_ZONE_VALID; wakeup(&softc->pass_zone); } else { /* * In this case, the UMA zone has not yet been created, but * another context is in the process of creating it. We * need to sleep until the creation is either done or has * failed. */ while ((softc->flags & PASS_FLAG_ZONE_INPROG) && ((softc->flags & PASS_FLAG_ZONE_VALID) == 0)) { error = msleep(&softc->pass_zone, cam_periph_mtx(periph), PRIBIO, "paszon", 0); if (error != 0) goto bailout; } /* * If the zone creation failed, no luck for the user. */ if ((softc->flags & PASS_FLAG_ZONE_VALID) == 0){ error = ENOMEM; goto bailout; } } bailout: return (error); } static void passiocleanup(struct pass_softc *softc, struct pass_io_req *io_req) { union ccb *ccb; u_int8_t **data_ptrs[CAM_PERIPH_MAXMAPS]; int i, numbufs; ccb = &io_req->ccb; switch (ccb->ccb_h.func_code) { case XPT_DEV_MATCH: numbufs = min(io_req->num_bufs, 2); if (numbufs == 1) { data_ptrs[0] = (u_int8_t **)&ccb->cdm.matches; } else { data_ptrs[0] = (u_int8_t **)&ccb->cdm.patterns; data_ptrs[1] = (u_int8_t **)&ccb->cdm.matches; } break; case XPT_SCSI_IO: case XPT_CONT_TARGET_IO: data_ptrs[0] = &ccb->csio.data_ptr; numbufs = min(io_req->num_bufs, 1); break; case XPT_ATA_IO: data_ptrs[0] = &ccb->ataio.data_ptr; numbufs = min(io_req->num_bufs, 1); break; case XPT_SMP_IO: numbufs = min(io_req->num_bufs, 2); data_ptrs[0] = &ccb->smpio.smp_request; data_ptrs[1] = &ccb->smpio.smp_response; break; case XPT_DEV_ADVINFO: numbufs = min(io_req->num_bufs, 1); data_ptrs[0] = (uint8_t **)&ccb->cdai.buf; break; case XPT_NVME_IO: case XPT_NVME_ADMIN: data_ptrs[0] = &ccb->nvmeio.data_ptr; numbufs = min(io_req->num_bufs, 1); break; default: /* allow ourselves to be swapped once again */ return; break; /* NOTREACHED */ } if (io_req->flags & PASS_IO_USER_SEG_MALLOC) { free(io_req->user_segptr, M_SCSIPASS); io_req->user_segptr = NULL; } /* * We only want to free memory we malloced. */ if (io_req->data_flags == CAM_DATA_VADDR) { for (i = 0; i < io_req->num_bufs; i++) { if (io_req->kern_bufs[i] == NULL) continue; free(io_req->kern_bufs[i], M_SCSIPASS); io_req->kern_bufs[i] = NULL; } } else if (io_req->data_flags == CAM_DATA_SG) { for (i = 0; i < io_req->num_kern_segs; i++) { if ((uint8_t *)(uintptr_t) io_req->kern_segptr[i].ds_addr == NULL) continue; uma_zfree(softc->pass_io_zone, (uint8_t *)(uintptr_t) io_req->kern_segptr[i].ds_addr); io_req->kern_segptr[i].ds_addr = 0; } } if (io_req->flags & PASS_IO_KERN_SEG_MALLOC) { free(io_req->kern_segptr, M_SCSIPASS); io_req->kern_segptr = NULL; } if (io_req->data_flags != CAM_DATA_PADDR) { for (i = 0; i < numbufs; i++) { /* * Restore the user's buffer pointers to their * previous values. */ if (io_req->user_bufs[i] != NULL) *data_ptrs[i] = io_req->user_bufs[i]; } } } static int passcopysglist(struct cam_periph *periph, struct pass_io_req *io_req, ccb_flags direction) { bus_size_t kern_watermark, user_watermark, len_copied, len_to_copy; bus_dma_segment_t *user_sglist, *kern_sglist; int i, j, error; error = 0; kern_watermark = 0; user_watermark = 0; len_to_copy = 0; len_copied = 0; user_sglist = io_req->user_segptr; kern_sglist = io_req->kern_segptr; for (i = 0, j = 0; i < io_req->num_user_segs && j < io_req->num_kern_segs;) { uint8_t *user_ptr, *kern_ptr; len_to_copy = min(user_sglist[i].ds_len -user_watermark, kern_sglist[j].ds_len - kern_watermark); user_ptr = (uint8_t *)(uintptr_t)user_sglist[i].ds_addr; user_ptr = user_ptr + user_watermark; kern_ptr = (uint8_t *)(uintptr_t)kern_sglist[j].ds_addr; kern_ptr = kern_ptr + kern_watermark; user_watermark += len_to_copy; kern_watermark += len_to_copy; if (!useracc(user_ptr, len_to_copy, (direction == CAM_DIR_IN) ? VM_PROT_WRITE : VM_PROT_READ)) { xpt_print(periph->path, "%s: unable to access user " "S/G list element %p len %zu\n", __func__, user_ptr, len_to_copy); error = EFAULT; goto bailout; } if (direction == CAM_DIR_IN) { error = copyout(kern_ptr, user_ptr, len_to_copy); if (error != 0) { xpt_print(periph->path, "%s: copyout of %u " "bytes from %p to %p failed with " "error %d\n", __func__, len_to_copy, kern_ptr, user_ptr, error); goto bailout; } } else { error = copyin(user_ptr, kern_ptr, len_to_copy); if (error != 0) { xpt_print(periph->path, "%s: copyin of %u " "bytes from %p to %p failed with " "error %d\n", __func__, len_to_copy, user_ptr, kern_ptr, error); goto bailout; } } len_copied += len_to_copy; if (user_sglist[i].ds_len == user_watermark) { i++; user_watermark = 0; } if (kern_sglist[j].ds_len == kern_watermark) { j++; kern_watermark = 0; } } bailout: return (error); } static int passmemsetup(struct cam_periph *periph, struct pass_io_req *io_req) { union ccb *ccb; struct pass_softc *softc; int numbufs, i; uint8_t **data_ptrs[CAM_PERIPH_MAXMAPS]; uint32_t lengths[CAM_PERIPH_MAXMAPS]; uint32_t dirs[CAM_PERIPH_MAXMAPS]; uint32_t num_segs; uint16_t *seg_cnt_ptr; size_t maxmap; int error; cam_periph_assert(periph, MA_NOTOWNED); softc = periph->softc; error = 0; ccb = &io_req->ccb; maxmap = 0; num_segs = 0; seg_cnt_ptr = NULL; switch(ccb->ccb_h.func_code) { case XPT_DEV_MATCH: if (ccb->cdm.match_buf_len == 0) { printf("%s: invalid match buffer length 0\n", __func__); return(EINVAL); } if (ccb->cdm.pattern_buf_len > 0) { data_ptrs[0] = (u_int8_t **)&ccb->cdm.patterns; lengths[0] = ccb->cdm.pattern_buf_len; dirs[0] = CAM_DIR_OUT; data_ptrs[1] = (u_int8_t **)&ccb->cdm.matches; lengths[1] = ccb->cdm.match_buf_len; dirs[1] = CAM_DIR_IN; numbufs = 2; } else { data_ptrs[0] = (u_int8_t **)&ccb->cdm.matches; lengths[0] = ccb->cdm.match_buf_len; dirs[0] = CAM_DIR_IN; numbufs = 1; } io_req->data_flags = CAM_DATA_VADDR; break; case XPT_SCSI_IO: case XPT_CONT_TARGET_IO: if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE) return(0); /* * The user shouldn't be able to supply a bio. */ if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO) return (EINVAL); io_req->data_flags = ccb->ccb_h.flags & CAM_DATA_MASK; data_ptrs[0] = &ccb->csio.data_ptr; lengths[0] = ccb->csio.dxfer_len; dirs[0] = ccb->ccb_h.flags & CAM_DIR_MASK; num_segs = ccb->csio.sglist_cnt; seg_cnt_ptr = &ccb->csio.sglist_cnt; numbufs = 1; maxmap = softc->maxio; break; case XPT_ATA_IO: if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE) return(0); /* * We only support a single virtual address for ATA I/O. */ if ((ccb->ccb_h.flags & CAM_DATA_MASK) != CAM_DATA_VADDR) return (EINVAL); io_req->data_flags = CAM_DATA_VADDR; data_ptrs[0] = &ccb->ataio.data_ptr; lengths[0] = ccb->ataio.dxfer_len; dirs[0] = ccb->ccb_h.flags & CAM_DIR_MASK; numbufs = 1; maxmap = softc->maxio; break; case XPT_SMP_IO: io_req->data_flags = CAM_DATA_VADDR; data_ptrs[0] = &ccb->smpio.smp_request; lengths[0] = ccb->smpio.smp_request_len; dirs[0] = CAM_DIR_OUT; data_ptrs[1] = &ccb->smpio.smp_response; lengths[1] = ccb->smpio.smp_response_len; dirs[1] = CAM_DIR_IN; numbufs = 2; maxmap = softc->maxio; break; case XPT_DEV_ADVINFO: if (ccb->cdai.bufsiz == 0) return (0); io_req->data_flags = CAM_DATA_VADDR; data_ptrs[0] = (uint8_t **)&ccb->cdai.buf; lengths[0] = ccb->cdai.bufsiz; dirs[0] = CAM_DIR_IN; numbufs = 1; break; case XPT_NVME_ADMIN: case XPT_NVME_IO: if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE) return (0); io_req->data_flags = ccb->ccb_h.flags & CAM_DATA_MASK; - /* - * We only support a single virtual address for NVMe - */ - if ((ccb->ccb_h.flags & CAM_DATA_MASK) != CAM_DATA_VADDR) - return (EINVAL); - data_ptrs[0] = &ccb->nvmeio.data_ptr; lengths[0] = ccb->nvmeio.dxfer_len; dirs[0] = ccb->ccb_h.flags & CAM_DIR_MASK; + num_segs = ccb->nvmeio.sglist_cnt; + seg_cnt_ptr = &ccb->nvmeio.sglist_cnt; numbufs = 1; maxmap = softc->maxio; break; default: return(EINVAL); break; /* NOTREACHED */ } io_req->num_bufs = numbufs; /* * If there is a maximum, check to make sure that the user's * request fits within the limit. In general, we should only have * a maximum length for requests that go to hardware. Otherwise it * is whatever we're able to malloc. */ for (i = 0; i < numbufs; i++) { io_req->user_bufs[i] = *data_ptrs[i]; io_req->dirs[i] = dirs[i]; io_req->lengths[i] = lengths[i]; if (maxmap == 0) continue; if (lengths[i] <= maxmap) continue; xpt_print(periph->path, "%s: data length %u > max allowed %u " "bytes\n", __func__, lengths[i], maxmap); error = EINVAL; goto bailout; } switch (io_req->data_flags) { case CAM_DATA_VADDR: /* Map or copy the buffer into kernel address space */ for (i = 0; i < numbufs; i++) { uint8_t *tmp_buf; /* * If for some reason no length is specified, we * don't need to allocate anything. */ if (io_req->lengths[i] == 0) continue; /* * Make sure that the user's buffer is accessible * to that process. */ if (!useracc(io_req->user_bufs[i], io_req->lengths[i], (io_req->dirs[i] == CAM_DIR_IN) ? VM_PROT_WRITE : VM_PROT_READ)) { xpt_print(periph->path, "%s: user address %p " "length %u is not accessible\n", __func__, io_req->user_bufs[i], io_req->lengths[i]); error = EFAULT; goto bailout; } tmp_buf = malloc(lengths[i], M_SCSIPASS, M_WAITOK | M_ZERO); io_req->kern_bufs[i] = tmp_buf; *data_ptrs[i] = tmp_buf; #if 0 xpt_print(periph->path, "%s: malloced %p len %u, user " "buffer %p, operation: %s\n", __func__, tmp_buf, lengths[i], io_req->user_bufs[i], (dirs[i] == CAM_DIR_IN) ? "read" : "write"); #endif /* * We only need to copy in if the user is writing. */ if (dirs[i] != CAM_DIR_OUT) continue; error = copyin(io_req->user_bufs[i], io_req->kern_bufs[i], lengths[i]); if (error != 0) { xpt_print(periph->path, "%s: copy of user " "buffer from %p to %p failed with " "error %d\n", __func__, io_req->user_bufs[i], io_req->kern_bufs[i], error); goto bailout; } } break; case CAM_DATA_PADDR: /* Pass down the pointer as-is */ break; case CAM_DATA_SG: { size_t sg_length, size_to_go, alloc_size; uint32_t num_segs_needed; /* * Copy the user S/G list in, and then copy in the * individual segments. */ /* * We shouldn't see this, but check just in case. */ if (numbufs != 1) { xpt_print(periph->path, "%s: cannot currently handle " "more than one S/G list per CCB\n", __func__); error = EINVAL; goto bailout; } /* * We have to have at least one segment. */ if (num_segs == 0) { xpt_print(periph->path, "%s: CAM_DATA_SG flag set, " "but sglist_cnt=0!\n", __func__); error = EINVAL; goto bailout; } /* * Make sure the user specified the total length and didn't * just leave it to us to decode the S/G list. */ if (lengths[0] == 0) { xpt_print(periph->path, "%s: no dxfer_len specified, " "but CAM_DATA_SG flag is set!\n", __func__); error = EINVAL; goto bailout; } /* * We allocate buffers in io_zone_size increments for an * S/G list. This will generally be MAXPHYS. */ if (lengths[0] <= softc->io_zone_size) num_segs_needed = 1; else { num_segs_needed = lengths[0] / softc->io_zone_size; if ((lengths[0] % softc->io_zone_size) != 0) num_segs_needed++; } /* Figure out the size of the S/G list */ sg_length = num_segs * sizeof(bus_dma_segment_t); io_req->num_user_segs = num_segs; io_req->num_kern_segs = num_segs_needed; /* Save the user's S/G list pointer for later restoration */ io_req->user_bufs[0] = *data_ptrs[0]; /* * If we have enough segments allocated by default to handle * the length of the user's S/G list, */ if (num_segs > PASS_MAX_SEGS) { io_req->user_segptr = malloc(sizeof(bus_dma_segment_t) * num_segs, M_SCSIPASS, M_WAITOK | M_ZERO); io_req->flags |= PASS_IO_USER_SEG_MALLOC; } else io_req->user_segptr = io_req->user_segs; if (!useracc(*data_ptrs[0], sg_length, VM_PROT_READ)) { xpt_print(periph->path, "%s: unable to access user " "S/G list at %p\n", __func__, *data_ptrs[0]); error = EFAULT; goto bailout; } error = copyin(*data_ptrs[0], io_req->user_segptr, sg_length); if (error != 0) { xpt_print(periph->path, "%s: copy of user S/G list " "from %p to %p failed with error %d\n", __func__, *data_ptrs[0], io_req->user_segptr, error); goto bailout; } if (num_segs_needed > PASS_MAX_SEGS) { io_req->kern_segptr = malloc(sizeof(bus_dma_segment_t) * num_segs_needed, M_SCSIPASS, M_WAITOK | M_ZERO); io_req->flags |= PASS_IO_KERN_SEG_MALLOC; } else { io_req->kern_segptr = io_req->kern_segs; } /* * Allocate the kernel S/G list. */ for (size_to_go = lengths[0], i = 0; size_to_go > 0 && i < num_segs_needed; i++, size_to_go -= alloc_size) { uint8_t *kern_ptr; alloc_size = min(size_to_go, softc->io_zone_size); kern_ptr = uma_zalloc(softc->pass_io_zone, M_WAITOK); io_req->kern_segptr[i].ds_addr = (bus_addr_t)(uintptr_t)kern_ptr; io_req->kern_segptr[i].ds_len = alloc_size; } if (size_to_go > 0) { printf("%s: size_to_go = %zu, software error!\n", __func__, size_to_go); error = EINVAL; goto bailout; } *data_ptrs[0] = (uint8_t *)io_req->kern_segptr; *seg_cnt_ptr = io_req->num_kern_segs; /* * We only need to copy data here if the user is writing. */ if (dirs[0] == CAM_DIR_OUT) error = passcopysglist(periph, io_req, dirs[0]); break; } case CAM_DATA_SG_PADDR: { size_t sg_length; /* * We shouldn't see this, but check just in case. */ if (numbufs != 1) { printf("%s: cannot currently handle more than one " "S/G list per CCB\n", __func__); error = EINVAL; goto bailout; } /* * We have to have at least one segment. */ if (num_segs == 0) { xpt_print(periph->path, "%s: CAM_DATA_SG_PADDR flag " "set, but sglist_cnt=0!\n", __func__); error = EINVAL; goto bailout; } /* * Make sure the user specified the total length and didn't * just leave it to us to decode the S/G list. */ if (lengths[0] == 0) { xpt_print(periph->path, "%s: no dxfer_len specified, " "but CAM_DATA_SG flag is set!\n", __func__); error = EINVAL; goto bailout; } /* Figure out the size of the S/G list */ sg_length = num_segs * sizeof(bus_dma_segment_t); io_req->num_user_segs = num_segs; io_req->num_kern_segs = io_req->num_user_segs; /* Save the user's S/G list pointer for later restoration */ io_req->user_bufs[0] = *data_ptrs[0]; if (num_segs > PASS_MAX_SEGS) { io_req->user_segptr = malloc(sizeof(bus_dma_segment_t) * num_segs, M_SCSIPASS, M_WAITOK | M_ZERO); io_req->flags |= PASS_IO_USER_SEG_MALLOC; } else io_req->user_segptr = io_req->user_segs; io_req->kern_segptr = io_req->user_segptr; error = copyin(*data_ptrs[0], io_req->user_segptr, sg_length); if (error != 0) { xpt_print(periph->path, "%s: copy of user S/G list " "from %p to %p failed with error %d\n", __func__, *data_ptrs[0], io_req->user_segptr, error); goto bailout; } break; } default: case CAM_DATA_BIO: /* * A user shouldn't be attaching a bio to the CCB. It * isn't a user-accessible structure. */ error = EINVAL; break; } bailout: if (error != 0) passiocleanup(softc, io_req); return (error); } static int passmemdone(struct cam_periph *periph, struct pass_io_req *io_req) { struct pass_softc *softc; union ccb *ccb; int error; int i; error = 0; softc = (struct pass_softc *)periph->softc; ccb = &io_req->ccb; switch (io_req->data_flags) { case CAM_DATA_VADDR: /* * Copy back to the user buffer if this was a read. */ for (i = 0; i < io_req->num_bufs; i++) { if (io_req->dirs[i] != CAM_DIR_IN) continue; error = copyout(io_req->kern_bufs[i], io_req->user_bufs[i], io_req->lengths[i]); if (error != 0) { xpt_print(periph->path, "Unable to copy %u " "bytes from %p to user address %p\n", io_req->lengths[i], io_req->kern_bufs[i], io_req->user_bufs[i]); goto bailout; } } break; case CAM_DATA_PADDR: /* Do nothing. The pointer is a physical address already */ break; case CAM_DATA_SG: /* * Copy back to the user buffer if this was a read. * Restore the user's S/G list buffer pointer. */ if (io_req->dirs[0] == CAM_DIR_IN) error = passcopysglist(periph, io_req, io_req->dirs[0]); break; case CAM_DATA_SG_PADDR: /* * Restore the user's S/G list buffer pointer. No need to * copy. */ break; default: case CAM_DATA_BIO: error = EINVAL; break; } bailout: /* * Reset the user's pointers to their original values and free * allocated memory. */ passiocleanup(softc, io_req); return (error); } static int passioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td) { int error; if ((error = passdoioctl(dev, cmd, addr, flag, td)) == ENOTTY) { error = cam_compat_ioctl(dev, cmd, addr, flag, td, passdoioctl); } return (error); } static int passdoioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td) { struct cam_periph *periph; struct pass_softc *softc; int error; uint32_t priority; periph = (struct cam_periph *)dev->si_drv1; cam_periph_lock(periph); softc = (struct pass_softc *)periph->softc; error = 0; switch (cmd) { case CAMIOCOMMAND: { union ccb *inccb; union ccb *ccb; int ccb_malloced; inccb = (union ccb *)addr; #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) if (inccb->ccb_h.func_code == XPT_SCSI_IO) inccb->csio.bio = NULL; #endif if (inccb->ccb_h.flags & CAM_UNLOCKED) { error = EINVAL; break; } /* * Some CCB types, like scan bus and scan lun can only go * through the transport layer device. */ if (inccb->ccb_h.func_code & XPT_FC_XPT_ONLY) { xpt_print(periph->path, "CCB function code %#x is " "restricted to the XPT device\n", inccb->ccb_h.func_code); error = ENODEV; break; } /* Compatibility for RL/priority-unaware code. */ priority = inccb->ccb_h.pinfo.priority; if (priority <= CAM_PRIORITY_OOB) priority += CAM_PRIORITY_OOB + 1; /* * Non-immediate CCBs need a CCB from the per-device pool * of CCBs, which is scheduled by the transport layer. * Immediate CCBs and user-supplied CCBs should just be * malloced. */ if ((inccb->ccb_h.func_code & XPT_FC_QUEUED) && ((inccb->ccb_h.func_code & XPT_FC_USER_CCB) == 0)) { ccb = cam_periph_getccb(periph, priority); ccb_malloced = 0; } else { ccb = xpt_alloc_ccb_nowait(); if (ccb != NULL) xpt_setup_ccb(&ccb->ccb_h, periph->path, priority); ccb_malloced = 1; } if (ccb == NULL) { xpt_print(periph->path, "unable to allocate CCB\n"); error = ENOMEM; break; } error = passsendccb(periph, ccb, inccb); if (ccb_malloced) xpt_free_ccb(ccb); else xpt_release_ccb(ccb); break; } case CAMIOQUEUE: { struct pass_io_req *io_req; union ccb **user_ccb, *ccb; xpt_opcode fc; if ((softc->flags & PASS_FLAG_ZONE_VALID) == 0) { error = passcreatezone(periph); if (error != 0) goto bailout; } /* * We're going to do a blocking allocation for this I/O * request, so we have to drop the lock. */ cam_periph_unlock(periph); io_req = uma_zalloc(softc->pass_zone, M_WAITOK | M_ZERO); ccb = &io_req->ccb; user_ccb = (union ccb **)addr; /* * Unlike the CAMIOCOMMAND ioctl above, we only have a * pointer to the user's CCB, so we have to copy the whole * thing in to a buffer we have allocated (above) instead * of allowing the ioctl code to malloc a buffer and copy * it in. * * This is an advantage for this asynchronous interface, * since we don't want the memory to get freed while the * CCB is outstanding. */ #if 0 xpt_print(periph->path, "Copying user CCB %p to " "kernel address %p\n", *user_ccb, ccb); #endif error = copyin(*user_ccb, ccb, sizeof(*ccb)); if (error != 0) { xpt_print(periph->path, "Copy of user CCB %p to " "kernel address %p failed with error %d\n", *user_ccb, ccb, error); goto camioqueue_error; } #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) if (ccb->ccb_h.func_code == XPT_SCSI_IO) ccb->csio.bio = NULL; #endif if (ccb->ccb_h.flags & CAM_UNLOCKED) { error = EINVAL; goto camioqueue_error; } if (ccb->ccb_h.flags & CAM_CDB_POINTER) { if (ccb->csio.cdb_len > IOCDBLEN) { error = EINVAL; goto camioqueue_error; } error = copyin(ccb->csio.cdb_io.cdb_ptr, ccb->csio.cdb_io.cdb_bytes, ccb->csio.cdb_len); if (error != 0) goto camioqueue_error; ccb->ccb_h.flags &= ~CAM_CDB_POINTER; } /* * Some CCB types, like scan bus and scan lun can only go * through the transport layer device. */ if (ccb->ccb_h.func_code & XPT_FC_XPT_ONLY) { xpt_print(periph->path, "CCB function code %#x is " "restricted to the XPT device\n", ccb->ccb_h.func_code); error = ENODEV; goto camioqueue_error; } /* * Save the user's CCB pointer as well as his linked list * pointers and peripheral private area so that we can * restore these later. */ io_req->user_ccb_ptr = *user_ccb; io_req->user_periph_links = ccb->ccb_h.periph_links; io_req->user_periph_priv = ccb->ccb_h.periph_priv; /* * Now that we've saved the user's values, we can set our * own peripheral private entry. */ ccb->ccb_h.ccb_ioreq = io_req; /* Compatibility for RL/priority-unaware code. */ priority = ccb->ccb_h.pinfo.priority; if (priority <= CAM_PRIORITY_OOB) priority += CAM_PRIORITY_OOB + 1; /* * Setup fields in the CCB like the path and the priority. * The path in particular cannot be done in userland, since * it is a pointer to a kernel data structure. */ xpt_setup_ccb_flags(&ccb->ccb_h, periph->path, priority, ccb->ccb_h.flags); /* * Setup our done routine. There is no way for the user to * have a valid pointer here. */ ccb->ccb_h.cbfcnp = passdone; fc = ccb->ccb_h.func_code; /* * If this function code has memory that can be mapped in * or out, we need to call passmemsetup(). */ if ((fc == XPT_SCSI_IO) || (fc == XPT_ATA_IO) || (fc == XPT_SMP_IO) || (fc == XPT_DEV_MATCH) || (fc == XPT_DEV_ADVINFO) || (fc == XPT_NVME_ADMIN) || (fc == XPT_NVME_IO)) { error = passmemsetup(periph, io_req); if (error != 0) goto camioqueue_error; } else io_req->mapinfo.num_bufs_used = 0; cam_periph_lock(periph); /* * Everything goes on the incoming queue initially. */ TAILQ_INSERT_TAIL(&softc->incoming_queue, io_req, links); /* * If the CCB is queued, and is not a user CCB, then * we need to allocate a slot for it. Call xpt_schedule() * so that our start routine will get called when a CCB is * available. */ if ((fc & XPT_FC_QUEUED) && ((fc & XPT_FC_USER_CCB) == 0)) { xpt_schedule(periph, priority); break; } /* * At this point, the CCB in question is either an * immediate CCB (like XPT_DEV_ADVINFO) or it is a user CCB * and therefore should be malloced, not allocated via a slot. * Remove the CCB from the incoming queue and add it to the * active queue. */ TAILQ_REMOVE(&softc->incoming_queue, io_req, links); TAILQ_INSERT_TAIL(&softc->active_queue, io_req, links); xpt_action(ccb); /* * If this is not a queued CCB (i.e. it is an immediate CCB), * then it is already done. We need to put it on the done * queue for the user to fetch. */ if ((fc & XPT_FC_QUEUED) == 0) { TAILQ_REMOVE(&softc->active_queue, io_req, links); TAILQ_INSERT_TAIL(&softc->done_queue, io_req, links); } break; camioqueue_error: uma_zfree(softc->pass_zone, io_req); cam_periph_lock(periph); break; } case CAMIOGET: { union ccb **user_ccb; struct pass_io_req *io_req; int old_error; user_ccb = (union ccb **)addr; old_error = 0; io_req = TAILQ_FIRST(&softc->done_queue); if (io_req == NULL) { error = ENOENT; break; } /* * Remove the I/O from the done queue. */ TAILQ_REMOVE(&softc->done_queue, io_req, links); /* * We have to drop the lock during the copyout because the * copyout can result in VM faults that require sleeping. */ cam_periph_unlock(periph); /* * Do any needed copies (e.g. for reads) and revert the * pointers in the CCB back to the user's pointers. */ error = passmemdone(periph, io_req); old_error = error; io_req->ccb.ccb_h.periph_links = io_req->user_periph_links; io_req->ccb.ccb_h.periph_priv = io_req->user_periph_priv; #if 0 xpt_print(periph->path, "Copying to user CCB %p from " "kernel address %p\n", *user_ccb, &io_req->ccb); #endif error = copyout(&io_req->ccb, *user_ccb, sizeof(union ccb)); if (error != 0) { xpt_print(periph->path, "Copy to user CCB %p from " "kernel address %p failed with error %d\n", *user_ccb, &io_req->ccb, error); } /* * Prefer the first error we got back, and make sure we * don't overwrite bad status with good. */ if (old_error != 0) error = old_error; cam_periph_lock(periph); /* * At this point, if there was an error, we could potentially * re-queue the I/O and try again. But why? The error * would almost certainly happen again. We might as well * not leak memory. */ uma_zfree(softc->pass_zone, io_req); break; } default: error = cam_periph_ioctl(periph, cmd, addr, passerror); break; } bailout: cam_periph_unlock(periph); return(error); } static int passpoll(struct cdev *dev, int poll_events, struct thread *td) { struct cam_periph *periph; struct pass_softc *softc; int revents; periph = (struct cam_periph *)dev->si_drv1; softc = (struct pass_softc *)periph->softc; revents = poll_events & (POLLOUT | POLLWRNORM); if ((poll_events & (POLLIN | POLLRDNORM)) != 0) { cam_periph_lock(periph); if (!TAILQ_EMPTY(&softc->done_queue)) { revents |= poll_events & (POLLIN | POLLRDNORM); } cam_periph_unlock(periph); if (revents == 0) selrecord(td, &softc->read_select); } return (revents); } static int passkqfilter(struct cdev *dev, struct knote *kn) { struct cam_periph *periph; struct pass_softc *softc; periph = (struct cam_periph *)dev->si_drv1; softc = (struct pass_softc *)periph->softc; kn->kn_hook = (caddr_t)periph; kn->kn_fop = &passread_filtops; knlist_add(&softc->read_select.si_note, kn, 0); return (0); } static void passreadfiltdetach(struct knote *kn) { struct cam_periph *periph; struct pass_softc *softc; periph = (struct cam_periph *)kn->kn_hook; softc = (struct pass_softc *)periph->softc; knlist_remove(&softc->read_select.si_note, kn, 0); } static int passreadfilt(struct knote *kn, long hint) { struct cam_periph *periph; struct pass_softc *softc; int retval; periph = (struct cam_periph *)kn->kn_hook; softc = (struct pass_softc *)periph->softc; cam_periph_assert(periph, MA_OWNED); if (TAILQ_EMPTY(&softc->done_queue)) retval = 0; else retval = 1; return (retval); } /* * Generally, "ccb" should be the CCB supplied by the kernel. "inccb" * should be the CCB that is copied in from the user. */ static int passsendccb(struct cam_periph *periph, union ccb *ccb, union ccb *inccb) { struct pass_softc *softc; struct cam_periph_map_info mapinfo; uint8_t *cmd; xpt_opcode fc; int error; softc = (struct pass_softc *)periph->softc; /* * There are some fields in the CCB header that need to be * preserved, the rest we get from the user. */ xpt_merge_ccb(ccb, inccb); if (ccb->ccb_h.flags & CAM_CDB_POINTER) { cmd = __builtin_alloca(ccb->csio.cdb_len); error = copyin(ccb->csio.cdb_io.cdb_ptr, cmd, ccb->csio.cdb_len); if (error) return (error); ccb->csio.cdb_io.cdb_ptr = cmd; } /* */ ccb->ccb_h.cbfcnp = passdone; /* * Let cam_periph_mapmem do a sanity check on the data pointer format. * Even if no data transfer is needed, it's a cheap check and it * simplifies the code. */ fc = ccb->ccb_h.func_code; if ((fc == XPT_SCSI_IO) || (fc == XPT_ATA_IO) || (fc == XPT_SMP_IO) || (fc == XPT_DEV_MATCH) || (fc == XPT_DEV_ADVINFO) || (fc == XPT_MMC_IO)) { bzero(&mapinfo, sizeof(mapinfo)); /* * cam_periph_mapmem calls into proc and vm functions that can * sleep as well as trigger I/O, so we can't hold the lock. * Dropping it here is reasonably safe. */ cam_periph_unlock(periph); error = cam_periph_mapmem(ccb, &mapinfo, softc->maxio); cam_periph_lock(periph); /* * cam_periph_mapmem returned an error, we can't continue. * Return the error to the user. */ if (error) return(error); } else /* Ensure that the unmap call later on is a no-op. */ mapinfo.num_bufs_used = 0; /* * If the user wants us to perform any error recovery, then honor * that request. Otherwise, it's up to the user to perform any * error recovery. */ cam_periph_runccb(ccb, (ccb->ccb_h.flags & CAM_PASS_ERR_RECOVER) ? passerror : NULL, /* cam_flags */ CAM_RETRY_SELTO, /* sense_flags */ SF_RETRY_UA | SF_NO_PRINT, softc->device_stats); cam_periph_unmapmem(ccb, &mapinfo); ccb->ccb_h.cbfcnp = NULL; ccb->ccb_h.periph_priv = inccb->ccb_h.periph_priv; bcopy(ccb, inccb, sizeof(union ccb)); return(0); } static int passerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags) { struct cam_periph *periph; struct pass_softc *softc; periph = xpt_path_periph(ccb->ccb_h.path); softc = (struct pass_softc *)periph->softc; return(cam_periph_error(ccb, cam_flags, sense_flags, &softc->saved_ccb)); } Index: head/sys/dev/nvme/nvme_private.h =================================================================== --- head/sys/dev/nvme/nvme_private.h (revision 322996) +++ head/sys/dev/nvme/nvme_private.h (revision 322997) @@ -1,531 +1,546 @@ /*- * Copyright (C) 2012-2014 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __NVME_PRIVATE_H__ #define __NVME_PRIVATE_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include "nvme.h" #define DEVICE2SOFTC(dev) ((struct nvme_controller *) device_get_softc(dev)) MALLOC_DECLARE(M_NVME); #define IDT32_PCI_ID 0x80d0111d /* 32 channel board */ #define IDT8_PCI_ID 0x80d2111d /* 8 channel board */ /* * For commands requiring more than 2 PRP entries, one PRP will be * embedded in the command (prp1), and the rest of the PRP entries * will be in a list pointed to by the command (prp2). This means * that real max number of PRP entries we support is 32+1, which * results in a max xfer size of 32*PAGE_SIZE. */ #define NVME_MAX_PRP_LIST_ENTRIES (NVME_MAX_XFER_SIZE / PAGE_SIZE) #define NVME_ADMIN_TRACKERS (16) #define NVME_ADMIN_ENTRIES (128) /* min and max are defined in admin queue attributes section of spec */ #define NVME_MIN_ADMIN_ENTRIES (2) #define NVME_MAX_ADMIN_ENTRIES (4096) /* * NVME_IO_ENTRIES defines the size of an I/O qpair's submission and completion * queues, while NVME_IO_TRACKERS defines the maximum number of I/O that we * will allow outstanding on an I/O qpair at any time. The only advantage in * having IO_ENTRIES > IO_TRACKERS is for debugging purposes - when dumping * the contents of the submission and completion queues, it will show a longer * history of data. */ #define NVME_IO_ENTRIES (256) #define NVME_IO_TRACKERS (128) #define NVME_MIN_IO_TRACKERS (4) #define NVME_MAX_IO_TRACKERS (1024) /* * NVME_MAX_IO_ENTRIES is not defined, since it is specified in CC.MQES * for each controller. */ #define NVME_INT_COAL_TIME (0) /* disabled */ #define NVME_INT_COAL_THRESHOLD (0) /* 0-based */ #define NVME_MAX_NAMESPACES (16) #define NVME_MAX_CONSUMERS (2) #define NVME_MAX_ASYNC_EVENTS (8) #define NVME_DEFAULT_TIMEOUT_PERIOD (30) /* in seconds */ #define NVME_MIN_TIMEOUT_PERIOD (5) #define NVME_MAX_TIMEOUT_PERIOD (120) #define NVME_DEFAULT_RETRY_COUNT (4) /* Maximum log page size to fetch for AERs. */ #define NVME_MAX_AER_LOG_SIZE (4096) /* * Define CACHE_LINE_SIZE here for older FreeBSD versions that do not define * it. */ #ifndef CACHE_LINE_SIZE #define CACHE_LINE_SIZE (64) #endif /* * Use presence of the BIO_UNMAPPED flag to determine whether unmapped I/O * support and the bus_dmamap_load_bio API are available on the target * kernel. This will ease porting back to earlier stable branches at a * later point. */ #ifdef BIO_UNMAPPED #define NVME_UNMAPPED_BIO_SUPPORT #endif extern uma_zone_t nvme_request_zone; extern int32_t nvme_retry_count; struct nvme_completion_poll_status { struct nvme_completion cpl; boolean_t done; }; #define NVME_REQUEST_VADDR 1 #define NVME_REQUEST_NULL 2 /* For requests with no payload. */ #define NVME_REQUEST_UIO 3 #ifdef NVME_UNMAPPED_BIO_SUPPORT #define NVME_REQUEST_BIO 4 #endif +#define NVME_REQUEST_CCB 5 struct nvme_request { struct nvme_command cmd; struct nvme_qpair *qpair; union { void *payload; struct bio *bio; } u; uint32_t type; uint32_t payload_size; boolean_t timeout; nvme_cb_fn_t cb_fn; void *cb_arg; int32_t retries; STAILQ_ENTRY(nvme_request) stailq; }; struct nvme_async_event_request { struct nvme_controller *ctrlr; struct nvme_request *req; struct nvme_completion cpl; uint32_t log_page_id; uint32_t log_page_size; uint8_t log_page_buffer[NVME_MAX_AER_LOG_SIZE]; }; struct nvme_tracker { TAILQ_ENTRY(nvme_tracker) tailq; struct nvme_request *req; struct nvme_qpair *qpair; struct callout timer; bus_dmamap_t payload_dma_map; uint16_t cid; uint64_t *prp; bus_addr_t prp_bus_addr; }; struct nvme_qpair { struct nvme_controller *ctrlr; uint32_t id; uint32_t phase; uint16_t vector; int rid; struct resource *res; void *tag; uint32_t num_entries; uint32_t num_trackers; uint32_t sq_tdbl_off; uint32_t cq_hdbl_off; uint32_t sq_head; uint32_t sq_tail; uint32_t cq_head; int64_t num_cmds; int64_t num_intr_handler_calls; struct nvme_command *cmd; struct nvme_completion *cpl; bus_dma_tag_t dma_tag; bus_dma_tag_t dma_tag_payload; bus_dmamap_t queuemem_map; uint64_t cmd_bus_addr; uint64_t cpl_bus_addr; TAILQ_HEAD(, nvme_tracker) free_tr; TAILQ_HEAD(, nvme_tracker) outstanding_tr; STAILQ_HEAD(, nvme_request) queued_req; struct nvme_tracker **act_tr; boolean_t is_enabled; struct mtx lock __aligned(CACHE_LINE_SIZE); } __aligned(CACHE_LINE_SIZE); struct nvme_namespace { struct nvme_controller *ctrlr; struct nvme_namespace_data data; uint32_t id; uint32_t flags; struct cdev *cdev; void *cons_cookie[NVME_MAX_CONSUMERS]; uint32_t stripesize; struct mtx lock; }; /* * One of these per allocated PCI device. */ struct nvme_controller { device_t dev; struct mtx lock; uint32_t ready_timeout_in_ms; bus_space_tag_t bus_tag; bus_space_handle_t bus_handle; int resource_id; struct resource *resource; /* * The NVMe spec allows for the MSI-X table to be placed in BAR 4/5, * separate from the control registers which are in BAR 0/1. These * members track the mapping of BAR 4/5 for that reason. */ int bar4_resource_id; struct resource *bar4_resource; uint32_t msix_enabled; uint32_t force_intx; uint32_t enable_aborts; uint32_t num_io_queues; uint32_t num_cpus_per_ioq; uint32_t max_hw_pend_io; /* Fields for tracking progress during controller initialization. */ struct intr_config_hook config_hook; uint32_t ns_identified; uint32_t queues_created; struct task reset_task; struct task fail_req_task; struct taskqueue *taskqueue; /* For shared legacy interrupt. */ int rid; struct resource *res; void *tag; bus_dma_tag_t hw_desc_tag; bus_dmamap_t hw_desc_map; /** maximum i/o size in bytes */ uint32_t max_xfer_size; /** minimum page size supported by this controller in bytes */ uint32_t min_page_size; /** interrupt coalescing time period (in microseconds) */ uint32_t int_coal_time; /** interrupt coalescing threshold */ uint32_t int_coal_threshold; /** timeout period in seconds */ uint32_t timeout_period; struct nvme_qpair adminq; struct nvme_qpair *ioq; struct nvme_registers *regs; struct nvme_controller_data cdata; struct nvme_namespace ns[NVME_MAX_NAMESPACES]; struct cdev *cdev; /** bit mask of warning types currently enabled for async events */ union nvme_critical_warning_state async_event_config; uint32_t num_aers; struct nvme_async_event_request aer[NVME_MAX_ASYNC_EVENTS]; void *cons_cookie[NVME_MAX_CONSUMERS]; uint32_t is_resetting; uint32_t is_initialized; uint32_t notification_sent; boolean_t is_failed; STAILQ_HEAD(, nvme_request) fail_req; }; #define nvme_mmio_offsetof(reg) \ offsetof(struct nvme_registers, reg) #define nvme_mmio_read_4(sc, reg) \ bus_space_read_4((sc)->bus_tag, (sc)->bus_handle, \ nvme_mmio_offsetof(reg)) #define nvme_mmio_write_4(sc, reg, val) \ bus_space_write_4((sc)->bus_tag, (sc)->bus_handle, \ nvme_mmio_offsetof(reg), val) #define nvme_mmio_write_8(sc, reg, val) \ do { \ bus_space_write_4((sc)->bus_tag, (sc)->bus_handle, \ nvme_mmio_offsetof(reg), val & 0xFFFFFFFF); \ bus_space_write_4((sc)->bus_tag, (sc)->bus_handle, \ nvme_mmio_offsetof(reg)+4, \ (val & 0xFFFFFFFF00000000UL) >> 32); \ } while (0); #if __FreeBSD_version < 800054 #define wmb() __asm volatile("sfence" ::: "memory") #define mb() __asm volatile("mfence" ::: "memory") #endif #define nvme_printf(ctrlr, fmt, args...) \ device_printf(ctrlr->dev, fmt, ##args) void nvme_ns_test(struct nvme_namespace *ns, u_long cmd, caddr_t arg); void nvme_ctrlr_cmd_identify_controller(struct nvme_controller *ctrlr, void *payload, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_identify_namespace(struct nvme_controller *ctrlr, uint32_t nsid, void *payload, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_set_interrupt_coalescing(struct nvme_controller *ctrlr, uint32_t microseconds, uint32_t threshold, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_get_error_page(struct nvme_controller *ctrlr, struct nvme_error_information_entry *payload, uint32_t num_entries, /* 0 = max */ nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_get_health_information_page(struct nvme_controller *ctrlr, uint32_t nsid, struct nvme_health_information_page *payload, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_get_firmware_page(struct nvme_controller *ctrlr, struct nvme_firmware_page *payload, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_create_io_cq(struct nvme_controller *ctrlr, struct nvme_qpair *io_que, uint16_t vector, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_create_io_sq(struct nvme_controller *ctrlr, struct nvme_qpair *io_que, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_delete_io_cq(struct nvme_controller *ctrlr, struct nvme_qpair *io_que, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_delete_io_sq(struct nvme_controller *ctrlr, struct nvme_qpair *io_que, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_set_num_queues(struct nvme_controller *ctrlr, uint32_t num_queues, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_set_async_event_config(struct nvme_controller *ctrlr, union nvme_critical_warning_state state, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_abort(struct nvme_controller *ctrlr, uint16_t cid, uint16_t sqid, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_completion_poll_cb(void *arg, const struct nvme_completion *cpl); int nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev); void nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev); void nvme_ctrlr_shutdown(struct nvme_controller *ctrlr); int nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr); void nvme_ctrlr_reset(struct nvme_controller *ctrlr); /* ctrlr defined as void * to allow use with config_intrhook. */ void nvme_ctrlr_start_config_hook(void *ctrlr_arg); void nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr, struct nvme_request *req); void nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr, struct nvme_request *req); void nvme_ctrlr_post_failed_request(struct nvme_controller *ctrlr, struct nvme_request *req); int nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id, uint16_t vector, uint32_t num_entries, uint32_t num_trackers, struct nvme_controller *ctrlr); void nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr); void nvme_qpair_process_completions(struct nvme_qpair *qpair); void nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req); void nvme_qpair_reset(struct nvme_qpair *qpair); void nvme_qpair_fail(struct nvme_qpair *qpair); void nvme_qpair_manual_complete_request(struct nvme_qpair *qpair, struct nvme_request *req, uint32_t sct, uint32_t sc, boolean_t print_on_error); void nvme_admin_qpair_enable(struct nvme_qpair *qpair); void nvme_admin_qpair_disable(struct nvme_qpair *qpair); void nvme_admin_qpair_destroy(struct nvme_qpair *qpair); void nvme_io_qpair_enable(struct nvme_qpair *qpair); void nvme_io_qpair_disable(struct nvme_qpair *qpair); void nvme_io_qpair_destroy(struct nvme_qpair *qpair); int nvme_ns_construct(struct nvme_namespace *ns, uint32_t id, struct nvme_controller *ctrlr); void nvme_ns_destruct(struct nvme_namespace *ns); void nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr); void nvme_dump_command(struct nvme_command *cmd); void nvme_dump_completion(struct nvme_completion *cpl); static __inline void nvme_single_map(void *arg, bus_dma_segment_t *seg, int nseg, int error) { uint64_t *bus_addr = (uint64_t *)arg; if (error != 0) printf("nvme_single_map err %d\n", error); *bus_addr = seg[0].ds_addr; } static __inline struct nvme_request * _nvme_allocate_request(nvme_cb_fn_t cb_fn, void *cb_arg) { struct nvme_request *req; req = uma_zalloc(nvme_request_zone, M_NOWAIT | M_ZERO); if (req != NULL) { req->cb_fn = cb_fn; req->cb_arg = cb_arg; req->timeout = TRUE; } return (req); } static __inline struct nvme_request * nvme_allocate_request_vaddr(void *payload, uint32_t payload_size, nvme_cb_fn_t cb_fn, void *cb_arg) { struct nvme_request *req; req = _nvme_allocate_request(cb_fn, cb_arg); if (req != NULL) { req->type = NVME_REQUEST_VADDR; req->u.payload = payload; req->payload_size = payload_size; } return (req); } static __inline struct nvme_request * nvme_allocate_request_null(nvme_cb_fn_t cb_fn, void *cb_arg) { struct nvme_request *req; req = _nvme_allocate_request(cb_fn, cb_arg); if (req != NULL) req->type = NVME_REQUEST_NULL; return (req); } static __inline struct nvme_request * nvme_allocate_request_bio(struct bio *bio, nvme_cb_fn_t cb_fn, void *cb_arg) { struct nvme_request *req; req = _nvme_allocate_request(cb_fn, cb_arg); if (req != NULL) { #ifdef NVME_UNMAPPED_BIO_SUPPORT req->type = NVME_REQUEST_BIO; req->u.bio = bio; #else req->type = NVME_REQUEST_VADDR; req->u.payload = bio->bio_data; req->payload_size = bio->bio_bcount; #endif } + return (req); +} + +static __inline struct nvme_request * +nvme_allocate_request_ccb(union ccb *ccb, nvme_cb_fn_t cb_fn, void *cb_arg) +{ + struct nvme_request *req; + + req = _nvme_allocate_request(cb_fn, cb_arg); + if (req != NULL) { + req->type = NVME_REQUEST_CCB; + req->u.payload = ccb; + } + return (req); } #define nvme_free_request(req) uma_zfree(nvme_request_zone, req) void nvme_notify_async_consumers(struct nvme_controller *ctrlr, const struct nvme_completion *async_cpl, uint32_t log_page_id, void *log_page_buffer, uint32_t log_page_size); void nvme_notify_fail_consumers(struct nvme_controller *ctrlr); void nvme_notify_new_controller(struct nvme_controller *ctrlr); void nvme_ctrlr_intx_handler(void *arg); #endif /* __NVME_PRIVATE_H__ */ Index: head/sys/dev/nvme/nvme_qpair.c =================================================================== --- head/sys/dev/nvme/nvme_qpair.c (revision 322996) +++ head/sys/dev/nvme/nvme_qpair.c (revision 322997) @@ -1,1034 +1,1042 @@ /*- * Copyright (C) 2012-2014 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include "nvme_private.h" static void _nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req); static void nvme_qpair_destroy(struct nvme_qpair *qpair); struct nvme_opcode_string { uint16_t opc; const char * str; }; static struct nvme_opcode_string admin_opcode[] = { { NVME_OPC_DELETE_IO_SQ, "DELETE IO SQ" }, { NVME_OPC_CREATE_IO_SQ, "CREATE IO SQ" }, { NVME_OPC_GET_LOG_PAGE, "GET LOG PAGE" }, { NVME_OPC_DELETE_IO_CQ, "DELETE IO CQ" }, { NVME_OPC_CREATE_IO_CQ, "CREATE IO CQ" }, { NVME_OPC_IDENTIFY, "IDENTIFY" }, { NVME_OPC_ABORT, "ABORT" }, { NVME_OPC_SET_FEATURES, "SET FEATURES" }, { NVME_OPC_GET_FEATURES, "GET FEATURES" }, { NVME_OPC_ASYNC_EVENT_REQUEST, "ASYNC EVENT REQUEST" }, { NVME_OPC_FIRMWARE_ACTIVATE, "FIRMWARE ACTIVATE" }, { NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD, "FIRMWARE IMAGE DOWNLOAD" }, { NVME_OPC_FORMAT_NVM, "FORMAT NVM" }, { NVME_OPC_SECURITY_SEND, "SECURITY SEND" }, { NVME_OPC_SECURITY_RECEIVE, "SECURITY RECEIVE" }, { 0xFFFF, "ADMIN COMMAND" } }; static struct nvme_opcode_string io_opcode[] = { { NVME_OPC_FLUSH, "FLUSH" }, { NVME_OPC_WRITE, "WRITE" }, { NVME_OPC_READ, "READ" }, { NVME_OPC_WRITE_UNCORRECTABLE, "WRITE UNCORRECTABLE" }, { NVME_OPC_COMPARE, "COMPARE" }, { NVME_OPC_DATASET_MANAGEMENT, "DATASET MANAGEMENT" }, { 0xFFFF, "IO COMMAND" } }; static const char * get_admin_opcode_string(uint16_t opc) { struct nvme_opcode_string *entry; entry = admin_opcode; while (entry->opc != 0xFFFF) { if (entry->opc == opc) return (entry->str); entry++; } return (entry->str); } static const char * get_io_opcode_string(uint16_t opc) { struct nvme_opcode_string *entry; entry = io_opcode; while (entry->opc != 0xFFFF) { if (entry->opc == opc) return (entry->str); entry++; } return (entry->str); } static void nvme_admin_qpair_print_command(struct nvme_qpair *qpair, struct nvme_command *cmd) { nvme_printf(qpair->ctrlr, "%s (%02x) sqid:%d cid:%d nsid:%x " "cdw10:%08x cdw11:%08x\n", get_admin_opcode_string(cmd->opc), cmd->opc, qpair->id, cmd->cid, cmd->nsid, cmd->cdw10, cmd->cdw11); } static void nvme_io_qpair_print_command(struct nvme_qpair *qpair, struct nvme_command *cmd) { switch (cmd->opc) { case NVME_OPC_WRITE: case NVME_OPC_READ: case NVME_OPC_WRITE_UNCORRECTABLE: case NVME_OPC_COMPARE: nvme_printf(qpair->ctrlr, "%s sqid:%d cid:%d nsid:%d " "lba:%llu len:%d\n", get_io_opcode_string(cmd->opc), qpair->id, cmd->cid, cmd->nsid, ((unsigned long long)cmd->cdw11 << 32) + cmd->cdw10, (cmd->cdw12 & 0xFFFF) + 1); break; case NVME_OPC_FLUSH: case NVME_OPC_DATASET_MANAGEMENT: nvme_printf(qpair->ctrlr, "%s sqid:%d cid:%d nsid:%d\n", get_io_opcode_string(cmd->opc), qpair->id, cmd->cid, cmd->nsid); break; default: nvme_printf(qpair->ctrlr, "%s (%02x) sqid:%d cid:%d nsid:%d\n", get_io_opcode_string(cmd->opc), cmd->opc, qpair->id, cmd->cid, cmd->nsid); break; } } static void nvme_qpair_print_command(struct nvme_qpair *qpair, struct nvme_command *cmd) { if (qpair->id == 0) nvme_admin_qpair_print_command(qpair, cmd); else nvme_io_qpair_print_command(qpair, cmd); } struct nvme_status_string { uint16_t sc; const char * str; }; static struct nvme_status_string generic_status[] = { { NVME_SC_SUCCESS, "SUCCESS" }, { NVME_SC_INVALID_OPCODE, "INVALID OPCODE" }, { NVME_SC_INVALID_FIELD, "INVALID_FIELD" }, { NVME_SC_COMMAND_ID_CONFLICT, "COMMAND ID CONFLICT" }, { NVME_SC_DATA_TRANSFER_ERROR, "DATA TRANSFER ERROR" }, { NVME_SC_ABORTED_POWER_LOSS, "ABORTED - POWER LOSS" }, { NVME_SC_INTERNAL_DEVICE_ERROR, "INTERNAL DEVICE ERROR" }, { NVME_SC_ABORTED_BY_REQUEST, "ABORTED - BY REQUEST" }, { NVME_SC_ABORTED_SQ_DELETION, "ABORTED - SQ DELETION" }, { NVME_SC_ABORTED_FAILED_FUSED, "ABORTED - FAILED FUSED" }, { NVME_SC_ABORTED_MISSING_FUSED, "ABORTED - MISSING FUSED" }, { NVME_SC_INVALID_NAMESPACE_OR_FORMAT, "INVALID NAMESPACE OR FORMAT" }, { NVME_SC_COMMAND_SEQUENCE_ERROR, "COMMAND SEQUENCE ERROR" }, { NVME_SC_LBA_OUT_OF_RANGE, "LBA OUT OF RANGE" }, { NVME_SC_CAPACITY_EXCEEDED, "CAPACITY EXCEEDED" }, { NVME_SC_NAMESPACE_NOT_READY, "NAMESPACE NOT READY" }, { 0xFFFF, "GENERIC" } }; static struct nvme_status_string command_specific_status[] = { { NVME_SC_COMPLETION_QUEUE_INVALID, "INVALID COMPLETION QUEUE" }, { NVME_SC_INVALID_QUEUE_IDENTIFIER, "INVALID QUEUE IDENTIFIER" }, { NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED, "MAX QUEUE SIZE EXCEEDED" }, { NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED, "ABORT CMD LIMIT EXCEEDED" }, { NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED, "ASYNC LIMIT EXCEEDED" }, { NVME_SC_INVALID_FIRMWARE_SLOT, "INVALID FIRMWARE SLOT" }, { NVME_SC_INVALID_FIRMWARE_IMAGE, "INVALID FIRMWARE IMAGE" }, { NVME_SC_INVALID_INTERRUPT_VECTOR, "INVALID INTERRUPT VECTOR" }, { NVME_SC_INVALID_LOG_PAGE, "INVALID LOG PAGE" }, { NVME_SC_INVALID_FORMAT, "INVALID FORMAT" }, { NVME_SC_FIRMWARE_REQUIRES_RESET, "FIRMWARE REQUIRES RESET" }, { NVME_SC_CONFLICTING_ATTRIBUTES, "CONFLICTING ATTRIBUTES" }, { NVME_SC_INVALID_PROTECTION_INFO, "INVALID PROTECTION INFO" }, { NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE, "WRITE TO RO PAGE" }, { 0xFFFF, "COMMAND SPECIFIC" } }; static struct nvme_status_string media_error_status[] = { { NVME_SC_WRITE_FAULTS, "WRITE FAULTS" }, { NVME_SC_UNRECOVERED_READ_ERROR, "UNRECOVERED READ ERROR" }, { NVME_SC_GUARD_CHECK_ERROR, "GUARD CHECK ERROR" }, { NVME_SC_APPLICATION_TAG_CHECK_ERROR, "APPLICATION TAG CHECK ERROR" }, { NVME_SC_REFERENCE_TAG_CHECK_ERROR, "REFERENCE TAG CHECK ERROR" }, { NVME_SC_COMPARE_FAILURE, "COMPARE FAILURE" }, { NVME_SC_ACCESS_DENIED, "ACCESS DENIED" }, { 0xFFFF, "MEDIA ERROR" } }; static const char * get_status_string(uint16_t sct, uint16_t sc) { struct nvme_status_string *entry; switch (sct) { case NVME_SCT_GENERIC: entry = generic_status; break; case NVME_SCT_COMMAND_SPECIFIC: entry = command_specific_status; break; case NVME_SCT_MEDIA_ERROR: entry = media_error_status; break; case NVME_SCT_VENDOR_SPECIFIC: return ("VENDOR SPECIFIC"); default: return ("RESERVED"); } while (entry->sc != 0xFFFF) { if (entry->sc == sc) return (entry->str); entry++; } return (entry->str); } static void nvme_qpair_print_completion(struct nvme_qpair *qpair, struct nvme_completion *cpl) { nvme_printf(qpair->ctrlr, "%s (%02x/%02x) sqid:%d cid:%d cdw0:%x\n", get_status_string(cpl->status.sct, cpl->status.sc), cpl->status.sct, cpl->status.sc, cpl->sqid, cpl->cid, cpl->cdw0); } static boolean_t nvme_completion_is_retry(const struct nvme_completion *cpl) { /* * TODO: spec is not clear how commands that are aborted due * to TLER will be marked. So for now, it seems * NAMESPACE_NOT_READY is the only case where we should * look at the DNR bit. */ switch (cpl->status.sct) { case NVME_SCT_GENERIC: switch (cpl->status.sc) { case NVME_SC_ABORTED_BY_REQUEST: case NVME_SC_NAMESPACE_NOT_READY: if (cpl->status.dnr) return (0); else return (1); case NVME_SC_INVALID_OPCODE: case NVME_SC_INVALID_FIELD: case NVME_SC_COMMAND_ID_CONFLICT: case NVME_SC_DATA_TRANSFER_ERROR: case NVME_SC_ABORTED_POWER_LOSS: case NVME_SC_INTERNAL_DEVICE_ERROR: case NVME_SC_ABORTED_SQ_DELETION: case NVME_SC_ABORTED_FAILED_FUSED: case NVME_SC_ABORTED_MISSING_FUSED: case NVME_SC_INVALID_NAMESPACE_OR_FORMAT: case NVME_SC_COMMAND_SEQUENCE_ERROR: case NVME_SC_LBA_OUT_OF_RANGE: case NVME_SC_CAPACITY_EXCEEDED: default: return (0); } case NVME_SCT_COMMAND_SPECIFIC: case NVME_SCT_MEDIA_ERROR: case NVME_SCT_VENDOR_SPECIFIC: default: return (0); } } static void nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr, struct nvme_completion *cpl, boolean_t print_on_error) { struct nvme_request *req; boolean_t retry, error; req = tr->req; error = nvme_completion_is_error(cpl); retry = error && nvme_completion_is_retry(cpl) && req->retries < nvme_retry_count; if (error && print_on_error) { nvme_qpair_print_command(qpair, &req->cmd); nvme_qpair_print_completion(qpair, cpl); } qpair->act_tr[cpl->cid] = NULL; KASSERT(cpl->cid == req->cmd.cid, ("cpl cid does not match cmd cid\n")); if (req->cb_fn && !retry) req->cb_fn(req->cb_arg, cpl); mtx_lock(&qpair->lock); callout_stop(&tr->timer); if (retry) { req->retries++; nvme_qpair_submit_tracker(qpair, tr); } else { if (req->type != NVME_REQUEST_NULL) bus_dmamap_unload(qpair->dma_tag_payload, tr->payload_dma_map); nvme_free_request(req); tr->req = NULL; TAILQ_REMOVE(&qpair->outstanding_tr, tr, tailq); TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq); /* * If the controller is in the middle of resetting, don't * try to submit queued requests here - let the reset logic * handle that instead. */ if (!STAILQ_EMPTY(&qpair->queued_req) && !qpair->ctrlr->is_resetting) { req = STAILQ_FIRST(&qpair->queued_req); STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq); _nvme_qpair_submit_request(qpair, req); } } mtx_unlock(&qpair->lock); } static void nvme_qpair_manual_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr, boolean_t print_on_error) { struct nvme_completion cpl; memset(&cpl, 0, sizeof(cpl)); cpl.sqid = qpair->id; cpl.cid = tr->cid; cpl.status.sct = sct; cpl.status.sc = sc; cpl.status.dnr = dnr; nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error); } void nvme_qpair_manual_complete_request(struct nvme_qpair *qpair, struct nvme_request *req, uint32_t sct, uint32_t sc, boolean_t print_on_error) { struct nvme_completion cpl; boolean_t error; memset(&cpl, 0, sizeof(cpl)); cpl.sqid = qpair->id; cpl.status.sct = sct; cpl.status.sc = sc; error = nvme_completion_is_error(&cpl); if (error && print_on_error) { nvme_qpair_print_command(qpair, &req->cmd); nvme_qpair_print_completion(qpair, &cpl); } if (req->cb_fn) req->cb_fn(req->cb_arg, &cpl); nvme_free_request(req); } void nvme_qpair_process_completions(struct nvme_qpair *qpair) { struct nvme_tracker *tr; struct nvme_completion *cpl; qpair->num_intr_handler_calls++; if (!qpair->is_enabled) /* * qpair is not enabled, likely because a controller reset is * is in progress. Ignore the interrupt - any I/O that was * associated with this interrupt will get retried when the * reset is complete. */ return; while (1) { cpl = &qpair->cpl[qpair->cq_head]; if (cpl->status.p != qpair->phase) break; tr = qpair->act_tr[cpl->cid]; if (tr != NULL) { nvme_qpair_complete_tracker(qpair, tr, cpl, TRUE); qpair->sq_head = cpl->sqhd; } else { nvme_printf(qpair->ctrlr, "cpl does not map to outstanding cmd\n"); nvme_dump_completion(cpl); KASSERT(0, ("received completion for unknown cmd\n")); } if (++qpair->cq_head == qpair->num_entries) { qpair->cq_head = 0; qpair->phase = !qpair->phase; } nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl, qpair->cq_head); } } static void nvme_qpair_msix_handler(void *arg) { struct nvme_qpair *qpair = arg; nvme_qpair_process_completions(qpair); } int nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id, uint16_t vector, uint32_t num_entries, uint32_t num_trackers, struct nvme_controller *ctrlr) { struct nvme_tracker *tr; size_t cmdsz, cplsz, prpsz, allocsz, prpmemsz; uint64_t queuemem_phys, prpmem_phys, list_phys; uint8_t *queuemem, *prpmem, *prp_list; int i, err; qpair->id = id; qpair->vector = vector; qpair->num_entries = num_entries; qpair->num_trackers = num_trackers; qpair->ctrlr = ctrlr; if (ctrlr->msix_enabled) { /* * MSI-X vector resource IDs start at 1, so we add one to * the queue's vector to get the corresponding rid to use. */ qpair->rid = vector + 1; qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ, &qpair->rid, RF_ACTIVE); bus_setup_intr(ctrlr->dev, qpair->res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_qpair_msix_handler, qpair, &qpair->tag); } mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF); /* Note: NVMe PRP format is restricted to 4-byte alignment. */ err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev), 4, PAGE_SIZE, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, NVME_MAX_XFER_SIZE, (NVME_MAX_XFER_SIZE/PAGE_SIZE)+1, PAGE_SIZE, 0, NULL, NULL, &qpair->dma_tag_payload); if (err != 0) { nvme_printf(ctrlr, "payload tag create failed %d\n", err); goto out; } /* * Each component must be page aligned, and individual PRP lists * cannot cross a page boundary. */ cmdsz = qpair->num_entries * sizeof(struct nvme_command); cmdsz = roundup2(cmdsz, PAGE_SIZE); cplsz = qpair->num_entries * sizeof(struct nvme_completion); cplsz = roundup2(cplsz, PAGE_SIZE); prpsz = sizeof(uint64_t) * NVME_MAX_PRP_LIST_ENTRIES;; prpmemsz = qpair->num_trackers * prpsz; allocsz = cmdsz + cplsz + prpmemsz; err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev), PAGE_SIZE, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, allocsz, 1, allocsz, 0, NULL, NULL, &qpair->dma_tag); if (err != 0) { nvme_printf(ctrlr, "tag create failed %d\n", err); goto out; } if (bus_dmamem_alloc(qpair->dma_tag, (void **)&queuemem, BUS_DMA_NOWAIT, &qpair->queuemem_map)) { nvme_printf(ctrlr, "failed to alloc qpair memory\n"); goto out; } if (bus_dmamap_load(qpair->dma_tag, qpair->queuemem_map, queuemem, allocsz, nvme_single_map, &queuemem_phys, 0) != 0) { nvme_printf(ctrlr, "failed to load qpair memory\n"); goto out; } qpair->num_cmds = 0; qpair->num_intr_handler_calls = 0; qpair->cmd = (struct nvme_command *)queuemem; qpair->cpl = (struct nvme_completion *)(queuemem + cmdsz); prpmem = (uint8_t *)(queuemem + cmdsz + cplsz); qpair->cmd_bus_addr = queuemem_phys; qpair->cpl_bus_addr = queuemem_phys + cmdsz; prpmem_phys = queuemem_phys + cmdsz + cplsz; qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl); qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl); TAILQ_INIT(&qpair->free_tr); TAILQ_INIT(&qpair->outstanding_tr); STAILQ_INIT(&qpair->queued_req); list_phys = prpmem_phys; prp_list = prpmem; for (i = 0; i < qpair->num_trackers; i++) { if (list_phys + prpsz > prpmem_phys + prpmemsz) { qpair->num_trackers = i; break; } /* * Make sure that the PRP list for this tracker doesn't * overflow to another page. */ if (trunc_page(list_phys) != trunc_page(list_phys + prpsz - 1)) { list_phys = roundup2(list_phys, PAGE_SIZE); prp_list = (uint8_t *)roundup2((uintptr_t)prp_list, PAGE_SIZE); } tr = malloc(sizeof(*tr), M_NVME, M_ZERO | M_WAITOK); bus_dmamap_create(qpair->dma_tag_payload, 0, &tr->payload_dma_map); callout_init(&tr->timer, 1); tr->cid = i; tr->qpair = qpair; tr->prp = (uint64_t *)prp_list; tr->prp_bus_addr = list_phys; TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq); list_phys += prpsz; prp_list += prpsz; } if (qpair->num_trackers == 0) { nvme_printf(ctrlr, "failed to allocate enough trackers\n"); goto out; } qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries, M_NVME, M_ZERO | M_WAITOK); return (0); out: nvme_qpair_destroy(qpair); return (ENOMEM); } static void nvme_qpair_destroy(struct nvme_qpair *qpair) { struct nvme_tracker *tr; if (qpair->tag) bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag); if (mtx_initialized(&qpair->lock)) mtx_destroy(&qpair->lock); if (qpair->res) bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ, rman_get_rid(qpair->res), qpair->res); if (qpair->cmd != NULL) { bus_dmamap_unload(qpair->dma_tag, qpair->queuemem_map); bus_dmamem_free(qpair->dma_tag, qpair->cmd, qpair->queuemem_map); } if (qpair->dma_tag) bus_dma_tag_destroy(qpair->dma_tag); if (qpair->dma_tag_payload) bus_dma_tag_destroy(qpair->dma_tag_payload); if (qpair->act_tr) free(qpair->act_tr, M_NVME); while (!TAILQ_EMPTY(&qpair->free_tr)) { tr = TAILQ_FIRST(&qpair->free_tr); TAILQ_REMOVE(&qpair->free_tr, tr, tailq); bus_dmamap_destroy(qpair->dma_tag, tr->payload_dma_map); free(tr, M_NVME); } } static void nvme_admin_qpair_abort_aers(struct nvme_qpair *qpair) { struct nvme_tracker *tr; tr = TAILQ_FIRST(&qpair->outstanding_tr); while (tr != NULL) { if (tr->req->cmd.opc == NVME_OPC_ASYNC_EVENT_REQUEST) { nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC, NVME_SC_ABORTED_SQ_DELETION, 0, FALSE); tr = TAILQ_FIRST(&qpair->outstanding_tr); } else { tr = TAILQ_NEXT(tr, tailq); } } } void nvme_admin_qpair_destroy(struct nvme_qpair *qpair) { nvme_admin_qpair_abort_aers(qpair); nvme_qpair_destroy(qpair); } void nvme_io_qpair_destroy(struct nvme_qpair *qpair) { nvme_qpair_destroy(qpair); } static void nvme_abort_complete(void *arg, const struct nvme_completion *status) { struct nvme_tracker *tr = arg; /* * If cdw0 == 1, the controller was not able to abort the command * we requested. We still need to check the active tracker array, * to cover race where I/O timed out at same time controller was * completing the I/O. */ if (status->cdw0 == 1 && tr->qpair->act_tr[tr->cid] != NULL) { /* * An I/O has timed out, and the controller was unable to * abort it for some reason. Construct a fake completion * status, and then complete the I/O's tracker manually. */ nvme_printf(tr->qpair->ctrlr, "abort command failed, aborting command manually\n"); nvme_qpair_manual_complete_tracker(tr->qpair, tr, NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, 0, TRUE); } } static void nvme_timeout(void *arg) { struct nvme_tracker *tr = arg; struct nvme_qpair *qpair = tr->qpair; struct nvme_controller *ctrlr = qpair->ctrlr; union csts_register csts; /* Read csts to get value of cfs - controller fatal status. */ csts.raw = nvme_mmio_read_4(ctrlr, csts); if (ctrlr->enable_aborts && csts.bits.cfs == 0) { /* * If aborts are enabled, only use them if the controller is * not reporting fatal status. */ nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id, nvme_abort_complete, tr); } else nvme_ctrlr_reset(ctrlr); } void nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr) { struct nvme_request *req; struct nvme_controller *ctrlr; mtx_assert(&qpair->lock, MA_OWNED); req = tr->req; req->cmd.cid = tr->cid; qpair->act_tr[tr->cid] = tr; ctrlr = qpair->ctrlr; if (req->timeout) #if __FreeBSD_version >= 800030 callout_reset_curcpu(&tr->timer, ctrlr->timeout_period * hz, nvme_timeout, tr); #else callout_reset(&tr->timer, ctrlr->timeout_period * hz, nvme_timeout, tr); #endif /* Copy the command from the tracker to the submission queue. */ memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd)); if (++qpair->sq_tail == qpair->num_entries) qpair->sq_tail = 0; wmb(); nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl, qpair->sq_tail); qpair->num_cmds++; } static void nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error) { struct nvme_tracker *tr = arg; uint32_t cur_nseg; /* * If the mapping operation failed, return immediately. The caller * is responsible for detecting the error status and failing the * tracker manually. */ if (error != 0) { nvme_printf(tr->qpair->ctrlr, "nvme_payload_map err %d\n", error); return; } /* * Note that we specified PAGE_SIZE for alignment and max * segment size when creating the bus dma tags. So here * we can safely just transfer each segment to its * associated PRP entry. */ tr->req->cmd.prp1 = seg[0].ds_addr; if (nseg == 2) { tr->req->cmd.prp2 = seg[1].ds_addr; } else if (nseg > 2) { cur_nseg = 1; tr->req->cmd.prp2 = (uint64_t)tr->prp_bus_addr; while (cur_nseg < nseg) { tr->prp[cur_nseg-1] = (uint64_t)seg[cur_nseg].ds_addr; cur_nseg++; } } else { /* * prp2 should not be used by the controller * since there is only one segment, but set * to 0 just to be safe. */ tr->req->cmd.prp2 = 0; } nvme_qpair_submit_tracker(tr->qpair, tr); } static void _nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req) { struct nvme_tracker *tr; int err = 0; mtx_assert(&qpair->lock, MA_OWNED); tr = TAILQ_FIRST(&qpair->free_tr); req->qpair = qpair; if (tr == NULL || !qpair->is_enabled) { /* * No tracker is available, or the qpair is disabled due to * an in-progress controller-level reset or controller * failure. */ if (qpair->ctrlr->is_failed) { /* * The controller has failed. Post the request to a * task where it will be aborted, so that we do not * invoke the request's callback in the context * of the submission. */ nvme_ctrlr_post_failed_request(qpair->ctrlr, req); } else { /* * Put the request on the qpair's request queue to be * processed when a tracker frees up via a command * completion or when the controller reset is * completed. */ STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq); } return; } TAILQ_REMOVE(&qpair->free_tr, tr, tailq); TAILQ_INSERT_TAIL(&qpair->outstanding_tr, tr, tailq); tr->req = req; switch (req->type) { case NVME_REQUEST_VADDR: KASSERT(req->payload_size <= qpair->ctrlr->max_xfer_size, ("payload_size (%d) exceeds max_xfer_size (%d)\n", req->payload_size, qpair->ctrlr->max_xfer_size)); err = bus_dmamap_load(tr->qpair->dma_tag_payload, tr->payload_dma_map, req->u.payload, req->payload_size, nvme_payload_map, tr, 0); if (err != 0) nvme_printf(qpair->ctrlr, "bus_dmamap_load returned 0x%x!\n", err); break; case NVME_REQUEST_NULL: nvme_qpair_submit_tracker(tr->qpair, tr); break; #ifdef NVME_UNMAPPED_BIO_SUPPORT case NVME_REQUEST_BIO: KASSERT(req->u.bio->bio_bcount <= qpair->ctrlr->max_xfer_size, ("bio->bio_bcount (%jd) exceeds max_xfer_size (%d)\n", (intmax_t)req->u.bio->bio_bcount, qpair->ctrlr->max_xfer_size)); err = bus_dmamap_load_bio(tr->qpair->dma_tag_payload, tr->payload_dma_map, req->u.bio, nvme_payload_map, tr, 0); if (err != 0) nvme_printf(qpair->ctrlr, "bus_dmamap_load_bio returned 0x%x!\n", err); break; #endif + case NVME_REQUEST_CCB: + err = bus_dmamap_load_ccb(tr->qpair->dma_tag_payload, + tr->payload_dma_map, req->u.payload, + nvme_payload_map, tr, 0); + if (err != 0) + nvme_printf(qpair->ctrlr, + "bus_dmamap_load_ccb returned 0x%x!\n", err); + break; default: panic("unknown nvme request type 0x%x\n", req->type); break; } if (err != 0) { /* * The dmamap operation failed, so we manually fail the * tracker here with DATA_TRANSFER_ERROR status. * * nvme_qpair_manual_complete_tracker must not be called * with the qpair lock held. */ mtx_unlock(&qpair->lock); nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC, NVME_SC_DATA_TRANSFER_ERROR, 1 /* do not retry */, TRUE); mtx_lock(&qpair->lock); } } void nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req) { mtx_lock(&qpair->lock); _nvme_qpair_submit_request(qpair, req); mtx_unlock(&qpair->lock); } static void nvme_qpair_enable(struct nvme_qpair *qpair) { qpair->is_enabled = TRUE; } void nvme_qpair_reset(struct nvme_qpair *qpair) { qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0; /* * First time through the completion queue, HW will set phase * bit on completions to 1. So set this to 1 here, indicating * we're looking for a 1 to know which entries have completed. * we'll toggle the bit each time when the completion queue * rolls over. */ qpair->phase = 1; memset(qpair->cmd, 0, qpair->num_entries * sizeof(struct nvme_command)); memset(qpair->cpl, 0, qpair->num_entries * sizeof(struct nvme_completion)); } void nvme_admin_qpair_enable(struct nvme_qpair *qpair) { struct nvme_tracker *tr; struct nvme_tracker *tr_temp; /* * Manually abort each outstanding admin command. Do not retry * admin commands found here, since they will be left over from * a controller reset and its likely the context in which the * command was issued no longer applies. */ TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) { nvme_printf(qpair->ctrlr, "aborting outstanding admin command\n"); nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, 1 /* do not retry */, TRUE); } nvme_qpair_enable(qpair); } void nvme_io_qpair_enable(struct nvme_qpair *qpair) { STAILQ_HEAD(, nvme_request) temp; struct nvme_tracker *tr; struct nvme_tracker *tr_temp; struct nvme_request *req; /* * Manually abort each outstanding I/O. This normally results in a * retry, unless the retry count on the associated request has * reached its limit. */ TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) { nvme_printf(qpair->ctrlr, "aborting outstanding i/o\n"); nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, 0, TRUE); } mtx_lock(&qpair->lock); nvme_qpair_enable(qpair); STAILQ_INIT(&temp); STAILQ_SWAP(&qpair->queued_req, &temp, nvme_request); while (!STAILQ_EMPTY(&temp)) { req = STAILQ_FIRST(&temp); STAILQ_REMOVE_HEAD(&temp, stailq); nvme_printf(qpair->ctrlr, "resubmitting queued i/o\n"); nvme_qpair_print_command(qpair, &req->cmd); _nvme_qpair_submit_request(qpair, req); } mtx_unlock(&qpair->lock); } static void nvme_qpair_disable(struct nvme_qpair *qpair) { struct nvme_tracker *tr; qpair->is_enabled = FALSE; mtx_lock(&qpair->lock); TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq) callout_stop(&tr->timer); mtx_unlock(&qpair->lock); } void nvme_admin_qpair_disable(struct nvme_qpair *qpair) { nvme_qpair_disable(qpair); nvme_admin_qpair_abort_aers(qpair); } void nvme_io_qpair_disable(struct nvme_qpair *qpair) { nvme_qpair_disable(qpair); } void nvme_qpair_fail(struct nvme_qpair *qpair) { struct nvme_tracker *tr; struct nvme_request *req; if (!mtx_initialized(&qpair->lock)) return; mtx_lock(&qpair->lock); while (!STAILQ_EMPTY(&qpair->queued_req)) { req = STAILQ_FIRST(&qpair->queued_req); STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq); nvme_printf(qpair->ctrlr, "failing queued i/o\n"); mtx_unlock(&qpair->lock); nvme_qpair_manual_complete_request(qpair, req, NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, TRUE); mtx_lock(&qpair->lock); } /* Manually abort each outstanding I/O. */ while (!TAILQ_EMPTY(&qpair->outstanding_tr)) { tr = TAILQ_FIRST(&qpair->outstanding_tr); /* * Do not remove the tracker. The abort_tracker path will * do that for us. */ nvme_printf(qpair->ctrlr, "failing outstanding i/o\n"); mtx_unlock(&qpair->lock); nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, 1 /* do not retry */, TRUE); mtx_lock(&qpair->lock); } mtx_unlock(&qpair->lock); } Index: head/sys/dev/nvme/nvme_sim.c =================================================================== --- head/sys/dev/nvme/nvme_sim.c (revision 322996) +++ head/sys/dev/nvme/nvme_sim.c (revision 322997) @@ -1,395 +1,397 @@ /*- * Copyright (c) 2016 Netflix, Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include // Yes, this is wrong. #include #include "nvme_private.h" #define ccb_accb_ptr spriv_ptr0 #define ccb_ctrlr_ptr spriv_ptr1 static void nvme_sim_action(struct cam_sim *sim, union ccb *ccb); static void nvme_sim_poll(struct cam_sim *sim); #define sim2softc(sim) ((struct nvme_sim_softc *)cam_sim_softc(sim)) #define sim2ns(sim) (sim2softc(sim)->s_ns) #define sim2ctrlr(sim) (sim2softc(sim)->s_ctrlr) struct nvme_sim_softc { struct nvme_controller *s_ctrlr; struct nvme_namespace *s_ns; struct cam_sim *s_sim; struct cam_path *s_path; }; static void nvme_sim_nvmeio_done(void *ccb_arg, const struct nvme_completion *cpl) { union ccb *ccb = (union ccb *)ccb_arg; /* * Let the periph know the completion, and let it sort out what * it means. Make our best guess, though for the status code. */ memcpy(&ccb->nvmeio.cpl, cpl, sizeof(*cpl)); if (nvme_completion_is_error(cpl)) ccb->ccb_h.status = CAM_REQ_CMP_ERR; else ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); } static void nvme_sim_nvmeio(struct cam_sim *sim, union ccb *ccb) { struct ccb_nvmeio *nvmeio = &ccb->nvmeio; struct nvme_request *req; void *payload; uint32_t size; struct nvme_controller *ctrlr; ctrlr = sim2ctrlr(sim); payload = nvmeio->data_ptr; size = nvmeio->dxfer_len; /* SG LIST ??? */ if ((nvmeio->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO) req = nvme_allocate_request_bio((struct bio *)payload, nvme_sim_nvmeio_done, ccb); + else if ((nvmeio->ccb_h.flags & CAM_DATA_SG) == CAM_DATA_SG) + req = nvme_allocate_request_ccb(ccb, nvme_sim_nvmeio_done, ccb); else if (payload == NULL) req = nvme_allocate_request_null(nvme_sim_nvmeio_done, ccb); else req = nvme_allocate_request_vaddr(payload, size, nvme_sim_nvmeio_done, ccb); if (req == NULL) { nvmeio->ccb_h.status = CAM_RESRC_UNAVAIL; xpt_done(ccb); return; } memcpy(&req->cmd, &ccb->nvmeio.cmd, sizeof(ccb->nvmeio.cmd)); if (ccb->ccb_h.func_code == XPT_NVME_IO) nvme_ctrlr_submit_io_request(ctrlr, req); else nvme_ctrlr_submit_admin_request(ctrlr, req); ccb->ccb_h.status |= CAM_SIM_QUEUED; } static void nvme_sim_action(struct cam_sim *sim, union ccb *ccb) { struct nvme_controller *ctrlr; struct nvme_namespace *ns; CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE, ("nvme_sim_action: func= %#x\n", ccb->ccb_h.func_code)); /* * XXX when we support multiple namespaces in the base driver we'll need * to revisit how all this gets stored and saved in the periph driver's * reserved areas. Right now we store all three in the softc of the sim. */ ns = sim2ns(sim); ctrlr = sim2ctrlr(sim); mtx_assert(&ctrlr->lock, MA_OWNED); switch (ccb->ccb_h.func_code) { case XPT_CALC_GEOMETRY: /* Calculate Geometry Totally nuts ? XXX */ /* * Only meaningful for old-school SCSI disks since only the SCSI * da driver generates them. Reject all these that slip through. */ /*FALLTHROUGH*/ case XPT_ABORT: /* Abort the specified CCB */ ccb->ccb_h.status = CAM_REQ_INVALID; break; case XPT_SET_TRAN_SETTINGS: /* * NVMe doesn't really have different transfer settings, but * other parts of CAM think failure here is a big deal. */ ccb->ccb_h.status = CAM_REQ_CMP; break; case XPT_PATH_INQ: /* Path routing inquiry */ { struct ccb_pathinq *cpi = &ccb->cpi; /* * NVMe may have multiple LUNs on the same path. Current generation * of NVMe devives support only a single name space. Multiple name * space drives are coming, but it's unclear how we should report * them up the stack. */ cpi->version_num = 1; cpi->hba_inquiry = 0; cpi->target_sprt = 0; cpi->hba_misc = PIM_UNMAPPED /* | PIM_NOSCAN */; cpi->hba_eng_cnt = 0; cpi->max_target = 0; cpi->max_lun = ctrlr->cdata.nn; cpi->maxio = nvme_ns_get_max_io_xfer_size(ns); cpi->initiator_id = 0; cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 4000000; /* 4 GB/s 4 lanes pcie 3 */ strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strlcpy(cpi->hba_vid, "NVMe", HBA_IDLEN); strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->transport = XPORT_NVME; /* XXX XPORT_PCIE ? */ cpi->transport_version = 1; /* XXX Get PCIe spec ? */ cpi->protocol = PROTO_NVME; cpi->protocol_version = NVME_REV_1; /* Groks all 1.x NVMe cards */ cpi->xport_specific.nvme.nsid = ns->id; cpi->ccb_h.status = CAM_REQ_CMP; break; } case XPT_GET_TRAN_SETTINGS: /* Get transport settings */ { struct ccb_trans_settings *cts; struct ccb_trans_settings_nvme *nvmep; struct ccb_trans_settings_nvme *nvmex; cts = &ccb->cts; nvmex = &cts->xport_specific.nvme; nvmep = &cts->proto_specific.nvme; nvmex->valid = CTS_NVME_VALID_SPEC; nvmex->spec_major = 1; /* XXX read from card */ nvmex->spec_minor = 2; nvmex->spec_tiny = 0; nvmep->valid = CTS_NVME_VALID_SPEC; nvmep->spec_major = 1; /* XXX read from card */ nvmep->spec_minor = 2; nvmep->spec_tiny = 0; cts->transport = XPORT_NVME; cts->protocol = PROTO_NVME; cts->ccb_h.status = CAM_REQ_CMP; break; } case XPT_TERM_IO: /* Terminate the I/O process */ /* * every driver handles this, but nothing generates it. Assume * it's OK to just say 'that worked'. */ /*FALLTHROUGH*/ case XPT_RESET_DEV: /* Bus Device Reset the specified device */ case XPT_RESET_BUS: /* Reset the specified bus */ /* * NVMe doesn't really support physically resetting the bus. It's part * of the bus scanning dance, so return sucess to tell the process to * proceed. */ ccb->ccb_h.status = CAM_REQ_CMP; break; case XPT_NVME_IO: /* Execute the requested I/O operation */ case XPT_NVME_ADMIN: /* or Admin operation */ nvme_sim_nvmeio(sim, ccb); return; /* no done */ default: ccb->ccb_h.status = CAM_REQ_INVALID; break; } xpt_done(ccb); } static void nvme_sim_poll(struct cam_sim *sim) { nvme_ctrlr_intx_handler(sim2ctrlr(sim)); } static void * nvme_sim_new_controller(struct nvme_controller *ctrlr) { struct cam_devq *devq; int max_trans; int unit; struct nvme_sim_softc *sc = NULL; max_trans = ctrlr->max_hw_pend_io; unit = device_get_unit(ctrlr->dev); devq = cam_simq_alloc(max_trans); if (devq == NULL) return NULL; sc = malloc(sizeof(*sc), M_NVME, M_ZERO | M_WAITOK); sc->s_ctrlr = ctrlr; sc->s_sim = cam_sim_alloc(nvme_sim_action, nvme_sim_poll, "nvme", sc, unit, &ctrlr->lock, max_trans, max_trans, devq); if (sc->s_sim == NULL) { printf("Failed to allocate a sim\n"); cam_simq_free(devq); free(sc, M_NVME); return NULL; } return sc; } static void nvme_sim_rescan_target(struct nvme_controller *ctrlr, struct cam_path *path) { union ccb *ccb; ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { printf("unable to alloc CCB for rescan\n"); return; } if (xpt_clone_path(&ccb->ccb_h.path, path) != CAM_REQ_CMP) { printf("unable to copy path for rescan\n"); xpt_free_ccb(ccb); return; } xpt_rescan(ccb); } static void * nvme_sim_new_ns(struct nvme_namespace *ns, void *sc_arg) { struct nvme_sim_softc *sc = sc_arg; struct nvme_controller *ctrlr = sc->s_ctrlr; int i; sc->s_ns = ns; /* * XXX this is creating one bus per ns, but it should be one * XXX target per controller, and one LUN per namespace. * XXX Current drives only support one NS, so there's time * XXX to fix it later when new drives arrive. * * XXX I'm pretty sure the xpt_bus_register() call below is * XXX like super lame and it really belongs in the sim_new_ctrlr * XXX callback. Then the create_path below would be pretty close * XXX to being right. Except we should be per-ns not per-ctrlr * XXX data. */ mtx_lock(&ctrlr->lock); /* Create bus */ /* * XXX do I need to lock ctrlr->lock ? * XXX do I need to lock the path? * ata and scsi seem to in their code, but their discovery is * somewhat more asynchronous. We're only every called one at a * time, and nothing is in parallel. */ i = 0; if (xpt_bus_register(sc->s_sim, ctrlr->dev, 0) != CAM_SUCCESS) goto error; i++; if (xpt_create_path(&sc->s_path, /*periph*/NULL, cam_sim_path(sc->s_sim), 1, ns->id) != CAM_REQ_CMP) goto error; i++; sc->s_path->device->nvme_data = nvme_ns_get_data(ns); sc->s_path->device->nvme_cdata = nvme_ctrlr_get_data(ns->ctrlr); /* Scan bus */ nvme_sim_rescan_target(ctrlr, sc->s_path); mtx_unlock(&ctrlr->lock); return ns; error: switch (i) { case 2: xpt_free_path(sc->s_path); case 1: xpt_bus_deregister(cam_sim_path(sc->s_sim)); case 0: cam_sim_free(sc->s_sim, /*free_devq*/TRUE); } mtx_unlock(&ctrlr->lock); return NULL; } static void nvme_sim_controller_fail(void *ctrlr_arg) { /* XXX cleanup XXX */ } struct nvme_consumer *consumer_cookie; static void nvme_sim_init(void) { if (nvme_use_nvd) return; consumer_cookie = nvme_register_consumer(nvme_sim_new_ns, nvme_sim_new_controller, NULL, nvme_sim_controller_fail); } SYSINIT(nvme_sim_register, SI_SUB_DRIVERS, SI_ORDER_ANY, nvme_sim_init, NULL); static void nvme_sim_uninit(void) { if (nvme_use_nvd) return; /* XXX Cleanup */ nvme_unregister_consumer(consumer_cookie); } SYSUNINIT(nvme_sim_unregister, SI_SUB_DRIVERS, SI_ORDER_ANY, nvme_sim_uninit, NULL); Index: head/sys/kern/subr_bus_dma.c =================================================================== --- head/sys/kern/subr_bus_dma.c (revision 322996) +++ head/sys/kern/subr_bus_dma.c (revision 322997) @@ -1,569 +1,569 @@ /*- * Copyright (c) 2012 EMC Corp. * All rights reserved. * * Copyright (c) 1997, 1998 Justin T. Gibbs. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_bus.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Load up data starting at offset within a region specified by a * list of virtual address ranges until either length or the region * are exhausted. */ static int _bus_dmamap_load_vlist(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *list, int sglist_cnt, struct pmap *pmap, int *nsegs, int flags, size_t offset, size_t length) { int error; error = 0; for (; sglist_cnt > 0 && length != 0; sglist_cnt--, list++) { char *addr; size_t ds_len; KASSERT((offset < list->ds_len), ("Invalid mid-segment offset")); addr = (char *)(uintptr_t)list->ds_addr + offset; ds_len = list->ds_len - offset; offset = 0; if (ds_len > length) ds_len = length; length -= ds_len; KASSERT((ds_len != 0), ("Segment length is zero")); error = _bus_dmamap_load_buffer(dmat, map, addr, ds_len, pmap, flags, NULL, nsegs); if (error) break; } return (error); } /* * Load a list of physical addresses. */ static int _bus_dmamap_load_plist(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *list, int sglist_cnt, int *nsegs, int flags) { int error; error = 0; for (; sglist_cnt > 0; sglist_cnt--, list++) { error = _bus_dmamap_load_phys(dmat, map, (vm_paddr_t)list->ds_addr, list->ds_len, flags, NULL, nsegs); if (error) break; } return (error); } /* * Load an mbuf chain. */ static int _bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0, bus_dma_segment_t *segs, int *nsegs, int flags) { struct mbuf *m; int error; error = 0; for (m = m0; m != NULL && error == 0; m = m->m_next) { if (m->m_len > 0) { error = _bus_dmamap_load_buffer(dmat, map, m->m_data, m->m_len, kernel_pmap, flags | BUS_DMA_LOAD_MBUF, segs, nsegs); } } CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", __func__, dmat, flags, error, *nsegs); return (error); } /* * Load from block io. */ static int _bus_dmamap_load_bio(bus_dma_tag_t dmat, bus_dmamap_t map, struct bio *bio, int *nsegs, int flags) { if ((bio->bio_flags & BIO_VLIST) != 0) { bus_dma_segment_t *segs = (bus_dma_segment_t *)bio->bio_data; return (_bus_dmamap_load_vlist(dmat, map, segs, bio->bio_ma_n, kernel_pmap, nsegs, flags, bio->bio_ma_offset, bio->bio_bcount)); } if ((bio->bio_flags & BIO_UNMAPPED) != 0) return (_bus_dmamap_load_ma(dmat, map, bio->bio_ma, bio->bio_bcount, bio->bio_ma_offset, flags, NULL, nsegs)); return (_bus_dmamap_load_buffer(dmat, map, bio->bio_data, bio->bio_bcount, kernel_pmap, flags, NULL, nsegs)); } int bus_dmamap_load_ma_triv(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { vm_paddr_t paddr; bus_size_t len; int error, i; error = 0; for (i = 0; tlen > 0; i++, tlen -= len) { len = min(PAGE_SIZE - ma_offs, tlen); paddr = VM_PAGE_TO_PHYS(ma[i]) + ma_offs; error = _bus_dmamap_load_phys(dmat, map, paddr, len, flags, segs, segp); if (error != 0) break; ma_offs = 0; } return (error); } /* * Load a cam control block. */ static int _bus_dmamap_load_ccb(bus_dma_tag_t dmat, bus_dmamap_t map, union ccb *ccb, int *nsegs, int flags) { struct ccb_hdr *ccb_h; void *data_ptr; int error; uint32_t dxfer_len; uint16_t sglist_cnt; error = 0; ccb_h = &ccb->ccb_h; switch (ccb_h->func_code) { case XPT_SCSI_IO: { struct ccb_scsiio *csio; csio = &ccb->csio; data_ptr = csio->data_ptr; dxfer_len = csio->dxfer_len; sglist_cnt = csio->sglist_cnt; break; } case XPT_CONT_TARGET_IO: { struct ccb_scsiio *ctio; ctio = &ccb->ctio; data_ptr = ctio->data_ptr; dxfer_len = ctio->dxfer_len; sglist_cnt = ctio->sglist_cnt; break; } case XPT_ATA_IO: { struct ccb_ataio *ataio; ataio = &ccb->ataio; data_ptr = ataio->data_ptr; dxfer_len = ataio->dxfer_len; sglist_cnt = 0; break; } case XPT_NVME_IO: case XPT_NVME_ADMIN: { struct ccb_nvmeio *nvmeio; nvmeio = &ccb->nvmeio; data_ptr = nvmeio->data_ptr; dxfer_len = nvmeio->dxfer_len; - sglist_cnt = 0; + sglist_cnt = nvmeio->sglist_cnt; break; } default: panic("_bus_dmamap_load_ccb: Unsupported func code %d", ccb_h->func_code); } switch ((ccb_h->flags & CAM_DATA_MASK)) { case CAM_DATA_VADDR: error = _bus_dmamap_load_buffer(dmat, map, data_ptr, dxfer_len, kernel_pmap, flags, NULL, nsegs); break; case CAM_DATA_PADDR: error = _bus_dmamap_load_phys(dmat, map, (vm_paddr_t)(uintptr_t)data_ptr, dxfer_len, flags, NULL, nsegs); break; case CAM_DATA_SG: error = _bus_dmamap_load_vlist(dmat, map, (bus_dma_segment_t *)data_ptr, sglist_cnt, kernel_pmap, nsegs, flags, 0, dxfer_len); break; case CAM_DATA_SG_PADDR: error = _bus_dmamap_load_plist(dmat, map, (bus_dma_segment_t *)data_ptr, sglist_cnt, nsegs, flags); break; case CAM_DATA_BIO: error = _bus_dmamap_load_bio(dmat, map, (struct bio *)data_ptr, nsegs, flags); break; default: panic("_bus_dmamap_load_ccb: flags 0x%X unimplemented", ccb_h->flags); } return (error); } /* * Load a uio. */ static int _bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, struct uio *uio, int *nsegs, int flags) { bus_size_t resid; bus_size_t minlen; struct iovec *iov; pmap_t pmap; caddr_t addr; int error, i; if (uio->uio_segflg == UIO_USERSPACE) { KASSERT(uio->uio_td != NULL, ("bus_dmamap_load_uio: USERSPACE but no proc")); pmap = vmspace_pmap(uio->uio_td->td_proc->p_vmspace); } else pmap = kernel_pmap; resid = uio->uio_resid; iov = uio->uio_iov; error = 0; for (i = 0; i < uio->uio_iovcnt && resid != 0 && !error; i++) { /* * Now at the first iovec to load. Load each iovec * until we have exhausted the residual count. */ addr = (caddr_t) iov[i].iov_base; minlen = resid < iov[i].iov_len ? resid : iov[i].iov_len; if (minlen > 0) { error = _bus_dmamap_load_buffer(dmat, map, addr, minlen, pmap, flags, NULL, nsegs); resid -= minlen; } } return (error); } /* * Map the buffer buf into bus space using the dmamap map. */ int bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, bus_dmamap_callback_t *callback, void *callback_arg, int flags) { bus_dma_segment_t *segs; struct memdesc mem; int error; int nsegs; if ((flags & BUS_DMA_NOWAIT) == 0) { mem = memdesc_vaddr(buf, buflen); _bus_dmamap_waitok(dmat, map, &mem, callback, callback_arg); } nsegs = -1; error = _bus_dmamap_load_buffer(dmat, map, buf, buflen, kernel_pmap, flags, NULL, &nsegs); nsegs++; CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", __func__, dmat, flags, error, nsegs); if (error == EINPROGRESS) return (error); segs = _bus_dmamap_complete(dmat, map, NULL, nsegs, error); if (error) (*callback)(callback_arg, segs, 0, error); else (*callback)(callback_arg, segs, nsegs, 0); /* * Return ENOMEM to the caller so that it can pass it up the stack. * This error only happens when NOWAIT is set, so deferral is disabled. */ if (error == ENOMEM) return (error); return (0); } int bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0, bus_dmamap_callback2_t *callback, void *callback_arg, int flags) { bus_dma_segment_t *segs; int nsegs, error; M_ASSERTPKTHDR(m0); flags |= BUS_DMA_NOWAIT; nsegs = -1; error = _bus_dmamap_load_mbuf_sg(dmat, map, m0, NULL, &nsegs, flags); ++nsegs; segs = _bus_dmamap_complete(dmat, map, NULL, nsegs, error); if (error) (*callback)(callback_arg, segs, 0, 0, error); else (*callback)(callback_arg, segs, nsegs, m0->m_pkthdr.len, error); CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", __func__, dmat, flags, error, nsegs); return (error); } int bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0, bus_dma_segment_t *segs, int *nsegs, int flags) { int error; flags |= BUS_DMA_NOWAIT; *nsegs = -1; error = _bus_dmamap_load_mbuf_sg(dmat, map, m0, segs, nsegs, flags); ++*nsegs; _bus_dmamap_complete(dmat, map, segs, *nsegs, error); return (error); } int bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, struct uio *uio, bus_dmamap_callback2_t *callback, void *callback_arg, int flags) { bus_dma_segment_t *segs; int nsegs, error; flags |= BUS_DMA_NOWAIT; nsegs = -1; error = _bus_dmamap_load_uio(dmat, map, uio, &nsegs, flags); nsegs++; segs = _bus_dmamap_complete(dmat, map, NULL, nsegs, error); if (error) (*callback)(callback_arg, segs, 0, 0, error); else (*callback)(callback_arg, segs, nsegs, uio->uio_resid, error); CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", __func__, dmat, flags, error, nsegs); return (error); } int bus_dmamap_load_ccb(bus_dma_tag_t dmat, bus_dmamap_t map, union ccb *ccb, bus_dmamap_callback_t *callback, void *callback_arg, int flags) { bus_dma_segment_t *segs; struct ccb_hdr *ccb_h; struct memdesc mem; int error; int nsegs; ccb_h = &ccb->ccb_h; if ((ccb_h->flags & CAM_DIR_MASK) == CAM_DIR_NONE) { callback(callback_arg, NULL, 0, 0); return (0); } if ((flags & BUS_DMA_NOWAIT) == 0) { mem = memdesc_ccb(ccb); _bus_dmamap_waitok(dmat, map, &mem, callback, callback_arg); } nsegs = -1; error = _bus_dmamap_load_ccb(dmat, map, ccb, &nsegs, flags); nsegs++; CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", __func__, dmat, flags, error, nsegs); if (error == EINPROGRESS) return (error); segs = _bus_dmamap_complete(dmat, map, NULL, nsegs, error); if (error) (*callback)(callback_arg, segs, 0, error); else (*callback)(callback_arg, segs, nsegs, error); /* * Return ENOMEM to the caller so that it can pass it up the stack. * This error only happens when NOWAIT is set, so deferral is disabled. */ if (error == ENOMEM) return (error); return (0); } int bus_dmamap_load_bio(bus_dma_tag_t dmat, bus_dmamap_t map, struct bio *bio, bus_dmamap_callback_t *callback, void *callback_arg, int flags) { bus_dma_segment_t *segs; struct memdesc mem; int error; int nsegs; if ((flags & BUS_DMA_NOWAIT) == 0) { mem = memdesc_bio(bio); _bus_dmamap_waitok(dmat, map, &mem, callback, callback_arg); } nsegs = -1; error = _bus_dmamap_load_bio(dmat, map, bio, &nsegs, flags); nsegs++; CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", __func__, dmat, flags, error, nsegs); if (error == EINPROGRESS) return (error); segs = _bus_dmamap_complete(dmat, map, NULL, nsegs, error); if (error) (*callback)(callback_arg, segs, 0, error); else (*callback)(callback_arg, segs, nsegs, error); /* * Return ENOMEM to the caller so that it can pass it up the stack. * This error only happens when NOWAIT is set, so deferral is disabled. */ if (error == ENOMEM) return (error); return (0); } int bus_dmamap_load_mem(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg, int flags) { bus_dma_segment_t *segs; int error; int nsegs; if ((flags & BUS_DMA_NOWAIT) == 0) _bus_dmamap_waitok(dmat, map, mem, callback, callback_arg); nsegs = -1; error = 0; switch (mem->md_type) { case MEMDESC_VADDR: error = _bus_dmamap_load_buffer(dmat, map, mem->u.md_vaddr, mem->md_opaque, kernel_pmap, flags, NULL, &nsegs); break; case MEMDESC_PADDR: error = _bus_dmamap_load_phys(dmat, map, mem->u.md_paddr, mem->md_opaque, flags, NULL, &nsegs); break; case MEMDESC_VLIST: error = _bus_dmamap_load_vlist(dmat, map, mem->u.md_list, mem->md_opaque, kernel_pmap, &nsegs, flags, 0, SIZE_T_MAX); break; case MEMDESC_PLIST: error = _bus_dmamap_load_plist(dmat, map, mem->u.md_list, mem->md_opaque, &nsegs, flags); break; case MEMDESC_BIO: error = _bus_dmamap_load_bio(dmat, map, mem->u.md_bio, &nsegs, flags); break; case MEMDESC_UIO: error = _bus_dmamap_load_uio(dmat, map, mem->u.md_uio, &nsegs, flags); break; case MEMDESC_MBUF: error = _bus_dmamap_load_mbuf_sg(dmat, map, mem->u.md_mbuf, NULL, &nsegs, flags); break; case MEMDESC_CCB: error = _bus_dmamap_load_ccb(dmat, map, mem->u.md_ccb, &nsegs, flags); break; } nsegs++; CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", __func__, dmat, flags, error, nsegs); if (error == EINPROGRESS) return (error); segs = _bus_dmamap_complete(dmat, map, NULL, nsegs, error); if (error) (*callback)(callback_arg, segs, 0, error); else (*callback)(callback_arg, segs, nsegs, 0); /* * Return ENOMEM to the caller so that it can pass it up the stack. * This error only happens when NOWAIT is set, so deferral is disabled. */ if (error == ENOMEM) return (error); return (0); }