Index: projects/iosched/sys/cam/ata/ata_da.c =================================================================== --- projects/iosched/sys/cam/ata/ata_da.c (revision 287875) +++ projects/iosched/sys/cam/ata/ata_da.c (revision 287876) @@ -1,2293 +1,2423 @@ /*- * Copyright (c) 2009 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ada.h" #include #ifdef _KERNEL #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif /* _KERNEL */ #ifndef _KERNEL #include #include #endif /* _KERNEL */ #include #include #include #include #include #include #include #include /* geometry translation */ #ifdef _KERNEL #define ATA_MAX_28BIT_LBA 268435455UL extern int iosched_debug; typedef enum { ADA_STATE_RAHEAD, ADA_STATE_WCACHE, ADA_STATE_NORMAL } ada_state; typedef enum { ADA_FLAG_CAN_48BIT = 0x0002, ADA_FLAG_CAN_FLUSHCACHE = 0x0004, ADA_FLAG_CAN_NCQ = 0x0008, ADA_FLAG_CAN_DMA = 0x0010, ADA_FLAG_NEED_OTAG = 0x0020, ADA_FLAG_WAS_OTAG = 0x0040, ADA_FLAG_CAN_TRIM = 0x0080, ADA_FLAG_OPEN = 0x0100, ADA_FLAG_SCTX_INIT = 0x0200, ADA_FLAG_CAN_CFA = 0x0400, ADA_FLAG_CAN_POWERMGT = 0x0800, ADA_FLAG_CAN_DMA48 = 0x1000, ADA_FLAG_DIRTY = 0x2000, ADA_FLAG_CAN_NCQ_TRIM = 0x4000, /* CAN_TRIM also set */ ADA_FLAG_PIM_CAN_NCQ_TRIM = 0x8000 } ada_flags; typedef enum { ADA_Q_NONE = 0x00, ADA_Q_4K = 0x01, + ADA_Q_NCQ_TRIM_BROKEN = 0x02, } ada_quirks; #define ADA_Q_BIT_STRING \ "\020" \ - "\0014K" + "\0014K" \ + "\002NCQ_TRIM_BROKEN" typedef enum { ADA_CCB_RAHEAD = 0x01, ADA_CCB_WCACHE = 0x02, ADA_CCB_BUFFER_IO = 0x03, ADA_CCB_DUMP = 0x05, ADA_CCB_TRIM = 0x06, ADA_CCB_TYPE_MASK = 0x0F, } ada_ccb_state; /* Offsets into our private area for storing information */ #define ccb_state ppriv_field0 #define ccb_bp ppriv_ptr1 typedef enum { ADA_DELETE_NONE, ADA_DELETE_DISABLE, ADA_DELETE_CFA_ERASE, ADA_DELETE_DSM_TRIM, ADA_DELETE_NCQ_DSM_TRIM, ADA_DELETE_MIN = ADA_DELETE_CFA_ERASE, ADA_DELETE_MAX = ADA_DELETE_NCQ_DSM_TRIM, } ada_delete_methods; static const char *ada_delete_method_names[] = { "NONE", "DISABLE", "CFA_ERASE", "DSM_TRIM", "NCQ_DSM_TRIM" }; #if 0 static const char *ada_delete_method_desc[] = { "NONE", "DISABLED", "CFA Erase", "DSM Trim", "DSM Trim via NCQ" }; #endif struct disk_params { u_int8_t heads; u_int8_t secs_per_track; u_int32_t cylinders; u_int32_t secsize; /* Number of bytes/logical sector */ u_int64_t sectors; /* Total number sectors */ }; #define TRIM_MAX_BLOCKS 8 #define TRIM_MAX_RANGES (TRIM_MAX_BLOCKS * ATA_DSM_BLK_RANGES) struct trim_request { uint8_t data[TRIM_MAX_RANGES * ATA_DSM_RANGE_SIZE]; TAILQ_HEAD(, bio) bps; }; struct ada_softc { struct cam_iosched_softc *cam_iosched; int outstanding_cmds; /* Number of active commands */ int refcount; /* Active xpt_action() calls */ ada_state state; ada_flags flags; ada_quirks quirks; ada_delete_methods delete_method; int trim_max_ranges; int read_ahead; int write_cache; + int unmappedio; + int rotating; #ifdef ADA_TEST_FAILURE int force_read_error; int force_write_error; int periodic_read_error; int periodic_read_count; #endif struct disk_params params; struct disk *disk; struct task sysctl_task; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; struct callout sendordered_c; struct trim_request trim_req; +#ifdef CAM_IO_STATS + struct sysctl_ctx_list sysctl_stats_ctx; + struct sysctl_oid *sysctl_stats_tree; + u_int timeouts; + u_int errors; + u_int invalidations; +#endif }; struct ada_quirk_entry { struct scsi_inquiry_pattern inq_pat; ada_quirks quirks; }; static struct ada_quirk_entry ada_quirk_table[] = { { /* Hitachi Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "Hitachi H??????????E3*", "*" }, /*quirks*/ADA_Q_4K }, { /* Samsung Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "SAMSUNG HD155UI*", "*" }, /*quirks*/ADA_Q_4K }, { /* Samsung Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "SAMSUNG HD204UI*", "*" }, /*quirks*/ADA_Q_4K }, { /* Seagate Barracuda Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST????DL*", "*" }, /*quirks*/ADA_Q_4K }, { /* Seagate Barracuda Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST???DM*", "*" }, /*quirks*/ADA_Q_4K }, { /* Seagate Barracuda Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST????DM*", "*" }, /*quirks*/ADA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST9500423AS*", "*" }, /*quirks*/ADA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST9500424AS*", "*" }, /*quirks*/ADA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST9640423AS*", "*" }, /*quirks*/ADA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST9640424AS*", "*" }, /*quirks*/ADA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST9750420AS*", "*" }, /*quirks*/ADA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST9750422AS*", "*" }, /*quirks*/ADA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST9750423AS*", "*" }, /*quirks*/ADA_Q_4K }, { /* Seagate Momentus Thin Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "ST???LT*", "*" }, /*quirks*/ADA_Q_4K }, { /* WDC Caviar Red Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD????CX*", "*" }, /*quirks*/ADA_Q_4K }, { /* WDC Caviar Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD????RS*", "*" }, /*quirks*/ADA_Q_4K }, { /* WDC Caviar Green/Red Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD????RX*", "*" }, /*quirks*/ADA_Q_4K }, { /* WDC Caviar Red Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD??????CX*", "*" }, /*quirks*/ADA_Q_4K }, { /* WDC Caviar Black Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD??????EX*", "*" }, /*quirks*/ADA_Q_4K }, { /* WDC Caviar Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD??????RS*", "*" }, /*quirks*/ADA_Q_4K }, { /* WDC Caviar Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD??????RX*", "*" }, /*quirks*/ADA_Q_4K }, { /* WDC Scorpio Black Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD???PKT*", "*" }, /*quirks*/ADA_Q_4K }, { /* WDC Scorpio Black Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD?????PKT*", "*" }, /*quirks*/ADA_Q_4K }, { /* WDC Scorpio Blue Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD???PVT*", "*" }, /*quirks*/ADA_Q_4K }, { /* WDC Scorpio Blue Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "WDC WD?????PVT*", "*" }, /*quirks*/ADA_Q_4K }, /* SSDs */ { /* * Corsair Force 2 SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "Corsair CSSD-F*", "*" }, /*quirks*/ADA_Q_4K }, { /* * Corsair Force 3 SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "Corsair Force 3*", "*" }, /*quirks*/ADA_Q_4K }, { /* * Corsair Neutron GTX SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "Corsair Neutron GTX*", "*" }, /*quirks*/ADA_Q_4K }, { /* * Corsair Force GT & GS SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "Corsair Force G*", "*" }, /*quirks*/ADA_Q_4K }, { /* * Crucial M4 SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "M4-CT???M4SSD2*", "*" }, /*quirks*/ADA_Q_4K }, { /* + * Crucial M500 SSDs EU07 firmware + * NCQ Trim works ? + */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "Crucial CT*M500*", "EU07" }, + /*quirks*/0 + }, + { + /* + * Crucial M500 SSDs all other firmware + * NCQ Trim doesn't work + */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "Crucial CT*M500*", "*" }, + /*quirks*/ADA_Q_NCQ_TRIM_BROKEN + }, + { + /* + * Crucial M550 SSDs + * NCQ Trim doesn't work, but only on MU01 firmware + */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "Crucial CT*M550*", "MU01" }, + /*quirks*/ADA_Q_NCQ_TRIM_BROKEN + }, + { + /* + * Crucial MX100 SSDs + * NCQ Trim doesn't work, but only on MU01 firmware + */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "Crucial CT*MX100*", "MU01" }, + /*quirks*/ADA_Q_NCQ_TRIM_BROKEN + }, + { + /* * Crucial RealSSD C300 SSDs * 4k optimised */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "C300-CTFDDAC???MAG*", "*" }, /*quirks*/ADA_Q_4K }, { /* * Intel 320 Series SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "INTEL SSDSA2CW*", "*" }, /*quirks*/ADA_Q_4K }, { /* * Intel 330 Series SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "INTEL SSDSC2CT*", "*" }, /*quirks*/ADA_Q_4K }, { /* * Intel 510 Series SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "INTEL SSDSC2MH*", "*" }, /*quirks*/ADA_Q_4K }, { /* * Intel 520 Series SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "INTEL SSDSC2BW*", "*" }, /*quirks*/ADA_Q_4K }, { /* * Intel X25-M Series SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "INTEL SSDSA2M*", "*" }, /*quirks*/ADA_Q_4K }, { /* * Kingston E100 Series SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "KINGSTON SE100S3*", "*" }, /*quirks*/ADA_Q_4K }, { /* * Kingston HyperX 3k SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "KINGSTON SH103S3*", "*" }, /*quirks*/ADA_Q_4K }, { /* * Marvell SSDs (entry taken from OpenSolaris) * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "MARVELL SD88SA02*", "*" }, /*quirks*/ADA_Q_4K }, { /* + * Micron M500 SSDs firmware EU07 + * NCQ Trim works? + */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "Micron M500*", "EU07" }, + /*quirks*/0 + }, + { + /* + * Micron M500 SSDs all other firmware + * NCQ Trim doesn't work + */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "Micron M500*", "*" }, + /*quirks*/ADA_Q_NCQ_TRIM_BROKEN + }, + { + /* + * Micron M5[15]0 SSDs + * NCQ Trim doesn't work, but only MU01 firmware + */ + { T_DIRECT, SIP_MEDIA_FIXED, "*", "Micron M5[15]0*", "MU01" }, + /*quirks*/ADA_Q_NCQ_TRIM_BROKEN + }, + { + /* * OCZ Agility 2 SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "OCZ-AGILITY2*", "*" }, /*quirks*/ADA_Q_4K }, { /* * OCZ Agility 3 SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "OCZ-AGILITY3*", "*" }, /*quirks*/ADA_Q_4K }, { /* * OCZ Deneva R Series SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "DENRSTE251M45*", "*" }, /*quirks*/ADA_Q_4K }, { /* * OCZ Vertex 2 SSDs (inc pro series) * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "OCZ?VERTEX2*", "*" }, /*quirks*/ADA_Q_4K }, { /* * OCZ Vertex 3 SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "OCZ-VERTEX3*", "*" }, /*quirks*/ADA_Q_4K }, { /* * OCZ Vertex 4 SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "OCZ-VERTEX4*", "*" }, /*quirks*/ADA_Q_4K }, { /* * Samsung 830 Series SSDs - * 4k optimised + * 4k optimised, NCQ TRIM broken (normal TRIM fine) */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "SAMSUNG SSD 830 Series*", "*" }, - /*quirks*/ADA_Q_4K + /*quirks*/ADA_Q_4K | ADA_Q_NCQ_TRIM_BROKEN }, { /* * Samsung 840 SSDs - * 4k optimised + * 4k optimised, NCQ TRIM broken (normal TRIM fine) */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "Samsung SSD 840*", "*" }, - /*quirks*/ADA_Q_4K + /*quirks*/ADA_Q_4K | ADA_Q_NCQ_TRIM_BROKEN }, { /* - * Samsung 843T Series SSDs + * Samsung PM843T Series SSDs * 4k optimised */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "SAMSUNG MZ7WD*", "*" }, /*quirks*/ADA_Q_4K }, { /* * Samsung 850 SSDs - * 4k optimised + * 4k optimised, NCQ TRIM broken (normal TRIM fine) */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "Samsung SSD 850*", "*" }, - /*quirks*/ADA_Q_4K + /*quirks*/ADA_Q_4K | ADA_Q_NCQ_TRIM_BROKEN }, { /* * Samsung PM853T Series SSDs * 4k optimised */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "SAMSUNG MZ7GE*", "*" }, /*quirks*/ADA_Q_4K }, { /* * SuperTalent TeraDrive CT SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "FTM??CT25H*", "*" }, /*quirks*/ADA_Q_4K }, { /* * XceedIOPS SATA SSDs * 4k optimised */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "SG9XCS2D*", "*" }, /*quirks*/ADA_Q_4K }, { /* Default */ { T_ANY, SIP_MEDIA_REMOVABLE|SIP_MEDIA_FIXED, /*vendor*/"*", /*product*/"*", /*revision*/"*" }, /*quirks*/0 }, }; static disk_strategy_t adastrategy; static dumper_t adadump; static periph_init_t adainit; static void adaasync(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg); static void adasysctlinit(void *context, int pending); static periph_ctor_t adaregister; static periph_dtor_t adacleanup; static periph_start_t adastart; static periph_oninv_t adaoninvalidate; static void adadone(struct cam_periph *periph, union ccb *done_ccb); static int adaerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags); static void adagetparams(struct cam_periph *periph, struct ccb_getdev *cgd); static timeout_t adasendorderedtag; static void adashutdown(void *arg, int howto); static void adasuspend(void *arg); static void adaresume(void *arg); #ifndef ADA_DEFAULT_LEGACY_ALIASES #define ADA_DEFAULT_LEGACY_ALIASES 1 #endif #ifndef ADA_DEFAULT_TIMEOUT #define ADA_DEFAULT_TIMEOUT 30 /* Timeout in seconds */ #endif #ifndef ADA_DEFAULT_RETRY #define ADA_DEFAULT_RETRY 4 #endif #ifndef ADA_DEFAULT_SEND_ORDERED #define ADA_DEFAULT_SEND_ORDERED 1 #endif #ifndef ADA_DEFAULT_SPINDOWN_SHUTDOWN #define ADA_DEFAULT_SPINDOWN_SHUTDOWN 1 #endif #ifndef ADA_DEFAULT_SPINDOWN_SUSPEND #define ADA_DEFAULT_SPINDOWN_SUSPEND 1 #endif #ifndef ADA_DEFAULT_READ_AHEAD #define ADA_DEFAULT_READ_AHEAD 1 #endif #ifndef ADA_DEFAULT_WRITE_CACHE #define ADA_DEFAULT_WRITE_CACHE 1 #endif #define ADA_RA (softc->read_ahead >= 0 ? \ softc->read_ahead : ada_read_ahead) #define ADA_WC (softc->write_cache >= 0 ? \ softc->write_cache : ada_write_cache) /* * Most platforms map firmware geometry to actual, but some don't. If * not overridden, default to nothing. */ #ifndef ata_disk_firmware_geom_adjust #define ata_disk_firmware_geom_adjust(disk) #endif static int ada_legacy_aliases = ADA_DEFAULT_LEGACY_ALIASES; static int ada_retry_count = ADA_DEFAULT_RETRY; static int ada_default_timeout = ADA_DEFAULT_TIMEOUT; static int ada_send_ordered = ADA_DEFAULT_SEND_ORDERED; static int ada_spindown_shutdown = ADA_DEFAULT_SPINDOWN_SHUTDOWN; static int ada_spindown_suspend = ADA_DEFAULT_SPINDOWN_SUSPEND; static int ada_read_ahead = ADA_DEFAULT_READ_AHEAD; static int ada_write_cache = ADA_DEFAULT_WRITE_CACHE; static SYSCTL_NODE(_kern_cam, OID_AUTO, ada, CTLFLAG_RD, 0, "CAM Direct Access Disk driver"); SYSCTL_INT(_kern_cam_ada, OID_AUTO, legacy_aliases, CTLFLAG_RWTUN, &ada_legacy_aliases, 0, "Create legacy-like device aliases"); SYSCTL_INT(_kern_cam_ada, OID_AUTO, retry_count, CTLFLAG_RWTUN, &ada_retry_count, 0, "Normal I/O retry count"); SYSCTL_INT(_kern_cam_ada, OID_AUTO, default_timeout, CTLFLAG_RWTUN, &ada_default_timeout, 0, "Normal I/O timeout (in seconds)"); SYSCTL_INT(_kern_cam_ada, OID_AUTO, send_ordered, CTLFLAG_RWTUN, &ada_send_ordered, 0, "Send Ordered Tags"); SYSCTL_INT(_kern_cam_ada, OID_AUTO, spindown_shutdown, CTLFLAG_RWTUN, &ada_spindown_shutdown, 0, "Spin down upon shutdown"); SYSCTL_INT(_kern_cam_ada, OID_AUTO, spindown_suspend, CTLFLAG_RWTUN, &ada_spindown_suspend, 0, "Spin down upon suspend"); SYSCTL_INT(_kern_cam_ada, OID_AUTO, read_ahead, CTLFLAG_RWTUN, &ada_read_ahead, 0, "Enable disk read-ahead"); SYSCTL_INT(_kern_cam_ada, OID_AUTO, write_cache, CTLFLAG_RWTUN, &ada_write_cache, 0, "Enable disk write cache"); /* * ADA_ORDEREDTAG_INTERVAL determines how often, relative * to the default timeout, we check to see whether an ordered * tagged transaction is appropriate to prevent simple tag * starvation. Since we'd like to ensure that there is at least * 1/2 of the timeout length left for a starved transaction to * complete after we've sent an ordered tag, we must poll at least * four times in every timeout period. This takes care of the worst * case where a starved transaction starts during an interval that * meets the requirement "don't send an ordered tag" test so it takes * us two intervals to determine that a tag must be sent. */ #ifndef ADA_ORDEREDTAG_INTERVAL #define ADA_ORDEREDTAG_INTERVAL 4 #endif static struct periph_driver adadriver = { adainit, "ada", TAILQ_HEAD_INITIALIZER(adadriver.units), /* generation */ 0 }; static int adadeletemethodsysctl(SYSCTL_HANDLER_ARGS); PERIPHDRIVER_DECLARE(ada, adadriver); static MALLOC_DEFINE(M_ATADA, "ata_da", "ata_da buffers"); static int adaopen(struct disk *dp) { struct cam_periph *periph; struct ada_softc *softc; int error; periph = (struct cam_periph *)dp->d_drv1; if (cam_periph_acquire(periph) != CAM_REQ_CMP) { return(ENXIO); } cam_periph_lock(periph); if ((error = cam_periph_hold(periph, PRIBIO|PCATCH)) != 0) { cam_periph_unlock(periph); cam_periph_release(periph); return (error); } CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH, ("adaopen\n")); softc = (struct ada_softc *)periph->softc; softc->flags |= ADA_FLAG_OPEN; cam_periph_unhold(periph); cam_periph_unlock(periph); return (0); } static int adaclose(struct disk *dp) { struct cam_periph *periph; struct ada_softc *softc; union ccb *ccb; int error; periph = (struct cam_periph *)dp->d_drv1; softc = (struct ada_softc *)periph->softc; cam_periph_lock(periph); CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH, ("adaclose\n")); /* We only sync the cache if the drive is capable of it. */ if ((softc->flags & ADA_FLAG_DIRTY) != 0 && (softc->flags & ADA_FLAG_CAN_FLUSHCACHE) != 0 && (periph->flags & CAM_PERIPH_INVALID) == 0 && cam_periph_hold(periph, PRIBIO) == 0) { ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); cam_fill_ataio(&ccb->ataio, 1, adadone, CAM_DIR_NONE, 0, NULL, 0, ada_default_timeout*1000); if (softc->flags & ADA_FLAG_CAN_48BIT) ata_48bit_cmd(&ccb->ataio, ATA_FLUSHCACHE48, 0, 0, 0); else ata_28bit_cmd(&ccb->ataio, ATA_FLUSHCACHE, 0, 0, 0); error = cam_periph_runccb(ccb, adaerror, /*cam_flags*/0, /*sense_flags*/0, softc->disk->d_devstat); if (error != 0) xpt_print(periph->path, "Synchronize cache failed\n"); else softc->flags &= ~ADA_FLAG_DIRTY; xpt_release_ccb(ccb); cam_periph_unhold(periph); } softc->flags &= ~ADA_FLAG_OPEN; while (softc->refcount != 0) cam_periph_sleep(periph, &softc->refcount, PRIBIO, "adaclose", 1); cam_periph_unlock(periph); cam_periph_release(periph); return (0); } static void adaschedule(struct cam_periph *periph) { struct ada_softc *softc = (struct ada_softc *)periph->softc; if (softc->state != ADA_STATE_NORMAL) return; cam_iosched_schedule(softc->cam_iosched, periph); } /* * Actually translate the requested transfer into one the physical driver * can understand. The transfer is described by a buf and will include * only one physical transfer. */ static void adastrategy(struct bio *bp) { struct cam_periph *periph; struct ada_softc *softc; periph = (struct cam_periph *)bp->bio_disk->d_drv1; softc = (struct ada_softc *)periph->softc; cam_periph_lock(periph); CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("adastrategy(%p)\n", bp)); /* * If the device has been made invalid, error out */ if ((periph->flags & CAM_PERIPH_INVALID) != 0) { cam_periph_unlock(periph); biofinish(bp, NULL, ENXIO); return; } /* * Place it in the queue of disk activities for this disk */ - if (bp->bio_cmd == BIO_DELETE) { - } cam_iosched_queue_work(softc->cam_iosched, bp); /* * Schedule ourselves for performing the work. */ adaschedule(periph); cam_periph_unlock(periph); return; } static int adadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length) { struct cam_periph *periph; struct ada_softc *softc; u_int secsize; union ccb ccb; struct disk *dp; uint64_t lba; uint16_t count; int error = 0; dp = arg; periph = dp->d_drv1; softc = (struct ada_softc *)periph->softc; cam_periph_lock(periph); secsize = softc->params.secsize; lba = offset / secsize; count = length / secsize; if ((periph->flags & CAM_PERIPH_INVALID) != 0) { cam_periph_unlock(periph); return (ENXIO); } if (length > 0) { xpt_setup_ccb(&ccb.ccb_h, periph->path, CAM_PRIORITY_NORMAL); ccb.ccb_h.ccb_state = ADA_CCB_DUMP; cam_fill_ataio(&ccb.ataio, 0, adadone, CAM_DIR_OUT, 0, (u_int8_t *) virtual, length, ada_default_timeout*1000); if ((softc->flags & ADA_FLAG_CAN_48BIT) && (lba + count >= ATA_MAX_28BIT_LBA || count >= 256)) { ata_48bit_cmd(&ccb.ataio, ATA_WRITE_DMA48, 0, lba, count); } else { ata_28bit_cmd(&ccb.ataio, ATA_WRITE_DMA, 0, lba, count); } xpt_polled_action(&ccb); error = cam_periph_error(&ccb, 0, SF_NO_RECOVERY | SF_NO_RETRY, NULL); if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0) cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); if (error != 0) printf("Aborting dump due to I/O error.\n"); cam_periph_unlock(periph); return (error); } if (softc->flags & ADA_FLAG_CAN_FLUSHCACHE) { xpt_setup_ccb(&ccb.ccb_h, periph->path, CAM_PRIORITY_NORMAL); ccb.ccb_h.ccb_state = ADA_CCB_DUMP; cam_fill_ataio(&ccb.ataio, 0, adadone, CAM_DIR_NONE, 0, NULL, 0, - ada_default_timeout*1000); + 5*1000); if (softc->flags & ADA_FLAG_CAN_48BIT) ata_48bit_cmd(&ccb.ataio, ATA_FLUSHCACHE48, 0, 0, 0); else ata_28bit_cmd(&ccb.ataio, ATA_FLUSHCACHE, 0, 0, 0); xpt_polled_action(&ccb); error = cam_periph_error(&ccb, 0, SF_NO_RECOVERY | SF_NO_RETRY, NULL); if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0) cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); if (error != 0) xpt_print(periph->path, "Synchronize cache failed\n"); } cam_periph_unlock(periph); return (error); } static void adainit(void) { cam_status status; /* * Install a global async callback. This callback will * receive async callbacks like "new device found". */ status = xpt_register_async(AC_FOUND_DEVICE, adaasync, NULL, NULL); if (status != CAM_REQ_CMP) { printf("ada: Failed to attach master async callback " "due to status 0x%x!\n", status); } else if (ada_send_ordered) { /* Register our event handlers */ if ((EVENTHANDLER_REGISTER(power_suspend, adasuspend, NULL, EVENTHANDLER_PRI_LAST)) == NULL) printf("adainit: power event registration failed!\n"); if ((EVENTHANDLER_REGISTER(power_resume, adaresume, NULL, EVENTHANDLER_PRI_LAST)) == NULL) printf("adainit: power event registration failed!\n"); if ((EVENTHANDLER_REGISTER(shutdown_post_sync, adashutdown, NULL, SHUTDOWN_PRI_DEFAULT)) == NULL) printf("adainit: shutdown event registration failed!\n"); } } /* * Callback from GEOM, called when it has finished cleaning up its * resources. */ static void adadiskgonecb(struct disk *dp) { struct cam_periph *periph; periph = (struct cam_periph *)dp->d_drv1; cam_periph_release(periph); } static void adaoninvalidate(struct cam_periph *periph) { struct ada_softc *softc; softc = (struct ada_softc *)periph->softc; /* * De-register any async callbacks. */ xpt_register_async(0, adaasync, periph, periph->path); +#ifdef CAM_IO_STATS + softc->invalidations++; +#endif /* * Return all queued I/O with ENXIO. * XXX Handle any transactions queued to the card * with XPT_ABORT_CCB. */ cam_iosched_flush(softc->cam_iosched, NULL, ENXIO); disk_gone(softc->disk); } static void adacleanup(struct cam_periph *periph) { struct ada_softc *softc; softc = (struct ada_softc *)periph->softc; cam_periph_unlock(periph); + cam_iosched_fini(softc->cam_iosched); + /* * If we can't free the sysctl tree, oh well... */ - if ((softc->flags & ADA_FLAG_SCTX_INIT) != 0 - && sysctl_ctx_free(&softc->sysctl_ctx) != 0) { - xpt_print(periph->path, "can't remove sysctl context\n"); + if ((softc->flags & ADA_FLAG_SCTX_INIT) != 0) { +#ifdef CAM_IO_STATS + if (sysctl_ctx_free(&softc->sysctl_stats_ctx) != 0) + xpt_print(periph->path, + "can't remove sysctl stats context\n"); +#endif + if (sysctl_ctx_free(&softc->sysctl_ctx) != 0) + xpt_print(periph->path, + "can't remove sysctl context\n"); } disk_destroy(softc->disk); callout_drain(&softc->sendordered_c); free(softc, M_DEVBUF); cam_periph_lock(periph); } static void adasetdeletemethod(struct ada_softc *softc) { -#if 0 - /* - * Don't set NCQ_DSM_TRIM method by default. It is currently - * a "feature of interest" implicated in some data corruption. - */ if (softc->flags & ADA_FLAG_CAN_NCQ_TRIM) softc->delete_method = ADA_DELETE_NCQ_DSM_TRIM; - else -#endif - if (softc->flags & ADA_FLAG_CAN_TRIM) + else if (softc->flags & ADA_FLAG_CAN_TRIM) softc->delete_method = ADA_DELETE_DSM_TRIM; else if ((softc->flags & ADA_FLAG_CAN_CFA) && !(softc->flags & ADA_FLAG_CAN_48BIT)) softc->delete_method = ADA_DELETE_CFA_ERASE; else softc->delete_method = ADA_DELETE_NONE; } static void adaasync(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg) { struct ccb_getdev cgd; struct cam_periph *periph; struct ada_softc *softc; periph = (struct cam_periph *)callback_arg; switch (code) { case AC_FOUND_DEVICE: { struct ccb_getdev *cgd; cam_status status; cgd = (struct ccb_getdev *)arg; if (cgd == NULL) break; if (cgd->protocol != PROTO_ATA) break; /* * Allocate a peripheral instance for * this device and start the probe * process. */ status = cam_periph_alloc(adaregister, adaoninvalidate, adacleanup, adastart, "ada", CAM_PERIPH_BIO, path, adaasync, AC_FOUND_DEVICE, cgd); if (status != CAM_REQ_CMP && status != CAM_REQ_INPROG) printf("adaasync: Unable to attach to new device " "due to status 0x%x\n", status); break; } case AC_GETDEV_CHANGED: { softc = (struct ada_softc *)periph->softc; xpt_setup_ccb(&cgd.ccb_h, periph->path, CAM_PRIORITY_NORMAL); cgd.ccb_h.func_code = XPT_GDEV_TYPE; xpt_action((union ccb *)&cgd); if ((cgd.ident_data.capabilities1 & ATA_SUPPORT_DMA) && (cgd.inq_flags & SID_DMA)) softc->flags |= ADA_FLAG_CAN_DMA; else softc->flags &= ~ADA_FLAG_CAN_DMA; if (cgd.ident_data.support.command2 & ATA_SUPPORT_ADDRESS48) { softc->flags |= ADA_FLAG_CAN_48BIT; if (cgd.inq_flags & SID_DMA48) softc->flags |= ADA_FLAG_CAN_DMA48; else softc->flags &= ~ADA_FLAG_CAN_DMA48; } else softc->flags &= ~(ADA_FLAG_CAN_48BIT | ADA_FLAG_CAN_DMA48); if ((cgd.ident_data.satacapabilities & ATA_SUPPORT_NCQ) && (cgd.inq_flags & SID_DMA) && (cgd.inq_flags & SID_CmdQue)) softc->flags |= ADA_FLAG_CAN_NCQ; else softc->flags &= ~ADA_FLAG_CAN_NCQ; if ((cgd.ident_data.support_dsm & ATA_SUPPORT_DSM_TRIM) && (cgd.inq_flags & SID_DMA)) { softc->flags |= ADA_FLAG_CAN_TRIM; /* * If we can do RCVSND_FPDMA_QUEUED commands, we may be able to do * NCQ trims, if we support trims at all. We also need support from * the sim do do things properly. Perhaps we should look at log 13 * dword 0 bit 0 and dword 1 bit 0 are set too... */ - if ((softc->flags & ADA_FLAG_PIM_CAN_NCQ_TRIM) != 0 && + if ((softc->quirks & ADA_Q_NCQ_TRIM_BROKEN) == 0 && + (softc->flags & ADA_FLAG_PIM_CAN_NCQ_TRIM) != 0 && (cgd.ident_data.satacapabilities2 & ATA_SUPPORT_RCVSND_FPDMA_QUEUED) != 0 && (softc->flags & ADA_FLAG_CAN_TRIM) != 0) softc->flags |= ADA_FLAG_CAN_NCQ_TRIM; else softc->flags &= ~ADA_FLAG_CAN_NCQ_TRIM; } else softc->flags &= ~(ADA_FLAG_CAN_TRIM | ADA_FLAG_CAN_NCQ_TRIM); adasetdeletemethod(softc); cam_periph_async(periph, code, path, arg); break; } case AC_ADVINFO_CHANGED: { uintptr_t buftype; buftype = (uintptr_t)arg; if (buftype == CDAI_TYPE_PHYS_PATH) { struct ada_softc *softc; softc = periph->softc; disk_attr_changed(softc->disk, "GEOM::physpath", M_NOWAIT); } break; } case AC_SENT_BDR: case AC_BUS_RESET: { softc = (struct ada_softc *)periph->softc; cam_periph_async(periph, code, path, arg); if (softc->state != ADA_STATE_NORMAL) break; xpt_setup_ccb(&cgd.ccb_h, periph->path, CAM_PRIORITY_NORMAL); cgd.ccb_h.func_code = XPT_GDEV_TYPE; xpt_action((union ccb *)&cgd); if (ADA_RA >= 0 && cgd.ident_data.support.command1 & ATA_SUPPORT_LOOKAHEAD) softc->state = ADA_STATE_RAHEAD; else if (ADA_WC >= 0 && cgd.ident_data.support.command1 & ATA_SUPPORT_WRITECACHE) softc->state = ADA_STATE_WCACHE; else break; if (cam_periph_acquire(periph) != CAM_REQ_CMP) softc->state = ADA_STATE_NORMAL; else xpt_schedule(periph, CAM_PRIORITY_DEV); } default: cam_periph_async(periph, code, path, arg); break; } } static void adasysctlinit(void *context, int pending) { struct cam_periph *periph; struct ada_softc *softc; char tmpstr[80], tmpstr2[80]; periph = (struct cam_periph *)context; /* periph was held for us when this task was enqueued */ if ((periph->flags & CAM_PERIPH_INVALID) != 0) { cam_periph_release(periph); return; } softc = (struct ada_softc *)periph->softc; snprintf(tmpstr, sizeof(tmpstr), "CAM ADA unit %d", periph->unit_number); snprintf(tmpstr2, sizeof(tmpstr2), "%d", periph->unit_number); sysctl_ctx_init(&softc->sysctl_ctx); softc->flags |= ADA_FLAG_SCTX_INIT; softc->sysctl_tree = SYSCTL_ADD_NODE(&softc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_kern_cam_ada), OID_AUTO, tmpstr2, CTLFLAG_RD, 0, tmpstr); if (softc->sysctl_tree == NULL) { printf("adasysctlinit: unable to allocate sysctl tree\n"); cam_periph_release(periph); return; } SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "delete_method", CTLTYPE_STRING | CTLFLAG_RW, softc, 0, adadeletemethodsysctl, "A", "BIO_DELETE execution method"); SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "read_ahead", CTLFLAG_RW | CTLFLAG_MPSAFE, &softc->read_ahead, 0, "Enable disk read ahead."); SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "write_cache", CTLFLAG_RW | CTLFLAG_MPSAFE, &softc->write_cache, 0, "Enable disk write cache."); + SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), + OID_AUTO, "unmapped_io", CTLFLAG_RD | CTLFLAG_MPSAFE, + &softc->unmappedio, 0, "Unmapped I/O leaf"); + SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), + OID_AUTO, "rotating", CTLFLAG_RD | CTLFLAG_MPSAFE, + &softc->rotating, 0, "Rotating media"); #ifdef ADA_TEST_FAILURE /* * Add a 'door bell' sysctl which allows one to set it from userland * and cause something bad to happen. For the moment, we only allow * whacking the next read or write. */ SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "force_read_error", CTLFLAG_RW | CTLFLAG_MPSAFE, &softc->force_read_error, 0, "Force a read error for the next N reads."); SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "force_write_error", CTLFLAG_RW | CTLFLAG_MPSAFE, &softc->force_write_error, 0, "Force a write error for the next N writes."); SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "periodic_read_error", CTLFLAG_RW | CTLFLAG_MPSAFE, &softc->periodic_read_error, 0, "Force a read error every N reads (don't set too low)."); #endif + +#ifdef CAM_IO_STATS + softc->sysctl_stats_tree = SYSCTL_ADD_NODE(&softc->sysctl_stats_ctx, + SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "stats", + CTLFLAG_RD, 0, "Statistics"); + SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, + SYSCTL_CHILDREN(softc->sysctl_stats_tree), + OID_AUTO, "timeouts", CTLFLAG_RD | CTLFLAG_MPSAFE, + &softc->timeouts, 0, + "Device timeouts reported by the SIM"); + SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, + SYSCTL_CHILDREN(softc->sysctl_stats_tree), + OID_AUTO, "errors", CTLFLAG_RD | CTLFLAG_MPSAFE, + &softc->errors, 0, + "Transport errors reported by the SIM."); + SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, + SYSCTL_CHILDREN(softc->sysctl_stats_tree), + OID_AUTO, "pack_invalidations", CTLFLAG_RD | CTLFLAG_MPSAFE, + &softc->invalidations, 0, + "Device pack invalidations."); +#endif + cam_iosched_sysctl_init(softc->cam_iosched, &softc->sysctl_ctx, softc->sysctl_tree); cam_periph_release(periph); } static int adagetattr(struct bio *bp) { int ret; struct cam_periph *periph; periph = (struct cam_periph *)bp->bio_disk->d_drv1; cam_periph_lock(periph); ret = xpt_getattr(bp->bio_data, bp->bio_length, bp->bio_attribute, periph->path); cam_periph_unlock(periph); if (ret == 0) bp->bio_completed = bp->bio_length; return ret; } static int adadeletemethodsysctl(SYSCTL_HANDLER_ARGS) { char buf[16]; const char *p; struct ada_softc *softc; int i, error, value, methods; softc = (struct ada_softc *)arg1; value = softc->delete_method; if (value < 0 || value > ADA_DELETE_MAX) p = "UNKNOWN"; else p = ada_delete_method_names[value]; strncpy(buf, p, sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); methods = 1 << ADA_DELETE_DISABLE; if ((softc->flags & ADA_FLAG_CAN_CFA) && !(softc->flags & ADA_FLAG_CAN_48BIT)) methods |= 1 << ADA_DELETE_CFA_ERASE; if (softc->flags & ADA_FLAG_CAN_TRIM) methods |= 1 << ADA_DELETE_DSM_TRIM; if (softc->flags & ADA_FLAG_CAN_NCQ_TRIM) methods |= 1 << ADA_DELETE_NCQ_DSM_TRIM; for (i = 0; i <= ADA_DELETE_MAX; i++) { if (!(methods & (1 << i)) || strcmp(buf, ada_delete_method_names[i]) != 0) continue; softc->delete_method = i; return (0); } return (EINVAL); } static cam_status adaregister(struct cam_periph *periph, void *arg) { struct ada_softc *softc; struct ccb_pathinq cpi; struct ccb_getdev *cgd; char announce_buf[80], buf1[32]; struct disk_params *dp; caddr_t match; u_int maxio; int legacy_id, quirks; cgd = (struct ccb_getdev *)arg; if (cgd == NULL) { printf("adaregister: no getdev CCB, can't register device\n"); return(CAM_REQ_CMP_ERR); } softc = (struct ada_softc *)malloc(sizeof(*softc), M_DEVBUF, M_NOWAIT|M_ZERO); if (softc == NULL) { printf("adaregister: Unable to probe new device. " "Unable to allocate softc\n"); return(CAM_REQ_CMP_ERR); } - if (cam_iosched_init(&softc->cam_iosched) != 0) { + if (cam_iosched_init(&softc->cam_iosched, periph) != 0) { printf("adaregister: Unable to probe new device. " "Unable to allocate iosched memory\n"); return(CAM_REQ_CMP_ERR); } if ((cgd->ident_data.capabilities1 & ATA_SUPPORT_DMA) && (cgd->inq_flags & SID_DMA)) softc->flags |= ADA_FLAG_CAN_DMA; if (cgd->ident_data.support.command2 & ATA_SUPPORT_ADDRESS48) { softc->flags |= ADA_FLAG_CAN_48BIT; if (cgd->inq_flags & SID_DMA48) softc->flags |= ADA_FLAG_CAN_DMA48; } if (cgd->ident_data.support.command2 & ATA_SUPPORT_FLUSHCACHE) softc->flags |= ADA_FLAG_CAN_FLUSHCACHE; if (cgd->ident_data.support.command1 & ATA_SUPPORT_POWERMGT) softc->flags |= ADA_FLAG_CAN_POWERMGT; if ((cgd->ident_data.satacapabilities & ATA_SUPPORT_NCQ) && (cgd->inq_flags & SID_DMA) && (cgd->inq_flags & SID_CmdQue)) softc->flags |= ADA_FLAG_CAN_NCQ; if ((cgd->ident_data.support_dsm & ATA_SUPPORT_DSM_TRIM) && (cgd->inq_flags & SID_DMA)) { softc->flags |= ADA_FLAG_CAN_TRIM; softc->trim_max_ranges = TRIM_MAX_RANGES; if (cgd->ident_data.max_dsm_blocks != 0) { softc->trim_max_ranges = min(cgd->ident_data.max_dsm_blocks * ATA_DSM_BLK_RANGES, softc->trim_max_ranges); } } if (cgd->ident_data.support.command2 & ATA_SUPPORT_CFA) softc->flags |= ADA_FLAG_CAN_CFA; adasetdeletemethod(softc); periph->softc = softc; /* * See if this device has any quirks. */ match = cam_quirkmatch((caddr_t)&cgd->ident_data, (caddr_t)ada_quirk_table, sizeof(ada_quirk_table)/sizeof(*ada_quirk_table), sizeof(*ada_quirk_table), ata_identify_match); if (match != NULL) softc->quirks = ((struct ada_quirk_entry *)match)->quirks; else softc->quirks = ADA_Q_NONE; bzero(&cpi, sizeof(cpi)); xpt_setup_ccb(&cpi.ccb_h, periph->path, CAM_PRIORITY_NONE); cpi.ccb_h.func_code = XPT_PATH_INQ; xpt_action((union ccb *)&cpi); TASK_INIT(&softc->sysctl_task, 0, adasysctlinit, periph); /* * Register this media as a disk */ (void)cam_periph_hold(periph, PRIBIO); cam_periph_unlock(periph); snprintf(announce_buf, sizeof(announce_buf), "kern.cam.ada.%d.quirks", periph->unit_number); quirks = softc->quirks; TUNABLE_INT_FETCH(announce_buf, &quirks); softc->quirks = quirks; softc->read_ahead = -1; snprintf(announce_buf, sizeof(announce_buf), "kern.cam.ada.%d.read_ahead", periph->unit_number); TUNABLE_INT_FETCH(announce_buf, &softc->read_ahead); softc->write_cache = -1; snprintf(announce_buf, sizeof(announce_buf), "kern.cam.ada.%d.write_cache", periph->unit_number); TUNABLE_INT_FETCH(announce_buf, &softc->write_cache); /* Disable queue sorting for non-rotational media by default. */ - cam_iosched_set_sort_queue(softc->cam_iosched, - cgd->ident_data.media_rotation_rate == ATA_RATE_NON_ROTATING); + if (cgd->ident_data.media_rotation_rate == ATA_RATE_NON_ROTATING) { + softc->rotating = 0; + } else { + softc->rotating = 1; + } + cam_iosched_set_sort_queue(softc->cam_iosched, softc->rotating ? -1 : 0); adagetparams(periph, cgd); softc->disk = disk_alloc(); softc->disk->d_rotation_rate = cgd->ident_data.media_rotation_rate; softc->disk->d_devstat = devstat_new_entry(periph->periph_name, periph->unit_number, softc->params.secsize, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT | XPORT_DEVSTAT_TYPE(cpi.transport), DEVSTAT_PRIORITY_DISK); softc->disk->d_open = adaopen; softc->disk->d_close = adaclose; softc->disk->d_strategy = adastrategy; softc->disk->d_getattr = adagetattr; softc->disk->d_dump = adadump; softc->disk->d_gone = adadiskgonecb; softc->disk->d_name = "ada"; softc->disk->d_drv1 = periph; maxio = cpi.maxio; /* Honor max I/O size of SIM */ if (maxio == 0) maxio = DFLTPHYS; /* traditional default */ else if (maxio > MAXPHYS) maxio = MAXPHYS; /* for safety */ if (softc->flags & ADA_FLAG_CAN_48BIT) maxio = min(maxio, 65536 * softc->params.secsize); else /* 28bit ATA command limit */ maxio = min(maxio, 256 * softc->params.secsize); softc->disk->d_maxsize = maxio; softc->disk->d_unit = periph->unit_number; softc->disk->d_flags = DISKFLAG_DIRECT_COMPLETION; if (softc->flags & ADA_FLAG_CAN_FLUSHCACHE) softc->disk->d_flags |= DISKFLAG_CANFLUSHCACHE; if (softc->flags & ADA_FLAG_CAN_TRIM) { softc->disk->d_flags |= DISKFLAG_CANDELETE; softc->disk->d_delmaxsize = softc->params.secsize * ATA_DSM_RANGE_MAX * softc->trim_max_ranges; } else if ((softc->flags & ADA_FLAG_CAN_CFA) && !(softc->flags & ADA_FLAG_CAN_48BIT)) { softc->disk->d_flags |= DISKFLAG_CANDELETE; softc->disk->d_delmaxsize = 256 * softc->params.secsize; } else softc->disk->d_delmaxsize = maxio; - if ((cpi.hba_misc & PIM_UNMAPPED) != 0) + if ((cpi.hba_misc & PIM_UNMAPPED) != 0) { softc->disk->d_flags |= DISKFLAG_UNMAPPED_BIO; + softc->unmappedio = 1; + } /* * If we can do RCVSND_FPDMA_QUEUED commands, we may be able to do * NCQ trims, if we support trims at all. We also need support from * the sim do do things properly. Perhaps we should look at log 13 * dword 0 bit 0 and dword 1 bit 0 are set too... */ if (cpi.hba_misc & PIM_NCQ_KLUDGE) softc->flags |= ADA_FLAG_PIM_CAN_NCQ_TRIM; - if ((softc->flags & ADA_FLAG_PIM_CAN_NCQ_TRIM) != 0 && - (cgd->ident_data.satacapabilities2 & - ATA_SUPPORT_RCVSND_FPDMA_QUEUED) != 0 && + if ((softc->quirks & ADA_Q_NCQ_TRIM_BROKEN) == 0 && + (softc->flags & ADA_FLAG_PIM_CAN_NCQ_TRIM) != 0 && + (cgd->ident_data.satacapabilities2 & ATA_SUPPORT_RCVSND_FPDMA_QUEUED) != 0 && (softc->flags & ADA_FLAG_CAN_TRIM) != 0) softc->flags |= ADA_FLAG_CAN_NCQ_TRIM; strlcpy(softc->disk->d_descr, cgd->ident_data.model, MIN(sizeof(softc->disk->d_descr), sizeof(cgd->ident_data.model))); strlcpy(softc->disk->d_ident, cgd->ident_data.serial, MIN(sizeof(softc->disk->d_ident), sizeof(cgd->ident_data.serial))); softc->disk->d_hba_vendor = cpi.hba_vendor; softc->disk->d_hba_device = cpi.hba_device; softc->disk->d_hba_subvendor = cpi.hba_subvendor; softc->disk->d_hba_subdevice = cpi.hba_subdevice; softc->disk->d_sectorsize = softc->params.secsize; softc->disk->d_mediasize = (off_t)softc->params.sectors * softc->params.secsize; if (ata_physical_sector_size(&cgd->ident_data) != softc->params.secsize) { softc->disk->d_stripesize = ata_physical_sector_size(&cgd->ident_data); softc->disk->d_stripeoffset = (softc->disk->d_stripesize - ata_logical_sector_offset(&cgd->ident_data)) % softc->disk->d_stripesize; } else if (softc->quirks & ADA_Q_4K) { softc->disk->d_stripesize = 4096; softc->disk->d_stripeoffset = 0; } softc->disk->d_fwsectors = softc->params.secs_per_track; softc->disk->d_fwheads = softc->params.heads; ata_disk_firmware_geom_adjust(softc->disk); adasetdeletemethod(softc); if (ada_legacy_aliases) { #ifdef ATA_STATIC_ID legacy_id = xpt_path_legacy_ata_id(periph->path); #else legacy_id = softc->disk->d_unit; #endif if (legacy_id >= 0) { snprintf(announce_buf, sizeof(announce_buf), "kern.devalias.%s%d", softc->disk->d_name, softc->disk->d_unit); snprintf(buf1, sizeof(buf1), "ad%d", legacy_id); kern_setenv(announce_buf, buf1); } } else legacy_id = -1; /* * Acquire a reference to the periph before we register with GEOM. * We'll release this reference once GEOM calls us back (via * adadiskgonecb()) telling us that our provider has been freed. */ if (cam_periph_acquire(periph) != CAM_REQ_CMP) { xpt_print(periph->path, "%s: lost periph during " "registration!\n", __func__); cam_periph_lock(periph); return (CAM_REQ_CMP_ERR); } disk_create(softc->disk, DISK_VERSION); cam_periph_lock(periph); cam_periph_unhold(periph); dp = &softc->params; snprintf(announce_buf, sizeof(announce_buf), "%juMB (%ju %u byte sectors: %dH %dS/T %dC)", (uintmax_t)(((uintmax_t)dp->secsize * dp->sectors) / (1024*1024)), (uintmax_t)dp->sectors, dp->secsize, dp->heads, dp->secs_per_track, dp->cylinders); xpt_announce_periph(periph, announce_buf); xpt_announce_quirks(periph, softc->quirks, ADA_Q_BIT_STRING); if (legacy_id >= 0) printf("%s%d: Previously was known as ad%d\n", periph->periph_name, periph->unit_number, legacy_id); /* * Create our sysctl variables, now that we know * we have successfully attached. */ if (cam_periph_acquire(periph) == CAM_REQ_CMP) taskqueue_enqueue(taskqueue_thread, &softc->sysctl_task); /* * Add async callbacks for bus reset and * bus device reset calls. I don't bother * checking if this fails as, in most cases, * the system will function just fine without * them and the only alternative would be to * not attach the device on failure. */ xpt_register_async(AC_SENT_BDR | AC_BUS_RESET | AC_LOST_DEVICE | AC_GETDEV_CHANGED | AC_ADVINFO_CHANGED, adaasync, periph, periph->path); /* * Schedule a periodic event to occasionally send an * ordered tag to a device. */ callout_init_mtx(&softc->sendordered_c, cam_periph_mtx(periph), 0); callout_reset(&softc->sendordered_c, (ada_default_timeout * hz) / ADA_ORDEREDTAG_INTERVAL, adasendorderedtag, softc); if (ADA_RA >= 0 && cgd->ident_data.support.command1 & ATA_SUPPORT_LOOKAHEAD) { softc->state = ADA_STATE_RAHEAD; } else if (ADA_WC >= 0 && cgd->ident_data.support.command1 & ATA_SUPPORT_WRITECACHE) { softc->state = ADA_STATE_WCACHE; } else { softc->state = ADA_STATE_NORMAL; return(CAM_REQ_CMP); } if (cam_periph_acquire(periph) != CAM_REQ_CMP) softc->state = ADA_STATE_NORMAL; else xpt_schedule(periph, CAM_PRIORITY_DEV); return(CAM_REQ_CMP); } static int ada_dsmtrim_req_create(struct ada_softc *softc, struct bio *bp, struct trim_request *req) { uint64_t lastlba = (uint64_t)-1; int c, lastcount = 0, off, ranges = 0; bzero(req, sizeof(*req)); TAILQ_INIT(&req->bps); do { uint64_t lba = bp->bio_pblkno; int count = bp->bio_bcount / softc->params.secsize; /* Try to extend the previous range. */ if (lba == lastlba) { c = min(count, ATA_DSM_RANGE_MAX - lastcount); lastcount += c; off = (ranges - 1) * ATA_DSM_RANGE_SIZE; req->data[off + 6] = lastcount & 0xff; req->data[off + 7] = (lastcount >> 8) & 0xff; count -= c; lba += c; } while (count > 0) { c = min(count, ATA_DSM_RANGE_MAX); off = ranges * ATA_DSM_RANGE_SIZE; req->data[off + 0] = lba & 0xff; req->data[off + 1] = (lba >> 8) & 0xff; req->data[off + 2] = (lba >> 16) & 0xff; req->data[off + 3] = (lba >> 24) & 0xff; req->data[off + 4] = (lba >> 32) & 0xff; req->data[off + 5] = (lba >> 40) & 0xff; req->data[off + 6] = c & 0xff; req->data[off + 7] = (c >> 8) & 0xff; lba += c; count -= c; lastcount = c; ranges++; /* * Its the caller's responsibility to ensure the * request will fit so we don't need to check for * overrun here */ } lastlba = lba; TAILQ_INSERT_TAIL(&req->bps, bp, bio_queue); bp = cam_iosched_next_trim(softc->cam_iosched); if (bp == NULL) break; if (bp->bio_bcount / softc->params.secsize > (softc->trim_max_ranges - ranges) * ATA_DSM_RANGE_MAX) { cam_iosched_put_back_trim(softc->cam_iosched, bp); break; } } while (1); return (ranges); } static void ada_dsmtrim(struct ada_softc *softc, struct bio *bp, struct ccb_ataio *ataio) { struct trim_request *req = &softc->trim_req; int ranges; ranges = ada_dsmtrim_req_create(softc, bp, req); cam_fill_ataio(ataio, ada_retry_count, adadone, CAM_DIR_OUT, 0, req->data, ((ranges + ATA_DSM_BLK_RANGES - 1) / ATA_DSM_BLK_RANGES) * ATA_DSM_BLK_SIZE, ada_default_timeout * 1000); ata_48bit_cmd(ataio, ATA_DATA_SET_MANAGEMENT, ATA_DSM_TRIM, 0, (ranges + ATA_DSM_BLK_RANGES - 1) / ATA_DSM_BLK_RANGES); } static void ada_ncq_dsmtrim(struct ada_softc *softc, struct bio *bp, struct ccb_ataio *ataio) { struct trim_request *req = &softc->trim_req; int ranges; ranges = ada_dsmtrim_req_create(softc, bp, req); cam_fill_ataio(ataio, ada_retry_count, adadone, CAM_DIR_OUT, 0, req->data, ((ranges + ATA_DSM_BLK_RANGES - 1) / ATA_DSM_BLK_RANGES) * ATA_DSM_BLK_SIZE, ada_default_timeout * 1000); ata_ncq_cmd(ataio, ATA_SEND_FPDMA_QUEUED, 0, (ranges + ATA_DSM_BLK_RANGES - 1) / ATA_DSM_BLK_RANGES); ataio->cmd.sector_count_exp = ATA_SFPDMA_DSM; ataio->cmd.flags |= CAM_ATAIO_AUX_HACK; } static void ada_cfaerase(struct ada_softc *softc, struct bio *bp, struct ccb_ataio *ataio) { struct trim_request *req = &softc->trim_req; uint64_t lba = bp->bio_pblkno; uint16_t count = bp->bio_bcount / softc->params.secsize; bzero(req, sizeof(*req)); TAILQ_INIT(&req->bps); TAILQ_INSERT_TAIL(&req->bps, bp, bio_queue); cam_fill_ataio(ataio, ada_retry_count, adadone, CAM_DIR_NONE, 0, NULL, 0, ada_default_timeout*1000); if (count >= 256) count = 0; ata_28bit_cmd(ataio, ATA_CFA_ERASE, 0, lba, count); } static void adastart(struct cam_periph *periph, union ccb *start_ccb) { struct ada_softc *softc = (struct ada_softc *)periph->softc; struct ccb_ataio *ataio = &start_ccb->ataio; CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("adastart\n")); switch (softc->state) { case ADA_STATE_NORMAL: { struct bio *bp; u_int8_t tag_code; bp = cam_iosched_next_bio(softc->cam_iosched); if (bp == NULL) { xpt_release_ccb(start_ccb); break; } if ((bp->bio_flags & BIO_ORDERED) != 0 || - (bp->bio_cmd != BIO_DELETE && - (softc->flags & ADA_FLAG_NEED_OTAG) != 0)) { + (bp->bio_cmd != BIO_DELETE && (softc->flags & ADA_FLAG_NEED_OTAG) != 0)) { softc->flags &= ~ADA_FLAG_NEED_OTAG; softc->flags |= ADA_FLAG_WAS_OTAG; tag_code = 0; } else { tag_code = 1; } switch (bp->bio_cmd) { case BIO_WRITE: softc->flags |= ADA_FLAG_DIRTY; /* FALLTHROUGH */ case BIO_READ: { uint64_t lba = bp->bio_pblkno; uint16_t count = bp->bio_bcount / softc->params.secsize; #ifdef ADA_TEST_FAILURE int fail = 0; /* * Support the failure ioctls. If the command is a * read, and there are pending forced read errors, or * if a write and pending write errors, then fail this * operation with EIO. This is useful for testing * purposes. Also, support having every Nth read fail. * * This is a rather blunt tool. */ if (bp->bio_cmd == BIO_READ) { if (softc->force_read_error) { softc->force_read_error--; fail = 1; } if (softc->periodic_read_error > 0) { if (++softc->periodic_read_count >= softc->periodic_read_error) { softc->periodic_read_count = 0; fail = 1; } } } else { if (softc->force_write_error) { softc->force_write_error--; fail = 1; } } if (fail) { biofinish(bp, NULL, EIO); xpt_release_ccb(start_ccb); adaschedule(periph); return; } #endif KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 || round_page(bp->bio_bcount + bp->bio_ma_offset) / PAGE_SIZE == bp->bio_ma_n, ("Short bio %p", bp)); cam_fill_ataio(ataio, ada_retry_count, adadone, (bp->bio_cmd == BIO_READ ? CAM_DIR_IN : CAM_DIR_OUT) | ((bp->bio_flags & BIO_UNMAPPED) != 0 ? CAM_DATA_BIO : 0), tag_code, ((bp->bio_flags & BIO_UNMAPPED) != 0) ? (void *)bp : bp->bio_data, bp->bio_bcount, ada_default_timeout*1000); if ((softc->flags & ADA_FLAG_CAN_NCQ) && tag_code) { if (bp->bio_cmd == BIO_READ) { ata_ncq_cmd(ataio, ATA_READ_FPDMA_QUEUED, lba, count); } else { ata_ncq_cmd(ataio, ATA_WRITE_FPDMA_QUEUED, lba, count); } } else if ((softc->flags & ADA_FLAG_CAN_48BIT) && (lba + count >= ATA_MAX_28BIT_LBA || count > 256)) { if (softc->flags & ADA_FLAG_CAN_DMA48) { if (bp->bio_cmd == BIO_READ) { ata_48bit_cmd(ataio, ATA_READ_DMA48, 0, lba, count); } else { ata_48bit_cmd(ataio, ATA_WRITE_DMA48, 0, lba, count); } } else { if (bp->bio_cmd == BIO_READ) { ata_48bit_cmd(ataio, ATA_READ_MUL48, 0, lba, count); } else { ata_48bit_cmd(ataio, ATA_WRITE_MUL48, 0, lba, count); } } } else { if (count == 256) count = 0; if (softc->flags & ADA_FLAG_CAN_DMA) { if (bp->bio_cmd == BIO_READ) { ata_28bit_cmd(ataio, ATA_READ_DMA, 0, lba, count); } else { ata_28bit_cmd(ataio, ATA_WRITE_DMA, 0, lba, count); } } else { if (bp->bio_cmd == BIO_READ) { ata_28bit_cmd(ataio, ATA_READ_MUL, 0, lba, count); } else { ata_28bit_cmd(ataio, ATA_WRITE_MUL, 0, lba, count); } } } break; } case BIO_DELETE: switch (softc->delete_method) { case ADA_DELETE_NCQ_DSM_TRIM: ada_ncq_dsmtrim(softc, bp, ataio); break; case ADA_DELETE_DSM_TRIM: ada_dsmtrim(softc, bp, ataio); break; case ADA_DELETE_CFA_ERASE: ada_cfaerase(softc, bp, ataio); break; default: - panic("adastart: BIO_DELETE without method, not possible."); + biofinish(bp, NULL, EOPNOTSUPP); + xpt_release_ccb(start_ccb); + adaschedule(periph); + return; } start_ccb->ccb_h.ccb_state = ADA_CCB_TRIM; start_ccb->ccb_h.flags |= CAM_UNLOCKED; cam_iosched_submit_trim(softc->cam_iosched); goto out; case BIO_FLUSH: cam_fill_ataio(ataio, 1, adadone, CAM_DIR_NONE, 0, NULL, 0, ada_default_timeout*1000); if (softc->flags & ADA_FLAG_CAN_48BIT) ata_48bit_cmd(ataio, ATA_FLUSHCACHE48, 0, 0, 0); else ata_28bit_cmd(ataio, ATA_FLUSHCACHE, 0, 0, 0); break; } start_ccb->ccb_h.ccb_state = ADA_CCB_BUFFER_IO; start_ccb->ccb_h.flags |= CAM_UNLOCKED; out: start_ccb->ccb_h.ccb_bp = bp; softc->outstanding_cmds++; softc->refcount++; cam_periph_unlock(periph); xpt_action(start_ccb); cam_periph_lock(periph); softc->refcount--; /* May have more work to do, so ensure we stay scheduled */ adaschedule(periph); break; } case ADA_STATE_RAHEAD: case ADA_STATE_WCACHE: { cam_fill_ataio(ataio, 1, adadone, CAM_DIR_NONE, 0, NULL, 0, ada_default_timeout*1000); if (softc->state == ADA_STATE_RAHEAD) { ata_28bit_cmd(ataio, ATA_SETFEATURES, ADA_RA ? ATA_SF_ENAB_RCACHE : ATA_SF_DIS_RCACHE, 0, 0); start_ccb->ccb_h.ccb_state = ADA_CCB_RAHEAD; } else { ata_28bit_cmd(ataio, ATA_SETFEATURES, ADA_WC ? ATA_SF_ENAB_WCACHE : ATA_SF_DIS_WCACHE, 0, 0); start_ccb->ccb_h.ccb_state = ADA_CCB_WCACHE; } start_ccb->ccb_h.flags |= CAM_DEV_QFREEZE; xpt_action(start_ccb); break; } } } static void adadone(struct cam_periph *periph, union ccb *done_ccb) { struct ada_softc *softc; struct ccb_ataio *ataio; struct ccb_getdev *cgd; struct cam_path *path; int state; softc = (struct ada_softc *)periph->softc; ataio = &done_ccb->ataio; path = done_ccb->ccb_h.path; CAM_DEBUG(path, CAM_DEBUG_TRACE, ("adadone\n")); state = ataio->ccb_h.ccb_state & ADA_CCB_TYPE_MASK; switch (state) { case ADA_CCB_BUFFER_IO: case ADA_CCB_TRIM: { struct bio *bp; - int error, need_sched; + int error; cam_periph_lock(periph); bp = (struct bio *)done_ccb->ccb_h.ccb_bp; if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { error = adaerror(done_ccb, 0, 0); if (error == ERESTART) { /* A retry was scheduled, so just return. */ cam_periph_unlock(periph); return; } if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) cam_release_devq(path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); /* * If we get an error on an NCQ DSM TRIM, fall back * to a non-NCQ DSM TRIM forever. Please note that if * CAN_NCQ_TRIM is set, CAN_TRIM is necessarily set too. * However, for this one trim, we treat it as advisory * and return success up the stack. */ if (state == ADA_CCB_TRIM && error != 0 && (softc->flags & ADA_FLAG_CAN_NCQ_TRIM) != 0) { softc->flags &= ~ADA_FLAG_CAN_NCQ_TRIM; error = 0; adasetdeletemethod(softc); } } else { if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) panic("REQ_CMP with QFRZN"); error = 0; } bp->bio_error = error; if (error != 0) { bp->bio_resid = bp->bio_bcount; bp->bio_flags |= BIO_ERROR; } else { if (state == ADA_CCB_TRIM) bp->bio_resid = 0; else bp->bio_resid = ataio->resid; if (bp->bio_resid > 0) bp->bio_flags |= BIO_ERROR; } softc->outstanding_cmds--; if (softc->outstanding_cmds == 0) softc->flags |= ADA_FLAG_WAS_OTAG; - need_sched = cam_iosched_bio_complete(softc->cam_iosched, bp, done_ccb); + cam_iosched_bio_complete(softc->cam_iosched, bp, done_ccb); xpt_release_ccb(done_ccb); if (state == ADA_CCB_TRIM) { TAILQ_HEAD(, bio) queue; struct bio *bp1; TAILQ_INIT(&queue); TAILQ_CONCAT(&queue, &softc->trim_req.bps, bio_queue); /* * Normally, the xpt_release_ccb() above would make sure * that when we have more work to do, that work would * get kicked off. However, we specifically keep - * trim running set to 0 before the call above to allow + * trim_running set to 0 before the call above to allow * other I/O to progress when many BIO_DELETE requests - * are pushed down. We set trim running to 0 and call + * are pushed down. We set trim_running to 0 and call * daschedule again so that we don't stall if there are * no other I/Os pending apart from BIO_DELETEs. */ cam_iosched_trim_done(softc->cam_iosched); adaschedule(periph); cam_periph_unlock(periph); while ((bp1 = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, bp1, bio_queue); bp1->bio_error = error; if (error != 0) { bp1->bio_flags |= BIO_ERROR; bp1->bio_resid = bp1->bio_bcount; } else bp1->bio_resid = 0; biodone(bp1); } } else { - if (need_sched) - adaschedule(periph); + adaschedule(periph); cam_periph_unlock(periph); biodone(bp); } return; } case ADA_CCB_RAHEAD: { if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { if (adaerror(done_ccb, 0, 0) == ERESTART) { out: /* Drop freeze taken due to CAM_DEV_QFREEZE */ cam_release_devq(path, 0, 0, 0, FALSE); return; } else if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) { cam_release_devq(path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); } } /* * Since our peripheral may be invalidated by an error * above or an external event, we must release our CCB * before releasing the reference on the peripheral. * The peripheral will only go away once the last reference * is removed, and we need it around for the CCB release * operation. */ cgd = (struct ccb_getdev *)done_ccb; xpt_setup_ccb(&cgd->ccb_h, path, CAM_PRIORITY_NORMAL); cgd->ccb_h.func_code = XPT_GDEV_TYPE; xpt_action((union ccb *)cgd); if (ADA_WC >= 0 && cgd->ident_data.support.command1 & ATA_SUPPORT_WRITECACHE) { softc->state = ADA_STATE_WCACHE; xpt_release_ccb(done_ccb); xpt_schedule(periph, CAM_PRIORITY_DEV); goto out; } softc->state = ADA_STATE_NORMAL; xpt_release_ccb(done_ccb); /* Drop freeze taken due to CAM_DEV_QFREEZE */ cam_release_devq(path, 0, 0, 0, FALSE); adaschedule(periph); cam_periph_release_locked(periph); return; } case ADA_CCB_WCACHE: { if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { if (adaerror(done_ccb, 0, 0) == ERESTART) { goto out; } else if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) { cam_release_devq(path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); } } softc->state = ADA_STATE_NORMAL; /* * Since our peripheral may be invalidated by an error * above or an external event, we must release our CCB * before releasing the reference on the peripheral. * The peripheral will only go away once the last reference * is removed, and we need it around for the CCB release * operation. */ xpt_release_ccb(done_ccb); /* Drop freeze taken due to CAM_DEV_QFREEZE */ cam_release_devq(path, 0, 0, 0, FALSE); adaschedule(periph); cam_periph_release_locked(periph); return; } case ADA_CCB_DUMP: /* No-op. We're polling */ return; default: break; } xpt_release_ccb(done_ccb); } static int adaerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags) { + struct ada_softc *softc; + struct cam_periph *periph; + + periph = xpt_path_periph(ccb->ccb_h.path); + softc = (struct ada_softc *)periph->softc; + + switch (ccb->ccb_h.status & CAM_STATUS_MASK) { + case CAM_CMD_TIMEOUT: +#ifdef CAM_IO_STATS + softc->timeouts++; +#endif + break; + case CAM_REQ_ABORTED: + case CAM_REQ_CMP_ERR: + case CAM_REQ_TERMIO: + case CAM_UNREC_HBA_ERROR: + case CAM_DATA_RUN_ERR: + case CAM_ATA_STATUS_ERROR: +#ifdef CAM_IO_STATS + softc->errors++; +#endif + break; + default: + break; + } return(cam_periph_error(ccb, cam_flags, sense_flags, NULL)); } static void adagetparams(struct cam_periph *periph, struct ccb_getdev *cgd) { struct ada_softc *softc = (struct ada_softc *)periph->softc; struct disk_params *dp = &softc->params; u_int64_t lbasize48; u_int32_t lbasize; dp->secsize = ata_logical_sector_size(&cgd->ident_data); if ((cgd->ident_data.atavalid & ATA_FLAG_54_58) && cgd->ident_data.current_heads && cgd->ident_data.current_sectors) { dp->heads = cgd->ident_data.current_heads; dp->secs_per_track = cgd->ident_data.current_sectors; dp->cylinders = cgd->ident_data.cylinders; dp->sectors = (u_int32_t)cgd->ident_data.current_size_1 | ((u_int32_t)cgd->ident_data.current_size_2 << 16); } else { dp->heads = cgd->ident_data.heads; dp->secs_per_track = cgd->ident_data.sectors; dp->cylinders = cgd->ident_data.cylinders; dp->sectors = cgd->ident_data.cylinders * dp->heads * dp->secs_per_track; } lbasize = (u_int32_t)cgd->ident_data.lba_size_1 | ((u_int32_t)cgd->ident_data.lba_size_2 << 16); /* use the 28bit LBA size if valid or bigger than the CHS mapping */ if (cgd->ident_data.cylinders == 16383 || dp->sectors < lbasize) dp->sectors = lbasize; /* use the 48bit LBA size if valid */ lbasize48 = ((u_int64_t)cgd->ident_data.lba_size48_1) | ((u_int64_t)cgd->ident_data.lba_size48_2 << 16) | ((u_int64_t)cgd->ident_data.lba_size48_3 << 32) | ((u_int64_t)cgd->ident_data.lba_size48_4 << 48); if ((cgd->ident_data.support.command2 & ATA_SUPPORT_ADDRESS48) && lbasize48 > ATA_MAX_28BIT_LBA) dp->sectors = lbasize48; } static void adasendorderedtag(void *arg) { struct ada_softc *softc = arg; if (ada_send_ordered) { if (softc->outstanding_cmds > 0) { if ((softc->flags & ADA_FLAG_WAS_OTAG) == 0) softc->flags |= ADA_FLAG_NEED_OTAG; softc->flags &= ~ADA_FLAG_WAS_OTAG; } } /* Queue us up again */ callout_reset(&softc->sendordered_c, (ada_default_timeout * hz) / ADA_ORDEREDTAG_INTERVAL, adasendorderedtag, softc); } /* * Step through all ADA peripheral drivers, and if the device is still open, * sync the disk cache to physical media. */ static void adaflush(void) { struct cam_periph *periph; struct ada_softc *softc; union ccb *ccb; int error; CAM_PERIPH_FOREACH(periph, &adadriver) { softc = (struct ada_softc *)periph->softc; if (SCHEDULER_STOPPED()) { /* If we paniced with the lock held, do not recurse. */ if (!cam_periph_owned(periph) && (softc->flags & ADA_FLAG_OPEN)) { adadump(softc->disk, NULL, 0, 0, 0); } continue; } cam_periph_lock(periph); /* * We only sync the cache if the drive is still open, and * if the drive is capable of it.. */ if (((softc->flags & ADA_FLAG_OPEN) == 0) || (softc->flags & ADA_FLAG_CAN_FLUSHCACHE) == 0) { cam_periph_unlock(periph); continue; } ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); cam_fill_ataio(&ccb->ataio, 0, adadone, CAM_DIR_NONE, 0, NULL, 0, ada_default_timeout*1000); if (softc->flags & ADA_FLAG_CAN_48BIT) ata_48bit_cmd(&ccb->ataio, ATA_FLUSHCACHE48, 0, 0, 0); else ata_28bit_cmd(&ccb->ataio, ATA_FLUSHCACHE, 0, 0, 0); error = cam_periph_runccb(ccb, adaerror, /*cam_flags*/0, /*sense_flags*/ SF_NO_RECOVERY | SF_NO_RETRY, softc->disk->d_devstat); if (error != 0) xpt_print(periph->path, "Synchronize cache failed\n"); xpt_release_ccb(ccb); cam_periph_unlock(periph); } } static void adaspindown(uint8_t cmd, int flags) { struct cam_periph *periph; struct ada_softc *softc; union ccb *ccb; int error; CAM_PERIPH_FOREACH(periph, &adadriver) { /* If we paniced with lock held - not recurse here. */ if (cam_periph_owned(periph)) continue; cam_periph_lock(periph); softc = (struct ada_softc *)periph->softc; /* * We only spin-down the drive if it is capable of it.. */ if ((softc->flags & ADA_FLAG_CAN_POWERMGT) == 0) { cam_periph_unlock(periph); continue; } if (bootverbose) xpt_print(periph->path, "spin-down\n"); ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); cam_fill_ataio(&ccb->ataio, 0, adadone, CAM_DIR_NONE | flags, 0, NULL, 0, ada_default_timeout*1000); ata_28bit_cmd(&ccb->ataio, cmd, 0, 0, 0); error = cam_periph_runccb(ccb, adaerror, /*cam_flags*/0, /*sense_flags*/ SF_NO_RECOVERY | SF_NO_RETRY, softc->disk->d_devstat); if (error != 0) xpt_print(periph->path, "Spin-down disk failed\n"); xpt_release_ccb(ccb); cam_periph_unlock(periph); } } static void adashutdown(void *arg, int howto) { adaflush(); if (ada_spindown_shutdown != 0 && (howto & (RB_HALT | RB_POWEROFF)) != 0) adaspindown(ATA_STANDBY_IMMEDIATE, 0); } static void adasuspend(void *arg) { adaflush(); if (ada_spindown_suspend != 0) adaspindown(ATA_SLEEP, CAM_DEV_QFREEZE); } static void adaresume(void *arg) { struct cam_periph *periph; struct ada_softc *softc; if (ada_spindown_suspend == 0) return; CAM_PERIPH_FOREACH(periph, &adadriver) { cam_periph_lock(periph); softc = (struct ada_softc *)periph->softc; /* * We only spin-down the drive if it is capable of it.. */ if ((softc->flags & ADA_FLAG_CAN_POWERMGT) == 0) { cam_periph_unlock(periph); continue; } if (bootverbose) xpt_print(periph->path, "resume\n"); /* * Drop freeze taken due to CAM_DEV_QFREEZE flag set on * sleep request. */ cam_release_devq(periph->path, /*relsim_flags*/0, /*openings*/0, /*timeout*/0, /*getcount_only*/0); cam_periph_unlock(periph); } } #endif /* _KERNEL */ Index: projects/iosched/sys/cam/cam_iosched.c =================================================================== --- projects/iosched/sys/cam/cam_iosched.c (revision 287875) +++ projects/iosched/sys/cam/cam_iosched.c (revision 287876) @@ -1,983 +1,1600 @@ /*- * CAM IO Scheduler Interface * * Copyright (c) 2015 Netflix, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_cam.h" #include "opt_ddb.h" #define CAM_NETFLIX_IOSCHED 1 /* hack xxx */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_CAMSCHED, "CAM I/O Scheduler", "CAM I/O Scheduler buffers"); /* * Default I/O scheduler for FreeBSD. This implementation is just a thin-vineer * over the bioq_* interface, with notions of separate calls for normal I/O and * for trims. */ #ifdef CAM_NETFLIX_IOSCHED -#define IOP_MAX_SKIP 50 -#define IOP_MAX_TRAINING 500 -#define ALPHA_BITS 14 /* ~32k events or about the last minute */ SYSCTL_DECL(_kern_cam); static int do_netflix_iosched = 1; TUNABLE_INT("kern.cam.do_netflix_iosched", &do_netflix_iosched); SYSCTL_INT(_kern_cam, OID_AUTO, do_netflix_iosched, CTLFLAG_RD, &do_netflix_iosched, 1, "Enable Netflix I/O scheduler optimizations."); +static int alpha_bits = 9; +TUNABLE_INT("kern.cam.iosched_alpha_bits", &alpha_bits); +SYSCTL_INT(_kern_cam, OID_AUTO, iosched_alpha_bits, CTLFLAG_RW, + &alpha_bits, 1, + "Bits in EMA's alpha."); + + + +struct iop_stats; +struct cam_iosched_softc; + int iosched_debug = 0; +typedef enum { + none = 0, /* No limits */ + queue_depth, /* Limit how many ops we queue to SIM */ + iops, /* Limit # of IOPS to the drive */ + bandwidth, /* Limit bandwidth to the drive */ + limiter_max +} io_limiter; + +static const char *cam_iosched_limiter_names[] = + { "none", "queue_depth", "iops", "bandwidth" }; + +/* + * Called to initialize the bits of the iop_stats structure relevant to the + * limiter. Called just after the limiter is set. + */ +typedef int l_init_t(struct iop_stats *); + +/* + * Called every tick. + */ +typedef int l_tick_t(struct iop_stats *); + +/* + * Called to see if the limiter thinks this IOP can be allowed to + * proceed. If so, the limiter assumes that the while IOP proceeded + * and makes any accounting of it that's needed. + */ +typedef int l_iop_t(struct iop_stats *, struct bio *); + +/* + * Called when an I/O completes so the limiter can updates its + * accounting. Pending I/Os may complete in any order (even when + * sent to the hardware at the same time), so the limiter may not + * make any assumptions other than this I/O has completed. If it + * returns 1, then xpt_schedule() needs to be called again. + */ +typedef int l_iodone_t(struct iop_stats *, struct bio *); + +static l_iop_t cam_iosched_qd_iop; +static l_iop_t cam_iosched_qd_caniop; +static l_iodone_t cam_iosched_qd_iodone; + +static l_init_t cam_iosched_iops_init; +static l_tick_t cam_iosched_iops_tick; +static l_iop_t cam_iosched_iops_caniop; +static l_iop_t cam_iosched_iops_iop; + +static l_init_t cam_iosched_bw_init; +static l_tick_t cam_iosched_bw_tick; +static l_iop_t cam_iosched_bw_caniop; +static l_iop_t cam_iosched_bw_iop; + +struct limswitch +{ + l_init_t *l_init; + l_tick_t *l_tick; + l_iop_t *l_iop; + l_iop_t *l_caniop; + l_iodone_t *l_iodone; +} limsw[] = +{ + { /* none */ + .l_init = NULL, + .l_tick = NULL, + .l_iop = NULL, + .l_iodone= NULL, + }, + { /* queue_depth */ + .l_init = NULL, + .l_tick = NULL, + .l_caniop = cam_iosched_qd_caniop, + .l_iop = cam_iosched_qd_iop, + .l_iodone= cam_iosched_qd_iodone, + }, + { /* iops */ + .l_init = cam_iosched_iops_init, + .l_tick = cam_iosched_iops_tick, + .l_caniop = cam_iosched_iops_caniop, + .l_iop = cam_iosched_iops_iop, + .l_iodone= NULL, + }, + { /* bandwidth */ + .l_init = cam_iosched_bw_init, + .l_tick = cam_iosched_bw_tick, + .l_caniop = cam_iosched_bw_caniop, + .l_iop = cam_iosched_bw_iop, + .l_iodone= NULL, + }, +}; + struct iop_stats { - sbintime_t data[IOP_MAX_TRAINING]; /* Data for training period */ - sbintime_t worst; /* estimate of worst case latency */ - int outliers; /* Number of outlier latency I/Os */ - int skipping; /* Skipping I/Os when < IOP_MAX_SKIP */ - int training; /* Training when < IOP_MAX_TRAINING */ + /* + * sysctl state for this subnode. + */ + struct sysctl_ctx_list sysctl_ctx; + struct sysctl_oid *sysctl_tree; + + /* + * Information about the current rate limiters, if any + */ + io_limiter limiter; /* How are I/Os being limited */ + int min; /* Low range of limit */ + int max; /* High range of limit */ + int current; /* Current rate limiter */ + int l_value1; /* per-limiter scratch value 1. */ + int l_value2; /* per-limiter scratch value 2. */ + + + /* + * Debug information about counts of I/Os that have gone through the + * scheduler. + */ + int pending; /* I/Os pending in the hardware */ + int queued; /* number currently in the queue */ + int total; /* Total for all time -- wraps */ + int in; /* number queued all time -- wraps */ + int out; /* number completed all time -- wraps */ + + /* + * Statistics on different bits of the process. + */ /* Exp Moving Average, alpha = 1 / (1 << alpha_bits) */ sbintime_t ema; sbintime_t emss; /* Exp Moving sum of the squares */ sbintime_t sd; /* Last computed sd */ + + struct cam_iosched_softc *softc; }; + + +typedef enum { + set_max = 0, /* current = max */ + read_latency, /* Steer read latency by throttling writes */ + cl_max /* Keep last */ +} control_type; + +static const char *cam_iosched_control_type_names[] = + { "set_max", "read_latency" }; + +struct control_loop +{ + /* + * sysctl state for this subnode. + */ + struct sysctl_ctx_list sysctl_ctx; + struct sysctl_oid *sysctl_tree; + + sbintime_t next_steer; /* Time of next steer */ + sbintime_t steer_interval; /* How often do we steer? */ + sbintime_t lolat; + sbintime_t hilat; + int alpha; + control_type type; /* What type of control? */ + int last_count; /* Last I/O count */ + + struct cam_iosched_softc *softc; +}; + #endif struct cam_iosched_softc { struct bio_queue_head bio_queue; struct bio_queue_head trim_queue; /* scheduler flags < 16, user flags >= 16 */ uint32_t flags; int sort_io_queue; #ifdef CAM_NETFLIX_IOSCHED - /* Number of pending transactions */ - int pending_reads; - int pending_writes; - /* Have at least this many transactions in progress, if possible */ - int min_reads; - int min_writes; - /* Maximum number of each type of transaction in progress */ - int max_reads; - int max_writes; - - int trims; - int reads; - int writes; - int queued_reads; - int queued_writes; - int in_reads; - int in_writes; - int out_reads; - int out_writes; + int read_bias; /* Read bias setting */ + int current_read_bias; /* Current read bias state */ + int total_ticks; - int read_bias; - int current_read_bias; - struct bio_queue_head write_queue; struct iop_stats read_stats, write_stats, trim_stats; + struct sysctl_ctx_list sysctl_ctx; + struct sysctl_oid *sysctl_tree; + + int quanta; /* Number of quanta per second */ + struct callout ticker; /* Callout for our quota system */ + struct cam_periph *periph; /* cam periph associated with this device */ + uint32_t this_frac; /* Fraction of a second (1024ths) for this tick */ + sbintime_t last_time; /* Last time we ticked */ + struct control_loop cl; #endif }; +#ifdef CAM_NETFLIX_IOSCHED +/* + * helper functions to call the limsw functions. + */ +static int +cam_iosched_limiter_init(struct iop_stats *ios) +{ + int lim = ios->limiter; + + /* maybe this should be a kassert */ + if (lim < none || lim >= limiter_max) + return EINVAL; + + if (limsw[lim].l_init) + return limsw[lim].l_init(ios); + + return 0; +} + +static int +cam_iosched_limiter_tick(struct iop_stats *ios) +{ + int lim = ios->limiter; + + /* maybe this should be a kassert */ + if (lim < none || lim >= limiter_max) + return EINVAL; + + if (limsw[lim].l_tick) + return limsw[lim].l_tick(ios); + + return 0; +} + +static int +cam_iosched_limiter_iop(struct iop_stats *ios, struct bio *bp) +{ + int lim = ios->limiter; + + /* maybe this should be a kassert */ + if (lim < none || lim >= limiter_max) + return EINVAL; + + if (limsw[lim].l_iop) + return limsw[lim].l_iop(ios, bp); + + return 0; +} + +static int +cam_iosched_limiter_caniop(struct iop_stats *ios, struct bio *bp) +{ + int lim = ios->limiter; + + /* maybe this should be a kassert */ + if (lim < none || lim >= limiter_max) + return EINVAL; + + if (limsw[lim].l_caniop) + return limsw[lim].l_caniop(ios, bp); + + return 0; +} + +static int +cam_iosched_limiter_iodone(struct iop_stats *ios, struct bio *bp) +{ + int lim = ios->limiter; + + /* maybe this should be a kassert */ + if (lim < none || lim >= limiter_max) + return 0; + + if (limsw[lim].l_iodone) + return limsw[lim].l_iodone(ios, bp); + + return 0; +} + +/* + * Functions to implement the different kinds of limiters + */ + +static int +cam_iosched_qd_iop(struct iop_stats *ios, struct bio *bp) +{ + + if (ios->current <= 0 || ios->pending < ios->current) + return 0; + + return EAGAIN; +} + +static int +cam_iosched_qd_caniop(struct iop_stats *ios, struct bio *bp) +{ + + if (ios->current <= 0 || ios->pending < ios->current) + return 0; + + return EAGAIN; +} + +static int +cam_iosched_qd_iodone(struct iop_stats *ios, struct bio *bp) +{ + + if (ios->current <= 0 || ios->pending != ios->current) + return 0; + + return 1; +} + +static int +cam_iosched_iops_init(struct iop_stats *ios) +{ + + ios->l_value1 = ios->current / ios->softc->quanta; + if (ios->l_value1 <= 0) + ios->l_value1 = 1; + + return 0; +} + +static int +cam_iosched_iops_tick(struct iop_stats *ios) +{ + + ios->l_value1 = (int)((ios->current * (uint64_t)ios->softc->this_frac) >> 16); + if (ios->l_value1 <= 0) + ios->l_value1 = 1; + + return 0; +} + +static int +cam_iosched_iops_caniop(struct iop_stats *ios, struct bio *bp) +{ + + /* + * So if we have any more IOPs left, allow it, + * otherwise wait. + */ + if (ios->l_value1 <= 0) + return EAGAIN; + return 0; +} + +static int +cam_iosched_iops_iop(struct iop_stats *ios, struct bio *bp) +{ + int rv; + + rv = cam_iosched_limiter_caniop(ios, bp); + if (rv == 0) + ios->l_value1--; + + return rv; +} + +static int +cam_iosched_bw_init(struct iop_stats *ios) +{ + + /* ios->current is in kB/s, so scale to bytes */ + ios->l_value1 = ios->current * 1000 / ios->softc->quanta; + + return 0; +} + +static int +cam_iosched_bw_tick(struct iop_stats *ios) +{ + int bw; + + /* + * If we're in the hole for available quota from + * the last time, then add the quantum for this. + * If we have any left over from last quantum, + * then too bad, that's lost. Also, ios->current + * is in kB/s, so scale. + * + * We also allow up to 4 quanta of credits to + * accumulate to deal with burstiness. 4 is extremely + * arbitrary. + */ + bw = (int)((ios->current * 1000ull * (uint64_t)ios->softc->this_frac) >> 16); + if (ios->l_value1 < bw * 4) + ios->l_value1 += bw; + + return 0; +} + +static int +cam_iosched_bw_caniop(struct iop_stats *ios, struct bio *bp) +{ + /* + * So if we have any more bw quota left, allow it, + * otherwise wait. Not, we'll go negative and that's + * OK. We'll just get a lettle less next quota. + * + * Note on going negative: that allows us to process + * requests in order better, since we won't allow + * shorter reads to get around the long one that we + * don't have the quota to do just yet. It also prevents + * starvation by being a little more permissive about + * what we let through this quantum (to prevent the + * starvation), at the cost of getting a little less + * next quantum. + */ + if (ios->l_value1 <= 0) + return EAGAIN; + + + return 0; +} + +static int +cam_iosched_bw_iop(struct iop_stats *ios, struct bio *bp) +{ + int rv; + + rv = cam_iosched_limiter_caniop(ios, bp); + if (rv == 0) + ios->l_value1 -= bp->bio_length; + + return rv; +} + +static void cam_iosched_cl_maybe_steer(struct control_loop *clp); + +static void +cam_iosched_ticker(void *arg) +{ + struct cam_iosched_softc *isc = arg; + sbintime_t now, delta; + + callout_reset(&isc->ticker, hz / isc->quanta - 1, cam_iosched_ticker, isc); + + now = sbinuptime(); + delta = now - isc->last_time; + isc->this_frac = (uint32_t)delta >> 16; /* Note: discards seconds -- should be 0 harmless if not */ + isc->last_time = now; + + cam_iosched_cl_maybe_steer(&isc->cl); + + cam_iosched_limiter_tick(&isc->read_stats); + cam_iosched_limiter_tick(&isc->write_stats); + cam_iosched_limiter_tick(&isc->trim_stats); + + cam_iosched_schedule(isc, isc->periph); + + isc->total_ticks++; +} + + +static void +cam_iosched_cl_init(struct control_loop *clp, struct cam_iosched_softc *isc) +{ + + clp->next_steer = sbinuptime(); + clp->softc = isc; + clp->steer_interval = SBT_1S * 5; /* Let's start out steering every 5s */ + clp->lolat = 5 * SBT_1MS; + clp->hilat = 15 * SBT_1MS; + clp->alpha = 20; /* Alpha == gain. 20 = .2 */ + clp->type = set_max; +} + +static void +cam_iosched_cl_maybe_steer(struct control_loop *clp) +{ + struct cam_iosched_softc *isc; + sbintime_t now, lat; + int old; + + isc = clp->softc; + now = isc->last_time; + if (now < clp->next_steer) + return; + + clp->next_steer = now + clp->steer_interval; + switch (clp->type) { + case set_max: + if (isc->write_stats.current != isc->write_stats.max) + printf("Steering write from %d kBps to %d kBps\n", + isc->write_stats.current, isc->write_stats.max); + isc->read_stats.current = isc->read_stats.max; + isc->write_stats.current = isc->write_stats.max; + isc->trim_stats.current = isc->trim_stats.max; + break; + case read_latency: + old = isc->write_stats.current; + lat = isc->read_stats.ema; + /* + * Simple PLL-like engine. Since we're steering to a range for + * the SP (set point) that makes things a little more + * complicated. In addition, we're not directly controlling our + * PV (process variable), the read latency, but instead are + * manipulating the write bandwidth limit for our MV + * (manipulation variable), analysis of this code gets a bit + * messy. Also, the MV is a very noisy control surface for read + * latency since it is affected by many hidden processes inside + * the device which change how responsive read latency will be + * in reaction to changes in write bandwidth. Unlike the classic + * boiler control PLL. this may result in over-steering while + * the SSD takes its time to react to the new, lower load. This + * is why we use a relatively low alpha of between .1 and .25 to + * compensate for this effect. At .1, it takes ~22 steering + * intervals to back off by a factor of 10. At .2 it only takes + * ~10. At .25 it only takes ~8. However some preliminary data + * from the SSD drives suggests a reasponse time in 10's of + * seconds before latency drops regardless of the new write + * rate. Careful observation will be reqiured to tune this + * effectively. + * + * Also, when there's no read traffic, we jack up the write + * limit too regardless of the last read latency. 10 is + * somewhat arbitrary. + */ + if (lat < clp->lolat || isc->read_stats.total - clp->last_count < 10) + isc->write_stats.current = isc->write_stats.current * + (100 + clp->alpha) / 100; /* Scale up */ + else if (lat > clp->hilat) + isc->write_stats.current = isc->write_stats.current * + (100 - clp->alpha) / 100; /* Scale down */ + clp->last_count = isc->read_stats.total; + + /* + * Even if we don't steer, per se, enforce the min/max limits as + * those may have changed. + */ + if (isc->write_stats.current < isc->write_stats.min) + isc->write_stats.current = isc->write_stats.min; + if (isc->write_stats.current > isc->write_stats.max) + isc->write_stats.current = isc->write_stats.max; + if (old != isc->write_stats.current) + printf("Steering write from %d kBps to %d kBps due to latency of %ldus\n", + old, isc->write_stats.current, + ((uint64_t)1000000 * (uint32_t)lat) >> 32); + break; + case cl_max: + break; + } +} +#endif + /* Trim or similar currently pending completion */ -#define CAM_IOSCHED_FLAG_TRIM_ACTIVE 1 +#define CAM_IOSCHED_FLAG_TRIM_ACTIVE (1ul << 0) + /* Callout active, and needs to be torn down */ +#define CAM_IOSCHED_FLAG_CALLOUT_ACTIVE (1ul << 1) /* Periph drivers set these flags to indicate work */ #define CAM_IOSCHED_FLAG_WORK_FLAGS ((0xffffu) << 16) static void cam_iosched_io_metric_update(struct cam_iosched_softc *isc, sbintime_t sim_latency, int cmd, size_t size); static inline int cam_iosched_has_flagged_work(struct cam_iosched_softc *isc) { return !!(isc->flags & CAM_IOSCHED_FLAG_WORK_FLAGS); } static inline int cam_iosched_has_io(struct cam_iosched_softc *isc) { #ifdef CAM_NETFLIX_IOSCHED if (do_netflix_iosched) { - int can_write = bioq_first(&isc->write_queue) != NULL && - (isc->max_writes <= 0 || - isc->pending_writes < isc->max_writes); - int can_read = bioq_first(&isc->bio_queue) != NULL && - (isc->max_reads <= 0 || - isc->pending_reads < isc->max_reads); + struct bio *rbp = bioq_first(&isc->bio_queue); + struct bio *wbp = bioq_first(&isc->write_queue); + int can_write = wbp != NULL && + cam_iosched_limiter_caniop(&isc->write_stats, wbp) == 0; + int can_read = rbp != NULL && + cam_iosched_limiter_caniop(&isc->read_stats, rbp) == 0; if (iosched_debug > 2) { - printf("can write %d: pending_writes %d max_writes %d\n", can_write, isc->pending_writes, isc->max_writes); - printf("can read %d: pending_reads %d max_reads %d\n", can_read, isc->pending_reads, isc->max_reads); - printf("Queued reads %d writes %d\n", isc->queued_reads, isc->queued_writes); + printf("can write %d: pending_writes %d max_writes %d\n", can_write, isc->write_stats.pending, isc->write_stats.max); + printf("can read %d: read_stats.pending %d max_reads %d\n", can_read, isc->read_stats.pending, isc->read_stats.max); + printf("Queued reads %d writes %d\n", isc->read_stats.queued, isc->write_stats.queued); } return can_read || can_write; } #endif return bioq_first(&isc->bio_queue) != NULL; } static inline int cam_iosched_has_more_trim(struct cam_iosched_softc *isc) { return !(isc->flags & CAM_IOSCHED_FLAG_TRIM_ACTIVE) && bioq_first(&isc->trim_queue); } #define cam_iosched_sort_queue(isc) ((isc)->sort_io_queue >= 0 ? \ (isc)->sort_io_queue : cam_sort_io_queues) static inline int cam_iosched_has_work(struct cam_iosched_softc *isc) { +#ifdef CAM_NETFLIX_IOSCHED if (iosched_debug > 2) printf("has work: %d %d %d\n", cam_iosched_has_io(isc), cam_iosched_has_more_trim(isc), cam_iosched_has_flagged_work(isc)); +#endif return cam_iosched_has_io(isc) || cam_iosched_has_more_trim(isc) || cam_iosched_has_flagged_work(isc); } +#ifdef CAM_NETFLIX_IOSCHED static void -iop_stats_init(struct iop_stats *ios) +cam_iosched_iop_stats_init(struct cam_iosched_softc *isc, struct iop_stats *ios) { + + ios->limiter = none; + cam_iosched_limiter_init(ios); + ios->in = 0; + ios->max = 300000; + ios->min = 1; + ios->out = 0; + ios->pending = 0; + ios->queued = 0; + ios->total = 0; ios->ema = 0; ios->emss = 0; ios->sd = 0; - ios->outliers = 0; - ios->skipping = 0; - ios->training = 0; - ios->outliers = 0; + ios->softc = isc; } +static int +cam_iosched_limiter_sysctl(SYSCTL_HANDLER_ARGS) +{ + char buf[16]; + struct iop_stats *ios; + struct cam_iosched_softc *isc; + int value, i, error, cantick; + const char *p; + + ios = arg1; + isc = ios->softc; + value = ios->limiter; + if (value < none || value >= limiter_max) + p = "UNKNOWN"; + else + p = cam_iosched_limiter_names[value]; + + strlcpy(buf, p, sizeof(buf)); + error = sysctl_handle_string(oidp, buf, sizeof(buf), req); + if (error != 0 || req->newptr == NULL) + return error; + + cam_periph_lock(isc->periph); + + for (i = none; i < limiter_max; i++) { + if (strcmp(buf, cam_iosched_limiter_names[i]) != 0) + continue; + ios->limiter = i; + error = cam_iosched_limiter_init(ios); + if (error != 0) { + ios->limiter = value; + cam_periph_unlock(isc->periph); + return error; + } + cantick = !!limsw[isc->read_stats.limiter].l_tick + + !!limsw[isc->write_stats.limiter].l_tick + + !!limsw[isc->trim_stats.limiter].l_tick + + 1; /* Control loop requires it */ + if (isc->flags & CAM_IOSCHED_FLAG_CALLOUT_ACTIVE) { + if (cantick == 0) { + callout_stop(&isc->ticker); + isc->flags &= ~CAM_IOSCHED_FLAG_CALLOUT_ACTIVE; + } + } else { + if (cantick != 0) { + callout_reset(&isc->ticker, hz / isc->quanta - 1, cam_iosched_ticker, isc); + isc->flags |= CAM_IOSCHED_FLAG_CALLOUT_ACTIVE; + } + } + + cam_periph_unlock(isc->periph); + return 0; + } + + cam_periph_unlock(isc->periph); + return EINVAL; +} + +static int +cam_iosched_control_type_sysctl(SYSCTL_HANDLER_ARGS) +{ + char buf[16]; + struct control_loop *clp; + struct cam_iosched_softc *isc; + int value, i, error; + const char *p; + + clp = arg1; + isc = clp->softc; + value = clp->type; + if (value < none || value >= cl_max) + p = "UNKNOWN"; + else + p = cam_iosched_control_type_names[value]; + + strlcpy(buf, p, sizeof(buf)); + error = sysctl_handle_string(oidp, buf, sizeof(buf), req); + if (error != 0 || req->newptr == NULL) + return error; + + for (i = set_max; i < cl_max; i++) { + if (strcmp(buf, cam_iosched_control_type_names[i]) != 0) + continue; + cam_periph_lock(isc->periph); + clp->type = i; + cam_periph_unlock(isc->periph); + return 0; + } + + return EINVAL; +} + +static int +cam_iosched_sbintime_sysctl(SYSCTL_HANDLER_ARGS) +{ + char buf[16]; + sbintime_t value; + int error; + uint64_t us; + + value = *(sbintime_t *)arg1; + us = (uint64_t)value / SBT_1US; + snprintf(buf, sizeof(buf), "%ju", (intmax_t)us); + error = sysctl_handle_string(oidp, buf, sizeof(buf), req); + if (error != 0 || req->newptr == NULL) + return error; + us = strtoul(buf, NULL, 10); + if (us == 0) + return EINVAL; + *(sbintime_t *)arg1 = us * SBT_1US; + return 0; +} + +static void +cam_iosched_iop_stats_sysctl_init(struct cam_iosched_softc *isc, struct iop_stats *ios, char *name) +{ + struct sysctl_oid_list *n; + struct sysctl_ctx_list *ctx; + + ios->sysctl_tree = SYSCTL_ADD_NODE(&isc->sysctl_ctx, + SYSCTL_CHILDREN(isc->sysctl_tree), OID_AUTO, name, + CTLFLAG_RD, 0, name); + n = SYSCTL_CHILDREN(ios->sysctl_tree); + ctx = &ios->sysctl_ctx; + + SYSCTL_ADD_UQUAD(ctx, n, + OID_AUTO, "ema", CTLFLAG_RD, + &ios->ema, + "Fast Exponentially Weighted Moving Average"); + SYSCTL_ADD_UQUAD(ctx, n, + OID_AUTO, "emss", CTLFLAG_RD, + &ios->emss, + "Fast Exponentially Weighted Moving Sum of Squares (maybe wrong)"); + SYSCTL_ADD_UQUAD(ctx, n, + OID_AUTO, "sd", CTLFLAG_RD, + &ios->sd, + "Estimated SD for fast ema (may be wrong)"); + + SYSCTL_ADD_INT(ctx, n, + OID_AUTO, "pending", CTLFLAG_RD, + &ios->pending, 0, + "Instantaneous # of pending transactions"); + SYSCTL_ADD_INT(ctx, n, + OID_AUTO, "count", CTLFLAG_RD, + &ios->total, 0, + "# of transactions submitted to hardware"); + SYSCTL_ADD_INT(ctx, n, + OID_AUTO, "queued", CTLFLAG_RD, + &ios->queued, 0, + "# of transactions in the queue"); + SYSCTL_ADD_INT(ctx, n, + OID_AUTO, "in", CTLFLAG_RD, + &ios->in, 0, + "# of transactions queued to driver"); + SYSCTL_ADD_INT(ctx, n, + OID_AUTO, "out", CTLFLAG_RD, + &ios->out, 0, + "# of transactions completed"); + + SYSCTL_ADD_PROC(ctx, n, + OID_AUTO, "limiter", CTLTYPE_STRING | CTLFLAG_RW, + ios, 0, cam_iosched_limiter_sysctl, "A", + "Current limiting type."); + SYSCTL_ADD_INT(ctx, n, + OID_AUTO, "min", CTLFLAG_RW, + &ios->min, 0, + "min resource"); + SYSCTL_ADD_INT(ctx, n, + OID_AUTO, "max", CTLFLAG_RW, + &ios->max, 0, + "max resource"); + SYSCTL_ADD_INT(ctx, n, + OID_AUTO, "current", CTLFLAG_RW, + &ios->current, 0, + "current resource"); + +} + +static void +cam_iosched_iop_stats_fini(struct iop_stats *ios) +{ + if (ios->sysctl_tree) + if (sysctl_ctx_free(&ios->sysctl_ctx) != 0) + printf("can't remove iosched sysctl stats context\n"); +} + +static void +cam_iosched_cl_sysctl_init(struct cam_iosched_softc *isc) +{ + struct sysctl_oid_list *n; + struct sysctl_ctx_list *ctx; + struct control_loop *clp; + + clp = &isc->cl; + clp->sysctl_tree = SYSCTL_ADD_NODE(&isc->sysctl_ctx, + SYSCTL_CHILDREN(isc->sysctl_tree), OID_AUTO, "control", + CTLFLAG_RD, 0, "Control loop info"); + n = SYSCTL_CHILDREN(clp->sysctl_tree); + ctx = &clp->sysctl_ctx; + + SYSCTL_ADD_PROC(ctx, n, + OID_AUTO, "type", CTLTYPE_STRING | CTLFLAG_RW, + clp, 0, cam_iosched_control_type_sysctl, "A", + "Control loop algorithm"); + SYSCTL_ADD_PROC(ctx, n, + OID_AUTO, "steer_interval", CTLTYPE_STRING | CTLFLAG_RW, + &clp->steer_interval, 0, cam_iosched_sbintime_sysctl, "A", + "How often to steer (in us)"); + SYSCTL_ADD_PROC(ctx, n, + OID_AUTO, "lolat", CTLTYPE_STRING | CTLFLAG_RW, + &clp->lolat, 0, cam_iosched_sbintime_sysctl, "A", + "Low water mark for Latency (in us)"); + SYSCTL_ADD_PROC(ctx, n, + OID_AUTO, "hilat", CTLTYPE_STRING | CTLFLAG_RW, + &clp->hilat, 0, cam_iosched_sbintime_sysctl, "A", + "Hi water mark for Latency (in us)"); + SYSCTL_ADD_INT(ctx, n, + OID_AUTO, "alpha", CTLFLAG_RW, + &clp->alpha, 0, + "Alpha for PLL (x100) aka gain"); +} + +static void +cam_iosched_cl_sysctl_fini(struct control_loop *clp) +{ + if (clp->sysctl_tree) + if (sysctl_ctx_free(&clp->sysctl_ctx) != 0) + printf("can't remove iosched sysctl control loop context\n"); +} +#endif + /* * Allocate the iosched structure. This also insulates callers from knowing * sizeof struct cam_iosched_softc. */ int -cam_iosched_init(struct cam_iosched_softc **iscp) +cam_iosched_init(struct cam_iosched_softc **iscp, struct cam_periph *periph) { *iscp = malloc(sizeof(**iscp), M_CAMSCHED, M_NOWAIT | M_ZERO); if (*iscp == NULL) return ENOMEM; +#ifdef CAM_NETFLIX_IOSCHED if (iosched_debug) printf("CAM IOSCHEDULER Allocating entry at %p\n", *iscp); +#endif (*iscp)->sort_io_queue = -1; bioq_init(&(*iscp)->bio_queue); bioq_init(&(*iscp)->trim_queue); #ifdef CAM_NETFLIX_IOSCHED if (do_netflix_iosched) { bioq_init(&(*iscp)->write_queue); - (*iscp)->pending_reads = 0; - (*iscp)->pending_writes = 0; - (*iscp)->min_reads = 1; - (*iscp)->min_writes = 1; - (*iscp)->max_reads = 0; - (*iscp)->max_writes = 0; - (*iscp)->trims = 0; - (*iscp)->reads = 0; - (*iscp)->writes = 0; - (*iscp)->queued_reads = 0; - (*iscp)->queued_writes = 0; - (*iscp)->out_reads = 0; - (*iscp)->out_writes = 0; - (*iscp)->in_reads = 0; - (*iscp)->in_writes = 0; (*iscp)->read_bias = 100; (*iscp)->current_read_bias = 100; - iop_stats_init(&(*iscp)->read_stats); - iop_stats_init(&(*iscp)->write_stats); - iop_stats_init(&(*iscp)->trim_stats); + (*iscp)->quanta = 200; + cam_iosched_iop_stats_init(*iscp, &(*iscp)->read_stats); + cam_iosched_iop_stats_init(*iscp, &(*iscp)->write_stats); + cam_iosched_iop_stats_init(*iscp, &(*iscp)->trim_stats); + (*iscp)->trim_stats.max = 1; /* Trims are special: one at a time for now */ + (*iscp)->last_time = sbinuptime(); + callout_init_mtx(&(*iscp)->ticker, cam_periph_mtx(periph), 0); + (*iscp)->periph = periph; + cam_iosched_cl_init(&(*iscp)->cl, *iscp); + callout_reset(&(*iscp)->ticker, hz / (*iscp)->quanta - 1, cam_iosched_ticker, *iscp); + (*iscp)->flags |= CAM_IOSCHED_FLAG_CALLOUT_ACTIVE; } #endif return 0; } /* * Reclaim all used resources. This assumes that other folks have * drained the requests in the hardware. Maybe an unwise assumption. */ void cam_iosched_fini(struct cam_iosched_softc *isc) { - cam_iosched_flush(isc, NULL, ENXIO); - free(isc, M_CAMSCHED); + if (isc) { + cam_iosched_flush(isc, NULL, ENXIO); +#ifdef CAM_NETFLIX_IOSCHED + cam_iosched_iop_stats_fini(&isc->read_stats); + cam_iosched_iop_stats_fini(&isc->write_stats); + cam_iosched_iop_stats_fini(&isc->trim_stats); + cam_iosched_cl_sysctl_fini(&isc->cl); + if (isc->sysctl_tree) + if (sysctl_ctx_free(&isc->sysctl_ctx) != 0) + printf("can't remove iosched sysctl stats context\n"); + if (isc->flags & CAM_IOSCHED_FLAG_CALLOUT_ACTIVE) { + callout_drain(&isc->ticker); + isc->flags &= ~ CAM_IOSCHED_FLAG_CALLOUT_ACTIVE; + } + +#endif + free(isc, M_CAMSCHED); + } } /* * After we're sure we're attaching a device, go ahead and add * hooks for any sysctl we may wish to honor. */ void cam_iosched_sysctl_init(struct cam_iosched_softc *isc, struct sysctl_ctx_list *ctx, struct sysctl_oid *node) { +#ifdef CAM_NETFLIX_IOSCHED + struct sysctl_oid_list *n; +#endif + SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "sort_io_queue", CTLFLAG_RW | CTLFLAG_MPSAFE, &isc->sort_io_queue, 0, "Sort IO queue to try and optimise disk access patterns"); + #ifdef CAM_NETFLIX_IOSCHED if (!do_netflix_iosched) return; - /* - * Read stats - */ - SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "read_ema", CTLFLAG_RD, - &isc->read_stats.ema, - "Fast Exponentially Weighted Moving Average for reads"); - SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "read_emss", CTLFLAG_RD, - &isc->read_stats.emss, - "Fast Exponentially Weighted Moving Sum of Squares for reads"); - SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "read_sd", CTLFLAG_RD, - &isc->read_stats.sd, - "Estimated SD for fast read ema"); - SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "read_bad_latency", CTLFLAG_RD, - &isc->read_stats.worst, - "read bad latency threshold"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "read_outliers", CTLFLAG_RD, - &isc->read_stats.outliers, 0, - "read latency outliers"); + isc->sysctl_tree = SYSCTL_ADD_NODE(&isc->sysctl_ctx, + SYSCTL_CHILDREN(node), OID_AUTO, "iosched", + CTLFLAG_RD, 0, "I/O scheduler statistics"); + n = SYSCTL_CHILDREN(isc->sysctl_tree); + ctx = &isc->sysctl_ctx; - /* - * Write stats - */ - SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "write_ema", CTLFLAG_RD, - &isc->write_stats.ema, - "Fast Exponentially Weighted Moving Average for writes"); - SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "write_emss", CTLFLAG_RD, - &isc->write_stats.emss, - "Fast Exponentially Weighted Moving Sum of Squares for writes"); - SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "write_sd", CTLFLAG_RD, - &isc->write_stats.sd, - "Estimated SD for fast write ema"); - SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "write_bad_latency", CTLFLAG_RD, - &isc->write_stats.worst, - "write bad latency threshold"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "write_outliers", CTLFLAG_RD, - &isc->write_stats.outliers, 0, - "write latency outliers"); + cam_iosched_iop_stats_sysctl_init(isc, &isc->read_stats, "read"); + cam_iosched_iop_stats_sysctl_init(isc, &isc->write_stats, "write"); + cam_iosched_iop_stats_sysctl_init(isc, &isc->trim_stats, "trim"); + cam_iosched_cl_sysctl_init(isc); - /* - * Trim stats - */ - SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "trim_ema", CTLFLAG_RD, - &isc->trim_stats.ema, - "Fast Exponentially Weighted Moving Average for trims"); - SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "trim_emss", CTLFLAG_RD, - &isc->trim_stats.emss, - "Fast Exponentially Weighted Moving Sum of Squares for trims"); - SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "trim_sd", CTLFLAG_RD, - &isc->trim_stats.sd, - "Estimated SD for fast trim ema"); - SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "trim_bad_latency", CTLFLAG_RD, - &isc->trim_stats.worst, - "trim bad latency threshold"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "trim_outliers", CTLFLAG_RD, - &isc->trim_stats.outliers, 0, - "trim latency outliers"); - - /* - * Other misc knobs. - */ - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "pending_reads", CTLFLAG_RD, - &isc->pending_reads, 0, - "Instantaneous # of pending reads"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "pending_writes", CTLFLAG_RD, - &isc->pending_writes, 0, - "Instantaneous # of pending writes"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "trims", CTLFLAG_RD, - &isc->trims, 0, - "# of trims"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "writes", CTLFLAG_RD, - &isc->writes, 0, - "# of writes submitted to hardware"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "reads", CTLFLAG_RD, - &isc->reads, 0, - "# of reads submitted to hardware"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "queued_writes", CTLFLAG_RD, - &isc->queued_writes, 0, - "# of writes in the queue"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "queued_reads", CTLFLAG_RD, - &isc->queued_reads, 0, - "# of reads in the queue"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "in_writes", CTLFLAG_RD, - &isc->in_writes, 0, - "# of writes queued"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "in_reads", CTLFLAG_RD, - &isc->in_reads, 0, - "# of reads queued"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "out_writes", CTLFLAG_RD, - &isc->out_writes, 0, - "# of writes completed"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "out_reads", CTLFLAG_RD, - &isc->in_reads, 0, - "# of reads completed"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), + SYSCTL_ADD_INT(ctx, n, OID_AUTO, "read_bias", CTLFLAG_RW, &isc->read_bias, 100, - "How biased towards read should we be"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "min_reads", CTLFLAG_RW, - &isc->min_reads, 0, - "min reads reserved in the queue"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "min_writes", CTLFLAG_RW, - &isc->min_writes, 0, - "min writes reserved in the queue"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "max_reads", CTLFLAG_RW, - &isc->max_reads, 0, - "max reads reserved in the queue"); - SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(node), - OID_AUTO, "max_writes", CTLFLAG_RW, - &isc->max_writes, 0, - "max writes reserved in the queue"); + "How biased towards read should we be independent of limits"); + + SYSCTL_ADD_INT(ctx, n, + OID_AUTO, "quanta", CTLFLAG_RW, + &isc->quanta, 200, + "How many quanta per second do we slice the I/O up into"); + + SYSCTL_ADD_INT(ctx, n, + OID_AUTO, "total_ticks", CTLFLAG_RD, + &isc->total_ticks, 0, + "Total number of ticks we've done"); #endif } /* * Flush outstanding I/O. Consumers of this library don't know all the * queues we may keep, so this allows all I/O to be flushed in one * convenient call. */ void cam_iosched_flush(struct cam_iosched_softc *isc, struct devstat *stp, int err) { bioq_flush(&isc->bio_queue, stp, err); bioq_flush(&isc->trim_queue, stp, err); #ifdef CAM_NETFLIX_IOSCHED if (do_netflix_iosched) bioq_flush(&isc->write_queue, stp, err); #endif } #ifdef CAM_NETFLIX_IOSCHED static struct bio * cam_iosched_get_write(struct cam_iosched_softc *isc) { struct bio *bp; /* * We control the write rate by controlling how many requests we send * down to the drive at any one time. Fewer requests limits the * effects of both starvation when the requests take a while and write * amplification when each request is causing more than one write to * the NAND media. Limiting the queue depth like this will also limit * the write throughput and give and reads that want to compete to * compete unfairly. */ - if (isc->max_writes > 0 && isc->pending_writes >= isc->max_writes) { - if (iosched_debug) - printf("Can't write because pending_writes %d and max_writes %d\n", - isc->pending_writes, isc->max_writes); - return NULL; - } bp = bioq_first(&isc->write_queue); if (bp == NULL) { if (iosched_debug > 3) printf("No writes present in write_queue\n"); return NULL; } + + /* + * If pending read, prefer that based on current read bias + * setting. + */ if (bioq_first(&isc->bio_queue) && isc->current_read_bias) { if (iosched_debug) - printf("Reads present and current_read_bias is %d queued writes %d queued reads %d\n", isc->current_read_bias, isc->queued_writes, isc->queued_reads); + printf("Reads present and current_read_bias is %d queued writes %d queued reads %d\n", isc->current_read_bias, isc->write_stats.queued, isc->read_stats.queued); isc->current_read_bias--; return NULL; } + + /* + * See if our current limiter allows this I/O. + */ + if (cam_iosched_limiter_iop(&isc->write_stats, bp) != 0) { + if (iosched_debug) + printf("Can't write because limiter says no.\n"); + return NULL; + } + + /* + * Let's do this: We've passed all the gates and we're a go + * to schedule the I/O in the SIM. + */ isc->current_read_bias = isc->read_bias; bioq_remove(&isc->write_queue, bp); if (bp->bio_cmd == BIO_WRITE) { - isc->queued_writes--; - isc->writes++; - isc->pending_writes++; + isc->write_stats.queued--; + isc->write_stats.total++; + isc->write_stats.pending++; } if (iosched_debug > 9) printf("HWQ : %p %#x\n", bp, bp->bio_cmd); return bp; } #endif /* * Put back a trim that you weren't able to actually schedule this time. */ void cam_iosched_put_back_trim(struct cam_iosched_softc *isc, struct bio *bp) { bioq_insert_head(&isc->trim_queue, bp); - isc->trims--; +#ifdef CAM_NETFLIX_IOSCHED + isc->trim_stats.queued++; + isc->trim_stats.total--; /* since we put it back, don't double count */ + isc->trim_stats.pending--; +#endif } /* * gets the next trim from the trim queue. * * Assumes we're called with the periph lock held. It removes this * trim from the queue and the device must explicitly reinstert it * should the need arise. */ struct bio * cam_iosched_next_trim(struct cam_iosched_softc *isc) { struct bio *bp; bp = bioq_first(&isc->trim_queue); if (bp == NULL) return NULL; bioq_remove(&isc->trim_queue, bp); - isc->trims++; +#ifdef CAM_NETFLIX_IOSCHED + isc->trim_stats.queued--; + isc->trim_stats.total++; + isc->trim_stats.pending++; +#endif return bp; } /* * gets the an available trim from the trim queue, if there's no trim * already pending. It removes this trim from the queue and the device * must explicitly reinstert it should the need arise. * * Assumes we're called with the periph lock held. */ struct bio * cam_iosched_get_trim(struct cam_iosched_softc *isc) { if (!cam_iosched_has_more_trim(isc)) return NULL; return cam_iosched_next_trim(isc); } /* * Determine what the next bit of work to do is for the periph. The * default implementation looks to see if we have trims to do, but no * trims outstanding. If so, we do that. Otherwise we see if we have * other work. If we do, then we do that. Otherwise why were we called? */ struct bio * cam_iosched_next_bio(struct cam_iosched_softc *isc) { struct bio *bp; /* * See if we have a trim that can be scheduled. We can only send one * at a time down, so this takes that into account. * * XXX newer TRIM commands are queueable. Revisit this when we * implement them. */ if ((bp = cam_iosched_get_trim(isc)) != NULL) return bp; #ifdef CAM_NETFLIX_IOSCHED /* * See if we have any pending writes, and room in the queue for them, * and if so, those are next. */ if (do_netflix_iosched) { if ((bp = cam_iosched_get_write(isc)) != NULL) return bp; } #endif /* * next, see if there's other, normal I/O waiting. If so return that. */ + if ((bp = bioq_first(&isc->bio_queue)) == NULL) + return NULL; + #ifdef CAM_NETFLIX_IOSCHED /* * For the netflix scheduler, bio_queue is only for reads, so enforce - * the limits here. + * the limits here. Enforce only for reads. */ if (do_netflix_iosched) { - if (isc->max_reads > 0 && isc->pending_reads >= isc->max_reads) + if (bp->bio_cmd == BIO_READ && + cam_iosched_limiter_iop(&isc->read_stats, bp) != 0) return NULL; } #endif - if ((bp = bioq_first(&isc->bio_queue)) != NULL) { - bioq_remove(&isc->bio_queue, bp); + bioq_remove(&isc->bio_queue, bp); #ifdef CAM_NETFLIX_IOSCHED - if (do_netflix_iosched) { - if (bp->bio_cmd == BIO_READ) { - isc->queued_reads--; - isc->reads++; - isc->pending_reads++; - } else - printf("Found bio_cmd = %#x\n", bp->bio_cmd); - } -#endif - if (iosched_debug > 9) - printf("HWQ : %p %#x\n", bp, bp->bio_cmd); - return bp; + if (do_netflix_iosched) { + if (bp->bio_cmd == BIO_READ) { + isc->read_stats.queued--; + isc->read_stats.total++; + isc->read_stats.pending++; + } else + printf("Found bio_cmd = %#x\n", bp->bio_cmd); } - - /* - * Otherwise, we got nada, so return that - */ - return NULL; + if (iosched_debug > 9) + printf("HWQ : %p %#x\n", bp, bp->bio_cmd); +#endif + return bp; } /* * Driver has been given some work to do by the block layer. Tell the * scheduler about it and have it queue the work up. The scheduler module * will then return the currently most useful bit of work later, possibly * deferring work for various reasons. */ void cam_iosched_queue_work(struct cam_iosched_softc *isc, struct bio *bp) { /* * Put all trims on the trim queue sorted, since we know * that the collapsing code requires this. Otherwise put * the work on the bio queue. */ if (bp->bio_cmd == BIO_DELETE) { bioq_disksort(&isc->trim_queue, bp); +#ifdef CAM_NETFLIX_IOSCHED + isc->trim_stats.in++; + isc->trim_stats.queued++; +#endif } #ifdef CAM_NETFLIX_IOSCHED else if (do_netflix_iosched && (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) { if (cam_iosched_sort_queue(isc)) bioq_disksort(&isc->write_queue, bp); else bioq_insert_tail(&isc->write_queue, bp); if (iosched_debug > 9) printf("Qw : %p %#x\n", bp, bp->bio_cmd); if (bp->bio_cmd == BIO_WRITE) { - isc->in_writes++; - isc->queued_writes++; + isc->write_stats.in++; + isc->write_stats.queued++; } } #endif else { if (cam_iosched_sort_queue(isc)) bioq_disksort(&isc->bio_queue, bp); else bioq_insert_tail(&isc->bio_queue, bp); +#ifdef CAM_NETFLIX_IOSCHED if (iosched_debug > 9) printf("Qr : %p %#x\n", bp, bp->bio_cmd); if (bp->bio_cmd == BIO_READ) { - isc->in_reads++; - isc->queued_reads++; + isc->read_stats.in++; + isc->read_stats.queued++; } else if (bp->bio_cmd == BIO_WRITE) { - isc->in_writes++; - isc->queued_writes++; + isc->write_stats.in++; + isc->write_stats.queued++; } +#endif } } /* * If we have work, get it scheduled. Called with the periph lock held. */ void cam_iosched_schedule(struct cam_iosched_softc *isc, struct cam_periph *periph) { if (cam_iosched_has_work(isc)) xpt_schedule(periph, CAM_PRIORITY_NORMAL); } /* * Complete a trim request */ void cam_iosched_trim_done(struct cam_iosched_softc *isc) { isc->flags &= ~CAM_IOSCHED_FLAG_TRIM_ACTIVE; } /* * Complete a bio. Called before we release the ccb with xpt_release_ccb so we * might use notes in the ccb for statistics. */ int -cam_iosched_bio_complete(struct cam_iosched_softc *isc, struct bio *bp, union ccb *done_ccb) +cam_iosched_bio_complete(struct cam_iosched_softc *isc, struct bio *bp, + union ccb *done_ccb) { int retval = 0; #ifdef CAM_NETFLIX_IOSCHED if (!do_netflix_iosched) return retval; if (iosched_debug > 10) printf("done: %p %#x\n", bp, bp->bio_cmd); if (bp->bio_cmd == BIO_WRITE) { - retval = isc->max_writes > 0 && isc->pending_writes == isc->max_writes; - if (isc->max_writes > 0 && retval && iosched_debug) - printf("NEEDS SCHED\n"); - isc->out_writes++; - isc->pending_writes--; + retval = cam_iosched_limiter_iodone(&isc->write_stats, bp); + isc->write_stats.out++; + isc->write_stats.pending--; } else if (bp->bio_cmd == BIO_READ) { - retval = isc->max_reads > 0 && isc->pending_reads == isc->max_reads; - isc->out_reads++; - isc->pending_reads--; - } else if (bp->bio_cmd != BIO_FLUSH && bp->bio_cmd != BIO_DELETE) { + retval = cam_iosched_limiter_iodone(&isc->read_stats, bp); + isc->read_stats.out++; + isc->read_stats.pending--; + } else if (bp->bio_cmd == BIO_DELETE) { + isc->trim_stats.out++; + isc->trim_stats.pending--; + } else if (bp->bio_cmd != BIO_FLUSH) { if (iosched_debug) printf("Completing command with bio_cmd == %#x\n", bp->bio_cmd); } if (!(bp->bio_flags & BIO_ERROR)) cam_iosched_io_metric_update(isc, done_ccb->ccb_h.qos.sim_data, bp->bio_cmd, bp->bio_bcount); #endif return retval; } /* * Tell the io scheduler that you've pushed a trim down into the sim. * xxx better place for this? */ void cam_iosched_submit_trim(struct cam_iosched_softc *isc) { isc->flags |= CAM_IOSCHED_FLAG_TRIM_ACTIVE; } /* * Change the sorting policy hint for I/O transactions for this device. */ void cam_iosched_set_sort_queue(struct cam_iosched_softc *isc, int val) { isc->sort_io_queue = val; } int cam_iosched_has_work_flags(struct cam_iosched_softc *isc, uint32_t flags) { return isc->flags & flags; } void cam_iosched_set_work_flags(struct cam_iosched_softc *isc, uint32_t flags) { isc->flags |= flags; } void cam_iosched_clr_work_flags(struct cam_iosched_softc *isc, uint32_t flags) { isc->flags &= ~flags; } #ifdef CAM_NETFLIX_IOSCHED /* * After the method presented in Jack Crenshaw's 1998 article "Integer * Suqare Roots," reprinted at * http://www.embedded.com/electronics-blogs/programmer-s-toolbox/4219659/Integer-Square-Roots * and well worth the read. Briefly, we find the power of 4 that's the * largest smaller than val. We then check each smaller power of 4 to * see if val is still bigger. The right shifts at each step divide * the result by 2 which after successive application winds up * accumulating the right answer. It could also have been accumulated * using a separate root counter, but this code is smaller and faster * than that method. This method is also integer size invariant. * It returns floor(sqrt((float)val)), or the larget integer less than * or equal to the square root. */ static uint64_t isqrt64(uint64_t val) { uint64_t res = 0; uint64_t bit = 1ULL << (sizeof(uint64_t) * NBBY - 2); /* * Find the largest power of 4 smaller than val. */ while (bit > val) bit >>= 2; /* * Accumulate the answer, one bit at a time (we keep moving * them over since 2 is the square root of 4 and we test * powers of 4). We accumulate where we find the bit, but * the successive shifts land the bit in the right place * by the end. */ while (bit != 0) { if (val >= res + bit) { val -= res + bit; res = (res >> 1) + bit; } else res >>= 1; bit >>= 2; } return res; } /* * a and b are 32.32 fixed point stored in a 64-bit word. * Let al and bl be the .32 part of a and b. * Let ah and bh be the 32 part of a and b. * R is the radix and is 1 << 32 * * a * b * (ah + al / R) * (bh + bl / R) * ah * bh + (al * bh + ah * bl) / R + al * bl / R^2 * * After multiplicaiton, we have to renormalize by multiply by * R, so we wind up with * ah * bh * R + al * bh + ah * bl + al * bl / R * which turns out to be a very nice way to compute this value * so long as ah and bh are < 65536 there's no loss of high bits * and the low order bits are below the threshold of caring for * this application. */ static uint64_t mul(uint64_t a, uint64_t b) { uint64_t al, ah, bl, bh; al = a & 0xffffffff; ah = a >> 32; bl = b & 0xffffffff; bh = b >> 32; return ((ah * bh) << 32) + al * bh + ah * bl + ((al * bl) >> 32); } -static int -cmp(const void *a, const void *b) -{ - return (int)(*(sbintime_t *)a - *(sbintime_t *)b); -} - static void cam_iosched_update(struct iop_stats *iop, sbintime_t sim_latency) { sbintime_t y, yy; uint64_t var; - /* - * Skip the first ~50 I/Os since experience has shown the first few are - * atypical. - */ - if (iop->skipping < IOP_MAX_SKIP) { - iop->skipping++; - return; - } - - /* - * After the first few, the next few hundred are typical of what we'd - * expect. - */ - if (iop->training < IOP_MAX_TRAINING) { - iop->data[iop->training++] = sim_latency; - if (iop->training == IOP_MAX_TRAINING) { - qsort(iop->data, nitems(iop->data), - sizeof(iop->data[0]), cmp); - /* 95th percentile */ - iop->worst = iop->data[IOP_MAX_TRAINING * 95 / 100]; - /* - * We expect about 5% of the I/Os to be above the 95th - * percentile. However, the training happens early in - * boot where things are somewhat ideal compared to - * the fully loaded system, so allow for a wide margin - * of error before declaring things outliers by - * multiplying that by 4 to allow for expected - * degredation. - */ - iop->worst *= 4; - } - return; - } - - if (sim_latency > iop->worst) - iop->outliers++; - - /* - * We determine this new measurement is an outlier before we update - * our estimators. If sd is 0, then we have no reliable estimate for - * SD yet so don't bother seeing if this is an outlier. - */ - if (iop->sd != 0 && sim_latency > iop->ema + 5 * iop->sd) { -// printf("Outlier: EMA: %ju SD: %ju lat %ju\n", (intmax_t)iop->ema, -// (intmax_t)iop->sd, (intmax_t)sim_latency); - iop->outliers++; - } - /* * Classic expoentially decaying average with a tiny alpha * (2 ^ -alpha_bits). For more info see the NIST statistical * handbook. * * ema_t = y_t * alpha + ema_t-1 * (1 - alpha) * alpha = 1 / (1 << alpha_bits) * * Since alpha is a power of two, we can compute this w/o any mult or * division. */ y = sim_latency; - iop->ema = (y + (iop->ema << ALPHA_BITS) - iop->ema) >> ALPHA_BITS; + iop->ema = (y + (iop->ema << alpha_bits) - iop->ema) >> alpha_bits; yy = mul(y, y); - iop->emss = (yy + (iop->emss << ALPHA_BITS) - iop->emss) >> ALPHA_BITS; + iop->emss = (yy + (iop->emss << alpha_bits) - iop->emss) >> alpha_bits; /* * s_1 = sum of data * s_2 = sum of data * data * ema ~ mean (or s_1 / N) * emss ~ s_2 / N * * sd = sqrt((N * s_2 - s_1 ^ 2) / (N * (N - 1))) * sd = sqrt((N * s_2 / N * (N - 1)) - (s_1 ^ 2 / (N * (N - 1)))) * * N ~ 2 / alpha - 1 * alpha < 1 / 16 (typically much less) * N > 31 --> N large so N * (N - 1) is approx N * N * * substituting and rearranging: * sd ~ sqrt(s_2 / N - (s_1 / N) ^ 2) * ~ sqrt(emss - ema ^ 2); * which is the formula used here to get a decent estimate of sd which * we use to detect outliers. Note that when first starting up, it * takes a while for emss sum of squares estimator to converge on a * good value. during this time, it can be less than ema^2. We * compute a sd of 0 in that case, and ignore outliers. */ var = iop->emss - mul(iop->ema, iop->ema); iop->sd = (int64_t)var < 0 ? 0 : isqrt64(var); } static void cam_iosched_io_metric_update(struct cam_iosched_softc *isc, sbintime_t sim_latency, int cmd, size_t size) { /* xxx Do we need to scale based on the size of the I/O ? */ switch (cmd) { case BIO_READ: cam_iosched_update(&isc->read_stats, sim_latency); break; case BIO_WRITE: cam_iosched_update(&isc->write_stats, sim_latency); break; case BIO_DELETE: cam_iosched_update(&isc->trim_stats, sim_latency); break; default: break; } } #ifdef DDB static int biolen(struct bio_queue_head *bq) { int i = 0; struct bio *bp; TAILQ_FOREACH(bp, &bq->queue, bio_queue) { i++; } return i; } /* * Show the internal state of the I/O scheduler. */ DB_SHOW_COMMAND(iosched, cam_iosched_db_show) { struct cam_iosched_softc *isc; if (!have_addr) { db_printf("Need addr\n"); return; } isc = (struct cam_iosched_softc *)addr; - db_printf("pending_reads: %d\n", isc->pending_reads); - db_printf("min_reads: %d\n", isc->min_reads); - db_printf("max_reads: %d\n", isc->max_reads); - db_printf("reads: %d\n", isc->reads); - db_printf("in_reads: %d\n", isc->in_reads); - db_printf("out_reads: %d\n", isc->out_reads); - db_printf("queued_reads: %d\n", isc->queued_reads); + db_printf("pending_reads: %d\n", isc->read_stats.pending); + db_printf("min_reads: %d\n", isc->read_stats.min); + db_printf("max_reads: %d\n", isc->read_stats.max); + db_printf("reads: %d\n", isc->read_stats.total); + db_printf("in_reads: %d\n", isc->read_stats.in); + db_printf("out_reads: %d\n", isc->read_stats.out); + db_printf("queued_reads: %d\n", isc->read_stats.queued); db_printf("Current Q len %d\n", biolen(&isc->bio_queue)); - db_printf("pending_writes: %d\n", isc->pending_writes); - db_printf("min_writes: %d\n", isc->min_writes); - db_printf("max_writes: %d\n", isc->max_writes); - db_printf("writes: %d\n", isc->writes); - db_printf("in_writes: %d\n", isc->in_writes); - db_printf("out_writes: %d\n", isc->out_writes); - db_printf("queued_writes: %d\n", isc->queued_writes); + db_printf("pending_writes: %d\n", isc->write_stats.pending); + db_printf("min_writes: %d\n", isc->write_stats.min); + db_printf("max_writes: %d\n", isc->write_stats.max); + db_printf("writes: %d\n", isc->write_stats.total); + db_printf("in_writes: %d\n", isc->write_stats.in); + db_printf("out_writes: %d\n", isc->write_stats.out); + db_printf("queued_writes: %d\n", isc->write_stats.queued); db_printf("Current Q len %d\n", biolen(&isc->write_queue)); + db_printf("pending_trims: %d\n", isc->trim_stats.pending); + db_printf("min_trims: %d\n", isc->trim_stats.min); + db_printf("max_trims: %d\n", isc->trim_stats.max); + db_printf("trims: %d\n", isc->trim_stats.total); + db_printf("in_trims: %d\n", isc->trim_stats.in); + db_printf("out_trims: %d\n", isc->trim_stats.out); + db_printf("queued_trims: %d\n", isc->trim_stats.queued); + db_printf("Current Q len %d\n", biolen(&isc->trim_queue)); db_printf("read_bias: %d\n", isc->read_bias); db_printf("current_read_bias: %d\n", isc->current_read_bias); db_printf("Trim active? %s\n", (isc->flags & CAM_IOSCHED_FLAG_TRIM_ACTIVE) ? "yes" : "no"); } #endif #endif Index: projects/iosched/sys/cam/cam_iosched.h =================================================================== --- projects/iosched/sys/cam/cam_iosched.h (revision 287875) +++ projects/iosched/sys/cam/cam_iosched.h (revision 287876) @@ -1,64 +1,64 @@ /*- * CAM IO Scheduler Interface * * Copyright (c) 2015 Netflix, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _CAM_CAM_IOSCHED_H #define _CAM_CAM_IOSCHED_H /* No user-servicable parts in here. */ #ifdef _KERNEL /* Forward declare all structs to keep interface thin */ struct cam_iosched_softc; struct sysctl_ctx_list; struct sysctl_oid; union ccb; struct bio; -int cam_iosched_init(struct cam_iosched_softc **); +int cam_iosched_init(struct cam_iosched_softc **, struct cam_periph *periph); void cam_iosched_fini(struct cam_iosched_softc *); void cam_iosched_sysctl_init(struct cam_iosched_softc *, struct sysctl_ctx_list *, struct sysctl_oid *); struct bio *cam_iosched_next_trim(struct cam_iosched_softc *isc); struct bio *cam_iosched_get_trim(struct cam_iosched_softc *isc); struct bio *cam_iosched_next_bio(struct cam_iosched_softc *isc); void cam_iosched_queue_work(struct cam_iosched_softc *isc, struct bio *bp); void cam_iosched_flush(struct cam_iosched_softc *isc, struct devstat *stp, int err); void cam_iosched_schedule(struct cam_iosched_softc *isc, struct cam_periph *periph); void cam_iosched_finish_trim(struct cam_iosched_softc *isc); void cam_iosched_submit_trim(struct cam_iosched_softc *isc); void cam_iosched_put_back_trim(struct cam_iosched_softc *isc, struct bio *bp); void cam_iosched_set_sort_queue(struct cam_iosched_softc *isc, int val); int cam_iosched_has_work_flags(struct cam_iosched_softc *isc, uint32_t flags); void cam_iosched_set_work_flags(struct cam_iosched_softc *isc, uint32_t flags); void cam_iosched_clr_work_flags(struct cam_iosched_softc *isc, uint32_t flags); void cam_iosched_trim_done(struct cam_iosched_softc *isc); int cam_iosched_bio_complete(struct cam_iosched_softc *isc, struct bio *bp, union ccb *done_ccb); #endif #endif Index: projects/iosched/sys/cam/scsi/scsi_da.c =================================================================== --- projects/iosched/sys/cam/scsi/scsi_da.c (revision 287875) +++ projects/iosched/sys/cam/scsi/scsi_da.c (revision 287876) @@ -1,4035 +1,4163 @@ /*- * Implementation of SCSI Direct Access Peripheral driver for CAM. * * Copyright (c) 1997 Justin T. Gibbs. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #ifdef _KERNEL #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif /* _KERNEL */ #ifndef _KERNEL #include #include #endif /* _KERNEL */ #include #include #include #include #include #include #include #ifndef _KERNEL #include #endif /* !_KERNEL */ #ifdef _KERNEL typedef enum { DA_STATE_PROBE_RC, DA_STATE_PROBE_RC16, DA_STATE_PROBE_LBP, DA_STATE_PROBE_BLK_LIMITS, DA_STATE_PROBE_BDC, DA_STATE_PROBE_ATA, DA_STATE_NORMAL } da_state; typedef enum { DA_FLAG_PACK_INVALID = 0x001, DA_FLAG_NEW_PACK = 0x002, DA_FLAG_PACK_LOCKED = 0x004, DA_FLAG_PACK_REMOVABLE = 0x008, DA_FLAG_NEED_OTAG = 0x020, DA_FLAG_WAS_OTAG = 0x040, DA_FLAG_RETRY_UA = 0x080, DA_FLAG_OPEN = 0x100, DA_FLAG_SCTX_INIT = 0x200, DA_FLAG_CAN_RC16 = 0x400, DA_FLAG_PROBED = 0x800, DA_FLAG_DIRTY = 0x1000, DA_FLAG_ANNOUNCED = 0x2000 } da_flags; typedef enum { DA_Q_NONE = 0x00, DA_Q_NO_SYNC_CACHE = 0x01, DA_Q_NO_6_BYTE = 0x02, DA_Q_NO_PREVENT = 0x04, DA_Q_4K = 0x08, DA_Q_NO_RC16 = 0x10, DA_Q_NO_UNMAP = 0x20, DA_Q_RETRY_BUSY = 0x40 } da_quirks; #define DA_Q_BIT_STRING \ "\020" \ "\001NO_SYNC_CACHE" \ "\002NO_6_BYTE" \ "\003NO_PREVENT" \ "\0044K" \ "\005NO_RC16" \ "\006NO_UNMAP" \ "\007RETRY_BUSY" typedef enum { DA_CCB_PROBE_RC = 0x01, DA_CCB_PROBE_RC16 = 0x02, DA_CCB_PROBE_LBP = 0x03, DA_CCB_PROBE_BLK_LIMITS = 0x04, DA_CCB_PROBE_BDC = 0x05, DA_CCB_PROBE_ATA = 0x06, DA_CCB_BUFFER_IO = 0x07, DA_CCB_DUMP = 0x0A, DA_CCB_DELETE = 0x0B, DA_CCB_TUR = 0x0C, DA_CCB_TYPE_MASK = 0x0F, DA_CCB_RETRY_UA = 0x10 } da_ccb_state; /* * Order here is important for method choice * * We prefer ATA_TRIM as tests run against a Sandforce 2281 SSD attached to * LSI 2008 (mps) controller (FW: v12, Drv: v14) resulted 20% quicker deletes * using ATA_TRIM than the corresponding UNMAP results for a real world mysql * import taking 5mins. * */ typedef enum { DA_DELETE_NONE, DA_DELETE_DISABLE, DA_DELETE_ATA_TRIM, DA_DELETE_UNMAP, DA_DELETE_WS16, DA_DELETE_WS10, DA_DELETE_ZERO, DA_DELETE_MIN = DA_DELETE_ATA_TRIM, DA_DELETE_MAX = DA_DELETE_ZERO } da_delete_methods; typedef void da_delete_func_t (struct cam_periph *periph, union ccb *ccb, struct bio *bp); static da_delete_func_t da_delete_trim; static da_delete_func_t da_delete_unmap; static da_delete_func_t da_delete_ws; static const void * da_delete_functions[] = { NULL, NULL, da_delete_trim, da_delete_unmap, da_delete_ws, da_delete_ws, da_delete_ws }; static const char *da_delete_method_names[] = { "NONE", "DISABLE", "ATA_TRIM", "UNMAP", "WS16", "WS10", "ZERO" }; static const char *da_delete_method_desc[] = { "NONE", "DISABLED", "ATA TRIM", "UNMAP", "WRITE SAME(16) with UNMAP", "WRITE SAME(10) with UNMAP", "ZERO" }; /* Offsets into our private area for storing information */ #define ccb_state ppriv_field0 #define ccb_bp ppriv_ptr1 struct disk_params { u_int8_t heads; u_int32_t cylinders; u_int8_t secs_per_track; u_int32_t secsize; /* Number of bytes/sector */ u_int64_t sectors; /* total number sectors */ u_int stripesize; u_int stripeoffset; }; #define UNMAP_RANGE_MAX 0xffffffff #define UNMAP_HEAD_SIZE 8 #define UNMAP_RANGE_SIZE 16 #define UNMAP_MAX_RANGES 2048 /* Protocol Max is 4095 */ #define UNMAP_BUF_SIZE ((UNMAP_MAX_RANGES * UNMAP_RANGE_SIZE) + \ UNMAP_HEAD_SIZE) #define WS10_MAX_BLKS 0xffff #define WS16_MAX_BLKS 0xffffffff #define ATA_TRIM_MAX_RANGES ((UNMAP_BUF_SIZE / \ (ATA_DSM_RANGE_SIZE * ATA_DSM_BLK_SIZE)) * ATA_DSM_BLK_SIZE) #define DA_WORK_TUR (1 << 16) struct da_softc { struct cam_iosched_softc *cam_iosched; struct bio_queue_head delete_run_queue; LIST_HEAD(, ccb_hdr) pending_ccbs; int refcount; /* Active xpt_action() calls */ da_state state; da_flags flags; da_quirks quirks; int minimum_cmd_size; int error_inject; int trim_max_ranges; int delete_available; /* Delete methods possibly available */ u_int maxio; uint32_t unmap_max_ranges; uint32_t unmap_max_lba; /* Max LBAs in UNMAP req */ uint64_t ws_max_blks; da_delete_methods delete_method; da_delete_func_t *delete_func; + int unmappedio; + int rotating; struct disk_params params; struct disk *disk; union ccb saved_ccb; struct task sysctl_task; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; struct callout sendordered_c; uint64_t wwpn; uint8_t unmap_buf[UNMAP_BUF_SIZE]; struct scsi_read_capacity_data_long rcaplong; struct callout mediapoll_c; +#ifdef CAM_IO_STATS + struct sysctl_ctx_list sysctl_stats_ctx; + struct sysctl_oid *sysctl_stats_tree; + u_int errors; + u_int timeouts; + u_int invalidations; +#endif }; #define dadeleteflag(softc, delete_method, enable) \ if (enable) { \ softc->delete_available |= (1 << delete_method); \ } else { \ softc->delete_available &= ~(1 << delete_method); \ } struct da_quirk_entry { struct scsi_inquiry_pattern inq_pat; da_quirks quirks; }; static const char quantum[] = "QUANTUM"; static const char microp[] = "MICROP"; static struct da_quirk_entry da_quirk_table[] = { /* SPI, FC devices */ { /* * Fujitsu M2513A MO drives. * Tested devices: M2513A2 firmware versions 1200 & 1300. * (dip switch selects whether T_DIRECT or T_OPTICAL device) * Reported by: W.Scholten */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "FUJITSU", "M2513A", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* See above. */ {T_OPTICAL, SIP_MEDIA_REMOVABLE, "FUJITSU", "M2513A", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * This particular Fujitsu drive doesn't like the * synchronize cache command. * Reported by: Tom Jackson */ {T_DIRECT, SIP_MEDIA_FIXED, "FUJITSU", "M2954*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * This drive doesn't like the synchronize cache command * either. Reported by: Matthew Jacob * in NetBSD PR kern/6027, August 24, 1998. */ {T_DIRECT, SIP_MEDIA_FIXED, microp, "2217*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * This drive doesn't like the synchronize cache command * either. Reported by: Hellmuth Michaelis (hm@kts.org) * (PR 8882). */ {T_DIRECT, SIP_MEDIA_FIXED, microp, "2112*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Doesn't like the synchronize cache command. * Reported by: Blaz Zupan */ {T_DIRECT, SIP_MEDIA_FIXED, "NEC", "D3847*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Doesn't like the synchronize cache command. * Reported by: Blaz Zupan */ {T_DIRECT, SIP_MEDIA_FIXED, quantum, "MAVERICK 540S", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Doesn't like the synchronize cache command. */ {T_DIRECT, SIP_MEDIA_FIXED, quantum, "LPS525S", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Doesn't like the synchronize cache command. * Reported by: walter@pelissero.de */ {T_DIRECT, SIP_MEDIA_FIXED, quantum, "LPS540S", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Doesn't work correctly with 6 byte reads/writes. * Returns illegal request, and points to byte 9 of the * 6-byte CDB. * Reported by: Adam McDougall */ {T_DIRECT, SIP_MEDIA_FIXED, quantum, "VIKING 4*", "*"}, /*quirks*/ DA_Q_NO_6_BYTE }, { /* See above. */ {T_DIRECT, SIP_MEDIA_FIXED, quantum, "VIKING 2*", "*"}, /*quirks*/ DA_Q_NO_6_BYTE }, { /* * Doesn't like the synchronize cache command. * Reported by: walter@pelissero.de */ {T_DIRECT, SIP_MEDIA_FIXED, "CONNER", "CP3500*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * The CISS RAID controllers do not support SYNC_CACHE */ {T_DIRECT, SIP_MEDIA_FIXED, "COMPAQ", "RAID*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * The STEC SSDs sometimes hang on UNMAP. */ {T_DIRECT, SIP_MEDIA_FIXED, "STEC", "*", "*"}, /*quirks*/ DA_Q_NO_UNMAP }, { /* * VMware returns BUSY status when storage has transient * connectivity problems, so better wait. */ {T_DIRECT, SIP_MEDIA_FIXED, "VMware", "Virtual disk", "*"}, /*quirks*/ DA_Q_RETRY_BUSY }, /* USB mass storage devices supported by umass(4) */ { /* * EXATELECOM (Sigmatel) i-Bead 100/105 USB Flash MP3 Player * PR: kern/51675 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "EXATEL", "i-BEAD10*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Power Quotient Int. (PQI) USB flash key * PR: kern/53067 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "Generic*", "USB Flash Disk*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Creative Nomad MUVO mp3 player (USB) * PR: kern/53094 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "CREATIVE", "NOMAD_MUVO", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE|DA_Q_NO_PREVENT }, { /* * Jungsoft NEXDISK USB flash key * PR: kern/54737 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "JUNGSOFT", "NEXDISK*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * FreeDik USB Mini Data Drive * PR: kern/54786 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "FreeDik*", "Mini Data Drive", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Sigmatel USB Flash MP3 Player * PR: kern/57046 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "SigmaTel", "MSCN", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE|DA_Q_NO_PREVENT }, { /* * Neuros USB Digital Audio Computer * PR: kern/63645 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "NEUROS", "dig. audio comp.", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * SEAGRAND NP-900 MP3 Player * PR: kern/64563 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "SEAGRAND", "NP-900*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE|DA_Q_NO_PREVENT }, { /* * iRiver iFP MP3 player (with UMS Firmware) * PR: kern/54881, i386/63941, kern/66124 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "iRiver", "iFP*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Frontier Labs NEX IA+ Digital Audio Player, rev 1.10/0.01 * PR: kern/70158 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "FL" , "Nex*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * ZICPlay USB MP3 Player with FM * PR: kern/75057 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "ACTIONS*" , "USB DISK*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * TEAC USB floppy mechanisms */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "TEAC" , "FD-05*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Kingston DataTraveler II+ USB Pen-Drive. * Reported by: Pawel Jakub Dawidek */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "Kingston" , "DataTraveler II+", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * USB DISK Pro PMAP * Reported by: jhs * PR: usb/96381 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, " ", "USB DISK Pro", "PMAP"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Motorola E398 Mobile Phone (TransFlash memory card). * Reported by: Wojciech A. Koszek * PR: usb/89889 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "Motorola" , "Motorola Phone", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Qware BeatZkey! Pro * PR: usb/79164 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "GENERIC", "USB DISK DEVICE", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Time DPA20B 1GB MP3 Player * PR: usb/81846 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "USB2.0*", "(FS) FLASH DISK*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Samsung USB key 128Mb * PR: usb/90081 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "USB-DISK", "FreeDik-FlashUsb", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Kingston DataTraveler 2.0 USB Flash memory. * PR: usb/89196 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "Kingston", "DataTraveler 2.0", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Creative MUVO Slim mp3 player (USB) * PR: usb/86131 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "CREATIVE", "MuVo Slim", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE|DA_Q_NO_PREVENT }, { /* * United MP5512 Portable MP3 Player (2-in-1 USB DISK/MP3) * PR: usb/80487 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "Generic*", "MUSIC DISK", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * SanDisk Micro Cruzer 128MB * PR: usb/75970 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "SanDisk" , "Micro Cruzer", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * TOSHIBA TransMemory USB sticks * PR: kern/94660 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "TOSHIBA", "TransMemory", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * PNY USB 3.0 Flash Drives */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "PNY", "USB 3.0 FD*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE | DA_Q_NO_RC16 }, { /* * PNY USB Flash keys * PR: usb/75578, usb/72344, usb/65436 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "*" , "USB DISK*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Genesys 6-in-1 Card Reader * PR: usb/94647 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "Generic*", "STORAGE DEVICE*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Rekam Digital CAMERA * PR: usb/98713 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "CAMERA*", "4MP-9J6*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * iRiver H10 MP3 player * PR: usb/102547 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "iriver", "H10*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * iRiver U10 MP3 player * PR: usb/92306 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "iriver", "U10*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * X-Micro Flash Disk * PR: usb/96901 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "X-Micro", "Flash Disk", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * EasyMP3 EM732X USB 2.0 Flash MP3 Player * PR: usb/96546 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "EM732X", "MP3 Player*", "1.00"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Denver MP3 player * PR: usb/107101 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "DENVER", "MP3 PLAYER", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Philips USB Key Audio KEY013 * PR: usb/68412 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "PHILIPS", "Key*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE | DA_Q_NO_PREVENT }, { /* * JNC MP3 Player * PR: usb/94439 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "JNC*" , "MP3 Player*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * SAMSUNG MP0402H * PR: usb/108427 */ {T_DIRECT, SIP_MEDIA_FIXED, "SAMSUNG", "MP0402H", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * I/O Magic USB flash - Giga Bank * PR: usb/108810 */ {T_DIRECT, SIP_MEDIA_FIXED, "GS-Magic", "stor*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * JoyFly 128mb USB Flash Drive * PR: 96133 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "USB 2.0", "Flash Disk*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * ChipsBnk usb stick * PR: 103702 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "ChipsBnk", "USB*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Storcase (Kingston) InfoStation IFS FC2/SATA-R 201A * PR: 129858 */ {T_DIRECT, SIP_MEDIA_FIXED, "IFS", "FC2/SATA-R*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Samsung YP-U3 mp3-player * PR: 125398 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "Samsung", "YP-U3", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { {T_DIRECT, SIP_MEDIA_REMOVABLE, "Netac", "OnlyDisk*", "2000"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Sony Cyber-Shot DSC cameras * PR: usb/137035 */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "Sony", "Sony DSC", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE | DA_Q_NO_PREVENT }, { {T_DIRECT, SIP_MEDIA_REMOVABLE, "Kingston", "DataTraveler G3", "1.00"}, /*quirks*/ DA_Q_NO_PREVENT }, { /* At least several Transcent USB sticks lie on RC16. */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "JetFlash", "Transcend*", "*"}, /*quirks*/ DA_Q_NO_RC16 }, /* ATA/SATA devices over SAS/USB/... */ { /* Hitachi Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "Hitachi", "H??????????E3*", "*" }, /*quirks*/DA_Q_4K }, { /* Samsung Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "SAMSUNG HD155UI*", "*" }, /*quirks*/DA_Q_4K }, { /* Samsung Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "SAMSUNG", "HD155UI*", "*" }, /*quirks*/DA_Q_4K }, { /* Samsung Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "SAMSUNG HD204UI*", "*" }, /*quirks*/DA_Q_4K }, { /* Samsung Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "SAMSUNG", "HD204UI*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Barracuda Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "ST????DL*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Barracuda Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ST????DL", "*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Barracuda Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "ST???DM*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Barracuda Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ST???DM*", "*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Barracuda Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "ST????DM*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Barracuda Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ST????DM", "*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "ST9500423AS*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ST950042", "3AS*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "ST9500424AS*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ST950042", "4AS*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "ST9640423AS*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ST964042", "3AS*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "ST9640424AS*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ST964042", "4AS*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "ST9750420AS*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ST975042", "0AS*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "ST9750422AS*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ST975042", "2AS*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "ST9750423AS*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Momentus Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ST975042", "3AS*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Momentus Thin Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "ST???LT*", "*" }, /*quirks*/DA_Q_4K }, { /* Seagate Momentus Thin Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ST???LT*", "*", "*" }, /*quirks*/DA_Q_4K }, { /* WDC Caviar Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "WDC WD????RS*", "*" }, /*quirks*/DA_Q_4K }, { /* WDC Caviar Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "WDC WD??", "??RS*", "*" }, /*quirks*/DA_Q_4K }, { /* WDC Caviar Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "WDC WD????RX*", "*" }, /*quirks*/DA_Q_4K }, { /* WDC Caviar Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "WDC WD??", "??RX*", "*" }, /*quirks*/DA_Q_4K }, { /* WDC Caviar Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "WDC WD??????RS*", "*" }, /*quirks*/DA_Q_4K }, { /* WDC Caviar Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "WDC WD??", "????RS*", "*" }, /*quirks*/DA_Q_4K }, { /* WDC Caviar Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "WDC WD??????RX*", "*" }, /*quirks*/DA_Q_4K }, { /* WDC Caviar Green Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "WDC WD??", "????RX*", "*" }, /*quirks*/DA_Q_4K }, { /* WDC Scorpio Black Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "WDC WD???PKT*", "*" }, /*quirks*/DA_Q_4K }, { /* WDC Scorpio Black Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "WDC WD??", "?PKT*", "*" }, /*quirks*/DA_Q_4K }, { /* WDC Scorpio Black Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "WDC WD?????PKT*", "*" }, /*quirks*/DA_Q_4K }, { /* WDC Scorpio Black Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "WDC WD??", "???PKT*", "*" }, /*quirks*/DA_Q_4K }, { /* WDC Scorpio Blue Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "WDC WD???PVT*", "*" }, /*quirks*/DA_Q_4K }, { /* WDC Scorpio Blue Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "WDC WD??", "?PVT*", "*" }, /*quirks*/DA_Q_4K }, { /* WDC Scorpio Blue Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "WDC WD?????PVT*", "*" }, /*quirks*/DA_Q_4K }, { /* WDC Scorpio Blue Advanced Format (4k) drives */ { T_DIRECT, SIP_MEDIA_FIXED, "WDC WD??", "???PVT*", "*" }, /*quirks*/DA_Q_4K }, { /* * Olympus FE-210 camera */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "OLYMPUS", "FE210*", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * LG UP3S MP3 player */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "LG", "UP3S", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * Laser MP3-2GA13 MP3 player */ {T_DIRECT, SIP_MEDIA_REMOVABLE, "USB 2.0", "(HS) Flash Disk", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, { /* * LaCie external 250GB Hard drive des by Porsche * Submitted by: Ben Stuyts * PR: 121474 */ {T_DIRECT, SIP_MEDIA_FIXED, "SAMSUNG", "HM250JI", "*"}, /*quirks*/ DA_Q_NO_SYNC_CACHE }, /* SATA SSDs */ { /* * Corsair Force 2 SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "Corsair CSSD-F*", "*" }, /*quirks*/DA_Q_4K }, { /* * Corsair Force 3 SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "Corsair Force 3*", "*" }, /*quirks*/DA_Q_4K }, { /* * Corsair Neutron GTX SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "Corsair Neutron GTX*", "*" }, /*quirks*/DA_Q_4K }, { /* * Corsair Force GT & GS SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "Corsair Force G*", "*" }, /*quirks*/DA_Q_4K }, { /* * Crucial M4 SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "M4-CT???M4SSD2*", "*" }, /*quirks*/DA_Q_4K }, { /* * Crucial RealSSD C300 SSDs * 4k optimised */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "C300-CTFDDAC???MAG*", "*" }, /*quirks*/DA_Q_4K }, { /* * Intel 320 Series SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "INTEL SSDSA2CW*", "*" }, /*quirks*/DA_Q_4K }, { /* * Intel 330 Series SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "INTEL SSDSC2CT*", "*" }, /*quirks*/DA_Q_4K }, { /* * Intel 510 Series SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "INTEL SSDSC2MH*", "*" }, /*quirks*/DA_Q_4K }, { /* * Intel 520 Series SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "INTEL SSDSC2BW*", "*" }, /*quirks*/DA_Q_4K }, { /* * Intel X25-M Series SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "INTEL SSDSA2M*", "*" }, /*quirks*/DA_Q_4K }, { /* * Kingston E100 Series SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "KINGSTON SE100S3*", "*" }, /*quirks*/DA_Q_4K }, { /* * Kingston HyperX 3k SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "KINGSTON SH103S3*", "*" }, /*quirks*/DA_Q_4K }, { /* * Marvell SSDs (entry taken from OpenSolaris) * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "MARVELL SD88SA02*", "*" }, /*quirks*/DA_Q_4K }, { /* * OCZ Agility 2 SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "*", "OCZ-AGILITY2*", "*" }, /*quirks*/DA_Q_4K }, { /* * OCZ Agility 3 SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "OCZ-AGILITY3*", "*" }, /*quirks*/DA_Q_4K }, { /* * OCZ Deneva R Series SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "DENRSTE251M45*", "*" }, /*quirks*/DA_Q_4K }, { /* * OCZ Vertex 2 SSDs (inc pro series) * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "OCZ?VERTEX2*", "*" }, /*quirks*/DA_Q_4K }, { /* * OCZ Vertex 3 SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "OCZ-VERTEX3*", "*" }, /*quirks*/DA_Q_4K }, { /* * OCZ Vertex 4 SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "OCZ-VERTEX4*", "*" }, /*quirks*/DA_Q_4K }, { /* * Samsung 830 Series SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "SAMSUNG SSD 830 Series*", "*" }, /*quirks*/DA_Q_4K }, { /* * Samsung 840 SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "Samsung SSD 840*", "*" }, /*quirks*/DA_Q_4K }, { /* * Samsung 843T Series SSDs * 4k optimised */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "SAMSUNG MZ7WD*", "*" }, /*quirks*/DA_Q_4K }, { /* * Samsung 850 SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "Samsung SSD 850*", "*" }, /*quirks*/DA_Q_4K }, { /* * Samsung PM853T Series SSDs * 4k optimised */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "SAMSUNG MZ7GE*", "*" }, /*quirks*/DA_Q_4K }, { /* * SuperTalent TeraDrive CT SSDs * 4k optimised & trim only works in 4k requests + 4k aligned */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "FTM??CT25H*", "*" }, /*quirks*/DA_Q_4K }, { /* * XceedIOPS SATA SSDs * 4k optimised */ { T_DIRECT, SIP_MEDIA_FIXED, "ATA", "SG9XCS2D*", "*" }, /*quirks*/DA_Q_4K }, { /* * Hama Innostor USB-Stick */ { T_DIRECT, SIP_MEDIA_REMOVABLE, "Innostor", "Innostor*", "*" }, /*quirks*/DA_Q_NO_RC16 }, { /* * MX-ES USB Drive by Mach Xtreme */ { T_DIRECT, SIP_MEDIA_REMOVABLE, "MX", "MXUB3*", "*"}, /*quirks*/DA_Q_NO_RC16 }, }; static disk_strategy_t dastrategy; static dumper_t dadump; static periph_init_t dainit; static void daasync(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg); static void dasysctlinit(void *context, int pending); +static int dasysctlsofttimeout(SYSCTL_HANDLER_ARGS); static int dacmdsizesysctl(SYSCTL_HANDLER_ARGS); static int dadeletemethodsysctl(SYSCTL_HANDLER_ARGS); static int dadeletemaxsysctl(SYSCTL_HANDLER_ARGS); static void dadeletemethodset(struct da_softc *softc, da_delete_methods delete_method); static off_t dadeletemaxsize(struct da_softc *softc, da_delete_methods delete_method); static void dadeletemethodchoose(struct da_softc *softc, da_delete_methods default_method); static void daprobedone(struct cam_periph *periph, union ccb *ccb); static periph_ctor_t daregister; static periph_dtor_t dacleanup; static periph_start_t dastart; static periph_oninv_t daoninvalidate; static void dadone(struct cam_periph *periph, union ccb *done_ccb); static int daerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags); static void daprevent(struct cam_periph *periph, int action); static void dareprobe(struct cam_periph *periph); static void dasetgeom(struct cam_periph *periph, uint32_t block_len, uint64_t maxsector, struct scsi_read_capacity_data_long *rcaplong, size_t rcap_size); static timeout_t dasendorderedtag; static void dashutdown(void *arg, int howto); static timeout_t damediapoll; #ifndef DA_DEFAULT_POLL_PERIOD #define DA_DEFAULT_POLL_PERIOD 3 #endif #ifndef DA_DEFAULT_TIMEOUT #define DA_DEFAULT_TIMEOUT 60 /* Timeout in seconds */ #endif +#ifndef DA_DEFAULT_SOFTTIMEOUT +#define DA_DEFAULT_SOFTTIMEOUT 0 +#endif + #ifndef DA_DEFAULT_RETRY #define DA_DEFAULT_RETRY 4 #endif #ifndef DA_DEFAULT_SEND_ORDERED #define DA_DEFAULT_SEND_ORDERED 1 #endif static int da_poll_period = DA_DEFAULT_POLL_PERIOD; static int da_retry_count = DA_DEFAULT_RETRY; static int da_default_timeout = DA_DEFAULT_TIMEOUT; +static sbintime_t da_default_softtimeout = DA_DEFAULT_SOFTTIMEOUT; static int da_send_ordered = DA_DEFAULT_SEND_ORDERED; static SYSCTL_NODE(_kern_cam, OID_AUTO, da, CTLFLAG_RD, 0, "CAM Direct Access Disk driver"); SYSCTL_INT(_kern_cam_da, OID_AUTO, poll_period, CTLFLAG_RWTUN, &da_poll_period, 0, "Media polling period in seconds"); SYSCTL_INT(_kern_cam_da, OID_AUTO, retry_count, CTLFLAG_RWTUN, &da_retry_count, 0, "Normal I/O retry count"); SYSCTL_INT(_kern_cam_da, OID_AUTO, default_timeout, CTLFLAG_RWTUN, &da_default_timeout, 0, "Normal I/O timeout (in seconds)"); SYSCTL_INT(_kern_cam_da, OID_AUTO, send_ordered, CTLFLAG_RWTUN, &da_send_ordered, 0, "Send Ordered Tags"); +SYSCTL_PROC(_kern_cam_da, OID_AUTO, default_softtimeout, + CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, dasysctlsofttimeout, "I", + "Soft I/O timeout (ms)"); +TUNABLE_LONG("kern.cam.da.default_softtimeout", &da_default_softtimeout); + /* * DA_ORDEREDTAG_INTERVAL determines how often, relative * to the default timeout, we check to see whether an ordered * tagged transaction is appropriate to prevent simple tag * starvation. Since we'd like to ensure that there is at least * 1/2 of the timeout length left for a starved transaction to * complete after we've sent an ordered tag, we must poll at least * four times in every timeout period. This takes care of the worst * case where a starved transaction starts during an interval that * meets the requirement "don't send an ordered tag" test so it takes * us two intervals to determine that a tag must be sent. */ #ifndef DA_ORDEREDTAG_INTERVAL #define DA_ORDEREDTAG_INTERVAL 4 #endif static struct periph_driver dadriver = { dainit, "da", TAILQ_HEAD_INITIALIZER(dadriver.units), /* generation */ 0 }; PERIPHDRIVER_DECLARE(da, dadriver); static MALLOC_DEFINE(M_SCSIDA, "scsi_da", "scsi_da buffers"); static int daopen(struct disk *dp) { struct cam_periph *periph; struct da_softc *softc; int error; periph = (struct cam_periph *)dp->d_drv1; if (cam_periph_acquire(periph) != CAM_REQ_CMP) { return (ENXIO); } cam_periph_lock(periph); if ((error = cam_periph_hold(periph, PRIBIO|PCATCH)) != 0) { cam_periph_unlock(periph); cam_periph_release(periph); return (error); } CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH, ("daopen\n")); softc = (struct da_softc *)periph->softc; dareprobe(periph); /* Wait for the disk size update. */ error = cam_periph_sleep(periph, &softc->disk->d_mediasize, PRIBIO, "dareprobe", 0); if (error != 0) xpt_print(periph->path, "unable to retrieve capacity data\n"); if (periph->flags & CAM_PERIPH_INVALID) error = ENXIO; if (error == 0 && (softc->flags & DA_FLAG_PACK_REMOVABLE) != 0 && (softc->quirks & DA_Q_NO_PREVENT) == 0) daprevent(periph, PR_PREVENT); if (error == 0) { softc->flags &= ~DA_FLAG_PACK_INVALID; softc->flags |= DA_FLAG_OPEN; } cam_periph_unhold(periph); cam_periph_unlock(periph); if (error != 0) cam_periph_release(periph); return (error); } static int daclose(struct disk *dp) { struct cam_periph *periph; struct da_softc *softc; union ccb *ccb; int error; periph = (struct cam_periph *)dp->d_drv1; softc = (struct da_softc *)periph->softc; cam_periph_lock(periph); CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH, ("daclose\n")); if (cam_periph_hold(periph, PRIBIO) == 0) { /* Flush disk cache. */ if ((softc->flags & DA_FLAG_DIRTY) != 0 && (softc->quirks & DA_Q_NO_SYNC_CACHE) == 0 && (softc->flags & DA_FLAG_PACK_INVALID) == 0) { ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); scsi_synchronize_cache(&ccb->csio, /*retries*/1, /*cbfcnp*/dadone, MSG_SIMPLE_Q_TAG, /*begin_lba*/0, /*lb_count*/0, SSD_FULL_SIZE, 5 * 60 * 1000); error = cam_periph_runccb(ccb, daerror, /*cam_flags*/0, /*sense_flags*/SF_RETRY_UA | SF_QUIET_IR, softc->disk->d_devstat); if (error == 0) softc->flags &= ~DA_FLAG_DIRTY; xpt_release_ccb(ccb); } /* Allow medium removal. */ if ((softc->flags & DA_FLAG_PACK_REMOVABLE) != 0 && (softc->quirks & DA_Q_NO_PREVENT) == 0) daprevent(periph, PR_ALLOW); cam_periph_unhold(periph); } /* * If we've got removeable media, mark the blocksize as * unavailable, since it could change when new media is * inserted. */ if ((softc->flags & DA_FLAG_PACK_REMOVABLE) != 0) softc->disk->d_devstat->flags |= DEVSTAT_BS_UNAVAILABLE; softc->flags &= ~DA_FLAG_OPEN; while (softc->refcount != 0) cam_periph_sleep(periph, &softc->refcount, PRIBIO, "daclose", 1); cam_periph_unlock(periph); cam_periph_release(periph); return (0); } static void daschedule(struct cam_periph *periph) { struct da_softc *softc = (struct da_softc *)periph->softc; if (softc->state != DA_STATE_NORMAL) return; cam_iosched_schedule(softc->cam_iosched, periph); } /* * Actually translate the requested transfer into one the physical driver * can understand. The transfer is described by a buf and will include * only one physical transfer. */ static void dastrategy(struct bio *bp) { struct cam_periph *periph; struct da_softc *softc; periph = (struct cam_periph *)bp->bio_disk->d_drv1; softc = (struct da_softc *)periph->softc; cam_periph_lock(periph); /* * If the device has been made invalid, error out */ if ((softc->flags & DA_FLAG_PACK_INVALID)) { cam_periph_unlock(periph); biofinish(bp, NULL, ENXIO); return; } CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("dastrategy(%p)\n", bp)); /* * Place it in the queue of disk activities for this disk */ cam_iosched_queue_work(softc->cam_iosched, bp); /* * Schedule ourselves for performing the work. */ daschedule(periph); cam_periph_unlock(periph); return; } static int dadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length) { struct cam_periph *periph; struct da_softc *softc; u_int secsize; struct ccb_scsiio csio; struct disk *dp; int error = 0; dp = arg; periph = dp->d_drv1; softc = (struct da_softc *)periph->softc; cam_periph_lock(periph); secsize = softc->params.secsize; if ((softc->flags & DA_FLAG_PACK_INVALID) != 0) { cam_periph_unlock(periph); return (ENXIO); } if (length > 0) { xpt_setup_ccb(&csio.ccb_h, periph->path, CAM_PRIORITY_NORMAL); csio.ccb_h.ccb_state = DA_CCB_DUMP; scsi_read_write(&csio, /*retries*/0, dadone, MSG_ORDERED_Q_TAG, /*read*/SCSI_RW_WRITE, /*byte2*/0, /*minimum_cmd_size*/ softc->minimum_cmd_size, offset / secsize, length / secsize, /*data_ptr*/(u_int8_t *) virtual, /*dxfer_len*/length, /*sense_len*/SSD_FULL_SIZE, da_default_timeout * 1000); xpt_polled_action((union ccb *)&csio); error = cam_periph_error((union ccb *)&csio, 0, SF_NO_RECOVERY | SF_NO_RETRY, NULL); if ((csio.ccb_h.status & CAM_DEV_QFRZN) != 0) cam_release_devq(csio.ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); if (error != 0) printf("Aborting dump due to I/O error.\n"); cam_periph_unlock(periph); return (error); } /* * Sync the disk cache contents to the physical media. */ if ((softc->quirks & DA_Q_NO_SYNC_CACHE) == 0) { xpt_setup_ccb(&csio.ccb_h, periph->path, CAM_PRIORITY_NORMAL); csio.ccb_h.ccb_state = DA_CCB_DUMP; scsi_synchronize_cache(&csio, /*retries*/0, /*cbfcnp*/dadone, MSG_SIMPLE_Q_TAG, /*begin_lba*/0,/* Cover the whole disk */ /*lb_count*/0, SSD_FULL_SIZE, - 5 * 60 * 1000); + 5 * 1000); xpt_polled_action((union ccb *)&csio); error = cam_periph_error((union ccb *)&csio, 0, SF_NO_RECOVERY | SF_NO_RETRY | SF_QUIET_IR, NULL); if ((csio.ccb_h.status & CAM_DEV_QFRZN) != 0) cam_release_devq(csio.ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); if (error != 0) xpt_print(periph->path, "Synchronize cache failed\n"); } cam_periph_unlock(periph); return (error); } static int dagetattr(struct bio *bp) { int ret; struct cam_periph *periph; periph = (struct cam_periph *)bp->bio_disk->d_drv1; cam_periph_lock(periph); ret = xpt_getattr(bp->bio_data, bp->bio_length, bp->bio_attribute, periph->path); cam_periph_unlock(periph); if (ret == 0) bp->bio_completed = bp->bio_length; return ret; } static void dainit(void) { cam_status status; /* * Install a global async callback. This callback will * receive async callbacks like "new device found". */ status = xpt_register_async(AC_FOUND_DEVICE, daasync, NULL, NULL); if (status != CAM_REQ_CMP) { printf("da: Failed to attach master async callback " "due to status 0x%x!\n", status); } else if (da_send_ordered) { /* Register our shutdown event handler */ if ((EVENTHANDLER_REGISTER(shutdown_post_sync, dashutdown, NULL, SHUTDOWN_PRI_DEFAULT)) == NULL) printf("dainit: shutdown event registration failed!\n"); } } /* * Callback from GEOM, called when it has finished cleaning up its * resources. */ static void dadiskgonecb(struct disk *dp) { struct cam_periph *periph; periph = (struct cam_periph *)dp->d_drv1; cam_periph_release(periph); } static void daoninvalidate(struct cam_periph *periph) { struct da_softc *softc; softc = (struct da_softc *)periph->softc; /* * De-register any async callbacks. */ xpt_register_async(0, daasync, periph, periph->path); softc->flags |= DA_FLAG_PACK_INVALID; +#ifdef CAM_IO_STATS + softc->invalidations++; +#endif /* * Return all queued I/O with ENXIO. * XXX Handle any transactions queued to the card * with XPT_ABORT_CCB. */ cam_iosched_flush(softc->cam_iosched, NULL, ENXIO); /* * Tell GEOM that we've gone away, we'll get a callback when it is * done cleaning up its resources. */ disk_gone(softc->disk); } static void dacleanup(struct cam_periph *periph) { struct da_softc *softc; softc = (struct da_softc *)periph->softc; cam_periph_unlock(periph); + cam_iosched_fini(softc->cam_iosched); + /* * If we can't free the sysctl tree, oh well... */ - if ((softc->flags & DA_FLAG_SCTX_INIT) != 0 - && sysctl_ctx_free(&softc->sysctl_ctx) != 0) { - xpt_print(periph->path, "can't remove sysctl context\n"); + if ((softc->flags & DA_FLAG_SCTX_INIT) != 0) { +#ifdef CAM_IO_STATS + if (sysctl_ctx_free(&softc->sysctl_stats_ctx) != 0) + xpt_print(periph->path, + "can't remove sysctl stats context\n"); +#endif + if (sysctl_ctx_free(&softc->sysctl_ctx) != 0) + xpt_print(periph->path, + "can't remove sysctl context\n"); } callout_drain(&softc->mediapoll_c); disk_destroy(softc->disk); callout_drain(&softc->sendordered_c); free(softc, M_DEVBUF); cam_periph_lock(periph); } static void daasync(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg) { struct cam_periph *periph; struct da_softc *softc; periph = (struct cam_periph *)callback_arg; switch (code) { case AC_FOUND_DEVICE: { struct ccb_getdev *cgd; cam_status status; cgd = (struct ccb_getdev *)arg; if (cgd == NULL) break; if (cgd->protocol != PROTO_SCSI) break; if (SID_QUAL(&cgd->inq_data) != SID_QUAL_LU_CONNECTED) break; if (SID_TYPE(&cgd->inq_data) != T_DIRECT && SID_TYPE(&cgd->inq_data) != T_RBC && SID_TYPE(&cgd->inq_data) != T_OPTICAL) break; /* * Allocate a peripheral instance for * this device and start the probe * process. */ status = cam_periph_alloc(daregister, daoninvalidate, dacleanup, dastart, "da", CAM_PERIPH_BIO, path, daasync, AC_FOUND_DEVICE, cgd); if (status != CAM_REQ_CMP && status != CAM_REQ_INPROG) printf("daasync: Unable to attach to new device " "due to status 0x%x\n", status); return; } case AC_ADVINFO_CHANGED: { uintptr_t buftype; buftype = (uintptr_t)arg; if (buftype == CDAI_TYPE_PHYS_PATH) { struct da_softc *softc; softc = periph->softc; disk_attr_changed(softc->disk, "GEOM::physpath", M_NOWAIT); } break; } case AC_UNIT_ATTENTION: { union ccb *ccb; int error_code, sense_key, asc, ascq; softc = (struct da_softc *)periph->softc; ccb = (union ccb *)arg; /* * Handle all UNIT ATTENTIONs except our own, * as they will be handled by daerror(). */ if (xpt_path_periph(ccb->ccb_h.path) != periph && scsi_extract_sense_ccb(ccb, &error_code, &sense_key, &asc, &ascq)) { if (asc == 0x2A && ascq == 0x09) { xpt_print(ccb->ccb_h.path, "Capacity data has changed\n"); softc->flags &= ~DA_FLAG_PROBED; dareprobe(periph); } else if (asc == 0x28 && ascq == 0x00) { softc->flags &= ~DA_FLAG_PROBED; disk_media_changed(softc->disk, M_NOWAIT); } else if (asc == 0x3F && ascq == 0x03) { xpt_print(ccb->ccb_h.path, "INQUIRY data has changed\n"); softc->flags &= ~DA_FLAG_PROBED; dareprobe(periph); } } cam_periph_async(periph, code, path, arg); break; } case AC_SCSI_AEN: softc = (struct da_softc *)periph->softc; if (!cam_iosched_has_work_flags(softc->cam_iosched, DA_WORK_TUR)) { if (cam_periph_acquire(periph) == CAM_REQ_CMP) { cam_iosched_set_work_flags(softc->cam_iosched, DA_WORK_TUR); daschedule(periph); } } /* FALLTHROUGH */ case AC_SENT_BDR: case AC_BUS_RESET: { struct ccb_hdr *ccbh; softc = (struct da_softc *)periph->softc; /* * Don't fail on the expected unit attention * that will occur. */ softc->flags |= DA_FLAG_RETRY_UA; LIST_FOREACH(ccbh, &softc->pending_ccbs, periph_links.le) ccbh->ccb_state |= DA_CCB_RETRY_UA; break; } default: break; } cam_periph_async(periph, code, path, arg); } static void dasysctlinit(void *context, int pending) { struct cam_periph *periph; struct da_softc *softc; char tmpstr[80], tmpstr2[80]; struct ccb_trans_settings cts; periph = (struct cam_periph *)context; /* * periph was held for us when this task was enqueued */ if (periph->flags & CAM_PERIPH_INVALID) { cam_periph_release(periph); return; } softc = (struct da_softc *)periph->softc; snprintf(tmpstr, sizeof(tmpstr), "CAM DA unit %d", periph->unit_number); snprintf(tmpstr2, sizeof(tmpstr2), "%d", periph->unit_number); sysctl_ctx_init(&softc->sysctl_ctx); softc->flags |= DA_FLAG_SCTX_INIT; softc->sysctl_tree = SYSCTL_ADD_NODE(&softc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_kern_cam_da), OID_AUTO, tmpstr2, CTLFLAG_RD, 0, tmpstr); if (softc->sysctl_tree == NULL) { printf("dasysctlinit: unable to allocate sysctl tree\n"); cam_periph_release(periph); return; } /* * Now register the sysctl handler, so the user can change the value on * the fly. */ SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "delete_method", CTLTYPE_STRING | CTLFLAG_RW, softc, 0, dadeletemethodsysctl, "A", "BIO_DELETE execution method"); SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "delete_max", CTLTYPE_U64 | CTLFLAG_RW, softc, 0, dadeletemaxsysctl, "Q", "Maximum BIO_DELETE size"); SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "minimum_cmd_size", CTLTYPE_INT | CTLFLAG_RW, &softc->minimum_cmd_size, 0, dacmdsizesysctl, "I", "Minimum CDB size"); SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "error_inject", CTLFLAG_RW, &softc->error_inject, 0, "error_inject leaf"); + SYSCTL_ADD_INT(&softc->sysctl_ctx, + SYSCTL_CHILDREN(softc->sysctl_tree), + OID_AUTO, + "unmapped_io", + CTLFLAG_RD, + &softc->unmappedio, + 0, + "Unmapped I/O leaf"); + SYSCTL_ADD_INT(&softc->sysctl_ctx, + SYSCTL_CHILDREN(softc->sysctl_tree), + OID_AUTO, + "rotating", + CTLFLAG_RD, + &softc->rotating, + 0, + "Rotating media"); + /* * Add some addressing info. */ memset(&cts, 0, sizeof (cts)); xpt_setup_ccb(&cts.ccb_h, periph->path, CAM_PRIORITY_NONE); cts.ccb_h.func_code = XPT_GET_TRAN_SETTINGS; cts.type = CTS_TYPE_CURRENT_SETTINGS; cam_periph_lock(periph); xpt_action((union ccb *)&cts); cam_periph_unlock(periph); if (cts.ccb_h.status != CAM_REQ_CMP) { cam_periph_release(periph); return; } if (cts.protocol == PROTO_SCSI && cts.transport == XPORT_FC) { struct ccb_trans_settings_fc *fc = &cts.xport_specific.fc; if (fc->valid & CTS_FC_VALID_WWPN) { softc->wwpn = fc->wwpn; SYSCTL_ADD_UQUAD(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "wwpn", CTLFLAG_RD, &softc->wwpn, "World Wide Port Name"); } } +#ifdef CAM_IO_STATS + /* + * Now add some useful stats. + * XXX These should live in cam_periph and be common to all periphs + */ + softc->sysctl_stats_tree = SYSCTL_ADD_NODE(&softc->sysctl_stats_ctx, + SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "stats", + CTLFLAG_RD, 0, "Statistics"); + SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, + SYSCTL_CHILDREN(softc->sysctl_stats_tree), + OID_AUTO, + "errors", + CTLFLAG_RD, + &softc->errors, + 0, + "Transport errors reported by the SIM"); + SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, + SYSCTL_CHILDREN(softc->sysctl_stats_tree), + OID_AUTO, + "timeouts", + CTLFLAG_RD, + &softc->timeouts, + 0, + "Device timeouts reported by the SIM"); + SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, + SYSCTL_CHILDREN(softc->sysctl_stats_tree), + OID_AUTO, + "pack_invalidations", + CTLFLAG_RD, + &softc->invalidations, + 0, + "Device pack invalidations"); +#endif + cam_iosched_sysctl_init(softc->cam_iosched, &softc->sysctl_ctx, softc->sysctl_tree); cam_periph_release(periph); } static int dadeletemaxsysctl(SYSCTL_HANDLER_ARGS) { int error; uint64_t value; struct da_softc *softc; softc = (struct da_softc *)arg1; value = softc->disk->d_delmaxsize; error = sysctl_handle_64(oidp, &value, 0, req); if ((error != 0) || (req->newptr == NULL)) return (error); /* only accept values smaller than the calculated value */ if (value > dadeletemaxsize(softc, softc->delete_method)) { return (EINVAL); } softc->disk->d_delmaxsize = value; return (0); } static int dacmdsizesysctl(SYSCTL_HANDLER_ARGS) { int error, value; value = *(int *)arg1; error = sysctl_handle_int(oidp, &value, 0, req); if ((error != 0) || (req->newptr == NULL)) return (error); /* * Acceptable values here are 6, 10, 12 or 16. */ if (value < 6) value = 6; else if ((value > 6) && (value <= 10)) value = 10; else if ((value > 10) && (value <= 12)) value = 12; else if (value > 12) value = 16; *(int *)arg1 = value; return (0); } +static int +dasysctlsofttimeout(SYSCTL_HANDLER_ARGS) +{ + sbintime_t value; + int error; + + value = da_default_softtimeout / SBT_1MS; + + error = sysctl_handle_int(oidp, (int *)&value, 0, req); + if ((error != 0) || (req->newptr == NULL)) + return (error); + + /* XXX Should clip this to a reasonable level */ + if (value > da_default_timeout * 1000) + return (EINVAL); + + da_default_softtimeout = value * SBT_1MS; + return (0); +} + static void dadeletemethodset(struct da_softc *softc, da_delete_methods delete_method) { softc->delete_method = delete_method; softc->disk->d_delmaxsize = dadeletemaxsize(softc, delete_method); softc->delete_func = da_delete_functions[delete_method]; if (softc->delete_method > DA_DELETE_DISABLE) softc->disk->d_flags |= DISKFLAG_CANDELETE; else softc->disk->d_flags &= ~DISKFLAG_CANDELETE; } static off_t dadeletemaxsize(struct da_softc *softc, da_delete_methods delete_method) { off_t sectors; switch(delete_method) { case DA_DELETE_UNMAP: sectors = (off_t)softc->unmap_max_lba; break; case DA_DELETE_ATA_TRIM: sectors = (off_t)ATA_DSM_RANGE_MAX * softc->trim_max_ranges; break; case DA_DELETE_WS16: sectors = omin(softc->ws_max_blks, WS16_MAX_BLKS); break; case DA_DELETE_ZERO: case DA_DELETE_WS10: sectors = omin(softc->ws_max_blks, WS10_MAX_BLKS); break; default: return 0; } return (off_t)softc->params.secsize * omin(sectors, softc->params.sectors); } static void daprobedone(struct cam_periph *periph, union ccb *ccb) { struct da_softc *softc; softc = (struct da_softc *)periph->softc; dadeletemethodchoose(softc, DA_DELETE_NONE); if (bootverbose && (softc->flags & DA_FLAG_ANNOUNCED) == 0) { char buf[80]; int i, sep; snprintf(buf, sizeof(buf), "Delete methods: <"); sep = 0; for (i = DA_DELETE_MIN; i <= DA_DELETE_MAX; i++) { if (softc->delete_available & (1 << i)) { if (sep) { strlcat(buf, ",", sizeof(buf)); } else { sep = 1; } strlcat(buf, da_delete_method_names[i], sizeof(buf)); if (i == softc->delete_method) { strlcat(buf, "(*)", sizeof(buf)); } } } if (sep == 0) { if (softc->delete_method == DA_DELETE_NONE) strlcat(buf, "NONE(*)", sizeof(buf)); else strlcat(buf, "DISABLED(*)", sizeof(buf)); } strlcat(buf, ">", sizeof(buf)); printf("%s%d: %s\n", periph->periph_name, periph->unit_number, buf); } /* * Since our peripheral may be invalidated by an error * above or an external event, we must release our CCB * before releasing the probe lock on the peripheral. * The peripheral will only go away once the last lock * is removed, and we need it around for the CCB release * operation. */ xpt_release_ccb(ccb); softc->state = DA_STATE_NORMAL; softc->flags |= DA_FLAG_PROBED; daschedule(periph); wakeup(&softc->disk->d_mediasize); if ((softc->flags & DA_FLAG_ANNOUNCED) == 0) { softc->flags |= DA_FLAG_ANNOUNCED; cam_periph_unhold(periph); } else cam_periph_release_locked(periph); } static void dadeletemethodchoose(struct da_softc *softc, da_delete_methods default_method) { int i, delete_method; delete_method = default_method; /* * Use the pre-defined order to choose the best * performing delete. */ for (i = DA_DELETE_MIN; i <= DA_DELETE_MAX; i++) { if (softc->delete_available & (1 << i)) { dadeletemethodset(softc, i); return; } } dadeletemethodset(softc, delete_method); } static int dadeletemethodsysctl(SYSCTL_HANDLER_ARGS) { char buf[16]; const char *p; struct da_softc *softc; int i, error, methods, value; softc = (struct da_softc *)arg1; value = softc->delete_method; if (value < 0 || value > DA_DELETE_MAX) p = "UNKNOWN"; else p = da_delete_method_names[value]; strncpy(buf, p, sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); methods = softc->delete_available | (1 << DA_DELETE_DISABLE); for (i = 0; i <= DA_DELETE_MAX; i++) { if (!(methods & (1 << i)) || strcmp(buf, da_delete_method_names[i]) != 0) continue; dadeletemethodset(softc, i); return (0); } return (EINVAL); } static cam_status daregister(struct cam_periph *periph, void *arg) { struct da_softc *softc; struct ccb_pathinq cpi; struct ccb_getdev *cgd; char tmpstr[80]; caddr_t match; cgd = (struct ccb_getdev *)arg; if (cgd == NULL) { printf("daregister: no getdev CCB, can't register device\n"); return(CAM_REQ_CMP_ERR); } softc = (struct da_softc *)malloc(sizeof(*softc), M_DEVBUF, M_NOWAIT|M_ZERO); if (softc == NULL) { printf("daregister: Unable to probe new device. " "Unable to allocate softc\n"); return(CAM_REQ_CMP_ERR); } - if (cam_iosched_init(&softc->cam_iosched) != 0) { + if (cam_iosched_init(&softc->cam_iosched, periph) != 0) { printf("daregister: Unable to probe new device. " "Unable to allocate iosched memory\n"); return(CAM_REQ_CMP_ERR); } LIST_INIT(&softc->pending_ccbs); softc->state = DA_STATE_PROBE_RC; bioq_init(&softc->delete_run_queue); if (SID_IS_REMOVABLE(&cgd->inq_data)) softc->flags |= DA_FLAG_PACK_REMOVABLE; softc->unmap_max_ranges = UNMAP_MAX_RANGES; softc->unmap_max_lba = UNMAP_RANGE_MAX; softc->ws_max_blks = WS16_MAX_BLKS; softc->trim_max_ranges = ATA_TRIM_MAX_RANGES; + softc->rotating = 1; periph->softc = softc; /* * See if this device has any quirks. */ match = cam_quirkmatch((caddr_t)&cgd->inq_data, (caddr_t)da_quirk_table, sizeof(da_quirk_table)/sizeof(*da_quirk_table), sizeof(*da_quirk_table), scsi_inquiry_match); if (match != NULL) softc->quirks = ((struct da_quirk_entry *)match)->quirks; else softc->quirks = DA_Q_NONE; /* Check if the SIM does not want 6 byte commands */ bzero(&cpi, sizeof(cpi)); xpt_setup_ccb(&cpi.ccb_h, periph->path, CAM_PRIORITY_NORMAL); cpi.ccb_h.func_code = XPT_PATH_INQ; xpt_action((union ccb *)&cpi); if (cpi.ccb_h.status == CAM_REQ_CMP && (cpi.hba_misc & PIM_NO_6_BYTE)) softc->quirks |= DA_Q_NO_6_BYTE; TASK_INIT(&softc->sysctl_task, 0, dasysctlinit, periph); /* * Take an exclusive refcount on the periph while dastart is called * to finish the probe. The reference will be dropped in dadone at * the end of probe. */ (void)cam_periph_hold(periph, PRIBIO); /* * Schedule a periodic event to occasionally send an * ordered tag to a device. */ callout_init_mtx(&softc->sendordered_c, cam_periph_mtx(periph), 0); callout_reset(&softc->sendordered_c, (da_default_timeout * hz) / DA_ORDEREDTAG_INTERVAL, dasendorderedtag, softc); cam_periph_unlock(periph); /* * RBC devices don't have to support READ(6), only READ(10). */ if (softc->quirks & DA_Q_NO_6_BYTE || SID_TYPE(&cgd->inq_data) == T_RBC) softc->minimum_cmd_size = 10; else softc->minimum_cmd_size = 6; /* * Load the user's default, if any. */ snprintf(tmpstr, sizeof(tmpstr), "kern.cam.da.%d.minimum_cmd_size", periph->unit_number); TUNABLE_INT_FETCH(tmpstr, &softc->minimum_cmd_size); /* * 6, 10, 12 and 16 are the currently permissible values. */ if (softc->minimum_cmd_size < 6) softc->minimum_cmd_size = 6; else if ((softc->minimum_cmd_size > 6) && (softc->minimum_cmd_size <= 10)) softc->minimum_cmd_size = 10; else if ((softc->minimum_cmd_size > 10) && (softc->minimum_cmd_size <= 12)) softc->minimum_cmd_size = 12; else if (softc->minimum_cmd_size > 12) softc->minimum_cmd_size = 16; /* Predict whether device may support READ CAPACITY(16). */ if (SID_ANSI_REV(&cgd->inq_data) >= SCSI_REV_SPC3 && (softc->quirks & DA_Q_NO_RC16) == 0) { softc->flags |= DA_FLAG_CAN_RC16; softc->state = DA_STATE_PROBE_RC16; } /* * Register this media as a disk. */ softc->disk = disk_alloc(); softc->disk->d_devstat = devstat_new_entry(periph->periph_name, periph->unit_number, 0, DEVSTAT_BS_UNAVAILABLE, SID_TYPE(&cgd->inq_data) | XPORT_DEVSTAT_TYPE(cpi.transport), DEVSTAT_PRIORITY_DISK); softc->disk->d_open = daopen; softc->disk->d_close = daclose; softc->disk->d_strategy = dastrategy; softc->disk->d_dump = dadump; softc->disk->d_getattr = dagetattr; softc->disk->d_gone = dadiskgonecb; softc->disk->d_name = "da"; softc->disk->d_drv1 = periph; if (cpi.maxio == 0) softc->maxio = DFLTPHYS; /* traditional default */ else if (cpi.maxio > MAXPHYS) softc->maxio = MAXPHYS; /* for safety */ else softc->maxio = cpi.maxio; softc->disk->d_maxsize = softc->maxio; softc->disk->d_unit = periph->unit_number; softc->disk->d_flags = DISKFLAG_DIRECT_COMPLETION; if ((softc->quirks & DA_Q_NO_SYNC_CACHE) == 0) softc->disk->d_flags |= DISKFLAG_CANFLUSHCACHE; - if ((cpi.hba_misc & PIM_UNMAPPED) != 0) + if ((cpi.hba_misc & PIM_UNMAPPED) != 0) { + softc->unmappedio = 1; softc->disk->d_flags |= DISKFLAG_UNMAPPED_BIO; + xpt_print(periph->path, "UNMAPPED\n"); + } cam_strvis(softc->disk->d_descr, cgd->inq_data.vendor, sizeof(cgd->inq_data.vendor), sizeof(softc->disk->d_descr)); strlcat(softc->disk->d_descr, " ", sizeof(softc->disk->d_descr)); cam_strvis(&softc->disk->d_descr[strlen(softc->disk->d_descr)], cgd->inq_data.product, sizeof(cgd->inq_data.product), sizeof(softc->disk->d_descr) - strlen(softc->disk->d_descr)); softc->disk->d_hba_vendor = cpi.hba_vendor; softc->disk->d_hba_device = cpi.hba_device; softc->disk->d_hba_subvendor = cpi.hba_subvendor; softc->disk->d_hba_subdevice = cpi.hba_subdevice; /* * Acquire a reference to the periph before we register with GEOM. * We'll release this reference once GEOM calls us back (via * dadiskgonecb()) telling us that our provider has been freed. */ if (cam_periph_acquire(periph) != CAM_REQ_CMP) { xpt_print(periph->path, "%s: lost periph during " "registration!\n", __func__); cam_periph_lock(periph); return (CAM_REQ_CMP_ERR); } disk_create(softc->disk, DISK_VERSION); cam_periph_lock(periph); /* * Add async callbacks for events of interest. * I don't bother checking if this fails as, * in most cases, the system will function just * fine without them and the only alternative * would be to not attach the device on failure. */ xpt_register_async(AC_SENT_BDR | AC_BUS_RESET | AC_LOST_DEVICE | AC_ADVINFO_CHANGED | AC_SCSI_AEN | AC_UNIT_ATTENTION, daasync, periph, periph->path); /* * Emit an attribute changed notification just in case * physical path information arrived before our async * event handler was registered, but after anyone attaching * to our disk device polled it. */ disk_attr_changed(softc->disk, "GEOM::physpath", M_NOWAIT); /* * Schedule a periodic media polling events. */ callout_init_mtx(&softc->mediapoll_c, cam_periph_mtx(periph), 0); if ((softc->flags & DA_FLAG_PACK_REMOVABLE) && (cgd->inq_flags & SID_AEN) == 0 && da_poll_period != 0) callout_reset(&softc->mediapoll_c, da_poll_period * hz, damediapoll, periph); xpt_schedule(periph, CAM_PRIORITY_DEV); return(CAM_REQ_CMP); } static void dastart(struct cam_periph *periph, union ccb *start_ccb) { struct da_softc *softc; softc = (struct da_softc *)periph->softc; CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("dastart\n")); skipstate: switch (softc->state) { case DA_STATE_NORMAL: { struct bio *bp; uint8_t tag_code; more: bp = cam_iosched_next_bio(softc->cam_iosched); if (bp == NULL) { if (cam_iosched_has_work_flags(softc->cam_iosched, DA_WORK_TUR)) { cam_iosched_clr_work_flags(softc->cam_iosched, DA_WORK_TUR); scsi_test_unit_ready(&start_ccb->csio, /*retries*/ da_retry_count, dadone, MSG_SIMPLE_Q_TAG, SSD_FULL_SIZE, da_default_timeout * 1000); start_ccb->ccb_h.ccb_bp = NULL; start_ccb->ccb_h.ccb_state = DA_CCB_TUR; xpt_action(start_ccb); } else xpt_release_ccb(start_ccb); break; } if (bp->bio_cmd == BIO_DELETE) { if (softc->delete_func != NULL) { softc->delete_func(periph, start_ccb, bp); goto out; } else { /* Not sure this is possible, but failsafe by lying and saying "sure, done." */ biofinish(bp, NULL, 0); goto more; } } if (cam_iosched_has_work_flags(softc->cam_iosched, DA_WORK_TUR)) { cam_iosched_clr_work_flags(softc->cam_iosched, DA_WORK_TUR); cam_periph_release_locked(periph); /* XXX is this still valid? I think so but unverified */ } if ((bp->bio_flags & BIO_ORDERED) != 0 || (softc->flags & DA_FLAG_NEED_OTAG) != 0) { softc->flags &= ~DA_FLAG_NEED_OTAG; softc->flags |= DA_FLAG_WAS_OTAG; tag_code = MSG_ORDERED_Q_TAG; } else { tag_code = MSG_SIMPLE_Q_TAG; } switch (bp->bio_cmd) { case BIO_WRITE: softc->flags |= DA_FLAG_DIRTY; /* FALLTHROUGH */ case BIO_READ: scsi_read_write(&start_ccb->csio, /*retries*/da_retry_count, /*cbfcnp*/dadone, /*tag_action*/tag_code, /*read_op*/(bp->bio_cmd == BIO_READ ? SCSI_RW_READ : SCSI_RW_WRITE) | ((bp->bio_flags & BIO_UNMAPPED) != 0 ? SCSI_RW_BIO : 0), /*byte2*/0, softc->minimum_cmd_size, /*lba*/bp->bio_pblkno, /*block_count*/bp->bio_bcount / softc->params.secsize, /*data_ptr*/ (bp->bio_flags & BIO_UNMAPPED) != 0 ? (void *)bp : bp->bio_data, /*dxfer_len*/ bp->bio_bcount, /*sense_len*/SSD_FULL_SIZE, da_default_timeout * 1000); break; case BIO_FLUSH: /* * BIO_FLUSH doesn't currently communicate * range data, so we synchronize the cache * over the whole disk. We also force * ordered tag semantics the flush applies * to all previously queued I/O. */ scsi_synchronize_cache(&start_ccb->csio, /*retries*/1, /*cbfcnp*/dadone, MSG_ORDERED_Q_TAG, /*begin_lba*/0, /*lb_count*/0, SSD_FULL_SIZE, da_default_timeout*1000); break; } start_ccb->ccb_h.ccb_state = DA_CCB_BUFFER_IO; start_ccb->ccb_h.flags |= CAM_UNLOCKED; + start_ccb->ccb_h.softtimeout = sbttotv(da_default_softtimeout); out: LIST_INSERT_HEAD(&softc->pending_ccbs, &start_ccb->ccb_h, periph_links.le); /* We expect a unit attention from this device */ if ((softc->flags & DA_FLAG_RETRY_UA) != 0) { start_ccb->ccb_h.ccb_state |= DA_CCB_RETRY_UA; softc->flags &= ~DA_FLAG_RETRY_UA; } start_ccb->ccb_h.ccb_bp = bp; softc->refcount++; cam_periph_unlock(periph); xpt_action(start_ccb); cam_periph_lock(periph); softc->refcount--; /* May have more work to do, so ensure we stay scheduled */ daschedule(periph); break; } case DA_STATE_PROBE_RC: { struct scsi_read_capacity_data *rcap; rcap = (struct scsi_read_capacity_data *) malloc(sizeof(*rcap), M_SCSIDA, M_NOWAIT|M_ZERO); if (rcap == NULL) { printf("dastart: Couldn't malloc read_capacity data\n"); /* da_free_periph??? */ break; } scsi_read_capacity(&start_ccb->csio, /*retries*/da_retry_count, dadone, MSG_SIMPLE_Q_TAG, rcap, SSD_FULL_SIZE, /*timeout*/5000); start_ccb->ccb_h.ccb_bp = NULL; start_ccb->ccb_h.ccb_state = DA_CCB_PROBE_RC; xpt_action(start_ccb); break; } case DA_STATE_PROBE_RC16: { struct scsi_read_capacity_data_long *rcaplong; rcaplong = (struct scsi_read_capacity_data_long *) malloc(sizeof(*rcaplong), M_SCSIDA, M_NOWAIT|M_ZERO); if (rcaplong == NULL) { printf("dastart: Couldn't malloc read_capacity data\n"); /* da_free_periph??? */ break; } scsi_read_capacity_16(&start_ccb->csio, /*retries*/ da_retry_count, /*cbfcnp*/ dadone, /*tag_action*/ MSG_SIMPLE_Q_TAG, /*lba*/ 0, /*reladr*/ 0, /*pmi*/ 0, /*rcap_buf*/ (uint8_t *)rcaplong, /*rcap_buf_len*/ sizeof(*rcaplong), /*sense_len*/ SSD_FULL_SIZE, /*timeout*/ da_default_timeout * 1000); start_ccb->ccb_h.ccb_bp = NULL; start_ccb->ccb_h.ccb_state = DA_CCB_PROBE_RC16; xpt_action(start_ccb); break; } case DA_STATE_PROBE_LBP: { struct scsi_vpd_logical_block_prov *lbp; if (!scsi_vpd_supported_page(periph, SVPD_LBP)) { /* * If we get here we don't support any SBC-3 delete * methods with UNMAP as the Logical Block Provisioning * VPD page support is required for devices which * support it according to T10/1799-D Revision 31 * however older revisions of the spec don't mandate * this so we currently don't remove these methods * from the available set. */ softc->state = DA_STATE_PROBE_BLK_LIMITS; goto skipstate; } lbp = (struct scsi_vpd_logical_block_prov *) malloc(sizeof(*lbp), M_SCSIDA, M_NOWAIT|M_ZERO); if (lbp == NULL) { printf("dastart: Couldn't malloc lbp data\n"); /* da_free_periph??? */ break; } scsi_inquiry(&start_ccb->csio, /*retries*/da_retry_count, /*cbfcnp*/dadone, /*tag_action*/MSG_SIMPLE_Q_TAG, /*inq_buf*/(u_int8_t *)lbp, /*inq_len*/sizeof(*lbp), /*evpd*/TRUE, /*page_code*/SVPD_LBP, /*sense_len*/SSD_MIN_SIZE, /*timeout*/da_default_timeout * 1000); start_ccb->ccb_h.ccb_bp = NULL; start_ccb->ccb_h.ccb_state = DA_CCB_PROBE_LBP; xpt_action(start_ccb); break; } case DA_STATE_PROBE_BLK_LIMITS: { struct scsi_vpd_block_limits *block_limits; if (!scsi_vpd_supported_page(periph, SVPD_BLOCK_LIMITS)) { /* Not supported skip to next probe */ softc->state = DA_STATE_PROBE_BDC; goto skipstate; } block_limits = (struct scsi_vpd_block_limits *) malloc(sizeof(*block_limits), M_SCSIDA, M_NOWAIT|M_ZERO); if (block_limits == NULL) { printf("dastart: Couldn't malloc block_limits data\n"); /* da_free_periph??? */ break; } scsi_inquiry(&start_ccb->csio, /*retries*/da_retry_count, /*cbfcnp*/dadone, /*tag_action*/MSG_SIMPLE_Q_TAG, /*inq_buf*/(u_int8_t *)block_limits, /*inq_len*/sizeof(*block_limits), /*evpd*/TRUE, /*page_code*/SVPD_BLOCK_LIMITS, /*sense_len*/SSD_MIN_SIZE, /*timeout*/da_default_timeout * 1000); start_ccb->ccb_h.ccb_bp = NULL; start_ccb->ccb_h.ccb_state = DA_CCB_PROBE_BLK_LIMITS; xpt_action(start_ccb); break; } case DA_STATE_PROBE_BDC: { struct scsi_vpd_block_characteristics *bdc; if (!scsi_vpd_supported_page(periph, SVPD_BDC)) { softc->state = DA_STATE_PROBE_ATA; goto skipstate; } bdc = (struct scsi_vpd_block_characteristics *) malloc(sizeof(*bdc), M_SCSIDA, M_NOWAIT|M_ZERO); if (bdc == NULL) { printf("dastart: Couldn't malloc bdc data\n"); /* da_free_periph??? */ break; } scsi_inquiry(&start_ccb->csio, /*retries*/da_retry_count, /*cbfcnp*/dadone, /*tag_action*/MSG_SIMPLE_Q_TAG, /*inq_buf*/(u_int8_t *)bdc, /*inq_len*/sizeof(*bdc), /*evpd*/TRUE, /*page_code*/SVPD_BDC, /*sense_len*/SSD_MIN_SIZE, /*timeout*/da_default_timeout * 1000); start_ccb->ccb_h.ccb_bp = NULL; start_ccb->ccb_h.ccb_state = DA_CCB_PROBE_BDC; xpt_action(start_ccb); break; } case DA_STATE_PROBE_ATA: { struct ata_params *ata_params; if (!scsi_vpd_supported_page(periph, SVPD_ATA_INFORMATION)) { daprobedone(periph, start_ccb); break; } ata_params = (struct ata_params*) malloc(sizeof(*ata_params), M_SCSIDA, M_NOWAIT|M_ZERO); if (ata_params == NULL) { printf("dastart: Couldn't malloc ata_params data\n"); /* da_free_periph??? */ break; } scsi_ata_identify(&start_ccb->csio, /*retries*/da_retry_count, /*cbfcnp*/dadone, /*tag_action*/MSG_SIMPLE_Q_TAG, /*data_ptr*/(u_int8_t *)ata_params, /*dxfer_len*/sizeof(*ata_params), /*sense_len*/SSD_FULL_SIZE, /*timeout*/da_default_timeout * 1000); start_ccb->ccb_h.ccb_bp = NULL; start_ccb->ccb_h.ccb_state = DA_CCB_PROBE_ATA; xpt_action(start_ccb); break; } } } /* * In each of the methods below, while its the caller's * responsibility to ensure the request will fit into a * single device request, we might have changed the delete * method due to the device incorrectly advertising either * its supported methods or limits. * * To prevent this causing further issues we validate the * against the methods limits, and warn which would * otherwise be unnecessary. */ static void da_delete_unmap(struct cam_periph *periph, union ccb *ccb, struct bio *bp) { struct da_softc *softc = (struct da_softc *)periph->softc;; struct bio *bp1; uint8_t *buf = softc->unmap_buf; uint64_t lba, lastlba = (uint64_t)-1; uint64_t totalcount = 0; uint64_t count; uint32_t lastcount = 0, c; uint32_t off, ranges = 0; /* * Currently this doesn't take the UNMAP * Granularity and Granularity Alignment * fields into account. * * This could result in both unoptimal unmap * requests as as well as UNMAP calls unmapping * fewer LBA's than requested. */ bzero(softc->unmap_buf, sizeof(softc->unmap_buf)); bp1 = bp; do { /* * Note: ada and da are different in how they store the * pending bp's in a trim. ada stores all of them in the * trim_req.bps. da stores all but the first one in the * delete_run_queue. ada then completes all the bps in * its adadone() loop. da completes all the bps in the * delete_run_queue in dadone, and relies on the biodone * after to complete. This should be reconciled since there's * no real reason to do it differently. XXX */ if (bp1 != bp) bioq_insert_tail(&softc->delete_run_queue, bp1); lba = bp1->bio_pblkno; count = bp1->bio_bcount / softc->params.secsize; /* Try to extend the previous range. */ if (lba == lastlba) { c = omin(count, UNMAP_RANGE_MAX - lastcount); lastcount += c; off = ((ranges - 1) * UNMAP_RANGE_SIZE) + UNMAP_HEAD_SIZE; scsi_ulto4b(lastcount, &buf[off + 8]); count -= c; lba +=c; totalcount += c; } while (count > 0) { c = omin(count, UNMAP_RANGE_MAX); if (totalcount + c > softc->unmap_max_lba || ranges >= softc->unmap_max_ranges) { xpt_print(periph->path, "%s issuing short delete %ld > %ld" "|| %d >= %d", da_delete_method_desc[softc->delete_method], totalcount + c, softc->unmap_max_lba, ranges, softc->unmap_max_ranges); break; } off = (ranges * UNMAP_RANGE_SIZE) + UNMAP_HEAD_SIZE; scsi_u64to8b(lba, &buf[off + 0]); scsi_ulto4b(c, &buf[off + 8]); lba += c; totalcount += c; ranges++; count -= c; lastcount = c; } lastlba = lba; bp1 = cam_iosched_next_trim(softc->cam_iosched); if (bp1 == NULL) break; if (ranges >= softc->unmap_max_ranges || totalcount + bp1->bio_bcount / softc->params.secsize > softc->unmap_max_lba) { cam_iosched_put_back_trim(softc->cam_iosched, bp1); break; } } while (1); scsi_ulto2b(ranges * 16 + 6, &buf[0]); scsi_ulto2b(ranges * 16, &buf[2]); scsi_unmap(&ccb->csio, /*retries*/da_retry_count, /*cbfcnp*/dadone, /*tag_action*/MSG_SIMPLE_Q_TAG, /*byte2*/0, /*data_ptr*/ buf, /*dxfer_len*/ ranges * 16 + 8, /*sense_len*/SSD_FULL_SIZE, da_default_timeout * 1000); ccb->ccb_h.ccb_state = DA_CCB_DELETE; ccb->ccb_h.flags |= CAM_UNLOCKED; cam_iosched_submit_trim(softc->cam_iosched); } static void da_delete_trim(struct cam_periph *periph, union ccb *ccb, struct bio *bp) { struct da_softc *softc = (struct da_softc *)periph->softc; struct bio *bp1; uint8_t *buf = softc->unmap_buf; uint64_t lastlba = (uint64_t)-1; uint64_t count; uint64_t lba; uint32_t lastcount = 0, c, requestcount; int ranges = 0, off, block_count; bzero(softc->unmap_buf, sizeof(softc->unmap_buf)); bp1 = bp; do { if (bp1 != bp)//XXX imp XXX bioq_insert_tail(&softc->delete_run_queue, bp1); lba = bp1->bio_pblkno; count = bp1->bio_bcount / softc->params.secsize; requestcount = count; /* Try to extend the previous range. */ if (lba == lastlba) { c = omin(count, ATA_DSM_RANGE_MAX - lastcount); lastcount += c; off = (ranges - 1) * 8; buf[off + 6] = lastcount & 0xff; buf[off + 7] = (lastcount >> 8) & 0xff; count -= c; lba += c; } while (count > 0) { c = omin(count, ATA_DSM_RANGE_MAX); off = ranges * 8; buf[off + 0] = lba & 0xff; buf[off + 1] = (lba >> 8) & 0xff; buf[off + 2] = (lba >> 16) & 0xff; buf[off + 3] = (lba >> 24) & 0xff; buf[off + 4] = (lba >> 32) & 0xff; buf[off + 5] = (lba >> 40) & 0xff; buf[off + 6] = c & 0xff; buf[off + 7] = (c >> 8) & 0xff; lba += c; ranges++; count -= c; lastcount = c; if (count != 0 && ranges == softc->trim_max_ranges) { xpt_print(periph->path, "%s issuing short delete %ld > %ld\n", da_delete_method_desc[softc->delete_method], requestcount, (softc->trim_max_ranges - ranges) * ATA_DSM_RANGE_MAX); break; } } lastlba = lba; bp1 = cam_iosched_next_trim(softc->cam_iosched); if (bp1 == NULL) break; if (bp1->bio_bcount / softc->params.secsize > (softc->trim_max_ranges - ranges) * ATA_DSM_RANGE_MAX) { cam_iosched_put_back_trim(softc->cam_iosched, bp1); break; } } while (1); block_count = (ranges + ATA_DSM_BLK_RANGES - 1) / ATA_DSM_BLK_RANGES; scsi_ata_trim(&ccb->csio, /*retries*/da_retry_count, /*cbfcnp*/dadone, /*tag_action*/MSG_SIMPLE_Q_TAG, block_count, /*data_ptr*/buf, /*dxfer_len*/block_count * ATA_DSM_BLK_SIZE, /*sense_len*/SSD_FULL_SIZE, da_default_timeout * 1000); ccb->ccb_h.ccb_state = DA_CCB_DELETE; ccb->ccb_h.flags |= CAM_UNLOCKED; cam_iosched_submit_trim(softc->cam_iosched); } /* * We calculate ws_max_blks here based off d_delmaxsize instead * of using softc->ws_max_blks as it is absolute max for the * device not the protocol max which may well be lower. */ static void da_delete_ws(struct cam_periph *periph, union ccb *ccb, struct bio *bp) { struct da_softc *softc; struct bio *bp1; uint64_t ws_max_blks; uint64_t lba; uint64_t count; /* forward compat with WS32 */ softc = (struct da_softc *)periph->softc; ws_max_blks = softc->disk->d_delmaxsize / softc->params.secsize; lba = bp->bio_pblkno; count = 0; bp1 = bp; do { if (bp1 != bp)//XXX imp XXX bioq_insert_tail(&softc->delete_run_queue, bp1); count += bp1->bio_bcount / softc->params.secsize; if (count > ws_max_blks) { xpt_print(periph->path, "%s issuing short delete %ld > %ld\n", da_delete_method_desc[softc->delete_method], count, ws_max_blks); count = omin(count, ws_max_blks); break; } bp1 = cam_iosched_next_trim(softc->cam_iosched); if (bp1 == NULL) break; if (lba + count != bp1->bio_pblkno || count + bp1->bio_bcount / softc->params.secsize > ws_max_blks) { cam_iosched_put_back_trim(softc->cam_iosched, bp1); break; } } while (1); scsi_write_same(&ccb->csio, /*retries*/da_retry_count, /*cbfcnp*/dadone, /*tag_action*/MSG_SIMPLE_Q_TAG, /*byte2*/softc->delete_method == DA_DELETE_ZERO ? 0 : SWS_UNMAP, softc->delete_method == DA_DELETE_WS16 ? 16 : 10, /*lba*/lba, /*block_count*/count, /*data_ptr*/ __DECONST(void *, zero_region), /*dxfer_len*/ softc->params.secsize, /*sense_len*/SSD_FULL_SIZE, da_default_timeout * 1000); ccb->ccb_h.ccb_state = DA_CCB_DELETE; ccb->ccb_h.flags |= CAM_UNLOCKED; cam_iosched_submit_trim(softc->cam_iosched); } static int cmd6workaround(union ccb *ccb) { struct scsi_rw_6 cmd6; struct scsi_rw_10 *cmd10; struct da_softc *softc; u_int8_t *cdb; struct bio *bp; int frozen; cdb = ccb->csio.cdb_io.cdb_bytes; softc = (struct da_softc *)xpt_path_periph(ccb->ccb_h.path)->softc; if (ccb->ccb_h.ccb_state == DA_CCB_DELETE) { da_delete_methods old_method = softc->delete_method; /* * Typically there are two reasons for failure here * 1. Delete method was detected as supported but isn't * 2. Delete failed due to invalid params e.g. too big * * While we will attempt to choose an alternative delete method * this may result in short deletes if the existing delete * requests from geom are big for the new method choosen. * * This method assumes that the error which triggered this * will not retry the io otherwise a panic will occur */ dadeleteflag(softc, old_method, 0); dadeletemethodchoose(softc, DA_DELETE_DISABLE); if (softc->delete_method == DA_DELETE_DISABLE) xpt_print(ccb->ccb_h.path, "%s failed, disabling BIO_DELETE\n", da_delete_method_desc[old_method]); else xpt_print(ccb->ccb_h.path, "%s failed, switching to %s BIO_DELETE\n", da_delete_method_desc[old_method], da_delete_method_desc[softc->delete_method]); while ((bp = bioq_takefirst(&softc->delete_run_queue)) != NULL) cam_iosched_queue_work(softc->cam_iosched, bp); cam_iosched_queue_work(softc->cam_iosched, (struct bio *)ccb->ccb_h.ccb_bp); ccb->ccb_h.ccb_bp = NULL; return (0); } /* Detect unsupported PREVENT ALLOW MEDIUM REMOVAL. */ if ((ccb->ccb_h.flags & CAM_CDB_POINTER) == 0 && (*cdb == PREVENT_ALLOW) && (softc->quirks & DA_Q_NO_PREVENT) == 0) { if (bootverbose) xpt_print(ccb->ccb_h.path, "PREVENT ALLOW MEDIUM REMOVAL not supported.\n"); softc->quirks |= DA_Q_NO_PREVENT; return (0); } /* Detect unsupported SYNCHRONIZE CACHE(10). */ if ((ccb->ccb_h.flags & CAM_CDB_POINTER) == 0 && (*cdb == SYNCHRONIZE_CACHE) && (softc->quirks & DA_Q_NO_SYNC_CACHE) == 0) { if (bootverbose) xpt_print(ccb->ccb_h.path, "SYNCHRONIZE CACHE(10) not supported.\n"); softc->quirks |= DA_Q_NO_SYNC_CACHE; softc->disk->d_flags &= ~DISKFLAG_CANFLUSHCACHE; return (0); } /* Translation only possible if CDB is an array and cmd is R/W6 */ if ((ccb->ccb_h.flags & CAM_CDB_POINTER) != 0 || (*cdb != READ_6 && *cdb != WRITE_6)) return 0; xpt_print(ccb->ccb_h.path, "READ(6)/WRITE(6) not supported, " "increasing minimum_cmd_size to 10.\n"); softc->minimum_cmd_size = 10; bcopy(cdb, &cmd6, sizeof(struct scsi_rw_6)); cmd10 = (struct scsi_rw_10 *)cdb; cmd10->opcode = (cmd6.opcode == READ_6) ? READ_10 : WRITE_10; cmd10->byte2 = 0; scsi_ulto4b(scsi_3btoul(cmd6.addr), cmd10->addr); cmd10->reserved = 0; scsi_ulto2b(cmd6.length, cmd10->length); cmd10->control = cmd6.control; ccb->csio.cdb_len = sizeof(*cmd10); /* Requeue request, unfreezing queue if necessary */ frozen = (ccb->ccb_h.status & CAM_DEV_QFRZN) != 0; ccb->ccb_h.status = CAM_REQUEUE_REQ; xpt_action(ccb); if (frozen) { cam_release_devq(ccb->ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); } return (ERESTART); } static void dadone(struct cam_periph *periph, union ccb *done_ccb) { struct da_softc *softc; struct ccb_scsiio *csio; u_int32_t priority; da_ccb_state state; softc = (struct da_softc *)periph->softc; priority = done_ccb->ccb_h.pinfo.priority; CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("dadone\n")); csio = &done_ccb->csio; state = csio->ccb_h.ccb_state & DA_CCB_TYPE_MASK; switch (state) { case DA_CCB_BUFFER_IO: case DA_CCB_DELETE: { struct bio *bp, *bp1; - int need_sched; cam_periph_lock(periph); bp = (struct bio *)done_ccb->ccb_h.ccb_bp; if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { int error; int sf; if ((csio->ccb_h.ccb_state & DA_CCB_RETRY_UA) != 0) sf = SF_RETRY_UA; else sf = 0; error = daerror(done_ccb, CAM_RETRY_SELTO, sf); if (error == ERESTART) { /* * A retry was scheduled, so * just return. */ cam_periph_unlock(periph); return; } bp = (struct bio *)done_ccb->ccb_h.ccb_bp; if (error != 0) { int queued_error; /* * return all queued I/O with EIO, so that * the client can retry these I/Os in the * proper order should it attempt to recover. */ queued_error = EIO; if (error == ENXIO && (softc->flags & DA_FLAG_PACK_INVALID)== 0) { /* * Catastrophic error. Mark our pack as * invalid. */ /* * XXX See if this is really a media * XXX change first? */ xpt_print(periph->path, "Invalidating pack\n"); softc->flags |= DA_FLAG_PACK_INVALID; +#ifdef CAM_IO_STATS + softc->invalidations++; +#endif queued_error = ENXIO; } cam_iosched_flush(softc->cam_iosched, NULL, queued_error); if (bp != NULL) { bp->bio_error = error; bp->bio_resid = bp->bio_bcount; bp->bio_flags |= BIO_ERROR; } } else if (bp != NULL) { if (state == DA_CCB_DELETE) bp->bio_resid = 0; else bp->bio_resid = csio->resid; bp->bio_error = 0; if (bp->bio_resid != 0) bp->bio_flags |= BIO_ERROR; } if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) cam_release_devq(done_ccb->ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); } else if (bp != NULL) { if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) panic("REQ_CMP with QFRZN"); if (state == DA_CCB_DELETE) bp->bio_resid = 0; else bp->bio_resid = csio->resid; if (csio->resid > 0) bp->bio_flags |= BIO_ERROR; if (softc->error_inject != 0) { bp->bio_error = softc->error_inject; bp->bio_resid = bp->bio_bcount; bp->bio_flags |= BIO_ERROR; softc->error_inject = 0; } } LIST_REMOVE(&done_ccb->ccb_h, periph_links.le); if (LIST_EMPTY(&softc->pending_ccbs)) softc->flags |= DA_FLAG_WAS_OTAG; - need_sched = cam_iosched_bio_complete(softc->cam_iosched, bp, done_ccb); + cam_iosched_bio_complete(softc->cam_iosched, bp, done_ccb); xpt_release_ccb(done_ccb); if (state == DA_CCB_DELETE) { TAILQ_HEAD(, bio) queue; TAILQ_INIT(&queue); TAILQ_CONCAT(&queue, &softc->delete_run_queue.queue, bio_queue); softc->delete_run_queue.insert_point = NULL; /* * Normally, the xpt_release_ccb() above would make sure * that when we have more work to do, that work would * get kicked off. However, we specifically keep - * delete running set to 0 before the call above to + * delete_running set to 0 before the call above to * allow other I/O to progress when many BIO_DELETE - * requests are pushed down. We set delete running to 0 + * requests are pushed down. We set delete_running to 0 * and call daschedule again so that we don't stall if * there are no other I/Os pending apart from BIO_DELETEs. */ cam_iosched_trim_done(softc->cam_iosched); daschedule(periph); cam_periph_unlock(periph); while ((bp1 = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, bp1, bio_queue); bp1->bio_error = bp->bio_error; if (bp->bio_flags & BIO_ERROR) { bp1->bio_flags |= BIO_ERROR; bp1->bio_resid = bp1->bio_bcount; } else bp1->bio_resid = 0; biodone(bp1); } } else { - if (need_sched) - daschedule(periph); + daschedule(periph); cam_periph_unlock(periph); } if (bp != NULL) biodone(bp); return; } case DA_CCB_PROBE_RC: case DA_CCB_PROBE_RC16: { struct scsi_read_capacity_data *rdcap; struct scsi_read_capacity_data_long *rcaplong; char announce_buf[80]; int lbp; lbp = 0; rdcap = NULL; rcaplong = NULL; if (state == DA_CCB_PROBE_RC) rdcap =(struct scsi_read_capacity_data *)csio->data_ptr; else rcaplong = (struct scsi_read_capacity_data_long *) csio->data_ptr; if ((csio->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) { struct disk_params *dp; uint32_t block_size; uint64_t maxsector; u_int lalba; /* Lowest aligned LBA. */ if (state == DA_CCB_PROBE_RC) { block_size = scsi_4btoul(rdcap->length); maxsector = scsi_4btoul(rdcap->addr); lalba = 0; /* * According to SBC-2, if the standard 10 * byte READ CAPACITY command returns 2^32, * we should issue the 16 byte version of * the command, since the device in question * has more sectors than can be represented * with the short version of the command. */ if (maxsector == 0xffffffff) { free(rdcap, M_SCSIDA); xpt_release_ccb(done_ccb); softc->state = DA_STATE_PROBE_RC16; xpt_schedule(periph, priority); return; } } else { block_size = scsi_4btoul(rcaplong->length); maxsector = scsi_8btou64(rcaplong->addr); lalba = scsi_2btoul(rcaplong->lalba_lbp); } /* * Because GEOM code just will panic us if we * give them an 'illegal' value we'll avoid that * here. */ if (block_size == 0) { block_size = 512; if (maxsector == 0) maxsector = -1; } if (block_size >= MAXPHYS) { xpt_print(periph->path, "unsupportable block size %ju\n", (uintmax_t) block_size); announce_buf[0] = '\0'; cam_periph_invalidate(periph); } else { /* * We pass rcaplong into dasetgeom(), * because it will only use it if it is * non-NULL. */ dasetgeom(periph, block_size, maxsector, rcaplong, sizeof(*rcaplong)); lbp = (lalba & SRC16_LBPME_A); dp = &softc->params; snprintf(announce_buf, sizeof(announce_buf), "%juMB (%ju %u byte sectors: %dH %dS/T " "%dC)", (uintmax_t) (((uintmax_t)dp->secsize * dp->sectors) / (1024*1024)), (uintmax_t)dp->sectors, dp->secsize, dp->heads, dp->secs_per_track, dp->cylinders); } } else { int error; announce_buf[0] = '\0'; /* * Retry any UNIT ATTENTION type errors. They * are expected at boot. */ error = daerror(done_ccb, CAM_RETRY_SELTO, SF_RETRY_UA|SF_NO_PRINT); if (error == ERESTART) { /* * A retry was scheuled, so * just return. */ return; } else if (error != 0) { int asc, ascq; int sense_key, error_code; int have_sense; cam_status status; struct ccb_getdev cgd; /* Don't wedge this device's queue */ status = done_ccb->ccb_h.status; if ((status & CAM_DEV_QFRZN) != 0) cam_release_devq(done_ccb->ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); xpt_setup_ccb(&cgd.ccb_h, done_ccb->ccb_h.path, CAM_PRIORITY_NORMAL); cgd.ccb_h.func_code = XPT_GDEV_TYPE; xpt_action((union ccb *)&cgd); if (scsi_extract_sense_ccb(done_ccb, &error_code, &sense_key, &asc, &ascq)) have_sense = TRUE; else have_sense = FALSE; /* * If we tried READ CAPACITY(16) and failed, * fallback to READ CAPACITY(10). */ if ((state == DA_CCB_PROBE_RC16) && (softc->flags & DA_FLAG_CAN_RC16) && (((csio->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_INVALID) || ((have_sense) && (error_code == SSD_CURRENT_ERROR) && (sense_key == SSD_KEY_ILLEGAL_REQUEST)))) { softc->flags &= ~DA_FLAG_CAN_RC16; free(rdcap, M_SCSIDA); xpt_release_ccb(done_ccb); softc->state = DA_STATE_PROBE_RC; xpt_schedule(periph, priority); return; } else /* * Attach to anything that claims to be a * direct access or optical disk device, * as long as it doesn't return a "Logical * unit not supported" (0x25) error. */ if ((have_sense) && (asc != 0x25) && (error_code == SSD_CURRENT_ERROR)) { const char *sense_key_desc; const char *asc_desc; dasetgeom(periph, 512, -1, NULL, 0); scsi_sense_desc(sense_key, asc, ascq, &cgd.inq_data, &sense_key_desc, &asc_desc); snprintf(announce_buf, sizeof(announce_buf), "Attempt to query device " "size failed: %s, %s", sense_key_desc, asc_desc); } else { if (have_sense) scsi_sense_print( &done_ccb->csio); else { xpt_print(periph->path, "got CAM status %#x\n", done_ccb->ccb_h.status); } xpt_print(periph->path, "fatal error, " "failed to attach to device\n"); /* * Free up resources. */ cam_periph_invalidate(periph); } } } free(csio->data_ptr, M_SCSIDA); if (announce_buf[0] != '\0' && ((softc->flags & DA_FLAG_ANNOUNCED) == 0)) { /* * Create our sysctl variables, now that we know * we have successfully attached. */ /* increase the refcount */ if (cam_periph_acquire(periph) == CAM_REQ_CMP) { taskqueue_enqueue(taskqueue_thread, &softc->sysctl_task); xpt_announce_periph(periph, announce_buf); xpt_announce_quirks(periph, softc->quirks, DA_Q_BIT_STRING); } else { xpt_print(periph->path, "fatal error, " "could not acquire reference count\n"); } } /* We already probed the device. */ if (softc->flags & DA_FLAG_PROBED) { daprobedone(periph, done_ccb); return; } /* Ensure re-probe doesn't see old delete. */ softc->delete_available = 0; if (lbp && (softc->quirks & DA_Q_NO_UNMAP) == 0) { /* * Based on older SBC-3 spec revisions * any of the UNMAP methods "may" be * available via LBP given this flag so * we flag all of them as availble and * then remove those which further * probes confirm aren't available * later. * * We could also check readcap(16) p_type * flag to exclude one or more invalid * write same (X) types here */ dadeleteflag(softc, DA_DELETE_WS16, 1); dadeleteflag(softc, DA_DELETE_WS10, 1); dadeleteflag(softc, DA_DELETE_ZERO, 1); dadeleteflag(softc, DA_DELETE_UNMAP, 1); xpt_release_ccb(done_ccb); softc->state = DA_STATE_PROBE_LBP; xpt_schedule(periph, priority); return; } xpt_release_ccb(done_ccb); softc->state = DA_STATE_PROBE_BDC; xpt_schedule(periph, priority); return; } case DA_CCB_PROBE_LBP: { struct scsi_vpd_logical_block_prov *lbp; lbp = (struct scsi_vpd_logical_block_prov *)csio->data_ptr; if ((csio->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) { /* * T10/1799-D Revision 31 states at least one of these * must be supported but we don't currently enforce this. */ dadeleteflag(softc, DA_DELETE_WS16, (lbp->flags & SVPD_LBP_WS16)); dadeleteflag(softc, DA_DELETE_WS10, (lbp->flags & SVPD_LBP_WS10)); dadeleteflag(softc, DA_DELETE_ZERO, (lbp->flags & SVPD_LBP_WS10)); dadeleteflag(softc, DA_DELETE_UNMAP, (lbp->flags & SVPD_LBP_UNMAP)); } else { int error; error = daerror(done_ccb, CAM_RETRY_SELTO, SF_RETRY_UA|SF_NO_PRINT); if (error == ERESTART) return; else if (error != 0) { if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) { /* Don't wedge this device's queue */ cam_release_devq(done_ccb->ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); } /* * Failure indicates we don't support any SBC-3 * delete methods with UNMAP */ } } free(lbp, M_SCSIDA); xpt_release_ccb(done_ccb); softc->state = DA_STATE_PROBE_BLK_LIMITS; xpt_schedule(periph, priority); return; } case DA_CCB_PROBE_BLK_LIMITS: { struct scsi_vpd_block_limits *block_limits; block_limits = (struct scsi_vpd_block_limits *)csio->data_ptr; if ((csio->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) { uint32_t max_txfer_len = scsi_4btoul( block_limits->max_txfer_len); uint32_t max_unmap_lba_cnt = scsi_4btoul( block_limits->max_unmap_lba_cnt); uint32_t max_unmap_blk_cnt = scsi_4btoul( block_limits->max_unmap_blk_cnt); uint64_t ws_max_blks = scsi_8btou64( block_limits->max_write_same_length); if (max_txfer_len != 0) { softc->disk->d_maxsize = MIN(softc->maxio, (off_t)max_txfer_len * softc->params.secsize); } /* * We should already support UNMAP but we check lba * and block count to be sure */ if (max_unmap_lba_cnt != 0x00L && max_unmap_blk_cnt != 0x00L) { softc->unmap_max_lba = max_unmap_lba_cnt; softc->unmap_max_ranges = min(max_unmap_blk_cnt, UNMAP_MAX_RANGES); } else { /* * Unexpected UNMAP limits which means the * device doesn't actually support UNMAP */ dadeleteflag(softc, DA_DELETE_UNMAP, 0); } if (ws_max_blks != 0x00L) softc->ws_max_blks = ws_max_blks; } else { int error; error = daerror(done_ccb, CAM_RETRY_SELTO, SF_RETRY_UA|SF_NO_PRINT); if (error == ERESTART) return; else if (error != 0) { if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) { /* Don't wedge this device's queue */ cam_release_devq(done_ccb->ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); } /* * Failure here doesn't mean UNMAP is not * supported as this is an optional page. */ softc->unmap_max_lba = 1; softc->unmap_max_ranges = 1; } } free(block_limits, M_SCSIDA); xpt_release_ccb(done_ccb); softc->state = DA_STATE_PROBE_BDC; xpt_schedule(periph, priority); return; } case DA_CCB_PROBE_BDC: { struct scsi_vpd_block_characteristics *bdc; bdc = (struct scsi_vpd_block_characteristics *)csio->data_ptr; if ((csio->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) { /* * Disable queue sorting for non-rotational media * by default. */ u_int16_t old_rate = softc->disk->d_rotation_rate; softc->disk->d_rotation_rate = scsi_2btoul(bdc->medium_rotation_rate); if (softc->disk->d_rotation_rate == SVPD_BDC_RATE_NON_ROTATING) { cam_iosched_set_sort_queue(softc->cam_iosched, 0); + softc->rotating = 0; } if (softc->disk->d_rotation_rate != old_rate) { disk_attr_changed(softc->disk, "GEOM::rotation_rate", M_NOWAIT); } } else { int error; error = daerror(done_ccb, CAM_RETRY_SELTO, SF_RETRY_UA|SF_NO_PRINT); if (error == ERESTART) return; else if (error != 0) { if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) { /* Don't wedge this device's queue */ cam_release_devq(done_ccb->ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); } } } free(bdc, M_SCSIDA); xpt_release_ccb(done_ccb); softc->state = DA_STATE_PROBE_ATA; xpt_schedule(periph, priority); return; } case DA_CCB_PROBE_ATA: { int i; struct ata_params *ata_params; int16_t *ptr; ata_params = (struct ata_params *)csio->data_ptr; ptr = (uint16_t *)ata_params; if ((csio->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) { uint16_t old_rate; for (i = 0; i < sizeof(*ata_params) / 2; i++) ptr[i] = le16toh(ptr[i]); if (ata_params->support_dsm & ATA_SUPPORT_DSM_TRIM && (softc->quirks & DA_Q_NO_UNMAP) == 0) { dadeleteflag(softc, DA_DELETE_ATA_TRIM, 1); if (ata_params->max_dsm_blocks != 0) softc->trim_max_ranges = min( softc->trim_max_ranges, ata_params->max_dsm_blocks * ATA_DSM_BLK_RANGES); } /* * Disable queue sorting for non-rotational media * by default. */ old_rate = softc->disk->d_rotation_rate; softc->disk->d_rotation_rate = ata_params->media_rotation_rate; if (softc->disk->d_rotation_rate == ATA_RATE_NON_ROTATING) { cam_iosched_set_sort_queue(softc->cam_iosched, 0); + softc->rotating = 0; } - if (softc->disk->d_rotation_rate != old_rate) { disk_attr_changed(softc->disk, "GEOM::rotation_rate", M_NOWAIT); } } else { int error; error = daerror(done_ccb, CAM_RETRY_SELTO, SF_RETRY_UA|SF_NO_PRINT); if (error == ERESTART) return; else if (error != 0) { if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) { /* Don't wedge this device's queue */ cam_release_devq(done_ccb->ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); } } } free(ata_params, M_SCSIDA); daprobedone(periph, done_ccb); return; } case DA_CCB_DUMP: /* No-op. We're polling */ return; case DA_CCB_TUR: { if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { if (daerror(done_ccb, CAM_RETRY_SELTO, SF_RETRY_UA | SF_NO_RECOVERY | SF_NO_PRINT) == ERESTART) return; if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) cam_release_devq(done_ccb->ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); } xpt_release_ccb(done_ccb); cam_periph_release_locked(periph); return; } default: break; } xpt_release_ccb(done_ccb); } static void dareprobe(struct cam_periph *periph) { struct da_softc *softc; cam_status status; softc = (struct da_softc *)periph->softc; /* Probe in progress; don't interfere. */ if (softc->state != DA_STATE_NORMAL) return; status = cam_periph_acquire(periph); KASSERT(status == CAM_REQ_CMP, ("dareprobe: cam_periph_acquire failed")); if (softc->flags & DA_FLAG_CAN_RC16) softc->state = DA_STATE_PROBE_RC16; else softc->state = DA_STATE_PROBE_RC; xpt_schedule(periph, CAM_PRIORITY_DEV); } static int daerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags) { struct da_softc *softc; struct cam_periph *periph; int error, error_code, sense_key, asc, ascq; periph = xpt_path_periph(ccb->ccb_h.path); softc = (struct da_softc *)periph->softc; /* * Automatically detect devices that do not support * READ(6)/WRITE(6) and upgrade to using 10 byte cdbs. */ error = 0; if ((ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_INVALID) { error = cmd6workaround(ccb); } else if (scsi_extract_sense_ccb(ccb, &error_code, &sense_key, &asc, &ascq)) { if (sense_key == SSD_KEY_ILLEGAL_REQUEST) error = cmd6workaround(ccb); /* * If the target replied with CAPACITY DATA HAS CHANGED UA, * query the capacity and notify upper layers. */ else if (sense_key == SSD_KEY_UNIT_ATTENTION && asc == 0x2A && ascq == 0x09) { xpt_print(periph->path, "Capacity data has changed\n"); softc->flags &= ~DA_FLAG_PROBED; dareprobe(periph); sense_flags |= SF_NO_PRINT; } else if (sense_key == SSD_KEY_UNIT_ATTENTION && asc == 0x28 && ascq == 0x00) { softc->flags &= ~DA_FLAG_PROBED; disk_media_changed(softc->disk, M_NOWAIT); } else if (sense_key == SSD_KEY_UNIT_ATTENTION && asc == 0x3F && ascq == 0x03) { xpt_print(periph->path, "INQUIRY data has changed\n"); softc->flags &= ~DA_FLAG_PROBED; dareprobe(periph); sense_flags |= SF_NO_PRINT; } else if (sense_key == SSD_KEY_NOT_READY && asc == 0x3a && (softc->flags & DA_FLAG_PACK_INVALID) == 0) { softc->flags |= DA_FLAG_PACK_INVALID; disk_media_gone(softc->disk, M_NOWAIT); } } if (error == ERESTART) return (ERESTART); + + switch (ccb->ccb_h.status & CAM_STATUS_MASK) { + case CAM_CMD_TIMEOUT: +#ifdef CAM_IO_STATS + softc->timeouts++; +#endif + break; + case CAM_REQ_ABORTED: + case CAM_REQ_CMP_ERR: + case CAM_REQ_TERMIO: + case CAM_UNREC_HBA_ERROR: + case CAM_DATA_RUN_ERR: +#ifdef CAM_IO_STATS + softc->errors++; +#endif + break; + default: + break; + } /* * XXX * Until we have a better way of doing pack validation, * don't treat UAs as errors. */ sense_flags |= SF_RETRY_UA; if (softc->quirks & DA_Q_RETRY_BUSY) sense_flags |= SF_RETRY_BUSY; return(cam_periph_error(ccb, cam_flags, sense_flags, &softc->saved_ccb)); } static void damediapoll(void *arg) { struct cam_periph *periph = arg; struct da_softc *softc = periph->softc; if (!cam_iosched_has_work_flags(softc->cam_iosched, DA_WORK_TUR) && LIST_EMPTY(&softc->pending_ccbs)) { if (cam_periph_acquire(periph) == CAM_REQ_CMP) { cam_iosched_set_work_flags(softc->cam_iosched, DA_WORK_TUR); daschedule(periph); } } /* Queue us up again */ if (da_poll_period != 0) callout_schedule(&softc->mediapoll_c, da_poll_period * hz); } static void daprevent(struct cam_periph *periph, int action) { struct da_softc *softc; union ccb *ccb; int error; softc = (struct da_softc *)periph->softc; if (((action == PR_ALLOW) && (softc->flags & DA_FLAG_PACK_LOCKED) == 0) || ((action == PR_PREVENT) && (softc->flags & DA_FLAG_PACK_LOCKED) != 0)) { return; } ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); scsi_prevent(&ccb->csio, /*retries*/1, /*cbcfp*/dadone, MSG_SIMPLE_Q_TAG, action, SSD_FULL_SIZE, 5000); error = cam_periph_runccb(ccb, daerror, CAM_RETRY_SELTO, SF_RETRY_UA | SF_NO_PRINT, softc->disk->d_devstat); if (error == 0) { if (action == PR_ALLOW) softc->flags &= ~DA_FLAG_PACK_LOCKED; else softc->flags |= DA_FLAG_PACK_LOCKED; } xpt_release_ccb(ccb); } static void dasetgeom(struct cam_periph *periph, uint32_t block_len, uint64_t maxsector, struct scsi_read_capacity_data_long *rcaplong, size_t rcap_len) { struct ccb_calc_geometry ccg; struct da_softc *softc; struct disk_params *dp; u_int lbppbe, lalba; int error; softc = (struct da_softc *)periph->softc; dp = &softc->params; dp->secsize = block_len; dp->sectors = maxsector + 1; if (rcaplong != NULL) { lbppbe = rcaplong->prot_lbppbe & SRC16_LBPPBE; lalba = scsi_2btoul(rcaplong->lalba_lbp); lalba &= SRC16_LALBA_A; } else { lbppbe = 0; lalba = 0; } if (lbppbe > 0) { dp->stripesize = block_len << lbppbe; dp->stripeoffset = (dp->stripesize - block_len * lalba) % dp->stripesize; } else if (softc->quirks & DA_Q_4K) { dp->stripesize = 4096; dp->stripeoffset = 0; } else { dp->stripesize = 0; dp->stripeoffset = 0; } /* * Have the controller provide us with a geometry * for this disk. The only time the geometry * matters is when we boot and the controller * is the only one knowledgeable enough to come * up with something that will make this a bootable * device. */ xpt_setup_ccb(&ccg.ccb_h, periph->path, CAM_PRIORITY_NORMAL); ccg.ccb_h.func_code = XPT_CALC_GEOMETRY; ccg.block_size = dp->secsize; ccg.volume_size = dp->sectors; ccg.heads = 0; ccg.secs_per_track = 0; ccg.cylinders = 0; xpt_action((union ccb*)&ccg); if ((ccg.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { /* * We don't know what went wrong here- but just pick * a geometry so we don't have nasty things like divide * by zero. */ dp->heads = 255; dp->secs_per_track = 255; dp->cylinders = dp->sectors / (255 * 255); if (dp->cylinders == 0) { dp->cylinders = 1; } } else { dp->heads = ccg.heads; dp->secs_per_track = ccg.secs_per_track; dp->cylinders = ccg.cylinders; } /* * If the user supplied a read capacity buffer, and if it is * different than the previous buffer, update the data in the EDT. * If it's the same, we don't bother. This avoids sending an * update every time someone opens this device. */ if ((rcaplong != NULL) && (bcmp(rcaplong, &softc->rcaplong, min(sizeof(softc->rcaplong), rcap_len)) != 0)) { struct ccb_dev_advinfo cdai; xpt_setup_ccb(&cdai.ccb_h, periph->path, CAM_PRIORITY_NORMAL); cdai.ccb_h.func_code = XPT_DEV_ADVINFO; cdai.buftype = CDAI_TYPE_RCAPLONG; cdai.flags = CDAI_FLAG_STORE; cdai.bufsiz = rcap_len; cdai.buf = (uint8_t *)rcaplong; xpt_action((union ccb *)&cdai); if ((cdai.ccb_h.status & CAM_DEV_QFRZN) != 0) cam_release_devq(cdai.ccb_h.path, 0, 0, 0, FALSE); if (cdai.ccb_h.status != CAM_REQ_CMP) { xpt_print(periph->path, "%s: failed to set read " "capacity advinfo\n", __func__); /* Use cam_error_print() to decode the status */ cam_error_print((union ccb *)&cdai, CAM_ESF_CAM_STATUS, CAM_EPF_ALL); } else { bcopy(rcaplong, &softc->rcaplong, min(sizeof(softc->rcaplong), rcap_len)); } } softc->disk->d_sectorsize = softc->params.secsize; softc->disk->d_mediasize = softc->params.secsize * (off_t)softc->params.sectors; softc->disk->d_stripesize = softc->params.stripesize; softc->disk->d_stripeoffset = softc->params.stripeoffset; /* XXX: these are not actually "firmware" values, so they may be wrong */ softc->disk->d_fwsectors = softc->params.secs_per_track; softc->disk->d_fwheads = softc->params.heads; softc->disk->d_devstat->block_size = softc->params.secsize; softc->disk->d_devstat->flags &= ~DEVSTAT_BS_UNAVAILABLE; error = disk_resize(softc->disk, M_NOWAIT); if (error != 0) xpt_print(periph->path, "disk_resize(9) failed, error = %d\n", error); } static void dasendorderedtag(void *arg) { struct da_softc *softc = arg; if (da_send_ordered) { if (!LIST_EMPTY(&softc->pending_ccbs)) { if ((softc->flags & DA_FLAG_WAS_OTAG) == 0) softc->flags |= DA_FLAG_NEED_OTAG; softc->flags &= ~DA_FLAG_WAS_OTAG; } } /* Queue us up again */ callout_reset(&softc->sendordered_c, (da_default_timeout * hz) / DA_ORDEREDTAG_INTERVAL, dasendorderedtag, softc); } /* * Step through all DA peripheral drivers, and if the device is still open, * sync the disk cache to physical media. */ static void dashutdown(void * arg, int howto) { struct cam_periph *periph; struct da_softc *softc; union ccb *ccb; int error; CAM_PERIPH_FOREACH(periph, &dadriver) { softc = (struct da_softc *)periph->softc; if (SCHEDULER_STOPPED()) { /* If we paniced with the lock held, do not recurse. */ if (!cam_periph_owned(periph) && (softc->flags & DA_FLAG_OPEN)) { dadump(softc->disk, NULL, 0, 0, 0); } continue; } cam_periph_lock(periph); /* * We only sync the cache if the drive is still open, and * if the drive is capable of it.. */ if (((softc->flags & DA_FLAG_OPEN) == 0) || (softc->quirks & DA_Q_NO_SYNC_CACHE)) { cam_periph_unlock(periph); continue; } ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); scsi_synchronize_cache(&ccb->csio, /*retries*/0, /*cbfcnp*/dadone, MSG_SIMPLE_Q_TAG, /*begin_lba*/0, /* whole disk */ /*lb_count*/0, SSD_FULL_SIZE, 60 * 60 * 1000); error = cam_periph_runccb(ccb, daerror, /*cam_flags*/0, /*sense_flags*/ SF_NO_RECOVERY | SF_NO_RETRY | SF_QUIET_IR, softc->disk->d_devstat); if (error != 0) xpt_print(periph->path, "Synchronize cache failed\n"); xpt_release_ccb(ccb); cam_periph_unlock(periph); } } #else /* !_KERNEL */ /* * XXX These are only left out of the kernel build to silence warnings. If, * for some reason these functions are used in the kernel, the ifdefs should * be moved so they are included both in the kernel and userland. */ void scsi_format_unit(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, u_int8_t byte2, u_int16_t ileave, u_int8_t *data_ptr, u_int32_t dxfer_len, u_int8_t sense_len, u_int32_t timeout) { struct scsi_format_unit *scsi_cmd; scsi_cmd = (struct scsi_format_unit *)&csio->cdb_io.cdb_bytes; scsi_cmd->opcode = FORMAT_UNIT; scsi_cmd->byte2 = byte2; scsi_ulto2b(ileave, scsi_cmd->interleave); cam_fill_csio(csio, retries, cbfcnp, /*flags*/ (dxfer_len > 0) ? CAM_DIR_OUT : CAM_DIR_NONE, tag_action, data_ptr, dxfer_len, sense_len, sizeof(*scsi_cmd), timeout); } void scsi_read_defects(struct ccb_scsiio *csio, uint32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), uint8_t tag_action, uint8_t list_format, uint32_t addr_desc_index, uint8_t *data_ptr, uint32_t dxfer_len, int minimum_cmd_size, uint8_t sense_len, uint32_t timeout) { uint8_t cdb_len; /* * These conditions allow using the 10 byte command. Otherwise we * need to use the 12 byte command. */ if ((minimum_cmd_size <= 10) && (addr_desc_index == 0) && (dxfer_len <= SRDD10_MAX_LENGTH)) { struct scsi_read_defect_data_10 *cdb10; cdb10 = (struct scsi_read_defect_data_10 *) &csio->cdb_io.cdb_bytes; cdb_len = sizeof(*cdb10); bzero(cdb10, cdb_len); cdb10->opcode = READ_DEFECT_DATA_10; cdb10->format = list_format; scsi_ulto2b(dxfer_len, cdb10->alloc_length); } else { struct scsi_read_defect_data_12 *cdb12; cdb12 = (struct scsi_read_defect_data_12 *) &csio->cdb_io.cdb_bytes; cdb_len = sizeof(*cdb12); bzero(cdb12, cdb_len); cdb12->opcode = READ_DEFECT_DATA_12; cdb12->format = list_format; scsi_ulto4b(dxfer_len, cdb12->alloc_length); scsi_ulto4b(addr_desc_index, cdb12->address_descriptor_index); } cam_fill_csio(csio, retries, cbfcnp, /*flags*/ CAM_DIR_IN, tag_action, data_ptr, dxfer_len, sense_len, cdb_len, timeout); } void scsi_sanitize(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, u_int8_t byte2, u_int16_t control, u_int8_t *data_ptr, u_int32_t dxfer_len, u_int8_t sense_len, u_int32_t timeout) { struct scsi_sanitize *scsi_cmd; scsi_cmd = (struct scsi_sanitize *)&csio->cdb_io.cdb_bytes; scsi_cmd->opcode = SANITIZE; scsi_cmd->byte2 = byte2; scsi_cmd->control = control; scsi_ulto2b(dxfer_len, scsi_cmd->length); cam_fill_csio(csio, retries, cbfcnp, /*flags*/ (dxfer_len > 0) ? CAM_DIR_OUT : CAM_DIR_NONE, tag_action, data_ptr, dxfer_len, sense_len, sizeof(*scsi_cmd), timeout); } #endif /* _KERNEL */