Index: head/sys/cam/nvme/nvme_da.c =================================================================== --- head/sys/cam/nvme/nvme_da.c (revision 311970) +++ head/sys/cam/nvme/nvme_da.c (revision 311971) @@ -1,1152 +1,1152 @@ /*- * Copyright (c) 2015 Netflix, Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Derived from ata_da.c: * Copyright (c) 2009 Alexander Motin */ #include __FBSDID("$FreeBSD$"); #include #ifdef _KERNEL #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif /* _KERNEL */ #ifndef _KERNEL #include #include #endif /* _KERNEL */ #include #include #include #include #include #include #include typedef enum { NDA_STATE_NORMAL } nda_state; typedef enum { NDA_FLAG_OPEN = 0x0001, NDA_FLAG_DIRTY = 0x0002, NDA_FLAG_SCTX_INIT = 0x0004, } nda_flags; typedef enum { NDA_Q_4K = 0x01, NDA_Q_NONE = 0x00, } nda_quirks; #define NDA_Q_BIT_STRING \ "\020" \ "\001Bit 0" typedef enum { NDA_CCB_BUFFER_IO = 0x01, NDA_CCB_DUMP = 0x02, NDA_CCB_TRIM = 0x03, NDA_CCB_TYPE_MASK = 0x0F, } nda_ccb_state; /* Offsets into our private area for storing information */ #define ccb_state ppriv_field0 #define ccb_bp ppriv_ptr1 struct trim_request { TAILQ_HEAD(, bio) bps; }; struct nda_softc { struct cam_iosched_softc *cam_iosched; int outstanding_cmds; /* Number of active commands */ int refcount; /* Active xpt_action() calls */ nda_state state; nda_flags flags; nda_quirks quirks; int unmappedio; uint32_t nsid; /* Namespace ID for this nda device */ struct disk *disk; struct task sysctl_task; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; struct trim_request trim_req; #ifdef CAM_IO_STATS struct sysctl_ctx_list sysctl_stats_ctx; struct sysctl_oid *sysctl_stats_tree; u_int timeouts; u_int errors; u_int invalidations; #endif }; /* Need quirk table */ static disk_strategy_t ndastrategy; static dumper_t ndadump; static periph_init_t ndainit; static void ndaasync(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg); static void ndasysctlinit(void *context, int pending); static periph_ctor_t ndaregister; static periph_dtor_t ndacleanup; static periph_start_t ndastart; static periph_oninv_t ndaoninvalidate; static void ndadone(struct cam_periph *periph, union ccb *done_ccb); static int ndaerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags); static void ndashutdown(void *arg, int howto); static void ndasuspend(void *arg); #ifndef NDA_DEFAULT_SEND_ORDERED #define NDA_DEFAULT_SEND_ORDERED 1 #endif #ifndef NDA_DEFAULT_TIMEOUT #define NDA_DEFAULT_TIMEOUT 30 /* Timeout in seconds */ #endif #ifndef NDA_DEFAULT_RETRY #define NDA_DEFAULT_RETRY 4 #endif //static int nda_retry_count = NDA_DEFAULT_RETRY; static int nda_send_ordered = NDA_DEFAULT_SEND_ORDERED; static int nda_default_timeout = NDA_DEFAULT_TIMEOUT; /* * All NVMe media is non-rotational, so all nvme device instances * share this to implement the sysctl. */ static int nda_rotating_media = 0; static SYSCTL_NODE(_kern_cam, OID_AUTO, nda, CTLFLAG_RD, 0, "CAM Direct Access Disk driver"); static struct periph_driver ndadriver = { ndainit, "nda", TAILQ_HEAD_INITIALIZER(ndadriver.units), /* generation */ 0 }; PERIPHDRIVER_DECLARE(nda, ndadriver); static MALLOC_DEFINE(M_NVMEDA, "nvme_da", "nvme_da buffers"); /* * nice wrappers. Maybe these belong in nvme_all.c instead of * here, but this is the only place that uses these. Should * we ever grow another NVME periph, we should move them * all there wholesale. */ static void nda_nvme_flush(struct nda_softc *softc, struct ccb_nvmeio *nvmeio) { cam_fill_nvmeio(nvmeio, 0, /* retries */ ndadone, /* cbfcnp */ CAM_DIR_NONE, /* flags */ NULL, /* data_ptr */ 0, /* dxfer_len */ nda_default_timeout * 1000); /* timeout 5s */ nvme_ns_flush_cmd(&nvmeio->cmd, softc->nsid); } static void nda_nvme_trim(struct nda_softc *softc, struct ccb_nvmeio *nvmeio, void *payload, uint32_t num_ranges) { cam_fill_nvmeio(nvmeio, 0, /* retries */ ndadone, /* cbfcnp */ CAM_DIR_OUT, /* flags */ payload, /* data_ptr */ num_ranges * sizeof(struct nvme_dsm_range), /* dxfer_len */ nda_default_timeout * 1000); /* timeout 5s */ nvme_ns_trim_cmd(&nvmeio->cmd, softc->nsid, num_ranges); } static void nda_nvme_write(struct nda_softc *softc, struct ccb_nvmeio *nvmeio, void *payload, uint64_t lba, uint32_t len, uint32_t count) { cam_fill_nvmeio(nvmeio, 0, /* retries */ ndadone, /* cbfcnp */ CAM_DIR_OUT, /* flags */ payload, /* data_ptr */ len, /* dxfer_len */ nda_default_timeout * 1000); /* timeout 5s */ nvme_ns_write_cmd(&nvmeio->cmd, softc->nsid, lba, count); } static void nda_nvme_rw_bio(struct nda_softc *softc, struct ccb_nvmeio *nvmeio, struct bio *bp, uint32_t rwcmd) { int flags = rwcmd == NVME_OPC_READ ? CAM_DIR_IN : CAM_DIR_OUT; void *payload; uint64_t lba; uint32_t count; if (bp->bio_flags & BIO_UNMAPPED) { flags |= CAM_DATA_BIO; payload = bp; } else { payload = bp->bio_data; } lba = bp->bio_pblkno; count = bp->bio_bcount / softc->disk->d_sectorsize; cam_fill_nvmeio(nvmeio, 0, /* retries */ ndadone, /* cbfcnp */ flags, /* flags */ payload, /* data_ptr */ bp->bio_bcount, /* dxfer_len */ nda_default_timeout * 1000); /* timeout 5s */ nvme_ns_rw_cmd(&nvmeio->cmd, rwcmd, softc->nsid, lba, count); } static int ndaopen(struct disk *dp) { struct cam_periph *periph; struct nda_softc *softc; int error; periph = (struct cam_periph *)dp->d_drv1; if (cam_periph_acquire(periph) != CAM_REQ_CMP) { return(ENXIO); } cam_periph_lock(periph); if ((error = cam_periph_hold(periph, PRIBIO|PCATCH)) != 0) { cam_periph_unlock(periph); cam_periph_release(periph); return (error); } CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH, ("ndaopen\n")); softc = (struct nda_softc *)periph->softc; softc->flags |= NDA_FLAG_OPEN; cam_periph_unhold(periph); cam_periph_unlock(periph); return (0); } static int ndaclose(struct disk *dp) { struct cam_periph *periph; struct nda_softc *softc; union ccb *ccb; int error; periph = (struct cam_periph *)dp->d_drv1; softc = (struct nda_softc *)periph->softc; cam_periph_lock(periph); CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH, ("ndaclose\n")); if ((softc->flags & NDA_FLAG_DIRTY) != 0 && (periph->flags & CAM_PERIPH_INVALID) == 0 && cam_periph_hold(periph, PRIBIO) == 0) { ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); nda_nvme_flush(softc, &ccb->nvmeio); error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0, /*sense_flags*/0, softc->disk->d_devstat); if (error != 0) xpt_print(periph->path, "Synchronize cache failed\n"); else softc->flags &= ~NDA_FLAG_DIRTY; xpt_release_ccb(ccb); cam_periph_unhold(periph); } softc->flags &= ~NDA_FLAG_OPEN; while (softc->refcount != 0) cam_periph_sleep(periph, &softc->refcount, PRIBIO, "ndaclose", 1); cam_periph_unlock(periph); cam_periph_release(periph); return (0); } static void ndaschedule(struct cam_periph *periph) { struct nda_softc *softc = (struct nda_softc *)periph->softc; if (softc->state != NDA_STATE_NORMAL) return; cam_iosched_schedule(softc->cam_iosched, periph); } /* * Actually translate the requested transfer into one the physical driver * can understand. The transfer is described by a buf and will include * only one physical transfer. */ static void ndastrategy(struct bio *bp) { struct cam_periph *periph; struct nda_softc *softc; periph = (struct cam_periph *)bp->bio_disk->d_drv1; softc = (struct nda_softc *)periph->softc; cam_periph_lock(periph); CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastrategy(%p)\n", bp)); /* * If the device has been made invalid, error out */ if ((periph->flags & CAM_PERIPH_INVALID) != 0) { cam_periph_unlock(periph); biofinish(bp, NULL, ENXIO); return; } /* * Place it in the queue of disk activities for this disk */ cam_iosched_queue_work(softc->cam_iosched, bp); /* * Schedule ourselves for performing the work. */ ndaschedule(periph); cam_periph_unlock(periph); return; } static int ndadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length) { struct cam_periph *periph; struct nda_softc *softc; u_int secsize; union ccb ccb; struct disk *dp; uint64_t lba; uint32_t count; int error = 0; dp = arg; periph = dp->d_drv1; softc = (struct nda_softc *)periph->softc; cam_periph_lock(periph); secsize = softc->disk->d_sectorsize; lba = offset / secsize; count = length / secsize; if ((periph->flags & CAM_PERIPH_INVALID) != 0) { cam_periph_unlock(periph); return (ENXIO); } if (length > 0) { xpt_setup_ccb(&ccb.ccb_h, periph->path, CAM_PRIORITY_NORMAL); ccb.ccb_h.ccb_state = NDA_CCB_DUMP; nda_nvme_write(softc, &ccb.nvmeio, virtual, lba, length, count); xpt_polled_action(&ccb); error = cam_periph_error(&ccb, 0, SF_NO_RECOVERY | SF_NO_RETRY, NULL); if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0) cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); if (error != 0) printf("Aborting dump due to I/O error.\n"); cam_periph_unlock(periph); return (error); } /* Flush */ xpt_setup_ccb(&ccb.ccb_h, periph->path, CAM_PRIORITY_NORMAL); ccb.ccb_h.ccb_state = NDA_CCB_DUMP; nda_nvme_flush(softc, &ccb.nvmeio); xpt_polled_action(&ccb); error = cam_periph_error(&ccb, 0, SF_NO_RECOVERY | SF_NO_RETRY, NULL); if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0) cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); if (error != 0) xpt_print(periph->path, "flush cmd failed\n"); cam_periph_unlock(periph); return (error); } static void ndainit(void) { cam_status status; /* * Install a global async callback. This callback will * receive async callbacks like "new device found". */ status = xpt_register_async(AC_FOUND_DEVICE, ndaasync, NULL, NULL); if (status != CAM_REQ_CMP) { printf("nda: Failed to attach master async callback " "due to status 0x%x!\n", status); } else if (nda_send_ordered) { /* Register our event handlers */ if ((EVENTHANDLER_REGISTER(power_suspend, ndasuspend, NULL, EVENTHANDLER_PRI_LAST)) == NULL) printf("ndainit: power event registration failed!\n"); if ((EVENTHANDLER_REGISTER(shutdown_post_sync, ndashutdown, NULL, SHUTDOWN_PRI_DEFAULT)) == NULL) printf("ndainit: shutdown event registration failed!\n"); } } /* * Callback from GEOM, called when it has finished cleaning up its * resources. */ static void ndadiskgonecb(struct disk *dp) { struct cam_periph *periph; periph = (struct cam_periph *)dp->d_drv1; cam_periph_release(periph); } static void ndaoninvalidate(struct cam_periph *periph) { struct nda_softc *softc; softc = (struct nda_softc *)periph->softc; /* * De-register any async callbacks. */ xpt_register_async(0, ndaasync, periph, periph->path); #ifdef CAM_IO_STATS softc->invalidations++; #endif /* * Return all queued I/O with ENXIO. * XXX Handle any transactions queued to the card * with XPT_ABORT_CCB. */ cam_iosched_flush(softc->cam_iosched, NULL, ENXIO); disk_gone(softc->disk); } static void ndacleanup(struct cam_periph *periph) { struct nda_softc *softc; softc = (struct nda_softc *)periph->softc; cam_periph_unlock(periph); cam_iosched_fini(softc->cam_iosched); /* * If we can't free the sysctl tree, oh well... */ if ((softc->flags & NDA_FLAG_SCTX_INIT) != 0) { #ifdef CAM_IO_STATS if (sysctl_ctx_free(&softc->sysctl_stats_ctx) != 0) xpt_print(periph->path, "can't remove sysctl stats context\n"); #endif if (sysctl_ctx_free(&softc->sysctl_ctx) != 0) xpt_print(periph->path, "can't remove sysctl context\n"); } disk_destroy(softc->disk); free(softc, M_DEVBUF); cam_periph_lock(periph); } static void ndaasync(void *callback_arg, u_int32_t code, struct cam_path *path, void *arg) { struct cam_periph *periph; periph = (struct cam_periph *)callback_arg; switch (code) { case AC_FOUND_DEVICE: { struct ccb_getdev *cgd; cam_status status; cgd = (struct ccb_getdev *)arg; if (cgd == NULL) break; if (cgd->protocol != PROTO_NVME) break; /* * Allocate a peripheral instance for * this device and start the probe * process. */ status = cam_periph_alloc(ndaregister, ndaoninvalidate, ndacleanup, ndastart, "nda", CAM_PERIPH_BIO, path, ndaasync, AC_FOUND_DEVICE, cgd); if (status != CAM_REQ_CMP && status != CAM_REQ_INPROG) printf("ndaasync: Unable to attach to new device " "due to status 0x%x\n", status); break; } case AC_ADVINFO_CHANGED: { uintptr_t buftype; buftype = (uintptr_t)arg; if (buftype == CDAI_TYPE_PHYS_PATH) { struct nda_softc *softc; softc = periph->softc; disk_attr_changed(softc->disk, "GEOM::physpath", M_NOWAIT); } break; } case AC_LOST_DEVICE: default: cam_periph_async(periph, code, path, arg); break; } } static void ndasysctlinit(void *context, int pending) { struct cam_periph *periph; struct nda_softc *softc; char tmpstr[80], tmpstr2[80]; periph = (struct cam_periph *)context; /* periph was held for us when this task was enqueued */ if ((periph->flags & CAM_PERIPH_INVALID) != 0) { cam_periph_release(periph); return; } softc = (struct nda_softc *)periph->softc; snprintf(tmpstr, sizeof(tmpstr), "CAM NDA unit %d", periph->unit_number); snprintf(tmpstr2, sizeof(tmpstr2), "%d", periph->unit_number); sysctl_ctx_init(&softc->sysctl_ctx); softc->flags |= NDA_FLAG_SCTX_INIT; softc->sysctl_tree = SYSCTL_ADD_NODE_WITH_LABEL(&softc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_kern_cam_nda), OID_AUTO, tmpstr2, CTLFLAG_RD, 0, tmpstr, "device_index"); if (softc->sysctl_tree == NULL) { printf("ndasysctlinit: unable to allocate sysctl tree\n"); cam_periph_release(periph); return; } SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "unmapped_io", CTLFLAG_RD | CTLFLAG_MPSAFE, &softc->unmappedio, 0, "Unmapped I/O leaf"); SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "rotating", CTLFLAG_RD | CTLFLAG_MPSAFE, &nda_rotating_media, 0, "Rotating media"); #ifdef CAM_IO_STATS softc->sysctl_stats_tree = SYSCTL_ADD_NODE(&softc->sysctl_stats_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "stats", CTLFLAG_RD, 0, "Statistics"); if (softc->sysctl_stats_tree == NULL) { printf("ndasysctlinit: unable to allocate sysctl tree for stats\n"); cam_periph_release(periph); return; } SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, SYSCTL_CHILDREN(softc->sysctl_stats_tree), OID_AUTO, "timeouts", CTLFLAG_RD | CTLFLAG_MPSAFE, &softc->timeouts, 0, "Device timeouts reported by the SIM"); SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, SYSCTL_CHILDREN(softc->sysctl_stats_tree), OID_AUTO, "errors", CTLFLAG_RD | CTLFLAG_MPSAFE, &softc->errors, 0, "Transport errors reported by the SIM."); SYSCTL_ADD_INT(&softc->sysctl_stats_ctx, SYSCTL_CHILDREN(softc->sysctl_stats_tree), OID_AUTO, "pack_invalidations", CTLFLAG_RD | CTLFLAG_MPSAFE, &softc->invalidations, 0, "Device pack invalidations."); #endif cam_iosched_sysctl_init(softc->cam_iosched, &softc->sysctl_ctx, softc->sysctl_tree); cam_periph_release(periph); } static int ndagetattr(struct bio *bp) { int ret; struct cam_periph *periph; periph = (struct cam_periph *)bp->bio_disk->d_drv1; cam_periph_lock(periph); ret = xpt_getattr(bp->bio_data, bp->bio_length, bp->bio_attribute, periph->path); cam_periph_unlock(periph); if (ret == 0) bp->bio_completed = bp->bio_length; return ret; } static cam_status ndaregister(struct cam_periph *periph, void *arg) { struct nda_softc *softc; struct disk *disk; struct ccb_pathinq cpi; struct ccb_getdev *cgd; const struct nvme_namespace_data *nsd; const struct nvme_controller_data *cd; char announce_buf[80]; // caddr_t match; u_int maxio; int quirks; cgd = (struct ccb_getdev *)arg; if (cgd == NULL) { printf("ndaregister: no getdev CCB, can't register device\n"); return(CAM_REQ_CMP_ERR); } nsd = cgd->nvme_data; cd = cgd->nvme_cdata; softc = (struct nda_softc *)malloc(sizeof(*softc), M_DEVBUF, M_NOWAIT | M_ZERO); if (softc == NULL) { printf("ndaregister: Unable to probe new device. " "Unable to allocate softc\n"); return(CAM_REQ_CMP_ERR); } if (cam_iosched_init(&softc->cam_iosched, periph) != 0) { printf("ndaregister: Unable to probe new device. " "Unable to allocate iosched memory\n"); return(CAM_REQ_CMP_ERR); } /* ident_data parsing */ periph->softc = softc; #if 0 /* * See if this device has any quirks. */ match = cam_quirkmatch((caddr_t)&cgd->ident_data, (caddr_t)nda_quirk_table, sizeof(nda_quirk_table)/sizeof(*nda_quirk_table), sizeof(*nda_quirk_table), ata_identify_match); if (match != NULL) softc->quirks = ((struct nda_quirk_entry *)match)->quirks; else #endif softc->quirks = NDA_Q_NONE; bzero(&cpi, sizeof(cpi)); xpt_setup_ccb(&cpi.ccb_h, periph->path, CAM_PRIORITY_NONE); cpi.ccb_h.func_code = XPT_PATH_INQ; xpt_action((union ccb *)&cpi); TASK_INIT(&softc->sysctl_task, 0, ndasysctlinit, periph); /* * The name space ID is the lun, save it for later I/O */ softc->nsid = (uint16_t)xpt_path_lun_id(periph->path); /* * Register this media as a disk */ (void)cam_periph_hold(periph, PRIBIO); cam_periph_unlock(periph); snprintf(announce_buf, sizeof(announce_buf), "kern.cam.nda.%d.quirks", periph->unit_number); quirks = softc->quirks; TUNABLE_INT_FETCH(announce_buf, &quirks); softc->quirks = quirks; cam_iosched_set_sort_queue(softc->cam_iosched, 0); softc->disk = disk = disk_alloc(); strlcpy(softc->disk->d_descr, cd->mn, MIN(sizeof(softc->disk->d_descr), sizeof(cd->mn))); strlcpy(softc->disk->d_ident, cd->sn, MIN(sizeof(softc->disk->d_ident), sizeof(cd->sn))); - disk->d_rotation_rate = 0; /* Spinning rust need not apply */ + disk->d_rotation_rate = DISK_RR_NON_ROTATING; disk->d_open = ndaopen; disk->d_close = ndaclose; disk->d_strategy = ndastrategy; disk->d_getattr = ndagetattr; disk->d_dump = ndadump; disk->d_gone = ndadiskgonecb; disk->d_name = "nda"; disk->d_drv1 = periph; disk->d_unit = periph->unit_number; maxio = cpi.maxio; /* Honor max I/O size of SIM */ if (maxio == 0) maxio = DFLTPHYS; /* traditional default */ else if (maxio > MAXPHYS) maxio = MAXPHYS; /* for safety */ disk->d_maxsize = maxio; disk->d_sectorsize = 1 << nsd->lbaf[nsd->flbas.format].lbads; disk->d_mediasize = (off_t)(disk->d_sectorsize * nsd->nsze); disk->d_delmaxsize = disk->d_mediasize; disk->d_flags = DISKFLAG_DIRECT_COMPLETION; // if (cd->oncs.dsm) // XXX broken? disk->d_flags |= DISKFLAG_CANDELETE; if (cd->vwc.present) disk->d_flags |= DISKFLAG_CANFLUSHCACHE; if ((cpi.hba_misc & PIM_UNMAPPED) != 0) { disk->d_flags |= DISKFLAG_UNMAPPED_BIO; softc->unmappedio = 1; } /* * d_ident and d_descr are both far bigger than the length of either * the serial or model number strings. */ nvme_strvis(disk->d_descr, cd->mn, sizeof(disk->d_descr), NVME_MODEL_NUMBER_LENGTH); nvme_strvis(disk->d_ident, cd->sn, sizeof(disk->d_ident), NVME_SERIAL_NUMBER_LENGTH); disk->d_hba_vendor = cpi.hba_vendor; disk->d_hba_device = cpi.hba_device; disk->d_hba_subvendor = cpi.hba_subvendor; disk->d_hba_subdevice = cpi.hba_subdevice; disk->d_stripesize = disk->d_sectorsize; disk->d_stripeoffset = 0; disk->d_devstat = devstat_new_entry(periph->periph_name, periph->unit_number, disk->d_sectorsize, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT | XPORT_DEVSTAT_TYPE(cpi.transport), DEVSTAT_PRIORITY_DISK); /* * Acquire a reference to the periph before we register with GEOM. * We'll release this reference once GEOM calls us back (via * ndadiskgonecb()) telling us that our provider has been freed. */ if (cam_periph_acquire(periph) != CAM_REQ_CMP) { xpt_print(periph->path, "%s: lost periph during " "registration!\n", __func__); cam_periph_lock(periph); return (CAM_REQ_CMP_ERR); } disk_create(softc->disk, DISK_VERSION); cam_periph_lock(periph); cam_periph_unhold(periph); snprintf(announce_buf, sizeof(announce_buf), "%juMB (%ju %u byte sectors)", (uintmax_t)((uintmax_t)disk->d_mediasize / (1024*1024)), (uintmax_t)disk->d_mediasize / disk->d_sectorsize, disk->d_sectorsize); xpt_announce_periph(periph, announce_buf); xpt_announce_quirks(periph, softc->quirks, NDA_Q_BIT_STRING); /* * Create our sysctl variables, now that we know * we have successfully attached. */ if (cam_periph_acquire(periph) == CAM_REQ_CMP) taskqueue_enqueue(taskqueue_thread, &softc->sysctl_task); /* * Register for device going away and info about the drive * changing (though with NVMe, it can't) */ xpt_register_async(AC_LOST_DEVICE | AC_ADVINFO_CHANGED, ndaasync, periph, periph->path); softc->state = NDA_STATE_NORMAL; return(CAM_REQ_CMP); } static void ndastart(struct cam_periph *periph, union ccb *start_ccb) { struct nda_softc *softc = (struct nda_softc *)periph->softc; struct ccb_nvmeio *nvmeio = &start_ccb->nvmeio; CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart\n")); switch (softc->state) { case NDA_STATE_NORMAL: { struct bio *bp; bp = cam_iosched_next_bio(softc->cam_iosched); CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart: bio %p\n", bp)); if (bp == NULL) { xpt_release_ccb(start_ccb); break; } switch (bp->bio_cmd) { case BIO_WRITE: softc->flags |= NDA_FLAG_DIRTY; /* FALLTHROUGH */ case BIO_READ: { #ifdef NDA_TEST_FAILURE int fail = 0; /* * Support the failure ioctls. If the command is a * read, and there are pending forced read errors, or * if a write and pending write errors, then fail this * operation with EIO. This is useful for testing * purposes. Also, support having every Nth read fail. * * This is a rather blunt tool. */ if (bp->bio_cmd == BIO_READ) { if (softc->force_read_error) { softc->force_read_error--; fail = 1; } if (softc->periodic_read_error > 0) { if (++softc->periodic_read_count >= softc->periodic_read_error) { softc->periodic_read_count = 0; fail = 1; } } } else { if (softc->force_write_error) { softc->force_write_error--; fail = 1; } } if (fail) { biofinish(bp, NULL, EIO); xpt_release_ccb(start_ccb); ndaschedule(periph); return; } #endif KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 || round_page(bp->bio_bcount + bp->bio_ma_offset) / PAGE_SIZE == bp->bio_ma_n, ("Short bio %p", bp)); nda_nvme_rw_bio(softc, &start_ccb->nvmeio, bp, bp->bio_cmd == BIO_READ ? NVME_OPC_READ : NVME_OPC_WRITE); break; } case BIO_DELETE: { struct nvme_dsm_range *dsm_range; dsm_range = malloc(sizeof(*dsm_range), M_NVMEDA, M_ZERO | M_WAITOK); dsm_range->length = bp->bio_bcount / softc->disk->d_sectorsize; dsm_range->starting_lba = bp->bio_offset / softc->disk->d_sectorsize; bp->bio_driver2 = dsm_range; nda_nvme_trim(softc, &start_ccb->nvmeio, dsm_range, 1); start_ccb->ccb_h.ccb_state = NDA_CCB_TRIM; start_ccb->ccb_h.flags |= CAM_UNLOCKED; cam_iosched_submit_trim(softc->cam_iosched); /* XXX */ goto out; } case BIO_FLUSH: nda_nvme_flush(softc, nvmeio); break; } start_ccb->ccb_h.ccb_state = NDA_CCB_BUFFER_IO; start_ccb->ccb_h.flags |= CAM_UNLOCKED; out: start_ccb->ccb_h.ccb_bp = bp; softc->outstanding_cmds++; softc->refcount++; cam_periph_unlock(periph); xpt_action(start_ccb); cam_periph_lock(periph); softc->refcount--; /* May have more work to do, so ensure we stay scheduled */ ndaschedule(periph); break; } } } static void ndadone(struct cam_periph *periph, union ccb *done_ccb) { struct nda_softc *softc; struct ccb_nvmeio *nvmeio = &done_ccb->nvmeio; struct cam_path *path; int state; softc = (struct nda_softc *)periph->softc; path = done_ccb->ccb_h.path; CAM_DEBUG(path, CAM_DEBUG_TRACE, ("ndadone\n")); state = nvmeio->ccb_h.ccb_state & NDA_CCB_TYPE_MASK; switch (state) { case NDA_CCB_BUFFER_IO: case NDA_CCB_TRIM: { struct bio *bp; int error; cam_periph_lock(periph); bp = (struct bio *)done_ccb->ccb_h.ccb_bp; if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { error = ndaerror(done_ccb, 0, 0); if (error == ERESTART) { /* A retry was scheduled, so just return. */ cam_periph_unlock(periph); return; } if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) cam_release_devq(path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); } else { if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) panic("REQ_CMP with QFRZN"); error = 0; } bp->bio_error = error; if (error != 0) { bp->bio_resid = bp->bio_bcount; bp->bio_flags |= BIO_ERROR; } else { if (state == NDA_CCB_TRIM) bp->bio_resid = 0; else bp->bio_resid = nvmeio->resid; if (bp->bio_resid > 0) bp->bio_flags |= BIO_ERROR; } if (state == NDA_CCB_TRIM) free(bp->bio_driver2, M_NVMEDA); softc->outstanding_cmds--; cam_iosched_bio_complete(softc->cam_iosched, bp, done_ccb); xpt_release_ccb(done_ccb); if (state == NDA_CCB_TRIM) { #ifdef notyet TAILQ_HEAD(, bio) queue; struct bio *bp1; TAILQ_INIT(&queue); TAILQ_CONCAT(&queue, &softc->trim_req.bps, bio_queue); #endif cam_iosched_trim_done(softc->cam_iosched); ndaschedule(periph); cam_periph_unlock(periph); #ifdef notyet /* Not yet collapsing several BIO_DELETE requests into one TRIM */ while ((bp1 = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, bp1, bio_queue); bp1->bio_error = error; if (error != 0) { bp1->bio_flags |= BIO_ERROR; bp1->bio_resid = bp1->bio_bcount; } else bp1->bio_resid = 0; biodone(bp1); } #else biodone(bp); #endif } else { ndaschedule(periph); cam_periph_unlock(periph); biodone(bp); } return; } case NDA_CCB_DUMP: /* No-op. We're polling */ return; default: break; } xpt_release_ccb(done_ccb); } static int ndaerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags) { struct nda_softc *softc; struct cam_periph *periph; periph = xpt_path_periph(ccb->ccb_h.path); softc = (struct nda_softc *)periph->softc; switch (ccb->ccb_h.status & CAM_STATUS_MASK) { case CAM_CMD_TIMEOUT: #ifdef CAM_IO_STATS softc->timeouts++; #endif break; case CAM_REQ_ABORTED: case CAM_REQ_CMP_ERR: case CAM_REQ_TERMIO: case CAM_UNREC_HBA_ERROR: case CAM_DATA_RUN_ERR: case CAM_ATA_STATUS_ERROR: #ifdef CAM_IO_STATS softc->errors++; #endif break; default: break; } return(cam_periph_error(ccb, cam_flags, sense_flags, NULL)); } /* * Step through all NDA peripheral drivers, and if the device is still open, * sync the disk cache to physical media. */ static void ndaflush(void) { struct cam_periph *periph; struct nda_softc *softc; union ccb *ccb; int error; CAM_PERIPH_FOREACH(periph, &ndadriver) { softc = (struct nda_softc *)periph->softc; if (SCHEDULER_STOPPED()) { /* If we paniced with the lock held, do not recurse. */ if (!cam_periph_owned(periph) && (softc->flags & NDA_FLAG_OPEN)) { ndadump(softc->disk, NULL, 0, 0, 0); } continue; } cam_periph_lock(periph); /* * We only sync the cache if the drive is still open, and * if the drive is capable of it.. */ if ((softc->flags & NDA_FLAG_OPEN) == 0) { cam_periph_unlock(periph); continue; } ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL); nda_nvme_flush(softc, &ccb->nvmeio); error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0, /*sense_flags*/ SF_NO_RECOVERY | SF_NO_RETRY, softc->disk->d_devstat); if (error != 0) xpt_print(periph->path, "Synchronize cache failed\n"); xpt_release_ccb(ccb); cam_periph_unlock(periph); } } static void ndashutdown(void *arg, int howto) { ndaflush(); } static void ndasuspend(void *arg) { ndaflush(); } Index: head/sys/dev/mmc/mmcsd.c =================================================================== --- head/sys/dev/mmc/mmcsd.c (revision 311970) +++ head/sys/dev/mmc/mmcsd.c (revision 311971) @@ -1,589 +1,590 @@ /*- * Copyright (c) 2006 Bernd Walter. All rights reserved. * Copyright (c) 2006 M. Warner Losh. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Portions of this software may have been developed with reference to * the SD Simplified Specification. The following disclaimer may apply: * * The following conditions apply to the release of the simplified * specification ("Simplified Specification") by the SD Card Association and * the SD Group. The Simplified Specification is a subset of the complete SD * Specification which is owned by the SD Card Association and the SD * Group. This Simplified Specification is provided on a non-confidential * basis subject to the disclaimers below. Any implementation of the * Simplified Specification may require a license from the SD Card * Association, SD Group, SD-3C LLC or other third parties. * * Disclaimers: * * The information contained in the Simplified Specification is presented only * as a standard specification for SD Cards and SD Host/Ancillary products and * is provided "AS-IS" without any representations or warranties of any * kind. No responsibility is assumed by the SD Group, SD-3C LLC or the SD * Card Association for any damages, any infringements of patents or other * right of the SD Group, SD-3C LLC, the SD Card Association or any third * parties, which may result from its use. No license is granted by * implication, estoppel or otherwise under any patent or other rights of the * SD Group, SD-3C LLC, the SD Card Association or any third party. Nothing * herein shall be construed as an obligation by the SD Group, the SD-3C LLC * or the SD Card Association to disclose or distribute any technical * information, know-how or other confidential information to any third party. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmcbus_if.h" #if __FreeBSD_version < 800002 #define kproc_create kthread_create #define kproc_exit kthread_exit #endif struct mmcsd_softc { device_t dev; struct mtx sc_mtx; struct disk *disk; struct proc *p; struct bio_queue_head bio_queue; daddr_t eblock, eend; /* Range remaining after the last erase. */ int running; int suspend; int log_count; struct timeval log_time; }; static const char *errmsg[] = { "None", "Timeout", "Bad CRC", "Fifo", "Failed", "Invalid", "NO MEMORY" }; #define LOG_PPS 5 /* Log no more than 5 errors per second. */ /* bus entry points */ static int mmcsd_attach(device_t dev); static int mmcsd_detach(device_t dev); static int mmcsd_probe(device_t dev); /* disk routines */ static int mmcsd_close(struct disk *dp); static int mmcsd_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length); static int mmcsd_open(struct disk *dp); static void mmcsd_strategy(struct bio *bp); static void mmcsd_task(void *arg); static int mmcsd_bus_bit_width(device_t dev); static daddr_t mmcsd_delete(struct mmcsd_softc *sc, struct bio *bp); static daddr_t mmcsd_rw(struct mmcsd_softc *sc, struct bio *bp); #define MMCSD_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define MMCSD_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) #define MMCSD_LOCK_INIT(_sc) \ mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \ "mmcsd", MTX_DEF) #define MMCSD_LOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx); #define MMCSD_ASSERT_LOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_OWNED); #define MMCSD_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED); static int mmcsd_probe(device_t dev) { device_quiet(dev); device_set_desc(dev, "MMC/SD Memory Card"); return (0); } static int mmcsd_attach(device_t dev) { struct mmcsd_softc *sc; struct disk *d; intmax_t mb; uint32_t speed; uint32_t maxblocks; char unit; sc = device_get_softc(dev); sc->dev = dev; MMCSD_LOCK_INIT(sc); d = sc->disk = disk_alloc(); d->d_open = mmcsd_open; d->d_close = mmcsd_close; d->d_strategy = mmcsd_strategy; d->d_dump = mmcsd_dump; d->d_name = "mmcsd"; d->d_drv1 = sc; d->d_sectorsize = mmc_get_sector_size(dev); d->d_maxsize = mmc_get_max_data(dev) * d->d_sectorsize; d->d_mediasize = (off_t)mmc_get_media_size(dev) * d->d_sectorsize; d->d_stripesize = mmc_get_erase_sector(dev) * d->d_sectorsize; d->d_unit = device_get_unit(dev); d->d_flags = DISKFLAG_CANDELETE; d->d_delmaxsize = mmc_get_erase_sector(dev) * d->d_sectorsize; strlcpy(d->d_ident, mmc_get_card_sn_string(dev), sizeof(d->d_ident)); strlcpy(d->d_descr, mmc_get_card_id_string(dev), sizeof(d->d_descr)); + d->d_rotation_rate = DISK_RR_NON_ROTATING; /* * Display in most natural units. There's no cards < 1MB. The SD * standard goes to 2GiB due to its reliance on FAT, but the data * format supports up to 4GiB and some card makers push it up to this * limit. The SDHC standard only goes to 32GiB due to FAT32, but the * data format supports up to 2TiB however. 2048GB isn't too ugly, so * we note it in passing here and don't add the code to print * TB). Since these cards are sold in terms of MB and GB not MiB and * GiB, report them like that. We also round to the nearest unit, since * many cards are a few percent short, even of the power of 10 size. */ mb = (d->d_mediasize + 1000000 / 2 - 1) / 1000000; unit = 'M'; if (mb >= 1000) { unit = 'G'; mb = (mb + 1000 / 2 - 1) / 1000; } /* * Report the clock speed of the underlying hardware, which might be * different than what the card reports due to hardware limitations. * Report how many blocks the hardware transfers at once. */ speed = mmcbr_get_clock(device_get_parent(dev)); maxblocks = mmc_get_max_data(dev); device_printf(dev, "%ju%cB <%s>%s at %s %d.%01dMHz/%dbit/%d-block\n", mb, unit, d->d_descr, mmc_get_read_only(dev) ? " (read-only)" : "", device_get_nameunit(device_get_parent(dev)), speed / 1000000, (speed / 100000) % 10, mmcsd_bus_bit_width(dev), maxblocks); disk_create(d, DISK_VERSION); bioq_init(&sc->bio_queue); sc->running = 1; sc->suspend = 0; sc->eblock = sc->eend = 0; kproc_create(&mmcsd_task, sc, &sc->p, 0, 0, "%s: mmc/sd card", device_get_nameunit(dev)); return (0); } static int mmcsd_detach(device_t dev) { struct mmcsd_softc *sc = device_get_softc(dev); MMCSD_LOCK(sc); sc->suspend = 0; if (sc->running > 0) { /* kill thread */ sc->running = 0; wakeup(sc); /* wait for thread to finish. */ while (sc->running != -1) msleep(sc, &sc->sc_mtx, 0, "detach", 0); } MMCSD_UNLOCK(sc); /* Flush the request queue. */ bioq_flush(&sc->bio_queue, NULL, ENXIO); /* kill disk */ disk_destroy(sc->disk); MMCSD_LOCK_DESTROY(sc); return (0); } static int mmcsd_suspend(device_t dev) { struct mmcsd_softc *sc = device_get_softc(dev); MMCSD_LOCK(sc); sc->suspend = 1; if (sc->running > 0) { /* kill thread */ sc->running = 0; wakeup(sc); /* wait for thread to finish. */ while (sc->running != -1) msleep(sc, &sc->sc_mtx, 0, "detach", 0); } MMCSD_UNLOCK(sc); return (0); } static int mmcsd_resume(device_t dev) { struct mmcsd_softc *sc = device_get_softc(dev); MMCSD_LOCK(sc); sc->suspend = 0; if (sc->running <= 0) { sc->running = 1; MMCSD_UNLOCK(sc); kproc_create(&mmcsd_task, sc, &sc->p, 0, 0, "%s: mmc/sd card", device_get_nameunit(dev)); } else MMCSD_UNLOCK(sc); return (0); } static int mmcsd_open(struct disk *dp) { return (0); } static int mmcsd_close(struct disk *dp) { return (0); } static void mmcsd_strategy(struct bio *bp) { struct mmcsd_softc *sc; sc = (struct mmcsd_softc *)bp->bio_disk->d_drv1; MMCSD_LOCK(sc); if (sc->running > 0 || sc->suspend > 0) { bioq_disksort(&sc->bio_queue, bp); MMCSD_UNLOCK(sc); wakeup(sc); } else { MMCSD_UNLOCK(sc); biofinish(bp, NULL, ENXIO); } } static const char * mmcsd_errmsg(int e) { if (e < 0 || e > MMC_ERR_MAX) return "Bad error code"; return errmsg[e]; } static daddr_t mmcsd_rw(struct mmcsd_softc *sc, struct bio *bp) { daddr_t block, end; struct mmc_command cmd; struct mmc_command stop; struct mmc_request req; struct mmc_data data; device_t dev = sc->dev; int sz = sc->disk->d_sectorsize; device_t mmcbr = device_get_parent(dev); block = bp->bio_pblkno; end = bp->bio_pblkno + (bp->bio_bcount / sz); while (block < end) { char *vaddr = bp->bio_data + (block - bp->bio_pblkno) * sz; int numblocks = min(end - block, mmc_get_max_data(dev)); memset(&req, 0, sizeof(req)); memset(&cmd, 0, sizeof(cmd)); memset(&stop, 0, sizeof(stop)); memset(&data, 0, sizeof(data)); cmd.mrq = &req; req.cmd = &cmd; cmd.data = &data; if (bp->bio_cmd == BIO_READ) { if (numblocks > 1) cmd.opcode = MMC_READ_MULTIPLE_BLOCK; else cmd.opcode = MMC_READ_SINGLE_BLOCK; } else { if (numblocks > 1) cmd.opcode = MMC_WRITE_MULTIPLE_BLOCK; else cmd.opcode = MMC_WRITE_BLOCK; } cmd.arg = block; if (!mmc_get_high_cap(dev)) cmd.arg <<= 9; cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC; data.data = vaddr; data.mrq = &req; if (bp->bio_cmd == BIO_READ) data.flags = MMC_DATA_READ; else data.flags = MMC_DATA_WRITE; data.len = numblocks * sz; if (numblocks > 1) { data.flags |= MMC_DATA_MULTI; stop.opcode = MMC_STOP_TRANSMISSION; stop.arg = 0; stop.flags = MMC_RSP_R1B | MMC_CMD_AC; stop.mrq = &req; req.stop = &stop; } MMCBUS_WAIT_FOR_REQUEST(mmcbr, dev, &req); if (req.cmd->error != MMC_ERR_NONE) { if (ppsratecheck(&sc->log_time, &sc->log_count, LOG_PPS)) { device_printf(dev, "Error indicated: %d %s\n", req.cmd->error, mmcsd_errmsg(req.cmd->error)); } break; } block += numblocks; } return (block); } static daddr_t mmcsd_delete(struct mmcsd_softc *sc, struct bio *bp) { daddr_t block, end, start, stop; struct mmc_command cmd; struct mmc_request req; device_t dev = sc->dev; int sz = sc->disk->d_sectorsize; int erase_sector; device_t mmcbr = device_get_parent(dev); block = bp->bio_pblkno; end = bp->bio_pblkno + (bp->bio_bcount / sz); /* Coalesce with part remaining from previous request. */ if (block > sc->eblock && block <= sc->eend) block = sc->eblock; if (end >= sc->eblock && end < sc->eend) end = sc->eend; /* Safe round to the erase sector boundaries. */ erase_sector = mmc_get_erase_sector(dev); start = block + erase_sector - 1; /* Round up. */ start -= start % erase_sector; stop = end; /* Round down. */ stop -= end % erase_sector; /* We can't erase area smaller then sector, store it for later. */ if (start >= stop) { sc->eblock = block; sc->eend = end; return (end); } /* Set erase start position. */ memset(&req, 0, sizeof(req)); memset(&cmd, 0, sizeof(cmd)); cmd.mrq = &req; req.cmd = &cmd; if (mmc_get_card_type(dev) == mode_sd) cmd.opcode = SD_ERASE_WR_BLK_START; else cmd.opcode = MMC_ERASE_GROUP_START; cmd.arg = start; if (!mmc_get_high_cap(dev)) cmd.arg <<= 9; cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; MMCBUS_WAIT_FOR_REQUEST(mmcbr, dev, &req); if (req.cmd->error != MMC_ERR_NONE) { printf("erase err1: %d\n", req.cmd->error); return (block); } /* Set erase stop position. */ memset(&req, 0, sizeof(req)); memset(&cmd, 0, sizeof(cmd)); req.cmd = &cmd; if (mmc_get_card_type(dev) == mode_sd) cmd.opcode = SD_ERASE_WR_BLK_END; else cmd.opcode = MMC_ERASE_GROUP_END; cmd.arg = stop; if (!mmc_get_high_cap(dev)) cmd.arg <<= 9; cmd.arg--; cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; MMCBUS_WAIT_FOR_REQUEST(mmcbr, dev, &req); if (req.cmd->error != MMC_ERR_NONE) { printf("erase err2: %d\n", req.cmd->error); return (block); } /* Erase range. */ memset(&req, 0, sizeof(req)); memset(&cmd, 0, sizeof(cmd)); req.cmd = &cmd; cmd.opcode = MMC_ERASE; cmd.arg = 0; cmd.flags = MMC_RSP_R1B | MMC_CMD_AC; MMCBUS_WAIT_FOR_REQUEST(mmcbr, dev, &req); if (req.cmd->error != MMC_ERR_NONE) { printf("erase err3 %d\n", req.cmd->error); return (block); } /* Store one of remaining parts for the next call. */ if (bp->bio_pblkno >= sc->eblock || block == start) { sc->eblock = stop; /* Predict next forward. */ sc->eend = end; } else { sc->eblock = block; /* Predict next backward. */ sc->eend = start; } return (end); } static int mmcsd_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length) { struct disk *disk = arg; struct mmcsd_softc *sc = (struct mmcsd_softc *)disk->d_drv1; device_t dev = sc->dev; struct bio bp; daddr_t block, end; device_t mmcbr = device_get_parent(dev); /* length zero is special and really means flush buffers to media */ if (!length) return (0); g_reset_bio(&bp); bp.bio_disk = disk; bp.bio_pblkno = offset / disk->d_sectorsize; bp.bio_bcount = length; bp.bio_data = virtual; bp.bio_cmd = BIO_WRITE; end = bp.bio_pblkno + bp.bio_bcount / sc->disk->d_sectorsize; MMCBUS_ACQUIRE_BUS(mmcbr, dev); block = mmcsd_rw(sc, &bp); MMCBUS_RELEASE_BUS(mmcbr, dev); return ((end < block) ? EIO : 0); } static void mmcsd_task(void *arg) { struct mmcsd_softc *sc = (struct mmcsd_softc*)arg; struct bio *bp; int sz; daddr_t block, end; device_t dev = sc->dev; device_t mmcbr = device_get_parent(sc->dev); while (1) { MMCSD_LOCK(sc); do { if (sc->running == 0) goto out; bp = bioq_takefirst(&sc->bio_queue); if (bp == NULL) msleep(sc, &sc->sc_mtx, PRIBIO, "jobqueue", 0); } while (bp == NULL); MMCSD_UNLOCK(sc); if (bp->bio_cmd != BIO_READ && mmc_get_read_only(dev)) { bp->bio_error = EROFS; bp->bio_resid = bp->bio_bcount; bp->bio_flags |= BIO_ERROR; biodone(bp); continue; } MMCBUS_ACQUIRE_BUS(mmcbr, dev); sz = sc->disk->d_sectorsize; block = bp->bio_pblkno; end = bp->bio_pblkno + (bp->bio_bcount / sz); if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { /* Access to the remaining erase block obsoletes it. */ if (block < sc->eend && end > sc->eblock) sc->eblock = sc->eend = 0; block = mmcsd_rw(sc, bp); } else if (bp->bio_cmd == BIO_DELETE) { block = mmcsd_delete(sc, bp); } MMCBUS_RELEASE_BUS(mmcbr, dev); if (block < end) { bp->bio_error = EIO; bp->bio_resid = (end - block) * sz; bp->bio_flags |= BIO_ERROR; } else { bp->bio_resid = 0; } biodone(bp); } out: /* tell parent we're done */ sc->running = -1; MMCSD_UNLOCK(sc); wakeup(sc); kproc_exit(0); } static int mmcsd_bus_bit_width(device_t dev) { if (mmc_get_bus_width(dev) == bus_width_1) return (1); if (mmc_get_bus_width(dev) == bus_width_4) return (4); return (8); } static device_method_t mmcsd_methods[] = { DEVMETHOD(device_probe, mmcsd_probe), DEVMETHOD(device_attach, mmcsd_attach), DEVMETHOD(device_detach, mmcsd_detach), DEVMETHOD(device_suspend, mmcsd_suspend), DEVMETHOD(device_resume, mmcsd_resume), DEVMETHOD_END }; static driver_t mmcsd_driver = { "mmcsd", mmcsd_methods, sizeof(struct mmcsd_softc), }; static devclass_t mmcsd_devclass; DRIVER_MODULE(mmcsd, mmc, mmcsd_driver, mmcsd_devclass, NULL, NULL); Index: head/sys/dev/nand/nand_geom.c =================================================================== --- head/sys/dev/nand/nand_geom.c (revision 311970) +++ head/sys/dev/nand/nand_geom.c (revision 311971) @@ -1,463 +1,465 @@ /*- * Copyright (C) 2009-2012 Semihalf * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include "nand_if.h" #include "nandbus_if.h" #define BIO_NAND_STD ((void *)1) #define BIO_NAND_RAW ((void *)2) static disk_ioctl_t nand_ioctl; static disk_getattr_t nand_getattr; static disk_strategy_t nand_strategy; static disk_strategy_t nand_strategy_raw; static int nand_read(struct nand_chip *chip, uint32_t offset, void *buf, uint32_t len) { nand_debug(NDBG_GEOM, "Read from chip %d [%p] at %d", chip->num, chip, offset); return (nand_read_pages(chip, offset, buf, len)); } static int nand_write(struct nand_chip *chip, uint32_t offset, void* buf, uint32_t len) { nand_debug(NDBG_GEOM, "Write to chip %d [%p] at %d", chip->num, chip, offset); return (nand_prog_pages(chip, offset, buf, len)); } static int nand_read_raw(struct nand_chip *chip, uint32_t offset, void *buf, uint32_t len) { nand_debug(NDBG_GEOM, "Raw read from chip %d [%p] at %d", chip->num, chip, offset); return (nand_read_pages_raw(chip, offset, buf, len)); } static int nand_write_raw(struct nand_chip *chip, uint32_t offset, void *buf, uint32_t len) { nand_debug(NDBG_GEOM, "Raw write to chip %d [%p] at %d", chip->num, chip, offset); return (nand_prog_pages_raw(chip, offset, buf, len)); } static void nand_strategy(struct bio *bp) { struct nand_chip *chip; chip = (struct nand_chip *)bp->bio_disk->d_drv1; bp->bio_driver1 = BIO_NAND_STD; nand_debug(NDBG_GEOM, "Strategy %s on chip %d [%p]", bp->bio_cmd == BIO_READ ? "READ" : (bp->bio_cmd == BIO_WRITE ? "WRITE" : (bp->bio_cmd == BIO_DELETE ? "DELETE" : "UNKNOWN")), chip->num, chip); mtx_lock(&chip->qlock); bioq_insert_tail(&chip->bioq, bp); mtx_unlock(&chip->qlock); taskqueue_enqueue(chip->tq, &chip->iotask); } static void nand_strategy_raw(struct bio *bp) { struct nand_chip *chip; chip = (struct nand_chip *)bp->bio_disk->d_drv1; /* Inform taskqueue that it's a raw access */ bp->bio_driver1 = BIO_NAND_RAW; nand_debug(NDBG_GEOM, "Strategy %s on chip %d [%p]", bp->bio_cmd == BIO_READ ? "READ" : (bp->bio_cmd == BIO_WRITE ? "WRITE" : (bp->bio_cmd == BIO_DELETE ? "DELETE" : "UNKNOWN")), chip->num, chip); mtx_lock(&chip->qlock); bioq_insert_tail(&chip->bioq, bp); mtx_unlock(&chip->qlock); taskqueue_enqueue(chip->tq, &chip->iotask); } static int nand_oob_access(struct nand_chip *chip, uint32_t page, uint32_t offset, uint32_t len, uint8_t *data, uint8_t write) { struct chip_geom *cg; int ret = 0; cg = &chip->chip_geom; if (!write) ret = nand_read_oob(chip, page, data, cg->oob_size); else ret = nand_prog_oob(chip, page, data, cg->oob_size); return (ret); } static int nand_getattr(struct bio *bp) { struct nand_chip *chip; struct chip_geom *cg; device_t dev; int val; if (bp->bio_disk == NULL || bp->bio_disk->d_drv1 == NULL) return (ENXIO); chip = (struct nand_chip *)bp->bio_disk->d_drv1; cg = &(chip->chip_geom); dev = device_get_parent(chip->dev); dev = device_get_parent(dev); if (strcmp(bp->bio_attribute, "NAND::device") == 0) { if (bp->bio_length != sizeof(dev)) return (EFAULT); bcopy(&dev, bp->bio_data, sizeof(dev)); } else { if (strcmp(bp->bio_attribute, "NAND::oobsize") == 0) val = cg->oob_size; else if (strcmp(bp->bio_attribute, "NAND::pagesize") == 0) val = cg->page_size; else if (strcmp(bp->bio_attribute, "NAND::blocksize") == 0) val = cg->block_size; else return (-1); if (bp->bio_length != sizeof(val)) return (EFAULT); bcopy(&val, bp->bio_data, sizeof(val)); } bp->bio_completed = bp->bio_length; return (0); } static int nand_ioctl(struct disk *ndisk, u_long cmd, void *data, int fflag, struct thread *td) { struct nand_chip *chip; struct chip_geom *cg; struct nand_oob_rw *oob_rw = NULL; struct nand_raw_rw *raw_rw = NULL; device_t nandbus; size_t bufsize = 0, len = 0; size_t raw_size; off_t off; uint8_t *buf = NULL; int ret = 0; uint8_t status; chip = (struct nand_chip *)ndisk->d_drv1; cg = &chip->chip_geom; nandbus = device_get_parent(chip->dev); if ((cmd == NAND_IO_RAW_READ) || (cmd == NAND_IO_RAW_PROG)) { raw_rw = (struct nand_raw_rw *)data; raw_size = cg->pgs_per_blk * (cg->page_size + cg->oob_size); /* Check if len is not bigger than chip size */ if (raw_rw->len > raw_size) return (EFBIG); /* * Do not ask for too much memory, in case of large transfers * read/write in 16-pages chunks */ bufsize = 16 * (cg->page_size + cg->oob_size); if (raw_rw->len < bufsize) bufsize = raw_rw->len; buf = malloc(bufsize, M_NAND, M_WAITOK); len = raw_rw->len; off = 0; } switch (cmd) { case NAND_IO_ERASE: ret = nand_erase_blocks(chip, ((off_t *)data)[0], ((off_t *)data)[1]); break; case NAND_IO_OOB_READ: oob_rw = (struct nand_oob_rw *)data; ret = nand_oob_access(chip, oob_rw->page, 0, oob_rw->len, oob_rw->data, 0); break; case NAND_IO_OOB_PROG: oob_rw = (struct nand_oob_rw *)data; ret = nand_oob_access(chip, oob_rw->page, 0, oob_rw->len, oob_rw->data, 1); break; case NAND_IO_GET_STATUS: NANDBUS_LOCK(nandbus); ret = NANDBUS_GET_STATUS(nandbus, &status); if (ret == 0) *(uint8_t *)data = status; NANDBUS_UNLOCK(nandbus); break; case NAND_IO_RAW_PROG: while (len > 0) { if (len < bufsize) bufsize = len; ret = copyin(raw_rw->data + off, buf, bufsize); if (ret) break; ret = nand_prog_pages_raw(chip, raw_rw->off + off, buf, bufsize); if (ret) break; len -= bufsize; off += bufsize; } break; case NAND_IO_RAW_READ: while (len > 0) { if (len < bufsize) bufsize = len; ret = nand_read_pages_raw(chip, raw_rw->off + off, buf, bufsize); if (ret) break; ret = copyout(buf, raw_rw->data + off, bufsize); if (ret) break; len -= bufsize; off += bufsize; } break; case NAND_IO_GET_CHIP_PARAM: nand_get_chip_param(chip, (struct chip_param_io *)data); break; default: printf("Unknown nand_ioctl request \n"); ret = EIO; } if (buf) free(buf, M_NAND); return (ret); } static void nand_io_proc(void *arg, int pending) { struct nand_chip *chip = arg; struct bio *bp; int err = 0; for (;;) { mtx_lock(&chip->qlock); bp = bioq_takefirst(&chip->bioq); mtx_unlock(&chip->qlock); if (bp == NULL) break; if (bp->bio_driver1 == BIO_NAND_STD) { if (bp->bio_cmd == BIO_READ) { err = nand_read(chip, bp->bio_offset & 0xffffffff, bp->bio_data, bp->bio_bcount); } else if (bp->bio_cmd == BIO_WRITE) { err = nand_write(chip, bp->bio_offset & 0xffffffff, bp->bio_data, bp->bio_bcount); } } else if (bp->bio_driver1 == BIO_NAND_RAW) { if (bp->bio_cmd == BIO_READ) { err = nand_read_raw(chip, bp->bio_offset & 0xffffffff, bp->bio_data, bp->bio_bcount); } else if (bp->bio_cmd == BIO_WRITE) { err = nand_write_raw(chip, bp->bio_offset & 0xffffffff, bp->bio_data, bp->bio_bcount); } } else panic("Unknown access type in bio->bio_driver1\n"); if (bp->bio_cmd == BIO_DELETE) { nand_debug(NDBG_GEOM, "Delete on chip%d offset %lld " "length %ld\n", chip->num, bp->bio_offset, bp->bio_bcount); err = nand_erase_blocks(chip, bp->bio_offset & 0xffffffff, bp->bio_bcount); } if (err == 0 || err == ECC_CORRECTABLE) bp->bio_resid = 0; else { nand_debug(NDBG_GEOM,"nand_[read|write|erase_blocks] " "error: %d\n", err); bp->bio_error = EIO; bp->bio_flags |= BIO_ERROR; bp->bio_resid = bp->bio_bcount; } biodone(bp); } } int create_geom_disk(struct nand_chip *chip) { struct disk *ndisk, *rdisk; /* Create the disk device */ ndisk = disk_alloc(); ndisk->d_strategy = nand_strategy; ndisk->d_ioctl = nand_ioctl; ndisk->d_getattr = nand_getattr; ndisk->d_name = "gnand"; ndisk->d_drv1 = chip; ndisk->d_maxsize = chip->chip_geom.block_size; ndisk->d_sectorsize = chip->chip_geom.page_size; ndisk->d_mediasize = chip->chip_geom.chip_size; ndisk->d_unit = chip->num + 10 * device_get_unit(device_get_parent(chip->dev)); /* * When using BBT, make two last blocks of device unavailable * to user (because those are used to store BBT table). */ if (chip->bbt != NULL) ndisk->d_mediasize -= (2 * chip->chip_geom.block_size); ndisk->d_flags = DISKFLAG_CANDELETE; snprintf(ndisk->d_ident, sizeof(ndisk->d_ident), "nand: Man:0x%02x Dev:0x%02x", chip->id.man_id, chip->id.dev_id); + ndisk->d_rotation_rate = DISK_RR_NON_ROTATING; disk_create(ndisk, DISK_VERSION); /* Create the RAW disk device */ rdisk = disk_alloc(); rdisk->d_strategy = nand_strategy_raw; rdisk->d_ioctl = nand_ioctl; rdisk->d_getattr = nand_getattr; rdisk->d_name = "gnand.raw"; rdisk->d_drv1 = chip; rdisk->d_maxsize = chip->chip_geom.block_size; rdisk->d_sectorsize = chip->chip_geom.page_size; rdisk->d_mediasize = chip->chip_geom.chip_size; rdisk->d_unit = chip->num + 10 * device_get_unit(device_get_parent(chip->dev)); rdisk->d_flags = DISKFLAG_CANDELETE; snprintf(rdisk->d_ident, sizeof(rdisk->d_ident), "nand_raw: Man:0x%02x Dev:0x%02x", chip->id.man_id, chip->id.dev_id); + disk->d_rotation_rate = DISK_RR_NON_ROTATING; disk_create(rdisk, DISK_VERSION); chip->ndisk = ndisk; chip->rdisk = rdisk; mtx_init(&chip->qlock, "NAND I/O lock", NULL, MTX_DEF); bioq_init(&chip->bioq); TASK_INIT(&chip->iotask, 0, nand_io_proc, chip); chip->tq = taskqueue_create("nand_taskq", M_WAITOK, taskqueue_thread_enqueue, &chip->tq); taskqueue_start_threads(&chip->tq, 1, PI_DISK, "nand taskq"); if (bootverbose) device_printf(chip->dev, "Created gnand%d for chip [0x%0x, " "0x%0x]\n", ndisk->d_unit, chip->id.man_id, chip->id.dev_id); return (0); } void destroy_geom_disk(struct nand_chip *chip) { struct bio *bp; taskqueue_free(chip->tq); disk_destroy(chip->ndisk); disk_destroy(chip->rdisk); mtx_lock(&chip->qlock); for (;;) { bp = bioq_takefirst(&chip->bioq); if (bp == NULL) break; bp->bio_error = EIO; bp->bio_flags |= BIO_ERROR; bp->bio_resid = bp->bio_bcount; biodone(bp); } mtx_unlock(&chip->qlock); mtx_destroy(&chip->qlock); } Index: head/sys/dev/nvd/nvd.c =================================================================== --- head/sys/dev/nvd/nvd.c (revision 311970) +++ head/sys/dev/nvd/nvd.c (revision 311971) @@ -1,441 +1,439 @@ /*- * Copyright (C) 2012-2016 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #define NVD_STR "nvd" struct nvd_disk; static disk_ioctl_t nvd_ioctl; static disk_strategy_t nvd_strategy; static dumper_t nvd_dump; static void nvd_done(void *arg, const struct nvme_completion *cpl); static void *nvd_new_disk(struct nvme_namespace *ns, void *ctrlr); static void destroy_geom_disk(struct nvd_disk *ndisk); static void *nvd_new_controller(struct nvme_controller *ctrlr); static void nvd_controller_fail(void *ctrlr); static int nvd_load(void); static void nvd_unload(void); MALLOC_DEFINE(M_NVD, "nvd", "nvd(4) allocations"); struct nvme_consumer *consumer_handle; struct nvd_disk { struct bio_queue_head bioq; struct task bioqtask; struct mtx bioqlock; struct disk *disk; struct taskqueue *tq; struct nvme_namespace *ns; uint32_t cur_depth; uint32_t ordered_in_flight; TAILQ_ENTRY(nvd_disk) global_tailq; TAILQ_ENTRY(nvd_disk) ctrlr_tailq; }; struct nvd_controller { TAILQ_ENTRY(nvd_controller) tailq; TAILQ_HEAD(, nvd_disk) disk_head; }; static TAILQ_HEAD(, nvd_controller) ctrlr_head; static TAILQ_HEAD(disk_list, nvd_disk) disk_head; static SYSCTL_NODE(_hw, OID_AUTO, nvd, CTLFLAG_RD, 0, "nvd driver parameters"); /* * The NVMe specification does not define a maximum or optimal delete size, so * technically max delete size is min(full size of the namespace, 2^32 - 1 * LBAs). A single delete for a multi-TB NVMe namespace though may take much * longer to complete than the nvme(4) I/O timeout period. So choose a sensible * default here that is still suitably large to minimize the number of overall * delete operations. */ static uint64_t nvd_delete_max = (1024 * 1024 * 1024); /* 1GB */ SYSCTL_UQUAD(_hw_nvd, OID_AUTO, delete_max, CTLFLAG_RDTUN, &nvd_delete_max, 0, "nvd maximum BIO_DELETE size in bytes"); static int nvd_modevent(module_t mod, int type, void *arg) { int error = 0; switch (type) { case MOD_LOAD: error = nvd_load(); break; case MOD_UNLOAD: nvd_unload(); break; default: break; } return (error); } moduledata_t nvd_mod = { NVD_STR, (modeventhand_t)nvd_modevent, 0 }; DECLARE_MODULE(nvd, nvd_mod, SI_SUB_DRIVERS, SI_ORDER_ANY); MODULE_VERSION(nvd, 1); MODULE_DEPEND(nvd, nvme, 1, 1, 1); static int nvd_load() { TAILQ_INIT(&ctrlr_head); TAILQ_INIT(&disk_head); consumer_handle = nvme_register_consumer(nvd_new_disk, nvd_new_controller, NULL, nvd_controller_fail); return (consumer_handle != NULL ? 0 : -1); } static void nvd_unload() { struct nvd_controller *ctrlr; struct nvd_disk *disk; while (!TAILQ_EMPTY(&ctrlr_head)) { ctrlr = TAILQ_FIRST(&ctrlr_head); TAILQ_REMOVE(&ctrlr_head, ctrlr, tailq); free(ctrlr, M_NVD); } while (!TAILQ_EMPTY(&disk_head)) { disk = TAILQ_FIRST(&disk_head); TAILQ_REMOVE(&disk_head, disk, global_tailq); destroy_geom_disk(disk); free(disk, M_NVD); } nvme_unregister_consumer(consumer_handle); } static int nvd_bio_submit(struct nvd_disk *ndisk, struct bio *bp) { int err; bp->bio_driver1 = NULL; atomic_add_int(&ndisk->cur_depth, 1); err = nvme_ns_bio_process(ndisk->ns, bp, nvd_done); if (err) { atomic_add_int(&ndisk->cur_depth, -1); if (__predict_false(bp->bio_flags & BIO_ORDERED)) atomic_add_int(&ndisk->ordered_in_flight, -1); bp->bio_error = err; bp->bio_flags |= BIO_ERROR; bp->bio_resid = bp->bio_bcount; biodone(bp); return (-1); } return (0); } static void nvd_strategy(struct bio *bp) { struct nvd_disk *ndisk; ndisk = (struct nvd_disk *)bp->bio_disk->d_drv1; if (__predict_false(bp->bio_flags & BIO_ORDERED)) atomic_add_int(&ndisk->ordered_in_flight, 1); if (__predict_true(ndisk->ordered_in_flight == 0)) { nvd_bio_submit(ndisk, bp); return; } /* * There are ordered bios in flight, so we need to submit * bios through the task queue to enforce ordering. */ mtx_lock(&ndisk->bioqlock); bioq_insert_tail(&ndisk->bioq, bp); mtx_unlock(&ndisk->bioqlock); taskqueue_enqueue(ndisk->tq, &ndisk->bioqtask); } static int nvd_ioctl(struct disk *ndisk, u_long cmd, void *data, int fflag, struct thread *td) { int ret = 0; switch (cmd) { default: ret = EIO; } return (ret); } static int nvd_dump(void *arg, void *virt, vm_offset_t phys, off_t offset, size_t len) { struct nvd_disk *ndisk; struct disk *dp; dp = arg; ndisk = dp->d_drv1; return (nvme_ns_dump(ndisk->ns, virt, offset, len)); } static void nvd_done(void *arg, const struct nvme_completion *cpl) { struct bio *bp; struct nvd_disk *ndisk; bp = (struct bio *)arg; ndisk = bp->bio_disk->d_drv1; atomic_add_int(&ndisk->cur_depth, -1); if (__predict_false(bp->bio_flags & BIO_ORDERED)) atomic_add_int(&ndisk->ordered_in_flight, -1); biodone(bp); } static void nvd_bioq_process(void *arg, int pending) { struct nvd_disk *ndisk = arg; struct bio *bp; for (;;) { mtx_lock(&ndisk->bioqlock); bp = bioq_takefirst(&ndisk->bioq); mtx_unlock(&ndisk->bioqlock); if (bp == NULL) break; if (nvd_bio_submit(ndisk, bp) != 0) { continue; } #ifdef BIO_ORDERED /* * BIO_ORDERED flag dictates that the bio with BIO_ORDERED * flag set must be completed before proceeding with * additional bios. */ if (bp->bio_flags & BIO_ORDERED) { while (ndisk->cur_depth > 0) { pause("nvd flush", 1); } } #endif } } static void * nvd_new_controller(struct nvme_controller *ctrlr) { struct nvd_controller *nvd_ctrlr; nvd_ctrlr = malloc(sizeof(struct nvd_controller), M_NVD, M_ZERO | M_WAITOK); TAILQ_INIT(&nvd_ctrlr->disk_head); TAILQ_INSERT_TAIL(&ctrlr_head, nvd_ctrlr, tailq); return (nvd_ctrlr); } static void * nvd_new_disk(struct nvme_namespace *ns, void *ctrlr_arg) { uint8_t descr[NVME_MODEL_NUMBER_LENGTH+1]; struct nvd_disk *ndisk; struct disk *disk; struct nvd_controller *ctrlr = ctrlr_arg; ndisk = malloc(sizeof(struct nvd_disk), M_NVD, M_ZERO | M_WAITOK); disk = disk_alloc(); disk->d_strategy = nvd_strategy; disk->d_ioctl = nvd_ioctl; disk->d_dump = nvd_dump; disk->d_name = NVD_STR; disk->d_drv1 = ndisk; disk->d_maxsize = nvme_ns_get_max_io_xfer_size(ns); disk->d_sectorsize = nvme_ns_get_sector_size(ns); disk->d_mediasize = (off_t)nvme_ns_get_size(ns); disk->d_delmaxsize = (off_t)nvme_ns_get_size(ns); if (disk->d_delmaxsize > nvd_delete_max) disk->d_delmaxsize = nvd_delete_max; disk->d_stripesize = nvme_ns_get_stripesize(ns); if (TAILQ_EMPTY(&disk_head)) disk->d_unit = 0; else disk->d_unit = TAILQ_LAST(&disk_head, disk_list)->disk->d_unit + 1; disk->d_flags = DISKFLAG_DIRECT_COMPLETION; if (nvme_ns_get_flags(ns) & NVME_NS_DEALLOCATE_SUPPORTED) disk->d_flags |= DISKFLAG_CANDELETE; if (nvme_ns_get_flags(ns) & NVME_NS_FLUSH_SUPPORTED) disk->d_flags |= DISKFLAG_CANFLUSHCACHE; /* ifdef used here to ease porting to stable branches at a later point. */ #ifdef DISKFLAG_UNMAPPED_BIO disk->d_flags |= DISKFLAG_UNMAPPED_BIO; #endif /* * d_ident and d_descr are both far bigger than the length of either * the serial or model number strings. */ nvme_strvis(disk->d_ident, nvme_ns_get_serial_number(ns), sizeof(disk->d_ident), NVME_SERIAL_NUMBER_LENGTH); - nvme_strvis(descr, nvme_ns_get_model_number(ns), sizeof(descr), NVME_MODEL_NUMBER_LENGTH); - -#if __FreeBSD_version >= 900034 strlcpy(disk->d_descr, descr, sizeof(descr)); -#endif + + disk->d_rotation_rate = DISK_RR_NON_ROTATING; ndisk->ns = ns; ndisk->disk = disk; ndisk->cur_depth = 0; ndisk->ordered_in_flight = 0; mtx_init(&ndisk->bioqlock, "NVD bioq lock", NULL, MTX_DEF); bioq_init(&ndisk->bioq); TASK_INIT(&ndisk->bioqtask, 0, nvd_bioq_process, ndisk); ndisk->tq = taskqueue_create("nvd_taskq", M_WAITOK, taskqueue_thread_enqueue, &ndisk->tq); taskqueue_start_threads(&ndisk->tq, 1, PI_DISK, "nvd taskq"); TAILQ_INSERT_TAIL(&disk_head, ndisk, global_tailq); TAILQ_INSERT_TAIL(&ctrlr->disk_head, ndisk, ctrlr_tailq); disk_create(disk, DISK_VERSION); printf(NVD_STR"%u: <%s> NVMe namespace\n", disk->d_unit, descr); printf(NVD_STR"%u: %juMB (%ju %u byte sectors)\n", disk->d_unit, (uintmax_t)disk->d_mediasize / (1024*1024), (uintmax_t)disk->d_mediasize / disk->d_sectorsize, disk->d_sectorsize); return (NULL); } static void destroy_geom_disk(struct nvd_disk *ndisk) { struct bio *bp; struct disk *disk; uint32_t unit; int cnt = 0; disk = ndisk->disk; unit = disk->d_unit; taskqueue_free(ndisk->tq); disk_destroy(ndisk->disk); mtx_lock(&ndisk->bioqlock); for (;;) { bp = bioq_takefirst(&ndisk->bioq); if (bp == NULL) break; bp->bio_error = EIO; bp->bio_flags |= BIO_ERROR; bp->bio_resid = bp->bio_bcount; cnt++; biodone(bp); } printf(NVD_STR"%u: lost device - %d outstanding\n", unit, cnt); printf(NVD_STR"%u: removing device entry\n", unit); mtx_unlock(&ndisk->bioqlock); mtx_destroy(&ndisk->bioqlock); } static void nvd_controller_fail(void *ctrlr_arg) { struct nvd_controller *ctrlr = ctrlr_arg; struct nvd_disk *disk; while (!TAILQ_EMPTY(&ctrlr->disk_head)) { disk = TAILQ_FIRST(&ctrlr->disk_head); TAILQ_REMOVE(&disk_head, disk, global_tailq); TAILQ_REMOVE(&ctrlr->disk_head, disk, ctrlr_tailq); destroy_geom_disk(disk); free(disk, M_NVD); } TAILQ_REMOVE(&ctrlr_head, ctrlr, tailq); free(ctrlr, M_NVD); } Index: head/sys/geom/geom_disk.c =================================================================== --- head/sys/geom/geom_disk.c (revision 311970) +++ head/sys/geom/geom_disk.c (revision 311971) @@ -1,1012 +1,1012 @@ /*- * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_geom.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct g_disk_softc { struct mtx done_mtx; struct disk *dp; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; char led[64]; uint32_t state; struct mtx start_mtx; }; static g_access_t g_disk_access; static g_start_t g_disk_start; static g_ioctl_t g_disk_ioctl; static g_dumpconf_t g_disk_dumpconf; static g_provgone_t g_disk_providergone; static struct g_class g_disk_class = { .name = G_DISK_CLASS_NAME, .version = G_VERSION, .start = g_disk_start, .access = g_disk_access, .ioctl = g_disk_ioctl, .providergone = g_disk_providergone, .dumpconf = g_disk_dumpconf, }; SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, disk, CTLFLAG_RW, 0, "GEOM_DISK stuff"); DECLARE_GEOM_CLASS(g_disk_class, g_disk); static int g_disk_access(struct g_provider *pp, int r, int w, int e) { struct disk *dp; struct g_disk_softc *sc; int error; g_trace(G_T_ACCESS, "g_disk_access(%s, %d, %d, %d)", pp->name, r, w, e); g_topology_assert(); sc = pp->private; if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) { /* * Allow decreasing access count even if disk is not * available anymore. */ if (r <= 0 && w <= 0 && e <= 0) return (0); return (ENXIO); } r += pp->acr; w += pp->acw; e += pp->ace; error = 0; if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) { if (dp->d_open != NULL) { error = dp->d_open(dp); if (bootverbose && error != 0) printf("Opened disk %s -> %d\n", pp->name, error); if (error != 0) return (error); } pp->sectorsize = dp->d_sectorsize; if (dp->d_maxsize == 0) { printf("WARNING: Disk drive %s%d has no d_maxsize\n", dp->d_name, dp->d_unit); dp->d_maxsize = DFLTPHYS; } if (dp->d_delmaxsize == 0) { if (bootverbose && dp->d_flags & DISKFLAG_CANDELETE) { printf("WARNING: Disk drive %s%d has no " "d_delmaxsize\n", dp->d_name, dp->d_unit); } dp->d_delmaxsize = dp->d_maxsize; } pp->stripeoffset = dp->d_stripeoffset; pp->stripesize = dp->d_stripesize; dp->d_flags |= DISKFLAG_OPEN; /* * Do not invoke resize event when initial size was zero. * Some disks report its size only after first opening. */ if (pp->mediasize == 0) pp->mediasize = dp->d_mediasize; else g_resize_provider(pp, dp->d_mediasize); } else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) { if (dp->d_close != NULL) { error = dp->d_close(dp); if (error != 0) printf("Closed disk %s -> %d\n", pp->name, error); } sc->state = G_STATE_ACTIVE; if (sc->led[0] != 0) led_set(sc->led, "0"); dp->d_flags &= ~DISKFLAG_OPEN; } return (error); } static void g_disk_kerneldump(struct bio *bp, struct disk *dp) { struct g_kerneldump *gkd; struct g_geom *gp; gkd = (struct g_kerneldump*)bp->bio_data; gp = bp->bio_to->geom; g_trace(G_T_TOPOLOGY, "g_disk_kerneldump(%s, %jd, %jd)", gp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length); if (dp->d_dump == NULL) { g_io_deliver(bp, ENODEV); return; } gkd->di.dumper = dp->d_dump; gkd->di.priv = dp; gkd->di.blocksize = dp->d_sectorsize; gkd->di.maxiosize = dp->d_maxsize; gkd->di.mediaoffset = gkd->offset; if ((gkd->offset + gkd->length) > dp->d_mediasize) gkd->length = dp->d_mediasize - gkd->offset; gkd->di.mediasize = gkd->length; g_io_deliver(bp, 0); } static void g_disk_setstate(struct bio *bp, struct g_disk_softc *sc) { const char *cmd; memcpy(&sc->state, bp->bio_data, sizeof(sc->state)); if (sc->led[0] != 0) { switch (sc->state) { case G_STATE_FAILED: cmd = "1"; break; case G_STATE_REBUILD: cmd = "f5"; break; case G_STATE_RESYNC: cmd = "f1"; break; default: cmd = "0"; break; } led_set(sc->led, cmd); } g_io_deliver(bp, 0); } static void g_disk_done(struct bio *bp) { struct bintime now; struct bio *bp2; struct g_disk_softc *sc; /* See "notes" for why we need a mutex here */ /* XXX: will witness accept a mix of Giant/unGiant drivers here ? */ bp2 = bp->bio_parent; sc = bp2->bio_to->private; bp->bio_completed = bp->bio_length - bp->bio_resid; binuptime(&now); mtx_lock(&sc->done_mtx); if (bp2->bio_error == 0) bp2->bio_error = bp->bio_error; bp2->bio_completed += bp->bio_completed; switch (bp->bio_cmd) { case BIO_ZONE: bcopy(&bp->bio_zone, &bp2->bio_zone, sizeof(bp->bio_zone)); /*FALLTHROUGH*/ case BIO_READ: case BIO_WRITE: case BIO_DELETE: case BIO_FLUSH: devstat_end_transaction_bio_bt(sc->dp->d_devstat, bp, &now); break; default: break; } bp2->bio_inbed++; if (bp2->bio_children == bp2->bio_inbed) { mtx_unlock(&sc->done_mtx); bp2->bio_resid = bp2->bio_bcount - bp2->bio_completed; g_io_deliver(bp2, bp2->bio_error); } else mtx_unlock(&sc->done_mtx); g_destroy_bio(bp); } static int g_disk_ioctl(struct g_provider *pp, u_long cmd, void * data, int fflag, struct thread *td) { struct disk *dp; struct g_disk_softc *sc; int error; sc = pp->private; dp = sc->dp; if (dp->d_ioctl == NULL) return (ENOIOCTL); error = dp->d_ioctl(dp, cmd, data, fflag, td); return (error); } static off_t g_disk_maxsize(struct disk *dp, struct bio *bp) { if (bp->bio_cmd == BIO_DELETE) return (dp->d_delmaxsize); return (dp->d_maxsize); } static int g_disk_maxsegs(struct disk *dp, struct bio *bp) { return ((g_disk_maxsize(dp, bp) / PAGE_SIZE) + 1); } static void g_disk_advance(struct disk *dp, struct bio *bp, off_t off) { bp->bio_offset += off; bp->bio_length -= off; if ((bp->bio_flags & BIO_VLIST) != 0) { bus_dma_segment_t *seg, *end; seg = (bus_dma_segment_t *)bp->bio_data; end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n; off += bp->bio_ma_offset; while (off >= seg->ds_len) { KASSERT((seg != end), ("vlist request runs off the end")); off -= seg->ds_len; seg++; } bp->bio_ma_offset = off; bp->bio_ma_n = end - seg; bp->bio_data = (void *)seg; } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma += off / PAGE_SIZE; bp->bio_ma_offset += off; bp->bio_ma_offset %= PAGE_SIZE; bp->bio_ma_n -= off / PAGE_SIZE; } else { bp->bio_data += off; } } static void g_disk_seg_limit(bus_dma_segment_t *seg, off_t *poffset, off_t *plength, int *ppages) { uintptr_t seg_page_base; uintptr_t seg_page_end; off_t offset; off_t length; int seg_pages; offset = *poffset; length = *plength; if (length > seg->ds_len - offset) length = seg->ds_len - offset; seg_page_base = trunc_page(seg->ds_addr + offset); seg_page_end = round_page(seg->ds_addr + offset + length); seg_pages = (seg_page_end - seg_page_base) >> PAGE_SHIFT; if (seg_pages > *ppages) { seg_pages = *ppages; length = (seg_page_base + (seg_pages << PAGE_SHIFT)) - (seg->ds_addr + offset); } *poffset = 0; *plength -= length; *ppages -= seg_pages; } static off_t g_disk_vlist_limit(struct disk *dp, struct bio *bp, bus_dma_segment_t **pendseg) { bus_dma_segment_t *seg, *end; off_t residual; off_t offset; int pages; seg = (bus_dma_segment_t *)bp->bio_data; end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n; residual = bp->bio_length; offset = bp->bio_ma_offset; pages = g_disk_maxsegs(dp, bp); while (residual != 0 && pages != 0) { KASSERT((seg != end), ("vlist limit runs off the end")); g_disk_seg_limit(seg, &offset, &residual, &pages); seg++; } if (pendseg != NULL) *pendseg = seg; return (residual); } static bool g_disk_limit(struct disk *dp, struct bio *bp) { bool limited = false; off_t maxsz; maxsz = g_disk_maxsize(dp, bp); /* * XXX: If we have a stripesize we should really use it here. * Care should be taken in the delete case if this is done * as deletes can be very sensitive to size given how they * are processed. */ if (bp->bio_length > maxsz) { bp->bio_length = maxsz; limited = true; } if ((bp->bio_flags & BIO_VLIST) != 0) { bus_dma_segment_t *firstseg, *endseg; off_t residual; firstseg = (bus_dma_segment_t*)bp->bio_data; residual = g_disk_vlist_limit(dp, bp, &endseg); if (residual != 0) { bp->bio_ma_n = endseg - firstseg; bp->bio_length -= residual; limited = true; } } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma_n = howmany(bp->bio_ma_offset + bp->bio_length, PAGE_SIZE); } return (limited); } static void g_disk_start(struct bio *bp) { struct bio *bp2, *bp3; struct disk *dp; struct g_disk_softc *sc; int error; off_t off; biotrack(bp, __func__); sc = bp->bio_to->private; if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) { g_io_deliver(bp, ENXIO); return; } error = EJUSTRETURN; switch(bp->bio_cmd) { case BIO_DELETE: if (!(dp->d_flags & DISKFLAG_CANDELETE)) { error = EOPNOTSUPP; break; } /* fall-through */ case BIO_READ: case BIO_WRITE: KASSERT((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0 || (bp->bio_flags & BIO_UNMAPPED) == 0, ("unmapped bio not supported by disk %s", dp->d_name)); off = 0; bp3 = NULL; bp2 = g_clone_bio(bp); if (bp2 == NULL) { error = ENOMEM; break; } for (;;) { if (g_disk_limit(dp, bp2)) { off += bp2->bio_length; /* * To avoid a race, we need to grab the next bio * before we schedule this one. See "notes". */ bp3 = g_clone_bio(bp); if (bp3 == NULL) bp->bio_error = ENOMEM; } bp2->bio_done = g_disk_done; bp2->bio_pblkno = bp2->bio_offset / dp->d_sectorsize; bp2->bio_bcount = bp2->bio_length; bp2->bio_disk = dp; mtx_lock(&sc->start_mtx); devstat_start_transaction_bio(dp->d_devstat, bp2); mtx_unlock(&sc->start_mtx); dp->d_strategy(bp2); if (bp3 == NULL) break; bp2 = bp3; bp3 = NULL; g_disk_advance(dp, bp2, off); } break; case BIO_GETATTR: /* Give the driver a chance to override */ if (dp->d_getattr != NULL) { if (bp->bio_disk == NULL) bp->bio_disk = dp; error = dp->d_getattr(bp); if (error != -1) break; error = EJUSTRETURN; } if (g_handleattr_int(bp, "GEOM::candelete", (dp->d_flags & DISKFLAG_CANDELETE) != 0)) break; else if (g_handleattr_int(bp, "GEOM::fwsectors", dp->d_fwsectors)) break; else if (g_handleattr_int(bp, "GEOM::fwheads", dp->d_fwheads)) break; else if (g_handleattr_off_t(bp, "GEOM::frontstuff", 0)) break; else if (g_handleattr_str(bp, "GEOM::ident", dp->d_ident)) break; else if (g_handleattr_uint16_t(bp, "GEOM::hba_vendor", dp->d_hba_vendor)) break; else if (g_handleattr_uint16_t(bp, "GEOM::hba_device", dp->d_hba_device)) break; else if (g_handleattr_uint16_t(bp, "GEOM::hba_subvendor", dp->d_hba_subvendor)) break; else if (g_handleattr_uint16_t(bp, "GEOM::hba_subdevice", dp->d_hba_subdevice)) break; else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump")) g_disk_kerneldump(bp, dp); else if (!strcmp(bp->bio_attribute, "GEOM::setstate")) g_disk_setstate(bp, sc); else if (g_handleattr_uint16_t(bp, "GEOM::rotation_rate", dp->d_rotation_rate)) break; else error = ENOIOCTL; break; case BIO_FLUSH: g_trace(G_T_BIO, "g_disk_flushcache(%s)", bp->bio_to->name); if (!(dp->d_flags & DISKFLAG_CANFLUSHCACHE)) { error = EOPNOTSUPP; break; } /*FALLTHROUGH*/ case BIO_ZONE: if (bp->bio_cmd == BIO_ZONE) { if (!(dp->d_flags & DISKFLAG_CANZONE)) { error = EOPNOTSUPP; break; } g_trace(G_T_BIO, "g_disk_zone(%s)", bp->bio_to->name); } bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return; } bp2->bio_done = g_disk_done; bp2->bio_disk = dp; mtx_lock(&sc->start_mtx); devstat_start_transaction_bio(dp->d_devstat, bp2); mtx_unlock(&sc->start_mtx); dp->d_strategy(bp2); break; default: error = EOPNOTSUPP; break; } if (error != EJUSTRETURN) g_io_deliver(bp, error); return; } static void g_disk_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct bio *bp; struct disk *dp; struct g_disk_softc *sc; char *buf; int res = 0; sc = gp->softc; if (sc == NULL || (dp = sc->dp) == NULL) return; if (indent == NULL) { sbuf_printf(sb, " hd %u", dp->d_fwheads); sbuf_printf(sb, " sc %u", dp->d_fwsectors); return; } if (pp != NULL) { sbuf_printf(sb, "%s%u\n", indent, dp->d_fwheads); sbuf_printf(sb, "%s%u\n", indent, dp->d_fwsectors); /* * "rotationrate" is a little complicated, because the value * returned by the drive might not be the RPM; 0 and 1 are * special cases, and there's also a valid range. */ sbuf_printf(sb, "%s", indent); - if (dp->d_rotation_rate == 0) /* Old drives don't */ - sbuf_printf(sb, "unknown"); /* report RPM. */ - else if (dp->d_rotation_rate == 1) /* Since 0 is used */ - sbuf_printf(sb, "0"); /* above, SSDs use 1. */ - else if ((dp->d_rotation_rate >= 0x041) && - (dp->d_rotation_rate <= 0xfffe)) + if (dp->d_rotation_rate == DISK_RR_UNKNOWN) /* Old drives */ + sbuf_printf(sb, "unknown"); /* don't report RPM. */ + else if (dp->d_rotation_rate == DISK_RR_NON_ROTATING) + sbuf_printf(sb, "0"); + else if ((dp->d_rotation_rate >= DISK_RR_MIN) && + (dp->d_rotation_rate <= DISK_RR_MAX)) sbuf_printf(sb, "%u", dp->d_rotation_rate); else sbuf_printf(sb, "invalid"); sbuf_printf(sb, "\n"); if (dp->d_getattr != NULL) { buf = g_malloc(DISK_IDENT_SIZE, M_WAITOK); bp = g_alloc_bio(); bp->bio_disk = dp; bp->bio_attribute = "GEOM::ident"; bp->bio_length = DISK_IDENT_SIZE; bp->bio_data = buf; res = dp->d_getattr(bp); sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", res == 0 ? buf: dp->d_ident); sbuf_printf(sb, "\n"); bp->bio_attribute = "GEOM::lunid"; bp->bio_length = DISK_IDENT_SIZE; bp->bio_data = buf; if (dp->d_getattr(bp) == 0) { sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", buf); sbuf_printf(sb, "\n"); } bp->bio_attribute = "GEOM::lunname"; bp->bio_length = DISK_IDENT_SIZE; bp->bio_data = buf; if (dp->d_getattr(bp) == 0) { sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", buf); sbuf_printf(sb, "\n"); } g_destroy_bio(bp); g_free(buf); } else { sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", dp->d_ident); sbuf_printf(sb, "\n"); } sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", dp->d_descr); sbuf_printf(sb, "\n"); } } static void g_disk_resize(void *ptr, int flag) { struct disk *dp; struct g_geom *gp; struct g_provider *pp; if (flag == EV_CANCEL) return; g_topology_assert(); dp = ptr; gp = dp->d_geom; if (dp->d_destroyed || gp == NULL) return; LIST_FOREACH(pp, &gp->provider, provider) { if (pp->sectorsize != 0 && pp->sectorsize != dp->d_sectorsize) g_wither_provider(pp, ENXIO); else g_resize_provider(pp, dp->d_mediasize); } } static void g_disk_create(void *arg, int flag) { struct g_geom *gp; struct g_provider *pp; struct disk *dp; struct g_disk_softc *sc; char tmpstr[80]; if (flag == EV_CANCEL) return; g_topology_assert(); dp = arg; mtx_pool_lock(mtxpool_sleep, dp); dp->d_init_level = DISK_INIT_START; /* * If the disk has already gone away, we can just stop here and * call the user's callback to tell him we've cleaned things up. */ if (dp->d_goneflag != 0) { mtx_pool_unlock(mtxpool_sleep, dp); if (dp->d_gone != NULL) dp->d_gone(dp); return; } mtx_pool_unlock(mtxpool_sleep, dp); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->start_mtx, "g_disk_start", NULL, MTX_DEF); mtx_init(&sc->done_mtx, "g_disk_done", NULL, MTX_DEF); sc->dp = dp; gp = g_new_geomf(&g_disk_class, "%s%d", dp->d_name, dp->d_unit); gp->softc = sc; pp = g_new_providerf(gp, "%s", gp->name); devstat_remove_entry(pp->stat); pp->stat = NULL; dp->d_devstat->id = pp; pp->mediasize = dp->d_mediasize; pp->sectorsize = dp->d_sectorsize; pp->stripeoffset = dp->d_stripeoffset; pp->stripesize = dp->d_stripesize; if ((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0) pp->flags |= G_PF_ACCEPT_UNMAPPED; if ((dp->d_flags & DISKFLAG_DIRECT_COMPLETION) != 0) pp->flags |= G_PF_DIRECT_SEND; pp->flags |= G_PF_DIRECT_RECEIVE; if (bootverbose) printf("GEOM: new disk %s\n", gp->name); sysctl_ctx_init(&sc->sysctl_ctx); snprintf(tmpstr, sizeof(tmpstr), "GEOM disk %s", gp->name); sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_kern_geom_disk), OID_AUTO, gp->name, CTLFLAG_RD, 0, tmpstr); if (sc->sysctl_tree != NULL) { SYSCTL_ADD_STRING(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "led", CTLFLAG_RWTUN, sc->led, sizeof(sc->led), "LED name"); } pp->private = sc; dp->d_geom = gp; g_error_provider(pp, 0); mtx_pool_lock(mtxpool_sleep, dp); dp->d_init_level = DISK_INIT_DONE; /* * If the disk has gone away at this stage, start the withering * process for it. */ if (dp->d_goneflag != 0) { mtx_pool_unlock(mtxpool_sleep, dp); g_wither_provider(pp, ENXIO); return; } mtx_pool_unlock(mtxpool_sleep, dp); } /* * We get this callback after all of the consumers have gone away, and just * before the provider is freed. If the disk driver provided a d_gone * callback, let them know that it is okay to free resources -- they won't * be getting any more accesses from GEOM. */ static void g_disk_providergone(struct g_provider *pp) { struct disk *dp; struct g_disk_softc *sc; sc = (struct g_disk_softc *)pp->private; dp = sc->dp; if (dp != NULL && dp->d_gone != NULL) dp->d_gone(dp); if (sc->sysctl_tree != NULL) { sysctl_ctx_free(&sc->sysctl_ctx); sc->sysctl_tree = NULL; } if (sc->led[0] != 0) { led_set(sc->led, "0"); sc->led[0] = 0; } pp->private = NULL; pp->geom->softc = NULL; mtx_destroy(&sc->done_mtx); mtx_destroy(&sc->start_mtx); g_free(sc); } static void g_disk_destroy(void *ptr, int flag) { struct disk *dp; struct g_geom *gp; struct g_disk_softc *sc; g_topology_assert(); dp = ptr; gp = dp->d_geom; if (gp != NULL) { sc = gp->softc; if (sc != NULL) sc->dp = NULL; dp->d_geom = NULL; g_wither_geom(gp, ENXIO); } g_free(dp); } /* * We only allow printable characters in disk ident, * the rest is converted to 'x'. */ static void g_disk_ident_adjust(char *ident, size_t size) { char *p, tmp[4], newid[DISK_IDENT_SIZE]; newid[0] = '\0'; for (p = ident; *p != '\0'; p++) { if (isprint(*p)) { tmp[0] = *p; tmp[1] = '\0'; } else { snprintf(tmp, sizeof(tmp), "x%02hhx", *(unsigned char *)p); } if (strlcat(newid, tmp, sizeof(newid)) >= sizeof(newid)) break; } bzero(ident, size); strlcpy(ident, newid, size); } struct disk * disk_alloc(void) { return (g_malloc(sizeof(struct disk), M_WAITOK | M_ZERO)); } void disk_create(struct disk *dp, int version) { if (version != DISK_VERSION) { printf("WARNING: Attempt to add disk %s%d %s", dp->d_name, dp->d_unit, " using incompatible ABI version of disk(9)\n"); printf("WARNING: Ignoring disk %s%d\n", dp->d_name, dp->d_unit); return; } if (dp->d_flags & DISKFLAG_RESERVED) { printf("WARNING: Attempt to add non-MPSAFE disk %s%d\n", dp->d_name, dp->d_unit); printf("WARNING: Ignoring disk %s%d\n", dp->d_name, dp->d_unit); return; } KASSERT(dp->d_strategy != NULL, ("disk_create need d_strategy")); KASSERT(dp->d_name != NULL, ("disk_create need d_name")); KASSERT(*dp->d_name != 0, ("disk_create need d_name")); KASSERT(strlen(dp->d_name) < SPECNAMELEN - 4, ("disk name too long")); if (dp->d_devstat == NULL) dp->d_devstat = devstat_new_entry(dp->d_name, dp->d_unit, dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); dp->d_geom = NULL; dp->d_init_level = DISK_INIT_NONE; g_disk_ident_adjust(dp->d_ident, sizeof(dp->d_ident)); g_post_event(g_disk_create, dp, M_WAITOK, dp, NULL); } void disk_destroy(struct disk *dp) { g_cancel_event(dp); dp->d_destroyed = 1; if (dp->d_devstat != NULL) devstat_remove_entry(dp->d_devstat); g_post_event(g_disk_destroy, dp, M_WAITOK, NULL); } void disk_gone(struct disk *dp) { struct g_geom *gp; struct g_provider *pp; mtx_pool_lock(mtxpool_sleep, dp); dp->d_goneflag = 1; /* * If we're still in the process of creating this disk (the * g_disk_create() function is still queued, or is in * progress), the init level will not yet be DISK_INIT_DONE. * * If that is the case, g_disk_create() will see d_goneflag * and take care of cleaning things up. * * If the disk has already been created, we default to * withering the provider as usual below. * * If the caller has not set a d_gone() callback, he will * not be any worse off by returning here, because the geom * has not been fully setup in any case. */ if (dp->d_init_level < DISK_INIT_DONE) { mtx_pool_unlock(mtxpool_sleep, dp); return; } mtx_pool_unlock(mtxpool_sleep, dp); gp = dp->d_geom; if (gp != NULL) { pp = LIST_FIRST(&gp->provider); if (pp != NULL) { KASSERT(LIST_NEXT(pp, provider) == NULL, ("geom %p has more than one provider", gp)); g_wither_provider(pp, ENXIO); } } } void disk_attr_changed(struct disk *dp, const char *attr, int flag) { struct g_geom *gp; struct g_provider *pp; char devnamebuf[128]; gp = dp->d_geom; if (gp != NULL) LIST_FOREACH(pp, &gp->provider, provider) (void)g_attr_changed(pp, attr, flag); snprintf(devnamebuf, sizeof(devnamebuf), "devname=%s%d", dp->d_name, dp->d_unit); devctl_notify("GEOM", "disk", attr, devnamebuf); } void disk_media_changed(struct disk *dp, int flag) { struct g_geom *gp; struct g_provider *pp; gp = dp->d_geom; if (gp != NULL) { pp = LIST_FIRST(&gp->provider); if (pp != NULL) { KASSERT(LIST_NEXT(pp, provider) == NULL, ("geom %p has more than one provider", gp)); g_media_changed(pp, flag); } } } void disk_media_gone(struct disk *dp, int flag) { struct g_geom *gp; struct g_provider *pp; gp = dp->d_geom; if (gp != NULL) { pp = LIST_FIRST(&gp->provider); if (pp != NULL) { KASSERT(LIST_NEXT(pp, provider) == NULL, ("geom %p has more than one provider", gp)); g_media_gone(pp, flag); } } } int disk_resize(struct disk *dp, int flag) { if (dp->d_destroyed || dp->d_geom == NULL) return (0); return (g_post_event(g_disk_resize, dp, flag, NULL)); } static void g_kern_disks(void *p, int flag __unused) { struct sbuf *sb; struct g_geom *gp; char *sp; sb = p; sp = ""; g_topology_assert(); LIST_FOREACH(gp, &g_disk_class.geom, geom) { sbuf_printf(sb, "%s%s", sp, gp->name); sp = " "; } sbuf_finish(sb); } static int sysctl_disks(SYSCTL_HANDLER_ARGS) { int error; struct sbuf *sb; sb = sbuf_new_auto(); g_waitfor_event(g_kern_disks, sb, M_WAITOK, NULL); error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); return error; } SYSCTL_PROC(_kern, OID_AUTO, disks, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_disks, "A", "names of available disks"); Index: head/sys/geom/geom_disk.h =================================================================== --- head/sys/geom/geom_disk.h (revision 311970) +++ head/sys/geom/geom_disk.h (revision 311971) @@ -1,140 +1,145 @@ /*- * Copyright (c) 2003 Poul-Henning Kamp * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _GEOM_GEOM_DISK_H_ #define _GEOM_GEOM_DISK_H_ #ifdef _KERNEL #include #include #include #include #define G_DISK_CLASS_NAME "DISK" struct disk; typedef int disk_open_t(struct disk *); typedef int disk_close_t(struct disk *); typedef void disk_strategy_t(struct bio *bp); typedef int disk_getattr_t(struct bio *bp); typedef void disk_gone_t(struct disk *); typedef int disk_ioctl_t(struct disk *, u_long cmd, void *data, int fflag, struct thread *td); /* NB: disk_ioctl_t SHALL be cast'able to d_ioctl_t */ struct g_geom; struct devstat; typedef enum { DISK_INIT_NONE, DISK_INIT_START, DISK_INIT_DONE } disk_init_level; struct disk { /* Fields which are private to geom_disk */ struct g_geom *d_geom; struct devstat *d_devstat; int d_goneflag; int d_destroyed; disk_init_level d_init_level; /* Shared fields */ u_int d_flags; const char *d_name; u_int d_unit; struct bio_queue_head *d_queue; struct mtx *d_lock; /* Disk methods */ disk_open_t *d_open; disk_close_t *d_close; disk_strategy_t *d_strategy; disk_ioctl_t *d_ioctl; dumper_t *d_dump; disk_getattr_t *d_getattr; disk_gone_t *d_gone; /* Info fields from driver to geom_disk.c. Valid when open */ u_int d_sectorsize; off_t d_mediasize; u_int d_fwsectors; u_int d_fwheads; u_int d_maxsize; off_t d_delmaxsize; u_int d_stripeoffset; u_int d_stripesize; char d_ident[DISK_IDENT_SIZE]; char d_descr[DISK_IDENT_SIZE]; uint16_t d_hba_vendor; uint16_t d_hba_device; uint16_t d_hba_subvendor; uint16_t d_hba_subdevice; uint16_t d_rotation_rate; /* Fields private to the driver */ void *d_drv1; }; #define DISKFLAG_RESERVED 0x1 /* Was NEEDSGIANT */ #define DISKFLAG_OPEN 0x2 #define DISKFLAG_CANDELETE 0x4 #define DISKFLAG_CANFLUSHCACHE 0x8 #define DISKFLAG_UNMAPPED_BIO 0x10 #define DISKFLAG_DIRECT_COMPLETION 0x20 #define DISKFLAG_CANZONE 0x80 +#define DISK_RR_UNKNOWN 0 +#define DISK_RR_NON_ROTATING 1 +#define DISK_RR_MIN 0x0401 +#define DISK_RR_MAX 0xfffe + struct disk *disk_alloc(void); void disk_create(struct disk *disk, int version); void disk_destroy(struct disk *disk); void disk_gone(struct disk *disk); void disk_attr_changed(struct disk *dp, const char *attr, int flag); void disk_media_changed(struct disk *dp, int flag); void disk_media_gone(struct disk *dp, int flag); int disk_resize(struct disk *dp, int flag); #define DISK_VERSION_00 0x58561059 #define DISK_VERSION_01 0x5856105a #define DISK_VERSION_02 0x5856105b #define DISK_VERSION_03 0x5856105c #define DISK_VERSION_04 0x5856105d #define DISK_VERSION_05 0x5856105e #define DISK_VERSION DISK_VERSION_05 #endif /* _KERNEL */ #endif /* _GEOM_GEOM_DISK_H_ */