Index: stable/10/sys/dev/xen/blkfront/blkfront.c =================================================================== --- stable/10/sys/dev/xen/blkfront/blkfront.c (revision 287800) +++ stable/10/sys/dev/xen/blkfront/blkfront.c (revision 287801) @@ -1,1493 +1,1498 @@ /* * XenBSD block device driver * * Copyright (c) 2010-2013 Spectra Logic Corporation * Copyright (c) 2009 Scott Long, Yahoo! * Copyright (c) 2009 Frank Suchomel, Citrix * Copyright (c) 2009 Doug F. Rabson, Citrix * Copyright (c) 2005 Kip Macy * Copyright (c) 2003-2004, Keir Fraser & Steve Hand * Modifications by Mark A. Williamson are (c) Intel Research Cambridge * * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "xenbus_if.h" /*--------------------------- Forward Declarations ---------------------------*/ static void xbd_closing(device_t); static void xbd_startio(struct xbd_softc *sc); /*---------------------------------- Macros ----------------------------------*/ #if 0 #define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args) #else #define DPRINTK(fmt, args...) #endif #define XBD_SECTOR_SHFT 9 /*---------------------------- Global Static Data ----------------------------*/ static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data"); /*---------------------------- Command Processing ----------------------------*/ static void xbd_freeze(struct xbd_softc *sc, xbd_flag_t xbd_flag) { if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) != 0) return; sc->xbd_flags |= xbd_flag; sc->xbd_qfrozen_cnt++; } static void xbd_thaw(struct xbd_softc *sc, xbd_flag_t xbd_flag) { if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) == 0) return; if (sc->xbd_qfrozen_cnt == 0) panic("%s: Thaw with flag 0x%x while not frozen.", __func__, xbd_flag); sc->xbd_flags &= ~xbd_flag; sc->xbd_qfrozen_cnt--; } static void xbd_cm_freeze(struct xbd_softc *sc, struct xbd_command *cm, xbdc_flag_t cm_flag) { if ((cm->cm_flags & XBDCF_FROZEN) != 0) return; cm->cm_flags |= XBDCF_FROZEN|cm_flag; xbd_freeze(sc, XBDF_NONE); } static void xbd_cm_thaw(struct xbd_softc *sc, struct xbd_command *cm) { if ((cm->cm_flags & XBDCF_FROZEN) == 0) return; cm->cm_flags &= ~XBDCF_FROZEN; xbd_thaw(sc, XBDF_NONE); } static inline void xbd_flush_requests(struct xbd_softc *sc) { int notify; RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->xbd_ring, notify); if (notify) xen_intr_signal(sc->xen_intr_handle); } static void xbd_free_command(struct xbd_command *cm) { KASSERT((cm->cm_flags & XBDCF_Q_MASK) == XBD_Q_NONE, ("Freeing command that is still on queue %d.", cm->cm_flags & XBDCF_Q_MASK)); cm->cm_flags = XBDCF_INITIALIZER; cm->cm_bp = NULL; cm->cm_complete = NULL; xbd_enqueue_cm(cm, XBD_Q_FREE); xbd_thaw(cm->cm_sc, XBDF_CM_SHORTAGE); } static void -xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) +xbd_mksegarray(bus_dma_segment_t *segs, int nsegs, + grant_ref_t * gref_head, int otherend_id, int readonly, + grant_ref_t * sg_ref, blkif_request_segment_t * sg) { - struct xbd_softc *sc; - struct xbd_command *cm; - blkif_request_t *ring_req; - struct blkif_request_segment *sg; - struct blkif_request_segment *last_block_sg; - grant_ref_t *sg_ref; + struct blkif_request_segment *last_block_sg = sg + nsegs; vm_paddr_t buffer_ma; uint64_t fsect, lsect; int ref; - int op; - cm = arg; - sc = cm->cm_sc; - - if (error) { - cm->cm_bp->bio_error = EIO; - biodone(cm->cm_bp); - xbd_free_command(cm); - return; - } - - KASSERT(nsegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST, - ("Too many segments in a blkfront I/O")); - - /* Fill out a communications ring structure. */ - ring_req = RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); - sc->xbd_ring.req_prod_pvt++; - ring_req->id = cm->cm_id; - ring_req->operation = cm->cm_operation; - ring_req->sector_number = cm->cm_sector_number; - ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk; - ring_req->nr_segments = nsegs; - cm->cm_nseg = nsegs; - - sg = ring_req->seg; - last_block_sg = sg + nsegs; - sg_ref = cm->cm_sg_refs; - while (sg < last_block_sg) { buffer_ma = segs->ds_addr; fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; lsect = fsect + (segs->ds_len >> XBD_SECTOR_SHFT) - 1; KASSERT(lsect <= 7, ("XEN disk driver data cannot " "cross a page boundary")); /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&cm->cm_gref_head); + ref = gnttab_claim_grant_reference(gref_head); /* * GNTTAB_LIST_END == 0xffffffff, but it is private * to gnttab.c. */ KASSERT(ref != ~0, ("grant_reference failed")); gnttab_grant_foreign_access_ref( ref, - xenbus_get_otherend_id(sc->xbd_dev), + otherend_id, buffer_ma >> PAGE_SHIFT, - ring_req->operation == BLKIF_OP_WRITE); + readonly); *sg_ref = ref; *sg = (struct blkif_request_segment) { .gref = ref, .first_sect = fsect, .last_sect = lsect }; sg++; sg_ref++; segs++; } +} +static void +xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) +{ + struct xbd_softc *sc; + struct xbd_command *cm; + blkif_request_t *ring_req; + int op; + + cm = arg; + sc = cm->cm_sc; + + if (error) { + cm->cm_bp->bio_error = EIO; + biodone(cm->cm_bp); + xbd_free_command(cm); + return; + } + + KASSERT(nsegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST, + ("Too many segments in a blkfront I/O")); + + /* Fill out a communications ring structure. */ + ring_req = RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); + sc->xbd_ring.req_prod_pvt++; + ring_req->id = cm->cm_id; + ring_req->operation = cm->cm_operation; + ring_req->sector_number = cm->cm_sector_number; + ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk; + ring_req->nr_segments = nsegs; + cm->cm_nseg = nsegs; + xbd_mksegarray(segs, nsegs, &cm->cm_gref_head, + xenbus_get_otherend_id(sc->xbd_dev), + cm->cm_operation == BLKIF_OP_WRITE, + cm->cm_sg_refs, ring_req->seg); + if (cm->cm_operation == BLKIF_OP_READ) op = BUS_DMASYNC_PREREAD; else if (cm->cm_operation == BLKIF_OP_WRITE) op = BUS_DMASYNC_PREWRITE; else op = 0; bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op); gnttab_free_grant_references(cm->cm_gref_head); xbd_enqueue_cm(cm, XBD_Q_BUSY); /* * If bus dma had to asynchronously call us back to dispatch * this command, we are no longer executing in the context of * xbd_startio(). Thus we cannot rely on xbd_startio()'s call to * xbd_flush_requests() to publish this command to the backend * along with any other commands that it could batch. */ if ((cm->cm_flags & XBDCF_ASYNC_MAPPING) != 0) xbd_flush_requests(sc); return; } static int xbd_queue_request(struct xbd_softc *sc, struct xbd_command *cm) { int error; error = bus_dmamap_load(sc->xbd_io_dmat, cm->cm_map, cm->cm_data, cm->cm_datalen, xbd_queue_cb, cm, 0); if (error == EINPROGRESS) { /* * Maintain queuing order by freezing the queue. The next * command may not require as many resources as the command * we just attempted to map, so we can't rely on bus dma * blocking for it too. */ xbd_cm_freeze(sc, cm, XBDCF_ASYNC_MAPPING); return (0); } return (error); } static void xbd_restart_queue_callback(void *arg) { struct xbd_softc *sc = arg; mtx_lock(&sc->xbd_io_lock); xbd_thaw(sc, XBDF_GNT_SHORTAGE); xbd_startio(sc); mtx_unlock(&sc->xbd_io_lock); } static struct xbd_command * xbd_bio_command(struct xbd_softc *sc) { struct xbd_command *cm; struct bio *bp; if (__predict_false(sc->xbd_state != XBD_STATE_CONNECTED)) return (NULL); bp = xbd_dequeue_bio(sc); if (bp == NULL) return (NULL); if ((cm = xbd_dequeue_cm(sc, XBD_Q_FREE)) == NULL) { xbd_freeze(sc, XBDF_CM_SHORTAGE); xbd_requeue_bio(sc, bp); return (NULL); } if (gnttab_alloc_grant_references(sc->xbd_max_request_segments, &cm->cm_gref_head) != 0) { gnttab_request_free_callback(&sc->xbd_callback, xbd_restart_queue_callback, sc, sc->xbd_max_request_segments); xbd_freeze(sc, XBDF_GNT_SHORTAGE); xbd_requeue_bio(sc, bp); xbd_enqueue_cm(cm, XBD_Q_FREE); return (NULL); } cm->cm_bp = bp; cm->cm_data = bp->bio_data; cm->cm_datalen = bp->bio_bcount; cm->cm_sector_number = (blkif_sector_t)bp->bio_pblkno; switch (bp->bio_cmd) { case BIO_READ: cm->cm_operation = BLKIF_OP_READ; break; case BIO_WRITE: cm->cm_operation = BLKIF_OP_WRITE; if ((bp->bio_flags & BIO_ORDERED) != 0) { if ((sc->xbd_flags & XBDF_BARRIER) != 0) { cm->cm_operation = BLKIF_OP_WRITE_BARRIER; } else { /* * Single step this command. */ cm->cm_flags |= XBDCF_Q_FREEZE; if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { /* * Wait for in-flight requests to * finish. */ xbd_freeze(sc, XBDF_WAIT_IDLE); xbd_requeue_cm(cm, XBD_Q_READY); return (NULL); } } } break; case BIO_FLUSH: if ((sc->xbd_flags & XBDF_FLUSH) != 0) cm->cm_operation = BLKIF_OP_FLUSH_DISKCACHE; else if ((sc->xbd_flags & XBDF_BARRIER) != 0) cm->cm_operation = BLKIF_OP_WRITE_BARRIER; else panic("flush request, but no flush support available"); break; default: panic("unknown bio command %d", bp->bio_cmd); } return (cm); } /* * Dequeue buffers and place them in the shared communication ring. * Return when no more requests can be accepted or all buffers have * been queued. * * Signal XEN once the ring has been filled out. */ static void xbd_startio(struct xbd_softc *sc) { struct xbd_command *cm; int error, queued = 0; mtx_assert(&sc->xbd_io_lock, MA_OWNED); if (sc->xbd_state != XBD_STATE_CONNECTED) return; while (!RING_FULL(&sc->xbd_ring)) { if (sc->xbd_qfrozen_cnt != 0) break; cm = xbd_dequeue_cm(sc, XBD_Q_READY); if (cm == NULL) cm = xbd_bio_command(sc); if (cm == NULL) break; if ((cm->cm_flags & XBDCF_Q_FREEZE) != 0) { /* * Single step command. Future work is * held off until this command completes. */ xbd_cm_freeze(sc, cm, XBDCF_Q_FREEZE); } if ((error = xbd_queue_request(sc, cm)) != 0) { printf("xbd_queue_request returned %d\n", error); break; } queued++; } if (queued != 0) xbd_flush_requests(sc); } static void xbd_bio_complete(struct xbd_softc *sc, struct xbd_command *cm) { struct bio *bp; bp = cm->cm_bp; if (__predict_false(cm->cm_status != BLKIF_RSP_OKAY)) { disk_err(bp, "disk error" , -1, 0); printf(" status: %x\n", cm->cm_status); bp->bio_flags |= BIO_ERROR; } if (bp->bio_flags & BIO_ERROR) bp->bio_error = EIO; else bp->bio_resid = 0; xbd_free_command(cm); biodone(bp); } static void xbd_int(void *xsc) { struct xbd_softc *sc = xsc; struct xbd_command *cm; blkif_response_t *bret; RING_IDX i, rp; int op; mtx_lock(&sc->xbd_io_lock); if (__predict_false(sc->xbd_state == XBD_STATE_DISCONNECTED)) { mtx_unlock(&sc->xbd_io_lock); return; } again: rp = sc->xbd_ring.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ for (i = sc->xbd_ring.rsp_cons; i != rp;) { bret = RING_GET_RESPONSE(&sc->xbd_ring, i); cm = &sc->xbd_shadow[bret->id]; xbd_remove_cm(cm, XBD_Q_BUSY); gnttab_end_foreign_access_references(cm->cm_nseg, cm->cm_sg_refs); i++; if (cm->cm_operation == BLKIF_OP_READ) op = BUS_DMASYNC_POSTREAD; else if (cm->cm_operation == BLKIF_OP_WRITE || cm->cm_operation == BLKIF_OP_WRITE_BARRIER) op = BUS_DMASYNC_POSTWRITE; else op = 0; bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op); bus_dmamap_unload(sc->xbd_io_dmat, cm->cm_map); /* * Release any hold this command has on future command * dispatch. */ xbd_cm_thaw(sc, cm); /* * Directly call the i/o complete routine to save an * an indirection in the common case. */ cm->cm_status = bret->status; if (cm->cm_bp) xbd_bio_complete(sc, cm); else if (cm->cm_complete != NULL) cm->cm_complete(cm); else xbd_free_command(cm); } sc->xbd_ring.rsp_cons = i; if (i != sc->xbd_ring.req_prod_pvt) { int more_to_do; RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, more_to_do); if (more_to_do) goto again; } else { sc->xbd_ring.sring->rsp_event = i + 1; } if (xbd_queue_length(sc, XBD_Q_BUSY) == 0) xbd_thaw(sc, XBDF_WAIT_IDLE); xbd_startio(sc); if (__predict_false(sc->xbd_state == XBD_STATE_SUSPENDED)) wakeup(&sc->xbd_cm_q[XBD_Q_BUSY]); mtx_unlock(&sc->xbd_io_lock); } /*------------------------------- Dump Support -------------------------------*/ /** * Quiesce the disk writes for a dump file before allowing the next buffer. */ static void xbd_quiesce(struct xbd_softc *sc) { int mtd; // While there are outstanding requests while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, mtd); if (mtd) { /* Recieved request completions, update queue. */ xbd_int(sc); } if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { /* * Still pending requests, wait for the disk i/o * to complete. */ HYPERVISOR_yield(); } } } /* Kernel dump function for a paravirtualized disk device */ static void xbd_dump_complete(struct xbd_command *cm) { xbd_enqueue_cm(cm, XBD_Q_COMPLETE); } static int xbd_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length) { struct disk *dp = arg; struct xbd_softc *sc = dp->d_drv1; struct xbd_command *cm; size_t chunk; int sbp; int rc = 0; if (length <= 0) return (rc); xbd_quiesce(sc); /* All quiet on the western front. */ /* * If this lock is held, then this module is failing, and a * successful kernel dump is highly unlikely anyway. */ mtx_lock(&sc->xbd_io_lock); /* Split the 64KB block as needed */ for (sbp=0; length > 0; sbp++) { cm = xbd_dequeue_cm(sc, XBD_Q_FREE); if (cm == NULL) { mtx_unlock(&sc->xbd_io_lock); device_printf(sc->xbd_dev, "dump: no more commands?\n"); return (EBUSY); } if (gnttab_alloc_grant_references(sc->xbd_max_request_segments, &cm->cm_gref_head) != 0) { xbd_free_command(cm); mtx_unlock(&sc->xbd_io_lock); device_printf(sc->xbd_dev, "no more grant allocs?\n"); return (EBUSY); } chunk = length > sc->xbd_max_request_size ? sc->xbd_max_request_size : length; cm->cm_data = virtual; cm->cm_datalen = chunk; cm->cm_operation = BLKIF_OP_WRITE; cm->cm_sector_number = offset / dp->d_sectorsize; cm->cm_complete = xbd_dump_complete; xbd_enqueue_cm(cm, XBD_Q_READY); length -= chunk; offset += chunk; virtual = (char *) virtual + chunk; } /* Tell DOM0 to do the I/O */ xbd_startio(sc); mtx_unlock(&sc->xbd_io_lock); /* Poll for the completion. */ xbd_quiesce(sc); /* All quite on the eastern front */ /* If there were any errors, bail out... */ while ((cm = xbd_dequeue_cm(sc, XBD_Q_COMPLETE)) != NULL) { if (cm->cm_status != BLKIF_RSP_OKAY) { device_printf(sc->xbd_dev, "Dump I/O failed at sector %jd\n", cm->cm_sector_number); rc = EIO; } xbd_free_command(cm); } return (rc); } /*----------------------------- Disk Entrypoints -----------------------------*/ static int xbd_open(struct disk *dp) { struct xbd_softc *sc = dp->d_drv1; if (sc == NULL) { printf("xb%d: not found", sc->xbd_unit); return (ENXIO); } sc->xbd_flags |= XBDF_OPEN; sc->xbd_users++; return (0); } static int xbd_close(struct disk *dp) { struct xbd_softc *sc = dp->d_drv1; if (sc == NULL) return (ENXIO); sc->xbd_flags &= ~XBDF_OPEN; if (--(sc->xbd_users) == 0) { /* * Check whether we have been instructed to close. We will * have ignored this request initially, as the device was * still mounted. */ if (xenbus_get_otherend_state(sc->xbd_dev) == XenbusStateClosing) xbd_closing(sc->xbd_dev); } return (0); } static int xbd_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) { struct xbd_softc *sc = dp->d_drv1; if (sc == NULL) return (ENXIO); return (ENOTTY); } /* * Read/write routine for a buffer. Finds the proper unit, place it on * the sortq and kick the controller. */ static void xbd_strategy(struct bio *bp) { struct xbd_softc *sc = bp->bio_disk->d_drv1; /* bogus disk? */ if (sc == NULL) { bp->bio_error = EINVAL; bp->bio_flags |= BIO_ERROR; bp->bio_resid = bp->bio_bcount; biodone(bp); return; } /* * Place it in the queue of disk activities for this disk */ mtx_lock(&sc->xbd_io_lock); xbd_enqueue_bio(sc, bp); xbd_startio(sc); mtx_unlock(&sc->xbd_io_lock); return; } /*------------------------------ Ring Management -----------------------------*/ static int xbd_alloc_ring(struct xbd_softc *sc) { blkif_sring_t *sring; uintptr_t sring_page_addr; int error; int i; sring = malloc(sc->xbd_ring_pages * PAGE_SIZE, M_XENBLOCKFRONT, M_NOWAIT|M_ZERO); if (sring == NULL) { xenbus_dev_fatal(sc->xbd_dev, ENOMEM, "allocating shared ring"); return (ENOMEM); } SHARED_RING_INIT(sring); FRONT_RING_INIT(&sc->xbd_ring, sring, sc->xbd_ring_pages * PAGE_SIZE); for (i = 0, sring_page_addr = (uintptr_t)sring; i < sc->xbd_ring_pages; i++, sring_page_addr += PAGE_SIZE) { error = xenbus_grant_ring(sc->xbd_dev, (vtomach(sring_page_addr) >> PAGE_SHIFT), &sc->xbd_ring_ref[i]); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "granting ring_ref(%d)", i); return (error); } } if (sc->xbd_ring_pages == 1) { error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev), "ring-ref", "%u", sc->xbd_ring_ref[0]); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/ring-ref", xenbus_get_node(sc->xbd_dev)); return (error); } } else { for (i = 0; i < sc->xbd_ring_pages; i++) { char ring_ref_name[]= "ring_refXX"; snprintf(ring_ref_name, sizeof(ring_ref_name), "ring-ref%u", i); error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev), ring_ref_name, "%u", sc->xbd_ring_ref[i]); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/%s", xenbus_get_node(sc->xbd_dev), ring_ref_name); return (error); } } } error = xen_intr_alloc_and_bind_local_port(sc->xbd_dev, xenbus_get_otherend_id(sc->xbd_dev), NULL, xbd_int, sc, INTR_TYPE_BIO | INTR_MPSAFE, &sc->xen_intr_handle); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "xen_intr_alloc_and_bind_local_port failed"); return (error); } return (0); } static void xbd_free_ring(struct xbd_softc *sc) { int i; if (sc->xbd_ring.sring == NULL) return; for (i = 0; i < sc->xbd_ring_pages; i++) { if (sc->xbd_ring_ref[i] != GRANT_REF_INVALID) { gnttab_end_foreign_access_ref(sc->xbd_ring_ref[i]); sc->xbd_ring_ref[i] = GRANT_REF_INVALID; } } free(sc->xbd_ring.sring, M_XENBLOCKFRONT); sc->xbd_ring.sring = NULL; } /*-------------------------- Initialization/Teardown -------------------------*/ static int xbd_feature_string(struct xbd_softc *sc, char *features, size_t len) { struct sbuf sb; int feature_cnt; sbuf_new(&sb, features, len, SBUF_FIXEDLEN); feature_cnt = 0; if ((sc->xbd_flags & XBDF_FLUSH) != 0) { sbuf_printf(&sb, "flush"); feature_cnt++; } if ((sc->xbd_flags & XBDF_BARRIER) != 0) { if (feature_cnt != 0) sbuf_printf(&sb, ", "); sbuf_printf(&sb, "write_barrier"); feature_cnt++; } (void) sbuf_finish(&sb); return (sbuf_len(&sb)); } static int xbd_sysctl_features(SYSCTL_HANDLER_ARGS) { char features[80]; struct xbd_softc *sc = arg1; int error; int len; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); len = xbd_feature_string(sc, features, sizeof(features)); /* len is -1 on error, which will make the SYSCTL_OUT a no-op. */ return (SYSCTL_OUT(req, features, len + 1/*NUL*/)); } static void xbd_setup_sysctl(struct xbd_softc *xbd) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; struct sysctl_oid_list *children; sysctl_ctx = device_get_sysctl_ctx(xbd->xbd_dev); if (sysctl_ctx == NULL) return; sysctl_tree = device_get_sysctl_tree(xbd->xbd_dev); if (sysctl_tree == NULL) return; children = SYSCTL_CHILDREN(sysctl_tree); SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, "max_requests", CTLFLAG_RD, &xbd->xbd_max_requests, -1, "maximum outstanding requests (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, "max_request_segments", CTLFLAG_RD, &xbd->xbd_max_request_segments, 0, "maximum number of pages per requests (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, "max_request_size", CTLFLAG_RD, &xbd->xbd_max_request_size, 0, "maximum size in bytes of a request (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, "ring_pages", CTLFLAG_RD, &xbd->xbd_ring_pages, 0, "communication channel pages (negotiated)"); SYSCTL_ADD_PROC(sysctl_ctx, children, OID_AUTO, "features", CTLTYPE_STRING|CTLFLAG_RD, xbd, 0, xbd_sysctl_features, "A", "protocol features (negotiated)"); } /* * Translate Linux major/minor to an appropriate name and unit * number. For HVM guests, this allows us to use the same drive names * with blkfront as the emulated drives, easing transition slightly. */ static void xbd_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name) { static struct vdev_info { int major; int shift; int base; const char *name; } info[] = { {3, 6, 0, "ada"}, /* ide0 */ {22, 6, 2, "ada"}, /* ide1 */ {33, 6, 4, "ada"}, /* ide2 */ {34, 6, 6, "ada"}, /* ide3 */ {56, 6, 8, "ada"}, /* ide4 */ {57, 6, 10, "ada"}, /* ide5 */ {88, 6, 12, "ada"}, /* ide6 */ {89, 6, 14, "ada"}, /* ide7 */ {90, 6, 16, "ada"}, /* ide8 */ {91, 6, 18, "ada"}, /* ide9 */ {8, 4, 0, "da"}, /* scsi disk0 */ {65, 4, 16, "da"}, /* scsi disk1 */ {66, 4, 32, "da"}, /* scsi disk2 */ {67, 4, 48, "da"}, /* scsi disk3 */ {68, 4, 64, "da"}, /* scsi disk4 */ {69, 4, 80, "da"}, /* scsi disk5 */ {70, 4, 96, "da"}, /* scsi disk6 */ {71, 4, 112, "da"}, /* scsi disk7 */ {128, 4, 128, "da"}, /* scsi disk8 */ {129, 4, 144, "da"}, /* scsi disk9 */ {130, 4, 160, "da"}, /* scsi disk10 */ {131, 4, 176, "da"}, /* scsi disk11 */ {132, 4, 192, "da"}, /* scsi disk12 */ {133, 4, 208, "da"}, /* scsi disk13 */ {134, 4, 224, "da"}, /* scsi disk14 */ {135, 4, 240, "da"}, /* scsi disk15 */ {202, 4, 0, "xbd"}, /* xbd */ {0, 0, 0, NULL}, }; int major = vdevice >> 8; int minor = vdevice & 0xff; int i; if (vdevice & (1 << 28)) { *unit = (vdevice & ((1 << 28) - 1)) >> 8; *name = "xbd"; return; } for (i = 0; info[i].major; i++) { if (info[i].major == major) { *unit = info[i].base + (minor >> info[i].shift); *name = info[i].name; return; } } *unit = minor >> 4; *name = "xbd"; } int xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors, int vdevice, uint16_t vdisk_info, unsigned long sector_size) { char features[80]; int unit, error = 0; const char *name; xbd_vdevice_to_unit(vdevice, &unit, &name); sc->xbd_unit = unit; if (strcmp(name, "xbd") != 0) device_printf(sc->xbd_dev, "attaching as %s%d\n", name, unit); if (xbd_feature_string(sc, features, sizeof(features)) > 0) { device_printf(sc->xbd_dev, "features: %s\n", features); } sc->xbd_disk = disk_alloc(); sc->xbd_disk->d_unit = sc->xbd_unit; sc->xbd_disk->d_open = xbd_open; sc->xbd_disk->d_close = xbd_close; sc->xbd_disk->d_ioctl = xbd_ioctl; sc->xbd_disk->d_strategy = xbd_strategy; sc->xbd_disk->d_dump = xbd_dump; sc->xbd_disk->d_name = name; sc->xbd_disk->d_drv1 = sc; sc->xbd_disk->d_sectorsize = sector_size; sc->xbd_disk->d_mediasize = sectors * sector_size; sc->xbd_disk->d_maxsize = sc->xbd_max_request_size; sc->xbd_disk->d_flags = 0; if ((sc->xbd_flags & (XBDF_FLUSH|XBDF_BARRIER)) != 0) { sc->xbd_disk->d_flags |= DISKFLAG_CANFLUSHCACHE; device_printf(sc->xbd_dev, "synchronize cache commands enabled.\n"); } disk_create(sc->xbd_disk, DISK_VERSION); return error; } static void xbd_free(struct xbd_softc *sc) { int i; /* Prevent new requests being issued until we fix things up. */ mtx_lock(&sc->xbd_io_lock); sc->xbd_state = XBD_STATE_DISCONNECTED; mtx_unlock(&sc->xbd_io_lock); /* Free resources associated with old device channel. */ xbd_free_ring(sc); if (sc->xbd_shadow) { for (i = 0; i < sc->xbd_max_requests; i++) { struct xbd_command *cm; cm = &sc->xbd_shadow[i]; if (cm->cm_sg_refs != NULL) { free(cm->cm_sg_refs, M_XENBLOCKFRONT); cm->cm_sg_refs = NULL; } bus_dmamap_destroy(sc->xbd_io_dmat, cm->cm_map); } free(sc->xbd_shadow, M_XENBLOCKFRONT); sc->xbd_shadow = NULL; bus_dma_tag_destroy(sc->xbd_io_dmat); xbd_initq_cm(sc, XBD_Q_FREE); xbd_initq_cm(sc, XBD_Q_READY); xbd_initq_cm(sc, XBD_Q_COMPLETE); } xen_intr_unbind(&sc->xen_intr_handle); } /*--------------------------- State Change Handlers --------------------------*/ static void xbd_initialize(struct xbd_softc *sc) { const char *otherend_path; const char *node_path; uint32_t max_ring_page_order; int error; - int i; if (xenbus_get_state(sc->xbd_dev) != XenbusStateInitialising) { /* Initialization has already been performed. */ return; } /* * Protocol defaults valid even if negotiation for a * setting fails. */ max_ring_page_order = 0; sc->xbd_ring_pages = 1; sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST; sc->xbd_max_request_size = XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments); /* * Protocol negotiation. * * \note xs_gather() returns on the first encountered error, so * we must use independant calls in order to guarantee * we don't miss information in a sparsly populated back-end * tree. * * \note xs_scanf() does not update variables for unmatched * fields. */ otherend_path = xenbus_get_otherend_path(sc->xbd_dev); node_path = xenbus_get_node(sc->xbd_dev); /* Support both backend schemes for relaying ring page limits. */ (void)xs_scanf(XST_NIL, otherend_path, "max-ring-page-order", NULL, "%" PRIu32, &max_ring_page_order); sc->xbd_ring_pages = 1 << max_ring_page_order; (void)xs_scanf(XST_NIL, otherend_path, "max-ring-pages", NULL, "%" PRIu32, &sc->xbd_ring_pages); if (sc->xbd_ring_pages < 1) sc->xbd_ring_pages = 1; if (sc->xbd_ring_pages > XBD_MAX_RING_PAGES) { device_printf(sc->xbd_dev, "Back-end specified ring-pages of %u " "limited to front-end limit of %u.\n", sc->xbd_ring_pages, XBD_MAX_RING_PAGES); sc->xbd_ring_pages = XBD_MAX_RING_PAGES; } if (powerof2(sc->xbd_ring_pages) == 0) { uint32_t new_page_limit; new_page_limit = 0x01 << (fls(sc->xbd_ring_pages) - 1); device_printf(sc->xbd_dev, "Back-end specified ring-pages of %u " "is not a power of 2. Limited to %u.\n", sc->xbd_ring_pages, new_page_limit); sc->xbd_ring_pages = new_page_limit; } sc->xbd_max_requests = BLKIF_MAX_RING_REQUESTS(sc->xbd_ring_pages * PAGE_SIZE); if (sc->xbd_max_requests > XBD_MAX_REQUESTS) { device_printf(sc->xbd_dev, "Back-end specified max_requests of %u " "limited to front-end limit of %zu.\n", sc->xbd_max_requests, XBD_MAX_REQUESTS); sc->xbd_max_requests = XBD_MAX_REQUESTS; } - /* Allocate datastructures based on negotiated values. */ - error = bus_dma_tag_create( - bus_get_dma_tag(sc->xbd_dev), /* parent */ - 512, PAGE_SIZE, /* algnmnt, boundary */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - sc->xbd_max_request_size, - sc->xbd_max_request_segments, - PAGE_SIZE, /* maxsegsize */ - BUS_DMA_ALLOCNOW, /* flags */ - busdma_lock_mutex, /* lockfunc */ - &sc->xbd_io_lock, /* lockarg */ - &sc->xbd_io_dmat); - if (error != 0) { - xenbus_dev_fatal(sc->xbd_dev, error, - "Cannot allocate parent DMA tag\n"); - return; - } - - /* Per-transaction data allocation. */ - sc->xbd_shadow = malloc(sizeof(*sc->xbd_shadow) * sc->xbd_max_requests, - M_XENBLOCKFRONT, M_NOWAIT|M_ZERO); - if (sc->xbd_shadow == NULL) { - bus_dma_tag_destroy(sc->xbd_io_dmat); - xenbus_dev_fatal(sc->xbd_dev, ENOMEM, - "Cannot allocate request structures\n"); - return; - } - - for (i = 0; i < sc->xbd_max_requests; i++) { - struct xbd_command *cm; - - cm = &sc->xbd_shadow[i]; - cm->cm_sg_refs = malloc( - sizeof(grant_ref_t) * sc->xbd_max_request_segments, - M_XENBLOCKFRONT, M_NOWAIT); - if (cm->cm_sg_refs == NULL) - break; - cm->cm_id = i; - cm->cm_flags = XBDCF_INITIALIZER; - cm->cm_sc = sc; - if (bus_dmamap_create(sc->xbd_io_dmat, 0, &cm->cm_map) != 0) - break; - xbd_free_command(cm); - } - if (xbd_alloc_ring(sc) != 0) return; /* Support both backend schemes for relaying ring page limits. */ if (sc->xbd_ring_pages > 1) { error = xs_printf(XST_NIL, node_path, "num-ring-pages","%u", sc->xbd_ring_pages); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/num-ring-pages", node_path); return; } error = xs_printf(XST_NIL, node_path, "ring-page-order", "%u", fls(sc->xbd_ring_pages) - 1); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/ring-page-order", node_path); return; } } error = xs_printf(XST_NIL, node_path, "event-channel", "%u", xen_intr_port(sc->xen_intr_handle)); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/event-channel", node_path); return; } error = xs_printf(XST_NIL, node_path, "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/protocol", node_path); return; } xenbus_set_state(sc->xbd_dev, XenbusStateInitialised); } /* * Invoked when the backend is finally 'ready' (and has published * the details about the physical device - #sectors, size, etc). */ static void xbd_connect(struct xbd_softc *sc) { device_t dev = sc->xbd_dev; unsigned long sectors, sector_size; unsigned int binfo; int err, feature_barrier, feature_flush; + int i; if (sc->xbd_state == XBD_STATE_CONNECTED || sc->xbd_state == XBD_STATE_SUSPENDED) return; DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev)); err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "sectors", "%lu", §ors, "info", "%u", &binfo, "sector-size", "%lu", §or_size, NULL); if (err) { xenbus_dev_fatal(dev, err, "reading backend fields at %s", xenbus_get_otherend_path(dev)); return; } err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "feature-barrier", "%lu", &feature_barrier, NULL); if (err == 0 && feature_barrier != 0) sc->xbd_flags |= XBDF_BARRIER; err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "feature-flush-cache", "%lu", &feature_flush, NULL); if (err == 0 && feature_flush != 0) sc->xbd_flags |= XBDF_FLUSH; + + /* Allocate datastructures based on negotiated values. */ + err = bus_dma_tag_create( + bus_get_dma_tag(sc->xbd_dev), /* parent */ + 512, PAGE_SIZE, /* algnmnt, boundary */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + sc->xbd_max_request_size, + sc->xbd_max_request_segments, + PAGE_SIZE, /* maxsegsize */ + BUS_DMA_ALLOCNOW, /* flags */ + busdma_lock_mutex, /* lockfunc */ + &sc->xbd_io_lock, /* lockarg */ + &sc->xbd_io_dmat); + if (err != 0) { + xenbus_dev_fatal(sc->xbd_dev, err, + "Cannot allocate parent DMA tag\n"); + return; + } + + /* Per-transaction data allocation. */ + sc->xbd_shadow = malloc(sizeof(*sc->xbd_shadow) * sc->xbd_max_requests, + M_XENBLOCKFRONT, M_NOWAIT|M_ZERO); + if (sc->xbd_shadow == NULL) { + bus_dma_tag_destroy(sc->xbd_io_dmat); + xenbus_dev_fatal(sc->xbd_dev, ENOMEM, + "Cannot allocate request structures\n"); + return; + } + + for (i = 0; i < sc->xbd_max_requests; i++) { + struct xbd_command *cm; + + cm = &sc->xbd_shadow[i]; + cm->cm_sg_refs = malloc( + sizeof(grant_ref_t) * sc->xbd_max_request_segments, + M_XENBLOCKFRONT, M_NOWAIT); + if (cm->cm_sg_refs == NULL) + break; + cm->cm_id = i; + cm->cm_flags = XBDCF_INITIALIZER; + cm->cm_sc = sc; + if (bus_dmamap_create(sc->xbd_io_dmat, 0, &cm->cm_map) != 0) + break; + xbd_free_command(cm); + } if (sc->xbd_disk == NULL) { device_printf(dev, "%juMB <%s> at %s", (uintmax_t) sectors / (1048576 / sector_size), device_get_desc(dev), xenbus_get_node(dev)); bus_print_child_footer(device_get_parent(dev), dev); xbd_instance_create(sc, sectors, sc->xbd_vdevice, binfo, sector_size); } (void)xenbus_set_state(dev, XenbusStateConnected); /* Kick pending requests. */ mtx_lock(&sc->xbd_io_lock); sc->xbd_state = XBD_STATE_CONNECTED; xbd_startio(sc); sc->xbd_flags |= XBDF_READY; mtx_unlock(&sc->xbd_io_lock); } /** * Handle the change of state of the backend to Closing. We must delete our * device-layer structures now, to ensure that writes are flushed through to * the backend. Once this is done, we can switch to Closed in * acknowledgement. */ static void xbd_closing(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); xenbus_set_state(dev, XenbusStateClosing); DPRINTK("xbd_closing: %s removed\n", xenbus_get_node(dev)); if (sc->xbd_disk != NULL) { disk_destroy(sc->xbd_disk); sc->xbd_disk = NULL; } xenbus_set_state(dev, XenbusStateClosed); } /*---------------------------- NewBus Entrypoints ----------------------------*/ static int xbd_probe(device_t dev) { if (strcmp(xenbus_get_type(dev), "vbd") != 0) return (ENXIO); if (xen_hvm_domain()) { int error; char *type; /* * When running in an HVM domain, IDE disk emulation is * disabled early in boot so that native drivers will * not see emulated hardware. However, CDROM device * emulation cannot be disabled. * * Through use of FreeBSD's vm_guest and xen_hvm_domain() * APIs, we could modify the native CDROM driver to fail its * probe when running under Xen. Unfortunatlely, the PV * CDROM support in XenServer (up through at least version * 6.2) isn't functional, so we instead rely on the emulated * CDROM instance, and fail to attach the PV one here in * the blkfront driver. */ error = xs_read(XST_NIL, xenbus_get_node(dev), "device-type", NULL, (void **) &type); if (error) return (ENXIO); if (strncmp(type, "cdrom", 5) == 0) { free(type, M_XENSTORE); return (ENXIO); } free(type, M_XENSTORE); } device_set_desc(dev, "Virtual Block Device"); device_quiet(dev); return (0); } /* * Setup supplies the backend dir, virtual device. We place an event * channel and shared frame entries. We watch backend to wait if it's * ok. */ static int xbd_attach(device_t dev) { struct xbd_softc *sc; const char *name; uint32_t vdevice; int error; int i; int unit; /* FIXME: Use dynamic device id if this is not set. */ error = xs_scanf(XST_NIL, xenbus_get_node(dev), "virtual-device", NULL, "%" PRIu32, &vdevice); if (error) error = xs_scanf(XST_NIL, xenbus_get_node(dev), "virtual-device-ext", NULL, "%" PRIu32, &vdevice); if (error) { xenbus_dev_fatal(dev, error, "reading virtual-device"); device_printf(dev, "Couldn't determine virtual device.\n"); return (error); } xbd_vdevice_to_unit(vdevice, &unit, &name); if (!strcmp(name, "xbd")) device_set_unit(dev, unit); sc = device_get_softc(dev); mtx_init(&sc->xbd_io_lock, "blkfront i/o lock", NULL, MTX_DEF); xbd_initqs(sc); for (i = 0; i < XBD_MAX_RING_PAGES; i++) sc->xbd_ring_ref[i] = GRANT_REF_INVALID; sc->xbd_dev = dev; sc->xbd_vdevice = vdevice; sc->xbd_state = XBD_STATE_DISCONNECTED; xbd_setup_sysctl(sc); /* Wait for backend device to publish its protocol capabilities. */ xenbus_set_state(dev, XenbusStateInitialising); return (0); } static int xbd_detach(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); DPRINTK("%s: %s removed\n", __func__, xenbus_get_node(dev)); xbd_free(sc); mtx_destroy(&sc->xbd_io_lock); return 0; } static int xbd_suspend(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); int retval; int saved_state; /* Prevent new requests being issued until we fix things up. */ mtx_lock(&sc->xbd_io_lock); saved_state = sc->xbd_state; sc->xbd_state = XBD_STATE_SUSPENDED; /* Wait for outstanding I/O to drain. */ retval = 0; while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { if (msleep(&sc->xbd_cm_q[XBD_Q_BUSY], &sc->xbd_io_lock, PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) { retval = EBUSY; break; } } mtx_unlock(&sc->xbd_io_lock); if (retval != 0) sc->xbd_state = saved_state; return (retval); } static int xbd_resume(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); DPRINTK("xbd_resume: %s\n", xenbus_get_node(dev)); xbd_free(sc); xbd_initialize(sc); return (0); } /** * Callback received when the backend's state changes. */ static void xbd_backend_changed(device_t dev, XenbusState backend_state) { struct xbd_softc *sc = device_get_softc(dev); DPRINTK("backend_state=%d\n", backend_state); switch (backend_state) { case XenbusStateUnknown: case XenbusStateInitialising: case XenbusStateReconfigured: case XenbusStateReconfiguring: case XenbusStateClosed: break; case XenbusStateInitWait: case XenbusStateInitialised: xbd_initialize(sc); break; case XenbusStateConnected: xbd_initialize(sc); xbd_connect(sc); break; case XenbusStateClosing: if (sc->xbd_users > 0) xenbus_dev_error(dev, -EBUSY, "Device in use; refusing to close"); else xbd_closing(dev); break; } } /*---------------------------- NewBus Registration ---------------------------*/ static device_method_t xbd_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xbd_probe), DEVMETHOD(device_attach, xbd_attach), DEVMETHOD(device_detach, xbd_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xbd_suspend), DEVMETHOD(device_resume, xbd_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, xbd_backend_changed), { 0, 0 } }; static driver_t xbd_driver = { "xbd", xbd_methods, sizeof(struct xbd_softc), }; devclass_t xbd_devclass; DRIVER_MODULE(xbd, xenbusb_front, xbd_driver, xbd_devclass, 0, 0); Index: stable/10/sys/dev/xen/blkfront/block.h =================================================================== --- stable/10/sys/dev/xen/blkfront/block.h (revision 287800) +++ stable/10/sys/dev/xen/blkfront/block.h (revision 287801) @@ -1,347 +1,337 @@ /* * XenBSD block device driver * * Copyright (c) 2010-2013 Spectra Logic Corporation * Copyright (c) 2009 Scott Long, Yahoo! * Copyright (c) 2009 Frank Suchomel, Citrix * Copyright (c) 2009 Doug F. Rabson, Citrix * Copyright (c) 2005 Kip Macy * Copyright (c) 2003-2004, Keir Fraser & Steve Hand * Modifications by Mark A. Williamson are (c) Intel Research Cambridge * * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * $FreeBSD$ */ #ifndef __XEN_BLKFRONT_BLOCK_H__ #define __XEN_BLKFRONT_BLOCK_H__ #include /** * Given a number of blkif segments, compute the maximum I/O size supported. * * \note This calculation assumes that all but the first and last segments * of the I/O are fully utilized. * * \note We reserve a segement from the maximum supported by the transport to * guarantee we can handle an unaligned transfer without the need to * use a bounce buffer. */ #define XBD_SEGS_TO_SIZE(segs) \ (((segs) - 1) * PAGE_SIZE) /** * Compute the maximum number of blkif segments requried to represent * an I/O of the given size. * * \note This calculation assumes that all but the first and last segments * of the I/O are fully utilized. * * \note We reserve a segement to guarantee we can handle an unaligned * transfer without the need to use a bounce buffer. */ #define XBD_SIZE_TO_SEGS(size) \ ((size / PAGE_SIZE) + 1) /** * The maximum number of shared memory ring pages we will allow in a * negotiated block-front/back communication channel. Allow enough * ring space for all requests to be XBD_MAX_REQUEST_SIZE'd. */ #define XBD_MAX_RING_PAGES 32 /** - * The maximum number of outstanding requests blocks (request headers plus - * additional segment blocks) we will allow in a negotiated block-front/back - * communication channel. + * The maximum number of outstanding requests we will allow in a negotiated + * block-front/back communication channel. */ #define XBD_MAX_REQUESTS \ __CONST_RING_SIZE(blkif, PAGE_SIZE * XBD_MAX_RING_PAGES) /** * The maximum mapped region size per request we will allow in a negotiated * block-front/back communication channel. */ #define XBD_MAX_REQUEST_SIZE \ MIN(MAXPHYS, XBD_SEGS_TO_SIZE(BLKIF_MAX_SEGMENTS_PER_REQUEST)) - -/** - * The maximum number of segments (within a request header and accompanying - * segment blocks) per request we will allow in a negotiated block-front/back - * communication channel. - */ -#define XBD_MAX_SEGMENTS_PER_REQUEST \ - (MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \ - XBD_SIZE_TO_SEGS(XBD_MAX_REQUEST_SIZE))) typedef enum { XBDCF_Q_MASK = 0xFF, /* This command has contributed to xbd_qfrozen_cnt. */ XBDCF_FROZEN = 1<<8, /* Freeze the command queue on dispatch (i.e. single step command). */ XBDCF_Q_FREEZE = 1<<9, /* Bus DMA returned EINPROGRESS for this command. */ XBDCF_ASYNC_MAPPING = 1<<10, XBDCF_INITIALIZER = XBDCF_Q_MASK } xbdc_flag_t; struct xbd_command; typedef void xbd_cbcf_t(struct xbd_command *); struct xbd_command { TAILQ_ENTRY(xbd_command) cm_link; struct xbd_softc *cm_sc; xbdc_flag_t cm_flags; bus_dmamap_t cm_map; uint64_t cm_id; grant_ref_t *cm_sg_refs; struct bio *cm_bp; grant_ref_t cm_gref_head; void *cm_data; size_t cm_datalen; u_int cm_nseg; int cm_operation; blkif_sector_t cm_sector_number; int cm_status; xbd_cbcf_t *cm_complete; }; typedef enum { XBD_Q_FREE, XBD_Q_READY, XBD_Q_BUSY, XBD_Q_COMPLETE, XBD_Q_BIO, XBD_Q_COUNT, XBD_Q_NONE = XBDCF_Q_MASK } xbd_q_index_t; typedef struct xbd_cm_q { TAILQ_HEAD(, xbd_command) q_tailq; uint32_t q_length; uint32_t q_max; } xbd_cm_q_t; typedef enum { XBD_STATE_DISCONNECTED, XBD_STATE_CONNECTED, XBD_STATE_SUSPENDED } xbd_state_t; typedef enum { XBDF_NONE = 0, XBDF_OPEN = 1 << 0, /* drive is open (can't shut down) */ XBDF_BARRIER = 1 << 1, /* backend supports barriers */ XBDF_FLUSH = 1 << 2, /* backend supports flush */ XBDF_READY = 1 << 3, /* Is ready */ XBDF_CM_SHORTAGE = 1 << 4, /* Free cm resource shortage active. */ XBDF_GNT_SHORTAGE = 1 << 5, /* Grant ref resource shortage active */ XBDF_WAIT_IDLE = 1 << 6 /* * No new work until oustanding work * completes. */ } xbd_flag_t; /* * We have one of these per vbd, whether ide, scsi or 'other'. */ struct xbd_softc { device_t xbd_dev; struct disk *xbd_disk; /* disk params */ struct bio_queue_head xbd_bioq; /* sort queue */ int xbd_unit; xbd_flag_t xbd_flags; int xbd_qfrozen_cnt; int xbd_vdevice; xbd_state_t xbd_state; u_int xbd_ring_pages; uint32_t xbd_max_requests; uint32_t xbd_max_request_segments; uint32_t xbd_max_request_size; grant_ref_t xbd_ring_ref[XBD_MAX_RING_PAGES]; blkif_front_ring_t xbd_ring; xen_intr_handle_t xen_intr_handle; struct gnttab_free_callback xbd_callback; xbd_cm_q_t xbd_cm_q[XBD_Q_COUNT]; bus_dma_tag_t xbd_io_dmat; /** * The number of people holding this device open. We won't allow a * hot-unplug unless this is 0. */ int xbd_users; struct mtx xbd_io_lock; struct xbd_command *xbd_shadow; }; int xbd_instance_create(struct xbd_softc *, blkif_sector_t sectors, int device, uint16_t vdisk_info, unsigned long sector_size); static inline void xbd_added_qentry(struct xbd_softc *sc, xbd_q_index_t index) { struct xbd_cm_q *cmq; cmq = &sc->xbd_cm_q[index]; cmq->q_length++; if (cmq->q_length > cmq->q_max) cmq->q_max = cmq->q_length; } static inline void xbd_removed_qentry(struct xbd_softc *sc, xbd_q_index_t index) { sc->xbd_cm_q[index].q_length--; } static inline uint32_t xbd_queue_length(struct xbd_softc *sc, xbd_q_index_t index) { return (sc->xbd_cm_q[index].q_length); } static inline void xbd_initq_cm(struct xbd_softc *sc, xbd_q_index_t index) { struct xbd_cm_q *cmq; cmq = &sc->xbd_cm_q[index]; TAILQ_INIT(&cmq->q_tailq); cmq->q_length = 0; cmq->q_max = 0; } static inline void xbd_enqueue_cm(struct xbd_command *cm, xbd_q_index_t index) { KASSERT(index != XBD_Q_BIO, ("%s: Commands cannot access the bio queue.", __func__)); if ((cm->cm_flags & XBDCF_Q_MASK) != XBD_Q_NONE) panic("%s: command %p is already on queue %d.", __func__, cm, cm->cm_flags & XBDCF_Q_MASK); TAILQ_INSERT_TAIL(&cm->cm_sc->xbd_cm_q[index].q_tailq, cm, cm_link); cm->cm_flags &= ~XBDCF_Q_MASK; cm->cm_flags |= index; xbd_added_qentry(cm->cm_sc, index); } static inline void xbd_requeue_cm(struct xbd_command *cm, xbd_q_index_t index) { KASSERT(index != XBD_Q_BIO, ("%s: Commands cannot access the bio queue.", __func__)); if ((cm->cm_flags & XBDCF_Q_MASK) != XBD_Q_NONE) panic("%s: command %p is already on queue %d.", __func__, cm, cm->cm_flags & XBDCF_Q_MASK); TAILQ_INSERT_HEAD(&cm->cm_sc->xbd_cm_q[index].q_tailq, cm, cm_link); cm->cm_flags &= ~XBDCF_Q_MASK; cm->cm_flags |= index; xbd_added_qentry(cm->cm_sc, index); } static inline struct xbd_command * xbd_dequeue_cm(struct xbd_softc *sc, xbd_q_index_t index) { struct xbd_command *cm; KASSERT(index != XBD_Q_BIO, ("%s: Commands cannot access the bio queue.", __func__)); if ((cm = TAILQ_FIRST(&sc->xbd_cm_q[index].q_tailq)) != NULL) { if ((cm->cm_flags & XBDCF_Q_MASK) != index) { panic("%s: command %p is on queue %d, " "not specified queue %d", __func__, cm, cm->cm_flags & XBDCF_Q_MASK, index); } TAILQ_REMOVE(&sc->xbd_cm_q[index].q_tailq, cm, cm_link); cm->cm_flags &= ~XBDCF_Q_MASK; cm->cm_flags |= XBD_Q_NONE; xbd_removed_qentry(cm->cm_sc, index); } return (cm); } static inline void xbd_remove_cm(struct xbd_command *cm, xbd_q_index_t expected_index) { xbd_q_index_t index; index = cm->cm_flags & XBDCF_Q_MASK; KASSERT(index != XBD_Q_BIO, ("%s: Commands cannot access the bio queue.", __func__)); if (index != expected_index) { panic("%s: command %p is on queue %d, not specified queue %d", __func__, cm, index, expected_index); } TAILQ_REMOVE(&cm->cm_sc->xbd_cm_q[index].q_tailq, cm, cm_link); cm->cm_flags &= ~XBDCF_Q_MASK; cm->cm_flags |= XBD_Q_NONE; xbd_removed_qentry(cm->cm_sc, index); } static inline void xbd_initq_bio(struct xbd_softc *sc) { bioq_init(&sc->xbd_bioq); } static inline void xbd_enqueue_bio(struct xbd_softc *sc, struct bio *bp) { bioq_insert_tail(&sc->xbd_bioq, bp); xbd_added_qentry(sc, XBD_Q_BIO); } static inline void xbd_requeue_bio(struct xbd_softc *sc, struct bio *bp) { bioq_insert_head(&sc->xbd_bioq, bp); xbd_added_qentry(sc, XBD_Q_BIO); } static inline struct bio * xbd_dequeue_bio(struct xbd_softc *sc) { struct bio *bp; if ((bp = bioq_first(&sc->xbd_bioq)) != NULL) { bioq_remove(&sc->xbd_bioq, bp); xbd_removed_qentry(sc, XBD_Q_BIO); } return (bp); } static inline void xbd_initqs(struct xbd_softc *sc) { u_int index; for (index = 0; index < XBD_Q_COUNT; index++) xbd_initq_cm(sc, index); xbd_initq_bio(sc); } #endif /* __XEN_BLKFRONT_BLOCK_H__ */ Index: stable/10/sys/xen/interface/io/blkif.h =================================================================== --- stable/10/sys/xen/interface/io/blkif.h (revision 287800) +++ stable/10/sys/xen/interface/io/blkif.h (revision 287801) @@ -1,558 +1,492 @@ /****************************************************************************** * blkif.h * * Unified block-device I/O interface for Xen guest OSes. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2003-2004, Keir Fraser * Copyright (c) 2012, Spectra Logic Corporation */ #ifndef __XEN_PUBLIC_IO_BLKIF_H__ #define __XEN_PUBLIC_IO_BLKIF_H__ #include "ring.h" #include "../grant_table.h" /* * Front->back notifications: When enqueuing a new request, sending a * notification can be made conditional on req_event (i.e., the generic * hold-off mechanism provided by the ring macros). Backends must set * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). * * Back->front notifications: When enqueuing a new response, sending a * notification can be made conditional on rsp_event (i.e., the generic * hold-off mechanism provided by the ring macros). Frontends must set * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). */ #ifndef blkif_vdev_t #define blkif_vdev_t uint16_t #endif #define blkif_sector_t uint64_t /* * Feature and Parameter Negotiation * ================================= * The two halves of a Xen block driver utilize nodes within the XenStore to * communicate capabilities and to negotiate operating parameters. This * section enumerates these nodes which reside in the respective front and * backend portions of the XenStore, following the XenBus convention. * * All data in the XenStore is stored as strings. Nodes specifying numeric * values are encoded in decimal. Integer value ranges listed below are * expressed as fixed sized integer types capable of storing the conversion * of a properly formatted node string, without loss of information. * * Any specified default value is in effect if the corresponding XenBus node * is not present in the XenStore. * * XenStore nodes in sections marked "PRIVATE" are solely for use by the * driver side whose XenBus tree contains them. * * XenStore nodes marked "DEPRECATED" in their notes section should only be * used to provide interoperability with legacy implementations. * * See the XenBus state transition diagram below for details on when XenBus * nodes must be published and when they can be queried. * ***************************************************************************** * Backend XenBus Nodes ***************************************************************************** * *------------------ Backend Device Identification (PRIVATE) ------------------ * * mode * Values: "r" (read only), "w" (writable) * * The read or write access permissions to the backing store to be * granted to the frontend. * * params * Values: string * * Data used by the backend driver to locate and configure the backing * device. The format and semantics of this data vary according to the * backing device in use and are outside the scope of this specification. * * type * Values: "file", "phy", "tap" * * The type of the backing device/object. * *--------------------------------- Features --------------------------------- * * feature-barrier * Values: 0/1 (boolean) * Default Value: 0 * * A value of "1" indicates that the backend can process requests * containing the BLKIF_OP_WRITE_BARRIER request opcode. Requests * of this type may still be returned at any time with the * BLKIF_RSP_EOPNOTSUPP result code. * * feature-flush-cache * Values: 0/1 (boolean) * Default Value: 0 * * A value of "1" indicates that the backend can process requests * containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. Requests * of this type may still be returned at any time with the * BLKIF_RSP_EOPNOTSUPP result code. * * feature-discard * Values: 0/1 (boolean) * Default Value: 0 * * A value of "1" indicates that the backend can process requests * containing the BLKIF_OP_DISCARD request opcode. Requests * of this type may still be returned at any time with the * BLKIF_RSP_EOPNOTSUPP result code. * *----------------------- Request Transport Parameters ------------------------ * * max-ring-page-order * Values: * Default Value: 0 * Notes: 1, 3 * * The maximum supported size of the request ring buffer in units of * lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages, * etc.). * * max-ring-pages * Values: * Default Value: 1 * Notes: DEPRECATED, 2, 3 * * The maximum supported size of the request ring buffer in units of * machine pages. The value must be a power of 2. * - * max-requests - * Default Value: BLKIF_MAX_RING_REQUESTS(PAGE_SIZE) - * Maximum Value: BLKIF_MAX_RING_REQUESTS(PAGE_SIZE * max-ring-pages) - * - * The maximum number of concurrent, logical requests supported by - * the backend. - * - * Note: A logical request may span multiple ring entries. - * - * max-request-segments - * Values: - * Default Value: BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK - * Maximum Value: BLKIF_MAX_SEGMENTS_PER_REQUEST - * - * The maximum value of blkif_request.nr_segments supported by - * the backend. - * - * max-request-size - * Values: - * Default Value: BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK * PAGE_SIZE - * Maximum Value: BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE - * - * The maximum amount of data, in bytes, that can be referenced by a - * request type that accesses frontend memory (currently BLKIF_OP_READ, - * BLKIF_OP_WRITE, or BLKIF_OP_WRITE_BARRIER). - * *------------------------- Backend Device Properties ------------------------- * * discard-alignment * Values: * Default Value: 0 * Notes: 4, 5 * * The offset, in bytes from the beginning of the virtual block device, * to the first, addressable, discard extent on the underlying device. * * discard-granularity * Values: * Default Value: <"sector-size"> * Notes: 4 * * The size, in bytes, of the individually addressable discard extents * of the underlying device. * * discard-secure * Values: 0/1 (boolean) * Default Value: 0 * * A value of "1" indicates that the backend can process BLKIF_OP_DISCARD * requests with the BLKIF_DISCARD_SECURE flag set. * * info * Values: (bitmap) * * A collection of bit flags describing attributes of the backing * device. The VDISK_* macros define the meaning of each bit * location. * * sector-size * Values: * * The size, in bytes, of the individually addressible data blocks * on the backend device. * * sectors * Values: * * The size of the backend device, expressed in units of its native * sector size ("sector-size"). * ***************************************************************************** * Frontend XenBus Nodes ***************************************************************************** * *----------------------- Request Transport Parameters ----------------------- * * event-channel * Values: * * The identifier of the Xen event channel used to signal activity * in the ring buffer. * * ring-ref * Values: * Notes: 6 * * The Xen grant reference granting permission for the backend to map * the sole page in a single page sized ring buffer. * * ring-ref%u * Values: * Notes: 6 * * For a frontend providing a multi-page ring, a "number of ring pages" * sized list of nodes, each containing a Xen grant reference granting * permission for the backend to map the page of the ring located * at page index "%u". Page indexes are zero based. * * protocol * Values: string (XEN_IO_PROTO_ABI_*) * Default Value: XEN_IO_PROTO_ABI_NATIVE * * The machine ABI rules governing the format of all ring request and * response structures. * * ring-page-order * Values: * Default Value: 0 * Maximum Value: MAX(ffs(max-ring-pages) - 1, max-ring-page-order) * Notes: 1, 3 * * The size of the frontend allocated request ring buffer in units * of lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages, * etc.). * * num-ring-pages * Values: * Default Value: 1 * Maximum Value: MAX(max-ring-pages,(0x1 << max-ring-page-order)) * Notes: DEPRECATED, 2, 3 * * The size of the frontend allocated request ring buffer in units of * machine pages. The value must be a power of 2. * - * max-requests - * Values: - * Default Value: BLKIF_MAX_RING_REQUESTS(PAGE_SIZE) - * Maximum Value: BLKIF_MAX_RING_REQUESTS(PAGE_SIZE * max-ring-pages) - * - * The maximum number of concurrent, logical requests that will be - * issued by the frontend. - * - * Note: A logical request may span multiple ring entries. - * - * max-request-segments - * Values: - * Default Value: BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK - * Maximum Value: MIN(255, backend/max-request-segments) - * - * The maximum value the frontend will set in the - * blkif_request.nr_segments field. - * - * max-request-size - * Values: - * Default Value: BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK * PAGE_SIZE - * Maximum Value: max-request-segments * PAGE_SIZE - * - * The maximum amount of data, in bytes, that can be referenced by - * a request type that accesses frontend memory (currently BLKIF_OP_READ, - * BLKIF_OP_WRITE, or BLKIF_OP_WRITE_BARRIER). - * *------------------------- Virtual Device Properties ------------------------- * * device-type * Values: "disk", "cdrom", "floppy", etc. * * virtual-device * Values: * * A value indicating the physical device to virtualize within the * frontend's domain. (e.g. "The first ATA disk", "The third SCSI * disk", etc.) * * See docs/misc/vbd-interface.txt for details on the format of this * value. * * Notes * ----- * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer * PV drivers. * (2) Multi-page ring buffer scheme first used in some Red Hat distributions * including a distribution deployed on certain nodes of the Amazon * EC2 cluster. * (3) Support for multi-page ring buffers was implemented independently, * in slightly different forms, by both Citrix and Red Hat/Amazon. * For full interoperability, block front and backends should publish * identical ring parameters, adjusted for unit differences, to the * XenStore nodes used in both schemes. * (4) Devices that support discard functionality may internally allocate * space (discardable extents) in units that are larger than the * exported logical block size. * (5) The discard-alignment parameter allows a physical device to be * partitioned into virtual devices that do not necessarily begin or * end on a discardable extent boundary. * (6) When there is only a single page allocated to the request ring, * 'ring-ref' is used to communicate the grant reference for this * page to the backend. When using a multi-page ring, the 'ring-ref' * node is not created. Instead 'ring-ref0' - 'ring-refN' are used. */ /* * STATE DIAGRAMS * ***************************************************************************** * Startup * ***************************************************************************** * * Tool stack creates front and back nodes with state XenbusStateInitialising. * * Front Back * ================================= ===================================== * XenbusStateInitialising XenbusStateInitialising * o Query virtual device o Query backend device identification * properties. data. * o Setup OS device instance. o Open and validate backend device. * o Publish backend features and * transport parameters. * | * | * V * XenbusStateInitWait * * o Query backend features and * transport parameters. * o Allocate and initialize the * request ring. * o Publish transport parameters * that will be in effect during * this connection. * | * | * V * XenbusStateInitialised * * o Query frontend transport parameters. * o Connect to the request ring and * event channel. * o Publish backend device properties. * | * | * V * XenbusStateConnected * * o Query backend device properties. * o Finalize OS virtual device * instance. * | * | * V * XenbusStateConnected * * Note: Drivers that do not support any optional features, or the negotiation * of transport parameters, can skip certain states in the state machine: * * o A frontend may transition to XenbusStateInitialised without * waiting for the backend to enter XenbusStateInitWait. In this * case, default transport parameters are in effect and any * transport parameters published by the frontend must contain * their default values. * * o A backend may transition to XenbusStateInitialised, bypassing * XenbusStateInitWait, without waiting for the frontend to first * enter the XenbusStateInitialised state. In this case, default * transport parameters are in effect and any transport parameters * published by the backend must contain their default values. * * Drivers that support optional features and/or transport parameter * negotiation must tolerate these additional state transition paths. * In general this means performing the work of any skipped state * transition, if it has not already been performed, in addition to the * work associated with entry into the current state. */ /* * REQUEST CODES. */ #define BLKIF_OP_READ 0 #define BLKIF_OP_WRITE 1 /* * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER * operation code ("barrier request") must be completed prior to the * execution of the barrier request. All writes issued after the barrier * request must not execute until after the completion of the barrier request. * * Optional. See "feature-barrier" XenBus node documentation above. */ #define BLKIF_OP_WRITE_BARRIER 2 /* * Commit any uncommitted contents of the backing device's volatile cache * to stable storage. * * Optional. See "feature-flush-cache" XenBus node documentation above. */ #define BLKIF_OP_FLUSH_DISKCACHE 3 /* * Used in SLES sources for device specific command packet * contained within the request. Reserved for that purpose. */ #define BLKIF_OP_RESERVED_1 4 /* * Indicate to the backend device that a region of storage is no longer in * use, and may be discarded at any time without impact to the client. If * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the * discarded region on the device must be rendered unrecoverable before the * command returns. * * This operation is analogous to performing a trim (ATA) or unmap (SCSI), * command on a native device. * * More information about trim/unmap operations can be found at: * http://t13.org/Documents/UploadedDocuments/docs2008/ * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc * http://www.seagate.com/staticfiles/support/disc/manuals/ * Interface%20manuals/100293068c.pdf * * Optional. See "feature-discard", "discard-alignment", * "discard-granularity", and "discard-secure" in the XenBus node * documentation above. */ #define BLKIF_OP_DISCARD 5 /* - * Maximum scatter/gather segments per request (header + segment blocks). + * Maximum scatter/gather segments per request. + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. + * NB. This could be 12 if the ring indexes weren't stored in the same page. */ #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 /* * NB. first_sect and last_sect in blkif_request_segment, as well as * sector_number in blkif_request, are always expressed in 512-byte units. * However they must be properly aligned to the real sector size of the * physical disk, which is reported in the "sector-size" node in the backend * xenbus info. Also the xenbus "sectors" node is expressed in 512-byte units. */ struct blkif_request_segment { grant_ref_t gref; /* reference to I/O buffer frame */ /* @first_sect: first sector in frame to transfer (inclusive). */ /* @last_sect: last sector in frame to transfer (inclusive). */ uint8_t first_sect, last_sect; }; typedef struct blkif_request_segment blkif_request_segment_t; /* * Starting ring element for any I/O request. - * - * One or more segment blocks can be inserted into the request ring - * just after a blkif_request_t, allowing requests to operate on - * up to BLKIF_MAX_SEGMENTS_PER_REQUEST. - * - * BLKIF_SEGS_TO_BLOCKS() can be used on blkif_requst.nr_segments - * to determine the number of contiguous ring entries associated - * with this request. - * - * Note: Due to the way Xen request rings operate, the producer and - * consumer indices of the ring must be incremented by the - * BLKIF_SEGS_TO_BLOCKS() value of the associated request. - * (e.g. a response to a 3 ring entry request must also consume - * 3 entries in the ring, even though only the first ring entry - * in the response has any data.) */ struct blkif_request { uint8_t operation; /* BLKIF_OP_??? */ uint8_t nr_segments; /* number of segments */ blkif_vdev_t handle; /* only for read/write requests */ uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ blkif_request_segment_t seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; typedef struct blkif_request blkif_request_t; /* * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request) */ struct blkif_request_discard { uint8_t operation; /* BLKIF_OP_DISCARD */ uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ #define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */ blkif_vdev_t handle; /* same as for read/write requests */ uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk */ uint64_t nr_sectors; /* number of contiguous sectors to discard*/ }; typedef struct blkif_request_discard blkif_request_discard_t; struct blkif_response { uint64_t id; /* copied from request */ uint8_t operation; /* copied from request */ int16_t status; /* BLKIF_RSP_??? */ }; typedef struct blkif_response blkif_response_t; /* * STATUS RETURN CODES. */ /* Operation not supported (only happens on barrier writes). */ #define BLKIF_RSP_EOPNOTSUPP -2 /* Operation failed for some unspecified reason (-EIO). */ #define BLKIF_RSP_ERROR -1 /* Operation completed successfully. */ #define BLKIF_RSP_OKAY 0 /* * Generate blkif ring structures and types. */ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); #define VDISK_CDROM 0x1 #define VDISK_REMOVABLE 0x2 #define VDISK_READONLY 0x4 #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ /* * Local variables: * mode: C * c-set-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */