Index: user/marcel/libvdsk/bhyve/block_if.c =================================================================== --- user/marcel/libvdsk/bhyve/block_if.c (revision 286995) +++ user/marcel/libvdsk/bhyve/block_if.c (revision 286996) @@ -1,641 +1,636 @@ /*- * Copyright (c) 2013 Peter Grehan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "mevent.h" #include "block_if.h" #define BLOCKIF_SIG 0xb109b109 #define BLOCKIF_NUMTHR 8 #define BLOCKIF_MAXREQ (64 + BLOCKIF_NUMTHR) /* Operations a request element can carry; see the switch in blockif_proc(). */ enum blockop { BOP_READ, BOP_WRITE, BOP_FLUSH, BOP_DELETE }; /* States a request element moves through while on the free, pending and busy queues. */ enum blockstat { BST_FREE, BST_BLOCK, BST_PEND, BST_BUSY, BST_DONE }; /* One request slot. be_block holds the end offset computed by blockif_enqueue() and be_tid the worker thread that dequeued the element. */ struct blockif_elem { TAILQ_ENTRY(blockif_elem) be_link; struct blockif_req *be_req; enum blockop be_op; enum blockstat be_status; pthread_t be_tid; off_t be_block; }; /* Per-device state: worker thread ids, the mutex/condvar used around queue manipulation, and a fixed pool of BLOCKIF_MAXREQ request elements. */ struct blockif_ctxt { int bc_magic; int bc_candelete; int bc_rdonly; int bc_closing; pthread_t bc_btid[BLOCKIF_NUMTHR]; pthread_mutex_t bc_mtx; pthread_cond_t bc_cond; /* Request elements and free/pending/busy queues */ TAILQ_HEAD(, blockif_elem) bc_freeq; TAILQ_HEAD(, blockif_elem) bc_pendq; TAILQ_HEAD(, blockif_elem) bc_busyq; struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; }; static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; /* Waiter record used by blockif_cancel(): it is pushed on blockif_bse_head and released by blockif_sigcont_handler(). */ struct blockif_sig_elem { pthread_mutex_t bse_mtx; pthread_cond_t bse_cond; int bse_pending; struct blockif_sig_elem *bse_next; }; static struct blockif_sig_elem *blockif_bse_head; /* Take the first free element for breq, record op, and append it to the pending queue. be_block is set to the offset just past the request (br_offset plus the sum of the iov lengths) for read/write/delete and to OFF_MAX otherwise. The element becomes BST_BLOCK when a pending or busy element's be_block equals breq->br_offset, else BST_PEND. Returns non-zero when the element was queued as BST_PEND. The free queue must not be empty (asserted). */ static int blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, enum blockop op) { struct blockif_elem *be, *tbe; off_t off; int i; be = TAILQ_FIRST(&bc->bc_freeq); assert(be != NULL); assert(be->be_status == BST_FREE); TAILQ_REMOVE(&bc->bc_freeq, be, be_link); be->be_req = breq; be->be_op = op; switch (op) { case BOP_READ: case BOP_WRITE: case BOP_DELETE: off = breq->br_offset; for (i = 0; i < breq->br_iovcnt; i++) off += breq->br_iov[i].iov_len; break; default: off = OFF_MAX; } be->be_block = off; TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { if (tbe->be_block == breq->br_offset) break; } if (tbe == NULL) { TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) { if (tbe->be_block == breq->br_offset) break; } } if (tbe 
== NULL) be->be_status = BST_PEND; else be->be_status = BST_BLOCK; TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); return (be->be_status == BST_PEND); } /* Move the first BST_PEND element of the pending queue to the busy queue, tag it with thread t and hand it back through *bep. Returns 1 on success, 0 when the pending queue holds no BST_PEND element. */ static int blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) { struct blockif_elem *be; TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { if (be->be_status == BST_PEND) break; assert(be->be_status == BST_BLOCK); } if (be == NULL) return (0); TAILQ_REMOVE(&bc->bc_pendq, be, be_link); be->be_status = BST_BUSY; be->be_tid = t; TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); *bep = be; return (1); } /* Retire be: unlink it from the busy queue (status BST_DONE or BST_BUSY) or else from the pending queue, set to BST_PEND every pending element whose request offset equals be->be_block, then reset be and return it to the free queue. */ static void blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) { struct blockif_elem *tbe; if (be->be_status == BST_DONE || be->be_status == BST_BUSY) TAILQ_REMOVE(&bc->bc_busyq, be, be_link); else TAILQ_REMOVE(&bc->bc_pendq, be, be_link); TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { if (tbe->be_req->br_offset == be->be_block) tbe->be_status = BST_PEND; } be->be_tid = 0; be->be_status = BST_FREE; be->be_req = NULL; TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); } /* Execute one request through the vdsk_* call matching be->be_op (EINVAL for an unknown op), mark the element BST_DONE and invoke the request's callback with the resulting error code. buf is only ever reset to NULL here; none of the backend calls use it. */ static void blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) { struct blockif_req *br; int err; br = be->be_req; if (br->br_iovcnt <= 1) buf = NULL; err = 0; switch (be->be_op) { case BOP_READ: - err = vdsk_read(bc, br->br_iov, br->br_iovcnt, br->br_offset); + err = vdsk_read(bc, br->br_offset, br->br_iov, br->br_iovcnt); break; case BOP_WRITE: - err = vdsk_write(bc, br->br_iov, br->br_iovcnt, br->br_offset); + err = vdsk_write(bc, br->br_offset, br->br_iov, br->br_iovcnt); break; case BOP_FLUSH: err = vdsk_flush(bc); break; case BOP_DELETE: - if (!bc->bc_candelete) - err = EOPNOTSUPP; - else if (bc->bc_rdonly) - err = EROFS; - else - err = EOPNOTSUPP; + err = vdsk_trim(bc, br->br_offset, br->br_resid); break; default: err = EINVAL; break; } be->be_status = BST_DONE; (*br->br_callback)(br, err); } /* Worker thread body: dequeues and processes requests, releasing bc_mtx around blockif_proc(); waits on bc_cond when nothing is runnable and exits once bc_closing is set. */ static void * blockif_thr(void *arg) { struct blockif_ctxt *bc; struct blockif_elem *be; pthread_t t; 
bc = arg; t = pthread_self(); pthread_mutex_lock(&bc->bc_mtx); for (;;) { while (blockif_dequeue(bc, t, &be)) { pthread_mutex_unlock(&bc->bc_mtx); blockif_proc(bc, be, NULL); pthread_mutex_lock(&bc->bc_mtx); blockif_complete(bc, be); } /* Check ctxt status here to see if exit requested */ if (bc->bc_closing) break; pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); } pthread_mutex_unlock(&bc->bc_mtx); pthread_exit(NULL); return (NULL); } /* mevent callback for SIGCONT: pops every element off the blockif_bse_head list (compare-and-set loop) and wakes its waiter by clearing bse_pending and signalling bse_cond. */ static void blockif_sigcont_handler(int signal, enum ev_type type, void *arg) { struct blockif_sig_elem *bse; for (;;) { /* * Process the entire list even if not intended for * this thread. */ do { bse = blockif_bse_head; if (bse == NULL) return; } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, (uintptr_t)bse, (uintptr_t)bse->bse_next)); pthread_mutex_lock(&bse->bse_mtx); bse->bse_pending = 0; pthread_cond_signal(&bse->bse_cond); pthread_mutex_unlock(&bse->bse_mtx); } } /* One-time setup, run through pthread_once() from blockif_open(): registers blockif_sigcont_handler for SIGCONT with mevent and sets the signal disposition to SIG_IGN. */ static void blockif_init(void) { mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); (void) signal(SIGCONT, SIG_IGN); } /* Open the backing store named by the first comma-separated element of optstr and apply the remaining options (nocache, sync/direct, ro, sectorsize=N[/M]; the sectorsize values are parsed but not used further here). A failed read/write open is retried read-only. On success the request queues are initialised and BLOCKIF_NUMTHR worker threads named blk-<ident>-<n> are started. Returns the context, or NULL on an invalid option, allocation failure or open failure. */ struct blockif_ctxt * blockif_open(const char *optstr, const char *ident) { char tname[MAXCOMLEN + 1]; char *nopt, *xopts, *cp; struct blockif_ctxt *bc; int extra, i; int nocache, sync, ro, candelete, ssopt, pssopt; pthread_once(&blockif_once, blockif_init); ssopt = 0; nocache = 0; sync = 0; ro = 0; /* Nothing in this function probes for TRIM support, so start from a defined "not supported" value instead of copying an indeterminate one into the context. */ candelete = 0; /* * The first element in the optstring is always a pathname. 
* Optional elements follow */ nopt = xopts = strdup(optstr); if (nopt == NULL) { perror("Could not copy device options"); return (NULL); } while (xopts != NULL) { cp = strsep(&xopts, ","); if (cp == nopt) /* file or device pathname */ continue; else if (!strcmp(cp, "nocache")) nocache = 1; else if (!strcmp(cp, "sync") || !strcmp(cp, "direct")) sync = 1; else if (!strcmp(cp, "ro")) ro = 1; else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2) ; else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1) pssopt = ssopt; else { fprintf(stderr, "Invalid device option \"%s\"\n", cp); /* cp points into nopt, so release it only after printing */ free(nopt); return (NULL); } } extra = 0; if (nocache) extra |= O_DIRECT; if (sync) extra |= O_SYNC; bc = vdsk_open(nopt, (ro ? O_RDONLY : O_RDWR) | extra, sizeof(*bc)); if (bc == NULL && !ro) { /* The read/write open failed; retry read-only */ bc = vdsk_open(nopt, O_RDONLY | extra, sizeof(*bc)); ro = 1; } if (bc == NULL) { perror("Could not open backing file"); free(nopt); return (NULL); } /* The option copy is not referenced past this point. */ free(nopt); bc->bc_magic = BLOCKIF_SIG; bc->bc_candelete = candelete; bc->bc_rdonly = ro; pthread_mutex_init(&bc->bc_mtx, NULL); pthread_cond_init(&bc->bc_cond, NULL); TAILQ_INIT(&bc->bc_freeq); TAILQ_INIT(&bc->bc_pendq); TAILQ_INIT(&bc->bc_busyq); for (i = 0; i < BLOCKIF_MAXREQ; i++) { bc->bc_reqs[i].be_status = BST_FREE; TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); } for (i = 0; i < BLOCKIF_NUMTHR; i++) { pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); pthread_set_name_np(bc->bc_btid[i], tname); } return (bc); } /* Queue breq as operation op under bc_mtx and signal bc_cond when blockif_enqueue() reports it runnable. Returns 0, or E2BIG when no free request element is available. */ static int blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, enum blockop op) { int err; err = 0; pthread_mutex_lock(&bc->bc_mtx); if (!TAILQ_EMPTY(&bc->bc_freeq)) { /* * Enqueue and inform the block i/o thread * that there is work available */ if (blockif_enqueue(bc, breq, op)) pthread_cond_signal(&bc->bc_cond); } else { /* * Callers are not allowed to enqueue more than * the specified blockif queue limit. Return an * error to indicate that the queue length has been * exceeded. 
*/ err = E2BIG; } pthread_mutex_unlock(&bc->bc_mtx); return (err); } /* Public entry points: each checks the context magic and queues breq with the matching operation; the return value is that of blockif_request(). */ int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_READ)); } int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_WRITE)); } int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_FLUSH)); } int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_DELETE)); } /* Cancel breq. A request still on the pending queue is completed immediately and 0 is returned; EINVAL is returned when it is on neither the pending nor the busy queue; for an in-flight request the worker thread is sent SIGCONT until the element is no longer BST_BUSY, then EBUSY is returned. */ int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) { struct blockif_elem *be; assert(bc->bc_magic == BLOCKIF_SIG); pthread_mutex_lock(&bc->bc_mtx); /* * Check pending requests. */ TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { if (be->be_req == breq) break; } if (be != NULL) { /* * Found it. */ blockif_complete(bc, be); pthread_mutex_unlock(&bc->bc_mtx); return (0); } /* * Check in-flight requests. */ TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { if (be->be_req == breq) break; } if (be == NULL) { /* * Didn't find it. */ pthread_mutex_unlock(&bc->bc_mtx); return (EINVAL); } /* * Interrupt the processing thread to force it to return * prematurely via its normal callback path. */ while (be->be_status == BST_BUSY) { struct blockif_sig_elem bse, *old_head; pthread_mutex_init(&bse.bse_mtx, NULL); pthread_cond_init(&bse.bse_cond, NULL); bse.bse_pending = 1; /* Push the on-stack waiter record onto the global list; blockif_sigcont_handler() pops it and clears bse_pending. */ do { old_head = blockif_bse_head; bse.bse_next = old_head; } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, (uintptr_t)old_head, (uintptr_t)&bse)); pthread_kill(be->be_tid, SIGCONT); pthread_mutex_lock(&bse.bse_mtx); while (bse.bse_pending) pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); pthread_mutex_unlock(&bse.bse_mtx); } pthread_mutex_unlock(&bc->bc_mtx); /* * The processing thread has been interrupted. 
Since it's not * clear if the callback has been invoked yet, return EBUSY. */ return (EBUSY); } /* Shut the context down: set bc_closing, wake and join all BLOCKIF_NUMTHR workers, clear the magic and hand the context to vdsk_close(). Always returns 0; requests still queued are not cancelled (see the XXX note). */ int blockif_close(struct blockif_ctxt *bc) { void *jval; int err, i; err = 0; assert(bc->bc_magic == BLOCKIF_SIG); /* * Stop the block i/o threads */ pthread_mutex_lock(&bc->bc_mtx); bc->bc_closing = 1; pthread_mutex_unlock(&bc->bc_mtx); pthread_cond_broadcast(&bc->bc_cond); for (i = 0; i < BLOCKIF_NUMTHR; i++) pthread_join(bc->bc_btid[i], &jval); /* XXX Cancel queued i/o's ??? */ /* * Release resources */ bc->bc_magic = 0; vdsk_close(bc); return (0); } /* * Return virtual C/H/S values for a given block device. Use the algorithm * outlined in the VHD specification to calculate values. The sector count * is capacity divided by sector size; results are stored through c, h and s. */ void blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) { off_t sectors; /* total sectors of the block dev */ off_t hcyl; /* cylinders times heads */ uint16_t secpt; /* sectors per track */ uint8_t heads; assert(bc->bc_magic == BLOCKIF_SIG); sectors = vdsk_capacity(bc) / vdsk_sectorsize(bc); /* Clamp the size to the largest possible with CHS */ if (sectors > 65535UL*16*255) sectors = 65535UL*16*255; if (sectors >= 65536UL*16*63) { secpt = 255; heads = 16; hcyl = sectors / secpt; } else { /* Try 17, then 31, then 63 sectors per track until cylinders-times-heads fits below heads * 1024. */ secpt = 17; hcyl = sectors / secpt; heads = (hcyl + 1023) / 1024; if (heads < 4) heads = 4; if (hcyl >= (heads * 1024) || heads > 16) { secpt = 31; heads = 16; hcyl = sectors / secpt; } if (hcyl >= (heads * 1024)) { secpt = 63; heads = 16; hcyl = sectors / secpt; } } *c = hcyl / heads; *h = heads; *s = secpt; } /* * Accessors. Each asserts the context magic before answering. */ /* Capacity as reported by vdsk_capacity(). */ off_t blockif_size(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (vdsk_capacity(bc)); } /* Sector size as reported by vdsk_sectorsize(). */ int blockif_sectsz(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (vdsk_sectorsize(bc)); } /* Physical sector size and offset: reports the vdsk sector size and a fixed offset of 0. */ void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) { assert(bc->bc_magic == BLOCKIF_SIG); *size = vdsk_sectorsize(bc); *off = 0; } /* Queue depth offered to callers: one less than BLOCKIF_MAXREQ. */ int blockif_queuesz(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return 
(BLOCKIF_MAXREQ - 1); } /* Return the read-only flag recorded by blockif_open(). */ int blockif_is_ro(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_rdonly); } /* Return the delete-capability flag stored in the context by blockif_open(). */ int blockif_candelete(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_candelete); } Index: user/marcel/libvdsk/bhyveload/bhyveload.c =================================================================== --- user/marcel/libvdsk/bhyveload/bhyveload.c (revision 286995) +++ user/marcel/libvdsk/bhyveload/bhyveload.c (revision 286996) @@ -1,747 +1,747 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /*- * Copyright (c) 2011 Google, Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "userboot.h" #define MB (1024 * 1024UL) #define GB (1024 * 1024 * 1024UL) #define BSP 0 #define NDISKS 32 static char *host_base; static struct termios term, oldterm; static vdskctx disk[NDISKS]; static int ndisks; static int consin_fd, consout_fd; static char *vmname, *progname; static struct vmctx *ctx; static uint64_t gdtbase, cr3, rsp; static void cb_exit(void *arg, int v); /* * Console i/o callbacks */ static void cb_putc(void *arg, int ch) { char c = ch; (void) write(consout_fd, &c, 1); } static int cb_getc(void *arg) { char c; if (read(consin_fd, &c, 1) == 1) return (c); return (-1); } static int cb_poll(void *arg) { int n; if (ioctl(consin_fd, FIONREAD, &n) >= 0) return (n > 0); return (0); } /* * Host filesystem i/o callbacks */ struct cb_file { int cf_isdir; size_t cf_size; struct stat cf_stat; union { int fd; DIR *dir; } cf_u; }; static int cb_open(void *arg, const char *filename, void **hp) { struct stat st; struct cb_file *cf; char path[PATH_MAX]; if (!host_base) return (ENOENT); strlcpy(path, host_base, PATH_MAX); if (path[strlen(path) - 1] == '/') path[strlen(path) - 1] = 0; strlcat(path, filename, PATH_MAX); cf = malloc(sizeof(struct cb_file)); if (stat(path, &cf->cf_stat) < 0) { free(cf); return (errno); } cf->cf_size = st.st_size; if (S_ISDIR(cf->cf_stat.st_mode)) { cf->cf_isdir = 1; cf->cf_u.dir = opendir(path); if (!cf->cf_u.dir) goto out; *hp = cf; return (0); } if (S_ISREG(cf->cf_stat.st_mode)) { cf->cf_isdir = 0; cf->cf_u.fd = open(path, O_RDONLY); if (cf->cf_u.fd < 0) goto out; *hp = cf; return (0); } out: free(cf); return (EINVAL); } static int cb_close(void *arg, void *h) { struct cb_file *cf = h; if (cf->cf_isdir) closedir(cf->cf_u.dir); else close(cf->cf_u.fd); free(cf); return (0); } 
/* Report whether handle h refers to a directory. */ static int cb_isdir(void *arg, void *h) { struct cb_file *cf = h; return (cf->cf_isdir); } /* Read up to size bytes from a regular-file handle into buf; *resid receives the number of bytes not read. Returns EINVAL for directory handles and for any read() failure (errno is not propagated). */ static int cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid) { struct cb_file *cf = h; ssize_t sz; if (cf->cf_isdir) return (EINVAL); sz = read(cf->cf_u.fd, buf, size); if (sz < 0) return (EINVAL); *resid = size - sz; return (0); } /* Return the next entry of a directory handle: file number, type, name length and NUL-terminated name. Returns EINVAL for non-directory handles and ENOENT when readdir() returns NULL. */ static int cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return, size_t *namelen_return, char *name) { struct cb_file *cf = h; struct dirent *dp; if (!cf->cf_isdir) return (EINVAL); dp = readdir(cf->cf_u.dir); if (!dp) return (ENOENT); /* * Note: d_namlen is in the range 0..255 and therefore less * than PATH_MAX so we don't need to test before copying. */ *fileno_return = dp->d_fileno; *type_return = dp->d_type; *namelen_return = dp->d_namlen; memcpy(name, dp->d_name, dp->d_namlen); name[dp->d_namlen] = 0; return (0); } /* Reposition a regular-file handle with lseek(); EINVAL for directories, errno when lseek() fails. */ static int cb_seek(void *arg, void *h, uint64_t offset, int whence) { struct cb_file *cf = h; if (cf->cf_isdir) return (EINVAL); if (lseek(cf->cf_u.fd, offset, whence) < 0) return (errno); return (0); } /* Report mode, owner, group and size from the stat data cached in the handle by cb_open(). Always returns 0. */ static int cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size) { struct cb_file *cf = h; *mode = cf->cf_stat.st_mode; *uid = cf->cf_stat.st_uid; *gid = cf->cf_stat.st_gid; *size = cf->cf_stat.st_size; return (0); } /* * Disk image i/o callbacks */ /* Read size bytes at offset from of disk unit into to through vdsk_read(). *resid is set to 0 on success and left untouched on error; an out-of-range unit yields EIO. */ static int cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size, size_t *resid) { struct iovec iov; int error; if (unit < 0 || unit >= ndisks) return (EIO); iov.iov_base = to; iov.iov_len = size; - error = vdsk_read(disk[unit], &iov, 1, from); + error = vdsk_read(disk[unit], from, &iov, 1); if (!error) *resid = 0; return (error); } /* Answer DIOCGSECTORSIZE and DIOCGMEDIASIZE for disk unit from the vdsk backend; any other command yields ENOTTY and an out-of-range unit EBADF. */ static int cb_diskioctl(void *arg, int unit, u_long cmd, void *data) { if (unit < 0 || unit >= ndisks) return (EBADF); switch (cmd) { case DIOCGSECTORSIZE: *(u_int *)data = vdsk_sectorsize(disk[unit]); break; case DIOCGMEDIASIZE: *(off_t *)data = vdsk_capacity(disk[unit]); break; 
default: return (ENOTTY); } return (0); } /* * Guest virtual machine i/o callbacks */ /* Copy size bytes from host buffer from into guest memory at address to, after masking the address to its low 31 bits. Returns EFAULT when vm_map_gpa() cannot map the range. */ static int cb_copyin(void *arg, const void *from, uint64_t to, size_t size) { char *ptr; to &= 0x7fffffff; ptr = vm_map_gpa(ctx, to, size); if (ptr == NULL) return (EFAULT); memcpy(ptr, from, size); return (0); } /* Copy size bytes out of guest memory at address from (masked to its low 31 bits) into host buffer to. Returns EFAULT when vm_map_gpa() cannot map the range. */ static int cb_copyout(void *arg, uint64_t from, void *to, size_t size) { char *ptr; from &= 0x7fffffff; ptr = vm_map_gpa(ctx, from, size); if (ptr == NULL) return (EFAULT); memcpy(to, ptr, size); return (0); } /* Set a guest general register. Only register number 4 is handled (mapped to VM_REG_GUEST_RSP and cached in rsp); any other number, or a vm_set_register() failure, ends the loader through cb_exit(). */ static void cb_setreg(void *arg, int r, uint64_t v) { int error; enum vm_reg_name vmreg; vmreg = VM_REG_LAST; switch (r) { case 4: vmreg = VM_REG_GUEST_RSP; rsp = v; break; default: break; } if (vmreg == VM_REG_LAST) { printf("test_setreg(%d): not implemented\n", r); cb_exit(NULL, USERBOOT_EXIT_QUIT); } error = vm_set_register(ctx, BSP, vmreg, v); if (error) { perror("vm_set_register"); cb_exit(NULL, USERBOOT_EXIT_QUIT); } } /* Set a guest MSR. Only MSR_EFER is handled; any other MSR, or a vm_set_register() failure, ends the loader through cb_exit(). */ static void cb_setmsr(void *arg, int r, uint64_t v) { int error; enum vm_reg_name vmreg; vmreg = VM_REG_LAST; switch (r) { case MSR_EFER: vmreg = VM_REG_GUEST_EFER; break; default: break; } if (vmreg == VM_REG_LAST) { printf("test_setmsr(%d): not implemented\n", r); cb_exit(NULL, USERBOOT_EXIT_QUIT); } error = vm_set_register(ctx, BSP, vmreg, v); if (error) { perror("vm_set_msr"); cb_exit(NULL, USERBOOT_EXIT_QUIT); } } /* Set guest control register 0, 3 or 4; the CR3 value is also cached in cr3 for cb_exec(). Any other register number, or a vm_set_register() failure, ends the loader through cb_exit(). */ static void cb_setcr(void *arg, int r, uint64_t v) { int error; enum vm_reg_name vmreg; vmreg = VM_REG_LAST; switch (r) { case 0: vmreg = VM_REG_GUEST_CR0; break; case 3: vmreg = VM_REG_GUEST_CR3; cr3 = v; break; case 4: vmreg = VM_REG_GUEST_CR4; break; default: break; } if (vmreg == VM_REG_LAST) { printf("test_setcr(%d): not implemented\n", r); cb_exit(NULL, USERBOOT_EXIT_QUIT); } error = vm_set_register(ctx, BSP, vmreg, v); if (error) { perror("vm_set_cr"); cb_exit(NULL, USERBOOT_EXIT_QUIT); } } /* Program the guest GDTR with base and a limit of size - 1, and cache base in gdtbase for cb_exec(). */ static void cb_setgdt(void *arg, uint64_t base, size_t size) { int error; error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 
1, 0); if (error != 0) { perror("vm_set_desc(gdt)"); cb_exit(NULL, USERBOOT_EXIT_QUIT); } gdtbase = base; } /* Set up the initial vcpu register state for entry at rip, using the i386 variant when no CR3 value was recorded (cr3 == 0), then leave the loader through cb_exit(NULL, 0). */ static void cb_exec(void *arg, uint64_t rip) { int error; if (cr3 == 0) error = vm_setup_freebsd_registers_i386(ctx, BSP, rip, gdtbase, rsp); else error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase, rsp); if (error) { perror("vm_setup_freebsd_registers"); cb_exit(NULL, USERBOOT_EXIT_QUIT); } cb_exit(NULL, 0); } /* * Misc */ /* Sleep for usec microseconds. */ static void cb_delay(void *arg, int usec) { usleep(usec); } /* Restore the terminal settings saved in oldterm on the console output descriptor and terminate the process with status v. */ static void cb_exit(void *arg, int v) { tcsetattr(consout_fd, TCSAFLUSH, &oldterm); exit(v); } /* Report the guest's low and high memory sizes as returned by the vmm API. */ static void cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem) { *ret_lowmem = vm_get_lowmem_size(ctx); *ret_highmem = vm_get_highmem_size(ctx); } /* One loader environment entry, kept on a singly-linked list. */ struct env { const char *str; /* name=value */ SLIST_ENTRY(env) next; }; static SLIST_HEAD(envhead, env) envhead; /* Prepend str to the environment list. The string is referenced, not copied. NOTE(review): the malloc() result is dereferenced without a NULL check. */ static void addenv(const char *str) { struct env *env; env = malloc(sizeof(struct env)); env->str = str; SLIST_INSERT_HEAD(&envhead, env, next); } /* Return the num-th environment string in list order (index 0 is the most recently added entry), or NULL when num is out of range. */ static const char * cb_getenv(void *arg, int num) { int i; struct env *env; i = 0; SLIST_FOREACH(env, &envhead, next) { if (i == num) return (env->str); i++; } return (NULL); } /* Callback table handed to loader_main() in main(). */ static struct loader_callbacks cb = { .getc = cb_getc, .putc = cb_putc, .poll = cb_poll, .open = cb_open, .close = cb_close, .isdir = cb_isdir, .read = cb_read, .readdir = cb_readdir, .seek = cb_seek, .stat = cb_stat, .diskread = cb_diskread, .diskioctl = cb_diskioctl, .copyin = cb_copyin, .copyout = cb_copyout, .setreg = cb_setreg, .setmsr = cb_setmsr, .setcr = cb_setcr, .setgdt = cb_setgdt, .exec = cb_exec, .delay = cb_delay, .exit = cb_exit, .getmem = cb_getmem, .getenv = cb_getenv, }; /* Use path as the console instead of stdin/stdout. "stdio" is accepted and leaves the descriptors unchanged; otherwise path must be a character device, which is opened read/write and non-blocking for both input and output. Returns 0 on success and non-zero otherwise (the raw stat() result, ENOTSUP, or the errno from open()). */ static int altcons_open(char *path) { struct stat sb; int err; int fd; /* * Allow stdio to be passed in so that the same string * can be used for the bhyveload console and bhyve com-port * parameters */ if (!strcmp(path, "stdio")) return (0); err = stat(path, &sb); if (err == 0) { if 
(!S_ISCHR(sb.st_mode)) err = ENOTSUP; else { fd = open(path, O_RDWR | O_NONBLOCK); if (fd < 0) err = errno; else consin_fd = consout_fd = fd; } } return (err); } /* Open path read-only through vdsk_open() and append it to disk[]. Returns ERANGE when NDISKS disks are already open, or errno when vdsk_open() fails. */ static int disk_open(char *path) { vdskctx vdsk; if (ndisks >= NDISKS) return (ERANGE); vdsk = vdsk_open(path, O_RDONLY, 0); if (vdsk == NULL) return (errno); disk[ndisks++] = vdsk; return (0); } /* Print the usage message to stderr and exit with status 1. */ static void usage(void) { fprintf(stderr, "usage: %s [-S][-c ] [-d ] [-e ]\n" " %*s [-h ] [-m mem-size] \n", progname, (int)strlen(progname), ""); exit(1); } /* Parse the command line, create the VM (or reinitialise an existing one of the same name), set up guest memory, put the console into raw mode, then load /boot/userboot.so and call its loader_main() with the callback table and the number of disks opened. */ int main(int argc, char** argv) { void *h; void (*func)(struct loader_callbacks *, void *, int, int); uint64_t mem_size; int opt, error, need_reinit, memflags; progname = basename(argv[0]); memflags = 0; mem_size = 256 * MB; consin_fd = STDIN_FILENO; consout_fd = STDOUT_FILENO; while ((opt = getopt(argc, argv, "Sc:d:e:h:m:")) != -1) { switch (opt) { case 'c': error = altcons_open(optarg); if (error != 0) errx(EX_USAGE, "Could not open '%s'", optarg); break; case 'd': error = disk_open(optarg); if (error != 0) errx(EX_USAGE, "Could not open '%s'", optarg); break; case 'e': addenv(optarg); break; case 'h': host_base = optarg; break; case 'm': error = vm_parse_memsize(optarg, &mem_size); if (error != 0) errx(EX_USAGE, "Invalid memsize '%s'", optarg); break; case 'S': memflags |= VM_MEM_F_WIRED; break; case '?': usage(); } } argc -= optind; argv += optind; if (argc != 1) usage(); vmname = argv[0]; need_reinit = 0; /* A VM that already exists (EEXIST) is reused after vm_reinit(). */ error = vm_create(vmname); if (error) { if (errno != EEXIST) { perror("vm_create"); exit(1); } need_reinit = 1; } ctx = vm_open(vmname); if (ctx == NULL) { perror("vm_open"); exit(1); } if (need_reinit) { error = vm_reinit(ctx); if (error) { perror("vm_reinit"); exit(1); } } vm_set_memflags(ctx, memflags); error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL); if (error) { perror("vm_setup_memory"); exit(1); } /* Save the console settings (restored by cb_exit()) and switch the console to raw mode. */ tcgetattr(consout_fd, &term); oldterm = term; cfmakeraw(&term); term.c_cflag |= CLOCAL; tcsetattr(consout_fd, TCSAFLUSH, &term); h = 
dlopen("/boot/userboot.so", RTLD_LOCAL); /* NOTE(review): the two failure returns below bypass cb_exit(), so the terminal settings saved in oldterm are not restored on these paths. */ if (!h) { printf("%s\n", dlerror()); return (1); } func = dlsym(h, "loader_main"); if (!func) { printf("%s\n", dlerror()); return (1); } addenv("smbios.bios.vendor=BHYVE"); addenv("boot_serial=1"); func(&cb, NULL, USERBOOT_VERSION_3, ndisks); } Index: user/marcel/libvdsk/libvdsk/qcow.c =================================================================== --- user/marcel/libvdsk/libvdsk/qcow.c (revision 286995) +++ user/marcel/libvdsk/libvdsk/qcow.c (revision 286996) @@ -1,154 +1,163 @@ /*- * Copyright (c) 2014 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include "vdsk_int.h" /* Flag bits in cluster offsets */ #define QCOW_CLSTR_COMPRESSED (1ULL << 62) #define QCOW_CLSTR_COPIED (1ULL << 63) struct qcow_header { uint32_t magic; #define QCOW_MAGIC 0x514649fb uint32_t version; #define QCOW_VERSION_1 1 #define QCOW_VERSION_2 2 uint64_t path_offset; uint32_t path_length; uint32_t clstr_log2sz; /* v2 only */ uint64_t disk_size; union { struct { uint8_t clstr_log2sz; uint8_t l2_log2sz; uint16_t _pad; uint32_t encryption; uint64_t l1_offset; } v1; struct { uint32_t encryption; uint32_t l1_entries; uint64_t l1_offset; uint64_t refcnt_offset; uint32_t refcnt_entries; uint32_t snapshot_count; uint64_t snapshot_offset; } v2; } u; }; static int qcow_probe(struct vdsk *vdsk) { struct qcow_header *hdr; if (vdsk->sectorsize < 512 || vdsk->sectorsize > 4096) return (ENOTBLK); hdr = malloc(vdsk->sectorsize); if (hdr == NULL) return (errno); if (read(vdsk->fd, hdr, vdsk->sectorsize) != vdsk->sectorsize) goto out; if (be32dec(&hdr->magic) != QCOW_MAGIC) { errno = ENXIO; goto out; } errno = 0; out: free(hdr); return (errno); } static int qcow_open(struct vdsk *vdsk __unused) { return (ENOSYS); } static int qcow_close(struct vdsk *vdsk __unused) { return (ENOSYS); } static int -qcow_read(struct vdsk *vdsk __unused, const struct iovec *iov __unused, - int iovcnt __unused, off_t offset __unused) +qcow_read(struct vdsk *vdsk __unused, off_t offset __unused, + const struct iovec *iov __unused, int iovcnt __unused) { return (ENOSYS); } static int -qcow_write(struct vdsk *vdsk __unused, const struct iovec *iov __unused, - int iovcnt __unused, off_t offset __unused) +qcow_write(struct vdsk *vdsk __unused, off_t offset __unused, + const struct iovec *iov __unused, int iovcnt __unused) { return (ENOSYS); } static int +qcow_trim(struct vdsk *vdsk __unused, off_t offset __unused, + ssize_t length __unused) +{ + + 
return (ENOSYS); +} + +static int qcow_flush(struct vdsk *vdsk __unused) { return (ENOSYS); } /* Format descriptor for QCOW. Apart from probe, every method it points at returns ENOSYS in this revision. FORMAT_DEFINE presumably adds it to the libvdsk_formats set walked by vdsk_probe() -- confirm in vdsk_int.h. */ static struct vdsk_format qcow_format = { .name = "qcow", .description = "QEMU Copy-On-Write, version 1", .flags = VDSKFMT_CAN_WRITE | VDSKFMT_HAS_HEADER, .probe = qcow_probe, .open = qcow_open, .close = qcow_close, .read = qcow_read, .write = qcow_write, + .trim = qcow_trim, .flush = qcow_flush, }; FORMAT_DEFINE(qcow_format); Index: user/marcel/libvdsk/libvdsk/raw.c =================================================================== --- user/marcel/libvdsk/libvdsk/raw.c (revision 286995) +++ user/marcel/libvdsk/libvdsk/raw.c (revision 286996) @@ -1,103 +1,112 @@ /*- * Copyright (c) 2014 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "vdsk_int.h" /* The raw format performs no content check: probe always reports a match (0). */ static int raw_probe(struct vdsk *vdsk __unused) { return (0); } /* Nothing to set up for raw images; always 0. */ static int raw_open(struct vdsk *vdsk __unused) { return (0); } /* Nothing to tear down for raw images; always 0. */ static int raw_close(struct vdsk *vdsk __unused) { return (0); } /* Scatter-read from the backing descriptor at offset with preadv(). Returns errno when preadv() returns -1 and 0 otherwise, so a short transfer is reported as success. */ static int -raw_read(struct vdsk *vdsk, const struct iovec *iov, int iovcnt, off_t offset) +raw_read(struct vdsk *vdsk, off_t offset, const struct iovec *iov, int iovcnt) { ssize_t res; res = preadv(vdsk->fd, iov, iovcnt, offset); return ((res == -1) ? errno : 0); } /* Gather-write to the backing descriptor at offset with pwritev(). Returns errno when pwritev() returns -1 and 0 otherwise, so a short transfer is reported as success. */ static int -raw_write(struct vdsk *vdsk, const struct iovec *iov, int iovcnt, off_t offset) +raw_write(struct vdsk *vdsk, off_t offset, const struct iovec *iov, int iovcnt) { ssize_t res; res = pwritev(vdsk->fd, iov, iovcnt, offset); return ((res == -1) ? errno : 0); } /* Trim is not supported for raw images: always EOPNOTSUPP. */ static int +raw_trim(struct vdsk *vdsk __unused, off_t offset __unused, + ssize_t length __unused) +{ + + return (EOPNOTSUPP); +} + +static int raw_flush(struct vdsk *vdsk) { int res; /* Flush is an fsync() of the backing descriptor; errno is returned on failure. */ res = fsync(vdsk->fd); return ((res == -1) ? errno : 0); } /* Format descriptor for raw disk files and devices. */ static struct vdsk_format raw_format = { .name = "raw", .description = "Raw Disk File or Device", .flags = VDSKFMT_CAN_WRITE | VDSKFMT_DEVICE_OK | VDSKFMT_NO_METADATA, .probe = raw_probe, .open = raw_open, .close = raw_close, .read = raw_read, .write = raw_write, + .trim = raw_trim, .flush = raw_flush, }; FORMAT_DEFINE(raw_format); Index: user/marcel/libvdsk/libvdsk/vdsk.c =================================================================== --- user/marcel/libvdsk/libvdsk/vdsk.c (revision 286995) +++ user/marcel/libvdsk/libvdsk/vdsk.c (revision 286996) @@ -1,238 +1,248 @@ /*- * Copyright (c) 2014 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "vdsk_int.h" static inline int vdsk_is_dev(struct vdsk *vdsk) { return ((S_ISCHR(vdsk->fsbuf.st_mode)) ? 1 : 0); } static struct vdsk * vdsk_deref(vdskctx ctx) { struct vdsk *vdsk = ctx; return (vdsk - 1); } static struct vdsk_format * vdsk_probe(struct vdsk *vdsk) { struct vdsk_format **fmts; struct vdsk_format *f, *fmt; size_t idx, nfmts; int error, probe; /* * Create a mutable copy of the linker set. */ nfmts = SET_COUNT(libvdsk_formats); fmts = malloc(nfmts * sizeof(*fmts)); if (fmts == NULL) return (NULL); memcpy(fmts, SET_BEGIN(libvdsk_formats), nfmts * sizeof(*fmts)); fmt = NULL; probe = VDSKFMT_HAS_HEADER | VDSKFMT_HAS_FOOTER; probe |= (vdsk_is_dev(vdsk)) ? VDSKFMT_DEVICE_OK : 0; probe |= (vdsk->fflags & FWRITE) ? 
VDSKFMT_CAN_WRITE : 0; while (fmt == NULL && probe >= 0) { for (idx = 0; idx < nfmts; idx++) { f = fmts[idx]; /* Skip formats we've probed already. */ if (f == NULL) continue; /* Skip formats we shouldn't probe now. */ if ((f->flags & probe) != probe) continue; /* White-out this format and probe it. */ fmts[idx] = NULL; error = f->probe(vdsk); if (!error) { /* We have a match. */ fmt = f; break; } } if (fmt == NULL) probe -= VDSKFMT_HAS_FOOTER; } free(fmts); if (fmt == NULL) errno = EFTYPE; return (fmt); } vdskctx vdsk_open(const char *path, int flags, size_t size) { vdskctx ctx; struct vdsk *vdsk; int lck; ctx = NULL; do { size += sizeof(struct vdsk); vdsk = calloc(1, size); if (vdsk == NULL) break; vdsk->fflags = flags + 1; if ((vdsk->fflags & ~(O_ACCMODE | O_DIRECT | O_SYNC)) != 0) { errno = EINVAL; break; } vdsk->filename = realpath(path, NULL); if (vdsk->filename == NULL) break; flags = (flags & O_ACCMODE) | O_CLOEXEC; vdsk->fd = open(vdsk->filename, flags); if (vdsk->fd == -1) break; if (fstat(vdsk->fd, &vdsk->fsbuf) == -1) break; if (vdsk_is_dev(vdsk)) { if (ioctl(vdsk->fd, DIOCGMEDIASIZE, &vdsk->capacity) < 0) break; if (ioctl(vdsk->fd, DIOCGSECTORSIZE, &vdsk->sectorsize) < 0) break; } else { vdsk->capacity = vdsk->fsbuf.st_size; vdsk->sectorsize = DEV_BSIZE; } vdsk->fmt = vdsk_probe(vdsk); if (vdsk->fmt == NULL) break; lck = (vdsk->fflags & FWRITE) ? LOCK_EX : LOCK_SH; if (flock(vdsk->fd, lck | LOCK_NB) == -1) break; errno = vdsk->fmt->open(vdsk); if (errno != 0) { flock(vdsk->fd, LOCK_UN); break; } /* Complete... 
*/ ctx = vdsk + 1; } while (0); if (ctx == NULL) { if (vdsk != NULL) { if (vdsk->fd != -1) close(vdsk->fd); if (vdsk->filename != NULL) free(vdsk->filename); free(vdsk); } } return (ctx); } int vdsk_close(vdskctx ctx) { struct vdsk *vdsk = vdsk_deref(ctx); vdsk->fmt->close(vdsk); flock(vdsk->fd, LOCK_UN); close(vdsk->fd); free(vdsk->filename); free(vdsk); return (0); } off_t vdsk_capacity(vdskctx ctx) { struct vdsk *vdsk = vdsk_deref(ctx); return (vdsk->capacity); } int vdsk_sectorsize(vdskctx ctx) { struct vdsk *vdsk = vdsk_deref(ctx); return (vdsk->sectorsize); } int -vdsk_read(vdskctx ctx, const struct iovec *iov, int iovcnt, off_t offset) +vdsk_read(vdskctx ctx, off_t offset, const struct iovec *iov, int iovcnt) { struct vdsk *vdsk = vdsk_deref(ctx); - return (vdsk->fmt->read(vdsk, iov, iovcnt, offset)); + return (vdsk->fmt->read(vdsk, offset, iov, iovcnt)); } int -vdsk_write(vdskctx ctx, const struct iovec *iov, int iovcnt, off_t offset) +vdsk_write(vdskctx ctx, off_t offset, const struct iovec *iov, int iovcnt) { struct vdsk *vdsk = vdsk_deref(ctx); if ((vdsk->fflags & FWRITE) == 0) return (EROFS); - return (vdsk->fmt->write(vdsk, iov, iovcnt, offset)); + return (vdsk->fmt->write(vdsk, offset, iov, iovcnt)); +} + +int +vdsk_trim(vdskctx ctx, off_t offset, ssize_t length) +{ + struct vdsk *vdsk = vdsk_deref(ctx); + + if ((vdsk->fflags & FWRITE) == 0) + return (EROFS); + return (vdsk->fmt->trim(vdsk, offset, length)); } int vdsk_flush(vdskctx ctx) { struct vdsk *vdsk = vdsk_deref(ctx); if ((vdsk->fflags & FWRITE) == 0) return (0); return (vdsk->fmt->flush(vdsk)); } Index: user/marcel/libvdsk/libvdsk/vdsk.h =================================================================== --- user/marcel/libvdsk/libvdsk/vdsk.h (revision 286995) +++ user/marcel/libvdsk/libvdsk/vdsk.h (revision 286996) @@ -1,49 +1,49 @@ /*- * Copyright (c) 2014 Marcel Moolenaar * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef __VDSK_H__ #define __VDSK_H__ #include #include #include /* Opaque handle type: vdsk_open() returns one and every other function declared here takes one. */ typedef void *vdskctx; /* Public entry points. With this patch vdsk_read() and vdsk_write() take the offset directly after the handle, ahead of the iovec array and its count, and vdsk_trim() is added taking an offset and a length. */ vdskctx vdsk_open(const char *, int, size_t); int vdsk_close(vdskctx); off_t vdsk_capacity(vdskctx); int vdsk_sectorsize(vdskctx); -int vdsk_read(vdskctx, const struct iovec *, int, off_t); -int vdsk_write(vdskctx, const struct iovec *, int, off_t); - +int vdsk_read(vdskctx, off_t, const struct iovec *, int); +int vdsk_write(vdskctx, off_t, const struct iovec *, int); +int vdsk_trim(vdskctx, off_t, ssize_t); int vdsk_flush(vdskctx); #endif /* __VDSK_H__ */ Index: user/marcel/libvdsk/libvdsk/vdsk_int.h =================================================================== --- user/marcel/libvdsk/libvdsk/vdsk_int.h (revision 286995) +++ user/marcel/libvdsk/libvdsk/vdsk_int.h (revision 286996) @@ -1,72 +1,73 @@ /*- * Copyright (c) 2014 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __VDSK_INT_H__ #define __VDSK_INT_H__ #include struct vdsk; /* * The disk format registration structure. */ struct vdsk_format { const char *name; const char *description; /* Capability bits. vdsk_probe() only tries a format when every bit of its current probe mask is present in flags; VDSKFMT_NO_METADATA is 0 and therefore contributes no bit. */ int flags; #define VDSKFMT_DEVICE_OK 1 #define VDSKFMT_CAN_WRITE 2 #define VDSKFMT_NO_METADATA 0 #define VDSKFMT_HAS_FOOTER 4 #define VDSKFMT_HAS_HEADER 8 /* Per-format hooks. Each returns an int that vdsk.c uses as an error number, with 0 meaning success; for probe, 0 means the format matched. The patch moves the offset argument of read and write ahead of the iovec arguments and adds the trim hook. */ int (*probe)(struct vdsk *); int (*open)(struct vdsk *); int (*close)(struct vdsk *); - int (*read)(struct vdsk *, const struct iovec *, int, off_t); - int (*write)(struct vdsk *, const struct iovec *, int, off_t); + int (*read)(struct vdsk *, off_t, const struct iovec *, int); + int (*write)(struct vdsk *, off_t, const struct iovec *, int); + int (*trim)(struct vdsk *, off_t, ssize_t); int (*flush)(struct vdsk *); }; /* Formats are collected in the libvdsk_formats set; FORMAT_DEFINE(nm) adds the record nm to it via DATA_SET(). */ SET_DECLARE(libvdsk_formats, struct vdsk_format); #define FORMAT_DEFINE(nm) DATA_SET(libvdsk_formats, nm) /* * The internal representation of a "disk". */ struct vdsk { /* Fields are filled in by vdsk_open(): fmt from vdsk_probe(), fd from open(), fflags as the caller flags plus one, filename from realpath(), fsbuf from fstat(), and capacity and sectorsize from the device ioctls or from the file size and DEV_BSIZE. NOTE(review): the 16-byte alignment presumably keeps the caller area that vdsk_open() allocates right after this struct aligned - confirm. */ struct vdsk_format *fmt; int fd; int fflags; char *filename; struct stat fsbuf; off_t capacity; int sectorsize; } __attribute__((aligned(16))); #endif /* __VDSK_INT_H__ */ Index: user/marcel/libvdsk/libvdsk/vhd.c =================================================================== --- user/marcel/libvdsk/libvdsk/vhd.c (revision 286995) +++ user/marcel/libvdsk/libvdsk/vhd.c (revision 286996) @@ -1,100 +1,109 @@ /*- * Copyright (c) 2014 Marcel Moolenaar * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include "vdsk_int.h" /* Every hook in this file is a placeholder that ignores its arguments and returns ENOSYS. Because vhd_probe() never returns 0, vdsk_probe() never selects this format. */ static int vhd_probe(struct vdsk *vdsk __unused) { return (ENOSYS); } static int vhd_open(struct vdsk *vdsk __unused) { return (ENOSYS); } static int vhd_close(struct vdsk *vdsk __unused) { return (ENOSYS); } /* Read placeholder; the patch only reorders the parameters so that offset precedes the iovec arguments. */ static int -vhd_read(struct vdsk *vdsk __unused, const struct iovec *iov __unused, - int iovcnt __unused, off_t offset __unused) +vhd_read(struct vdsk *vdsk __unused, off_t offset __unused, + const struct iovec *iov __unused, int iovcnt __unused) { return (ENOSYS); } /* Write placeholder; the patch only reorders the parameters so that offset precedes the iovec arguments. */ static int -vhd_write(struct vdsk *vdsk __unused, const struct iovec *iov __unused, - int iovcnt __unused, off_t offset __unused) +vhd_write(struct vdsk *vdsk __unused, off_t offset __unused, + const struct iovec *iov __unused, int iovcnt __unused) { return (ENOSYS); } /* Trim placeholder added by this patch; returns ENOSYS. */ static int +vhd_trim(struct vdsk *vdsk __unused, off_t offset __unused, + ssize_t length __unused) +{ + + return (ENOSYS); +} + +static int vhd_flush(struct vdsk *vdsk __unused) { /* Flush placeholder; returns ENOSYS. */ return (ENOSYS); } /* Registration record for the vhd format: name, description, capability flags (VDSKFMT_CAN_WRITE | VDSKFMT_HAS_HEADER) and the placeholder hooks, with .trim being the entry this patch adds. FORMAT_DEFINE() places the record in the libvdsk_formats set. */ static struct vdsk_format vhd_format = { .name = "vhd", .description = "Virtual Hard Disk", .flags = VDSKFMT_CAN_WRITE | VDSKFMT_HAS_HEADER, .probe = vhd_probe, .open = vhd_open, .close = vhd_close, .read = vhd_read, .write = vhd_write, + .trim = vhd_trim, .flush = vhd_flush, }; FORMAT_DEFINE(vhd_format); Index: user/marcel/libvdsk/libvdsk/vmdk.c =================================================================== --- user/marcel/libvdsk/libvdsk/vmdk.c (revision 286995) +++ user/marcel/libvdsk/libvdsk/vmdk.c (revision 286996) @@ -1,100 +1,109 @@ /*- * Copyright (c) 2014 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include "vdsk_int.h" /* Every hook in this file is a placeholder that ignores its arguments and returns ENOSYS. Because vmdk_probe() never returns 0, vdsk_probe() never selects this format. */ static int vmdk_probe(struct vdsk *vdsk __unused) { return (ENOSYS); } static int vmdk_open(struct vdsk *vdsk __unused) { return (ENOSYS); } static int vmdk_close(struct vdsk *vdsk __unused) { return (ENOSYS); } /* Read placeholder; the patch only reorders the parameters so that offset precedes the iovec arguments. */ static int -vmdk_read(struct vdsk *vdsk __unused, const struct iovec *iov __unused, - int iovcnt __unused, off_t offset __unused) +vmdk_read(struct vdsk *vdsk __unused, off_t offset __unused, + const struct iovec *iov __unused, int iovcnt __unused) { return (ENOSYS); } /* Write placeholder; the patch only reorders the parameters so that offset precedes the iovec arguments. */ static int -vmdk_write(struct vdsk *vdsk __unused, const struct iovec *iov __unused, - int iovcnt __unused, off_t offset __unused) +vmdk_write(struct vdsk *vdsk __unused, off_t offset __unused, + const struct iovec *iov __unused, int iovcnt __unused) { return (ENOSYS); } /* Trim placeholder added by this patch; returns ENOSYS. */ static int +vmdk_trim(struct vdsk *vdsk __unused, off_t offset __unused, + ssize_t length __unused) +{ + + return (ENOSYS); +} + +static int vmdk_flush(struct vdsk *vdsk __unused) { /* Flush placeholder; returns ENOSYS. */ return (ENOSYS); } /* Registration record for the vmdk format: name, description, capability flags (VDSKFMT_CAN_WRITE | VDSKFMT_HAS_HEADER) and the placeholder hooks, with .trim being the entry this patch adds. FORMAT_DEFINE() places the record in the libvdsk_formats set. */ static struct vdsk_format vmdk_format = { .name = "vmdk", .description = "Virtual Machine Disk", .flags = VDSKFMT_CAN_WRITE | VDSKFMT_HAS_HEADER, .probe = vmdk_probe, .open = vmdk_open, .close = vmdk_close, .read = vmdk_read, .write = vmdk_write, + .trim = vmdk_trim, .flush = vmdk_flush, }; FORMAT_DEFINE(vmdk_format);