Changeset View
Changeset View
Standalone View
Standalone View
usr.sbin/bhyve/block_if.c
Show First 20 Lines • Show All 51 Lines • ▼ Show 20 Lines | |||||
#include <string.h> | #include <string.h> | ||||
#include <pthread.h> | #include <pthread.h> | ||||
#include <pthread_np.h> | #include <pthread_np.h> | ||||
#include <signal.h> | #include <signal.h> | ||||
#include <sysexits.h> | #include <sysexits.h> | ||||
#include <unistd.h> | #include <unistd.h> | ||||
#include <machine/atomic.h> | #include <machine/atomic.h> | ||||
#include <machine/vmm_snapshot.h> | |||||
#include "bhyverun.h" | #include "bhyverun.h" | ||||
#include "debug.h" | #include "debug.h" | ||||
#include "mevent.h" | #include "mevent.h" | ||||
#include "block_if.h" | #include "block_if.h" | ||||
#define BLOCKIF_SIG 0xb109b109 | #define BLOCKIF_SIG 0xb109b109 | ||||
Show All 31 Lines | struct blockif_ctxt { | ||||
int bc_isgeom; | int bc_isgeom; | ||||
int bc_candelete; | int bc_candelete; | ||||
int bc_rdonly; | int bc_rdonly; | ||||
off_t bc_size; | off_t bc_size; | ||||
int bc_sectsz; | int bc_sectsz; | ||||
int bc_psectsz; | int bc_psectsz; | ||||
int bc_psectoff; | int bc_psectoff; | ||||
int bc_closing; | int bc_closing; | ||||
int bc_paused; | |||||
int bc_work_count; | |||||
pthread_t bc_btid[BLOCKIF_NUMTHR]; | pthread_t bc_btid[BLOCKIF_NUMTHR]; | ||||
pthread_mutex_t bc_mtx; | pthread_mutex_t bc_mtx; | ||||
pthread_cond_t bc_cond; | pthread_cond_t bc_cond; | ||||
pthread_cond_t bc_paused_cond; | |||||
pthread_cond_t bc_work_done_cond; | |||||
/* Request elements and free/pending/busy queues */ | /* Request elements and free/pending/busy queues */ | ||||
TAILQ_HEAD(, blockif_elem) bc_freeq; | TAILQ_HEAD(, blockif_elem) bc_freeq; | ||||
TAILQ_HEAD(, blockif_elem) bc_pendq; | TAILQ_HEAD(, blockif_elem) bc_pendq; | ||||
TAILQ_HEAD(, blockif_elem) bc_busyq; | TAILQ_HEAD(, blockif_elem) bc_busyq; | ||||
struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; | struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; | ||||
}; | }; | ||||
▲ Show 20 Lines • Show All 86 Lines • ▼ Show 20 Lines | if (tbe->be_req->br_offset == be->be_block) | ||||
tbe->be_status = BST_PEND; | tbe->be_status = BST_PEND; | ||||
} | } | ||||
be->be_tid = 0; | be->be_tid = 0; | ||||
be->be_status = BST_FREE; | be->be_status = BST_FREE; | ||||
be->be_req = NULL; | be->be_req = NULL; | ||||
TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); | TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); | ||||
} | } | ||||
static int | |||||
blockif_flush_bc(struct blockif_ctxt *bc) | |||||
{ | |||||
if (bc->bc_ischr) { | |||||
if (ioctl(bc->bc_fd, DIOCGFLUSH)) | |||||
return (errno); | |||||
} else if (fsync(bc->bc_fd)) | |||||
return (errno); | |||||
return (0); | |||||
} | |||||
static void | static void | ||||
blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) | blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) | ||||
{ | { | ||||
struct blockif_req *br; | struct blockif_req *br; | ||||
off_t arg[2]; | off_t arg[2]; | ||||
ssize_t clen, len, off, boff, voff; | ssize_t clen, len, off, boff, voff; | ||||
int i, err; | int i, err; | ||||
▲ Show 20 Lines • Show All 74 Lines • ▼ Show 20 Lines | while (br->br_resid > 0) { | ||||
err = errno; | err = errno; | ||||
break; | break; | ||||
} | } | ||||
off += len; | off += len; | ||||
br->br_resid -= len; | br->br_resid -= len; | ||||
} | } | ||||
break; | break; | ||||
case BOP_FLUSH: | case BOP_FLUSH: | ||||
if (bc->bc_ischr) { | err = blockif_flush_bc(bc); | ||||
if (ioctl(bc->bc_fd, DIOCGFLUSH)) | |||||
err = errno; | |||||
} else if (fsync(bc->bc_fd)) | |||||
err = errno; | |||||
break; | break; | ||||
case BOP_DELETE: | case BOP_DELETE: | ||||
if (!bc->bc_candelete) | if (!bc->bc_candelete) | ||||
err = EOPNOTSUPP; | err = EOPNOTSUPP; | ||||
else if (bc->bc_rdonly) | else if (bc->bc_rdonly) | ||||
err = EROFS; | err = EROFS; | ||||
else if (bc->bc_ischr) { | else if (bc->bc_ischr) { | ||||
arg[0] = br->br_offset; | arg[0] = br->br_offset; | ||||
Show All 27 Lines | blockif_thr(void *arg) | ||||
if (bc->bc_isgeom) | if (bc->bc_isgeom) | ||||
buf = malloc(MAXPHYS); | buf = malloc(MAXPHYS); | ||||
else | else | ||||
buf = NULL; | buf = NULL; | ||||
t = pthread_self(); | t = pthread_self(); | ||||
pthread_mutex_lock(&bc->bc_mtx); | pthread_mutex_lock(&bc->bc_mtx); | ||||
for (;;) { | for (;;) { | ||||
while (blockif_dequeue(bc, t, &be)) { | bc->bc_work_count++; | ||||
/* We cannot process work if the interface is paused */ | |||||
while (!bc->bc_paused && blockif_dequeue(bc, t, &be)) { | |||||
pthread_mutex_unlock(&bc->bc_mtx); | pthread_mutex_unlock(&bc->bc_mtx); | ||||
blockif_proc(bc, be, buf); | blockif_proc(bc, be, buf); | ||||
pthread_mutex_lock(&bc->bc_mtx); | pthread_mutex_lock(&bc->bc_mtx); | ||||
blockif_complete(bc, be); | blockif_complete(bc, be); | ||||
} | } | ||||
bc->bc_work_count--; | |||||
/* If none of the workers are busy, notify the main thread */ | |||||
if (bc->bc_work_count == 0) | |||||
pthread_cond_broadcast(&bc->bc_work_done_cond); | |||||
/* Check ctxt status here to see if exit requested */ | /* Check ctxt status here to see if exit requested */ | ||||
if (bc->bc_closing) | if (bc->bc_closing) | ||||
break; | break; | ||||
/* Make all worker threads wait here if the device is paused */ | |||||
while (bc->bc_paused) | |||||
pthread_cond_wait(&bc->bc_paused_cond, &bc->bc_mtx); | |||||
pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); | pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); | ||||
} | } | ||||
pthread_mutex_unlock(&bc->bc_mtx); | pthread_mutex_unlock(&bc->bc_mtx); | ||||
if (buf) | if (buf) | ||||
free(buf); | free(buf); | ||||
pthread_exit(NULL); | pthread_exit(NULL); | ||||
return (NULL); | return (NULL); | ||||
▲ Show 20 Lines • Show All 187 Lines • ▼ Show 20 Lines | #endif | ||||
bc->bc_candelete = candelete; | bc->bc_candelete = candelete; | ||||
bc->bc_rdonly = ro; | bc->bc_rdonly = ro; | ||||
bc->bc_size = size; | bc->bc_size = size; | ||||
bc->bc_sectsz = sectsz; | bc->bc_sectsz = sectsz; | ||||
bc->bc_psectsz = psectsz; | bc->bc_psectsz = psectsz; | ||||
bc->bc_psectoff = psectoff; | bc->bc_psectoff = psectoff; | ||||
pthread_mutex_init(&bc->bc_mtx, NULL); | pthread_mutex_init(&bc->bc_mtx, NULL); | ||||
pthread_cond_init(&bc->bc_cond, NULL); | pthread_cond_init(&bc->bc_cond, NULL); | ||||
bc->bc_paused = 0; | |||||
bc->bc_work_count = 0; | |||||
pthread_cond_init(&bc->bc_paused_cond, NULL); | |||||
pthread_cond_init(&bc->bc_work_done_cond, NULL); | |||||
TAILQ_INIT(&bc->bc_freeq); | TAILQ_INIT(&bc->bc_freeq); | ||||
TAILQ_INIT(&bc->bc_pendq); | TAILQ_INIT(&bc->bc_pendq); | ||||
TAILQ_INIT(&bc->bc_busyq); | TAILQ_INIT(&bc->bc_busyq); | ||||
for (i = 0; i < BLOCKIF_MAXREQ; i++) { | for (i = 0; i < BLOCKIF_MAXREQ; i++) { | ||||
bc->bc_reqs[i].be_status = BST_FREE; | bc->bc_reqs[i].be_status = BST_FREE; | ||||
TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); | TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines | |||||
int | int | ||||
blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) | blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) | ||||
{ | { | ||||
struct blockif_elem *be; | struct blockif_elem *be; | ||||
assert(bc->bc_magic == BLOCKIF_SIG); | assert(bc->bc_magic == BLOCKIF_SIG); | ||||
pthread_mutex_lock(&bc->bc_mtx); | pthread_mutex_lock(&bc->bc_mtx); | ||||
/* XXX: not waiting while paused */ | |||||
/* | /* | ||||
* Check pending requests. | * Check pending requests. | ||||
*/ | */ | ||||
TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { | TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { | ||||
if (be->be_req == breq) | if (be->be_req == breq) | ||||
break; | break; | ||||
} | } | ||||
if (be != NULL) { | if (be != NULL) { | ||||
▲ Show 20 Lines • Show All 182 Lines • ▼ Show 20 Lines | |||||
int | int | ||||
blockif_candelete(struct blockif_ctxt *bc) | blockif_candelete(struct blockif_ctxt *bc) | ||||
{ | { | ||||
assert(bc->bc_magic == BLOCKIF_SIG); | assert(bc->bc_magic == BLOCKIF_SIG); | ||||
return (bc->bc_candelete); | return (bc->bc_candelete); | ||||
} | } | ||||
#ifdef BHYVE_SNAPSHOT | |||||
void | |||||
blockif_pause(struct blockif_ctxt *bc) | |||||
{ | |||||
assert(bc != NULL); | |||||
assert(bc->bc_magic == BLOCKIF_SIG); | |||||
pthread_mutex_lock(&bc->bc_mtx); | |||||
bc->bc_paused = 1; | |||||
/* The interface is paused. Wait for workers to finish their work */ | |||||
while (bc->bc_work_count) | |||||
pthread_cond_wait(&bc->bc_work_done_cond, &bc->bc_mtx); | |||||
pthread_mutex_unlock(&bc->bc_mtx); | |||||
if (blockif_flush_bc(bc)) | |||||
fprintf(stderr, "%s: [WARN] failed to flush backing file.\r\n", | |||||
__func__); | |||||
} | |||||
void | |||||
blockif_resume(struct blockif_ctxt *bc) | |||||
{ | |||||
assert(bc != NULL); | |||||
assert(bc->bc_magic == BLOCKIF_SIG); | |||||
pthread_mutex_lock(&bc->bc_mtx); | |||||
bc->bc_paused = 0; | |||||
/* resume the threads waiting for paused */ | |||||
pthread_cond_broadcast(&bc->bc_paused_cond); | |||||
/* kick the threads after restore */ | |||||
pthread_cond_broadcast(&bc->bc_cond); | |||||
pthread_mutex_unlock(&bc->bc_mtx); | |||||
} | |||||
int | |||||
blockif_snapshot_req(struct blockif_req *br, struct vm_snapshot_meta *meta) | |||||
{ | |||||
int i; | |||||
struct iovec *iov; | |||||
int ret; | |||||
SNAPSHOT_VAR_OR_LEAVE(br->br_iovcnt, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(br->br_offset, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(br->br_resid, meta, ret, done); | |||||
/* | |||||
* XXX: The callback and parameter must be filled by the virtualized | |||||
* device that uses the interface, during its init; we're not touching | |||||
* them here. | |||||
*/ | |||||
/* Snapshot the iovecs. */ | |||||
for (i = 0; i < br->br_iovcnt; i++) { | |||||
iov = &br->br_iov[i]; | |||||
SNAPSHOT_VAR_OR_LEAVE(iov->iov_len, meta, ret, done); | |||||
/* We assume the iov is a guest-mapped address. */ | |||||
SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(iov->iov_base, iov->iov_len, | |||||
false, meta, ret, done); | |||||
} | |||||
done: | |||||
return (ret); | |||||
} | |||||
int | |||||
blockif_snapshot(struct blockif_ctxt *bc, struct vm_snapshot_meta *meta) | |||||
{ | |||||
int ret; | |||||
if (bc->bc_paused == 0) { | |||||
fprintf(stderr, "%s: Snapshot failed: " | |||||
"interface not paused.\r\n", __func__); | |||||
return (ENXIO); | |||||
} | |||||
pthread_mutex_lock(&bc->bc_mtx); | |||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_magic, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_ischr, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_isgeom, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_candelete, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_rdonly, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_size, meta, ret, done); | |||||
pmooney_pfmooney.com: Is there reason to believe that it's not necessary to allow the restoring host to query these… | |||||
Done Inline ActionsIf the information can be easily queried, the mechanism can be changed. However, the restore mechanism will likely become more complex, and relying on other saved data, such as guest memory, may not improve security as it is saved in a similar fashion (copied in/from a file on disk). darius.mihaim_gmail.com: If the information can be easily queried, the mechanism can be changed. However, the restore… | |||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_sectsz, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectsz, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectoff, meta, ret, done); | |||||
SNAPSHOT_VAR_OR_LEAVE(bc->bc_closing, meta, ret, done); | |||||
done: | |||||
pthread_mutex_unlock(&bc->bc_mtx); | |||||
return (ret); | |||||
} | |||||
#endif |
Is there reason to believe that it's not necessary to allow the restoring host to query these parameters itself when bringing up the blockdev there? It could be that aspects of the device (particularly with respect to its capabilities) may change.