diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3561,6 +3561,7 @@
 fs/fuse/fuse_node.c		optional fusefs
 fs/fuse/fuse_vfsops.c		optional fusefs
 fs/fuse/fuse_vnops.c		optional fusefs
+fs/fuse/virtiofs_vfsops.c	optional virtiofs
 fs/mntfs/mntfs_vnops.c		standard
 fs/msdosfs/msdosfs_conv.c	optional msdosfs
 fs/msdosfs/msdosfs_denode.c	optional msdosfs
diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c
--- a/sys/fs/fuse/fuse_internal.c
+++ b/sys/fs/fuse/fuse_internal.c
@@ -982,7 +982,8 @@
 	if ((err = tick->tk_aw_ohead.error)) {
 		goto out;
 	}
-	if ((err = fticket_pull(tick, uio))) {
+
+	if (!fsess_get_virtiofs(data) && (err = fticket_pull(tick, uio))) {
 		goto out;
 	}
 	fiio = fticket_resp(tick)->base;
@@ -1001,7 +1002,8 @@
 	}
 
 	if (fuse_libabi_geq(data, 7, 5)) {
-		if (fticket_resp(tick)->len == sizeof(struct fuse_init_out) ||
+		if (fsess_get_virtiofs(data) ||
+		    fticket_resp(tick)->len == sizeof(struct fuse_init_out) ||
 		    fticket_resp(tick)->len == FUSE_COMPAT_22_INIT_OUT_SIZE) {
 			data->max_write = fiio->max_write;
 			if (fiio->flags & FUSE_ASYNC_READ)
diff --git a/sys/fs/fuse/fuse_ipc.h b/sys/fs/fuse/fuse_ipc.h
--- a/sys/fs/fuse/fuse_ipc.h
+++ b/sys/fs/fuse/fuse_ipc.h
@@ -65,6 +65,7 @@
 
 #include
 #include
+#include
 
 enum fuse_data_cache_mode {
 	FUSE_CACHE_UC,
@@ -83,6 +84,7 @@
 void fiov_teardown(struct fuse_iov *fiov);
 void fiov_refresh(struct fuse_iov *fiov);
 void fiov_adjust(struct fuse_iov *fiov, size_t size);
+int fiov_adjust_nowait(struct fuse_iov *fiov, size_t size);
 
 #define FUSE_DIMALLOC(fiov, spc1, spc2, amnt) do { \
 	fiov_adjust(fiov, (sizeof(*(spc1)) + (amnt))); \
@@ -130,6 +132,8 @@
 	struct mtx tk_aw_mtx;
 	fuse_handler_t *tk_aw_handler;
 	TAILQ_ENTRY(fuse_ticket) tk_aw_link;
+
+	struct task tk_vtfs_tk;
 };
 
 #define FT_ANSW 0x01 /* request of ticket has already been answered */
@@ -168,6 +172,8 @@
 }
 
 int fticket_pull(struct fuse_ticket *ftick, struct uio *uio);
+size_t fticket_out_size(struct fuse_ticket *ftick);
+int fuse_body_audit(struct fuse_ticket *ftick, size_t blen);
 
 /*
  * The data representing a FUSE session.
@@ -219,6 +225,13 @@
 	uint64_t isimpl;
 	uint64_t mnt_flag;
 	enum fuse_data_cache_mode cache_mode;
+
+	/* Fields necessary for virtiofs. */
+	struct vtfs_softc *vtfs;
+	struct taskqueue *vtfs_tq;
+	void (*vtfs_flush_cb)(void *, int);
+	void (*virtiofs_unmount_cb)(void *);
+
 };
 
 #define FSESS_DEAD 0x0001 /* session is to be closed */
@@ -240,6 +253,7 @@
 #define FSESS_WARN_WB_CACHE_INCOHERENT 0x400000 /* WB cache incoherent */
 #define FSESS_WARN_ILLEGAL_INODE 0x800000 /* Illegal inode for new file */
 #define FSESS_WARN_READLINK_EMBEDDED_NUL 0x1000000 /* corrupt READLINK output */
+#define FSESS_VIRTIOFS 0x2000000 /* session backed by virtio device */
 #define FSESS_MNTOPTS_MASK ( \
 	FSESS_DAEMON_CAN_SPY | FSESS_PUSH_SYMLINKS_IN | \
 	FSESS_DEFAULT_PERMISSIONS | FSESS_INTR)
@@ -414,6 +428,12 @@
 	return (data->dataflags & FSESS_DEAD);
 }
 
+static inline bool
+fsess_get_virtiofs(struct fuse_data *data)
+{
+	return (data->dataflags & FSESS_VIRTIOFS);
+}
+
 struct fuse_dispatcher {
 	struct fuse_ticket *tick;
 	struct fuse_in_header *finh;
diff --git a/sys/fs/fuse/fuse_ipc.c b/sys/fs/fuse/fuse_ipc.c
--- a/sys/fs/fuse/fuse_ipc.c
+++ b/sys/fs/fuse/fuse_ipc.c
@@ -76,6 +76,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -87,6 +88,8 @@
 #include "fuse_ipc.h"
 #include "fuse_internal.h"
 
+#include
+
 SDT_PROVIDER_DECLARE(fusefs);
 /*
  * Fuse trace probe:
@@ -107,8 +110,6 @@
 fticket_aw_pull_uio(struct fuse_ticket *ftick,
     struct uio *uio);
 
-static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen);
-
 static fuse_handler_t fuse_standard_handler;
 
 static counter_u64_t fuse_ticket_count;
@@ -276,17 +277,27 @@
 	free(fiov->base, M_FUSEMSG);
 }
 
-void
-fiov_adjust(struct fuse_iov *fiov, size_t size)
+/*
+ * Resize fiov to hold size bytes, (re)allocating its buffer with the given
+ * malloc(9) wait flag.  Returns 0 on success, or ENOMEM if an M_NOWAIT
+ * allocation fails, in which case the buffer of fiov is left as it was.
+ */
+static int
+fiov_adjust_internal(struct fuse_iov *fiov, size_t size, int flag)
 {
+	void *base;
+
+	KASSERT(flag == M_NOWAIT || flag == M_WAITOK, ("invalid flag %x", flag));
+
 	if (fiov->allocated_size < size ||
 	    (fuse_iov_permanent_bufsize >= 0 &&
 	    fiov->allocated_size - size > fuse_iov_permanent_bufsize &&
 	    --fiov->credit < 0)) {
-		fiov->base = realloc(fiov->base, FU_AT_LEAST(size), M_FUSEMSG,
-		    M_WAITOK | M_ZERO);
-		if (!fiov->base) {
-			panic("FUSE: realloc failed");
-		}
+		/* On failure realloc(9) leaves the old buffer untouched. */
+		base = realloc(fiov->base, FU_AT_LEAST(size), M_FUSEMSG,
+		    flag | M_ZERO);
+		if (base == NULL)
+			return (ENOMEM);
+		fiov->base = base;
 		fiov->allocated_size = FU_AT_LEAST(size);
 		fiov->credit = fuse_iov_credit;
@@ -297,6 +308,26 @@
 		bzero((char*)fiov->base + fiov->len, size - fiov->len);
 	}
 	fiov->len = size;
+
+	return (0);
+}
+
+/*
+ * As fiov_adjust(), but never sleeps.  Returns ENOMEM if the buffer could
+ * not be resized; the buffer of fiov is then left unchanged.
+ */
+int
+fiov_adjust_nowait(struct fuse_iov *fiov, size_t size)
+{
+	return (fiov_adjust_internal(fiov, size, M_NOWAIT));
+}
+
+/* Resize fiov to size bytes, sleeping for memory if needed. */
+void
+fiov_adjust(struct fuse_iov *fiov, size_t size)
+{
+	if (fiov_adjust_internal(fiov, size, M_WAITOK) != 0)
+		panic("FUSE: realloc failed");
 }
 
 /* Resize the fiov if needed, and clear it's buffer */
@@ -646,6 +677,33 @@
 	fuse_lck_mtx_unlock(ftick->tk_data->aw_mtx);
 }
 
+
+/* Notify users of the FUSE device of the new ticket. */
+static void
+fuse_notify_fuse_device(struct fuse_data *data)
+{
+	wakeup_one(data);
+	selwakeuppri(&data->ks_rsel, PZERO + 1);
+	KNOTE_LOCKED(&data->ks_rsel.si_note, 0);
+}
+
+/*
+ * Notify the virtiofs backend of the new ticket by scheduling the session's
+ * flush callback on its taskqueue.
+ *
+ * NOTE(review): the task is embedded in the ticket, and the flush callback
+ * may consume tickets queued by other callers; confirm that a ticket cannot
+ * be freed or reused while its task is still pending on the taskqueue.
+ */
+static void
+fuse_notify_virtiofs(struct fuse_ticket *ftick)
+{
+	struct fuse_data *data = ftick->tk_data;
+
+	TASK_INIT(&ftick->tk_vtfs_tk, 0, data->vtfs_flush_cb, data);
+	taskqueue_enqueue(data->vtfs_tq, &ftick->tk_vtfs_tk);
+}
+
 /*
  * Insert a new upgoing ticket into the message queue
  *
@@ -655,26 +713,193 @@
 void
 fuse_insert_message(struct fuse_ticket *ftick, bool urgent)
 {
+	struct fuse_data *data = ftick->tk_data;
+
 	if (ftick->tk_flag & FT_DIRTY) {
 		panic("FUSE: ticket reused without being refreshed");
 	}
 	ftick->tk_flag |= FT_DIRTY;
 
-	if (fdata_get_dead(ftick->tk_data)) {
+	if (fdata_get_dead(data)) {
 		return;
 	}
-	fuse_lck_mtx_lock(ftick->tk_data->ms_mtx);
+
+	fuse_lck_mtx_lock(data->ms_mtx);
+
 	if (urgent)
 		fuse_ms_push_head(ftick);
 	else
 		fuse_ms_push(ftick);
-	wakeup_one(ftick->tk_data);
-	selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1);
-	KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0);
-	fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx);
+
+	/* Choose between the virtiofs and FUSE paths. */
+	if (fsess_get_virtiofs(data))
+		fuse_notify_virtiofs(ftick);
+	else
+		fuse_notify_fuse_device(data);
+
+	fuse_lck_mtx_unlock(data->ms_mtx);
 }
 
-static int
+/* Special case for read tickets. Reply size depends on the specified length. */
+static size_t
+fticket_out_size_read(struct fuse_ticket *ftick)
+{
+	struct fuse_read_in *read_header;
+	uintptr_t in_header;
+
+	in_header = (uintptr_t) ftick->tk_ms_fiov.base;
+	read_header = (struct fuse_read_in *)(in_header + sizeof(struct fuse_in_header));
+	return (read_header->size);
+}
+
+/*
+ * Special case for extended attribute tickets.  A request with a zero size
+ * only probes for the needed length and is answered with a small header of
+ * probe_size bytes; otherwise the reply carries up to the requested number
+ * of bytes of attribute data.  This assumes FUSE_LISTXATTR requests start
+ * with the same size field as struct fuse_getxattr_in - TODO confirm.
+ */
+static size_t
+fticket_out_size_xattr(struct fuse_ticket *ftick, size_t probe_size)
+{
+	struct fuse_getxattr_in *xattr_header;
+	uintptr_t in_header;
+
+	in_header = (uintptr_t) ftick->tk_ms_fiov.base;
+	xattr_header = (struct fuse_getxattr_in *)(in_header +
+	    sizeof(struct fuse_in_header));
+	if (xattr_header->size == 0)
+		return (probe_size);
+	return (xattr_header->size);
+}
+
+/*
+ * Return the largest reply body, in bytes and without the out header, that
+ * the request held by the ticket can produce.  Used to preallocate the reply
+ * buffer.  Panics on an opcode it does not know about.
+ */
+size_t
+fticket_out_size(struct fuse_ticket *ftick)
+{
+	enum fuse_opcode opcode;
+
+	opcode = fticket_opcode(ftick);
+
+	switch (opcode) {
+	case FUSE_BMAP:
+		return (sizeof(struct fuse_bmap_out));
+
+	case FUSE_LINK:
+	case FUSE_LOOKUP:
+	case FUSE_MKDIR:
+	case FUSE_MKNOD:
+	case FUSE_SYMLINK:
+		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
+			return (sizeof(struct fuse_entry_out));
+		} else {
+			return (FUSE_COMPAT_ENTRY_OUT_SIZE);
+		}
+
+	case FUSE_FORGET:
+		return (0);
+
+	case FUSE_GETATTR:
+	case FUSE_SETATTR:
+		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
+			return (sizeof(struct fuse_attr_out));
+		} else {
+			return (FUSE_COMPAT_ATTR_OUT_SIZE);
+		}
+
+	case FUSE_READLINK:
+		/* We are expecting to read back a POSIX path. */
+		return (PATH_MAX);
+
+	case FUSE_UNLINK:
+	case FUSE_RMDIR:
+	case FUSE_RENAME:
+		return (0);
+
+	case FUSE_OPEN:
+		return (sizeof(struct fuse_open_out));
+
+	case FUSE_READ:
+		return (fticket_out_size_read(ftick));
+
+	case FUSE_WRITE:
+		return (sizeof(struct fuse_write_out));
+
+	case FUSE_STATFS:
+		if (fuse_libabi_geq(ftick->tk_data, 7, 4)) {
+			return (sizeof(struct fuse_statfs_out));
+		} else {
+			return (FUSE_COMPAT_STATFS_SIZE);
+		}
+
+	case FUSE_RELEASE:
+	case FUSE_FSYNC:
+	case FUSE_SETXATTR:
+		return (0);
+
+	case FUSE_GETXATTR:
+		return (fticket_out_size_xattr(ftick,
+		    sizeof(struct fuse_getxattr_out)));
+
+	case FUSE_LISTXATTR:
+		return (fticket_out_size_xattr(ftick,
+		    sizeof(struct fuse_listxattr_out)));
+
+	case FUSE_REMOVEXATTR:
+	case FUSE_FLUSH:
+		return (0);
+
+	case FUSE_INIT:
+		return (sizeof(struct fuse_init_out));
+
+	case FUSE_OPENDIR:
+		return (sizeof(struct fuse_open_out));
+
+	case FUSE_READDIR:
+		return (fticket_out_size_read(ftick));
+
+	case FUSE_RELEASEDIR:
+	case FUSE_FSYNCDIR:
+		return (0);
+
+	case FUSE_GETLK:
+		return (sizeof(struct fuse_lk_out));
+
+	case FUSE_SETLK:
+	case FUSE_SETLKW:
+	case FUSE_ACCESS:
+		return (0);
+
+	case FUSE_CREATE:
+		if (fuse_libabi_geq(ftick->tk_data, 7, 9)) {
+			return (sizeof(struct fuse_entry_out) +
+			    sizeof(struct fuse_open_out));
+		} else {
+			return (FUSE_COMPAT_ENTRY_OUT_SIZE +
+			    sizeof(struct fuse_open_out));
+		}
+
+	case FUSE_INTERRUPT:
+	case FUSE_DESTROY:
+	case FUSE_FALLOCATE:
+		return (0);
+
+	case FUSE_LSEEK:
+		return (sizeof(struct fuse_lseek_out));
+
+	case FUSE_COPY_FILE_RANGE:
+		return (sizeof(struct fuse_write_out));
+
+	default:
+		panic("FUSE: opcodes out of sync (%d)\n", opcode);
+	}
+}
+
+int
 fuse_body_audit(struct fuse_ticket *ftick, size_t blen)
 {
 	int err = 0;
@@ -887,7 +1112,9 @@
 {
 	int err = 0;
 
-	err = fticket_pull(ftick, uio);
+	/* Data already pulled for virtiofs. */
+	if (uio != NULL)
+		err = fticket_pull(ftick, uio);
 
 	fuse_lck_mtx_lock(ftick->tk_aw_mtx);
 
diff --git a/sys/fs/fuse/fuse_vfsops.h b/sys/fs/fuse/fuse_vfsops.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/fuse/fuse_vfsops.h
@@ -0,0 +1,13 @@
+#ifndef _FUSE_VFSOPS_H_
+#define _FUSE_VFSOPS_H_
+
+vfs_fhtovp_t fuse_vfsop_fhtovp;
+vfs_mount_t fuse_vfsop_mount;
+vfs_unmount_t fuse_vfsop_unmount;
+vfs_root_t fuse_vfsop_root;
+vfs_statfs_t fuse_vfsop_statfs;
+vfs_vget_t fuse_vfsop_vget;
+
+void virtiofs_teardown(void *arg);
+
+#endif /* _FUSE_VFSOPS_H_ */
diff --git a/sys/fs/fuse/fuse_vfsops.c b/sys/fs/fuse/fuse_vfsops.c
--- a/sys/fs/fuse/fuse_vfsops.c
+++ b/sys/fs/fuse/fuse_vfsops.c
@@ -81,15 +81,19 @@
 #include
 #include
 #include
+#include
 
 #include "fuse.h"
 #include "fuse_node.h"
 #include "fuse_ipc.h"
 #include "fuse_internal.h"
+#include "fuse_vfsops.h"
 
 #include
 #include
 
+#include
+
 SDT_PROVIDER_DECLARE(fusefs);
 /*
  * Fuse trace probe:
@@ -109,13 +113,6 @@
 #define PRIV_VFS_FUSE_SYNC_UNMOUNT PRIV_VFS_MOUNT_NONUSER
 #endif
 
-static vfs_fhtovp_t fuse_vfsop_fhtovp;
-static vfs_mount_t fuse_vfsop_mount;
-static vfs_unmount_t fuse_vfsop_unmount;
-static vfs_root_t fuse_vfsop_root;
-static vfs_statfs_t fuse_vfsop_statfs;
-static vfs_vget_t fuse_vfsop_vget;
-
 struct vfsops fuse_vfsops = {
 	.vfs_fhtovp = fuse_vfsop_fhtovp,
 	.vfs_mount = fuse_vfsop_mount,
@@ -262,7 +259,7 @@
 	return err;
 }
 
-static int
+int
 fuse_vfsop_fhtovp(struct mount *mp, struct fid *fhp, int flags,
 	struct vnode **vpp)
 {
@@ -290,7 +287,7 @@
 	return (0);
 }
 
-static int
+int
 fuse_vfsop_mount(struct mount *mp)
 {
 	int err;
@@ -466,7 +463,7 @@
 	return err;
 }
 
-static int
+int
 fuse_vfsop_unmount(struct mount *mp, int mntflags)
 {
 	int err = 0;
@@ -509,7 +506,11 @@
 		fdisp_destroy(&fdi);
 	}
 
-	fdata_set_dead(data);
+
+	if (fsess_get_virtiofs(data))
+		data->virtiofs_unmount_cb((void *)data);
+	else
+		fdata_set_dead(data);
 
 alreadydead:
 	FUSE_LOCK();
@@ -522,14 +523,15 @@
 	mp->mnt_data = NULL;
 	MNT_IUNLOCK(mp);
 
-	dev_rel(fdev);
+	if (fdev != NULL)
+		dev_rel(fdev);
 
 	return 0;
 }
 
 SDT_PROBE_DEFINE1(fusefs, , vfsops, invalidate_without_export,
 	"struct mount*");
-static int
+int
 fuse_vfsop_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
 {
 	struct fuse_data *data = fuse_get_mpdata(mp);
@@ -595,7 +597,7 @@
 	return error;
 }
 
-static int
+int
 fuse_vfsop_root(struct mount *mp, int lkflags, struct vnode **vpp)
 {
 	struct fuse_data *data = fuse_get_mpdata(mp);
@@ -631,7 +633,7 @@
 	return err;
 }
 
-static int
+int
 fuse_vfsop_statfs(struct mount *mp, struct statfs *sbp)
 {
 	struct fuse_dispatcher fdi;
diff --git a/sys/fs/fuse/virtiofs_vfsops.c b/sys/fs/fuse/virtiofs_vfsops.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/fuse/virtiofs_vfsops.c
@@ -0,0 +1,588 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024, Emil Tsalapatis
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "fuse.h"
+#include "fuse_kernel.h"
+#include "fuse_internal.h"
+#include "fuse_ipc.h"
+#include "fuse_vfsops.h"
+
+#include
+
+#include
+#include
+
+#define VIRTIOFS_THREADS_TQ (8)
+
+static vfs_mount_t virtiofs_vfsop_mount;
+
+/* Only mount/unmount is different compared to fuse. */
+static struct vfsops virtiofs_vfsops = {
+	.vfs_fhtovp = fuse_vfsop_fhtovp,
+	.vfs_mount = virtiofs_vfsop_mount,
+	.vfs_unmount = fuse_vfsop_unmount,
+	.vfs_root = fuse_vfsop_root,
+	.vfs_statfs = fuse_vfsop_statfs,
+	.vfs_vget = fuse_vfsop_vget,
+};
+
+static struct vfsconf virtiofs_vfsconf = {
+	.vfc_version = VFS_VERSION,
+	.vfc_name = "virtiofs",
+	.vfc_vfsops = &virtiofs_vfsops,
+	.vfc_typenum = -1,
+	.vfc_flags = VFCF_JAIL | VFCF_SYNTHETIC
+};
+
+/* Module event handler: forwards load and unload events to vfs_modevent(). */
+static int
+virtiofs_loader(struct module *m, int what, void *arg)
+{
+	int error = 0;
+
+	switch (what) {
+	case MOD_LOAD:
+		error = vfs_modevent(NULL, what, &virtiofs_vfsconf);
+		break;
+	case MOD_UNLOAD:
+		error = vfs_modevent(NULL, what, &virtiofs_vfsconf);
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	return (error);
+}
+
+/* Registering the module */
+
+static moduledata_t virtiofs_moddata = {
+	"virtiofs",
+	virtiofs_loader,
+	&virtiofs_vfsconf
+};
+
+DECLARE_MODULE(virtiofs, virtiofs_moddata, SI_SUB_VFS, SI_ORDER_MIDDLE);
+MODULE_DEPEND(virtiofs, fusefs, 1, 1, 1);
+MODULE_DEPEND(virtiofs, vtfs, 1, 1, 1);
+MODULE_VERSION(virtiofs, 1);
+
+/*
+ * Push the ticket to the virtiofs device.
+ *
+ * Takes a reference on the ticket, preallocates the reply buffer without
+ * sleeping and hands the device a scatter-gather list made of the request
+ * followed by the reply header and reply body.  Returns 0 or an errno value;
+ * when a step before vtfs_enqueue() fails, the reference is dropped again.
+ */
+static int
+virtiofs_enqueue(struct fuse_ticket *ftick)
+{
+	struct fuse_out_header *ohead = &ftick->tk_aw_ohead;
+	struct fuse_data *data = ftick->tk_data;
+	struct fuse_iov *riov, *wiov;
+	struct sglist *sg = NULL;
+	int readable, writable;
+	bool urgent;
+	int error;
+
+	urgent = (fticket_opcode(ftick) == FUSE_FORGET);
+
+	riov = &ftick->tk_ms_fiov;
+	wiov = &ftick->tk_aw_fiov;
+
+	refcount_acquire(&ftick->tk_refcount);
+
+	/* Preallocate the response buffer. */
+	error = fiov_adjust_nowait(wiov, fticket_out_size(ftick));
+	if (error != 0)
+		goto out;
+
+	/* Readable/writable from the host's point of view. */
+	readable = sglist_count(riov->base, riov->len);
+
+	/* Account for the out header. */
+	writable = sglist_count(ohead, sizeof(*ohead)) +
+	    sglist_count(wiov->base, wiov->len);
+
+	sg = sglist_alloc(readable + writable, M_NOWAIT);
+	if (sg == NULL) {
+		error = ENOMEM;
+		goto out;
+	}
+
+	error = sglist_append(sg, riov->base, riov->len);
+	if (error != 0)
+		goto out;
+
+	error = sglist_append(sg, ohead, sizeof(*ohead));
+	if (error != 0)
+		goto out;
+
+	error = sglist_append(sg, wiov->base, wiov->len);
+	if (error != 0)
+		goto out;
+
+	error = vtfs_enqueue(data->vtfs, ftick, sg, readable, writable, urgent);
+
+	/*
+	 * The enqueue call destroys the scatter-gather array both on success and
+	 * on failure, so no need to clean it up.
+	 */
+
+	return (error);
+
+out:
+	fuse_ticket_drop(ftick);
+	if (sg != NULL)
+		sglist_free(sg);
+
+	return (error);
+}
+
+/*
+ * Taskqueue callback: drain the session's message queue, handing every
+ * queued ticket to the virtio device.  Stops at the first enqueue failure.
+ */
+static void
+virtiofs_flush(void *xdata, int __unused pending)
+{
+	struct fuse_ticket *ftick;
+	struct fuse_data *data = xdata;
+	int error = 0;
+
+	fuse_lck_mtx_lock(data->ms_mtx);
+
+	while (!STAILQ_EMPTY(&data->ms_head)) {
+		ftick = STAILQ_FIRST(&data->ms_head);
+
+		STAILQ_REMOVE_HEAD(&data->ms_head, tk_ms_link);
+		data->ms_count--;
+
+		KASSERT(ftick != STAILQ_FIRST(&data->ms_head), ("ticket still in the queue"));
+
+#ifdef INVARIANTS
+		MPASS(data->ms_count >= 0);
+		ftick->tk_ms_link.stqe_next = NULL;
+#endif
+
+		FUSE_ASSERT_MS_DONE(ftick);
+
+		/*
+		 * The enqueue operation is synchronous and may sleep,
+		 * so drop the session lock - we have already adjusted
+		 * all session fields so we don't need it while flushing
+		 * to the virtio device anyway.
+		 */
+		fuse_lck_mtx_unlock(data->ms_mtx);
+		error = virtiofs_enqueue(ftick);
+
+		/*
+		 * Drop our reference only after virtiofs_enqueue() has taken
+		 * its own, so the ticket cannot be freed while it is still
+		 * being handed to the device.
+		 */
+		fuse_ticket_drop(ftick);
+
+		fuse_lck_mtx_lock(data->ms_mtx);
+		if (error != 0)
+			break;
+	}
+
+	fuse_lck_mtx_unlock(data->ms_mtx);
+
+	if (error != 0)
+		printf("Warning: %s failed with %d\n", __func__, error);
+
+	return;
+}
+
+/*
+ * First callback handed to vtfs_register_cb(); judging by its name it is
+ * invoked for FORGET tickets, which carry no reply.  It does nothing.
+ *
+ * NOTE(review): the reference taken in virtiofs_enqueue() is not released
+ * here - confirm that something else drops it for these tickets, otherwise
+ * they are leaked.
+ */
+static void
+virtiofs_cb_forget_ticket(void *xtick, uint32_t len __unused)
+{
+}
+
+/*
+ * Remove from the answer queue, and drop, the ticket whose unique ID matches
+ * the pending interrupt of ftick, then clear ftick->irq_unique.
+ */
+static void
+virtiofs_drop_intr_tick(struct fuse_data *data, struct fuse_ticket *ftick)
+{
+	struct fuse_ticket *itick, *x_tick;
+
+	TAILQ_FOREACH_SAFE(itick, &data->aw_head, tk_aw_link, x_tick) {
+		if (itick->tk_unique == ftick->irq_unique) {
+			fuse_aw_remove(itick);
+			fuse_ticket_drop(itick);
+			break;
+		}
+	}
+
+	ftick->irq_unique = 0;
+}
+
+/*
+ * Handle a reply that is not tied to a request.  The notification code is
+ * passed in oerror; only entry and inode invalidations are supported, and
+ * anything else yields ENOSYS.
+ */
+static int
+virtiofs_handle_async_tick(struct fuse_data *data, struct fuse_ticket *ftick, int oerror)
+{
+	struct mount *mp = data->mp;
+	struct iovec aiov;
+	struct uio uio;
+	int err = 0;
+
+	/*
+	 * Form a uio and pass it to the message handlers, because unlike other
+	 * messages they do not use ftick->tk_aw_fiov to store the message body.
+	 */
+	aiov.iov_base = fticket_resp(ftick)->base;
+	aiov.iov_len = fticket_resp(ftick)->len;
+
+	uio.uio_iov = (struct iovec *)&aiov;
+	uio.uio_iovcnt = 1;
+	uio.uio_resid = aiov.iov_len;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_rw = UIO_WRITE;
+	uio.uio_td = curthread;
+	uio.uio_offset = 0;
+
+	/* Only handle the two async messages that the FUSE device does. */
+	switch (oerror) {
+	case FUSE_NOTIFY_INVAL_ENTRY:
+		err = fuse_internal_invalidate_entry(mp, &uio);
+		break;
+	case FUSE_NOTIFY_INVAL_INODE:
+		err = fuse_internal_invalidate_inode(mp, &uio);
+		break;
+	default:
+		err = ENOSYS;
+	}
+
+	if (err != 0) {
+		printf("WARNING: error %d when handling async message of type %d\n",
+		    err, oerror);
+	}
+
+	return (err);
+}
+
+/*
+ * Take ftick off the answer queue if it is still there.  Returns true when
+ * the ticket was found and removed.  The caller must hold data->aw_mtx.
+ */
+static bool
+virtiofs_remove_ticket(struct fuse_data *data, struct fuse_ticket *ftick)
+{
+	struct fuse_ticket *tick, *x_tick;
+
+	mtx_assert(&data->aw_mtx, MA_OWNED);
+
+	TAILQ_FOREACH_SAFE(tick, &data->aw_head, tk_aw_link, x_tick) {
+		if (tick->tk_unique != ftick->tk_aw_ohead.unique)
+			continue;
+
+		MPASS(tick == ftick);
+		fuse_aw_remove(ftick);
+
+		return (true);
+	}
+
+	return (false);
+}
+
+/*
+ * Completion callback: the device reports that len bytes of reply, header
+ * included, are available for the ticket.  Validate the reply, take the
+ * ticket off the answer queue and run its handler.  Any inconsistency kills
+ * the session.
+ */
+static void
+virtiofs_cb_complete_ticket(void *xtick, uint32_t len)
+{
+	struct fuse_ticket *ftick = xtick;
+	struct fuse_data *data = ftick->tk_data;
+	struct fuse_out_header *ohead = &ftick->tk_aw_ohead;
+	bool found;
+	int err = 0;
+	int lerr;
+
+	/* Validate the length field of the out header. */
+	if (len < sizeof(*ohead) || len != ohead->len) {
+		err = EINVAL;
+		goto done;
+	}
+
+	/* Error responses to tickets do not have a body. */
+	if (len > sizeof(*ohead) && ohead->unique != 0 && ohead->error) {
+		err = EINVAL;
+		goto done;
+	}
+
+	/*
+	 * Ensure that out headers that return an error are valid.  Replies
+	 * with a zero unique ID are notifications whose error field holds a
+	 * notification code rather than an errno, so leave those alone.
+	 */
+	if (data->linux_errnos != 0 && ohead->unique != 0 &&
+	    ohead->error != 0) {
+		lerr = -ohead->error;
+		if (lerr < 0 || lerr >= nitems(linux_to_bsd_errtbl)) {
+			err = EINVAL;
+			goto done;
+		}
+
+		/* '-', because it will get flipped again below */
+		ohead->error = -linux_to_bsd_errtbl[lerr];
+	}
+
+	/* Remove the ticket from the answer queue. */
+	fuse_lck_mtx_lock(data->aw_mtx);
+
+	found = virtiofs_remove_ticket(data, ftick);
+
+	/*
+	 * We should not be able to find a non-unique ticket, and
+	 * all unique tickets should still be in the queue.
+	 */
+	KASSERT(found == (ohead->unique != 0),
+	    ("inconsistency in answer queue:"
+	    "found %d unique %ju", found, (uintmax_t)ohead->unique));
+
+	/* Drop any pending interrupts for the completed ticket. */
+	if (found && ftick->irq_unique > 0)
+		virtiofs_drop_intr_tick(data, ftick);
+
+	fuse_lck_mtx_unlock(data->aw_mtx);
+
+	if (found) {
+		/*
+		 * If the operation was successful, ensure the size is valid.
+		 * Do it before the handler runs and the reference is dropped,
+		 * as the ticket must not be touched after that.
+		 */
+		if (ohead->error == 0)
+			err = fuse_body_audit(ftick, len - sizeof(*ohead));
+
+		if (ftick->tk_aw_handler) {
+			/* Sanitize the linuxism of negative errnos */
+			ohead->error *= -1;
+
+			/* Illegal error code or body, treat it as EIO. */
+			if (err != 0 || ohead->error < 0 ||
+			    ohead->error > ELAST) {
+				ohead->error = EIO;
+				ftick->tk_aw_handler(ftick, NULL);
+				err = EINVAL;
+			} else {
+				err = ftick->tk_aw_handler(ftick, NULL);
+			}
+		}
+
+		fuse_ticket_drop(ftick);
+	} else if (ohead->unique == 0) {
+		err = virtiofs_handle_async_tick(data, ftick, ohead->error);
+	}
+
+done:
+	/*
+	 * If something goes wrong, err on the side of caution and kill the session
+	 * because the FUSE server in the host is misbehaving.
+	 */
+	if (err != 0)
+		fdata_set_dead(data);
+}
+
+/*
+ * Mount a virtiofs filesystem.  Looks up the virtio device named by the
+ * "tag" option, creates a FUSE session bound to it and starts the INIT
+ * handshake.  Remounts (MNT_UPDATE) are rejected with EOPNOTSUPP.
+ */
+static int
+virtiofs_vfsop_mount(struct mount *mp)
+{
+	/*
+	 * Turn interrupts on by default, existing virtiofsd servers use them
+	 * anyway.
+	 *
+	 * NOTE(review): only FSESS_VIRTIOFS is set below; FSESS_INTR is not.
+	 * Confirm whether it was meant to be included.
+	 */
+	const uint64_t mntopts = FSESS_VIRTIOFS;
+	struct thread *td = curthread;
+	struct vfsoptlist *opts;
+	struct fuse_data *data;
+	vtfs_instance vtfs;
+	uint32_t max_read;
+	char *tag;
+	int error;
+
+	opts = mp->mnt_optnew;
+	if (opts == NULL)
+		return (EINVAL);
+
+	/* `fspath' contains the mount point (eg. /mnt/guestfs); REQUIRED */
+	if (!vfs_getopts(opts, "fspath", &error))
+		return (error);
+
+	max_read = maxbcachebuf;
+	(void)vfs_scanopt(opts, "max_read=", "%u", &max_read);
+
+
+	/* XXX Remounts not handled for now, but should be easy to code in. */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	/* `tag' contains the virtio tag; REQUIRED */
+	tag = vfs_getopts(opts, "tag", &error);
+	if (!tag)
+		return (error);
+
+	error = vtfs_find(tag, &vtfs);
+	if (error != 0)
+		return (error);
+
+	data = fdata_alloc(NULL, td->td_ucred);
+
+	/*
+	 * Create the taskqueue before taking the FUSE lock so that the
+	 * allocation may sleep instead of failing.
+	 */
+	data->vtfs_tq = taskqueue_create("virtiofstq", M_WAITOK,
+	    taskqueue_thread_enqueue, &data->vtfs_tq);
+
+	vtfs_register_cb(vtfs, virtiofs_cb_forget_ticket, virtiofs_cb_complete_ticket,
+	    virtiofs_teardown, data);
+
+	FUSE_LOCK();
+	KASSERT(!fdata_get_dead(data), ("allocated dead session"));
+
+	data->vtfs = vtfs;
+	data->vtfs_flush_cb = virtiofs_flush;
+	data->virtiofs_unmount_cb = virtiofs_teardown;
+	data->mp = mp;
+	/*
+	 * XXX We currently do not support any mount options. This is due because it is
+	 * hard to test for it, even though most FUSE options should be trivially easy
+	 * to add. Deliberately defer enabling them until we can reuse the FUSE test
+	 * suite for virtiofs.
+	 */
+	data->dataflags |= mntopts;
+	data->max_read = max_read;
+	data->daemon_timeout = FUSE_MIN_DAEMON_TIMEOUT;
+	data->linux_errnos = 1;
+	data->mnt_flag = mp->mnt_flag & MNT_UPDATEMASK;
+	FUSE_UNLOCK();
+
+	KASSERT(!fdata_get_dead(data), ("newly created fuse session is dead"));
+
+	vfs_getnewfsid(mp);
+	MNT_ILOCK(mp);
+	mp->mnt_data = data;
+	mp->mnt_flag &= ~MNT_LOCAL;
+	mp->mnt_kern_flag |= MNTK_USES_BCACHE;
+	/*
+	 * The FS is remote by default. Disable nullfs caching to avoid
+	 * the extra coherence cost, same as FUSE.
+	 */
+	mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
+	MNT_IUNLOCK(mp);
+
+	mp->mnt_stat.f_iosize = maxbcachebuf;
+	strlcat(mp->mnt_stat.f_fstypename, ".virtiofs", MFSNAMELEN);
+	memset(mp->mnt_stat.f_mntfromname, 0, MNAMELEN);
+	strlcpy(mp->mnt_stat.f_mntfromname, tag, MNAMELEN);
+	mp->mnt_iosize_max = maxphys;
+
+	error = taskqueue_start_threads(&data->vtfs_tq, VIRTIOFS_THREADS_TQ, PVFS, "virtiofs_tq");
+	if (error != 0)
+		panic("error when initializing taskqueue threads");
+
+	/* Now handshaking with daemon */
+	fuse_internal_send_init(data, td);
+
+	return (0);
+}
+
+/*
+ * Tear down the virtiofs side of a session: mark it dead, drain and free its
+ * taskqueue, drain the device and release it.  xdata is the fuse_data of the
+ * session.
+ */
+void
+virtiofs_teardown(void *xdata)
+{
+	struct fuse_data *data = (struct fuse_data *)xdata;
+	vtfs_instance vtfs = data->vtfs;
+
+	/* Mark the session as dead to prevent new requests. */
+	fdata_set_dead(data);
+
+	/*
+	 * Flush out all pending requests into the virtio
+	 * device. After this, there are no host-bound
+	 * requests in flight.
+	 */
+	taskqueue_drain_all(data->vtfs_tq);
+	taskqueue_free(data->vtfs_tq);
+
+	/*
+	 * Turn off the device and handle all received
+	 * requests. After this there are no guest-bound
+	 * requests in flight, completing virtiofs teardown.
+	 */
+	vtfs_drain(vtfs);
+
+	vtfs_unregister_cb(vtfs);
+	vtfs_release(vtfs);
+}
+
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -404,6 +404,7 @@
 	${_vesa} \
 	${_vf_i2c} \
 	virtio \
+	virtiofs \
 	vge \
 	${_viawd} \
 	videomode \
diff --git a/sys/modules/virtiofs/Makefile b/sys/modules/virtiofs/Makefile
new file mode 100644
--- /dev/null
+++ b/sys/modules/virtiofs/Makefile
@@ -0,0 +1,6 @@
+.PATH: ${SRCTOP}/sys/fs/fuse
+
+KMOD=	virtiofs
+SRCS=	virtiofs_vfsops.c
+
+.include <bsd.kmod.mk>