diff --git a/sys/fs/tarfs/tarfs_io.c b/sys/fs/tarfs/tarfs_io.c index c185de8beef1..8837681ac5f0 100644 --- a/sys/fs/tarfs/tarfs_io.c +++ b/sys/fs/tarfs/tarfs_io.c @@ -1,743 +1,743 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Juniper Networks, Inc. * Copyright (c) 2022-2023 Klara, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/

#include "opt_tarfs.h"
#include "opt_zstdio.h"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* TARFS_ZIO is defined exactly when the zstd decompressor (ZSTDIO) is. */
#if defined(ZSTDIO)
#define TARFS_ZIO 1
#else
#undef TARFS_ZIO
#endif

#ifdef ZSTDIO
#define ZSTD_STATIC_LINKING_ONLY
#include
#endif

#include
#include

#ifdef TARFS_DEBUG
/* Debug-only sysctl node and counters for the decompression layer. */
SYSCTL_NODE(_vfs_tarfs, OID_AUTO, zio, CTLFLAG_RD, 0,
    "Tar filesystem decompression layer");
COUNTER_U64_DEFINE_EARLY(tarfs_zio_inflated);
SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, inflated, CTLFLAG_RD,
    &tarfs_zio_inflated, "Amount of compressed data inflated.");
COUNTER_U64_DEFINE_EARLY(tarfs_zio_consumed);
SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, consumed, CTLFLAG_RD,
    &tarfs_zio_consumed, "Amount of compressed data consumed.");
COUNTER_U64_DEFINE_EARLY(tarfs_zio_bounced);
SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, bounced, CTLFLAG_RD,
    &tarfs_zio_bounced, "Amount of decompressed data bounced.");

/*
 * Sysctl handler for the "reset" knob.  Always reports 0 to the reader.
 * When a new value is supplied it is copied in (and otherwise ignored)
 * and the three counters above are zeroed.  Returns 0 on success or the
 * error from SYSCTL_OUT / SYSCTL_IN.
 */
static int
tarfs_sysctl_handle_zio_reset(SYSCTL_HANDLER_ARGS)
{
	unsigned int tmp;
	int error;

	/* the value read back is always zero */
	tmp = 0;
	if ((error = SYSCTL_OUT(req, &tmp, sizeof(tmp))) != 0)
		return (error);
	/* any write, regardless of the value written, triggers the reset */
	if (req->newptr != NULL) {
		if ((error = SYSCTL_IN(req, &tmp, sizeof(tmp))) != 0)
			return (error);
		counter_u64_zero(tarfs_zio_inflated);
		counter_u64_zero(tarfs_zio_consumed);
		counter_u64_zero(tarfs_zio_bounced);
	}
	return (0);
}

SYSCTL_PROC(_vfs_tarfs_zio, OID_AUTO, reset,
    CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW,
    NULL, 0, tarfs_sysctl_handle_zio_reset, "IU",
    "Reset compression counters.");
#endif

MALLOC_DEFINE(M_TARFSZSTATE, "tarfs zstate", "tarfs decompression state");
MALLOC_DEFINE(M_TARFSZBUF, "tarfs zbuf", "tarfs decompression buffers");

/*
 * Leading byte signatures; tarfs_io_init() compares them against the
 * first block of the archive to recognize the compression format.
 */
#define XZ_MAGIC (uint8_t[]){ 0xfd, 0x37, 0x7a, 0x58, 0x5a }
#define ZLIB_MAGIC (uint8_t[]){ 0x1f, 0x8b, 0x08 }
#define ZSTD_MAGIC (uint8_t[]){ 0x28, 0xb5, 0x2f, 0xfd }

#ifdef ZSTDIO
/* Per-mount zstd state: the streaming decompression context. */
struct tarfs_zstd {
	ZSTD_DStream *zds;
};
#endif

/* XXX review use of curthread / uio_td / td_cred */

/*
 * Reads from the tar file according to the provided uio.
If the archive
 * is compressed and raw is false, reads the decompressed stream;
 * otherwise, reads directly from the original file.  Returns 0 on success
 * and a positive errno value on failure.
 */
int
tarfs_io_read(struct tarfs_mount *tmp, bool raw, struct uio *uiop)
{
	const off_t start = uiop->uio_offset;
	const size_t count = uiop->uio_resid;
	/* read the backing file itself when asked to, or when uncompressed */
	const bool direct = raw || tmp->znode == NULL;
	struct vnode *target = direct ? tmp->vp : tmp->znode;
	void *cookie = NULL;
	int error;

	/* only the backing file is protected by a range lock */
	if (direct)
		cookie = vn_rangelock_rlock(target, start, start + count);

	/* the zio node is locked exclusively, the backing file shared */
	error = vn_lock(target, direct ? LK_SHARED : LK_EXCLUSIVE);
	if (error == 0) {
		error = VOP_READ(target, uiop, IO_DIRECT | IO_NODELOCKED,
		    uiop->uio_td->td_ucred);
		VOP_UNLOCK(target);
	}

	if (direct)
		vn_rangelock_unlock(target, cookie);

	TARFS_DPF(IO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
	    (size_t)start, count, error, uiop->uio_resid);
	return (error);
}

/*
 * Reads from the tar file into the provided buffer.  If the archive is
 * compressed and raw is false, reads the decompressed stream; otherwise,
 * reads directly from the original file.  Returns the number of bytes
 * read on success, 0 on EOF, and a negative errno value on failure.
*/ ssize_t tarfs_io_read_buf(struct tarfs_mount *tmp, bool raw, void *buf, off_t off, size_t len) { struct uio auio; struct iovec aiov; ssize_t res; int error; if (len == 0) { TARFS_DPF(IO, "%s(%zu, %zu) null\n", __func__, (size_t)off, len); return (0); } aiov.iov_base = buf; aiov.iov_len = len; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = off; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_resid = len; auio.uio_td = curthread; error = tarfs_io_read(tmp, raw, &auio); if (error != 0) { TARFS_DPF(IO, "%s(%zu, %zu) error %d\n", __func__, (size_t)off, len, error); return (-error); } res = len - auio.uio_resid; if (res == 0 && len != 0) { TARFS_DPF(IO, "%s(%zu, %zu) eof\n", __func__, (size_t)off, len); } else { TARFS_DPF(IO, "%s(%zu, %zu) read %zd | %*D\n", __func__, (size_t)off, len, res, (int)(res > 8 ? 8 : res), (uint8_t *)buf, " "); } return (res); } #ifdef ZSTDIO static void * tarfs_zstate_alloc(void *opaque, size_t size) { (void)opaque; return (malloc(size, M_TARFSZSTATE, M_WAITOK)); } #endif #ifdef ZSTDIO static void tarfs_zstate_free(void *opaque, void *address) { (void)opaque; free(address, M_TARFSZSTATE); } #endif #ifdef ZSTDIO static ZSTD_customMem tarfs_zstd_mem = { tarfs_zstate_alloc, tarfs_zstate_free, NULL, }; #endif #ifdef TARFS_ZIO /* * Updates the decompression frame index, recording the current input and * output offsets in a new index entry, and growing the index if * necessary. 
*/
static void
tarfs_zio_update_index(struct tarfs_zio *zio, off_t i, off_t o)
{

	/*
	 * Step to the next index slot.  Only when that slot lies beyond
	 * the entries recorded so far is a new entry appended; otherwise
	 * the slot already exists and is merely checked below.
	 */
	if (++zio->curidx >= zio->nidx) {
		/* double the array once the entry count exceeds its capacity */
		if (++zio->nidx > zio->szidx) {
			zio->szidx *= 2;
			zio->idx = realloc(zio->idx,
			    zio->szidx * sizeof(*zio->idx),
			    M_TARFSZSTATE, M_ZERO | M_WAITOK);
			TARFS_DPF(ALLOC, "%s: resized zio index\n", __func__);
		}
		zio->idx[zio->curidx].i = i;
		zio->idx[zio->curidx].o = o;
		TARFS_DPF(ZIDX, "%s: index %u = i %zu o %zu\n", __func__,
		    zio->curidx, (size_t)zio->idx[zio->curidx].i,
		    (size_t)zio->idx[zio->curidx].o);
	}
	/* new or pre-existing, the current entry must match the offsets */
	MPASS(zio->idx[zio->curidx].i == i);
	MPASS(zio->idx[zio->curidx].o == o);
}
#endif

/*
 * VOP_ACCESS for zio node.
 *
 * Only a request for exactly VREAD is considered; it is forwarded to the
 * backing tar file vnode, which is share-locked for the duration of the
 * check.  Any other access mode yields EPERM.
 */
static int
tarfs_zaccess(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct tarfs_zio *zio = vp->v_data;
	struct tarfs_mount *tmp = zio->tmp;
	accmode_t accmode = ap->a_accmode;
	int error = EPERM;

	if (accmode == VREAD) {
		error = vn_lock(tmp->vp, LK_SHARED);
		if (error == 0) {
			error = VOP_ACCESS(tmp->vp, accmode, ap->a_cred, ap->a_td);
			VOP_UNLOCK(tmp->vp);
		}
	}
	TARFS_DPF(ZIO, "%s(%d) = %d\n", __func__, accmode, error);
	return (error);
}

/*
 * VOP_GETATTR for zio node.
*/
static int
tarfs_zgetattr(struct vop_getattr_args *ap)
{
	struct vattr va;
	struct vnode *vp = ap->a_vp;
	struct tarfs_zio *zio = vp->v_data;
	struct tarfs_mount *tmp = zio->tmp;
	struct vattr *vap = ap->a_vap;
	int error = 0;

	VATTR_NULL(vap);
	/* most attributes are borrowed from the backing tar file */
	error = vn_lock(tmp->vp, LK_SHARED);
	if (error == 0) {
		error = VOP_GETATTR(tmp->vp, &va, ap->a_cred);
		VOP_UNLOCK(tmp->vp);
		if (error == 0) {
			vap->va_type = VREG;
			vap->va_mode = va.va_mode;
			vap->va_nlink = 1;
			vap->va_gid = va.va_gid;
			vap->va_uid = va.va_uid;
			vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
			vap->va_fileid = TARFS_ZIOINO;
			/* size is the output offset of the last index entry */
			vap->va_size = zio->idx[zio->nidx - 1].o;
			vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
			vap->va_atime = va.va_atime;
			vap->va_ctime = va.va_ctime;
			vap->va_mtime = va.va_mtime;
			vap->va_birthtime = tmp->root->birthtime;
			vap->va_bytes = va.va_bytes;
		}
	}
	TARFS_DPF(ZIO, "%s() = %d\n", __func__, error);
	return (error);
}

#ifdef ZSTDIO
/*
 * VOP_READ for zio node, zstd edition.
 *
 * Positions the decompressor at the index entry nearest to (and not
 * after) the requested output offset, then repeatedly reads compressed
 * data from the backing file and inflates it, discarding output that
 * precedes the requested offset.  Output goes straight into the caller's
 * buffer for UIO_SYSSPACE requests and through a bounce buffer otherwise.
 * Returns 0 on success (including a short read at end of input) and a
 * positive errno value on failure, in which case the stream state is
 * rewound to the first index entry.
 */
static int
tarfs_zread_zstd(struct tarfs_zio *zio, struct uio *uiop)
{
	void *ibuf = NULL, *obuf = NULL, *rl = NULL;
	struct uio auio;
	struct iovec aiov;
	struct tarfs_mount *tmp = zio->tmp;
	struct tarfs_zstd *zstd = zio->zstd;
	struct thread *td = curthread;
	ZSTD_inBuffer zib;
	ZSTD_outBuffer zob;
	off_t zsize;
	off_t ipos, opos;
	size_t ilen, olen;
	size_t zerror;
	off_t off = uiop->uio_offset;
	size_t len = uiop->uio_resid;
	size_t resid = uiop->uio_resid;
	size_t bsize;
	int error;
	bool reset = false;

	/* do we have to rewind? */
	if (off < zio->opos) {
		while (zio->curidx > 0 && off < zio->idx[zio->curidx].o)
			zio->curidx--;
		reset = true;
	}

	/* advance to the nearest index entry */
	if (off > zio->opos) {
		/* XXX maybe do a binary search instead */
		while (zio->curidx < zio->nidx - 1 &&
		    off >= zio->idx[zio->curidx + 1].o) {
			zio->curidx++;
			reset = true;
		}
	}

	/* reset the decompression stream if needed */
	if (reset) {
		zio->ipos = zio->idx[zio->curidx].i;
		zio->opos = zio->idx[zio->curidx].o;
		ZSTD_resetDStream(zstd->zds);
		TARFS_DPF(ZIDX, "%s: skipping to index %u = i %zu o %zu\n",
		    __func__, zio->curidx, (size_t)zio->ipos,
		    (size_t)zio->opos);
	} else {
		TARFS_DPF(ZIDX, "%s: continuing at i %zu o %zu\n",
		    __func__, (size_t)zio->ipos, (size_t)zio->opos);
	}

	/*
	 * Set up a temporary buffer for compressed data.  Use the size
	 * recommended by the zstd library; this is usually 128 kB, but
	 * just in case, make sure it's a multiple of the page size and no
	 * larger than MAXBSIZE.
	 */
	bsize = roundup(ZSTD_CStreamOutSize(), PAGE_SIZE);
	if (bsize > MAXBSIZE)
		bsize = MAXBSIZE;
	ibuf = malloc(bsize, M_TEMP, M_WAITOK);
	/* start with an empty input window so the first pass reads data */
	zib.src = NULL;
	zib.size = 0;
	zib.pos = 0;

	/*
	 * Set up the decompression buffer.  If the target is not in
	 * kernel space, we will have to set up a bounce buffer.
	 *
	 * TODO: to avoid using a bounce buffer, map destination pages
	 * using vm_fault_quick_hold_pages().
	 */
	MPASS(zio->opos <= off);
	MPASS(uiop->uio_iovcnt == 1);
	MPASS(uiop->uio_iov->iov_len >= len);
	if (uiop->uio_segflg == UIO_SYSSPACE) {
		zob.dst = uiop->uio_iov->iov_base;
	} else {
		TARFS_DPF(ALLOC, "%s: allocating %zu-byte bounce buffer\n",
		    __func__, len);
		zob.dst = obuf = malloc(len, M_TEMP, M_WAITOK);
	}
	zob.size = len;
	zob.pos = 0;

	/* lock tarball */
	rl = vn_rangelock_rlock(tmp->vp, zio->ipos, OFF_MAX);
	error = vn_lock(tmp->vp, LK_SHARED);
	if (error != 0) {
		goto fail_unlocked;
	}
	/* check size */
	error = vn_getsize_locked(tmp->vp, &zsize, td->td_ucred);
	if (error != 0) {
		goto fail;
	}
	if (zio->ipos >= zsize) {
		/* beyond EOF */
		goto fail;
	}

	while (resid > 0) {
		if (zib.pos == zib.size) {
			/* request data from the underlying file */
			aiov.iov_base = ibuf;
			aiov.iov_len = bsize;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = zio->ipos;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_resid = aiov.iov_len;
			auio.uio_td = td;
			error = VOP_READ(tmp->vp, &auio,
			    IO_DIRECT | IO_NODELOCKED,
			    td->td_ucred);
			if (error != 0)
				goto fail;
			TARFS_DPF(ZIO, "%s: req %zu+%zu got %zu+%zu\n", __func__,
			    (size_t)zio->ipos, bsize,
			    (size_t)zio->ipos, bsize - auio.uio_resid);
			zib.src = ibuf;
			zib.size = bsize - auio.uio_resid;
			zib.pos = 0;
		}
		MPASS(zib.pos <= zib.size);
		/* the refill produced nothing: compressed input is exhausted */
		if (zib.pos == zib.size) {
			TARFS_DPF(ZIO, "%s: end of file after i %zu o %zu\n", __func__,
			    (size_t)zio->ipos, (size_t)zio->opos);
			goto fail;
		}
		if (zio->opos < off) {
			/* to be discarded */
			zob.size = min(off - zio->opos, len);
			zob.pos = 0;
		} else {
			zob.size = len;
			zob.pos = zio->opos - off;
		}
		/* remember positions to measure what this call consumes/produces */
		ipos = zib.pos;
		opos = zob.pos;
		/* decompress as much as possible */
		zerror = ZSTD_decompressStream(zstd->zds, &zob, &zib);
		zio->ipos += ilen = zib.pos - ipos;
		zio->opos += olen = zob.pos - opos;
		/* only output past the requested offset counts toward the request */
		if (zio->opos > off)
			resid -= olen;
		if (ZSTD_isError(zerror)) {
			TARFS_DPF(ZIO, "%s: inflate failed after i %zu o %zu: %s\n", __func__,
			    (size_t)zio->ipos, (size_t)zio->opos, ZSTD_getErrorName(zerror));
			error = EIO;
			goto fail;
		}
		if (zerror == 0 && olen == 0) {
			TARFS_DPF(ZIO, "%s: end of stream after i %zu o %zu\n", __func__,
			    (size_t)zio->ipos, (size_t)zio->opos);
			break;
		}
		if (zerror == 0) {
			TARFS_DPF(ZIO, "%s: end of frame after i %zu o %zu\n", __func__,
			    (size_t)zio->ipos, (size_t)zio->opos);
			/* a frame boundary is a restart point: record it */
			tarfs_zio_update_index(zio, zio->ipos, zio->opos);
		}
		TARFS_DPF(ZIO, "%s: inflated %zu\n", __func__, olen);
#ifdef TARFS_DEBUG
		counter_u64_add(tarfs_zio_inflated, olen);
#endif
	}
	/* also reached on the non-error early exits above, with error == 0 */
fail:
	VOP_UNLOCK(tmp->vp);
fail_unlocked:
	if (error == 0) {
		if (uiop->uio_segflg == UIO_SYSSPACE) {
			/* data was written in place; just report what is left */
			uiop->uio_resid = resid;
		} else if (len > resid) {
			TARFS_DPF(ALLOC, "%s: bounced %zu bytes\n", __func__,
			    len - resid);
			error = uiomove(obuf, len - resid, uiop);
#ifdef TARFS_DEBUG
			counter_u64_add(tarfs_zio_bounced, len - resid);
#endif
		}
	}
	if (obuf != NULL) {
		TARFS_DPF(ALLOC, "%s: freeing bounce buffer\n", __func__);
		free(obuf, M_TEMP);
	}
	if (rl != NULL)
		vn_rangelock_unlock(tmp->vp, rl);
	if (ibuf != NULL)
		free(ibuf, M_TEMP);
	TARFS_DPF(ZIO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
	    (size_t)off, len, error, uiop->uio_resid);
#ifdef TARFS_DEBUG
	counter_u64_add(tarfs_zio_consumed, len - uiop->uio_resid);
#endif
	/* after a failure, rewind to the first index entry */
	if (error != 0) {
		zio->curidx = 0;
		zio->ipos = zio->idx[0].i;
		zio->opos = zio->idx[0].o;
		ZSTD_resetDStream(zstd->zds);
	}
	return (error);
}
#endif

/*
 * VOP_READ for zio node.
 *
 * Dispatches to the zstd reader when zstd state is present; otherwise
 * (or when built without ZSTDIO) fails with EFTYPE.
 */
static int
tarfs_zread(struct vop_read_args *ap)
{
#if defined(TARFS_DEBUG) || defined(ZSTDIO)
	struct vnode *vp = ap->a_vp;
	struct tarfs_zio *zio = vp->v_data;
	struct uio *uiop = ap->a_uio;
#endif
#ifdef TARFS_DEBUG
	/* captured up front because the read updates the uio */
	off_t off = uiop->uio_offset;
	size_t len = uiop->uio_resid;
#endif
	int error;

	TARFS_DPF(ZIO, "%s(%zu, %zu)\n", __func__,
	    (size_t)off, len);
#ifdef ZSTDIO
	if (zio->zstd != NULL) {
		error = tarfs_zread_zstd(zio, uiop);
	} else
#endif
		error = EFTYPE;
	TARFS_DPF(ZIO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
	    (size_t)off, len, error, uiop->uio_resid);
	return (error);
}

/*
 * VOP_RECLAIM for zio node.
*/
static int
tarfs_zreclaim(struct vop_reclaim_args *ap)
{
	struct vnode *vp = ap->a_vp;

	TARFS_DPF(ZIO, "%s(%p)\n", __func__, vp);
	/* only detach the zio from the vnode; nothing is freed here */
	vp->v_data = NULL;
	return (0);
}

/*
 * VOP_STRATEGY for zio node.
 *
 * Services a buffer request by issuing a VOP_READ on the same vnode into
 * the buffer's data area.  Always returns 0; a read failure is reported
 * through the buffer (BIO_ERROR and b_error).
 */
static int
tarfs_zstrategy(struct vop_strategy_args *ap)
{
	struct uio auio;
	struct iovec iov;
	struct vnode *vp = ap->a_vp;
	struct buf *bp = ap->a_bp;
	off_t off;
	size_t len;
	int error;

	/* describe the buffer's data area as a single-segment kernel uio */
	iov.iov_base = bp->b_data;
	iov.iov_len = bp->b_bcount;
	off = bp->b_iooffset;
	len = bp->b_bcount;
	/*
	 * NOTE(review): b_resid is set to the full length here and is not
	 * refreshed from auio.uio_resid after the read -- confirm intended.
	 */
	bp->b_resid = len;
	auio.uio_iov = &iov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = off;
	auio.uio_resid = len;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = curthread;
	error = VOP_READ(vp, &auio, IO_DIRECT | IO_NODELOCKED, bp->b_rcred);
	/* the buffer is marked done whether or not the read succeeded */
	bp->b_flags |= B_DONE;
	if (error != 0) {
		bp->b_ioflags |= BIO_ERROR;
		bp->b_error = error;
	}
	return (0);
}

/* Vnode operations for the zio node; the rest fall back to the defaults. */
static struct vop_vector tarfs_znodeops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		tarfs_zaccess,
	.vop_getattr =		tarfs_zgetattr,
	.vop_read =		tarfs_zread,
	.vop_reclaim =		tarfs_zreclaim,
	.vop_strategy =		tarfs_zstrategy,
};
VFS_VOP_VECTOR_REGISTER(tarfs_znodeops);

#ifdef TARFS_ZIO
/*
 * Initializes the decompression layer.
*/ static struct tarfs_zio * tarfs_zio_init(struct tarfs_mount *tmp, off_t i, off_t o) { struct tarfs_zio *zio; struct vnode *zvp; zio = malloc(sizeof(*zio), M_TARFSZSTATE, M_ZERO | M_WAITOK); TARFS_DPF(ALLOC, "%s: allocated zio\n", __func__); zio->tmp = tmp; zio->szidx = 128; zio->idx = malloc(zio->szidx * sizeof(*zio->idx), M_TARFSZSTATE, M_ZERO | M_WAITOK); zio->curidx = 0; zio->nidx = 1; zio->idx[zio->curidx].i = zio->ipos = i; zio->idx[zio->curidx].o = zio->opos = o; tmp->zio = zio; TARFS_DPF(ALLOC, "%s: allocated zio index\n", __func__); - getnewvnode("tarfsz", tmp->vfs, &tarfs_znodeops, &zvp); + (void)getnewvnode("tarfsz", tmp->vfs, &tarfs_znodeops, &zvp); zvp->v_data = zio; zvp->v_type = VREG; zvp->v_mount = tmp->vfs; vn_set_state(zvp, VSTATE_CONSTRUCTED); tmp->znode = zvp; TARFS_DPF(ZIO, "%s: created zio node\n", __func__); return (zio); } #endif /* * Initializes the I/O layer, including decompression if the signature of * a supported compression format is detected. Returns 0 on success and a * positive errno value on failure. 
*/ int tarfs_io_init(struct tarfs_mount *tmp) { uint8_t *block; #ifdef TARFS_ZIO struct tarfs_zio *zio = NULL; #endif ssize_t res; int error = 0; block = malloc(tmp->iosize, M_TEMP, M_ZERO | M_WAITOK); res = tarfs_io_read_buf(tmp, true, block, 0, tmp->iosize); if (res < 0) { return (-res); } if (memcmp(block, XZ_MAGIC, sizeof(XZ_MAGIC)) == 0) { printf("xz compression not supported\n"); error = EOPNOTSUPP; goto bad; } else if (memcmp(block, ZLIB_MAGIC, sizeof(ZLIB_MAGIC)) == 0) { printf("zlib compression not supported\n"); error = EOPNOTSUPP; goto bad; } else if (memcmp(block, ZSTD_MAGIC, sizeof(ZSTD_MAGIC)) == 0) { #ifdef ZSTDIO zio = tarfs_zio_init(tmp, 0, 0); zio->zstd = malloc(sizeof(*zio->zstd), M_TARFSZSTATE, M_WAITOK); zio->zstd->zds = ZSTD_createDStream_advanced(tarfs_zstd_mem); (void)ZSTD_initDStream(zio->zstd->zds); #else printf("zstd compression not supported\n"); error = EOPNOTSUPP; goto bad; #endif } bad: free(block, M_TEMP); return (error); } #ifdef TARFS_ZIO /* * Tears down the decompression layer. */ static int tarfs_zio_fini(struct tarfs_mount *tmp) { struct tarfs_zio *zio = tmp->zio; int error = 0; if (tmp->znode != NULL) { error = vn_lock(tmp->znode, LK_EXCLUSIVE); if (error != 0) { TARFS_DPF(ALLOC, "%s: failed to lock znode", __func__); return (error); } tmp->znode->v_mount = NULL; vgone(tmp->znode); vput(tmp->znode); tmp->znode = NULL; } #ifdef ZSTDIO if (zio->zstd != NULL) { TARFS_DPF(ALLOC, "%s: freeing zstd state\n", __func__); ZSTD_freeDStream(zio->zstd->zds); free(zio->zstd, M_TARFSZSTATE); } #endif if (zio->idx != NULL) { TARFS_DPF(ALLOC, "%s: freeing index\n", __func__); free(zio->idx, M_TARFSZSTATE); } TARFS_DPF(ALLOC, "%s: freeing zio\n", __func__); free(zio, M_TARFSZSTATE); tmp->zio = NULL; return (error); } #endif /* * Tears down the I/O layer, including the decompression layer if * applicable. 
*/ int tarfs_io_fini(struct tarfs_mount *tmp) { int error = 0; #ifdef TARFS_ZIO if (tmp->zio != NULL) { error = tarfs_zio_fini(tmp); } #endif return (error); } diff --git a/sys/fs/tarfs/tarfs_vfsops.c b/sys/fs/tarfs/tarfs_vfsops.c index a45f005a2bd1..138a57c22e7f 100644 --- a/sys/fs/tarfs/tarfs_vfsops.c +++ b/sys/fs/tarfs/tarfs_vfsops.c @@ -1,1173 +1,1193 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Juniper Networks, Inc. * Copyright (c) 2022-2023 Klara, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_tarfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include CTASSERT(ZERO_REGION_SIZE > TARFS_BLOCKSIZE); struct ustar_header { char name[100]; /* File name */ char mode[8]; /* Mode flags */ char uid[8]; /* User id */ char gid[8]; /* Group id */ char size[12]; /* Size */ char mtime[12]; /* Modified time */ char checksum[8]; /* Checksum */ char typeflag[1]; /* Type */ char linkname[100]; /* "old format" stops here */ char magic[6]; /* POSIX UStar "ustar\0" indicator */ char version[2]; /* POSIX UStar version "00" */ char uname[32]; /* User name */ char gname[32]; /* Group name */ char major[8]; /* Device major number */ char minor[8]; /* Device minor number */ char prefix[155]; /* Path prefix */ }; #define TAR_EOF ((off_t)-1) #define TAR_TYPE_FILE '0' #define TAR_TYPE_HARDLINK '1' #define TAR_TYPE_SYMLINK '2' #define TAR_TYPE_CHAR '3' #define TAR_TYPE_BLOCK '4' #define TAR_TYPE_DIRECTORY '5' #define TAR_TYPE_FIFO '6' #define TAR_TYPE_CONTIG '7' #define TAR_TYPE_GLOBAL_EXTHDR 'g' #define TAR_TYPE_EXTHDR 'x' #define TAR_TYPE_GNU_SPARSE 'S' #define USTAR_MAGIC (uint8_t []){ 'u', 's', 't', 'a', 'r', 0 } #define USTAR_VERSION (uint8_t []){ '0', '0' } #define GNUTAR_MAGIC (uint8_t []){ 'u', 's', 't', 'a', 'r', ' ' } #define GNUTAR_VERSION (uint8_t []){ ' ', '\x0' } #define DEFDIRMODE (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) MALLOC_DEFINE(M_TARFSMNT, "tarfs mount", "tarfs mount structures"); MALLOC_DEFINE(M_TARFSNODE, "tarfs node", "tarfs node structures"); static vfs_mount_t tarfs_mount; static vfs_unmount_t tarfs_unmount; static vfs_root_t tarfs_root; static vfs_statfs_t tarfs_statfs; static vfs_fhtovp_t tarfs_fhtovp; static const char *tarfs_opts[] = { "from", "gid", "mode", "uid", "verify", NULL }; /* * Reads a len-width signed octal number from strp. Returns the value. 
* XXX Does not report errors. */ static int64_t tarfs_str2octal(const char *strp, size_t len) { int64_t val; size_t idx; int sign; /* * Skip leading spaces or tabs. * XXX why? POSIX requires numeric fields to be 0-padded. */ for (idx = 0; idx < len; idx++) if (strp[idx] != ' ' && strp[idx] != '\t') break; if (idx == len) return (0); if (strp[idx] == '-') { sign = -1; idx++; } else sign = 1; val = 0; for (; idx < len; idx++) { if (strp[idx] < '0' || strp[idx] > '7') break; val <<= 3; val += (strp[idx] - '0'); /* Truncate on overflow */ if (val > INT64_MAX / 8) { val = INT64_MAX; break; } } return (sign > 0) ? val : -val; } /* * Reads a len-byte extended numeric value from strp. The first byte has * bit 7 set to indicate the format; the remaining 7 bits + the (len - 1) * bytes that follow form a big-endian signed two's complement binary * number. Returns the value. XXX Does not report errors. */ static int64_t tarfs_str2base256(const char *strp, size_t len) { int64_t val; size_t idx; KASSERT(strp[0] & 0x80, ("not an extended numeric value")); /* Sign-extend the first byte */ if ((strp[0] & 0x40) != 0) val = (int64_t)-1; else val = 0; val <<= 6; val |= (strp[0] & 0x3f); /* Read subsequent bytes */ for (idx = 1; idx < len; idx++) { val <<= 8; val |= (0xff & (int64_t)strp[idx]); /* Truncate on overflow and underflow */ if (val > INT64_MAX / 256) { val = INT64_MAX; break; } else if (val < INT64_MAX / 256) { val = INT64_MIN; break; } } return (val); } /* * Read a len-byte numeric field from strp. If bit 7 of the first byte it * set, assume an extended numeric value (signed two's complement); * otherwise, assume a signed octal value. * * XXX practically no error checking or handling */ static int64_t tarfs_str2int64(const char *strp, size_t len) { if (len < 1) return (0); if ((strp[0] & 0x80) != 0) return (tarfs_str2base256(strp, len)); return (tarfs_str2octal(strp, len)); } /* * Verifies the checksum of a header. Returns true if the checksum is * valid, false otherwise. 
*/
static boolean_t
tarfs_checksum(struct ustar_header *hdrp)
{
	const unsigned char *const hdr_beg = (const unsigned char *)hdrp;
	const unsigned char *const sum_beg =
	    (const unsigned char *)hdrp->checksum;
	const unsigned char *const sum_end =
	    (const unsigned char *)hdrp->typeflag;
	const unsigned char *const hdr_end = (const unsigned char *)(hdrp + 1);
	const unsigned char *cur;
	int64_t expected, actual;
	int pass;

	expected = tarfs_str2int64(hdrp->checksum, sizeof(hdrp->checksum));
	TARFS_DPF(CHECKSUM, "%s: header checksum %lx\n", __func__, expected);

	/*
	 * Pass 0 sums the header as unsigned bytes.  Pass 1 repeats the
	 * test with signed bytes, since some older formats use a broken
	 * form of the calculation; it only runs if pass 0 did not match.
	 */
	for (pass = 0; pass < 2; pass++) {
		/* the checksum field itself counts as if filled with spaces */
		actual = (int64_t)sizeof(hdrp->checksum) * 0x20;
		for (cur = hdr_beg; cur < hdr_end; cur++) {
			if (cur >= sum_beg && cur < sum_end)
				continue;
			actual += (pass == 0) ? *cur : *(const signed char *)cur;
		}
		if (pass == 0) {
			TARFS_DPF(CHECKSUM, "%s: calc unsigned checksum %lx\n",
			    __func__, actual);
		} else {
			TARFS_DPF(CHECKSUM, "%s: calc signed checksum %lx\n",
			    __func__, actual);
		}
		if (expected == actual)
			return (true);
	}

	return (false);
}

/*
 * Looks up a path in the tarfs node tree.
 *
 * - If the path exists, stores a pointer to the corresponding tarfs_node
 *   in retnode and a pointer to its parent in retparent.
 *
 * - If the path does not exist, but create_dirs is true, creates ancestor
 *   directories and returns NULL in retnode and the parent in retparent.
 *
 * - If the path does not exist and create_dirs is false, stops at the
 *   first missing path name component.
 *
 * - In all cases, on return, endp and sepp point to the beginning and
 *   end, respectively, of the last-processed path name component.
 *
 * - Returns 0 if the node was found, ENOENT if it was not, and some other
 *   positive errno value on failure.
*/ static int tarfs_lookup_path(struct tarfs_mount *tmp, char *name, size_t namelen, char **endp, char **sepp, struct tarfs_node **retparent, struct tarfs_node **retnode, boolean_t create_dirs) { - struct componentname cn; + struct componentname cn = { }; struct tarfs_node *parent, *tnp; char *sep; size_t len; int error; boolean_t do_lookup; MPASS(name != NULL && namelen != 0); do_lookup = true; error = 0; parent = tnp = tmp->root; if (tnp == NULL) panic("%s: root node not yet created", __func__); - bzero(&cn, sizeof(cn)); - TARFS_DPF(LOOKUP, "%s: Full path: %.*s\n", __func__, (int)namelen, name); sep = NULL; for (;;) { /* skip leading slash(es) */ while (name[0] == '/' && namelen > 0) name++, namelen--; /* did we reach the end? */ if (namelen == 0 || name[0] == '\0') { name = do_lookup ? NULL : cn.cn_nameptr; namelen = do_lookup ? 0 : cn.cn_namelen; break; } /* locate the next separator */ for (sep = name, len = 0; *sep != '\0' && *sep != '/' && len < namelen; sep++, len++) /* nothing */ ; /* check for . and .. */ - if (name[0] == '.' && len <= 2) { - if (len == 1) { - /* . */ - name += len; - namelen -= len; - continue; - } else if (name[1] == '.') { - /* .. */ - if (tnp == tmp->root) { - error = EINVAL; - break; - } - tnp = tnp->parent; - parent = tnp->parent; - name += len; - namelen -= len; - continue; + if (name[0] == '.' && len == 1) { + name += len; + namelen -= len; + continue; + } + if (name[0] == '.' && name[1] == '.' 
&& len == 2) { + if (tnp == tmp->root) { + error = EINVAL; + break; } + tnp = parent; + parent = tnp->parent; + name += len; + namelen -= len; + continue; } /* create parent if necessary */ if (!do_lookup) { TARFS_DPF(ALLOC, "%s: creating %.*s\n", __func__, (int)cn.cn_namelen, cn.cn_nameptr); error = tarfs_alloc_node(tmp, cn.cn_nameptr, cn.cn_namelen, VDIR, -1, 0, tmp->mtime, 0, 0, DEFDIRMODE, 0, NULL, NODEV, parent, &tnp); if (error != 0) break; } parent = tnp; tnp = NULL; cn.cn_nameptr = name; cn.cn_namelen = len; TARFS_DPF(LOOKUP, "%s: Search: %.*s\n", __func__, (int)cn.cn_namelen, cn.cn_nameptr); if (do_lookup) { tnp = tarfs_lookup_node(parent, NULL, &cn); if (tnp == NULL) { do_lookup = false; if (!create_dirs) break; } } name += cn.cn_namelen; namelen -= cn.cn_namelen; } TARFS_DPF(LOOKUP, "%s: Parent %p, node %p\n", __func__, parent, tnp); if (retparent) *retparent = parent; if (retnode) *retnode = tnp; if (endp) { if (namelen > 0) *endp = name; else *endp = NULL; } if (sepp) *sepp = sep; return (error); } /* * Frees a tarfs_mount structure and everything it references. */ static void tarfs_free_mount(struct tarfs_mount *tmp) { struct mount *mp; struct tarfs_node *tnp; MPASS(tmp != NULL); TARFS_DPF(ALLOC, "%s: Freeing mount structure %p\n", __func__, tmp); TARFS_DPF(ALLOC, "%s: freeing tarfs_node structures\n", __func__); while (!TAILQ_EMPTY(&tmp->allnodes)) { tnp = TAILQ_FIRST(&tmp->allnodes); TAILQ_REMOVE(&tmp->allnodes, tnp, entries); tarfs_free_node(tnp); } (void)tarfs_io_fini(tmp); TARFS_DPF(ALLOC, "%s: deleting unr header\n", __func__); delete_unrhdr(tmp->ino_unr); mp = tmp->vfs; mp->mnt_data = NULL; TARFS_DPF(ALLOC, "%s: freeing structure\n", __func__); free(tmp, M_TARFSMNT); } /* * Processes the tar file header at block offset blknump and allocates and * populates a tarfs_node structure for the file it describes. Updated * blknump to point to the next unread tar file block, or TAR_EOF if EOF * is reached. 
Returns 0 on success or EOF and a positive errno value on * failure. */ static int tarfs_alloc_one(struct tarfs_mount *tmp, off_t *blknump) { char block[TARFS_BLOCKSIZE]; struct ustar_header *hdrp = (struct ustar_header *)block; struct sbuf *namebuf = NULL; char *exthdr = NULL, *name = NULL, *link = NULL; off_t blknum = *blknump; + int64_t num; int endmarker = 0; char *namep, *sep; struct tarfs_node *parent, *tnp; size_t namelen = 0, linklen = 0, realsize = 0, sz; ssize_t res; dev_t rdev; gid_t gid; mode_t mode; time_t mtime; uid_t uid; long major = -1, minor = -1; unsigned int flags = 0; int error; boolean_t sparse = false; again: /* read next header */ res = tarfs_io_read_buf(tmp, false, block, TARFS_BLOCKSIZE * blknum, TARFS_BLOCKSIZE); if (res < 0) { error = -res; goto bad; } else if (res < TARFS_BLOCKSIZE) { goto eof; } blknum++; /* check for end marker */ if (memcmp(block, zero_region, TARFS_BLOCKSIZE) == 0) { if (endmarker++) { if (exthdr != NULL) { TARFS_DPF(IO, "%s: orphaned extended header at %zu\n", __func__, TARFS_BLOCKSIZE * (blknum - 1)); free(exthdr, M_TEMP); } TARFS_DPF(IO, "%s: end of archive at %zu\n", __func__, TARFS_BLOCKSIZE * blknum); tmp->nblocks = blknum; *blknump = TAR_EOF; return (0); } goto again; } /* verify magic */ if (memcmp(hdrp->magic, USTAR_MAGIC, sizeof(USTAR_MAGIC)) == 0 && memcmp(hdrp->version, USTAR_VERSION, sizeof(USTAR_VERSION)) == 0) { /* POSIX */ } else if (memcmp(hdrp->magic, GNUTAR_MAGIC, sizeof(GNUTAR_MAGIC)) == 0 && memcmp(hdrp->magic, GNUTAR_MAGIC, sizeof(GNUTAR_MAGIC)) == 0) { TARFS_DPF(ALLOC, "%s: GNU tar format at %zu\n", __func__, TARFS_BLOCKSIZE * (blknum - 1)); error = EFTYPE; goto bad; } else { TARFS_DPF(ALLOC, "%s: unsupported TAR format at %zu\n", __func__, TARFS_BLOCKSIZE * (blknum - 1)); error = EINVAL; goto bad; } /* verify checksum */ if (!tarfs_checksum(hdrp)) { TARFS_DPF(ALLOC, "%s: header checksum failed at %zu\n", __func__, TARFS_BLOCKSIZE * (blknum - 1)); error = EINVAL; goto bad; } /* get standard 
attributes */ - mode = tarfs_str2int64(hdrp->mode, sizeof(hdrp->mode)); - uid = tarfs_str2int64(hdrp->uid, sizeof(hdrp->uid)); - gid = tarfs_str2int64(hdrp->gid, sizeof(hdrp->gid)); - sz = tarfs_str2int64(hdrp->size, sizeof(hdrp->size)); + num = tarfs_str2int64(hdrp->mode, sizeof(hdrp->mode)); + if (num < 0 || num > ALLPERMS) { + TARFS_DPF(ALLOC, "%s: invalid file mode at %zu\n", + __func__, TARFS_BLOCKSIZE * (blknum - 1)); + mode = S_IRUSR; + } else { + mode = num; + } + num = tarfs_str2int64(hdrp->uid, sizeof(hdrp->uid)); + if (num < 0 || num > UID_MAX) { + TARFS_DPF(ALLOC, "%s: UID out of range at %zu\n", + __func__, TARFS_BLOCKSIZE * (blknum - 1)); + uid = tmp->root->uid; + mode &= ~S_ISUID; + } else { + uid = num; + } + num = tarfs_str2int64(hdrp->gid, sizeof(hdrp->gid)); + if (num < 0 || num > GID_MAX) { + TARFS_DPF(ALLOC, "%s: GID out of range at %zu\n", + __func__, TARFS_BLOCKSIZE * (blknum - 1)); + gid = tmp->root->gid; + mode &= ~S_ISGID; + } else { + gid = num; + } + num = tarfs_str2int64(hdrp->size, sizeof(hdrp->size)); + if (num < 0) { + TARFS_DPF(ALLOC, "%s: negative size at %zu\n", + __func__, TARFS_BLOCKSIZE * (blknum - 1)); + error = EINVAL; + goto bad; + } else { + sz = num; + } mtime = tarfs_str2int64(hdrp->mtime, sizeof(hdrp->mtime)); rdev = NODEV; TARFS_DPF(ALLOC, "%s: [%c] %zu @%jd %o %d:%d\n", __func__, hdrp->typeflag[0], sz, (intmax_t)mtime, mode, uid, gid); /* extended header? 
*/ if (hdrp->typeflag[0] == TAR_TYPE_GLOBAL_EXTHDR) { printf("%s: unsupported global extended header at %zu\n", __func__, (size_t)(TARFS_BLOCKSIZE * (blknum - 1))); error = EFTYPE; goto bad; } if (hdrp->typeflag[0] == TAR_TYPE_EXTHDR) { if (exthdr != NULL) { TARFS_DPF(IO, "%s: multiple extended headers at %zu\n", __func__, TARFS_BLOCKSIZE * (blknum - 1)); error = EFTYPE; goto bad; } /* read the contents of the exthdr */ TARFS_DPF(ALLOC, "%s: %zu-byte extended header at %zd\n", __func__, sz, TARFS_BLOCKSIZE * (blknum - 1)); exthdr = malloc(sz, M_TEMP, M_WAITOK); res = tarfs_io_read_buf(tmp, false, exthdr, TARFS_BLOCKSIZE * blknum, sz); if (res < 0) { error = -res; goto bad; } if (res < sz) { goto eof; } blknum += TARFS_SZ2BLKS(res); /* XXX TODO: refactor this parser */ char *line = exthdr; while (line < exthdr + sz) { char *eol, *key, *value, *sep; size_t len = strtoul(line, &sep, 10); if (len == 0 || sep == line || *sep != ' ') { TARFS_DPF(ALLOC, "%s: exthdr syntax error\n", __func__); error = EINVAL; goto bad; } if (line + len > exthdr + sz) { TARFS_DPF(ALLOC, "%s: exthdr overflow\n", __func__); error = EINVAL; goto bad; } eol = line + len - 1; *eol = '\0'; line += len; key = sep + 1; sep = strchr(key, '='); if (sep == NULL) { TARFS_DPF(ALLOC, "%s: exthdr syntax error\n", __func__); error = EINVAL; goto bad; } *sep = '\0'; value = sep + 1; TARFS_DPF(ALLOC, "%s: exthdr %s=%s\n", __func__, key, value); if (strcmp(key, "linkpath") == 0) { link = value; linklen = eol - value; } else if (strcmp(key, "GNU.sparse.major") == 0) { sparse = true; major = strtol(value, &sep, 10); if (sep != eol) { printf("exthdr syntax error\n"); error = EINVAL; goto bad; } } else if (strcmp(key, "GNU.sparse.minor") == 0) { sparse = true; minor = strtol(value, &sep, 10); if (sep != eol) { printf("exthdr syntax error\n"); error = EINVAL; goto bad; } } else if (strcmp(key, "GNU.sparse.name") == 0) { sparse = true; name = value; namelen = eol - value; if (namelen == 0) { printf("exthdr syntax 
error\n"); error = EINVAL; goto bad; } } else if (strcmp(key, "GNU.sparse.realsize") == 0) { sparse = true; realsize = strtoul(value, &sep, 10); if (sep != eol) { printf("exthdr syntax error\n"); error = EINVAL; goto bad; } } else if (strcmp(key, "SCHILY.fflags") == 0) { flags |= tarfs_strtofflags(value, &sep); if (sep != eol) { printf("exthdr syntax error\n"); error = EINVAL; goto bad; } } } goto again; } /* sparse file consistency checks */ if (sparse) { TARFS_DPF(ALLOC, "%s: %s: sparse %ld.%ld (%zu bytes)\n", __func__, name, major, minor, realsize); if (major != 1 || minor != 0 || name == NULL || realsize == 0 || hdrp->typeflag[0] != TAR_TYPE_FILE) { TARFS_DPF(ALLOC, "%s: invalid sparse format\n", __func__); error = EINVAL; goto bad; } } /* file name */ if (name == NULL) { if (hdrp->prefix[0] != '\0') { namebuf = sbuf_new_auto(); sbuf_printf(namebuf, "%.*s/%.*s", (int)sizeof(hdrp->prefix), hdrp->prefix, (int)sizeof(hdrp->name), hdrp->name); sbuf_finish(namebuf); name = sbuf_data(namebuf); namelen = sbuf_len(namebuf); } else { name = hdrp->name; namelen = strnlen(hdrp->name, sizeof(hdrp->name)); } } error = tarfs_lookup_path(tmp, name, namelen, &namep, &sep, &parent, &tnp, true); if (error != 0) goto bad; if (tnp != NULL) { if (hdrp->typeflag[0] == TAR_TYPE_DIRECTORY) { /* XXX set attributes? 
*/ goto skip; } TARFS_DPF(ALLOC, "%s: duplicate file %.*s\n", __func__, (int)namelen, name); error = EINVAL; goto bad; } switch (hdrp->typeflag[0]) { case TAR_TYPE_DIRECTORY: error = tarfs_alloc_node(tmp, namep, sep - namep, VDIR, 0, 0, mtime, uid, gid, mode, flags, NULL, 0, parent, &tnp); break; case TAR_TYPE_FILE: error = tarfs_alloc_node(tmp, namep, sep - namep, VREG, blknum * TARFS_BLOCKSIZE, sz, mtime, uid, gid, mode, flags, NULL, 0, parent, &tnp); if (error == 0 && sparse) { error = tarfs_load_blockmap(tnp, realsize); } break; case TAR_TYPE_HARDLINK: if (link == NULL) { link = hdrp->linkname; linklen = strnlen(link, sizeof(hdrp->linkname)); } error = tarfs_alloc_node(tmp, namep, sep - namep, VREG, 0, 0, 0, 0, 0, 0, 0, NULL, 0, parent, &tnp); if (error != 0) { goto bad; } error = tarfs_lookup_path(tmp, link, linklen, NULL, NULL, NULL, &tnp->other, false); if (tnp->other == NULL || tnp->other->type != VREG || tnp->other->other != NULL) { TARFS_DPF(ALLOC, "%s: %.*s: dead hard link to %.*s\n", __func__, (int)namelen, name, (int)linklen, link); error = EINVAL; goto bad; } break; case TAR_TYPE_SYMLINK: if (link == NULL) { link = hdrp->linkname; linklen = strnlen(link, sizeof(hdrp->linkname)); } error = tarfs_alloc_node(tmp, namep, sep - namep, VLNK, 0, linklen, mtime, uid, gid, mode, flags, link, 0, parent, &tnp); break; case TAR_TYPE_BLOCK: major = tarfs_str2int64(hdrp->major, sizeof(hdrp->major)); minor = tarfs_str2int64(hdrp->minor, sizeof(hdrp->minor)); rdev = makedev(major, minor); error = tarfs_alloc_node(tmp, namep, sep - namep, VBLK, 0, 0, mtime, uid, gid, mode, flags, NULL, rdev, parent, &tnp); break; case TAR_TYPE_CHAR: major = tarfs_str2int64(hdrp->major, sizeof(hdrp->major)); minor = tarfs_str2int64(hdrp->minor, sizeof(hdrp->minor)); rdev = makedev(major, minor); error = tarfs_alloc_node(tmp, namep, sep - namep, VCHR, 0, 0, mtime, uid, gid, mode, flags, NULL, rdev, parent, &tnp); break; default: TARFS_DPF(ALLOC, "%s: unsupported type %c for %.*s\n", 
__func__, hdrp->typeflag[0], (int)namelen, name); error = EINVAL; break; } if (error != 0) goto bad; skip: blknum += TARFS_SZ2BLKS(sz); tmp->nblocks = blknum; *blknump = blknum; if (exthdr != NULL) { free(exthdr, M_TEMP); } if (namebuf != NULL) { sbuf_delete(namebuf); } return (0); eof: TARFS_DPF(IO, "%s: premature end of file\n", __func__); error = EIO; goto bad; bad: if (exthdr != NULL) { free(exthdr, M_TEMP); } if (namebuf != NULL) { sbuf_delete(namebuf); } return (error); } /* * Allocates and populates the metadata structures for the tar file * referenced by vp. On success, a pointer to the tarfs_mount structure * is stored in tmpp. Returns 0 on success or a positive errno value on * failure. */ static int tarfs_alloc_mount(struct mount *mp, struct vnode *vp, uid_t root_uid, gid_t root_gid, mode_t root_mode, struct tarfs_mount **tmpp) { struct vattr va; struct thread *td = curthread; - char *fullpath; struct tarfs_mount *tmp; struct tarfs_node *root; off_t blknum; time_t mtime; int error; KASSERT(tmpp != NULL, ("tarfs mount return is NULL")); ASSERT_VOP_LOCKED(vp, __func__); tmp = NULL; - fullpath = NULL; TARFS_DPF(ALLOC, "%s: Allocating tarfs mount structure for vp %p\n", __func__, vp); /* Get source metadata */ error = VOP_GETATTR(vp, &va, td->td_ucred); if (error != 0) { return (error); } VOP_UNLOCK(vp); mtime = va.va_mtime.tv_sec; /* Allocate and initialize tarfs mount structure */ - tmp = (struct tarfs_mount *)malloc(sizeof(struct tarfs_mount), - M_TARFSMNT, M_WAITOK | M_ZERO); + tmp = malloc(sizeof(*tmp), M_TARFSMNT, M_WAITOK | M_ZERO); TARFS_DPF(ALLOC, "%s: Allocated mount structure\n", __func__); mp->mnt_data = tmp; mtx_init(&tmp->allnode_lock, "tarfs allnode lock", NULL, MTX_DEF); TAILQ_INIT(&tmp->allnodes); tmp->ino_unr = new_unrhdr(TARFS_MININO, INT_MAX, &tmp->allnode_lock); tmp->vp = vp; tmp->vfs = mp; tmp->mtime = mtime; /* * XXX The decompression layer passes everything through the * buffer cache, and the buffer cache wants to know our blocksize, 
* but mnt_stat normally isn't populated until after we return, so * we have to cheat a bit. */ tmp->iosize = 1U << tarfs_ioshift; mp->mnt_stat.f_iosize = tmp->iosize; /* Initialize decompression layer */ error = tarfs_io_init(tmp); if (error != 0) goto bad; error = tarfs_alloc_node(tmp, NULL, 0, VDIR, 0, 0, mtime, root_uid, root_gid, root_mode & ALLPERMS, 0, NULL, NODEV, NULL, &root); if (error != 0 || root == NULL) goto bad; tmp->root = root; blknum = 0; do { if ((error = tarfs_alloc_one(tmp, &blknum)) != 0) { goto bad; } } while (blknum != TAR_EOF); *tmpp = tmp; TARFS_DPF(ALLOC, "%s: pfsmnt_root %p\n", __func__, tmp->root); return (0); bad: - if (tmp != NULL) - tarfs_free_mount(tmp); - free(fullpath, M_TEMP); + tarfs_free_mount(tmp); return (error); } /* * VFS Operations. */ static int tarfs_mount(struct mount *mp) { struct nameidata nd; struct vattr va; struct tarfs_mount *tmp = NULL; struct thread *td = curthread; struct vnode *vp; char *from; uid_t root_uid; gid_t root_gid; mode_t root_mode; int error, flags, len; if (mp->mnt_flag & MNT_UPDATE) return (EOPNOTSUPP); if (vfs_filteropt(mp->mnt_optnew, tarfs_opts)) return (EINVAL); vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY); error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred); VOP_UNLOCK(mp->mnt_vnodecovered); if (error) return (error); if (mp->mnt_cred->cr_ruid != 0 || vfs_scanopt(mp->mnt_optnew, "gid", "%d", &root_gid) != 1) root_gid = va.va_gid; if (mp->mnt_cred->cr_ruid != 0 || vfs_scanopt(mp->mnt_optnew, "uid", "%d", &root_uid) != 1) root_uid = va.va_uid; if (mp->mnt_cred->cr_ruid != 0 || vfs_scanopt(mp->mnt_optnew, "mode", "%ho", &root_mode) != 1) root_mode = va.va_mode; error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len); if (error != 0 || from[len - 1] != '\0') return (EINVAL); /* Find the source tarball */ TARFS_DPF(FS, "%s(%s, uid=%u, gid=%u, mode=%o)\n", __func__, from, root_uid, root_gid, root_mode); flags = FREAD; if (vfs_flagopt(mp->mnt_optnew, "verify", NULL, 0)) { 
flags |= O_VERIFY; } NDINIT(&nd, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF, UIO_SYSSPACE, from); error = namei(&nd); if (error != 0) return (error); NDFREE_PNBUF(&nd); vp = nd.ni_vp; TARFS_DPF(FS, "%s: N: hold %u use %u lock 0x%x\n", __func__, vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp)); /* vp is now held and locked */ /* Open the source tarball */ error = vn_open_vnode(vp, flags, td->td_ucred, td, NULL); if (error != 0) { TARFS_DPF(FS, "%s: failed to open %s: %d\n", __func__, from, error); vput(vp); goto bad; } TARFS_DPF(FS, "%s: O: hold %u use %u lock 0x%x\n", __func__, vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp)); if (vp->v_type != VREG) { TARFS_DPF(FS, "%s: not a regular file\n", __func__); error = EOPNOTSUPP; goto bad_open_locked; } error = priv_check(td, PRIV_VFS_MOUNT_PERM); if (error != 0) { TARFS_DPF(FS, "%s: not permitted to mount\n", __func__); goto bad_open_locked; } if (flags & O_VERIFY) { mp->mnt_flag |= MNT_VERIFIED; } /* Allocate the tarfs mount */ error = tarfs_alloc_mount(mp, vp, root_uid, root_gid, root_mode, &tmp); /* vp is now held but unlocked */ if (error != 0) { TARFS_DPF(FS, "%s: failed to mount %s: %d\n", __func__, from, error); goto bad_open_unlocked; } TARFS_DPF(FS, "%s: M: hold %u use %u lock 0x%x\n", __func__, vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp)); /* Unconditionally mount as read-only */ MNT_ILOCK(mp); mp->mnt_flag |= (MNT_LOCAL | MNT_RDONLY); MNT_IUNLOCK(mp); vfs_getnewfsid(mp); vfs_mountedfrom(mp, "tarfs"); TARFS_DPF(FS, "%s: success\n", __func__); return (0); bad_open_locked: /* vp must be held and locked */ TARFS_DPF(FS, "%s: L: hold %u use %u lock 0x%x\n", __func__, vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp)); VOP_UNLOCK(vp); bad_open_unlocked: /* vp must be held and unlocked */ TARFS_DPF(FS, "%s: E: hold %u use %u lock 0x%x\n", __func__, vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp)); (void)vn_close(vp, flags, td->td_ucred, td); bad: /* vp must be released and unlocked */ TARFS_DPF(FS, "%s: X: hold %u use %u 
lock 0x%x\n", __func__, vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp)); return (error); } /* * Unmounts a tarfs filesystem. */ static int tarfs_unmount(struct mount *mp, int mntflags) { struct thread *td = curthread; struct tarfs_mount *tmp; struct vnode *vp; int error; int flags = 0; TARFS_DPF(FS, "%s: Unmounting %p\n", __func__, mp); /* Handle forced unmounts */ if (mntflags & MNT_FORCE) flags |= FORCECLOSE; /* Finalize all pending I/O */ error = vflush(mp, 0, flags, curthread); if (error != 0) return (error); tmp = MP_TO_TARFS_MOUNT(mp); vp = tmp->vp; MPASS(vp != NULL); TARFS_DPF(FS, "%s: U: hold %u use %u lock 0x%x\n", __func__, vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp)); vn_close(vp, FREAD, td->td_ucred, td); TARFS_DPF(FS, "%s: C: hold %u use %u lock 0x%x\n", __func__, vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp)); tarfs_free_mount(tmp); return (0); } /* * Gets the root of a tarfs filesystem. Returns 0 on success or a * positive errno value on failure. */ static int tarfs_root(struct mount *mp, int flags, struct vnode **vpp) { struct vnode *nvp; int error; TARFS_DPF(FS, "%s: Getting root vnode\n", __func__); error = VFS_VGET(mp, TARFS_ROOTINO, LK_EXCLUSIVE, &nvp); if (error != 0) return (error); nvp->v_vflag |= VV_ROOT; *vpp = nvp; return (0); } /* * Gets statistics for a tarfs filesystem. Returns 0. */ static int tarfs_statfs(struct mount *mp, struct statfs *sbp) { struct tarfs_mount *tmp; tmp = MP_TO_TARFS_MOUNT(mp); sbp->f_bsize = TARFS_BLOCKSIZE; sbp->f_iosize = tmp->iosize; sbp->f_blocks = tmp->nblocks; sbp->f_bfree = 0; sbp->f_bavail = 0; sbp->f_files = tmp->nfiles; sbp->f_ffree = 0; return (0); } /* * Gets a vnode for the given inode. On success, a pointer to the vnode * is stored in vpp. Returns 0 on success or a positive errno value on * failure. 
*/ static int tarfs_vget(struct mount *mp, ino_t ino, int lkflags, struct vnode **vpp) { struct tarfs_mount *tmp; struct tarfs_node *tnp; struct thread *td; struct vnode *vp; int error; TARFS_DPF(FS, "%s: mp %p, ino %lu, lkflags %d\n", __func__, mp, ino, lkflags); td = curthread; error = vfs_hash_get(mp, ino, lkflags, td, vpp, NULL, NULL); if (error != 0) return (error); if (*vpp != NULL) { TARFS_DPF(FS, "%s: found hashed vnode %p\n", __func__, *vpp); return (error); } TARFS_DPF(FS, "%s: no hashed vnode for inode %lu\n", __func__, ino); tmp = MP_TO_TARFS_MOUNT(mp); if (ino == TARFS_ZIOINO) { error = vget(tmp->znode, lkflags); if (error != 0) return (error); *vpp = tmp->znode; return (0); } /* XXX Should use hash instead? */ TAILQ_FOREACH(tnp, &tmp->allnodes, entries) { if (tnp->ino == ino) break; } TARFS_DPF(FS, "%s: search of all nodes found %p\n", __func__, tnp); if (tnp == NULL) return (ENOENT); - error = getnewvnode("tarfs", mp, &tarfs_vnodeops, &vp); - if (error != 0) - goto bad; + (void)getnewvnode("tarfs", mp, &tarfs_vnodeops, &vp); TARFS_DPF(FS, "%s: allocated vnode\n", __func__); vp->v_data = tnp; vp->v_type = tnp->type; tnp->vnode = vp; lockmgr(vp->v_vnlock, lkflags, NULL); error = insmntque(vp, mp); if (error != 0) goto bad; TARFS_DPF(FS, "%s: inserting entry into VFS hash\n", __func__); error = vfs_hash_insert(vp, ino, lkflags, td, vpp, NULL, NULL); if (error != 0 || *vpp != NULL) return (error); vn_set_state(vp, VSTATE_CONSTRUCTED); *vpp = vp; return (0); bad: *vpp = NULLVP; return (error); } static int tarfs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp) { struct tarfs_node *tnp; struct tarfs_fid *tfp; struct vnode *nvp; int error; tfp = (struct tarfs_fid *)fhp; MP_TO_TARFS_MOUNT(mp); if (tfp->ino < TARFS_ROOTINO || tfp->ino > INT_MAX) return (ESTALE); error = VFS_VGET(mp, tfp->ino, LK_EXCLUSIVE, &nvp); if (error != 0) { *vpp = NULLVP; return (error); } tnp = VP_TO_TARFS_NODE(nvp); if (tnp->mode == 0 || tnp->gen != tfp->gen 
|| tnp->nlink <= 0) { vput(nvp); *vpp = NULLVP; return (ESTALE); } *vpp = nvp; return (0); } static struct vfsops tarfs_vfsops = { .vfs_fhtovp = tarfs_fhtovp, .vfs_mount = tarfs_mount, .vfs_root = tarfs_root, .vfs_statfs = tarfs_statfs, .vfs_unmount = tarfs_unmount, .vfs_vget = tarfs_vget, }; VFS_SET(tarfs_vfsops, tarfs, VFCF_READONLY); MODULE_VERSION(tarfs, 1); MODULE_DEPEND(tarfs, xz, 1, 1, 1); diff --git a/sys/fs/tarfs/tarfs_vnops.c b/sys/fs/tarfs/tarfs_vnops.c index 266002bca7b2..99ff39d41271 100644 --- a/sys/fs/tarfs/tarfs_vnops.c +++ b/sys/fs/tarfs/tarfs_vnops.c @@ -1,643 +1,643 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013 Juniper Networks, Inc. * Copyright (c) 2022-2023 Klara, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_tarfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include static int tarfs_open(struct vop_open_args *ap) { struct tarfs_node *tnp; struct vnode *vp; vp = ap->a_vp; MPASS(VOP_ISLOCKED(vp)); tnp = VP_TO_TARFS_NODE(vp); TARFS_DPF(VNODE, "%s(%p=%s, %o)\n", __func__, tnp, tnp->name, ap->a_mode); if (vp->v_type != VREG && vp->v_type != VDIR) return (EOPNOTSUPP); vnode_create_vobject(vp, tnp->size, ap->a_td); return (0); } static int tarfs_close(struct vop_close_args *ap) { #ifdef TARFS_DEBUG struct tarfs_node *tnp; struct vnode *vp; vp = ap->a_vp; MPASS(VOP_ISLOCKED(vp)); tnp = VP_TO_TARFS_NODE(vp); TARFS_DPF(VNODE, "%s(%p=%s)\n", __func__, tnp, tnp->name); #else (void)ap; #endif return (0); } static int tarfs_access(struct vop_access_args *ap) { struct tarfs_node *tnp; struct vnode *vp; accmode_t accmode; struct ucred *cred; int error; vp = ap->a_vp; accmode = ap->a_accmode; cred = ap->a_cred; MPASS(VOP_ISLOCKED(vp)); tnp = VP_TO_TARFS_NODE(vp); TARFS_DPF(VNODE, "%s(%p=%s, %o)\n", __func__, tnp, tnp->name, accmode); switch (vp->v_type) { case VDIR: case VLNK: case VREG: if ((accmode & VWRITE) != 0) return (EROFS); break; case VBLK: case VCHR: case VFIFO: break; default: return (EINVAL); } if ((accmode & VWRITE) != 0) return (EPERM); error = vaccess(vp->v_type, tnp->mode, tnp->uid, tnp->gid, accmode, cred); return (error); } static int tarfs_getattr(struct vop_getattr_args *ap) { struct tarfs_node *tnp; struct vnode *vp; struct vattr *vap; vp = ap->a_vp; vap = ap->a_vap; tnp = VP_TO_TARFS_NODE(vp); TARFS_DPF(VNODE, "%s(%p=%s)\n", __func__, tnp, tnp->name); vap->va_type = vp->v_type; vap->va_mode = tnp->mode; vap->va_nlink = tnp->nlink; vap->va_gid = tnp->gid; vap->va_uid = tnp->uid; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; vap->va_fileid = tnp->ino; vap->va_size = tnp->size; vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; vap->va_atime = tnp->atime; vap->va_ctime = 
tnp->ctime; vap->va_mtime = tnp->mtime; vap->va_birthtime = tnp->birthtime; vap->va_gen = tnp->gen; vap->va_flags = tnp->flags; vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ? tnp->rdev : NODEV; vap->va_bytes = round_page(tnp->physize); vap->va_filerev = 0; return (0); } static int tarfs_lookup(struct vop_cachedlookup_args *ap) { struct tarfs_node *dirnode, *parent, *tnp; struct componentname *cnp; struct vnode *dvp, **vpp; #ifdef TARFS_DEBUG struct vnode *vp; #endif int error; dvp = ap->a_dvp; vpp = ap->a_vpp; cnp = ap->a_cnp; *vpp = NULLVP; dirnode = VP_TO_TARFS_NODE(dvp); parent = dirnode->parent; tnp = NULL; TARFS_DPF(LOOKUP, "%s(%p=%s, %.*s)\n", __func__, dirnode, dirnode->name, (int)cnp->cn_namelen, cnp->cn_nameptr); error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, curthread); if (error != 0) return (error); if (cnp->cn_flags & ISDOTDOT) { /* Do not allow .. on the root node */ if (parent == NULL || parent == dirnode) return (ENOENT); /* Allocate a new vnode on the matching entry */ error = vn_vget_ino(dvp, parent->ino, cnp->cn_lkflags, vpp); if (error != 0) return (error); } else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { VREF(dvp); *vpp = dvp; #ifdef TARFS_DEBUG } else if (dirnode == dirnode->tmp->root && (vp = dirnode->tmp->znode) != NULL && cnp->cn_namelen == TARFS_ZIO_NAMELEN && memcmp(cnp->cn_nameptr, TARFS_ZIO_NAME, TARFS_ZIO_NAMELEN) == 0) { error = vn_lock(vp, cnp->cn_lkflags); if (error != 0) return (error); vref(vp); *vpp = vp; return (0); #endif } else { tnp = tarfs_lookup_node(dirnode, NULL, cnp); if (tnp == NULL) { TARFS_DPF(LOOKUP, "%s(%p=%s, %.*s): file not found\n", __func__, dirnode, dirnode->name, (int)cnp->cn_namelen, cnp->cn_nameptr); return (ENOENT); } if ((cnp->cn_flags & ISLASTCN) == 0 && (tnp->type != VDIR && tnp->type != VLNK)) return (ENOTDIR); error = vn_vget_ino(dvp, tnp->ino, cnp->cn_lkflags, vpp); if (error != 0) return (error); } #ifdef TARFS_DEBUG if (tnp == NULL) tnp = VP_TO_TARFS_NODE(*vpp); 
TARFS_DPF(LOOKUP, "%s: found vnode %p, tarfs_node %p\n", __func__, *vpp, tnp); #endif /* TARFS_DEBUG */ /* Store the result the the cache if MAKEENTRY is specified in flags */ if ((cnp->cn_flags & MAKEENTRY) != 0 && cnp->cn_nameiop != CREATE) cache_enter(dvp, *vpp, cnp); return (error); } static int tarfs_readdir(struct vop_readdir_args *ap) { - struct dirent cde; + struct dirent cde = { }; struct tarfs_node *current, *tnp; struct vnode *vp; struct uio *uio; int *eofflag; uint64_t **cookies; int *ncookies; off_t off; u_int idx, ndirents; int error; vp = ap->a_vp; uio = ap->a_uio; eofflag = ap->a_eofflag; cookies = ap->a_cookies; ncookies = ap->a_ncookies; if (vp->v_type != VDIR) return (ENOTDIR); tnp = VP_TO_TARFS_NODE(vp); off = uio->uio_offset; current = NULL; ndirents = 0; TARFS_DPF(VNODE, "%s(%p=%s, %zu, %zd)\n", __func__, tnp, tnp->name, uio->uio_offset, uio->uio_resid); if (uio->uio_offset == TARFS_COOKIE_EOF) { TARFS_DPF(VNODE, "%s: EOF\n", __func__); return (0); } if (uio->uio_offset == TARFS_COOKIE_DOT) { TARFS_DPF(VNODE, "%s: Generating . entry\n", __func__); /* fake . entry */ cde.d_fileno = tnp->ino; cde.d_type = DT_DIR; cde.d_namlen = 1; cde.d_name[0] = '.'; cde.d_name[1] = '\0'; cde.d_reclen = GENERIC_DIRSIZ(&cde); if (cde.d_reclen > uio->uio_resid) goto full; dirent_terminate(&cde); error = uiomove(&cde, cde.d_reclen, uio); if (error) return (error); /* next is .. */ uio->uio_offset = TARFS_COOKIE_DOTDOT; ndirents++; } if (uio->uio_offset == TARFS_COOKIE_DOTDOT) { TARFS_DPF(VNODE, "%s: Generating .. entry\n", __func__); /* fake .. 
entry */ MPASS(tnp->parent != NULL); TARFS_NODE_LOCK(tnp->parent); cde.d_fileno = tnp->parent->ino; TARFS_NODE_UNLOCK(tnp->parent); cde.d_type = DT_DIR; cde.d_namlen = 2; cde.d_name[0] = '.'; cde.d_name[1] = '.'; cde.d_name[2] = '\0'; cde.d_reclen = GENERIC_DIRSIZ(&cde); if (cde.d_reclen > uio->uio_resid) goto full; dirent_terminate(&cde); error = uiomove(&cde, cde.d_reclen, uio); if (error) return (error); /* next is first child */ current = TAILQ_FIRST(&tnp->dir.dirhead); if (current == NULL) goto done; uio->uio_offset = current->ino; TARFS_DPF(VNODE, "%s: [%u] setting current node to %p=%s\n", __func__, ndirents, current, current->name); ndirents++; } /* resuming previous call */ if (current == NULL) { current = tarfs_lookup_dir(tnp, uio->uio_offset); if (current == NULL) { error = EINVAL; goto done; } uio->uio_offset = current->ino; TARFS_DPF(VNODE, "%s: [%u] setting current node to %p=%s\n", __func__, ndirents, current, current->name); } for (;;) { cde.d_fileno = current->ino; switch (current->type) { case VBLK: cde.d_type = DT_BLK; break; case VCHR: cde.d_type = DT_CHR; break; case VDIR: cde.d_type = DT_DIR; break; case VFIFO: cde.d_type = DT_FIFO; break; case VLNK: cde.d_type = DT_LNK; break; case VREG: cde.d_type = DT_REG; break; default: panic("%s: tarfs_node %p, type %d\n", __func__, current, current->type); } cde.d_namlen = current->namelen; MPASS(tnp->namelen < sizeof(cde.d_name)); (void)memcpy(cde.d_name, current->name, current->namelen); cde.d_name[current->namelen] = '\0'; cde.d_reclen = GENERIC_DIRSIZ(&cde); if (cde.d_reclen > uio->uio_resid) goto full; dirent_terminate(&cde); error = uiomove(&cde, cde.d_reclen, uio); if (error != 0) goto done; ndirents++; /* next sibling */ current = TAILQ_NEXT(current, dirents); if (current == NULL) goto done; uio->uio_offset = current->ino; TARFS_DPF(VNODE, "%s: [%u] setting current node to %p=%s\n", __func__, ndirents, current, current->name); } full: if (cde.d_reclen > uio->uio_resid) { TARFS_DPF(VNODE, "%s: 
out of space, returning\n", __func__); error = (ndirents == 0) ? EINVAL : 0; } done: TARFS_DPF(VNODE, "%s: %u entries written\n", __func__, ndirents); TARFS_DPF(VNODE, "%s: saving cache information\n", __func__); if (current == NULL) { uio->uio_offset = TARFS_COOKIE_EOF; tnp->dir.lastcookie = 0; tnp->dir.lastnode = NULL; } else { tnp->dir.lastcookie = current->ino; tnp->dir.lastnode = current; } if (eofflag != NULL) { TARFS_DPF(VNODE, "%s: Setting EOF flag\n", __func__); *eofflag = (error == 0 && current == NULL); } /* Update for NFS */ if (error == 0 && cookies != NULL && ncookies != NULL) { TARFS_DPF(VNODE, "%s: Updating NFS cookies\n", __func__); current = NULL; *cookies = malloc(ndirents * sizeof(off_t), M_TEMP, M_WAITOK); *ncookies = ndirents; for (idx = 0; idx < ndirents; idx++) { if (off == TARFS_COOKIE_DOT) off = TARFS_COOKIE_DOTDOT; else { if (off == TARFS_COOKIE_DOTDOT) { current = TAILQ_FIRST(&tnp->dir.dirhead); } else if (current != NULL) { current = TAILQ_NEXT(current, dirents); } else { current = tarfs_lookup_dir(tnp, off); current = TAILQ_NEXT(current, dirents); } if (current == NULL) off = TARFS_COOKIE_EOF; else off = current->ino; } TARFS_DPF(VNODE, "%s: [%u] offset %zu\n", __func__, idx, off); (*cookies)[idx] = off; } MPASS(uio->uio_offset == off); } return (error); } static int tarfs_read(struct vop_read_args *ap) { struct tarfs_node *tnp; struct uio *uiop; struct vnode *vp; size_t len; off_t resid; int error; uiop = ap->a_uio; vp = ap->a_vp; if (vp->v_type == VCHR || vp->v_type == VBLK) return (EOPNOTSUPP); if (vp->v_type != VREG) return (EISDIR); if (uiop->uio_offset < 0) return (EINVAL); tnp = VP_TO_TARFS_NODE(vp); error = 0; TARFS_DPF(VNODE, "%s(%p=%s, %zu, %zd)\n", __func__, tnp, tnp->name, uiop->uio_offset, uiop->uio_resid); while ((resid = uiop->uio_resid) > 0) { if (tnp->size <= uiop->uio_offset) break; len = MIN(tnp->size - uiop->uio_offset, resid); if (len == 0) break; error = tarfs_read_file(tnp, len, uiop); if (error != 0 || resid == 
uiop->uio_resid) break; } return (error); } static int tarfs_readlink(struct vop_readlink_args *ap) { struct tarfs_node *tnp; struct uio *uiop; struct vnode *vp; int error; uiop = ap->a_uio; vp = ap->a_vp; MPASS(uiop->uio_offset == 0); MPASS(vp->v_type == VLNK); tnp = VP_TO_TARFS_NODE(vp); TARFS_DPF(VNODE, "%s(%p=%s)\n", __func__, tnp, tnp->name); error = uiomove(tnp->link.name, MIN(tnp->size, uiop->uio_resid), uiop); return (error); } static int tarfs_reclaim(struct vop_reclaim_args *ap) { struct tarfs_node *tnp; struct vnode *vp; vp = ap->a_vp; tnp = VP_TO_TARFS_NODE(vp); vfs_hash_remove(vp); TARFS_NODE_LOCK(tnp); tnp->vnode = NULLVP; vp->v_data = NULL; TARFS_NODE_UNLOCK(tnp); return (0); } static int tarfs_print(struct vop_print_args *ap) { struct tarfs_node *tnp; struct vnode *vp; vp = ap->a_vp; tnp = VP_TO_TARFS_NODE(vp); printf("tag tarfs, tarfs_node %p, links %lu\n", tnp, (unsigned long)tnp->nlink); printf("\tmode 0%o, owner %d, group %d, size %zd\n", tnp->mode, tnp->uid, tnp->gid, tnp->size); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } static int tarfs_strategy(struct vop_strategy_args *ap) { struct uio auio; struct iovec iov; struct tarfs_node *tnp; struct buf *bp; off_t off; size_t len; int error; tnp = VP_TO_TARFS_NODE(ap->a_vp); bp = ap->a_bp; MPASS(bp->b_iocmd == BIO_READ); MPASS(bp->b_iooffset >= 0); MPASS(bp->b_bcount > 0); MPASS(bp->b_bufsize >= bp->b_bcount); TARFS_DPF(VNODE, "%s(%p=%s, %zu, %ld/%ld)\n", __func__, tnp, tnp->name, (size_t)bp->b_iooffset, bp->b_bcount, bp->b_bufsize); iov.iov_base = bp->b_data; iov.iov_len = bp->b_bcount; off = bp->b_iooffset; len = bp->b_bcount; bp->b_resid = len; if (off > tnp->size) { /* XXX read beyond EOF - figure out correct handling */ error = EIO; goto out; } if (off + len > tnp->size) { /* clip to file length */ len = tnp->size - off; } auio.uio_iov = &iov; auio.uio_iovcnt = 1; auio.uio_offset = off; auio.uio_resid = len; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; 
auio.uio_td = curthread; error = tarfs_read_file(tnp, len, &auio); bp->b_resid -= len - auio.uio_resid; out: if (error != 0) { bp->b_ioflags |= BIO_ERROR; bp->b_error = error; } bp->b_flags |= B_DONE; return (0); } static int tarfs_vptofh(struct vop_vptofh_args *ap) { struct tarfs_fid *tfp; struct tarfs_node *tnp; tfp = (struct tarfs_fid *)ap->a_fhp; tnp = VP_TO_TARFS_NODE(ap->a_vp); tfp->len = sizeof(struct tarfs_fid); tfp->ino = tnp->ino; tfp->gen = tnp->gen; return (0); } struct vop_vector tarfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = tarfs_access, .vop_cachedlookup = tarfs_lookup, .vop_close = tarfs_close, .vop_getattr = tarfs_getattr, .vop_lookup = vfs_cache_lookup, .vop_open = tarfs_open, .vop_print = tarfs_print, .vop_read = tarfs_read, .vop_readdir = tarfs_readdir, .vop_readlink = tarfs_readlink, .vop_reclaim = tarfs_reclaim, .vop_strategy = tarfs_strategy, .vop_vptofh = tarfs_vptofh, }; VFS_VOP_VECTOR_REGISTER(tarfs_vnodeops); diff --git a/tests/sys/fs/tarfs/mktar.c b/tests/sys/fs/tarfs/mktar.c index e1b1183af114..9b3d7910a12c 100644 --- a/tests/sys/fs/tarfs/mktar.c +++ b/tests/sys/fs/tarfs/mktar.c @@ -1,238 +1,277 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2023 Klara, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #define PROGNAME "mktar" #define SUBDIRNAME "directory" +#define EMPTYDIRNAME "empty" +#define NORMALFILENAME "file" #define SPARSEFILENAME "sparse_file" #define HARDLINKNAME "hard_link" #define SHORTLINKNAME "short_link" #define LONGLINKNAME "long_link" static bool opt_v; static void verbose(const char *fmt, ...) { va_list ap; if (!opt_v) return; fprintf(stderr, "%s: ", PROGNAME); va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fprintf(stderr, "\n"); } +static void +mknormalfile(const char *filename, mode_t mode) +{ + char buf[512]; + ssize_t res; + int fd; + + if ((fd = open(filename, O_RDWR|O_CREAT|O_EXCL, mode)) < 0) + err(1, "%s", filename); + for (unsigned int i = 0; i < sizeof(buf); i++) + buf[i] = 32 + i % 64; + res = write(fd, buf, sizeof(buf)); + if (res < 0) + err(1, "%s", filename); + if (res != sizeof(buf)) + errx(1, "%s: short write", filename); + close(fd); +} + static void mksparsefile(const char *filename, mode_t mode) { char buf[511]; ssize_t res; int fd; - if ((fd = open(filename, O_RDWR|O_CREAT|O_TRUNC, mode)) < 0) + if ((fd = open(filename, O_RDWR|O_CREAT|O_EXCL, mode)) < 0) err(1, "%s", filename); for (unsigned int i = 33; i <= 126; i++) { memset(buf, i, sizeof(buf)); if (lseek(fd, 1048576LU * (i - 32), SEEK_SET) < 0) err(1, "%s", filename); res = write(fd, buf, sizeof(buf)); if (res < 0) err(1, "%s", filename); if (res != 
sizeof(buf)) errx(1, "%s: short write", filename); } close(fd); } static char * mklonglinktarget(const char *dirname, const char *filename) { char *piece, *target; if (asprintf(&piece, "%1$s/../%1$s/../%1$s/../%1$s/../", dirname) < 0) err(1, "asprintf()"); if (asprintf(&target, "%1$s%1$s%1$s%1$s%1$s%1$s%1$s%1$s%2$s", piece, filename) < 0) err(1, "asprintf()"); free(piece); return target; } static void mktar(void) { char *linktarget; /* create a subdirectory */ verbose("mkdir %s", SUBDIRNAME); if (mkdir(SUBDIRNAME, 0755) != 0) err(1, "%s", SUBDIRNAME); + /* create a second subdirectory which will remain empty */ + verbose("mkdir %s", EMPTYDIRNAME); + if (mkdir(EMPTYDIRNAME, 0755) != 0) + err(1, "%s", EMPTYDIRNAME); + + /* create a normal file */ + verbose("creating %s", NORMALFILENAME); + mknormalfile(NORMALFILENAME, 0644); + /* create a sparse file */ verbose("creating %s", SPARSEFILENAME); mksparsefile(SPARSEFILENAME, 0644); chflags(SPARSEFILENAME, UF_NODUMP); /* create a hard link */ verbose("link %s %s", SPARSEFILENAME, HARDLINKNAME); if (link(SPARSEFILENAME, HARDLINKNAME) != 0) err(1, "%s", HARDLINKNAME); /* create a symbolic link with a short target */ verbose("symlink %s %s", SPARSEFILENAME, SHORTLINKNAME); if (symlink(SPARSEFILENAME, SHORTLINKNAME) != 0) err(1, "%s", SHORTLINKNAME); /* create a symbolic link with a long target */ linktarget = mklonglinktarget(SUBDIRNAME, SPARSEFILENAME); verbose("symlink %s %s", linktarget, LONGLINKNAME); if (symlink(linktarget, LONGLINKNAME) != 0) err(1, "%s", LONGLINKNAME); free(linktarget); } static void usage(void) { fprintf(stderr, "usage: %s [-v] tarfile\n", PROGNAME); exit(EXIT_FAILURE); } int main(int argc, char *argv[]) { const char *tarfilename; char *dirname; int opt, wstatus; pid_t pid; while ((opt = getopt(argc, argv, "v")) != -1) switch (opt) { case 'v': opt_v = true; break; default: usage(); } argc -= optind; argv += optind; if (argc != 1) usage(); tarfilename = *argv; if (asprintf(&dirname, "%s%s.XXXXXXXX", 
_PATH_TMP, PROGNAME) < 0) err(1, "asprintf()"); if (mkdtemp(dirname) == NULL) err(1, "%s", dirname); verbose("mkdir %s", dirname); /* fork a child to create the files */ if ((pid = fork()) < 0) err(1, "fork()"); if (pid == 0) { verbose("cd %s", dirname); if (chdir(dirname) != 0) err(1, "%s", dirname); verbose("umask 022"); umask(022); mktar(); verbose("cd -"); exit(0); } if (waitpid(pid, &wstatus, 0) < 0) err(1, "waitpid()"); if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0) errx(1, "child failed"); /* fork a child to create the tarball */ if ((pid = fork()) < 0) err(1, "fork()"); if (pid == 0) { verbose("creating tarball"); execlp("tar", "tar", "-c", "-f", tarfilename, "-C", dirname, "--zstd", #if 0 "--options", "zstd:frame-per-file", #endif - ".", + "./" EMPTYDIRNAME "/../" NORMALFILENAME, + "./" SPARSEFILENAME, + "./" HARDLINKNAME, + "./" SHORTLINKNAME, + "./" SUBDIRNAME, + "./" LONGLINKNAME, NULL); err(1, "execlp()"); } if (waitpid(pid, &wstatus, 0) < 0) err(1, "waitpid()"); if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0) errx(1, "child failed"); /* fork a child to delete everything */ if ((pid = fork()) < 0) err(1, "fork()"); if (pid == 0) { verbose("cd %s", dirname); if (chdir(dirname) != 0) err(1, "%s", dirname); verbose("rm %s", LONGLINKNAME); (void)unlink(LONGLINKNAME); verbose("rm %s", SHORTLINKNAME); (void)unlink(SHORTLINKNAME); verbose("rm %s", HARDLINKNAME); (void)unlink(HARDLINKNAME); verbose("rm %s", SPARSEFILENAME); (void)unlink(SPARSEFILENAME); - verbose("rm %s", SUBDIRNAME); + verbose("rm %s", NORMALFILENAME); + (void)unlink(NORMALFILENAME); + verbose("rmdir %s", EMPTYDIRNAME); + (void)rmdir(EMPTYDIRNAME); + verbose("rmdir %s", SUBDIRNAME); (void)rmdir(SUBDIRNAME); verbose("cd -"); exit(0); } if (waitpid(pid, &wstatus, 0) < 0) err(1, "waitpid()"); if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0) errx(1, "child failed"); verbose("rmdir %s", dirname); (void)rmdir(dirname); exit(0); }