diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c index 078ea5e52312..6417b7dac16b 100644 --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -1,2008 +1,2010 @@ /* $NetBSD: msdosfs_vnops.c,v 1.68 1998/02/10 14:10:04 mrg Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Prototypes for MSDOSFS vnode operations */ static vop_create_t msdosfs_create; static vop_mknod_t msdosfs_mknod; static vop_open_t msdosfs_open; static vop_close_t msdosfs_close; static vop_access_t msdosfs_access; static vop_getattr_t msdosfs_getattr; static vop_setattr_t msdosfs_setattr; static vop_read_t msdosfs_read; static vop_write_t msdosfs_write; static vop_fsync_t msdosfs_fsync; static vop_remove_t msdosfs_remove; static vop_link_t msdosfs_link; static vop_rename_t msdosfs_rename; static vop_mkdir_t msdosfs_mkdir; static vop_rmdir_t msdosfs_rmdir; static vop_symlink_t msdosfs_symlink; static vop_readdir_t msdosfs_readdir; static vop_bmap_t msdosfs_bmap; static vop_getpages_t msdosfs_getpages; static vop_strategy_t msdosfs_strategy; static vop_print_t 
msdosfs_print;
static vop_pathconf_t	msdosfs_pathconf;
static vop_vptofh_t	msdosfs_vptofh;

/*
 * Some general notes:
 *
 * In the ufs filesystem the inodes, superblocks, and indirect blocks are
 * read/written using the vnode for the filesystem. Blocks that represent
 * the contents of a file are read/written using the vnode for the file
 * (including directories when they are read/written as files). This
 * presents problems for the dos filesystem because data that should be in
 * an inode (if dos had them) resides in the directory itself. Since we
 * must update directory entries without the benefit of having the vnode
 * for the directory we must use the vnode for the filesystem. This means
 * that when a directory is actually read/written (via read, write, or
 * readdir, or seek) we must use the vnode for the filesystem instead of
 * the vnode for the directory as would happen in ufs. This is to ensure we
 * retrieve the correct block from the buffer cache since the hash value is
 * based upon the vnode address and the desired block number.
 */

/*
 * Create a regular file. On entry the directory to contain the file being
 * created is locked. We must release before we return. We must also free
 * the pathname buffer pointed at by cnp->cn_pnbuf, always on error.
 *
 * NOTE(review): the body below neither unlocks ap->a_dvp nor frees
 * cn_pnbuf on any path; the two "We must ..." sentences above look
 * historical and should be confirmed against the current VOP_CREATE
 * contract.
 *
 * A template denode is built on the stack and handed to createde(); on
 * success the new vnode is returned through *ap->a_vpp and, when the
 * caller set MAKEENTRY, entered into the name cache.
 *
 * Returns 0 on success, ENOSPC when the parent is the fixed-size root
 * directory and de_fndoffset is at or past its size, or the error from
 * uniqdosname() / createde().
 */
static int
msdosfs_create(struct vop_create_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	struct denode ndirent;
	struct denode *dep;
	struct denode *pdep = VTODE(ap->a_dvp);
	struct timespec ts;
	int error;

#ifdef MSDOSFS_DEBUG
	printf("msdosfs_create(cnp %p, vap %p\n", cnp, ap->a_vap);
#endif

	/*
	 * If this is the root directory and there is no space left we
	 * can't do anything. This is because the root directory can not
	 * change size.
	 */
	if (pdep->de_StartCluster == MSDOSFSROOT
	    && pdep->de_fndoffset >= pdep->de_FileSize) {
		error = ENOSPC;
		goto bad;
	}

	/*
	 * Create a directory entry for the file, then call createde() to
	 * have it installed. NOTE: DOS files are always executable. We
	 * use the absence of the owner write bit to make the file
	 * readonly.
	 */
	memset(&ndirent, 0, sizeof(ndirent));
	error = uniqdosname(pdep, cnp, ndirent.de_Name);
	if (error)
		goto bad;

	/* New files start empty (no cluster, size 0) with the archive bit set. */
	ndirent.de_Attributes = ATTR_ARCHIVE;
	ndirent.de_LowerCase = 0;
	ndirent.de_StartCluster = 0;
	ndirent.de_FileSize = 0;
	ndirent.de_pmp = pdep->de_pmp;
	ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE;
	vfs_timestamp(&ts);
	DETIMES(&ndirent, &ts, &ts, &ts);
	error = createde(&ndirent, pdep, &dep, cnp);
	if (error)
		goto bad;
	*ap->a_vpp = DETOV(dep);
	if ((cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
	return (0);

bad:
	return (error);
}

/*
 * Device nodes cannot be created on this filesystem: unconditionally
 * returns EINVAL.
 */
static int
msdosfs_mknod(struct vop_mknod_args *ap)
{

	return (EINVAL);
}

/*
 * Open: calls vnode_create_vobject() for the vnode with the denode's
 * current de_FileSize.  Always returns 0.
 */
static int
msdosfs_open(struct vop_open_args *ap)
{
	struct denode *dep = VTODE(ap->a_vp);

	vnode_create_vobject(ap->a_vp, dep->de_FileSize, ap->a_td);
	return 0;
}

/*
 * Close: while holding the vnode interlock, apply DETIMES() with the
 * current timestamp, but only when v_usecount is greater than one.
 * Always returns 0.
 */
static int
msdosfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct denode *dep = VTODE(vp);
	struct timespec ts;

	VI_LOCK(vp);
	if (vp->v_usecount > 1) {
		vfs_timestamp(&ts);
		DETIMES(dep, &ts, &ts, &ts);
	}
	VI_UNLOCK(vp);
	return 0;
}

/*
 * Access check.  The permission bits are synthesized: rwx for everybody,
 * masked by the mount's pm_dirmask (directories) or pm_mask (everything
 * else), with the mount's pm_uid / pm_gid as owner.  Write requests on
 * regular files and directories fail with EROFS on a read-only mount;
 * everything else is decided by vaccess().
 */
static int
msdosfs_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct denode *dep = VTODE(ap->a_vp);
	struct msdosfsmount *pmp = dep->de_pmp;
	mode_t file_mode;
	accmode_t accmode = ap->a_accmode;

	file_mode = S_IRWXU|S_IRWXG|S_IRWXO;
	file_mode &= (vp->v_type == VDIR ? pmp->pm_dirmask : pmp->pm_mask);

	/*
	 * Disallow writing to directories and regular files if the
	 * filesystem is read-only.
*/
	if (accmode & VWRITE) {
		switch (vp->v_type) {
		case VREG:
		case VDIR:
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			break;
		default:
			break;
		}
	}

	/* Ownership comes from the mount (pm_uid / pm_gid), not from the file. */
	return (vaccess(vp->v_type, file_mode, pmp->pm_uid, pmp->pm_gid,
	    ap->a_accmode, ap->a_cred));
}

/*
 * Fill *ap->a_vap from the denode.
 *
 * Pending timestamp updates are applied first via DETIMES().  Owner,
 * group and link count are not stored per file: uid/gid come from the
 * mount and va_nlink is always 1.  The mode is rwx for all, minus the
 * write bits when ATTR_READONLY is set, masked by pm_dirmask/pm_mask.
 * Access and birth times are only taken from the directory entry when
 * the mount has MSDOSFSMNT_LONGNAME set; otherwise atime mirrors mtime
 * and birthtime is reported as tv_sec == -1.
 *
 * Always returns 0.
 */
static int
msdosfs_getattr(struct vop_getattr_args *ap)
{
	struct denode *dep = VTODE(ap->a_vp);
	struct msdosfsmount *pmp = dep->de_pmp;
	struct vattr *vap = ap->a_vap;
	mode_t mode;
	struct timespec ts;
	u_long dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry);
	uint64_t fileid;

	vfs_timestamp(&ts);
	DETIMES(dep, &ts, &ts, &ts);
	vap->va_fsid = dev2udev(pmp->pm_dev);
	/*
	 * The following computation of the fileid must be the same as that
	 * used in msdosfs_readdir() to compute d_fileno. If not, pwd
	 * doesn't work.
	 */
	if (dep->de_Attributes & ATTR_DIRECTORY) {
		fileid = (uint64_t)cntobn(pmp, dep->de_StartCluster) *
		    dirsperblk;
		/* The root directory is given the fixed fileid 1. */
		if (dep->de_StartCluster == MSDOSFSROOT)
			fileid = 1;
	} else {
		fileid = (uint64_t)cntobn(pmp, dep->de_dirclust) *
		    dirsperblk;
		if (dep->de_dirclust == MSDOSFSROOT)
			fileid = (uint64_t)roottobn(pmp, 0) * dirsperblk;
		/* Add the index of the entry within its directory cluster. */
		fileid += (uoff_t)dep->de_diroffset /
		    sizeof(struct direntry);
	}
	vap->va_fileid = fileid;

	mode = S_IRWXU|S_IRWXG|S_IRWXO;
	if (dep->de_Attributes & ATTR_READONLY)
		mode &= ~(S_IWUSR|S_IWGRP|S_IWOTH);
	vap->va_mode = mode &
	    (ap->a_vp->v_type == VDIR ? pmp->pm_dirmask : pmp->pm_mask);
	vap->va_uid = pmp->pm_uid;
	vap->va_gid = pmp->pm_gid;
	vap->va_nlink = 1;
	vap->va_rdev = NODEV;
	vap->va_size = dep->de_FileSize;
	fattime2timespec(dep->de_MDate, dep->de_MTime, 0, 0, &vap->va_mtime);
	/* No separate change time is kept; ctime mirrors mtime. */
	vap->va_ctime = vap->va_mtime;
	if (pmp->pm_flags & MSDOSFSMNT_LONGNAME) {
		fattime2timespec(dep->de_ADate, 0, 0, 0, &vap->va_atime);
		fattime2timespec(dep->de_CDate, dep->de_CTime, dep->de_CHun,
		    0, &vap->va_birthtime);
	} else {
		vap->va_atime = vap->va_mtime;
		vap->va_birthtime.tv_sec = -1;
		vap->va_birthtime.tv_nsec = 0;
	}

	/* Translate the DOS attribute bits into the matching UF_* flags. */
	vap->va_flags = 0;
	if (dep->de_Attributes & ATTR_ARCHIVE)
		vap->va_flags |= UF_ARCHIVE;
	if (dep->de_Attributes & ATTR_HIDDEN)
		vap->va_flags |= UF_HIDDEN;
	if (dep->de_Attributes & ATTR_READONLY)
		vap->va_flags |= UF_READONLY;
	if (dep->de_Attributes & ATTR_SYSTEM)
		vap->va_flags |= UF_SYSTEM;
	vap->va_gen = 0;
	vap->va_blocksize = pmp->pm_bpcluster;
	/* va_bytes is the file size rounded up using the pm_crbomask mask. */
	if (dep->de_StartCluster != MSDOSFSROOT)
		vap->va_bytes =
		    (dep->de_FileSize + pmp->pm_crbomask) & ~pmp->pm_crbomask;
	else
		vap->va_bytes = 0; /* FAT12/FAT16 root dir in reserved area */
	vap->va_type = ap->a_vp->v_type;
	vap->va_filerev = dep->de_modrev;
	return (0);
}

/*
 * Set attributes from ap->a_vap.
 *
 * The checks run in a fixed order and the first failure returns
 * immediately, possibly after earlier groups have already modified the
 * in-core denode:
 *   1. unsettable fields (type, nlink, fsid, fileid, blocksize, rdev,
 *      bytes, gen) must be unset             -> EINVAL
 *   2. the root directory is never modified  -> EINVAL
 *   3. va_flags   (UF_ARCHIVE/HIDDEN/READONLY/SYSTEM only)
 *   4. va_uid / va_gid (only the mount's own ids are accepted)
 *   5. va_size    (truncate/extend through detrunc())
 *   6. va_atime / va_mtime
 *   7. va_mode    (only the owner write bit is honoured)
 * Every modifying group fails with EROFS on a read-only mount.  On
 * success the result of deupdat(dep, 0) is returned.
 */
static int
msdosfs_setattr(struct vop_setattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct denode *dep = VTODE(ap->a_vp);
	struct msdosfsmount *pmp = dep->de_pmp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;
	struct thread *td = curthread;
	int error = 0;

#ifdef MSDOSFS_DEBUG
	printf("msdosfs_setattr(): vp %p, vap %p, cred %p\n",
	    ap->a_vp, vap, cred);
#endif

	/*
	 * Check for unsettable attributes.
*/
	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
#ifdef MSDOSFS_DEBUG
		printf("msdosfs_setattr(): returning EINVAL\n");
		printf(" va_type %d, va_nlink %llx, va_fsid %llx, va_fileid %llx\n",
		    vap->va_type, (unsigned long long)vap->va_nlink,
		    (unsigned long long)vap->va_fsid,
		    (unsigned long long)vap->va_fileid);
		printf(" va_blocksize %lx, va_rdev %llx, va_bytes %llx, va_gen %lx\n",
		    vap->va_blocksize, (unsigned long long)vap->va_rdev,
		    (unsigned long long)vap->va_bytes, vap->va_gen);
		printf(" va_uid %x, va_gid %x\n", vap->va_uid, vap->va_gid);
#endif
		return (EINVAL);
	}

	/*
	 * We don't allow setting attributes on the root directory.
	 * The special case for the root directory is because before
	 * FAT32, the root directory didn't have an entry for itself
	 * (and was otherwise special). With FAT32, the root
	 * directory is not so special, but still doesn't have an
	 * entry for itself.
	 */
	if (vp->v_vflag & VV_ROOT)
		return (EINVAL);

	/* --- File flags: mapped one-to-one onto DOS attribute bits. --- */
	if (vap->va_flags != VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		/* Anyone but the mount owner needs PRIV_VFS_ADMIN. */
		if (cred->cr_uid != pmp->pm_uid) {
			error = priv_check_cred(cred, PRIV_VFS_ADMIN);
			if (error)
				return (error);
		}
		/*
		 * We are very inconsistent about handling unsupported
		 * attributes. We ignored the access time and the
		 * read and execute bits. We were strict for the other
		 * attributes.
		 */
		if (vap->va_flags & ~(UF_ARCHIVE | UF_HIDDEN | UF_READONLY |
		    UF_SYSTEM))
			return EOPNOTSUPP;
		if (vap->va_flags & UF_ARCHIVE)
			dep->de_Attributes |= ATTR_ARCHIVE;
		else
			dep->de_Attributes &= ~ATTR_ARCHIVE;
		if (vap->va_flags & UF_HIDDEN)
			dep->de_Attributes |= ATTR_HIDDEN;
		else
			dep->de_Attributes &= ~ATTR_HIDDEN;
		/* We don't allow changing the readonly bit on directories. */
		if (vp->v_type != VDIR) {
			if (vap->va_flags & UF_READONLY)
				dep->de_Attributes |= ATTR_READONLY;
			else
				dep->de_Attributes &= ~ATTR_READONLY;
		}
		if (vap->va_flags & UF_SYSTEM)
			dep->de_Attributes |= ATTR_SYSTEM;
		else
			dep->de_Attributes &= ~ATTR_SYSTEM;
		dep->de_flag |= DE_MODIFIED;
	}

	/*
	 * --- Ownership: nothing is stored; the request is only accepted
	 * when it names the mount's own uid and gid. ---
	 */
	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
		uid_t uid;
		gid_t gid;

		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		/* An unset id defaults to the mount's current value. */
		uid = vap->va_uid;
		if (uid == (uid_t)VNOVAL)
			uid = pmp->pm_uid;
		gid = vap->va_gid;
		if (gid == (gid_t)VNOVAL)
			gid = pmp->pm_gid;
		if (cred->cr_uid != pmp->pm_uid || uid != pmp->pm_uid ||
		    (gid != pmp->pm_gid && !groupmember(gid, cred))) {
			error = priv_check_cred(cred, PRIV_VFS_CHOWN);
			if (error)
				return (error);
		}
		/* Even a privileged caller cannot change the ids. */
		if (uid != pmp->pm_uid || gid != pmp->pm_gid)
			return EINVAL;
	}

	/* --- Size change. --- */
	if (vap->va_size != VNOVAL) {
		switch (vp->v_type) {
		case VDIR:
			return (EISDIR);
		case VREG:
			/*
			 * Truncation is only supported for regular files,
			 * Disallow it if the filesystem is read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			break;
		default:
			/*
			 * According to POSIX, the result is unspecified
			 * for file types other than regular files,
			 * directories and shared memory objects. We
			 * don't support any file types except regular
			 * files and directories in this file system, so
			 * this (default) case is unreachable and can do
			 * anything. Keep falling through to detrunc()
			 * for now.
			 */
			break;
		}
		error = vn_rlimit_trunc(vap->va_size, td);
		if (error != 0)
			return (error);
		error = detrunc(dep, vap->va_size, 0, cred);
		if (error)
			return error;
	}

	/* --- Timestamps. --- */
	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		error = vn_utimes_perm(vp, vap, cred, td);
		if (error != 0)
			return (error);
		/*
		 * The access date is only written when MSDOSFSMNT_NOWIN95
		 * is clear.  Clearing DE_ACCESS / DE_UPDATE drops the
		 * corresponding pending-update flag for the explicitly
		 * set time.
		 */
		if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95) == 0 &&
		    vap->va_atime.tv_sec != VNOVAL) {
			dep->de_flag &= ~DE_ACCESS;
			timespec2fattime(&vap->va_atime, 0,
			    &dep->de_ADate, NULL, NULL);
		}
		if (vap->va_mtime.tv_sec != VNOVAL) {
			dep->de_flag &= ~DE_UPDATE;
			timespec2fattime(&vap->va_mtime, 0,
			    &dep->de_MDate, &dep->de_MTime, NULL);
		}
		/*
		 * We don't set the archive bit when modifying the time of
		 * a directory to emulate the Windows/DOS behavior.
		 */
		if (vp->v_type != VDIR)
			dep->de_Attributes |= ATTR_ARCHIVE;
		dep->de_flag |= DE_MODIFIED;
	}

	/*
	 * DOS files only have the ability to have their writability
	 * attribute set, so we use the owner write bit to set the readonly
	 * attribute.
	 */
	if (vap->va_mode != (mode_t)VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		if (cred->cr_uid != pmp->pm_uid) {
			error = priv_check_cred(cred, PRIV_VFS_ADMIN);
			if (error)
				return (error);
		}
		/* Mode changes on directories are accepted but do nothing. */
		if (vp->v_type != VDIR) {
			/* We ignore the read and execute bits. */
			if (vap->va_mode & S_IWUSR)
				dep->de_Attributes &= ~ATTR_READONLY;
			else
				dep->de_Attributes |= ATTR_READONLY;
			dep->de_Attributes |= ATTR_ARCHIVE;
			dep->de_flag |= DE_MODIFIED;
		}
	}
	return (deupdat(dep, 0));
}

/*
 * Read from a file or directory into ap->a_uio.
 *
 * Data is transferred one cluster (pm_bpcluster bytes) at a time until
 * the request is satisfied, end of file is reached, or an error occurs.
 * Directories are read through the device vnode (pm_devvp) after the
 * cluster number has been translated by pcbmap(); regular files are
 * read through their own vnode.  For non-directories DE_ACCESS is set
 * afterwards unless the mount is MNT_NOATIME or MNT_RDONLY.
 *
 * Returns 0 or the first error from pcbmap() (E2BIG is reported as
 * EINVAL), the buffer read, or vn_io_fault_uiomove().
 */
static int
msdosfs_read(struct vop_read_args *ap)
{
	int error = 0;
	int blsize;
	int isadir;
	ssize_t orig_resid;
	u_int n;
	u_long diff;
	u_long on;
	daddr_t lbn;
	daddr_t rablock;
	int rasize;
	int seqcount;
	struct buf *bp;
	struct vnode *vp = ap->a_vp;
	struct denode *dep = VTODE(vp);
	struct msdosfsmount *pmp = dep->de_pmp;
	struct uio *uio = ap->a_uio;

	/*
	 * If they didn't ask for any data, then we are done.
*/
	orig_resid = uio->uio_resid;
	if (orig_resid == 0)
		return (0);

	/*
	 * The caller is supposed to ensure that
	 * uio->uio_offset >= 0 and uio->uio_resid >= 0.
	 * We don't need to check for large offsets as in ffs because
	 * dep->de_FileSize <= MSDOSFS_FILESIZE_MAX < OFF_MAX, so large
	 * offsets cannot cause overflow even in theory.
	 */

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;

	isadir = dep->de_Attributes & ATTR_DIRECTORY;
	do {
		if (uio->uio_offset >= dep->de_FileSize)
			break;
		/* lbn: cluster index in the file; on: byte offset inside it. */
		lbn = de_cluster(pmp, uio->uio_offset);
		rablock = lbn + 1;
		blsize = pmp->pm_bpcluster;
		on = uio->uio_offset & pmp->pm_crbomask;
		/*
		 * If we are operating on a directory file then be sure to
		 * do i/o with the vnode for the filesystem instead of the
		 * vnode for the directory.
		 */
		if (isadir) {
			/* convert cluster # to block # */
			error = pcbmap(dep, lbn, &lbn, 0, &blsize);
			if (error == E2BIG) {
				error = EINVAL;
				break;
			} else if (error)
				break;
			error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
		} else if (de_cn2off(pmp, rablock) >= dep->de_FileSize) {
			/* Last cluster of the file: nothing to read ahead. */
			error = bread(vp, lbn, blsize, NOCRED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, dep->de_FileSize, lbn, blsize,
			    NOCRED, on + uio->uio_resid, seqcount, 0, &bp);
		} else if (seqcount > 1) {
			/* Clustering disabled: read ahead a single cluster. */
			rasize = blsize;
			error = breadn(vp, lbn,
			    blsize, &rablock, &rasize, 1, NOCRED, &bp);
		} else {
			error = bread(vp, lbn, blsize, NOCRED, &bp);
		}
		/*
		 * NOTE(review): the bread() failure paths in
		 * msdosfs_write() and msdosfs_rename() do not brelse() the
		 * buffer; confirm that bp is valid to release here when
		 * the read failed.
		 */
		if (error) {
			brelse(bp);
			break;
		}
		/*
		 * Transfer length is the smallest of: bytes left in this
		 * cluster, bytes requested, bytes left before EOF, and
		 * bytes actually present in the buffer.
		 */
		diff = pmp->pm_bpcluster - on;
		n = diff > uio->uio_resid ? uio->uio_resid : diff;
		diff = dep->de_FileSize - uio->uio_offset;
		if (diff < n)
			n = diff;
		diff = blsize - bp->b_resid;
		if (diff < n)
			n = diff;
		error = vn_io_fault_uiomove(bp->b_data + on, (int) n, uio);
		brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n != 0);
	/* Flag an access-time update if anything was (or could be) read. */
	if (!isadir && (error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
		dep->de_flag |= DE_ACCESS;
	return (error);
}

/*
 * Write data to a file or directory.
*/
/*
 * Only regular files are accepted: a directory vnode yields EISDIR and
 * any other vnode type panics.
 *
 * Outline:
 *   - IO_APPEND moves the offset to the current end of file;
 *   - a zero-length write returns 0 without touching the file;
 *   - the file size limit is enforced via vn_rlimit_fsizex();
 *   - a write starting past EOF first extends the file with
 *     deextend() (DOS has no holes);
 *   - the clusters needed for the whole request are allocated up front
 *     with extendfile();
 *   - data is then copied cluster by cluster.
 *
 * On error with IO_UNIT the file is truncated back to its original
 * size and the uio is rewound; without IO_UNIT a partial write is
 * reported as success.
 */
static int
msdosfs_write(struct vop_write_args *ap)
{
	int n;
	int croffset;
	ssize_t resid, r;
	u_long osize;
	int error = 0;
	u_long count;
	int seqcount;
	daddr_t bn, lastcn;
	struct buf *bp;
	int ioflag = ap->a_ioflag;
	struct uio *uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct vnode *thisvp;
	struct denode *dep = VTODE(vp);
	struct msdosfsmount *pmp = dep->de_pmp;
	struct ucred *cred = ap->a_cred;

#ifdef MSDOSFS_DEBUG
	printf("msdosfs_write(vp %p, uio %p, ioflag %x, cred %p\n",
	    vp, uio, ioflag, cred);
	printf("msdosfs_write(): diroff %lu, dirclust %lu, startcluster %lu\n",
	    dep->de_diroffset, dep->de_dirclust, dep->de_StartCluster);
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = dep->de_FileSize;
		thisvp = vp;
		break;
	case VDIR:
		return EISDIR;
	default:
		panic("msdosfs_write(): bad file type");
	}

	/*
	 * This is needed (unlike in ffs_write()) because we extend the
	 * file outside of the loop but we don't want to extend the file
	 * for writes of 0 bytes.
	 */
	if (uio->uio_resid == 0)
		return (0);

	/*
	 * The caller is supposed to ensure that
	 * uio->uio_offset >= 0 and uio->uio_resid >= 0.
	 *
	 * If they've exceeded their filesize limit, tell them about it.
	 */
	error = vn_rlimit_fsizex(vp, uio, MSDOSFS_FILESIZE_MAX, &r,
	    uio->uio_td);
	if (error != 0) {
		vn_rlimit_fsizex_res(uio, r);
		return (error);
	}

	/*
	 * If the offset we are starting the write at is beyond the end of
	 * the file, then they've done a seek. Unix filesystems allow
	 * files with holes in them, DOS doesn't so we must fill the hole
	 * with zeroed blocks.
	 */
	if (uio->uio_offset > dep->de_FileSize) {
		error = deextend(dep, uio->uio_offset, cred);
		if (error != 0) {
			vn_rlimit_fsizex_res(uio, r);
			return (error);
		}
	}

	/*
	 * Remember some values in case the write fails.
	 */
	resid = uio->uio_resid;
	osize = dep->de_FileSize;

	/*
	 * If we write beyond the end of the file, extend it to its ultimate
	 * size ahead of the time to hopefully get a contiguous area.
	 */
	if (uio->uio_offset + resid > osize) {
		count = de_clcount(pmp, uio->uio_offset + resid) -
		    de_clcount(pmp, osize);
		error = extendfile(dep, count, NULL, NULL, 0);
		/*
		 * ENOSPC without IO_UNIT is tolerated here: the loop below
		 * writes into whatever clusters are available and stops at
		 * lastcn.
		 */
		if (error && (error != ENOSPC || (ioflag & IO_UNIT)))
			goto errexit;
		lastcn = dep->de_fc[FC_LASTFC].fc_frcn;
	} else
		lastcn = de_clcount(pmp, osize) - 1;

	seqcount = ioflag >> IO_SEQSHIFT;
	do {
		/* Ran past the last cluster that belongs to the file. */
		if (de_cluster(pmp, uio->uio_offset) > lastcn) {
			error = ENOSPC;
			break;
		}

		/* croffset: offset in the cluster; n: bytes for this pass. */
		croffset = uio->uio_offset & pmp->pm_crbomask;
		n = min(uio->uio_resid, pmp->pm_bpcluster - croffset);
		if (uio->uio_offset + n > dep->de_FileSize) {
			dep->de_FileSize = uio->uio_offset + n;
			/* The object size needs to be set before buffer is allocated */
			vnode_pager_setsize(vp, dep->de_FileSize);
		}

		bn = de_cluster(pmp, uio->uio_offset);
		if ((uio->uio_offset & pmp->pm_crbomask) == 0
		    && (de_cluster(pmp, uio->uio_offset + uio->uio_resid)
			> de_cluster(pmp, uio->uio_offset)
			|| uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) {
			/*
			 * If either the whole cluster gets written,
			 * or we write the cluster from its start beyond EOF,
			 * then no need to read data from disk.
			 */
			bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0, 0);
			/*
			 * This call to vfs_bio_clrbuf() ensures that
			 * even if vn_io_fault_uiomove() below faults,
			 * garbage from the newly instantiated buffer
			 * is not exposed to the userspace via mmap().
			 */
			vfs_bio_clrbuf(bp);
			/*
			 * Do the bmap now, since pcbmap needs buffers
			 * for the FAT table. (see msdosfs_strategy)
			 */
			if (bp->b_blkno == bp->b_lblkno) {
				error = pcbmap(dep, bp->b_lblkno, &bn, 0, 0);
				if (error)
					bp->b_blkno = -1;
				else
					bp->b_blkno = bn;
			}
			if (bp->b_blkno == -1) {
				brelse(bp);
				if (!error)
					error = EIO;		/* XXX */
				break;
			}
		} else {
			/*
			 * The block we need to write into exists, so read it in.
			 */
			error = bread(thisvp, bn, pmp->pm_bpcluster, cred, &bp);
			if (error) {
				break;
			}
		}

		/*
		 * Should these vnode_pager_* functions be done on dir
		 * files?
		 */

		/*
		 * Copy the data from user space into the buf header.
		 */
		error = vn_io_fault_uiomove(bp->b_data + croffset, n, uio);
		if (error) {
			brelse(bp);
			break;
		}

		/* Prepare for clustered writes in some else clauses. */
		if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
			bp->b_flags |= B_CLUSTEROK;

		/*
		 * If IO_SYNC, then each buffer is written synchronously.
		 * Otherwise, if we have a severe page deficiency then
		 * write the buffer asynchronously. Otherwise, if on a
		 * cluster boundary then write the buffer asynchronously,
		 * combining it with contiguous clusters if permitted and
		 * possible, since we don't expect more writes into this
		 * buffer soon. Otherwise, do a delayed write because we
		 * expect more writes into this buffer soon.
		 */
		if (ioflag & IO_SYNC)
			(void)bwrite(bp);
		else if (vm_page_count_severe() || buf_dirty_count_severe())
			bawrite(bp);
		else if (n + croffset == pmp->pm_bpcluster) {
			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
				cluster_write(vp, &dep->de_clusterw, bp,
				    dep->de_FileSize, seqcount, 0);
			else
				bawrite(bp);
		} else
			bdwrite(bp);
		dep->de_flag |= DE_UPDATE;
	} while (error == 0 && uio->uio_resid > 0);

	/*
	 * If the write failed and they want us to, truncate the file back
	 * to the size it was before the write was attempted.
	 */
errexit:
	if (error) {
		if (ioflag & IO_UNIT) {
			/* All-or-nothing: undo the size change and the uio. */
			detrunc(dep, osize, ioflag & IO_SYNC, NOCRED);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		} else {
			/* Keep what was written; a partial write is success. */
			detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED);
			if (uio->uio_resid != resid)
				error = 0;
		}
	} else if (ioflag & IO_SYNC)
		error = deupdat(dep, 1);
	vn_rlimit_fsizex_res(uio, r);
	return (error);
}

/*
 * Flush the blocks of a file to disk.
 *
 * vop_stdfsync() handles the vnode's own buffers.  Unless the caller
 * asked for MNT_NOWAIT, the device vnode is then locked and synced as
 * well, and finally the directory entry is written with deupdat().
 * The first error encountered among the device sync and deupdat() is
 * returned; the return value of vop_stdfsync() is not examined.
 */
static int
msdosfs_fsync(struct vop_fsync_args *ap)
{
	struct vnode *devvp;
	int allerror, error;

	vop_stdfsync(ap);

	/*
	 * If the syncing request comes from fsync(2), sync the entire
	 * FAT and any other metadata that happens to be on devvp. We
	 * need this mainly for the FAT.
We write the FAT sloppily, and
	 * syncing it all now is the best we can easily do to get all
	 * directory entries associated with the file (not just the file)
	 * fully synced. The other metadata includes critical metadata
	 * for all directory entries, but only in the MNT_ASYNC case. We
	 * will soon sync all metadata in the file's directory entry.
	 * Non-critical metadata for associated directory entries only
	 * gets synced accidentally, as in most file systems.
	 */
	if (ap->a_waitfor != MNT_NOWAIT) {
		devvp = VTODE(ap->a_vp)->de_pmp->pm_devvp;
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		allerror = VOP_FSYNC(devvp, MNT_WAIT, ap->a_td);
		VOP_UNLOCK(devvp);
	} else
		allerror = 0;

	/* Write the directory entry; wait for it unless MNT_NOWAIT. */
	error = deupdat(VTODE(ap->a_vp), ap->a_waitfor != MNT_NOWAIT);
	if (allerror == 0)
		allerror = error;
	return (allerror);
}

/*
 * Remove a file.  Directories are refused with EPERM; for anything
 * else the directory entry is deleted via removede() and its result is
 * returned.
 */
static int
msdosfs_remove(struct vop_remove_args *ap)
{
	struct denode *dep = VTODE(ap->a_vp);
	struct denode *ddep = VTODE(ap->a_dvp);
	int error;

	if (ap->a_vp->v_type == VDIR)
		error = EPERM;
	else
		error = removede(ddep, dep);
#ifdef MSDOSFS_DEBUG
	printf("msdosfs_remove(), dep %p, v_usecount %d\n", dep,
	    ap->a_vp->v_usecount);
#endif
	return (error);
}

/*
 * DOS filesystems don't know what links are.
 *
 * Always returns EOPNOTSUPP.
 */
static int
msdosfs_link(struct vop_link_args *ap)
{

	return (EOPNOTSUPP);
}

/*
 * Renames on files require moving the denode to a new hash queue since the
 * denode's location is used to compute which hash queue to put the file
 * in. Unless it is a rename in place. For example "mv a b".
*
 * What follows is the basic algorithm:
 *
 * if (file move) {
 *	if (dest file exists) {
 *		remove dest file
 *	}
 *	if (dest and src in same directory) {
 *		rewrite name in existing directory slot
 *	} else {
 *		write new entry in dest directory
 *		update offset and dirclust in denode
 *		move denode to new hash chain
 *		clear old directory entry
 *	}
 * } else {
 *	directory move
 *	if (dest directory exists) {
 *		if (dest is not empty) {
 *			return ENOTEMPTY
 *		}
 *		remove dest directory
 *	}
 *	if (dest and src in same directory) {
 *		rewrite name in existing entry
 *	} else {
 *		be sure dest is not a child of src directory
 *		write entry in dest directory
 *		update "." and ".." in moved directory
 *		clear old directory entry for moved directory
 *	}
 * }
 *
 * On entry:
 *	source's parent directory is unlocked
 *	source file or directory is unlocked
 *	destination's parent directory is locked
 *	destination file or directory is locked if it exists
 *
 * On exit:
 *	all denodes should be released
 */
/*
 * Exit labels used below:
 *   abortit - nothing has been relocked yet; the target side is still
 *             locked as passed in by the caller;
 *   releout - all vnodes are unlocked, only references are dropped;
 *   unlock  - fdvp, fvp, tdvp (and tvp if present) are expected to be
 *             locked and are released with vput().
 * The "relock" label restarts lock acquisition and both lookups from
 * scratch whenever a lock could not be taken without sleeping.
 */
static int
msdosfs_rename(struct vop_rename_args *ap)
{
	struct vnode *fdvp, *fvp, *tdvp, *tvp, *vp;
	struct componentname *fcnp, *tcnp;
	struct denode *fdip, *fip, *tdip, *tip, *nip;
	u_char toname[12], oldname[11];
	u_long to_diroffset;
	bool checkpath_locked, doingdirectory, newparent;
	int error;
	u_long cn, pcl, blkoff;
	daddr_t bn, wait_scn, scn;
	struct msdosfsmount *pmp;
	struct direntry *dotdotp;
	struct buf *bp;

	tdvp = ap->a_tdvp;
	fvp = ap->a_fvp;
	fdvp = ap->a_fdvp;
	tvp = ap->a_tvp;
	tcnp = ap->a_tcnp;
	fcnp = ap->a_fcnp;
	pmp = VFSTOMSDOSFS(fdvp->v_mount);

	/*
	 * Check for cross-device rename.
	 */
	if (fvp->v_mount != tdvp->v_mount ||
	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
		error = EXDEV;
		goto abortit;
	}

	/*
	 * If source and dest are the same, do nothing.
	 */
	if (tvp == fvp) {
		error = 0;
		goto abortit;
	}

	/*
	 * When the target exists, both the directory
	 * and target vnodes are passed locked.
	 */
	VOP_UNLOCK(tdvp);
	if (tvp != NULL && tvp != tdvp)
		VOP_UNLOCK(tvp);

	checkpath_locked = false;

relock:
	doingdirectory = newparent = false;

	/*
	 * Lock fdvp first, then try tdvp without sleeping.  If that
	 * fails, drop fdvp, wait for tdvp, release it again and retry.
	 */
	error = vn_lock(fdvp, LK_EXCLUSIVE);
	if (error != 0)
		goto releout;
	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
		VOP_UNLOCK(fdvp);
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (error != 0)
			goto releout;
		VOP_UNLOCK(tdvp);
		goto relock;
	}

	/* Look the source up again now that both directories are locked. */
	error = msdosfs_lookup_ino(fdvp, NULL, fcnp, &scn, &blkoff);
	if (error != 0) {
		VOP_UNLOCK(fdvp);
		VOP_UNLOCK(tdvp);
		goto releout;
	}
	error = deget(pmp, scn, blkoff, LK_EXCLUSIVE | LK_NOWAIT, &nip);
	if (error != 0) {
		VOP_UNLOCK(fdvp);
		VOP_UNLOCK(tdvp);
		if (error != EBUSY)
			goto releout;
		/*
		 * The source denode is busy: wait for it with the
		 * directories unlocked, adopt it as the new fvp (dropping
		 * the reference on the old one) and start over.
		 */
		error = deget(pmp, scn, blkoff, LK_EXCLUSIVE, &nip);
		if (error != 0)
			goto releout;
		vp = fvp;
		fvp = DETOV(nip);
		VOP_UNLOCK(fvp);
		vrele(vp);
		goto relock;
	}
	/* Replace the caller's fvp reference by the freshly locked vnode. */
	vrele(fvp);
	fvp = DETOV(nip);

	/* Same procedure for the target; EJUSTRETURN means "no such entry". */
	error = msdosfs_lookup_ino(tdvp, NULL, tcnp, &scn, &blkoff);
	if (error != 0 && error != EJUSTRETURN) {
		VOP_UNLOCK(fdvp);
		VOP_UNLOCK(tdvp);
		VOP_UNLOCK(fvp);
		goto releout;
	}
	if (error == EJUSTRETURN && tvp != NULL) {
		vrele(tvp);
		tvp = NULL;
	}
	if (error == 0) {
		nip = NULL;
		error = deget(pmp, scn, blkoff, LK_EXCLUSIVE | LK_NOWAIT,
		    &nip);
		if (tvp != NULL) {
			vrele(tvp);
			tvp = NULL;
		}
		if (error != 0) {
			VOP_UNLOCK(fdvp);
			VOP_UNLOCK(tdvp);
			VOP_UNLOCK(fvp);
			if (error != EBUSY)
				goto releout;
			error = deget(pmp, scn, blkoff, LK_EXCLUSIVE,
			    &nip);
			if (error != 0)
				goto releout;
			vput(DETOV(nip));
			goto relock;
		}
		tvp = DETOV(nip);
	}

	fdip = VTODE(fdvp);
	fip = VTODE(fvp);
	tdip = VTODE(tdvp);
	tip = tvp != NULL ? VTODE(tvp) : NULL;

	/*
	 * Remember direntry place to use for destination
	 */
	to_diroffset = tdip->de_fndoffset;

	/*
	 * Be sure we are not renaming ".", "..", or an alias of ".". This
	 * leads to a crippled directory tree. It's pretty tough to do a
	 * "ls" or "pwd" with the "." directory entry missing, and "cd .."
	 * doesn't work if the ".." entry is missing.
	 */
	if ((fip->de_Attributes & ATTR_DIRECTORY) != 0) {
		/*
		 * Avoid ".", "..", and aliases of "." for obvious reasons.
		 */
		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
		    fdip == fip ||
		    (fcnp->cn_flags & ISDOTDOT) != 0 ||
		    (tcnp->cn_flags & ISDOTDOT) != 0) {
			error = EINVAL;
			goto unlock;
		}
		doingdirectory = true;
	}

	/*
	 * If ".." must be changed (ie the directory gets a new
	 * parent) then the source directory must not be in the
	 * directory hierarchy above the target, as this would
	 * orphan everything below the source directory. Also
	 * the user must have write permission in the source so
	 * as to be able to change "..". We must repeat the call
	 * to namei, as the parent directory is unlocked by the
	 * call to doscheckpath().
	 */
	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, curthread);
	if (fdip->de_StartCluster != tdip->de_StartCluster)
		newparent = true;
	if (doingdirectory && newparent) {
		if (error != 0)	/* write access check above */
			goto unlock;
		lockmgr(&pmp->pm_checkpath_lock, LK_EXCLUSIVE, NULL);
		checkpath_locked = true;
		error = doscheckpath(fip, tdip, &wait_scn);
		if (wait_scn != 0) {
			/*
			 * doscheckpath() reported a cluster to wait on:
			 * drop every lock, wait for that denode, and start
			 * over.
			 *
			 * NOTE(review): if this deget() fails, control
			 * reaches "goto unlock" below with fdvp, tdvp, fvp
			 * (and tvp) already unlocked, yet the unlock path
			 * releases them with vput() -- confirm this is
			 * intended.
			 */
			lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL);
			checkpath_locked = false;
			VOP_UNLOCK(fdvp);
			VOP_UNLOCK(tdvp);
			VOP_UNLOCK(fvp);
			if (tvp != NULL && tvp != tdvp)
				VOP_UNLOCK(tvp);
			error = deget(pmp, wait_scn, 0, LK_EXCLUSIVE,
			    &nip);
			if (error == 0) {
				vput(DETOV(nip));
				goto relock;
			}
		}
		if (error != 0)
			goto unlock;
	}

	if (tip != NULL) {
		/*
		 * Target must be empty if a directory and have no links
		 * to it. Also, ensure source and target are compatible
		 * (both directories, or both not directories).
		 */
		if ((tip->de_Attributes & ATTR_DIRECTORY) != 0) {
			if (!dosdirempty(tip)) {
				error = ENOTEMPTY;
				goto unlock;
			}
			if (!doingdirectory) {
				error = ENOTDIR;
				goto unlock;
			}
			cache_purge(tdvp);
		} else if (doingdirectory) {
			error = EISDIR;
			goto unlock;
		}
		/* Repeat the lookup right before removing the target entry. */
		error = msdosfs_lookup_ino(tdvp, NULL, tcnp, &scn, &blkoff);
		MPASS(error == 0);
		error = removede(tdip, tip);
		if (error != 0)
			goto unlock;
		vput(tvp);
		tvp = NULL;
		tip = NULL;
	}

	/*
	 * Convert the filename in tcnp into a dos filename. We copy this
	 * into the denode and directory entry for the destination
	 * file/directory.
	 */
	error = uniqdosname(tdip, tcnp, toname);
	if (error != 0)
		goto unlock;

	/*
	 * First write a new entry in the destination
	 * directory and mark the entry in the source directory
	 * as deleted. Then move the denode to the correct hash
	 * chain for its new location in the filesystem. And, if
	 * we moved a directory, then update its .. entry to point
	 * to the new parent directory.
	 */
	memcpy(oldname, fip->de_Name, 11);
	memcpy(fip->de_Name, toname, 11);	/* update denode */
	error = msdosfs_lookup_ino(tdvp, NULL, tcnp, &scn, &blkoff);
	if (error == EJUSTRETURN) {
		tdip->de_fndoffset = to_diroffset;
		error = createde(fip, tdip, NULL, tcnp);
	}
	if (error != 0) {
		/* Roll the in-core name back before bailing out. */
		memcpy(fip->de_Name, oldname, 11);
		goto unlock;
	}

	/*
	 * If fip is for a directory, then its name should always
	 * be "." since it is for the directory entry in the
	 * directory itself (msdosfs_lookup() always translates
	 * to the "." entry so as to get a unique denode, except
	 * for the root directory there are different
	 * complications). However, we just corrupted its name
	 * to pass the correct name to createde(). Undo this.
	 */
	if ((fip->de_Attributes & ATTR_DIRECTORY) != 0)
		memcpy(fip->de_Name, oldname, 11);
	fip->de_refcnt++;
	error = msdosfs_lookup_ino(fdvp, NULL, fcnp, &scn, &blkoff);
	MPASS(error == 0);
	error = removede(fdip, fip);
	if (error != 0) {
		printf("%s: removede %s %s err %d\n",
		    pmp->pm_mountp->mnt_stat.f_mntonname,
		    fdip->de_Name, fip->de_Name, error);
		msdosfs_integrity_error(pmp);
		goto unlock;
	}
	/*
	 * For a file, record where its directory entry now lives; a
	 * directory's denode is left as it is.
	 */
	if (!doingdirectory) {
		error = pcbmap(tdip, de_cluster(pmp, to_diroffset), 0,
		    &fip->de_dirclust, 0);
		if (error != 0) {
			/*
			 * XXX should downgrade to ro here,
			 * fs is corrupt
			 */
			goto unlock;
		}
		if (fip->de_dirclust == MSDOSFSROOT)
			fip->de_diroffset = to_diroffset;
		else
			fip->de_diroffset = to_diroffset & pmp->pm_crbomask;
	}
	reinsert(fip);

	/*
	 * If we moved a directory to a new parent directory, then we must
	 * fixup the ".."
entry in the moved directory. */ if (doingdirectory && newparent) { cn = fip->de_StartCluster; if (cn == MSDOSFSROOT) { /* this should never happen */ panic("msdosfs_rename(): updating .. in root directory?"); } else bn = cntobn(pmp, cn); error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster, NOCRED, &bp); if (error != 0) { printf("%s: block read error %d while renaming dir\n", pmp->pm_mountp->mnt_stat.f_mntonname, error); msdosfs_integrity_error(pmp); goto unlock; } dotdotp = (struct direntry *)bp->b_data + 1; pcl = tdip->de_StartCluster; if (FAT32(pmp) && pcl == pmp->pm_rootdirblk) pcl = MSDOSFSROOT; putushort(dotdotp->deStartCluster, pcl); if (FAT32(pmp)) putushort(dotdotp->deHighClust, pcl >> 16); if (DOINGASYNC(fvp)) bdwrite(bp); else if ((error = bwrite(bp)) != 0) { printf("%s: block write error %d while renaming dir\n", pmp->pm_mountp->mnt_stat.f_mntonname, error); msdosfs_integrity_error(pmp); goto unlock; } } /* * The msdosfs lookup is case insensitive. Several aliases may * be inserted for a single directory entry. As a consequnce, * name cache purge done by lookup for fvp when DELETE op for * namei is specified, might be not enough to expunge all * namecache entries that were installed for this direntry. */ cache_purge(fvp); unlock: if (checkpath_locked) lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL); vput(fdvp); vput(fvp); if (tvp != NULL) { if (tvp != tdvp) vput(tvp); else vrele(tvp); } vput(tdvp); return (error); releout: MPASS(!checkpath_locked); vrele(tdvp); if (tvp != NULL) vrele(tvp); vrele(fdvp); vrele(fvp); return (error); abortit: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp != NULL) vput(tvp); vrele(fdvp); vrele(fvp); return (error); } static struct { struct direntry dot; struct direntry dotdot; } dosdirtemplate = { { ". ", /* the . 
entry */ ATTR_DIRECTORY, /* file attribute */ 0, /* reserved */ 0, { 0, 0 }, { 0, 0 }, /* create time & date */ { 0, 0 }, /* access date */ { 0, 0 }, /* high bits of start cluster */ { 210, 4 }, { 210, 4 }, /* modify time & date */ { 0, 0 }, /* startcluster */ { 0, 0, 0, 0 } /* filesize */ }, { ".. ", /* the .. entry */ ATTR_DIRECTORY, /* file attribute */ 0, /* reserved */ 0, { 0, 0 }, { 0, 0 }, /* create time & date */ { 0, 0 }, /* access date */ { 0, 0 }, /* high bits of start cluster */ { 210, 4 }, { 210, 4 }, /* modify time & date */ { 0, 0 }, /* startcluster */ { 0, 0, 0, 0 } /* filesize */ } }; static int msdosfs_mkdir(struct vop_mkdir_args *ap) { struct componentname *cnp = ap->a_cnp; struct denode *dep; struct denode *pdep = VTODE(ap->a_dvp); struct direntry *denp; struct msdosfsmount *pmp = pdep->de_pmp; struct buf *bp; u_long newcluster, pcl; int bn; int error; struct denode ndirent; struct timespec ts; /* * If this is the root directory and there is no space left we * can't do anything. This is because the root directory can not * change size. */ if (pdep->de_StartCluster == MSDOSFSROOT && pdep->de_fndoffset >= pdep->de_FileSize) { error = ENOSPC; goto bad2; } /* * Allocate a cluster to hold the about to be created directory. */ error = clusteralloc(pmp, 0, 1, CLUST_EOFE, &newcluster, NULL); if (error) goto bad2; memset(&ndirent, 0, sizeof(ndirent)); ndirent.de_pmp = pmp; ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE; vfs_timestamp(&ts); DETIMES(&ndirent, &ts, &ts, &ts); /* * Now fill the cluster with the "." and ".." entries. And write * the cluster to disk. This way it is there for the parent * directory to be pointing at if there were a crash. 
*/ bn = cntobn(pmp, newcluster); /* always succeeds */ bp = getblk(pmp->pm_devvp, bn, pmp->pm_bpcluster, 0, 0, 0); memset(bp->b_data, 0, pmp->pm_bpcluster); memcpy(bp->b_data, &dosdirtemplate, sizeof dosdirtemplate); denp = (struct direntry *)bp->b_data; putushort(denp[0].deStartCluster, newcluster); putushort(denp[0].deCDate, ndirent.de_CDate); putushort(denp[0].deCTime, ndirent.de_CTime); denp[0].deCHundredth = ndirent.de_CHun; putushort(denp[0].deADate, ndirent.de_ADate); putushort(denp[0].deMDate, ndirent.de_MDate); putushort(denp[0].deMTime, ndirent.de_MTime); pcl = pdep->de_StartCluster; /* * Although the root directory has a non-magic starting cluster * number for FAT32, chkdsk and fsck_msdosfs still require * references to it in dotdot entries to be magic. */ if (FAT32(pmp) && pcl == pmp->pm_rootdirblk) pcl = MSDOSFSROOT; putushort(denp[1].deStartCluster, pcl); putushort(denp[1].deCDate, ndirent.de_CDate); putushort(denp[1].deCTime, ndirent.de_CTime); denp[1].deCHundredth = ndirent.de_CHun; putushort(denp[1].deADate, ndirent.de_ADate); putushort(denp[1].deMDate, ndirent.de_MDate); putushort(denp[1].deMTime, ndirent.de_MTime); if (FAT32(pmp)) { putushort(denp[0].deHighClust, newcluster >> 16); putushort(denp[1].deHighClust, pcl >> 16); } if (DOINGASYNC(ap->a_dvp)) bdwrite(bp); else if ((error = bwrite(bp)) != 0) goto bad; /* * Now build up a directory entry pointing to the newly allocated * cluster. This will be written to an empty slot in the parent * directory. 
*/ error = uniqdosname(pdep, cnp, ndirent.de_Name); if (error) goto bad; ndirent.de_Attributes = ATTR_DIRECTORY; ndirent.de_LowerCase = 0; ndirent.de_StartCluster = newcluster; ndirent.de_FileSize = 0; error = createde(&ndirent, pdep, &dep, cnp); if (error) goto bad; *ap->a_vpp = DETOV(dep); return (0); bad: clusterfree(pmp, newcluster); bad2: return (error); } static int msdosfs_rmdir(struct vop_rmdir_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct denode *ip, *dp; int error; ip = VTODE(vp); dp = VTODE(dvp); /* * Verify the directory is empty (and valid). * (Rmdir ".." won't be valid since * ".." will contain a reference to * the current directory and thus be * non-empty.) */ error = 0; if (!dosdirempty(ip)) { error = ENOTEMPTY; goto out; } /* * Delete the entry from the directory. For dos filesystems this * gets rid of the directory entry on disk, the in memory copy * still exists but the de_refcnt is <= 0. This prevents it from * being found by deget(). When the vput() on dep is done we give * up access and eventually msdosfs_reclaim() will be called which * will remove it from the denode cache. */ error = removede(dp, ip); if (error) goto out; /* * This is where we decrement the link count in the parent * directory. Since dos filesystems don't do this we just purge * the name cache. */ cache_purge(dvp); /* * Truncate the directory that is being deleted. */ error = detrunc(ip, (u_long)0, IO_SYNC, cnp->cn_cred); cache_purge(vp); out: return (error); } /* * DOS filesystems don't know what symlinks are. 
 */ static int msdosfs_symlink(struct vop_symlink_args *ap) { return (EOPNOTSUPP); } /* * Copy directory entries of ap->a_vp into ap->a_uio as struct dirent * records, starting at byte offset uio->uio_offset of the on-disk * directory. For the root directory "." and ".." are synthesized and * the offset is biased by two entries to account for them. When * ap->a_ncookies is non-NULL a cookie array is allocated and handed * back through ap->a_cookies; when ap->a_eofflag is non-NULL it is set * on return. Returns 0 or an errno value. */ static int msdosfs_readdir(struct vop_readdir_args *ap) { struct mbnambuf nb; int error = 0; int diff; long n; int blsize; long on; u_long cn; u_long dirsperblk; long bias = 0; daddr_t bn, lbn; struct buf *bp; struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct direntry *dentp; struct dirent dirbuf; struct uio *uio = ap->a_uio; uint64_t *cookies = NULL; int ncookies = 0; off_t offset, off; int chksum = -1; #ifdef MSDOSFS_DEBUG printf("msdosfs_readdir(): vp %p, uio %p, cred %p, eofflagp %p\n", ap->a_vp, uio, ap->a_cred, ap->a_eofflag); #endif /* * msdosfs_readdir() won't operate properly on regular files since * it does i/o only with the filesystem vnode, and hence can * retrieve the wrong block from the buffer cache for a plain file. * So, fail attempts to readdir() on a plain file. */ if ((dep->de_Attributes & ATTR_DIRECTORY) == 0) return (ENOTDIR); /* * To be safe, initialize dirbuf */ memset(dirbuf.d_name, 0, sizeof(dirbuf.d_name)); /* * If the user buffer is smaller than the size of one dos directory * entry or the file offset is not a multiple of the size of a * directory entry, then we fail the read. */ off = offset = uio->uio_offset; if (uio->uio_resid < sizeof(struct direntry) || (offset & (sizeof(struct direntry) - 1))) return (EINVAL); if (ap->a_ncookies) { /* NOTE(review): 16 looks like an assumed minimum size of one returned record - confirm */ ncookies = uio->uio_resid / 16; cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK); *ap->a_cookies = cookies; *ap->a_ncookies = ncookies; } dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry); /* * If they are reading from the root directory then, we simulate * the . and .. entries since these don't exist in the root * directory. We also set the offset bias to make up for having to * simulate these entries. By this I mean that at file offset 64 we * read the first entry in the root directory that lives on disk.
 */ if (dep->de_StartCluster == MSDOSFSROOT || (FAT32(pmp) && dep->de_StartCluster == pmp->pm_rootdirblk)) { #if 0 printf("msdosfs_readdir(): going after . or .. in root dir, offset %d\n", offset); #endif bias = 2 * sizeof(struct direntry); if (offset < bias) { for (n = (int)offset / sizeof(struct direntry); n < 2; n++) { dirbuf.d_fileno = FAT32(pmp) ? (uint64_t)cntobn(pmp, pmp->pm_rootdirblk) * dirsperblk : 1; dirbuf.d_type = DT_DIR; switch (n) { case 0: dirbuf.d_namlen = 1; dirbuf.d_name[0] = '.'; break; case 1: dirbuf.d_namlen = 2; dirbuf.d_name[0] = '.'; dirbuf.d_name[1] = '.'; break; } dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf); /* NOTE: d_off is the offset of the *next* entry. */ dirbuf.d_off = offset + sizeof(struct direntry); dirent_terminate(&dirbuf); if (uio->uio_resid < dirbuf.d_reclen) goto out; error = uiomove(&dirbuf, dirbuf.d_reclen, uio); if (error) goto out; offset += sizeof(struct direntry); off = offset; if (cookies) { *cookies++ = offset; if (--ncookies <= 0) goto out; } } } } mbnambuf_init(&nb); off = offset; /* One pass per cluster: map it, read it, then walk its entries. */ while (uio->uio_resid > 0) { lbn = de_cluster(pmp, offset - bias); on = (offset - bias) & pmp->pm_crbomask; n = min(pmp->pm_bpcluster - on, uio->uio_resid); diff = dep->de_FileSize - (offset - bias); if (diff <= 0) break; n = min(n, diff); error = pcbmap(dep, lbn, &bn, &cn, &blsize); if (error) break; error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp); if (error) { /* Returns directly: the bookkeeping at out: is not run on this path. */ return (error); } n = min(n, blsize - bp->b_resid); if (n == 0) { brelse(bp); return (EIO); } /* * Convert from dos directory entries to fs-independent * directory entries. */ for (dentp = (struct direntry *)(bp->b_data + on); (char *)dentp < bp->b_data + on + n; dentp++, offset += sizeof(struct direntry)) { #if 0 printf("rd: dentp %08x prev %08x crnt %08x deName %02x attr %02x\n", dentp, prev, crnt, dentp->deName[0], dentp->deAttributes); #endif /* * If this is an unused entry, we can stop.
 */ if (dentp->deName[0] == SLOT_EMPTY) { brelse(bp); goto out; } /* * Skip deleted entries. */ if (dentp->deName[0] == SLOT_DELETED) { chksum = -1; mbnambuf_init(&nb); continue; } /* * Handle Win95 long directory entries */ if (dentp->deAttributes == ATTR_WIN95) { if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) continue; chksum = win2unixfn(&nb, (struct winentry *)dentp, chksum, pmp); continue; } /* * Skip volume labels */ if (dentp->deAttributes & ATTR_VOLUME) { chksum = -1; mbnambuf_init(&nb); continue; } /* * This computation of d_fileno must match * the computation of va_fileid in * msdosfs_getattr. */ if (dentp->deAttributes & ATTR_DIRECTORY) { cn = getushort(dentp->deStartCluster); if (FAT32(pmp)) { cn |= getushort(dentp->deHighClust) << 16; if (cn == MSDOSFSROOT) cn = pmp->pm_rootdirblk; } if (cn == MSDOSFSROOT && !FAT32(pmp)) dirbuf.d_fileno = 1; else dirbuf.d_fileno = cntobn(pmp, cn) * dirsperblk; dirbuf.d_type = DT_DIR; } else { dirbuf.d_fileno = (uoff_t)offset / sizeof(struct direntry); dirbuf.d_type = DT_REG; } /* Use the short name unless the accumulated long-name checksum matches this entry. */ if (chksum != winChksum(dentp->deName)) { dirbuf.d_namlen = dos2unixfn(dentp->deName, (u_char *)dirbuf.d_name, dentp->deLowerCase | ((pmp->pm_flags & MSDOSFSMNT_SHORTNAME) ? (LCASE_BASE | LCASE_EXT) : 0), pmp); mbnambuf_init(&nb); } else mbnambuf_flush(&nb, &dirbuf); chksum = -1; dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf); /* NOTE: d_off is the offset of the *next* entry.
 */ dirbuf.d_off = offset + sizeof(struct direntry); dirent_terminate(&dirbuf); if (uio->uio_resid < dirbuf.d_reclen) { brelse(bp); goto out; } error = uiomove(&dirbuf, dirbuf.d_reclen, uio); if (error) { brelse(bp); goto out; } if (cookies) { *cookies++ = offset + sizeof(struct direntry); if (--ncookies <= 0) { brelse(bp); goto out; } } off = offset + sizeof(struct direntry); } brelse(bp); } out: /* Subtract unused cookies */ if (ap->a_ncookies) *ap->a_ncookies -= ncookies; uio->uio_offset = off; /* * Set the eofflag (NFS uses it) */ if (ap->a_eofflag) { if (dep->de_FileSize - (offset - bias) <= 0) *ap->a_eofflag = 1; else *ap->a_eofflag = 0; } return (error); } /*- * a_vp - pointer to the file's vnode * a_bn - logical block number within the file (cluster number for us) * a_bop - where to return the bufobj of the special file containing the fs * a_bnp - where to return the "physical" block number corresponding to a_bn * (relative to the special file; units are blocks of size DEV_BSIZE) * a_runp - where to return the "run past" a_bn. This is the count of logical * blocks whose physical blocks (together with a_bn's physical block) * are contiguous. * a_runb - where to return the "run before" a_bn. */ static int msdosfs_bmap(struct vop_bmap_args *ap) { struct fatcache savefc; struct denode *dep; struct mount *mp; struct msdosfsmount *pmp; struct vnode *vp; daddr_t runbn; u_long cn; int bnpercn, error, maxio, maxrun, run; vp = ap->a_vp; dep = VTODE(vp); pmp = dep->de_pmp; if (ap->a_bop != NULL) *ap->a_bop = &pmp->pm_devvp->v_bufobj; if (ap->a_bnp == NULL) return (0); if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; cn = ap->a_bn; /* cn is a u_long; a mismatch means a_bn did not survive the conversion. */ if (cn != ap->a_bn) return (EFBIG); error = pcbmap(dep, cn, ap->a_bnp, NULL, NULL); if (error != 0 || (ap->a_runp == NULL && ap->a_runb == NULL)) return (error); /* * Prepare to back out updates of the fatchain cache after the one * for the first block done by pcbmap() above.
Without the backout, * then whenever the caller doesn't do i/o to all of the blocks that * we find, the single useful cache entry would be too far in advance * of the actual i/o to work for the next sequential i/o. Then the * FAT would be searched from the beginning. With the backout, the * FAT is searched starting at most a few blocks early. This wastes * much less time. Time is also wasted finding more blocks than the * caller will do i/o to. This is necessary because the runlength * parameters are output-only. */ savefc = dep->de_fc[FC_LASTMAP]; mp = vp->v_mount; maxio = mp->mnt_iosize_max / mp->mnt_stat.f_iosize; bnpercn = de_cn2bn(pmp, 1); /* Count the clusters after cn whose device blocks follow a_bnp contiguously. */ if (ap->a_runp != NULL) { maxrun = ulmin(maxio - 1, pmp->pm_maxcluster - cn); for (run = 1; run <= maxrun; run++) { if (pcbmap(dep, cn + run, &runbn, NULL, NULL) != 0 || runbn != *ap->a_bnp + run * bnpercn) break; } *ap->a_runp = run - 1; } /* Same scan towards lower cluster numbers. */ if (ap->a_runb != NULL) { maxrun = ulmin(maxio - 1, cn); for (run = 1; run < maxrun; run++) { if (pcbmap(dep, cn - run, &runbn, NULL, NULL) != 0 || runbn != *ap->a_bnp - run * bnpercn) break; } *ap->a_runb = run - 1; } dep->de_fc[FC_LASTMAP] = savefc; return (0); } SYSCTL_NODE(_vfs, OID_AUTO, msdosfs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "msdos filesystem"); /* Non-zero makes msdosfs_getpages() use vfs_bio_getpages(). */ static int use_buf_pager = 1; SYSCTL_INT(_vfs_msdosfs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN, &use_buf_pager, 0, "Use buffer pager instead of bmap"); /* * Callback handed to vfs_bio_getpages(): convert a byte offset in the * file into a logical block number with de_cluster(). */ static daddr_t msdosfs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off) { return (de_cluster(VTODE(vp)->de_pmp, off)); } /* * Callback handed to vfs_bio_getpages(): report pm_bpcluster as the size * of every block; lbn is not consulted. */ static int msdosfs_gbp_getblksz(struct vnode *vp, daddr_t lbn, long *sz) { *sz = VTODE(vp)->de_pmp->pm_bpcluster; return (0); } /* * Page in through vfs_bio_getpages() when use_buf_pager is set, otherwise * through vnode_pager_generic_getpages(). */ static int msdosfs_getpages(struct vop_getpages_args *ap) { if (use_buf_pager) return (vfs_bio_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead, msdosfs_gbp_getblkno, msdosfs_gbp_getblksz)); return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL)); } /* * Start the i/o described by ap->a_bp, resolving the device block number * with pcbmap() first when b_blkno still equals b_lblkno. Always returns * 0; a pcbmap() failure is reported through the buf (b_error and * BIO_ERROR) before bufdone() is called. */ static int
msdosfs_strategy(struct vop_strategy_args *ap) { struct buf *bp = ap->a_bp; struct denode *dep = VTODE(ap->a_vp); struct bufobj *bo; int error = 0; daddr_t blkno; /* * If we don't already know the filesystem relative block number * then get it using pcbmap(). If pcbmap() returns the block * number as -1 then we've got a hole in the file. DOS filesystems * don't allow files with holes, so we shouldn't ever see this. */ if (bp->b_blkno == bp->b_lblkno) { error = pcbmap(dep, bp->b_lblkno, &blkno, 0, 0); bp->b_blkno = blkno; if (error) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return (0); } if ((long)bp->b_blkno == -1) vfs_bio_clrbuf(bp); } /* A block number of -1 completes the buf without issuing any i/o. */ if (bp->b_blkno == -1) { bufdone(bp); return (0); } /* * Read/write the block from/to the disk that contains the desired * file block. */ bp->b_iooffset = dbtob(bp->b_blkno); bo = dep->de_pmp->pm_bo; BO_STRATEGY(bo, bp); return (0); } /* * Print the denode's start cluster, directory cluster, directory offset * and device name. */ static int msdosfs_print(struct vop_print_args *ap) { struct denode *dep = VTODE(ap->a_vp); printf("\tstartcluster %lu, dircluster %lu, diroffset %lu, ", dep->de_StartCluster, dep->de_dirclust, dep->de_diroffset); printf("on dev %s\n", devtoname(dep->de_pmp->pm_dev)); return (0); } /* * Answer the pathconf names handled below through *ap->a_retval; any * other name is passed to vop_stdpathconf(). */ static int msdosfs_pathconf(struct vop_pathconf_args *ap) { struct msdosfsmount *pmp = VTODE(ap->a_vp)->de_pmp; switch (ap->a_name) { case _PC_FILESIZEBITS: *ap->a_retval = 32; return (0); case _PC_LINK_MAX: *ap->a_retval = 1; return (0); case _PC_NAME_MAX: *ap->a_retval = pmp->pm_flags & MSDOSFSMNT_LONGNAME ?
WIN_MAXLEN : 12; return (0); case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; return (0); case _PC_NO_TRUNC: *ap->a_retval = 0; return (0); default: return (vop_stdpathconf(ap)); } /* NOTREACHED */ } /* * Fill the file handle at ap->a_fhp, viewed as a struct defid, with the * denode's directory cluster and directory offset. Always returns 0. */ static int msdosfs_vptofh(struct vop_vptofh_args *ap) { struct denode *dep; struct defid *defhp; + _Static_assert(sizeof(struct defid) <= sizeof(struct fid), + "struct defid cannot be larger than struct fid"); dep = VTODE(ap->a_vp); defhp = (struct defid *)ap->a_fhp; defhp->defid_len = sizeof(struct defid); defhp->defid_dirclust = dep->de_dirclust; defhp->defid_dirofs = dep->de_diroffset; /* defhp->defid_gen = dep->de_gen; */ return (0); } /* Global vfs data structures for msdosfs */ struct vop_vector msdosfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = msdosfs_access, .vop_bmap = msdosfs_bmap, .vop_getpages = msdosfs_getpages, .vop_cachedlookup = msdosfs_lookup, .vop_open = msdosfs_open, .vop_close = msdosfs_close, .vop_create = msdosfs_create, .vop_fsync = msdosfs_fsync, .vop_fdatasync = vop_stdfdatasync_buf, .vop_getattr = msdosfs_getattr, .vop_inactive = msdosfs_inactive, .vop_link = msdosfs_link, .vop_lookup = vfs_cache_lookup, .vop_mkdir = msdosfs_mkdir, .vop_mknod = msdosfs_mknod, .vop_pathconf = msdosfs_pathconf, .vop_print = msdosfs_print, .vop_read = msdosfs_read, .vop_readdir = msdosfs_readdir, .vop_reclaim = msdosfs_reclaim, .vop_remove = msdosfs_remove, .vop_rename = msdosfs_rename, .vop_rmdir = msdosfs_rmdir, .vop_setattr = msdosfs_setattr, .vop_strategy = msdosfs_strategy, .vop_symlink = msdosfs_symlink, .vop_write = msdosfs_write, .vop_vptofh = msdosfs_vptofh, }; VFS_VOP_VECTOR_REGISTER(msdosfs_vnodeops); diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c index 428c31f3c59a..162977b8abf7 100644 --- a/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -1,2232 +1,2234 @@ /* $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2005, 2006 The
NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Julio M. Merino Vidal, developed as part of Google's Summer of Code * 2005 program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * tmpfs vnode interface. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include SYSCTL_DECL(_vfs_tmpfs); VFS_SMR_DECLARE; static volatile int tmpfs_rename_restarts; SYSCTL_INT(_vfs_tmpfs, OID_AUTO, rename_restarts, CTLFLAG_RD, __DEVOLATILE(int *, &tmpfs_rename_restarts), 0, "Times rename had to restart due to lock contention"); MALLOC_DEFINE(M_TMPFSEA, "tmpfs extattr", "tmpfs extattr structure"); static int tmpfs_vn_get_ino_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **rvp) { return (tmpfs_alloc_vp(mp, arg, lkflags, rvp)); } static int tmpfs_lookup1(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) { struct tmpfs_dirent *de; struct tmpfs_node *dnode, *pnode; struct tmpfs_mount *tm; int error; /* Caller assumes responsibility for ensuring access (VEXEC). */ dnode = VP_TO_TMPFS_DIR(dvp); *vpp = NULLVP; /* We cannot be requesting the parent directory of the root node. */ MPASS(IMPLIES(dnode->tn_type == VDIR && dnode->tn_dir.tn_parent == dnode, !(cnp->cn_flags & ISDOTDOT))); TMPFS_ASSERT_LOCKED(dnode); if (dnode->tn_dir.tn_parent == NULL) { error = ENOENT; goto out; } if (cnp->cn_flags & ISDOTDOT) { tm = VFS_TO_TMPFS(dvp->v_mount); pnode = dnode->tn_dir.tn_parent; tmpfs_ref_node(pnode); error = vn_vget_ino_gen(dvp, tmpfs_vn_get_ino_alloc, pnode, cnp->cn_lkflags, vpp); tmpfs_free_node(tm, pnode); if (error != 0) goto out; } else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { VREF(dvp); *vpp = dvp; error = 0; } else { de = tmpfs_dir_lookup(dnode, NULL, cnp); if (de != NULL && de->td_node == NULL) cnp->cn_flags |= ISWHITEOUT; if (de == NULL || de->td_node == NULL) { /* * The entry was not found in the directory. * This is OK if we are creating or renaming an * entry and are working on the last component of * the path name. 
 */ if ((cnp->cn_flags & ISLASTCN) && (cnp->cn_nameiop == CREATE || \ cnp->cn_nameiop == RENAME || (cnp->cn_nameiop == DELETE && cnp->cn_flags & DOWHITEOUT && cnp->cn_flags & ISWHITEOUT))) { error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, curthread); if (error != 0) goto out; error = EJUSTRETURN; } else error = ENOENT; } else { struct tmpfs_node *tnode; /* * The entry was found, so get its associated * tmpfs_node. */ tnode = de->td_node; /* * If we are not at the last path component and * found a non-directory or non-link entry (which * may itself be pointing to a directory), raise * an error. */ if ((tnode->tn_type != VDIR && tnode->tn_type != VLNK) && !(cnp->cn_flags & ISLASTCN)) { error = ENOTDIR; goto out; } /* * If we are deleting or renaming the entry, keep * track of its tmpfs_dirent so that it can be * easily deleted later. */ if ((cnp->cn_flags & ISLASTCN) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, curthread); if (error != 0) goto out; /* Allocate a new vnode on the matching entry. */ error = tmpfs_alloc_vp(dvp->v_mount, tnode, cnp->cn_lkflags, vpp); if (error != 0) goto out; /* * With S_ISTXT set on the directory, fail with EPERM * unless VADMIN access is granted on the directory or * on the entry itself. */ if ((dnode->tn_mode & S_ISTXT) && VOP_ACCESS(dvp, VADMIN, cnp->cn_cred, curthread) && VOP_ACCESS(*vpp, VADMIN, cnp->cn_cred, curthread)) { error = EPERM; vput(*vpp); *vpp = NULL; goto out; } } else { error = tmpfs_alloc_vp(dvp->v_mount, tnode, cnp->cn_lkflags, vpp); if (error != 0) goto out; } } } /* * Store the result of this lookup in the cache. Avoid this if the * request was for creation, as it does not improve timings on * empirical tests. */ if ((cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp)) cache_enter(dvp, *vpp, cnp); out: #ifdef INVARIANTS /* * If there were no errors, *vpp cannot be null and it must be * locked.
 */ if (error == 0) { MPASS(*vpp != NULLVP); ASSERT_VOP_LOCKED(*vpp, __func__); } else { MPASS(*vpp == NULL); } #endif return (error); } /* Cached-lookup entry point: forwards straight to tmpfs_lookup1(). */ static int tmpfs_cached_lookup(struct vop_cachedlookup_args *v) { return (tmpfs_lookup1(v->a_dvp, v->a_vpp, v->a_cnp)); } /* * Lookup entry point that first checks the directory with * vn_dir_check_exec() and then calls tmpfs_lookup1(). */ static int tmpfs_lookup(struct vop_lookup_args *v) { struct vnode *dvp = v->a_dvp; struct vnode **vpp = v->a_vpp; struct componentname *cnp = v->a_cnp; int error; /* Check accessibility of requested node as a first step. */ error = vn_dir_check_exec(dvp, cnp); if (error != 0) return (error); return (tmpfs_lookup1(dvp, vpp, cnp)); } /* * Create a regular file or socket with tmpfs_alloc_file() and, on * success, enter it into the name cache when MAKEENTRY is set and * tmpfs_use_nc() allows it. */ static int tmpfs_create(struct vop_create_args *v) { struct vnode *dvp = v->a_dvp; struct vnode **vpp = v->a_vpp; struct componentname *cnp = v->a_cnp; struct vattr *vap = v->a_vap; int error; MPASS(vap->va_type == VREG || vap->va_type == VSOCK); error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL); if (error == 0 && (cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp)) cache_enter(dvp, *vpp, cnp); return (error); } /* * Create a block device, character device or FIFO node; any other * requested type is rejected with EINVAL. */ static int tmpfs_mknod(struct vop_mknod_args *v) { struct vnode *dvp = v->a_dvp; struct vnode **vpp = v->a_vpp; struct componentname *cnp = v->a_cnp; struct vattr *vap = v->a_vap; if (vap->va_type != VBLK && vap->va_type != VCHR && vap->va_type != VFIFO) return (EINVAL); return (tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL)); } struct fileops tmpfs_fnops; /* * Open the node behind v->a_vp. Fails with ENOENT when the node has no * links left and with EPERM for a non-appending write to an APPEND node. * For a regular file opened through a struct file, the file is set up * with tmpfs_fnops and takes a reference on the node. */ static int tmpfs_open(struct vop_open_args *v) { struct vnode *vp; struct tmpfs_node *node; struct file *fp; int error, mode; vp = v->a_vp; mode = v->a_mode; node = VP_TO_TMPFS_NODE(vp); /* * The file is still active but all its names have been removed * (e.g. by a "rmdir $(pwd)"). It cannot be opened any more as * it is about to die. */ if (node->tn_links < 1) return (ENOENT); /* If the file is marked append-only, deny write requests. */ if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE) error = EPERM; else { error = 0; /* For regular files, the call below is nop.
 */ KASSERT(vp->v_type != VREG || (node->tn_reg.tn_aobj->flags & OBJ_DEAD) == 0, ("dead object")); vnode_create_vobject(vp, node->tn_size, v->a_td); } fp = v->a_fp; MPASS(fp == NULL || fp->f_data == NULL); if (error == 0 && fp != NULL && vp->v_type == VREG) { /* Node reference taken for the file; presumably the one tmpfs_fo_close() drops - confirm. */ tmpfs_ref_node(node); finit_vnode(fp, mode, node, &tmpfs_fnops); } return (error); } /* Close: only refreshes the node times via tmpfs_update(); always returns 0. */ static int tmpfs_close(struct vop_close_args *v) { struct vnode *vp = v->a_vp; /* Update node times. */ tmpfs_update(vp); return (0); } /* * File-level close: frees the node reference stored in fp->f_data, if * any, and then calls vnops.fo_close(). */ int tmpfs_fo_close(struct file *fp, struct thread *td) { struct tmpfs_node *node; node = fp->f_data; if (node != NULL) { MPASS(node->tn_type == VREG); tmpfs_free_node(node->tn_reg.tn_tmp, node); } return (vnops.fo_close(fp, td)); } /* * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see * the comment above cache_fplookup for details. */ int tmpfs_fplookup_vexec(struct vop_fplookup_vexec_args *v) { struct vnode *vp; struct tmpfs_node *node; struct ucred *cred; mode_t all_x, mode; vp = v->a_vp; node = VP_TO_TMPFS_NODE_SMR(vp); if (__predict_false(node == NULL)) return (EAGAIN); all_x = S_IXUSR | S_IXGRP | S_IXOTH; mode = atomic_load_short(&node->tn_mode); /* Execute permission for user, group and other: no credential check needed. */ if (__predict_true((mode & all_x) == all_x)) return (0); cred = v->a_cred; return (vaccess_vexec_smr(mode, node->tn_uid, node->tn_gid, cred)); } /* * Permission check on node: EPERM for a write to an IMMUTABLE node, * otherwise the result of vaccess() on the node's mode, uid and gid. */ static int tmpfs_access_locked(struct vnode *vp, struct tmpfs_node *node, accmode_t accmode, struct ucred *cred) { #ifdef DEBUG_VFS_LOCKS if (!mtx_owned(TMPFS_NODE_MTX(node))) { ASSERT_VOP_LOCKED(vp, "tmpfs_access_locked needs locked vnode or node"); } #endif if ((accmode & VWRITE) != 0 && (node->tn_flags & IMMUTABLE) != 0) return (EPERM); return (vaccess(vp->v_type, node->tn_mode, node->tn_uid, node->tn_gid, accmode, cred)); } /* * Access check for v->a_vp. Returns EROFS for a write to a directory, * symlink or regular file on a read-only mount, EINVAL for a vnode type * not listed below, and otherwise defers to tmpfs_access_locked(). */ int tmpfs_access(struct vop_access_args *v) { struct vnode *vp = v->a_vp; struct ucred *cred = v->a_cred; struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp); mode_t all_x = S_IXUSR | S_IXGRP | S_IXOTH; accmode_t accmode = v->a_accmode; /* * Common case path lookup.
 */ if (__predict_true(accmode == VEXEC && (node->tn_mode & all_x) == all_x)) return (0); switch (vp->v_type) { case VDIR: /* FALLTHROUGH */ case VLNK: /* FALLTHROUGH */ case VREG: if ((accmode & VWRITE) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) return (EROFS); break; case VBLK: /* FALLTHROUGH */ case VCHR: /* FALLTHROUGH */ case VSOCK: /* FALLTHROUGH */ case VFIFO: break; default: return (EINVAL); } return (tmpfs_access_locked(vp, node, accmode, cred)); } /* * Fill the struct stat at v->a_sb from the tmpfs node behind v->a_vp, * bracketed by vop_stat_helper_pre() and vop_stat_helper_post(). */ int tmpfs_stat(struct vop_stat_args *v) { struct vnode *vp = v->a_vp; struct stat *sb = v->a_sb; struct tmpfs_node *node; int error; node = VP_TO_TMPFS_NODE(vp); tmpfs_update_getattr(vp); error = vop_stat_helper_pre(v); if (__predict_false(error)) return (error); sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0]; sb->st_ino = node->tn_id; sb->st_mode = node->tn_mode | VTTOIF(vp->v_type); sb->st_nlink = node->tn_links; sb->st_uid = node->tn_uid; sb->st_gid = node->tn_gid; sb->st_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
node->tn_rdev : NODEV; sb->st_size = node->tn_size; sb->st_atim.tv_sec = node->tn_atime.tv_sec; sb->st_atim.tv_nsec = node->tn_atime.tv_nsec; sb->st_mtim.tv_sec = node->tn_mtime.tv_sec; sb->st_mtim.tv_nsec = node->tn_mtime.tv_nsec; sb->st_ctim.tv_sec = node->tn_ctime.tv_sec; sb->st_ctim.tv_nsec = node->tn_ctime.tv_nsec; sb->st_birthtim.tv_sec = node->tn_birthtime.tv_sec; sb->st_birthtim.tv_nsec = node->tn_birthtime.tv_nsec; sb->st_blksize = PAGE_SIZE; sb->st_flags = node->tn_flags; sb->st_gen = node->tn_gen; /* Regular files report their page count in bytes, other nodes their size; both are then scaled to S_BLKSIZE units. */ if (vp->v_type == VREG) { #ifdef __ILP32__ vm_object_t obj = node->tn_reg.tn_aobj; /* Handle torn read */ VM_OBJECT_RLOCK(obj); #endif sb->st_blocks = ptoa(node->tn_reg.tn_pages); #ifdef __ILP32__ VM_OBJECT_RUNLOCK(obj); #endif } else { sb->st_blocks = node->tn_size; } sb->st_blocks /= S_BLKSIZE; return (vop_stat_helper_post(v, error)); } /* * Fill the struct vattr at v->a_vap from the tmpfs node behind v->a_vp. * Always returns 0. */ int tmpfs_getattr(struct vop_getattr_args *v) { struct vnode *vp = v->a_vp; struct vattr *vap = v->a_vap; struct tmpfs_node *node; node = VP_TO_TMPFS_NODE(vp); tmpfs_update_getattr(vp); vap->va_type = vp->v_type; vap->va_mode = node->tn_mode; vap->va_nlink = node->tn_links; vap->va_uid = node->tn_uid; vap->va_gid = node->tn_gid; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; vap->va_fileid = node->tn_id; vap->va_size = node->tn_size; vap->va_blocksize = PAGE_SIZE; vap->va_atime = node->tn_atime; vap->va_mtime = node->tn_mtime; vap->va_ctime = node->tn_ctime; vap->va_birthtime = node->tn_birthtime; vap->va_gen = node->tn_gen; vap->va_flags = node->tn_flags; vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
node->tn_rdev : NODEV; if (vp->v_type == VREG) { #ifdef __ILP32__ vm_object_t obj = node->tn_reg.tn_aobj; VM_OBJECT_RLOCK(obj); #endif vap->va_bytes = ptoa(node->tn_reg.tn_pages); #ifdef __ILP32__ VM_OBJECT_RUNLOCK(obj); #endif } else { vap->va_bytes = node->tn_size; } vap->va_filerev = 0; return (0); } int tmpfs_setattr(struct vop_setattr_args *v) { struct vnode *vp = v->a_vp; struct vattr *vap = v->a_vap; struct ucred *cred = v->a_cred; struct thread *td = curthread; int error; ASSERT_VOP_IN_SEQC(vp); error = 0; /* Abort if any unsettable attribute is given. */ if (vap->va_type != VNON || vap->va_nlink != VNOVAL || vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL || vap->va_blocksize != VNOVAL || vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL || vap->va_bytes != VNOVAL) error = EINVAL; if (error == 0 && (vap->va_flags != VNOVAL)) error = tmpfs_chflags(vp, vap->va_flags, cred, td); if (error == 0 && (vap->va_size != VNOVAL)) error = tmpfs_chsize(vp, vap->va_size, cred, td); if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL)) error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, td); if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) error = tmpfs_chmod(vp, vap->va_mode, cred, td); if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL && vap->va_atime.tv_nsec != VNOVAL) || (vap->va_mtime.tv_sec != VNOVAL && vap->va_mtime.tv_nsec != VNOVAL) || (vap->va_birthtime.tv_sec != VNOVAL && vap->va_birthtime.tv_nsec != VNOVAL))) error = tmpfs_chtimes(vp, vap, cred, td); /* * Update the node times. We give preference to the error codes * generated by this function rather than the ones that may arise * from tmpfs_update. 
*/
	tmpfs_update(vp);

	return (error);
}

/*
 * VOP_READ for regular files.
 *
 * Returns EISDIR for anything that is not VREG and EINVAL for a negative
 * offset.  Otherwise the node is marked accessed and the data is copied
 * out of the backing VM object with uiomove_object(), bounded by the
 * current file size (tn_size).
 */
static int
tmpfs_read(struct vop_read_args *v)
{
	struct vnode *vp;
	struct uio *uio;
	struct tmpfs_node *node;

	vp = v->a_vp;
	if (vp->v_type != VREG)
		return (EISDIR);
	uio = v->a_uio;
	if (uio->uio_offset < 0)
		return (EINVAL);
	node = VP_TO_TMPFS_NODE(vp);
	tmpfs_set_accessed(VFS_TO_TMPFS(vp->v_mount), node);
	return (uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio));
}

/*
 * VOP_READ_PGCACHE: read path that looks the node up inside a
 * vfs_smr_enter()/vfs_smr_exit() section.
 *
 * Returns EINVAL for a negative offset.  Returns EJUSTRETURN when the
 * node or its backing object cannot be obtained or the vnode is doomed
 * (presumably this tells the caller to fall back to the regular read
 * path -- confirm against the VOP_READ_PGCACHE contract).  Otherwise
 * returns the result of uiomove_object().
 */
static int
tmpfs_read_pgcache(struct vop_read_pgcache_args *v)
{
	struct vnode *vp;
	struct tmpfs_node *node;
	vm_object_t object;
	off_t size;
	int error;

	vp = v->a_vp;
	VNPASS((vn_irflag_read(vp) & VIRF_PGREAD) != 0, vp);

	if (v->a_uio->uio_offset < 0)
		return (EINVAL);

	/* Default result for every early exit below. */
	error = EJUSTRETURN;
	vfs_smr_enter();

	node = VP_TO_TMPFS_NODE_SMR(vp);
	if (node == NULL)
		goto out_smr;
	MPASS(node->tn_type == VREG);
	MPASS(node->tn_refcount >= 1);
	object = node->tn_reg.tn_aobj;
	if (object == NULL)
		goto out_smr;

	MPASS(object->type == tmpfs_pager_type);
	MPASS((object->flags & (OBJ_ANON | OBJ_DEAD | OBJ_SWAP)) ==
	    OBJ_SWAP);
	if (!VN_IS_DOOMED(vp)) {
		/* size cannot become shorter due to rangelock.
*/
		size = node->tn_size;
		tmpfs_set_accessed(node->tn_reg.tn_tmp, node);
		/* The SMR section is left before the actual copy. */
		vfs_smr_exit();
		error = uiomove_object(object, size, v->a_uio);
		return (error);
	}
out_smr:
	vfs_smr_exit();
	return (error);
}

/*
 * VOP_WRITE for regular files.
 *
 * Returns EINVAL for a negative offset or a non-VREG vnode and 0 for an
 * empty request.  With IO_APPEND the offset is moved to the current end
 * of file.  The request is checked with vn_rlimit_fsizex() against the
 * mount's tm_maxfilesize; whatever it stored in 'r' is handed back via
 * vn_rlimit_fsizex_res() on every exit after that call.
 *
 * If the write extends past tn_size the file is grown first.  After the
 * copy the node is marked modified/changed, and the setuid/setgid bits
 * are cleared when priv_check_cred(PRIV_VFS_RETAINSUGID) returns
 * non-zero.  If the copy failed, the file is resized back to the size it
 * had on entry.
 */
static int
tmpfs_write(struct vop_write_args *v)
{
	struct vnode *vp;
	struct uio *uio;
	struct tmpfs_node *node;
	off_t oldsize;
	ssize_t r;
	int error, ioflag;
	mode_t newmode;

	vp = v->a_vp;
	uio = v->a_uio;
	ioflag = v->a_ioflag;
	error = 0;
	node = VP_TO_TMPFS_NODE(vp);
	/* Remembered so a failed copy can be rolled back. */
	oldsize = node->tn_size;

	if (uio->uio_offset < 0 || vp->v_type != VREG)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	if (ioflag & IO_APPEND)
		uio->uio_offset = node->tn_size;
	error = vn_rlimit_fsizex(vp, uio, VFS_TO_TMPFS(vp->v_mount)->
	    tm_maxfilesize, &r, uio->uio_td);
	if (error != 0) {
		vn_rlimit_fsizex_res(uio, r);
		return (error);
	}

	/* Grow the file up front when the write extends past EOF. */
	if (uio->uio_offset + uio->uio_resid > node->tn_size) {
		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid,
		    FALSE);
		if (error != 0)
			goto out;
	}

	error = uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio);
	node->tn_status |= TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED;
	node->tn_accessed = true;
	if (node->tn_mode & (S_ISUID | S_ISGID)) {
		if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID)) {
			newmode = node->tn_mode & ~(S_ISUID | S_ISGID);
			/* Mode change is published inside a seqc write section. */
			vn_seqc_write_begin(vp);
			atomic_store_short(&node->tn_mode, newmode);
			vn_seqc_write_end(vp);
		}
	}
	/* Copy failed: undo any growth performed above. */
	if (error != 0)
		(void)tmpfs_reg_resize(vp, oldsize, TRUE);

out:
	MPASS(IMPLIES(error == 0, uio->uio_resid == 0));
	MPASS(IMPLIES(error != 0, oldsize == node->tn_size));

	vn_rlimit_fsizex_res(uio, r);
	return (error);
}

/*
 * VOP_DEALLOCATE: thin wrapper that forwards the vnode, offset and length
 * pointers to tmpfs_reg_punch_hole() and returns its result.
 */
static int
tmpfs_deallocate(struct vop_deallocate_args *v)
{
	return (tmpfs_reg_punch_hole(v->a_vp, v->a_offset, v->a_len));
}

/*
 * VOP_FSYNC: calls tmpfs_check_mtime() and tmpfs_update() on the vnode.
 * Always returns 0.
 */
static int
tmpfs_fsync(struct vop_fsync_args *v)
{
	struct vnode *vp = v->a_vp;

	tmpfs_check_mtime(vp);
	tmpfs_update(vp);

	return (0);
}

/*
 * VOP_REMOVE: remove the directory entry for a non-directory node.
 *
 * Returns EISDIR if the vnode is a directory and EPERM if the node is
 * flagged IMMUTABLE, APPEND or NOUNLINK or the parent directory is
 * flagged APPEND.  Otherwise the entry is detached from the parent and
 * freed, a whiteout is added when DOWHITEOUT is set in the component
 * name, and 0 is returned.
 */
static int
tmpfs_remove(struct vop_remove_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode *vp = v->a_vp;

	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;

	if (vp->v_type == VDIR) {
		error = EISDIR;
		goto out;
	}

	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);
	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
	MPASS(de != NULL);

	/* Files marked as immutable or append-only cannot be deleted. */
	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
	    (dnode->tn_flags & APPEND)) {
		error = EPERM;
		goto out;
	}

	/* Remove the entry from the directory; as it is a file, we do not
	 * have to change the number of hard links of the directory. */
	tmpfs_dir_detach(dvp, de);
	if (v->a_cnp->cn_flags & DOWHITEOUT)
		tmpfs_dir_whiteout_add(dvp, v->a_cnp);

	/* Free the directory entry we just deleted.  Note that the node
	 * referred by it will not be removed until the vnode is really
	 * reclaimed. */
	tmpfs_free_dirent(tmp, de);

	node->tn_status |= TMPFS_NODE_CHANGED;
	node->tn_accessed = true;
	error = 0;

out:
	return (error);
}

/*
 * VOP_LINK: create a new directory entry in v->a_tdvp that refers to the
 * existing node behind v->a_vp.
 *
 * Returns EMLINK if the node already has TMPFS_LINK_MAX links, EPERM if
 * it is flagged IMMUTABLE or APPEND, or the error from
 * tmpfs_alloc_dirent().  On success the new entry is attached to the
 * directory, replacing a whiteout when ISWHITEOUT is set.
 */
static int
tmpfs_link(struct vop_link_args *v)
{
	struct vnode *dvp = v->a_tdvp;
	struct vnode *vp = v->a_vp;
	struct componentname *cnp = v->a_cnp;

	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_node *node;

	MPASS(dvp != vp); /* XXX When can this be false? */
	node = VP_TO_TMPFS_NODE(vp);

	/* Ensure that we do not overflow the maximum number of links imposed
	 * by the system. */
	MPASS(node->tn_links <= TMPFS_LINK_MAX);
	if (node->tn_links == TMPFS_LINK_MAX) {
		error = EMLINK;
		goto out;
	}

	/* We cannot create links of files marked immutable or append-only. */
	if (node->tn_flags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto out;
	}

	/* Allocate a new directory entry to represent the node. */
	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
	    cnp->cn_nameptr, cnp->cn_namelen, &de);
	if (error != 0)
		goto out;

	/* Insert the new directory entry into the appropriate directory. */
	if (cnp->cn_flags & ISWHITEOUT)
		tmpfs_dir_whiteout_remove(dvp, cnp);
	tmpfs_dir_attach(dvp, de);

	/* vp link count has changed, so update node times.
*/
	node->tn_status |= TMPFS_NODE_CHANGED;
	tmpfs_update(vp);

	error = 0;

out:
	return (error);
}

/*
 * We acquire all but fdvp locks using non-blocking acquisitions.  If we
 * fail to acquire any lock in the path we will drop all held locks,
 * acquire the new lock in a blocking fashion, and then release it and
 * restart the rename.  This acquire/release step ensures that we do not
 * spin on a lock waiting for release.  On error release all vnode locks
 * and decrement references the way tmpfs_rename() would do.
 *
 * On entry tdvp (and *tvpp, if present and distinct from tdvp) are
 * unlocked here before the retry loop starts.  On success (return 0)
 * fdvp and tdvp are exclusively locked, *tvpp is locked if non-NULL, and
 * *fvpp is referenced but unlocked.  Both *fvpp and *tvpp may have been
 * replaced by freshly looked-up vnodes.  The number of loop iterations is
 * added to the tmpfs_rename_restarts counter on every exit.
 */
static int
tmpfs_rename_relock(struct vnode *fdvp, struct vnode **fvpp,
    struct vnode *tdvp, struct vnode **tvpp,
    struct componentname *fcnp, struct componentname *tcnp)
{
	struct vnode *nvp;
	struct mount *mp;
	struct tmpfs_dirent *de;
	int error, restarts = 0;

	VOP_UNLOCK(tdvp);
	if (*tvpp != NULL && *tvpp != tdvp)
		VOP_UNLOCK(*tvpp);
	mp = fdvp->v_mount;

relock:
	restarts += 1;
	/* fdvp is the only lock taken in blocking mode up front. */
	error = vn_lock(fdvp, LK_EXCLUSIVE);
	if (error)
		goto releout;
	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
		/* Contended: wait for tdvp without holding fdvp, then retry. */
		VOP_UNLOCK(fdvp);
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (error)
			goto releout;
		VOP_UNLOCK(tdvp);
		goto relock;
	}
	/*
	 * Re-resolve fvp to be certain it still exists and fetch the
	 * correct vnode.
	 */
	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(fdvp), NULL, fcnp);
	if (de == NULL) {
		VOP_UNLOCK(fdvp);
		VOP_UNLOCK(tdvp);
		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
			error = EINVAL;
		else
			error = ENOENT;
		goto releout;
	}
	error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
	if (error != 0) {
		VOP_UNLOCK(fdvp);
		VOP_UNLOCK(tdvp);
		if (error != EBUSY)
			goto releout;
		/* EBUSY: repeat the allocation in blocking mode, then retry. */
		error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE, &nvp);
		if (error != 0)
			goto releout;
		VOP_UNLOCK(nvp);
		/*
		 * Concurrent rename race.
		 */
		if (nvp == tdvp) {
			vrele(nvp);
			error = EINVAL;
			goto releout;
		}
		vrele(*fvpp);
		*fvpp = nvp;
		goto relock;
	}
	/* Swap the caller's fvp for the re-resolved one; it stays unlocked. */
	vrele(*fvpp);
	*fvpp = nvp;
	VOP_UNLOCK(*fvpp);
	/*
	 * Re-resolve tvp and acquire the vnode lock if present.
*/
	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(tdvp), NULL, tcnp);
	/*
	 * If tvp disappeared we just carry on.
	 */
	if (de == NULL && *tvpp != NULL) {
		vrele(*tvpp);
		*tvpp = NULL;
	}
	/*
	 * Get the tvp ino if the lookup succeeded.  We may have to restart
	 * if the non-blocking acquire fails.
	 */
	if (de != NULL) {
		nvp = NULL;
		error = tmpfs_alloc_vp(mp, de->td_node,
		    LK_EXCLUSIVE | LK_NOWAIT, &nvp);
		/* The old tvp reference is dropped whether or not this worked. */
		if (*tvpp != NULL)
			vrele(*tvpp);
		*tvpp = nvp;
		if (error != 0) {
			VOP_UNLOCK(fdvp);
			VOP_UNLOCK(tdvp);
			if (error != EBUSY)
				goto releout;
			error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE,
			    &nvp);
			if (error != 0)
				goto releout;
			VOP_UNLOCK(nvp);
			/*
			 * fdvp contains fvp, thus tvp (=fdvp) is not empty.
			 */
			if (nvp == fdvp) {
				error = ENOTEMPTY;
				goto releout;
			}
			goto relock;
		}
	}
	tmpfs_rename_restarts += restarts;

	return (0);

releout:
	/* Failure: drop the references on all four vnodes (tvp if present). */
	vrele(fdvp);
	vrele(*fvpp);
	vrele(tdvp);
	if (*tvpp != NULL)
		vrele(*tvpp);
	tmpfs_rename_restarts += restarts;

	return (error);
}

/*
 * VOP_RENAME: move the entry fcnp in directory fdvp (node fvp) to the
 * name tcnp in directory tdvp, replacing the existing target tvp if there
 * is one.
 *
 * Except for the early return after a failed tmpfs_rename_relock(), all
 * exits go through the "out_locked"/"out" labels, which unlock fdvp when
 * it was locked here, close the seqc write sections if they were opened,
 * and release tdvp, tvp, fdvp and fvp.
 */
static int
tmpfs_rename(struct vop_rename_args *v)
{
	struct vnode *fdvp = v->a_fdvp;
	struct vnode *fvp = v->a_fvp;
	struct componentname *fcnp = v->a_fcnp;
	struct vnode *tdvp = v->a_tdvp;
	struct vnode *tvp = v->a_tvp;
	struct componentname *tcnp = v->a_tcnp;
	char *newname;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *fdnode;
	struct tmpfs_node *fnode;
	struct tmpfs_node *tnode;
	struct tmpfs_node *tdnode;
	int error;
	bool want_seqc_end;

	/* Set once the vn_seqc_write_begin() calls have been made. */
	want_seqc_end = false;

	/*
	 * Disallow cross-device renames.
	 * XXX Why isn't this done by the caller?
	 */
	if (fvp->v_mount != tdvp->v_mount ||
	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
		error = EXDEV;
		goto out;
	}

	/* If source and target are the same file, there is nothing to do. */
	if (fvp == tvp) {
		error = 0;
		goto out;
	}

	/*
	 * If we need to move the directory between entries, lock the
	 * source so that we can safely operate on it.
*/
	if (fdvp != tdvp && fdvp != tvp) {
		if (vn_lock(fdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
			/*
			 * Non-blocking lock failed; take the slow path.  On
			 * failure tmpfs_rename_relock() has already released
			 * the vnodes, hence the direct return.
			 */
			error = tmpfs_rename_relock(fdvp, &fvp, tdvp, &tvp,
			    fcnp, tcnp);
			if (error != 0)
				return (error);
			ASSERT_VOP_ELOCKED(fdvp,
			    "tmpfs_rename: fdvp not locked");
			ASSERT_VOP_ELOCKED(tdvp,
			    "tmpfs_rename: tdvp not locked");
			if (tvp != NULL)
				ASSERT_VOP_ELOCKED(tvp,
				    "tmpfs_rename: tvp not locked");
			/* fvp/tvp may have been re-resolved; re-check identity. */
			if (fvp == tvp) {
				error = 0;
				goto out_locked;
			}
		}
	}

	/*
	 * Avoid manipulating '.' and '..' entries.
	 */
	if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
	    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.')) {
		error = EINVAL;
		goto out_locked;
	}

	/*
	 * Open seqc write sections on every vnode involved; want_seqc_end
	 * tells the exit path to close them again.
	 */
	if (tvp != NULL)
		vn_seqc_write_begin(tvp);
	vn_seqc_write_begin(tdvp);
	vn_seqc_write_begin(fvp);
	vn_seqc_write_begin(fdvp);
	want_seqc_end = true;

	tmp = VFS_TO_TMPFS(tdvp->v_mount);
	tdnode = VP_TO_TMPFS_DIR(tdvp);
	tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
	fdnode = VP_TO_TMPFS_DIR(fdvp);
	fnode = VP_TO_TMPFS_NODE(fvp);
	de = tmpfs_dir_lookup(fdnode, fnode, fcnp);

	/*
	 * Entry can disappear before we lock fdvp.
	 */
	if (de == NULL) {
		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
			error = EINVAL;
		else
			error = ENOENT;
		goto out_locked;
	}
	MPASS(de->td_node == fnode);

	/*
	 * If re-naming a directory to another preexisting directory
	 * ensure that the target directory is empty so that its
	 * removal causes no side effects.
	 * Kern_rename guarantees the destination to be a directory
	 * if the source is one.
*/
	if (tvp != NULL) {
		MPASS(tnode != NULL);

		/* The existing target must be removable from its directory. */
		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
			error = EPERM;
			goto out_locked;
		}

		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
			/*
			 * A non-zero size is tolerated only when
			 * IGNOREWHITEOUT is set and the size does not exceed
			 * tn_wht_size.
			 */
			if (tnode->tn_size != 0 &&
			    ((tcnp->cn_flags & IGNOREWHITEOUT) == 0 ||
			    tnode->tn_size > tnode->tn_dir.tn_wht_size)) {
				error = ENOTEMPTY;
				goto out_locked;
			}
		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
			error = ENOTDIR;
			goto out_locked;
		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
			error = EISDIR;
			goto out_locked;
		} else {
			MPASS(fnode->tn_type != VDIR &&
				tnode->tn_type != VDIR);
		}
	}

	/* Same flag check for the source node and its directory. */
	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
	    || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
		error = EPERM;
		goto out_locked;
	}

	/*
	 * Ensure that we have enough memory to hold the new name, if it
	 * has to be changed.
	 */
	if (fcnp->cn_namelen != tcnp->cn_namelen ||
	    bcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
		/* Every error exit from here on must free newname. */
		newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK);
	} else
		newname = NULL;

	/*
	 * If the node is being moved to another directory, we have to do
	 * the move.
	 */
	if (fdnode != tdnode) {
		/*
		 * In case we are moving a directory, we have to adjust its
		 * parent to point to the new parent.
		 */
		if (de->td_node->tn_type == VDIR) {
			struct tmpfs_node *n;

			/* Write access to the moved directory is checked first. */
			TMPFS_NODE_LOCK(fnode);
			error = tmpfs_access_locked(fvp, fnode, VWRITE,
			    tcnp->cn_cred);
			TMPFS_NODE_UNLOCK(fnode);
			if (error) {
				if (newname != NULL)
					free(newname, M_TMPFSNAME);
				goto out_locked;
			}

			/*
			 * Ensure the target directory is not a child of the
			 * directory being moved.  Otherwise, we'd end up
			 * with stale nodes.
			 */
			n = tdnode;
			/*
			 * TMPFS_LOCK guaranties that no nodes are freed while
			 * traversing the list. Nodes can only be marked as
			 * removed: tn_parent == NULL.
*/
			TMPFS_LOCK(tmp);
			TMPFS_NODE_LOCK(n);
			/*
			 * Walk from tdnode towards the root, holding one node
			 * lock at a time.  The loop stops at the node that is
			 * its own parent.
			 */
			while (n != n->tn_dir.tn_parent) {
				struct tmpfs_node *parent;

				/* Target lies below the directory being moved. */
				if (n == fnode) {
					TMPFS_NODE_UNLOCK(n);
					TMPFS_UNLOCK(tmp);
					error = EINVAL;
					if (newname != NULL)
						free(newname, M_TMPFSNAME);
					goto out_locked;
				}
				parent = n->tn_dir.tn_parent;
				TMPFS_NODE_UNLOCK(n);
				/* n == NULL marks "hit a removed directory". */
				if (parent == NULL) {
					n = NULL;
					break;
				}
				TMPFS_NODE_LOCK(parent);
				if (parent->tn_dir.tn_parent == NULL) {
					TMPFS_NODE_UNLOCK(parent);
					n = NULL;
					break;
				}
				n = parent;
			}
			TMPFS_UNLOCK(tmp);
			if (n == NULL) {
				error = EINVAL;
				if (newname != NULL)
					free(newname, M_TMPFSNAME);
				goto out_locked;
			}
			/* Loop ended normally with the last visited node locked. */
			TMPFS_NODE_UNLOCK(n);

			/* Adjust the parent pointer. */
			TMPFS_VALIDATE_DIR(fnode);
			TMPFS_NODE_LOCK(de->td_node);
			de->td_node->tn_dir.tn_parent = tdnode;
			TMPFS_NODE_UNLOCK(de->td_node);

			/*
			 * As a result of changing the target of the '..'
			 * entry, the link count of the source and target
			 * directories has to be adjusted.
			 */
			TMPFS_NODE_LOCK(tdnode);
			TMPFS_ASSERT_LOCKED(tdnode);
			tdnode->tn_links++;
			TMPFS_NODE_UNLOCK(tdnode);

			TMPFS_NODE_LOCK(fdnode);
			TMPFS_ASSERT_LOCKED(fdnode);
			fdnode->tn_links--;
			TMPFS_NODE_UNLOCK(fdnode);
		}
	}

	/*
	 * Do the move: just remove the entry from the source directory
	 * and insert it into the target one.
	 */
	tmpfs_dir_detach(fdvp, de);

	if (fcnp->cn_flags & DOWHITEOUT)
		tmpfs_dir_whiteout_add(fdvp, fcnp);
	if (tcnp->cn_flags & ISWHITEOUT)
		tmpfs_dir_whiteout_remove(tdvp, tcnp);

	/*
	 * If the name has changed, we need to make it effective by changing
	 * it in the directory entry.
	 */
	if (newname != NULL) {
		MPASS(tcnp->cn_namelen <= MAXNAMLEN);

		/* The entry takes over the buffer allocated earlier. */
		free(de->ud.td_name, M_TMPFSNAME);
		de->ud.td_name = newname;
		tmpfs_dirent_init(de, tcnp->cn_nameptr, tcnp->cn_namelen);

		fnode->tn_status |= TMPFS_NODE_CHANGED;
		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
	}

	/*
	 * If we are overwriting an entry, we have to remove the old one
	 * from the target directory.
	 */
	if (tvp != NULL) {
		struct tmpfs_dirent *tde;

		/* Remove the old entry from the target directory.
*/
		tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
		tmpfs_dir_detach(tdvp, tde);

		/*
		 * If we are overwriting a directory, per the ENOTEMPTY check
		 * above it must either be empty or contain only whiteout
		 * entries.  In the latter case (which can only happen if
		 * IGNOREWHITEOUT was passed in tcnp->cn_flags), clear the
		 * whiteout entries to avoid leaking memory.
		 */
		if (tnode->tn_type == VDIR && tnode->tn_size > 0)
			tmpfs_dir_clear_whiteouts(tvp);

		/* Update node's ctime because of possible hardlinks. */
		tnode->tn_status |= TMPFS_NODE_CHANGED;
		tmpfs_update(tvp);

		/*
		 * Free the directory entry we just deleted.  Note that the
		 * node referred by it will not be removed until the vnode is
		 * really reclaimed.
		 */
		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
	}

	/* The (possibly renamed) entry now goes into the target directory. */
	tmpfs_dir_attach(tdvp, de);

	if (tmpfs_use_nc(fvp)) {
		cache_vop_rename(fdvp, fvp, tdvp, tvp, fcnp, tcnp);
	}

	error = 0;

out_locked:
	/* Mirrors the condition under which fdvp was locked in this function. */
	if (fdvp != tdvp && fdvp != tvp)
		VOP_UNLOCK(fdvp);

out:
	if (want_seqc_end) {
		if (tvp != NULL)
			vn_seqc_write_end(tvp);
		vn_seqc_write_end(tdvp);
		vn_seqc_write_end(fvp);
		vn_seqc_write_end(fdvp);
	}

	/*
	 * Release target nodes.
	 * XXX: I don't understand when tdvp can be the same as tvp, but
	 * other code takes care of this...
	 */
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp != NULL)
		vput(tvp);

	/* Release source nodes.
*/
	vrele(fdvp);
	vrele(fvp);

	return (error);
}

/*
 * VOP_MKDIR: asserts that the requested type is VDIR and hands the
 * creation to tmpfs_alloc_file() with no link target (NULL).
 */
static int
tmpfs_mkdir(struct vop_mkdir_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode **vpp = v->a_vpp;
	struct componentname *cnp = v->a_cnp;
	struct vattr *vap = v->a_vap;

	MPASS(vap->va_type == VDIR);

	return (tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL));
}

/*
 * VOP_RMDIR: remove the directory vp from its parent dvp.
 *
 * Returns ENOTEMPTY if the directory has a non-zero size, unless
 * IGNOREWHITEOUT is set and the size does not exceed tn_wht_size.
 * Returns EPERM if the parent is flagged APPEND or the directory is
 * flagged NOUNLINK, IMMUTABLE or APPEND.  Otherwise the entry is detached
 * and freed, the link counts of both the directory and its parent are
 * decremented, and 0 is returned.
 */
static int
tmpfs_rmdir(struct vop_rmdir_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode *vp = v->a_vp;
	struct componentname *cnp = v->a_cnp;

	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_DIR(vp);

	/*
	 * Directories with more than two non-whiteout entries ('.' and '..')
	 * cannot be removed.
	 */
	if (node->tn_size != 0 &&
	    ((cnp->cn_flags & IGNOREWHITEOUT) == 0 ||
	    node->tn_size > node->tn_dir.tn_wht_size)) {
		error = ENOTEMPTY;
		goto out;
	}

	/* Check flags to see if we are allowed to remove the directory. */
	if ((dnode->tn_flags & APPEND)
	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
		error = EPERM;
		goto out;
	}

	/* This invariant holds only if we are not trying to remove "..".
	  * We checked for that above so this is safe now. */
	MPASS(node->tn_dir.tn_parent == dnode);

	/* Get the directory entry associated with node (vp).  This was
	 * filled by tmpfs_lookup while looking up the entry. */
	de = tmpfs_dir_lookup(dnode, node, cnp);
	MPASS(TMPFS_DIRENT_MATCHES(de,
	    cnp->cn_nameptr,
	    cnp->cn_namelen));

	/* Detach the directory entry from the directory (dnode). */
	tmpfs_dir_detach(dvp, de);

	/*
	 * If we are removing a directory, per the ENOTEMPTY check above it
	 * must either be empty or contain only whiteout entries.  In the
	 * latter case (which can only happen if IGNOREWHITEOUT was passed
	 * in cnp->cn_flags), clear the whiteout entries to avoid leaking
	 * memory.
*/
	if (node->tn_size > 0)
		tmpfs_dir_clear_whiteouts(vp);

	if (cnp->cn_flags & DOWHITEOUT)
		tmpfs_dir_whiteout_add(dvp, cnp);

	/* No vnode should be allocated for this entry from this point */
	TMPFS_NODE_LOCK(node);
	node->tn_links--;
	node->tn_dir.tn_parent = NULL;
	node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
	node->tn_accessed = true;

	TMPFS_NODE_UNLOCK(node);

	/* The parent loses one link as well. */
	TMPFS_NODE_LOCK(dnode);
	dnode->tn_links--;
	dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
	dnode->tn_accessed = true;
	TMPFS_NODE_UNLOCK(dnode);

	if (tmpfs_use_nc(dvp)) {
		cache_vop_rmdir(dvp, vp);
	}

	/* Free the directory entry we just deleted.  Note that the node
	 * referred by it will not be removed until the vnode is really
	 * reclaimed. */
	tmpfs_free_dirent(tmp, de);

	/* Release the deleted vnode (will destroy the node, notify
	 * interested parties and clean it from the cache). */

	dnode->tn_status |= TMPFS_NODE_CHANGED;
	tmpfs_update(dvp);

	error = 0;

out:
	return (error);
}

/*
 * VOP_SYMLINK: create a symbolic link by calling tmpfs_alloc_file() with
 * the link target.  Unless "notyet" is defined, va_type is forced to VLNK
 * here instead of being asserted.
 */
static int
tmpfs_symlink(struct vop_symlink_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode **vpp = v->a_vpp;
	struct componentname *cnp = v->a_cnp;
	struct vattr *vap = v->a_vap;
	const char *target = v->a_target;

#ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
	MPASS(vap->va_type == VLNK);
#else
	vap->va_type = VLNK;
#endif

	return (tmpfs_alloc_file(dvp, vpp, vap, cnp, target));
}

/*
 * VOP_READDIR: copy directory entries into the caller's uio via
 * tmpfs_dir_getdents(), optionally producing a cookie array.
 *
 * Returns ENOTDIR if the vnode is not a directory.  When both a_cookies
 * and a_ncookies are supplied, a cookie array is allocated here and
 * handed to the caller on success; it is freed again on error.
 */
static int
tmpfs_readdir(struct vop_readdir_args *va)
{
	struct vnode *vp;
	struct uio *uio;
	struct tmpfs_mount *tm;
	struct tmpfs_node *node;
	uint64_t **cookies;
	int *eofflag, *ncookies;
	ssize_t startresid;
	int error, maxcookies;

	vp = va->a_vp;
	uio = va->a_uio;
	eofflag = va->a_eofflag;
	cookies = va->a_cookies;
	ncookies = va->a_ncookies;

	/* This operation only makes sense on directory nodes. */
	if (vp->v_type != VDIR)
		return (ENOTDIR);

	maxcookies = 0;
	node = VP_TO_TMPFS_DIR(vp);
	tm = VFS_TO_TMPFS(vp->v_mount);

	/* Used below to tell whether anything at all was copied out. */
	startresid = uio->uio_resid;

	/* Allocate cookies for NFS and compat modules.
*/
	if (cookies != NULL && ncookies != NULL) {
		/* Upper bound on entries: size / sizeof(dirent), plus two. */
		maxcookies = howmany(node->tn_size,
		    sizeof(struct tmpfs_dirent)) + 2;
		*cookies = malloc(maxcookies * sizeof(**cookies), M_TEMP,
		    M_WAITOK);
		*ncookies = 0;
	}

	/*
	 * NOTE(review): the allocation above requires both cookies and
	 * ncookies to be non-NULL, while this branch tests cookies only.
	 * Presumably callers always pass both or neither -- confirm.
	 */
	if (cookies == NULL)
		error = tmpfs_dir_getdents(tm, node, uio, 0, NULL, NULL);
	else
		error = tmpfs_dir_getdents(tm, node, uio, maxcookies, *cookies,
		    ncookies);

	/* Buffer was filled without hitting EOF. */
	if (error == EJUSTRETURN)
		error = (uio->uio_resid != startresid) ? 0 : EINVAL;

	if (error != 0 && cookies != NULL && ncookies != NULL) {
		free(*cookies, M_TEMP);
		*cookies = NULL;
		*ncookies = 0;
	}

	if (eofflag != NULL)
		*eofflag =
		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);

	return (error);
}

/*
 * VOP_READLINK: copy the symlink target out to the caller's uio.  At most
 * min(tn_size, uio_resid) bytes are copied, and the node is marked
 * accessed afterwards.  Returns the uiomove() result.
 */
static int
tmpfs_readlink(struct vop_readlink_args *v)
{
	struct vnode *vp = v->a_vp;
	struct uio *uio = v->a_uio;

	int error;
	struct tmpfs_node *node;

	MPASS(uio->uio_offset == 0);
	MPASS(vp->v_type == VLNK);

	node = VP_TO_TMPFS_NODE(vp);

	error = uiomove(node->tn_link_target, MIN(node->tn_size, uio->uio_resid),
	    uio);
	tmpfs_set_accessed(VFS_TO_TMPFS(vp->v_mount), node);

	return (error);
}

/*
 * VOP_FPLOOKUP_SYMLINK routines are subject to special circumstances, see
 * the comment above cache_fplookup for details.
 *
 * Check tmpfs_alloc_node for tmpfs-specific synchronisation notes.
*/
/*
 * Returns EAGAIN when the node pointer is NULL, when tn_link_smr is not
 * set, or when the link target pointer is NULL.  Otherwise returns the
 * result of cache_symlink_resolve() on the target.
 */
static int
tmpfs_fplookup_symlink(struct vop_fplookup_symlink_args *v)
{
	struct vnode *vp;
	struct tmpfs_node *node;
	char *symlink;

	vp = v->a_vp;
	node = VP_TO_TMPFS_NODE_SMR(vp);
	if (__predict_false(node == NULL))
		return (EAGAIN);
	if (!atomic_load_char(&node->tn_link_smr))
		return (EAGAIN);
	symlink = atomic_load_ptr(&node->tn_link_target);
	if (symlink == NULL)
		return (EAGAIN);

	return (cache_symlink_resolve(v->a_fpl, symlink, node->tn_size));
}

/*
 * VOP_INACTIVE: recycle the vnode if its node has no links left,
 * otherwise call tmpfs_check_mtime().  Always returns 0.
 */
static int
tmpfs_inactive(struct vop_inactive_args *v)
{
	struct vnode *vp;
	struct tmpfs_node *node;

	vp = v->a_vp;
	node = VP_TO_TMPFS_NODE(vp);
	if (node->tn_links == 0)
		vrecycle(vp);
	else
		tmpfs_check_mtime(vp);
	return (0);
}

/*
 * VOP_NEED_INACTIVE: returns 1 when tmpfs_inactive() would have work to
 * do, 0 otherwise.  That is the case when the node has no links, or when
 * it is a regular file whose VM object generation differs from its
 * cleangeneration.
 */
static int
tmpfs_need_inactive(struct vop_need_inactive_args *ap)
{
	struct vnode *vp;
	struct tmpfs_node *node;
	struct vm_object *obj;

	vp = ap->a_vp;
	node = VP_TO_TMPFS_NODE(vp);
	if (node->tn_links == 0)
		goto need;
	if (vp->v_type == VREG) {
		obj = vp->v_object;
		if (obj->generation != obj->cleangeneration)
			goto need;
	}
	return (0);
need:
	return (1);
}

/*
 * VOP_RECLAIM: detach the vnode from its tmpfs node.  For regular files
 * the VM object association is torn down first; the node is then
 * disassociated from the vnode under the mount and node locks.  Always
 * returns 0.
 */
int
tmpfs_reclaim(struct vop_reclaim_args *v)
{
	struct vnode *vp;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	bool unlock;

	vp = v->a_vp;
	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);

	if (vp->v_type == VREG)
		tmpfs_destroy_vobject(vp, node->tn_reg.tn_aobj);
	vp->v_object = NULL;

	TMPFS_LOCK(tmp);
	TMPFS_NODE_LOCK(node);
	tmpfs_free_vp(vp);

	/*
	 * If the node referenced by this vnode was deleted by the user,
	 * we must free its associated data structures (now that the vnode
	 * is being reclaimed).
*/
	unlock = true;
	if (node->tn_links == 0 &&
	    (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) {
		node->tn_vpstate = TMPFS_VNODE_DOOMED;
		/*
		 * The locks are dropped below only when
		 * tmpfs_free_node_locked() returns false.
		 */
		unlock = !tmpfs_free_node_locked(tmp, node, true);
	}

	if (unlock) {
		TMPFS_NODE_UNLOCK(node);
		TMPFS_UNLOCK(tmp);
	}

	MPASS(vp->v_data == NULL);
	return (0);
}

/*
 * VOP_PRINT: print the node pointer, flags, link count, mode, owner,
 * group, size and status of the tmpfs node, plus FIFO details for VFIFO
 * vnodes.  Always returns 0.
 */
int
tmpfs_print(struct vop_print_args *v)
{
	struct vnode *vp = v->a_vp;

	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	/*
	 * NOTE(review): "%jd" is a signed conversion but the matching
	 * argument is cast to uintmax_t; confirm whether "%ju" (or an
	 * intmax_t cast) was intended.
	 */
	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%lx, links %jd\n",
	    node, node->tn_flags, (uintmax_t)node->tn_links);
	printf("\tmode 0%o, owner %d, group %d, size %jd, status 0x%x\n",
	    node->tn_mode, node->tn_uid, node->tn_gid,
	    (intmax_t)node->tn_size, node->tn_status);

	if (vp->v_type == VFIFO)
		fifo_printinfo(vp);

	printf("\n");

	return (0);
}

/*
 * VOP_PATHCONF: report configurable limits through *v->a_retval.
 *
 * _PC_PIPE_BUF is only answered for VDIR and VFIFO vnodes (EINVAL
 * otherwise).  Names not handled here are passed to vop_stdpathconf().
 */
int
tmpfs_pathconf(struct vop_pathconf_args *v)
{
	struct vnode *vp = v->a_vp;
	int name = v->a_name;
	long *retval = v->a_retval;

	int error;

	error = 0;

	switch (name) {
	case _PC_LINK_MAX:
		*retval = TMPFS_LINK_MAX;
		break;

	case _PC_SYMLINK_MAX:
		*retval = MAXPATHLEN;
		break;

	case _PC_NAME_MAX:
		*retval = NAME_MAX;
		break;

	case _PC_PIPE_BUF:
		if (vp->v_type == VDIR || vp->v_type == VFIFO)
			*retval = PIPE_BUF;
		else
			error = EINVAL;
		break;

	case _PC_CHOWN_RESTRICTED:
		*retval = 1;
		break;

	case _PC_NO_TRUNC:
		*retval = 1;
		break;

	case _PC_SYNC_IO:
		*retval = 1;
		break;

	case _PC_FILESIZEBITS:
		*retval = 64;
		break;

	case _PC_MIN_HOLE_SIZE:
		*retval = PAGE_SIZE;
		break;

	default:
		error = vop_stdpathconf(v);
	}

	return (error);
}

/*
 * VOP_VPTOFH: build a file handle for the vnode.  The handle payload is a
 * struct tmpfs_fid_data carrying the node id and generation number.
 * Always returns 0.
 */
static int
tmpfs_vptofh(struct vop_vptofh_args *ap)
/*
vop_vptofh {
	IN struct vnode *a_vp;
	IN struct fid *a_fhp;
};
*/
{
	struct tmpfs_fid_data tfd;
	struct tmpfs_node *node;
	struct fid *fhp;
	/*
	 * NOTE(review): the leading '+' on the next two lines looks like
	 * unified-diff added-line markers embedded in the text rather than C
	 * operators; confirm before treating this as compilable source.
	 */
	+ _Static_assert(sizeof(struct tmpfs_fid_data) <= sizeof(struct fid),
	+ "struct tmpfs_fid_data cannot be larger than struct fid");

	node = VP_TO_TMPFS_NODE(ap->a_vp);
	fhp = ap->a_fhp;
	fhp->fid_len = sizeof(tfd);

	/*
	 * Copy into fid_data from the stack to avoid unaligned pointer use.
	 * See the comment in sys/mount.h on struct fid for details.
*/
	tfd.tfd_id = node->tn_id;
	tfd.tfd_gen = node->tn_gen;
	memcpy(fhp->fid_data, &tfd, fhp->fid_len);

	return (0);
}

/*
 * VOP_WHITEOUT: manage whiteout entries in directory a_dvp.
 *
 *   LOOKUP - returns 0.
 *   CREATE - if an entry with that name exists, returns 0 when its
 *            td_node is NULL (presumably an existing whiteout -- confirm)
 *            and EEXIST otherwise; else adds a whiteout entry.
 *   DELETE - removes the whiteout entry and returns 0.
 *
 * Any other a_flags value panics.
 */
static int
tmpfs_whiteout(struct vop_whiteout_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct tmpfs_dirent *de;

	switch (ap->a_flags) {
	case LOOKUP:
		return (0);
	case CREATE:
		de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
		if (de != NULL)
			return (de->td_node == NULL ? 0 : EEXIST);
		return (tmpfs_dir_whiteout_add(dvp, cnp));
	case DELETE:
		tmpfs_dir_whiteout_remove(dvp, cnp);
		return (0);
	default:
		panic("tmpfs_whiteout: unknown op");
	}
}

/*
 * Scan directory node tnp for an entry that refers to node tn.  On a
 * match, stores the entry in *pde and returns 0; returns ENOENT when no
 * entry refers to tn.
 */
static int
tmpfs_vptocnp_dir(struct tmpfs_node *tn, struct tmpfs_node *tnp,
    struct tmpfs_dirent **pde)
{
	struct tmpfs_dir_cursor dc;
	struct tmpfs_dirent *de;

	for (de = tmpfs_dir_first(tnp, &dc); de != NULL;
	     de = tmpfs_dir_next(tnp, &dc)) {
		if (de->td_node == tn) {
			*pde = de;
			return (0);
		}
	}
	return (ENOENT);
}

/*
 * Obtain the vnode of directory node tnp in *dvp (shared-locked via
 * vn_vget_ino_gen()) and look up the entry in it that refers to tn.
 *
 * On success the entry name is copied into buf so that it ends at offset
 * *buflen, *buflen is lowered by the name length, and *dvp is unlocked
 * when it differs from vp.  Returns ENOMEM if the name does not fit.  On
 * failure after the vget, *dvp is released with vput(), or with vrele()
 * when it is vp itself.
 */
static int
tmpfs_vptocnp_fill(struct vnode *vp, struct tmpfs_node *tn,
    struct tmpfs_node *tnp, char *buf, size_t *buflen, struct vnode **dvp)
{
	struct tmpfs_dirent *de;
	int error, i;

	error = vn_vget_ino_gen(vp, tmpfs_vn_get_ino_alloc, tnp, LK_SHARED,
	    dvp);
	if (error != 0)
		return (error);
	error = tmpfs_vptocnp_dir(tn, tnp, &de);
	if (error == 0) {
		/* The name is placed at the tail of the remaining buffer. */
		i = *buflen;
		i -= de->td_namelen;
		if (i < 0) {
			error = ENOMEM;
		} else {
			bcopy(de->ud.td_name, buf + i, de->td_namelen);
			*buflen = i;
		}
	}
	if (error == 0) {
		if (vp != *dvp)
			VOP_UNLOCK(*dvp);
	} else {
		if (vp != *dvp)
			vput(*dvp);
		else
			vrele(vp);
	}
	return (error);
}

/*
 * VOP_VPTOCNP: find a parent directory of vp and the name vp has in it.
 *
 * For a directory the parent is read straight from tn_dir.tn_parent.  For
 * any other node type every directory node on the mount's tm_nodes_used
 * list is examined until one contains an entry for the node.  Returns
 * ENOENT when no parent is found.
 */
static int
tmpfs_vptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp, **dvp;
	struct tmpfs_node *tn, *tnp, *tnp1;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tm;
	char *buf;
	size_t *buflen;
	int error;

	vp = ap->a_vp;
	dvp = ap->a_vpp;
	buf = ap->a_buf;
	buflen = ap->a_buflen;

	tm = VFS_TO_TMPFS(vp->v_mount);
	tn = VP_TO_TMPFS_NODE(vp);
	if (tn->tn_type == VDIR) {
		tnp = tn->tn_dir.tn_parent;
		if (tnp == NULL)
			return (ENOENT);
		/* A reference on the parent is held across the fill call. */
		tmpfs_ref_node(tnp);
		error = tmpfs_vptocnp_fill(vp, tn,
		    tn->tn_dir.tn_parent, buf, buflen, dvp);
		tmpfs_free_node(tm, tnp);
		return (error);
	}
restart:
	TMPFS_LOCK(tm);
restart_locked:
	LIST_FOREACH_SAFE(tnp, &tm->tm_nodes_used, tn_entries, tnp1) {
		if (tnp->tn_type != VDIR)
			continue;
		TMPFS_NODE_LOCK(tnp);
		tmpfs_ref_node(tnp);

		/*
		 * tn_vnode cannot be instantiated while we hold the
		 * node lock, so the directory cannot be changed while
		 * we iterate over it.  Do this to avoid instantiating
		 * vnode for directories which cannot point to our
		 * node.
		 */
		error = tnp->tn_vnode == NULL ? tmpfs_vptocnp_dir(tn, tnp,
		    &de) : 0;

		if (error == 0) {
			/* Candidate directory: drop both locks for the fill. */
			TMPFS_NODE_UNLOCK(tnp);
			TMPFS_UNLOCK(tm);
			error = tmpfs_vptocnp_fill(vp, tn, tnp, buf, buflen,
			    dvp);
			if (error == 0) {
				tmpfs_free_node(tm, tnp);
				return (0);
			}
			if (VN_IS_DOOMED(vp)) {
				tmpfs_free_node(tm, tnp);
				return (ENOENT);
			}
			/* Not this one: re-take the locks and keep scanning. */
			TMPFS_LOCK(tm);
			TMPFS_NODE_LOCK(tnp);
		}
		/*
		 * Drop our reference.  A true return restarts the scan from
		 * the "restart" label, which re-acquires the mount lock.
		 */
		if (tmpfs_free_node_locked(tm, tnp, false)) {
			goto restart;
		} else {
			KASSERT(tnp->tn_refcount > 0,
			    ("node %p refcount zero", tnp));
			if (tnp->tn_attached) {
				/* Refresh the cached successor before unlocking. */
				tnp1 = LIST_NEXT(tnp, tn_entries);
				TMPFS_NODE_UNLOCK(tnp);
			} else {
				TMPFS_NODE_UNLOCK(tnp);
				goto restart_locked;
			}
		}
	}
	TMPFS_UNLOCK(tm);
	return (ENOENT);
}

/*
 * Free an extended attribute record together with its name and value
 * buffers.
 */
void
tmpfs_extattr_free(struct tmpfs_extattr *ea)
{
	free(ea->ea_name, M_TMPFSEA);
	free(ea->ea_value, M_TMPFSEA);
	free(ea, M_TMPFSEA);
}

/*
 * Adjust the mount's extended-attribute memory accounting by 'size'
 * bytes (which may be negative) under the mount lock.
 *
 * Returns false without changing anything when a positive request fails
 * the tmpfs_pages_check_avail() check, or when the new total would exceed
 * tm_ea_memory_max.  Returns true after updating tm_ea_memory_inuse.
 */
static bool
tmpfs_extattr_update_mem(struct tmpfs_mount *tmp, ssize_t size)
{
	TMPFS_LOCK(tmp);
	if (size > 0 &&
	    !tmpfs_pages_check_avail(tmp, howmany(size, PAGE_SIZE))) {
		TMPFS_UNLOCK(tmp);
		return (false);
	}
	if (tmp->tm_ea_memory_inuse + size > tmp->tm_ea_memory_max) {
		TMPFS_UNLOCK(tmp);
		return (false);
	}
	tmp->tm_ea_memory_inuse += size;
	TMPFS_UNLOCK(tmp);
	return (true);
}

/*
 * VOP_DELETEEXTATTR: remove the named extended attribute from the node.
 *
 * Returns EOPNOTSUPP for character and block devices, the error from
 * extattr_check_cred() (checked for VWRITE), EINVAL for a missing, empty
 * or over-long name, and ENOATTR when no attribute with that namespace
 * and name exists.
 */
static int
tmpfs_deleteextattr(struct vop_deleteextattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	struct tmpfs_extattr *ea;
	size_t namelen;
	ssize_t diff;
	int error;

	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);
	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type ==
VBLK)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VWRITE);
	if (error != 0)
		return (error);

	if (ap->a_name == NULL || ap->a_name[0] == '\0')
		return (EINVAL);

	namelen = strlen(ap->a_name);
	if (namelen > EXTATTR_MAXNAMELEN)
		return (EINVAL);

	/* ea is NULL after the loop when nothing matched. */
	LIST_FOREACH(ea, &node->tn_extattrs, ea_extattrs) {
		if (ea->ea_namespace == ap->a_attrnamespace &&
		    namelen == ea->ea_namelen &&
		    memcmp(ap->a_name, ea->ea_name, namelen) == 0)
			break;
	}

	if (ea == NULL)
		return (ENOATTR);

	LIST_REMOVE(ea, ea_extattrs);

	/* Give back record + name + value bytes; the result is not checked. */
	diff = -(sizeof(struct tmpfs_extattr) + namelen + ea->ea_size);
	tmpfs_extattr_update_mem(tmp, diff);

	tmpfs_extattr_free(ea);
	return (0);
}

/*
 * VOP_GETEXTATTR: look up the named extended attribute.
 *
 * Performs the same device-type, credential (VREAD) and name checks as
 * the delete path and returns ENOATTR when the attribute does not exist.
 * On a match the value size is stored in *a_size when a_size is non-NULL,
 * and the value is copied to a_uio when a_uio is non-NULL and the value
 * is not empty.
 */
static int
tmpfs_getextattr(struct vop_getextattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct tmpfs_node *node;
	struct tmpfs_extattr *ea;
	size_t namelen;
	int error;

	node = VP_TO_TMPFS_NODE(vp);
	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error != 0)
		return (error);

	if (ap->a_name == NULL || ap->a_name[0] == '\0')
		return (EINVAL);

	namelen = strlen(ap->a_name);
	if (namelen > EXTATTR_MAXNAMELEN)
		return (EINVAL);

	LIST_FOREACH(ea, &node->tn_extattrs, ea_extattrs) {
		if (ea->ea_namespace == ap->a_attrnamespace &&
		    namelen == ea->ea_namelen &&
		    memcmp(ap->a_name, ea->ea_name, namelen) == 0)
			break;
	}

	if (ea == NULL)
		return (ENOATTR);

	if (ap->a_size != NULL)
		*ap->a_size = ea->ea_size;

	/* error is still 0 here unless the copy below fails. */
	if (ap->a_uio != NULL && ea->ea_size != 0)
		error = uiomove(ea->ea_value, ea->ea_size, ap->a_uio);

	return (error);
}

/*
 * VOP_LISTEXTATTR: enumerate the attribute names in one namespace.
 *
 * Each matching attribute adds (name length + 1) to *a_size when a_size
 * is non-NULL.  When a_uio is non-NULL, one length byte followed by the
 * name bytes is copied out per attribute.  Stops at the first uiomove()
 * error and returns it.
 */
static int
tmpfs_listextattr(struct vop_listextattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct tmpfs_node *node;
	struct tmpfs_extattr *ea;
	int error;

	node = VP_TO_TMPFS_NODE(vp);
	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error != 0)
		return (error);

	if
(ap->a_size != NULL)
		*ap->a_size = 0;

	LIST_FOREACH(ea, &node->tn_extattrs, ea_extattrs) {
		if (ea->ea_namespace != ap->a_attrnamespace)
			continue;

		if (ap->a_size != NULL)
			*ap->a_size += ea->ea_namelen + 1;

		if (ap->a_uio != NULL) {
			/*
			 * Exactly one byte is copied from ea_namelen.
			 * NOTE(review): this assumes ea_namelen is a one-byte
			 * field -- confirm against struct tmpfs_extattr.
			 */
			error = uiomove(&ea->ea_namelen, 1, ap->a_uio);
			if (error != 0)
				break;
			error = uiomove(ea->ea_name, ea->ea_namelen, ap->a_uio);
			if (error != 0)
				break;
		}
	}

	return (error);
}

/*
 * VOP_SETEXTATTR: create or replace the named extended attribute with the
 * data supplied in a_uio.
 *
 * Returns EOPNOTSUPP for character and block devices, the error from
 * extattr_check_cred() (checked for VWRITE), EINVAL for a missing, empty
 * or over-long name, ENOSPC when the memory accounting rejects the
 * change, or the uiomove() error when copying in the value fails.
 *
 * A complete new record is built first.  An existing attribute with the
 * same namespace and name is removed only after the new value has been
 * copied in successfully.
 */
static int
tmpfs_setextattr(struct vop_setextattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	struct tmpfs_extattr *ea;
	struct tmpfs_extattr *new_ea;
	size_t attr_size;
	size_t namelen;
	ssize_t diff;
	int error;

	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);
	/* The value length is whatever the caller's uio still has to give. */
	attr_size = ap->a_uio->uio_resid;
	diff = 0;

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VWRITE);
	if (error != 0)
		return (error);

	if (ap->a_name == NULL || ap->a_name[0] == '\0')
		return (EINVAL);

	namelen = strlen(ap->a_name);
	if (namelen > EXTATTR_MAXNAMELEN)
		return (EINVAL);

	/*
	 * If the attribute already exists, start the accounting delta with
	 * the bytes that will be released.  ea stays pointing at the old
	 * record (NULL if there is none).
	 */
	LIST_FOREACH(ea, &node->tn_extattrs, ea_extattrs) {
		if (ea->ea_namespace == ap->a_attrnamespace &&
		    namelen == ea->ea_namelen &&
		    memcmp(ap->a_name, ea->ea_name, namelen) == 0) {
			diff -= sizeof(struct tmpfs_extattr) + ea->ea_namelen +
			    ea->ea_size;
			break;
		}
	}

	diff += sizeof(struct tmpfs_extattr) + namelen + attr_size;
	if (!tmpfs_extattr_update_mem(tmp, diff))
		return (ENOSPC);
	new_ea = malloc(sizeof(struct tmpfs_extattr), M_TMPFSEA, M_WAITOK);
	new_ea->ea_namespace = ap->a_attrnamespace;
	/* The stored name is not NUL-terminated; ea_namelen gives its length. */
	new_ea->ea_name = malloc(namelen, M_TMPFSEA, M_WAITOK);
	new_ea->ea_namelen = namelen;
	memcpy(new_ea->ea_name, ap->a_name, namelen);
	if (attr_size != 0) {
		new_ea->ea_value = malloc(attr_size, M_TMPFSEA, M_WAITOK);
		new_ea->ea_size = attr_size;
		error = uiomove(new_ea->ea_value, attr_size, ap->a_uio);
	} else {
		new_ea->ea_value = NULL;
		new_ea->ea_size = 0;
	}
	if (error != 0) {
		/* Copy-in failed: revert the accounting, drop the new record. */
		tmpfs_extattr_update_mem(tmp, -diff);
		tmpfs_extattr_free(new_ea);
		return (error);
	}
	if (ea != NULL) {
		LIST_REMOVE(ea, ea_extattrs);
		tmpfs_extattr_free(ea);
	}
	LIST_INSERT_HEAD(&node->tn_extattrs, new_ea, ea_extattrs);
	return (0);
}

/*
 * Return the offset of the next data at or after noff in obj, as reported
 * by swap_pager_seek_data().  When the page containing noff is itself
 * reported, noff is returned unchanged rather than rounded to the page
 * start.  The "_locked" suffix and the caller suggest the object lock
 * must be held.
 */
static off_t
tmpfs_seek_data_locked(vm_object_t obj, off_t noff)
{
	vm_pindex_t p;

	p = swap_pager_seek_data(obj, OFF_TO_IDX(noff));
	return (p == OFF_TO_IDX(noff) ? noff : IDX_TO_OFF(p));
}

/*
 * Bound a seek offset by the file size.  Offsets below tn_size are left
 * alone.  At or past end of file a data seek fails with ENXIO, while a
 * hole seek is clamped to tn_size.
 */
static int
tmpfs_seek_clamp(struct tmpfs_node *tn, off_t *noff, bool seekdata)
{
	if (*noff < tn->tn_size)
		return (0);
	if (seekdata)
		return (ENXIO);
	*noff = tn->tn_size;
	return (0);
}

/*
 * Return the byte offset of the page index that swap_pager_seek_hole()
 * reports for a search starting at noff.
 */
static off_t
tmpfs_seek_hole_locked(vm_object_t obj, off_t noff)
{

	return (IDX_TO_OFF(swap_pager_seek_hole(obj, OFF_TO_IDX(noff))));
}

/*
 * Common implementation of the data/hole seek: update *off to the next
 * data (seekdata == true) or hole (seekdata == false) position.
 *
 * Returns ENOTTY for non-regular files and ENXIO for a negative offset
 * or, for data seeks, an offset at or beyond end of file.  *off is only
 * written on success.
 */
static int
tmpfs_seek_datahole(struct vnode *vp, off_t *off, bool seekdata)
{
	struct tmpfs_node *tn;
	vm_object_t obj;
	off_t noff;
	int error;

	if (vp->v_type != VREG)
		return (ENOTTY);
	tn = VP_TO_TMPFS_NODE(vp);
	noff = *off;
	if (noff < 0)
		return (ENXIO);
	error = tmpfs_seek_clamp(tn, &noff, seekdata);
	if (error != 0)
		return (error);
	obj = tn->tn_reg.tn_aobj;

	VM_OBJECT_RLOCK(obj);
	noff = seekdata ? tmpfs_seek_data_locked(obj, noff) :
	    tmpfs_seek_hole_locked(obj, noff);
	VM_OBJECT_RUNLOCK(obj);

	/* The pager result is clamped against the file size once more. */
	error = tmpfs_seek_clamp(tn, &noff, seekdata);
	if (error == 0)
		*off = noff;
	return (error);
}

/*
 * VOP_IOCTL: handles FIOSEEKDATA and FIOSEEKHOLE by taking the vnode lock
 * shared and calling tmpfs_seek_datahole() on the off_t passed in a_data.
 * A failure to lock the vnode is reported as EBADF.  Every other command
 * returns ENOTTY.
 */
static int
tmpfs_ioctl(struct vop_ioctl_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int error = 0;

	switch (ap->a_command) {
	case FIOSEEKDATA:
	case FIOSEEKHOLE:
		error = vn_lock(vp, LK_SHARED);
		if (error != 0) {
			error = EBADF;
			break;
		}
		error = tmpfs_seek_datahole(vp, (off_t *)ap->a_data,
		    ap->a_command == FIOSEEKDATA);
		VOP_UNLOCK(vp);
		break;
	default:
		error = ENOTTY;
		break;
	}
	return (error);
}

/*
 * Vnode operations vector used for files stored in a tmpfs file system.
*/ struct vop_vector tmpfs_vnodeop_entries = { .vop_default = &default_vnodeops, .vop_lookup = vfs_cache_lookup, .vop_cachedlookup = tmpfs_cached_lookup, .vop_create = tmpfs_create, .vop_mknod = tmpfs_mknod, .vop_open = tmpfs_open, .vop_close = tmpfs_close, .vop_fplookup_vexec = tmpfs_fplookup_vexec, .vop_fplookup_symlink = tmpfs_fplookup_symlink, .vop_access = tmpfs_access, .vop_stat = tmpfs_stat, .vop_getattr = tmpfs_getattr, .vop_setattr = tmpfs_setattr, .vop_read = tmpfs_read, .vop_read_pgcache = tmpfs_read_pgcache, .vop_write = tmpfs_write, .vop_deallocate = tmpfs_deallocate, .vop_fsync = tmpfs_fsync, .vop_remove = tmpfs_remove, .vop_link = tmpfs_link, .vop_rename = tmpfs_rename, .vop_mkdir = tmpfs_mkdir, .vop_rmdir = tmpfs_rmdir, .vop_symlink = tmpfs_symlink, .vop_readdir = tmpfs_readdir, .vop_readlink = tmpfs_readlink, .vop_inactive = tmpfs_inactive, .vop_need_inactive = tmpfs_need_inactive, .vop_reclaim = tmpfs_reclaim, .vop_print = tmpfs_print, .vop_pathconf = tmpfs_pathconf, .vop_vptofh = tmpfs_vptofh, .vop_whiteout = tmpfs_whiteout, .vop_bmap = VOP_EOPNOTSUPP, .vop_vptocnp = tmpfs_vptocnp, .vop_lock1 = vop_lock, .vop_unlock = vop_unlock, .vop_islocked = vop_islocked, .vop_deleteextattr = tmpfs_deleteextattr, .vop_getextattr = tmpfs_getextattr, .vop_listextattr = tmpfs_listextattr, .vop_setextattr = tmpfs_setextattr, .vop_add_writecount = vop_stdadd_writecount_nomsync, .vop_ioctl = tmpfs_ioctl, }; VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_entries); /* * Same vector for mounts which do not use namecache. */ struct vop_vector tmpfs_vnodeop_nonc_entries = { .vop_default = &tmpfs_vnodeop_entries, .vop_lookup = tmpfs_lookup, }; VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_nonc_entries); diff --git a/sys/fs/udf/udf_vnops.c b/sys/fs/udf/udf_vnops.c index 98a779280690..88bf4917a851 100644 --- a/sys/fs/udf/udf_vnops.c +++ b/sys/fs/udf/udf_vnops.c @@ -1,1482 +1,1484 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001, 2002 Scott Long * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ /* udf_vnops.c */ /* Take care of the vnode side of things */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern struct iconv_functions *udf_iconv; static vop_access_t udf_access; static vop_getattr_t udf_getattr; static vop_open_t udf_open; static vop_ioctl_t udf_ioctl; static vop_pathconf_t udf_pathconf; static vop_print_t udf_print; static vop_read_t udf_read; static vop_readdir_t udf_readdir; static vop_readlink_t udf_readlink; static vop_setattr_t udf_setattr; static vop_strategy_t udf_strategy; static vop_bmap_t udf_bmap; static vop_cachedlookup_t udf_lookup; static vop_reclaim_t udf_reclaim; static vop_vptofh_t udf_vptofh; static int udf_readatoffset(struct udf_node *node, int *size, off_t offset, struct buf **bp, uint8_t **data); static int udf_bmap_internal(struct udf_node *node, off_t offset, daddr_t *sector, uint32_t *max_size); static struct vop_vector udf_vnodeops = { .vop_default = &default_vnodeops, .vop_access = udf_access, .vop_bmap = udf_bmap, .vop_cachedlookup = udf_lookup, .vop_getattr = udf_getattr, .vop_ioctl = udf_ioctl, .vop_lookup = vfs_cache_lookup, .vop_open = udf_open, .vop_pathconf = udf_pathconf, .vop_print = udf_print, .vop_read = udf_read, .vop_readdir = udf_readdir, .vop_readlink = udf_readlink, .vop_reclaim = udf_reclaim, .vop_setattr = udf_setattr, .vop_strategy = udf_strategy, .vop_vptofh = udf_vptofh, }; VFS_VOP_VECTOR_REGISTER(udf_vnodeops); struct vop_vector udf_fifoops = { .vop_default = &fifo_specops, .vop_access = udf_access, .vop_getattr = udf_getattr, .vop_pathconf = udf_pathconf, .vop_print = udf_print, .vop_reclaim = udf_reclaim, .vop_setattr = udf_setattr, .vop_vptofh = udf_vptofh, }; VFS_VOP_VECTOR_REGISTER(udf_fifoops); static MALLOC_DEFINE(M_UDFFID, "udf_fid", "UDF FileId structure"); static MALLOC_DEFINE(M_UDFDS, "udf_ds", "UDF Dirstream structure"); #define 
UDF_INVALID_BMAP -1 int udf_allocv(struct mount *mp, struct vnode **vpp, struct thread *td) { int error; struct vnode *vp; error = getnewvnode("udf", mp, &udf_vnodeops, &vp); if (error) { printf("udf_allocv: failed to allocate new vnode\n"); return (error); } *vpp = vp; return (0); } /* Convert file entry permission (5 bits per owner/group/user) to a mode_t */ static mode_t udf_permtomode(struct udf_node *node) { uint32_t perm; uint16_t flags; mode_t mode; perm = le32toh(node->fentry->perm); flags = le16toh(node->fentry->icbtag.flags); mode = perm & UDF_FENTRY_PERM_USER_MASK; mode |= ((perm & UDF_FENTRY_PERM_GRP_MASK) >> 2); mode |= ((perm & UDF_FENTRY_PERM_OWNER_MASK) >> 4); mode |= ((flags & UDF_ICB_TAG_FLAGS_STICKY) << 4); mode |= ((flags & UDF_ICB_TAG_FLAGS_SETGID) << 6); mode |= ((flags & UDF_ICB_TAG_FLAGS_SETUID) << 8); return (mode); } static int udf_access(struct vop_access_args *a) { struct vnode *vp; struct udf_node *node; accmode_t accmode; mode_t mode; vp = a->a_vp; node = VTON(vp); accmode = a->a_accmode; if (accmode & VWRITE) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: return (EROFS); /* NOT REACHED */ default: break; } } mode = udf_permtomode(node); return (vaccess(vp->v_type, mode, node->fentry->uid, node->fentry->gid, accmode, a->a_cred)); } static int udf_open(struct vop_open_args *ap) { struct udf_node *np = VTON(ap->a_vp); off_t fsize; fsize = le64toh(np->fentry->inf_len); vnode_create_vobject(ap->a_vp, fsize, ap->a_td); return 0; } static const int mon_lens[2][12] = { {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334}, {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335} }; static int udf_isaleapyear(int year) { int i; i = (year % 4) ? 0 : 1; i &= (year % 100) ? 1 : 0; i |= (year % 400) ? 0 : 1; return i; } /* * Timezone calculation compliments of Julian Elischer . 
*/ static void udf_timetotimespec(struct timestamp *time, struct timespec *t) { int i, lpyear, daysinyear, year, startyear; union { uint16_t u_tz_offset; int16_t s_tz_offset; } tz; /* * DirectCD seems to like using bogus year values. * Don't trust time->month as it will be used for an array index. */ year = le16toh(time->year); if (year < 1970 || time->month < 1 || time->month > 12) { t->tv_sec = 0; t->tv_nsec = 0; return; } /* Calculate the time and day */ t->tv_sec = time->second; t->tv_sec += time->minute * 60; t->tv_sec += time->hour * 3600; t->tv_sec += (time->day - 1) * 3600 * 24; /* Calculate the month */ lpyear = udf_isaleapyear(year); t->tv_sec += mon_lens[lpyear][time->month - 1] * 3600 * 24; /* Speed up the calculation */ startyear = 1970; if (year > 2009) { t->tv_sec += 1262304000; startyear += 40; } else if (year > 1999) { t->tv_sec += 946684800; startyear += 30; } else if (year > 1989) { t->tv_sec += 631152000; startyear += 20; } else if (year > 1979) { t->tv_sec += 315532800; startyear += 10; } daysinyear = (year - startyear) * 365; for (i = startyear; i < year; i++) daysinyear += udf_isaleapyear(i); t->tv_sec += daysinyear * 3600 * 24; /* Calculate microseconds */ t->tv_nsec = time->centisec * 10000 + time->hund_usec * 100 + time->usec; /* * Calculate the time zone. The timezone is 12 bit signed 2's * complement, so we gotta do some extra magic to handle it right. 
*/ tz.u_tz_offset = le16toh(time->type_tz); tz.u_tz_offset &= 0x0fff; if (tz.u_tz_offset & 0x0800) tz.u_tz_offset |= 0xf000; /* extend the sign to 16 bits */ if ((le16toh(time->type_tz) & 0x1000) && (tz.s_tz_offset != -2047)) t->tv_sec -= tz.s_tz_offset * 60; return; } static int udf_getattr(struct vop_getattr_args *a) { struct vnode *vp; struct udf_node *node; struct vattr *vap; struct file_entry *fentry; vp = a->a_vp; vap = a->a_vap; node = VTON(vp); fentry = node->fentry; vap->va_fsid = dev2udev(node->udfmp->im_dev); vap->va_fileid = node->hash_id; vap->va_mode = udf_permtomode(node); vap->va_nlink = le16toh(fentry->link_cnt); /* * XXX The spec says that -1 is valid for uid/gid and indicates an * invalid uid/gid. How should this be represented? */ vap->va_uid = (le32toh(fentry->uid) == -1) ? 0 : le32toh(fentry->uid); vap->va_gid = (le32toh(fentry->gid) == -1) ? 0 : le32toh(fentry->gid); udf_timetotimespec(&fentry->atime, &vap->va_atime); udf_timetotimespec(&fentry->mtime, &vap->va_mtime); vap->va_ctime = vap->va_mtime; /* XXX Stored as an Extended Attribute */ vap->va_rdev = NODEV; if (vp->v_type & VDIR) { /* * Directories that are recorded within their ICB will show * as having 0 blocks recorded. Since tradition dictates * that directories consume at least one logical block, * make it appear so. 
*/ if (fentry->logblks_rec != 0) { vap->va_size = le64toh(fentry->logblks_rec) * node->udfmp->bsize; } else { vap->va_size = node->udfmp->bsize; } } else { vap->va_size = le64toh(fentry->inf_len); } vap->va_flags = 0; vap->va_gen = 1; vap->va_blocksize = node->udfmp->bsize; vap->va_bytes = le64toh(fentry->inf_len); vap->va_type = vp->v_type; vap->va_filerev = 0; /* XXX */ return (0); } static int udf_setattr(struct vop_setattr_args *a) { struct vnode *vp; struct vattr *vap; vp = a->a_vp; vap = a->a_vap; if (vap->va_flags != (u_long)VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) return (EROFS); if (vap->va_size != (u_quad_t)VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: return (EROFS); case VCHR: case VBLK: case VSOCK: case VFIFO: case VNON: case VBAD: case VMARKER: return (0); } } return (0); } /* * File specific ioctls. */ static int udf_ioctl(struct vop_ioctl_args *a) { printf("%s called\n", __func__); return (ENOTTY); } /* * I'm not sure that this has much value in a read-only filesystem, but * cd9660 has it too. 
*/ static int udf_pathconf(struct vop_pathconf_args *a) { switch (a->a_name) { case _PC_FILESIZEBITS: *a->a_retval = 64; return (0); case _PC_LINK_MAX: *a->a_retval = 65535; return (0); case _PC_NAME_MAX: *a->a_retval = NAME_MAX; return (0); case _PC_SYMLINK_MAX: *a->a_retval = MAXPATHLEN; return (0); case _PC_NO_TRUNC: *a->a_retval = 1; return (0); case _PC_PIPE_BUF: if (a->a_vp->v_type == VDIR || a->a_vp->v_type == VFIFO) { *a->a_retval = PIPE_BUF; return (0); } return (EINVAL); default: return (vop_stdpathconf(a)); } } static int udf_print(struct vop_print_args *ap) { struct vnode *vp = ap->a_vp; struct udf_node *node = VTON(vp); printf(" ino %lu, on dev %s", (u_long)node->hash_id, devtoname(node->udfmp->im_dev)); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } #define lblkno(udfmp, loc) ((loc) >> (udfmp)->bshift) #define blkoff(udfmp, loc) ((loc) & (udfmp)->bmask) #define lblktosize(udfmp, blk) ((blk) << (udfmp)->bshift) static inline int is_data_in_fentry(const struct udf_node *node) { const struct file_entry *fentry = node->fentry; return ((le16toh(fentry->icbtag.flags) & 0x7) == 3); } static int udf_read(struct vop_read_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct udf_node *node = VTON(vp); struct udf_mnt *udfmp; struct file_entry *fentry; struct buf *bp; uint8_t *data; daddr_t lbn, rablock; off_t diff, fsize; ssize_t n; int error = 0; long size, on; if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); if (is_data_in_fentry(node)) { fentry = node->fentry; data = &fentry->data[le32toh(fentry->l_ea)]; fsize = le32toh(fentry->l_ad); n = uio->uio_resid; diff = fsize - uio->uio_offset; if (diff <= 0) return (0); if (diff < n) n = diff; error = uiomove(data + uio->uio_offset, (int)n, uio); return (error); } fsize = le64toh(node->fentry->inf_len); udfmp = node->udfmp; do { lbn = lblkno(udfmp, uio->uio_offset); on = blkoff(udfmp, uio->uio_offset); n = min((u_int)(udfmp->bsize - on), 
uio->uio_resid); diff = fsize - uio->uio_offset; if (diff <= 0) return (0); if (diff < n) n = diff; size = udfmp->bsize; rablock = lbn + 1; if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { if (lblktosize(udfmp, rablock) < fsize) { error = cluster_read(vp, fsize, lbn, size, NOCRED, uio->uio_resid, (ap->a_ioflag >> 16), 0, &bp); } else { error = bread(vp, lbn, size, NOCRED, &bp); } } else { error = bread(vp, lbn, size, NOCRED, &bp); } if (error != 0) { brelse(bp); return (error); } n = min(n, size - bp->b_resid); error = uiomove(bp->b_data + on, (int)n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); } /* * Call the OSTA routines to translate the name from a CS0 dstring to a * 16-bit Unicode String. Hooks need to be placed in here to translate from * Unicode to the encoding that the kernel/user expects. Return the length * of the translated string. */ static int udf_transname(char *cs0string, char *destname, int len, struct udf_mnt *udfmp) { unicode_t *transname; char *unibuf, *unip; int i, destlen; ssize_t unilen = 0; size_t destleft = MAXNAMLEN; /* Convert 16-bit Unicode to destname */ if (udfmp->im_flags & UDFMNT_KICONV && udf_iconv) { /* allocate a buffer big enough to hold an 8->16 bit expansion */ unibuf = uma_zalloc(udf_zone_trans, M_WAITOK); unip = unibuf; if ((unilen = (ssize_t)udf_UncompressUnicodeByte(len, cs0string, unibuf)) == -1) { printf("udf: Unicode translation failed\n"); uma_zfree(udf_zone_trans, unibuf); return 0; } while (unilen > 0 && destleft > 0) { udf_iconv->conv(udfmp->im_d2l, __DECONST(const char **, &unibuf), (size_t *)&unilen, (char **)&destname, &destleft); /* Unconverted character found */ if (unilen > 0 && destleft > 0) { *destname++ = '?'; destleft--; unibuf += 2; unilen -= 2; } } uma_zfree(udf_zone_trans, unip); *destname = '\0'; destlen = MAXNAMLEN - (int)destleft; } else { /* allocate a buffer big enough to hold an 8->16 bit expansion */ transname = uma_zalloc(udf_zone_trans, M_WAITOK); 
if ((unilen = (ssize_t)udf_UncompressUnicode(len, cs0string, transname)) == -1) { printf("udf: Unicode translation failed\n"); uma_zfree(udf_zone_trans, transname); return 0; } for (i = 0; i < unilen ; i++) { if (transname[i] & 0xff00) { destname[i] = '.'; /* Fudge the 16bit chars */ } else { destname[i] = transname[i] & 0xff; } } uma_zfree(udf_zone_trans, transname); destname[unilen] = 0; destlen = (int)unilen; } return (destlen); } /* * Compare a CS0 dstring with a name passed in from the VFS layer. Return * 0 on a successful match, nonzero otherwise. Unicode work may need to be done * here also. */ static int udf_cmpname(char *cs0string, char *cmpname, int cs0len, int cmplen, struct udf_mnt *udfmp) { char *transname; int error = 0; /* This is overkill, but not worth creating a new zone */ transname = uma_zalloc(udf_zone_trans, M_WAITOK); cs0len = udf_transname(cs0string, transname, cs0len, udfmp); /* Easy check. If they aren't the same length, they aren't equal */ if ((cs0len == 0) || (cs0len != cmplen)) error = -1; else error = bcmp(transname, cmpname, cmplen); uma_zfree(udf_zone_trans, transname); return (error); } struct udf_uiodir { struct dirent *dirent; uint64_t *cookies; int ncookies; int acookies; int eofflag; }; static int udf_uiodir(struct udf_uiodir *uiodir, int de_size, struct uio *uio, long cookie) { if (uiodir->cookies != NULL) { if (++uiodir->acookies > uiodir->ncookies) { uiodir->eofflag = 0; return (-1); } *uiodir->cookies++ = cookie; } if (uio->uio_resid < de_size) { uiodir->eofflag = 0; return (-1); } return (uiomove(uiodir->dirent, de_size, uio)); } static struct udf_dirstream * udf_opendir(struct udf_node *node, int offset, int fsize, struct udf_mnt *udfmp) { struct udf_dirstream *ds; ds = uma_zalloc(udf_zone_ds, M_WAITOK | M_ZERO); ds->node = node; ds->offset = offset; ds->udfmp = udfmp; ds->fsize = fsize; return (ds); } static struct fileid_desc * udf_getfid(struct udf_dirstream *ds) { struct fileid_desc *fid; int error, frag_size = 0, 
total_fid_size; /* End of directory? */ if (ds->offset + ds->off >= ds->fsize) { ds->error = 0; return (NULL); } /* Grab the first extent of the directory */ if (ds->off == 0) { ds->size = 0; error = udf_readatoffset(ds->node, &ds->size, ds->offset, &ds->bp, &ds->data); if (error) { ds->error = error; if (ds->bp != NULL) brelse(ds->bp); return (NULL); } } /* * Clean up from a previous fragmented FID. * XXX Is this the right place for this? */ if (ds->fid_fragment && ds->buf != NULL) { ds->fid_fragment = 0; free(ds->buf, M_UDFFID); } fid = (struct fileid_desc*)&ds->data[ds->off]; /* * Check to see if the fid is fragmented. The first test * ensures that we don't wander off the end of the buffer * looking for the l_iu and l_fi fields. */ if (ds->off + UDF_FID_SIZE > ds->size || ds->off + le16toh(fid->l_iu) + fid->l_fi + UDF_FID_SIZE > ds->size){ /* Copy what we have of the fid into a buffer */ frag_size = ds->size - ds->off; if (frag_size >= ds->udfmp->bsize) { printf("udf: invalid FID fragment\n"); ds->error = EINVAL; return (NULL); } /* * File ID descriptors can only be at most one * logical sector in size. */ ds->buf = malloc(ds->udfmp->bsize, M_UDFFID, M_WAITOK | M_ZERO); bcopy(fid, ds->buf, frag_size); /* Reduce all of the casting magic */ fid = (struct fileid_desc*)ds->buf; if (ds->bp != NULL) brelse(ds->bp); /* Fetch the next allocation */ ds->offset += ds->size; ds->size = 0; error = udf_readatoffset(ds->node, &ds->size, ds->offset, &ds->bp, &ds->data); if (error) { ds->error = error; return (NULL); } /* * If the fragment was so small that we didn't get * the l_iu and l_fi fields, copy those in. */ if (frag_size < UDF_FID_SIZE) bcopy(ds->data, &ds->buf[frag_size], UDF_FID_SIZE - frag_size); /* * Now that we have enough of the fid to work with, * copy in the rest of the fid from the new * allocation. 
*/ total_fid_size = UDF_FID_SIZE + le16toh(fid->l_iu) + fid->l_fi; if (total_fid_size > ds->udfmp->bsize) { printf("udf: invalid FID\n"); ds->error = EIO; return (NULL); } bcopy(ds->data, &ds->buf[frag_size], total_fid_size - frag_size); ds->fid_fragment = 1; } else { total_fid_size = le16toh(fid->l_iu) + fid->l_fi + UDF_FID_SIZE; } /* * Update the offset. Align on a 4 byte boundary because the * UDF spec says so. */ ds->this_off = ds->offset + ds->off; if (!ds->fid_fragment) { ds->off += (total_fid_size + 3) & ~0x03; } else { ds->off = (total_fid_size - frag_size + 3) & ~0x03; } return (fid); } static void udf_closedir(struct udf_dirstream *ds) { if (ds->bp != NULL) brelse(ds->bp); if (ds->fid_fragment && ds->buf != NULL) free(ds->buf, M_UDFFID); uma_zfree(udf_zone_ds, ds); } static int udf_readdir(struct vop_readdir_args *a) { struct vnode *vp; struct uio *uio; struct dirent dir; struct udf_node *node; struct udf_mnt *udfmp; struct fileid_desc *fid; struct udf_uiodir uiodir; struct udf_dirstream *ds; uint64_t *cookies = NULL; int ncookies; int error = 0; vp = a->a_vp; uio = a->a_uio; node = VTON(vp); udfmp = node->udfmp; uiodir.eofflag = 1; if (a->a_ncookies != NULL) { /* * Guess how many entries are needed. If we run out, this * function will be called again and thing will pick up were * it left off. */ ncookies = uio->uio_resid / 8; cookies = malloc(sizeof(*cookies) * ncookies, M_TEMP, M_WAITOK); uiodir.ncookies = ncookies; uiodir.cookies = cookies; uiodir.acookies = 0; } else { uiodir.cookies = NULL; } /* * Iterate through the file id descriptors. Give the parent dir * entry special attention. */ ds = udf_opendir(node, uio->uio_offset, le64toh(node->fentry->inf_len), node->udfmp); while ((fid = udf_getfid(ds)) != NULL) { /* XXX Should we return an error on a bad fid? */ if (udf_checktag(&fid->tag, TAGID_FID)) { printf("Invalid FID tag\n"); hexdump(fid, UDF_FID_SIZE, NULL, 0); error = EIO; break; } /* Is this a deleted file? 
*/ if (fid->file_char & UDF_FILE_CHAR_DEL) continue; if ((fid->l_fi == 0) && (fid->file_char & UDF_FILE_CHAR_PAR)) { /* Do up the '.' and '..' entries. Dummy values are * used for the cookies since the offset here is * usually zero, and NFS doesn't like that value */ dir.d_fileno = node->hash_id; dir.d_type = DT_DIR; dir.d_name[0] = '.'; dir.d_namlen = 1; dir.d_reclen = GENERIC_DIRSIZ(&dir); dir.d_off = 1; dirent_terminate(&dir); uiodir.dirent = &dir; error = udf_uiodir(&uiodir, dir.d_reclen, uio, 1); if (error) break; dir.d_fileno = udf_getid(&fid->icb); dir.d_type = DT_DIR; dir.d_name[0] = '.'; dir.d_name[1] = '.'; dir.d_namlen = 2; dir.d_reclen = GENERIC_DIRSIZ(&dir); dir.d_off = 2; dirent_terminate(&dir); uiodir.dirent = &dir; error = udf_uiodir(&uiodir, dir.d_reclen, uio, 2); } else { dir.d_namlen = udf_transname(&fid->data[fid->l_iu], &dir.d_name[0], fid->l_fi, udfmp); dir.d_fileno = udf_getid(&fid->icb); dir.d_type = (fid->file_char & UDF_FILE_CHAR_DIR) ? DT_DIR : DT_UNKNOWN; dir.d_reclen = GENERIC_DIRSIZ(&dir); dir.d_off = ds->this_off; dirent_terminate(&dir); uiodir.dirent = &dir; error = udf_uiodir(&uiodir, dir.d_reclen, uio, ds->this_off); } if (error) break; uio->uio_offset = ds->offset + ds->off; } /* tell the calling layer whether we need to be called again */ *a->a_eofflag = uiodir.eofflag; if (error < 0) error = 0; if (!error) error = ds->error; udf_closedir(ds); if (a->a_ncookies != NULL) { if (error) free(cookies, M_TEMP); else { *a->a_ncookies = uiodir.acookies; *a->a_cookies = cookies; } } return (error); } static int udf_readlink(struct vop_readlink_args *ap) { struct path_component *pc, *end; struct vnode *vp; struct uio uio; struct iovec iov[1]; struct udf_node *node; void *buf; char *cp; int error, len, root; /* * A symbolic link in UDF is a list of variable-length path * component structures. We build a pathname in the caller's * uio by traversing this list. 
*/ vp = ap->a_vp; node = VTON(vp); len = le64toh(node->fentry->inf_len); buf = malloc(len, M_DEVBUF, M_WAITOK); iov[0].iov_len = len; iov[0].iov_base = buf; uio.uio_iov = iov; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_resid = iov[0].iov_len; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = curthread; error = VOP_READ(vp, &uio, 0, ap->a_cred); if (error) goto error; pc = buf; end = (void *)((char *)buf + len); root = 0; while (pc < end) { switch (pc->type) { case UDF_PATH_ROOT: /* Only allow this at the beginning of a path. */ if ((void *)pc != buf) { error = EINVAL; goto error; } cp = "/"; len = 1; root = 1; break; case UDF_PATH_DOT: cp = "."; len = 1; break; case UDF_PATH_DOTDOT: cp = ".."; len = 2; break; case UDF_PATH_PATH: if (pc->length == 0) { error = EINVAL; goto error; } /* * XXX: We only support CS8 which appears to map * to ASCII directly. */ switch (pc->identifier[0]) { case 8: cp = pc->identifier + 1; len = pc->length - 1; break; default: error = EOPNOTSUPP; goto error; } break; default: error = EINVAL; goto error; } /* * If this is not the first component, insert a path * separator. */ if (pc != buf) { /* If we started with root we already have a "/". */ if (root) goto skipslash; root = 0; if (ap->a_uio->uio_resid < 1) { error = ENAMETOOLONG; goto error; } error = uiomove("/", 1, ap->a_uio); if (error) break; } skipslash: /* Append string at 'cp' of length 'len' to our path. */ if (len > ap->a_uio->uio_resid) { error = ENAMETOOLONG; goto error; } error = uiomove(cp, len, ap->a_uio); if (error) break; /* Advance to next component. 
*/ pc = (void *)((char *)pc + 4 + pc->length); } error: free(buf, M_DEVBUF); return (error); } static int udf_strategy(struct vop_strategy_args *a) { struct buf *bp; struct vnode *vp; struct udf_node *node; struct bufobj *bo; off_t offset; uint32_t maxsize; daddr_t sector; int error; bp = a->a_bp; vp = a->a_vp; node = VTON(vp); if (bp->b_blkno == bp->b_lblkno) { offset = lblktosize(node->udfmp, bp->b_lblkno); error = udf_bmap_internal(node, offset, §or, &maxsize); if (error) { clrbuf(bp); bp->b_blkno = -1; bufdone(bp); return (0); } /* bmap gives sector numbers, bio works with device blocks */ bp->b_blkno = sector << (node->udfmp->bshift - DEV_BSHIFT); } bo = node->udfmp->im_bo; bp->b_iooffset = dbtob(bp->b_blkno); BO_STRATEGY(bo, bp); return (0); } static int udf_bmap(struct vop_bmap_args *a) { struct udf_node *node; uint32_t max_size; daddr_t lsector; int nblk; int error; node = VTON(a->a_vp); if (a->a_bop != NULL) *a->a_bop = &node->udfmp->im_devvp->v_bufobj; if (a->a_bnp == NULL) return (0); if (a->a_runb) *a->a_runb = 0; /* * UDF_INVALID_BMAP means data embedded into fentry, this is an internal * error that should not be propagated to calling code. * Most obvious mapping for this error is EOPNOTSUPP as we can not truly * translate block numbers in this case. * Incidentally, this return code will make vnode pager to use VOP_READ * to get data for mmap-ed pages and udf_read knows how to do the right * thing for this kind of files. */ error = udf_bmap_internal(node, a->a_bn << node->udfmp->bshift, &lsector, &max_size); if (error == UDF_INVALID_BMAP) return (EOPNOTSUPP); if (error) return (error); /* Translate logical to physical sector number */ *a->a_bnp = lsector << (node->udfmp->bshift - DEV_BSHIFT); /* * Determine maximum number of readahead blocks following the * requested block. 
*/ if (a->a_runp) { nblk = (max_size >> node->udfmp->bshift) - 1; if (nblk <= 0) *a->a_runp = 0; else if (nblk >= (MAXBSIZE >> node->udfmp->bshift)) *a->a_runp = (MAXBSIZE >> node->udfmp->bshift) - 1; else *a->a_runp = nblk; } if (a->a_runb) { *a->a_runb = 0; } return (0); } /* * The all powerful VOP_LOOKUP(). */ static int udf_lookup(struct vop_cachedlookup_args *a) { struct vnode *dvp; struct vnode *tdp = NULL; struct vnode **vpp = a->a_vpp; struct udf_node *node; struct udf_mnt *udfmp; struct fileid_desc *fid = NULL; struct udf_dirstream *ds; u_long nameiop; u_long flags; char *nameptr; long namelen; ino_t id = 0; int offset, error = 0; int fsize, lkflags, ltype, numdirpasses; dvp = a->a_dvp; node = VTON(dvp); udfmp = node->udfmp; nameiop = a->a_cnp->cn_nameiop; flags = a->a_cnp->cn_flags; lkflags = a->a_cnp->cn_lkflags; nameptr = a->a_cnp->cn_nameptr; namelen = a->a_cnp->cn_namelen; fsize = le64toh(node->fentry->inf_len); /* * If this is a LOOKUP and we've already partially searched through * the directory, pick up where we left off and flag that the * directory may need to be searched twice. For a full description, * see /sys/fs/cd9660/cd9660_lookup.c:cd9660_lookup() */ if (nameiop != LOOKUP || node->diroff == 0 || node->diroff > fsize) { offset = 0; numdirpasses = 1; } else { offset = node->diroff; numdirpasses = 2; nchstats.ncs_2passes++; } lookloop: ds = udf_opendir(node, offset, fsize, udfmp); while ((fid = udf_getfid(ds)) != NULL) { /* XXX Should we return an error on a bad fid? */ if (udf_checktag(&fid->tag, TAGID_FID)) { printf("udf_lookup: Invalid tag\n"); error = EIO; break; } /* Is this a deleted file? 
*/ if (fid->file_char & UDF_FILE_CHAR_DEL) continue; if ((fid->l_fi == 0) && (fid->file_char & UDF_FILE_CHAR_PAR)) { if (flags & ISDOTDOT) { id = udf_getid(&fid->icb); break; } } else { if (!(udf_cmpname(&fid->data[fid->l_iu], nameptr, fid->l_fi, namelen, udfmp))) { id = udf_getid(&fid->icb); break; } } } if (!error) error = ds->error; /* XXX Bail out here? */ if (error) { udf_closedir(ds); return (error); } /* Did we have a match? */ if (id) { /* * Remember where this entry was if it's the final * component. */ if ((flags & ISLASTCN) && nameiop == LOOKUP) node->diroff = ds->offset + ds->off; if (numdirpasses == 2) nchstats.ncs_pass2++; udf_closedir(ds); if (flags & ISDOTDOT) { error = vn_vget_ino(dvp, id, lkflags, &tdp); } else if (node->hash_id == id) { VREF(dvp); /* we want ourself, ie "." */ /* * When we lookup "." we still can be asked to lock it * differently. */ ltype = lkflags & LK_TYPE_MASK; if (ltype != VOP_ISLOCKED(dvp)) { if (ltype == LK_EXCLUSIVE) vn_lock(dvp, LK_UPGRADE | LK_RETRY); else /* if (ltype == LK_SHARED) */ vn_lock(dvp, LK_DOWNGRADE | LK_RETRY); } tdp = dvp; } else error = udf_vget(udfmp->im_mountp, id, lkflags, &tdp); if (!error) { *vpp = tdp; /* Put this entry in the cache */ if (flags & MAKEENTRY) cache_enter(dvp, *vpp, a->a_cnp); } } else { /* Name wasn't found on this pass. Do another pass? 
*/ if (numdirpasses == 2) { numdirpasses--; offset = 0; udf_closedir(ds); goto lookloop; } udf_closedir(ds); /* Enter name into cache as non-existant */ if (flags & MAKEENTRY) cache_enter(dvp, *vpp, a->a_cnp); if ((flags & ISLASTCN) && (nameiop == CREATE || nameiop == RENAME)) { error = EROFS; } else { error = ENOENT; } } return (error); } static int udf_reclaim(struct vop_reclaim_args *a) { struct vnode *vp; struct udf_node *unode; vp = a->a_vp; unode = VTON(vp); if (unode != NULL) { vfs_hash_remove(vp); if (unode->fentry != NULL) free(unode->fentry, M_UDFFENTRY); uma_zfree(udf_zone_node, unode); vp->v_data = NULL; } return (0); } static int udf_vptofh(struct vop_vptofh_args *a) { struct udf_node *node; struct ifid *ifhp; + _Static_assert(sizeof(struct ifid) <= sizeof(struct fid), + "struct ifid cannot be larger than struct fid"); node = VTON(a->a_vp); ifhp = (struct ifid *)a->a_fhp; ifhp->ifid_len = sizeof(struct ifid); ifhp->ifid_ino = node->hash_id; return (0); } /* * Read the block and then set the data pointer to correspond with the * offset passed in. Only read in at most 'size' bytes, and then set 'size' * to the number of bytes pointed to. If 'size' is zero, try to read in a * whole extent. * * Note that *bp may be assigned error or not. * */ static int udf_readatoffset(struct udf_node *node, int *size, off_t offset, struct buf **bp, uint8_t **data) { struct udf_mnt *udfmp = node->udfmp; struct vnode *vp = node->i_vnode; struct file_entry *fentry; struct buf *bp1; uint32_t max_size; daddr_t sector; off_t off; int adj_size; int error; /* * This call is made *not* only to detect UDF_INVALID_BMAP case, * max_size is used as an ad-hoc read-ahead hint for "normal" case. */ error = udf_bmap_internal(node, offset, §or, &max_size); if (error == UDF_INVALID_BMAP) { /* * This error means that the file *data* is stored in the * allocation descriptor field of the file entry. 
*/ fentry = node->fentry; *data = &fentry->data[le32toh(fentry->l_ea)]; *size = le32toh(fentry->l_ad); if (offset >= *size) *size = 0; else { *data += offset; *size -= offset; } return (0); } else if (error != 0) { return (error); } /* Adjust the size so that it is within range */ if (*size == 0 || *size > max_size) *size = max_size; /* * Because we will read starting at block boundary, we need to adjust * how much we need to read so that all promised data is in. * Also, we can't promise to read more than MAXBSIZE bytes starting * from block boundary, so adjust what we promise too. */ off = blkoff(udfmp, offset); *size = min(*size, MAXBSIZE - off); adj_size = (*size + off + udfmp->bmask) & ~udfmp->bmask; *bp = NULL; if ((error = bread(vp, lblkno(udfmp, offset), adj_size, NOCRED, bp))) { printf("warning: udf_readlblks returned error %d\n", error); /* note: *bp may be non-NULL */ return (error); } bp1 = *bp; *data = (uint8_t *)&bp1->b_data[offset & udfmp->bmask]; return (0); } /* * Translate a file offset into a logical block and then into a physical * block. * max_size - maximum number of bytes that can be read starting from given * offset, rather than beginning of calculated sector number */ static int udf_bmap_internal(struct udf_node *node, off_t offset, daddr_t *sector, uint32_t *max_size) { struct udf_mnt *udfmp; struct file_entry *fentry; void *icb; struct icb_tag *tag; uint32_t icblen = 0; daddr_t lsector; int ad_offset, ad_num = 0; int i, p_offset; udfmp = node->udfmp; fentry = node->fentry; tag = &fentry->icbtag; switch (le16toh(tag->strat_type)) { case 4: break; case 4096: printf("Cannot deal with strategy4096 yet!\n"); return (ENODEV); default: printf("Unknown strategy type %d\n", tag->strat_type); return (ENODEV); } switch (le16toh(tag->flags) & 0x7) { case 0: /* * The allocation descriptor field is filled with short_ad's. * If the offset is beyond the current extent, look for the * next extent. 
*/ do { offset -= icblen; ad_offset = sizeof(struct short_ad) * ad_num; if (ad_offset > le32toh(fentry->l_ad)) { printf("File offset out of bounds\n"); return (EINVAL); } icb = GETICB(short_ad, fentry, le32toh(fentry->l_ea) + ad_offset); icblen = GETICBLEN(short_ad, icb); ad_num++; } while(offset >= icblen); lsector = (offset >> udfmp->bshift) + le32toh(((struct short_ad *)(icb))->pos); *max_size = icblen - offset; break; case 1: /* * The allocation descriptor field is filled with long_ad's * If the offset is beyond the current extent, look for the * next extent. */ do { offset -= icblen; ad_offset = sizeof(struct long_ad) * ad_num; if (ad_offset > le32toh(fentry->l_ad)) { printf("File offset out of bounds\n"); return (EINVAL); } icb = GETICB(long_ad, fentry, le32toh(fentry->l_ea) + ad_offset); icblen = GETICBLEN(long_ad, icb); ad_num++; } while(offset >= icblen); lsector = (offset >> udfmp->bshift) + le32toh(((struct long_ad *)(icb))->loc.lb_num); *max_size = icblen - offset; break; case 3: /* * This type means that the file *data* is stored in the * allocation descriptor field of the file entry. */ *max_size = 0; *sector = node->hash_id + udfmp->part_start; return (UDF_INVALID_BMAP); case 2: /* DirectCD does not use extended_ad's */ default: printf("Unsupported allocation descriptor %d\n", tag->flags & 0x7); return (ENODEV); } *sector = lsector + udfmp->part_start; /* * Check the sparing table. Each entry represents the beginning of * a packet. 
*/ if (udfmp->s_table != NULL) { for (i = 0; i< udfmp->s_table_entries; i++) { p_offset = lsector - le32toh(udfmp->s_table->entries[i].org); if ((p_offset < udfmp->p_sectors) && (p_offset >= 0)) { *sector = le32toh(udfmp->s_table->entries[i].map) + p_offset; break; } } } return (0); } diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index e9849008cde2..6e83741975fd 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -1,2102 +1,2104 @@ /*- * SPDX-License-Identifier: (BSD-2-Clause AND BSD-3-Clause) * * Copyright (c) 2002, 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and Network Associates Laboratories, the Security * Research Division of Network Associates, Inc. under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS * research program * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ... */ #include #include "opt_directio.h" #include "opt_ffs.h" #include "opt_ufs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef UFS_DIRHASH #include #endif #include #include #define ALIGNED_TO(ptr, s) \ (((uintptr_t)(ptr) & (_Alignof(s) - 1)) == 0) #ifdef DIRECTIO extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone); #endif static vop_fdatasync_t ffs_fdatasync; static vop_fsync_t ffs_fsync; static vop_getpages_t ffs_getpages; static vop_getpages_async_t ffs_getpages_async; static vop_lock1_t ffs_lock; #ifdef INVARIANTS static vop_unlock_t ffs_unlock_debug; #endif static vop_read_t ffs_read; static vop_write_t ffs_write; static int ffs_extread(struct vnode *vp, struct uio *uio, int ioflag); static int ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred); static vop_strategy_t ffsext_strategy; static vop_closeextattr_t ffs_closeextattr; static vop_deleteextattr_t ffs_deleteextattr; static vop_getextattr_t ffs_getextattr; static vop_listextattr_t ffs_listextattr; static vop_openextattr_t ffs_openextattr; static vop_setextattr_t ffs_setextattr; static vop_vptofh_t ffs_vptofh; static vop_vput_pair_t ffs_vput_pair; 
vop_fplookup_vexec_t ufs_fplookup_vexec; /* Global vfs data structures for ufs. */ struct vop_vector ffs_vnodeops1 = { .vop_default = &ufs_vnodeops, .vop_fsync = ffs_fsync, .vop_fdatasync = ffs_fdatasync, .vop_getpages = ffs_getpages, .vop_getpages_async = ffs_getpages_async, .vop_lock1 = ffs_lock, #ifdef INVARIANTS .vop_unlock = ffs_unlock_debug, #endif .vop_read = ffs_read, .vop_reallocblks = ffs_reallocblks, .vop_write = ffs_write, .vop_vptofh = ffs_vptofh, .vop_vput_pair = ffs_vput_pair, .vop_fplookup_vexec = ufs_fplookup_vexec, .vop_fplookup_symlink = VOP_EAGAIN, }; VFS_VOP_VECTOR_REGISTER(ffs_vnodeops1); struct vop_vector ffs_fifoops1 = { .vop_default = &ufs_fifoops, .vop_fsync = ffs_fsync, .vop_fdatasync = ffs_fdatasync, .vop_lock1 = ffs_lock, #ifdef INVARIANTS .vop_unlock = ffs_unlock_debug, #endif .vop_vptofh = ffs_vptofh, .vop_fplookup_vexec = VOP_EAGAIN, .vop_fplookup_symlink = VOP_EAGAIN, }; VFS_VOP_VECTOR_REGISTER(ffs_fifoops1); /* Global vfs data structures for ufs. */ struct vop_vector ffs_vnodeops2 = { .vop_default = &ufs_vnodeops, .vop_fsync = ffs_fsync, .vop_fdatasync = ffs_fdatasync, .vop_getpages = ffs_getpages, .vop_getpages_async = ffs_getpages_async, .vop_lock1 = ffs_lock, #ifdef INVARIANTS .vop_unlock = ffs_unlock_debug, #endif .vop_read = ffs_read, .vop_reallocblks = ffs_reallocblks, .vop_write = ffs_write, .vop_closeextattr = ffs_closeextattr, .vop_deleteextattr = ffs_deleteextattr, .vop_getextattr = ffs_getextattr, .vop_listextattr = ffs_listextattr, .vop_openextattr = ffs_openextattr, .vop_setextattr = ffs_setextattr, .vop_vptofh = ffs_vptofh, .vop_vput_pair = ffs_vput_pair, .vop_fplookup_vexec = ufs_fplookup_vexec, .vop_fplookup_symlink = VOP_EAGAIN, }; VFS_VOP_VECTOR_REGISTER(ffs_vnodeops2); struct vop_vector ffs_fifoops2 = { .vop_default = &ufs_fifoops, .vop_fsync = ffs_fsync, .vop_fdatasync = ffs_fdatasync, .vop_lock1 = ffs_lock, #ifdef INVARIANTS .vop_unlock = ffs_unlock_debug, #endif .vop_reallocblks = ffs_reallocblks, 
.vop_strategy = ffsext_strategy, .vop_closeextattr = ffs_closeextattr, .vop_deleteextattr = ffs_deleteextattr, .vop_getextattr = ffs_getextattr, .vop_listextattr = ffs_listextattr, .vop_openextattr = ffs_openextattr, .vop_setextattr = ffs_setextattr, .vop_vptofh = ffs_vptofh, .vop_fplookup_vexec = VOP_EAGAIN, .vop_fplookup_symlink = VOP_EAGAIN, }; VFS_VOP_VECTOR_REGISTER(ffs_fifoops2); /* * Synch an open file. */ /* ARGSUSED */ static int ffs_fsync(struct vop_fsync_args *ap) { struct vnode *vp; struct bufobj *bo; int error; vp = ap->a_vp; bo = &vp->v_bufobj; retry: error = ffs_syncvnode(vp, ap->a_waitfor, 0); if (error) return (error); if (ap->a_waitfor == MNT_WAIT && DOINGSOFTDEP(vp)) { error = softdep_fsync(vp); if (error) return (error); /* * The softdep_fsync() function may drop vp lock, * allowing for dirty buffers to reappear on the * bo_dirty list. Recheck and resync as needed. */ BO_LOCK(bo); if ((vp->v_type == VREG || vp->v_type == VDIR) && (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) { BO_UNLOCK(bo); goto retry; } BO_UNLOCK(bo); } if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), 0)) return (ENXIO); return (0); } int ffs_syncvnode(struct vnode *vp, int waitfor, int flags) { struct inode *ip; struct bufobj *bo; struct ufsmount *ump; struct buf *bp, *nbp; ufs_lbn_t lbn; int error, passes, wflag; bool still_dirty, unlocked, wait; ip = VTOI(vp); bo = &vp->v_bufobj; ump = VFSTOUFS(vp->v_mount); #ifdef WITNESS wflag = IS_SNAPSHOT(ip) ? LK_NOWITNESS : 0; #else wflag = 0; #endif /* * When doing MNT_WAIT we must first flush all dependencies * on the inode. */ if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT && (error = softdep_sync_metadata(vp)) != 0) { if (ffs_fsfail_cleanup(ump, error)) error = 0; return (error); } /* * Flush all dirty buffers associated with a vnode. */ error = 0; passes = 0; wait = false; /* Always do an async pass first. 
*/ unlocked = false; lbn = lblkno(ITOFS(ip), (ip->i_size + ITOFS(ip)->fs_bsize - 1)); BO_LOCK(bo); loop: TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) bp->b_vflags &= ~BV_SCANNED; TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { /* * Reasons to skip this buffer: it has already been considered * on this pass, the buffer has dependencies that will cause * it to be redirtied and it has not already been deferred, * or it is already being written. */ if ((bp->b_vflags & BV_SCANNED) != 0) continue; bp->b_vflags |= BV_SCANNED; /* * Flush indirects in order, if requested. * * Note that if only datasync is requested, we can * skip indirect blocks when softupdates are not * active. Otherwise we must flush them with data, * since dependencies prevent data block writes. */ if (waitfor == MNT_WAIT && bp->b_lblkno <= -UFS_NDADDR && (lbn_level(bp->b_lblkno) >= passes || ((flags & DATA_ONLY) != 0 && !DOINGSOFTDEP(vp)))) continue; if (bp->b_lblkno > lbn) panic("ffs_syncvnode: syncing truncated data."); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) { BO_UNLOCK(bo); } else if (wait) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK | wflag, BO_LOCKPTR(bo)) != 0) { BO_LOCK(bo); bp->b_vflags &= ~BV_SCANNED; goto next_locked; } } else continue; if ((bp->b_flags & B_DELWRI) == 0) panic("ffs_fsync: not dirty"); /* * Check for dependencies and potentially complete them. */ if (!LIST_EMPTY(&bp->b_dep) && (error = softdep_sync_buf(vp, bp, wait ? MNT_WAIT : MNT_NOWAIT)) != 0) { /* * Lock order conflict, buffer was already unlocked, * and vnode possibly unlocked. */ if (error == ERELOOKUP) { if (vp->v_data == NULL) return (EBADF); unlocked = true; if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT && (error = softdep_sync_metadata(vp)) != 0) { if (ffs_fsfail_cleanup(ump, error)) error = 0; return (unlocked && error == 0 ? ERELOOKUP : error); } /* Re-evaluate inode size */ lbn = lblkno(ITOFS(ip), (ip->i_size + ITOFS(ip)->fs_bsize - 1)); goto next; } /* I/O error. 
*/ if (error != EBUSY) { BUF_UNLOCK(bp); return (error); } /* If we deferred once, don't defer again. */ if ((bp->b_flags & B_DEFERRED) == 0) { bp->b_flags |= B_DEFERRED; BUF_UNLOCK(bp); goto next; } } if (wait) { bremfree(bp); error = bwrite(bp); if (ffs_fsfail_cleanup(ump, error)) error = 0; if (error != 0) return (error); } else if ((bp->b_flags & B_CLUSTEROK)) { (void) vfs_bio_awrite(bp); } else { bremfree(bp); (void) bawrite(bp); } next: /* * Since we may have slept during the I/O, we need * to start from a known point. */ BO_LOCK(bo); next_locked: nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd); } if (waitfor != MNT_WAIT) { BO_UNLOCK(bo); if ((flags & NO_INO_UPDT) != 0) return (unlocked ? ERELOOKUP : 0); error = ffs_update(vp, 0); if (error == 0 && unlocked) error = ERELOOKUP; return (error); } /* Drain IO to see if we're done. */ bufobj_wwait(bo, 0, 0); /* * Block devices associated with filesystems may have new I/O * requests posted for them even if the vnode is locked, so no * amount of trying will get them clean. We make several passes * as a best effort. * * Regular files may need multiple passes to flush all dependency * work as it is possible that we must write once per indirect * level, once for the leaf, and once for the inode and each of * these will be done with one sync and one async pass. */ if (bo->bo_dirty.bv_cnt > 0) { if ((flags & DATA_ONLY) == 0) { still_dirty = true; } else { /* * For data-only sync, dirty indirect buffers * are ignored. */ still_dirty = false; TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) { if (bp->b_lblkno > -UFS_NDADDR) { still_dirty = true; break; } } } if (still_dirty) { /* Write the inode after sync passes to flush deps. */ if (wait && DOINGSOFTDEP(vp) && (flags & NO_INO_UPDT) == 0) { BO_UNLOCK(bo); ffs_update(vp, 1); BO_LOCK(bo); } /* switch between sync/async. 
*/ wait = !wait; if (wait || ++passes < UFS_NIADDR + 2) goto loop; } } BO_UNLOCK(bo); error = 0; if ((flags & DATA_ONLY) == 0) { if ((flags & NO_INO_UPDT) == 0) error = ffs_update(vp, 1); if (DOINGSUJ(vp)) softdep_journal_fsync(VTOI(vp)); } else if ((ip->i_flags & (IN_SIZEMOD | IN_IBLKDATA)) != 0) { error = ffs_update(vp, 1); } if (error == 0 && unlocked) error = ERELOOKUP; if (error == 0) ip->i_flag &= ~IN_NEEDSYNC; return (error); } static int ffs_fdatasync(struct vop_fdatasync_args *ap) { return (ffs_syncvnode(ap->a_vp, MNT_WAIT, DATA_ONLY)); } static int ffs_lock( struct vop_lock1_args /* { struct vnode *a_vp; int a_flags; char *file; int line; } */ *ap) { #if !defined(NO_FFS_SNAPSHOT) || defined(DIAGNOSTIC) struct vnode *vp = ap->a_vp; #endif /* !NO_FFS_SNAPSHOT || DIAGNOSTIC */ #ifdef DIAGNOSTIC struct inode *ip; #endif /* DIAGNOSTIC */ int result; #ifndef NO_FFS_SNAPSHOT int flags; struct lock *lkp; /* * Adaptive spinning mixed with SU leads to trouble. use a giant hammer * and only use it when LK_NODDLKTREAT is set. Currently this means it * is only used during path lookup. */ if ((ap->a_flags & LK_NODDLKTREAT) != 0) ap->a_flags |= LK_ADAPTIVE; switch (ap->a_flags & LK_TYPE_MASK) { case LK_SHARED: case LK_UPGRADE: case LK_EXCLUSIVE: flags = ap->a_flags; for (;;) { #ifdef DEBUG_VFS_LOCKS VNPASS(vp->v_holdcnt != 0, vp); #endif /* DEBUG_VFS_LOCKS */ lkp = vp->v_vnlock; result = lockmgr_lock_flags(lkp, flags, &VI_MTX(vp)->lock_object, ap->a_file, ap->a_line); if (lkp == vp->v_vnlock || result != 0) break; /* * Apparent success, except that the vnode * mutated between snapshot file vnode and * regular file vnode while this process * slept. The lock currently held is not the * right lock. Release it, and try to get the * new lock. 
*/ lockmgr_unlock(lkp); if ((flags & (LK_INTERLOCK | LK_NOWAIT)) == (LK_INTERLOCK | LK_NOWAIT)) return (EBUSY); if ((flags & LK_TYPE_MASK) == LK_UPGRADE) flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE; flags &= ~LK_INTERLOCK; } #ifdef DIAGNOSTIC switch (ap->a_flags & LK_TYPE_MASK) { case LK_UPGRADE: case LK_EXCLUSIVE: if (result == 0 && vp->v_vnlock->lk_recurse == 0) { ip = VTOI(vp); if (ip != NULL) ip->i_lock_gen++; } } #endif /* DIAGNOSTIC */ break; default: #ifdef DIAGNOSTIC if ((ap->a_flags & LK_TYPE_MASK) == LK_DOWNGRADE) { ip = VTOI(vp); if (ip != NULL) ufs_unlock_tracker(ip); } #endif /* DIAGNOSTIC */ result = VOP_LOCK1_APV(&ufs_vnodeops, ap); break; } #else /* NO_FFS_SNAPSHOT */ /* * See above for an explanation. */ if ((ap->a_flags & LK_NODDLKTREAT) != 0) ap->a_flags |= LK_ADAPTIVE; #ifdef DIAGNOSTIC if ((ap->a_flags & LK_TYPE_MASK) == LK_DOWNGRADE) { ip = VTOI(vp); if (ip != NULL) ufs_unlock_tracker(ip); } #endif /* DIAGNOSTIC */ result = VOP_LOCK1_APV(&ufs_vnodeops, ap); #endif /* NO_FFS_SNAPSHOT */ #ifdef DIAGNOSTIC switch (ap->a_flags & LK_TYPE_MASK) { case LK_UPGRADE: case LK_EXCLUSIVE: if (result == 0 && vp->v_vnlock->lk_recurse == 0) { ip = VTOI(vp); if (ip != NULL) ip->i_lock_gen++; } } #endif /* DIAGNOSTIC */ return (result); } #ifdef INVARIANTS static int ffs_unlock_debug(struct vop_unlock_args *ap) { struct vnode *vp; struct inode *ip; vp = ap->a_vp; ip = VTOI(vp); if (ip->i_flag & UFS_INODE_FLAG_LAZY_MASK_ASSERTABLE) { if ((vp->v_mflag & VMP_LAZYLIST) == 0) { VI_LOCK(vp); VNASSERT((vp->v_mflag & VMP_LAZYLIST), vp, ("%s: modified vnode (%x) not on lazy list", __func__, ip->i_flag)); VI_UNLOCK(vp); } } KASSERT(vp->v_type != VDIR || vp->v_vnlock->lk_recurse != 0 || (ip->i_flag & IN_ENDOFF) == 0, ("ufs dir vp %p ip %p flags %#x", vp, ip, ip->i_flag)); #ifdef DIAGNOSTIC if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE && ip != NULL && vp->v_vnlock->lk_recurse == 0) ufs_unlock_tracker(ip); #endif return (VOP_UNLOCK_APV(&ufs_vnodeops, ap)); } #endif static int 
ffs_read_hole(struct uio *uio, long xfersize, long *size)
{
	/*
	 * Satisfy a read that landed on a hole: copy 'xfersize' bytes of
	 * zeroes from zero_region into the uio, at most ZERO_REGION_SIZE
	 * per step.  *size is reduced by the number of bytes actually
	 * moved.  Returns 0 or the error from vn_io_fault_uiomove().
	 */
	ssize_t saved_resid, tlen;
	int error;

	while (xfersize > 0) {
		tlen = min(xfersize, ZERO_REGION_SIZE);
		saved_resid = uio->uio_resid;
		error = vn_io_fault_uiomove(__DECONST(void *, zero_region),
		    tlen, uio);
		if (error != 0)
			return (error);
		/* Account for what was really transferred, not the request. */
		tlen = saved_resid - uio->uio_resid;
		xfersize -= tlen;
		*size -= tlen;
	}
	return (0);
}

/*
 * Vnode op for reading.
 *
 * Copies file data into ap->a_uio one filesystem block at a time until the
 * uio is exhausted or end of file is reached.  Returns 0 or an errno value;
 * EOVERFLOW is returned when the offset is inside the file but at or beyond
 * fs_maxfilesize.
 */
static int
ffs_read(
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	ssize_t orig_resid;
	int bflag, error, ioflag, seqcount;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	/* Extended-attribute reads are only wired up under "notyet". */
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extread(vp, uio, ioflag));
#else
		panic("ffs_read+IO_EXT");
#endif
#ifdef DIRECTIO
	if ((ioflag & IO_DIRECT) != 0) {
		int workdone;

		/* Done if the raw read failed or reports that it did work. */
		error = ffs_rawread(vp, uio, &workdone);
		if (error != 0 || workdone != 0)
			return error;
	}
#endif

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < VFSTOUFS(vp->v_mount)->um_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
	fs = ITOFS(ip);
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->fs_maxfilesize)
		return (EOVERFLOW);

	/* GB_NOSPARSE is requested for everything except UIO_NOCOPY. */
	bflag = GB_UNMAPPED | (uio->uio_segflg == UIO_NOCOPY ? 0 : GB_NOSPARSE);
#ifdef WITNESS
	bflag |= IS_SNAPSHOT(ip) ? GB_NOWITNESS : 0;
#endif
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_size, lbn,
			    size, NOCRED, blkoffset + uio->uio_resid,
			    seqcount, bflag, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = blksize(fs, ip, nextlbn);
			error = breadn_flags(vp, lbn, lbn, size, &nextlbn,
			    &nextsize, 1, NOCRED, bflag, NULL, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for. Interestingly, the same as
			 * the first option above.
			 */
			error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
		}
		/*
		 * EJUSTRETURN is treated as a hole: hand out zeroes
		 * instead of buffer contents and move on to the next block.
		 */
		if (error == EJUSTRETURN) {
			error = ffs_read_hole(uio, xfersize, &size);
			if (error == 0)
				continue;
		}
		if (error != 0) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		/* Unmapped buffers are copied page-wise via b_pages. */
		if (buf_mapped(bp)) {
			error = vn_io_fault_uiomove((char *)bp->b_data +
			    blkoffset, (int)xfersize, uio);
		} else {
			error = vn_io_fault_pgmove(bp->b_pages,
			    blkoffset + (bp->b_offset & PAGE_MASK),
			    (int)xfersize, uio);
		}
		if (error)
			break;

		vfs_bio_brelse(bp, ioflag);
	}

	/*
	 * bp is only non-NULL here when the loop was left through a
	 * 'break' with a buffer still held (a copy error, or a short read
	 * that yielded no data): the loop header resets bp to NULL on each
	 * iteration and normal completion never leaves a new value in it.
	 */
	if (bp != NULL)
		vfs_bio_brelse(bp, ioflag);

	/*
	 * Flag an access-time update if the read succeeded or moved any
	 * data, unless the mount is MNT_NOATIME or MNT_RDONLY.
	 */
	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
		UFS_INODE_SET_FLAG_SHARED(ip, IN_ACCESS);
	return (error);
}

/*
 * Vnode op for writing.
*/ static int ffs_write( struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap) { struct vnode *vp; struct uio *uio; struct inode *ip; struct fs *fs; struct buf *bp; ufs_lbn_t lbn; off_t osize; ssize_t resid, r; int seqcount; int blkoffset, error, flags, ioflag, size, xfersize; vp = ap->a_vp; if (DOINGSUJ(vp)) softdep_prealloc(vp, MNT_WAIT); if (vp->v_data == NULL) return (EBADF); uio = ap->a_uio; ioflag = ap->a_ioflag; if (ap->a_ioflag & IO_EXT) #ifdef notyet return (ffs_extwrite(vp, uio, ioflag, ap->a_cred)); #else panic("ffs_write+IO_EXT"); #endif seqcount = ap->a_ioflag >> IO_SEQSHIFT; ip = VTOI(vp); #ifdef INVARIANTS if (uio->uio_rw != UIO_WRITE) panic("ffs_write: mode"); #endif switch (vp->v_type) { case VREG: if (ioflag & IO_APPEND) uio->uio_offset = ip->i_size; if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) return (EPERM); /* FALLTHROUGH */ case VLNK: break; case VDIR: panic("ffs_write: dir write"); break; default: panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type, (int)uio->uio_offset, (int)uio->uio_resid ); } KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0")); KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0")); fs = ITOFS(ip); /* * Maybe this should be above the vnode op call, but so long as * file servers have no limits, I don't think it matters. 
*/ error = vn_rlimit_fsizex(vp, uio, fs->fs_maxfilesize, &r, uio->uio_td); if (error != 0) { vn_rlimit_fsizex_res(uio, r); return (error); } resid = uio->uio_resid; osize = ip->i_size; if (seqcount > BA_SEQMAX) flags = BA_SEQMAX << BA_SEQSHIFT; else flags = seqcount << BA_SEQSHIFT; if (ioflag & IO_SYNC) flags |= IO_SYNC; flags |= BA_UNMAPPED; for (error = 0; uio->uio_resid > 0;) { lbn = lblkno(fs, uio->uio_offset); blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->fs_bsize - blkoffset; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; if (uio->uio_offset + xfersize > ip->i_size) vnode_pager_setsize(vp, uio->uio_offset + xfersize); /* * We must perform a read-before-write if the transfer size * does not cover the entire buffer. */ if (fs->fs_bsize > xfersize) flags |= BA_CLRBUF; else flags &= ~BA_CLRBUF; /* XXX is uio->uio_offset the right thing here? */ error = UFS_BALLOC(vp, uio->uio_offset, xfersize, ap->a_cred, flags, &bp); if (error != 0) { vnode_pager_setsize(vp, ip->i_size); break; } if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL)) bp->b_flags |= B_NOCACHE; if (uio->uio_offset + xfersize > ip->i_size) { ip->i_size = uio->uio_offset + xfersize; DIP_SET(ip, i_size, ip->i_size); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE); } size = blksize(fs, ip, lbn) - bp->b_resid; if (size < xfersize) xfersize = size; if (buf_mapped(bp)) { error = vn_io_fault_uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); } else { error = vn_io_fault_pgmove(bp->b_pages, blkoffset + (bp->b_offset & PAGE_MASK), (int)xfersize, uio); } /* * If the buffer is not already filled and we encounter an * error while trying to fill it, we have to clear out any * garbage data from the pages instantiated for the buffer. * If we do not, a failed uiomove() during a write can leave * the prior contents of the pages exposed to a userland mmap. 
* * Note that we need only clear buffers with a transfer size * equal to the block size because buffers with a shorter * transfer size were cleared above by the call to UFS_BALLOC() * with the BA_CLRBUF flag set. * * If the source region for uiomove identically mmaps the * buffer, uiomove() performed the NOP copy, and the buffer * content remains valid because the page fault handler * validated the pages. */ if (error != 0 && (bp->b_flags & B_CACHE) == 0 && fs->fs_bsize == xfersize) { if (error == EFAULT && LIST_EMPTY(&bp->b_dep)) { bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE; brelse(bp); break; } else { vfs_bio_clrbuf(bp); } } vfs_bio_set_flags(bp, ioflag); /* * If IO_SYNC each buffer is written synchronously. Otherwise * if we have a severe page deficiency write the buffer * asynchronously. Otherwise try to cluster, and if that * doesn't do it then either do an async write (if O_DIRECT), * or a delayed write (if not). */ if (ioflag & IO_SYNC) { (void)bwrite(bp); } else if (vm_page_count_severe() || buf_dirty_count_severe() || (ioflag & IO_ASYNC)) { bp->b_flags |= B_CLUSTEROK; bawrite(bp); } else if (xfersize + blkoffset == fs->fs_bsize) { if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { bp->b_flags |= B_CLUSTEROK; cluster_write(vp, &ip->i_clusterw, bp, ip->i_size, seqcount, GB_UNMAPPED); } else { bawrite(bp); } } else if (ioflag & IO_DIRECT) { bp->b_flags |= B_CLUSTEROK; bawrite(bp); } else { bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } if (error || xfersize == 0) break; UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE); } /* * If we successfully wrote any data, and we are not the superuser * we clear the setuid and setgid bits as a precaution against * tampering. 
*/ if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ap->a_cred) { if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID)) { vn_seqc_write_begin(vp); UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID)); DIP_SET(ip, i_mode, ip->i_mode); vn_seqc_write_end(vp); } } if (error) { if (ioflag & IO_UNIT) { (void)ffs_truncate(vp, osize, IO_NORMAL | (ioflag & IO_SYNC), ap->a_cred); uio->uio_offset -= resid - uio->uio_resid; uio->uio_resid = resid; } } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) { if (!(ioflag & IO_DATASYNC) || (ip->i_flags & (IN_SIZEMOD | IN_IBLKDATA))) error = ffs_update(vp, 1); if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error)) error = ENXIO; } vn_rlimit_fsizex_res(uio, r); return (error); } /* * Extended attribute area reading. */ static int ffs_extread(struct vnode *vp, struct uio *uio, int ioflag) { struct inode *ip; struct ufs2_dinode *dp; struct fs *fs; struct buf *bp; ufs_lbn_t lbn, nextlbn; off_t bytesinfile; long size, xfersize, blkoffset; ssize_t orig_resid; int error; ip = VTOI(vp); fs = ITOFS(ip); dp = ip->i_din2; #ifdef INVARIANTS if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC) panic("ffs_extread: mode"); #endif orig_resid = uio->uio_resid; KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0")); if (orig_resid == 0) return (0); KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0")); for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0) break; lbn = lblkno(fs, uio->uio_offset); nextlbn = lbn + 1; /* * size of buffer. The buffer representing the * end of the file is rounded up to the size of * the block type ( fragment or full block, * depending ). */ size = sblksize(fs, dp->di_extsize, lbn); blkoffset = blkoff(fs, uio->uio_offset); /* * The amount we want to transfer in this iteration is * one FS block less the amount of the data before * our startpoint (duh!) 
*/ xfersize = fs->fs_bsize - blkoffset; /* * But if we actually want less than the block, * or the file doesn't have a whole block more of data, * then use the lesser number. */ if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; if (bytesinfile < xfersize) xfersize = bytesinfile; if (lblktosize(fs, nextlbn) >= dp->di_extsize) { /* * Don't do readahead if this is the end of the info. */ error = bread(vp, -1 - lbn, size, NOCRED, &bp); } else { /* * If we have a second block, then * fire off a request for a readahead * as well as a read. Note that the 4th and 5th * arguments point to arrays of the size specified in * the 6th argument. */ int nextsize = sblksize(fs, dp->di_extsize, nextlbn); nextlbn = -1 - nextlbn; error = breadn(vp, -1 - lbn, size, &nextlbn, &nextsize, 1, NOCRED, &bp); } if (error) { brelse(bp); bp = NULL; break; } /* * We should only get non-zero b_resid when an I/O error * has occurred, which should cause us to break above. * However, if the short read did not cause an error, * then we want to ensure that we do not uiomove bad * or uninitialized data. */ size -= bp->b_resid; if (size < xfersize) { if (size == 0) break; xfersize = size; } error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); if (error) break; vfs_bio_brelse(bp, ioflag); } /* * This can only happen in the case of an error * because the loop above resets bp to NULL on each iteration * and on normal completion has not set a new value into it. * so it must have come from a 'break' statement */ if (bp != NULL) vfs_bio_brelse(bp, ioflag); return (error); } /* * Extended attribute area writing. 
 */
/*
 * Copy uio into the extended attribute area of the UFS2 inode behind vp.
 *
 * With IO_APPEND the write starts at the current end of the area
 * (dp->di_extsize).  A write that would reach beyond UFS_NXADDR
 * filesystem blocks is rejected with EFBIG before anything is modified.
 *
 * If an error occurs and IO_UNIT is set, the area is truncated back to
 * the size it had on entry and uio is rewound to its initial state.
 * After a successful IO_SYNC write that transferred data the inode is
 * updated through ffs_update().
 *
 * Returns 0 on success or an errno value.
 */
static int
ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn;
	off_t osize;
	ssize_t resid;
	int blkoffset, error, flags, size, xfersize;

	ip = VTOI(vp);
	fs = ITOFS(ip);
	dp = ip->i_din2;

#ifdef INVARIANTS
	/* Only writes on a UFS2 filesystem are expected here. */
	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extwrite: mode");
#endif

	if (ioflag & IO_APPEND)
		uio->uio_offset = dp->di_extsize;
	KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
	KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
	/* The attribute area is limited to UFS_NXADDR blocks. */
	if ((uoff_t)uio->uio_offset + uio->uio_resid >
	    UFS_NXADDR * fs->fs_bsize)
		return (EFBIG);

	/* Remember the starting state so a failed write can be undone. */
	resid = uio->uio_resid;
	osize = dp->di_extsize;
	flags = IO_EXT;
	if (ioflag & IO_SYNC)
		flags |= IO_SYNC;

	/* One iteration per filesystem block touched by the write. */
	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;

		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ucred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);

		/* Grow the recorded size of the area if this write extends it. */
		if (uio->uio_offset + xfersize > dp->di_extsize) {
			dp->di_extsize = uio->uio_offset + xfersize;
			UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
		}

		/* Never copy more than the buffer actually provides. */
		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);

		vfs_bio_set_flags(bp, ioflag);

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * the buffer is written asynchronously when there is a severe
		 * shortage of pages or clean buffers, when the transfer
		 * reached the end of the block, or when IO_ASYNC or IO_DIRECT
		 * was requested.  In all remaining cases it becomes a delayed
		 * write.
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    xfersize + blkoffset == fs->fs_bsize ||
			    (ioflag & (IO_ASYNC | IO_DIRECT)))
			bawrite(bp);
		else
			bdwrite(bp);
		if (error || xfersize == 0)
			break;
		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) {
		if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID)) {
			vn_seqc_write_begin(vp);
			UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
			dp->di_mode = ip->i_mode;
			vn_seqc_write_end(vp);
		}
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			/* Undo the partial write: restore size and uio. */
			(void)ffs_truncate(vp, osize,
			    IO_EXT | (ioflag&IO_SYNC), ucred);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = ffs_update(vp, 1);
	return (error);
}

/*
 * Vnode operating to retrieve a named extended attribute.
 *
 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
 * the length of the EA, and possibly the pointer to the entry and to the data.
 */
/*
 * ptr/length describe the attribute area to search; ptr must be aligned
 * for struct extattr.  When a record with matching namespace and name is
 * found, *eapp (if eapp is not NULL) is set to the record, *eac (if eac
 * is not NULL) is set to its content, and EXTATTR_CONTENT_SIZE() of the
 * record is returned.  Returns -1 when no record matches.
 */
static int
ffs_findextattr(uint8_t *ptr, uint64_t length, int nspace, const char *name,
    struct extattr **eapp, uint8_t **eac)
{
	struct extattr *eap, *eaend;
	size_t nlen;

	nlen = strlen(name);

	KASSERT(ALIGNED_TO(ptr, struct extattr), ("unaligned"));
	eap = (struct extattr *)ptr;
	eaend = (struct extattr *)(ptr + length);
	/* Walk the chain of records until the end of the area. */
	for (; eap < eaend; eap = EXTATTR_NEXT(eap)) {
		KASSERT(EXTATTR_NEXT(eap) <= eaend,
		    ("extattr next %p beyond %p", EXTATTR_NEXT(eap), eaend));
		if (eap->ea_namespace != nspace || eap->ea_namelength != nlen
		    || memcmp(eap->ea_name, name, nlen) != 0)
			continue;
		if (eapp != NULL)
			*eapp = eap;
		if (eac != NULL)
			*eac = EXTATTR_CONTENT(eap);
		return (EXTATTR_CONTENT_SIZE(eap));
	}
	return (-1);
}

/*
 * Read the whole extended attribute area of vp into a newly allocated
 * M_TEMP buffer and validate its record chain.
 *
 * On success the buffer is stored in *p, the validated length is stored
 * in ip->i_ea_len, and 0 is returned; the caller owns the buffer.
 * Returns EFBIG if di_extsize exceeds UFS_NXADDR blocks, EINTEGRITY if a
 * record claims to extend past the end of the area, or the error from
 * ffs_extread().  The buffer is freed on every error path.
 */
static int
ffs_rdextattr(uint8_t **p, struct vnode *vp, struct thread *td)
{
	const struct extattr *eap, *eaend, *eapnext;
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct uio luio;
	struct iovec liovec;
	uint64_t easize;
	int error;
	uint8_t *eae;

	ip = VTOI(vp);
	fs = ITOFS(ip);
	dp = ip->i_din2;
	easize = dp->di_extsize;
	if ((uoff_t)easize > UFS_NXADDR * fs->fs_bsize)
		return (EFBIG);

	eae = malloc(easize, M_TEMP, M_WAITOK);

	/* Build a kernel-space uio covering the whole buffer. */
	liovec.iov_base = eae;
	liovec.iov_len = easize;
	luio.uio_iov = &liovec;
	luio.uio_iovcnt = 1;
	luio.uio_offset = 0;
	luio.uio_resid = easize;
	luio.uio_segflg = UIO_SYSSPACE;
	luio.uio_rw = UIO_READ;
	luio.uio_td = td;

	error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
	if (error) {
		free(eae, M_TEMP);
		return (error);
	}
	/* Validate disk xattrfile contents. */
	for (eap = (void *)eae, eaend = (void *)(eae + easize); eap < eaend;
	    eap = eapnext) {
		/*
		 * Detect zeroed out tail: a record shorter than its own
		 * header ends the chain, and the area length is trimmed
		 * to the start of that record.
		 */
		if (eap->ea_length < sizeof(*eap) || eap->ea_length == 0) {
			easize = (const uint8_t *)eap - eae;
			break;
		}

		eapnext = EXTATTR_NEXT(eap);
		/* Bogusly long entry. */
		if (eapnext > eaend) {
			free(eae, M_TEMP);
			return (EINTEGRITY);
		}
	}
	ip->i_ea_len = easize;
	*p = eae;
	return (0);
}

/*
 * Acquire the per-inode extended attribute lock.
 *
 * The lock is the IN_EA_LOCKED bit in ip->i_flag, manipulated under the
 * vnode interlock.  While the bit is set by someone else, IN_EA_LOCKWAIT
 * is set and the caller sleeps on &ip->i_ea_refs.
 */
static void
ffs_lock_ea(struct vnode *vp)
{
	struct inode *ip;

	ip = VTOI(vp);
	VI_LOCK(vp);
	while (ip->i_flag & IN_EA_LOCKED) {
		UFS_INODE_SET_FLAG(ip, IN_EA_LOCKWAIT);
		msleep(&ip->i_ea_refs, &vp->v_interlock, PINOD + 2, "ufs_ea",
		    0);
	}
	UFS_INODE_SET_FLAG(ip, IN_EA_LOCKED);
	VI_UNLOCK(vp);
}

/*
 * Release the per-inode extended attribute lock taken by ffs_lock_ea(),
 * waking up any sleepers that flagged IN_EA_LOCKWAIT.
 */
static void
ffs_unlock_ea(struct vnode *vp)
{
	struct inode *ip;

	ip = VTOI(vp);
	VI_LOCK(vp);
	if (ip->i_flag & IN_EA_LOCKWAIT)
		wakeup(&ip->i_ea_refs);
	ip->i_flag &= ~(IN_EA_LOCKED | IN_EA_LOCKWAIT);
	VI_UNLOCK(vp);
}

/*
 * Take a reference on the in-memory copy of the extended attribute area.
 *
 * If no copy exists yet it is loaded with ffs_rdextattr() and the
 * recorded transaction error (ip->i_ea_error) is reset.  Returns 0 on
 * success or the error from ffs_rdextattr(), in which case no reference
 * is taken.  cred is not used by this function.
 */
static int
ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	int error;

	ip = VTOI(vp);

	ffs_lock_ea(vp);
	if (ip->i_ea_area != NULL) {
		/* Already loaded: just add a reference. */
		ip->i_ea_refs++;
		ffs_unlock_ea(vp);
		return (0);
	}
	error = ffs_rdextattr(&ip->i_ea_area, vp, td);
	if (error) {
		ffs_unlock_ea(vp);
		return (error);
	}
	ip->i_ea_error = 0;
	ip->i_ea_refs++;
	ffs_unlock_ea(vp);
	return (0);
}

/*
 * Vnode extattr transaction commit/abort
 *
 * Drop one reference on the in-memory attribute area.  When commit is
 * non-zero and no error has been recorded in ip->i_ea_error, the area is
 * first written back with ffs_extwrite().  The in-memory copy is freed
 * when the last reference goes away.
 *
 * Returns EINVAL if no area is open, otherwise the recorded transaction
 * error or the result of the write.
 */
static int
ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct uio luio;
	struct iovec *liovec;
	struct ufs2_dinode *dp;
	size_t ea_len, tlen;
	int error, i, lcnt;
	bool truncate;

	ip = VTOI(vp);

	ffs_lock_ea(vp);
	if (ip->i_ea_area == NULL) {
		ffs_unlock_ea(vp);
		return (EINVAL);
	}
	dp = ip->i_din2;
	error = ip->i_ea_error;
	truncate = false;
	if (commit && error == 0) {
		ASSERT_VOP_ELOCKED(vp, "ffs_close_ea commit");
		if (cred == NOCRED)
			cred = vp->v_mount->mnt_cred;

		/*
		 * Write at least as many bytes as the on-disk area
		 * currently holds.  If the in-memory area is shorter, the
		 * remainder is filled from zero_region so that the old
		 * tail is overwritten with zeros.  Count the iovecs
		 * needed: one for the data plus one per
		 * ZERO_REGION_SIZE-sized chunk of padding.
		 */
		ea_len = MAX(ip->i_ea_len, dp->di_extsize);
		for (lcnt = 1, tlen = ea_len - ip->i_ea_len; tlen > 0;) {
			tlen -= MIN(ZERO_REGION_SIZE, tlen);
			lcnt++;
		}

		/* The iovec array lives on the stack of this call. */
		liovec = __builtin_alloca(lcnt * sizeof(struct iovec));
		luio.uio_iovcnt = lcnt;

		liovec[0].iov_base = ip->i_ea_area;
		liovec[0].iov_len = ip->i_ea_len;
		for (i = 1, tlen = ea_len - ip->i_ea_len; i < lcnt; i++) {
			liovec[i].iov_base = __DECONST(void *, zero_region);
			liovec[i].iov_len = MIN(ZERO_REGION_SIZE, tlen);
			tlen -= liovec[i].iov_len;
		}
		MPASS(tlen == 0);

		luio.uio_iov = liovec;
		luio.uio_offset = 0;
		luio.uio_resid = ea_len;
		luio.uio_segflg = UIO_SYSSPACE;
		luio.uio_rw = UIO_WRITE;
		luio.uio_td = td;
		error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
		/* Nothing left in the area: release it after unlocking. */
		if (error == 0 && ip->i_ea_len == 0)
			truncate = true;
	}
	if (--ip->i_ea_refs == 0) {
		free(ip->i_ea_area, M_TEMP);
		ip->i_ea_area = NULL;
		ip->i_ea_len = 0;
		ip->i_ea_error = 0;
	}
	ffs_unlock_ea(vp);

	if (truncate)
		ffs_truncate(vp, 0, IO_EXT, cred);
	return (error);
}

/*
 * Vnode extattr strategy routine.
 *
 * We need to check for a read or write of the external attributes.
 * Buffers with a logical block number in [-UFS_NXADDR, -1] on a UFS2
 * inode are passed to the regular ufs_vnodeops strategy routine.
 * Otherwise fifos are passed to ufs_fifoops; any other vnode type
 * reaching this point panics.
 */
static int
ffsext_strategy(
	struct vop_strategy_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap)
{
	struct vnode *vp;
	daddr_t lbn;

	vp = ap->a_vp;
	lbn = ap->a_bp->b_lblkno;
	if (I_IS_UFS2(VTOI(vp)) && lbn < 0 && lbn >= -UFS_NXADDR)
		return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
	if (vp->v_type == VFIFO)
		return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
	panic("spec nodes went here");
}

/*
 * Vnode extattr transaction begin
 *
 * Takes a reference on the in-memory attribute area through
 * ffs_open_ea().  Character and block device vnodes are rejected with
 * EOPNOTSUPP.
 */
static int
ffs_openextattr(
	struct vop_openextattr_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		IN struct ucred *a_cred;
		IN struct thread *a_td;
	} */ *ap)
{

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
}

/*
 * Vnode extattr transaction commit/abort
 *
 * a_commit selects between commit (non-zero) and abort (zero).  Device
 * vnodes are rejected with EOPNOTSUPP and a commit on a read-only mount
 * with EROFS; everything else is handled by ffs_close_ea().
 */
static int
ffs_closeextattr(
	struct vop_closeextattr_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_commit;
		IN struct ucred *a_cred;
		IN struct thread *a_td;
	} */ *ap)
{
	struct vnode *vp;

	vp = ap->a_vp;
	if (vp->v_type == VCHR || vp->v_type == VBLK)
		return (EOPNOTSUPP);
	if (ap->a_commit && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0)
		return (EROFS);

	if (ap->a_commit && DOINGSUJ(vp)) {
		ASSERT_VOP_ELOCKED(vp, "ffs_closeextattr commit");
		softdep_prealloc(vp, MNT_WAIT);
		/*
		 * NOTE(review): presumably the vnode can be reclaimed while
		 * softdep_prealloc() runs, hence the v_data check - confirm.
		 */
		if (vp->v_data == NULL)
			return (EBADF);
	}
	return (ffs_close_ea(vp, ap->a_commit, ap->a_cred, ap->a_td));
}

/*
 * Vnode operation to remove a named attribute.
 *
 * The attribute area is copied, the matching record is cut out of the
 * copy, the copy replaces the in-memory area and the result is committed
 * through ffs_close_ea().  Returns ENOATTR if the attribute does not
 * exist, EINVAL for an empty name, EROFS on a read-only mount and
 * EOPNOTSUPP for device vnodes.
 */
static int
ffs_deleteextattr(
	struct vop_deleteextattr_args /* {
		IN struct vnode *a_vp;
		IN int a_attrnamespace;
		IN const char *a_name;
		IN struct ucred *a_cred;
		IN struct thread *a_td;
	} */ *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct extattr *eap;
	uint32_t ul;
	int olen, error, i, easize;
	uint8_t *eae;
	void *tmp;

	vp = ap->a_vp;
	ip = VTOI(vp);

	if (vp->v_type == VCHR || vp->v_type == VBLK)
		return (EOPNOTSUPP);
	if (strlen(ap->a_name) == 0)
		return (EINVAL);
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	error = extattr_check_cred(vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VWRITE);
	if (error) {
		/*
		 * ffs_lock_ea is not needed here, because the vnode
		 * must be exclusively locked.  Record the failure so that
		 * an already open transaction will not be committed.
		 */
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	if (DOINGSUJ(vp)) {
		ASSERT_VOP_ELOCKED(vp, "ffs_deleteextattr");
		softdep_prealloc(vp, MNT_WAIT);
		if (vp->v_data == NULL)
			return (EBADF);
	}

	error = ffs_open_ea(vp, ap->a_cred, ap->a_td);
	if (error)
		return (error);

	/* CEM: delete could be done in-place instead */
	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    &eap, NULL);
	if (olen == -1) {
		/* delete but nonexistent */
		free(eae, M_TEMP);
		ffs_close_ea(vp, 0, ap->a_cred, ap->a_td);
		return (ENOATTR);
	}
	/* Slide everything after the record down over it. */
	ul = eap->ea_length;
	i = (uint8_t *)EXTATTR_NEXT(eap) - eae;
	bcopy(EXTATTR_NEXT(eap), eap, easize - i);
	easize -= ul;

	/* Install the edited copy and release the previous area. */
	tmp = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(tmp, M_TEMP);
	error = ffs_close_ea(vp, 1, ap->a_cred, ap->a_td);
	return (error);
}

/*
 * Vnode operation to retrieve a named extended attribute.
*/ static int ffs_getextattr( struct vop_getextattr_args /* { IN struct vnode *a_vp; IN int a_attrnamespace; IN const char *a_name; INOUT struct uio *a_uio; OUT size_t *a_size; IN struct ucred *a_cred; IN struct thread *a_td; } */ *ap) { struct inode *ip; uint8_t *eae, *p; unsigned easize; int error, ealen; ip = VTOI(ap->a_vp); if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) return (EOPNOTSUPP); error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, VREAD); if (error) return (error); error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); if (error) return (error); eae = ip->i_ea_area; easize = ip->i_ea_len; ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name, NULL, &p); if (ealen >= 0) { error = 0; if (ap->a_size != NULL) *ap->a_size = ealen; else if (ap->a_uio != NULL) error = uiomove(p, ealen, ap->a_uio); } else error = ENOATTR; ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); return (error); } /* * Vnode operation to retrieve extended attributes on a vnode. 
*/ static int ffs_listextattr( struct vop_listextattr_args /* { IN struct vnode *a_vp; IN int a_attrnamespace; INOUT struct uio *a_uio; OUT size_t *a_size; IN struct ucred *a_cred; IN struct thread *a_td; } */ *ap) { struct inode *ip; struct extattr *eap, *eaend; int error, ealen; ip = VTOI(ap->a_vp); if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK) return (EOPNOTSUPP); error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, VREAD); if (error) return (error); error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); if (error) return (error); error = 0; if (ap->a_size != NULL) *ap->a_size = 0; KASSERT(ALIGNED_TO(ip->i_ea_area, struct extattr), ("unaligned")); eap = (struct extattr *)ip->i_ea_area; eaend = (struct extattr *)(ip->i_ea_area + ip->i_ea_len); for (; error == 0 && eap < eaend; eap = EXTATTR_NEXT(eap)) { KASSERT(EXTATTR_NEXT(eap) <= eaend, ("extattr next %p beyond %p", EXTATTR_NEXT(eap), eaend)); if (eap->ea_namespace != ap->a_attrnamespace) continue; ealen = eap->ea_namelength; if (ap->a_size != NULL) *ap->a_size += ealen + 1; else if (ap->a_uio != NULL) error = uiomove(&eap->ea_namelength, ealen + 1, ap->a_uio); } ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); return (error); } /* * Vnode operation to set a named attribute. */ static int ffs_setextattr( struct vop_setextattr_args /* { IN struct vnode *a_vp; IN int a_attrnamespace; IN const char *a_name; INOUT struct uio *a_uio; IN struct ucred *a_cred; IN struct thread *a_td; } */ *ap) { struct vnode *vp; struct inode *ip; struct fs *fs; struct extattr *eap; uint32_t ealength, ul; ssize_t ealen; int olen, eapad1, eapad2, error, i, easize; uint8_t *eae; void *tmp; vp = ap->a_vp; ip = VTOI(vp); fs = ITOFS(ip); if (vp->v_type == VCHR || vp->v_type == VBLK) return (EOPNOTSUPP); if (strlen(ap->a_name) == 0) return (EINVAL); /* XXX Now unsupported API to delete EAs using NULL uio. 
*/ if (ap->a_uio == NULL) return (EOPNOTSUPP); if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); ealen = ap->a_uio->uio_resid; if (ealen < 0 || ealen > lblktosize(fs, UFS_NXADDR)) return (EINVAL); error = extattr_check_cred(vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, VWRITE); if (error) { /* * ffs_lock_ea is not needed there, because the vnode * must be exclusively locked. */ if (ip->i_ea_area != NULL && ip->i_ea_error == 0) ip->i_ea_error = error; return (error); } if (DOINGSUJ(vp)) { ASSERT_VOP_ELOCKED(vp, "ffs_deleteextattr"); softdep_prealloc(vp, MNT_WAIT); if (vp->v_data == NULL) return (EBADF); } error = ffs_open_ea(vp, ap->a_cred, ap->a_td); if (error) return (error); ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name); eapad1 = roundup2(ealength, 8) - ealength; eapad2 = roundup2(ealen, 8) - ealen; ealength += eapad1 + ealen + eapad2; /* * CEM: rewrites of the same size or smaller could be done in-place * instead. (We don't acquire any fine-grained locks in here either, * so we could also do bigger writes in-place.) 
*/ eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK); bcopy(ip->i_ea_area, eae, ip->i_ea_len); easize = ip->i_ea_len; olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name, &eap, NULL); if (olen == -1) { /* new, append at end */ KASSERT(ALIGNED_TO(eae + easize, struct extattr), ("unaligned")); eap = (struct extattr *)(eae + easize); easize += ealength; } else { ul = eap->ea_length; i = (uint8_t *)EXTATTR_NEXT(eap) - eae; if (ul != ealength) { bcopy(EXTATTR_NEXT(eap), (uint8_t *)eap + ealength, easize - i); easize += (ealength - ul); } } if (easize > lblktosize(fs, UFS_NXADDR)) { free(eae, M_TEMP); ffs_close_ea(vp, 0, ap->a_cred, ap->a_td); if (ip->i_ea_area != NULL && ip->i_ea_error == 0) ip->i_ea_error = ENOSPC; return (ENOSPC); } eap->ea_length = ealength; eap->ea_namespace = ap->a_attrnamespace; eap->ea_contentpadlen = eapad2; eap->ea_namelength = strlen(ap->a_name); memcpy(eap->ea_name, ap->a_name, strlen(ap->a_name)); bzero(&eap->ea_name[strlen(ap->a_name)], eapad1); error = uiomove(EXTATTR_CONTENT(eap), ealen, ap->a_uio); if (error) { free(eae, M_TEMP); ffs_close_ea(vp, 0, ap->a_cred, ap->a_td); if (ip->i_ea_area != NULL && ip->i_ea_error == 0) ip->i_ea_error = error; return (error); } bzero((uint8_t *)EXTATTR_CONTENT(eap) + ealen, eapad2); tmp = ip->i_ea_area; ip->i_ea_area = eae; ip->i_ea_len = easize; free(tmp, M_TEMP); error = ffs_close_ea(vp, 1, ap->a_cred, ap->a_td); return (error); } /* * Vnode pointer to File handle */ static int ffs_vptofh( struct vop_vptofh_args /* { IN struct vnode *a_vp; IN struct fid *a_fhp; } */ *ap) { struct inode *ip; struct ufid *ufhp; + _Static_assert(sizeof(struct ufid) <= sizeof(struct fid), + "struct ufid cannot be larger than struct fid"); ip = VTOI(ap->a_vp); ufhp = (struct ufid *)ap->a_fhp; ufhp->ufid_len = sizeof(struct ufid); ufhp->ufid_ino = ip->i_number; ufhp->ufid_gen = ip->i_gen; return (0); } SYSCTL_DECL(_vfs_ffs); static int use_buf_pager = 1; SYSCTL_INT(_vfs_ffs, OID_AUTO, use_buf_pager, 
CTLFLAG_RWTUN, &use_buf_pager, 0, "Always use buffer pager instead of bmap"); static daddr_t ffs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off) { return (lblkno(VFSTOUFS(vp->v_mount)->um_fs, off)); } static int ffs_gbp_getblksz(struct vnode *vp, daddr_t lbn, long *sz) { *sz = blksize(VFSTOUFS(vp->v_mount)->um_fs, VTOI(vp), lbn); return (0); } static int ffs_getpages(struct vop_getpages_args *ap) { struct vnode *vp; struct ufsmount *um; vp = ap->a_vp; um = VFSTOUFS(vp->v_mount); if (!use_buf_pager && um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE) return (vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL)); return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead, ffs_gbp_getblkno, ffs_gbp_getblksz)); } static int ffs_getpages_async(struct vop_getpages_async_args *ap) { struct vnode *vp; struct ufsmount *um; bool do_iodone; int error; vp = ap->a_vp; um = VFSTOUFS(vp->v_mount); do_iodone = true; if (um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE) { error = vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead, ap->a_iodone, ap->a_arg); if (error == 0) do_iodone = false; } else { error = vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead, ffs_gbp_getblkno, ffs_gbp_getblksz); } if (do_iodone && ap->a_iodone != NULL) ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error); return (error); } static int ffs_vput_pair(struct vop_vput_pair_args *ap) { struct mount *mp; struct vnode *dvp, *vp, *vp1, **vpp; struct inode *dp, *ip; ino_t ip_ino; uint64_t ip_gen; int error, vp_locked; dvp = ap->a_dvp; dp = VTOI(dvp); vpp = ap->a_vpp; vp = vpp != NULL ? 
*vpp : NULL; if ((dp->i_flag & (IN_NEEDSYNC | IN_ENDOFF)) == 0) { vput(dvp); if (vp != NULL && ap->a_unlock_vp) vput(vp); return (0); } mp = dvp->v_mount; if (vp != NULL) { if (ap->a_unlock_vp) { vput(vp); } else { MPASS(vp->v_type != VNON); vp_locked = VOP_ISLOCKED(vp); ip = VTOI(vp); ip_ino = ip->i_number; ip_gen = ip->i_gen; VOP_UNLOCK(vp); } } /* * If compaction or fsync was requested do it in ffs_vput_pair() * now that other locks are no longer held. */ if ((dp->i_flag & IN_ENDOFF) != 0) { VNASSERT(I_ENDOFF(dp) != 0 && I_ENDOFF(dp) < dp->i_size, dvp, ("IN_ENDOFF set but I_ENDOFF() is not")); dp->i_flag &= ~IN_ENDOFF; error = UFS_TRUNCATE(dvp, (off_t)I_ENDOFF(dp), IO_NORMAL | (DOINGASYNC(dvp) ? 0 : IO_SYNC), curthread->td_ucred); if (error != 0 && error != ERELOOKUP) { if (!ffs_fsfail_cleanup(VFSTOUFS(mp), error)) { vn_printf(dvp, "IN_ENDOFF: failed to truncate, " "error %d\n", error); } #ifdef UFS_DIRHASH ufsdirhash_free(dp); #endif } SET_I_ENDOFF(dp, 0); } if ((dp->i_flag & IN_NEEDSYNC) != 0) { do { error = ffs_syncvnode(dvp, MNT_WAIT, 0); } while (error == ERELOOKUP); } vput(dvp); if (vp == NULL || ap->a_unlock_vp) return (0); MPASS(mp != NULL); /* * It is possible that vp is reclaimed at this point. Only * routines that call us with a_unlock_vp == false can find * that their vp has been reclaimed. There are three areas * that are affected: * 1) vn_open_cred() - later VOPs could fail, but * dead_open() returns 0 to simulate successful open. * 2) ffs_snapshot() - creation of snapshot fails with EBADF. * 3) NFS server (several places) - code is prepared to detect * and respond to dead vnodes by returning ESTALE. */ VOP_LOCK(vp, vp_locked | LK_RETRY); if (IS_UFS(vp)) return (0); /* * Try harder to recover from reclaimed vp if reclaim was not * because underlying inode was cleared. We saved inode * number and inode generation, so we can try to reinstantiate * exactly same version of inode. 
If this fails, return * original doomed vnode and let caller to handle * consequences. * * Note that callers must keep write started around * VOP_VPUT_PAIR() calls, so it is safe to use mp without * busying it. */ VOP_UNLOCK(vp); error = ffs_inotovp(mp, ip_ino, ip_gen, LK_EXCLUSIVE, &vp1, FFSV_REPLACE_DOOMED); if (error != 0) { VOP_LOCK(vp, vp_locked | LK_RETRY); } else { vrele(vp); *vpp = vp1; } return (error); }